Get PCM audio data

Last updated:
Copy as Markdown

Use the AICallKit SDK to get Pulse Code Modulation (PCM) audio data from users or AI agents.

Introduction

PCM is a technology that converts analog signals into digital ones. The amplitude of the analog signals is sampled. Each sample is quantized to the nearest value within a range of digital steps. Then, the quantized values are encoded into binary numbers.

In Real-time Conversational AI, you can get PCM audio data from the user or the AI agent through the ARTC engine that the AICallKit SDK exposes.

Use cases

PCM audio data helps synchronize avatar lip movements, facial expressions, and gestures with spoken content for a more immersive experience. You can also store PCM data for later use, such as analysis.

Procedure

AICallKit SDK does not provide direct interfaces for PCM audio data. Instead, use the audio raw data callbacks from the underlying ARTC SDK through the AliRtcEngine object that AICallKit exposes.

Step 1: Obtain AliRtcEngine

Obtain the ARTC SDK engine object AliRtcEngine in the onCallBegin callback of the AICallKit SDK.

Sample code:

Android

/**
 * AICallKit callback for the start of a call. Fetches the underlying ARTC
 * engine from the call engine so that RTC methods can be invoked on it.
 */
@Override
public void onCallBegin() {
    final AliRtcEngine artcEngine = mARTCAICallEngine.getRtcEngine();
    if (artcEngine == null) {
        // No RTC engine was returned, so there is nothing to call.
        return;
    }
    // Call RTC methods.
}

iOS

// Make sure your project module has been configured with the necessary dependencies for AliVCSDK_*** SDK. Otherwise, the import will fail.
#if canImport(AliVCSDK_ARTC)
import AliVCSDK_ARTC
#elseif canImport(AliVCSDK_InteractiveLive)
import AliVCSDK_InteractiveLive
#elseif canImport(AliVCSDK_Standard)
import AliVCSDK_Standard
#endif

/// AICallKit callback for the start of a call; obtains the underlying ARTC engine here.
public func onCallBegin() {
    // NOTE(review): `engine` is presumably the AICallKit call engine held by this object;
    // confirm the property path in your own project.
    guard let rtcEngine = engine.getRTCInstance() as? AliRtcEngine else {
        // The RTC instance is missing or is not an AliRtcEngine.
        return
    }
    // Call RTC methods.
}

Step 2: Implement AliRtcEngine.AliRtcAudioFrameObserver

The AliRtcAudioFrameObserver interface (AliRtcAudioFrameDelegate on iOS) provides callbacks for audio data at different stages, such as captured raw data, data after 3A audio processing, and ingested data. Process the data in the callback that best suits your needs.

Sample code:

Android

/**
 * Activity that implements AliRtcEngine.AliRtcAudioFrameObserver so it can
 * receive audio frames from the ARTC engine. Each callback corresponds to one
 * audio source type; every implementation here is a placeholder.
 *
 * NOTE(review): the meaning of the boolean return value is not shown in this
 * sample; confirm it against the ARTC SDK reference before relying on it.
 */
public class AUIAICallInCallActivity extends AppCompatActivity implements AliRtcEngine.AliRtcAudioFrameObserver{

    /** Receives captured audio data. */
     @Override
    public boolean onCapturedAudioFrame(AliRtcEngine.AliRtcAudioFrame frame) {
        // Process captured audio data (corresponding to AliRtcAudioSourceCaptured).
        return false;
    }
    /** Receives audio data after 3A audio processing. */
    @Override
    public boolean onProcessCapturedAudioFrame(AliRtcEngine.AliRtcAudioFrame frame){
        // Process audio data after 3A audio processing (corresponding to AliRtcAudioSourceProcessCaptured).
        return false;
    }
    /** Receives ingested (published) audio data. */
    @Override
    public boolean onPublishAudioFrame(AliRtcEngine.AliRtcAudioFrame frame){
        // Process ingested audio data (corresponding to AliRtcAudioSourcePub).
        return false;
    }
    /** Receives playback audio data. */
    @Override
    public boolean onPlaybackAudioFrame(AliRtcEngine.AliRtcAudioFrame frame) {
        // Process playback audio data (corresponding to AliRtcAudioSourcePlayback).
        return false;
    }
    /** Receives the mix of ingested and playback audio data. */
    @Override
    public boolean onMixedAllAudioFrame(AliRtcEngine.AliRtcAudioFrame frame){
        // Process mixed audio data from ingested and playback audio (corresponding to AliRtcAudioSourceMixedAll).
        return false;
    }
    /** Receives remote audio data pulled for the user identified by uid. */
    @Override
    public boolean onRemoteUserAudioFrame(String uid, AliRtcEngine.AliRtcAudioFrame frame){
        // Process remote audio data from stream pulling (corresponding to AliRtcAudioSourceRemoteUser).
        return false;
    }
}

iOS

// MARK: - AliRtcAudioFrameDelegate

/// Conforms the call view controller to `AliRtcAudioFrameDelegate` so it can receive
/// audio frames from the ARTC engine. Each callback corresponds to one audio source
/// type; every implementation here is a placeholder.
///
/// NOTE(review): the meaning of the `Bool` return value is not shown in this sample;
/// confirm it against the ARTC SDK reference before relying on it.
extension AUIAICallViewController: AliRtcAudioFrameDelegate {
    
    /// Receives captured raw audio data.
    public func onCapturedAudioFrame(_ frame: AliRtcAudioFrame) -> Bool {
        // Process captured audio raw data (corresponding to AliRtcAudioSourceCaptured).
        // NOTE(review): this is the only callback that returns true; the other callbacks
        // in this extension return false. Confirm which value is intended.
        return true
    }

    /// Receives audio data after 3A audio processing.
    public func onProcessCapturedAudioFrame(_ frame: AliRtcAudioFrame) -> Bool {
        // Process audio data after 3A audio processing (corresponding to AliRtcAudioSourceProcessCaptured).
        return false
    }

    /// Receives ingested (published) audio data.
    public func onPublishAudioFrame(_ frame: AliRtcAudioFrame) -> Bool {
        // Process ingested audio data (corresponding to AliRtcAudioSourcePub).
        return false
    }

    /// Receives playback audio data.
    public func onPlaybackAudioFrame(_ frame: AliRtcAudioFrame) -> Bool {
        // Process playback audio data (corresponding to AliRtcAudioSourcePlayback).
        return false
    }

    /// Receives the mix of ingested and playback audio data.
    public func onMixedAllAudioFrame(_ frame: AliRtcAudioFrame) -> Bool {
        // Process mixed audio data from ingested and playback audio (corresponding to AliRtcAudioSourceMixedAll).
        return false
    }

    /// Receives remote audio data pulled for the user identified by `uid`.
    public func onRemoteUserAudioFrame(_ uid: String?, frame: AliRtcAudioFrame) -> Bool {
        // Process remote audio data from stream pulling (corresponding to AliRtcAudioSourceRemoteUser).
        return false
    }

}

Step 3: Register PCM audio data callback interface

AliRtcEngine provides registerAudioFrameObserver to register the raw audio data callbacks. Call enableAudioFrameObserver to enable or disable the callbacks for a specific audio source type.

Android

/**
 * AICallKit callback for the start of a call. Registers this activity as the
 * audio frame observer on the underlying ARTC engine and enables the callback
 * for captured audio data.
 */
@Override
public void onCallBegin() {
    final AliRtcEngine artcEngine = mARTCAICallEngine.getRtcEngine();
    if (artcEngine == null) {
        // No RTC engine was returned, so no observer can be registered.
        return;
    }

    // Route the raw audio data callbacks to this activity.
    artcEngine.registerAudioFrameObserver(AUIAICallInCallActivity.this);

    // Turn the observer on for one audio source type. Available source types:
    //   AliRtcAudioSourceCaptured        - captured audio data
    //   AliRtcAudioSourceProcessCaptured - audio data after 3A audio processing
    //   AliRtcAudioSourcePub             - ingested audio data
    //   AliRtcAudioSourcePlayback        - playback audio data
    //   AliRtcAudioSourceMixedAll        - mixed audio data from ingested and playback audio
    //   AliRtcAudioSourceRemoteUser      - remote audio data from stream pulling
    // NOTE(review): the constant is referenced unqualified; presumably it needs a static
    // import or qualification by its enclosing type. Confirm against the ARTC SDK.
    artcEngine.enableAudioFrameObserver(true, AliRtcAudioSourceCaptured, null);
}

iOS

// Make sure your project module has been configured with the necessary dependencies for AliVCSDK_*** SDK. Otherwise, the import will fail.
#if canImport(AliVCSDK_ARTC)
import AliVCSDK_ARTC
#elseif canImport(AliVCSDK_InteractiveLive)
import AliVCSDK_InteractiveLive
#elseif canImport(AliVCSDK_Standard)
import AliVCSDK_Standard
#endif

/// AICallKit callback for the start of a call. Registers this object as the audio
/// frame observer on the underlying ARTC engine and enables the callback for
/// captured audio data.
public func onCallBegin() {
    guard let rtcEngine = controller.currentEngine.getRTCInstance() as? AliRtcEngine else {
        // The RTC instance is missing or is not an AliRtcEngine; nothing to register.
        return
    }

    // Route the audio frame callbacks to this instance.
    rtcEngine.registerAudioFrameObserver(self)

    // Turn the observer on for one audio source type; `.captured` corresponds to
    // onCapturedAudioFrame (captured audio data).
    rtcEngine.enableAudioFrameObserver(true, audioSource: .captured, config: nil)
}