Use the AICallKit SDK to get Pulse Code Modulation (PCM) audio data from users or AI agents.
Introduction
PCM is a technology that converts analog signals into digital ones. The amplitude of the analog signals is sampled. Each sample is quantized to the nearest value within a range of digital steps. Then, the quantized values are encoded into binary numbers.
In Real-time Conversational AI, you can use the AICallKit SDK to get PCM audio data from the user or AI agent through the underlying ARTC engine that the SDK exposes.
Use cases
PCM audio data helps synchronize avatar lip movements, facial expressions, and gestures with spoken content for a more immersive experience. You can also store PCM data for later use, such as analysis.
Procedure
AICallKit SDK does not provide direct interfaces for PCM audio data. Instead, use the audio raw data callbacks from the underlying ARTC SDK through the AliRtcEngine object that AICallKit exposes.
Step 1: Obtain AliRtcEngine
Obtain the ARTC SDK engine object AliRtcEngine in the onCallBegin callback of the AICallKit SDK.
Sample code:
Android
/**
 * onCallBegin callback of the AICallKit SDK: fetch the underlying ARTC engine here.
 */
@Override
public void onCallBegin() {
    final AliRtcEngine artcEngine = mARTCAICallEngine.getRtcEngine();
    if (artcEngine == null) {
        // No engine object was returned, so there is nothing to call.
        return;
    }
    // Call RTC methods.
}
iOS
// Make sure your project module has been configured with the necessary dependencies for AliVCSDK_*** SDK. Otherwise, the import will fail.
#if canImport(AliVCSDK_ARTC)
import AliVCSDK_ARTC
#elseif canImport(AliVCSDK_InteractiveLive)
import AliVCSDK_InteractiveLive
#elseif canImport(AliVCSDK_Standard)
import AliVCSDK_Standard
#endif
/// `onCallBegin` callback of the AICallKit SDK: fetch the underlying ARTC engine here.
public func onCallBegin() {
    // The value returned by `getRTCInstance()` is downcast to `AliRtcEngine`;
    // stop early when the cast does not succeed.
    guard let rtcEngine = engine.getRTCInstance() as? AliRtcEngine else {
        return
    }
    // Call RTC methods.
}
Step 2: Implement the audio frame observer (AliRtcEngine.AliRtcAudioFrameObserver on Android, AliRtcAudioFrameDelegate on iOS)
The AliRtcAudioFrameObserver interface (AliRtcAudioFrameDelegate on iOS) provides callbacks for audio data at different stages, such as captured raw data, data after 3A audio processing, and ingested data. Choose the one that best suits your needs.
Sample code:
Android
/**
 * Activity that implements the ARTC audio frame observer so that it can receive
 * audio data at each stage. Every callback in this sample returns {@code false}.
 */
public class AUIAICallInCallActivity extends AppCompatActivity
        implements AliRtcEngine.AliRtcAudioFrameObserver {

    /** Captured audio data (corresponding to AliRtcAudioSourceCaptured). */
    @Override
    public boolean onCapturedAudioFrame(AliRtcEngine.AliRtcAudioFrame frame) {
        // Process captured audio data here.
        return false;
    }

    /** Audio data after 3A audio processing (corresponding to AliRtcAudioSourceProcessCaptured). */
    @Override
    public boolean onProcessCapturedAudioFrame(AliRtcEngine.AliRtcAudioFrame frame) {
        // Process audio data after 3A audio processing here.
        return false;
    }

    /** Ingested audio data (corresponding to AliRtcAudioSourcePub). */
    @Override
    public boolean onPublishAudioFrame(AliRtcEngine.AliRtcAudioFrame frame) {
        // Process ingested audio data here.
        return false;
    }

    /** Playback audio data (corresponding to AliRtcAudioSourcePlayback). */
    @Override
    public boolean onPlaybackAudioFrame(AliRtcEngine.AliRtcAudioFrame frame) {
        // Process playback audio data here.
        return false;
    }

    /** Mixed audio data from ingested and playback audio (corresponding to AliRtcAudioSourceMixedAll). */
    @Override
    public boolean onMixedAllAudioFrame(AliRtcEngine.AliRtcAudioFrame frame) {
        // Process mixed audio data here.
        return false;
    }

    /** Remote audio data from stream pulling (corresponding to AliRtcAudioSourceRemoteUser). */
    @Override
    public boolean onRemoteUserAudioFrame(String uid, AliRtcEngine.AliRtcAudioFrame frame) {
        // Process remote audio data here.
        return false;
    }
}
iOS
// MARK: - AliRtcAudioFrameDelegate

/// Audio frame callbacks for each audio stage. Every callback in this sample
/// returns `false`, matching the Android sample for the same callbacks.
extension AUIAICallViewController: AliRtcAudioFrameDelegate {
    /// Captured raw audio data (corresponding to AliRtcAudioSourceCaptured).
    public func onCapturedAudioFrame(_ frame: AliRtcAudioFrame) -> Bool {
        // Process captured audio raw data here.
        // Returns false like the other callbacks in this sample.
        // NOTE(review): confirm the meaning of the return value against the ARTC API reference.
        return false
    }

    /// Audio data after 3A audio processing (corresponding to AliRtcAudioSourceProcessCaptured).
    public func onProcessCapturedAudioFrame(_ frame: AliRtcAudioFrame) -> Bool {
        // Process audio data after 3A audio processing here.
        return false
    }

    /// Ingested audio data (corresponding to AliRtcAudioSourcePub).
    public func onPublishAudioFrame(_ frame: AliRtcAudioFrame) -> Bool {
        // Process ingested audio data here.
        return false
    }

    /// Playback audio data (corresponding to AliRtcAudioSourcePlayback).
    public func onPlaybackAudioFrame(_ frame: AliRtcAudioFrame) -> Bool {
        // Process playback audio data here.
        return false
    }

    /// Mixed audio data from ingested and playback audio (corresponding to AliRtcAudioSourceMixedAll).
    public func onMixedAllAudioFrame(_ frame: AliRtcAudioFrame) -> Bool {
        // Process mixed audio data here.
        return false
    }

    /// Remote audio data from stream pulling (corresponding to AliRtcAudioSourceRemoteUser).
    public func onRemoteUserAudioFrame(_ uid: String?, frame: AliRtcAudioFrame) -> Bool {
        // Process remote audio data here.
        return false
    }
}
Step 3: Register the PCM audio data callback
AliRtcEngine provides registerAudioFrameObserver to register audio raw data callbacks. Use enableAudioFrameObserver to enable or disable the callbacks.
Android
/**
 * onCallBegin callback of the AICallKit SDK: registers this activity for audio raw
 * data callbacks and enables them for the captured audio source.
 */
@Override
public void onCallBegin() {
    final AliRtcEngine artcEngine = mARTCAICallEngine.getRtcEngine();
    if (artcEngine == null) {
        // No engine object was returned, so no observer can be registered.
        return;
    }

    // Set audio raw data callbacks.
    artcEngine.registerAudioFrameObserver(AUIAICallInCallActivity.this);

    // Enable audio frame observer for specific audio source types:
    //   AliRtcAudioSourceCaptured        - captured audio data
    //   AliRtcAudioSourceProcessCaptured - audio data after 3A audio processing
    //   AliRtcAudioSourcePub             - ingested audio data
    //   AliRtcAudioSourcePlayback        - playback audio data
    //   AliRtcAudioSourceMixedAll        - mixed audio data from ingested and playback audio
    //   AliRtcAudioSourceRemoteUser      - remote audio data from stream pulling
    artcEngine.enableAudioFrameObserver(true, AliRtcAudioSourceCaptured, null);
}
iOS
// Make sure your project module has been configured with the necessary dependencies for AliVCSDK_*** SDK. Otherwise, the import will fail.
#if canImport(AliVCSDK_ARTC)
import AliVCSDK_ARTC
#elseif canImport(AliVCSDK_InteractiveLive)
import AliVCSDK_InteractiveLive
#elseif canImport(AliVCSDK_Standard)
import AliVCSDK_Standard
#endif
/// `onCallBegin` callback of the AICallKit SDK: registers this instance for audio
/// frame callbacks and enables them for the captured audio source.
public func onCallBegin() {
    // Nothing can be registered unless the RTC instance downcasts to `AliRtcEngine`.
    guard let rtcEngine = controller.currentEngine.getRTCInstance() as? AliRtcEngine else {
        return
    }
    // Register the instance as an audio frame observer.
    rtcEngine.registerAudioFrameObserver(self)
    // Enable the audio frame observer for one audio source type; `.captured`
    // is the captured audio data handled in onCapturedAudioFrame.
    rtcEngine.enableAudioFrameObserver(true, audioSource: .captured, config: nil)
}