CosyVoice
将合成音频保存为文件
将LLM生成的文本实时转成语音并通过扬声器播放
以下代码展示通过本地设备播放通义千问大语言模型(qwen-turbo)实时返回的文本内容。
运行 Python 示例前,需要通过 pip 安装第三方音频播放库 pyaudio(执行 pip install pyaudio)。
import pyaudio
import dashscope
from dashscope.audio.tts_v2 import *
from http import HTTPStatus
from dashscope import Generation
# CosyVoice synthesis model and timbre (voice) used by this example.
model = "cosyvoice-v1"
voice = "longxiaochun"
class Callback(ResultCallback):
    """Synthesis callback that plays received PCM audio through the speaker.

    Lifecycle: ``on_open`` creates the PyAudio player and output stream,
    ``on_data`` writes each audio chunk, ``on_close`` tears everything down.
    """

    _player = None
    _stream = None

    def on_open(self):
        print("websocket is open.")
        # 16-bit mono at 22050 Hz — must match the
        # AudioFormat.PCM_22050HZ_MONO_16BIT requested from the synthesizer.
        self._player = pyaudio.PyAudio()
        self._stream = self._player.open(
            format=pyaudio.paInt16, channels=1, rate=22050, output=True
        )

    def on_complete(self):
        print("speech synthesis task complete successfully.")

    def on_error(self, message: str):
        print(f"speech synthesis task failed, {message}")

    def on_close(self):
        print("websocket is closed.")
        # Guard against on_close firing before on_open succeeded
        # (e.g. the websocket connection failed immediately).
        if self._stream is not None:
            self._stream.stop_stream()
            self._stream.close()
        if self._player is not None:
            self._player.terminate()

    def on_event(self, message):
        # Fixed log-message typo: "synthsis" -> "synthesis".
        print(f"recv speech synthesis message {message}")

    def on_data(self, data: bytes) -> None:
        print("audio result length:", len(data))
        self._stream.write(data)
def synthesizer_with_llm():
    """Stream qwen-turbo's incremental text output into CosyVoice and play it."""
    callback = Callback()
    synthesizer = SpeechSynthesizer(
        model=model,
        voice=voice,
        format=AudioFormat.PCM_22050HZ_MONO_16BIT,
        callback=callback,
    )

    messages = [{"role": "user", "content": "请介绍一下你自己"}]
    responses = Generation.call(
        model="qwen-turbo",
        messages=messages,
        result_format="message",
        stream=True,
        incremental_output=True,
    )

    for response in responses:
        # Guard clause: report failed chunks, then keep consuming the stream.
        if response.status_code != HTTPStatus.OK:
            print(
                "Request id: %s, Status code: %s, error code: %s, error message: %s"
                % (
                    response.request_id,
                    response.status_code,
                    response.code,
                    response.message,
                )
            )
            continue
        # Each successful chunk is printed and fed to the synthesizer as-is.
        chunk = response.output.choices[0]["message"]["content"]
        print(chunk, end="")
        synthesizer.streaming_call(chunk)

    synthesizer.streaming_complete()
    print('requestId: ', synthesizer.get_last_request_id())
# Entry point: run the LLM-to-speech demo when executed as a script.
if __name__ == "__main__":
    synthesizer_with_llm()
import com.alibaba.dashscope.aigc.generation.Generation;
import com.alibaba.dashscope.aigc.generation.GenerationParam;
import com.alibaba.dashscope.aigc.generation.GenerationResult;
import com.alibaba.dashscope.audio.tts.SpeechSynthesisResult;
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisAudioFormat;
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesisParam;
import com.alibaba.dashscope.audio.ttsv2.SpeechSynthesizer;
import com.alibaba.dashscope.common.Message;
import com.alibaba.dashscope.common.ResultCallback;
import com.alibaba.dashscope.common.Role;
import com.alibaba.dashscope.exception.InputRequiredException;
import com.alibaba.dashscope.exception.NoApiKeyException;
import io.reactivex.Flowable;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicBoolean;
import javax.sound.sampled.*;
public class Main {
    // CosyVoice synthesis model and timbre (voice) used by this example.
    private static String model = "cosyvoice-v1";
    private static String voice = "longxiaochun";

    /**
     * Streams qwen-turbo's incremental text output into the CosyVoice
     * synthesizer and plays the returned PCM audio on the local speaker.
     *
     * @throws NoApiKeyException      if no DashScope API key is configured
     * @throws InputRequiredException if required request fields are missing
     */
    public static void process() throws NoApiKeyException, InputRequiredException {
        // Drains queued PCM buffers to the speaker on a dedicated thread.
        class PlaybackRunnable implements Runnable {
            // 22050 Hz, 16-bit, mono, signed, little-endian — must match the
            // PCM_22050HZ_MONO_16BIT format requested from the synthesizer.
            private AudioFormat af = new AudioFormat(22050, 16, 1, true, false);
            private DataLine.Info info = new DataLine.Info(SourceDataLine.class, af);
            private SourceDataLine targetSource = null;
            private AtomicBoolean runFlag = new AtomicBoolean(true);
            private ConcurrentLinkedQueue<ByteBuffer> queue =
                    new ConcurrentLinkedQueue<>();

            // Opens and starts the speaker line; call before starting the thread.
            public void prepare() throws LineUnavailableException {
                targetSource = (SourceDataLine) AudioSystem.getLine(info);
                targetSource.open(af, 4096);
                targetSource.start();
            }

            public void put(ByteBuffer buffer) {
                queue.add(buffer);
            }

            // Asks the playback loop to drain remaining audio and exit.
            public void stop() {
                runFlag.set(false);
            }

            @Override
            public void run() {
                if (targetSource == null) {
                    return; // prepare() was not called or failed
                }
                while (runFlag.get()) {
                    if (queue.isEmpty()) {
                        try {
                            // Poll-and-sleep: audio arrives in bursts, so a
                            // 100 ms nap keeps CPU usage negligible.
                            Thread.sleep(100);
                        } catch (InterruptedException e) {
                            // Fixed swallowed interrupt: restore the interrupt
                            // status and stop, so cancellation is honored.
                            Thread.currentThread().interrupt();
                            return;
                        }
                        continue;
                    }
                    ByteBuffer buffer = queue.poll();
                    if (buffer == null) {
                        continue;
                    }
                    byte[] data = buffer.array();
                    targetSource.write(data, 0, data.length);
                }
                // stop() was called: flush whatever is still queued.
                if (!queue.isEmpty()) {
                    ByteBuffer buffer = null;
                    while ((buffer = queue.poll()) != null) {
                        byte[] data = buffer.array();
                        targetSource.write(data, 0, data.length);
                    }
                }
                targetSource.drain();
                targetSource.stop();
                targetSource.close();
            }
        }

        // Forwards each synthesized audio frame to the playback queue and
        // signals the playback thread when synthesis ends (or fails).
        class ReactCallback extends ResultCallback<SpeechSynthesisResult> {
            private PlaybackRunnable playbackRunnable = null;

            public ReactCallback(PlaybackRunnable playbackRunnable) {
                this.playbackRunnable = playbackRunnable;
            }

            @Override
            public void onEvent(SpeechSynthesisResult result) {
                if (result.getAudioFrame() != null) {
                    playbackRunnable.put(result.getAudioFrame());
                }
            }

            @Override
            public void onComplete() {
                playbackRunnable.stop();
            }

            @Override
            public void onError(Exception e) {
                System.out.println(e);
                playbackRunnable.stop();
            }
        }

        PlaybackRunnable playbackRunnable = new PlaybackRunnable();
        try {
            playbackRunnable.prepare();
        } catch (LineUnavailableException e) {
            throw new RuntimeException(e);
        }
        Thread playbackThread = new Thread(playbackRunnable);
        playbackThread.start();

        Generation gen = new Generation();
        Message userMsg = Message.builder()
                .role(Role.USER.getValue())
                .content("请介绍一下你自己")
                .build();
        GenerationParam genParam =
                GenerationParam.builder()
                        .model("qwen-turbo")
                        .messages(Arrays.asList(userMsg))
                        .resultFormat(GenerationParam.ResultFormat.MESSAGE)
                        .topP(0.8)
                        .incrementalOutput(true)
                        .build();
        SpeechSynthesisParam param =
                SpeechSynthesisParam.builder()
                        .model(model)
                        .voice(voice)
                        .format(SpeechSynthesisAudioFormat.PCM_22050HZ_MONO_16BIT)
                        .build();
        SpeechSynthesizer synthesizer =
                new SpeechSynthesizer(param, new ReactCallback(playbackRunnable));

        // Feed each incremental LLM chunk straight into the streaming synthesizer.
        Flowable<GenerationResult> result = gen.streamCall(genParam);
        result.blockingForEach(message -> {
            String text =
                    message.getOutput().getChoices().get(0).getMessage().getContent();
            System.out.println("LLM output:" + text);
            synthesizer.streamingCall(text);
        });
        synthesizer.streamingComplete();
        System.out.print("requestId: " + synthesizer.getLastRequestId());

        try {
            playbackThread.join();
        } catch (InterruptedException e) {
            // Preserve the interrupt status for callers before failing fast.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
    }

    public static void main(String[] args) throws NoApiKeyException, InputRequiredException {
        process();
        System.exit(0);
    }
}
Sambert
合成语音后,通过本地设备播放实时返回的音频内容。
运行 Python 示例前,需要通过 pip 安装第三方音频播放库 pyaudio(执行 pip install pyaudio)。
Python
import dashscope
import sys
import pyaudio
from dashscope.api_entities.dashscope_response import SpeechSynthesisResponse
from dashscope.audio.tts import ResultCallback, SpeechSynthesizer, SpeechSynthesisResult
class Callback(ResultCallback):
    """Receives Sambert synthesis events and plays audio frames via PyAudio.

    ``on_open`` sets up a 48 kHz 16-bit mono output stream; ``on_event``
    writes each received frame; ``on_close`` releases the audio resources.
    """

    _player = None
    _stream = None

    def on_open(self):
        print('Speech synthesizer is opened.')
        self._player = pyaudio.PyAudio()
        self._stream = self._player.open(
            format=pyaudio.paInt16,
            channels=1,
            rate=48000,
            output=True)

    def on_complete(self):
        print('Speech synthesizer is completed.')

    def on_error(self, response: SpeechSynthesisResponse):
        print('Speech synthesizer failed, response is %s' % (str(response)))

    def on_close(self):
        print('Speech synthesizer is closed.')
        self._stream.stop_stream()
        self._stream.close()
        self._player.terminate()

    def on_event(self, result: SpeechSynthesisResult):
        # Events may carry an audio frame, a timestamp, or both.
        frame = result.get_audio_frame()
        if frame is not None:
            print('audio result length:', sys.getsizeof(frame))
            self._stream.write(frame)
        timestamp = result.get_timestamp()
        if timestamp is not None:
            print('timestamp result:', str(timestamp))
# Synthesize one fixed sentence; the call streams 48 kHz PCM audio to the
# callback above, which plays it through the local speaker.
callback = Callback()
result = SpeechSynthesizer.call(model='sambert-zhichu-v1',
                                text='今天天气怎么样',
                                sample_rate=48000,
                                format='pcm',
                                callback=callback)
# The request id ties this run to server-side logs for troubleshooting.
print('requestId: ', result.get_response()['request_id'])
Java
import com.alibaba.dashscope.audio.tts.SpeechSynthesizer;
import com.alibaba.dashscope.audio.tts.SpeechSynthesisAudioFormat;
import com.alibaba.dashscope.audio.tts.SpeechSynthesisParam;
import com.alibaba.dashscope.audio.tts.SpeechSynthesisResult;
import com.alibaba.dashscope.common.ResultCallback;
import java.nio.ByteBuffer;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicBoolean;
import javax.sound.sampled.*;
public class Main {
    /**
     * Synthesizes a fixed sentence with Sambert and streams the returned
     * 48 kHz PCM audio to the local speaker as it arrives.
     *
     * <p>NOTE(review): the method name misspells "Audio" ("Auido"); it is
     * kept unchanged for backward compatibility with existing callers.
     */
    public static void StreamAuidoDataToSpeaker() {
        // Released by the callback when synthesis completes or fails.
        CountDownLatch latch = new CountDownLatch(1);
        SpeechSynthesizer synthesizer = new SpeechSynthesizer();
        SpeechSynthesisParam param =
                SpeechSynthesisParam.builder()
                        .text("今天天气怎么样")
                        .model("sambert-zhichu-v1")
                        .sampleRate(48000)
                        .format(SpeechSynthesisAudioFormat.PCM)
                        .build();

        // Drains queued PCM buffers to the speaker on a dedicated thread.
        class PlaybackRunnable implements Runnable {
            // 48000 Hz, 16-bit, mono, signed, little-endian — must match the
            // sampleRate/format requested in the synthesis param above.
            private AudioFormat af = new AudioFormat(48000, 16, 1, true, false);
            private DataLine.Info info = new DataLine.Info(SourceDataLine.class, af);
            private SourceDataLine targetSource = null;
            private AtomicBoolean runFlag = new AtomicBoolean(true);
            private ConcurrentLinkedQueue<ByteBuffer> queue = new ConcurrentLinkedQueue<>();

            // Opens and starts the speaker line; call before starting the thread.
            public void prepare() throws LineUnavailableException {
                targetSource = (SourceDataLine) AudioSystem.getLine(info);
                targetSource.open(af, 4096);
                targetSource.start();
            }

            public void put(ByteBuffer buffer) {
                queue.add(buffer);
            }

            // Asks the playback loop to drain remaining audio and exit.
            public void stop() {
                runFlag.set(false);
            }

            @Override
            public void run() {
                if (targetSource == null) {
                    return; // prepare() was not called or failed
                }
                while (runFlag.get()) {
                    if (queue.isEmpty()) {
                        try {
                            // Poll-and-sleep: audio arrives in bursts, so a
                            // 100 ms nap keeps CPU usage negligible.
                            Thread.sleep(100);
                        } catch (InterruptedException e) {
                            // Fixed swallowed interrupt: restore the interrupt
                            // status and stop, so cancellation is honored.
                            Thread.currentThread().interrupt();
                            return;
                        }
                        continue;
                    }
                    ByteBuffer buffer = queue.poll();
                    if (buffer == null) {
                        continue;
                    }
                    byte[] data = buffer.array();
                    targetSource.write(data, 0, data.length);
                }
                // stop() was called: flush whatever is still queued.
                if (!queue.isEmpty()) {
                    ByteBuffer buffer = null;
                    while ((buffer = queue.poll()) != null) {
                        byte[] data = buffer.array();
                        targetSource.write(data, 0, data.length);
                    }
                }
                targetSource.drain();
                targetSource.stop();
                targetSource.close();
            }
        }

        // Forwards audio frames to the playback queue and releases the latch
        // when synthesis finishes (successfully or not).
        class ReactCallback extends ResultCallback<SpeechSynthesisResult> {
            private PlaybackRunnable playbackRunnable = null;

            public ReactCallback(PlaybackRunnable playbackRunnable) {
                this.playbackRunnable = playbackRunnable;
            }

            @Override
            public void onEvent(SpeechSynthesisResult result) {
                if (result.getAudioFrame() != null) {
                    playbackRunnable.put(result.getAudioFrame());
                }
            }

            @Override
            public void onComplete() {
                playbackRunnable.stop();
                latch.countDown();
            }

            @Override
            public void onError(Exception e) {
                System.out.println(e);
                playbackRunnable.stop();
                latch.countDown();
            }
        }

        PlaybackRunnable playbackRunnable = new PlaybackRunnable();
        try {
            playbackRunnable.prepare();
        } catch (LineUnavailableException e) {
            throw new RuntimeException(e);
        }
        Thread playbackThread = new Thread(playbackRunnable);
        playbackThread.start();

        synthesizer.call(param, new ReactCallback(playbackRunnable));
        System.out.println("requestId: " + synthesizer.getLastRequestId());
        try {
            // Wait for synthesis to finish, then for playback to drain.
            latch.await();
            playbackThread.join();
        } catch (InterruptedException e) {
            // Preserve the interrupt status for callers before failing fast.
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
        }
    }

    public static void main(String[] args) {
        StreamAuidoDataToSpeaker();
        System.exit(0);
    }
}