音色克隆接入指南

更新时间:
复制为 MD 格式

本文详细介绍了通过API实现音色上传、克隆任务创建及状态轮询等音色合成服务的关键技术流程。

操作流程

  1. 使用获取上传凭证API获取音色上传凭证并上传文件。

  2. 通过自定义语音克隆API创建音色克隆任务。

  3. 通过查询音色API轮询音色克隆状态,获取音色克隆结果。

1 上传音色

  1. 使用获取上传凭证API获取音色上传凭证。

  2. 上传音色文件,音色文件要求如下:

    1. 格式:wav、mp3、m4a。

    2. 采样率:大于等于 16000Hz。

    3. 文件大小:10MB 以内。

    4. 音频时长:10s~60s。

1.1 获取音色上传凭证

/**
 * Requests an upload credential from the service via {@code getUploadPolicy}.
 *
 * @param client the API client used to issue the request
 * @param type   the upload type sent to the service; the voice-cloning flow in this guide
 *               passes {@code "INPUT_TRAIN_AUDIO"}
 * @param file   the audio file that will be uploaded afterwards; it is not read in this step
 * @return the credential data taken from the response body
 * @throws Exception if the API call fails
 */
private static GetUploadPolicyResponseBody.GetUploadPolicyResponseBodyData upload(Client client, String type, File file) throws Exception {
    GetUploadPolicyRequest getUploadPolicyRequest = new GetUploadPolicyRequest();
    // Honor the caller-supplied type instead of silently overriding it with a literal.
    getUploadPolicyRequest.setType(type);
    GetUploadPolicyResponse uploadPolicy = client.getUploadPolicy(getUploadPolicyRequest);
    GetUploadPolicyResponseBody.GetUploadPolicyResponseBodyData data = uploadPolicy.getBody().getData();
    System.out.println("获取上传凭证:" + JSON.toJSONString(data));
    return data;
}

1.2 上传音色文件

/**
 * Uploads the audio file with a multipart form POST to the host named in the upload credential.
 *
 * <p>The object key is {@code data.getOssKey() + "/" + file.getName()}. The status line and any
 * response body are printed to standard error.
 *
 * @param data the upload credential (host, policy, access id, signature and key prefix)
 * @param file the local audio file to upload
 * @throws IOException if the request cannot be executed or the server answers with a
 *                     non-2xx status
 */
private static void uploadFile(GetUploadPolicyResponseBody.GetUploadPolicyResponseBodyData data, File file) throws IOException {
    HttpPost uploadFile = new HttpPost("https://" + data.getOssPolicy().getHost());
    ContentType contentType = ContentType.create("multipart/form-data", Consts.UTF_8);

    MultipartEntityBuilder builder = MultipartEntityBuilder.create();
    // Form fields required by OSS.
    builder.addTextBody("key", data.getOssKey() + "/" + file.getName(), contentType);
    builder.addTextBody("policy", data.getOssPolicy().getPolicy(), contentType);
    builder.addTextBody("OSSAccessKeyId", data.getOssPolicy().getAccessId(), contentType);
    builder.addTextBody("signature", data.getOssPolicy().getSignature(), contentType);
    // The file part is added after all other fields. Passing the File itself (rather than a
    // stream opened here) lets the entity open and close the file when it is written, so no
    // stream is left open by this method.
    // NOTE(review): the part's content type is fixed to audio/mpeg regardless of the file extension.
    builder.addBinaryBody("file", file, ContentType.create("audio/mpeg"), file.getName());
    uploadFile.setEntity(builder.build());

    try (CloseableHttpClient httpClient = HttpClients.createDefault();
         CloseableHttpResponse response = httpClient.execute(uploadFile)) {
        System.err.println("Upload result: " + response.getStatusLine());
        HttpEntity responseEntity = response.getEntity();
        if (responseEntity != null) {
            String responseString = EntityUtils.toString(responseEntity);
            System.err.println("Response content: " + responseString);
        }
        // Surface a rejected upload instead of letting later steps run against a missing object.
        int statusCode = response.getStatusLine().getStatusCode();
        if (statusCode < 200 || statusCode >= 300) {
            throw new IOException("Upload failed: " + response.getStatusLine());
        }
    }
}

2 创建音色克隆任务

/**
 * Creates a custom (cloned) voice from an audio file that was uploaded beforehand.
 *
 * @param client       the API client used to issue the request
 * @param ttsVoiceData the upload credential whose OSS key identifies the uploaded audio
 * @param ttsVoiceFile the local audio file; only its name is sent
 * @return the raw response of the create call
 * @throws Exception if the API call fails
 */
private static CreateTTSVoiceCustomResponse createTTSVoiceCustom(Client client, GetUploadPolicyResponseBody.GetUploadPolicyResponseBodyData ttsVoiceData, File ttsVoiceFile) throws Exception {
    final String ossKey = ttsVoiceData.getOssKey();
    final String fileName = ttsVoiceFile.getName();

    // Step 2: describe the voice to clone.
    CreateTTSVoiceCustomRequest request = new CreateTTSVoiceCustomRequest();
    request.setName("自定义音色创建接口Demo测试");
    request.setOssKey(ossKey);
    request.setFileName(fileName);
    // Accepted values are FEMALE and MALE; the value may be left empty.
    request.setGender("");

    CreateTTSVoiceCustomResponse response = client.createTTSVoiceCustom(request);
    System.out.println("创建自定义音色:" + JSON.toJSONString(response));
    return response;
}

3 获取音色克隆结果

使用上一步返回的音色ID,通过查询音色API查询音色克隆任务的状态。音色克隆需要一定的时间,因此需要定时轮询该接口,直到状态变为成功或失败;建议轮询间隔为3s,轮询过于频繁可能会导致查询失败。状态为成功时,可以使用该音色进行数字人创建等操作;状态为失败时,可根据返回的失败原因修改后重新提交克隆任务。下面的示例方法只执行一次查询,调用方需按上述间隔循环调用。

/**
 * Queries the details of a custom voice once and prints the response as JSON.
 *
 * <p>This method performs a single query; it does not loop or wait.
 *
 * @param client  the API client used to issue the request
 * @param voiceId the id of the voice to look up
 * @return the raw response of the query call
 * @throws Exception if the API call fails
 */
private static GetTTSVoiceByIdCustomResponse getTTSVoiceByIdCustom(Client client, String voiceId) throws Exception {
    GetTTSVoiceByIdCustomRequest request = new GetTTSVoiceByIdCustomRequest();
    request.setVoiceId(voiceId);

    GetTTSVoiceByIdCustomResponse response = client.getTTSVoiceByIdCustom(request);
    String responseJson = JSON.toJSONString(response);
    System.out.println("查询音色详情:" + responseJson);
    return response;
}

音色克隆完整调用示例代码

引入SDK

<dependency>
    <groupId>com.aliyun</groupId>
    <artifactId>lingmou20250527</artifactId>
    <version>1.5.3</version>
</dependency>

示例代码

package com.alibaba.humanaigc.console.server.start;

import com.alibaba.fastjson.JSON;
import com.aliyun.lingmou20250527.Client;
import com.aliyun.lingmou20250527.models.*;
import com.aliyun.teaopenapi.models.Config;
import org.apache.http.Consts;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.mime.MultipartEntityBuilder;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;

/**
 * End-to-end sample for the voice-cloning flow: obtain an upload credential, upload the audio
 * file, create the custom voice, query its details once, and list private voices.
 */
public class Demo {

    /**
     * Runs the sample flow against the service.
     *
     * @param args unused
     * @throws Exception if any API call or the file upload fails
     */
    public static void main(String[] args) throws Exception {

        Client client = Demo.getInstance();
        // 1. Upload the voice sample.
        File ttsVoiceFile = new File("/Users/admin/Downloads/日语清唱_副本.MP3");
        GetUploadPolicyResponseBody.GetUploadPolicyResponseBodyData ttsVoiceData = upload(client, "INPUT_TRAIN_AUDIO", ttsVoiceFile);

        // 2. Create the custom voice from the uploaded sample.
        CreateTTSVoiceCustomResponse ttsVoiceCustom = createTTSVoiceCustom(client, ttsVoiceData, ttsVoiceFile);

        // 3. Query the voice details (single query; no polling is done here).
        getTTSVoiceByIdCustom(client, ttsVoiceCustom.getBody().getData().getId());

        // 4. List private voices.
        listPrivateTTSVoicesCustom(client);
    }

    /**
     * Requests an upload credential for the given type and uploads the file with it.
     *
     * @param client the API client used to issue the request
     * @param type   the upload type sent to the service
     * @param file   the local file to upload
     * @return the credential data taken from the response body
     * @throws Exception if the API call or the upload fails
     */
    private static GetUploadPolicyResponseBody.GetUploadPolicyResponseBodyData upload(Client client, String type, File file) throws Exception {
        GetUploadPolicyRequest getUploadPolicyRequest = new GetUploadPolicyRequest();
        getUploadPolicyRequest.setType(type);
        GetUploadPolicyResponse uploadPolicy = client.getUploadPolicy(getUploadPolicyRequest);
        GetUploadPolicyResponseBody.GetUploadPolicyResponseBodyData data = uploadPolicy.getBody().getData();
        System.out.println("获取上传凭证:" + JSON.toJSONString(data));
        // Upload the file over HTTP using the credential just obtained.
        uploadFile(data, file);
        return data;
    }

    /**
     * Uploads the file with a multipart form POST to the host named in the upload credential.
     *
     * @param data the upload credential (host, policy, access id, signature and key prefix)
     * @param file the local file to upload
     * @throws IOException if the request cannot be executed or the server answers with a
     *                     non-2xx status
     */
    private static void uploadFile(GetUploadPolicyResponseBody.GetUploadPolicyResponseBodyData data, File file) throws IOException {
        HttpPost uploadFile = new HttpPost("https://" + data.getOssPolicy().getHost());
        ContentType contentType = ContentType.create("multipart/form-data", Consts.UTF_8);

        MultipartEntityBuilder builder = MultipartEntityBuilder.create();
        // Form fields required by OSS.
        builder.addTextBody("key", data.getOssKey() + "/" + file.getName(), contentType);
        builder.addTextBody("policy", data.getOssPolicy().getPolicy(), contentType);
        builder.addTextBody("OSSAccessKeyId", data.getOssPolicy().getAccessId(), contentType);
        builder.addTextBody("signature", data.getOssPolicy().getSignature(), contentType);
        // The file part is added after all other fields. Passing the File itself (rather than a
        // stream opened here) lets the entity open and close the file when it is written, so no
        // stream is left open by this method.
        // NOTE(review): the part's content type is fixed to audio/mpeg regardless of the file extension.
        builder.addBinaryBody("file", file, ContentType.create("audio/mpeg"), file.getName());
        uploadFile.setEntity(builder.build());

        try (CloseableHttpClient httpClient = HttpClients.createDefault();
             CloseableHttpResponse response = httpClient.execute(uploadFile)) {
            System.err.println("Upload result: " + response.getStatusLine());
            HttpEntity responseEntity = response.getEntity();
            if (responseEntity != null) {
                String responseString = EntityUtils.toString(responseEntity);
                System.err.println("Response content: " + responseString);
            }
            // Surface a rejected upload instead of letting later steps run against a missing object.
            int statusCode = response.getStatusLine().getStatusCode();
            if (statusCode < 200 || statusCode >= 300) {
                throw new IOException("Upload failed: " + response.getStatusLine());
            }
        }
    }

    /**
     * Creates a custom (cloned) voice from the uploaded audio and prints the response as JSON.
     *
     * @param client       the API client used to issue the request
     * @param ttsVoiceData the upload credential whose OSS key identifies the uploaded audio
     * @param ttsVoiceFile the local audio file; only its name is sent
     * @return the raw response of the create call
     * @throws Exception if the API call fails
     */
    private static CreateTTSVoiceCustomResponse createTTSVoiceCustom(Client client, GetUploadPolicyResponseBody.GetUploadPolicyResponseBodyData ttsVoiceData, File ttsVoiceFile) throws Exception {
        CreateTTSVoiceCustomRequest createTTSVoiceCustomRequest = new CreateTTSVoiceCustomRequest();
        createTTSVoiceCustomRequest.setName("自定义音色创建接口Demo测试");
        createTTSVoiceCustomRequest.setOssKey(ttsVoiceData.getOssKey());
        createTTSVoiceCustomRequest.setFileName(ttsVoiceFile.getName());
        // Accepted values are FEMALE and MALE; the value may be left empty.
        createTTSVoiceCustomRequest.setGender("");
        CreateTTSVoiceCustomResponse ttsVoiceCustom = client.createTTSVoiceCustom(createTTSVoiceCustomRequest);
        System.out.println("创建自定义音色:" + JSON.toJSONString(ttsVoiceCustom));
        return ttsVoiceCustom;
    }

    /**
     * Queries the details of a custom voice once and prints the response as JSON.
     *
     * @param client  the API client used to issue the request
     * @param voiceId the id of the voice to look up
     * @return the raw response of the query call
     * @throws Exception if the API call fails
     */
    private static GetTTSVoiceByIdCustomResponse getTTSVoiceByIdCustom(Client client, String voiceId) throws Exception {
        GetTTSVoiceByIdCustomRequest getTTSVoiceByIdCustomRequest = new GetTTSVoiceByIdCustomRequest();
        getTTSVoiceByIdCustomRequest.setVoiceId(voiceId);
        GetTTSVoiceByIdCustomResponse getTTSVoiceByIdCustom = client.getTTSVoiceByIdCustom(getTTSVoiceByIdCustomRequest);
        System.out.println("查询音色详情:" + JSON.toJSONString(getTTSVoiceByIdCustom));
        return getTTSVoiceByIdCustom;
    }

    /**
     * Lists the first page (page index 1, page size 10) of private voices with an empty name
     * filter and prints the response as JSON.
     *
     * @param client the API client used to issue the request
     * @return the raw response of the list call
     * @throws Exception if the API call fails
     */
    private static ListPrivateTTSVoicesCustomResponse listPrivateTTSVoicesCustom(Client client) throws Exception {
        ListPrivateTTSVoicesCustomRequest listPrivateTTSVoicesCustomRequest = new ListPrivateTTSVoicesCustomRequest();
        // The name filter may be left empty.
        listPrivateTTSVoicesCustomRequest.setName("");
        listPrivateTTSVoicesCustomRequest.setPageIndex(1);
        listPrivateTTSVoicesCustomRequest.setPageSize(10);
        ListPrivateTTSVoicesCustomResponse listPrivateTTSVoicesCustom = client.listPrivateTTSVoicesCustom(listPrivateTTSVoicesCustomRequest);
        System.out.println("查询音色列表:" + JSON.toJSONString(listPrivateTTSVoicesCustom));
        return listPrivateTTSVoicesCustom;
    }

    /**
     * Builds an API client for the {@code lingmou.cn-beijing.aliyuncs.com} endpoint.
     *
     * <p>The access key id and secret below are placeholders and must be replaced before the
     * sample can run; avoid committing real credentials to source control.
     *
     * @return a new client configured with the access key pair and endpoint
     * @throws Exception if the client cannot be constructed
     */
    public static Client getInstance() throws Exception {
        String accessKeyId = "xxx";
        String accessKeySecret = "yyy";
        String endpoint = "lingmou.cn-beijing.aliyuncs.com";

        Config config = new Config();
        // noinspection AklessInspection
        config.setAccessKeyId(accessKeyId);
        config.setAccessKeySecret(accessKeySecret);
        config.setEndpoint(endpoint);
        return new Client(config);
    }
}