快速搭建数字人视频生成接口
使用LivePortrait技术快速构建视频驱动的数字人视频生成API,实现AI数字人视频制作能力。
概述
本指南将帮助您:
使用LivePortrait技术创建数字人视频工作流
将工作流部署为可调用的API接口
通过Python SDK集成到您的应用中
实现从静态人像到动态视频的转换
预计完成时间: 25分钟 技术难度: 中级
前提条件
在开始之前,请确保您已:
拥有阿里云账号并完成实名认证
开通智作工坊服务
安装Python 3.8+环境
准备人像照片(推荐:清晰的正面照片)
准备驱动视频(必需:包含表情、动作的参考视频)
架构说明
步骤1:准备LivePortrait工作流
保存下面的内容到 digital_human_workflow.json 文件(步骤2导入工作流时会用到):
{
"last_node_id": 199,
"last_link_id": 31,
"nodes": [
{
"id": 8,
"type": "VHS_LoadVideo",
"pos": {
"0": 70,
"1": 0
},
"size": [
250,
620
],
"flags": {},
"order": 0,
"mode": 0,
"inputs": [
{
"name": "meta_batch",
"type": "VHS_BatchManager",
"link": null,
"label": "批次管理"
},
{
"name": "vae",
"type": "VAE",
"link": null
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
21
],
"shape": 3,
"label": "图像"
},
{
"name": "frame_count",
"type": "INT",
"links": null,
"slot_index": 1,
"shape": 3,
"label": "帧计数"
},
{
"name": "audio",
"type": "AUDIO",
"links": [
31
],
"slot_index": 2,
"shape": 3,
"label": "音频"
},
{
"name": "video_info",
"type": "VHS_VIDEOINFO",
"links": null,
"shape": 3,
"label": "视频信息"
}
],
"properties": {
"Node name for S&R": "VHS_LoadVideo"
},
"widgets_values": {
"video": "_sp_auto_upload_01j7fhv11v4vmfydkhnpz8h67w.mp4",
"force_rate": 0,
"force_size": "Disabled",
"custom_width": 512,
"custom_height": 512,
"frame_load_cap": 0,
"skip_first_frames": 0,
"select_every_nth": 1,
"choose video to upload": "image",
"videopreview": {
"hidden": false,
"paused": false,
"params": {
"filename": "_sp_auto_upload_01j7fhv11v4vmfydkhnpz8h67w.mp4",
"type": "input",
"format": "video/mp4",
"frame_load_cap": 0,
"skip_first_frames": 0,
"force_rate": 0,
"select_every_nth": 1
},
"muted": false
}
}
},
{
"id": 196,
"type": "LoadImage",
"pos": {
"0": 30,
"1": 680
},
"size": {
"0": 320,
"1": 310
},
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
13
],
"shape": 3,
"label": "图像"
},
{
"name": "MASK",
"type": "MASK",
"links": null,
"shape": 3,
"label": "遮罩"
}
],
"properties": {
"Node name for S&R": "LoadImage"
},
"widgets_values": [
"_sp_auto_upload_01j7fhtp8s3k82dyma6479jk9b.png",
"image"
]
},
{
"id": 165,
"type": "ImageResizeKJ",
"pos": {
"0": 390,
"1": 680
},
"size": {
"0": 320,
"1": 270
},
"flags": {},
"order": 4,
"mode": 0,
"inputs": [
{
"name": "image",
"type": "IMAGE",
"link": 13,
"label": "图像"
},
{
"name": "get_image_size",
"type": "IMAGE",
"link": null,
"label": "参考图像大小"
},
{
"name": "width_input",
"type": "INT",
"link": null,
"widget": {
"name": "width_input"
},
"label": "宽度"
},
{
"name": "height_input",
"type": "INT",
"link": null,
"widget": {
"name": "height_input"
},
"label": "高度"
}
],
"outputs": [
{
"name": "IMAGE",
"type": "IMAGE",
"links": [
17,
22
],
"shape": 3,
"label": "图像"
},
{
"name": "width",
"type": "INT",
"links": null,
"shape": 3,
"label": "宽度"
},
{
"name": "height",
"type": "INT",
"links": null,
"shape": 3,
"label": "高度"
}
],
"properties": {
"Node name for S&R": "ImageResizeKJ"
},
"widgets_values": [
512,
512,
"lanczos",
true,
2,
0,
0,
"disabled"
]
},
{
"id": 198,
"type": "LivePortraitLoadMediaPipeCropper",
"pos": {
"0": 350,
"1": 320
},
"size": {
"0": 320,
"1": 80
},
"flags": {},
"order": 2,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "cropper",
"type": "LPCROPPER",
"links": [
16
],
"shape": 3,
"label": "裁剪框架"
}
],
"properties": {
"Node name for S&R": "LivePortraitLoadMediaPipeCropper"
},
"widgets_values": [
"CUDA",
true
]
},
{
"id": 190,
"type": "LivePortraitProcess",
"pos": {
"0": 1120,
"1": 110
},
"size": {
"0": 430,
"1": 330
},
"flags": {},
"order": 6,
"mode": 0,
"inputs": [
{
"name": "pipeline",
"type": "LIVEPORTRAITPIPE",
"link": 18,
"label": "LivePortrait管线"
},
{
"name": "crop_info",
"type": "CROPINFO",
"link": 19,
"label": "裁剪信息"
},
{
"name": "source_image",
"type": "IMAGE",
"link": 20,
"label": "原图像"
},
{
"name": "driving_images",
"type": "IMAGE",
"link": 21,
"label": "驱动图像"
},
{
"name": "opt_retargeting_info",
"type": "RETARGETINGINFO",
"link": null,
"label": "重定向信息(可选)"
}
],
"outputs": [
{
"name": "cropped_image",
"type": "IMAGE",
"links": [
23
],
"shape": 3,
"label": "裁剪图像"
},
{
"name": "output",
"type": "LP_OUT",
"links": [
24
],
"shape": 3,
"label": "LivePOrtrait输出"
}
],
"properties": {
"Node name for S&R": "LivePortraitProcess"
},
"widgets_values": [
false,
0.03,
true,
1,
"constant",
"relative",
0.000003,
false,
1
]
},
{
"id": 191,
"type": "LivePortraitComposite",
"pos": {
"0": 1600,
"1": 130
},
"size": {
"0": 360,
"1": 90
},
"flags": {},
"order": 7,
"mode": 0,
"inputs": [
{
"name": "source_image",
"type": "IMAGE",
"link": 22,
"label": "原图像"
},
{
"name": "cropped_image",
"type": "IMAGE",
"link": 23,
"label": "裁剪图像"
},
{
"name": "liveportrait_out",
"type": "LP_OUT",
"link": 24,
"label": "LivePOrtrait输出"
},
{
"name": "mask",
"type": "MASK",
"link": null,
"label": "遮罩"
}
],
"outputs": [
{
"name": "full_images",
"type": "IMAGE",
"links": [
14
],
"shape": 3,
"label": "图像"
},
{
"name": "mask",
"type": "MASK",
"links": null,
"shape": 3,
"label": "遮罩"
}
],
"properties": {
"Node name for S&R": "LivePortraitComposite"
}
},
{
"id": 189,
"type": "LivePortraitCropper",
"pos": {
"0": 740,
"1": 350
},
"size": {
"0": 330,
"1": 240
},
"flags": {},
"order": 5,
"mode": 0,
"inputs": [
{
"name": "pipeline",
"type": "LIVEPORTRAITPIPE",
"link": 15,
"label": "LivePortrait管线"
},
{
"name": "cropper",
"type": "LPCROPPER",
"link": 16,
"label": "裁剪框架"
},
{
"name": "source_image",
"type": "IMAGE",
"link": 17,
"label": "原图像"
}
],
"outputs": [
{
"name": "cropped_image",
"type": "IMAGE",
"links": [
20
],
"shape": 3
},
{
"name": "crop_info",
"type": "CROPINFO",
"links": [
19
],
"shape": 3,
"label": "裁剪信息"
}
],
"properties": {
"Node name for S&R": "LivePortraitCropper"
},
"widgets_values": [
512,
2.3000000000000003,
0,
-0.125,
0,
"large-small",
true
]
},
{
"id": 168,
"type": "VHS_VideoCombine",
"pos": {
"0": 1660,
"1": 300
},
"size": [
210,
500
],
"flags": {},
"order": 8,
"mode": 0,
"inputs": [
{
"name": "images",
"type": "IMAGE",
"link": 14,
"label": "图像"
},
{
"name": "audio",
"type": "AUDIO",
"link": 31,
"label": "音频"
},
{
"name": "meta_batch",
"type": "VHS_BatchManager",
"link": null,
"label": "批次管理"
},
{
"name": "vae",
"type": "VAE",
"link": null
}
],
"outputs": [
{
"name": "Filenames",
"type": "VHS_FILENAMES",
"links": null,
"shape": 3,
"label": "文件名"
}
],
"properties": {
"Node name for S&R": "VHS_VideoCombine"
},
"widgets_values": {
"frame_rate": 30,
"loop_count": 0,
"filename_prefix": "LivePortrait/",
"format": "video/h264-mp4",
"pix_fmt": "yuv420p",
"crf": 19,
"save_metadata": true,
"pingpong": false,
"save_output": true,
"videopreview": {
"hidden": false,
"paused": false,
"params": {
"filename": "_sp_auto_upload_https://sd-fc-prod.oss-cn-shanghai.aliyuncs.com/comfy%2Foutput%2FLivePortraitd869915d5e824dedaf9feed80af48eb4_00001-audio.mp4?Expires=1728618753&OSSAccessKeyId=LTAI****************&Signature=5tFC**************************",
"format": "video/h264-mp4",
"frame_rate": 30,
"object_key": "comfy/output/LivePortraitd869915d5e824dedaf9feed80af48eb4_00001-audio.mp4",
"subfolder": "",
"type": "output"
},
"muted": false
}
}
},
{
"id": 1,
"type": "DownloadAndLoadLivePortraitModels",
"pos": {
"0": 80,
"1": -150
},
"size": {
"0": 320,
"1": 80
},
"flags": {},
"order": 3,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "live_portrait_pipe",
"type": "LIVEPORTRAITPIPE",
"links": [
15,
18
],
"shape": 3,
"label": "LivePortrait管线"
}
],
"properties": {
"Node name for S&R": "DownloadAndLoadLivePortraitModels"
},
"widgets_values": [
"fp16",
"human"
]
}
],
"links": [
[
13,
196,
0,
165,
0,
"IMAGE"
],
[
14,
191,
0,
168,
0,
"IMAGE"
],
[
15,
1,
0,
189,
0,
"LIVEPORTRAITPIPE"
],
[
16,
198,
0,
189,
1,
"LPCROPPER"
],
[
17,
165,
0,
189,
2,
"IMAGE"
],
[
18,
1,
0,
190,
0,
"LIVEPORTRAITPIPE"
],
[
19,
189,
1,
190,
1,
"CROPINFO"
],
[
20,
189,
0,
190,
2,
"IMAGE"
],
[
21,
8,
0,
190,
3,
"IMAGE"
],
[
22,
165,
0,
191,
0,
"IMAGE"
],
[
23,
190,
0,
191,
1,
"IMAGE"
],
[
24,
190,
1,
191,
2,
"LP_OUT"
],
[
31,
8,
2,
168,
1,
"*"
]
],
"groups": [],
"config": {},
"extra": {
"ds": {
"scale": 0.5559917313492244,
"offset": [
249.9019008932993,
305.3913911427086
]
}
},
"version": 0.4
}
工作流说明
节点 | 功能 | 输入要求 |
--- | --- | --- |
LoadImage | 加载人像照片 | PNG/JPG格式,建议512x512像素 |
VHS_LoadVideo | 加载驱动视频 | MP4格式,包含面部表情/动作 |
DownloadAndLoadLivePortraitModels | AI模型处理 | 自动下载和加载 |
VHS_VideoCombine | 视频合成 | 输出最终数字人视频 |
步骤2:创建工作流
导入工作流
进入智作工坊控制台
点击新建工作流 → 本地工作流文件导入
上传准备好的 digital_human_workflow.json 文件
等待工作流加载完成
参考输入:
人像照片 | 驱动视频 |
测试工作流
上传测试人像照片到 LoadImage 节点
上传测试驱动视频到 VHS_LoadVideo 节点
点击运行工作流验证效果
预期效果: 生成包含人像和表情动作的数字人视频
快速搭建数字人视频生成接口
步骤3:发布API接口
配置接口参数
输入参数设置
配置工作流对外暴露的参数:
参数名 | 节点路径 | 类型 | 描述 |
--- | --- | --- | --- |
portrait_image | LoadImage.image | File | 人像照片文件 |
driving_video | VHS_LoadVideo.video | File | 驱动视频文件 |
frame_rate | VHS_VideoCombine.frame_rate | Int | 输出视频帧率(默认30) |
输出参数设置
选择 VHS_VideoCombine 节点的输出作为API返回结果,将输出字段重命名为 videos
发布和版本管理
填写版本描述:数字人视频生成v1.0
点击提交发布
创建别名:main
记录工作流ID和别名供后续使用
步骤4:获取API凭据
接口发布之后,需要先新建应用,再使用该应用的AK/SK通过SDK调用接口。
打开智作工坊控制台的应用管理tab页
点击右上角创建应用
填写应用名称以及应用类型,点击确认
确认后会自动跳转应用详情页,复制对应的AK、SK备用
步骤5:Python SDK集成
环境配置
# 安装SDK
pip install speedpix
# 设置环境变量
export SPEEDPIX_APP_KEY="your-app-key"
export SPEEDPIX_APP_SECRET="your-app-secret"
export SPEEDPIX_ENDPOINT="https://openai.edu-aliyun.com"
基础实现
创建 digital_human_generator.py:
from speedpix import Client
import os
import time
# 初始化客户端
client = Client()
def generate_digital_human(portrait_path, driving_video_path, workflow_id, alias_id="digital_human_v1", frame_rate=30):
    """
    Generate a digital-human video by running the published workflow.

    Args:
        portrait_path (str): Path to the portrait photo.
        driving_video_path (str): Path to the driving video.
        workflow_id (str): ID of the published workflow.
        alias_id (str): Workflow alias to invoke. It has to be an alias that
            was actually created for the workflow (this guide creates "main"),
            so pass it explicitly if the default does not exist.
        frame_rate (int): Value sent as the "frame_rate" input. Defaults to
            30, the value this function previously hard-coded.

    Returns:
        str: Path of the saved video file.

    Raises:
        RuntimeError: If the workflow result has no non-empty "videos" entry.
    """
    print("开始生成数字人视频...")
    print(f"人像照片: {portrait_path}")
    print(f"驱动视频: {driving_video_path}")

    # perf_counter is monotonic, so the elapsed time cannot be skewed by
    # wall-clock adjustments while the workflow is running.
    started = time.perf_counter()

    # Invoke the workflow; the input keys must match the parameter names
    # exposed when the API was published.
    result = client.run(
        workflow_id=workflow_id,
        input={
            "portrait_image": portrait_path,
            "driving_video": driving_video_path,
            "frame_rate": frame_rate,
        },
        alias_id=alias_id,
    )

    # Fail before touching the filesystem when nothing was produced.
    if 'videos' not in result or not result['videos']:
        raise RuntimeError("未生成视频输出")

    # Save the generated video under a timestamped name.
    os.makedirs("output", exist_ok=True)
    output_path = f"output/digital_human_{int(time.time())}.mp4"
    result['videos']['url'].save(output_path)

    generation_time = time.perf_counter() - started
    print("生成成功!")
    print(f"输出文件: {output_path}")
    print(f"生成耗时: {generation_time:.2f}秒")
    return output_path
# Usage example
if __name__ == "__main__":
    try:
        output_file = generate_digital_human(
            portrait_path="./portrait.png",
            driving_video_path="./driving_video.mp4",
            workflow_id="01jz1yvm3f7n9xc8kgbhmcf031",  # replace with your actual workflow ID
            alias_id="main"
        )
    except Exception as e:
        print(f"生成失败: {e}")
        # Exit with a non-zero status so a calling shell or CI job can
        # detect that generation failed instead of seeing exit code 0.
        raise SystemExit(1)
    else:
        print(f"数字人视频已保存到: {output_file}")
步骤6:测试和验证
运行测试
# 测试生成
python digital_human_generator.py
# 检查输出
ls -la output/
预期输出
开始生成数字人视频...
人像照片: ./portrait.png
驱动视频: ./driving_video.mp4
生成成功!
输出文件: output/digital_human_1234567890.mp4
生成耗时: 45.67秒
数字人视频已保存到: output/digital_human_1234567890.mp4
性能优化和故障排除
常见问题
问题 | 解决方案 |
--- | --- |
视频生成失败 | 确保人像为JPG/PNG,视频为MP4格式 |
面部识别错误 | 使用清晰的正面人像照片 |
生成时间过长 | 调整输入视频分辨率到512x512 |
简单调试
# Verify that the input files exist before calling the API.
# The paths match the ones passed in digital_human_generator.py.
import os
if not os.path.exists("./portrait.png"):
    print("人像照片不存在")
if not os.path.exists("./driving_video.mp4"):
    print("驱动视频不存在")
总结
通过本指南,您已经成功:
部署了LivePortrait数字人视频生成工作流
创建了可调用的API接口
用最精简的Python代码实现数字人视频生成
相关资源
- 本页导读
- 概述
- 前提条件
- 架构说明
- 步骤1:准备LivePortrait工作流
- 工作流说明
- 步骤2:创建工作流
- 导入工作流
- 测试工作流
- 步骤3:发布API接口
- 配置接口参数
- 发布和版本管理
- 步骤4:获取API凭据
- 步骤5:Python SDK集成
- 环境配置
- 基础实现
- 步骤6:测试和验证
- 运行测试
- 预期输出
- 性能优化和故障排除
- 常见问题
- 简单调试
- 总结
- 相关资源