请求体 | 高精识别以下为调用高精识别内置任务的代码示例,详情请参见调用内置任务。 import os
import dashscope
# 若使用新加坡地域的模型,请取消下列注释
# dashscope.base_http_api_url = "https://dashscope-intl.aliyuncs.com/api/v1"
messages = [{
"role": "user",
"content": [{
"image": "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/ctdzex/biaozhun.jpg",
# 输入图像的最小像素阈值,小于该值图像会进行放大,直到总像素大于min_pixels
"min_pixels": 32 * 32 * 3,
# 输入图像的最大像素阈值,超过该值图像会进行缩小,直到总像素低于max_pixels
"max_pixels": 32 * 32 * 8192,
# 开启图像自动转正功能
"enable_rotate": False}]
}]
response = dashscope.MultiModalConversation.call(
# 若没有配置环境变量,请用百炼API Key将下行替换为:api_key="sk-xxx",
# 新加坡和北京地域的API Key不同。获取API Key:https://help.aliyun.com/zh/model-studio/get-api-key
api_key=os.getenv('DASHSCOPE_API_KEY'),
model='qwen-vl-ocr-latest',
messages=messages,
# 设置内置任务为高精识别
ocr_options={"task": "advanced_recognition"}
)
# 高精识别任务以纯文本的形式返回结果
print(response["output"]["choices"][0]["message"].content[0]["text"])
// dashscope SDK的版本 >= 2.21.8
import java.util.Arrays;
import java.util.Collections;
import java.util.Map;
import java.util.HashMap;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversation;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationParam;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationResult;
import com.alibaba.dashscope.aigc.multimodalconversation.OcrOptions;
import com.alibaba.dashscope.common.MultiModalMessage;
import com.alibaba.dashscope.common.Role;
import com.alibaba.dashscope.exception.ApiException;
import com.alibaba.dashscope.exception.NoApiKeyException;
import com.alibaba.dashscope.exception.UploadFileException;
import com.alibaba.dashscope.utils.Constants;
public class Main {
// 若使用新加坡地域的模型,请取消下列注释
// static {Constants.baseHttpApiUrl="https://dashscope-intl.aliyuncs.com/api/v1";}
public static void simpleMultiModalConversationCall()
throws ApiException, NoApiKeyException, UploadFileException {
MultiModalConversation conv = new MultiModalConversation();
Map<String, Object> map = new HashMap<>();
map.put("image", "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/ctdzex/biaozhun.jpg");
// 输入图像的最大像素阈值,超过该值图像会进行缩小,直到总像素低于max_pixels
map.put("max_pixels", 8388608);
// 输入图像的最小像素阈值,小于该值图像会进行放大,直到总像素大于min_pixels
map.put("min_pixels", 3072);
// 开启图像自动转正功能
map.put("enable_rotate", false);
// 配置内置的OCR任务
OcrOptions ocrOptions = OcrOptions.builder()
.task(OcrOptions.Task.ADVANCED_RECOGNITION)
.build();
MultiModalMessage userMessage = MultiModalMessage.builder().role(Role.USER.getValue())
.content(Arrays.asList(
map
)).build();
MultiModalConversationParam param = MultiModalConversationParam.builder()
// 若没有配置环境变量,请用百炼API Key将下行替换为:.apiKey("sk-xxx")
// 新加坡和北京地域的API Key不同。获取API Key:https://help.aliyun.com/zh/model-studio/get-api-key
.apiKey(System.getenv("DASHSCOPE_API_KEY"))
.model("qwen-vl-ocr-latest")
.message(userMessage)
.ocrOptions(ocrOptions)
.build();
MultiModalConversationResult result = conv.call(param);
System.out.println(result.getOutput().getChoices().get(0).getMessage().getContent().get(0).get("text"));
}
public static void main(String[] args) {
try {
simpleMultiModalConversationCall();
} catch (ApiException | NoApiKeyException | UploadFileException e) {
System.out.println(e.getMessage());
}
System.exit(0);
}
}
# ======= 重要提示 =======
# 新加坡和北京地域的API Key不同。获取API Key:https://help.aliyun.com/zh/model-studio/get-api-key
# 以下是北京地域base-url,如果使用新加坡地域的模型,需要将base_url替换为:https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation
# === 执行时请删除该注释 ===
curl --location 'https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation' \
--header "Authorization: Bearer $DASHSCOPE_API_KEY" \
--header 'Content-Type: application/json' \
--data '
{
"model": "qwen-vl-ocr-latest",
"input": {
"messages": [
{
"role": "user",
"content": [
{
"image": "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/ctdzex/biaozhun.jpg",
"min_pixels": 3072,
"max_pixels": 8388608,
"enable_rotate": false
}
]
}
]
},
"parameters": {
"ocr_options": {
"task": "advanced_recognition"
}
}
}
'
信息抽取以下为调用信息抽取内置任务的代码示例,详情请参见调用内置任务。 # use [pip install -U dashscope] to update sdk
import os
import dashscope
from dashscope import MultiModalConversation
# 若使用新加坡地域的模型,请取消下列注释
# dashscope.base_http_api_url = "https://dashscope-intl.aliyuncs.com/api/v1"
messages = [
{
"role":"user",
"content":[
{
"image":"http://duguang-labelling.oss-cn-shanghai.aliyuncs.com/demo_ocr/receipt_zh_demo.jpg",
"min_pixels": 32 * 32 * 3,
"max_pixels": 32 * 32 *8192,
"enable_rotate": False
}
]
}
]
# 指定抽取字段
params = {
"ocr_options":{
"task": "key_information_extraction",
"task_config": {
"result_schema": {
"乘车日期": "对应图中乘车日期时间,格式为年-月-日,比如2025-03-05",
"发票代码": "提取图中的发票代码,通常为一组数字或字母组合",
"发票号码": "提取发票上的号码,通常由纯数字组成。"
}
}
}
}
response = MultiModalConversation.call(
model='qwen-vl-ocr-latest',
messages=messages,
**params,
# 新加坡和北京地域的API Key不同。获取API Key:https://help.aliyun.com/zh/model-studio/get-api-key
api_key=os.getenv('DASHSCOPE_API_KEY'))
print(response.output.choices[0].message.content[0]["ocr_result"])
import java.util.Arrays;
import java.util.Collections;
import java.util.Map;
import java.util.HashMap;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversation;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationParam;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationResult;
import com.alibaba.dashscope.aigc.multimodalconversation.OcrOptions;
import com.alibaba.dashscope.common.MultiModalMessage;
import com.alibaba.dashscope.common.Role;
import com.alibaba.dashscope.exception.ApiException;
import com.alibaba.dashscope.exception.NoApiKeyException;
import com.alibaba.dashscope.exception.UploadFileException;
import com.google.gson.JsonObject;
import com.alibaba.dashscope.utils.Constants;
public class Main {
// 若使用新加坡地域的模型,请取消下列注释
// static {Constants.baseHttpApiUrl="https://dashscope-intl.aliyuncs.com/api/v1";}
public static void simpleMultiModalConversationCall()
throws ApiException, NoApiKeyException, UploadFileException {
MultiModalConversation conv = new MultiModalConversation();
Map<String, Object> map = new HashMap<>();
map.put("image", "http://duguang-labelling.oss-cn-shanghai.aliyuncs.com/demo_ocr/receipt_zh_demo.jpg");
// 输入图像的最大像素阈值,超过该值图像会进行缩小,直到总像素低于max_pixels
map.put("max_pixels", 8388608);
// 输入图像的最小像素阈值,小于该值图像会进行放大,直到总像素大于min_pixels
map.put("min_pixels",3072);
// 开启图像自动转正功能
map.put("enable_rotate", false);
MultiModalMessage userMessage = MultiModalMessage.builder().role(Role.USER.getValue())
.content(Arrays.asList(
map
)).build();
// 创建主JSON对象
JsonObject resultSchema = new JsonObject();
resultSchema.addProperty("乘车日期", "对应图中乘车日期时间,格式为年-月-日,比如2025-03-05");
resultSchema.addProperty("发票代码", "提取图中的发票代码,通常为一组数字或字母组合");
resultSchema.addProperty("发票号码", "提取发票上的号码,通常由纯数字组成。");
// 配置内置的OCR任务
OcrOptions ocrOptions = OcrOptions.builder()
.task(OcrOptions.Task.KEY_INFORMATION_EXTRACTION)
.taskConfig(OcrOptions.TaskConfig.builder()
.resultSchema(resultSchema)
.build())
.build();
MultiModalConversationParam param = MultiModalConversationParam.builder()
// 若没有配置环境变量,请用百炼API Key将下行替换为:.apiKey("sk-xxx")
// 新加坡和北京地域的API Key不同。获取API Key:https://help.aliyun.com/zh/model-studio/get-api-key
.apiKey(System.getenv("DASHSCOPE_API_KEY"))
.model("qwen-vl-ocr-latest")
.message(userMessage)
.ocrOptions(ocrOptions)
.build();
MultiModalConversationResult result = conv.call(param);
System.out.println(result.getOutput().getChoices().get(0).getMessage().getContent().get(0).get("ocr_result"));
}
public static void main(String[] args) {
try {
simpleMultiModalConversationCall();
} catch (ApiException | NoApiKeyException | UploadFileException e) {
System.out.println(e.getMessage());
}
System.exit(0);
}
}
# ======= 重要提示 =======
# 新加坡和北京地域的API Key不同。获取API Key:https://help.aliyun.com/zh/model-studio/get-api-key
# 以下是北京地域base-url,如果使用新加坡地域的模型,需要将base_url替换为:https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation \
# === 执行时请删除该注释 ===
curl --location 'https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation' \
--header "Authorization: Bearer $DASHSCOPE_API_KEY" \
--header 'Content-Type: application/json' \
--data '
{
"model": "qwen-vl-ocr-latest",
"input": {
"messages": [
{
"role": "user",
"content": [
{
"image": "http://duguang-labelling.oss-cn-shanghai.aliyuncs.com/demo_ocr/receipt_zh_demo.jpg",
"min_pixels": 3072,
"max_pixels": 8388608,
"enable_rotate": false
}
]
}
]
},
"parameters": {
"ocr_options": {
"task": "key_information_extraction",
"task_config": {
"result_schema": {
"乘车日期": "对应图中乘车日期时间,格式为年-月-日,比如2025-03-05",
"发票代码": "提取图中的发票代码,通常为一组数字或字母组合",
"发票号码": "提取发票上的号码,通常由纯数字组成。"
}
}
}
}
}
'
表格解析以下为调用表格解析内置任务的代码示例,详情请参见调用内置任务。 import os
import dashscope
# 若使用新加坡地域的模型,请取消下列注释
# dashscope.base_http_api_url = "https://dashscope-intl.aliyuncs.com/api/v1"
messages = [{
"role": "user",
"content": [{
"image": "http://duguang-llm.oss-cn-hangzhou.aliyuncs.com/llm_data_keeper/data/doc_parsing/tables/photo/eng/17.jpg",
# 输入图像的最小像素阈值,小于该值图像会进行放大,直到总像素大于min_pixels
"min_pixels": 32 * 32 * 3,
# 输入图像的最大像素阈值,超过该值图像会进行缩小,直到总像素低于max_pixels
"max_pixels": 32 * 32 * 8192,
# 开启图像自动转正功能
"enable_rotate": False}]
}]
response = dashscope.MultiModalConversation.call(
# 新加坡和北京地域的API Key不同。获取API Key:https://help.aliyun.com/zh/model-studio/get-api-key
# 若没有配置环境变量,请用百炼API Key将下行替换为:api_key="sk-xxx",
api_key=os.getenv('DASHSCOPE_API_KEY'),
model='qwen-vl-ocr-latest',
messages=messages,
# 设置内置任务为表格解析
ocr_options= {"task": "table_parsing"}
)
# 表格解析任务以HTML格式返回结果
print(response["output"]["choices"][0]["message"].content[0]["text"])
import java.util.Arrays;
import java.util.Collections;
import java.util.Map;
import java.util.HashMap;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversation;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationParam;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationResult;
import com.alibaba.dashscope.aigc.multimodalconversation.OcrOptions;
import com.alibaba.dashscope.common.MultiModalMessage;
import com.alibaba.dashscope.common.Role;
import com.alibaba.dashscope.exception.ApiException;
import com.alibaba.dashscope.exception.NoApiKeyException;
import com.alibaba.dashscope.exception.UploadFileException;
import com.alibaba.dashscope.utils.Constants;
public class Main {
// 若使用新加坡地域的模型,请取消下列注释
// static {Constants.baseHttpApiUrl="https://dashscope-intl.aliyuncs.com/api/v1";}
public static void simpleMultiModalConversationCall()
throws ApiException, NoApiKeyException, UploadFileException {
MultiModalConversation conv = new MultiModalConversation();
Map<String, Object> map = new HashMap<>();
map.put("image", "https://duguang-llm.oss-cn-hangzhou.aliyuncs.com/llm_data_keeper/data/doc_parsing/tables/photo/eng/17.jpg");
// 输入图像的最大像素阈值,超过该值图像会进行缩小,直到总像素低于max_pixels
map.put("max_pixels", 8388608);
// 输入图像的最小像素阈值,小于该值图像会进行放大,直到总像素大于min_pixels
map.put("min_pixels", 3072);
// 开启图像自动转正功能
map.put("enable_rotate", false);
// 配置内置的OCR任务
OcrOptions ocrOptions = OcrOptions.builder()
.task(OcrOptions.Task.TABLE_PARSING)
.build();
MultiModalMessage userMessage = MultiModalMessage.builder().role(Role.USER.getValue())
.content(Arrays.asList(
map
)).build();
MultiModalConversationParam param = MultiModalConversationParam.builder()
// 若没有配置环境变量,请用百炼API Key将下行替换为:.apiKey("sk-xxx")
// 新加坡和北京地域的API Key不同。获取API Key:https://help.aliyun.com/zh/model-studio/get-api-key
.apiKey(System.getenv("DASHSCOPE_API_KEY"))
.model("qwen-vl-ocr-latest")
.message(userMessage)
.ocrOptions(ocrOptions)
.build();
MultiModalConversationResult result = conv.call(param);
System.out.println(result.getOutput().getChoices().get(0).getMessage().getContent().get(0).get("text"));
}
public static void main(String[] args) {
try {
simpleMultiModalConversationCall();
} catch (ApiException | NoApiKeyException | UploadFileException e) {
System.out.println(e.getMessage());
}
System.exit(0);
}
}
# ======= 重要提示 =======
# 新加坡和北京地域的API Key不同。获取API Key:https://help.aliyun.com/zh/model-studio/get-api-key
# 以下是北京地域base-url,如果使用新加坡地域的模型,需要将base_url替换为:https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation \
# === 执行时请删除该注释 ===
curl --location 'https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation' \
--header "Authorization: Bearer $DASHSCOPE_API_KEY" \
--header 'Content-Type: application/json' \
--data '
{
"model": "qwen-vl-ocr-latest",
"input": {
"messages": [
{
"role": "user",
"content": [
{
"image": "http://duguang-llm.oss-cn-hangzhou.aliyuncs.com/llm_data_keeper/data/doc_parsing/tables/photo/eng/17.jpg",
"min_pixels": 3072,
"max_pixels": 8388608,
"enable_rotate": false
}
]
}
]
},
"parameters": {
"ocr_options": {
"task": "table_parsing"
}
}
}
'
文档解析以下为调用文档解析内置任务的代码示例,详情请参见调用内置任务。 import os
import dashscope
# 若使用新加坡地域的模型,请取消下列注释
# dashscope.base_http_api_url = "https://dashscope-intl.aliyuncs.com/api/v1"
messages = [{
"role": "user",
"content": [{
"image": "https://img.alicdn.com/imgextra/i1/O1CN01ukECva1cisjyK6ZDK_!!6000000003635-0-tps-1500-1734.jpg",
# 输入图像的最小像素阈值,小于该值图像会进行放大,直到总像素大于min_pixels
"min_pixels": 32 * 32 * 3,
# 输入图像的最大像素阈值,超过该值图像会进行缩小,直到总像素低于max_pixels
"max_pixels": 32 * 32 * 8192,
# 开启图像自动转正功能
"enable_rotate": False}]
}]
response = dashscope.MultiModalConversation.call(
# 若没有配置环境变量,请用百炼API Key将下行替换为:api_key="sk-xxx",
# 新加坡和北京地域的API Key不同。获取API Key:https://help.aliyun.com/zh/model-studio/get-api-key
api_key=os.getenv('DASHSCOPE_API_KEY'),
model='qwen-vl-ocr-latest',
messages=messages,
# 设置内置任务为文档解析
ocr_options= {"task": "document_parsing"}
)
# 文档解析任务以LaTeX格式返回结果
print(response["output"]["choices"][0]["message"].content[0]["text"])
import java.util.Arrays;
import java.util.Collections;
import java.util.Map;
import java.util.HashMap;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversation;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationParam;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationResult;
import com.alibaba.dashscope.aigc.multimodalconversation.OcrOptions;
import com.alibaba.dashscope.common.MultiModalMessage;
import com.alibaba.dashscope.common.Role;
import com.alibaba.dashscope.exception.ApiException;
import com.alibaba.dashscope.exception.NoApiKeyException;
import com.alibaba.dashscope.exception.UploadFileException;
import com.alibaba.dashscope.utils.Constants;
public class Main {
// 若使用新加坡地域的模型,请取消下列注释
// static {Constants.baseHttpApiUrl="https://dashscope-intl.aliyuncs.com/api/v1";}
public static void simpleMultiModalConversationCall()
throws ApiException, NoApiKeyException, UploadFileException {
MultiModalConversation conv = new MultiModalConversation();
Map<String, Object> map = new HashMap<>();
map.put("image", "https://img.alicdn.com/imgextra/i1/O1CN01ukECva1cisjyK6ZDK_!!6000000003635-0-tps-1500-1734.jpg");
// 输入图像的最大像素阈值,超过该值图像会进行缩小,直到总像素低于max_pixels
map.put("max_pixels", 8388608);
// 输入图像的最小像素阈值,小于该值图像会进行放大,直到总像素大于min_pixels
map.put("min_pixels", 3072);
// 开启图像自动转正功能
map.put("enable_rotate", false);
// 配置内置的OCR任务
OcrOptions ocrOptions = OcrOptions.builder()
.task(OcrOptions.Task.DOCUMENT_PARSING)
.build();
MultiModalMessage userMessage = MultiModalMessage.builder().role(Role.USER.getValue())
.content(Arrays.asList(
map
)).build();
MultiModalConversationParam param = MultiModalConversationParam.builder()
// 若没有配置环境变量,请用百炼API Key将下行替换为:.apiKey("sk-xxx")
// 新加坡和北京地域的API Key不同。获取API Key:https://help.aliyun.com/zh/model-studio/get-api-key
.apiKey(System.getenv("DASHSCOPE_API_KEY"))
.model("qwen-vl-ocr-latest")
.message(userMessage)
.ocrOptions(ocrOptions)
.build();
MultiModalConversationResult result = conv.call(param);
System.out.println(result.getOutput().getChoices().get(0).getMessage().getContent().get(0).get("text"));
}
public static void main(String[] args) {
try {
simpleMultiModalConversationCall();
} catch (ApiException | NoApiKeyException | UploadFileException e) {
System.out.println(e.getMessage());
}
System.exit(0);
}
}
# ======= 重要提示 =======
# 新加坡和北京地域的API Key不同。获取API Key:https://help.aliyun.com/zh/model-studio/get-api-key
# 以下是北京地域base-url,如果使用新加坡地域的模型,需要将base_url替换为:https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation \
# === 执行时请删除该注释 ===
curl --location 'https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation'\
--header "Authorization: Bearer $DASHSCOPE_API_KEY"\
--header 'Content-Type: application/json'\
--data '{
"model": "qwen-vl-ocr-latest",
"input": {
"messages": [
{
"role": "user",
"content": [{
"image": "https://img.alicdn.com/imgextra/i1/O1CN01ukECva1cisjyK6ZDK_!!6000000003635-0-tps-1500-1734.jpg",
"min_pixels": 3072,
"max_pixels": 8388608,
"enable_rotate": false
}
]
}
]
},
"parameters": {
"ocr_options": {
"task": "document_parsing"
}
}
}
'
公式识别以下为调用公式识别内置任务的代码示例,详情请参见调用内置任务。 import os
import dashscope
messages = [{
"role": "user",
"content": [{
"image": "http://duguang-llm.oss-cn-hangzhou.aliyuncs.com/llm_data_keeper/data/formula_handwriting/test/inline_5_4.jpg",
# 输入图像的最小像素阈值,小于该值图像会进行放大,直到总像素大于min_pixels
"min_pixels": 32 * 32 * 3,
# 输入图像的最大像素阈值,超过该值图像会进行缩小,直到总像素低于max_pixels
"max_pixels": 32 * 32 * 8192,
# 开启图像自动转正功能
"enable_rotate": False }]
}]
response = dashscope.MultiModalConversation.call(
# 若没有配置环境变量,请用百炼API Key将下行替换为:api_key="sk-xxx",
api_key=os.getenv('DASHSCOPE_API_KEY'),
model='qwen-vl-ocr-latest',
messages=messages,
# 设置内置任务为公式识别
ocr_options= {"task": "formula_recognition"}
)
# 公式识别任务以LaTeX格式返回结果
print(response["output"]["choices"][0]["message"].content[0]["text"])
import java.util.Arrays;
import java.util.Collections;
import java.util.Map;
import java.util.HashMap;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversation;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationParam;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationResult;
import com.alibaba.dashscope.aigc.multimodalconversation.OcrOptions;
import com.alibaba.dashscope.common.MultiModalMessage;
import com.alibaba.dashscope.common.Role;
import com.alibaba.dashscope.exception.ApiException;
import com.alibaba.dashscope.exception.NoApiKeyException;
import com.alibaba.dashscope.exception.UploadFileException;
import com.alibaba.dashscope.utils.Constants;
public class Main {
// 若使用新加坡地域的模型,请取消下列注释
// static {Constants.baseHttpApiUrl="https://dashscope-intl.aliyuncs.com/api/v1";}
public static void simpleMultiModalConversationCall()
throws ApiException, NoApiKeyException, UploadFileException {
MultiModalConversation conv = new MultiModalConversation();
Map<String, Object> map = new HashMap<>();
map.put("image", "http://duguang-llm.oss-cn-hangzhou.aliyuncs.com/llm_data_keeper/data/formula_handwriting/test/inline_5_4.jpg");
// 输入图像的最大像素阈值,超过该值图像会进行缩小,直到总像素低于max_pixels
map.put("max_pixels", 8388608);
// 输入图像的最小像素阈值,小于该值图像会进行放大,直到总像素大于min_pixels
map.put("min_pixels", 3072);
// 开启图像自动转正功能
map.put("enable_rotate", false);
// 配置内置的OCR任务
OcrOptions ocrOptions = OcrOptions.builder()
.task(OcrOptions.Task.FORMULA_RECOGNITION)
.build();
MultiModalMessage userMessage = MultiModalMessage.builder().role(Role.USER.getValue())
.content(Arrays.asList(
map
)).build();
MultiModalConversationParam param = MultiModalConversationParam.builder()
// 若没有配置环境变量,请用百炼API Key将下行替换为:.apiKey("sk-xxx")
// 新加坡和北京地域的API Key不同。获取API Key:https://help.aliyun.com/zh/model-studio/get-api-key
.apiKey(System.getenv("DASHSCOPE_API_KEY"))
.model("qwen-vl-ocr-latest")
.message(userMessage)
.ocrOptions(ocrOptions)
.build();
MultiModalConversationResult result = conv.call(param);
System.out.println(result.getOutput().getChoices().get(0).getMessage().getContent().get(0).get("text"));
}
public static void main(String[] args) {
try {
simpleMultiModalConversationCall();
} catch (ApiException | NoApiKeyException | UploadFileException e) {
System.out.println(e.getMessage());
}
System.exit(0);
}
}
# ======= 重要提示 =======
# 新加坡和北京地域的API Key不同。获取API Key:https://help.aliyun.com/zh/model-studio/get-api-key
# 以下是北京地域base-url,如果使用新加坡地域的模型,需要将base_url替换为:https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation \
# === 执行时请删除该注释 ===
curl --location 'https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation' \
--header "Authorization: Bearer $DASHSCOPE_API_KEY" \
--header 'Content-Type: application/json' \
--data '
{
"model": "qwen-vl-ocr-latest",
"input": {
"messages": [
{
"role": "user",
"content": [
{
"image": "http://duguang-llm.oss-cn-hangzhou.aliyuncs.com/llm_data_keeper/data/formula_handwriting/test/inline_5_4.jpg",
"min_pixels": 3072,
"max_pixels": 8388608,
"enable_rotate": false
}
]
}
]
},
"parameters": {
"ocr_options": {
"task": "formula_recognition"
}
}
}
'
通用文字识别以下为调用通用文字识别内置任务的代码示例,详情请参见调用内置任务。 import os
import dashscope
# 若使用新加坡地域的模型,请取消下列注释
# dashscope.base_http_api_url = "https://dashscope-intl.aliyuncs.com/api/v1"
messages = [{
"role": "user",
"content": [{
"image": "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/ctdzex/biaozhun.jpg",
# 输入图像的最小像素阈值,小于该值图像会进行放大,直到总像素大于min_pixels
"min_pixels": 32 * 32 * 3,
# 输入图像的最大像素阈值,超过该值图像会进行缩小,直到总像素低于max_pixels
"max_pixels": 32 * 32 * 8192,
# 开启图像自动转正功能
"enable_rotate": False}]
}]
response = dashscope.MultiModalConversation.call(
# 若没有配置环境变量,请用百炼API Key将下行替换为:api_key="sk-xxx",
# 新加坡和北京地域的API Key不同。获取API Key:https://help.aliyun.com/zh/model-studio/get-api-key
api_key=os.getenv('DASHSCOPE_API_KEY'),
model='qwen-vl-ocr-latest',
messages=messages,
# 设置内置任务为通用文字识别
ocr_options= {"task": "text_recognition"}
)
# 通用文字识别任务以纯文本格式返回结果
print(response["output"]["choices"][0]["message"].content[0]["text"])
import java.util.Arrays;
import java.util.Collections;
import java.util.Map;
import java.util.HashMap;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversation;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationParam;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationResult;
import com.alibaba.dashscope.aigc.multimodalconversation.OcrOptions;
import com.alibaba.dashscope.common.MultiModalMessage;
import com.alibaba.dashscope.common.Role;
import com.alibaba.dashscope.exception.ApiException;
import com.alibaba.dashscope.exception.NoApiKeyException;
import com.alibaba.dashscope.exception.UploadFileException;
import com.alibaba.dashscope.utils.Constants;
public class Main {
// 若使用新加坡地域的模型,请取消下列注释
// static {Constants.baseHttpApiUrl="https://dashscope-intl.aliyuncs.com/api/v1";}
public static void simpleMultiModalConversationCall()
throws ApiException, NoApiKeyException, UploadFileException {
MultiModalConversation conv = new MultiModalConversation();
Map<String, Object> map = new HashMap<>();
map.put("image", "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/ctdzex/biaozhun.jpg");
// 输入图像的最大像素阈值,超过该值图像会进行缩小,直到总像素低于max_pixels
map.put("max_pixels", 8388608);
// 输入图像的最小像素阈值,小于该值图像会进行放大,直到总像素大于min_pixels
map.put("min_pixels", 3072);
// 开启图像自动转正功能
map.put("enable_rotate", false);
// 配置内置任务
OcrOptions ocrOptions = OcrOptions.builder()
.task(OcrOptions.Task.TEXT_RECOGNITION)
.build();
MultiModalMessage userMessage = MultiModalMessage.builder().role(Role.USER.getValue())
.content(Arrays.asList(
map
)).build();
MultiModalConversationParam param = MultiModalConversationParam.builder()
// 若没有配置环境变量,请用百炼API Key将下行替换为:.apiKey("sk-xxx")
// 新加坡和北京地域的API Key不同。获取API Key:https://help.aliyun.com/zh/model-studio/get-api-key
.apiKey(System.getenv("DASHSCOPE_API_KEY"))
.model("qwen-vl-ocr-latest")
.message(userMessage)
.ocrOptions(ocrOptions)
.build();
MultiModalConversationResult result = conv.call(param);
System.out.println(result.getOutput().getChoices().get(0).getMessage().getContent().get(0).get("text"));
}
public static void main(String[] args) {
try {
simpleMultiModalConversationCall();
} catch (ApiException | NoApiKeyException | UploadFileException e) {
System.out.println(e.getMessage());
}
System.exit(0);
}
}
# ======= 重要提示 =======
# 新加坡和北京地域的API Key不同。获取API Key:https://help.aliyun.com/zh/model-studio/get-api-key
# 以下是北京地域base-url,如果使用新加坡地域的模型,需要将base_url替换为:https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation \
# === 执行时请删除该注释 ===
curl --location 'https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation'\
--header "Authorization: Bearer $DASHSCOPE_API_KEY"\
--header 'Content-Type: application/json'\
--data '{
"model": "qwen-vl-ocr-latest",
"input": {
"messages": [
{
"role": "user",
"content": [{
"image": "https://help-static-aliyun-doc.aliyuncs.com/file-manage-files/zh-CN/20241108/ctdzex/biaozhun.jpg",
"min_pixels": 3072,
"max_pixels": 8388608,
"enable_rotate": false
}
]
}
]
},
"parameters": {
"ocr_options": {
"task": "text_recognition"
}
}
}'
多语言识别以下为调用通用多语言识别内置任务的代码示例,详情请参见调用内置任务。 import os
import dashscope
# 若使用新加坡地域的模型,请取消下列注释
# dashscope.base_http_api_url = "https://dashscope-intl.aliyuncs.com/api/v1"
messages = [{
"role": "user",
"content": [{
"image": "https://img.alicdn.com/imgextra/i2/O1CN01VvUMNP1yq8YvkSDFY_!!6000000006629-2-tps-6000-3000.png",
# 输入图像的最小像素阈值,小于该值图像会进行放大,直到总像素大于min_pixels
"min_pixels": 32 * 32 * 3,
# 输入图像的最大像素阈值,超过该值图像会进行缩小,直到总像素低于max_pixels
"max_pixels": 32 * 32 * 8192,
# 开启图像自动转正功能
"enable_rotate": False }]
}]
response = dashscope.MultiModalConversation.call(
# 若没有配置环境变量,请用百炼API Key将下行替换为:api_key="sk-xxx",
# 新加坡和北京地域的API Key不同。获取API Key:https://help.aliyun.com/zh/model-studio/get-api-key
api_key=os.getenv('DASHSCOPE_API_KEY'),
model='qwen-vl-ocr-latest',
messages=messages,
# 设置内置任务为多语言识别
ocr_options={"task": "multi_lan"}
)
# 多语言识别任务以纯文本的形式返回结果
print(response["output"]["choices"][0]["message"].content[0]["text"])
import java.util.Arrays;
import java.util.Collections;
import java.util.Map;
import java.util.HashMap;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversation;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationParam;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationResult;
import com.alibaba.dashscope.aigc.multimodalconversation.OcrOptions;
import com.alibaba.dashscope.common.MultiModalMessage;
import com.alibaba.dashscope.common.Role;
import com.alibaba.dashscope.exception.ApiException;
import com.alibaba.dashscope.exception.NoApiKeyException;
import com.alibaba.dashscope.exception.UploadFileException;
import com.alibaba.dashscope.utils.Constants;
public class Main {
// 若使用新加坡地域的模型,请取消下列注释
// static {Constants.baseHttpApiUrl="https://dashscope-intl.aliyuncs.com/api/v1";}
public static void simpleMultiModalConversationCall()
throws ApiException, NoApiKeyException, UploadFileException {
MultiModalConversation conv = new MultiModalConversation();
Map<String, Object> map = new HashMap<>();
map.put("image", "https://img.alicdn.com/imgextra/i2/O1CN01VvUMNP1yq8YvkSDFY_!!6000000006629-2-tps-6000-3000.png");
// 输入图像的最大像素阈值,超过该值图像会进行缩小,直到总像素低于max_pixels
map.put("max_pixels", 8388608);
// 输入图像的最小像素阈值,小于该值图像会进行放大,直到总像素大于min_pixels
map.put("min_pixels", 3072);
// 开启图像自动转正功能
map.put("enable_rotate", false);
// 配置内置的OCR任务
OcrOptions ocrOptions = OcrOptions.builder()
.task(OcrOptions.Task.MULTI_LAN)
.build();
MultiModalMessage userMessage = MultiModalMessage.builder().role(Role.USER.getValue())
.content(Arrays.asList(
map
)).build();
MultiModalConversationParam param = MultiModalConversationParam.builder()
// 若没有配置环境变量,请用百炼API Key将下行替换为:.apiKey("sk-xxx")
// 新加坡和北京地域的API Key不同。获取API Key:https://help.aliyun.com/zh/model-studio/get-api-key
.apiKey(System.getenv("DASHSCOPE_API_KEY"))
.model("qwen-vl-ocr-latest")
.message(userMessage)
.ocrOptions(ocrOptions)
.build();
MultiModalConversationResult result = conv.call(param);
System.out.println(result.getOutput().getChoices().get(0).getMessage().getContent().get(0).get("text"));
}
public static void main(String[] args) {
try {
simpleMultiModalConversationCall();
} catch (ApiException | NoApiKeyException | UploadFileException e) {
System.out.println(e.getMessage());
}
System.exit(0);
}
}
# ======= 重要提示 =======
# 新加坡和北京地域的API Key不同。获取API Key:https://help.aliyun.com/zh/model-studio/get-api-key
# 以下是北京地域base-url,如果使用新加坡地域的模型,需要将base_url替换为:https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation \
# === 执行时请删除该注释 ===
curl --location 'https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation' \
--header "Authorization: Bearer $DASHSCOPE_API_KEY" \
--header 'Content-Type: application/json' \
--data '
{
"model": "qwen-vl-ocr-latest",
"input": {
"messages": [
{
"role": "user",
"content": [
{
"image": "https://img.alicdn.com/imgextra/i2/O1CN01VvUMNP1yq8YvkSDFY_!!6000000006629-2-tps-6000-3000.png",
"min_pixels": 3072,
"max_pixels": 8388608,
"enable_rotate": false
}
]
}
]
},
"parameters": {
"ocr_options": {
"task": "multi_lan"
}
}
}
'
流式输出Pythonimport os
import dashscope
# 若使用新加坡地域的模型,请取消下列注释
# dashscope.base_http_api_url = "https://dashscope-intl.aliyuncs.com/api/v1"
PROMPT_TICKET_EXTRACTION = """
请提取车票图像中的发票号码、车次、起始站、终点站、发车日期和时间点、座位号、席别类型、票价、身份证号码、购票人姓名。
要求准确无误的提取上述关键信息、不要遗漏和捏造虚假信息,模糊或者强光遮挡的单个文字可以用英文问号?代替。
返回数据格式以json方式输出,格式为:{'发票号码': 'xxx', '发票号码': 'xxx', '起始站': 'xxx', '终点站': 'xxx', '发车日期和时间点':'xxx', '座位号': 'xxx', '座位号': 'xxx','票价':'xxx', '身份证号码': 'xxx', '购票人姓名': 'xxx'"},
"""
messages = [
{
"role": "user",
"content": [
{
"image": "https://img.alicdn.com/imgextra/i2/O1CN01ktT8451iQutqReELT_!!6000000004408-0-tps-689-487.jpg",
# 输入图像的最小像素阈值,小于该值图像会放大,直到总像素大于min_pixels
"min_pixels": 32 * 32 * 3,
# 输入图像的最大像素阈值,超过该值图像会缩小,直到总像素低于max_pixels
"max_pixels": 32 * 32 * 8192,
# 开启图像自动转正功能
"enable_rotate": False,
},
# 未设置内置任务时,支持在text字段中传入Prompt,若未传入则使用默认的Prompt:Please output only the text content from the image without any additional descriptions or formatting.
{
"type": "text",
"text": PROMPT_TICKET_EXTRACTION,
},
],
}
]
response = dashscope.MultiModalConversation.call(
# 若没有配置环境变量,请用百炼API Key将下行替换为:api_key="sk-xxx",
# 新加坡和北京地域的API Key不同。获取API Key:https://help.aliyun.com/zh/model-studio/get-api-key
api_key=os.getenv("DASHSCOPE_API_KEY"),
model="qwen-vl-ocr-latest",
messages=messages,
stream=True,
incremental_output=True,
)
full_content = ""
print("流式输出内容为:")
for response in response:
try:
print(response["output"]["choices"][0]["message"].content[0]["text"])
full_content += response["output"]["choices"][0]["message"].content[0]["text"]
except:
pass
print(f"完整内容为:{full_content}")
Javaimport java.util.*;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversation;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationParam;
import com.alibaba.dashscope.aigc.multimodalconversation.MultiModalConversationResult;
import com.alibaba.dashscope.common.MultiModalMessage;
import com.alibaba.dashscope.common.Role;
import com.alibaba.dashscope.exception.ApiException;
import com.alibaba.dashscope.exception.NoApiKeyException;
import com.alibaba.dashscope.exception.UploadFileException;
import io.reactivex.Flowable;
import com.alibaba.dashscope.utils.Constants;
public class Main {
// 若使用新加坡地域的模型,请取消下列注释
// static {Constants.baseHttpApiUrl="https://dashscope-intl.aliyuncs.com/api/v1";}
public static void simpleMultiModalConversationCall()
throws ApiException, NoApiKeyException, UploadFileException {
MultiModalConversation conv = new MultiModalConversation();
Map<String, Object> map = new HashMap<>();
map.put("image", "https://img.alicdn.com/imgextra/i2/O1CN01ktT8451iQutqReELT_!!6000000004408-0-tps-689-487.jpg");
// 输入图像的最大像素阈值,超过该值图像会缩小,直到总像素低于max_pixels
map.put("max_pixels", 8388608);
// 输入图像的最小像素阈值,小于该值图像会放大,直到总像素大于min_pixels
map.put("min_pixels", 3072);
// 开启图像自动转正功能
map.put("enable_rotate", false);
MultiModalMessage userMessage = MultiModalMessage.builder().role(Role.USER.getValue())
.content(Arrays.asList(
map,
// 模型未设置内置任务时,支持在text字段中传入Prompt,若未传入则使用默认的Prompt:Please output only the text content from the image without any additional descriptions or formatting.
Collections.singletonMap("text", "请提取车票图像中的发票号码、车次、起始站、终点站、发车日期和时间点、座位号、席别类型、票价、身份证号码、购票人姓名。要求准确无误的提取上述关键信息、不要遗漏和捏造虚假信息,模糊或者强光遮挡的单个文字可以用英文问号?代替。返回数据格式以json方式输出,格式为:{'发票号码': 'xxx', '发票号码': 'xxx', '起始站': 'xxx', '终点站': 'xxx', '发车日期和时间点':'xxx', '座位号': 'xxx', '座位号': 'xxx','票价':'xxx', '身份证号码': 'xxx', '购票人姓名': 'xxx'"))).build();
MultiModalConversationParam param = MultiModalConversationParam.builder()
// 若没有配置环境变量,请用百炼API Key将下行替换为:.apiKey("sk-xxx")
// 新加坡和北京地域的API Key不同。获取API Key:https://help.aliyun.com/zh/model-studio/get-api-key
.apiKey(System.getenv("DASHSCOPE_API_KEY"))
.model("qwen-vl-ocr-latest")
.message(userMessage)
.incrementalOutput(true)
.build();
Flowable<MultiModalConversationResult> result = conv.streamCall(param);
result.blockingForEach(item -> {
try {
List<Map<String, Object>> contentList = item.getOutput().getChoices().get(0).getMessage().getContent();
if (!contentList.isEmpty()){
System.out.println(contentList.get(0).get("text"));
}//
} catch (Exception e){
System.exit(0);
}
});
}
public static void main(String[] args) {
try {
simpleMultiModalConversationCall();
} catch (ApiException | NoApiKeyException | UploadFileException e) {
System.out.println(e.getMessage());
}
System.exit(0);
}
}
curl# ======= 重要提示 =======
# 新加坡和北京地域的API Key不同。获取API Key:https://help.aliyun.com/zh/model-studio/get-api-key
# 以下是北京地域base-url,如果使用新加坡地域的模型,需要将base_url替换为:https://dashscope-intl.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation \
# === 执行时请删除该注释 ===
curl --location 'https://dashscope.aliyuncs.com/api/v1/services/aigc/multimodal-generation/generation' \
--header "Authorization: Bearer $DASHSCOPE_API_KEY" \
--header 'Content-Type: application/json' \
-H 'X-DashScope-SSE: enable' \
--data '{
"model": "qwen-vl-ocr-latest",
"input":{
"messages":[
{
"role": "user",
"content": [
{
"image": "https://img.alicdn.com/imgextra/i2/O1CN01ktT8451iQutqReELT_!!6000000004408-0-tps-689-487.jpg",
"min_pixels": 3072,
"max_pixels": 8388608
},
{"type": "text", "text": "请提取车票图像中的发票号码、车次、起始站、终点站、发车日期和时间点、座位号、席别类型、票价、身份证号码、购票人姓名。要求准确无误的提取上述关键信息、不要遗漏和捏造虚假信息,模糊或者强光遮挡的单个文字可以用英文问号?代替。返回数据格式以json方式输出,格式为:{'发票号码': 'xxx', '发票号码': 'xxx', '起始站': 'xxx', '终点站': 'xxx', '发车日期和时间点':'xxx', '座位号': 'xxx', '座位号': 'xxx','票价':'xxx', '身份证号码': 'xxx', '购票人姓名': 'xxx'"}
]
}
]
},
"parameters": {
"incremental_output": true
}
}'
|