文档介绍了表格智能解析API的调用方式,调用前,请先阅读API使用指南。
表格智能解析接口可以进行表格抽取和理解,从PDF或图片格式的表格文档中提取出表格样式、表格内容、文本KV、表格KV等信息。
表格智能解析接口为异步接口,需要先调用表格智能解析异步提交服务SubmitTableUnderstandingJob接口进行异步任务提交,然后调用表格智能解析结果查询服务GetTableUnderstandingResult接口进行结果轮询,建议每10秒轮询一次,最多轮询120分钟,如果120分钟还未查询到处理完成结果,则视为处理超时。
当异步任务处理提交后,用户可以在处理结束后的24小时之内查询处理结果,超过24小时后将无法查询到处理结果。
免费额度为100页,用完即止。若您的免费额度或资源包消耗完毕,系统将默认采用按量付费的后付费计费方式。
首次解析超过100页的文档,解析成功后,系统会自动进入后付费计费模式,产生后付费账单。
步骤一:调用表格智能解析异步提交服务
异步提交服务支持本地文件和URL文件两种方式:
URL上传的异步提交服务接口为:SubmitTableUnderstandingJob接口。
本地文件上传的异步提交服务接口为:SubmitTableUnderstandingJobAdvance接口。
请求参数
名称 | 类型 | 必填 | 描述 | 示例值 |
FileUrl | string | 是 | 以文档URL方式时使用。 单个文档(支持1000页以内、100 MB以内的PDF文档,支持20 MB以内的单张图片)。 | https://example.com/example.pdf |
FileUrlObject | stream | 是 | 以本地文件上传方式调用接口时使用。 单个文档(支持1000页以内、100 MB以内的PDF文档,支持20 MB以内的单张图片)。 | 本地文件生成的FileInputStream |
FileName | string | 否 | 文件名,需带文件类型后缀。与fileNameExtension二选一 | example.pdf |
FileNameExtension | string | 否 | 文件类型,与fileName二选一。支持类型:pdf、jpg、jpeg、png、bmp、gif | pdf |
支持的文档格式:pdf和图片,图片支持jpg、jpeg、png、bmp、gif。
返回参数
名称 | 类型 | 描述 | 示例值 |
RequestId | string | 请求唯一ID | 43A29C77-405E-4CC0-BC55-EE694AD0**** |
Data | object | 返回数据 | {"Id": "docmind-20220712-b15f****"} |
+id | string | 业务订单号,用于后续查询接口进行查询的唯一标识 | docmind-20220712-b15f**** |
Code | string | 状态码 | 200 |
Message | string | 详细信息 | message |
示例
本接口支持本地文档上传和传入文档URL这两种调用方式。
以Java SDK为例,本地文档上传调用方式的请求示例代码如下,调用submitTableUnderstandingJobAdvance接口,通过fileUrlObject参数实现本地文档上传。
获取并使用AccessKey信息的方式,可参考SDK概述中不同语言的SDK使用指南。
import com.aliyun.docmind_api20220711.models.*;
import com.aliyun.teaopenapi.models.Config;
import com.aliyun.docmind_api20220711.Client;
import com.aliyun.teautil.models.RuntimeOptions;
import java.io.File;
import java.io.FileInputStream;
/**
 * Submits a local document to the table-understanding service as an asynchronous job.
 *
 * <p>Builds an SDK client from the default credential configuration, streams the local
 * file through {@code submitTableUnderstandingJobAdvance}, and closes the file stream
 * once the call has returned or failed.
 *
 * @throws Exception if the file cannot be opened or the SDK call fails
 */
public static void submit() throws Exception {
    // Initialize the Credentials client from the default credential configuration.
    com.aliyun.credentials.Client credentialClient = new com.aliyun.credentials.Client();
    Config config = new Config()
            // AccessKey ID read from the credentials configuration.
            .setAccessKeyId(credentialClient.getAccessKeyId())
            // AccessKey Secret read from the credentials configuration.
            .setAccessKeySecret(credentialClient.getAccessKeySecret());
    // Service endpoint. IPv4 and IPv6 are both supported; for IPv6 use
    // docmind-api-dualstack.cn-hangzhou.aliyuncs.com instead.
    config.endpoint = "docmind-api.cn-hangzhou.aliyuncs.com";
    Client client = new Client(config);
    // Runtime options passed alongside the request.
    RuntimeOptions runtime = new RuntimeOptions();
    File file = new File("D:\\example.pdf");
    // try-with-resources closes the stream even when the request throws,
    // so the file handle is never leaked.
    try (FileInputStream fileStream = new FileInputStream(file)) {
        SubmitTableUnderstandingJobAdvanceRequest advanceRequest = new SubmitTableUnderstandingJobAdvanceRequest();
        advanceRequest.fileUrlObject = fileStream;
        advanceRequest.fileName = "example.pdf";
        // Send the request; any failure propagates to the caller.
        SubmitTableUnderstandingJobResponse response =
                client.submitTableUnderstandingJobAdvance(advanceRequest, runtime);
    }
}
const Client = require('@alicloud/docmind-api20220711');
const Credential = require('@alicloud/credentials');
const Util = require('@alicloud/tea-util');
const fs = require('fs');
/**
 * Uploads a local file through submitTableUnderstandingJobAdvance.
 *
 * @returns {Promise<object>} the body of the submit response
 */
const getResult = async () => {
  // Initialize the Credentials client from the default credential configuration.
  const cred = new Credential.default();
  const { accessKeyId, accessKeySecret } = cred.credential;
  const clientConfig = {
    // Service endpoint. IPv4 and IPv6 are both supported; for IPv6 use
    // docmind-api-dualstack.cn-hangzhou.aliyuncs.com instead.
    endpoint: 'docmind-api.cn-hangzhou.aliyuncs.com',
    // AccessKey ID / Secret read from the credentials configuration.
    accessKeyId,
    accessKeySecret,
    type: 'access_key',
    regionId: 'cn-hangzhou'
  };
  const client = new Client.default(clientConfig);

  // Describe the upload: the file content as a stream plus its name (with extension).
  const uploadRequest = new Client.SubmitTableUnderstandingJobAdvanceRequest();
  uploadRequest.fileUrlObject = fs.createReadStream('./example.pdf');
  uploadRequest.fileName = 'example.pdf';

  const runtimeOptions = new Util.RuntimeOptions({});
  const { body } = await client.submitTableUnderstandingJobAdvance(uploadRequest, runtimeOptions);
  return body;
};
from alibabacloud_docmind_api20220711.client import Client as docmind_api20220711Client
from alibabacloud_tea_openapi import models as open_api_models
from alibabacloud_docmind_api20220711 import models as docmind_api20220711_models
from alibabacloud_tea_util.client import Client as UtilClient
from alibabacloud_tea_util import models as util_models
from alibabacloud_credentials.client import Client as CredClient
def submit_file():
    """Submit a local file to the table-understanding service and print the job id.

    Builds an SDK client from the default credential configuration, uploads
    ``./example.pdf`` through ``submit_table_understanding_job_advance`` and
    prints ``response.body.data.id``. The file handle is closed when the call
    finishes, whether it succeeded or failed.
    """
    cred = CredClient()
    config = open_api_models.Config(
        # AccessKey ID read from the credentials configuration.
        access_key_id=cred.get_access_key_id(),
        # AccessKey Secret read from the credentials configuration.
        access_key_secret=cred.get_access_key_secret()
    )
    # Service endpoint.
    config.endpoint = 'docmind-api.cn-hangzhou.aliyuncs.com'
    client = docmind_api20220711Client(config)
    runtime = util_models.RuntimeOptions()
    # The context manager guarantees the file is closed even if the request raises.
    with open("./example.pdf", "rb") as file_stream:
        request = docmind_api20220711_models.SubmitTableUnderstandingJobAdvanceRequest(
            # file_url_object: stream of the local file.
            file_url_object=file_stream,
            # file_name: file name; it must include the file-type suffix.
            file_name='123.pdf',
            # file_name_extension: file suffix; an alternative to file_name.
            file_name_extension='pdf'
        )
        try:
            # Print the API return value yourself when running this sample.
            response = client.submit_table_understanding_job_advance(request, runtime)
            # The response is layered as body -> data -> attribute, and attribute
            # names start with a lowercase letter. This prints the returned job id.
            print(response.body.data.id)
        except Exception as error:
            # Print the error here if needed. Not every exception carries a
            # `.message` attribute, so fall back to str(error) rather than
            # raising AttributeError and hiding the real failure.
            UtilClient.assert_as_string(getattr(error, 'message', str(error)))
import (
"fmt"
"os"
openClient "github.com/alibabacloud-go/darabonba-openapi/v2/client"
"github.com/alibabacloud-go/docmind-api-20220711/client"
"github.com/alibabacloud-go/tea-utils/v2/service"
"github.com/aliyun/credentials-go/credentials"
)
// submit uploads a local file through SubmitTableUnderstandingJobAdvance and
// prints the response body. It panics on any failure: credential lookup,
// client construction, opening the file, or the API call itself.
func submit() {
	// Initialize the Credentials client from the default credential configuration.
	credential, err := credentials.NewCredential(nil)
	if err != nil {
		panic(err)
	}
	// AccessKey ID read from the credentials configuration.
	accessKeyId, err := credential.GetAccessKeyId()
	if err != nil {
		panic(err)
	}
	// AccessKey Secret read from the credentials configuration.
	accessKeySecret, err := credential.GetAccessKeySecret()
	if err != nil {
		panic(err)
	}
	// Service endpoint. IPv4 and IPv6 are both supported; for IPv6 use
	// docmind-api-dualstack.cn-hangzhou.aliyuncs.com instead.
	var endpoint string = "docmind-api.cn-hangzhou.aliyuncs.com"
	config := openClient.Config{AccessKeyId: accessKeyId, AccessKeySecret: accessKeySecret, Endpoint: &endpoint}
	// Create the SDK client.
	cli, err := client.NewClient(&config)
	if err != nil {
		panic(err)
	}
	// Open the local document that will be uploaded.
	filename := "D:\\example.pdf"
	f, err := os.Open(filename)
	if err != nil {
		panic(err)
	}
	// Release the file handle on every exit path, including a panic below.
	defer f.Close()
	// Build the request.
	request := client.SubmitTableUnderstandingJobAdvanceRequest{
		FileName:      &filename,
		FileUrlObject: f,
	}
	// Runtime options passed alongside the request.
	options := service.RuntimeOptions{}
	response, err := cli.SubmitTableUnderstandingJobAdvance(&request, &options)
	if err != nil {
		panic(err)
	}
	// Print the result.
	fmt.Println(response.Body.String())
}
using Newtonsoft.Json;
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Threading.Tasks;
using Tea;
using Tea.Utils;
/// <summary>
/// Uploads a local file through SubmitTableUnderstandingJobAdvance.
/// The file stream is disposed once the call has completed or failed.
/// </summary>
public static void SubmitFile()
{
    // Initialize the Credentials client from the default credential configuration.
    var akCredential = new Aliyun.Credentials.Client(null);
    AlibabaCloud.OpenApiClient.Models.Config config = new AlibabaCloud.OpenApiClient.Models.Config
    {
        // AccessKey ID read from the credentials configuration.
        AccessKeyId = akCredential.GetAccessKeyId(),
        // AccessKey Secret read from the credentials configuration.
        AccessKeySecret = akCredential.GetAccessKeySecret(),
    };
    // Service endpoint.
    config.Endpoint = "docmind-api.cn-hangzhou.aliyuncs.com";
    // Requires an additional dependency: AlibabaCloud.DarabonbaStream.
    AlibabaCloud.SDK.Docmind_api20220711.Client client = new AlibabaCloud.SDK.Docmind_api20220711.Client(config);
    // `using` disposes the stream on every exit path so the file handle is not leaked.
    using (Stream bodyStream = AlibabaCloud.DarabonbaStream.StreamUtil.ReadFromFilePath("<YOUR-FILE-PATH>"))
    {
        AlibabaCloud.SDK.Docmind_api20220711.Models.SubmitTableUnderstandingJobAdvanceRequest request = new AlibabaCloud.SDK.Docmind_api20220711.Models.SubmitTableUnderstandingJobAdvanceRequest
        {
            FileUrlObject = bodyStream,
            FileNameExtension = "pdf"
        };
        AlibabaCloud.TeaUtil.Models.RuntimeOptions runtime = new AlibabaCloud.TeaUtil.Models.RuntimeOptions();
        try
        {
            // Print the API return value yourself when running this sample.
            client.SubmitTableUnderstandingJobAdvance(request, runtime);
        }
        catch (TeaException error)
        {
            // Print the error here if needed.
            AlibabaCloud.TeaUtil.Common.AssertAsString(error.Message);
        }
        catch (Exception _error)
        {
            // Wrap any other exception in a TeaException so both paths are handled alike.
            TeaException error = new TeaException(new Dictionary<string, object>
            {
                { "message", _error.Message }
            });
            // Print the error here if needed.
            AlibabaCloud.TeaUtil.Common.AssertAsString(error.Message);
        }
    }
}
以Java SDK为例,传入文档URL调用方式的请求示例代码如下,调用submitTableUnderstandingJob接口,通过fileUrl参数实现传入文档URL。请注意,您传入的文档URL必须为公网可访问下载的公网URL地址,无跨域限制,URL不带特殊转义字符。
获取并使用AccessKey信息的方式,可参考SDK概述中不同语言的SDK使用指南。
import com.aliyun.docmind_api20220711.models.*;
import com.aliyun.teaopenapi.models.Config;
import com.aliyun.docmind_api20220711.Client;
/**
 * Submits a document by URL to the table-understanding service as an asynchronous job.
 *
 * @throws Exception if the SDK call fails
 */
public static void submit() throws Exception {
    // Initialize the Credentials client from the default credential configuration.
    com.aliyun.credentials.Client credentials = new com.aliyun.credentials.Client();

    Config clientConfig = new Config();
    // AccessKey ID read from the credentials configuration.
    clientConfig.setAccessKeyId(credentials.getAccessKeyId());
    // AccessKey Secret read from the credentials configuration.
    clientConfig.setAccessKeySecret(credentials.getAccessKeySecret());
    // Service endpoint. IPv4 and IPv6 are both supported; for IPv6 use
    // docmind-api-dualstack.cn-hangzhou.aliyuncs.com instead.
    clientConfig.endpoint = "docmind-api.cn-hangzhou.aliyuncs.com";
    Client docmindClient = new Client(clientConfig);

    // Describe the document: its URL and its file name (with extension).
    SubmitTableUnderstandingJobRequest submitRequest = new SubmitTableUnderstandingJobRequest();
    submitRequest.fileUrl = "https://example.com/example.pdf";
    submitRequest.fileName = "example.pdf";

    SubmitTableUnderstandingJobResponse submitResponse = docmindClient.submitTableUnderstandingJob(submitRequest);
}
const Client = require('@alicloud/docmind-api20220711');
const Credential = require('@alicloud/credentials');
/**
 * Submits a document by URL through submitTableUnderstandingJob.
 *
 * The URL must be publicly reachable and downloadable.
 *
 * @returns {Promise<object>} the body of the submit response
 */
const getResult = async () => {
  // Initialize the Credentials client from the default credential configuration.
  const cred = new Credential.default();
  const client = new Client.default({
    // Service endpoint. IPv4 and IPv6 are both supported; for IPv6 use
    // docmind-api-dualstack.cn-hangzhou.aliyuncs.com instead.
    endpoint: 'docmind-api.cn-hangzhou.aliyuncs.com',
    // AccessKey ID read from the credentials configuration.
    accessKeyId: cred.credential.accessKeyId,
    // AccessKey Secret read from the credentials configuration.
    accessKeySecret: cred.credential.accessKeySecret,
    type: 'access_key',
    regionId: 'cn-hangzhou'
  });
  // URL mode uses the plain (non-Advance) request and passes the document
  // location in fileUrl; no local file stream is involved.
  const request = new Client.SubmitTableUnderstandingJobRequest();
  request.fileUrl = 'https://example.com/example.pdf';
  request.fileName = 'example.pdf';
  const response = await client.submitTableUnderstandingJob(request);
  return response.body;
};
from alibabacloud_docmind_api20220711.client import Client as docmind_api20220711Client
from alibabacloud_tea_openapi import models as open_api_models
from alibabacloud_docmind_api20220711 import models as docmind_api20220711_models
from alibabacloud_tea_util.client import Client as UtilClient
from alibabacloud_credentials.client import Client as CredClient
def submit_url():
    """Submit a document by URL to the table-understanding service and print the job id.

    Builds an SDK client from the default credential configuration, calls
    ``submit_table_understanding_job`` and prints ``response.body.data.id``.
    """
    cred = CredClient()
    config = open_api_models.Config(
        # AccessKey ID read from the credentials configuration.
        access_key_id=cred.get_access_key_id(),
        # AccessKey Secret read from the credentials configuration.
        access_key_secret=cred.get_access_key_secret()
    )
    # Service endpoint.
    config.endpoint = 'docmind-api.cn-hangzhou.aliyuncs.com'
    client = docmind_api20220711Client(config)
    request = docmind_api20220711_models.SubmitTableUnderstandingJobRequest(
        # file_url: URL of the document.
        file_url='https://example.com/example.pdf',
        # file_name: file name; it must include the file-type suffix.
        file_name='123.pdf',
        # file_name_extension: file suffix; an alternative to file_name.
        file_name_extension='pdf'
    )
    try:
        # Print the API return value yourself when running this sample.
        response = client.submit_table_understanding_job(request)
        # The response is layered as body -> data -> attribute, and attribute
        # names start with a lowercase letter. This prints the returned job id.
        print(response.body.data.id)
    except Exception as error:
        # Print the error here if needed. Not every exception carries a
        # `.message` attribute, so fall back to str(error) rather than
        # raising AttributeError and hiding the real failure.
        UtilClient.assert_as_string(getattr(error, 'message', str(error)))
import (
"fmt"
openClient "github.com/alibabacloud-go/darabonba-openapi/v2/client"
"github.com/alibabacloud-go/docmind-api-20220711/client"
"github.com/aliyun/credentials-go/credentials"
)
// submit sends a document URL through SubmitTableUnderstandingJob and prints
// the response body. It panics on any failure: credential lookup, client
// construction, or the API call itself.
func submit() {
	// Initialize the Credentials client from the default credential configuration.
	credential, err := credentials.NewCredential(nil)
	if err != nil {
		panic(err)
	}
	// AccessKey ID read from the credentials configuration.
	accessKeyId, err := credential.GetAccessKeyId()
	if err != nil {
		panic(err)
	}
	// AccessKey Secret read from the credentials configuration.
	accessKeySecret, err := credential.GetAccessKeySecret()
	if err != nil {
		panic(err)
	}
	// Service endpoint. IPv4 and IPv6 are both supported; for IPv6 use
	// docmind-api-dualstack.cn-hangzhou.aliyuncs.com instead.
	var endpoint string = "docmind-api.cn-hangzhou.aliyuncs.com"
	config := openClient.Config{AccessKeyId: accessKeyId, AccessKeySecret: accessKeySecret, Endpoint: &endpoint}
	// Create the SDK client.
	cli, err := client.NewClient(&config)
	if err != nil {
		panic(err)
	}
	// Document URL.
	fileURL := "https://example.com/example.pdf"
	// File name.
	fileName := "example.pdf"
	// Build the request.
	request := client.SubmitTableUnderstandingJobRequest{
		FileUrl:  &fileURL,
		FileName: &fileName,
	}
	response, err := cli.SubmitTableUnderstandingJob(&request)
	if err != nil {
		panic(err)
	}
	// Print the result.
	fmt.Println(response.Body.String())
}
using Newtonsoft.Json;
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Threading.Tasks;
using Tea;
using Tea.Utils;
/// <summary>
/// Submits a document by URL through SubmitTableUnderstandingJob.
/// </summary>
public static void SubmitUrl()
{
    // Initialize the Credentials client from the default credential configuration.
    var akCredential = new Aliyun.Credentials.Client(null);
    AlibabaCloud.OpenApiClient.Models.Config config = new AlibabaCloud.OpenApiClient.Models.Config
    {
        // AccessKey ID read from the credentials configuration.
        AccessKeyId = akCredential.GetAccessKeyId(),
        // AccessKey Secret read from the credentials configuration.
        AccessKeySecret = akCredential.GetAccessKeySecret(),
    };
    // Service endpoint.
    config.Endpoint = "docmind-api.cn-hangzhou.aliyuncs.com";
    AlibabaCloud.SDK.Docmind_api20220711.Client client = new AlibabaCloud.SDK.Docmind_api20220711.Client(config);
    AlibabaCloud.SDK.Docmind_api20220711.Models.SubmitTableUnderstandingJobRequest request = new AlibabaCloud.SDK.Docmind_api20220711.Models.SubmitTableUnderstandingJobRequest
    {
        // Placeholder URL in the same form as the documented example value
        // (scheme + host + path to the file).
        FileUrl = "https://example.com/example.pdf",
        FileNameExtension = "pdf"
    };
    try
    {
        // Print the API return value yourself when running this sample.
        client.SubmitTableUnderstandingJob(request);
    }
    catch (TeaException error)
    {
        // Print the error here if needed.
        AlibabaCloud.TeaUtil.Common.AssertAsString(error.Message);
    }
    catch (Exception _error)
    {
        // Wrap any other exception in a TeaException so both paths are handled alike.
        TeaException error = new TeaException(new Dictionary<string, object>
        {
            { "message", _error.Message }
        });
        // Print the error here if needed.
        AlibabaCloud.TeaUtil.Common.AssertAsString(error.Message);
    }
}
use AlibabaCloud\SDK\Docmindapi\V20220711\Docmindapi;
use AlibabaCloud\SDK\Docmindapi\V20220711\Models\SubmitTableUnderstandingJobRequest;
use Darabonba\OpenApi\Models\Config;
use AlibabaCloud\Tea\Utils\Utils\RuntimeOptions;
use AlibabaCloud\Tea\Exception\TeaUnableRetryError;
use AlibabaCloud\Credentials\Credential;
// Initialize the Credentials client from the default credential configuration.
$credential = new Credential();

// Client configuration.
$config = new Config();
$config->type = "access_key";
$config->regionId = "cn-hangzhou";
// Service endpoint. IPv4 and IPv6 are both supported; for IPv6 use
// docmind-api-dualstack.cn-hangzhou.aliyuncs.com instead.
$config->endpoint = "docmind-api.cn-hangzhou.aliyuncs.com";
// AccessKey ID read from the credentials configuration.
$config->accessKeyId = $credential->getCredential()->getAccessKeyId();
// AccessKey Secret read from the credentials configuration.
$config->accessKeySecret = $credential->getCredential()->getAccessKeySecret();
$client = new Docmindapi($config);

// Request: the document URL plus its file name (with extension).
$request = new SubmitTableUnderstandingJobRequest();
$request->fileUrl = "https://example.com/example.pdf";
$request->fileName = "example.pdf";

// Runtime options: connection pool size and timeouts.
$runtime = new RuntimeOptions();
$runtime->readTimeout = 10000;
$runtime->connectTimeout = 10000;
$runtime->maxIdleConns = 3;

try {
    $response = $client->submitTableUnderstandingJob($request, $runtime);
    var_dump($response->toMap());
} catch (TeaUnableRetryError $e) {
    // Dump the failure details.
    var_dump($e->getMessage());
    var_dump($e->getErrorInfo());
    var_dump($e->getLastException());
    var_dump($e->getLastRequest());
}
正常返回示例
JSON
格式
{
"RequestId": "43A29C77-405E-4CC0-BC55-EE694AD0****",
"Data": {
"Id": "docmind-20220712-b15f****"
}
}
步骤二:轮询表格智能解析结果查询服务GetTableUnderstandingResult接口
调用查询接口的入参ID就是前面异步任务提交接口返回的出参ID,查询结果有处理中、处理成功、处理失败三种情况。建议每10秒轮询一次,最多轮询120分钟。若明确返回Completed为true或者超过轮询最大时间,则终止轮询。
请求参数
名称 | 类型 | 必填 | 描述 | 示例值 |
Id | string | 是 | 需要查询的业务订单号,订单号从提交接口的返回结果中获取 | docmind-20220712-b15f**** |
返回参数
名称 | 类型 | 描述 | 示例值 |
RequestId | string | 请求唯一ID | 43A29C77-405E-4CC0-BC55-EE694AD0**** |
Completed | boolean | 异步任务是否处理完成,false表示任务仍在处理中,true代表任务处理完成,有处理成功或处理失败的明确结果 | true |
Status | String | 异步任务处理完成的状态,最终处理结束后的状态。Success为处理成功,Fail为处理失败 | Success |
Data | string | 返回数据,表格智能解析的解析结果,输出从表格中解析出表格样式、表格内容、表格KV等内容的JSON数据结构返回 | |
Code | string | 状态码 | 200 |
Message | string | 详细信息 | message |
示例
以Java SDK为例,调用表格智能解析接口的结果查询类API示例代码如下,调用getTableUnderstandingResult接口,通过ID参数传入查询流水号。
获取并使用AccessKey信息的方式,可参考SDK概述中不同语言的SDK使用指南。
import com.aliyun.docmind_api20220711.models.*;
import com.aliyun.teaopenapi.models.Config;
import com.aliyun.docmind_api20220711.Client;
/**
 * Queries the result of a previously submitted table-understanding job by its id.
 *
 * @throws Exception if the SDK call fails
 */
public static void submit() throws Exception {
    // Initialize the Credentials client from the default credential configuration.
    com.aliyun.credentials.Client credentials = new com.aliyun.credentials.Client();

    Config clientConfig = new Config();
    // AccessKey ID read from the credentials configuration.
    clientConfig.setAccessKeyId(credentials.getAccessKeyId());
    // AccessKey Secret read from the credentials configuration.
    clientConfig.setAccessKeySecret(credentials.getAccessKeySecret());
    // Service endpoint. IPv4 and IPv6 are both supported; for IPv6 use
    // docmind-api-dualstack.cn-hangzhou.aliyuncs.com instead.
    clientConfig.endpoint = "docmind-api.cn-hangzhou.aliyuncs.com";
    Client docmindClient = new Client(clientConfig);

    // The id is the one returned by the submit call.
    GetTableUnderstandingResultRequest queryRequest = new GetTableUnderstandingResultRequest();
    queryRequest.id = "docmind-20220902-824b****";

    GetTableUnderstandingResultResponse queryResponse = docmindClient.getTableUnderstandingResult(queryRequest);
}
const Client = require('@alicloud/docmind-api20220711');
const Credential = require('@alicloud/credentials');
/**
 * Queries the result of a previously submitted table-understanding job.
 *
 * @returns {Promise<object>} the body of the query response
 */
const getResult = async () => {
  // Initialize the Credentials client from the default credential configuration.
  const cred = new Credential.default();
  const { accessKeyId, accessKeySecret } = cred.credential;
  const clientConfig = {
    // Service endpoint. IPv4 and IPv6 are both supported; for IPv6 use
    // docmind-api-dualstack.cn-hangzhou.aliyuncs.com instead.
    endpoint: 'docmind-api.cn-hangzhou.aliyuncs.com',
    // AccessKey ID / Secret read from the credentials configuration.
    accessKeyId,
    accessKeySecret,
    type: 'access_key',
    regionId: 'cn-hangzhou'
  };
  const client = new Client.default(clientConfig);

  // The id is the one returned by the submit call.
  const queryRequest = new Client.GetTableUnderstandingResultRequest();
  queryRequest.id = "docmind-20220902-824b****";

  const { body } = await client.getTableUnderstandingResult(queryRequest);
  return body;
};
from alibabacloud_docmind_api20220711.client import Client as docmind_api20220711Client
from alibabacloud_tea_openapi import models as open_api_models
from alibabacloud_docmind_api20220711 import models as docmind_api20220711_models
from alibabacloud_tea_util.client import Client as UtilClient
from alibabacloud_credentials.client import Client as CredClient
def query():
    """Query the result of a submitted table-understanding job and print it.

    Builds an SDK client from the default credential configuration, calls
    ``get_table_understanding_result`` and prints ``response.body.completed``
    followed by ``response.body.data``.
    """
    cred = CredClient()
    config = open_api_models.Config(
        # AccessKey ID read from the credentials configuration.
        access_key_id=cred.get_access_key_id(),
        # AccessKey Secret read from the credentials configuration.
        access_key_secret=cred.get_access_key_secret()
    )
    # Service endpoint.
    config.endpoint = 'docmind-api.cn-hangzhou.aliyuncs.com'
    client = docmind_api20220711Client(config)
    request = docmind_api20220711_models.GetTableUnderstandingResultRequest(
        # id: the id returned by the submit call.
        id='docmind-20220902-824b****'
    )
    try:
        # Print the API return value yourself when running this sample.
        response = client.get_table_understanding_result(request)
        # The response is layered as body -> data -> attribute, and attribute
        # names start with a lowercase letter.
        # response.body.completed tells whether polling must continue.
        print(response.body.completed)
        # The result payload. Converting response.body.data to JSON first and
        # reading values from that is recommended.
        print(response.body.data)
    except Exception as error:
        # Print the error here if needed. Not every exception carries a
        # `.message` attribute, so fall back to str(error) rather than
        # raising AttributeError and hiding the real failure.
        UtilClient.assert_as_string(getattr(error, 'message', str(error)))
import (
"fmt"
openClient "github.com/alibabacloud-go/darabonba-openapi/v2/client"
"github.com/alibabacloud-go/docmind-api-20220711/client"
"github.com/aliyun/credentials-go/credentials"
)
// submit queries the result of a table-understanding job through
// GetTableUnderstandingResult and prints the response body. It panics on any
// failure: credential lookup, client construction, or the API call itself.
func submit() {
	// Initialize the Credentials client from the default credential configuration.
	credential, err := credentials.NewCredential(nil)
	if err != nil {
		panic(err)
	}
	// AccessKey ID read from the credentials configuration.
	accessKeyId, err := credential.GetAccessKeyId()
	if err != nil {
		panic(err)
	}
	// AccessKey Secret read from the credentials configuration.
	accessKeySecret, err := credential.GetAccessKeySecret()
	if err != nil {
		panic(err)
	}
	// Service endpoint. IPv4 and IPv6 are both supported; for IPv6 use
	// docmind-api-dualstack.cn-hangzhou.aliyuncs.com instead.
	var endpoint string = "docmind-api.cn-hangzhou.aliyuncs.com"
	config := openClient.Config{AccessKeyId: accessKeyId, AccessKeySecret: accessKeySecret, Endpoint: &endpoint}
	// Create the SDK client.
	cli, err := client.NewClient(&config)
	if err != nil {
		panic(err)
	}
	id := "docmind-20220925-76b1****"
	// Call the query API.
	request := client.GetTableUnderstandingResultRequest{Id: &id}
	response, err := cli.GetTableUnderstandingResult(&request)
	if err != nil {
		panic(err)
	}
	// Print the query result.
	fmt.Println(response.Body.String())
}
using Newtonsoft.Json;
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Threading.Tasks;
using Tea;
using Tea.Utils;
/// <summary>
/// Queries the result of a previously submitted table-understanding job by its id.
/// </summary>
public static void GetResult()
{
    // Initialize the Credentials client from the default credential configuration.
    var akCredential = new Aliyun.Credentials.Client(null);
    AlibabaCloud.OpenApiClient.Models.Config config = new AlibabaCloud.OpenApiClient.Models.Config
    {
        // AccessKey ID read from the credentials configuration.
        AccessKeyId = akCredential.GetAccessKeyId(),
        // AccessKey Secret read from the credentials configuration.
        AccessKeySecret = akCredential.GetAccessKeySecret(),
    };
    // Service endpoint.
    config.Endpoint = "docmind-api.cn-hangzhou.aliyuncs.com";
    AlibabaCloud.SDK.Docmind_api20220711.Client client = new AlibabaCloud.SDK.Docmind_api20220711.Client(config);
    AlibabaCloud.SDK.Docmind_api20220711.Models.GetTableUnderstandingResultRequest request = new AlibabaCloud.SDK.Docmind_api20220711.Models.GetTableUnderstandingResultRequest
    {
        // The id is the one returned by the submit call.
        Id = "docmind-20220902-824b****"
    };
    try
    {
        // Print the API return value yourself when running this sample.
        client.GetTableUnderstandingResult(request);
    }
    catch (TeaException error)
    {
        // Print the error here if needed.
        AlibabaCloud.TeaUtil.Common.AssertAsString(error.Message);
    }
    catch (Exception _error)
    {
        // Wrap any other exception in a TeaException so both paths are handled alike.
        TeaException error = new TeaException(new Dictionary<string, object>
        {
            { "message", _error.Message }
        });
        // Print the error here if needed.
        AlibabaCloud.TeaUtil.Common.AssertAsString(error.Message);
    }
}
use AlibabaCloud\SDK\Docmindapi\V20220711\Docmindapi;
use AlibabaCloud\SDK\Docmindapi\V20220711\Models\GetTableUnderstandingResultRequest;
use Darabonba\OpenApi\Models\Config;
use AlibabaCloud\Tea\Utils\Utils\RuntimeOptions;
use AlibabaCloud\Tea\Exception\TeaUnableRetryError;
use AlibabaCloud\Credentials\Credential;
// Initialize the Credentials client from the default credential configuration.
$credential = new Credential();

// Client configuration.
$config = new Config();
$config->type = "access_key";
$config->regionId = "cn-hangzhou";
// Service endpoint. IPv4 and IPv6 are both supported; for IPv6 use
// docmind-api-dualstack.cn-hangzhou.aliyuncs.com instead.
$config->endpoint = "docmind-api.cn-hangzhou.aliyuncs.com";
// AccessKey ID read from the credentials configuration.
$config->accessKeyId = $credential->getCredential()->getAccessKeyId();
// AccessKey Secret read from the credentials configuration.
$config->accessKeySecret = $credential->getCredential()->getAccessKeySecret();
$client = new Docmindapi($config);

// Request: the id is the one returned by the submit call.
$request = new GetTableUnderstandingResultRequest();
$request->id = "docmind-20220902-824b****";

// Runtime options: connection pool size and timeouts.
$runtime = new RuntimeOptions();
$runtime->readTimeout = 10000;
$runtime->connectTimeout = 10000;
$runtime->maxIdleConns = 3;

try {
    $response = $client->getTableUnderstandingResult($request, $runtime);
    var_dump($response->toMap());
} catch (TeaUnableRetryError $e) {
    // Dump the failure details.
    var_dump($e->getMessage());
    var_dump($e->getErrorInfo());
    var_dump($e->getLastException());
    var_dump($e->getLastRequest());
}
查询结果有处理中、处理成功、处理失败三种情况,分别说明每种情况的返回结果示例。
处理中的返回结果如下所示:
{
"RequestId": "2AABD2C2-D24F-12F7-875D-683A27C3****",
"Completed": false,
"Code": "DocProcessing",
"Message": "Document processing",
"HostId": "ocr-api.cn-hangzhou.aliyuncs.com",
"Recommend": "https://next.api.aliyun.com/troubleshoot?q=DocProcessing&product=docmind-api"
}
处理中Completed会返回false,表示任务没有处理结束,仍在处理中。这种情况需要继续轮询,直到明确返回Completed为true或者超过轮询最大时间。
处理失败的返回结果如下所示:
{
"RequestId": "A8EF3A36-1380-1116-A39E-B377BE27****",
"Completed": true,
"Status": "Fail",
"Code": "UrlNotLegal",
"Message": "Failed to process the document. The document url you provided is not legal.",
"HostId": "docmind-api.cn-hangzhou.aliyuncs.com",
"Recommend": "https://next.api.aliyun.com/troubleshoot?q=IDP.UrlNotLegal&product=docmind-api"
}
处理失败Completed会返回true,表示任务处理结束,同时会返回Status为字符串的Fail,表示处理失败,同时会返回失败Code和详细原因Message。访问错误码可以查看错误码详细介绍。
处理成功的返回结果如下所示:
{
"Status": "Success",
"RequestId": "73134E1A-E281-1B2C-A105-D0ECFE2D****",
"Completed": true,
"Data": {
"tables": [{
"tableName": "表格1",
"tableNote": null,
"sheetName": "表格1",
"sheetNumber": 1,
"cells": [{
"content": "序号",
"autoWrap": false,
"colNumber": 0,
"rowNumber": 0,
"pos": [{
"x": 178,
"y": 1517
},
{
"x": 704,
"y": 1515
},
{
"x": 706,
"y": 1630
},
{
"x": 180,
"y": 1631
}
],
"filledColor": null,
"borderLine": null,
"borderStyle": null,
"borderColor": null,
"blocks": [{
"font": "Times_New_Roman",
"fontColor": "000000",
"fontSize": 10,
"bold": false,
"underline": false,
"deleteline": false,
"italic": false,
"alignment": null
}],
"mergedCell": false,
"firstMergedCell": false
}],
"tableKVs": [{
"type": "kv",
"relations": [{
"key": [
"序号"
],
"value": [
"测试1",
"测试2"
],
"keyConfidence": null,
"valueConfidence": null
}],
"cellIdRelations": [{
"key": [
"0"
],
"value": [
"4",
"7"
],
"keyConfidence": null,
"valueConfidence": null
}]
}]
}]
}
}
处理成功Completed会返回true,表示任务处理结束,同时会返回Status为字符串的Success,表示处理成功。具体的处理结果在Data节点中,接下来介绍Data节点的具体格式:
名称 | 类型 | 描述 |
requestId | string | 请求唯一ID |
success | bool | 是否成功 |
code | string | 错误码 |
msg | string | 总错误消息 |
data | object | 解析结果 |
+ tables | array | 表格信息 |
++ tableName | string | 表格名称 |
++ tableNote | string | 表注 |
++ sheetName | string | sheet名称 |
++ sheetNumber | int | sheet页码 |
++ cells | array | 单元格信息 |
+++ content | string | 单元格文本内容 |
+++ autoWrap | bool | 文本是否换行 |
+++ isMergedCell | bool | 是否合并单元格 |
+++ isFirstMergedCell | bool | 是否合并起始单元格 |
+++ colNumber | int | 列号 |
+++ rowNumber | int | 行号 |
+++ filledColor | string | 单元格填充色 |
+++ borderLine | string | 单元格边框线 |
+++ borderStyle | string | 边框样式 |
+++ borderColor | string | 边框颜色 |
+++ pos | array | 坐标 |
+++ blocks | array | 字块信息列表 |
++++ font | string | 字体 |
++++ fontColor | string | 字体颜色 |
++++ fontSize | int | 字体大小 |
++++ bold | bool | 是否粗体 |
++++ underline | bool | 是否下划线 |
++++ deleteline | bool | 是否删除线 |
++++ italic | bool | 是否斜体 |
++++ alignment | string | 对齐方式 |
++ tableKVs | array | 表格抽取内容 |
+++ type | string | 表格抽取内容类型 |
+++ relations | array | 表格抽取内容列表 |
++++ key | array | key列表 |
++++ keyConfidence | array | key的置信度列表 |
++++ value | array | value列表 |
++++ valueConfidence | array | value的置信度列表 |
+++ cellIdRelations | array | 单元格对应关系列表 |
++++ key | array | key所在单元格ID列表 |
++++ value | array | value所在单元格ID列表 |