该接口用于查询源数据的处理进度。为保证查询服务的稳定性,查询结果存在分钟级的缓存;对于实时性要求高的场景,支持配置秒级的缓存。单次查询最多支持50个文档。
请求入参
一级参数 | 说明 | 类型 | 是否必传 |
serviceId | 服务Id | Long | 是 |
dataIds | 数据主键Id | List<Long> | 是 |
dataImportId | 任务id | Long | 否 |
响应参数
一级参数 | 说明 | 类型 |
datas | 响应结果 | List<data> |
status | 任务状态 | String |
一级参数 | 二级参数 | 说明 | 类型 |
data | dataId | 数据主键Id | String |
data | status | 状态 | String |
data | versionValue | 版本信息 | String |
data | chunkNum | chunk数 | String |
data | opStatus | 算子状态信息统计 | Map<String,Integer> |
data | errorDataList | 错误详细信息 | List<errorData> |
二级参数 | 三级参数 | 说明 | 类型 |
errorData | opType | 算子类型 | String |
errorData | errorCode | 错误码 | String |
errorData | count | 错误数量 | Integer |
status类型
pending (待处理)
running (处理中)
success (处理成功)
fail (处理失败)
错误码说明
//系统初始化异常10****
SYSTEM_ERROR(100001, "系统异常")
,INTERNAL_ERROR(100002, "内部异常")
,INTERNAL_JOB_CONFIG_ERROR(100003, "系统工作任务配置异常")
,INTERNAL_WORKFLOW_CONFIG_ERROR(100004, "系统流程任务配置异常")
,INTERNAL_WORKFLOW_CHUNK_CONFIG_ERROR(100005, "系统流程切分任务配置异常")
,INTERNAL_WORKFLOW_CONFIG_FIELD_NOT_FOUND(100006, "无法找到合适的源数据")
,INTERNAL_WORKFLOW_CONFIG_FIELD_TYPE_ERROR(100007, "系统无法支持数据源")
//文本抽取异常列表11****
TEXTEXTRACTION_IDP_ERROR(110001,"idp解析异常")
,TEXTEXTRACTION_PARSE_SYSTEM_ERROR(110099,"抽取文本内部异常")
,TEXTEXTRACTION_FILE_URL_ERROR(110002,"系统文件路径解析异常")
,TEXTEXTRACTION_FILE_CONTENT_EX(110003,"系统文件内容解析异常")
,TEXTEXTRACTION_FILE_URL_NOT_FOUND(110004,"文档url获取文档失败")
,TEXTEXTRACTION_OSS_AUTH_FAIL(110005,"oss鉴权异常")
,TEXTEXTRACTION_SUBMIT_IDP_ERROR(110006,"提交idp任务异常")
,TEXTEXTRACTION_SUBMIT_IDP_RESULT_ERROR(110007,"提交idp任务获取结果异常")
,TEXTEXTRACTION_QUERY_IDP_ERROR(110008,"查询IDP任务异常")
,TEXTEXTRACTION_QUERY_IDP_RESULT_ERROR(110009,"查询IDP任务获取结果异常")
,TEXTEXTRACTION_QUERY_IDP_RESULT_TIME_OUT(110010,"IDP任务运行超时")
,TEXTEXTRACTION_QUERY_IDP_SYSTEM_ERROR(110011,"查询IDP任务未知异常")
,TEXTEXTRACTION_FILE_TYPE_NOT_SUPPORT(110012,"文件类型不支持")
//文本切分异常列表12****
CHUNK_TEXT_ERROR(120001,"切分文本异常")
,CHUNK_TEXT_PARSE_SYSTEM_ERROR(120099,"切分文本内容出现内部异常")
,CHUNK_TEXT_IS_EMPTY(120002,"切分文本为空")
,CHUNK_SUB_TASK_TEXT_IS_EMPTY(120003,"切分文本子任务文本内容为空")
,CHUNK_TASK_RESULT_IS_EMPTY(120004,"切分文本任务结果内容为空")
,CHUNK_TEXT_SERVER_INVOKE_ERROR(120005,"切分文本服务调用出现异常")
,CHUNK_TEXT_READ_LAYOUT_ERROR(120006,"读取文件layout出现异常")
,CHUNK_TEXT_PARSE_LAYOUT_ERROR(120007,"解析文件layout出现异常")
,CHUNK_TEXT_PARSE_ERROR(120008,"切分文本内容出现异常")
,CHUNK_TEXT_SERVER_RESULT_PARSE_ERROR(120009,"切分文本服务返回内容出现异常")
,CHUNK_TEXT_SERVER_RESULT_LAYOUT_EMPTY(120010,"切分文本服务返回解析内容为空")
//特征增强异常列表13****
ANALYSIS_FEATURE_CONFIG_ERROR(130001,"特征增强配置异常")
,ANALYSIS_FEATURE_PARSE_SYSTEM_ERROR(130099,"特征增强出现内部异常")
,ANALYSIS_FEATURE_EMBEDDING_CONFIG_ERROR(130002,"embedding配置异常")
,ANALYSIS_FEATURE_EMBEDDING_FIELD_ERROR(130003,"embedding属性校验异常")
,ANALYSIS_FEATURE_EMBEDDING_EMPTY(130004,"embedding内容为空")
,ANALYSIS_FEATURE_EMBEDDING_INVOKE_ERROR(130005,"embedding服务调用返回异常")
,ANALYSIS_FEATURE_EMBEDDING_RESULT_PARSE_ERROR(130006,"embedding服务返回解析出现异常")
,ANALYSIS_FEATURE_EMBEDDING_RESULT_PARSE_UNKONW_ERROR(130007,"embedding服务返回解析出现未知异常")
,ANALYSIS_FEATURE_EMBEDDING_TEXT_INVOKE_ERROR(130008,"embedding文本服务调用返回异常")
,ANALYSIS_FEATURE_EMBEDDING_TEXT_RESULT_PARSE_ERROR(130009,"embedding文本服务返回解析出现异常")
,ANALYSIS_FEATURE_EMBEDDING_TEXT_RESULT_PARSE_UNKONW_ERROR(130010,"embedding文本服务返回解析出现未知异常")
;
SDK调用
Java示例
<dependency>
<groupId>com.aliyun</groupId>
<artifactId>alinlp20200629</artifactId>
<version>2.7.2</version>
</dependency>
<!--出现java.lang.NoSuchMethodError: com.aliyun.credentials.Client.getCredential()Lcom/aliyun/credentials/models/CredentialModel;异常则引入-->
<dependency>
<groupId>com.aliyun</groupId>
<artifactId>credentials-java</artifactId>
<version>0.3.0</version>
</dependency>
/**
 * Sample entry point: queries the processing progress of a batch of source
 * documents and prints the response serialized as JSON.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if building the client or calling the service fails
 */
public static void main(String[] args) throws Exception {
    // Client configuration: service endpoint plus the AccessKey pair
    // ("xxx" values are placeholders to be replaced by the caller).
    Config clientConfig = new Config();
    clientConfig.setEndpoint("alinlp.cn-hangzhou.aliyuncs.com");
    clientConfig.setAccessKeyId("xxx");
    clientConfig.setAccessKeySecret("xxx");
    Client nlpClient = new Client(clientConfig);

    // Request: the service id and the primary-key ids of the documents to check.
    PostMSDataProcessingCountRequest countRequest = new PostMSDataProcessingCountRequest();
    countRequest.setServiceId(1831L);
    countRequest.setDataIds(Lists.newArrayList("1","2","3","4","5"));

    // Send the request and dump the whole response as JSON.
    PostMSDataProcessingCountResponse countResponse = nlpClient.postMSDataProcessingCount(countRequest);
    System.out.println(JacksonUtils.toJson(countResponse));
}
{
"code" : 200,
"data" : {
"dataProcessedStatuses" : [
{
"chunkNum" : "2",
"dataId" : "1",
"errorDataList" : [
],
"opStatus" : {
"chunk" : 1,
"data_export" : 1,
"data_import" : 2,
"feature_analysis" : 2,
"txt_extraction" : 1
},
"status" : "success",
"versionValue" : "2024-01-14 11:40:00"
},
{
"chunkNum" : "12",
"dataId" : "2",
"errorDataList" : [
],
"opStatus" : {
"chunk" : 1,
"data_export" : 1,
"data_import" : 12,
"feature_analysis" : 12,
"txt_extraction" : 1
},
"status" : "success",
"versionValue" : "2024-01-14 11:40:00"
},
{
"chunkNum" : "1",
"dataId" : "3",
"errorDataList" : [
],
"opStatus" : {
"chunk" : 1,
"data_export" : 1,
"data_import" : 1,
"feature_analysis" : 1,
"txt_extraction" : 1
},
"status" : "success",
"versionValue" : "2024-01-14 11:40:00"
},
{
"chunkNum" : "43",
"dataId" : "4",
"errorDataList" : [
],
"opStatus" : {
"chunk" : 1,
"data_export" : 1,
"data_import" : 43,
"feature_analysis" : 43,
"txt_extraction" : 1
},
"status" : "success",
"versionValue" : "2024-01-14 11:40:00"
},
{
"chunkNum" : "2",
"dataId" : "5",
"errorDataList" : [
],
"opStatus" : {
"chunk" : 1,
"data_export" : 1,
"data_import" : 2,
"feature_analysis" : 2,
"txt_extraction" : 1
},
"status" : "success",
"versionValue" : "2024-01-14 11:40:00"
},
{
"chunkNum" : "0",
"dataId" : "6",
"errorDataList" : [
{
"count" : 1,
"errorCode" : "3009",
"opType" : "txt_extraction"
}
],
"opStatus" : {
"data_export" : 1,
"txt_extraction" : 1
},
"status" : "fail",
"versionValue" : "2024-01-15 10:31:51"
}
],
"status" : "finished"
},
"httpStatusCode" : 200,
"msg" : null,
"requestId" : "D9F2C41C-FF1C-41CE-B00E-F463DB27DB3E",
"success" : true
}
Python示例
pip install alibabacloud-alinlp20200629==2.8.0
import json
from alibabacloud_alinlp20200629 import client
from alibabacloud_tea_openapi import models as api_models
from alibabacloud_alinlp20200629 import models
import os
os.environ['ALIBABA_CLOUD_ACCESS_KEY_ID'] = 'xxx'
os.environ['ALIBABA_CLOUD_ACCESS_KEY_SECRET'] = 'xxx'
def data_processing_count():
    """Query the processing progress of source documents and print it.

    Builds a client from the AccessKey pair stored in the
    ``ALIBABA_CLOUD_ACCESS_KEY_ID`` and ``ALIBABA_CLOUD_ACCESS_KEY_SECRET``
    environment variables, sends a ``PostMSDataProcessingCountRequest`` for
    service id 1831 and data ids '1', '2', '3', and prints the response body
    as a JSON string.

    Raises:
        KeyError: if either AccessKey environment variable is not set.
    """
    # NOTE(review): the Java sample in this document targets the
    # alinlp.cn-hangzhou.aliyuncs.com endpoint while this sample uses
    # region "cn-beijing" - confirm which region the service is deployed in.
    config = api_models.Config(
        access_key_id=os.environ['ALIBABA_CLOUD_ACCESS_KEY_ID'],
        access_key_secret=os.environ['ALIBABA_CLOUD_ACCESS_KEY_SECRET'],
        region_id="cn-beijing",
    )
    nlp_client = client.Client(config)

    request = models.PostMSDataProcessingCountRequest()
    request.service_id = 1831
    request.data_ids = ['1', '2', '3']

    response = nlp_client.post_msdata_processing_count(request)
    # python2: print(response)
    print(json.dumps(response.body.to_map()))


if __name__ == '__main__':
    data_processing_count()
{
"Code" : 200,
"Data" : {
"DataProcessedStatuses" : [
{
"ChunkNum" : "1",
"DataId" : "1",
"ErrorDataList" : [
],
"OpStatus" : {
"MsFeatureAnalysis" : 1,
"MsRecordLog" : 1,
"MsTextChunk" : 1,
"MsTextExtraction" : 1
},
"Status" : "success",
"VersionValue" : "2024-01-19 15:22:14"
},
{
"ChunkNum" : "1",
"DataId" : "2",
"ErrorDataList" : [
],
"OpStatus" : {
"MsFeatureAnalysis" : 1,
"MsRecordLog" : 1,
"MsTextChunk" : 1,
"MsTextExtraction" : 1
},
"Status" : "success",
"VersionValue" : "2024-01-19 15:22:14"
},
{
"ChunkNum" : "1",
"DataId" : "3",
"ErrorDataList" : [
],
"OpStatus" : {
"MsFeatureAnalysis" : 1,
"MsRecordLog" : 1,
"MsTextChunk" : 1,
"MsTextExtraction" : 1
},
"Status" : "success",
"VersionValue" : "2024-01-14 11:25:00"
}
]
},
"HttpStatusCode" : 200,
"RequestId" : "86FB9825-4C0C-1149-8684-5C2549C1C703",
"Success" : true
}