数据处理状态查询API

用于查询源数据的处理进度。为保证查询服务的稳定性，该接口的查询结果带有分钟级缓存；对实时性要求较高的场景，支持配置为秒级缓存。单次查询最多支持 50 个文档。

请求入参

一级参数

说明

类型

是否必传

serviceId

服务Id

Long

dataIds

数据主键Id

List<String>

dataImportId

任务id

Long

响应参数

一级参数

说明

类型

dataProcessedStatuses

响应结果

List<data>

status

任务状态

String

一级参数

二级参数

说明

类型

data

dataId

数据主键Id

String

status

状态

String

versionValue

版本信息

String

chunkNum

chunk数

String

opStatus

算子状态信息统计

Map<String,Integer>

errorDataList

错误详细信息

List<errorData>

二级参数

三级参数

说明

类型

errorData

opType

算子类型

String

errorCode

错误码

String

count

错误数量

Integer

status类型

pending (待处理)

running (处理中)

success (处理成功)

fail (处理失败)

错误码说明

//系统初始化异常10****

SYSTEM_ERROR(100001, "系统异常")

,INTERNAL_ERROR(100002, "内部异常")

,INTERNAL_JOB_CONFIG_ERROR(100003, "系统工作任务配置异常")

,INTERNAL_WORKFLOW_CONFIG_ERROR(100004, "系统流程任务配置异常")

,INTERNAL_WORKFLOW_CHUNK_CONFIG_ERROR(100005, "系统流程切分任务配置异常")

,INTERNAL_WORKFLOW_CONFIG_FIELD_NOT_FOUND(100006, "无法找到合适的源数据")

,INTERNAL_WORKFLOW_CONFIG_FIELD_TYPE_ERROR(100007, "系统无法支持数据源")

//文本抽取异常列表11****

TEXTEXTRACTION_IDP_ERROR(110001,"idp解析异常")

,TEXTEXTRACTION_PARSE_SYSTEM_ERROR(110099,"抽取文本内部异常")

,TEXTEXTRACTION_FILE_URL_ERROR(110002,"系统文件路径解析异常")

,TEXTEXTRACTION_FILE_CONTENT_EX(110003,"系统文件内容解析异常")

,TEXTEXTRACTION_FILE_URL_NOT_FOUND(110004,"文档url获取文档失败")

,TEXTEXTRACTION_OSS_AUTH_FAIL(110005,"oss鉴权异常")

,TEXTEXTRACTION_SUBMIT_IDP_ERROR(110006,"提交idp任务异常")

,TEXTEXTRACTION_SUBMIT_IDP_RESULT_ERROR(110007,"提交idp任务获取结果异常")

,TEXTEXTRACTION_QUERY_IDP_ERROR(110008,"查询IDP任务异常")

,TEXTEXTRACTION_QUERY_IDP_RESULT_ERROR(110009,"查询IDP任务获取结果异常")

,TEXTEXTRACTION_QUERY_IDP_RESULT_TIME_OUT(110010,"IDP任务运行超时")

,TEXTEXTRACTION_QUERY_IDP_SYSTEM_ERROR(110011,"查询IDP任务未知异常")

,TEXTEXTRACTION_FILE_TYPE_NOT_SUPPORT(110012,"文件类型不支持")

//文本切分异常列表12****

CHUNK_TEXT_ERROR(120001,"切分文本异常")

,CHUNK_TEXT_PARSE_SYSTEM_ERROR(120099,"切分文本内容出现内部异常")

,CHUNK_TEXT_IS_EMPTY(120002,"切分文本为空")

,CHUNK_SUB_TASK_TEXT_IS_EMPTY(120003,"切分文本子任务文本内容为空")

,CHUNK_TASK_RESULT_IS_EMPTY(120004,"切分文本任务结果内容为空")

,CHUNK_TEXT_SERVER_INVOKE_ERROR(120005,"切分文本服务调用出现异常")

,CHUNK_TEXT_READ_LAYOUT_ERROR(120006,"读取文件layout出现异常")

,CHUNK_TEXT_PARSE_LAYOUT_ERROR(120007,"解析文件layout出现异常")

,CHUNK_TEXT_PARSE_ERROR(120008,"切分文本内容出现异常")

,CHUNK_TEXT_SERVER_RESULT_PARSE_ERROR(120009,"切分文本服务返回内容出现异常")

,CHUNK_TEXT_SERVER_RESULT_LAYOUT_EMPTY(120010,"切分文本服务返回解析内容为空")

//特征增强异常列表13****

ANALYSIS_FEATURE_CONFIG_ERROR(130001,"特征增强配置异常")

,ANALYSIS_FEATURE_PARSE_SYSTEM_ERROR(130099,"特征增强出现内部异常")

,ANALYSIS_FEATURE_EMBEDDING_CONFIG_ERROR(130002,"embedding配置异常")

,ANALYSIS_FEATURE_EMBEDDING_FIELD_ERROR(130003,"embedding属性校验异常")

,ANALYSIS_FEATURE_EMBEDDING_EMPTY(130004,"embedding内容为空")

,ANALYSIS_FEATURE_EMBEDDING_INVOKE_ERROR(130005,"embedding服务调用返回异常")

,ANALYSIS_FEATURE_EMBEDDING_RESULT_PARSE_ERROR(130006,"embedding服务返回解析出现异常")

,ANALYSIS_FEATURE_EMBEDDING_RESULT_PARSE_UNKONW_ERROR(130007,"embedding服务返回解析出现未知异常")

,ANALYSIS_FEATURE_EMBEDDING_TEXT_INVOKE_ERROR(130008,"embedding文本服务调用返回异常")

,ANALYSIS_FEATURE_EMBEDDING_TEXT_RESULT_PARSE_ERROR(130009,"embedding文本服务返回解析出现异常")

,ANALYSIS_FEATURE_EMBEDDING_TEXT_RESULT_PARSE_UNKONW_ERROR(130010,"embedding文本服务返回解析出现未知异常")

;

SDK调用

Java示例

<dependency>
  <groupId>com.aliyun</groupId>
  <artifactId>alinlp20200629</artifactId>
  <version>2.7.2</version>
</dependency>

<!--出现java.lang.NoSuchMethodError: com.aliyun.credentials.Client.getCredential()Lcom/aliyun/credentials/models/CredentialModel;异常则引入-->
<dependency>
    <groupId>com.aliyun</groupId>
    <artifactId>credentials-java</artifactId>
    <version>0.3.0</version>
</dependency>

/**
 * Sample entry point: requests the data-processing status of five documents
 * and prints the SDK response serialized as JSON.
 *
 * @param args command-line arguments (not used)
 * @throws Exception propagated from the SDK client construction or call
 */
public static void main(String[] args) throws Exception {
  // Client configuration: service endpoint plus AccessKey credentials.
  // The "xxx" values are placeholders and must be replaced before running.
  Config clientConfig = new Config();
  clientConfig.setEndpoint("alinlp.cn-hangzhou.aliyuncs.com");
  clientConfig.setAccessKeyId("xxx");
  clientConfig.setAccessKeySecret("xxx");
  Client nlpClient = new Client(clientConfig);

  // Request: service 1831, data ids "1" through "5".
  PostMSDataProcessingCountRequest countRequest = new PostMSDataProcessingCountRequest();
  countRequest.setServiceId(1831L);
  countRequest.setDataIds(Lists.newArrayList("1","2","3","4","5"));

  // Send the query and dump the whole response object as JSON.
  PostMSDataProcessingCountResponse countResponse = nlpClient.postMSDataProcessingCount(countRequest);
  System.out.println(JacksonUtils.toJson(countResponse));
}
{
  "code" : 200,
  "data" : {
    "dataProcessedStatuses" : [
      {
        "chunkNum" : "2",
        "dataId" : "1",
        "errorDataList" : [

        ],
        "opStatus" : {
          "chunk" : 1,
          "data_export" : 1,
          "data_import" : 2,
          "feature_analysis" : 2,
          "txt_extraction" : 1
        },
        "status" : "success",
        "versionValue" : "2024-01-14 11:40:00"
      },
      {
        "chunkNum" : "12",
        "dataId" : "2",
        "errorDataList" : [

        ],
        "opStatus" : {
          "chunk" : 1,
          "data_export" : 1,
          "data_import" : 12,
          "feature_analysis" : 12,
          "txt_extraction" : 1
        },
        "status" : "success",
        "versionValue" : "2024-01-14 11:40:00"
      },
      {
        "chunkNum" : "1",
        "dataId" : "3",
        "errorDataList" : [

        ],
        "opStatus" : {
          "chunk" : 1,
          "data_export" : 1,
          "data_import" : 1,
          "feature_analysis" : 1,
          "txt_extraction" : 1
        },
        "status" : "success",
        "versionValue" : "2024-01-14 11:40:00"
      },
      {
        "chunkNum" : "43",
        "dataId" : "4",
        "errorDataList" : [

        ],
        "opStatus" : {
          "chunk" : 1,
          "data_export" : 1,
          "data_import" : 43,
          "feature_analysis" : 43,
          "txt_extraction" : 1
        },
        "status" : "success",
        "versionValue" : "2024-01-14 11:40:00"
      },
      {
        "chunkNum" : "2",
        "dataId" : "5",
        "errorDataList" : [

        ],
        "opStatus" : {
          "chunk" : 1,
          "data_export" : 1,
          "data_import" : 2,
          "feature_analysis" : 2,
          "txt_extraction" : 1
        },
        "status" : "success",
        "versionValue" : "2024-01-14 11:40:00"
      },
      {
        "chunkNum" : "0",
        "dataId" : "6",
        "errorDataList" : [
          {
            "count" : 1,
            "errorCode" : "3009",
            "opType" : "txt_extraction"
          }
        ],
        "opStatus" : {
          "data_export" : 1,
          "txt_extraction" : 1
        },
        "status" : "fail",
        "versionValue" : "2024-01-15 10:31:51"
      }
    ],
    "status" : "finished"
  },
  "httpStatusCode" : 200,
  "msg" : null,
  "requestId" : "D9F2C41C-FF1C-41CE-B00E-F463DB27DB3E",
  "success" : true
}

Python示例

pip install alibabacloud-alinlp20200629==2.8.0
import json

from alibabacloud_alinlp20200629 import client
from alibabacloud_tea_openapi import models as api_models
from alibabacloud_alinlp20200629 import models
import os

# Placeholder credentials for this sample: replace 'xxx' with a real AccessKey
# id/secret pair before running.
# NOTE(review): for real use, consider setting these variables outside the
# script instead of hard-coding them in source.
os.environ['ALIBABA_CLOUD_ACCESS_KEY_ID'] = 'xxx'
os.environ['ALIBABA_CLOUD_ACCESS_KEY_SECRET'] = 'xxx'


def data_processing_count():
    """Query the processing status of sample documents and print the result.

    Reads the AccessKey pair from the ``ALIBABA_CLOUD_ACCESS_KEY_ID`` and
    ``ALIBABA_CLOUD_ACCESS_KEY_SECRET`` environment variables, sends a
    ``PostMSDataProcessingCountRequest`` for service 1831 and data ids
    '1', '2', '3', then prints the response body as a JSON string.
    """
    access_key_id = os.environ['ALIBABA_CLOUD_ACCESS_KEY_ID']
    access_key_secret = os.environ['ALIBABA_CLOUD_ACCESS_KEY_SECRET']
    # NOTE(review): the Java sample in this document targets the
    # alinlp.cn-hangzhou.aliyuncs.com endpoint -- confirm that "cn-beijing"
    # is the intended region here.
    client_config = api_models.Config(
        access_key_id=access_key_id,
        access_key_secret=access_key_secret,
        region_id="cn-beijing",
    )
    sdk_client = client.Client(client_config)

    count_request = models.PostMSDataProcessingCountRequest()
    count_request.service_id = 1831
    count_request.data_ids = ['1', '2', '3']

    count_response = sdk_client.post_msdata_processing_count(count_request)
    body_map = count_response.body.to_map()
    # Original note for Python 2 users: print(response)
    print(json.dumps(body_map))


# Run the sample query only when this file is executed directly as a script.
if __name__ == '__main__':
    data_processing_count()
{
  "Code" : 200,
  "Data" : {
    "DataProcessedStatuses" : [
      {
        "ChunkNum" : "1",
        "DataId" : "1",
        "ErrorDataList" : [

        ],
        "OpStatus" : {
          "MsFeatureAnalysis" : 1,
          "MsRecordLog" : 1,
          "MsTextChunk" : 1,
          "MsTextExtraction" : 1
        },
        "Status" : "success",
        "VersionValue" : "2024-01-19 15:22:14"
      },
      {
        "ChunkNum" : "1",
        "DataId" : "2",
        "ErrorDataList" : [

        ],
        "OpStatus" : {
          "MsFeatureAnalysis" : 1,
          "MsRecordLog" : 1,
          "MsTextChunk" : 1,
          "MsTextExtraction" : 1
        },
        "Status" : "success",
        "VersionValue" : "2024-01-19 15:22:14"
      },
      {
        "ChunkNum" : "1",
        "DataId" : "3",
        "ErrorDataList" : [

        ],
        "OpStatus" : {
          "MsFeatureAnalysis" : 1,
          "MsRecordLog" : 1,
          "MsTextChunk" : 1,
          "MsTextExtraction" : 1
        },
        "Status" : "success",
        "VersionValue" : "2024-01-14 11:25:00"
      }
    ]
  },
  "HttpStatusCode" : 200,
  "RequestId" : "86FB9825-4C0C-1149-8684-5C2549C1C703",
  "Success" : true
}