Query data processing status - Natural Language Processing (NLP) - Alibaba Cloud Help Center

This API queries the processing status of source data. To ensure stability, the service caches status information at minute-level granularity. For scenarios that require near-real-time results, you can configure second-level caching instead. Each query supports a maximum of 50 documents.

Request parameters

Parameter	Description	Type	Required
serviceId	Service ID.	Long	Yes
dataIds	Primary key IDs of the data.	List<Long>	Yes
dataImportId	Task ID.	Long	No

Response parameters

Primary parameters	Description	Type
datas	Response result.	List<data>
status	Task status.	String

Primary parameters	Sub-parameter	Description	Type
data	dataId	Primary key ID of the data.	String
	status	The status.	String
	versionValue	Version information.	String
	chunkNum	Number of chunks.	String
	opStatus	Operator status statistics.	Map<String,Integer>
	errorDataList	Detailed error information.	List<errorData>

Secondary parameters	Third-level parameters	Description	Type
errorData	opType	Operator type.	String
	errorCode	Error code.	String
	count	Number of errors.	Integer

Status type

Pending

Running

success: The data processing succeeded.

fail: The data processing failed.

Error code descriptions

// System initialization errors (10xxxx)

SYSTEM_ERROR(100001, "A system error occurred.")

,INTERNAL_ERROR(100002, "An internal error occurred.")

,INTERNAL_JOB_CONFIG_ERROR(100003, "An internal job configuration error occurred.")

,INTERNAL_WORKFLOW_CONFIG_ERROR(100004, "An internal workflow configuration error occurred.")

,INTERNAL_WORKFLOW_CHUNK_CONFIG_ERROR(100005, "An internal workflow chunking task configuration error occurred.")

,INTERNAL_WORKFLOW_CONFIG_FIELD_NOT_FOUND(100006, "A suitable field was not found in the source data.")

,INTERNAL_WORKFLOW_CONFIG_FIELD_TYPE_ERROR(100007, "The data source type is not supported.")

// Text extraction errors (11xxxx)

TEXTEXTRACTION_IDP_ERROR(110001,"An IDP parsing error occurred.")

,TEXTEXTRACTION_PARSE_SYSTEM_ERROR(110099,"An internal error occurred during text extraction.")

,TEXTEXTRACTION_FILE_URL_ERROR(110002,"An error occurred while parsing the file path.")

,TEXTEXTRACTION_FILE_CONTENT_EX(110003,"An error occurred while parsing the file content.")

,TEXTEXTRACTION_FILE_URL_NOT_FOUND(110004,"The document could not be retrieved from the URL.")

,TEXTEXTRACTION_OSS_AUTH_FAIL(110005,"OSS authentication failed.")

,TEXTEXTRACTION_SUBMIT_IDP_ERROR(110006,"An error occurred while submitting the IDP task.")

,TEXTEXTRACTION_SUBMIT_IDP_RESULT_ERROR(110007,"An error occurred while retrieving the result after the IDP task was submitted.")

,TEXTEXTRACTION_QUERY_IDP_ERROR(110008,"An error occurred while querying the IDP task.")

,TEXTEXTRACTION_QUERY_IDP_RESULT_ERROR(110009,"An error occurred while retrieving the result of the IDP task query.")

,TEXTEXTRACTION_QUERY_IDP_RESULT_TIME_OUT(110010,"The IDP task query timed out.")

,TEXTEXTRACTION_QUERY_IDP_SYSTEM_ERROR(110011,"An unknown error occurred while querying the IDP task.")

,TEXTEXTRACTION_FILE_TYPE_NOT_SUPPORT(110012,"The file type is not supported.")

// Text chunking errors (12xxxx)

CHUNK_TEXT_ERROR(120001,"A text chunking error occurred.")

,CHUNK_TEXT_PARSE_SYSTEM_ERROR(120099,"An internal error occurred during text chunking.")

,CHUNK_TEXT_IS_EMPTY(120002,"The text for chunking is empty.")

,CHUNK_SUB_TASK_TEXT_IS_EMPTY(120003,"The text content of the chunking subtask is empty.")

,CHUNK_TASK_RESULT_IS_EMPTY(120004,"The result of the chunking task is empty.")

,CHUNK_TEXT_SERVER_INVOKE_ERROR(120005,"An error occurred when calling the text chunking service.")

,CHUNK_TEXT_READ_LAYOUT_ERROR(120006,"An error occurred while reading the file layout.")

,CHUNK_TEXT_PARSE_LAYOUT_ERROR(120007,"An error occurred while parsing the file layout.")

,CHUNK_TEXT_PARSE_ERROR(120008,"An error occurred while parsing the text content for chunking.")

,CHUNK_TEXT_SERVER_RESULT_PARSE_ERROR(120009,"An error occurred while parsing the response from the text chunking service.")

,CHUNK_TEXT_SERVER_RESULT_LAYOUT_EMPTY(120010,"The parsed content from the text chunking service is empty.")

// Feature enhancement errors (13xxxx)

ANALYSIS_FEATURE_CONFIG_ERROR(130001,"A feature enhancement configuration error occurred.")

,ANALYSIS_FEATURE_PARSE_SYSTEM_ERROR(130099,"An internal error occurred during feature enhancement.")

,ANALYSIS_FEATURE_EMBEDDING_CONFIG_ERROR(130002,"An embedding configuration error occurred.")

,ANALYSIS_FEATURE_EMBEDDING_FIELD_ERROR(130003,"An embedding property validation error occurred.")

,ANALYSIS_FEATURE_EMBEDDING_EMPTY(130004,"The embedding content is empty.")

,ANALYSIS_FEATURE_EMBEDDING_INVOKE_ERROR(130005,"An error occurred when calling the embedding service.")

,ANALYSIS_FEATURE_EMBEDDING_RESULT_PARSE_ERROR(130006,"An error occurred while parsing the response from the embedding service.")

,ANALYSIS_FEATURE_EMBEDDING_RESULT_PARSE_UNKONW_ERROR(130007,"An unknown error occurred while parsing the response from the embedding service.")

,ANALYSIS_FEATURE_EMBEDDING_TEXT_INVOKE_ERROR(130008,"An error occurred when calling the text embedding service.")

,ANALYSIS_FEATURE_EMBEDDING_TEXT_RESULT_PARSE_ERROR(130009,"An error occurred while parsing the response from the text embedding service.")

,ANALYSIS_FEATURE_EMBEDDING_TEXT_RESULT_PARSE_UNKONW_ERROR(130010,"An unknown error occurred while parsing the response from the text embedding service.")

;

SDK call

Java example

<!-- SDK artifact that provides the Client and PostMSDataProcessingCount request/response classes used in the Java example below. -->
<dependency>
  <groupId>com.aliyun</groupId>
  <artifactId>alinlp20200629</artifactId>
  <version>2.7.2</version>
</dependency>

<!-- Optional: add this dependency only if you encounter the following error at runtime:
     java.lang.NoSuchMethodError: com.aliyun.credentials.Client.getCredential()Lcom/aliyun/credentials/models/CredentialModel; -->
<dependency>
    <groupId>com.aliyun</groupId>
    <artifactId>credentials-java</artifactId>
    <version>0.3.0</version>
</dependency>

/**
 * Queries the processing status of five sample data IDs for one service and
 * prints the full response as JSON.
 *
 * <p>The AccessKey pair is read from the ALIBABA_CLOUD_ACCESS_KEY_ID and
 * ALIBABA_CLOUD_ACCESS_KEY_SECRET environment variables so that no secret is
 * stored in source code. Export both variables before running the example.
 *
 * @param args unused
 * @throws Exception if the client cannot be created or the request fails
 */
public static void main(String[] args) throws Exception {
  Config config = new Config();
  config.setEndpoint("alinlp.cn-hangzhou.aliyuncs.com");
  // Credentials come from the environment instead of hard-coded literals.
  config.setAccessKeyId(System.getenv("ALIBABA_CLOUD_ACCESS_KEY_ID"));
  config.setAccessKeySecret(System.getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET"));
  Client client = new Client(config);

  PostMSDataProcessingCountRequest request = new PostMSDataProcessingCountRequest();
  // serviceId and dataIds are the two required request parameters.
  request.setServiceId(1831L);
  request.setDataIds(Lists.newArrayList("1","2","3","4","5"));

  PostMSDataProcessingCountResponse response = client.postMSDataProcessingCount(request);
  System.out.println(JacksonUtils.toJson(response));
}

{
  "code" : 200,
  "data" : {
    "dataProcessedStatuses" : [
      {
        "chunkNum" : "2",
        "dataId" : "1",
        "errorDataList" : [

        ],
        "opStatus" : {
          "chunk" : 1,
          "data_export" : 1,
          "data_import" : 2,
          "feature_analysis" : 2,
          "txt_extraction" : 1
        },
        "status" : "success",
        "versionValue" : "2024-01-14 11:40:00"
      },
      {
        "chunkNum" : "12",
        "dataId" : "2",
        "errorDataList" : [

        ],
        "opStatus" : {
          "chunk" : 1,
          "data_export" : 1,
          "data_import" : 12,
          "feature_analysis" : 12,
          "txt_extraction" : 1
        },
        "status" : "success",
        "versionValue" : "2024-01-14 11:40:00"
      },
      {
        "chunkNum" : "1",
        "dataId" : "3",
        "errorDataList" : [

        ],
        "opStatus" : {
          "chunk" : 1,
          "data_export" : 1,
          "data_import" : 1,
          "feature_analysis" : 1,
          "txt_extraction" : 1
        },
        "status" : "success",
        "versionValue" : "2024-01-14 11:40:00"
      },
      {
        "chunkNum" : "43",
        "dataId" : "4",
        "errorDataList" : [

        ],
        "opStatus" : {
          "chunk" : 1,
          "data_export" : 1,
          "data_import" : 43,
          "feature_analysis" : 43,
          "txt_extraction" : 1
        },
        "status" : "success",
        "versionValue" : "2024-01-14 11:40:00"
      },
      {
        "chunkNum" : "2",
        "dataId" : "5",
        "errorDataList" : [

        ],
        "opStatus" : {
          "chunk" : 1,
          "data_export" : 1,
          "data_import" : 2,
          "feature_analysis" : 2,
          "txt_extraction" : 1
        },
        "status" : "success",
        "versionValue" : "2024-01-14 11:40:00"
      },
      {
        "chunkNum" : "0",
        "dataId" : "6",
        "errorDataList" : [
          {
            "count" : 1,
            "errorCode" : "3009",
            "opType" : "txt_extraction"
          }
        ],
        "opStatus" : {
          "data_export" : 1,
          "txt_extraction" : 1
        },
        "status" : "fail",
        "versionValue" : "2024-01-15 10:31:51"
      }
    ],
    "status" : "finished"
  },
  "httpStatusCode" : 200,
  "msg" : null,
  "requestId" : "D9F2C41C-FF1C-41CE-B00E-F463DB27DB3E",
  "success" : true
}

Python example

pip install alibabacloud-alinlp20200629==2.8.0

import json

from alibabacloud_alinlp20200629 import client
from alibabacloud_tea_openapi import models as api_models
from alibabacloud_alinlp20200629 import models
import os

# Placeholder credentials for the example. setdefault() keeps an AccessKey pair
# that is already exported in the environment instead of overwriting it with
# the 'xxx' placeholder; replace 'xxx' only for local experiments and never
# commit real keys.
os.environ.setdefault('ALIBABA_CLOUD_ACCESS_KEY_ID', 'xxx')
os.environ.setdefault('ALIBABA_CLOUD_ACCESS_KEY_SECRET', 'xxx')


def data_processing_count():
    """Query the processing status of three sample data IDs and print the result.

    The AccessKey pair is read from the ALIBABA_CLOUD_ACCESS_KEY_ID and
    ALIBABA_CLOUD_ACCESS_KEY_SECRET environment variables. The response body
    is converted to a dict and written to stdout as a JSON string.
    """
    key_id = os.environ['ALIBABA_CLOUD_ACCESS_KEY_ID']
    key_secret = os.environ['ALIBABA_CLOUD_ACCESS_KEY_SECRET']
    sdk_config = api_models.Config(
        access_key_id=key_id,
        access_key_secret=key_secret,
        region_id="cn-beijing",
    )
    nlp_client = client.Client(sdk_config)

    # service_id and data_ids are the two required request parameters.
    status_request = models.PostMSDataProcessingCountRequest()
    status_request.service_id = 1831
    status_request.data_ids = ['1', '2', '3']

    response = nlp_client.post_msdata_processing_count(status_request)
    # On Python 2, print the response object directly instead: print(response)
    body_as_map = response.body.to_map()
    print(json.dumps(body_as_map))


# Run the example only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    data_processing_count()

{
  "Code" : 200,
  "Data" : {
    "DataProcessedStatuses" : [
      {
        "ChunkNum" : "1",
        "DataId" : "1",
        "ErrorDataList" : [

        ],
        "OpStatus" : {
          "MsFeatureAnalysis" : 1,
          "MsRecordLog" : 1,
          "MsTextChunk" : 1,
          "MsTextExtraction" : 1
        },
        "Status" : "success",
        "VersionValue" : "2024-01-19 15:22:14"
      },
      {
        "ChunkNum" : "1",
        "DataId" : "2",
        "ErrorDataList" : [

        ],
        "OpStatus" : {
          "MsFeatureAnalysis" : 1,
          "MsRecordLog" : 1,
          "MsTextChunk" : 1,
          "MsTextExtraction" : 1
        },
        "Status" : "success",
        "VersionValue" : "2024-01-19 15:22:14"
      },
      {
        "ChunkNum" : "1",
        "DataId" : "3",
        "ErrorDataList" : [

        ],
        "OpStatus" : {
          "MsFeatureAnalysis" : 1,
          "MsRecordLog" : 1,
          "MsTextChunk" : 1,
          "MsTextExtraction" : 1
        },
        "Status" : "success",
        "VersionValue" : "2024-01-14 11:25:00"
      }
    ]
  },
  "HttpStatusCode" : 200,
  "RequestId" : "86FB9825-4C0C-1149-8684-5C2549C1C703",
  "Success" : true
}