This API is used to query the processing status of source data. To ensure stability, the service caches information at the minute level. In scenarios with high real-time requirements, you can configure caching at the second level. Each query supports a maximum of 50 documents.
Request parameters
Parameter | Description | Type | Required |
serviceId | Service ID. | Long | Yes |
dataIds | Primary key IDs of the data. A maximum of 50 IDs per query. | List<String> | Yes |
dataImportId | Task ID. | Long | No |
Response parameters
Primary Parameters | Description | Type |
datas | Response result. | List<data> |
status | Task status. | String |
Primary parameters | Sub-parameter | Description | Type |
data | dataId | Primary key ID of the data. | String |
data | status | Processing status of the data. See "Status type" below. | String |
data | versionValue | Version information. | String |
data | chunkNum | Number of chunks. | String |
data | opStatus | Operator status statistics. | Map<String,Integer> |
data | errorDataList | Detailed error information. | List<errorData> |
Secondary parameters | Third-level Parameters | Description | Type |
errorData | opType | Operator type. | String |
errorData | errorCode | Error code. | String |
errorData | count | Number of errors. | Integer |
Status type
Pending
Running
success: The data processing succeeded.
fail: The data processing failed.
Error code descriptions
// System initialization errors (10xxxx)
SYSTEM_ERROR(100001, "A system error occurred.")
,INTERNAL_ERROR(100002, "An internal error occurred.")
,INTERNAL_JOB_CONFIG_ERROR(100003, "An internal job configuration error occurred.")
,INTERNAL_WORKFLOW_CONFIG_ERROR(100004, "An internal workflow configuration error occurred.")
,INTERNAL_WORKFLOW_CHUNK_CONFIG_ERROR(100005, "An internal workflow chunking task configuration error occurred.")
,INTERNAL_WORKFLOW_CONFIG_FIELD_NOT_FOUND(100006, "A suitable field was not found in the source data.")
,INTERNAL_WORKFLOW_CONFIG_FIELD_TYPE_ERROR(100007, "The data source type is not supported.")
// Text extraction errors (11xxxx)
TEXTEXTRACTION_IDP_ERROR(110001,"An IDP parsing error occurred.")
,TEXTEXTRACTION_PARSE_SYSTEM_ERROR(110099,"An internal error occurred during text extraction.")
,TEXTEXTRACTION_FILE_URL_ERROR(110002,"An error occurred while parsing the file path.")
,TEXTEXTRACTION_FILE_CONTENT_EX(110003,"An error occurred while parsing the file content.")
,TEXTEXTRACTION_FILE_URL_NOT_FOUND(110004,"The document could not be retrieved from the URL.")
,TEXTEXTRACTION_OSS_AUTH_FAIL(110005,"OSS authentication failed.")
,TEXTEXTRACTION_SUBMIT_IDP_ERROR(110006,"An error occurred while submitting the IDP task.")
,TEXTEXTRACTION_SUBMIT_IDP_RESULT_ERROR(110007,"An error occurred while retrieving the result after the IDP task was submitted.")
,TEXTEXTRACTION_QUERY_IDP_ERROR(110008,"An error occurred while querying the IDP task.")
,TEXTEXTRACTION_QUERY_IDP_RESULT_ERROR(110009,"An error occurred while retrieving the result of the IDP task query.")
,TEXTEXTRACTION_QUERY_IDP_RESULT_TIME_OUT(110010,"The IDP task query timed out.")
,TEXTEXTRACTION_QUERY_IDP_SYSTEM_ERROR(110011,"An unknown error occurred while querying the IDP task.")
,TEXTEXTRACTION_FILE_TYPE_NOT_SUPPORT(110012,"The file type is not supported.")
// Text chunking errors (12xxxx)
CHUNK_TEXT_ERROR(120001,"A text chunking error occurred.")
,CHUNK_TEXT_PARSE_SYSTEM_ERROR(120099,"An internal error occurred during text chunking.")
,CHUNK_TEXT_IS_EMPTY(120002,"The text for chunking is empty.")
,CHUNK_SUB_TASK_TEXT_IS_EMPTY(120003,"The text content of the chunking subtask is empty.")
,CHUNK_TASK_RESULT_IS_EMPTY(120004,"The result of the chunking task is empty.")
,CHUNK_TEXT_SERVER_INVOKE_ERROR(120005,"An error occurred when calling the text chunking service.")
,CHUNK_TEXT_READ_LAYOUT_ERROR(120006,"An error occurred while reading the file layout.")
,CHUNK_TEXT_PARSE_LAYOUT_ERROR(120007,"An error occurred while parsing the file layout.")
,CHUNK_TEXT_PARSE_ERROR(120008,"An error occurred while parsing the text content for chunking.")
,CHUNK_TEXT_SERVER_RESULT_PARSE_ERROR(120009,"An error occurred while parsing the response from the text chunking service.")
,CHUNK_TEXT_SERVER_RESULT_LAYOUT_EMPTY(120010,"The parsed content from the text chunking service is empty.")
// Feature enhancement errors (13xxxx)
ANALYSIS_FEATURE_CONFIG_ERROR(130001,"A feature enhancement configuration error occurred.")
,ANALYSIS_FEATURE_PARSE_SYSTEM_ERROR(130099,"An internal error occurred during feature enhancement.")
,ANALYSIS_FEATURE_EMBEDDING_CONFIG_ERROR(130002,"An embedding configuration error occurred.")
,ANALYSIS_FEATURE_EMBEDDING_FIELD_ERROR(130003,"An embedding property validation error occurred.")
,ANALYSIS_FEATURE_EMBEDDING_EMPTY(130004,"The embedding content is empty.")
,ANALYSIS_FEATURE_EMBEDDING_INVOKE_ERROR(130005,"An error occurred when calling the embedding service.")
,ANALYSIS_FEATURE_EMBEDDING_RESULT_PARSE_ERROR(130006,"An error occurred while parsing the response from the embedding service.")
,ANALYSIS_FEATURE_EMBEDDING_RESULT_PARSE_UNKONW_ERROR(130007,"An unknown error occurred while parsing the response from the embedding service.")
,ANALYSIS_FEATURE_EMBEDDING_TEXT_INVOKE_ERROR(130008,"An error occurred when calling the text embedding service.")
,ANALYSIS_FEATURE_EMBEDDING_TEXT_RESULT_PARSE_ERROR(130009,"An error occurred while parsing the response from the text embedding service.")
,ANALYSIS_FEATURE_EMBEDDING_TEXT_RESULT_PARSE_UNKONW_ERROR(130010,"An unknown error occurred while parsing the response from the text embedding service.")
;
SDK call
Java example
<dependency>
<groupId>com.aliyun</groupId>
<artifactId>alinlp20200629</artifactId>
<version>2.7.2</version>
</dependency>
<!--Import this dependency if you encounter a java.lang.NoSuchMethodError: com.aliyun.credentials.Client.getCredential()Lcom/aliyun/credentials/models/CredentialModel; error-->
<dependency>
<groupId>com.aliyun</groupId>
<artifactId>credentials-java</artifactId>
<version>0.3.0</version>
</dependency>
/**
 * Queries the processing status of source data for one service and prints
 * the response as JSON.
 *
 * Replace the "xxx" placeholders with a real AccessKey ID and AccessKey secret
 * before running.
 */
public static void main(String[] args) throws Exception {
    // Client configuration: service endpoint plus the AccessKey pair.
    Config config = new Config();
    config.setEndpoint("alinlp.cn-hangzhou.aliyuncs.com");
    config.setAccessKeyId("xxx");
    config.setAccessKeySecret("xxx");
    Client client = new Client(config);

    PostMSDataProcessingCountRequest request = new PostMSDataProcessingCountRequest();
    request.setServiceId(1831L);
    // IDs 1-6 correspond to the six entries shown in the sample response below.
    request.setDataIds(Lists.newArrayList("1","2","3","4","5","6"));

    PostMSDataProcessingCountResponse response = client.postMSDataProcessingCount(request);
    System.out.println(JacksonUtils.toJson(response));
}
{
"code" : 200,
"data" : {
"dataProcessedStatuses" : [
{
"chunkNum" : "2",
"dataId" : "1",
"errorDataList" : [
],
"opStatus" : {
"chunk" : 1,
"data_export" : 1,
"data_import" : 2,
"feature_analysis" : 2,
"txt_extraction" : 1
},
"status" : "success",
"versionValue" : "2024-01-14 11:40:00"
},
{
"chunkNum" : "12",
"dataId" : "2",
"errorDataList" : [
],
"opStatus" : {
"chunk" : 1,
"data_export" : 1,
"data_import" : 12,
"feature_analysis" : 12,
"txt_extraction" : 1
},
"status" : "success",
"versionValue" : "2024-01-14 11:40:00"
},
{
"chunkNum" : "1",
"dataId" : "3",
"errorDataList" : [
],
"opStatus" : {
"chunk" : 1,
"data_export" : 1,
"data_import" : 1,
"feature_analysis" : 1,
"txt_extraction" : 1
},
"status" : "success",
"versionValue" : "2024-01-14 11:40:00"
},
{
"chunkNum" : "43",
"dataId" : "4",
"errorDataList" : [
],
"opStatus" : {
"chunk" : 1,
"data_export" : 1,
"data_import" : 43,
"feature_analysis" : 43,
"txt_extraction" : 1
},
"status" : "success",
"versionValue" : "2024-01-14 11:40:00"
},
{
"chunkNum" : "2",
"dataId" : "5",
"errorDataList" : [
],
"opStatus" : {
"chunk" : 1,
"data_export" : 1,
"data_import" : 2,
"feature_analysis" : 2,
"txt_extraction" : 1
},
"status" : "success",
"versionValue" : "2024-01-14 11:40:00"
},
{
"chunkNum" : "0",
"dataId" : "6",
"errorDataList" : [
{
"count" : 1,
"errorCode" : "3009",
"opType" : "txt_extraction"
}
],
"opStatus" : {
"data_export" : 1,
"txt_extraction" : 1
},
"status" : "fail",
"versionValue" : "2024-01-15 10:31:51"
}
],
"status" : "finished"
},
"httpStatusCode" : 200,
"msg" : null,
"requestId" : "D9F2C41C-FF1C-41CE-B00E-F463DB27DB3E",
"success" : true
}
Python example
pip install alibabacloud-alinlp20200629==2.8.0
import json
from alibabacloud_alinlp20200629 import client
from alibabacloud_tea_openapi import models as api_models
from alibabacloud_alinlp20200629 import models
import os
# Demo only: the 'xxx' placeholders stand in for a real AccessKey pair.
# data_processing_count() reads both values back from os.environ when it
# builds the client configuration.
os.environ['ALIBABA_CLOUD_ACCESS_KEY_ID'] = 'xxx'
os.environ['ALIBABA_CLOUD_ACCESS_KEY_SECRET'] = 'xxx'
def data_processing_count():
    """Query the processing status of source data and print the result as JSON.

    Builds a client from the AccessKey pair stored in the
    ``ALIBABA_CLOUD_ACCESS_KEY_ID`` / ``ALIBABA_CLOUD_ACCESS_KEY_SECRET``
    environment variables, sends a ``PostMSDataProcessingCountRequest`` for
    service 1831 and data IDs '1'-'3', and prints the response body.

    Raises:
        KeyError: If either credential environment variable is not set.
    """
    config = api_models.Config(
        access_key_id=os.environ['ALIBABA_CLOUD_ACCESS_KEY_ID'],
        access_key_secret=os.environ['ALIBABA_CLOUD_ACCESS_KEY_SECRET'],
        region_id="cn-beijing",
    )
    nlp_client = client.Client(config)

    request = models.PostMSDataProcessingCountRequest()
    request.service_id = 1831
    request.data_ids = ['1', '2', '3']

    response = nlp_client.post_msdata_processing_count(request)
    # For Python 2, use: print(response)
    print(json.dumps(response.body.to_map()))


if __name__ == '__main__':
    data_processing_count()
{
"Code" : 200,
"Data" : {
"DataProcessedStatuses" : [
{
"ChunkNum" : "1",
"DataId" : "1",
"ErrorDataList" : [
],
"OpStatus" : {
"MsFeatureAnalysis" : 1,
"MsRecordLog" : 1,
"MsTextChunk" : 1,
"MsTextExtraction" : 1
},
"Status" : "success",
"VersionValue" : "2024-01-19 15:22:14"
},
{
"ChunkNum" : "1",
"DataId" : "2",
"ErrorDataList" : [
],
"OpStatus" : {
"MsFeatureAnalysis" : 1,
"MsRecordLog" : 1,
"MsTextChunk" : 1,
"MsTextExtraction" : 1
},
"Status" : "success",
"VersionValue" : "2024-01-19 15:22:14"
},
{
"ChunkNum" : "1",
"DataId" : "3",
"ErrorDataList" : [
],
"OpStatus" : {
"MsFeatureAnalysis" : 1,
"MsRecordLog" : 1,
"MsTextChunk" : 1,
"MsTextExtraction" : 1
},
"Status" : "success",
"VersionValue" : "2024-01-14 11:25:00"
}
]
},
"HttpStatusCode" : 200,
"RequestId" : "86FB9825-4C0C-1149-8684-5C2549C1C703",
"Success" : true
}