AI搜索开放平台支持通过SDK的方式调用文档内容解析服务。
前提条件
已开通AI搜索开放平台服务,具体操作请参见开通服务。
已通过API-KEY完成身份鉴权,获取鉴权信息请参见获取API-KEY。
参数说明
请求体(body)大小不能超过8MB。
更多参数说明请参见文档内容解析。
import base64
import os
import time
from alibabacloud_tea_openapi.models import Config
from alibabacloud_searchplat20240529.client import Client
from alibabacloud_searchplat20240529.models import (
CreateDocumentAnalyzeTaskRequestDocument,
CreateDocumentAnalyzeTaskRequestOutput,
CreateDocumentAnalyzeTaskRequest,
CreateDocumentAnalyzeTaskResponse,
GetDocumentAnalyzeTaskStatusRequest,
GetDocumentAnalyzeTaskStatusResponse
)
if __name__ == '__main__':
    # Sample: submit an asynchronous document-analyze task, then poll its
    # status until it leaves PENDING (or until a local deadline expires).

    # "default" is a placeholder for your workspace name;
    # "ops-document-analyze-001" is the service ID.
    workspace_name = "default"
    service_id = "ops-document-analyze-001"

    # Seconds to wait between status polls, and the overall time budget
    # after which polling gives up instead of looping forever.
    poll_interval_seconds = 5
    max_wait_seconds = 600

    config = Config(
        bearer_token="替换为您的API-KEY",
        # endpoint: the unified request entry point; the "http://" prefix
        # must be removed.
        endpoint="替换API访问地址",
        # protocol can be configured as HTTPS or HTTP.
        protocol="http",
    )
    client = Client(config=config)

    # URL mode: the document is referenced by its URL.
    document = CreateDocumentAnalyzeTaskRequestDocument(
        url="http://test.pdf",
        file_type="pdf",
    )

    # Local-file mode: file_name must be specified in addition to the
    # base64-encoded content. Note the request body must not exceed 8 MB.
    # file_path = "path/to/xxx.pdf"
    # with open(file_path, "rb") as f:
    #     encoded_content = base64.b64encode(f.read()).decode()
    # document = CreateDocumentAnalyzeTaskRequestDocument(
    #     content=encoded_content,
    #     file_name=os.path.basename(file_path),
    # )

    output = CreateDocumentAnalyzeTaskRequestOutput(image_storage="url")
    create_request = CreateDocumentAnalyzeTaskRequest(
        document=document, output=output)

    create_response: CreateDocumentAnalyzeTaskResponse = (
        client.create_document_analyze_task(
            workspace_name, service_id, create_request))
    task_id = create_response.body.result.task_id
    print("task_id: " + task_id)

    status_request = GetDocumentAnalyzeTaskStatusRequest(task_id=task_id)

    # A monotonic clock keeps the deadline immune to wall-clock adjustments.
    deadline = time.monotonic() + max_wait_seconds
    while True:
        status_response: GetDocumentAnalyzeTaskStatusResponse = (
            client.get_document_analyze_task_status(
                workspace_name, service_id, status_request))
        result = status_response.body.result
        status = result.status
        print("status: " + status)

        if status == "PENDING":
            # Still running: wait and poll again unless the budget is spent.
            if time.monotonic() >= deadline:
                print("timed out after " + str(max_wait_seconds)
                      + "s waiting for task " + task_id)
                break
            time.sleep(poll_interval_seconds)
        elif status == "SUCCESS":
            data = result.data
            usage = status_response.body.usage
            # Only the first 1000 characters of the content are printed.
            print("content:\n" + data.content[:1000] + "\n")
            print("page count: " + str(data.page_num))
            print("usage: " + str(usage))
            break
        else:
            # Any other status is terminal here: dump the result and stop.
            print(result)
            break
文档内容是否对您有帮助?