代码示例
更新时间:
本文为您介绍AnalyticDB for PostgreSQL RAG Service相关接口的代码示例。
前提条件
创建AccessKey。如果您使用的账号是RAM用户,需要为其授权
AliyunGPDBFullAccess
权限。
SDK Client配置
安装SDK
请按照mvn或pip源及时更新版本,SDK下载地址请参见SDK参考。
<dependency>
<groupId>com.aliyun</groupId>
<artifactId>gpdb20160503</artifactId>
<version>3.2.1</version>
</dependency>
<!-- 依赖版本不低于 -->
<dependency>
<groupId>com.aliyun</groupId>
<artifactId>aliyun-java-sdk-core</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>com.aliyun</groupId>
<artifactId>tea-openapi</artifactId>
<version>0.3.1</version>
</dependency>
<dependency>
<groupId>com.aliyun</groupId>
<artifactId>tea</artifactId>
<version>1.2.8</version>
</dependency>
<dependency>
<groupId>com.aliyun</groupId>
<artifactId>openapiutil</artifactId>
<version>0.2.1</version>
</dependency>
<dependency>
<groupId>com.aliyun</groupId>
<artifactId>credentials-java</artifactId>
<version>0.3.0</version>
</dependency>
<dependency>
<groupId>com.aliyun</groupId>
<artifactId>tea-util</artifactId>
<version>0.2.21</version>
</dependency>
alibabacloud_gpdb20160503==4.4.0
alibabacloud_tea_openapi==0.3.12
SDK Client
import com.aliyun.gpdb20160503.Client;
import com.aliyun.teaopenapi.models.Config;
public static Client getClient() throws Exception {
Config config = new Config();
config.setAccessKeyId(System.getenv("ALIBABA_CLOUD_ACCESS_KEY_ID")); // 请确保代码运行环境设置了环境变量 ALIBABA_CLOUD_ACCESS_KEY_ID。
config.setAccessKeySecret(System.getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET")); // 请确保代码运行环境设置了环境变量 ALIBABA_CLOUD_ACCESS_KEY_SECRET。
config.setRegionId("cn-beijing"); // 实例所在region
config.setEndpoint("gpdb.****uncs.com"); // 如果通过公网ip访问,不需配置此项,否则按照https://api.aliyun.com/product/gpdb配置
config.setMaxIdleConns(200); // 最大链接数,按照此Client的最大并发设置
return new Client(config);
}
import os
from alibabacloud_gpdb20160503.client import Client
from alibabacloud_tea_openapi import models as open_api_models
def get_client():
config = open_api_models.Config(
access_key_id=os.getenv("ALIBABA_CLOUD_ACCESS_KEY_ID"), # 请确保代码运行环境设置了环境变量 ALIBABA_CLOUD_ACCESS_KEY_ID。
access_key_secret=os.getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET"), # 请确保代码运行环境设置了环境变量 ALIBABA_CLOUD_ACCESS_KEY_SECRET。
region_id="cn-beijing", # 实例所在region
endpoint="gpdb.****uncs.com", # 如果通过公网ip访问,不需配置此项,否则按照https://api.aliyun.com/product/gpdb配置
max_idle_conns=200, # 最大链接数,按照此Client的最大并发设置
)
return Client(config)
超时时间
SDK默认超时时间时为10s,但部分API接口(例如UpsertChunks)最大超时时间支持60s,可以按照以下配置。
config.setConnectTimeout(3000); // 连接超时时间3s
config.setReadTimeout(60000); // 接口超时时间60s
config.connect_timeout = 3000 # 连接超时时间3s
config.read_timeout = 60000 # 接口超时时间60s
重试次数
调用接口的失败重试为接口级,即每个接口都有对应的WithOptions接口,可以按照以下配置。
import com.aliyun.gpdb20160503.models.*;
import com.aliyun.teautil.models.RuntimeOptions;
RuntimeOptions runtime = new RuntimeOptions();
runtime.setMaxAttempts(3); // 最多3次尝试次数
QueryCollectionDataResponse response = client.queryCollectionDataWithOptions(request, runtime);
from alibabacloud_tea_util import models as util_models
runtime = util_models.RuntimeOptions(
max_attempts=5,
)
response = client.query_collection_data_with_options(request, runtime)
初始化实例向量库
每个实例仅需执行一次。
import com.aliyun.gpdb20160503.models.*;
import com.aliyun.gpdb20160503.Client;
import com.google.gson.Gson;
public static void init() throws Exception {
String region = "cn-beijing"; // 实例所在region
String instanceId = "gp-2zemvvolfw0d9****"; // 实例id
String managerAccount = "testacc****"; // 实例初始账号,可从控制台实例详情页创建
String managerAccountPassword = "mypassword"; // 实例初始密码,可从控制台实例详情页创建
InitVectorDatabaseRequest request = new InitVectorDatabaseRequest();
request.setRegionId(region);
request.setDBInstanceId(instanceId);
request.setManagerAccount(managerAccount);
request.setManagerAccountPassword(managerAccountPassword);
Client client = getClient();
InitVectorDatabaseResponse response = client.initVectorDatabase(request);
System.out.println(response.getStatusCode());
System.out.println(new Gson().toJson(response.getBody()));
}
from alibabacloud_gpdb20160503.client import Client
from alibabacloud_gpdb20160503 import models as gpdb_20160503_models
def init():
region = "cn-beijing" # 实例所在region
instance_id = "gp-2zemvvolfw0d9****" # 实例id
manager_account = "testacc****" # 实例初始账号,可从控制台实例详情页创建
manager_account_password = "mypassword" # 实例初始密码,可从控制台实例详情页创建
request = gpdb_20160503_models.InitVectorDatabaseRequest(
region_id=region,
dbinstance_id=instance_id,
manager_account=manager_account,
manager_account_password=manager_account_password,
)
response = get_client().init_vector_database(request)
print(f"response code: {response.status_code}, body:{response.body}")
创建Namespace
创建自定义namespace和namespace Password。
import com.aliyun.gpdb20160503.models.*;
import com.aliyun.gpdb20160503.Client;
import com.google.gson.Gson;
public static void createNamespace() throws Exception{
String region = "cn-beijing"; // 实例所在region
String instanceId = "gp-2zemvvolfw0d9****"; // 实例id
String managerAccount = "testacc****"; // 实例初始账号,可从控制台实例详情页创建
String managerAccountPassword = "mypassword"; // 实例初始密码,可从控制台实例详情页创建
String namespace = "test_ns"; // 要创建的namespace,命名规范为:英文字母、数字和下划线
String namespacePassword = "test_ns_pass"; // 要创建的namespace对应的密码
CreateNamespaceRequest request = new CreateNamespaceRequest();
request.setRegionId(region);
request.setDBInstanceId(instanceId);
request.setManagerAccount(managerAccount);
request.setManagerAccountPassword(managerAccountPassword);
request.setNamespace(namespace);
request.setNamespacePassword(namespacePassword);
Client client = getClient();
CreateNamespaceResponse response = client.createNamespace(request);
System.out.println(response.getStatusCode());
System.out.println(new Gson().toJson(response.getBody()));
}
from alibabacloud_gpdb20160503.client import Client
from alibabacloud_gpdb20160503 import models as gpdb_20160503_models
def create_namespace():
region = "cn-beijing" # 实例所在region
instance_id = "gp-2zemvvolfw0d9****" # 实例id
manager_account = "testacc****" # 实例初始账号,可从控制台实例详情页创建
manager_account_password = "mypassword" # 实例初始密码,可从控制台实例详情页创建
namespace = "test_ns" # 要创建的namespace,命名规范为:英文字母、数字和下划线
namespace_password = "test_ns_pass" # 要创建的namespace对应的密码
request = gpdb_20160503_models.CreateNamespaceRequest(
region_id=region,
dbinstance_id=instance_id,
manager_account=manager_account,
manager_account_password=manager_account_password,
namespace=namespace,
namespace_password=namespace_password,
)
response = get_client().create_namespace(request)
print(f"response code: {response.status_code}, body:{response.body}")
向量接口
创建Collection
创建自定义collection名称,设置metadata、fullTextRetrievalFields、dimension、metrics等参数。
import com.aliyun.gpdb20160503.models.*;
import com.aliyun.gpdb20160503.Client;
import com.google.gson.Gson;
import java.util.HashMap;
import java.util.Map;
public static void createCollection() throws Exception {
String region = "cn-beijing"; // 实例所在region
String instanceId = "gp-2zemvvolfw0d9****"; // 实例id
String managerAccount = "testacc****"; // 实例初始账号,可从控制台实例详情页创建
String managerAccountPassword = "mypassword"; // 实例初始密码,可从控制台实例详情页创建
String namespace = "test_ns"; // 已创建的namespace名称
String collection = "test_collection"; // 要创建的collection,命名规范为:英文字母、数字和下划线
// Metadata字段,key为列名,value为ADBPG字段类型:
Map<String, String> metadata = new HashMap<>();
metadata.put("doc_name", "text");
metadata.put("title", "text");
metadata.put("content", "text");
metadata.put("page", "int");
metadata.put("tags", "text[]");
String fullTextRetrievalFields = "title,content"; // 用于全文检索字段,以“,”分隔的多个Metadata里的key
Long dimension = 768L; // 向量维度
String metrics = "cosine"; // 向量索引相似度算法
CreateCollectionRequest request = new CreateCollectionRequest();
request.setRegionId(region);
request.setDBInstanceId(instanceId);
request.setManagerAccount(managerAccount);
request.setManagerAccountPassword(managerAccountPassword);
request.setNamespace(namespace);
request.setCollection(collection);
request.setMetadata(new Gson().toJson(metadata));
request.setFullTextRetrievalFields(fullTextRetrievalFields);
request.setDimension(dimension);
request.setMetrics(metrics);
Client client = getClient();
CreateCollectionResponse response = client.createCollection(request);
System.out.println(response.getStatusCode());
System.out.println(new Gson().toJson(response.getBody()));
}
import json
from alibabacloud_gpdb20160503.client import Client
from alibabacloud_gpdb20160503 import models as gpdb_20160503_models
def create_collection():
region = "cn-beijing" # 实例所在region
instance_id = "gp-2zemvvolfw0d9****" # 实例id
manager_account = "testacc****" # 实例初始账号,可从控制台实例详情页创建
manager_account_password = "mypassword" # 实例初始密码,可从控制台实例详情页创建
namespace = "test_ns" # 已创建的namespace名称
collection = "test_collection" # 要创建的collection,命名规范为:英文字母、数字和下划线
# Metadata字段,key为列名,value为ADBPG字段类型
metadata = {
"doc_name": "text",
"title": "text",
"content": "text",
"page": "int",
"tags": "text[]",
}
full_text_retrieval_fields = "title,content" # 用于全文检索字段,以“, ”分隔的多个Metadata里的key
dimension = 768 # 向量维度
metrics = "cosine" # 向量索引相似度算法
request = gpdb_20160503_models.CreateCollectionRequest(
region_id=region,
dbinstance_id=instance_id,
manager_account=manager_account,
manager_account_password=manager_account_password,
namespace=namespace,
collection=collection,
metadata=json.dumps(metadata),
full_text_retrieval_fields=full_text_retrieval_fields,
dimension=dimension,
metrics=metrics,
)
response = get_client().create_collection(request)
print(f"response code: {response.status_code}, body:{response.body}")
上传向量数据
准备好向量数据,调用上传接口。
import com.aliyun.gpdb20160503.models.*;
import com.aliyun.gpdb20160503.Client;
import com.google.gson.Gson;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public static void upsertCollectionData() throws Exception {
String region = "cn-beijing"; // 实例所在region
String instanceId = "gp-2zemvvolfw0d9****"; // 实例id
String namespace = "test_ns"; // 已创建的namespace
String namespacePassword = "test_ns_pass"; // 已创建的namespace密码
String collection = "test_collection"; // 已创建的collection
UpsertCollectionDataRequest request = new UpsertCollectionDataRequest();
request.setRegionId(region);
request.setDBInstanceId(instanceId);
request.setNamespace(namespace);
request.setNamespacePassword(namespacePassword);
request.setCollection(collection);
request.setRows(getRows());
Client client = getClient();
UpsertCollectionDataResponse response = client.upsertCollectionData(request);
System.out.println(response.getStatusCode());
System.out.println(new Gson().toJson(response.getBody()));
}
public static List<UpsertCollectionDataRequest.UpsertCollectionDataRequestRows> getRows() throws Exception {
List<UpsertCollectionDataRequest.UpsertCollectionDataRequestRows> rows = new ArrayList<>();
rows.add(getRow());
return rows;
}
public static UpsertCollectionDataRequest.UpsertCollectionDataRequestRows getRow() throws Exception {
UpsertCollectionDataRequest.UpsertCollectionDataRequestRows row = new UpsertCollectionDataRequest.UpsertCollectionDataRequestRows();
Map<String, String> metadata = new HashMap<>();
metadata.put("doc_name", "test.doc");
metadata.put("title", "产品");
metadata.put("content", "产品详细信息");
metadata.put("page", "1");
metadata.put("tags", "{\"v1\", \"v2\"}");
row.setMetadata(metadata);
List<Double> vectors = new ArrayList<>(); // 替换为你自己的向量数据
for (int i = 0; i < 768; i++) {
vectors.add(Math.random());
}
row.setVector(vectors);
return row;
}
import json
from alibabacloud_gpdb20160503.client import Client
from alibabacloud_gpdb20160503 import models as gpdb_20160503_models
from typing import List
def upsert_collection_data():
region = "cn-beijing" # 实例所在region
instance_id = "gp-2zemvvolfw0d9****" # 实例id
namespace = "test_ns" # 已创建的namespace名称
namespace_password = "test_ns_pass" # 已创建的namespace密码
collection = "test_collection" # 已创建的collection
request = gpdb_20160503_models.UpsertCollectionDataRequest(
region_id=region,
dbinstance_id=instance_id,
namespace=namespace,
namespace_password=namespace_password,
collection=collection,
rows=get_rows(),
)
response = get_client().upsert_collection_data(request)
print(f"response code: {response.status_code}, body:{response.body}")
def get_rows() -> List[gpdb_20160503_models.UpsertCollectionDataRequestRows]:
rows = [get_row()]
return rows
def get_row() -> gpdb_20160503_models.UpsertCollectionDataRequestRows:
metadata = {
"doc_name": "test.doc",
"title": "产品",
"content": "产品详细信息",
"page": "1",
"tags": '["tag1", "tag2"]',
}
vector = [] # 替换为你自己的向量数据
for i in range(768):
vector.append(float(i))
row = gpdb_20160503_models.UpsertCollectionDataRequestRows(
metadata=metadata,
vector=vector,
)
return row
检索
将检索的请求文本自行向量化,然后使用向量数据检索。
import com.aliyun.gpdb20160503.models.*;
import com.aliyun.gpdb20160503.Client;
import com.google.gson.Gson;
import java.util.ArrayList;
import java.util.List;
public static void queryCollectionData() throws Exception {
String region = "cn-beijing"; // 实例所在region
String instanceId = "gp-2zemvvolfw0d9****"; // 实例id
String namespace = "test_ns"; // 已创建的namespace
String namespacePassword = "test_ns_pass"; // 已创建的namespace密码
String collection = "test_collection"; // 已创建的collection
Long topK = 3L; // 检索返回的条数
List<Double> vector = new ArrayList<>(); // 替换为你自己的向量数据
for (int i=0;i<768;i++) {
vector.add(Math.random());
}
QueryCollectionDataRequest request = new QueryCollectionDataRequest();
request.setRegionId(region);
request.setDBInstanceId(instanceId);
request.setNamespace(namespace);
request.setNamespacePassword(namespacePassword);
request.setCollection(collection);
request.setTopK(topK);
request.setIncludeValues(false);
request.setIncludeMetadataFields("title,content");
request.setMetrics("cosine");
request.setVector(vector);
Client client = getClient();
QueryCollectionDataResponse response = client.queryCollectionData(request);
System.out.println(response.getStatusCode());
System.out.println(new Gson().toJson(response.getBody()));
}
from alibabacloud_gpdb20160503.client import Client
from alibabacloud_gpdb20160503 import models as gpdb_20160503_models
def query_collection_data():
region = "cn-beijing" # 实例所在region
instance_id = "gp-2zemvvolfw0d9****" # 实例id
namespace = "test_ns" # 已创建的namespace名称
namespace_password = "test_ns_pass" # 已创建的namespace密码
collection = "test_collection" # 已创建的collection
top_k = 3 # 检索返回的条数
vector = [] # 替换为你自己的向量数据
for i in range(768):
vector.append(float(i))
request = gpdb_20160503_models.QueryCollectionDataRequest(
region_id=region,
dbinstance_id=instance_id,
namespace=namespace,
namespace_password=namespace_password,
collection=collection,
top_k=top_k,
vector=vector,
include_values=False,
include_metadata_fields="title,content",
metrics="cosine",
)
response = get_client().query_collection_data(request)
print(f"response code: {response.status_code}, body:{response.body}")
文档接口
创建DocumentCollection
自定义collection名称,设置metadata、fullTextRetrievalFields、embeddingModel、metrics等参数并创建。
import com.aliyun.gpdb20160503.models.*;
import com.aliyun.gpdb20160503.Client;
import com.google.gson.Gson;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
public static void createDocumentCollection() throws Exception {
String region = "cn-beijing"; // 实例所在region
String instanceId = "gp-2zemvvolfw0d9****"; // 实例id
String managerAccount = "testacc"; // 实例初始账号,可从控制台实例详情页创建
String managerAccountPassword = "testpass"; // 实例初始密码,可从控制台实例详情页创建
String namespace = "test_ns"; // 已创建的namespace名称
String collection = "test_doc_collection"; // 要创建的collection,命名规范为:英文字母、数字和下划线
// Metadata字段,key为列名,value为ADBPG字段类型:
Map<String, String> metadata = new HashMap<>();
metadata.put("title", "text");
metadata.put("page", "int");
metadata.put("tags", "text[]");
String fullTextRetrievalFields = "title"; // 用于全文检索字段,以“,”分隔的多个Metadata里的key
String embeddingModel = "m3e-base"; // 使用的向量模型
String metrics = "cosine"; // 向量索引相似度算法
CreateDocumentCollectionRequest request = new CreateDocumentCollectionRequest();
request.setRegionId(region);
request.setDBInstanceId(instanceId);
request.setManagerAccount(managerAccount);
request.setManagerAccountPassword(managerAccountPassword);
request.setNamespace(namespace);
request.setCollection(collection);
request.setMetadata(new Gson().toJson(metadata));
request.setFullTextRetrievalFields(fullTextRetrievalFields);
request.setMetrics(metrics);
request.setEmbeddingModel(embeddingModel);
Client client = getClient();
CreateDocumentCollectionResponse response = client.createDocumentCollection(request);
System.out.println(response.getStatusCode());
System.out.println(new Gson().toJson(response.getBody()));
}
from alibabacloud_gpdb20160503.client import Client
from alibabacloud_gpdb20160503 import models as gpdb_20160503_models
def create_document_collection():
region = "cn-beijing" # 实例所在region
instance_id = "gp-2zemvvolfw0d9****" # 实例id
manager_account = "testacc" # 实例初始账号,可从控制台实例详情页创建
manager_account_password = "testpass" # 实例初始密码,可从控制台实例详情页创建
namespace = "test_ns" # 已创建的namespace名称
collection = "test_doc_collection" # 要创建的collection,命名规范为:英文字母、数字和下划线
# Metadata字段,key为列名,value为ADBPG字段类型
metadata = {
"title": "text",
"page": "int",
"tags": "text[]",
}
full_text_retrieval_fields = "title" # 用于全文检索字段,以“, ”分隔的多个Metadata里的key
embedding_model = "m3e-base" # 使用的向量模型
metrics = "cosine" # 向量索引相似度算法
request = gpdb_20160503_models.CreateDocumentCollectionRequest(
region_id=region,
dbinstance_id=instance_id,
manager_account=manager_account,
manager_account_password=manager_account_password,
namespace=namespace,
collection=collection,
metadata=json.dumps(metadata),
full_text_retrieval_fields=full_text_retrieval_fields,
embedding_model=embedding_model,
metrics=metrics,
)
response = get_client().create_document_collection(request)
print(f"response code: {response.status_code}, body:{response.body}")
上传文档
上传切分后的文档
准备好切分后的Chunk文本,然后直接上传。
import com.aliyun.gpdb20160503.models.*;
import com.aliyun.gpdb20160503.Client;
import com.google.gson.Gson;
import java.util.HashMap;
import java.util.Map;
public static void upsertChunks() throws Exception {
String region = "cn-beijing"; // 实例所在region
String instanceId = "gp-2zemvvolfw0d9****"; // 实例id
String namespace = "test_ns"; // 已创建的namespace
String namespacePassword = "test_ns_pass"; // 已创建的namespace密码
String collection = "test_doc_collection"; // 已创建的DocumentCollection
UpsertChunksRequest request = new UpsertChunksRequest();
request.setRegionId(region);
request.setDBInstanceId(instanceId);
request.setNamespace(namespace);
request.setNamespacePassword(namespacePassword);
request.setCollection(collection);
request.setTextChunks(getChunks());
Client client = getClient();
UpsertChunksResponse response = client.upsertChunks(request);
System.out.println(response.getStatusCode());
System.out.println(new Gson().toJson(response.getBody()));
}
public static List<UpsertChunksRequest.UpsertChunksRequestTextChunks> getChunks() {
List<UpsertChunksRequest.UpsertChunksRequestTextChunks> textChunks = new ArrayList<>();
textChunks.add(getChunk());
return textChunks;
}
public static UpsertChunksRequest.UpsertChunksRequestTextChunks getChunk() {
UpsertChunksRequest.UpsertChunksRequestTextChunks chunk = new UpsertChunksRequest.UpsertChunksRequestTextChunks();
chunk.setContent("测试文本"); // 替换为你自己的文本
Map<String, Object> metadata = new HashMap<>();
metadata.put("title", "产品");
metadata.put("page", 1);
metadata.put("tags", Arrays.asList("v1", "v2"));
chunk.setMetadata(metadata);
return chunk;
}
from alibabacloud_gpdb20160503.client import Client
from alibabacloud_gpdb20160503 import models as gpdb_20160503_models
from typing import List
def upsert_chunks():
region = "cn-beijing" # 实例所在region
instance_id = "gp-2zemvvolfw0d9****" # 实例id
namespace = "test_ns" # 已创建的namespace名称
namespace_password = "test_ns_pass" # 已创建的namespace密码
collection = "test_doc_collection" # 已创建的DocumentCollection
request = gpdb_20160503_models.UpsertChunksRequest(
region_id=region,
dbinstance_id=instance_id,
namespace=namespace,
namespace_password=namespace_password,
collection=collection,
text_chunks=get_chunks(),
)
response = get_client().upsert_chunks(request)
print(f"response code: {response.status_code}, body:{response.body}")
def get_chunks() -> List[gpdb_20160503_models.UpsertChunksRequestTextChunks]:
chunks = [get_chunk()]
return chunks
def get_chunk() -> gpdb_20160503_models.UpsertChunksRequestTextChunks:
content = "测试文本"
metadata = {
"title": "产品",
"page": 1,
"tags": ["tag1", "tag2"],
}
chunk = gpdb_20160503_models.UpsertChunksRequestTextChunks(
content=content,
metadata=metadata,
)
return chunk
异步上传文档
可以将本地文档通过SDK异步上传,并在超时时间内等待任务成功。
import com.aliyun.gpdb20160503.Client;
import com.aliyun.gpdb20160503.models.*;
import com.aliyun.teaopenapi.models.Config;
import com.aliyun.teautil.models.RuntimeOptions;
import com.google.gson.Gson;
import java.io.File;
import java.nio.file.Files;
import java.util.*;
import java.util.concurrent.*;
private static final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);
public static void uploadDocument() throws Exception {
String region = "cn-beijing"; // 实例所在region
String instanceId = "gp-2zemvvolfw0d9****"; // 实例id
String namespace = "test_ns"; // 已创建的namespace
String namespacePassword = "test_ns_pass"; // 已创建的namespace密码
String collection = "test_doc_collection"; // 已创建的DocumentCollection
String fileName = "test_file.txt"; // 本地文件名,需要带文件类型后缀
String localFilePath = "/root/test_file.txt"; // 本地文件路径
Map<String, Object> metadata = new HashMap<>();
metadata.put("title", "产品");
metadata.put("tags", Arrays.asList("v1", "v2"));
UploadDocumentAsyncAdvanceRequest request = new UploadDocumentAsyncAdvanceRequest();
request.setRegionId(region);
request.setDBInstanceId(instanceId);
request.setNamespace(namespace);
request.setNamespacePassword(namespacePassword);
request.setCollection(collection);
request.setFileName(fileName);
request.setMetadata(metadata);
request.setFileUrlObject(Files.newInputStream(new File(localFilePath).toPath()));
Client client = getClient();
RuntimeOptions runtime = new RuntimeOptions();
UploadDocumentAsyncResponse response = client.uploadDocumentAsyncAdvance(request, runtime);
System.out.println(response.getStatusCode());
System.out.println(new Gson().toJson(response.getBody()));
Callable<String> waitUploadDocumentJobDone = () -> getUploadDocumentJobStatus(client, response.getBody().getJobId());
try {
String result = executeWithTimeout(waitUploadDocumentJobDone, 1, TimeUnit.HOURS);
System.out.println("任务结果: " + result);
} catch (InterruptedException | ExecutionException | TimeoutException e) {
e.printStackTrace();
}
}
public static String getUploadDocumentJobStatus(Client client, String jobId) {
String region = "cn-beijing"; // 实例所在region
String instanceId = "gp-2zemvvolfw0d9****"; // 实例id
String namespace = "test_ns"; // 已创建的namespace
String namespacePassword = "test_ns_pass"; // 已创建的namespace密码
String collection = "test_doc_collection"; // 已创建的DocumentCollection
GetUploadDocumentJobRequest request = new GetUploadDocumentJobRequest();
request.setRegionId(region);
request.setDBInstanceId(instanceId);
request.setNamespace(namespace);
request.setNamespacePassword(namespacePassword);
request.setCollection(collection);
request.setJobId(jobId);
List<String> nonSuccessStatuses = Arrays.asList("Failed", "Cancelling", "Cancelled");
int i = 0;
while (true) {
System.out.println("尝试第 " + (i + 1) + " 次...");
try {
GetUploadDocumentJobResponse response = client.getUploadDocumentJob(request);
System.out.println(response.getStatusCode());
System.out.println(new Gson().toJson(response.getBody()));
if (response.getBody().getJob().getCompleted()) {
return response.getBody().getJob().getStatus();
} else if (nonSuccessStatuses.contains(response.getBody().getJob().getStatus())) {
return response.getBody().getJob().getStatus();
}
} catch (Exception e) {
e.printStackTrace();
return "Failed";
}
try {
Thread.sleep(5000); // 每隔5秒尝试一次
} catch (InterruptedException e) {
return "Interrupted";
}
i++;
}
}
public static <T> T executeWithTimeout(Callable<T> task, long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException {
Future<T> future = null;
try {
future = scheduler.submit(task);
return future.get(timeout, unit); // 等待任务完成或超时
} catch (TimeoutException e) {
if (future != null) {
future.cancel(true); // 取消任务
}
throw new TimeoutException("任务执行超时");
} finally {
scheduler.shutdown(); // 关闭调度器
}
}
import json
import io
import concurrent.futures
import time
from alibabacloud_gpdb20160503.client import Client
from alibabacloud_gpdb20160503 import models as gpdb_20160503_models
from alibabacloud_tea_util import models as util_models
from typing import List
def upload_document():
region = "cn-beijing" # 实例所在region
instance_id = "gp-2zemvvolfw0d9****" # 实例id
namespace = "test_ns" # 已创建的namespace名称
namespace_password = "test_ns_pass" # 已创建的namespace密码
collection = "test_doc_collection" # 已创建的DocumentCollection
metadata = {
"title": "产品",
"tags": ["tag1", "tag2"],
}
file_name = "test_file.txt" # 本地文件名,需要带文件类型后缀
local_file_path = "/root/test_file.txt" # 本地文件路径
with open(local_file_path, 'rb') as file:
file_content = file.read()
request = gpdb_20160503_models.UploadDocumentAsyncAdvanceRequest(
region_id=region,
dbinstance_id=instance_id,
namespace=namespace,
namespace_password=namespace_password,
collection=collection,
file_name=file_name,
metadata=metadata,
file_url_object=io.BytesIO(file_content),
)
client = get_client()
response = client.upload_document_async_advance(request, util_models.RuntimeOptions())
print(f"response code: {response.status_code}, body:{response.body}")
def get_upload_document_job_status_wrapper():
return get_upload_document_job_status(client, response.body.job_id)
try:
result = execute_with_timeout(get_upload_document_job_status_wrapper, 3600) # 设置超时时间为1hour
print(f"任务结果: {result}")
except TimeoutError as e:
print(e)
def get_upload_document_job_status(client, job_id):
region = "cn-beijing" # 实例所在region
instance_id = "gp-2zemvvolfw0d9****" # 实例id
namespace = "test_ns" # 已创建的namespace名称
namespace_password = "test_ns_pass" # 已创建的namespace密码
collection = "test_doc_collection" # 已创建的DocumentCollection
request = gpdb_20160503_models.GetUploadDocumentJobRequest(
region_id=region,
dbinstance_id=instance_id,
namespace=namespace,
namespace_password=namespace_password,
collection=collection,
job_id=job_id,
)
non_success_statuses = ["Failed", "Cancelling", "Cancelled"]
i = 0
while True:
print(f"尝试第 {i + 1} 次...")
response: gpdb_20160503_models.GetUploadDocumentJobResponse = client.get_upload_document_job(request)
print(f"response code: {response.status_code}, body:{response.body}")
if response.body.job.completed or response.body.job.status in non_success_statuses:
return response.body.job.status
time.sleep(5) # 每隔5秒尝试一次
i += 1
def execute_with_timeout(task, timeout):
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
future = executor.submit(task)
try:
result = future.result(timeout=timeout)
return result
except concurrent.futures.TimeoutError:
future.cancel() # 取消任务
raise TimeoutError("任务执行超时")
检索
import com.aliyun.gpdb20160503.Client;
import com.aliyun.gpdb20160503.models.*;
import com.aliyun.teaopenapi.models.Config;
import com.aliyun.teautil.models.RuntimeOptions;
import com.google.gson.Gson;
import java.io.File;
import java.nio.file.Files;
import java.util.*;
import java.util.concurrent.*;
public static void queryContent() throws Exception {
String region = "cn-beijing"; // 实例所在region
String instanceId = "gp-2zemvvolfw0d9****"; // 实例id
String namespace = "test_ns"; // 已创建的namespace
String namespacePassword = "test_ns_pass"; // 已创建的namespace密码
String collection = "test_doc_collection"; // 已创建的DocumentCollection
String content = "测试文本"; // 查询文本
String includeMetadataFields = "title,tags"; // 返回的元数据字段
String metrics = "cosine"; // 计算相似度使用的算法
Integer topK = 10; // 返回的topK个结果
QueryContentRequest request = new QueryContentRequest();
request.setRegionId(region);
request.setDBInstanceId(instanceId);
request.setNamespace(namespace);
request.setNamespacePassword(namespacePassword);
request.setCollection(collection);
request.setContent(content);
request.setIncludeMetadataFields(includeMetadataFields);
request.setMetrics(metrics);
request.setTopK(topK);
Client client = getClient();
QueryContentResponse response = client.queryContent(request);
System.out.println(response.getStatusCode());
System.out.println(new Gson().toJson(response.getBody()));
}
from alibabacloud_gpdb20160503.client import Client
from alibabacloud_gpdb20160503 import models as gpdb_20160503_models
def query_content():
region = "cn-beijing" # 实例所在region
instance_id = "gp-2zemvvolfw0d9****" # 实例id
namespace = "test_ns" # 已创建的namespace名称
namespace_password = "test_ns_pass" # 已创建的namespace密码
collection = "test_doc_collection" # 已创建的DocumentCollection
content = "测试文本" # 查询文本
include_metadata_fields = "title,tags" # 返回的元数据字段
metrics = "cosine" # 计算相似度使用的算法
top_k = 10 # 返回的topK个结果
request = gpdb_20160503_models.QueryContentRequest(
region_id=region,
dbinstance_id=instance_id,
namespace=namespace,
namespace_password=namespace_password,
collection=collection,
content=content,
include_metadata_fields=include_metadata_fields,
metrics=metrics,
top_k=top_k,
)
response = get_client().query_content(request)
print(f"response code: {response.status_code}, body:{response.body}")
该文章对您有帮助吗?