代码示例

更新时间:

本文为您介绍AnalyticDB for PostgreSQL RAG Service相关接口的代码示例。

前提条件

SDK Client配置

安装SDK

请按照mvnpip源及时更新版本,SDK下载地址请参见SDK参考

<dependency>
    <groupId>com.aliyun</groupId>
    <artifactId>gpdb20160503</artifactId>
    <version>3.2.1</version>
</dependency>

<!--  依赖版本不低于 -->
 <dependency>
    <groupId>com.aliyun</groupId>
    <artifactId>aliyun-java-sdk-core</artifactId>
    <version>4.1.2</version>
</dependency>
<dependency>
    <groupId>com.aliyun</groupId>
    <artifactId>tea-openapi</artifactId>
    <version>0.3.1</version>
</dependency>
<dependency>
    <groupId>com.aliyun</groupId>
    <artifactId>tea</artifactId>
    <version>1.2.8</version>
</dependency>
<dependency>
    <groupId>com.aliyun</groupId>
    <artifactId>openapiutil</artifactId>
    <version>0.2.1</version>
</dependency>
<dependency>
    <groupId>com.aliyun</groupId>
    <artifactId>credentials-java</artifactId>
    <version>0.3.0</version>
</dependency>
<dependency>
    <groupId>com.aliyun</groupId>
    <artifactId>tea-util</artifactId>
    <version>0.2.21</version>
</dependency>
alibabacloud_gpdb20160503==4.4.0
alibabacloud_tea_openapi==0.3.12

SDK Client

import com.aliyun.gpdb20160503.Client;
import com.aliyun.teaopenapi.models.Config;

public static Client getClient() throws Exception {
    Config config = new Config();
    config.setAccessKeyId(System.getenv("ALIBABA_CLOUD_ACCESS_KEY_ID"));  // 请确保代码运行环境设置了环境变量 ALIBABA_CLOUD_ACCESS_KEY_ID。
    config.setAccessKeySecret(System.getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET"));   // 请确保代码运行环境设置了环境变量 ALIBABA_CLOUD_ACCESS_KEY_SECRET。
    config.setRegionId("cn-beijing");    // 实例所在region
    config.setEndpoint("gpdb.****uncs.com");   // 如果通过公网ip访问,不需配置此项,否则按照https://api.aliyun.com/product/gpdb配置
    config.setMaxIdleConns(200);  // 最大链接数,按照此Client的最大并发设置
    return new Client(config);
}
import os
from alibabacloud_gpdb20160503.client import Client
from alibabacloud_tea_openapi import models as open_api_models

def get_client():
    config = open_api_models.Config(
        access_key_id=os.getenv("ALIBABA_CLOUD_ACCESS_KEY_ID"),  # 请确保代码运行环境设置了环境变量 ALIBABA_CLOUD_ACCESS_KEY_ID。
        access_key_secret=os.getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET"), # 请确保代码运行环境设置了环境变量 ALIBABA_CLOUD_ACCESS_KEY_SECRET。
        region_id="cn-beijing",  # 实例所在region
        endpoint="gpdb.****uncs.com",  # 如果通过公网ip访问,不需配置此项,否则按照https://api.aliyun.com/product/gpdb配置
        max_idle_conns=200,  # 最大链接数,按照此Client的最大并发设置
    )
    return Client(config)

超时时间

SDK默认超时时间时为10s,但部分API接口(例如UpsertChunks)最大超时时间支持60s,可以按照以下配置。

config.setConnectTimeout(3000); // 连接超时时间3s
config.setReadTimeout(60000);  // 接口超时时间60s
config.connect_timeout = 3000  # 连接超时时间3s
config.read_timeout = 60000  # 接口超时时间60s

重试次数

调用接口的失败重试为接口级,即每个接口都有对应的WithOptions接口,可以按照以下配置。

import com.aliyun.gpdb20160503.models.*;
import com.aliyun.teautil.models.RuntimeOptions;

RuntimeOptions runtime = new RuntimeOptions();
runtime.setMaxAttempts(3);  // 最多3次尝试次数
QueryCollectionDataResponse response = client.queryCollectionDataWithOptions(request, runtime);
from alibabacloud_tea_util import models as util_models

runtime = util_models.RuntimeOptions(
    max_attempts=5,
)
response = client.query_collection_data_with_options(request, runtime)

初始化实例向量库

每个实例仅需执行一次。

import com.aliyun.gpdb20160503.models.*;
import com.aliyun.gpdb20160503.Client;
import com.google.gson.Gson;

public static void init() throws Exception {
    String region = "cn-beijing";  // 实例所在region
    String instanceId = "gp-2zemvvolfw0d9****";  // 实例id
    String managerAccount = "testacc****";  // 实例初始账号,可从控制台实例详情页创建
    String managerAccountPassword = "mypassword";  // 实例初始密码,可从控制台实例详情页创建

    InitVectorDatabaseRequest request = new InitVectorDatabaseRequest();
    request.setRegionId(region);
    request.setDBInstanceId(instanceId);
    request.setManagerAccount(managerAccount);
    request.setManagerAccountPassword(managerAccountPassword);
    Client client = getClient();
    InitVectorDatabaseResponse response = client.initVectorDatabase(request);
    System.out.println(response.getStatusCode());
    System.out.println(new Gson().toJson(response.getBody()));
}
from alibabacloud_gpdb20160503.client import Client
from alibabacloud_gpdb20160503 import models as gpdb_20160503_models

def init():
    region = "cn-beijing"  # 实例所在region
    instance_id = "gp-2zemvvolfw0d9****"  # 实例id
    manager_account = "testacc****"  # 实例初始账号,可从控制台实例详情页创建
    manager_account_password = "mypassword"  # 实例初始密码,可从控制台实例详情页创建

    request = gpdb_20160503_models.InitVectorDatabaseRequest(
        region_id=region,
        dbinstance_id=instance_id,
        manager_account=manager_account,
        manager_account_password=manager_account_password,
    )
    response = get_client().init_vector_database(request)
    print(f"response code: {response.status_code}, body:{response.body}")

创建Namespace

创建自定义namespacenamespace Password。

import com.aliyun.gpdb20160503.models.*;
import com.aliyun.gpdb20160503.Client;
import com.google.gson.Gson;

public static void createNamespace() throws Exception{
    String region = "cn-beijing";  // 实例所在region
    String instanceId = "gp-2zemvvolfw0d9****";  // 实例id
    String managerAccount = "testacc****";  // 实例初始账号,可从控制台实例详情页创建
    String managerAccountPassword = "mypassword";  // 实例初始密码,可从控制台实例详情页创建
    
    String namespace = "test_ns";  // 要创建的namespace,命名规范为:英文字母、数字和下划线
    String namespacePassword = "test_ns_pass";  // 要创建的namespace对应的密码
    
    CreateNamespaceRequest request = new CreateNamespaceRequest();
    request.setRegionId(region);
    request.setDBInstanceId(instanceId);
    request.setManagerAccount(managerAccount);
    request.setManagerAccountPassword(managerAccountPassword);
    request.setNamespace(namespace);
    request.setNamespacePassword(namespacePassword);
    Client client = getClient();
    CreateNamespaceResponse response = client.createNamespace(request);
    System.out.println(response.getStatusCode());
    System.out.println(new Gson().toJson(response.getBody()));
}
from alibabacloud_gpdb20160503.client import Client
from alibabacloud_gpdb20160503 import models as gpdb_20160503_models


def create_namespace():
    region = "cn-beijing"  # 实例所在region
    instance_id = "gp-2zemvvolfw0d9****"  # 实例id
    manager_account = "testacc****"  # 实例初始账号,可从控制台实例详情页创建
    manager_account_password = "mypassword"  # 实例初始密码,可从控制台实例详情页创建

    namespace = "test_ns"  # 要创建的namespace,命名规范为:英文字母、数字和下划线
    namespace_password = "test_ns_pass"  # 要创建的namespace对应的密码

    request = gpdb_20160503_models.CreateNamespaceRequest(
        region_id=region,
        dbinstance_id=instance_id,
        manager_account=manager_account,
        manager_account_password=manager_account_password,
        namespace=namespace,
        namespace_password=namespace_password,
    )
    response = get_client().create_namespace(request)
    print(f"response code: {response.status_code}, body:{response.body}")

向量接口

创建Collection

创建自定义collection名称,设置metadata、fullTextRetrievalFields、dimension、metrics等参数。

import com.aliyun.gpdb20160503.models.*;
import com.aliyun.gpdb20160503.Client;
import com.google.gson.Gson;

import java.util.HashMap;
import java.util.Map;

public static void createCollection() throws Exception {
    String region = "cn-beijing";  // 实例所在region
    String instanceId = "gp-2zemvvolfw0d9****";  // 实例id
    String managerAccount = "testacc****";  // 实例初始账号,可从控制台实例详情页创建
    String managerAccountPassword = "mypassword";  // 实例初始密码,可从控制台实例详情页创建
    String namespace = "test_ns";  // 已创建的namespace名称
    
    String collection = "test_collection"; // 要创建的collection,命名规范为:英文字母、数字和下划线
    // Metadata字段,key为列名,value为ADBPG字段类型:
    Map<String, String> metadata = new HashMap<>();
    metadata.put("doc_name", "text");
    metadata.put("title", "text");
    metadata.put("content", "text");
    metadata.put("page", "int"); 
    metadata.put("tags", "text[]");
    String fullTextRetrievalFields = "title,content";  // 用于全文检索字段,以“,”分隔的多个Metadata里的key
    Long dimension = 768L;  // 向量维度
    String metrics = "cosine";  // 向量索引相似度算法
   
    CreateCollectionRequest request = new CreateCollectionRequest();
    request.setRegionId(region);
    request.setDBInstanceId(instanceId);
    request.setManagerAccount(managerAccount);
    request.setManagerAccountPassword(managerAccountPassword);
    request.setNamespace(namespace);
    request.setCollection(collection);
    request.setMetadata(new Gson().toJson(metadata));
    request.setFullTextRetrievalFields(fullTextRetrievalFields);
    request.setDimension(dimension);
    request.setMetrics(metrics);
    Client client = getClient();
    CreateCollectionResponse response = client.createCollection(request);
    System.out.println(response.getStatusCode());
    System.out.println(new Gson().toJson(response.getBody()));
}
import json

from alibabacloud_gpdb20160503.client import Client
from alibabacloud_gpdb20160503 import models as gpdb_20160503_models

def create_collection():
    region = "cn-beijing"  # 实例所在region
    instance_id = "gp-2zemvvolfw0d9****"  # 实例id
    manager_account = "testacc****"  # 实例初始账号,可从控制台实例详情页创建
    manager_account_password = "mypassword"  # 实例初始密码,可从控制台实例详情页创建
    namespace = "test_ns"  # 已创建的namespace名称

    collection = "test_collection"  # 要创建的collection,命名规范为:英文字母、数字和下划线
    # Metadata字段,key为列名,value为ADBPG字段类型
    metadata = {
        "doc_name": "text",
        "title": "text",
        "content": "text",
        "page": "int",
        "tags": "text[]",
    }
    full_text_retrieval_fields = "title,content"  # 用于全文检索字段,以“, ”分隔的多个Metadata里的key
    dimension = 768   # 向量维度
    metrics = "cosine"  # 向量索引相似度算法

    request = gpdb_20160503_models.CreateCollectionRequest(
        region_id=region,
        dbinstance_id=instance_id,
        manager_account=manager_account,
        manager_account_password=manager_account_password,
        namespace=namespace,
        collection=collection,
        metadata=json.dumps(metadata),
        full_text_retrieval_fields=full_text_retrieval_fields,
        dimension=dimension,
        metrics=metrics,
    )
    response = get_client().create_collection(request)
    print(f"response code: {response.status_code}, body:{response.body}")

上传向量数据

准备好向量数据,调用上传接口。

import com.aliyun.gpdb20160503.models.*;
import com.aliyun.gpdb20160503.Client;
import com.google.gson.Gson;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public static void upsertCollectionData() throws Exception {
    String region = "cn-beijing";  // 实例所在region
    String instanceId = "gp-2zemvvolfw0d9****";  // 实例id
    String namespace = "test_ns";  // 已创建的namespace
    String namespacePassword = "test_ns_pass";  // 已创建的namespace密码
    String collection = "test_collection";  // 已创建的collection

    UpsertCollectionDataRequest request = new UpsertCollectionDataRequest();
    request.setRegionId(region);
    request.setDBInstanceId(instanceId);
    request.setNamespace(namespace);
    request.setNamespacePassword(namespacePassword);
    request.setCollection(collection);
    request.setRows(getRows());
    Client client = getClient();
    UpsertCollectionDataResponse response = client.upsertCollectionData(request);
    System.out.println(response.getStatusCode());
    System.out.println(new Gson().toJson(response.getBody()));
}

public static List<UpsertCollectionDataRequest.UpsertCollectionDataRequestRows> getRows() throws Exception {
    List<UpsertCollectionDataRequest.UpsertCollectionDataRequestRows> rows = new ArrayList<>();
    rows.add(getRow());
    return rows;
}

public static UpsertCollectionDataRequest.UpsertCollectionDataRequestRows getRow() throws Exception {
    UpsertCollectionDataRequest.UpsertCollectionDataRequestRows row = new UpsertCollectionDataRequest.UpsertCollectionDataRequestRows();
    Map<String, String> metadata = new HashMap<>();
    metadata.put("doc_name", "test.doc");
    metadata.put("title", "产品");
    metadata.put("content", "产品详细信息");
    metadata.put("page", "1");
    metadata.put("tags", "{\"v1\", \"v2\"}");
    row.setMetadata(metadata);
    List<Double> vectors = new ArrayList<>();  // 替换为你自己的向量数据
    for (int i = 0; i < 768; i++) {
        vectors.add(Math.random());
    }
    row.setVector(vectors);
    return row;
}
import json

from alibabacloud_gpdb20160503.client import Client
from alibabacloud_gpdb20160503 import models as gpdb_20160503_models
from typing import List


def upsert_collection_data():
    region = "cn-beijing"  # 实例所在region
    instance_id = "gp-2zemvvolfw0d9****"  # 实例id
    namespace = "test_ns"  # 已创建的namespace名称
    namespace_password = "test_ns_pass"  # 已创建的namespace密码
    collection = "test_collection"  # 已创建的collection

    request = gpdb_20160503_models.UpsertCollectionDataRequest(
        region_id=region,
        dbinstance_id=instance_id,
        namespace=namespace,
        namespace_password=namespace_password,
        collection=collection,
        rows=get_rows(),
    )
    response = get_client().upsert_collection_data(request)
    print(f"response code: {response.status_code}, body:{response.body}")


def get_rows() -> List[gpdb_20160503_models.UpsertCollectionDataRequestRows]:
    rows = [get_row()]
    return rows


def get_row() -> gpdb_20160503_models.UpsertCollectionDataRequestRows:
    metadata = {
        "doc_name": "test.doc",
        "title": "产品",
        "content": "产品详细信息",
        "page": "1",
        "tags": '["tag1", "tag2"]',
    }
    vector = []  # 替换为你自己的向量数据
    for i in range(768):
        vector.append(float(i))
    row = gpdb_20160503_models.UpsertCollectionDataRequestRows(
        metadata=metadata,
        vector=vector,
    )
    return row

检索

将检索的请求文本自行向量化,然后使用向量数据检索。

import com.aliyun.gpdb20160503.models.*;
import com.aliyun.gpdb20160503.Client;
import com.google.gson.Gson;

import java.util.ArrayList;
import java.util.List;

public static void queryCollectionData() throws Exception {
    String region = "cn-beijing";  // 实例所在region
    String instanceId = "gp-2zemvvolfw0d9****";  // 实例id
    String namespace = "test_ns";  // 已创建的namespace
    String namespacePassword = "test_ns_pass";  // 已创建的namespace密码
    String collection = "test_collection";  // 已创建的collection

    Long topK = 3L;  // 检索返回的条数
    List<Double> vector = new ArrayList<>();  // 替换为你自己的向量数据
    for (int i=0;i<768;i++) {
        vector.add(Math.random());
    }

    QueryCollectionDataRequest request = new QueryCollectionDataRequest();
    request.setRegionId(region);
    request.setDBInstanceId(instanceId);
    request.setNamespace(namespace);
    request.setNamespacePassword(namespacePassword);
    request.setCollection(collection);
    request.setTopK(topK);
    request.setIncludeValues(false);
    request.setIncludeMetadataFields("title,content");
    request.setMetrics("cosine");
    request.setVector(vector);
    Client client = getClient();
    QueryCollectionDataResponse response = client.queryCollectionData(request);
    System.out.println(response.getStatusCode());
    System.out.println(new Gson().toJson(response.getBody()));
}
from alibabacloud_gpdb20160503.client import Client
from alibabacloud_gpdb20160503 import models as gpdb_20160503_models


def query_collection_data():
    region = "cn-beijing"  # 实例所在region
    instance_id = "gp-2zemvvolfw0d9****"  # 实例id
    namespace = "test_ns"  # 已创建的namespace名称
    namespace_password = "test_ns_pass"  # 已创建的namespace密码
    collection = "test_collection"  # 已创建的collection

    top_k = 3  # 检索返回的条数
    vector = [] # 替换为你自己的向量数据
    for i in range(768):
        vector.append(float(i))

    request = gpdb_20160503_models.QueryCollectionDataRequest(
        region_id=region,
        dbinstance_id=instance_id,
        namespace=namespace,
        namespace_password=namespace_password,
        collection=collection,
        top_k=top_k,
        vector=vector,
        include_values=False,
        include_metadata_fields="title,content",
        metrics="cosine",
    )
    response = get_client().query_collection_data(request)
    print(f"response code: {response.status_code}, body:{response.body}")

文档接口

创建DocumentCollection

自定义collection名称,设置metadata、fullTextRetrievalFields、embeddingModel、metrics等参数并创建。

import com.aliyun.gpdb20160503.models.*;
import com.aliyun.gpdb20160503.Client;
import com.google.gson.Gson;

import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

public static void createDocumentCollection() throws Exception {
    String region = "cn-beijing";  // 实例所在region
    String instanceId = "gp-2zemvvolfw0d9****";  // 实例id
    String managerAccount = "testacc";  // 实例初始账号,可从控制台实例详情页创建
    String managerAccountPassword = "testpass";  // 实例初始密码,可从控制台实例详情页创建
    String namespace = "test_ns";  // 已创建的namespace名称
    
    String collection = "test_doc_collection"; // 要创建的collection,命名规范为:英文字母、数字和下划线
    // Metadata字段,key为列名,value为ADBPG字段类型:
    Map<String, String> metadata = new HashMap<>();
    metadata.put("title", "text");
    metadata.put("page", "int");
    metadata.put("tags", "text[]");
    String fullTextRetrievalFields = "title";  // 用于全文检索字段,以“,”分隔的多个Metadata里的key
    String embeddingModel = "m3e-base"; // 使用的向量模型
    String metrics = "cosine";  // 向量索引相似度算法

    CreateDocumentCollectionRequest request = new CreateDocumentCollectionRequest();
    request.setRegionId(region);
    request.setDBInstanceId(instanceId);
    request.setManagerAccount(managerAccount);
    request.setManagerAccountPassword(managerAccountPassword);
    request.setNamespace(namespace);
    request.setCollection(collection);
    request.setMetadata(new Gson().toJson(metadata));
    request.setFullTextRetrievalFields(fullTextRetrievalFields);
    request.setMetrics(metrics);
    request.setEmbeddingModel(embeddingModel);
    Client client = getClient();
    CreateDocumentCollectionResponse response = client.createDocumentCollection(request);
    System.out.println(response.getStatusCode());
    System.out.println(new Gson().toJson(response.getBody()));
}
from alibabacloud_gpdb20160503.client import Client
from alibabacloud_gpdb20160503 import models as gpdb_20160503_models


def create_document_collection():
    region = "cn-beijing"  # 实例所在region
    instance_id = "gp-2zemvvolfw0d9****"  # 实例id
    manager_account = "testacc"  # 实例初始账号,可从控制台实例详情页创建
    manager_account_password = "testpass"  # 实例初始密码,可从控制台实例详情页创建
    namespace = "test_ns"  # 已创建的namespace名称

    collection = "test_doc_collection"  # 要创建的collection,命名规范为:英文字母、数字和下划线
    # Metadata字段,key为列名,value为ADBPG字段类型
    metadata = {
        "title": "text",
        "page": "int",
        "tags": "text[]",
    }
    full_text_retrieval_fields = "title"  # 用于全文检索字段,以“, ”分隔的多个Metadata里的key
    embedding_model = "m3e-base"  # 使用的向量模型
    metrics = "cosine"  # 向量索引相似度算法

    request = gpdb_20160503_models.CreateDocumentCollectionRequest(
        region_id=region,
        dbinstance_id=instance_id,
        manager_account=manager_account,
        manager_account_password=manager_account_password,
        namespace=namespace,
        collection=collection,
        metadata=json.dumps(metadata),
        full_text_retrieval_fields=full_text_retrieval_fields,
        embedding_model=embedding_model,
        metrics=metrics,
    )
    response = get_client().create_document_collection(request)
    print(f"response code: {response.status_code}, body:{response.body}")

上传文档

上传切分后的文档

准备好切分后的Chunk文本,然后直接上传。

import com.aliyun.gpdb20160503.models.*;
import com.aliyun.gpdb20160503.Client;
import com.google.gson.Gson;

import java.util.HashMap;
import java.util.Map;

public static void upsertChunks() throws Exception {
    String region = "cn-beijing";  // 实例所在region
    String instanceId = "gp-2zemvvolfw0d9****";  // 实例id
    String namespace = "test_ns";  // 已创建的namespace
    String namespacePassword = "test_ns_pass";  // 已创建的namespace密码
    String collection = "test_doc_collection";  // 已创建的DocumentCollection

    UpsertChunksRequest request = new UpsertChunksRequest();
    request.setRegionId(region);
    request.setDBInstanceId(instanceId);
    request.setNamespace(namespace);
    request.setNamespacePassword(namespacePassword);
    request.setCollection(collection);
    request.setTextChunks(getChunks());
    Client client = getClient();
    UpsertChunksResponse response = client.upsertChunks(request);
    System.out.println(response.getStatusCode());
    System.out.println(new Gson().toJson(response.getBody()));
}

public static List<UpsertChunksRequest.UpsertChunksRequestTextChunks> getChunks() {
    List<UpsertChunksRequest.UpsertChunksRequestTextChunks> textChunks = new ArrayList<>();
    textChunks.add(getChunk());
    return textChunks;
}

public static UpsertChunksRequest.UpsertChunksRequestTextChunks getChunk() {
    UpsertChunksRequest.UpsertChunksRequestTextChunks chunk = new UpsertChunksRequest.UpsertChunksRequestTextChunks();
    chunk.setContent("测试文本");  // 替换为你自己的文本
    Map<String, Object> metadata = new HashMap<>();
    metadata.put("title", "产品");
    metadata.put("page", 1);
    metadata.put("tags", Arrays.asList("v1", "v2"));
    chunk.setMetadata(metadata);
    return chunk;
}
from alibabacloud_gpdb20160503.client import Client
from alibabacloud_gpdb20160503 import models as gpdb_20160503_models
from typing import List


def upsert_chunks():
    region = "cn-beijing"  # 实例所在region
    instance_id = "gp-2zemvvolfw0d9****"  # 实例id
    namespace = "test_ns"  # 已创建的namespace名称
    namespace_password = "test_ns_pass"  # 已创建的namespace密码
    collection = "test_doc_collection"  # 已创建的DocumentCollection

    request = gpdb_20160503_models.UpsertChunksRequest(
        region_id=region,
        dbinstance_id=instance_id,
        namespace=namespace,
        namespace_password=namespace_password,
        collection=collection,
        text_chunks=get_chunks(),
    )
    response = get_client().upsert_chunks(request)
    print(f"response code: {response.status_code}, body:{response.body}")


def get_chunks() -> List[gpdb_20160503_models.UpsertChunksRequestTextChunks]:
    chunks = [get_chunk()]
    return chunks


def get_chunk() -> gpdb_20160503_models.UpsertChunksRequestTextChunks:
    content = "测试文本"
    metadata = {
        "title": "产品",
        "page": 1,
        "tags": ["tag1", "tag2"],
    }
    chunk = gpdb_20160503_models.UpsertChunksRequestTextChunks(
        content=content,
        metadata=metadata,
    )
    return chunk

异步上传文档

可以将本地文档通过SDK异步上传,并在超时时间内等待任务成功。

import com.aliyun.gpdb20160503.Client;
import com.aliyun.gpdb20160503.models.*;
import com.aliyun.teaopenapi.models.Config;
import com.aliyun.teautil.models.RuntimeOptions;
import com.google.gson.Gson;

import java.io.File;
import java.nio.file.Files;
import java.util.*;
import java.util.concurrent.*;

private static final ScheduledExecutorService scheduler = Executors.newScheduledThreadPool(1);

public static void uploadDocument() throws Exception {
    String region = "cn-beijing";  // 实例所在region
    String instanceId = "gp-2zemvvolfw0d9****";  // 实例id
    String namespace = "test_ns";  // 已创建的namespace
    String namespacePassword = "test_ns_pass";  // 已创建的namespace密码
    String collection = "test_doc_collection";  // 已创建的DocumentCollection

    String fileName = "test_file.txt";  // 本地文件名,需要带文件类型后缀
    String localFilePath = "/root/test_file.txt";  // 本地文件路径
    Map<String, Object> metadata = new HashMap<>();
    metadata.put("title", "产品");
    metadata.put("tags", Arrays.asList("v1", "v2"));

    UploadDocumentAsyncAdvanceRequest request = new UploadDocumentAsyncAdvanceRequest();
    request.setRegionId(region);
    request.setDBInstanceId(instanceId);
    request.setNamespace(namespace);
    request.setNamespacePassword(namespacePassword);
    request.setCollection(collection);
    request.setFileName(fileName);
    request.setMetadata(metadata);
    request.setFileUrlObject(Files.newInputStream(new File(localFilePath).toPath()));

    Client client = getClient();
    RuntimeOptions runtime = new RuntimeOptions();
    UploadDocumentAsyncResponse response = client.uploadDocumentAsyncAdvance(request, runtime);
    System.out.println(response.getStatusCode());
    System.out.println(new Gson().toJson(response.getBody()));

    Callable<String> waitUploadDocumentJobDone = () -> getUploadDocumentJobStatus(client, response.getBody().getJobId());
    try {
        String result = executeWithTimeout(waitUploadDocumentJobDone, 1, TimeUnit.HOURS);
        System.out.println("任务结果: " + result);
    } catch (InterruptedException | ExecutionException | TimeoutException e) {
        e.printStackTrace();
    }
}

public static String getUploadDocumentJobStatus(Client client, String jobId) {
    String region = "cn-beijing";  // 实例所在region
    String instanceId = "gp-2zemvvolfw0d9****";  // 实例id
    String namespace = "test_ns";  // 已创建的namespace
    String namespacePassword = "test_ns_pass";  // 已创建的namespace密码
    String collection = "test_doc_collection";  // 已创建的DocumentCollection

    GetUploadDocumentJobRequest request = new GetUploadDocumentJobRequest();
    request.setRegionId(region);
    request.setDBInstanceId(instanceId);
    request.setNamespace(namespace);
    request.setNamespacePassword(namespacePassword);
    request.setCollection(collection);
    request.setJobId(jobId);

    List<String> nonSuccessStatuses = Arrays.asList("Failed", "Cancelling", "Cancelled");
    int i = 0;
    while (true) {
        System.out.println("尝试第 " + (i + 1) + " 次...");
        try {
            GetUploadDocumentJobResponse response = client.getUploadDocumentJob(request);
            System.out.println(response.getStatusCode());
            System.out.println(new Gson().toJson(response.getBody()));
            if (response.getBody().getJob().getCompleted()) {
                return response.getBody().getJob().getStatus();
            } else if (nonSuccessStatuses.contains(response.getBody().getJob().getStatus())) {
                return response.getBody().getJob().getStatus();
            }
        } catch (Exception e) {
            e.printStackTrace();
            return "Failed";
        }
        try {
            Thread.sleep(5000); // 每隔5秒尝试一次
        } catch (InterruptedException e) {
            return "Interrupted";
        }
        i++;
    }
}

public static <T> T executeWithTimeout(Callable<T> task, long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException {
    Future<T> future = null;
    try {
        future = scheduler.submit(task);
        return future.get(timeout, unit); // 等待任务完成或超时
    } catch (TimeoutException e) {
        if (future != null) {
            future.cancel(true); // 取消任务
        }
        throw new TimeoutException("任务执行超时");
    } finally {
        scheduler.shutdown(); // 关闭调度器
    }
}
import json
import io
import concurrent.futures
import time

from alibabacloud_gpdb20160503.client import Client
from alibabacloud_gpdb20160503 import models as gpdb_20160503_models
from alibabacloud_tea_util import models as util_models
from typing import List


def upload_document():
    region = "cn-beijing"  # 实例所在region
    instance_id = "gp-2zemvvolfw0d9****"  # 实例id
    namespace = "test_ns"  # 已创建的namespace名称
    namespace_password = "test_ns_pass"  # 已创建的namespace密码
    collection = "test_doc_collection"  # 已创建的DocumentCollection

    metadata = {
        "title": "产品",
        "tags": ["tag1", "tag2"],
    }
    file_name = "test_file.txt"  # 本地文件名,需要带文件类型后缀
    local_file_path = "/root/test_file.txt"  # 本地文件路径
    with open(local_file_path, 'rb') as file:
        file_content = file.read()
    request = gpdb_20160503_models.UploadDocumentAsyncAdvanceRequest(
        region_id=region,
        dbinstance_id=instance_id,
        namespace=namespace,
        namespace_password=namespace_password,
        collection=collection,
        file_name=file_name,
        metadata=metadata,
        file_url_object=io.BytesIO(file_content),
    )
    client = get_client()
    response = client.upload_document_async_advance(request, util_models.RuntimeOptions())
    print(f"response code: {response.status_code}, body:{response.body}")

    def get_upload_document_job_status_wrapper():
        return get_upload_document_job_status(client, response.body.job_id)
    try:
        result = execute_with_timeout(get_upload_document_job_status_wrapper, 3600)  # 设置超时时间为1hour
        print(f"任务结果: {result}")
    except TimeoutError as e:
        print(e)


def get_upload_document_job_status(client, job_id):
    region = "cn-beijing"  # 实例所在region
    instance_id = "gp-2zemvvolfw0d9****"  # 实例id
    namespace = "test_ns"  # 已创建的namespace名称
    namespace_password = "test_ns_pass"  # 已创建的namespace密码
    collection = "test_doc_collection"  # 已创建的DocumentCollection

    request = gpdb_20160503_models.GetUploadDocumentJobRequest(
        region_id=region,
        dbinstance_id=instance_id,
        namespace=namespace,
        namespace_password=namespace_password,
        collection=collection,
        job_id=job_id,
    )

    non_success_statuses = ["Failed", "Cancelling", "Cancelled"]
    i = 0
    while True:
        print(f"尝试第 {i + 1} 次...")
        response: gpdb_20160503_models.GetUploadDocumentJobResponse = client.get_upload_document_job(request)
        print(f"response code: {response.status_code}, body:{response.body}")
        if response.body.job.completed or response.body.job.status in non_success_statuses:
            return response.body.job.status
        time.sleep(5)  # 每隔5秒尝试一次
        i += 1


def execute_with_timeout(task, timeout):
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor:
        future = executor.submit(task)
        try:
            result = future.result(timeout=timeout)
            return result
        except concurrent.futures.TimeoutError:
            future.cancel()  # 取消任务
            raise TimeoutError("任务执行超时")

检索

import com.aliyun.gpdb20160503.Client;
import com.aliyun.gpdb20160503.models.*;
import com.aliyun.teaopenapi.models.Config;
import com.aliyun.teautil.models.RuntimeOptions;
import com.google.gson.Gson;

import java.io.File;
import java.nio.file.Files;
import java.util.*;
import java.util.concurrent.*;

public static void queryContent() throws Exception {
    String region = "cn-beijing";  // 实例所在region
    String instanceId = "gp-2zemvvolfw0d9****";  // 实例id
    String namespace = "test_ns";  // 已创建的namespace
    String namespacePassword = "test_ns_pass";  // 已创建的namespace密码
    String collection = "test_doc_collection";  // 已创建的DocumentCollection

    String content = "测试文本";  // 查询文本
    String includeMetadataFields = "title,tags";  // 返回的元数据字段
    String metrics = "cosine";  // 计算相似度使用的算法
    Integer topK = 10;  // 返回的topK个结果

    QueryContentRequest request = new QueryContentRequest();
    request.setRegionId(region);
    request.setDBInstanceId(instanceId);
    request.setNamespace(namespace);
    request.setNamespacePassword(namespacePassword);
    request.setCollection(collection);
    request.setContent(content);
    request.setIncludeMetadataFields(includeMetadataFields);
    request.setMetrics(metrics);
    request.setTopK(topK);
    Client client = getClient();
    QueryContentResponse response = client.queryContent(request);
    System.out.println(response.getStatusCode());
    System.out.println(new Gson().toJson(response.getBody()));
}
from alibabacloud_gpdb20160503.client import Client
from alibabacloud_gpdb20160503 import models as gpdb_20160503_models


def query_content():
    region = "cn-beijing"  # 实例所在region
    instance_id = "gp-2zemvvolfw0d9****"  # 实例id
    namespace = "test_ns"  # 已创建的namespace名称
    namespace_password = "test_ns_pass"  # 已创建的namespace密码
    collection = "test_doc_collection"  # 已创建的DocumentCollection

    content = "测试文本"  # 查询文本
    include_metadata_fields = "title,tags"  # 返回的元数据字段
    metrics = "cosine"  # 计算相似度使用的算法
    top_k = 10  # 返回的topK个结果

    request = gpdb_20160503_models.QueryContentRequest(
        region_id=region,
        dbinstance_id=instance_id,
        namespace=namespace,
        namespace_password=namespace_password,
        collection=collection,
        content=content,
        include_metadata_fields=include_metadata_fields,
        metrics=metrics,
        top_k=top_k,
    )
    response = get_client().query_content(request)
    print(f"response code: {response.status_code}, body:{response.body}")