OpenAPI封装了云原生数据仓库AnalyticDB PostgreSQL版向量操作的DDL和DML,使您可以通过OpenAPI来管理向量数据。本文以Python SDK调用方式为例,介绍如何通过API导入并查询向量数据。
前提条件
已创建存储弹性模式6.0版的AnalyticDB PostgreSQL版实例。具体操作,请参见创建实例。
已开启向量引擎优化。具体操作,请参见开启或关闭向量检索引擎优化。
已创建初始账号。具体操作,请参见创建数据库账号。
若您使用RAM用户,则需要对RAM用户进行授权,更多方式请参见使用OpenAPI示例。
操作流程
初始化向量库
在使用向量检索前,需初始化knowledgebase库以及全文检索相关功能。
调用示例如下:
import os
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.acs_exception.exceptions import ClientException
from aliyunsdkcore.acs_exception.exceptions import ServerException
from aliyunsdkcore.auth.credentials import AccessKeyCredential
from aliyunsdkcore.auth.credentials import StsTokenCredential
from aliyunsdkgpdb.request.v20160503.InitVectorDatabaseRequest import InitVectorDatabaseRequest
# Credentials come from the environment: ALIBABA_ACCESS_KEY_ID and
# ALIBABA_ACCESS_KEY_SECRET must both be set before running this script.
access_key_id = os.environ['ALIBABA_ACCESS_KEY_ID']
access_key_secret = os.environ['ALIBABA_ACCESS_KEY_SECRET']
ak_credential = AccessKeyCredential(access_key_id, access_key_secret)
# To authenticate with an STS token instead, build the credential like this:
# ak_credential = StsTokenCredential(access_key_id, access_key_secret, os.environ['ALIBABA_SECURITY_TOKEN'])
acs_client = AcsClient(region_id='cn-qingdao', credential=ak_credential)

# Build the InitVectorDatabase request for the target instance, using the
# instance's manager (initial) account.
init_request = InitVectorDatabaseRequest()
init_request.set_accept_format('json')
init_request.set_DBInstanceId("gp-bp1c62r3l489****")
init_request.set_RegionId("cn-qingdao")
init_request.set_ManagerAccount("myaccount")
init_request.set_ManagerAccountPassword("myaccount_password")

# Send the request and print the response body decoded as UTF-8 text.
raw_body = acs_client.do_action_with_exception(init_request)
# python2: print(raw_body)
print(str(raw_body, 'utf-8'))
参数说明,请参见InitVectorDatabase - 初始化向量数据库。
创建Namespace
Namespace用于Schema隔离,在使用向量前,需至少创建一个Namespace或者使用public的Namespace。
调用示例如下:
import os
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.acs_exception.exceptions import ClientException
from aliyunsdkcore.acs_exception.exceptions import ServerException
from aliyunsdkcore.auth.credentials import AccessKeyCredential
from aliyunsdkcore.auth.credentials import StsTokenCredential
from aliyunsdkgpdb.request.v20160503.CreateNamespaceRequest import CreateNamespaceRequest
# Credentials come from the environment: ALIBABA_ACCESS_KEY_ID and
# ALIBABA_ACCESS_KEY_SECRET must both be set before running this script.
access_key_id = os.environ['ALIBABA_ACCESS_KEY_ID']
access_key_secret = os.environ['ALIBABA_ACCESS_KEY_SECRET']
ak_credential = AccessKeyCredential(access_key_id, access_key_secret)
# To authenticate with an STS token instead, build the credential like this:
# ak_credential = StsTokenCredential(access_key_id, access_key_secret, os.environ['ALIBABA_SECURITY_TOKEN'])
acs_client = AcsClient(region_id='cn-qingdao', credential=ak_credential)

# Build the CreateNamespace request: the manager account authorizes the
# operation, and the namespace gets its own name and password.
namespace_request = CreateNamespaceRequest()
namespace_request.set_accept_format('json')
namespace_request.set_DBInstanceId("gp-bp1c62r3l489****")
namespace_request.set_RegionId("cn-qingdao")
namespace_request.set_ManagerAccount("myaccount")
namespace_request.set_ManagerAccountPassword("myaccount_password")
namespace_request.set_Namespace("vector_test")
namespace_request.set_NamespacePassword("vector_test_password")

# Send the request and print the response body decoded as UTF-8 text.
raw_body = acs_client.do_action_with_exception(namespace_request)
# python2: print(raw_body)
print(str(raw_body, 'utf-8'))
参数说明,请参见CreateNamespace - 创建命名空间。
创建完后,可以在实例的knowledgebase库查看对应的Schema。
SELECT schema_name FROM information_schema.schemata;
创建Collection
Collection用于存储向量数据,并使用Namespace隔离。
调用示例如下:
import os
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.acs_exception.exceptions import ClientException
from aliyunsdkcore.acs_exception.exceptions import ServerException
from aliyunsdkcore.auth.credentials import AccessKeyCredential
from aliyunsdkcore.auth.credentials import StsTokenCredential
from aliyunsdkgpdb.request.v20160503.CreateCollectionRequest import CreateCollectionRequest
# Please ensure that the environment variables ALIBABA_ACCESS_KEY_ID and ALIBABA_ACCESS_KEY_SECRET are set.
credentials = AccessKeyCredential(os.environ['ALIBABA_ACCESS_KEY_ID'], os.environ['ALIBABA_ACCESS_KEY_SECRET'])
# use STS Token
# credentials = StsTokenCredential(os.environ['ALIBABA_ACCESS_KEY_ID'], os.environ['ALIBABA_ACCESS_KEY_SECRET'], os.environ['ALIBABA_SECURITY_TOKEN'])
client = AcsClient(region_id='cn-qingdao', credential=credentials)

# Build the CreateCollection request: the collection "document" is created
# inside the namespace "vector_test" using the manager account.
request = CreateCollectionRequest()
request.set_accept_format('json')
request.set_DBInstanceId("gp-bp1c62r3l489****")
request.set_RegionId("cn-qingdao")
request.set_ManagerAccount("myaccount")
request.set_ManagerAccountPassword("myaccount_password")
request.set_Namespace("vector_test")
request.set_Collection("document")
# Comma-separated metadata fields to index for full-text retrieval; every
# name listed here must also be a key of the Metadata definition below.
request.set_FullTextRetrievalFields("title,content")
# Vector dimension of the collection; vectors uploaded or used in queries
# are expected to have exactly this many components.
request.set_Dimension(10)
# Tokenizer used for full-text retrieval (zh_cn = Chinese).
request.set_Parser("zh_cn")
# Metadata schema as a JSON string mapping field name -> column type.
request.set_Metadata("{\"pv\": \"text\",\"link\": \"text\",\"content\": \"text\", \"title\": \"text\"}")

# Send the request and print the response body decoded as UTF-8 text.
response = client.do_action_with_exception(request)
# python2: print(response)
print(str(response, encoding='utf-8'))
参数说明,请参见CreateCollection - 创建向量数据集。
创建完后,可以在实例的knowledgebase库查看对应的Table。
SELECT tablename FROM pg_tables WHERE schemaname='vector_test';
上传向量数据
将准备好的Embedding向量数据上传到对应的Collection中。
调用示例如下:
import os
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.acs_exception.exceptions import ClientException
from aliyunsdkcore.acs_exception.exceptions import ServerException
from aliyunsdkcore.auth.credentials import AccessKeyCredential
from aliyunsdkcore.auth.credentials import StsTokenCredential
from aliyunsdkgpdb.request.v20160503.UpsertCollectionDataRequest import UpsertCollectionDataRequest
# Please ensure that the environment variables ALIBABA_ACCESS_KEY_ID and ALIBABA_ACCESS_KEY_SECRET are set.
credentials = AccessKeyCredential(os.environ['ALIBABA_ACCESS_KEY_ID'], os.environ['ALIBABA_ACCESS_KEY_SECRET'])
# use STS Token
# credentials = StsTokenCredential(os.environ['ALIBABA_ACCESS_KEY_ID'], os.environ['ALIBABA_ACCESS_KEY_SECRET'], os.environ['ALIBABA_SECURITY_TOKEN'])
client = AcsClient(region_id='cn-qingdao', credential=credentials)

# Build the UpsertCollectionData request; it authenticates with the
# namespace password rather than the manager account.
request = UpsertCollectionDataRequest()
request.set_accept_format('json')
request.set_DBInstanceId("gp-bp1c62r3l489****")
request.set_RegionId("cn-qingdao")
request.set_Collection("document")
request.set_Namespace("vector_test")
request.set_NamespacePassword("vector_test_password")
# Each row carries an Id, a Metadata dict and the embedding Vector.
# The vector has 10 components because the collection "document" is
# created with Dimension=10 in the CreateCollection example.
request.set_Rows([
    {
        "Id": "0CB55798-ECF5-4064-B81E-FE35B19E01A6",
        "Metadata": {
            "pv": "1000",
            "link": "http://127.X.X.1/document1",
            "content": "测试内容",
            "title": "测试文档"
        },
        "Vector": [
            0.2894745251078251, 0.5364747050266715, 0.1276845661831275,
            0.22528871956822372, 0.7009319238651552, 0.40267406135256123,
            0.8873626696379067, 0.1248525955774931, 0.9115507046412368,
            0.2450859133174706
        ]
    }
])

# Send the request and print the response body decoded as UTF-8 text.
response = client.do_action_with_exception(request)
# python2: print(response)
print(str(response, encoding='utf-8'))
参数说明,请参见UpsertCollectionData - 上传向量数据。
上传完成后,可以在实例的knowledgebase库查看数据。
SELECT * FROM vector_test.document;
召回向量数据
准备需要召回的查询向量或全文检索字段,执行查询接口。
调用示例如下:
import os
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.acs_exception.exceptions import ClientException
from aliyunsdkcore.acs_exception.exceptions import ServerException
from aliyunsdkcore.auth.credentials import AccessKeyCredential
from aliyunsdkcore.auth.credentials import StsTokenCredential
from aliyunsdkgpdb.request.v20160503.QueryCollectionDataRequest import QueryCollectionDataRequest
# Please ensure that the environment variables ALIBABA_ACCESS_KEY_ID and ALIBABA_ACCESS_KEY_SECRET are set.
credentials = AccessKeyCredential(os.environ['ALIBABA_ACCESS_KEY_ID'], os.environ['ALIBABA_ACCESS_KEY_SECRET'])
# use STS Token
# credentials = StsTokenCredential(os.environ['ALIBABA_ACCESS_KEY_ID'], os.environ['ALIBABA_ACCESS_KEY_SECRET'], os.environ['ALIBABA_SECURITY_TOKEN'])
client = AcsClient(region_id='cn-qingdao', credential=credentials)

# Build the QueryCollectionData request; it authenticates with the
# namespace password rather than the manager account.
request = QueryCollectionDataRequest()
request.set_accept_format('json')
request.set_DBInstanceId("gp-bp1c62r3l489****")
request.set_RegionId("cn-qingdao")
request.set_Collection("document")
request.set_Namespace("vector_test")
request.set_NamespacePassword("vector_test_password")
# Text used for full-text retrieval.
request.set_Content("测试")
# Metadata filter applied to the candidate rows.
# NOTE(review): the CreateCollection example declares "pv" as text, while
# this filter compares it with an integer — confirm the comparison works
# as intended or declare "pv" with a numeric type.
request.set_Filter("pv > 10")
# Maximum number of matches to return.
request.set_TopK(10)
# Query vector; it has 10 components to match the collection's Dimension=10.
request.set_Vector([
    0.7152607422256894, 0.5524872066437732, 0.1168505269851303,
    0.704130971473022, 0.4118874999869668, 0.2451574619255372,
    0.18193414783144812, 0.3050522957905741, 0.24846180714868163,
    0.2549390362828882
])

# Send the request and print the response body decoded as UTF-8 text.
response = client.do_action_with_exception(request)
# python2: print(response)
print(str(response, encoding='utf-8'))
返回结果如下:
{
"Matches": {
"match": [{
"Id": "0CB55798-ECF5-4064-B81E-FE35B19E01A6",
"Metadata": {
"title": "测试文档",
"content": "测试内容",
"link": "http://127.X.X.1/document1",
"pv": "1000"
},
"Values": [0.2894745251078251, 0.5364747050266715, 0.1276845661831275, 0.22528871956822372, 0.7009319238651552, 0.40267406135256123, 0.8873626696379067, 0.1248525955774931, 0.9115507046412368, 0.2450859133174706]
}]
},
"RequestId": "ABB39CC3-4488-4857-905D-2E4A051D0521",
"Status": "success"
}
反馈
- 本页导读 (1)
文档反馈