文档

Python

更新时间:

OpenAPI封装了云原生数据仓库AnalyticDB PostgreSQL版向量操作的DDL和DML,使您可以通过OpenAPI来管理向量数据。本文以Python SDK为例,介绍如何通过API导入并查询向量数据。

前提条件

操作流程

  1. 初始化向量库

  2. 创建Namespace

  3. 创建Collection

  4. 上传向量数据

  5. 召回向量数据

初始化向量库

在使用向量检索前,需初始化knowledgebase库以及全文检索相关功能。

调用示例如下:

import os
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.acs_exception.exceptions import ClientException
from aliyunsdkcore.acs_exception.exceptions import ServerException
from aliyunsdkcore.auth.credentials import AccessKeyCredential
from aliyunsdkcore.auth.credentials import StsTokenCredential
from aliyunsdkgpdb.request.v20160503.InitVectorDatabaseRequest import InitVectorDatabaseRequest

# The AccessKey pair is read from the environment: ALIBABA_ACCESS_KEY_ID and
# ALIBABA_ACCESS_KEY_SECRET must both be set before this script runs.
credentials = AccessKeyCredential(
    os.environ['ALIBABA_ACCESS_KEY_ID'],
    os.environ['ALIBABA_ACCESS_KEY_SECRET'],
)
# To authenticate with an STS token instead, build the credential like this:
# credentials = StsTokenCredential(os.environ['ALIBABA_ACCESS_KEY_ID'], os.environ['ALIBABA_ACCESS_KEY_SECRET'], os.environ['ALIBABA_SECURITY_TOKEN'])
client = AcsClient(region_id='cn-qingdao', credential=credentials)

# Build the InitVectorDatabase request and ask for a JSON response.
request = InitVectorDatabaseRequest()
request.set_accept_format('json')

# Target instance and the database account used for the call.
# The values below are samples; replace them with your own.
request.set_DBInstanceId("gp-bp1c62r3l489****")
request.set_RegionId("cn-qingdao")
request.set_ManagerAccount("myaccount")
request.set_ManagerAccountPassword("myaccount_password")

response = client.do_action_with_exception(request)
# The response is decoded as UTF-8 before printing.
# On Python 2 use print(response) instead.
print(response.decode('utf-8'))

参数说明,请参见InitVectorDatabase - 初始化向量数据库

创建Namespace

Namespace用于Schema隔离。在使用向量功能前,需至少创建一个Namespace,或者直接使用public Namespace。

调用示例如下:

import os
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.acs_exception.exceptions import ClientException
from aliyunsdkcore.acs_exception.exceptions import ServerException
from aliyunsdkcore.auth.credentials import AccessKeyCredential
from aliyunsdkcore.auth.credentials import StsTokenCredential
from aliyunsdkgpdb.request.v20160503.CreateNamespaceRequest import CreateNamespaceRequest

# Requires the environment variables ALIBABA_ACCESS_KEY_ID and
# ALIBABA_ACCESS_KEY_SECRET; a missing variable raises KeyError here.
credentials = AccessKeyCredential(
    os.environ['ALIBABA_ACCESS_KEY_ID'],
    os.environ['ALIBABA_ACCESS_KEY_SECRET'],
)
# Alternative: authenticate with an STS token.
# credentials = StsTokenCredential(os.environ['ALIBABA_ACCESS_KEY_ID'], os.environ['ALIBABA_ACCESS_KEY_SECRET'], os.environ['ALIBABA_SECURITY_TOKEN'])
client = AcsClient(region_id='cn-qingdao', credential=credentials)

# Build the CreateNamespace request and ask for a JSON response.
request = CreateNamespaceRequest()
request.set_accept_format('json')

# Instance and manager account that perform the operation (sample values).
request.set_DBInstanceId("gp-bp1c62r3l489****")
request.set_RegionId("cn-qingdao")
request.set_ManagerAccount("myaccount")
request.set_ManagerAccountPassword("myaccount_password")

# Name and password of the namespace to create; the later steps of this
# guide pass the same pair when uploading and querying data.
request.set_Namespace("vector_test")
request.set_NamespacePassword("vector_test_password")

response = client.do_action_with_exception(request)
# The response is decoded as UTF-8 before printing.
# On Python 2 use print(response) instead.
print(response.decode('utf-8'))

参数说明,请参见CreateNamespace - 创建命名空间

创建完成后,可以在实例的knowledgebase库中查看对应的Schema。

-- Lists every schema in the current database; run it in the knowledgebase
-- database to check that the namespace created above (vector_test) exists.
SELECT schema_name FROM information_schema.schemata;

创建Collection

Collection用于存储向量数据,并使用Namespace隔离。

调用示例如下:

import os
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.acs_exception.exceptions import ClientException
from aliyunsdkcore.acs_exception.exceptions import ServerException
from aliyunsdkcore.auth.credentials import AccessKeyCredential
from aliyunsdkcore.auth.credentials import StsTokenCredential
from aliyunsdkgpdb.request.v20160503.CreateCollectionRequest import CreateCollectionRequest

# Please ensure that the environment variables ALIBABA_ACCESS_KEY_ID and
# ALIBABA_ACCESS_KEY_SECRET are set.
credentials = AccessKeyCredential(
    os.environ['ALIBABA_ACCESS_KEY_ID'],
    os.environ['ALIBABA_ACCESS_KEY_SECRET'],
)
# Alternative: authenticate with an STS token.
# credentials = StsTokenCredential(os.environ['ALIBABA_ACCESS_KEY_ID'], os.environ['ALIBABA_ACCESS_KEY_SECRET'], os.environ['ALIBABA_SECURITY_TOKEN'])
client = AcsClient(region_id='cn-qingdao', credential=credentials)

# Build the CreateCollection request and ask for a JSON response.
request = CreateCollectionRequest()
request.set_accept_format('json')

# Instance and manager account that perform the operation (sample values).
request.set_DBInstanceId("gp-bp1c62r3l489****")
request.set_RegionId("cn-qingdao")
request.set_ManagerAccount("myaccount")
request.set_ManagerAccountPassword("myaccount_password")

# The collection is created inside this namespace.
request.set_Namespace("vector_test")
request.set_Collection("document")

# Metadata maps each field name to its data type (a JSON object string).
request.set_Metadata('{"pv": "text","link": "text","content": "text", "title": "text"}')
# Comma-separated metadata fields to index for full-text retrieval.
# A bare "," names no field at all, so list the text fields explicitly.
request.set_FullTextRetrievalFields("title,content")
# Tokenizer used for full-text retrieval ("zh_cn" = Chinese).
request.set_Parser("zh_cn")
# Every vector uploaded to or queried against this collection must have
# exactly this many elements.
request.set_Dimension(10)

response = client.do_action_with_exception(request)
# python2: print(response)
print(str(response, encoding='utf-8'))

参数说明,请参见CreateCollection - 创建向量数据集

创建完成后,可以在实例的knowledgebase库中查看对应的Table。

-- Lists the tables in the vector_test schema; run it in the knowledgebase
-- database to check that the table for the new collection exists.
SELECT tablename FROM pg_tables WHERE schemaname='vector_test';

上传向量数据

将准备好的Embedding向量数据上传到对应的Collection中。

调用示例如下:

import os
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.acs_exception.exceptions import ClientException
from aliyunsdkcore.acs_exception.exceptions import ServerException
from aliyunsdkcore.auth.credentials import AccessKeyCredential
from aliyunsdkcore.auth.credentials import StsTokenCredential
from aliyunsdkgpdb.request.v20160503.UpsertCollectionDataRequest import UpsertCollectionDataRequest

# Please ensure that the environment variables ALIBABA_ACCESS_KEY_ID and
# ALIBABA_ACCESS_KEY_SECRET are set.
credentials = AccessKeyCredential(
    os.environ['ALIBABA_ACCESS_KEY_ID'],
    os.environ['ALIBABA_ACCESS_KEY_SECRET'],
)
# Alternative: authenticate with an STS token.
# credentials = StsTokenCredential(os.environ['ALIBABA_ACCESS_KEY_ID'], os.environ['ALIBABA_ACCESS_KEY_SECRET'], os.environ['ALIBABA_SECURITY_TOKEN'])
client = AcsClient(region_id='cn-qingdao', credential=credentials)

# Build the UpsertCollectionData request and ask for a JSON response.
request = UpsertCollectionDataRequest()
request.set_accept_format('json')

# Target instance, namespace and collection (sample values).
request.set_DBInstanceId("gp-bp1c62r3l489****")
request.set_RegionId("cn-qingdao")
request.set_Collection("document")
request.set_Namespace("vector_test")
request.set_NamespacePassword("vector_test_password")

# One row per document: an ID, its metadata fields and its embedding.
# The vector length must equal the collection's Dimension (10 in this
# guide); a 2-element vector does not match a 10-dimensional collection.
request.set_Rows([
    {
        "Id": "0CB55798-ECF5-4064-B81E-FE35B19E01A6",
        "Metadata": {
            "pv": "1000",
            "link": "http://127.X.X.1/document1",
            "content": "测试内容",
            "title": "测试文档",
        },
        "Vector": [
            0.2894745251078251, 0.5364747050266715,
            0.1276845661831275, 0.22528871956822372,
            0.7009319238651552, 0.40267406135256123,
            0.8873626696379067, 0.1248525955774931,
            0.9115507046412368, 0.2450859133174706,
        ],
    },
])

response = client.do_action_with_exception(request)
# python2: print(response)
print(str(response, encoding='utf-8'))

参数说明,请参见UpsertCollectionData - 上传向量数据

上传完成后,可以在实例的knowledgebase库中查看数据。

-- Returns every row of the document table in the vector_test schema;
-- run it in the knowledgebase database to check the uploaded data.
SELECT * FROM vector_test.document;

召回向量数据

准备好用于召回的查询向量或全文检索内容,然后调用查询接口。

调用示例如下:

import os
from aliyunsdkcore.client import AcsClient
from aliyunsdkcore.acs_exception.exceptions import ClientException
from aliyunsdkcore.acs_exception.exceptions import ServerException
from aliyunsdkcore.auth.credentials import AccessKeyCredential
from aliyunsdkcore.auth.credentials import StsTokenCredential
from aliyunsdkgpdb.request.v20160503.QueryCollectionDataRequest import QueryCollectionDataRequest

# Please ensure that the environment variables ALIBABA_ACCESS_KEY_ID and
# ALIBABA_ACCESS_KEY_SECRET are set.
credentials = AccessKeyCredential(
    os.environ['ALIBABA_ACCESS_KEY_ID'],
    os.environ['ALIBABA_ACCESS_KEY_SECRET'],
)
# Alternative: authenticate with an STS token.
# credentials = StsTokenCredential(os.environ['ALIBABA_ACCESS_KEY_ID'], os.environ['ALIBABA_ACCESS_KEY_SECRET'], os.environ['ALIBABA_SECURITY_TOKEN'])
client = AcsClient(region_id='cn-qingdao', credential=credentials)

# Build the QueryCollectionData request and ask for a JSON response.
request = QueryCollectionDataRequest()
request.set_accept_format('json')

# Target instance, namespace and collection (sample values).
request.set_DBInstanceId("gp-bp1c62r3l489****")
request.set_RegionId("cn-qingdao")
request.set_Collection("document")
request.set_Namespace("vector_test")
request.set_NamespacePassword("vector_test_password")

# Text used for full-text retrieval.
request.set_Content("测试")
# Condition applied to the metadata fields.
# NOTE(review): this guide declares "pv" as "text" when creating the
# collection; confirm that a numeric comparison is accepted for a text
# field, or declare the field with an integer type instead.
request.set_Filter("pv > 10")
# Query vector (sample values). Its length must equal the collection's
# Dimension (10 in this guide).
request.set_Vector([
    0.7152607422256894, 0.5524872066437732,
    0.1168745490134255, 0.2813817899381323,
    0.7036929142585484, 0.4137471684402288,
    0.8781483890841574, 0.1330499711309851,
    0.9020670226474285, 0.2528790348544091,
])

response = client.do_action_with_exception(request)
# python2: print(response)
print(str(response, encoding='utf-8'))

返回结果如下:

{
    "Matches": {
        "match": [{
            "Id": "0CB55798-ECF5-4064-B81E-FE35B19E01A6",
            "Metadata": {
                "title": "测试文档",
                "content": "测试内容",
                "link": "http://127.X.X.1/document1",
                "pv": "1000"
            },
            "Values": [0.2894745251078251, 0.5364747050266715, 0.1276845661831275, 0.22528871956822372, 0.7009319238651552, 0.40267406135256123, 0.8873626696379067, 0.1248525955774931, 0.9115507046412368, 0.2450859133174706]
        }]
    },
    "RequestId": "ABB39CC3-4488-4857-905D-2E4A051D0521",
    "Status": "success"
}

  • 本页导读 (1)
文档反馈