Go

OpenAPI封装了云原生数据仓库AnalyticDB PostgreSQL版向量操作的DDL和DML,使您可以通过OpenAPI来管理向量数据。本文以Go SDK调用方式为例,介绍如何通过OpenAPI导入并查询向量数据。

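前提条件

  * 已创建AnalyticDB PostgreSQL版实例,并准备好示例中ManagerAccount和ManagerAccountPassword对应的数据库账号及密码。

  * 已安装Go环境,并可引用github.com/aliyun/alibaba-cloud-sdk-go。

  * 已设置环境变量ALIBABA_CLOUD_ACCESS_KEY_ID和ALIBABA_CLOUD_ACCESS_KEY_SECRET。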

操作流程

  1. 初始化向量库

  2. 创建Namespace

  3. 创建Collection

  4. 上传向量数据

  5. 召回向量数据

初始化向量库

在使用向量检索前,需初始化knowledgebase库以及全文检索相关功能。

调用示例如下:

package main

import (
	"fmt"
	
	"os"
    "github.com/aliyun/alibaba-cloud-sdk-go/sdk"
    "github.com/aliyun/alibaba-cloud-sdk-go/sdk/auth/credentials"
	gpdb "github.com/aliyun/alibaba-cloud-sdk-go/services/gpdb"
  
)

// main calls the InitVectorDatabase API for one instance and prints the
// response. It panics if the client cannot be constructed and exits with
// status 1 if the API call itself fails.
func main() {
	config := sdk.NewConfig()

	// Credentials are read from the environment; make sure
	// ALIBABA_CLOUD_ACCESS_KEY_ID and ALIBABA_CLOUD_ACCESS_KEY_SECRET are set.
	credential := credentials.NewAccessKeyCredential(os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_ID"), os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET"))
	// To authenticate with an STS token instead, use:
	//   credential := credentials.NewStsTokenCredential(os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_ID"), os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET"), os.Getenv("ALIBABA_CLOUD_SECURITY_TOKEN"))

	client, err := gpdb.NewClientWithOptions("cn-qingdao", config, credential)
	if err != nil {
		panic(err)
	}

	request := gpdb.CreateInitVectorDatabaseRequest()
	request.Scheme = "https"

	request.DBInstanceId = "gp-bp1c62r3l489****"
	request.RegionId = "cn-qingdao"
	request.ManagerAccount = "myaccount"
	request.ManagerAccountPassword = "myaccount_password"

	response, err := client.InitVectorDatabase(request)
	if err != nil {
		// Report the failure and stop; the response of a failed call must
		// not be printed as if it were a result.
		fmt.Fprintln(os.Stderr, err.Error())
		os.Exit(1)
	}
	fmt.Printf("response is %#v\n", response)
}

参数说明,请参见InitVectorDatabase - 初始化向量数据库。

创建Namespace

Namespace用于Schema隔离,在使用向量前,需至少创建一个Namespace或者使用public的Namespace。

调用示例如下:

package main

import (
	"fmt"
    
	"os"
    "github.com/aliyun/alibaba-cloud-sdk-go/sdk"
    "github.com/aliyun/alibaba-cloud-sdk-go/sdk/auth/credentials"
	gpdb "github.com/aliyun/alibaba-cloud-sdk-go/services/gpdb"
  
)

// main calls the CreateNamespace API to create the namespace "vector_test"
// on one instance and prints the response. It panics if the client cannot be
// constructed and exits with status 1 if the API call itself fails.
func main() {
	config := sdk.NewConfig()

	// Credentials are read from the environment; make sure
	// ALIBABA_CLOUD_ACCESS_KEY_ID and ALIBABA_CLOUD_ACCESS_KEY_SECRET are set.
	credential := credentials.NewAccessKeyCredential(os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_ID"), os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET"))
	// To authenticate with an STS token instead, use:
	//   credential := credentials.NewStsTokenCredential(os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_ID"), os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET"), os.Getenv("ALIBABA_CLOUD_SECURITY_TOKEN"))

	client, err := gpdb.NewClientWithOptions("cn-qingdao", config, credential)
	if err != nil {
		panic(err)
	}

	request := gpdb.CreateCreateNamespaceRequest()
	request.Scheme = "https"

	request.DBInstanceId = "gp-bp1c62r3l489****"
	request.RegionId = "cn-qingdao"
	request.ManagerAccount = "myaccount"
	request.ManagerAccountPassword = "myaccount_password"
	request.Namespace = "vector_test"
	request.NamespacePassword = "vector_test_password"

	response, err := client.CreateNamespace(request)
	if err != nil {
		// Report the failure and stop; the response of a failed call must
		// not be printed as if it were a result.
		fmt.Fprintln(os.Stderr, err.Error())
		os.Exit(1)
	}
	fmt.Printf("response is %#v\n", response)
}

参数说明,请参见CreateNamespace - 创建命名空间。

创建完后,可以在实例的knowledgebase库查看对应的Schema。

-- Lists the schema names recorded in information_schema.schemata.
SELECT schema_name FROM information_schema.schemata;

创建Collection

Collection用于存储向量数据,并使用Namespace隔离。

调用示例如下:

package main

import (
	"fmt"
	
	"os"
 "github.com/aliyun/alibaba-cloud-sdk-go/sdk"
 "github.com/aliyun/alibaba-cloud-sdk-go/sdk/auth/credentials"
	gpdb "github.com/aliyun/alibaba-cloud-sdk-go/services/gpdb"
 
	"github.com/aliyun/alibaba-cloud-sdk-go/sdk/requests"
 
)

// main calls the CreateCollection API to create the collection "document"
// in the namespace "vector_test" and prints the response. It panics if the
// client cannot be constructed and exits with status 1 if the API call fails.
func main() {
	config := sdk.NewConfig()

	// Credentials are read from the environment; make sure
	// ALIBABA_CLOUD_ACCESS_KEY_ID and ALIBABA_CLOUD_ACCESS_KEY_SECRET are set.
	credential := credentials.NewAccessKeyCredential(os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_ID"), os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET"))
	// To authenticate with an STS token instead, use:
	//   credential := credentials.NewStsTokenCredential(os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_ID"), os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET"), os.Getenv("ALIBABA_CLOUD_SECURITY_TOKEN"))

	client, err := gpdb.NewClientWithOptions("cn-qingdao", config, credential)
	if err != nil {
		panic(err)
	}

	request := gpdb.CreateCreateCollectionRequest()
	request.Scheme = "https"

	request.DBInstanceId = "gp-bp1c62r3l489****"
	request.RegionId = "cn-qingdao"
	request.ManagerAccount = "myaccount"
	request.ManagerAccountPassword = "myaccount_password"
	request.Namespace = "vector_test"
	request.Collection = "document"
	request.Dimension = requests.NewInteger(10)
	// NOTE(review): presumably a comma-separated list of Metadata keys to
	// index for full-text retrieval; "title" and "content" are keys declared
	// in Metadata below. Confirm against the CreateCollection reference.
	request.FullTextRetrievalFields = "title,content"
	// NOTE(review): "zh_cn" is assumed to be the Chinese text parser name;
	// confirm against the CreateCollection reference.
	request.Parser = "zh_cn"
	request.Metadata = "{\"pv\": \"text\",\"link\": \"text\",\"content\": \"text\",\"title\": \"text\"}"

	response, err := client.CreateCollection(request)
	if err != nil {
		// Report the failure and stop; the response of a failed call must
		// not be printed as if it were a result.
		fmt.Fprintln(os.Stderr, err.Error())
		os.Exit(1)
	}
	fmt.Printf("response is %#v\n", response)
}

参数说明,请参见CreateCollection - 创建向量数据集。

创建完后,可以在实例的knowledgebase库查看对应的Table。

-- Lists the tables in pg_tables whose schema is vector_test.
SELECT tablename FROM pg_tables WHERE schemaname='vector_test';

上传向量数据

将准备好的Embedding向量数据上传到对应的Collection中。

调用示例如下:

package main

import (
	"fmt"
	
 "os"
 "github.com/aliyun/alibaba-cloud-sdk-go/sdk"
 "github.com/aliyun/alibaba-cloud-sdk-go/sdk/auth/credentials"
	gpdb "github.com/aliyun/alibaba-cloud-sdk-go/services/gpdb"
 
	"github.com/aliyun/alibaba-cloud-sdk-go/sdk/requests"
 
)

func main() {
	config := sdk.NewConfig()

	// Please ensure that the environment variables ALIBABA_CLOUD_ACCESS_KEY_ID and ALIBABA_CLOUD_ACCESS_KEY_SECRET are set.
	credential := credentials.NewAccessKeyCredential(os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_ID"), os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET"))
	/* use STS Token 
	credential := credentials.NewStsTokenCredential(os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_ID"), os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET"), os.Getenv("ALIBABA_CLOUD_SECURITY_TOKEN"))
	*/
 client, err := gpdb.NewClientWithOptions("cn-qingdao", config, credential)
	if err != nil {
		panic(err)
	}

	request := gpdb.CreateUpsertCollectionDataRequest()

	request.Scheme = "https"

	request.DBInstanceId = "gp-bp1c62r3l489****"
  request.RegionId = "cn-qingdao"
	request.Collection = "document"
	request.Namespace = "vector_test"
	request.NamespacePassword = "vector_test_password"
	request.Rows = &[]gpdb.UpsertCollectionDataRows{
		{
			Id: "0CB55798-ECF5-4064-B81E-FE35B19E01A6",
			Metadata: gpdb.UpsertCollectionDataRowsMetadata{
				Pv: "1000",
				Link: "http://127.X.X.1/document1",
				Content: "测试内容",
				Title: "测试文档",
			},
			Vector: &[]number{requests.NewInteger(0.2894745251078251),requests.NewInteger(0.5364747050266715),requests.NewInteger(0.1276845661831275)},
		},
	}


	response, err := client.UpsertCollectionData(request)
	if err != nil {
		fmt.Print(err.Error())
	}
	fmt.Printf("response is %#v\n", response)
}

参数说明,请参见UpsertCollectionData - 上传向量数据。

上传完成后,可以在实例的knowledgebase库查看数据。

-- Returns every row and column of vector_test.document.
SELECT * FROM vector_test.document;

召回向量数据

准备需要召回的查询向量或全文检索字段,执行查询接口。

调用示例如下:

package main

import (
	"fmt"
	
	"os"
 "github.com/aliyun/alibaba-cloud-sdk-go/sdk"
 "github.com/aliyun/alibaba-cloud-sdk-go/sdk/auth/credentials"
	gpdb "github.com/aliyun/alibaba-cloud-sdk-go/services/gpdb"
 
	"github.com/aliyun/alibaba-cloud-sdk-go/sdk/requests"
 
)

// main calls the QueryCollectionData API against the collection "document"
// in the namespace "vector_test", combining a query vector, a full-text
// Content string and a metadata Filter, then prints the response. It panics
// if the client cannot be constructed and exits with status 1 if the API
// call fails.
func main() {
	config := sdk.NewConfig()

	// Credentials are read from the environment; make sure
	// ALIBABA_CLOUD_ACCESS_KEY_ID and ALIBABA_CLOUD_ACCESS_KEY_SECRET are set.
	credential := credentials.NewAccessKeyCredential(os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_ID"), os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET"))
	// To authenticate with an STS token instead, use:
	//   credential := credentials.NewStsTokenCredential(os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_ID"), os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET"), os.Getenv("ALIBABA_CLOUD_SECURITY_TOKEN"))

	client, err := gpdb.NewClientWithOptions("cn-qingdao", config, credential)
	if err != nil {
		panic(err)
	}

	request := gpdb.CreateQueryCollectionDataRequest()
	request.Scheme = "https"

	request.DBInstanceId = "gp-bp1c62r3l489****"
	request.RegionId = "cn-qingdao"
	request.Collection = "document"
	request.Namespace = "vector_test"
	request.NamespacePassword = "vector_test_password"
	request.Content = "测试"
	// NOTE(review): pv is declared as "text" in the CreateCollection sample
	// and uploaded as "1000"; confirm that a numeric comparison on it
	// behaves as intended.
	request.Filter = "pv > 10"
	request.TopK = requests.NewInteger(10)
	// NOTE(review): assumes the generated SDK declares Vector as *[]string
	// (numbers passed as decimal strings); confirm against services/gpdb.
	// The collection sample uses Dimension 10, so this abbreviated
	// 3-component vector presumably has to be extended to 10 components.
	request.Vector = &[]string{
		"0.7152607422256894",
		"0.5524872066437732",
		"0.1168505269851303",
	}

	response, err := client.QueryCollectionData(request)
	if err != nil {
		// Report the failure and stop; the response of a failed call must
		// not be printed as if it were a result.
		fmt.Fprintln(os.Stderr, err.Error())
		os.Exit(1)
	}
	fmt.Printf("response is %#v\n", response)
}

返回结果如下:

{
    "Matches": {
        "match": [{
            "Id": "0CB55798-ECF5-4064-B81E-FE35B19E01A6",
            "Metadata": {
                "title": "测试文档",
                "content": "测试内容",
                "link": "http://127.X.X.1/document1",
                "pv": "1000"
            },
            "Values": [0.2894745251078251, 0.5364747050266715, 0.1276845661831275, 0.22528871956822372, 0.7009319238651552, 0.40267406135256123, 0.8873626696379067, 0.1248525955774931, 0.9115507046412368, 0.2450859133174706]
        }]
    },
    "RequestId": "ABB39CC3-4488-4857-905D-2E4A051D0521",
    "Status": "success"
}