Go

更新时间:

OpenAPI封装了云原生数据仓库AnalyticDB PostgreSQL向量操作的DDL和DML,使您可以通过OpenAPI来管理向量数据。本文以SDK Go调用方式介绍如何通过API导入并查询向量数据。

前提条件

操作流程

  1. 初始化向量库

  2. 创建Namespace

  3. 创建Collection

  4. 上传向量数据

  5. 召回向量数据

初始化向量库

在使用向量检索前,需初始化knowledgebase库以及全文检索相关功能。

调用示例如下:

package main

import (
	"fmt"
	"os"
	
	openapi "github.com/alibabacloud-go/darabonba-openapi/v2/client"
	gpdb20160503 "github.com/alibabacloud-go/gpdb-20160503/v4/client"
	util "github.com/alibabacloud-go/tea-utils/v2/service"
	"github.com/alibabacloud-go/tea/tea"
)

// CreateClient builds a gpdb20160503 OpenAPI client.
//
// The AccessKey ID and secret are read from the ALIBABA_CLOUD_ACCESS_KEY_ID and
// ALIBABA_CLOUD_ACCESS_KEY_SECRET environment variables, and the endpoint is
// set to gpdb.aliyuncs.com. The client and error produced by
// gpdb20160503.NewClient are returned unchanged.
func CreateClient() (_result *gpdb20160503.Client, _err error) {
	config := &openapi.Config{
		AccessKeyId:     tea.String(os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_ID")),
		AccessKeySecret: tea.String(os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET")),
	}
	// For the list of endpoints, see https://api.aliyun.com/product/gpdb
	config.Endpoint = tea.String("gpdb.aliyuncs.com")
	return gpdb20160503.NewClient(config)
}

// main creates a client with CreateClient, sends an InitVectorDatabase request
// for the sample instance, and prints the response body. Any error panics.
func main() {
	gpdbClient, err := CreateClient()
	if err != nil {
		panic(err)
	}

	// Sample region, instance ID and manager credentials; replace with real values.
	req := new(gpdb20160503.InitVectorDatabaseRequest)
	req.RegionId = tea.String("cn-beijing")
	req.DBInstanceId = tea.String("gp-bp1c62r3l489****")
	req.ManagerAccount = tea.String("myaccount")
	req.ManagerAccountPassword = tea.String("myaccount_password")

	resp, err := gpdbClient.InitVectorDatabaseWithOptions(req, &util.RuntimeOptions{})
	if err != nil {
		panic(err)
	}
	fmt.Printf("response is %#v\n", resp.Body)
}

参数说明,请参见InitVectorDatabase - 初始化向量数据库

创建Namespace

Namespace用于Schema隔离,在使用向量前,需至少创建一个Namespace或者使用public Namespace。

调用示例如下:

package main

import (
	"fmt"
	"os"
	
	openapi "github.com/alibabacloud-go/darabonba-openapi/v2/client"
	gpdb20160503 "github.com/alibabacloud-go/gpdb-20160503/v4/client"
	util "github.com/alibabacloud-go/tea-utils/v2/service"
	"github.com/alibabacloud-go/tea/tea"
)

// CreateClient builds a gpdb20160503 OpenAPI client.
//
// The AccessKey ID and secret are read from the ALIBABA_CLOUD_ACCESS_KEY_ID and
// ALIBABA_CLOUD_ACCESS_KEY_SECRET environment variables, and the endpoint is
// set to gpdb.aliyuncs.com. The client and error produced by
// gpdb20160503.NewClient are returned unchanged.
func CreateClient() (_result *gpdb20160503.Client, _err error) {
	config := &openapi.Config{
		AccessKeyId:     tea.String(os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_ID")),
		AccessKeySecret: tea.String(os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET")),
	}
	// For the list of endpoints, see https://api.aliyun.com/product/gpdb
	config.Endpoint = tea.String("gpdb.aliyuncs.com")
	return gpdb20160503.NewClient(config)
}

// main creates a client with CreateClient, sends a CreateNamespace request for
// the "vector_test" namespace, and prints the response body. Any error panics.
func main() {
	gpdbClient, err := CreateClient()
	if err != nil {
		panic(err)
	}

	// Sample instance and manager credentials; replace with real values.
	req := new(gpdb20160503.CreateNamespaceRequest)
	req.RegionId = tea.String("cn-beijing")
	req.DBInstanceId = tea.String("gp-bp1c62r3l489****")
	req.ManagerAccount = tea.String("myaccount")
	req.ManagerAccountPassword = tea.String("myaccount_password")

	// Name and password of the namespace to create.
	req.Namespace = tea.String("vector_test")
	req.NamespacePassword = tea.String("vector_test_password")

	resp, err := gpdbClient.CreateNamespaceWithOptions(req, &util.RuntimeOptions{})
	if err != nil {
		panic(err)
	}
	fmt.Printf("response is %#v\n", resp.Body)
}

参数说明,请参见CreateNamespace - 创建命名空间

创建完后,可以在实例的knowledgebase库查看对应的Schema。

-- List the name of every schema recorded in information_schema.schemata.
SELECT schema_name FROM information_schema.schemata;

创建Collection

Collection用于存储向量数据,并使用Namespace隔离。

调用示例如下:

package main

import (
	"fmt"
	"os"

	openapi "github.com/alibabacloud-go/darabonba-openapi/v2/client"
	gpdb20160503 "github.com/alibabacloud-go/gpdb-20160503/v4/client"
	util "github.com/alibabacloud-go/tea-utils/v2/service"
	"github.com/alibabacloud-go/tea/tea"
)



// CreateClient builds a gpdb20160503 OpenAPI client.
//
// The AccessKey ID and secret are read from the ALIBABA_CLOUD_ACCESS_KEY_ID and
// ALIBABA_CLOUD_ACCESS_KEY_SECRET environment variables, and the endpoint is
// set to gpdb.aliyuncs.com. The client and error produced by
// gpdb20160503.NewClient are returned unchanged.
func CreateClient() (_result *gpdb20160503.Client, _err error) {
	config := &openapi.Config{
		AccessKeyId:     tea.String(os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_ID")),
		AccessKeySecret: tea.String(os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET")),
	}
	// For the list of endpoints, see https://api.aliyun.com/product/gpdb
	config.Endpoint = tea.String("gpdb.aliyuncs.com")
	return gpdb20160503.NewClient(config)
}

// main creates a client with CreateClient, sends a CreateCollection request for
// the "document" collection in the "vector_test" namespace, and prints the
// response body. Any error panics.
func main() {
	client, err := CreateClient()
	if err != nil {
		panic(err)
	}

	// Sample instance and manager credentials; replace with real values.
	createCollectionRequest := &gpdb20160503.CreateCollectionRequest{
		RegionId:                tea.String("cn-beijing"),
		DBInstanceId:            tea.String("gp-bp1c62r3l489****"),
		ManagerAccount:          tea.String("myaccount"),
		ManagerAccountPassword:  tea.String("myaccount_password"),
		Namespace:               tea.String("vector_test"),
		Collection:              tea.String("document"),
		Dimension:               tea.Int64(3),
		Parser:                  tea.String("zh_cn"),
		FullTextRetrievalFields: tea.String("title,content"),
		// Metadata is passed as a JSON object string mapping field names to types.
		Metadata: tea.String("{\"title\":\"text\",\"content\":\"text\",\"response\":\"int\"}"),
	}
	runtime := &util.RuntimeOptions{}
	response, err := client.CreateCollectionWithOptions(createCollectionRequest, runtime)
	if err != nil {
		panic(err)
	}
	fmt.Printf("response is %#v\n", response.Body)
}

参数说明,请参见CreateCollection - 创建向量数据集

创建完后,可以在实例的knowledgebase库查看对应的Table。

-- List the tables in the vector_test schema.
SELECT tablename FROM pg_tables WHERE schemaname='vector_test';

上传向量数据

将准备好的Embedding向量数据上传到对应的Collection中。

调用示例如下:

package main

import (
	"fmt"
	"os"

	openapi "github.com/alibabacloud-go/darabonba-openapi/v2/client"
	gpdb20160503 "github.com/alibabacloud-go/gpdb-20160503/v4/client"
	util "github.com/alibabacloud-go/tea-utils/v2/service"
	"github.com/alibabacloud-go/tea/tea"
)

// CreateClient builds a gpdb20160503 OpenAPI client.
//
// The AccessKey ID and secret are read from the ALIBABA_CLOUD_ACCESS_KEY_ID and
// ALIBABA_CLOUD_ACCESS_KEY_SECRET environment variables, and the endpoint is
// set to gpdb.aliyuncs.com. The client and error produced by
// gpdb20160503.NewClient are returned unchanged.
func CreateClient() (_result *gpdb20160503.Client, _err error) {
	config := &openapi.Config{
		AccessKeyId:     tea.String(os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_ID")),
		AccessKeySecret: tea.String(os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET")),
	}
	// For the list of endpoints, see https://api.aliyun.com/product/gpdb
	config.Endpoint = tea.String("gpdb.aliyuncs.com")
	return gpdb20160503.NewClient(config)
}

// main creates a client with CreateClient, upserts a single sample row into the
// "document" collection of the "vector_test" namespace, and prints the response
// body. Any error panics.
func main() {
	gpdbClient, err := CreateClient()
	if err != nil {
		panic(err)
	}

	// The single row to upload: an ID, a three-component vector, and metadata
	// values keyed by field name.
	row := new(gpdb20160503.UpsertCollectionDataRequestRows)
	row.Id = tea.String("0CB55798-ECF5-4064-B81E-FE35B19E01A6")
	row.Vector = []*float64{
		tea.Float64(0.2894745251078251),
		tea.Float64(0.5364747050266715),
		tea.Float64(0.1276845661831275),
	}
	row.Metadata = map[string]*string{
		"title":    tea.String("测试文档"),
		"content":  tea.String("测试内容"),
		"response": tea.String("1"),
	}

	// Sample instance and namespace credentials; replace with real values.
	req := new(gpdb20160503.UpsertCollectionDataRequest)
	req.RegionId = tea.String("cn-beijing")
	req.DBInstanceId = tea.String("gp-bp1c62r3l489****")
	req.Namespace = tea.String("vector_test")
	req.NamespacePassword = tea.String("vector_test_password")
	req.Collection = tea.String("document")
	req.Rows = []*gpdb20160503.UpsertCollectionDataRequestRows{row}

	resp, err := gpdbClient.UpsertCollectionDataWithOptions(req, &util.RuntimeOptions{})
	if err != nil {
		panic(err)
	}
	fmt.Printf("response is %#v\n", resp.Body)
}

参数说明,请参见UpsertCollectionData - 上传向量数据

上传完成后,可以在实例的knowledgebase库查看数据。

-- Return every row and column of the document table in the vector_test schema.
SELECT * FROM vector_test.document;

召回向量数据

准备需要召回的查询向量或全文检索字段,执行查询接口。

调用示例如下:

package main

import (
	"fmt"
	"os"

	openapi "github.com/alibabacloud-go/darabonba-openapi/v2/client"
	gpdb20160503 "github.com/alibabacloud-go/gpdb-20160503/v4/client"
	util "github.com/alibabacloud-go/tea-utils/v2/service"
	"github.com/alibabacloud-go/tea/tea"
)

// CreateClient builds a gpdb20160503 OpenAPI client.
//
// The AccessKey ID and secret are read from the ALIBABA_CLOUD_ACCESS_KEY_ID and
// ALIBABA_CLOUD_ACCESS_KEY_SECRET environment variables, and the endpoint is
// set to gpdb.aliyuncs.com. The client and error produced by
// gpdb20160503.NewClient are returned unchanged.
func CreateClient() (_result *gpdb20160503.Client, _err error) {
	config := &openapi.Config{
		AccessKeyId:     tea.String(os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_ID")),
		AccessKeySecret: tea.String(os.Getenv("ALIBABA_CLOUD_ACCESS_KEY_SECRET")),
	}
	// For the list of endpoints, see https://api.aliyun.com/product/gpdb
	config.Endpoint = tea.String("gpdb.aliyuncs.com")
	return gpdb20160503.NewClient(config)
}

// main creates a client with CreateClient, sends a QueryCollectionData request
// against the "document" collection of the "vector_test" namespace, and prints
// the response body. Any error panics.
func main() {
	gpdbClient, err := CreateClient()
	if err != nil {
		panic(err)
	}

	// Sample instance and namespace credentials; replace with real values.
	req := new(gpdb20160503.QueryCollectionDataRequest)
	req.RegionId = tea.String("cn-beijing")
	req.DBInstanceId = tea.String("gp-bp1c62r3l489****")
	req.Namespace = tea.String("vector_test")
	req.NamespacePassword = tea.String("vector_test_password")
	req.Collection = tea.String("document")

	// Query inputs: text content, a filter expression, a result limit and a
	// three-component query vector.
	req.Content = tea.String("测试")
	req.Filter = tea.String("response > 0")
	req.TopK = tea.Int64(10)
	req.Vector = []*float64{
		tea.Float64(0.7152607422256894),
		tea.Float64(0.5524872066437732),
		tea.Float64(0.1168505269851303),
	}

	resp, err := gpdbClient.QueryCollectionDataWithOptions(req, &util.RuntimeOptions{})
	if err != nil {
		panic(err)
	}
	fmt.Printf("response is %#v\n", resp.Body)
}

返回结果如下:

{
   "Matches": {
      "match": [
         {
            "Id": "0CB55798-ECF5-4064-B81E-FE35B19E01A6",
            "Metadata": {
               "content": "测试内容",
               "response": "1",
               "source": "3",
               "title": "测试文档"
            },
            "MetadataV2": {
               "content": "测试内容",
               "response": 1,
               "source": 3,
               "title": "测试文档"
            },
            "Score": 0.9132830731723668,
            "Values": {
               "value": [
                  0.28947452,
                  0.5364747,
                  0.12768456
               ]
            }
         }
      ]
   },
   "RequestId": "707D2202-61A6-53DF-AAD2-E8DE276CE292",
   "Status": "success"
}