向量 Vectors

向量数据是向量Bucket的核心资源,由三个部分组成:

  • Key(主键):向量的唯一标识符

  • Data(向量数据):高维数值数组

  • Metadata(元数据):Key-Value结构,用于存储向量的附加属性信息(如类别、来源、时间戳等),可用于查询时的前置过滤。

每个向量都存储在特定的向量索引中,继承索引的维度、数据类型和距离度量配置。

写入向量数据

将向量数据及其元数据上传到指定的向量索引中。单个向量索引表最多可存储 5000 万行向量数据。写入向量数据的 API (PutVectors)单批次最大写入 500 条。

控制台

  1. 向量Bucket页面,单击已创建的向量Bucket。

  2. 在刚创建的索引行,单击查看数据,单击向量数据插入

  3. 配置向量数据,可以同时添加多条向量数据:

    • 主键值:为向量设置唯一标识符。

    • 向量数据:输入向量数值数组,格式为用逗号分隔的数字,如:0.1, 0.2, 0.3, 0.4, 0.5。向量维度需要和选择的 Embedding 模型的维度一致。

    • 元数据:可添加元数据信息,如类别、标题、时间戳等。总大小最大支持 40KB。

      • 支持添加的元数据类型为 String。

      • 单行向量最多支持添加共 10 个可过滤元数据和不可过滤元数据字段。

      • 不可过滤元数据key的大小是 1 ~ 63字符。

      • 单个可过滤元数据最大支持 2KB。

      • 根据可过滤元数据进行标量后过滤时,单次过滤指令中的可过滤元数据长度最大为 64KB,单次过滤指令中的可过滤元数据数量最多为 1024 个,过滤指令最多支持 8 层,且支持过滤内容为空。

  4. 单击确定完成数据插入。

SDK

Python

import argparse
import alibabacloud_oss_v2 as oss
import alibabacloud_oss_v2.vectors as oss_vectors

# Command-line options of the PutVectors sample; only --endpoint may be omitted.
parser = argparse.ArgumentParser(description="vector put vectors sample")
parser.add_argument('--region', required=True,
                    help='The region in which the bucket is located.')
parser.add_argument('--bucket', required=True,
                    help='The name of the bucket.')
parser.add_argument('--endpoint',
                    help='The domain names that other services can use to access OSS')
parser.add_argument('--index_name', required=True,
                    help='The name of the vector index.')
parser.add_argument('--account_id', required=True,
                    help='The account id.')

def main():
    """Write two sample 128-element float32 vectors into the target index.

    Region, bucket, index name and account id are taken from the command
    line; credentials are read from environment variables. The status code
    and request id of the put_vectors call are printed afterwards.
    """
    args = parser.parse_args()

    # Credentials are resolved from environment variables.
    env_credentials = oss.credentials.EnvironmentVariableCredentialsProvider()

    # Start from the SDK defaults and override only what the sample needs.
    cfg = oss.config.load_default()
    cfg.credentials_provider = env_credentials
    cfg.region = args.region
    cfg.account_id = args.account_id
    if args.endpoint is not None:
        # Only override the endpoint when one was supplied on the command line.
        cfg.endpoint = args.endpoint

    vector_client = oss_vectors.Client(cfg)

    # Each entry carries the vector payload, its primary key and its metadata.
    first_vector = {
        "data": {"float32": [0.1] * 128},
        "key": "key1",
        "metadata": {"metadata1": "value1", "metadata2": "value2"},
    }
    second_vector = {
        "data": {"float32": [0.2] * 128},
        "key": "key2",
        "metadata": {"metadata3": "value3", "metadata4": "value4"},
    }

    request = oss_vectors.models.PutVectorsRequest(
        bucket=args.bucket,
        index_name=args.index_name,
        vectors=[first_vector, second_vector],
    )
    result = vector_client.put_vectors(request)

    print(f'status code: {result.status_code}, request id: {result.request_id},')

# Script entry point: call main() only when the file is run directly, not when it is imported.
if __name__ == "__main__":
    main()

Go

package main

import (
	"context"
	"flag"
	"log"

	"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss"
	"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss/credentials"
	"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss/vectors"
)

// Command-line options of the sample. They are populated when main calls
// flag.Parse.
var (
	region     string // value of -region
	bucketName string // value of -bucket
	accountId  string // value of -account-id
)

// init binds every option variable to its command-line flag. All flags
// default to the empty string.
func init() {
	options := []struct {
		target *string
		name   string
		usage  string
	}{
		{&region, "region", "The region in which the vector bucket is located."},
		{&bucketName, "bucket", "The name of the vector bucket."},
		{&accountId, "account-id", "The id of vector account."},
	}
	for _, opt := range options {
		flag.StringVar(opt.target, opt.name, "", opt.usage)
	}
}

// main writes one sample vector into the "exampleIndex" index of the vector
// bucket selected by the -bucket flag and logs the result.
//
// -region, -bucket and -account-id are all required: when any of them is
// empty the flag defaults are printed and the program exits through
// log.Fatalf. Credentials come from the environment-variable provider.
func main() {
	flag.Parse()
	if len(bucketName) == 0 || len(region) == 0 || len(accountId) == 0 {
		flag.PrintDefaults()
		log.Fatalf("invalid parameters")
	}

	cfg := oss.LoadDefaultConfig().
		WithCredentialsProvider(credentials.NewEnvironmentVariableCredentialsProvider()).
		WithRegion(region).WithAccountId(accountId)

	client := vectors.NewVectorsClient(cfg)

	// One vector per map entry: primary key, vector payload and metadata.
	// NOTE(review): the payload below has three elements; it presumably has
	// to match the dimension configured on the index - confirm before use.
	request := &vectors.PutVectorsRequest{
		Bucket:    oss.Ptr(bucketName),
		IndexName: oss.Ptr("exampleIndex"),
		Vectors: []map[string]any{
			{
				"key": "vector1",
				"data": map[string]any{
					"float32": []float32{1.2, 2.5, 3},
				},
				"metadata": map[string]any{
					"Key1": "value2",
					"Key2": []string{"1", "2", "3"},
				},
			},
		},
	}
	result, err := client.PutVectors(context.TODO(), request)
	if err != nil {
		log.Fatalf("failed to put vectors %v", err)
	}
	log.Printf("put vectors result:%#v\n", result)
}

ossutil

在名为 examplebucket 的向量Bucket中,向名为 index 的向量索引添加一个向量,该向量的数据为 1,向量主键为 vector1,元数据为 {"Key1": "32"}。

  • 使用JSON配置文件,vectors.json内容如下:

    [
      {
        "data": {
          "float32": [1]
        },
        "key": "vector1",
        "metadata": {
          "Key1": "32"
        }
      }
    ]

    命令示例:

    ossutil vectors-api put-vectors --bucket examplebucket --index-name index --vectors file://vectors.json
  • 使用JSON配置参数:

    ossutil vectors-api put-vectors --bucket examplebucket --index-name index --vectors "[{\"data\":{\"float32\":[1]},\"key\":\"vector1\",\"metadata\":{\"Key1\":\"32\"}}]"

API

调用PutVectors接口以写入向量数据。

执行向量检索

使用语义内容、元数据等条件,执行向量检索操作,快速定位目标数据。具备亚秒级检索性能,单次检索请求最多返回 30 个检索结果,召回率为 90% 左右。

控制台

当前使用控制台进行向量数据查询仅支持单行向量相似检索,多次循环检索请使用 API 或 SDK。

  1. 向量Bucket页面,单击已创建的向量Bucket。

  2. 在刚创建的索引行,单击查看数据,单击向量数据查询

  3. 配置检索参数:

    • 向量数据:输入查询向量数据,格式与上传时相同,如:0.15, 0.25, 0.35, 0.45, 0.55

    • 可过滤元数据:通过元数据进行结果过滤,如类别、时间范围等。

    • TopK(返回数量):设置返回最相似结果的数量,范围 1~30

    • 返回相似距离:选择是否返回相似度距离值

    • 返回元数据:选择是否返回向量的元数据信息

  4. 单击确定执行查询

系统返回按相似度排序的向量列表。

SDK

Python

import argparse
import alibabacloud_oss_v2 as oss
import alibabacloud_oss_v2.vectors as oss_vectors

# Command-line options of the QueryVectors sample; only --endpoint may be omitted.
parser = argparse.ArgumentParser(description="vector query vectors sample")
parser.add_argument('--region', required=True,
                    help='The region in which the bucket is located.')
parser.add_argument('--bucket', required=True,
                    help='The name of the bucket.')
parser.add_argument('--endpoint',
                    help='The domain names that other services can use to access OSS')
parser.add_argument('--index_name', required=True,
                    help='The name of the vector index.')
parser.add_argument('--account_id', required=True,
                    help='The account id.')

def main():
    """Run a filtered similarity query and print the returned vectors.

    The query vector is 128 float32 values of 0.1; the filter excludes
    entries whose "type" metadata is "comedy" or "documentary". Up to 10
    results are requested, together with their distance and metadata.
    """
    args = parser.parse_args()

    # Credentials are resolved from environment variables.
    env_credentials = oss.credentials.EnvironmentVariableCredentialsProvider()

    # Start from the SDK defaults and override only what the sample needs.
    cfg = oss.config.load_default()
    cfg.credentials_provider = env_credentials
    cfg.region = args.region
    cfg.account_id = args.account_id
    if args.endpoint is not None:
        # Only override the endpoint when one was supplied on the command line.
        cfg.endpoint = args.endpoint

    vector_client = oss_vectors.Client(cfg)

    # Metadata filter: "type" must be none of the listed values.
    excluded_types = {"type": {"$nin": ["comedy", "documentary"]}}
    query_filter = {"$and": [excluded_types]}

    query_vector = {"float32": [0.1] * 128}

    request = oss_vectors.models.QueryVectorsRequest(
        bucket=args.bucket,
        index_name=args.index_name,
        filter=query_filter,
        query_vector=query_vector,
        return_distance=True,
        return_metadata=True,
        top_k=10,
    )
    result = vector_client.query_vectors(request)

    print(f'status code: {result.status_code}, request id: {result.request_id},')

    # Nothing is printed when the response carries no vectors.
    for vector in result.vectors or []:
        print(f'vector: {vector}')


# Script entry point: call main() only when the file is run directly, not when it is imported.
if __name__ == "__main__":
    main()

Go

package main

import (
	"context"
	"flag"
	"log"

	"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss"
	"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss/credentials"
	"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss/vectors"
)

// Command-line options of the sample. They are populated when main calls
// flag.Parse.
var (
	region     string // value of -region
	bucketName string // value of -bucket
	accountId  string // value of -account-id
	indexName  string // value of -index
)

// init binds every option variable to its command-line flag. All flags
// default to the empty string.
func init() {
	options := []struct {
		target *string
		name   string
		usage  string
	}{
		{&region, "region", "The region in which the vector bucket is located."},
		{&bucketName, "bucket", "The name of the vector bucket."},
		{&accountId, "account-id", "The id of vector account."},
		{&indexName, "index", "The name of vector index."},
	}
	for _, opt := range options {
		flag.StringVar(opt.target, opt.name, "", opt.usage)
	}
}

// main runs a filtered similarity query against the vector index named by
// -index and logs the result.
//
// -bucket, -region, -account-id and -index are all required: a missing one
// prints the flag defaults and terminates through log.Fatalf. Credentials
// come from the environment-variable provider.
func main() {
	flag.Parse()
	if len(bucketName) == 0 {
		flag.PrintDefaults()
		log.Fatalf("invalid parameters, bucket name required")
	}

	if len(region) == 0 {
		flag.PrintDefaults()
		log.Fatalf("invalid parameters, region required")
	}

	if len(accountId) == 0 {
		flag.PrintDefaults()
		log.Fatalf("invalid parameters, accountId required")
	}

	if len(indexName) == 0 {
		flag.PrintDefaults()
		log.Fatalf("invalid parameters, index required")
	}

	cfg := oss.LoadDefaultConfig().
		WithCredentialsProvider(credentials.NewEnvironmentVariableCredentialsProvider()).
		WithRegion(region).WithAccountId(accountId)

	client := vectors.NewVectorsClient(cfg)

	request := &vectors.QueryVectorsRequest{
		Bucket:    oss.Ptr(bucketName),
		IndexName: oss.Ptr(indexName),
		// Metadata filter: "type" must be one of the listed values.
		Filter: map[string]any{
			"$and": []map[string]any{
				{
					"type": map[string]any{
						"$in": []string{"comedy", "documentary"},
					},
				},
			},
		},
		// NOTE(review): a one-element query vector; it presumably has to
		// match the dimension configured on the index - confirm before use.
		QueryVector: map[string]any{
			"float32": []float32{float32(32)},
		},
		ReturnMetadata: oss.Ptr(true),
		ReturnDistance: oss.Ptr(true),
		TopK:           oss.Ptr(10),
	}
	result, err := client.QueryVectors(context.TODO(), request)
	if err != nil {
		log.Fatalf("failed to query vectors %v", err)
	}
	log.Printf("query vectors result:%#v\n", result)
}

ossutil

在名为 examplebucket 的向量Bucket中名为 index 的向量索引里,检索 type 为 comedy 或 documentary 的向量中与查询向量最相似的前 10 条。

ossutil vectors-api query-vectors --bucket examplebucket --index-name index --filter "{\"$and\":[{\"type\":{\"$in\":[\"comedy\",\"documentary\"]}}]}" --query-vector "{\"float32\":[32]}" --top-k 10

API

调用QueryVectors接口以进行向量相似性检索。

获取向量数据

控制台

向量Bucket页面,单击已创建的向量Bucket,进入索引列表页面,单击索引名称,可以进入向量数据页面查看向量信息。

SDK

Python

import argparse
import alibabacloud_oss_v2 as oss
import alibabacloud_oss_v2.vectors as oss_vectors

# Command-line options of the GetVectors sample; only --endpoint may be omitted.
parser = argparse.ArgumentParser(description="vector get vectors sample")
parser.add_argument('--region', required=True,
                    help='The region in which the bucket is located.')
parser.add_argument('--bucket', required=True,
                    help='The name of the bucket.')
parser.add_argument('--endpoint',
                    help='The domain names that other services can use to access OSS')
parser.add_argument('--index_name', required=True,
                    help='The name of the vector index.')
parser.add_argument('--account_id', required=True,
                    help='The account id.')

def main():
    """Fetch the vectors stored under 'key1' and 'key2' and print them.

    Both the vector data and the metadata are requested. Region, bucket,
    index name and account id come from the command line; credentials are
    read from environment variables.
    """
    args = parser.parse_args()

    # Credentials are resolved from environment variables.
    env_credentials = oss.credentials.EnvironmentVariableCredentialsProvider()

    # Start from the SDK defaults and override only what the sample needs.
    cfg = oss.config.load_default()
    cfg.credentials_provider = env_credentials
    cfg.region = args.region
    cfg.account_id = args.account_id
    if args.endpoint is not None:
        # Only override the endpoint when one was supplied on the command line.
        cfg.endpoint = args.endpoint

    vector_client = oss_vectors.Client(cfg)

    # Primary keys of the vectors to retrieve.
    wanted_keys = ['key1', 'key2']

    request = oss_vectors.models.GetVectorsRequest(
        bucket=args.bucket,
        index_name=args.index_name,
        keys=wanted_keys,
        return_data=True,
        return_metadata=True,
    )
    result = vector_client.get_vectors(request)

    print(f'status code: {result.status_code}, request id: {result.request_id},')

    # Nothing is printed when the response carries no vectors.
    for vector in result.vectors or []:
        print(f'vector id: {vector}')


# Script entry point: call main() only when the file is run directly, not when it is imported.
if __name__ == "__main__":
    main()

Go

package main

import (
	"context"
	"flag"
	"log"

	"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss"
	"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss/credentials"
	"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss/vectors"
)

// Command-line options of the sample. They are populated when main calls
// flag.Parse.
var (
	region     string // value of -region
	bucketName string // value of -bucket
	accountId  string // value of -account-id
)

// init binds every option variable to its command-line flag. All flags
// default to the empty string.
func init() {
	options := []struct {
		target *string
		name   string
		usage  string
	}{
		{&region, "region", "The region in which the vector bucket is located."},
		{&bucketName, "bucket", "The name of the vector bucket."},
		{&accountId, "account-id", "The id of vector account."},
	}
	for _, opt := range options {
		flag.StringVar(opt.target, opt.name, "", opt.usage)
	}
}

// main fetches the vectors stored under key1, key2 and key3 from the index
// named "index" in the bucket selected by -bucket, and logs the result.
//
// -bucket, -region and -account-id are all required: a missing one prints
// the flag defaults and terminates through log.Fatalf. Credentials come from
// the environment-variable provider.
func main() {
	flag.Parse()
	if len(bucketName) == 0 {
		flag.PrintDefaults()
		log.Fatalf("invalid parameters, bucket name required")
	}

	if len(region) == 0 {
		flag.PrintDefaults()
		log.Fatalf("invalid parameters, region required")
	}

	if len(accountId) == 0 {
		flag.PrintDefaults()
		log.Fatalf("invalid parameters, accountId required")
	}

	cfg := oss.LoadDefaultConfig().
		WithCredentialsProvider(credentials.NewEnvironmentVariableCredentialsProvider()).
		WithRegion(region).WithAccountId(accountId)

	client := vectors.NewVectorsClient(cfg)

	// Ask for the vector data but not for the metadata.
	request := &vectors.GetVectorsRequest{
		Bucket:         oss.Ptr(bucketName),
		IndexName:      oss.Ptr("index"),
		Keys:           []string{"key1", "key2", "key3"},
		ReturnData:     oss.Ptr(true),
		ReturnMetadata: oss.Ptr(false),
	}
	result, err := client.GetVectors(context.TODO(), request)
	if err != nil {
		log.Fatalf("failed to get vectors %v", err)
	}
	log.Printf("get vectors result:%#v\n", result)
}

ossutil

获取名为 examplebucket 的向量Bucket中索引名为 index,主键为 key、key1 的向量属性。

ossutil vectors-api get-vectors --bucket examplebucket --index-name index --keys key,key1

API

调用GetVectors接口以获取指定的向量数据。

列举向量数据

控制台

向量Bucket页面,单击已创建的向量Bucket,进入索引列表页面,单击索引名称。

SDK

Python

import argparse
import alibabacloud_oss_v2 as oss
import alibabacloud_oss_v2.vectors as oss_vectors

# Command-line options of the ListVectors sample; only --endpoint may be omitted.
parser = argparse.ArgumentParser(description="list vectors sample")

parser.add_argument('--region', help='The region in which the bucket is located.', required=True)
parser.add_argument('--endpoint', help='The domain names that other services can use to access OSS')
parser.add_argument('--account_id', help='The account id.', required=True)
parser.add_argument('--bucket', help='The name of the bucket.', required=True)
# The option must be spelled --index_name: main() reads args.index_name.
parser.add_argument('--index_name', help='The name of the vector index.', required=True)

def main():
    """Walk the ListVectors result pages of the target index and print each vector.

    Region, bucket, index name and account id come from the command line;
    credentials are read from environment variables.
    """
    args = parser.parse_args()

    # Credentials are resolved from environment variables.
    env_credentials = oss.credentials.EnvironmentVariableCredentialsProvider()

    # Start from the SDK defaults and override only what the sample needs.
    cfg = oss.config.load_default()
    cfg.credentials_provider = env_credentials
    cfg.region = args.region
    cfg.account_id = args.account_id
    if args.endpoint is not None:
        # Only override the endpoint when one was supplied on the command line.
        cfg.endpoint = args.endpoint

    client = oss_vectors.Client(cfg)

    # Paginator for the ListVectors operation.
    paginator = client.list_vectors_paginator()

    # The request names only the bucket and the index to enumerate.
    list_request = oss_vectors.models.ListVectorsRequest(
        bucket=args.bucket,
        index_name=args.index_name,
    )

    # Print every vector of every page.
    for page in paginator.iter_page(list_request):
        for vector in page.vectors:
            print(f'Vector: {vector}')

# Script entry point: call main() only when the file is run directly, not when it is imported.
if __name__ == "__main__":
    main()

Go

package main

import (
	"context"
	"flag"
	"log"

	"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss"
	"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss/credentials"
	"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss/vectors"
)

// Command-line options of the sample. They are populated when main calls
// flag.Parse.
var (
	region     string // value of -region
	bucketName string // value of -bucket
	accountId  string // value of -account-id
	indexName  string // value of -index
)

// init binds every option variable to its command-line flag. All flags
// default to the empty string.
func init() {
	options := []struct {
		target *string
		name   string
		usage  string
	}{
		{&region, "region", "The region in which the vector bucket is located."},
		{&bucketName, "bucket", "The name of the vector bucket."},
		{&accountId, "account-id", "The id of vector account."},
		{&indexName, "index", "The name of vector index."},
	}
	for _, opt := range options {
		flag.StringVar(opt.target, opt.name, "", opt.usage)
	}
}

// main pages through the vectors of the index named by -index and logs each
// one.
//
// -region, -bucket, -account-id and -index are all required: a missing one
// prints the flag defaults and terminates through log.Fatalf. Credentials
// come from the environment-variable provider.
func main() {
	flag.Parse()
	if len(region) == 0 {
		flag.PrintDefaults()
		log.Fatalf("invalid parameters, region required")
	}

	if len(bucketName) == 0 {
		flag.PrintDefaults()
		log.Fatalf("invalid parameters, bucket name required")
	}

	if len(accountId) == 0 {
		flag.PrintDefaults()
		log.Fatalf("invalid parameters, accountId required")
	}

	if len(indexName) == 0 {
		flag.PrintDefaults()
		log.Fatalf("invalid parameters, index required")
	}

	cfg := oss.LoadDefaultConfig().
		WithCredentialsProvider(credentials.NewEnvironmentVariableCredentialsProvider()).
		WithRegion(region).WithAccountId(accountId)

	client := vectors.NewVectorsClient(cfg)

	// Ask for the metadata of each vector but not for the vector data.
	request := &vectors.ListVectorsRequest{
		Bucket:         oss.Ptr(bucketName),
		IndexName:      oss.Ptr(indexName),
		ReturnMetadata: oss.Ptr(true),
		ReturnData:     oss.Ptr(false),
	}

	p := client.NewListVectorsPaginator(request)

	// i counts the pages (1-based) so a failure can report which page broke.
	var i int
	log.Println("Vectors:")
	for p.HasNext() {
		i++

		page, err := p.NextPage(context.TODO())
		if err != nil {
			log.Fatalf("failed to get page %v, %v", i, err)
		}

		for _, v := range page.Vectors {
			log.Printf("vector:%v\n", v)
		}
	}
}

ossutil

列举名为 examplebucket 的向量Bucket中索引名为index下的所有向量。

ossutil vectors-api list-vectors --bucket examplebucket --index-name index

API

调用ListVectors接口以列举向量索引中的所有向量数据。

删除向量数据

支持批量删除向量数据。删除操作不可逆,请谨慎操作,确保已备份重要数据。

控制台

向量Bucket页面,单击已创建的向量Bucket,进入索引列表页面,单击索引名称,进入向量数据页面查看向量信息,选择需要删除的向量数据完成删除操作。

SDK

Python

import argparse
import alibabacloud_oss_v2 as oss
import alibabacloud_oss_v2.vectors as oss_vectors

# Command-line options of the DeleteVectors sample; only --endpoint may be omitted.
parser = argparse.ArgumentParser(description="vector delete vectors sample")
parser.add_argument('--region', required=True,
                    help='The region in which the bucket is located.')
parser.add_argument('--bucket', required=True,
                    help='The name of the bucket.')
parser.add_argument('--endpoint',
                    help='The domain names that other services can use to access OSS')
parser.add_argument('--index_name', required=True,
                    help='The name of the vector index.')
parser.add_argument('--account_id', required=True,
                    help='The account id.')

def main():
    """Delete the vectors stored under 'key1', 'key2' and 'key3' from the target index.

    Region, bucket, index name and account id come from the command line;
    credentials are read from environment variables. The status code and
    request id of the delete_vectors call are printed afterwards.
    """
    args = parser.parse_args()

    # Credentials are resolved from environment variables.
    env_credentials = oss.credentials.EnvironmentVariableCredentialsProvider()

    # Start from the SDK defaults and override only what the sample needs.
    cfg = oss.config.load_default()
    cfg.credentials_provider = env_credentials
    cfg.region = args.region
    cfg.account_id = args.account_id
    if args.endpoint is not None:
        # Only override the endpoint when one was supplied on the command line.
        cfg.endpoint = args.endpoint

    vector_client = oss_vectors.Client(cfg)

    # Primary keys of the vectors to remove.
    doomed_keys = ['key1', 'key2', 'key3']

    request = oss_vectors.models.DeleteVectorsRequest(
        bucket=args.bucket,
        index_name=args.index_name,
        keys=doomed_keys,
    )
    result = vector_client.delete_vectors(request)

    print(f'status code: {result.status_code}, request id: {result.request_id},')

# Script entry point: call main() only when the file is run directly, not when it is imported.
if __name__ == "__main__":
    main()

Go

package main

import (
	"context"
	"flag"
	"log"

	"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss"
	"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss/credentials"
	"github.com/aliyun/alibabacloud-oss-go-sdk-v2/oss/vectors"
)

// Command-line options of the sample. They are populated when main calls
// flag.Parse.
var (
	region     string // value of -region
	bucketName string // value of -bucket
	accountId  string // value of -account-id
)

// init defines the command-line flags and binds each one to its option
// variable. All flags default to the empty string.
func init() {
	options := []struct {
		target *string
		name   string
		usage  string
	}{
		{&region, "region", "The region in which the vector bucket is located."},
		{&bucketName, "bucket", "The name of the vector bucket."},
		{&accountId, "account-id", "The id of vector account."},
	}
	for _, opt := range options {
		flag.StringVar(opt.target, opt.name, "", opt.usage)
	}
}

// main deletes the vectors stored under key1 and key2 from the index named
// "index" in the bucket selected by -bucket, and logs the result.
//
// -bucket, -region and -account-id are all required: a missing one prints
// the flag defaults and terminates through log.Fatalf. Credentials come from
// the environment-variable provider.
func main() {
	// Parse the command-line flags.
	flag.Parse()

	// Validate the required parameters.
	if len(bucketName) == 0 {
		flag.PrintDefaults()
		log.Fatalf("invalid parameters, bucket name required")
	}

	if len(region) == 0 {
		flag.PrintDefaults()
		log.Fatalf("invalid parameters, region required")
	}

	if len(accountId) == 0 {
		flag.PrintDefaults()
		log.Fatalf("invalid parameters, accountId required")
	}

	// Build the configuration: credentials provider, region and account ID.
	cfg := oss.LoadDefaultConfig().
		WithCredentialsProvider(credentials.NewEnvironmentVariableCredentialsProvider()).
		WithRegion(region).
		WithAccountId(accountId)

	// Create the vectors client.
	client := vectors.NewVectorsClient(cfg)

	// Build the delete request.
	request := &vectors.DeleteVectorsRequest{
		Bucket:    oss.Ptr(bucketName),
		IndexName: oss.Ptr("index"),
		// Primary keys of the vectors to delete.
		Keys: []string{"key1", "key2"},
	}

	// Execute the delete operation.
	result, err := client.DeleteVectors(context.TODO(), request)
	if err != nil {
		log.Fatalf("failed to delete vectors %v", err)
	}

	// Log the result of the operation.
	log.Printf("delete vectors result:%#v\n", result)
}

ossutil

删除名为 examplebucket 的向量Bucket中索引名为 index,主键为 key、key1 的向量。

ossutil vectors-api delete-vectors --bucket examplebucket --index-name index --keys key,key1

API

调用DeleteVectors接口以删除向量数据。