文档

DashVector数据类型定义

更新时间:

本文介绍向量检索服务DashVector的数据类型定义。

Doc

@dataclass(frozen=True)
class Doc(object):
    """A single document: primary key, vector data, optional sparse vector, custom fields and score."""
    id: str                                            # Primary key
    vector: Union[List[int], List[float], np.ndarray]  # Vector data
    sparse_vector: Optional[Dict[int, float]] = None   # Sparse vector data
    fields: Optional[FieldDataType] = None             # Custom Doc fields
    score: float = 0.0                                 # Vector similarity
/**
 * A single document: primary key, vector data, optional sparse vector, custom fields and
 * similarity score.
 */
@Data
@Builder
public class Doc {
  // Primary key
  @NonNull private String id;
  // Vector data
  @NonNull private Vector vector;
  // Sparse vector data
  private TreeMap<Integer, Float> sparseVector;
  // Custom document fields; initialised to an empty HashMap (marked @Builder.Default)
  @Builder.Default private Map<String, Object> fields = new HashMap<>();
  // Vector similarity
  private float score;

  /**
   * Stores a String value under {@code key} in the custom fields map.
   *
   * @param key field name
   * @param value field value
   */
  public void addField(String key, String value) {
    this.fields.put(key, value);
  }

  /**
   * Stores an Integer value under {@code key} in the custom fields map.
   *
   * @param key field name
   * @param value field value
   */
  public void addField(String key, Integer value) {
    this.fields.put(key, value);
  }

  /**
   * Stores a Float value under {@code key} in the custom fields map.
   *
   * @param key field name
   * @param value field value
   */
  public void addField(String key, Float value) {
    this.fields.put(key, value);
  }

  /**
   * Stores a Boolean value under {@code key} in the custom fields map.
   *
   * @param key field name
   * @param value field value
   */
  public void addField(String key, Boolean value) {
    this.fields.put(key, value);
  }
}

DocOpResult

@dataclass(frozen=True)
class DocOpResult(object):
    """Result record for one document operation: the operation, the document id, a code and a message."""
    doc_op: DocOp
    id: str
    code: int
    message: str
/**
 * Result record for one document operation: the operation, the document id, a code and a
 * message.
 */
@Getter
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class DocOpResult implements Serializable {
  // Annotated with the JSON property name "doc_op" (the other fields carry no such annotation)
  @JsonProperty("doc_op")
  private com.aliyun.dashvector.proto.DocOpResult.DocOp docOp;

  private String id;
  private int code;
  private String message;

  /**
   * Builds a result by copying docOp, id, code and message from the proto message.
   *
   * @param docOpResult proto message to copy from
   */
  public DocOpResult(com.aliyun.dashvector.proto.DocOpResult docOpResult) {
    this.docOp = docOpResult.getDocOp();
    this.id = docOpResult.getId();
    this.code = docOpResult.getCode();
    this.message = docOpResult.getMessage();
  }

}

CollectionMeta

@dataclass(frozen=True)
class CollectionMeta(object):
    """Metadata describing a Collection: name, vector settings, status, field schema and partitions."""
    name: str                      # Collection name
    dimension: int                 # Vector dimension
    dtype: str                     # Vector data type, FLOAT/INT
    metric: str                    # Distance metric, euclidean/dotproduct/cosine
    status: Status                 # Collection status
    fields: Dict[str, str]         # Collection field definitions; dict values are one of FLOAT/BOOL/INT/STRING
    partitions: Dict[str, Status]  # Collection partition information
/**
 * Metadata describing a Collection: name, vector settings, status, field schema and
 * partitions.
 */
@Getter
public class CollectionMeta {
  // Collection name
  private final String name;
  // Vector dimension
  private final int dimension;
  // Vector data type, FLOAT/INT
  private final CollectionInfo.DataType dataType;
  // Distance metric, euclidean/dotproduct/cosine
  private final CollectionInfo.Metric metric;
  // Collection status (stored as the status enum constant's name)
  private final String status;
  // Collection field definitions; map values are one of FLOAT/BOOL/INT/STRING
  private final Map<String, FieldType> fieldsSchema;
  // Collection partition information
  private final Map<String, Status> partitionStatus;

  /**
   * Builds the metadata by copying every attribute from the given CollectionInfo; the status
   * is converted to its enum name.
   *
   * @param collectionInfo source message to copy from
   */
  public CollectionMeta(CollectionInfo collectionInfo) {
    this.name = collectionInfo.getName();
    this.dimension = collectionInfo.getDimension();
    this.dataType = collectionInfo.getDtype();
    this.metric = collectionInfo.getMetric();
    this.status = collectionInfo.getStatus().name();
    this.fieldsSchema = collectionInfo.getFieldsSchemaMap();
    this.partitionStatus = collectionInfo.getPartitionsMap();
  }
}

CollectionStats

@dataclass(frozen=True)
class CollectionStats(object):
    """Statistics of a Collection: total doc count, index completeness and per-partition stats."""
    total_doc_count: int                    # Total amount of data inserted into the Collection
    index_completeness: float               # Completeness of the Collection's inserted data
    partitions: Dict[str, PartitionStats]   # Collection partition information
/**
 * Statistics of a Collection: total doc count, index completeness and per-partition stats.
 */
@Getter
public class CollectionStats {
  // Total amount of data inserted into the Collection
  private final long totalDocCount;
  // Completeness of the Collection's inserted data
  private final float indexCompleteness;
  // Collection partition information
  private final Map<String, PartitionStats> partitions;

  /**
   * Copies the counters from the response message and converts each entry of its partitions
   * map into a {@link PartitionStats}, keyed by the same key, in a new HashMap.
   *
   * @param collectionStats stats message to copy from
   */
  public CollectionStats(StatsCollectionResponse.CollectionStats collectionStats) {
    this.totalDocCount = collectionStats.getTotalDocCount();
    this.indexCompleteness = collectionStats.getIndexCompleteness();
    this.partitions = new HashMap<>();
    collectionStats
        .getPartitionsMap()
        .forEach((key, value) -> this.partitions.put(key, new PartitionStats(value)));
  }
}

PartitionStats

@dataclass(frozen=True)
class PartitionStats(object):
    """Statistics of a single Partition."""
    total_doc_count: int                    # Total amount of data in the Partition
/** Statistics of a single Partition. */
@Getter
public class PartitionStats {
  // Total amount of data in the Partition
  private final long totalDocCount;

  /**
   * Copies the total doc count from the proto message.
   *
   * @param partitionStats proto message to copy from
   */
  public PartitionStats(com.aliyun.dashvector.proto.PartitionStats partitionStats) {
    this.totalDocCount = partitionStats.getTotalDocCount();
  }
}

Status

class Status(IntEnum):
    """State of a Collection or Partition."""
    INITIALIZED = 0                        # Collection/Partition is being created
    SERVING = 1                            # Collection/Partition is serving
    DROPPING = 2                           # Collection/Partition is being deleted
    ERROR = 3                              # Collection/Partition is in an abnormal state

Group

@dataclass(frozen=True)
class Group(object):
    """A group of documents identified by a group id."""
    group_id: str                         # Group identifier
    docs: List[Doc]                       # Documents belonging to the group
/** A group of documents identified by a group id. */
@Getter
@Builder
public class Group {
    // Group identifier
    @NonNull private String groupId;
    // Documents belonging to the group
    @Singular private List<Doc> docs;
}

RequestUsage

# read_units and write_units are in a oneof relationship
class RequestUsage(object):
    """Request units consumed by a request, reported as read units or write units."""
    read_units: int                        # Number of read request units
    write_units: int                       # Number of write request units
/**
 * Request units consumed by a request, reported as read units and write units.
 *
 * <p>NOTE(review): {@code JsonInclude.Include.NON_DEFAULT} presumably drops a zero-valued
 * counter from the JSON output, so only the populated one is emitted; confirm against the
 * Jackson documentation.
 */
@Data
@Builder
@JsonInclude(JsonInclude.Include.NON_DEFAULT)
public class RequestUsage {
    // Number of read request units
    private int readUnits;
    // Number of write request units
    private int writeUnits;
}

其他

# Type alias used for Doc.fields: a dict keyed by field name.
# NOTE(review): Type[str]/Type[int]/... denote the classes themselves, while Doc.fields
# presumably holds str/int/float/bool values; confirm against the SDK source.
FieldDataType = Dict[str, Union[Type[str], Type[int], Type[float], Type[bool]]]