文档

Java开发手册

更新时间:

向量检索是面向非结构化向量数据的检索功能,可以帮助您快速查找相似数据。如果您更熟悉基于Java语言的应用开发,可以根据自己的使用习惯选择合适的客户端,并参考本文提供的相关操作来实现不同的向量检索功能。

前提条件

  • 实例的服务类型为Lindorm_V2,且已开通向量引擎和搜索引擎。如何查看服务类型,请参见产品系列

  • 已安装Java环境,要求安装JDK 1.8及以上版本。

  • 已在pom.xml文件中添加相关依赖。具体依赖项请参见完整示例

创建向量索引

使用向量检索功能,要求索引的mappings中必须包含一个或多个向量类型字段,且所有向量字段必须显式定义。

以下示例创建一个索引,其中vector1为向量类型字段、field1为普通类型字段。

Java Client

CreateIndexRequest createIndexRequest = CreateIndexRequest.of(request -> request
  .index("vector_test")
  .settings(settings -> settings
    .index(index -> index
      .numberOfShards("4")
      .knn(true)
    )
  )
  .mappings(mappings -> mappings
    .properties("field1", field1 -> field1
      .long_(f -> f)
    )
    .properties("vector1", vector1 -> vector1
      .knnVector(knnVector -> knnVector
        .dimension(3)
        .method(method -> method
          .name("hnsw")
          .spaceType("l2")
          .engine("lvector")
          .parameters("ef_construction", JsonData.of(128))
          .parameters("m", JsonData.of(24))
        )
      )
    )
    .source(source -> source
      .excludes("vector1")
    )
  )
);
CreateIndexResponse createIndexResponse = client.indices().create(createIndexRequest);

Java High Level Rest Client

CreateIndexRequest createIndexRequest = new CreateIndexRequest("vector_test");
Map<String, Object> mappings = new HashMap<>();
{
  Map<String, Object> properties = new HashMap<>();
  Map<String, Object> field1 = new HashMap<>();
  field1.put("type", "long");
  properties.put("field1", field1);
  Map<String, Object> vector1 = new HashMap<>();
  vector1.put("type", "knn_vector");
  vector1.put("dimension", 3);
  Map<String, Object> method = new HashMap<>();
  method.put("name", "hnsw");
  method.put("space_type", "l2");
  method.put("engine", "lvector");
  Map<String, Object> parameters = new HashMap<>();
  parameters.put("m", 24);
  parameters.put("ef_construction", 128);
  method.put("parameters", parameters);
  vector1.put("method", method);
  properties.put("vector1", vector1);
  mappings.put("properties", properties);

  Map<String, Object> source = new HashMap<>();
  source.put("excludes", Collections.singletonList("vector1"));
  mappings.put("_source", source);
}
createIndexRequest.mapping(mappings);
Map<String, Object> settings = new HashMap<>();
{
  Map<String, Object> index = new HashMap<>();
  index.put("knn",true);
  index.put("number_of_shards", 4);
  settings.put("index", index);
}
createIndexRequest.settings(settings);
CreateIndexResponse createIndexResponse = client.indices().create(createIndexRequest, RequestOptions.DEFAULT);

向量列参数的详细介绍,请参见向量列参数说明

数据写入

向量索引的数据写入方式与普通索引的数据写入方式一致。向量字段的数据以数组的形式写入。

单条写入

Java Client

Map<String, Object> fieldMap = new HashMap<>();
fieldMap.put("field1", 1);
fieldMap.put("vector1", new float[]{1.2f,1.3f,1.4f});

IndexRequest<JsonData> indexRequest = new IndexRequest.Builder<JsonData>()
  .index("vector_test")
  .id("1")
  .document(JsonData.of(fieldMap))
  .build();
IndexResponse response = client.index(indexRequest);

Java High Level Rest Client

Map<String, Object> fieldMap = new HashMap<>();
fieldMap.put("field1", 1);
fieldMap.put("vector1", new float[]{1.2f,1.3f,1.4f});
IndexRequest indexRequest = new IndexRequest("vector_test");
indexRequest.id("1");
indexRequest.source(fieldMap);
IndexResponse indexResponse = client.index(indexRequest, RequestOptions.DEFAULT);

批量写入

Java Client

// Accumulates index/delete/update operations and sends them in one bulk call.
BulkRequest.Builder bulkRequest = new BulkRequest.Builder();
{
  // Index document "1".
  Map<String, Object> fieldMap = new HashMap<>();
  fieldMap.put("field1", 1);
  fieldMap.put("vector1", new float[]{2.2f,2.3f,2.4f});
  bulkRequest.operations(operations -> operations
    .index(index -> index
      .index("vector_test")
      .id("1")
      .document(fieldMap)
    )
  );
}
{
  // Index document "2".
  Map<String, Object> fieldMap = new HashMap<>();
  fieldMap.put("field1", 1);
  fieldMap.put("vector1", new float[]{2.2f,2.3f,2.4f});
  bulkRequest.operations(operations -> operations
    .index(index -> index
      .index("vector_test")
      .id("2")
      .document(fieldMap)
    )
  );
}
{
  // Index document "3".
  Map<String, Object> fieldMap = new HashMap<>();
  fieldMap.put("field1", 2);
  fieldMap.put("vector1", new float[]{1.2f,1.3f,4.4f});
  bulkRequest.operations(operations -> operations
    .index(index -> index
      .index("vector_test")
      .id("3")
      .document(fieldMap)
    )
  );
}
{
  // Delete document "2" that was added earlier in this same request.
  bulkRequest.operations(operations -> operations
    .delete(delete -> delete
      .index("vector_test")
      .id("2")
    )
  );
}
{
  // Update document "1" with new field values.
  Map<String, Object> fieldMap = new HashMap<>();
  fieldMap.put("field1", 3);
  fieldMap.put("vector1", new float[]{2.2f,3.3f,4.4f});
  bulkRequest.operations(operations -> operations
    .update(update -> update
      .index("vector_test")
      .id("1")
      .document(fieldMap)
    )
  );
}
// Ask for a refresh as part of the bulk request.
bulkRequest.refresh(Refresh.True);
BulkResponse bulkResponse = client.bulk(bulkRequest.build());

Java High Level Rest Client

BulkRequest bulkRequest = new BulkRequest();
{
  Map<String, Object> fieldMap = new HashMap<>();
  fieldMap.put("field1", 1);
  fieldMap.put("vector1", new float[]{2.2f,2.3f,2.4f});
  IndexRequest indexRequest = new IndexRequest("vector_test");
  indexRequest.id("1");
  indexRequest.source(fieldMap);
  bulkRequest.add(indexRequest);
}
{
  Map<String, Object> fieldMap = new HashMap<>();
  fieldMap.put("field1", 1);
  fieldMap.put("vector1", new float[]{2.2f,2.3f,2.4f});
  IndexRequest indexRequest = new IndexRequest("vector_test");
  indexRequest.id("2");
  indexRequest.source(fieldMap);
  bulkRequest.add(indexRequest);
}
{
  Map<String, Object> fieldMap = new HashMap<>();
  fieldMap.put("field1", 2);
  fieldMap.put("vector1", new float[]{1.2f,1.3f,4.4f});
  IndexRequest indexRequest = new IndexRequest("vector_test");
  indexRequest.id("3");
  indexRequest.source(fieldMap);
  bulkRequest.add(indexRequest);
}
{
  DeleteRequest deleteRequest = new DeleteRequest("vector_test", "2");
  bulkRequest.add(deleteRequest);
}
{
  Map<String, Object> fieldMap = new HashMap<>();
  fieldMap.put("field1", 3);
  fieldMap.put("vector1", new float[]{2.2f,3.3f,4.4f});
  UpdateRequest updateRequest = new UpdateRequest();
  updateRequest.index("vector_test");
  updateRequest.id("1");
  updateRequest.doc(fieldMap);
  bulkRequest.add(updateRequest);
}
BulkResponse bulkResponse = client.bulk(bulkRequest, RequestOptions.DEFAULT);

数据查询

查询向量数据时需要在查询请求中加入knn结构,并通过ext结构提供相关查询参数。knn结构和ext结构的细节及其参数说明,请参见参数说明。

  • 使用Java Client查询时,可直接使用knn结构和ext结构。

  • 使用Java High Level REST Client查询数据时,可通过wrapper查询传递knn结构,通过SearchExtBuilder父类传递ext结构。

    单击展开ext结构-静态类构造示例

    以下示例基于SearchExtBuilder父类构造了LVectorExtBuilder子类。在后续的查询中,可通过LVectorExtBuilder类传递ext结构的信息。

    /**
     * {@link SearchExtBuilder} that carries a named set of string search parameters
     * in the {@code ext} section of a search request.
     *
     * <p>The section name is supplied by the caller (the examples in this document
     * pass {@code "lvector"}); each map entry becomes one field of that section.
     */
    public static class LVectorExtBuilder extends SearchExtBuilder {

      protected final String name;
      final Map<String, String> searchParams;

      /**
       * @param name         name of the ext section; also returned by {@link #getWriteableName()}
       * @param searchParams parameter names and values written into the section
       */
      public LVectorExtBuilder(String name, Map<String, String> searchParams) {
        this.searchParams = searchParams;
        this.name = name;
      }

      /** Writes the parameter map to the stream as string keys and string values. */
      @Override
      public void writeTo(StreamOutput out) throws IOException {
        out.writeMap(searchParams, StreamOutput::writeString, StreamOutput::writeString);
      }

      /** Two builders are equal when they have the same class, name, and parameters. */
      @Override
      public boolean equals(Object o) {
        if (o == this) {
          return true;
        }
        if (o == null) {
          return false;
        }
        if (o.getClass() != getClass()) {
          return false;
        }
        LVectorExtBuilder other = (LVectorExtBuilder) o;
        boolean sameParams = Objects.equals(searchParams, other.searchParams);
        boolean sameName = Objects.equals(name, other.name);
        return sameParams && sameName;
      }

      @Override
      public int hashCode() {
        return Objects.hash(searchParams, name);
      }

      /** Returns the section name given at construction time. */
      @Override
      public String getWriteableName() {
        return name;
      }

      /** Emits {@code "<name>": { "<key>": "<value>", ... }} into the given builder. */
      @Override
      public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject(name);
        for (Map.Entry<String, String> entry : searchParams.entrySet()) {
          String key = entry.getKey();
          String value = entry.getValue();
          builder.field(key, value);
        }
        builder.endObject();
        return builder;
      }
    }

纯向量数据查询

只查询向量字段的数据,可直接使用knn结构的基本形式。以下示例中查询vector1字段中与向量[2.3, 3.3, 4.4]相关的前10条数据,并要求最小得分为0.8。

Java Client

Map<String, Object> ext = new HashMap<>();
ext.put("min_score", "0.8");

SearchResponse<JsonData> searchResponse = client.search(request -> request
  .index("vector_test")
  .query(query -> query
    .knn(knn -> knn
      .field("vector1")
      .vector(2.3f, 3.3f, 4.4f)
      .k(10)
    )
  )
  .ext("lvector", JsonData.of(ext))
  , JsonData.class
);

Java High Level Rest Client

SearchRequest searchRequest = new SearchRequest();
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
// Build the knn query body as nested maps: {"knn": {"vector1": {"vector": [...], "k": 10}}}.
Map<String, Object> queryBody = new HashMap<>();
{
  Map<String, Object> knn = new HashMap<>();
  Map<String, Object> vector1 = new HashMap<>();
  // Query vector [2.3, 3.3, 4.4], the same vector used by the Java Client example.
  vector1.put("vector", new float[]{2.3f,3.3f,4.4f});
  vector1.put("k", 10);
  knn.put("vector1", vector1);
  queryBody.put("knn", knn);
}
// The knn structure is passed as JSON through a wrapper query.
searchSourceBuilder.query(QueryBuilders.wrapperQuery(new Gson().toJson(queryBody)));
// The ext structure is passed through the LVectorExtBuilder subclass of SearchExtBuilder.
Map<String, String> ext = new HashMap<>();
ext.put("min_score", "0.8");
searchSourceBuilder.ext(Collections.singletonList(new LVectorExtBuilder("lvector", ext)));
searchRequest.source(searchSourceBuilder);
searchRequest.indices("vector_test");
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);

融合查询

向量数据的查询可与普通数据的查询条件结合,并返回综合的查询结果。

Pre-Filter近似查询

knn查询结构内添加filter结构,并指定filter_type参数为pre_filter,可实现先过滤普通数据,再查询向量数据。

Java Client

// ext parameters: filter ordinary fields first, then search the vectors.
Map<String, Object> ext = new HashMap<>();
ext.put("filter_type", "pre_filter");
SearchResponse<JsonData> searchResponse = client.search(request -> request
  .index("vector_test")
  .query(query -> query
    .knn(knn -> knn
      .field("vector1")
      .vector(2.3f, 3.3f, 4.4f)
      .k(10)
      // Filter placed inside the knn structure: field1 >= 0.
      .filter(Query.of(filter -> filter
        .range(range-> range
          .field("field1")
          .gte(JsonData.of(0))
        )
      ))
    )
  )
  .ext("lvector", JsonData.of(ext))
  , JsonData.class
);

Java High Level Rest Client

SearchRequest searchRequest = new SearchRequest();
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
// knn query body with a range filter (field1 >= 0) nested inside the knn structure.
Map<String, Object> queryBody = new HashMap<>();
{
  Map<String, Object> knn = new HashMap<>();
  Map<String, Object> vector1 = new HashMap<>();
  // Same query vector as the Java Client example for this query.
  vector1.put("vector", new float[]{2.3f,3.3f,4.4f});
  vector1.put("k", 10);
  Map<String, Object> filter = new HashMap<>();
  Map<String, Object> range = new HashMap<>();
  Map<String, Object> field1 = new HashMap<>();
  field1.put("gte", 0);
  range.put("field1", field1);
  filter.put("range", range);
  vector1.put("filter", filter);
  knn.put("vector1", vector1);
  queryBody.put("knn", knn);
}
searchSourceBuilder.query(QueryBuilders.wrapperQuery(new Gson().toJson(queryBody)));
// ext parameters: filter ordinary fields first, then search the vectors.
Map<String, String> ext = new HashMap<>();
ext.put("filter_type", "pre_filter");
searchSourceBuilder.ext(Collections.singletonList(new LVectorExtBuilder("lvector", ext)));
searchRequest.source(searchSourceBuilder);
searchRequest.indices("vector_test");
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);

Post-Filter近似查询

knn查询结构内添加filter结构,并指定filter_type参数为post_filter,可实现先查询向量数据,再过滤普通数据。

Java Client

// ext parameters: search the vectors first, then filter on ordinary fields.
Map<String, Object> ext = new HashMap<>();
ext.put("filter_type", "post_filter");

SearchResponse<JsonData> searchResponse = client.search(request -> request
  .index("vector_test")
  .query(query -> query
    .knn(knn -> knn
      .field("vector1")
      .vector(2.3f, 3.3f, 4.4f)
      .k(10)
      // Filter placed inside the knn structure: field1 >= 0.
      .filter(Query.of(filter -> filter
        .range(range-> range
          .field("field1")
          .gte(JsonData.of(0))
        )
      ))
    )
  )
  .ext("lvector", JsonData.of(ext))
  , JsonData.class
);

Java High Level Rest Client

SearchRequest searchRequest = new SearchRequest();
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
// knn query body with a range filter (field1 >= 0) nested inside the knn structure.
Map<String, Object> queryBody = new HashMap<>();
{
  Map<String, Object> knn = new HashMap<>();
  Map<String, Object> vector1 = new HashMap<>();
  // Same query vector as the Java Client example for this query.
  vector1.put("vector", new float[]{2.3f,3.3f,4.4f});
  vector1.put("k", 10);
  Map<String, Object> filter = new HashMap<>();
  Map<String, Object> range = new HashMap<>();
  Map<String, Object> field1 = new HashMap<>();
  field1.put("gte", 0);
  range.put("field1", field1);
  filter.put("range", range);
  vector1.put("filter", filter);
  knn.put("vector1", vector1);
  queryBody.put("knn", knn);
}
searchSourceBuilder.query(QueryBuilders.wrapperQuery(new Gson().toJson(queryBody)));
// ext parameters: search the vectors first, then filter on ordinary fields.
Map<String, String> ext = new HashMap<>();
ext.put("filter_type", "post_filter");
searchSourceBuilder.ext(Collections.singletonList(new LVectorExtBuilder("lvector", ext)));
searchRequest.source(searchSourceBuilder);
searchRequest.indices("vector_test");
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);

您也可以通过Post Filter结构添加过滤条件,实现Post-Filter近似查询。

Java Client

// Plain knn query; the filter condition is supplied separately through postFilter.
SearchResponse<JsonData> searchResponse = client.search(request -> request
  .index("vector_test")
  .query(query -> query
    .knn(knn -> knn
      .field("vector1")
      .vector(2.3f, 3.3f, 4.4f)
      .k(10)
    )
  )
  // Post filter: field1 >= 0.
  .postFilter(filter -> filter
    .range(range-> range
      .field("field1")
      .gte(JsonData.of(0))
    )
  )
  , JsonData.class
);

Java High Level Rest Client

SearchRequest searchRequest = new SearchRequest();
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
// Plain knn query body: {"knn": {"vector1": {"vector": [...], "k": 10}}}.
Map<String, Object> queryBody = new HashMap<>();
Map<String, Object> knn = new HashMap<>();
Map<String, Object> vector1 = new HashMap<>();
// Same query vector as the Java Client example for this query.
vector1.put("vector", new float[]{2.3f,3.3f,4.4f});
vector1.put("k", 10);
knn.put("vector1", vector1);
queryBody.put("knn", knn);
searchSourceBuilder.query(QueryBuilders.wrapperQuery(new Gson().toJson(queryBody)));
// The filter condition (field1 >= 0) is supplied through the post filter structure.
searchSourceBuilder.postFilter(QueryBuilders.rangeQuery("field1").gte(0));
searchRequest.source(searchSourceBuilder);
searchRequest.indices("vector_test");
SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);

删除向量索引

向量索引的删除方式与普通索引的删除方式一致。

Java Client

DeleteIndexRequest deleteIndexRequest = DeleteIndexRequest.of(request -> request
  .index("vector_test")
);
DeleteIndexResponse deleteIndexResponse = client.indices().delete(deleteIndexRequest);

Java High Level Rest Client

// Deletes the vector_test index, the same way an ordinary index is deleted.
DeleteIndexRequest deleteIndexRequest = new DeleteIndexRequest("vector_test");
AcknowledgedResponse deleteIndexResponse = client.indices().delete(deleteIndexRequest, RequestOptions.DEFAULT);

完整示例

Java Client

  1. 添加相关依赖。以Maven项目为例,在pom.xml文件的dependencies中添加依赖项。示例代码如下:

    <dependency>
        <groupId>org.opensearch.client</groupId>
        <artifactId>opensearch-java</artifactId>
        <version>2.10.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.httpcomponents.client5</groupId>
        <artifactId>httpclient5</artifactId>
        <version>5.2.1</version>
    </dependency>
    <dependency>
        <groupId>org.apache.httpcomponents.core5</groupId>
        <artifactId>httpcore5</artifactId>
        <version>5.2.1</version>
    </dependency>
    <!--当您仅使用API时,以下代码可省略-->
    <dependency>
        <groupId>com.google.code.gson</groupId>
        <artifactId>gson</artifactId>
        <version>2.10.1</version>
    </dependency>
  2. 编写完整代码。其中Lindorm搜索引擎的Elasticsearch兼容地址、默认用户名和默认密码的获取方式,请参见查看连接信息

    import org.apache.hc.client5.http.auth.AuthScope;
    import org.apache.hc.client5.http.auth.UsernamePasswordCredentials;
    import org.apache.hc.client5.http.impl.auth.BasicCredentialsProvider;
    import org.apache.hc.client5.http.impl.nio.PoolingAsyncClientConnectionManager;
    import org.apache.hc.client5.http.impl.nio.PoolingAsyncClientConnectionManagerBuilder;
    import org.apache.hc.core5.http.HttpHost;
    import org.opensearch.client.json.JsonData;
    import org.opensearch.client.json.jackson.JacksonJsonpMapper;
    import org.opensearch.client.opensearch.OpenSearchClient;
    import org.opensearch.client.opensearch._types.Refresh;
    import org.opensearch.client.opensearch._types.query_dsl.Query;
    import org.opensearch.client.opensearch.core.BulkRequest;
    import org.opensearch.client.opensearch.core.BulkResponse;
    import org.opensearch.client.opensearch.core.SearchResponse;
    import org.opensearch.client.opensearch.core.search.Hit;
    import org.opensearch.client.opensearch.indices.CreateIndexRequest;
    import org.opensearch.client.opensearch.indices.CreateIndexResponse;
    import org.opensearch.client.opensearch.indices.DeleteIndexRequest;
    import org.opensearch.client.opensearch.indices.DeleteIndexResponse;
    import org.opensearch.client.transport.httpclient5.ApacheHttpClient5Transport;
    import org.opensearch.client.transport.httpclient5.ApacheHttpClient5TransportBuilder;
    
    import java.io.Closeable;
    import java.io.IOException;
    import java.util.HashMap;
    import java.util.Map;
    import java.util.Random;
    
    /**
     * End-to-end vector search example built on the OpenSearch Java client.
     *
     * <p>{@link #main} creates an index with a 5-dimensional {@code knn_vector} field,
     * bulk-writes 1000 randomly generated documents, runs a pure vector query and three
     * filtered variants, and finally deletes the index.
     */
    public class LVectorDemo implements Closeable {
      private final OpenSearchClient client;
      private final ApacheHttpClient5Transport transport;
      private final Random random;

      public static void main(String[] args) throws IOException {
        String indexName = "vector_test";
        // Query vector; its length matches the dimension(5) declared in createIndex.
        float[] vector = new float[]{1f,1f,1f,1f,1f};
        try (LVectorDemo lVectorDemo = new LVectorDemo()) {
          lVectorDemo.createIndex(indexName);
          lVectorDemo.writeDocs(indexName);
          lVectorDemo.queryVector(indexName, vector);
          lVectorDemo.queryVectorWithPreFilter(indexName, vector);
          lVectorDemo.queryVectorWithPostFilterType1(indexName, vector);
          lVectorDemo.queryVectorWithPostFilterType2(indexName, vector);
          lVectorDemo.deleteIndex(indexName);
        }
      }

      /**
       * Builds the transport and client with basic authentication and a 60-second
       * response timeout.
       */
      public LVectorDemo() {
        // Fill in the Elasticsearch-compatible endpoint of your Lindorm search engine.
        HttpHost[] hosts = new HttpHost[] {
          new HttpHost("http", "ld-bp106782jm960****-proxy-search-pub.lindorm.aliyuncs.com", 30070)
        };
        // Fill in the username and password of your Lindorm search engine.
        transport = ApacheHttpClient5TransportBuilder.builder(hosts)
          .setMapper(new JacksonJsonpMapper())
          .setHttpClientConfigCallback(httpClientBuilder -> {
            BasicCredentialsProvider credentialsProvider = new BasicCredentialsProvider();
            for (HttpHost host : hosts) {
              credentialsProvider.setCredentials(new AuthScope(host), new UsernamePasswordCredentials("username", "password".toCharArray()));
            }

            PoolingAsyncClientConnectionManager connectionManager = PoolingAsyncClientConnectionManagerBuilder.create().build();
            return httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider).setConnectionManager(connectionManager);
          })
          // TimeUnit is fully qualified because this example's import list does not include it.
          .setRequestConfigCallback(a -> a.setResponseTimeout(60, java.util.concurrent.TimeUnit.SECONDS))
          .build();
        client = new OpenSearchClient(transport);
        // Fixed seed: the generated ids and vectors are the same on every run.
        random = new Random(0);
      }

      /** Closes the underlying transport. */
      @Override
      public void close() throws IOException {
        transport.close();
      }

      /**
       * Creates a vector index with 4 shards and knn enabled.
       *
       * <p>The mapping declares {@code field1} as a long field and {@code vector1} as a
       * 5-dimensional knn_vector field using the ivfpq method, l2 space type, and the
       * lvector engine.
       *
       * @param name name of the index to create
       */
      public void createIndex(String name) throws IOException {
        CreateIndexRequest createIndexRequest = CreateIndexRequest.of(request -> request
          .index(name)
          .settings(settings -> settings
            .index(index -> index
              .numberOfShards("4")
              .knn(true)
            )
          )
          .mappings(mappings -> mappings
            .properties("field1", field1 -> field1
              .long_(f -> f)
            )
            .properties("vector1", vector1 -> vector1
              .knnVector(knnVector -> knnVector
                .dimension(5)
                .method(method -> method
                  .name("ivfpq")
                  .spaceType("l2")
                  .engine("lvector")
                  .parameters("nlist", JsonData.of(10))
                  .parameters("centroids_use_hnsw", JsonData.of(true))
                  .parameters("centroids_hnsw_m", JsonData.of(32))
                  .parameters("centroids_hnsw_ef_construct", JsonData.of(200))
                  .parameters("centroids_hnsw_ef_search", JsonData.of(200))
                )
              )
            )
          )
        );
        CreateIndexResponse createIndexResponse = client.indices().create(createIndexRequest);
      }

      /**
       * Bulk-writes 1000 documents, each with a random id, a random long {@code field1},
       * and a random 5-element {@code vector1}.
       *
       * @param indexName index to write into
       */
      public void writeDocs(String indexName) throws IOException {
        BulkRequest.Builder bulkRequest = new BulkRequest.Builder();
        for (int i=0; i<1000; i++) {
          String id = String.valueOf(random.nextLong());
          Map<String, Object> fieldMap = new HashMap<>();
          fieldMap.put("field1", random.nextLong());
          float[] vector1 = new float[5];
          for (int j=0; j<vector1.length; j++) {
            vector1[j] = random.nextFloat();
          }
          fieldMap.put("vector1", vector1);
          bulkRequest.operations(operations -> operations
            .index(index -> index
              .index(indexName)
              .id(id)
              .document(fieldMap)
            )
          );
        }
        // Ask for a refresh as part of the bulk request.
        bulkRequest.refresh(Refresh.True);
        BulkResponse bulkResponse = client.bulk(bulkRequest.build());
      }

      /**
       * Pure vector query: the top 10 neighbours of {@code vector} on {@code vector1}.
       *
       * @param indexName index to search
       * @param vector    query vector
       */
      public void queryVector(String indexName, float[] vector) throws IOException {
        Map<String, Object> ext = new HashMap<>();
        ext.put("min_score", "0.1");
        ext.put("nprobe", "20");
        ext.put("reorder_factor", "20");
        SearchResponse<JsonData> searchResponse = client.search(request -> request
          .index(indexName)
          .query(query -> query
            .knn(knn -> knn
              .field("vector1")
              .vector(vector)
              .k(10)
            )
          )
          .ext("lvector", JsonData.of(ext))
          , JsonData.class
        );
        printResponse(searchResponse);
      }

      /**
       * Pre-Filter approximate query: a range filter ({@code field1 >= 0}) inside the knn
       * structure, with {@code filter_type} set to {@code pre_filter}.
       *
       * @param indexName index to search
       * @param vector    query vector
       */
      public void queryVectorWithPreFilter(String indexName, float[] vector) throws IOException {
        Map<String, Object> ext = new HashMap<>();
        ext.put("filter_type", "pre_filter");
        ext.put("min_score", "0.1");
        ext.put("nprobe", "20");
        ext.put("reorder_factor", "20");
        SearchResponse<JsonData> searchResponse = client.search(request -> request
          .index(indexName)
          .query(query -> query
            .knn(knn -> knn
              .field("vector1")
              .vector(vector)
              .k(10)
              .filter(Query.of(filter -> filter
                .range(range-> range
                  .field("field1")
                  .gte(JsonData.of(0))
                )
              ))
            )
          )
          .ext("lvector", JsonData.of(ext))
          , JsonData.class
        );
        printResponse(searchResponse);
      }

      /**
       * Post-Filter approximate query: a range filter ({@code field1 >= 0}) inside the knn
       * structure, with {@code filter_type} set to {@code post_filter}.
       *
       * @param indexName index to search
       * @param vector    query vector
       */
      public void queryVectorWithPostFilterType1(String indexName, float[] vector) throws IOException {
        Map<String, Object> ext = new HashMap<>();
        ext.put("filter_type", "post_filter");
        ext.put("min_score", "0.1");
        ext.put("nprobe", "20");
        ext.put("reorder_factor", "20");
        SearchResponse<JsonData> searchResponse = client.search(request -> request
          .index(indexName)
          .query(query -> query
            .knn(knn -> knn
              .field("vector1")
              .vector(vector)
              .k(10)
              .filter(Query.of(filter -> filter
                .range(range-> range
                  .field("field1")
                  .gte(JsonData.of(0))
                )
              ))
            )
          )
          .ext("lvector", JsonData.of(ext))
          , JsonData.class
        );
        printResponse(searchResponse);
      }

      /**
       * Post-Filter approximate query with the filter condition ({@code field1 >= 0})
       * supplied through the request's Post Filter structure instead of the knn structure.
       *
       * @param indexName index to search
       * @param vector    query vector
       */
      public void queryVectorWithPostFilterType2(String indexName, float[] vector) throws IOException {
        Map<String, Object> ext = new HashMap<>();
        ext.put("min_score", "0.1");
        ext.put("nprobe", "20");
        ext.put("reorder_factor", "20");
        SearchResponse<JsonData> searchResponse = client.search(request -> request
          .index(indexName)
          .query(query -> query
            .knn(knn -> knn
              .field("vector1")
              .vector(vector)
              .k(10)
            )
          )
          .postFilter(filter -> filter
            .range(range-> range
              .field("field1")
              .gte(JsonData.of(0))
            )
          )
          .ext("lvector", JsonData.of(ext))
          , JsonData.class
        );
        printResponse(searchResponse);
      }

      /**
       * Deletes the vector index.
       *
       * @param indexName index to delete
       */
      public void deleteIndex(String indexName) throws IOException {
        DeleteIndexRequest deleteIndexRequest = DeleteIndexRequest.of(request -> request
          .index(indexName)
        );
        DeleteIndexResponse deleteIndexResponse = client.indices().delete(deleteIndexRequest);
      }

      /**
       * Prints the total-hits relation and value followed by the id of every hit,
       * in the form {@code Total: <relation> <value> [id, id, ]}.
       */
      private static void printResponse(SearchResponse<JsonData> response) {
        StringBuilder builder = new StringBuilder();
        builder.append("Total: ");
        builder.append(response.hits().total().relation().jsonValue());
        builder.append(" ");
        builder.append(response.hits().total().value());
        builder.append(" [");
        for (Hit<JsonData> hit : response.hits().hits()) {
          builder.append(hit.id());
          builder.append(", ");
        }
        builder.append("]");
        System.out.println(builder);
      }
    }
    

Java High Level Rest Client

  1. 添加相关依赖。以Maven项目为例,在pom.xml文件的dependencies中添加依赖项。示例代码如下:

    <dependency>
        <groupId>org.elasticsearch.client</groupId>
        <artifactId>elasticsearch-rest-high-level-client</artifactId>
        <version>7.13.4</version>
    </dependency>
    <!--当您仅使用API时,以下代码可省略-->
    <dependency>
        <groupId>com.google.code.gson</groupId>
        <artifactId>gson</artifactId>
        <version>2.10.1</version>
    </dependency>
  2. 编写完整代码。其中Lindorm搜索引擎的Elasticsearch兼容地址、默认用户名和默认密码的获取方式,请参见查看连接信息

    import com.google.gson.Gson;
    import org.apache.http.HttpHost;
    import org.apache.http.auth.AuthScope;
    import org.apache.http.auth.UsernamePasswordCredentials;
    import org.apache.http.client.CredentialsProvider;
    import org.apache.http.impl.client.BasicCredentialsProvider;
    import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest;
    import org.elasticsearch.action.bulk.BulkRequest;
    import org.elasticsearch.action.bulk.BulkResponse;
    import org.elasticsearch.action.index.IndexRequest;
    import org.elasticsearch.action.search.SearchRequest;
    import org.elasticsearch.action.search.SearchResponse;
    import org.elasticsearch.action.support.WriteRequest;
    import org.elasticsearch.action.support.master.AcknowledgedResponse;
    import org.elasticsearch.client.RequestOptions;
    import org.elasticsearch.client.RestClient;
    import org.elasticsearch.client.RestClientBuilder;
    import org.elasticsearch.client.RestHighLevelClient;
    import org.elasticsearch.client.indices.CreateIndexRequest;
    import org.elasticsearch.client.indices.CreateIndexResponse;
    import org.elasticsearch.common.io.stream.StreamOutput;
    import org.elasticsearch.common.xcontent.XContentBuilder;
    import org.elasticsearch.index.query.QueryBuilders;
    import org.elasticsearch.search.SearchExtBuilder;
    import org.elasticsearch.search.builder.SearchSourceBuilder;
    
    import java.io.Closeable;
    import java.io.IOException;
    import java.util.Collections;
    import java.util.HashMap;
    import java.util.Map;
    import java.util.Objects;
    import java.util.Random;
    
    public class LVectorRestDemo implements Closeable {
      private RestHighLevelClient client;
      private Random random;
    
      public static void main(String[] args) throws IOException {
        String indexName = "vector_test";
        float[] vector = new float[]{1f,1f,1f,1f,1f};
        try (LVectorRestDemo lVectorDemo = new LVectorRestDemo()) {
          lVectorDemo.createIndex(indexName);
          lVectorDemo.writeDocs(indexName);
          lVectorDemo.queryVector(indexName, vector);
          lVectorDemo.queryVectorWithPreFilter(indexName, vector);
          lVectorDemo.queryVectorWithPostFilterType1(indexName, vector);
          lVectorDemo.queryVectorWithPostFilterType2(indexName, vector);
          lVectorDemo.deleteIndex(indexName);
        }
      }
    //请填写Lindorm搜索引擎的Elasticsearch兼容地址、用户名和密码
      public LVectorRestDemo() {
        CredentialsProvider
          credentialsProvider = new BasicCredentialsProvider();
        credentialsProvider.setCredentials(
          AuthScope.ANY, new UsernamePasswordCredentials("username", "password"));
        HttpHost[] hosts = new HttpHost[] {
          new HttpHost("ld-bp106782jm960****-proxy-search-pub.lindorm.aliyuncs.com", 30070, "http")
        };
    
        RestClientBuilder builder = RestClient.builder(hosts)
          .setHttpClientConfigCallback(httpClientBuilder -> httpClientBuilder.setDefaultCredentialsProvider(credentialsProvider))
          .setRequestConfigCallback(builder1 -> builder1.setSocketTimeout(120000));
    
        client = new RestHighLevelClient(builder);
        random = new Random(0);
      }
    
      @Override
      public void close() throws IOException {
        client.close();
      }
    
    //创建向量索引
      public void createIndex(String name) throws IOException {
        CreateIndexRequest createIndexRequest = new CreateIndexRequest(name);
        Map<String, Object> mappings = new HashMap<>();
        {
          Map<String, Object> properties = new HashMap<>();
          Map<String, Object> field1 = new HashMap<>();
          field1.put("type", "long");
          properties.put("field1", field1);
          Map<String, Object> vector1 = new HashMap<>();
          vector1.put("type", "knn_vector");
          vector1.put("dimension", 5);
          Map<String, Object> method = new HashMap<>();
          method.put("name", "ivfpq");
          method.put("space_type", "l2");
          method.put("engine", "lvector");
          Map<String, Object> parameters = new HashMap<>();
          parameters.put("nlist", 100);
          parameters.put("centroids_use_hnsw", true);
          parameters.put("centroids_hnsw_m", 32);
          parameters.put("centroids_hnsw_ef_construct", 200);
          parameters.put("centroids_hnsw_ef_search", 100);
          method.put("parameters", parameters);
          vector1.put("method", method);
          properties.put("vector1", vector1);
          mappings.put("properties", properties);
    
          Map<String, Object> source = new HashMap<>();
          source.put("excludes", Collections.singletonList("vector1"));
          mappings.put("_source", source);
        }
        createIndexRequest.mapping(mappings);
        Map<String, Object> settings = new HashMap<>();
        {
          Map<String, Object> index = new HashMap<>();
          index.put("knn",true);
          index.put("number_of_shards", 4);
          settings.put("index", index);
        }
        createIndexRequest.settings(settings);
        CreateIndexResponse createIndexResponse = client.indices().create(createIndexRequest, RequestOptions.DEFAULT);
      }
    
    //数据写入
      /**
       * Bulk-indexes 1000 randomly generated documents into the given index.
       *
       * <p>Each document uses a random long (rendered as a string) as its id, stores a random long
       * in {@code field1} and a 5-element random float array in {@code vector1}. The bulk request
       * is sent with the {@code IMMEDIATE} refresh policy.
       *
       * <p>NOTE(review): assumes the target index declares {@code vector1} with dimension 5 and
       * that {@code random} and {@code client} are fields of the enclosing class; confirm.
       *
       * @param indexName name of the index to write to
       * @throws IOException if the bulk call itself fails, or if the bulk response reports that
       *     one or more documents were rejected
       */
      public void writeDocs(String indexName) throws IOException {
        BulkRequest bulkRequest = new BulkRequest();
        bulkRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
        for (int i = 0; i < 1000; i++) {
          String id = String.valueOf(random.nextLong());
          Map<String, Object> fieldMap = new HashMap<>();
          fieldMap.put("field1", random.nextLong());
          float[] vector1 = new float[5];
          for (int j = 0; j < vector1.length; j++) {
            vector1[j] = random.nextFloat();
          }
          fieldMap.put("vector1", vector1);
          bulkRequest.add(new IndexRequest(indexName).id(id).source(fieldMap));
        }
        BulkResponse bulkResponse = client.bulk(bulkRequest, RequestOptions.DEFAULT);
        // A bulk call reports per-document failures in the response instead of throwing,
        // so surface them explicitly rather than silently dropping rejected documents.
        if (bulkResponse.hasFailures()) {
          throw new IOException(
              "Bulk write to index [" + indexName + "] failed: " + bulkResponse.buildFailureMessage());
        }
      }
    
    //纯向量数据查询
      /**
       * Runs a pure vector (k-NN) search on the {@code vector1} field and prints the response.
       *
       * <p>The query body is a {@code knn} clause asking for {@code k = 10} results for the given
       * vector, serialized with Gson and sent as a wrapper query. An {@code lvector} ext section
       * carries {@code min_score=0.1}, {@code nprobe=20} and {@code reorder_factor=20}.
       *
       * @param indexName name of the index to search
       * @param vector query vector
       * @throws IOException if the search call fails
       */
      public void queryVector(String indexName, float[] vector) throws IOException {
        // knn clause: { "knn": { "vector1": { "vector": [...], "k": 10 } } }
        Map<String, Object> knnParams = new HashMap<>();
        knnParams.put("vector", vector);
        knnParams.put("k", 10);
        Map<String, Object> knnClause = new HashMap<>();
        knnClause.put("vector1", knnParams);
        Map<String, Object> queryBody = new HashMap<>();
        queryBody.put("knn", knnClause);
        String queryJson = new Gson().toJson(queryBody);

        // Search parameters passed through the "lvector" ext section.
        Map<String, String> lvectorParams = new HashMap<>();
        lvectorParams.put("min_score", "0.1");
        lvectorParams.put("nprobe", "20");
        lvectorParams.put("reorder_factor", "20");

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.query(QueryBuilders.wrapperQuery(queryJson));
        sourceBuilder.ext(Collections.singletonList(new LVectorExtBuilder("lvector", lvectorParams)));

        SearchRequest request = new SearchRequest();
        request.source(sourceBuilder);
        request.indices(indexName);

        SearchResponse response = client.search(request, RequestOptions.DEFAULT);
        System.out.println(response);
      }
    
    //Pre-Filter近似查询
      /**
       * Runs a k-NN search on {@code vector1} with a filter and {@code filter_type=pre_filter},
       * then prints the response.
       *
       * <p>The {@code knn} clause asks for {@code k = 10} results and embeds a range filter
       * {@code field1 >= 0}. The {@code lvector} ext section carries
       * {@code filter_type=pre_filter}, {@code min_score=0.1}, {@code nprobe=20} and
       * {@code reorder_factor=20}.
       *
       * @param indexName name of the index to search
       * @param vector query vector
       * @throws IOException if the search call fails
       */
      public void queryVectorWithPreFilter(String indexName, float[] vector) throws IOException {
        // Range filter: { "range": { "field1": { "gte": 0 } } }
        Map<String, Object> lowerBound = new HashMap<>();
        lowerBound.put("gte", 0);
        Map<String, Object> rangeClause = new HashMap<>();
        rangeClause.put("field1", lowerBound);
        Map<String, Object> filterClause = new HashMap<>();
        filterClause.put("range", rangeClause);

        // knn clause for vector1, with the filter nested inside it.
        Map<String, Object> knnParams = new HashMap<>();
        knnParams.put("vector", vector);
        knnParams.put("k", 10);
        knnParams.put("filter", filterClause);
        Map<String, Object> knnClause = new HashMap<>();
        knnClause.put("vector1", knnParams);
        Map<String, Object> queryBody = new HashMap<>();
        queryBody.put("knn", knnClause);
        String queryJson = new Gson().toJson(queryBody);

        // Search parameters passed through the "lvector" ext section.
        Map<String, String> lvectorParams = new HashMap<>();
        lvectorParams.put("filter_type", "pre_filter");
        lvectorParams.put("min_score", "0.1");
        lvectorParams.put("nprobe", "20");
        lvectorParams.put("reorder_factor", "20");

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.query(QueryBuilders.wrapperQuery(queryJson));
        sourceBuilder.ext(Collections.singletonList(new LVectorExtBuilder("lvector", lvectorParams)));

        SearchRequest request = new SearchRequest();
        request.source(sourceBuilder);
        request.indices(indexName);

        SearchResponse response = client.search(request, RequestOptions.DEFAULT);
        System.out.println(response);
      }
    
    //Post-Filter近似查询
      /**
       * Runs a k-NN search on {@code vector1} with a filter and {@code filter_type=post_filter},
       * then prints the response.
       *
       * <p>The {@code knn} clause asks for {@code k = 10} results and embeds a range filter
       * {@code field1 >= 0}. The {@code lvector} ext section carries
       * {@code filter_type=post_filter}, {@code min_score=0.1}, {@code nprobe=20} and
       * {@code reorder_factor=20}.
       *
       * @param indexName name of the index to search
       * @param vector query vector
       * @throws IOException if the search call fails
       */
      public void queryVectorWithPostFilterType1(String indexName, float[] vector) throws IOException {
        // Range filter: { "range": { "field1": { "gte": 0 } } }
        Map<String, Object> lowerBound = new HashMap<>();
        lowerBound.put("gte", 0);
        Map<String, Object> rangeClause = new HashMap<>();
        rangeClause.put("field1", lowerBound);
        Map<String, Object> filterClause = new HashMap<>();
        filterClause.put("range", rangeClause);

        // knn clause for vector1, with the filter nested inside it.
        Map<String, Object> knnParams = new HashMap<>();
        knnParams.put("vector", vector);
        knnParams.put("k", 10);
        knnParams.put("filter", filterClause);
        Map<String, Object> knnClause = new HashMap<>();
        knnClause.put("vector1", knnParams);
        Map<String, Object> queryBody = new HashMap<>();
        queryBody.put("knn", knnClause);
        String queryJson = new Gson().toJson(queryBody);

        // Search parameters passed through the "lvector" ext section.
        Map<String, String> lvectorParams = new HashMap<>();
        lvectorParams.put("filter_type", "post_filter");
        lvectorParams.put("min_score", "0.1");
        lvectorParams.put("nprobe", "20");
        lvectorParams.put("reorder_factor", "20");

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.query(QueryBuilders.wrapperQuery(queryJson));
        sourceBuilder.ext(Collections.singletonList(new LVectorExtBuilder("lvector", lvectorParams)));

        SearchRequest request = new SearchRequest();
        request.source(sourceBuilder);
        request.indices(indexName);

        SearchResponse response = client.search(request, RequestOptions.DEFAULT);
        System.out.println(response);
      }
    
    //在Post Filter结构中添加过滤条件
      /**
       * Runs a k-NN search on {@code vector1} and applies the filter through the search request's
       * {@code post_filter} section, then prints the response.
       *
       * <p>The {@code knn} clause asks for {@code k = 10} results for the given vector; the
       * condition {@code field1 >= 0} is set via {@code SearchSourceBuilder.postFilter}. No
       * {@code lvector} ext section is attached by this method.
       *
       * @param indexName name of the index to search
       * @param vector query vector
       * @throws IOException if the search call fails
       */
      public void queryVectorWithPostFilterType2(String indexName, float[] vector) throws IOException {
        // knn clause: { "knn": { "vector1": { "vector": [...], "k": 10 } } }
        Map<String, Object> knnParams = new HashMap<>();
        knnParams.put("vector", vector);
        knnParams.put("k", 10);
        Map<String, Object> knnClause = new HashMap<>();
        knnClause.put("vector1", knnParams);
        Map<String, Object> queryBody = new HashMap<>();
        queryBody.put("knn", knnClause);
        String queryJson = new Gson().toJson(queryBody);

        SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
        sourceBuilder.query(QueryBuilders.wrapperQuery(queryJson));
        // The range condition goes into post_filter rather than into the knn clause.
        sourceBuilder.postFilter(QueryBuilders.rangeQuery("field1").gte(0));

        SearchRequest request = new SearchRequest();
        request.source(sourceBuilder);
        request.indices(indexName);

        SearchResponse response = client.search(request, RequestOptions.DEFAULT);
        System.out.println(response);
      }
    
      /**
       * Sends a delete-index request for the given index. The response is not inspected.
       *
       * @param indexName name of the index to delete
       * @throws IOException if the delete call fails
       */
      public void deleteIndex(String indexName) throws IOException {
        client.indices().delete(new DeleteIndexRequest(indexName), RequestOptions.DEFAULT);
      }
    
      /**
       * {@code SearchExtBuilder} that renders a map of string search parameters as a single named
       * object, i.e. {@code "<name>": { "<key>": "<value>", ... }}.
       *
       * <p>The name passed to the constructor is used both as the writeable name and as the JSON
       * object name. Neither constructor argument is copied or null-checked.
       */
      public static class LVectorExtBuilder extends SearchExtBuilder {

        /** Parameters emitted as fields of the named object; stored by reference. */
        final Map<String, String> searchParams;
        /** Section name, e.g. {@code "lvector"} as used by the query methods in this file. */
        protected final String name;

        /**
         * @param name name of the ext section
         * @param searchParams key/value parameters to emit inside the section
         */
        public LVectorExtBuilder(String name, Map<String, String> searchParams) {
          this.name = name;
          this.searchParams = searchParams;
        }

        /** Returns the section name given at construction. */
        @Override
        public String getWriteableName() {
          return name;
        }

        /** Writes only the parameter map (string keys and string values) to the stream. */
        @Override
        public void writeTo(StreamOutput out) throws IOException {
          out.writeMap(searchParams, StreamOutput::writeString, StreamOutput::writeString);
        }

        /** Emits {@code name} as an object whose fields are the entries of the parameter map. */
        @Override
        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
          builder.startObject(name);
          for (Map.Entry<String, String> entry : searchParams.entrySet()) {
            String key = entry.getKey();
            String value = entry.getValue();
            builder.field(key, value);
          }
          builder.endObject();
          return builder;
        }

        /** Two builders are equal when they have the same exact class, name and parameters. */
        @Override
        public boolean equals(Object o) {
          if (o == this) {
            return true;
          }
          if (o == null) {
            return false;
          }
          if (o.getClass() != getClass()) {
            return false;
          }
          LVectorExtBuilder other = (LVectorExtBuilder) o;
          if (!Objects.equals(searchParams, other.searchParams)) {
            return false;
          }
          return Objects.equals(name, other.name);
        }

        /** Hash over the parameter map and the name, consistent with {@link #equals(Object)}. */
        @Override
        public int hashCode() {
          return Objects.hash(searchParams, name);
        }
      }
    }