快速开始

更新时间:
重要

本文中含有需要您注意的重要提示信息,忽略该信息可能对您的业务造成影响,请务必仔细阅读。

信息检索服务-通用搜索开发快速接入文档

警告

此文档已废弃。如果您已经接入并需要查看接入细节,请参考:多阶段流式API-AISearchV2;如果您尚未接入,请使用:标准搜索API - GenericSearch进行接入。

API简介

提供通用检索、检索后处理阶段结果供客户按需使用。

  • 通用搜索结果(common_search):将覆盖标准网页搜索,并支持多种卡片结果,覆盖网页标题、动态摘要、来源网站、发布时间等关键字段。

  • 检索后处理(post_retrieval):提供对通用检索内容的重排、精选。降低对通用搜索结果的解析、选择成本;

    多阶段结果通过SSE流式返回。

基本概念

名词

说明

通用搜索

(common search)

通用搜索,可以提供开放域、较为实时的搜索能力;

检索后处理

(post retrieval)

对检索结果进行rerank,并结合query剔除无关的召回文档,适合在RAG场景中直接使用。

AI搜索

(ai search)

结合用户意图识别、Query改写、通用检索、检索后处理等能力对用户问题提供最好的检索结果

搜索卡片

(search card)

通用搜索对于一些用户问题,提供更具针对性的召回的一种解决方案,如对于天气检索、微博内容、小说内容等,目前支持20余种卡片适配

SSE

(Server-Sent Events)

是一种服务器推送技术,允许服务器端向客户端实时多次发送更新;对于本接口,一次用户搜索请求,会分多批次实时推送到客户端。客户可以基于需要进行接收处理

快速开始

前置条件

  • 与阿里云同学进行需求沟通,完成产品购买,并联系阿里云同学进行开通审核。

  • 开通阿里云账号,并申请子账号AK/SK

  • 前往RAM系统,对子账号添加权限策略:AliyunLinkedMallFullAccess

服务接入点

地域名称

地域ID

公网接入地址

华北3(张家口)

cn-zhangjiakou

linkedmallretrieval.cn-zhangjiakou.aliyuncs.com

示例代码

Java SDK

  • SDK版本

<dependency>
    <groupId>com.aliyun</groupId>
    <artifactId>alibabacloud-linkedmallretrieval20240501</artifactId>
    <version>2.0.0</version>
</dependency>
  • 调用示例

package com.aliyun.linkedretrieval.example;

import com.aliyun.auth.credentials.Credential;
import com.aliyun.auth.credentials.provider.StaticCredentialProvider;
import com.aliyun.core.http.HttpClient;
import com.aliyun.httpcomponent.httpclient.ApacheAsyncHttpClientBuilder;
import com.aliyun.sdk.gateway.pop.Configuration;
import com.aliyun.sdk.gateway.pop.auth.SignatureVersion;
import com.aliyun.sdk.gateway.pop.exception.PopClientException;
import com.aliyun.sdk.service.linkedmallretrieval20240501.AsyncClient;
import com.aliyun.sdk.service.linkedmallretrieval20240501.models.AISearchV2Request;
import com.aliyun.sdk.service.linkedmallretrieval20240501.models.AISearchV2ResponseBody;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import darabonba.core.ResponseIterable;
import darabonba.core.ResponseIterator;
import darabonba.core.client.ClientOverrideConfiguration;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.time.Duration;
import java.util.UUID;

/**
 * JUnit examples showing how to call the AISearchV2 SSE API and consume the streamed events.
 *
 * <p>Credentials are read from the {@code ACCESS_KEY} / {@code ACCESS_SECRET} environment
 * variables (and the {@code *_NO_PERMISSION} variants for the permission-failure example).
 */
public class AISearchSSEInvoker {

    /** Client shared by the tests; built in {@link #setup()} and released in {@link #cleanup()}. */
    private AsyncClient     asyncClient;

    /** Builds the shared client from the AK/SK stored in the environment. */
    @Before
    public void setup(){
        StaticCredentialProvider normalProvider = StaticCredentialProvider.create(
            Credential.builder()
                .accessKeyId(System.getenv("ACCESS_KEY"))
                .accessKeySecret(System.getenv("ACCESS_SECRET"))
                .build()
        );
        asyncClient = getAsyncClient(normalProvider);
    }

    /** Closes the shared client if it was created. */
    @After
    public void cleanup(){
        if(asyncClient != null){
            asyncClient.close();
        }
    }


    /** Sends two queries that share one session id through the shared client. */
    @Test
    public void testSuccessQuery(){
        String sessionId = UUID.randomUUID().toString();
        invokeSearch(
            asyncClient,
            "无锡拈花湾旅游攻略",
            sessionId);
        invokeSearch(
            asyncClient,
            "历年高考分数线",
            sessionId);

    }

    /** Sends a query longer than 100 characters, which is reported as an on_error_event. */
    @Test
    public void testBizErrorEvent(){
        // The query exceeds 100 characters, so the stream carries an on_error_event.
        invokeSearch(
            asyncClient,
            "从9月11日召开的2023北京数字交通大会获悉,目前我国超过多少公里公路完成智能化升级改造,京雄高速河北段、沪杭甬高速、杭州绕城 复线、成宜高速等一批智慧公路已建成运行。本次大会由中国交通运输协会等单位主办,以“数字·新时代、交通·新未来”为主题?",
            UUID.randomUUID().toString());
    }

    /** Calls the API with a sub-account AK/SK that lacks permission, which makes the request fail. */
    @Test
    public void testPopClientException(){
        StaticCredentialProvider noPermissionProvider = StaticCredentialProvider.create(
            Credential.builder()
                .accessKeyId(System.getenv("ACCESS_KEY_NO_PERMISSION"))
                .accessKeySecret(System.getenv("ACCESS_SECRET_NO_PERMISSION"))
                .build());
        // This client is separate from the shared field, so cleanup() never sees it:
        // it has to be closed here or its connections leak.
        AsyncClient noPermissionClient = getAsyncClient(noPermissionProvider);
        try {
            // The request is made with an AK/SK that has no permission, so it is rejected.
            invokeSearch(noPermissionClient, "无锡拈花湾旅游攻略", UUID.randomUUID().toString());
        } finally {
            noPermissionClient.close();
        }
    }

    /**
     * Builds an {@link AsyncClient} for the cn-zhangjiakou endpoint.
     *
     * <p>Using a sub-account AK/SK is recommended; the sub-account must be granted the
     * {@code AliyunLinkedMallFullAccess} policy. The returned client can be reused across
     * requests; a client that is not reused must be closed by the caller after use.
     *
     * @param credentialProvider provider holding the AK/SK used to sign requests
     * @return a client configured with V3 signatures over HTTPS
     */
    private static AsyncClient getAsyncClient(StaticCredentialProvider credentialProvider){

        // Build the HTTP client; for SSE requests the timeouts and connection limits may be raised.
        HttpClient httpClient = new ApacheAsyncHttpClientBuilder()
            .connectionTimeout(Duration.ofSeconds(60))
            .responseTimeout(Duration.ofSeconds(60))
            .maxConnections(256)
            .maxConnectionsPerRoute(256)
            // Proxy settings can be supplied here if needed.
            //.proxy()
            .maxIdleTimeOut(Duration.ofSeconds(60))
            .build();

        // Build the service client on top of the HTTP client.
        return AsyncClient.builder()
            .region("cn-zhangjiakou")
            .httpClient(httpClient)
            .credentialsProvider(credentialProvider)
            .serviceConfiguration(Configuration.create().setSignatureVersion(SignatureVersion.V3))
            .overrideConfiguration(
                ClientOverrideConfiguration.create().setProtocol("HTTPS")
                    .setEndpointOverride("linkedmallretrieval.cn-zhangjiakou.aliyuncs.com")
            ).build();
    }

    /**
     * Runs one search with a fixed {@code OneYear} time range and prints every streamed event.
     *
     * <p>Failures are printed rather than rethrown so that the examples keep running.
     *
     * @param client    client used to send the request
     * @param query     user query
     * @param sessionId session id shared by the requests of one conversation
     */
    private static void invokeSearch(AsyncClient client, String query, String sessionId){
        AISearchV2Request request = AISearchV2Request.builder()
            .query(query)
            .sessionId(sessionId)
            .timeRange("OneYear")
            .build();
        try{
            ResponseIterable<AISearchV2ResponseBody> responseBodies = client.aISearchV2WithResponseIterable(request);
            ResponseIterator<AISearchV2ResponseBody> bodyIterator = responseBodies.iterator();
            while(bodyIterator.hasNext()){
                AISearchV2ResponseBody event = bodyIterator.next();
                String requestId = event.getRequestId();
                String eventName = event.getHeader() == null ? null : event.getHeader().getEvent();
                String payload = event.getPayload();

                if(eventName == null){
                    // Switching on a null String throws NullPointerException, which would
                    // abort the whole stream; report the event and keep reading instead.
                    System.err.println("[unknown_event]:" + requestId + ", payload: " + payload);
                    continue;
                }

                switch (eventName){
                    case "on_common_search_end":
                        // Common search result; the payload is JSON (see the API documentation for its format).
                        JsonObject commonSearchRoot = JsonParser.parseString(payload).getAsJsonObject();
                        JsonArray pageItems = commonSearchRoot.getAsJsonArray("pageItems");
                        // getAsJsonArray returns null when the member is absent.
                        int pageItemCount = pageItems == null ? 0 : pageItems.size();
                        System.out.println("[on_common_search_end] requestId:" + requestId + ", pageItems:" + pageItemCount + ", payload:" + payload);
                        break;
                    case "on_post_retrieval_end":
                        JsonArray postRetrievalRoot = JsonParser.parseString(payload).getAsJsonArray();
                        System.out.println("[on_post_retrieval_end] requestId: " + requestId + ", documents:" + postRetrievalRoot.size());
                        break;
                    case "on_error_event":
                        System.err.println("[on_error_event]:" + requestId +", payload: " + payload);
                        break;
                    default:
                        // Other events are ignored by this example.
                        break;
                }
            }
        }catch (PopClientException pe){
            // Error reported for the API call itself (e.g. the no-permission example).
            pe.printStackTrace();
        }catch (Exception e){
            e.printStackTrace();
        }
    }

}

Python SDK

  • SDK依赖

pip install alibabacloud-tea-openapi-sse
  • 调用示例

import asyncio
import json
import uuid

from alibabacloud_tea_sse.exceptions import TeaException

# 注意:此依赖包不是SDK中提供,参见下面的代码块
from aliyun.linked_retrieval.base import AISearch


async def test_success():
    """Run two searches that share one session id and one AISearch client.

    The client is closed in a ``finally`` block so its connection pool is
    released even when one of the searches raises.
    """
    session_id = str(uuid.uuid4())
    # Reuse the same client (and its connections) for both requests.
    ai_search = AISearch()
    try:
        await invoke_search(
            ai_search,
            query="敬银山一日游",
            time_range="OneYear",
            session_id=session_id)
        await invoke_search(
            ai_search,
            query="如何在家启蒙孩子英文",
            time_range="NoLimit",
            session_id=session_id)
    finally:
        await ai_search.async_close()


# An over-long query is reported by the service as an error event.
async def test_biz_error_event():
    """Send a query that is too long and print the resulting events."""
    client = AISearch()
    conversation_id = str(uuid.uuid4())
    overlong_query = "从9月11日召开的2023北京数字交通大会获悉,目前我国超过多少公里公路完成智能化升级改造,京雄高速河北段、沪杭甬高速、杭州绕城 复线、成宜高速等一批智慧公路已建成运行。本次大会由中国交通运输协会等单位主办,以“数字·新时代、交通·新未来”为主题?"
    try:
        await invoke_search(
            client,
            query=overlong_query,
            session_id=conversation_id,
            time_range="NoLimit",
        )
    finally:
        # Always release the connection pool, even if the search raised.
        await client.async_close()


# When the AK/SK does not exist, a TeaException is raised and invoke_search prints:
# api exception, requestId:7F076157-CB45-5DEF-B84D-77D951C5BB72, code:InvalidAccessKeyId.NotFound, message:Specified access key is not found.
async def test_api_error():
    """Call the API with credentials that do not exist."""
    client = AISearch(access_key_id="not_exist", access_key_secret="not_exist")
    try:
        conversation_id = str(uuid.uuid4())
        await invoke_search(
            client,
            query="敬银山一日游",
            session_id=conversation_id,
            time_range="NoLimit",
        )
    finally:
        # Always release the connection pool, even if the search raised.
        await client.async_close()


async def invoke_search(ai_search: AISearch, query: str, session_id: str, time_range: str):
    """Run one SSE search and print a summary line for every handled event.

    :param ai_search: client whose ``do_sse_query`` provides the event stream
    :param query: user query
    :param session_id: session id shared by the requests of one conversation
    :param time_range: time range filter forwarded to the API
    :return: None; a TeaException raised by the call is printed, not re-raised
    """
    try:
        async for res in await ai_search.do_sse_query(query, session_id, time_range):
            event = json.loads(res.get("event").data)
            request_id = event.get("requestId")
            # An event without a header must not abort the stream with AttributeError.
            header = event.get("header") or {}
            payload = event.get("payload")

            event_name = header.get("event")
            server_cost = header.get("responseTime")
            if event_name == "on_common_search_end":
                common_search_root = json.loads(payload)
                # "pageItems" may be absent; count it as zero items instead of failing in len().
                page_items = common_search_root.get("pageItems") or []
                print(
                    f"[on_common_search_end] requestId:{request_id}, serverCost:{server_cost}, pageItems:{len(page_items)}, payload:{payload}")
            elif event_name == "on_post_retrieval_end":
                post_retrieval_root = json.loads(payload)
                print(
                    f"[on_post_retrieval_end] requestId:{request_id}, serverCost:{server_cost}, post_retrieval:{len(post_retrieval_root)}")
            elif event_name == "on_error_event":
                print(f"[on_error_event] requestId:{request_id}, serverCost:{server_cost}, payload:{payload}")
    except TeaException as e:
        code = e.code
        # Guard against an exception that carries no data dict.
        error_data = e.data or {}
        request_id = error_data.get("RequestId")
        message = error_data.get("Message")
        print(f"api exception, requestId:{request_id}, code:{code}, message:{message}")


# Script entry point: only the success scenario runs by default; uncomment the
# lines below to also run the error scenarios.
if __name__ == "__main__":
    asyncio.run(test_success())
    # asyncio.run(test_biz_error_event())
    # asyncio.run(test_api_error())
  • aliyun.linked_retrieval.base

    这部分代码SDK不包含,可以直接使用,无需修改

import os
import ssl
from typing import Optional

import aiohttp
import certifi
from alibabacloud_tea_openapi_sse import models as open_api_models
from alibabacloud_tea_openapi_sse.client import Client as OpenApiClient
from alibabacloud_tea_util_sse import models as util_models

"""
    AISearch客户端,支持多请求复用;
"""


class AISearch:
    """Client for the AISearchV2 SSE API; one instance can be reused for many requests.

    Call ``async_close()`` (or ``close()``) when done to release the connection pool.
    """

    def __init__(self, access_key_id: str = None, access_key_secret: str = None) -> None:
        """
        :param access_key_id: AK; falls back to the ``ACCESS_KEY`` environment variable
        :param access_key_secret: SK; falls back to the ``ACCESS_SECRET`` environment variable
        """
        self.endpoint = "linkedmallretrieval.cn-zhangjiakou.aliyuncs.com"
        self._api_info = self._create_api_info()
        self._runtime = util_models.RuntimeOptions(
            read_timeout=60 * 1000,
            connect_timeout=60 * 1000
        )

        # Resolve and check the credentials BEFORE opening the HTTP session, so a
        # failed check does not leave an unclosed aiohttp session behind.
        self.access_key_id = access_key_id if access_key_id is not None else os.environ.get('ACCESS_KEY')
        self.access_key_secret = access_key_secret if access_key_secret is not None else os.environ.get(
            'ACCESS_SECRET')
        assert self.access_key_id is not None and self.access_key_secret is not None, \
            "access key id/secret must be passed in or set via ACCESS_KEY / ACCESS_SECRET"

        # Custom connection pool and SSL configuration.
        ssl_context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        ssl_context.load_verify_locations(certifi.where())
        self._session = aiohttp.ClientSession(
            connector=aiohttp.TCPConnector(
                limit=256,
                limit_per_host=256,
                keepalive_timeout=600,
                ssl=ssl_context
            )
        )

        self._init_app()

    def _init_app(self):
        """Create the OpenAPI client bound to the credentials, session and endpoint."""
        config = open_api_models.Config(
            access_key_id=self.access_key_id,
            access_key_secret=self.access_key_secret,
            session=self._session,
            endpoint=self.endpoint,
        )
        self._client = OpenApiClient(config)

    # Close the client once it is no longer needed to release the connection pool.
    async def async_close(self):
        """Close the underlying HTTP session from async code."""
        await self._session.close()

    def close(self):
        """Close the underlying HTTP session from synchronous code.

        ``ClientSession.close()`` is a coroutine, so merely calling it closes
        nothing. Without a running event loop the coroutine is run to
        completion; inside a running loop (where blocking is impossible) it is
        scheduled as a task, and ``async_close()`` is the better choice there.
        """
        if self._session.closed:
            return
        # Function-scope import: asyncio is only needed by this method.
        import asyncio
        try:
            running_loop = asyncio.get_running_loop()
        except RuntimeError:
            running_loop = None
        if running_loop is None:
            asyncio.run(self._session.close())
        else:
            # Keep a reference so the task is not garbage-collected before it runs.
            self._close_task = running_loop.create_task(self._session.close())

    def _create_api_info(self) -> open_api_models.Params:
        """
        Build the static description of the AISearchV2 API call.
        @return: OpenApi.Params
        """
        params = open_api_models.Params(
            # API name
            action='AISearchV2',
            # API version
            version='2024-05-01',
            # Protocol
            protocol='HTTPS',
            # HTTP method
            method='GET',
            auth_type='AK',
            style='ROA',
            # API path
            pathname='/linked-retrieval/linked-retrieval-entry/v2/linkedRetrieval/commands/aiSearch',
            # Request body format
            req_body_type='formData',
            # Response body format
            body_type='sse'
        )
        return params

    async def do_sse_query(self,
                           search_query: str,
                           session_id: Optional[str] = None,
                           time_range: Optional[str] = None):
        """
        Start an SSE request.
        :param search_query: user query, length: [2,100]
        :param session_id: session id for multi-turn interaction
        :param time_range: time range, one of: OneDay, OneWeek, OneMonth, OneYear, NoLimit
        :return: the object returned by ``call_sse_api_async``; callers await it and
                 iterate the result with ``async for``
        """

        assert self._client is not None
        request = open_api_models.OpenApiRequest(
            query={
                "query": search_query,
                "sessionId": session_id,
                "timeRange": time_range
            }
        )
        sse_receiver = self._client.call_sse_api_async(params=self._api_info, request=request, runtime=self._runtime)
        return sse_receiver

Go SDK

  • SDK依赖

require (
  github.com/alibabacloud-go/tea-utils/v2 v2.0.7
)
  • 调用示例

package main

import (
	"encoding/json"
	"fmt"
	"go_example/aliyun/linked_retrieval"
	"time"
)

// main runs three sample queries: two back to back, then a third after a
// one-second pause.
func main() {
	for _, q := range []string{"董宇辉", "黑神话悟空"} {
		invokeQuery(q)
	}
	time.Sleep(1 * time.Second)
	invokeQuery("董")

}

// invokeQuery sends one query with a fixed session id and a "OneYear" time
// range, then prints every handled event received from the SSE stream.
func invokeQuery(query string) {
	timeRange := "OneYear"
	sessionId := "testSessionId"
	fmt.Printf("\n\n\n\n************ %s", query)

	events, err := linked_retrieval.DoSseQuery(query, &sessionId, &timeRange)
	if err != nil {
		fmt.Printf("query from linked_retrieval failed, %s", err)
		return
	}

	for event := range events {
		// Data is a pointer; skip events that carry no data instead of
		// dereferencing nil.
		if event.Data == nil {
			continue
		}

		var eventData linked_retrieval.EventData
		if err := json.Unmarshal([]byte(*event.Data), &eventData); err != nil {
			// A malformed event would otherwise be processed as an empty struct.
			fmt.Printf("skip malformed SSE event, %s \n\n", err)
			continue
		}

		// Request id of the call; provide it when troubleshooting.
		requestId := eventData.RequestID

		// Name of the current event. This example handles on_common_search_end,
		// on_post_retrieval_end and on_error_event.
		eventName := eventData.Header.Event

		// Server-side latency of this event, measured from when the server
		// received the request.
		serverRT := eventData.Header.ResponseTime

		// Message content; it is itself a JSON document (see the API documentation).
		payload := eventData.Payload

		switch eventName {
		case "on_common_search_end", "on_post_retrieval_end":
			fmt.Printf("[%s] %s serverRt:%d, payload:%s \n\n", requestId, eventName, serverRT, payload)
		case "on_error_event":
			fmt.Printf("[%s] %s errorPayload:%s \n\n", requestId, eventName, payload)
		}
	}
}
  • go_example/aliyun/linked_retrieval

package linked_retrieval

import (
	"fmt"
	"io"
	"os"

	openapi "github.com/alibabacloud-go/darabonba-openapi/v2/client"
	openapiutil "github.com/alibabacloud-go/openapi-util/service"
	util "github.com/alibabacloud-go/tea-utils/v2/service"
	"github.com/alibabacloud-go/tea/tea"
)

const (
	// Public endpoint of the service (cn-zhangjiakou region).
	endpoint           = "linkedmallretrieval.cn-zhangjiakou.aliyuncs.com"
	// Request path of the AISearchV2 API, used as Params.Pathname.
	pathName           = "/linked-retrieval/linked-retrieval-entry/v2/linkedRetrieval/commands/aiSearch"
	// Value passed as openapi.Config.ReadTimeout in DoSseQuery.
	// NOTE(review): presumably milliseconds, going by the name; the Java and
	// Python samples use 60s for the same API, so 5000 may be too short for a
	// streamed response. Confirm.
	timeoutMillSeconds = 5000
)

// createApiInfo returns the static description of the AISearchV2 API call.
func createApiInfo() *openapi.Params {
	return &openapi.Params{
		// API name and version.
		Action:  tea.String("AISearchV2"),
		Version: tea.String("2024-05-01"),
		// Transport: protocol and HTTP method.
		Protocol: tea.String("HTTPS"),
		Method:   tea.String("GET"),
		AuthType: tea.String("AK"),
		Style:    tea.String("ROA"),
		// Request path.
		Pathname: tea.String(pathName),
		// Request body format.
		ReqBodyType: tea.String("json"),
		// Response body format. It must be "binary": only then does CallApi pass
		// the raw response body through so that it can be read with ReadAsSSE.
		BodyType: tea.String("binary"),
	}
}

// DoSseQuery calls the AISearchV2 API and returns a channel of SSE events.
//
// query is the user query. sessionId and timeRange are optional pointers; a
// nil pointer is sent as an empty string. Credentials are read from the
// ACCESS_KEY_AIGC_01 / ACCESS_SECRET_AIGC_01 environment variables.
//
// The returned error is non-nil when credentials are missing, the client
// cannot be created, the call fails, the response body has an unexpected type,
// or an SSE read error is already pending when the stream is opened.
func DoSseQuery(query string, sessionId *string, timeRange *string) (<-chan util.SSEEvent, error) {
	// Set your sub-account AK/SK in the environment and adjust the variable
	// names below to match.
	accessKeyID := os.Getenv("ACCESS_KEY_AIGC_01")
	accessKeySecret := os.Getenv("ACCESS_SECRET_AIGC_01")
	if accessKeyID == "" || accessKeySecret == "" {
		// Fail fast with a clear message instead of sending an unsigned request.
		return nil, fmt.Errorf("missing credentials: set the ACCESS_KEY_AIGC_01 and ACCESS_SECRET_AIGC_01 environment variables")
	}

	config := &openapi.Config{
		AccessKeyId:     tea.String(accessKeyID),
		AccessKeySecret: tea.String(accessKeySecret),
		Endpoint:        tea.String(endpoint),
		ReadTimeout:     tea.Int(timeoutMillSeconds),
	}
	client, err := openapi.NewClient(config)
	if err != nil {
		return nil, err
	}

	params := createApiInfo()
	// Query parameters.
	queries := map[string]interface{}{
		"query":     tea.String(query),
		"sessionId": tea.StringValue(sessionId),
		"timeRange": tea.StringValue(timeRange),
	}

	// Runtime options (defaults).
	runtime := &util.RuntimeOptions{}
	request := &openapi.OpenApiRequest{
		Query: openapiutil.Query(queries),
	}
	// The result is a map holding the response body ("body"), the response
	// headers and the HTTP status code.
	resp, err := client.CallApi(params, request, runtime)
	if err != nil {
		return nil, err
	}

	// A checked assertion turns an unexpected body into an error instead of a panic.
	body, ok := resp["body"].(io.ReadCloser)
	if !ok {
		return nil, fmt.Errorf("unexpected response body type %T, want io.ReadCloser", resp["body"])
	}

	// Read the SSE stream incrementally.
	events, sseErrors := util.ReadAsSSE(body)

	// Non-blocking check: only an error that is already available is reported.
	// NOTE(review): errors produced later while streaming are not observed
	// here; confirm how ReadAsSSE signals them to the consumer.
	select {
	case sseError := <-sseErrors:
		err = sseError
	default:
		// No error pending.
		err = nil
	}
	return events, err
}

// EventData is the top-level JSON object carried in the data of one SSE event.
type EventData struct {
	Payload   string `json:"payload"`
	RequestID string `json:"requestId"`
	Header    Header `json:"header"`
}

// Header is the "header" object of an EventData; Event holds the event name
// (for example "on_common_search_end").
type Header struct {
	EventID      string `json:"eventId"`
	ResponseTime int    `json:"responseTime"`
	Event        string `json:"event"`
}