快速开始
本文中含有需要您注意的重要提示信息,忽略该信息可能对您的业务造成影响,请务必仔细阅读。
信息检索服务-通用搜索开发快速接入文档
此文档已废弃,如果您已经接入,需要查看接入细节,请使用:多阶段流式API-AISearchV2。 如果您尚未接入,请使用:标准搜索API - GenericSearch进行接入
API简介
提供通用检索、检索后处理阶段结果供客户按需使用。
通用搜索结果(common_search):将覆盖标准网页搜索,并支持多种卡片结果,覆盖网页标题、动态摘要、来源网站、发布时间等关键字段。
检索后处理(post_retrieval):提供对通用检索内容的重排、精选。降低对通用搜索结果的解析、选择成本;
多阶段结果通过SSE流式返回。
基本概念
名词 | 说明 |
通用搜索 (common search) | 通用搜索,可以提供开放域、较为实时的搜索能力; |
检索后处理 (post retrieval) | 对检索结果进行rerank,并结合query剔除无关的召回文档,适合作为RAG场景直接使用。 |
AI搜索 (ai search) | 结合用户意图识别、Query改写、通用检索、检索后处理等能力对用户问题提供最好的检索结果 |
搜索卡片 (search card) | 通用搜索对于一些用户问题,提供更具针对性的召回的一种解决方案,如对于天气检索、微博内容、小说内容等,目前支持20余种卡片适配 |
SSE (Server-Sent Events) | 是一种服务器推送技术,允许服务器端向客户端实时多次发送更新;对于本接口,一次用户搜索请求的结果会分多批次实时推送到客户端,客户可以根据需要接收并处理 |
快速开始
前置条件
与阿里云同学进行需求沟通,完成产品购买,并联系阿里云同学进行开通审核。
开通阿里云账号,并申请子账号AK/SK
前往RAM系统,对子账号添加权限策略:AliyunLinkedMallFullAccess
服务接入点
地域名称 | 地域ID | 公网接入地址 |
华北3(张家口) | cn-zhangjiakou | linkedmallretrieval.cn-zhangjiakou.aliyuncs.com |
示例代码
Java SDK
SDK版本
<dependency>
<groupId>com.aliyun</groupId>
<artifactId>alibabacloud-linkedmallretrieval20240501</artifactId>
<version>2.0.0</version>
</dependency>
调用示例
package com.aliyun.linkedretrieval.example;
import com.aliyun.auth.credentials.Credential;
import com.aliyun.auth.credentials.provider.StaticCredentialProvider;
import com.aliyun.core.http.HttpClient;
import com.aliyun.httpcomponent.httpclient.ApacheAsyncHttpClientBuilder;
import com.aliyun.sdk.gateway.pop.Configuration;
import com.aliyun.sdk.gateway.pop.auth.SignatureVersion;
import com.aliyun.sdk.gateway.pop.exception.PopClientException;
import com.aliyun.sdk.service.linkedmallretrieval20240501.AsyncClient;
import com.aliyun.sdk.service.linkedmallretrieval20240501.models.AISearchV2Request;
import com.aliyun.sdk.service.linkedmallretrieval20240501.models.AISearchV2ResponseBody;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
import darabonba.core.ResponseIterable;
import darabonba.core.ResponseIterator;
import darabonba.core.client.ClientOverrideConfiguration;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.time.Duration;
import java.util.UUID;
/**
 * JUnit-driven example that calls the AISearchV2 streaming API and prints each
 * server-sent event to the console.
 *
 * <p>Credentials are read from the {@code ACCESS_KEY} / {@code ACCESS_SECRET}
 * environment variables (and the {@code *_NO_PERMISSION} variants for the
 * permission-failure case).
 */
public class AISearchSSEInvoker {

    /** Shared client, created before and closed after every test. */
    private AsyncClient asyncClient;

    @Before
    public void setup() {
        StaticCredentialProvider normalProvider = StaticCredentialProvider.create(
                Credential.builder()
                        .accessKeyId(System.getenv("ACCESS_KEY"))
                        .accessKeySecret(System.getenv("ACCESS_SECRET"))
                        .build()
        );
        asyncClient = getAsyncClient(normalProvider);
    }

    @After
    public void cleanup() {
        if (asyncClient != null) {
            asyncClient.close();
        }
    }

    /** Issues two queries that share one session id. */
    @Test
    public void testSuccessQuery() {
        String sessionId = UUID.randomUUID().toString();
        invokeSearch(
                asyncClient,
                "无锡拈花湾旅游攻略",
                sessionId);
        invokeSearch(
                asyncClient,
                "历年高考分数线",
                sessionId);
    }

    /** A query longer than 100 characters is reported through an on_error_event. */
    @Test
    public void testBizErrorEvent() {
        invokeSearch(
                asyncClient,
                "从9月11日召开的2023北京数字交通大会获悉,目前我国超过多少公里公路完成智能化升级改造,京雄高速河北段、沪杭甬高速、杭州绕城 复线、成宜高速等一批智慧公路已建成运行。本次大会由中国交通运输协会等单位主办,以“数字·新时代、交通·新未来”为主题?",
                UUID.randomUUID().toString());
    }

    /** Calls the API with a sub-account AK/SK that lacks permission; the request fails. */
    @Test
    public void testPopClientException() {
        StaticCredentialProvider noPermissionProvider = StaticCredentialProvider.create(
                Credential.builder()
                        .accessKeyId(System.getenv("ACCESS_KEY_NO_PERMISSION"))
                        .accessKeySecret(System.getenv("ACCESS_SECRET_NO_PERMISSION"))
                        .build());
        // This client is separate from the shared field, so cleanup() will not
        // close it; close it here to release its connection pool.
        AsyncClient noPermissionClient = getAsyncClient(noPermissionProvider);
        try {
            invokeSearch(noPermissionClient, "无锡拈花湾旅游攻略", UUID.randomUUID().toString());
        } finally {
            noPermissionClient.close();
        }
    }

    /**
     * Builds an {@link AsyncClient} for the cn-zhangjiakou endpoint.
     *
     * <p>Using a sub-account AK/SK is recommended; the sub-account needs the
     * AliyunLinkedMallFullAccess policy. The client can be reused across
     * requests; if it is not reused, call {@code close()} after use.
     *
     * @param credentialProvider provider holding the AK/SK used to sign requests
     * @return a configured client; the caller is responsible for closing it
     */
    private static AsyncClient getAsyncClient(StaticCredentialProvider credentialProvider) {
        // HTTP client: timeouts and connection limits may be raised for SSE requests.
        HttpClient httpClient = new ApacheAsyncHttpClientBuilder()
                .connectionTimeout(Duration.ofSeconds(60))
                .responseTimeout(Duration.ofSeconds(60))
                .maxConnections(256)
                .maxConnectionsPerRoute(256)
                // A proxy can be configured here if needed.
                //.proxy()
                .maxIdleTimeOut(Duration.ofSeconds(60))
                .build();
        return AsyncClient.builder()
                .region("cn-zhangjiakou")
                .httpClient(httpClient)
                .credentialsProvider(credentialProvider)
                .serviceConfiguration(Configuration.create().setSignatureVersion(SignatureVersion.V3))
                .overrideConfiguration(
                        ClientOverrideConfiguration.create().setProtocol("HTTPS")
                                .setEndpointOverride("linkedmallretrieval.cn-zhangjiakou.aliyuncs.com")
                ).build();
    }

    /**
     * Sends one search request and prints every streamed event.
     *
     * <p>Events without a header or event name are skipped. Exceptions are
     * printed rather than propagated, matching the example's console-only style.
     *
     * @param client    client used to issue the request
     * @param query     user query text
     * @param sessionId session id shared by the requests of one conversation
     */
    private static void invokeSearch(AsyncClient client, String query, String sessionId) {
        AISearchV2Request request = AISearchV2Request.builder()
                .query(query)
                .sessionId(sessionId)
                .timeRange("OneYear")
                .build();
        try {
            ResponseIterable<AISearchV2ResponseBody> responseBodies = client.aISearchV2WithResponseIterable(request);
            ResponseIterator<AISearchV2ResponseBody> bodyIterator = responseBodies.iterator();
            while (bodyIterator.hasNext()) {
                AISearchV2ResponseBody event = bodyIterator.next();
                // Guard against malformed events: switching on a null String throws NPE.
                if (event == null || event.getHeader() == null || event.getHeader().getEvent() == null) {
                    continue;
                }
                String requestId = event.getRequestId();
                String eventName = event.getHeader().getEvent();
                String payload = event.getPayload();
                switch (eventName) {
                    case "on_common_search_end": {
                        // Common search result; the payload is a JSON object.
                        JsonObject commonSearchRoot = JsonParser.parseString(payload).getAsJsonObject();
                        // "pageItems" may be missing or not an array; report 0 instead of failing.
                        int pageItemCount = 0;
                        if (commonSearchRoot.has("pageItems") && commonSearchRoot.get("pageItems").isJsonArray()) {
                            JsonArray pageItems = commonSearchRoot.getAsJsonArray("pageItems");
                            pageItemCount = pageItems.size();
                        }
                        System.out.println("[on_common_search_end] requestId:" + requestId + ", pageItems:" + pageItemCount + ", payload:" + payload);
                        break;
                    }
                    case "on_post_retrieval_end": {
                        // Post-retrieval result; the payload is a JSON array of documents.
                        JsonArray postRetrievalRoot = JsonParser.parseString(payload).getAsJsonArray();
                        System.out.println("[on_post_retrieval_end] requestId: " + requestId + ", documents:" + postRetrievalRoot.size());
                        break;
                    }
                    case "on_error_event":
                        System.err.println("[on_error_event]:" + requestId + ", payload: " + payload);
                        break;
                    default:
                        // Other event types are not handled by this example.
                        break;
                }
            }
        } catch (PopClientException pe) {
            // Thrown by the SDK for gateway-level failures such as the
            // missing-permission case exercised by testPopClientException.
            pe.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
Python SDK
SDK依赖
pip install alibabacloud-tea-openapi-sse
调用示例
import asyncio
import json
import uuid
from alibabacloud_tea_sse.exceptions import TeaException
# 注意:此依赖包不是SDK中提供,参见下面的代码块
from aliyun.linked_retrieval.base import AISearch
async def test_success():
    """Run two searches in the same session over one reused AISearch client.

    The client is closed in a ``finally`` block so its connection pool is
    released even when one of the searches raises.
    """
    session_id = str(uuid.uuid4())
    # One client instance reuses its connections across requests.
    ai_search = AISearch()
    try:
        await invoke_search(
            ai_search,
            query="敬银山一日游",
            time_range="OneYear",
            session_id=session_id)
        await invoke_search(
            ai_search,
            query="如何在家启蒙孩子英文",
            time_range="NoLimit",
            session_id=session_id)
    finally:
        await ai_search.async_close()
# An over-long query is reported by the service through an error event.
async def test_biz_error_event():
    """Send a query that exceeds the length limit, then close the client."""
    client = AISearch()
    conversation_id = str(uuid.uuid4())
    over_long_query = "从9月11日召开的2023北京数字交通大会获悉,目前我国超过多少公里公路完成智能化升级改造,京雄高速河北段、沪杭甬高速、杭州绕城 复线、成宜高速等一批智慧公路已建成运行。本次大会由中国交通运输协会等单位主办,以“数字·新时代、交通·新未来”为主题?"
    try:
        await invoke_search(client, query=over_long_query, time_range="NoLimit", session_id=conversation_id)
    finally:
        await client.async_close()
# When the AK/SK does not exist, a TeaException is raised and the sample prints:
# api exception, requestId:7F076157-CB45-5DEF-B84D-77D951C5BB72, code:InvalidAccessKeyId.NotFound, message:Specified access key is not found.
async def test_api_error():
    """Issue a search with credentials that do not exist, then close the client."""
    client = AISearch(access_key_id="not_exist", access_key_secret="not_exist")
    try:
        conversation_id = str(uuid.uuid4())
        await invoke_search(client, query="敬银山一日游", time_range="NoLimit", session_id=conversation_id)
    finally:
        await client.async_close()
async def invoke_search(ai_search: AISearch, query: str, session_id: str, time_range: str):
    """Run one SSE search and print a summary line for every received event.

    :param ai_search: client used to issue the request
    :param query: user query text
    :param session_id: session id shared by the requests of one conversation
    :param time_range: time range filter passed through to the API
    :return: None; a TeaException raised by the call is caught and printed
    """
    try:
        async for res in await ai_search.do_sse_query(query, session_id, time_range):
            event = json.loads(res.get("event").data)
            request_id = event.get("requestId")
            # Tolerate an event without a header instead of failing on None.get().
            header = event.get("header") or {}
            payload = event.get("payload")
            event_name = header.get("event")
            server_cost = header.get("responseTime")
            if event_name == "on_common_search_end":
                common_search_root = json.loads(payload)
                # "pageItems" may be absent; treat that as an empty result list.
                page_items = common_search_root.get("pageItems") or []
                print(
                    f"[on_common_search_end] requestId:{request_id}, serverCost:{server_cost}, pageItems:{len(page_items)}, payload:{payload}")
            elif event_name == "on_post_retrieval_end":
                post_retrieval_root = json.loads(payload)
                print(
                    f"[on_post_retrieval_end] requestId:{request_id}, serverCost:{server_cost}, post_retrieval:{len(post_retrieval_root)}")
            elif event_name == "on_error_event":
                print(f"[on_error_event] requestId:{request_id}, serverCost:{server_cost}, payload:{payload}")
    except TeaException as e:
        code = e.code
        # e.data may be missing for some failures; avoid raising inside the handler.
        error_data = e.data or {}
        request_id = error_data.get("RequestId")
        message = error_data.get("Message")
        print(f"api exception, requestId:{request_id}, code:{code}, message:{message}")
if __name__ == "__main__":
    # Script entry point: run the success scenario. Swap in one of the
    # commented calls below to exercise the error scenarios instead.
    entry_coroutine = test_success()
    asyncio.run(entry_coroutine)
    # asyncio.run(test_biz_error_event())
    # asyncio.run(test_api_error())
aliyun.linked_retrieval.base
这部分代码SDK不包含,可以直接使用,无需修改
import os
import ssl
from typing import Optional
import aiohttp
import certifi
from alibabacloud_tea_openapi_sse import models as open_api_models
from alibabacloud_tea_openapi_sse.client import Client as OpenApiClient
from alibabacloud_tea_util_sse import models as util_models
class AISearch:
    """AISearch client; one instance can be reused for multiple requests.

    Credentials come from the constructor arguments or, when omitted, from the
    ``ACCESS_KEY`` / ``ACCESS_SECRET`` environment variables. Call
    ``async_close()`` (or ``close()``) when finished to release the
    connection pool.
    """

    def __init__(self, access_key_id: str = None, access_key_secret: str = None) -> None:
        # Resolve and validate credentials before any network resource is
        # created, so a missing key does not leave an unclosed aiohttp session.
        self.access_key_id = access_key_id if access_key_id is not None else os.environ.get('ACCESS_KEY')
        self.access_key_secret = access_key_secret if access_key_secret is not None else os.environ.get(
            'ACCESS_SECRET')
        assert self.access_key_id is not None and self.access_key_secret is not None, \
            "access key id/secret must be passed in or set via ACCESS_KEY / ACCESS_SECRET"

        self.endpoint = "linkedmallretrieval.cn-zhangjiakou.aliyuncs.com"
        self._api_info = self._create_api_info()
        self._runtime = util_models.RuntimeOptions(
            read_timeout=60 * 1000,
            connect_timeout=60 * 1000
        )
        # Custom connection pool and SSL configuration.
        ssl_context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH)
        ssl_context.load_verify_locations(certifi.where())
        self._session = aiohttp.ClientSession(
            connector=aiohttp.TCPConnector(
                limit=256,
                limit_per_host=256,
                keepalive_timeout=600,
                ssl=ssl_context
            )
        )
        self._init_app()

    def _init_app(self):
        """Create the OpenAPI client bound to this instance's session and credentials."""
        config = open_api_models.Config(
            access_key_id=self.access_key_id,
            access_key_secret=self.access_key_secret,
            session=self._session,
            endpoint=self.endpoint,
        )
        self._client = OpenApiClient(config)

    async def async_close(self):
        """Close the underlying session; call this once the client is no longer needed."""
        await self._session.close()

    def close(self):
        """Synchronous counterpart of ``async_close()``.

        ``ClientSession.close()`` is a coroutine, so calling it without
        awaiting never closes anything. When an event loop is running the
        close is scheduled on it; otherwise it is run to completion here.
        NOTE(review): closing from a different loop than the one the session
        was created on is untested -- prefer ``async_close()`` where possible.
        """
        import asyncio

        try:
            running_loop = asyncio.get_running_loop()
        except RuntimeError:
            running_loop = None
        if running_loop is not None:
            # Keep a reference so the task is not garbage-collected early.
            self._close_task = running_loop.create_task(self._session.close())
        else:
            asyncio.run(self._session.close())

    def _create_api_info(self) -> open_api_models.Params:
        """Build the static API descriptor for the AISearchV2 call.

        @return: OpenApi.Params
        """
        params = open_api_models.Params(
            # API name
            action='AISearchV2',
            # API version
            version='2024-05-01',
            # Protocol
            protocol='HTTPS',
            # HTTP method
            method='GET',
            auth_type='AK',
            style='ROA',
            # API path
            pathname='/linked-retrieval/linked-retrieval-entry/v2/linkedRetrieval/commands/aiSearch',
            # Request body format
            req_body_type='formData',
            # Response body format
            body_type='sse'
        )
        return params

    async def do_sse_query(self,
                           search_query: str,
                           session_id: Optional[str] = None,
                           time_range: Optional[str] = None):
        """Issue the SSE request.

        :param search_query: user query, length: [2,100]
        :param session_id: session id for multi-turn interaction
        :param time_range: time range, one of: OneDay, OneWeek, OneMonth, OneYear, NoLimit
        :return: whatever ``call_sse_api_async`` returns; callers iterate it with ``async for``
        """
        assert self._client is not None
        candidate_params = {
            "query": search_query,
            "sessionId": session_id,
            "timeRange": time_range
        }
        # Omit optional parameters that were not supplied rather than sending None.
        request = open_api_models.OpenApiRequest(
            query={name: value for name, value in candidate_params.items() if value is not None}
        )
        sse_receiver = self._client.call_sse_api_async(params=self._api_info, request=request, runtime=self._runtime)
        return sse_receiver
Go SDK
SDK依赖
require (
github.com/alibabacloud-go/tea-utils/v2 v2.0.7
)
调用示例
package main
import (
"encoding/json"
"fmt"
"go_example/aliyun/linked_retrieval"
"time"
)
// main runs three sample queries, pausing one second before the last one.
func main() {
	for _, warmupQuery := range []string{"董宇辉", "黑神话悟空"} {
		invokeQuery(warmupQuery)
	}
	time.Sleep(1 * time.Second)
	invokeQuery("董")
}
// invokeQuery sends one query with a fixed time range and session id, then
// prints every streamed event. Events with no data or with data that is not
// valid JSON are reported and skipped instead of being printed as empty values.
func invokeQuery(query string) {
	timeRange := "OneYear"
	sessionId := "testSessionId"
	fmt.Printf("\n\n\n\n************ %s", query)
	events, err := linked_retrieval.DoSseQuery(query, &sessionId, &timeRange)
	if err != nil {
		fmt.Printf("query from linked_retrieval failed, %s", err)
		return
	}
	for event := range events {
		// Data is a pointer; dereferencing nil would panic.
		if event.Data == nil {
			continue
		}
		var eventData linked_retrieval.EventData
		if unmarshalErr := json.Unmarshal([]byte(*event.Data), &eventData); unmarshalErr != nil {
			fmt.Printf("skip malformed event, %s \n\n", unmarshalErr)
			continue
		}
		// Request id; provide it when troubleshooting.
		requestId := eventData.RequestID
		// Event name: on_common_search_end, on_post_retrieval_end or on_error_event.
		eventName := eventData.Header.Event
		// Server-side latency of this event, measured from when the server received the request.
		serverRT := eventData.Header.ResponseTime
		// Message content; a JSON document, see the API documentation.
		payload := eventData.Payload
		switch eventName {
		case "on_common_search_end", "on_post_retrieval_end":
			fmt.Printf("[%s] %s serverRt:%d, payload:%s \n\n", requestId, eventName, serverRT, payload)
		case "on_error_event":
			fmt.Printf("[%s] %s errorPayload:%s \n\n", requestId, eventName, payload)
		}
	}
}
go_example/aliyun/linked_retrieval
package linked_retrieval
import (
openapi "github.com/alibabacloud-go/darabonba-openapi/v2/client"
openapiutil "github.com/alibabacloud-go/openapi-util/service"
util "github.com/alibabacloud-go/tea-utils/v2/service"
"github.com/alibabacloud-go/tea/tea"
"io"
"os"
)
// Fixed settings for the AISearchV2 call.
const (
// Public endpoint of the service in cn-zhangjiakou.
endpoint = "linkedmallretrieval.cn-zhangjiakou.aliyuncs.com"
// Request path of the aiSearch command.
pathName = "/linked-retrieval/linked-retrieval-entry/v2/linkedRetrieval/commands/aiSearch"
// Passed as the client's ReadTimeout; presumably milliseconds, judging by the name.
timeoutMillSeconds = 5000
)
// createApiInfo returns the static descriptor of the AISearchV2 API call.
func createApiInfo() *openapi.Params {
	return &openapi.Params{
		// API name and version.
		Action:  tea.String("AISearchV2"),
		Version: tea.String("2024-05-01"),
		// Transport: protocol and HTTP method.
		Protocol: tea.String("HTTPS"),
		Method:   tea.String("GET"),
		AuthType: tea.String("AK"),
		Style:    tea.String("ROA"),
		// API path.
		Pathname: tea.String(pathName),
		// Request body format.
		ReqBodyType: tea.String("json"),
		// Response body format. It must be binary so that CallApi passes the
		// response body through for ReadAsSSE.
		BodyType: tea.String("binary"),
	}
}
// DoSseQuery calls AISearchV2 and returns a channel of server-sent events.
//
// Credentials are read from the ACCESS_KEY_AIGC_01 / ACCESS_SECRET_AIGC_01
// environment variables; set them to your sub-account AK/SK or change the
// variable names here. sessionId and timeRange are optional: a nil pointer
// omits the parameter from the request.
//
// The returned channel is closed when the stream ends or a stream error
// occurs. A stream error is written to stderr because the signature has no
// way to report errors that happen after this function returns.
func DoSseQuery(query string, sessionId *string, timeRange *string) (<-chan util.SSEEvent, error) {
	accessKeyID := os.Getenv("ACCESS_KEY_AIGC_01")
	accessKeySecret := os.Getenv("ACCESS_SECRET_AIGC_01")
	// Fail early with a clear message instead of sending an unsigned request.
	if accessKeyID == "" || accessKeySecret == "" {
		return nil, tea.NewSDKError(map[string]interface{}{
			"code":    "MissingCredentials",
			"message": "environment variables ACCESS_KEY_AIGC_01 and ACCESS_SECRET_AIGC_01 must be set",
		})
	}

	config := &openapi.Config{
		AccessKeyId:     tea.String(accessKeyID),
		AccessKeySecret: tea.String(accessKeySecret),
		Endpoint:        tea.String(endpoint),
		ReadTimeout:     tea.Int(timeoutMillSeconds),
	}
	client, err := openapi.NewClient(config)
	if err != nil {
		return nil, err
	}
	params := createApiInfo()

	// Query parameters; optional ones are only added when supplied.
	queries := map[string]interface{}{
		"query": query,
	}
	if sessionId != nil {
		queries["sessionId"] = *sessionId
	}
	if timeRange != nil {
		queries["timeRange"] = *timeRange
	}

	runtime := &util.RuntimeOptions{}
	request := &openapi.OpenApiRequest{
		Query: openapiutil.Query(queries),
	}

	// The response is a map holding the body, the headers and the HTTP status code.
	resp, err := client.CallApi(params, request, runtime)
	if err != nil {
		return nil, err
	}
	// Checked assertion: an unexpected body type must not panic the caller.
	body, ok := resp["body"].(io.ReadCloser)
	if !ok {
		return nil, tea.NewSDKError(map[string]interface{}{
			"code":    "InvalidResponseBody",
			"message": "response body is not a readable stream",
		})
	}

	events, sseErrors := util.ReadAsSSE(body)

	// Forward events while also draining the error channel. A single
	// non-blocking read right after ReadAsSSE would miss every later error,
	// and an unread error could leave the reader stuck on its send.
	// NOTE(review): assumes ReadAsSSE stops producing after reporting an error -- confirm.
	forwarded := make(chan util.SSEEvent)
	go func() {
		defer close(forwarded)
		for {
			select {
			case event, open := <-events:
				if !open {
					return
				}
				forwarded <- event
			case streamErr, open := <-sseErrors:
				if !open {
					// Error channel closed: stop selecting on it, keep forwarding events.
					sseErrors = nil
					continue
				}
				if streamErr != nil {
					os.Stderr.WriteString("linked_retrieval: sse stream error: " + streamErr.Error() + "\n")
				}
				return
			}
		}
	}()
	return forwarded, nil
}
// EventData is the top-level JSON structure of one SSE event's data.
type EventData struct {
// Payload is the event content, carried as a string under the "payload" key.
Payload string `json:"payload"`
// RequestID is the value of the "requestId" key.
RequestID string `json:"requestId"`
// Header holds the per-event metadata under the "header" key.
Header Header `json:"header"`
}
// Header is the metadata object nested under EventData's "header" key.
type Header struct {
// EventID is the value of the "eventId" key.
EventID string `json:"eventId"`
// ResponseTime is the value of the "responseTime" key, decoded as an integer.
ResponseTime int `json:"responseTime"`
// Event is the event name from the "event" key.
Event string `json:"event"`
}