增强搜索内容时效性

更新时间:
复制为 MD 格式

本文介绍如何通过 ReadPage 接口,增强 Search 搜索结果的时效性。

在 Agent 调用搜索工具时,受搜索引擎自身的更新频率及缓存机制限制,搜索结果的时效性可能存在一定滞后。对于对时延不敏感的场景,可通过在线实时抓取网页内容的方式,提升搜索结果的时效性。以下是基于 LangChain 实现的时效性增强搜索示例。

  1. 实现时效性增强的搜索工具(ReadPage 接口本身延迟较高,增强时效性的同时会显著增加 search 耗时)

import asyncio
import httpx
from langchain_core.tools import tool

async def iqs_search(query: str, max_results: int = 3) -> list[dict]:
    """Query the IQS unified search endpoint and return trimmed results.

    Args:
        query: Search text sent as the ``query`` field of the request body.
        max_results: Maximum number of result items to return.

    Returns:
        A list of dicts, each with the keys ``link``, ``title``, ``snippet``
        and ``publishedTime`` copied from the response's ``pageItems``.
        The list is empty when ``max_results`` is not positive, when the
        HTTP response is not successful, or when the response carries no
        ``pageItems``.
    """
    # A non-positive limit can never yield results. Returning early also
    # avoids a negative slice bound, which would silently drop items from
    # the END of the list instead of limiting its length.
    if max_results <= 0:
        return []

    async with httpx.AsyncClient() as client:
        response = await client.post(
            "https://cloud-iqs.aliyuncs.com/search/unified",
            headers={
                'X-API-Key': "<Your-IQS-ApiKey>"
            },
            json={
                'query': query,
                'engineType': "Generic",
            },
        )
        # Always honour the declared ``list[dict]`` return type so callers
        # can iterate the result without a None check.
        if not response.is_success:
            return []

        # ``pageItems`` may be absent or null in the payload.
        page_items = response.json().get("pageItems") or []

    return [
        {
            "link": item.get("link"),
            "title": item.get("title"),
            "snippet": item.get("snippet"),
            "publishedTime": item.get("publishedTime"),
        }
        for item in page_items[:max_results]
    ]


async def iqs_readpage(urls: list[str]) -> list[dict]:
    """Scrape each URL through the IQS ReadPage endpoint concurrently.

    Args:
        urls: Page URLs to scrape.

    Returns:
        A list with one entry per input URL, in the same order. Each entry
        is a dict with the keys ``url`` and ``markdown`` taken from the
        response's ``data`` object, or ``None`` when that URL is empty, the
        request fails or times out, the response is not successful, the
        body is not valid JSON, or ``data`` is missing.
    """
    # Nothing to fetch: skip creating an HTTP client altogether.
    if not urls:
        return []

    async def post_readpage(client, url):
        """Scrape one URL; return its url/markdown dict or None on failure."""
        if not url:
            return None
        try:
            response = await client.post(
                "https://cloud-iqs.aliyuncs.com/readpage/scrape",
                headers={
                    'X-API-Key': "<Your-IQS-ApiKey>"
                },
                json={
                    "url": url,
                    "formats": ["markdown"],
                    "timeout": 15000,
                    "pageTimeout": 10000,
                    "maxAge": 3600
                },
                # httpx timeouts are expressed in SECONDS; 20 s leaves some
                # headroom over the 15000 value sent in the request body
                # (presumably milliseconds -- confirm against the API docs).
                timeout=20,
            )
            if not response.is_success:
                return None
            data = response.json().get("data")
        except (httpx.HTTPError, ValueError):
            # A single failed or slow page must not abort the whole batch;
            # ValueError covers a response body that is not valid JSON.
            return None

        if not data:
            return None
        return {
            "url": data.get("url"),
            "markdown": data.get("markdown")
        }

    async with httpx.AsyncClient() as client:
        # gather() preserves input order, so results line up with ``urls``.
        results = await asyncio.gather(
            *(post_readpage(client, url) for url in urls)
        )
    return results


@tool
async def search_freshness_boost(query: str) -> list[dict]:
    """Search the web for information.
    Args:
        query: Search query
    """
    # NOTE: the docstring above is intentionally left unchanged; the @tool
    # decorator is expected to expose it to the model as the tool
    # description, so its text is runtime behavior rather than a comment.

    # Tolerate a search helper that reports failure as None.
    page_results = await iqs_search(query, 3) or []
    if not page_results:
        return []

    links = [page.get("link") for page in page_results]
    read_results = await iqs_readpage(links) or []

    # Pair each search hit with its scrape result by position. zip() stops
    # at the shorter list, so a short scrape list cannot raise IndexError.
    for page_result, read_result in zip(page_results, read_results):
        markdown = read_result.get("markdown") if read_result else None
        # Only replace the search-engine snippet when fresh page content
        # was actually retrieved; otherwise keep the original snippet.
        if markdown:
            page_result['snippet'] = markdown
    return page_results
  2. Agent 使用时效性增强搜索工具

from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_community.chat_models import ChatOpenAI, ChatTongyi
from langchain.agents import AgentExecutor, create_tool_calling_agent


async def main():
    """Build a tool-calling agent around the search tool and run one query.

    Wires a ChatTongyi model, the ``search_freshness_boost`` tool and a chat
    prompt into an ``AgentExecutor``, invokes it with a single sample
    question, and prints the ``output`` field of the result.
    """
    chat_model = ChatTongyi(
        model="qwen-plus",
        api_key="<Bailian-ApiKey>",
    )

    # Prompt layout: system message, optional chat history, the user input,
    # then the scratchpad placeholder used for intermediate agent steps.
    prompt = ChatPromptTemplate.from_messages([
        ("system", "你是一个拥有实时互联网搜索能力的 AI 助手。"),
        MessagesPlaceholder(variable_name="chat_history", optional=True),
        ("human", "{input}"),
        MessagesPlaceholder(variable_name="agent_scratchpad"),
    ])

    toolset = [search_freshness_boost]
    executor = AgentExecutor(
        agent=create_tool_calling_agent(chat_model, toolset, prompt),
        tools=toolset,
        verbose=True,
        handle_parsing_errors=True,
    )

    payload = {"input": "今日油价"}
    outcome = await executor.ainvoke(payload)
    print("\n最终回答:", outcome["output"])


# Run the demo only when this file is executed as a script (not on import);
# asyncio.run() starts an event loop and drives the main() coroutine.
if __name__ == "__main__":
    asyncio.run(main())