Skip to content

Commit e55cf53

Browse files
YuujeeNeilJohnson0930Wendong-Fan
authored
feat: add bing search (camel-ai#1865)
Co-authored-by: NeilJohnson0930 <[email protected]> Co-authored-by: Wendong <[email protected]> Co-authored-by: Wendong-Fan <[email protected]>
1 parent 6a8be8e commit e55cf53

File tree

3 files changed

+240
-33
lines changed

3 files changed

+240
-33
lines changed

camel/toolkits/search_toolkit.py

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -835,6 +835,116 @@ def search_baidu(self, query: str, max_results: int = 5) -> Dict[str, Any]:
835835
except Exception as e:
836836
return {"error": f"Baidu scraping error: {e!s}"}
837837

838+
def search_bing(self, query: str, max_results: int = 5) -> Dict[str, Any]:
    r"""Use Bing search engine to search information for the given query.

    This function queries the Chinese version of Bing search engine (cn.
    bing.com) using web scraping to retrieve relevant search results. It
    extracts search results including titles, snippets, and URLs. This
    function is particularly useful when the query is in Chinese or when
    Chinese search results are desired.

    Args:
        query (str): The search query string to submit to Bing. Works best
            with Chinese queries or when Chinese results are preferred.
        max_results (int): Maximum number of results to return.
            (default: :obj:`5`)

    Returns:
        Dict[str, Any]: A dictionary containing exactly one of:
            - 'results': A list of dictionaries, each with:
                - 'result_id': The 1-based position of the result in the
                    returned list.
                - 'snippet': A brief description of the search result.
                - 'title': The title of the search result.
                - 'link': The URL of the search result.
              The list is empty when the page has no ``ol#b_results``
              element.
            - 'warning': A message returned when the results container
                was found but no entry could be extracted from it.
            - 'error': An error message if something went wrong.
    """
    from typing import Any, Dict, List, cast
    from urllib.parse import urlencode

    from bs4 import BeautifulSoup, Tag

    try:
        # Keep the caller's query untouched; only the encoded form goes
        # into the URL.
        encoded_query = urlencode({"q": query})
        url = f'https://cn.bing.com/search?{encoded_query}'
        headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/120.0.0.0 Safari/537.36"
            ),
        }
        # Add timeout to prevent hanging
        response = requests.get(url, headers=headers, timeout=10)

        # Check if the request was successful
        if response.status_code != 200:
            return {
                "error": (
                    f"Bing returned status code: "
                    f"{response.status_code}"
                )
            }

        response.encoding = 'utf-8'
        soup = BeautifulSoup(response.text, 'html.parser')

        b_results_element = soup.find("ol", id="b_results")
        if b_results_element is None:
            return {"results": []}

        # Ensure b_results is a Tag and find all li elements
        b_results_tag = cast(Tag, b_results_element)

        results: List[Dict[str, Any]] = []
        for row in b_results_tag.find_all("li"):
            # Stop once enough *usable* results were collected. Rows that
            # are skipped below (no heading or no link) must not count
            # against ``max_results``.
            if len(results) >= max_results:
                break
            if not isinstance(row, Tag):
                continue

            h2_element = row.find("h2")
            if h2_element is None:
                continue
            h2_tag = cast(Tag, h2_element)

            link_tag_element = h2_tag.find("a")
            if link_tag_element is None:
                continue

            link = cast(Tag, link_tag_element).get("href")
            if link is None:
                continue

            # The snippet is optional; fall back to an empty string.
            content_element = row.find("p", class_="b_algoSlug")
            content_text = (
                content_element.get_text()
                if isinstance(content_element, Tag)
                else ""
            )

            results.append(
                {
                    # Number by position in the returned list so ids stay
                    # contiguous even when earlier rows were skipped.
                    "result_id": len(results) + 1,
                    "snippet": content_text,
                    "title": h2_tag.get_text().strip(),
                    "link": link,
                }
            )

        if not results:
            return {
                "warning": "No results found. Check if "
                "Bing HTML structure has changed."
            }

        return {"results": results}

    except Exception as e:
        return {"error": f"Bing scraping error: {e!s}"}
947+
838948
def get_tools(self) -> List[FunctionTool]:
839949
r"""Returns a list of FunctionTool objects representing the
840950
functions in the toolkit.
@@ -853,4 +963,5 @@ def get_tools(self) -> List[FunctionTool]:
853963
FunctionTool(self.search_brave),
854964
FunctionTool(self.search_bocha),
855965
FunctionTool(self.search_baidu),
966+
FunctionTool(self.search_bing),
856967
]

examples/toolkits/search_toolkit.py

Lines changed: 68 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -17,39 +17,6 @@
1717
from camel.agents import ChatAgent
1818
from camel.toolkits import FunctionTool, SearchToolkit
1919

20-
agent = ChatAgent(
21-
system_message="""You are a helpful assistant that can use baidu search
22-
engine to answer questions.""",
23-
tools=[FunctionTool(SearchToolkit().search_baidu)],
24-
)
25-
26-
usr_msg = "今天北京的天气如何"
27-
28-
response = agent.step(input_message=usr_msg, response_format=None)
29-
30-
print(response.msgs[0].content)
31-
32-
"""
33-
===============================================================================
34-
今天北京的天气信息可以通过以下链接查看:
35-
36-
1. [中国天气网 - 北京天气预报](http://www.baidu.com/link?
37-
url=AJhE9PhEO3TmkJ70CUcRsR3NVB3m6wxN5Imdp0ZVsEBK1t8YhtM6YMxrQy3_vRN6dJv4FLHkBCe
38-
fZURnzHTm9gio-dS4-4MwGVgJe40m7prOoggce2eB0h-3DsllbKMm)
39-
2. [中国天气网 - 北京天气预报](http://www.baidu.com/link?
40-
url=1vhNOfl9tV65_104GMQbDnU_fdCZPXDV2BtTJelxdd6isdSZjAHvtoXqOWG3n7D1N-m9zAmOhQG
41-
c-jEGqiXe9K)
42-
3. [中国天气网 - 北京天气预报](http://www.baidu.com/link?
43-
url=Q0URfpodXDpUe1TKBPpToKIyIuCcjSGUR5jorx81g8Pni5XH-Tbc6AXMa7EwCWjBG3jysTZb43S
44-
6ZCsJOKvPw2EbIlQ_bMu42-5sCraqXlS)
45-
4. [中国天气网 - 北京天气预报一周](http://www.baidu.com/link?
46-
url=TtFe8QryJFuwX1kx50YF5WijRcd2TMJRhPudDQvqW7TG4siah68gUZd_frsVWPi1xkYvrxoYL87
47-
QMH0wSjDYOq)
48-
49-
请点击链接查看详细的天气预报信息。
50-
===============================================================================
51-
"""
52-
5320
res_simple = SearchToolkit().query_wolfram_alpha(
5421
query="solve 3x-7=11", is_detailed=False
5522
)
@@ -284,3 +251,71 @@ class PersonInfo(BaseModel):
284251
dly":None},"videos":None}
285252
===============================================================================
286253
"""
254+
255+
256+
agent = ChatAgent(
257+
system_message="""You are a helpful assistant that can use baidu search
258+
engine to answer questions.""",
259+
tools=[FunctionTool(SearchToolkit().search_baidu)],
260+
)
261+
262+
usr_msg = "今天北京的天气如何"
263+
264+
response = agent.step(input_message=usr_msg, response_format=None)
265+
266+
print(response.msgs[0].content)
267+
268+
"""
269+
===============================================================================
270+
今天北京的天气信息可以通过以下链接查看:
271+
272+
1. [中国天气网 - 北京天气预报](http://www.baidu.com/link?
273+
url=AJhE9PhEO3TmkJ70CUcRsR3NVB3m6wxN5Imdp0ZVsEBK1t8YhtM6YMxrQy3_vRN6dJv4FLHkBCe
274+
fZURnzHTm9gio-dS4-4MwGVgJe40m7prOoggce2eB0h-3DsllbKMm)
275+
2. [中国天气网 - 北京天气预报](http://www.baidu.com/link?
276+
url=1vhNOfl9tV65_104GMQbDnU_fdCZPXDV2BtTJelxdd6isdSZjAHvtoXqOWG3n7D1N-m9zAmOhQG
277+
c-jEGqiXe9K)
278+
3. [中国天气网 - 北京天气预报](http://www.baidu.com/link?
279+
url=Q0URfpodXDpUe1TKBPpToKIyIuCcjSGUR5jorx81g8Pni5XH-Tbc6AXMa7EwCWjBG3jysTZb43S
280+
6ZCsJOKvPw2EbIlQ_bMu42-5sCraqXlS)
281+
4. [中国天气网 - 北京天气预报一周](http://www.baidu.com/link?
282+
url=TtFe8QryJFuwX1kx50YF5WijRcd2TMJRhPudDQvqW7TG4siah68gUZd_frsVWPi1xkYvrxoYL87
283+
QMH0wSjDYOq)
284+
285+
请点击链接查看详细的天气预报信息。
286+
===============================================================================
287+
"""
288+
289+
# Example: let an agent answer a question through the Bing search tool.
# The system message names Bing so it matches the tool the agent is given.
bing_call_agent = ChatAgent(
    system_message="""You are a helpful assistant that can use bing search
    engine to answer questions.""",
    tools=[FunctionTool(SearchToolkit().search_bing)],
)

bing_usr_msg = "帮忙查询巴黎圣母院最新修复进展"

response = bing_call_agent.step(
    input_message=bing_usr_msg, response_format=None
)

print(response.msgs[0].content)
302+
303+
"""
304+
===============================================================================
305+
以下是关于巴黎圣母院最新修复进展的一些信息:
306+
307+
1. **时隔4年,灾后余生的巴黎圣母院即将重生** -
308+
[知乎](https://zhuanlan.zhihu.com/p/619405504)
309+
310+
2. **历时4年,耗资70亿,被烧塌的巴黎圣母院修好了!!** -
311+
[腾讯网](https://news.qq.com/rain/a/20231018A0329F00)
312+
313+
3. **一票难求!巴黎圣母院重新开放!5年修复离不开来自东方的支持** -
314+
[新浪财经](https://finance.sina.com.cn/wm/2024-12-08/doc-incyumnp3384392.shtml)
315+
316+
4. **巴黎圣母院浴火重生!建筑学者:勘探报告近3000页,修复工作复杂** -
317+
[腾讯网](https://news.qq.com/rain/a/20241208A05Q9K00)
318+
319+
这些链接提供了关于巴黎圣母院修复的详细信息和最新进展。
320+
===============================================================================
321+
"""

test/toolkits/test_search_functions.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,67 @@ def test_search_baidu(mock_get, search_toolkit):
248248
)
249249

250250

251+
@patch('requests.get')
def test_search_bing(mock_get, search_toolkit):
    """search_bing parses titles, snippets and links out of Bing HTML."""
    # Canned Bing result page containing two organic results.
    bing_html = """
<html>
<head><title>Bing Search</title></head>
<body>
<ol id="b_results">
<li>
<h2><a href="https://example1.com">Test Title 1</a></h2>
<p class="b_algoSlug">Test Snippet 1</p>
</li>
<li>
<h2><a href="https://example2.com">Test Title 2</a></h2>
<p class="b_algoSlug">Test Snippet 2</p>
</li>
</ol>
</body>
</html>
"""
    mock_get.return_value = MagicMock(
        status_code=200,
        encoding="utf-8",
        text=bing_html,
    )

    # Exercise the toolkit method against the mocked HTTP layer.
    actual = search_toolkit.search_bing(query="test query", max_results=5)

    # Both list items must come back, in page order.
    first_hit = {
        "result_id": 1,
        "title": "Test Title 1",
        "snippet": "Test Snippet 1",
        "link": "https://example1.com",
    }
    second_hit = {
        "result_id": 2,
        "title": "Test Title 2",
        "snippet": "Test Snippet 2",
        "link": "https://example2.com",
    }
    assert actual == {"results": [first_hit, second_hit]}

    # The request must target cn.bing.com with the browser-like headers
    # and the 10 second timeout.
    browser_user_agent = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    )
    mock_get.assert_called_once_with(
        "https://cn.bing.com/search?q=test+query",
        headers={"User-Agent": browser_user_agent},
        timeout=10,
    )
310+
311+
251312
@patch('requests.get')
252313
@patch('wolframalpha.Client')
253314
@patch('os.environ.get')

0 commit comments

Comments
 (0)