Skip to content

Commit 1a2f3c4

Browse files
authored
enhance: add scrapegraph-sdk integration PR2206 (camel-ai#2259)
1 parent cb44039 commit 1a2f3c4

File tree

6 files changed: 82 additions and 45 deletions.

camel/loaders/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
from .jina_url_reader import JinaURLReader
2121
from .mineru_extractor import MinerU
2222
from .pandas_reader import PandasReader
23+
from .scrapegraph_reader import ScrapeGraphAI
2324
from .unstructured_io import UnstructuredIO
2425

2526
__all__ = [
@@ -34,4 +35,5 @@
3435
'PandasReader',
3536
'MinerU',
3637
'Crawl4AI',
38+
'ScrapeGraphAI',
3739
]

camel/loaders/scrapegraph_reader.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# ========= Copyright 2023-2025@ CAMEL-AI.org. All Rights Reserved. =========
1+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
22
# Licensed under the Apache License, Version 2.0 (the "License");
33
# you may not use this file except in compliance with the License.
44
# You may obtain a copy of the License at
@@ -10,20 +10,19 @@
1010
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1111
# See the License for the specific language governing permissions and
1212
# limitations under the License.
13-
# ========= Copyright 2023-2025 @ CAMEL-AI.org. All Rights Reserved. =========
13+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
1414

1515
import os
1616
from typing import Any, Dict, Optional
1717

18-
from pydantic import BaseModel
19-
2018

2119
class ScrapeGraphAI:
22-
r"""ScrapeGraphAI allows you to perform AI-powered web scraping and searching.
20+
r"""ScrapeGraphAI allows you to perform AI-powered web scraping and
21+
searching.
2322
2423
Args:
25-
api_key (Optional[str]): API key for authenticating with the ScrapeGraphAI
26-
API.
24+
api_key (Optional[str]): API key for authenticating with the
25+
ScrapeGraphAI API.
2726
2827
References:
2928
https://scrapegraph.ai/
@@ -50,7 +49,8 @@ def search(
5049
user_prompt (str): The search query or instructions.
5150
5251
Returns:
53-
Dict[str, Any]: The search results including answer and reference URLs.
52+
Dict[str, Any]: The search results including answer and reference
53+
URLs.
5454
5555
Raises:
5656
RuntimeError: If the search process fails.
@@ -72,8 +72,8 @@ def scrape(
7272
Args:
7373
website_url (str): The URL to scrape.
7474
user_prompt (str): Instructions for what data to extract.
75-
website_html (Optional[str]): Optional HTML content to use instead of
76-
fetching from the URL.
75+
website_html (Optional[str]): Optional HTML content to use instead
76+
of fetching from the URL.
7777
7878
Returns:
7979
Dict[str, Any]: The scraped data including request ID and result.
Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# ========= Copyright 2023-2025@ CAMEL-AI.org. All Rights Reserved. =========
1+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
22
# Licensed under the Apache License, Version 2.0 (the "License");
33
# you may not use this file except in compliance with the License.
44
# You may obtain a copy of the License at
@@ -10,10 +10,11 @@
1010
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1111
# See the License for the specific language governing permissions and
1212
# limitations under the License.
13-
# ========= Copyright 2023-2025 @ CAMEL-AI.org. All Rights Reserved. =========
13+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
1414

1515
"""
16-
Example demonstrating how to use the ScrapeGraphAI reader for web scraping and searching.
16+
Example demonstrating how to use the ScrapeGraphAI reader for web scraping and
17+
searching.
1718
1819
This example shows:
1920
1. How to initialize the ScrapeGraphAI reader
@@ -23,38 +24,38 @@
2324
"""
2425

2526
import os
26-
from typing import Dict, Any
27+
from typing import Any, Dict
2728

2829
from camel.loaders.scrapegraph_reader import ScrapeGraphAI
2930

3031

3132
def search_example(api_key: str) -> Dict[str, Any]:
32-
"""Example of performing an AI-powered web search."""
33+
r"""Example of performing an AI-powered web search."""
3334
# Initialize the ScrapeGraphAI reader
3435
scraper = ScrapeGraphAI(api_key=api_key)
35-
36+
3637
try:
3738
# Perform a search
3839
search_query = "What are the latest developments in AI?"
3940
result = scraper.search(user_prompt=search_query)
40-
41+
4142
print("\nSearch Results:")
4243
print(f"Answer: {result.get('answer', 'No answer found')}")
4344
print("References:")
4445
for url in result.get('references', []):
4546
print(f"- {url}")
46-
47+
4748
return result
4849
finally:
4950
# Always close the connection
5051
scraper.close()
5152

5253

5354
def scrape_example(api_key: str) -> Dict[str, Any]:
54-
"""Example of scraping a website with specific instructions."""
55+
r"""Example of scraping a website with specific instructions."""
5556
# Initialize the ScrapeGraphAI reader
5657
scraper = ScrapeGraphAI(api_key=api_key)
57-
58+
5859
try:
5960
# Scrape a website with specific instructions
6061
website_url = "https://example.com"
@@ -64,17 +65,16 @@ def scrape_example(api_key: str) -> Dict[str, Any]:
6465
2. All paragraph texts
6566
3. Any links to other pages
6667
"""
67-
68+
6869
result = scraper.scrape(
69-
website_url=website_url,
70-
user_prompt=instructions
70+
website_url=website_url, user_prompt=instructions
7171
)
72-
72+
7373
print("\nScraping Results:")
7474
print(f"Request ID: {result.get('request_id', 'No ID')}")
7575
print("Extracted Data:")
7676
print(result.get('result', {}))
77-
77+
7878
return result
7979
finally:
8080
# Always close the connection
@@ -84,17 +84,17 @@ def scrape_example(api_key: str) -> Dict[str, Any]:
8484
def main():
8585
# Get API key from environment variable or use a placeholder
8686
api_key = os.environ.get("SCRAPEGRAPH_API_KEY", "your_api_key_here")
87-
87+
8888
if api_key == "your_api_key_here":
8989
print("Please set your SCRAPEGRAPH_API_KEY environment variable")
9090
return
91-
91+
9292
print("Running search example...")
9393
search_example(api_key)
94-
94+
9595
print("\nRunning scrape example...")
9696
scrape_example(api_key)
9797

9898

9999
if __name__ == "__main__":
100-
main()
100+
main()

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@ owl = [
240240
"pandas>=1.5.3,<2",
241241
"rouge>=1.0.1,<2",
242242
"crawl4ai>=0.3.745",
243+
"scrapegraph-py>=1.12.0,<2",
243244
]
244245
all = [
245246
"numpy~=1.26",
@@ -352,6 +353,7 @@ all = [
352353
"crawl4ai>=0.3.745",
353354
"pyautogui>=0.9.54,<0.10",
354355
"pyobvector>=0.1.18",
356+
"scrapegraph-py>=1.12.0,<2",
355357
]
356358

357359
[project.urls]
@@ -538,6 +540,7 @@ module = [
538540
"crawl4ai.*",
539541
"pyautogui",
540542
"pyobvector.*",
543+
"scrapegraph_py.*",
541544
]
542545
ignore_missing_imports = true
543546

Lines changed: 26 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# ========= Copyright 2023-2025@ CAMEL-AI.org. All Rights Reserved. =========
1+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
22
# Licensed under the Apache License, Version 2.0 (the "License");
33
# you may not use this file except in compliance with the License.
44
# You may obtain a copy of the License at
@@ -10,7 +10,7 @@
1010
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1111
# See the License for the specific language governing permissions and
1212
# limitations under the License.
13-
# ========= Copyright 2023-2025 @ CAMEL-AI.org. All Rights Reserved. =========
13+
# ========= Copyright 2023-2024 @ CAMEL-AI.org. All Rights Reserved. =========
1414

1515
import os
1616
from unittest.mock import MagicMock, patch
@@ -22,21 +22,23 @@
2222

2323
@pytest.fixture
2424
def scrapegraph_ai():
25-
with patch("camel.loaders.scrapegraph_reader.Client") as mock_client:
25+
with patch("scrapegraph_py.Client") as mock_client:
2626
mock_client_instance = MagicMock()
2727
mock_client.return_value = mock_client_instance
2828
yield ScrapeGraphAI(api_key="test_api_key")
2929

3030

3131
def test_init_with_api_key():
32-
with patch("camel.loaders.scrapegraph_reader.Client") as mock_client:
32+
with patch("scrapegraph_py.Client") as mock_client:
3333
ScrapeGraphAI(api_key="test_api_key")
3434
mock_client.assert_called_once_with(api_key="test_api_key")
3535

3636

3737
def test_init_with_env_var():
38-
with patch("camel.loaders.scrapegraph_reader.Client") as mock_client, \
39-
patch.dict(os.environ, {"SCRAPEGRAPH_API_KEY": "env_api_key"}):
38+
with (
39+
patch("scrapegraph_py.Client") as mock_client,
40+
patch.dict(os.environ, {"SCRAPEGRAPH_API_KEY": "env_api_key"}),
41+
):
4042
ScrapeGraphAI()
4143
mock_client.assert_called_once_with(api_key="env_api_key")
4244

@@ -47,13 +49,19 @@ def test_search_success(scrapegraph_ai):
4749

4850
result = scrapegraph_ai.search("test query")
4951
assert result == mock_response
50-
scrapegraph_ai.client.searchscraper.assert_called_once_with(user_prompt="test query")
52+
scrapegraph_ai.client.searchscraper.assert_called_once_with(
53+
user_prompt="test query"
54+
)
5155

5256

5357
def test_search_failure(scrapegraph_ai):
54-
scrapegraph_ai.client.searchscraper.side_effect = Exception("Search failed")
58+
scrapegraph_ai.client.searchscraper.side_effect = Exception(
59+
"Search failed"
60+
)
5561

56-
with pytest.raises(RuntimeError, match="Failed to perform search: Search failed"):
62+
with pytest.raises(
63+
RuntimeError, match="Failed to perform search: Search failed"
64+
):
5765
scrapegraph_ai.search("test query")
5866

5967

@@ -63,13 +71,13 @@ def test_scrape_success(scrapegraph_ai):
6371

6472
result = scrapegraph_ai.scrape(
6573
website_url="https://example.com",
66-
user_prompt="Extract title and description"
74+
user_prompt="Extract title and description",
6775
)
6876
assert result == mock_response
6977
scrapegraph_ai.client.smartscraper.assert_called_once_with(
7078
website_url="https://example.com",
7179
user_prompt="Extract title and description",
72-
website_html=None
80+
website_html=None,
7381
)
7482

7583

@@ -80,26 +88,28 @@ def test_scrape_with_html(scrapegraph_ai):
8088
result = scrapegraph_ai.scrape(
8189
website_url="https://example.com",
8290
user_prompt="Extract title and description",
83-
website_html="<html>test</html>"
91+
website_html="<html>test</html>",
8492
)
8593
assert result == mock_response
8694
scrapegraph_ai.client.smartscraper.assert_called_once_with(
8795
website_url="https://example.com",
8896
user_prompt="Extract title and description",
89-
website_html="<html>test</html>"
97+
website_html="<html>test</html>",
9098
)
9199

92100

93101
def test_scrape_failure(scrapegraph_ai):
94102
scrapegraph_ai.client.smartscraper.side_effect = Exception("Scrape failed")
95103

96-
with pytest.raises(RuntimeError, match="Failed to perform scrape: Scrape failed"):
104+
with pytest.raises(
105+
RuntimeError, match="Failed to perform scrape: Scrape failed"
106+
):
97107
scrapegraph_ai.scrape(
98108
website_url="https://example.com",
99-
user_prompt="Extract title and description"
109+
user_prompt="Extract title and description",
100110
)
101111

102112

103113
def test_close(scrapegraph_ai):
104114
scrapegraph_ai.close()
105-
scrapegraph_ai.client.close.assert_called_once()
115+
scrapegraph_ai.client.close.assert_called_once()

uv.lock

Lines changed: 22 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Comments: 0 commit comments