modelcontextprotocol · Ray0907 · Mar 22, 2025 · May 7, 2025
diff --git a/src/arxiv/README.md b/src/arxiv/README.md
@@ -0,0 +1,58 @@
+# mcp-arxiv
+
+A Model Context Protocol server that provides arXiv paper search and retrieval capabilities. This server enables LLMs to search for academic papers on arXiv and get cleaned titles, abstracts, and content without dealing with complex HTML parsing.
+
+### Available Tools
+
+- `search` - Search arXiv for papers matching a query.
+
+  - Required arguments:
+    - `query` (string): Search query for arXiv papers (e.g., 'LLM', 'transformer architecture')
+
+- `get` - Get the content of a specific arXiv paper.
+  - Required arguments:
+    - `url` (string): URL of the arXiv paper to retrieve
+
+## Installation
+
+### Using venv
+
+```bash
+cd mcp-arxiv
+source .venv/bin/activate
+pip install -r requirements.txt
+```
+
+### Using uv
+
+```bash
+cd mcp-arxiv
+uv venv .venv
+source .venv/bin/activate
+uv pip install -r requirements.txt
+```
+
+## Configuration
+
+### Configure for Claude.app
+
+Add to your Claude settings:
+
+<details>
+<summary>Using uv</summary>
+
+```json
+"mcpServers": {
+  "arxiv": {
+    "command": "uv",
+    "args": [
+        "--directory",
+        "ABSOLUTE_PROJECT_PATH",
+        "run",
+        "arxiv-server.py"
+      ]
+  }
+}
+```
+
+</details>
diff --git a/src/arxiv/arxiv-server.py b/src/arxiv/arxiv-server.py
@@ -0,0 +1,47 @@
+
+import urllib
+from mcp.server.fastmcp import FastMCP
+import requests
+from bs4 import BeautifulSoup
+
+mcp = FastMCP('arXiv-server')
+
+URL ='https://arxiv.org'
+@mcp.tool()
+def search(query):
+	"""
+	Search Arxiv for the given query
+	"""
+	query = urllib.parse.quote_plus(query)
+	res = requests.get(f"""{URL}/search/?query={query}&searchtype=all&abstracts=show&order=-announced_date_first&size=50""")
+	soup = BeautifulSoup(res.text, 'html.parser')
+	items = soup.select('.arxiv-result')
+	data = []
+	for item in items:
+		title = item.select('.title')[0].text
+		title = title.replace('\n', ' ')
+		title = title.strip()
+		title = ' '.join(title.split())
+		abstract = item.select('.abstract')[0].text
+		abstract = abstract.replace('\n', ' ')
+		abstract = ' '.join(abstract.split())
+		url = item.select('.list-title > span > a')[0].get('href')
+		data.append(
+			{
+				'title': title,
+				'abstract': abstract,
+				'url': url
+			}
+		)
+	return data
+@mcp.tool()
+def get(url):
+	"""
+	Get the content of the given URL from arxiv.
+	"""
+	url_prefix = 'https://r.jina.ai/'
+	res = requests.get(url_prefix + url)
+	return res.text
+if __name__ == "__main__":
+	# Initialize and run the server
+	mcp.run(transport='stdio')
diff --git a/src/arxiv/requirements.txt b/src/arxiv/requirements.txt
@@ -0,0 +1,33 @@
+annotated-types==0.7.0
+anyio==4.9.0
+beautifulsoup4==4.13.3
+certifi==2025.1.31
+charset-normalizer==3.4.1
+click==8.1.8
+h11==0.14.0
+httpcore==1.0.7
+httpx==0.28.1
+httpx-sse==0.4.0
+idna==3.10
+markdown-it-py==3.0.0
+mcp==1.5.0
+mdurl==0.1.2
+pip==25.0.1
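+pipp==0.0.1  # NOTE(review): not imported by arxiv-server.py; possibly an accidental typo of pip -- confirm before installing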
+pydantic==2.10.6
+pydantic-core==2.27.2
+pydantic-settings==2.8.1
+pygments==2.19.1
+python-dotenv==1.0.1
+requests==2.32.3
+rich==13.9.4
+shellingham==1.5.4
+sniffio==1.3.1
+soupsieve==2.6
+sse-starlette==2.2.1
+starlette==0.46.1
+typer==0.15.2
+typing-extensions==4.12.2
+urllib3==2.3.0
+uv==0.6.9
+uvicorn==0.34.0