unpaywall-mcp-server 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
+ # Your email address — required by the Unpaywall API as an identifier
2
+ # See https://unpaywall.org/products/api
3
+ UNPAYWALL_EMAIL=your-email@example.com
@@ -0,0 +1,9 @@
1
+ venv/
2
+ __pycache__/
3
+ *.pyc
4
+ .env
5
+ .claude/
6
+ dist/
7
+ build/
8
+ *.egg-info/
9
+ src/*.egg-info/
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 SMABoundless
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,103 @@
1
+ Metadata-Version: 2.4
2
+ Name: unpaywall-mcp-server
3
+ Version: 1.0.0
4
+ Summary: MCP server for the Unpaywall API — open-access availability for 120M+ scholarly articles
5
+ Project-URL: Homepage, https://github.com/SMABoundless/unpaywall-mcp-server
6
+ Project-URL: Repository, https://github.com/SMABoundless/unpaywall-mcp-server
7
+ Author: SMABoundless
8
+ License-Expression: MIT
9
+ License-File: LICENSE
10
+ Keywords: doi,mcp,open-access,scholarly,unpaywall
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Topic :: Scientific/Engineering
16
+ Requires-Python: >=3.10
17
+ Requires-Dist: httpx>=0.27.0
18
+ Requires-Dist: mcp[cli]>=1.0.0
19
+ Description-Content-Type: text/markdown
20
+
21
+ # Unpaywall MCP Server
22
+
23
+ An MCP (Model Context Protocol) server for the [Unpaywall](https://unpaywall.org/) API — open-access availability for 120M+ scholarly articles.
24
+
25
+ Built with [FastMCP](https://github.com/modelcontextprotocol/python-sdk).
26
+
27
+ ## Tools
28
+
29
+ | Tool | Description |
30
+ |------|-------------|
31
+ | `unpaywall_lookup` | Look up open-access availability by DOI — returns OA status, PDF/landing page URLs, license, all OA locations |
32
+ | `unpaywall_search` | Search 120M+ article titles with OA filtering and pagination |
33
+ | `unpaywall_export_ris` | Export results as RIS (for Zotero, EndNote, etc.) |
34
+ | `unpaywall_export_bibtex` | Export results as BibTeX |
35
+
36
+ ## What you get
37
+
38
+ - Open-access status and OA type (gold, green, hybrid, bronze)
39
+ - Best OA location with direct PDF link
40
+ - All OA locations across repositories and publishers
41
+ - Journal OA status, ISSN, publisher info
42
+ - Author lists, publication dates, DOIs
43
+ - License information per location
44
+
45
+ ## Setup
46
+
47
+ ### 1. Get your email ready
48
+
49
+ Unpaywall requires an email address as your API identifier (no API key needed). See the [Unpaywall API docs](https://unpaywall.org/products/api).
50
+
51
+ ### 2. Install
52
+
53
+ ```bash
54
+ pip install unpaywall-mcp-server
55
+ ```
56
+
57
+ Or install from source (after cloning the repository):
58
+
59
+ ```bash
60
+ cd unpaywall-mcp-server
61
+ python3 -m venv venv
62
+ source venv/bin/activate
63
+ pip install -r requirements.txt
64
+ ```
65
+
66
+ ### 3. Add to Claude Desktop
67
+
68
+ Add this to your `claude_desktop_config.json`:
69
+
70
+ ```json
71
+ {
72
+ "mcpServers": {
73
+ "unpaywall": {
74
+ "command": "uvx",
75
+ "args": ["unpaywall-mcp-server"],
76
+ "env": {
77
+ "UNPAYWALL_EMAIL": "your-email@example.com"
78
+ }
79
+ }
80
+ }
81
+ }
82
+ ```
83
+
84
+ Or if using Claude Code CLI:
85
+
86
+ ```bash
87
+ claude mcp add unpaywall \
88
+ uvx unpaywall-mcp-server \
89
+ -e UNPAYWALL_EMAIL=your-email@example.com
90
+ ```
91
+
92
+ ## Usage examples
93
+
94
+ - "Is there an open-access version of DOI 10.1038/nature12373?"
95
+ - "Search Unpaywall for open-access papers on CRISPR gene editing"
96
+ - "Export these results as RIS for Zotero"
97
+ - "Find OA articles about climate change mitigation"
98
+
99
+ ## License
100
+
101
+ MIT
102
+
103
+ <!-- mcp-name: io.github.smaboundless/unpaywall -->
@@ -0,0 +1,83 @@
1
+ # Unpaywall MCP Server
2
+
3
+ An MCP (Model Context Protocol) server for the [Unpaywall](https://unpaywall.org/) API — open-access availability for 120M+ scholarly articles.
4
+
5
+ Built with [FastMCP](https://github.com/modelcontextprotocol/python-sdk).
6
+
7
+ ## Tools
8
+
9
+ | Tool | Description |
10
+ |------|-------------|
11
+ | `unpaywall_lookup` | Look up open-access availability by DOI — returns OA status, PDF/landing page URLs, license, all OA locations |
12
+ | `unpaywall_search` | Search 120M+ article titles with OA filtering and pagination |
13
+ | `unpaywall_export_ris` | Export results as RIS (for Zotero, EndNote, etc.) |
14
+ | `unpaywall_export_bibtex` | Export results as BibTeX |
15
+
16
+ ## What you get
17
+
18
+ - Open-access status and OA type (gold, green, hybrid, bronze)
19
+ - Best OA location with direct PDF link
20
+ - All OA locations across repositories and publishers
21
+ - Journal OA status, ISSN, publisher info
22
+ - Author lists, publication dates, DOIs
23
+ - License information per location
24
+
25
+ ## Setup
26
+
27
+ ### 1. Get your email ready
28
+
29
+ Unpaywall requires an email address as your API identifier (no API key needed). See the [Unpaywall API docs](https://unpaywall.org/products/api).
30
+
31
+ ### 2. Install
32
+
33
+ ```bash
34
+ pip install unpaywall-mcp-server
35
+ ```
36
+
37
+ Or install from source (after cloning the repository):
38
+
39
+ ```bash
40
+ cd unpaywall-mcp-server
41
+ python3 -m venv venv
42
+ source venv/bin/activate
43
+ pip install -r requirements.txt
44
+ ```
45
+
46
+ ### 3. Add to Claude Desktop
47
+
48
+ Add this to your `claude_desktop_config.json`:
49
+
50
+ ```json
51
+ {
52
+ "mcpServers": {
53
+ "unpaywall": {
54
+ "command": "uvx",
55
+ "args": ["unpaywall-mcp-server"],
56
+ "env": {
57
+ "UNPAYWALL_EMAIL": "your-email@example.com"
58
+ }
59
+ }
60
+ }
61
+ }
62
+ ```
63
+
64
+ Or if using Claude Code CLI:
65
+
66
+ ```bash
67
+ claude mcp add unpaywall \
68
+ uvx unpaywall-mcp-server \
69
+ -e UNPAYWALL_EMAIL=your-email@example.com
70
+ ```
71
+
72
+ ## Usage examples
73
+
74
+ - "Is there an open-access version of DOI 10.1038/nature12373?"
75
+ - "Search Unpaywall for open-access papers on CRISPR gene editing"
76
+ - "Export these results as RIS for Zotero"
77
+ - "Find OA articles about climate change mitigation"
78
+
79
+ ## License
80
+
81
+ MIT
82
+
83
+ <!-- mcp-name: io.github.smaboundless/unpaywall -->
@@ -0,0 +1,33 @@
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "unpaywall-mcp-server"
7
+ version = "1.0.0"
8
+ description = "MCP server for the Unpaywall API — open-access availability for 120M+ scholarly articles"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.10"
12
+ authors = [
13
+ { name = "SMABoundless" },
14
+ ]
15
+ keywords = ["mcp", "unpaywall", "open-access", "scholarly", "doi"]
16
+ classifiers = [
17
+ "Development Status :: 4 - Beta",
18
+ "Intended Audience :: Science/Research",
19
+ "License :: OSI Approved :: MIT License",
20
+ "Programming Language :: Python :: 3",
21
+ "Topic :: Scientific/Engineering",
22
+ ]
23
+ dependencies = [
24
+ "mcp[cli]>=1.0.0",
25
+ "httpx>=0.27.0",
26
+ ]
27
+
28
+ [project.scripts]
29
+ unpaywall-mcp-server = "unpaywall_mcp_server:main"
30
+
31
+ [project.urls]
32
+ Homepage = "https://github.com/SMABoundless/unpaywall-mcp-server"
33
+ Repository = "https://github.com/SMABoundless/unpaywall-mcp-server"
@@ -0,0 +1,2 @@
1
+ mcp[cli]>=1.0.0
2
+ httpx>=0.27.0
@@ -0,0 +1,345 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Unpaywall MCP Server for Claude Desktop
4
+
5
+ Provides Claude Desktop with tools to look up open-access availability
6
+ for scholarly articles, search by title, and export citations.
7
+
8
+ Unpaywall API docs: https://unpaywall.org/products/api
9
+ """
10
+
11
+ import os
12
+ from typing import Optional
13
+ from mcp.server.fastmcp import FastMCP
14
+ import httpx
15
+
16
+ # ── Configuration ──────────────────────────────────────────────────────────
17
+
18
# Value of the UNPAYWALL_EMAIL environment variable (empty string when unset);
# _get() sends it as the `email` query parameter on every request.
EMAIL = os.environ.get("UNPAYWALL_EMAIL", "")
# Common URL prefix for the Unpaywall v2 endpoints used by the tools below.
BASE_URL = "https://api.unpaywall.org/v2"

# FastMCP application object; the @mcp.tool() functions register on it.
mcp = FastMCP("Unpaywall")
22
+
23
+
24
+ # ── Helpers ────────────────────────────────────────────────────────────────
25
+
26
async def _get(url: str, params: Optional[dict] = None) -> dict:
    """Send a GET request to the Unpaywall API and return the decoded JSON body.

    Args:
        url: Full endpoint URL to request.
        params: Optional query parameters. The dict is copied, never modified.

    Returns:
        The parsed JSON response.

    Raises:
        httpx.HTTPStatusError: Raised by ``raise_for_status()`` on an error
            status code.
    """
    # Build a fresh dict so the caller's `params` is not mutated; the module
    # level EMAIL always wins over any "email" key the caller supplied.
    query = {**(params or {}), "email": EMAIL}
    async with httpx.AsyncClient(timeout=30) as client:
        resp = await client.get(url, params=query)
        resp.raise_for_status()
        return resp.json()
34
+
35
+
36
+ def _get_authors(data: dict) -> str:
37
+ """Extract author names from an Unpaywall record."""
38
+ authors = data.get("z_authors") or []
39
+ names = []
40
+ for a in authors:
41
+ given = a.get("given", "")
42
+ family = a.get("family", "")
43
+ name = f"{family}, {given}".strip(", ") if family else given
44
+ if name:
45
+ names.append(name)
46
+ return "; ".join(names) if names else "Unknown"
47
+
48
+
49
+ def _format_location(loc: dict) -> str:
50
+ """Format a single OA location."""
51
+ host = loc.get("host_type", "")
52
+ url = loc.get("url_for_pdf") or loc.get("url_for_landing_page") or loc.get("url", "")
53
+ version = loc.get("version", "")
54
+ license_ = loc.get("license", "")
55
+ source = loc.get("evidence", "")
56
+
57
+ line = f" - [{host}]"
58
+ if version:
59
+ line += f" {version}"
60
+ if license_:
61
+ line += f" ({license_})"
62
+ if url:
63
+ line += f"\n {url}"
64
+ if source:
65
+ line += f"\n Evidence: {source}"
66
+ return line
67
+
68
+
69
def _format_result(i: int, data: dict) -> str:
    """Render one search hit as a numbered, multi-line text entry.

    Args:
        i: Ordinal printed in front of the title.
        data: Unpaywall record dict for the hit.

    Returns:
        The title line followed by authors, whichever of journal / year /
        publisher / DOI are present, the open-access flag, and the best OA
        location's link and license when available.
    """
    # `or` instead of a .get default: a title key that is present but null
    # would otherwise be printed as "None".
    title = data.get("title") or "Untitled"
    rows = [f"{i}. {title}", f" Authors: {_get_authors(data)}"]

    for label, key in (
        ("Journal", "journal_name"),
        ("Year", "year"),
        ("Publisher", "publisher"),
    ):
        value = data.get(key, "")
        if value:
            rows.append(f" {label}: {value}")

    doi = data.get("doi", "")
    if doi:
        rows.append(f" DOI: https://doi.org/{doi}")

    oa_row = f" Open Access: {'Yes' if data.get('is_oa', False) else 'No'}"
    oa_status = data.get("oa_status", "")
    if oa_status:
        oa_row += f" ({oa_status})"
    rows.append(oa_row)

    best = data.get("best_oa_location")
    if best:
        pdf = best.get("url_for_pdf", "")
        landing = best.get("url_for_landing_page", "")
        license_ = best.get("license", "")
        # Prefer the direct PDF link; fall back to the landing page.
        if pdf:
            rows.append(f" PDF: {pdf}")
        elif landing:
            rows.append(f" URL: {landing}")
        if license_:
            rows.append(f" License: {license_}")

    return "\n".join(rows)
107
+
108
+
109
def _format_lookup(data: dict) -> str:
    """Render a single DOI lookup response as a multi-line report.

    Args:
        data: Unpaywall record dict returned for one DOI.

    Returns:
        Bibliographic fields (title, authors, journal, dates, type, publisher,
        DOI), the open-access flag, the best OA location, and the full list of
        OA locations when there is more than one.
    """
    # `or` instead of a .get default: a title key that is present but null
    # would otherwise be printed as "None".
    title = data.get("title") or "Untitled"
    chunks = [f"Title: {title}\n", f"Authors: {_get_authors(data)}\n"]

    journal = data.get("journal_name", "")
    if journal:
        journal_row = f"Journal: {journal}"
        journal_issns = data.get("journal_issns", "")
        if journal_issns:
            journal_row += f" (ISSN: {journal_issns})"
        chunks.append(journal_row + "\n")
    if data.get("journal_is_oa", False):
        chunks.append("Journal is fully OA: Yes\n")

    for label, key in (
        ("Year", "year"),
        ("Published", "published_date"),
        ("Type", "genre"),
        ("Publisher", "publisher"),
    ):
        value = data.get(key, "")
        if value:
            chunks.append(f"{label}: {value}\n")

    doi = data.get("doi", "")
    if doi:
        chunks.append(f"DOI: https://doi.org/{doi}\n")

    oa_row = f"\nOpen Access: {'Yes' if data.get('is_oa', False) else 'No'}"
    oa_status = data.get("oa_status", "")
    if oa_status:
        oa_row += f" (status: {oa_status})"
    chunks.append(oa_row + "\n")

    # Best OA location
    best = data.get("best_oa_location")
    if best:
        chunks.append(f"\nBest OA Location:\n{_format_location(best)}\n")

    # All OA locations: listed only when there is more than one, since a
    # single location is already shown as the best one.
    locations = data.get("oa_locations") or []
    if len(locations) > 1:
        chunks.append(f"\nAll OA Locations ({len(locations)}):\n")
        chunks.extend(f"{_format_location(loc)}\n" for loc in locations)

    return "".join(chunks)
162
+
163
+
164
+ def _result_to_ris(data: dict) -> str:
165
+ """Convert an Unpaywall record to RIS format."""
166
+ genre = (data.get("genre") or "").lower()
167
+ ris_type = "JOUR" if "journal" in genre else "GEN"
168
+ lines = [f"TY - {ris_type}"]
169
+
170
+ if data.get("title"):
171
+ lines.append(f"TI - {data['title']}")
172
+
173
+ for a in (data.get("z_authors") or []):
174
+ given = a.get("given", "")
175
+ family = a.get("family", "")
176
+ name = f"{family}, {given}".strip(", ") if family else given
177
+ if name:
178
+ lines.append(f"AU - {name}")
179
+
180
+ if data.get("journal_name"):
181
+ lines.append(f"JO - {data['journal_name']}")
182
+ if data.get("year"):
183
+ lines.append(f"PY - {data['year']}")
184
+ if data.get("doi"):
185
+ lines.append(f"DO - {data['doi']}")
186
+ if data.get("publisher"):
187
+ lines.append(f"PB - {data['publisher']}")
188
+ if data.get("journal_issns"):
189
+ lines.append(f"SN - {data['journal_issns']}")
190
+
191
+ best = data.get("best_oa_location") or {}
192
+ url = best.get("url_for_pdf") or best.get("url_for_landing_page") or ""
193
+ if url:
194
+ lines.append(f"UR - {url}")
195
+
196
+ lines.append("ER - ")
197
+ return "\n".join(lines)
198
+
199
+
200
+ def _result_to_bibtex(data: dict) -> str:
201
+ """Convert an Unpaywall record to BibTeX format."""
202
+ genre = (data.get("genre") or "").lower()
203
+ bib_type = "article" if "journal" in genre else "misc"
204
+
205
+ authors_raw = data.get("z_authors") or []
206
+ first_family = "unknown"
207
+ if authors_raw and authors_raw[0].get("family"):
208
+ first_family = authors_raw[0]["family"].replace(" ", "")
209
+ year = data.get("year", "nd")
210
+ key = f"{first_family}{year}"
211
+
212
+ author_names = []
213
+ for a in authors_raw:
214
+ given = a.get("given", "")
215
+ family = a.get("family", "")
216
+ name = f"{family}, {given}".strip(", ") if family else given
217
+ if name:
218
+ author_names.append(name)
219
+
220
+ lines = [f"@{bib_type}{{{key},"]
221
+ if data.get("title"):
222
+ lines.append(f" title = {{{data['title']}}},")
223
+ if author_names:
224
+ lines.append(f" author = {{{' and '.join(author_names)}}},")
225
+ if data.get("journal_name"):
226
+ lines.append(f" journal = {{{data['journal_name']}}},")
227
+ if year != "nd":
228
+ lines.append(f" year = {{{year}}},")
229
+ if data.get("doi"):
230
+ lines.append(f" doi = {{{data['doi']}}},")
231
+ if data.get("publisher"):
232
+ lines.append(f" publisher = {{{data['publisher']}}},")
233
+ lines.append("}")
234
+ return "\n".join(lines)
235
+
236
+
237
+ # ── Store last results for export ─────────────────────────────────────────
238
+
239
# Records from the most recent unpaywall_lookup / unpaywall_search call; the
# export tools read this list. It is module-level state, replaced on each call.
_last_results: list = []
240
+
241
+
242
+ # ── Tools ─────────────────────────────────────────────────────────────────
243
+
244
@mcp.tool()
async def unpaywall_lookup(doi: str) -> str:
    """
    Look up open-access availability for a scholarly article by DOI.

    Returns OA status, best OA location (PDF/landing page URL, license),
    all OA locations, journal info, authors, and publication details.

    Args:
        doi: The DOI to look up (e.g. "10.1038/nature12373"). A leading
            "doi:" or "https://doi.org/" prefix is accepted and removed.
    """
    global _last_results
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    # Reduce "doi:10.x/y" and resolver URLs to the bare DOI.
    bare = doi.strip()
    lowered = bare.lower()
    for prefix in (
        "https://doi.org/",
        "http://doi.org/",
        "https://dx.doi.org/",
        "http://dx.doi.org/",
        "doi:",
    ):
        if lowered.startswith(prefix):
            bare = bare[len(prefix):].strip()
            break
    if not bare:
        return "Error: no DOI provided."

    try:
        # Percent-encode the DOI (keeping "/") so characters such as "#" or
        # "?" stay in the URL path instead of starting a fragment or query.
        data = await _get(f"{BASE_URL}/{quote(bare, safe='/')}")
        _last_results = [data]
        return _format_lookup(data)
    except httpx.HTTPStatusError as e:
        if e.response.status_code == 404:
            return f"DOI not found: {doi}"
        return f"Unpaywall API error: {e.response.status_code} — {e.response.text}"
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error: {str(e)}"
266
+
267
+
268
@mcp.tool()
async def unpaywall_search(
    query: str,
    is_oa: Optional[bool] = None,
    page: int = 1,
) -> str:
    """
    Search Unpaywall for scholarly articles by title.

    Searches across 120M+ article titles. Supports AND (default),
    quoted phrases, OR, and negation (-term). Returns up to 50 results per page.

    Args:
        query: Search terms for article titles (e.g. "machine learning")
        is_oa: Filter by open-access status (true = OA only, false = closed only, omit for all)
        page: Page number for pagination (50 results per page, default 1)
    """
    global _last_results
    params = {"query": query, "page": page}
    if is_oa is not None:
        # Sent as lowercase text: "true" / "false".
        params["is_oa"] = str(is_oa).lower()

    try:
        data = await _get(f"{BASE_URL}/search/", params)
        # `or` guards: a key present with a null value must not break the
        # iteration below and turn a valid response into a generic error.
        results = data.get("results") or []
        elapsed = data.get("elapsed_seconds")

        docs = [(r.get("response") or {}) for r in results]
        _last_results = docs

        if not docs:
            return f"No results found for: {query}"

        header = f"Unpaywall Search: {len(docs)} results (page {page})"
        # Apply the float format only to real numbers; any other value would
        # raise and discard the results that were already fetched.
        if isinstance(elapsed, (int, float)) and elapsed:
            header += f" in {elapsed:.2f}s"
        header += f"\nQuery: {query}\n"
        header += "=" * 60 + "\n\n"

        formatted = "\n\n".join(
            _format_result(i, doc) for i, doc in enumerate(docs, 1)
        )
        return header + formatted
    except httpx.HTTPStatusError as e:
        return f"Unpaywall API error: {e.response.status_code} — {e.response.text}"
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error: {str(e)}"
315
+
316
+
317
@mcp.tool()
async def unpaywall_export_ris() -> str:
    """
    Export the most recent unpaywall_lookup or unpaywall_search results as RIS format.
    Save output as a .ris file and import into Zotero: File -> Import.
    """
    # Guard clause inverted: handle the populated case first.
    if _last_results:
        entries = list(map(_result_to_ris, _last_results))
        heading = f"RIS Export ({len(entries)} records) — Save as .ris and import into Zotero:\n\n"
        return heading + "\n\n".join(entries)
    return "No results to export. Run unpaywall_lookup or unpaywall_search first."
328
+
329
+
330
@mcp.tool()
async def unpaywall_export_bibtex() -> str:
    """
    Export the most recent unpaywall_lookup or unpaywall_search results as BibTeX format.
    """
    # Guard clause inverted: handle the populated case first.
    if _last_results:
        entries = list(map(_result_to_bibtex, _last_results))
        heading = f"BibTeX Export ({len(entries)} records):\n\n"
        return heading + "\n\n".join(entries)
    return "No results to export. Run unpaywall_lookup or unpaywall_search first."
340
+
341
+
342
+ # ── Run ────────────────────────────────────────────────────────────────────
343
+
344
# Start the MCP server when this file is executed directly as a script.
if __name__ == "__main__":
    mcp.run()
@@ -0,0 +1,4 @@
1
+ from .server import mcp
2
+
3
def main() -> None:
    """Start the MCP server by calling ``mcp.run()`` on the imported app."""
    mcp.run()
@@ -0,0 +1,345 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Unpaywall MCP Server for Claude Desktop
4
+
5
+ Provides Claude Desktop with tools to look up open-access availability
6
+ for scholarly articles, search by title, and export citations.
7
+
8
+ Unpaywall API docs: https://unpaywall.org/products/api
9
+ """
10
+
11
+ import os
12
+ from typing import Optional
13
+ from mcp.server.fastmcp import FastMCP
14
+ import httpx
15
+
16
+ # ── Configuration ──────────────────────────────────────────────────────────
17
+
18
# Value of the UNPAYWALL_EMAIL environment variable (empty string when unset);
# _get() sends it as the `email` query parameter on every request.
EMAIL = os.environ.get("UNPAYWALL_EMAIL", "")
# Common URL prefix for the Unpaywall v2 endpoints used by the tools below.
BASE_URL = "https://api.unpaywall.org/v2"

# FastMCP application object; the @mcp.tool() functions register on it.
mcp = FastMCP("Unpaywall")
22
+
23
+
24
+ # ── Helpers ────────────────────────────────────────────────────────────────
25
+
26
async def _get(url: str, params: Optional[dict] = None) -> dict:
    """Send a GET request to the Unpaywall API and return the decoded JSON body.

    Args:
        url: Full endpoint URL to request.
        params: Optional query parameters. The dict is copied, never modified.

    Returns:
        The parsed JSON response.

    Raises:
        httpx.HTTPStatusError: Raised by ``raise_for_status()`` on an error
            status code.
    """
    # Build a fresh dict so the caller's `params` is not mutated; the module
    # level EMAIL always wins over any "email" key the caller supplied.
    query = {**(params or {}), "email": EMAIL}
    async with httpx.AsyncClient(timeout=30) as client:
        resp = await client.get(url, params=query)
        resp.raise_for_status()
        return resp.json()
34
+
35
+
36
+ def _get_authors(data: dict) -> str:
37
+ """Extract author names from an Unpaywall record."""
38
+ authors = data.get("z_authors") or []
39
+ names = []
40
+ for a in authors:
41
+ given = a.get("given", "")
42
+ family = a.get("family", "")
43
+ name = f"{family}, {given}".strip(", ") if family else given
44
+ if name:
45
+ names.append(name)
46
+ return "; ".join(names) if names else "Unknown"
47
+
48
+
49
+ def _format_location(loc: dict) -> str:
50
+ """Format a single OA location."""
51
+ host = loc.get("host_type", "")
52
+ url = loc.get("url_for_pdf") or loc.get("url_for_landing_page") or loc.get("url", "")
53
+ version = loc.get("version", "")
54
+ license_ = loc.get("license", "")
55
+ source = loc.get("evidence", "")
56
+
57
+ line = f" - [{host}]"
58
+ if version:
59
+ line += f" {version}"
60
+ if license_:
61
+ line += f" ({license_})"
62
+ if url:
63
+ line += f"\n {url}"
64
+ if source:
65
+ line += f"\n Evidence: {source}"
66
+ return line
67
+
68
+
69
def _format_result(i: int, data: dict) -> str:
    """Render one search hit as a numbered, multi-line text entry.

    Args:
        i: Ordinal printed in front of the title.
        data: Unpaywall record dict for the hit.

    Returns:
        The title line followed by authors, whichever of journal / year /
        publisher / DOI are present, the open-access flag, and the best OA
        location's link and license when available.
    """
    # `or` instead of a .get default: a title key that is present but null
    # would otherwise be printed as "None".
    title = data.get("title") or "Untitled"
    rows = [f"{i}. {title}", f" Authors: {_get_authors(data)}"]

    for label, key in (
        ("Journal", "journal_name"),
        ("Year", "year"),
        ("Publisher", "publisher"),
    ):
        value = data.get(key, "")
        if value:
            rows.append(f" {label}: {value}")

    doi = data.get("doi", "")
    if doi:
        rows.append(f" DOI: https://doi.org/{doi}")

    oa_row = f" Open Access: {'Yes' if data.get('is_oa', False) else 'No'}"
    oa_status = data.get("oa_status", "")
    if oa_status:
        oa_row += f" ({oa_status})"
    rows.append(oa_row)

    best = data.get("best_oa_location")
    if best:
        pdf = best.get("url_for_pdf", "")
        landing = best.get("url_for_landing_page", "")
        license_ = best.get("license", "")
        # Prefer the direct PDF link; fall back to the landing page.
        if pdf:
            rows.append(f" PDF: {pdf}")
        elif landing:
            rows.append(f" URL: {landing}")
        if license_:
            rows.append(f" License: {license_}")

    return "\n".join(rows)
107
+
108
+
109
def _format_lookup(data: dict) -> str:
    """Render a single DOI lookup response as a multi-line report.

    Args:
        data: Unpaywall record dict returned for one DOI.

    Returns:
        Bibliographic fields (title, authors, journal, dates, type, publisher,
        DOI), the open-access flag, the best OA location, and the full list of
        OA locations when there is more than one.
    """
    # `or` instead of a .get default: a title key that is present but null
    # would otherwise be printed as "None".
    title = data.get("title") or "Untitled"
    chunks = [f"Title: {title}\n", f"Authors: {_get_authors(data)}\n"]

    journal = data.get("journal_name", "")
    if journal:
        journal_row = f"Journal: {journal}"
        journal_issns = data.get("journal_issns", "")
        if journal_issns:
            journal_row += f" (ISSN: {journal_issns})"
        chunks.append(journal_row + "\n")
    if data.get("journal_is_oa", False):
        chunks.append("Journal is fully OA: Yes\n")

    for label, key in (
        ("Year", "year"),
        ("Published", "published_date"),
        ("Type", "genre"),
        ("Publisher", "publisher"),
    ):
        value = data.get(key, "")
        if value:
            chunks.append(f"{label}: {value}\n")

    doi = data.get("doi", "")
    if doi:
        chunks.append(f"DOI: https://doi.org/{doi}\n")

    oa_row = f"\nOpen Access: {'Yes' if data.get('is_oa', False) else 'No'}"
    oa_status = data.get("oa_status", "")
    if oa_status:
        oa_row += f" (status: {oa_status})"
    chunks.append(oa_row + "\n")

    # Best OA location
    best = data.get("best_oa_location")
    if best:
        chunks.append(f"\nBest OA Location:\n{_format_location(best)}\n")

    # All OA locations: listed only when there is more than one, since a
    # single location is already shown as the best one.
    locations = data.get("oa_locations") or []
    if len(locations) > 1:
        chunks.append(f"\nAll OA Locations ({len(locations)}):\n")
        chunks.extend(f"{_format_location(loc)}\n" for loc in locations)

    return "".join(chunks)
162
+
163
+
164
+ def _result_to_ris(data: dict) -> str:
165
+ """Convert an Unpaywall record to RIS format."""
166
+ genre = (data.get("genre") or "").lower()
167
+ ris_type = "JOUR" if "journal" in genre else "GEN"
168
+ lines = [f"TY - {ris_type}"]
169
+
170
+ if data.get("title"):
171
+ lines.append(f"TI - {data['title']}")
172
+
173
+ for a in (data.get("z_authors") or []):
174
+ given = a.get("given", "")
175
+ family = a.get("family", "")
176
+ name = f"{family}, {given}".strip(", ") if family else given
177
+ if name:
178
+ lines.append(f"AU - {name}")
179
+
180
+ if data.get("journal_name"):
181
+ lines.append(f"JO - {data['journal_name']}")
182
+ if data.get("year"):
183
+ lines.append(f"PY - {data['year']}")
184
+ if data.get("doi"):
185
+ lines.append(f"DO - {data['doi']}")
186
+ if data.get("publisher"):
187
+ lines.append(f"PB - {data['publisher']}")
188
+ if data.get("journal_issns"):
189
+ lines.append(f"SN - {data['journal_issns']}")
190
+
191
+ best = data.get("best_oa_location") or {}
192
+ url = best.get("url_for_pdf") or best.get("url_for_landing_page") or ""
193
+ if url:
194
+ lines.append(f"UR - {url}")
195
+
196
+ lines.append("ER - ")
197
+ return "\n".join(lines)
198
+
199
+
200
+ def _result_to_bibtex(data: dict) -> str:
201
+ """Convert an Unpaywall record to BibTeX format."""
202
+ genre = (data.get("genre") or "").lower()
203
+ bib_type = "article" if "journal" in genre else "misc"
204
+
205
+ authors_raw = data.get("z_authors") or []
206
+ first_family = "unknown"
207
+ if authors_raw and authors_raw[0].get("family"):
208
+ first_family = authors_raw[0]["family"].replace(" ", "")
209
+ year = data.get("year", "nd")
210
+ key = f"{first_family}{year}"
211
+
212
+ author_names = []
213
+ for a in authors_raw:
214
+ given = a.get("given", "")
215
+ family = a.get("family", "")
216
+ name = f"{family}, {given}".strip(", ") if family else given
217
+ if name:
218
+ author_names.append(name)
219
+
220
+ lines = [f"@{bib_type}{{{key},"]
221
+ if data.get("title"):
222
+ lines.append(f" title = {{{data['title']}}},")
223
+ if author_names:
224
+ lines.append(f" author = {{{' and '.join(author_names)}}},")
225
+ if data.get("journal_name"):
226
+ lines.append(f" journal = {{{data['journal_name']}}},")
227
+ if year != "nd":
228
+ lines.append(f" year = {{{year}}},")
229
+ if data.get("doi"):
230
+ lines.append(f" doi = {{{data['doi']}}},")
231
+ if data.get("publisher"):
232
+ lines.append(f" publisher = {{{data['publisher']}}},")
233
+ lines.append("}")
234
+ return "\n".join(lines)
235
+
236
+
237
+ # ── Store last results for export ─────────────────────────────────────────
238
+
239
# Records from the most recent unpaywall_lookup / unpaywall_search call; the
# export tools read this list. It is module-level state, replaced on each call.
_last_results: list = []
240
+
241
+
242
+ # ── Tools ─────────────────────────────────────────────────────────────────
243
+
244
@mcp.tool()
async def unpaywall_lookup(doi: str) -> str:
    """
    Look up open-access availability for a scholarly article by DOI.

    Returns OA status, best OA location (PDF/landing page URL, license),
    all OA locations, journal info, authors, and publication details.

    Args:
        doi: The DOI to look up (e.g. "10.1038/nature12373"). A leading
            "doi:" or "https://doi.org/" prefix is accepted and removed.
    """
    global _last_results
    # Function-scope import keeps this block self-contained.
    from urllib.parse import quote

    # Reduce "doi:10.x/y" and resolver URLs to the bare DOI.
    bare = doi.strip()
    lowered = bare.lower()
    for prefix in (
        "https://doi.org/",
        "http://doi.org/",
        "https://dx.doi.org/",
        "http://dx.doi.org/",
        "doi:",
    ):
        if lowered.startswith(prefix):
            bare = bare[len(prefix):].strip()
            break
    if not bare:
        return "Error: no DOI provided."

    try:
        # Percent-encode the DOI (keeping "/") so characters such as "#" or
        # "?" stay in the URL path instead of starting a fragment or query.
        data = await _get(f"{BASE_URL}/{quote(bare, safe='/')}")
        _last_results = [data]
        return _format_lookup(data)
    except httpx.HTTPStatusError as e:
        if e.response.status_code == 404:
            return f"DOI not found: {doi}"
        return f"Unpaywall API error: {e.response.status_code} — {e.response.text}"
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error: {str(e)}"
266
+
267
+
268
@mcp.tool()
async def unpaywall_search(
    query: str,
    is_oa: Optional[bool] = None,
    page: int = 1,
) -> str:
    """
    Search Unpaywall for scholarly articles by title.

    Searches across 120M+ article titles. Supports AND (default),
    quoted phrases, OR, and negation (-term). Returns up to 50 results per page.

    Args:
        query: Search terms for article titles (e.g. "machine learning")
        is_oa: Filter by open-access status (true = OA only, false = closed only, omit for all)
        page: Page number for pagination (50 results per page, default 1)
    """
    global _last_results
    params = {"query": query, "page": page}
    if is_oa is not None:
        # Sent as lowercase text: "true" / "false".
        params["is_oa"] = str(is_oa).lower()

    try:
        data = await _get(f"{BASE_URL}/search/", params)
        # `or` guards: a key present with a null value must not break the
        # iteration below and turn a valid response into a generic error.
        results = data.get("results") or []
        elapsed = data.get("elapsed_seconds")

        docs = [(r.get("response") or {}) for r in results]
        _last_results = docs

        if not docs:
            return f"No results found for: {query}"

        header = f"Unpaywall Search: {len(docs)} results (page {page})"
        # Apply the float format only to real numbers; any other value would
        # raise and discard the results that were already fetched.
        if isinstance(elapsed, (int, float)) and elapsed:
            header += f" in {elapsed:.2f}s"
        header += f"\nQuery: {query}\n"
        header += "=" * 60 + "\n\n"

        formatted = "\n\n".join(
            _format_result(i, doc) for i, doc in enumerate(docs, 1)
        )
        return header + formatted
    except httpx.HTTPStatusError as e:
        return f"Unpaywall API error: {e.response.status_code} — {e.response.text}"
    except Exception as e:
        # Tool boundary: report the failure as text instead of raising.
        return f"Error: {str(e)}"
315
+
316
+
317
@mcp.tool()
async def unpaywall_export_ris() -> str:
    """
    Export the most recent unpaywall_lookup or unpaywall_search results as RIS format.
    Save output as a .ris file and import into Zotero: File -> Import.
    """
    # Guard clause inverted: handle the populated case first.
    if _last_results:
        entries = list(map(_result_to_ris, _last_results))
        heading = f"RIS Export ({len(entries)} records) — Save as .ris and import into Zotero:\n\n"
        return heading + "\n\n".join(entries)
    return "No results to export. Run unpaywall_lookup or unpaywall_search first."
328
+
329
+
330
@mcp.tool()
async def unpaywall_export_bibtex() -> str:
    """
    Export the most recent unpaywall_lookup or unpaywall_search results as BibTeX format.
    """
    # Guard clause inverted: handle the populated case first.
    if _last_results:
        entries = list(map(_result_to_bibtex, _last_results))
        heading = f"BibTeX Export ({len(entries)} records):\n\n"
        return heading + "\n\n".join(entries)
    return "No results to export. Run unpaywall_lookup or unpaywall_search first."
340
+
341
+
342
+ # ── Run ────────────────────────────────────────────────────────────────────
343
+
344
# Start the MCP server when this file is executed directly as a script.
if __name__ == "__main__":
    mcp.run()