tooluniverse-1.0.4-py3-none-any.whl → tooluniverse-1.0.6-py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
Potentially problematic release: this version of tooluniverse might be problematic.
- tooluniverse/__init__.py +56 -5
- tooluniverse/agentic_tool.py +90 -14
- tooluniverse/arxiv_tool.py +113 -0
- tooluniverse/biorxiv_tool.py +97 -0
- tooluniverse/core_tool.py +153 -0
- tooluniverse/crossref_tool.py +73 -0
- tooluniverse/data/agentic_tools.json +2 -2
- tooluniverse/data/arxiv_tools.json +87 -0
- tooluniverse/data/biorxiv_tools.json +70 -0
- tooluniverse/data/core_tools.json +105 -0
- tooluniverse/data/crossref_tools.json +70 -0
- tooluniverse/data/dblp_tools.json +73 -0
- tooluniverse/data/doaj_tools.json +94 -0
- tooluniverse/data/fatcat_tools.json +72 -0
- tooluniverse/data/hal_tools.json +70 -0
- tooluniverse/data/medrxiv_tools.json +70 -0
- tooluniverse/data/odphp_tools.json +354 -0
- tooluniverse/data/openaire_tools.json +85 -0
- tooluniverse/data/osf_preprints_tools.json +77 -0
- tooluniverse/data/pmc_tools.json +109 -0
- tooluniverse/data/pubmed_tools.json +65 -0
- tooluniverse/data/unpaywall_tools.json +86 -0
- tooluniverse/data/wikidata_sparql_tools.json +42 -0
- tooluniverse/data/zenodo_tools.json +82 -0
- tooluniverse/dblp_tool.py +62 -0
- tooluniverse/default_config.py +18 -0
- tooluniverse/doaj_tool.py +124 -0
- tooluniverse/execute_function.py +70 -9
- tooluniverse/fatcat_tool.py +66 -0
- tooluniverse/hal_tool.py +77 -0
- tooluniverse/llm_clients.py +487 -0
- tooluniverse/mcp_tool_registry.py +3 -3
- tooluniverse/medrxiv_tool.py +97 -0
- tooluniverse/odphp_tool.py +226 -0
- tooluniverse/openaire_tool.py +145 -0
- tooluniverse/osf_preprints_tool.py +67 -0
- tooluniverse/pmc_tool.py +181 -0
- tooluniverse/pubmed_tool.py +110 -0
- tooluniverse/remote/boltz/boltz_mcp_server.py +2 -2
- tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +2 -2
- tooluniverse/smcp.py +313 -191
- tooluniverse/smcp_server.py +4 -7
- tooluniverse/test/test_claude_sdk.py +93 -0
- tooluniverse/test/test_odphp_tool.py +166 -0
- tooluniverse/test/test_openrouter_client.py +288 -0
- tooluniverse/test/test_stdio_hooks.py +1 -1
- tooluniverse/test/test_tool_finder.py +1 -1
- tooluniverse/unpaywall_tool.py +63 -0
- tooluniverse/wikidata_sparql_tool.py +61 -0
- tooluniverse/zenodo_tool.py +74 -0
- {tooluniverse-1.0.4.dist-info → tooluniverse-1.0.6.dist-info}/METADATA +101 -74
- {tooluniverse-1.0.4.dist-info → tooluniverse-1.0.6.dist-info}/RECORD +56 -19
- {tooluniverse-1.0.4.dist-info → tooluniverse-1.0.6.dist-info}/entry_points.txt +1 -0
- tooluniverse-1.0.6.dist-info/licenses/LICENSE +201 -0
- tooluniverse-1.0.4.dist-info/licenses/LICENSE +0 -21
- {tooluniverse-1.0.4.dist-info → tooluniverse-1.0.6.dist-info}/WHEEL +0 -0
- {tooluniverse-1.0.4.dist-info → tooluniverse-1.0.6.dist-info}/top_level.txt +0 -0
tooluniverse/odphp_tool.py
ADDED
@@ -0,0 +1,226 @@
+import re
+import requests
+from typing import Dict, Any, Optional, List
+from .base_tool import BaseTool
+from .tool_registry import register_tool
+
+# Optional but recommended: text extraction for HTML
+try:
+    from bs4 import BeautifulSoup  # pip install beautifulsoup4
+except ImportError:
+    BeautifulSoup = None  # We'll guard uses so the tool still loads
+
+ODPHP_BASE_URL = "https://odphp.health.gov/myhealthfinder/api/v4"
+
+
+class ODPHPRESTTool(BaseTool):
+    """Base class for ODPHP (MyHealthfinder) REST API tools."""
+
+    def __init__(self, tool_config):
+        super().__init__(tool_config)
+        self.endpoint = tool_config["fields"]["endpoint"]
+
+    def _make_request(self, params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
+        url = f"{ODPHP_BASE_URL}{self.endpoint}"
+        try:
+            resp = requests.get(url, params=params, timeout=30)
+            resp.raise_for_status()
+            data = resp.json()
+            return {
+                "data": data.get("Result"),
+                "metadata": {
+                    "source": "ODPHP MyHealthfinder",
+                    "endpoint": url,
+                    "query": params,
+                },
+            }
+        except requests.exceptions.RequestException as e:
+            return {"error": f"Request failed: {str(e)}"}
+        except ValueError as e:
+            return {"error": f"Failed to parse JSON: {str(e)}"}
+
+
+def _sections_array(resource: Dict[str, Any]) -> List[Dict[str, Any]]:
+    """
+    Tolerant accessor for the sections array.
+    Data sometimes uses Sections.Section (capital S) and sometimes Sections.section (lowercase).
+    """
+    sect = resource.get("Sections") or {}
+    arr = sect.get("Section")
+    if not isinstance(arr, list):
+        arr = sect.get("section")
+    return arr if isinstance(arr, list) else []
+
+
+def _strip_html_to_text(html: str) -> str:
+    if not html:
+        return ""
+    if BeautifulSoup is None:
+        # fallback: very light tag remover
+        text = re.sub(r"<[^>]+>", " ", html)
+        return re.sub(r"\s+", " ", text).strip()
+    soup = BeautifulSoup(html, "html.parser")
+    # remove scripts/styles
+    for t in soup(["script", "style", "noscript"]):
+        t.decompose()
+    text = soup.get_text("\n", strip=True)
+    text = re.sub(r"\n{2,}", "\n\n", text)
+    return text.strip()
+
+
+@register_tool("ODPHPMyHealthfinder")
+class ODPHPMyHealthfinder(ODPHPRESTTool):
+    """Search for demographic-specific health recommendations (MyHealthfinder)."""
+
+    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+        params: Dict[str, Any] = {}
+        if "lang" in arguments:
+            params["lang"] = arguments["lang"]
+        if "age" in arguments:
+            params["age"] = arguments["age"]
+        if "sex" in arguments:
+            params["sex"] = arguments["sex"]
+        if "pregnant" in arguments:
+            params["pregnant"] = arguments["pregnant"]
+
+        res = self._make_request(params)
+
+        # Optional: attach PlainSections if requested
+        if isinstance(res, dict) and not res.get("error") and arguments.get("strip_html"):
+            data = res.get("data") or {}
+            resources = (((data.get("Resources") or {}).get("All") or {}).get("Resource")) or []
+            if isinstance(resources, list):
+                for r in resources:
+                    plain = []
+                    for sec in _sections_array(r):
+                        plain.append({
+                            "Title": sec.get("Title", ""),
+                            "PlainContent": _strip_html_to_text(sec.get("Content", "")),
+                        })
+                    if plain:
+                        r["PlainSections"] = plain
+        return res
+
+
+@register_tool("ODPHPItemList")
+class ODPHPItemList(ODPHPRESTTool):
+    """Retrieve list of topics or categories."""
+
+    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+        params: Dict[str, Any] = {}
+        if "lang" in arguments:
+            params["lang"] = arguments["lang"]
+        if "type" in arguments:
+            params["type"] = arguments["type"]
+        return self._make_request(params)
+
+
+@register_tool("ODPHPTopicSearch")
+class ODPHPTopicSearch(ODPHPRESTTool):
+    """Search for health topics by ID, category, or keyword."""
+
+    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+        params: Dict[str, Any] = {}
+        if "lang" in arguments:
+            params["lang"] = arguments["lang"]
+        if "topicId" in arguments:
+            params["topicId"] = arguments["topicId"]
+        if "categoryId" in arguments:
+            params["categoryId"] = arguments["categoryId"]
+        if "keyword" in arguments:
+            params["keyword"] = arguments["keyword"]
+
+        res = self._make_request(params)
+
+        # Optional: attach PlainSections if requested
+        if isinstance(res, dict) and not res.get("error") and arguments.get("strip_html"):
+            data = res.get("data") or {}
+            resources = ((data.get("Resources") or {}).get("Resource")) or []
+            if isinstance(resources, list):
+                for r in resources:
+                    plain = []
+                    for sec in _sections_array(r):
+                        plain.append({
+                            "Title": sec.get("Title", ""),
+                            "PlainContent": _strip_html_to_text(sec.get("Content", "")),
+                        })
+                    if plain:
+                        r["PlainSections"] = plain
+        return res
+
+
+@register_tool("ODPHPOutlinkFetch")
+class ODPHPOutlinkFetch(BaseTool):
+    """
+    Fetch article pages referenced by AccessibleVersion / RelatedItems.Url and return readable text.
+    - HTML: extracts main/article/body text; strips nav/aside/footer/script/style.
+    - PDF or non-HTML: returns metadata + URL so the agent can surface it.
+    """
+
+    def __init__(self, tool_config):
+        super().__init__(tool_config)
+        self.timeout = 30
+
+    def _extract_text(self, html: str) -> Dict[str, str]:
+        if BeautifulSoup is None:
+            # fallback: crude extraction
+            title = ""
+            # attempt to find <title>
+            m = re.search(r"<title[^>]*>(.*?)</title>", html, flags=re.I | re.S)
+            if m:
+                title = re.sub(r"\s+", " ", m.group(1)).strip()
+            text = re.sub(r"<[^>]+>", " ", html)
+            text = re.sub(r"\s+", " ", text).strip()
+            return {"title": title, "text": text}
+
+        soup = BeautifulSoup(html, "html.parser")
+        # remove non-content
+        for tag in soup(["script", "style", "noscript", "footer", "nav", "aside"]):
+            tag.decompose()
+
+        candidate = soup.find("main") or soup.find("article") or soup.body or soup
+        title = ""
+        # prefer main/article heading, else <title>
+        h = candidate.find(["h1", "h2"]) if candidate else None
+        if h:
+            title = h.get_text(" ", strip=True)
+        elif soup.title and soup.title.string:
+            title = soup.title.string.strip()
+
+        text = candidate.get_text("\n", strip=True) if candidate else soup.get_text("\n", strip=True)
+        text = re.sub(r"\n{2,}", "\n\n", text)
+        return {"title": title, "text": text}
+
+    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+        urls: List[str] = arguments.get("urls", [])
+        max_chars: Optional[int] = arguments.get("max_chars")
+        return_html: bool = bool(arguments.get("return_html", False))
+
+        if not urls or not isinstance(urls, list):
+            return {"error": "Missing required parameter 'urls' (array of 1–3 URLs)."}
+
+        out: List[Dict[str, Any]] = []
+        for u in urls[:3]:
+            try:
+                resp = requests.get(u, timeout=self.timeout, allow_redirects=True)
+                ct = resp.headers.get("Content-Type", "")
+                item: Dict[str, Any] = {"url": u, "status": resp.status_code, "content_type": ct}
+
+                if "text/html" in ct or (not ct and resp.text.startswith("<!")):
+                    ex = self._extract_text(resp.text)
+                    if isinstance(max_chars, int) and max_chars > 0:
+                        ex["text"] = ex["text"][:max_chars]
+                    item.update(ex)
+                    if return_html:
+                        item["html"] = resp.text
+                elif "pdf" in ct or u.lower().endswith(".pdf"):
+                    item["title"] = "(PDF Document)"
+                    item["text"] = f"[PDF file: {u}]"
+                else:
+                    item["title"] = ""
+                    item["text"] = ""
+                out.append(item)
+            except requests.exceptions.RequestException as e:
+                out.append({"url": u, "status": 0, "content_type": "", "title": "", "text": "", "error": str(e)})
+
+        return {"results": out, "metadata": {"source": "ODPHP OutlinkFetch"}}
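For orientation, here is a minimal sketch (not part of the diff) of driving one of these registered tools directly. The import path follows the file list above; the sketch assumes BaseTool needs nothing beyond the config dict shown, and the "/topicsearch.json" endpoint value is an assumption about the v4 API rather than something the diff pins down.

from tooluniverse.odphp_tool import ODPHPTopicSearch

# Hypothetical config: the diff only shows that __init__ reads
# tool_config["fields"]["endpoint"]; any other keys BaseTool may
# expect are omitted here.
tool = ODPHPTopicSearch({"fields": {"endpoint": "/topicsearch.json"}})
result = tool.run({"keyword": "folic acid", "lang": "en", "strip_html": True})

if "error" not in result:
    data = result.get("data") or {}
    # Same resource path the tool itself walks when attaching PlainSections.
    for resource in ((data.get("Resources") or {}).get("Resource") or []):
        for section in resource.get("PlainSections", []):
            print(section["Title"])

Note that strip_html only augments each resource with PlainSections; the original HTML Content fields are left intact for downstream consumers.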
tooluniverse/openaire_tool.py
ADDED
@@ -0,0 +1,145 @@
+import requests
+from .base_tool import BaseTool
+from .tool_registry import register_tool
+
+
+@register_tool("OpenAIRETool")
+class OpenAIRETool(BaseTool):
+    """
+    Search OpenAIRE Explore for research products (publications by default).
+
+    Parameters (arguments):
+        query (str): Query string
+        max_results (int): Max number of results (default 10, max 100)
+        type (str): product type filter: publications | datasets | software
+    """
+
+    def __init__(self, tool_config):
+        super().__init__(tool_config)
+        self.base_url = "https://api.openaire.eu/search/publications"
+
+    def run(self, arguments=None):
+        arguments = arguments or {}
+        query = arguments.get("query")
+        max_results = int(arguments.get("max_results", 10))
+        prod_type = arguments.get("type", "publications")
+
+        if not query:
+            return {"error": "`query` parameter is required."}
+
+        endpoint = self._endpoint_for_type(prod_type)
+        if endpoint is None:
+            return {
+                "error": (
+                    "Unsupported type. Use publications/datasets/software."
+                ),
+            }
+
+        params = {
+            "format": "json",
+            "size": max(1, min(max_results, 100)),
+            "query": query,
+        }
+        try:
+            resp = requests.get(endpoint, params=params, timeout=20)
+            resp.raise_for_status()
+            data = resp.json()
+        except requests.RequestException as e:
+            return {
+                "error": "Network/API error calling OpenAIRE",
+                "reason": str(e),
+            }
+        except ValueError:
+            return {"error": "Failed to decode OpenAIRE response as JSON"}
+
+        return self._normalize(data, prod_type)
+
+    def _endpoint_for_type(self, prod_type):
+        if prod_type == "publications":
+            return "https://api.openaire.eu/search/publications"
+        if prod_type == "datasets":
+            return "https://api.openaire.eu/search/datasets"
+        if prod_type == "software":
+            return "https://api.openaire.eu/search/software"
+        return None
+
+    def _normalize(self, data, prod_type):
+        results = []
+        # OpenAIRE JSON has a root 'response' with 'results' → 'result' list
+        try:
+            items = (
+                data.get("response", {})
+                .get("results", {})
+                .get("result", [])
+            )
+        except Exception:
+            items = []
+
+        for it in items:
+            # header may contain identifiers, not used presently
+            _ = (
+                it.get("header", {})
+                if isinstance(it.get("header"), dict)
+                else {}
+            )
+            metadata = (
+                it.get("metadata", {})
+                if isinstance(it.get("metadata"), dict)
+                else {}
+            )
+            title = None
+            authors = []
+            year = None
+            doi = None
+            url = None
+
+            # Titles can be nested in 'oaf:result' structure
+            result_obj = metadata.get("oaf:result", {})
+            if isinstance(result_obj, dict):
+                t = result_obj.get("title")
+                if isinstance(t, list) and t:
+                    title = t[0].get("$")
+                elif isinstance(t, dict):
+                    title = t.get("$")
+
+                # Authors
+                creators = result_obj.get("creator", [])
+                if isinstance(creators, list):
+                    for c in creators:
+                        name = c.get("$")
+                        if name:
+                            authors.append(name)
+
+                # Year
+                date_obj = (
+                    result_obj.get("dateofacceptance")
+                    or result_obj.get("date")
+                )
+                if isinstance(date_obj, dict):
+                    year = date_obj.get("year") or date_obj.get("$")
+
+                # DOI and URL
+                pid = result_obj.get("pid", [])
+                if isinstance(pid, list):
+                    for p in pid:
+                        if p.get("@classid") == "doi":
+                            doi = p.get("$")
+                bestaccessright = result_obj.get("bestaccessright", {})
+                if isinstance(bestaccessright, dict):
+                    url_value = bestaccessright.get("$")
+                    if url_value:
+                        url = url_value
+
+            results.append(
+                {
+                    "title": title,
+                    "authors": authors,
+                    "year": year,
+                    "doi": doi,
+                    "url": url,
+                    "type": prod_type,
+                    "source": "OpenAIRE",
+                }
+            )
+
+        return results
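A similar sketch for OpenAIRETool, again assuming an empty config dict satisfies BaseTool; the query string is illustrative. Note the mixed return type (a list on success, an error dict on failure), so callers need an isinstance check.

from tooluniverse.openaire_tool import OpenAIRETool

tool = OpenAIRETool({})  # assumption: BaseTool tolerates an empty config
hits = tool.run({"query": "graph neural networks", "type": "datasets", "max_results": 5})

if isinstance(hits, dict) and "error" in hits:
    # failures come back as a dict with "error" (and sometimes "reason")
    print("OpenAIRE lookup failed:", hits.get("reason", hits["error"]))
else:
    for hit in hits:
        print(hit["year"], hit["title"], hit["doi"] or hit["url"])

One point a reviewer may want to double-check in the diff itself: _normalize fills url from bestaccessright, whose "$" value in OpenAIRE responses is normally an access-rights label rather than a link, so the url field may not actually hold a URL.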
tooluniverse/osf_preprints_tool.py
ADDED
@@ -0,0 +1,67 @@
+import requests
+from .base_tool import BaseTool
+from .tool_registry import register_tool
+
+
+@register_tool("OSFPreprintsTool")
+class OSFPreprintsTool(BaseTool):
+    """
+    Search OSF Preprints via OSF API v2 filters.
+
+    Parameters (arguments):
+        query (str): Query string
+        max_results (int): Max results (default 10, max 100)
+        provider (str): Optional preprint provider (e.g., 'osf', 'psyarxiv')
+    """
+
+    def __init__(self, tool_config):
+        super().__init__(tool_config)
+        self.base_url = "https://api.osf.io/v2/preprints/"
+
+    def run(self, arguments=None):
+        arguments = arguments or {}
+        query = arguments.get("query")
+        max_results = int(arguments.get("max_results", 10))
+        provider = arguments.get("provider")
+
+        if not query:
+            return {"error": "`query` parameter is required."}
+
+        params = {
+            "page[size]": max(1, min(max_results, 100)),
+            "filter[title]": query,
+        }
+        if provider:
+            params["filter[provider]"] = provider
+
+        try:
+            resp = requests.get(self.base_url, params=params, timeout=20)
+            resp.raise_for_status()
+            data = resp.json()
+        except requests.RequestException as e:
+            return {"error": "Network/API error calling OSF", "reason": str(e)}
+        except ValueError:
+            return {"error": "Failed to decode OSF response as JSON"}
+
+        results = []
+        for item in data.get("data", []):
+            attrs = item.get("attributes", {})
+            title = attrs.get("title")
+            date_published = attrs.get("date_published")
+            is_published = attrs.get("is_published")
+            doi = attrs.get("doi")
+            links_obj = item.get("links", {})
+            url = links_obj.get("html") or links_obj.get("self")
+
+            results.append(
+                {
+                    "title": title,
+                    "date_published": date_published,
+                    "published": is_published,
+                    "doi": doi,
+                    "url": url,
+                    "source": "OSF Preprints",
+                }
+            )
+
+        return results
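And a sketch for OSFPreprintsTool, under the same empty-config assumption; the provider value is one of the documented examples. Since the request uses filter[title], matching is against titles only, not abstracts or full text.

from tooluniverse.osf_preprints_tool import OSFPreprintsTool

tool = OSFPreprintsTool({})  # assumption: empty config is acceptable
res = tool.run({"query": "replication", "provider": "psyarxiv", "max_results": 3})

if isinstance(res, list):
    for preprint in res:
        print(preprint["date_published"], preprint["title"], preprint["url"])
else:
    print(res.get("error"))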
tooluniverse/pmc_tool.py
ADDED
@@ -0,0 +1,181 @@
+#!/usr/bin/env python3
+"""
+PMC (PubMed Central) Tool for searching full-text biomedical literature.
+
+PMC is the free full-text archive of biomedical and life sciences journal
+literature at the U.S. National Institutes of Health's National Library of
+Medicine. This tool provides access to millions of full-text articles.
+"""
+
+import requests
+from typing import Dict, List, Any, Optional
+from .base_tool import BaseTool
+from .tool_registry import register_tool
+
+
+@register_tool("PMCTool")
+class PMCTool(BaseTool):
+    """Tool for searching PMC full-text biomedical literature."""
+
+    def __init__(self, tool_config=None):
+        super().__init__(tool_config)
+        self.base_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
+        self.session = requests.Session()
+        self.session.headers.update({
+            'User-Agent': 'ToolUniverse/1.0',
+            'Accept': 'application/json'
+        })
+
+    def _search(self, query: str, limit: int = 10,
+                date_from: Optional[str] = None,
+                date_to: Optional[str] = None,
+                article_type: Optional[str] = None) -> List[Dict[str, Any]]:
+        """
+        Search for papers using PMC API.
+
+        Args:
+            query: Search query
+            limit: Maximum number of results
+            date_from: Start date filter (YYYY/MM/DD)
+            date_to: End date filter (YYYY/MM/DD)
+            article_type: Article type filter (e.g., 'research-article', 'review')
+
+        Returns:
+            List of paper dictionaries
+        """
+        try:
+            # Step 1: Search PMC for article IDs
+            search_params = {
+                'db': 'pmc',
+                'term': query,
+                'retmax': min(limit, 100),  # NCBI API max limit
+                'retmode': 'json',
+                'sort': 'relevance'
+            }
+
+            # Add date filters if provided
+            if date_from or date_to:
+                date_filter = []
+                if date_from:
+                    date_filter.append(
+                        f"({date_from}[PDAT]:{date_to or '3000/12/31'}[PDAT])"
+                    )
+                else:
+                    date_filter.append(f"(:{date_to}[PDAT])")
+                search_params['term'] += f" AND {' '.join(date_filter)}"
+
+            # Add article type filter if provided
+            if article_type:
+                search_params['term'] += f" AND {article_type}[PT]"
+
+            # Make search request
+            search_response = self.session.get(
+                f"{self.base_url}/esearch.fcgi",
+                params=search_params,
+                timeout=30
+            )
+            search_response.raise_for_status()
+
+            search_data = search_response.json()
+            pmc_ids = search_data.get('esearchresult', {}).get('idlist', [])
+
+            if not pmc_ids:
+                return []
+
+            # Step 2: Get detailed information for each article
+            summary_params = {
+                'db': 'pmc',
+                'id': ','.join(pmc_ids),
+                'retmode': 'json'
+            }
+
+            summary_response = self.session.get(
+                f"{self.base_url}/esummary.fcgi",
+                params=summary_params,
+                timeout=30
+            )
+            summary_response.raise_for_status()
+
+            summary_data = summary_response.json()
+            results = []
+
+            # Parse results
+            for pmc_id in pmc_ids:
+                article_data = summary_data.get('result', {}).get(pmc_id, {})
+
+                paper = {
+                    'title': article_data.get('title', 'No title'),
+                    'abstract': article_data.get('abstract', 'No abstract available'),
+                    'authors': self._extract_authors(article_data.get('authors', [])),
+                    'year': self._extract_year(article_data.get('pubdate')),
+                    'pmc_id': pmc_id,
+                    'pmid': article_data.get('pmid'),
+                    'doi': article_data.get('elocationid'),
+                    'url': f"https://www.ncbi.nlm.nih.gov/pmc/articles/{pmc_id}/",
+                    'venue': article_data.get('source'),
+                    'open_access': True,  # PMC only contains open access articles
+                    'source': 'PMC',
+                    'article_type': (article_data.get('pubtype', ['Unknown'])[0]
+                                     if article_data.get('pubtype') else 'Unknown'),
+                    'citations': article_data.get('pmcrefcount', 0)
+                }
+                results.append(paper)
+
+            return results
+
+        except requests.exceptions.RequestException as e:
+            return [{'error': f'PMC API request failed: {str(e)}'}]
+        except Exception as e:
+            return [{'error': f'PMC API error: {str(e)}'}]
+
+    def _extract_authors(self, authors: List[Dict]) -> List[str]:
+        """Extract author names from PMC API response."""
+        if not authors:
+            return []
+
+        author_names = []
+        for author in authors:
+            name = author.get('name', '')
+            if name:
+                author_names.append(name)
+
+        return author_names
+
+    def _extract_year(self, pubdate: str) -> str:
+        """Extract year from publication date."""
+        if not pubdate:
+            return 'Unknown'
+
+        try:
+            # PMC API returns dates in various formats
+            # Extract year from the beginning of the string
+            return pubdate[:4]
+        except Exception:
+            return 'Unknown'
+
+    def run(self, tool_arguments) -> List[Dict[str, Any]]:
+        """
+        Execute the PMC search.
+
+        Args:
+            tool_arguments: Dictionary containing search parameters
+
+        Returns:
+            List of paper dictionaries
+        """
+        query = tool_arguments.get('query', '')
+        if not query:
+            return [{'error': 'Query parameter is required'}]
+
+        limit = tool_arguments.get('limit', 10)
+        date_from = tool_arguments.get('date_from')
+        date_to = tool_arguments.get('date_to')
+        article_type = tool_arguments.get('article_type')
+
+        return self._search(
+            query=query,
+            limit=limit,
+            date_from=date_from,
+            date_to=date_to,
+            article_type=article_type
+        )