tooluniverse 1.0.4__py3-none-any.whl → 1.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tooluniverse might be problematic.
- tooluniverse/__init__.py +56 -5
- tooluniverse/agentic_tool.py +90 -14
- tooluniverse/arxiv_tool.py +113 -0
- tooluniverse/biorxiv_tool.py +97 -0
- tooluniverse/core_tool.py +153 -0
- tooluniverse/crossref_tool.py +73 -0
- tooluniverse/data/agentic_tools.json +2 -2
- tooluniverse/data/arxiv_tools.json +87 -0
- tooluniverse/data/biorxiv_tools.json +70 -0
- tooluniverse/data/core_tools.json +105 -0
- tooluniverse/data/crossref_tools.json +70 -0
- tooluniverse/data/dblp_tools.json +73 -0
- tooluniverse/data/doaj_tools.json +94 -0
- tooluniverse/data/fatcat_tools.json +72 -0
- tooluniverse/data/hal_tools.json +70 -0
- tooluniverse/data/medrxiv_tools.json +70 -0
- tooluniverse/data/odphp_tools.json +354 -0
- tooluniverse/data/openaire_tools.json +85 -0
- tooluniverse/data/osf_preprints_tools.json +77 -0
- tooluniverse/data/pmc_tools.json +109 -0
- tooluniverse/data/pubmed_tools.json +65 -0
- tooluniverse/data/unpaywall_tools.json +86 -0
- tooluniverse/data/wikidata_sparql_tools.json +42 -0
- tooluniverse/data/zenodo_tools.json +82 -0
- tooluniverse/dblp_tool.py +62 -0
- tooluniverse/default_config.py +18 -0
- tooluniverse/doaj_tool.py +124 -0
- tooluniverse/execute_function.py +70 -9
- tooluniverse/fatcat_tool.py +66 -0
- tooluniverse/hal_tool.py +77 -0
- tooluniverse/llm_clients.py +487 -0
- tooluniverse/mcp_tool_registry.py +3 -3
- tooluniverse/medrxiv_tool.py +97 -0
- tooluniverse/odphp_tool.py +226 -0
- tooluniverse/openaire_tool.py +145 -0
- tooluniverse/osf_preprints_tool.py +67 -0
- tooluniverse/pmc_tool.py +181 -0
- tooluniverse/pubmed_tool.py +110 -0
- tooluniverse/remote/boltz/boltz_mcp_server.py +2 -2
- tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +2 -2
- tooluniverse/smcp.py +313 -191
- tooluniverse/smcp_server.py +4 -7
- tooluniverse/test/test_claude_sdk.py +93 -0
- tooluniverse/test/test_odphp_tool.py +166 -0
- tooluniverse/test/test_openrouter_client.py +288 -0
- tooluniverse/test/test_stdio_hooks.py +1 -1
- tooluniverse/test/test_tool_finder.py +1 -1
- tooluniverse/unpaywall_tool.py +63 -0
- tooluniverse/wikidata_sparql_tool.py +61 -0
- tooluniverse/zenodo_tool.py +74 -0
- {tooluniverse-1.0.4.dist-info → tooluniverse-1.0.6.dist-info}/METADATA +101 -74
- {tooluniverse-1.0.4.dist-info → tooluniverse-1.0.6.dist-info}/RECORD +56 -19
- {tooluniverse-1.0.4.dist-info → tooluniverse-1.0.6.dist-info}/entry_points.txt +1 -0
- tooluniverse-1.0.6.dist-info/licenses/LICENSE +201 -0
- tooluniverse-1.0.4.dist-info/licenses/LICENSE +0 -21
- {tooluniverse-1.0.4.dist-info → tooluniverse-1.0.6.dist-info}/WHEEL +0 -0
- {tooluniverse-1.0.4.dist-info → tooluniverse-1.0.6.dist-info}/top_level.txt +0 -0
tooluniverse/__init__.py
CHANGED
@@ -215,10 +215,28 @@ if not LAZY_LOADING_ENABLED:
         GWASAssociationsForStudy,
     )
 
-
-
-
-
+    from .mcp_client_tool import MCPClientTool, MCPAutoLoaderTool
+    from .admetai_tool import ADMETAITool
+    from .alphafold_tool import AlphaFoldRESTTool
+    from .odphp_tool import (
+        ODPHPMyHealthfinder,
+        ODPHPItemList,
+        ODPHPTopicSearch,
+        ODPHPOutlinkFetch,
+    )
+    # Literature search tools
+    from .arxiv_tool import ArXivTool
+    from .crossref_tool import CrossrefTool
+    from .dblp_tool import DBLPTool
+    from .pubmed_tool import PubMedTool
+    from .doaj_tool import DOAJTool
+    from .unpaywall_tool import UnpaywallTool
+    from .biorxiv_tool import BioRxivTool
+    from .medrxiv_tool import MedRxivTool
+    from .hal_tool import HALTool
+    from .core_tool import CoreTool
+    from .pmc_tool import PMCTool
+    from .zenodo_tool import ZenodoTool
 else:
     # With lazy loading, create lazy import proxies that import modules only when accessed
     MonarchTool = _LazyImportProxy("restful_tool", "MonarchTool")
@@ -296,7 +314,23 @@ else:
     MCPAutoLoaderTool = _LazyImportProxy("mcp_client_tool", "MCPAutoLoaderTool")
     ADMETAITool = _LazyImportProxy("admetai_tool", "ADMETAITool")
     AlphaFoldRESTTool = _LazyImportProxy("alphafold_tool", "AlphaFoldRESTTool")
-
+    ODPHPItemList = _LazyImportProxy("odphp_tool", "ODPHPItemList")
+    ODPHPMyHealthfinder = _LazyImportProxy("odphp_tool", "ODPHPMyHealthfinder")
+    ODPHPTopicSearch = _LazyImportProxy("odphp_tool", "ODPHPTopicSearch")
+    ODPHPOutlinkFetch = _LazyImportProxy("odphp_tool", "ODPHPOutlinkFetch")
+    # Literature search tools
+    ArXivTool = _LazyImportProxy("arxiv_tool", "ArXivTool")
+    CrossrefTool = _LazyImportProxy("crossref_tool", "CrossrefTool")
+    DBLPTool = _LazyImportProxy("dblp_tool", "DBLPTool")
+    PubMedTool = _LazyImportProxy("pubmed_tool", "PubMedTool")
+    DOAJTool = _LazyImportProxy("doaj_tool", "DOAJTool")
+    UnpaywallTool = _LazyImportProxy("unpaywall_tool", "UnpaywallTool")
+    BioRxivTool = _LazyImportProxy("biorxiv_tool", "BioRxivTool")
+    MedRxivTool = _LazyImportProxy("medrxiv_tool", "MedRxivTool")
+    HALTool = _LazyImportProxy("hal_tool", "HALTool")
+    CoreTool = _LazyImportProxy("core_tool", "CoreTool")
+    PMCTool = _LazyImportProxy("pmc_tool", "PMCTool")
+    ZenodoTool = _LazyImportProxy("zenodo_tool", "ZenodoTool")
 
 __all__ = [
     "__version__",
@@ -364,4 +398,21 @@ __all__ = [
     "EmbeddingSync",
     "ToolFinderEmbedding",
     "AlphaFoldRESTTool",
+    "ODPHPMyHealthfinder",
+    "ODPHPItemList",
+    "ODPHPTopicSearch",
+    "ODPHPOutlinkFetch",
+    # Literature search tools
+    "ArXivTool",
+    "CrossrefTool",
+    "DBLPTool",
+    "PubMedTool",
+    "DOAJTool",
+    "UnpaywallTool",
+    "BioRxivTool",
+    "MedRxivTool",
+    "HALTool",
+    "CoreTool",
+    "PMCTool",
+    "ZenodoTool",
 ]
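The lazy branch above only stores proxies; the real module import happens on first use, so `import tooluniverse` stays cheap as the tool modules multiply. `_LazyImportProxy` itself is defined elsewhere in the package and is not part of this diff, so the following is only a minimal sketch of the pattern its call sites imply (the `importlib` lookup, the `tooluniverse.` prefix, and the `__call__` behavior are all assumptions):

import importlib

class _LazyImportProxy:
    """Sketch only: defer a submodule import until the proxied name is used."""

    def __init__(self, module_name: str, attr_name: str):
        # Matches the two-string call sites above, e.g.
        # _LazyImportProxy("arxiv_tool", "ArXivTool")
        self._module_name = module_name
        self._attr_name = attr_name
        self._target = None

    def _resolve(self):
        # Import tooluniverse.<module> on first access and cache the attribute.
        if self._target is None:
            module = importlib.import_module(f"tooluniverse.{self._module_name}")
            self._target = getattr(module, self._attr_name)
        return self._target

    def __call__(self, *args, **kwargs):
        # Instantiating the proxy instantiates the real tool class.
        return self._resolve()(*args, **kwargs)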
tooluniverse/agentic_tool.py
CHANGED
@@ -3,23 +3,25 @@ from __future__ import annotations
 import os
 import json
 from datetime import datetime
-from typing import Any, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional
 
 from .base_tool import BaseTool
 from .tool_registry import register_tool
 from .logging_config import get_logger
-from .llm_clients import AzureOpenAIClient, GeminiClient
+from .llm_clients import AzureOpenAIClient, GeminiClient, OpenRouterClient
 
 
 # Global default fallback configuration
 DEFAULT_FALLBACK_CHAIN = [
     {"api_type": "CHATGPT", "model_id": "gpt-4o-1120"},
+    {"api_type": "OPENROUTER", "model_id": "openai/gpt-4o"},
     {"api_type": "GEMINI", "model_id": "gemini-2.0-flash"},
 ]
 
 # API key environment variable mapping
 API_KEY_ENV_VARS = {
     "CHATGPT": ["AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT"],
+    "OPENROUTER": ["OPENROUTER_API_KEY"],
     "GEMINI": ["GEMINI_API_KEY"],
 }
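The chain is consulted in order, and API_KEY_ENV_VARS decides whether a provider counts as configured: every listed variable must be present. The selection helper itself is not in this hunk, so here is a hedged sketch of the resolution logic these two constants imply (`pick_fallback_model` is a hypothetical name, not the package's actual helper):

import os

from tooluniverse.agentic_tool import API_KEY_ENV_VARS, DEFAULT_FALLBACK_CHAIN

def pick_fallback_model(chain=DEFAULT_FALLBACK_CHAIN, env_vars=API_KEY_ENV_VARS):
    """Return the first fallback entry whose provider has all required env vars set."""
    for candidate in chain:
        required = env_vars.get(candidate["api_type"], [])
        # A provider is usable only if every required variable is non-empty.
        if required and all(os.environ.get(var) for var in required):
            return candidate
    return None  # nothing configured; the tool would report itself unavailable

# With only OPENROUTER_API_KEY exported, this would pick
# {"api_type": "OPENROUTER", "model_id": "openai/gpt-4o"}.

Note that CHATGPT requires both the Azure key and the endpoint, so a half-configured Azure environment now falls through to OpenRouter before Gemini.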
@@ -28,6 +30,8 @@ API_KEY_ENV_VARS = {
 class AgenticTool(BaseTool):
     """Generic wrapper around LLM prompting supporting JSON-defined configs with prompts and input arguments."""
 
+    STREAM_FLAG_KEY = "_tooluniverse_stream"
+
     @staticmethod
     def has_any_api_keys() -> bool:
         """
@@ -202,6 +206,8 @@ class AgenticTool(BaseTool):
         try:
             if api_type == "CHATGPT":
                 self._llm_client = AzureOpenAIClient(model_id, None, self.logger)
+            elif api_type == "OPENROUTER":
+                self._llm_client = OpenRouterClient(model_id, self.logger)
             elif api_type == "GEMINI":
                 self._llm_client = GeminiClient(model_id, self.logger)
             else:
@@ -237,7 +243,7 @@ class AgenticTool(BaseTool):
 
     # ------------------------------------------------------------------ LLM utilities -----------
     def _validate_model_config(self):
-        supported_api_types = ["CHATGPT", "GEMINI"]
+        supported_api_types = ["CHATGPT", "OPENROUTER", "GEMINI"]
         if self._api_type not in supported_api_types:
             raise ValueError(
                 f"Unsupported API type: {self._api_type}. Supported types: {supported_api_types}"
@@ -246,9 +252,18 @@ class AgenticTool(BaseTool):
             raise ValueError("max_new_tokens must be positive or None")
 
     # ------------------------------------------------------------------ public API --------------
-    def run(
+    def run(
+        self,
+        arguments: Dict[str, Any],
+        stream_callback: Optional[Callable[[str], None]] = None,
+    ) -> Dict[str, Any]:
         start_time = datetime.now()
 
+        # Work on a copy so we can remove control flags without mutating caller data
+        arguments = dict(arguments or {})
+        stream_flag = bool(arguments.pop("_tooluniverse_stream", False))
+        streaming_requested = stream_flag or stream_callback is not None
+
         # Check if tool is available before attempting to run
         if not self._is_available:
             error_msg = f"Tool '{self.name}' is not available due to initialization error: {self._initialization_error}"
@@ -296,16 +311,52 @@ class AgenticTool(BaseTool):
             custom_format = arguments.get("response_format", None)
 
             # Delegate to client; client handles provider-specific logic
-            response =
-
-
-
-                return_json=self._return_json,
-                custom_format=custom_format,
-                max_retries=self._max_retries,
-                retry_delay=self._retry_delay,
+            response = None
+
+            streaming_permitted = (
+                streaming_requested and not self._return_json and custom_format is None
             )
 
+            if streaming_permitted and hasattr(self._llm_client, "infer_stream"):
+                try:
+                    chunks_collected: List[str] = []
+                    stream_iter = self._llm_client.infer_stream(
+                        messages=messages,
+                        temperature=self._temperature,
+                        max_tokens=None,
+                        return_json=self._return_json,
+                        custom_format=custom_format,
+                        max_retries=self._max_retries,
+                        retry_delay=self._retry_delay,
+                    )
+                    for chunk in stream_iter:
+                        if not chunk:
+                            continue
+                        chunks_collected.append(chunk)
+                        self._emit_stream_chunk(chunk, stream_callback)
+                    if chunks_collected:
+                        response = "".join(chunks_collected)
+                except Exception as stream_error:  # noqa: BLE001
+                    self.logger.warning(
+                        f"Streaming failed for tool '{self.name}': {stream_error}. Falling back to buffered response."
+                    )
+                    response = None
+
+            if response is None:
+                response = self._llm_client.infer(
+                    messages=messages,
+                    temperature=self._temperature,
+                    max_tokens=None,  # client resolves per-model defaults/env
+                    return_json=self._return_json,
+                    custom_format=custom_format,
+                    max_retries=self._max_retries,
+                    retry_delay=self._retry_delay,
+                )
+
+                if streaming_requested and response:
+                    for chunk in self._iter_chunks(response):
+                        self._emit_stream_chunk(chunk, stream_callback)
+
             end_time = datetime.now()
             execution_time = (end_time - start_time).total_seconds()
 
@@ -334,7 +385,8 @@ class AgenticTool(BaseTool):
                 }
             else:
                 return response
-
+
+        except Exception as e:  # noqa: BLE001
             end_time = datetime.now()
             execution_time = (end_time - start_time).total_seconds()
             self.logger.error(f"Error executing {self.name}: {str(e)}")
@@ -355,13 +407,35 @@ class AgenticTool(BaseTool):
                     "model_info": {
                         "api_type": self._api_type,
                         "model_id": self._model_id,
+                        "temperature": self._temperature,
+                        "max_new_tokens": self._max_new_tokens,
                     },
                     "execution_time_seconds": execution_time,
                     "timestamp": start_time.isoformat(),
                 },
             }
         else:
-            return "error:
+            return f"error: {str(e)} error_type: {type(e).__name__}"
+
+    @staticmethod
+    def _iter_chunks(text: str, size: int = 800):
+        if not text:
+            return
+        for idx in range(0, len(text), size):
+            yield text[idx : idx + size]
+
+    def _emit_stream_chunk(
+        self, chunk: Optional[str], stream_callback: Optional[Callable[[str], None]]
+    ) -> None:
+        if not stream_callback or not chunk:
+            return
+        try:
+            stream_callback(chunk)
+        except Exception as callback_error:  # noqa: BLE001
+            # Streaming callbacks should not break tool execution; log and continue
+            self.logger.debug(
+                f"Stream callback for tool '{self.name}' raised an exception: {callback_error}"
+            )
 
     # ------------------------------------------------------------------ helpers -----------------
     def _validate_arguments(self, arguments: Dict[str, Any]):
@@ -440,6 +514,8 @@ class AgenticTool(BaseTool):
         try:
             if self._api_type == "CHATGPT":
                 self._llm_client = AzureOpenAIClient(self._model_id, None, self.logger)
+            elif self._api_type == "OPENROUTER":
+                self._llm_client = OpenRouterClient(self._model_id, self.logger)
             elif self._api_type == "GEMINI":
                 self._llm_client = GeminiClient(self._gemini_model_id, self.logger)
             else:
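Taken together, the streaming additions give callers two entry points: pass a `stream_callback` to `run`, or set the `_tooluniverse_stream` flag inside the arguments dict (it is popped off a copy, so caller data is untouched). Streaming is skipped when JSON output or a custom response format is requested; a failed or unavailable `infer_stream` falls back to the buffered `infer` path, whose final text is then re-chunked through `_iter_chunks` so callbacks still receive incremental output. A usage sketch follows; the tool_config fields are illustrative, not the package's actual schema:

chunks = []

def on_chunk(text: str) -> None:
    # Called for each streamed fragment; exceptions raised here are swallowed
    # by _emit_stream_chunk and only logged at debug level.
    chunks.append(text)

tool = AgenticTool(tool_config={
    "name": "summarizer",  # hypothetical config, fields assumed
    "prompt": "Summarize: {text}",
    "configs": {"api_type": "OPENROUTER", "model_id": "openai/gpt-4o"},
})
result = tool.run({"text": "..."}, stream_callback=on_chunk)

# Flag-based alternative for callers that cannot pass a callback:
result = tool.run({"text": "...", "_tooluniverse_stream": True})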
tooluniverse/arxiv_tool.py
ADDED
@@ -0,0 +1,113 @@
+import requests
+import xml.etree.ElementTree as ET
+from .base_tool import BaseTool
+from .tool_registry import register_tool
+
+
+@register_tool("ArXivTool")
+class ArXivTool(BaseTool):
+    """
+    Search arXiv for papers by keyword using the public arXiv API.
+    """
+
+    def __init__(
+        self,
+        tool_config,
+        base_url="http://export.arxiv.org/api/query",
+    ):
+        super().__init__(tool_config)
+        self.base_url = base_url
+
+    def run(self, arguments):
+        query = arguments.get("query")
+        limit = int(arguments.get("limit", 10))
+        # sort_by: relevance | lastUpdatedDate | submittedDate
+        sort_by = arguments.get("sort_by", "relevance")
+        # sort_order: ascending | descending
+        sort_order = arguments.get("sort_order", "descending")
+
+        if not query:
+            return {"error": "`query` parameter is required."}
+
+        return self._search(query, limit, sort_by, sort_order)
+
+    def _search(self, query, limit, sort_by, sort_order):
+        params = {
+            "search_query": f"all:{query}",
+            "start": 0,
+            "max_results": max(1, min(limit, 200)),
+            "sortBy": sort_by,
+            "sortOrder": sort_order,
+        }
+
+        try:
+            response = requests.get(self.base_url, params=params, timeout=20)
+        except requests.RequestException as e:
+            return {
+                "error": "Network error calling arXiv API",
+                "reason": str(e),
+            }
+
+        if response.status_code != 200:
+            return {
+                "error": f"arXiv API error {response.status_code}",
+                "reason": response.reason,
+            }
+
+        # Parse Atom XML
+        try:
+            root = ET.fromstring(response.text)
+        except ET.ParseError as e:
+            return {
+                "error": "Failed to parse arXiv response",
+                "reason": str(e),
+            }
+
+        ns = {"atom": "http://www.w3.org/2005/Atom"}
+        entries = []
+        for entry in root.findall("atom:entry", ns):
+            title_text = entry.findtext(
+                "atom:title",
+                default="",
+                namespaces=ns,
+            )
+            title = (title_text or "").strip()
+            summary_text = entry.findtext(
+                "atom:summary",
+                default="",
+                namespaces=ns,
+            )
+            summary = (summary_text or "").strip()
+            link_el = entry.find("atom:link[@type='text/html']", ns)
+            if link_el is not None:
+                link = link_el.get("href")
+            else:
+                link = entry.findtext("atom:id", default="", namespaces=ns)
+            published = entry.findtext(
+                "atom:published", default="", namespaces=ns
+            )
+            updated = entry.findtext("atom:updated", default="", namespaces=ns)
+            authors = [
+                a.findtext("atom:name", default="", namespaces=ns)
+                for a in entry.findall("atom:author", ns)
+            ]
+            primary_category = ""
+            cat_el = entry.find(
+                "{http://arxiv.org/schemas/atom}primary_category"
+            )
+            if cat_el is not None:
+                primary_category = cat_el.get("term", "")
+
+            entries.append(
+                {
+                    "title": title,
+                    "abstract": summary,
+                    "authors": authors,
+                    "published": published,
+                    "updated": updated,
+                    "category": primary_category,
+                    "url": link,
+                }
+            )
+
+        return entries
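A usage sketch for the new tool (the tool_config contents are illustrative; on success `run` returns a list of entry dicts, on failure a single dict with an "error" key):

tool = ArXivTool(tool_config={"name": "arxiv_search"})  # hypothetical config
papers = tool.run({
    "query": "protein structure prediction",
    "limit": 5,
    "sort_by": "submittedDate",   # relevance | lastUpdatedDate | submittedDate
    "sort_order": "descending",   # ascending | descending
})
if isinstance(papers, list):
    for paper in papers:
        print(paper["published"], paper["title"], paper["url"])
else:
    print(papers["error"])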
tooluniverse/biorxiv_tool.py
ADDED
@@ -0,0 +1,97 @@
+import requests
+from .base_tool import BaseTool
+from .tool_registry import register_tool
+
+
+@register_tool("BioRxivTool")
+class BioRxivTool(BaseTool):
+    """
+    Search bioRxiv preprints using the public bioRxiv API.
+
+    Arguments:
+        query (str): Search term
+        max_results (int): Max results to return (default 10, max 200)
+    """
+
+    def __init__(
+        self,
+        tool_config,
+        base_url="https://api.medrxiv.org/details",
+    ):
+        super().__init__(tool_config)
+        self.base_url = base_url
+
+    def run(self, arguments=None):
+        arguments = arguments or {}
+        query = arguments.get("query")
+        max_results = int(arguments.get("max_results", 10))
+        if not query:
+            return {"error": "`query` parameter is required."}
+        return self._search(query, max_results)
+
+    def _search(self, query, max_results):
+        # Use date range search for recent preprints
+        # Format: /biorxiv/{start_date}/{end_date}/{cursor}/json
+        from datetime import datetime, timedelta
+
+        # Search last 30 days
+        end_date = datetime.now()
+        start_date = end_date - timedelta(days=30)
+
+        url = (f"{self.base_url}/biorxiv/"
+               f"{start_date.strftime('%Y-%m-%d')}/"
+               f"{end_date.strftime('%Y-%m-%d')}/0/json")
+
+        try:
+            resp = requests.get(url, timeout=20)
+            resp.raise_for_status()
+            data = resp.json()
+        except requests.RequestException as e:
+            return {
+                "error": "Network/API error calling bioRxiv",
+                "reason": str(e),
+            }
+        except ValueError:
+            return {"error": "Failed to decode bioRxiv response as JSON"}
+
+        results = []
+        # The API returns a dictionary with a 'collection' key
+        collection = data.get("collection", [])
+        if not isinstance(collection, list):
+            return {"error": "Unexpected API response format"}
+
+        for item in collection:
+            title = item.get("title")
+            authors = item.get("authors", "")
+            if isinstance(authors, str):
+                authors = [a.strip() for a in authors.split(";") if a.strip()]
+            elif isinstance(authors, list):
+                authors = [str(a).strip() for a in authors if str(a).strip()]
+            else:
+                authors = []
+
+            year = None
+            date = item.get("date")
+            if date and len(date) >= 4 and date[:4].isdigit():
+                year = int(date[:4])
+
+            doi = item.get("doi")
+            url = f"https://www.biorxiv.org/content/{doi}" if doi else None
+
+            # Filter by query if provided
+            if query and query.lower() not in (title or "").lower():
+                continue
+
+            results.append(
+                {
+                    "title": title,
+                    "authors": authors,
+                    "year": year,
+                    "doi": doi,
+                    "url": url,
+                    "abstract": item.get("abstract", ""),
+                    "source": "bioRxiv",
+                }
+            )
+
+        return results[:max_results]
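Design note: the bioRxiv details API is keyed by date range rather than keyword, so `_search` fetches the most recent 30-day window (cursor 0 only, so at most one page of the collection) and filters client-side by case-insensitive substring match on the title; abstracts are not searched. The default base_url points at api.medrxiv.org, which appears to serve the same details API for both servers, since the request path selects `/biorxiv/` explicitly. A usage sketch (config contents illustrative):

tool = BioRxivTool(tool_config={"name": "biorxiv_search"})  # hypothetical config
results = tool.run({"query": "single-cell", "max_results": 5})
if isinstance(results, list):
    for r in results:
        # Only preprints from the last 30 days whose titles contain
        # "single-cell" can appear here.
        print(r["year"], r["title"], r["doi"])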
tooluniverse/core_tool.py
ADDED
@@ -0,0 +1,153 @@
+#!/usr/bin/env python3
+"""
+CORE API Tool for searching open access academic papers.
+
+CORE is the world's largest collection of open access research papers.
+This tool provides access to over 200 million open access papers from
+repositories and journals worldwide.
+"""
+
+import requests
+from typing import Dict, List, Any, Optional
+from .base_tool import BaseTool
+from .tool_registry import register_tool
+
+
+@register_tool("CoreTool")
+class CoreTool(BaseTool):
+    """Tool for searching CORE open access academic papers."""
+
+    def __init__(self, tool_config=None):
+        super().__init__(tool_config)
+        self.base_url = "https://api.core.ac.uk/v3"
+        self.session = requests.Session()
+        self.session.headers.update({
+            'User-Agent': 'ToolUniverse/1.0',
+            'Accept': 'application/json'
+        })
+
+    def _search(self, query: str, limit: int = 10,
+                year_from: Optional[int] = None,
+                year_to: Optional[int] = None,
+                language: Optional[str] = None) -> List[Dict[str, Any]]:
+        """
+        Search for papers using CORE API.
+
+        Args:
+            query: Search query
+            limit: Maximum number of results
+            year_from: Start year filter
+            year_to: End year filter
+            language: Language filter (e.g., 'en', 'es', 'fr')
+
+        Returns:
+            List of paper dictionaries
+        """
+        try:
+            # Build search parameters
+            params = {
+                'q': query,
+                'limit': min(limit, 100),  # CORE API max limit is 100
+                'page': 1
+            }
+
+            # Add year filters if provided
+            if year_from or year_to:
+                year_filter = []
+                if year_from:
+                    year_filter.append(f"year:>={year_from}")
+                if year_to:
+                    year_filter.append(f"year:<={year_to}")
+                params['q'] += f" {' '.join(year_filter)}"
+
+            # Add language filter if provided
+            if language:
+                params['q'] += f" language:{language}"
+
+            # Make API request
+            response = self.session.get(
+                f"{self.base_url}/search/works",
+                params=params,
+                timeout=30
+            )
+            response.raise_for_status()
+
+            data = response.json()
+            results = []
+
+            # Parse results
+            for item in data.get('results', []):
+                paper = {
+                    'title': item.get('title', 'No title'),
+                    'abstract': item.get('abstract', 'No abstract available'),
+                    'authors': self._extract_authors(item.get('authors', [])),
+                    'year': self._extract_year(item.get('publishedDate')),
+                    'doi': item.get('doi'),
+                    'url': (item.get('downloadUrl') or
+                            item.get('links', [{}])[0].get('url')),
+                    'venue': item.get('publisher'),
+                    'language': item.get('language', {}).get('code', 'Unknown'),
+                    'open_access': True,  # CORE only contains open access papers
+                    'source': 'CORE',
+                    'citations': item.get('citationCount', 0),
+                    'downloads': item.get('downloadCount', 0)
+                }
+                results.append(paper)
+
+            return results
+
+        except requests.exceptions.RequestException as e:
+            return [{'error': f'CORE API request failed: {str(e)}'}]
+        except Exception as e:
+            return [{'error': f'CORE API error: {str(e)}'}]
+
+    def _extract_authors(self, authors: List[Dict]) -> List[str]:
+        """Extract author names from CORE API response."""
+        if not authors:
+            return []
+
+        author_names = []
+        for author in authors:
+            name = author.get('name', '')
+            if name:
+                author_names.append(name)
+
+        return author_names
+
+    def _extract_year(self, published_date: str) -> str:
+        """Extract year from published date."""
+        if not published_date:
+            return 'Unknown'
+
+        try:
+            # CORE API returns dates in ISO format
+            return published_date[:4]
+        except Exception:
+            return 'Unknown'
+
+    def run(self, tool_arguments) -> List[Dict[str, Any]]:
+        """
+        Execute the CORE search.
+
+        Args:
+            tool_arguments: Dictionary containing search parameters
+
+        Returns:
+            List of paper dictionaries
+        """
+        query = tool_arguments.get('query', '')
+        if not query:
+            return [{'error': 'Query parameter is required'}]
+
+        limit = tool_arguments.get('limit', 10)
+        year_from = tool_arguments.get('year_from')
+        year_to = tool_arguments.get('year_to')
+        language = tool_arguments.get('language')
+
+        return self._search(
+            query=query,
+            limit=limit,
+            year_from=year_from,
+            year_to=year_to,
+            language=language
+        )