tooluniverse 1.0.4__py3-none-any.whl → 1.0.6__py3-none-any.whl

This diff covers the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.

Potentially problematic release.


This version of tooluniverse might be problematic.

Files changed (57)
  1. tooluniverse/__init__.py +56 -5
  2. tooluniverse/agentic_tool.py +90 -14
  3. tooluniverse/arxiv_tool.py +113 -0
  4. tooluniverse/biorxiv_tool.py +97 -0
  5. tooluniverse/core_tool.py +153 -0
  6. tooluniverse/crossref_tool.py +73 -0
  7. tooluniverse/data/agentic_tools.json +2 -2
  8. tooluniverse/data/arxiv_tools.json +87 -0
  9. tooluniverse/data/biorxiv_tools.json +70 -0
  10. tooluniverse/data/core_tools.json +105 -0
  11. tooluniverse/data/crossref_tools.json +70 -0
  12. tooluniverse/data/dblp_tools.json +73 -0
  13. tooluniverse/data/doaj_tools.json +94 -0
  14. tooluniverse/data/fatcat_tools.json +72 -0
  15. tooluniverse/data/hal_tools.json +70 -0
  16. tooluniverse/data/medrxiv_tools.json +70 -0
  17. tooluniverse/data/odphp_tools.json +354 -0
  18. tooluniverse/data/openaire_tools.json +85 -0
  19. tooluniverse/data/osf_preprints_tools.json +77 -0
  20. tooluniverse/data/pmc_tools.json +109 -0
  21. tooluniverse/data/pubmed_tools.json +65 -0
  22. tooluniverse/data/unpaywall_tools.json +86 -0
  23. tooluniverse/data/wikidata_sparql_tools.json +42 -0
  24. tooluniverse/data/zenodo_tools.json +82 -0
  25. tooluniverse/dblp_tool.py +62 -0
  26. tooluniverse/default_config.py +18 -0
  27. tooluniverse/doaj_tool.py +124 -0
  28. tooluniverse/execute_function.py +70 -9
  29. tooluniverse/fatcat_tool.py +66 -0
  30. tooluniverse/hal_tool.py +77 -0
  31. tooluniverse/llm_clients.py +487 -0
  32. tooluniverse/mcp_tool_registry.py +3 -3
  33. tooluniverse/medrxiv_tool.py +97 -0
  34. tooluniverse/odphp_tool.py +226 -0
  35. tooluniverse/openaire_tool.py +145 -0
  36. tooluniverse/osf_preprints_tool.py +67 -0
  37. tooluniverse/pmc_tool.py +181 -0
  38. tooluniverse/pubmed_tool.py +110 -0
  39. tooluniverse/remote/boltz/boltz_mcp_server.py +2 -2
  40. tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +2 -2
  41. tooluniverse/smcp.py +313 -191
  42. tooluniverse/smcp_server.py +4 -7
  43. tooluniverse/test/test_claude_sdk.py +93 -0
  44. tooluniverse/test/test_odphp_tool.py +166 -0
  45. tooluniverse/test/test_openrouter_client.py +288 -0
  46. tooluniverse/test/test_stdio_hooks.py +1 -1
  47. tooluniverse/test/test_tool_finder.py +1 -1
  48. tooluniverse/unpaywall_tool.py +63 -0
  49. tooluniverse/wikidata_sparql_tool.py +61 -0
  50. tooluniverse/zenodo_tool.py +74 -0
  51. {tooluniverse-1.0.4.dist-info → tooluniverse-1.0.6.dist-info}/METADATA +101 -74
  52. {tooluniverse-1.0.4.dist-info → tooluniverse-1.0.6.dist-info}/RECORD +56 -19
  53. {tooluniverse-1.0.4.dist-info → tooluniverse-1.0.6.dist-info}/entry_points.txt +1 -0
  54. tooluniverse-1.0.6.dist-info/licenses/LICENSE +201 -0
  55. tooluniverse-1.0.4.dist-info/licenses/LICENSE +0 -21
  56. {tooluniverse-1.0.4.dist-info → tooluniverse-1.0.6.dist-info}/WHEEL +0 -0
  57. {tooluniverse-1.0.4.dist-info → tooluniverse-1.0.6.dist-info}/top_level.txt +0 -0
tooluniverse/__init__.py CHANGED
@@ -215,10 +215,28 @@ if not LAZY_LOADING_ENABLED:
         GWASAssociationsForStudy,
     )
 
-    # from .admetai_tool import ADMETAITool
-    from .mcp_client_tool import MCPClientTool, MCPAutoLoaderTool
-    from .admetai_tool import ADMETAITool
-    from .alphafold_tool import AlphaFoldRESTTool
+    from .mcp_client_tool import MCPClientTool, MCPAutoLoaderTool
+    from .admetai_tool import ADMETAITool
+    from .alphafold_tool import AlphaFoldRESTTool
+    from .odphp_tool import (
+        ODPHPMyHealthfinder,
+        ODPHPItemList,
+        ODPHPTopicSearch,
+        ODPHPOutlinkFetch,
+    )
+    # Literature search tools
+    from .arxiv_tool import ArXivTool
+    from .crossref_tool import CrossrefTool
+    from .dblp_tool import DBLPTool
+    from .pubmed_tool import PubMedTool
+    from .doaj_tool import DOAJTool
+    from .unpaywall_tool import UnpaywallTool
+    from .biorxiv_tool import BioRxivTool
+    from .medrxiv_tool import MedRxivTool
+    from .hal_tool import HALTool
+    from .core_tool import CoreTool
+    from .pmc_tool import PMCTool
+    from .zenodo_tool import ZenodoTool
 else:
     # With lazy loading, create lazy import proxies that import modules only when accessed
     MonarchTool = _LazyImportProxy("restful_tool", "MonarchTool")
@@ -296,7 +314,23 @@ else:
     MCPAutoLoaderTool = _LazyImportProxy("mcp_client_tool", "MCPAutoLoaderTool")
     ADMETAITool = _LazyImportProxy("admetai_tool", "ADMETAITool")
     AlphaFoldRESTTool = _LazyImportProxy("alphafold_tool", "AlphaFoldRESTTool")
-
+    ODPHPItemList = _LazyImportProxy("odphp_tool", "ODPHPItemList")
+    ODPHPMyHealthfinder = _LazyImportProxy("odphp_tool", "ODPHPMyHealthfinder")
+    ODPHPTopicSearch = _LazyImportProxy("odphp_tool", "ODPHPTopicSearch")
+    ODPHPOutlinkFetch = _LazyImportProxy("odphp_tool", "ODPHPOutlinkFetch")
+    # Literature search tools
+    ArXivTool = _LazyImportProxy("arxiv_tool", "ArXivTool")
+    CrossrefTool = _LazyImportProxy("crossref_tool", "CrossrefTool")
+    DBLPTool = _LazyImportProxy("dblp_tool", "DBLPTool")
+    PubMedTool = _LazyImportProxy("pubmed_tool", "PubMedTool")
+    DOAJTool = _LazyImportProxy("doaj_tool", "DOAJTool")
+    UnpaywallTool = _LazyImportProxy("unpaywall_tool", "UnpaywallTool")
+    BioRxivTool = _LazyImportProxy("biorxiv_tool", "BioRxivTool")
+    MedRxivTool = _LazyImportProxy("medrxiv_tool", "MedRxivTool")
+    HALTool = _LazyImportProxy("hal_tool", "HALTool")
+    CoreTool = _LazyImportProxy("core_tool", "CoreTool")
+    PMCTool = _LazyImportProxy("pmc_tool", "PMCTool")
+    ZenodoTool = _LazyImportProxy("zenodo_tool", "ZenodoTool")
 
 __all__ = [
     "__version__",
@@ -364,4 +398,21 @@ __all__ = [
     "EmbeddingSync",
     "ToolFinderEmbedding",
     "AlphaFoldRESTTool",
+    "ODPHPMyHealthfinder",
+    "ODPHPItemList",
+    "ODPHPTopicSearch",
+    "ODPHPOutlinkFetch",
+    # Literature search tools
+    "ArXivTool",
+    "CrossrefTool",
+    "DBLPTool",
+    "PubMedTool",
+    "DOAJTool",
+    "UnpaywallTool",
+    "BioRxivTool",
+    "MedRxivTool",
+    "HALTool",
+    "CoreTool",
+    "PMCTool",
+    "ZenodoTool",
 ]
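
For orientation, a minimal sketch of what this export wiring means for callers; under lazy loading each imported name is assumed to be a _LazyImportProxy that defers loading its module until first use:

# Same public surface in both branches: eager mode binds the classes directly,
# lazy mode binds proxies under identical names; __all__ now exports both the
# ODPHP tools and the twelve literature-search tools.
from tooluniverse import PubMedTool, CoreTool, ZenodoTool  # no heavy imports yet in lazy mode
import tooluniverse
print("ZenodoTool" in tooluniverse.__all__)  # True after this change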
tooluniverse/agentic_tool.py CHANGED
@@ -3,23 +3,25 @@ from __future__ import annotations
 import os
 import json
 from datetime import datetime
-from typing import Any, Dict, List, Optional
+from typing import Any, Callable, Dict, List, Optional
 
 from .base_tool import BaseTool
 from .tool_registry import register_tool
 from .logging_config import get_logger
-from .llm_clients import AzureOpenAIClient, GeminiClient
+from .llm_clients import AzureOpenAIClient, GeminiClient, OpenRouterClient
 
 
 # Global default fallback configuration
 DEFAULT_FALLBACK_CHAIN = [
     {"api_type": "CHATGPT", "model_id": "gpt-4o-1120"},
+    {"api_type": "OPENROUTER", "model_id": "openai/gpt-4o"},
     {"api_type": "GEMINI", "model_id": "gemini-2.0-flash"},
 ]
 
 # API key environment variable mapping
 API_KEY_ENV_VARS = {
     "CHATGPT": ["AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT"],
+    "OPENROUTER": ["OPENROUTER_API_KEY"],
     "GEMINI": ["GEMINI_API_KEY"],
 }
 
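
For context, the new OPENROUTER entry participates in DEFAULT_FALLBACK_CHAIN only when its key is configured. A minimal environment sketch (placeholder values; whether both Azure variables must be present for CHATGPT to count as configured is not shown in this hunk):

import os

# Placeholder values; the variable names come from API_KEY_ENV_VARS above.
os.environ["OPENROUTER_API_KEY"] = "sk-or-..."  # enables the new OPENROUTER fallback entry
os.environ["GEMINI_API_KEY"] = "..."            # enables the GEMINI fallback entry
os.environ["AZURE_OPENAI_API_KEY"] = "..."      # CHATGPT lists both of these
os.environ["AZURE_OPENAI_ENDPOINT"] = "https://example.openai.azure.com"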
@@ -28,6 +30,8 @@ API_KEY_ENV_VARS = {
 class AgenticTool(BaseTool):
     """Generic wrapper around LLM prompting supporting JSON-defined configs with prompts and input arguments."""
 
+    STREAM_FLAG_KEY = "_tooluniverse_stream"
+
     @staticmethod
     def has_any_api_keys() -> bool:
         """
@@ -202,6 +206,8 @@ class AgenticTool(BaseTool):
         try:
             if api_type == "CHATGPT":
                 self._llm_client = AzureOpenAIClient(model_id, None, self.logger)
+            elif api_type == "OPENROUTER":
+                self._llm_client = OpenRouterClient(model_id, self.logger)
             elif api_type == "GEMINI":
                 self._llm_client = GeminiClient(model_id, self.logger)
             else:
@@ -237,7 +243,7 @@ class AgenticTool(BaseTool):
 
     # ------------------------------------------------------------------ LLM utilities -----------
     def _validate_model_config(self):
-        supported_api_types = ["CHATGPT", "GEMINI"]
+        supported_api_types = ["CHATGPT", "OPENROUTER", "GEMINI"]
         if self._api_type not in supported_api_types:
             raise ValueError(
                 f"Unsupported API type: {self._api_type}. Supported types: {supported_api_types}"
@@ -246,9 +252,18 @@ class AgenticTool(BaseTool):
             raise ValueError("max_new_tokens must be positive or None")
 
     # ------------------------------------------------------------------ public API --------------
-    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+    def run(
+        self,
+        arguments: Dict[str, Any],
+        stream_callback: Optional[Callable[[str], None]] = None,
+    ) -> Dict[str, Any]:
         start_time = datetime.now()
 
+        # Work on a copy so we can remove control flags without mutating caller data
+        arguments = dict(arguments or {})
+        stream_flag = bool(arguments.pop("_tooluniverse_stream", False))
+        streaming_requested = stream_flag or stream_callback is not None
+
         # Check if tool is available before attempting to run
         if not self._is_available:
             error_msg = f"Tool '{self.name}' is not available due to initialization error: {self._initialization_error}"
@@ -296,16 +311,52 @@ class AgenticTool(BaseTool):
         custom_format = arguments.get("response_format", None)
 
         # Delegate to client; client handles provider-specific logic
-        response = self._llm_client.infer(
-            messages=messages,
-            temperature=self._temperature,
-            max_tokens=None,  # client resolves per-model defaults/env
-            return_json=self._return_json,
-            custom_format=custom_format,
-            max_retries=self._max_retries,
-            retry_delay=self._retry_delay,
+        response = None
+
+        streaming_permitted = (
+            streaming_requested and not self._return_json and custom_format is None
         )
 
+        if streaming_permitted and hasattr(self._llm_client, "infer_stream"):
+            try:
+                chunks_collected: List[str] = []
+                stream_iter = self._llm_client.infer_stream(
+                    messages=messages,
+                    temperature=self._temperature,
+                    max_tokens=None,
+                    return_json=self._return_json,
+                    custom_format=custom_format,
+                    max_retries=self._max_retries,
+                    retry_delay=self._retry_delay,
+                )
+                for chunk in stream_iter:
+                    if not chunk:
+                        continue
+                    chunks_collected.append(chunk)
+                    self._emit_stream_chunk(chunk, stream_callback)
+                if chunks_collected:
+                    response = "".join(chunks_collected)
+            except Exception as stream_error:  # noqa: BLE001
+                self.logger.warning(
+                    f"Streaming failed for tool '{self.name}': {stream_error}. Falling back to buffered response."
+                )
+                response = None
+
+        if response is None:
+            response = self._llm_client.infer(
+                messages=messages,
+                temperature=self._temperature,
+                max_tokens=None,  # client resolves per-model defaults/env
+                return_json=self._return_json,
+                custom_format=custom_format,
+                max_retries=self._max_retries,
+                retry_delay=self._retry_delay,
+            )
+
+        if streaming_requested and response:
+            for chunk in self._iter_chunks(response):
+                self._emit_stream_chunk(chunk, stream_callback)
+
         end_time = datetime.now()
         execution_time = (end_time - start_time).total_seconds()
 
@@ -334,7 +385,8 @@ class AgenticTool(BaseTool):
                 }
             else:
                 return response
-        except Exception as e:
+
+        except Exception as e:  # noqa: BLE001
             end_time = datetime.now()
             execution_time = (end_time - start_time).total_seconds()
             self.logger.error(f"Error executing {self.name}: {str(e)}")
@@ -355,13 +407,35 @@ class AgenticTool(BaseTool):
                     "model_info": {
                         "api_type": self._api_type,
                         "model_id": self._model_id,
+                        "temperature": self._temperature,
+                        "max_new_tokens": self._max_new_tokens,
                     },
                     "execution_time_seconds": execution_time,
                     "timestamp": start_time.isoformat(),
                 },
             }
         else:
-            return "error: " + str(e) + " error_type: " + type(e).__name__
+            return f"error: {str(e)} error_type: {type(e).__name__}"
+
+    @staticmethod
+    def _iter_chunks(text: str, size: int = 800):
+        if not text:
+            return
+        for idx in range(0, len(text), size):
+            yield text[idx : idx + size]
+
+    def _emit_stream_chunk(
+        self, chunk: Optional[str], stream_callback: Optional[Callable[[str], None]]
+    ) -> None:
+        if not stream_callback or not chunk:
+            return
+        try:
+            stream_callback(chunk)
+        except Exception as callback_error:  # noqa: BLE001
+            # Streaming callbacks should not break tool execution; log and continue
+            self.logger.debug(
+                f"Stream callback for tool '{self.name}' raised an exception: {callback_error}"
+            )
 
     # ------------------------------------------------------------------ helpers -----------------
     def _validate_arguments(self, arguments: Dict[str, Any]):
@@ -440,6 +514,8 @@ class AgenticTool(BaseTool):
         try:
             if self._api_type == "CHATGPT":
                 self._llm_client = AzureOpenAIClient(self._model_id, None, self.logger)
+            elif self._api_type == "OPENROUTER":
+                self._llm_client = OpenRouterClient(self._model_id, self.logger)
             elif self._api_type == "GEMINI":
                 self._llm_client = GeminiClient(self._gemini_model_id, self.logger)
             else:
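
Taken together, these hunks add an opt-in streaming path to AgenticTool.run. A minimal usage sketch, assuming an already-constructed instance named summarizer (hypothetical) and an LLM client that may or may not implement infer_stream:

chunks = []

def on_chunk(text: str) -> None:
    # Invoked once per streamed chunk; per _emit_stream_chunk above, exceptions
    # raised here are logged at debug level and never abort the run.
    chunks.append(text)

# Explicit callback: streams live when the client exposes infer_stream;
# otherwise the buffered response is re-chunked via _iter_chunks (800 chars).
result = summarizer.run({"text": "..."}, stream_callback=on_chunk)

# Flag form: "_tooluniverse_stream" is popped from a copy of the arguments
# before validation, so it never reaches the prompt template.
result = summarizer.run({"text": "...", "_tooluniverse_stream": True})

Note that streaming is skipped whenever return_json is set or a custom response_format is passed; the buffered infer call remains the fallback in every case.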
tooluniverse/arxiv_tool.py ADDED
@@ -0,0 +1,113 @@
+import requests
+import xml.etree.ElementTree as ET
+from .base_tool import BaseTool
+from .tool_registry import register_tool
+
+
+@register_tool("ArXivTool")
+class ArXivTool(BaseTool):
+    """
+    Search arXiv for papers by keyword using the public arXiv API.
+    """
+
+    def __init__(
+        self,
+        tool_config,
+        base_url="http://export.arxiv.org/api/query",
+    ):
+        super().__init__(tool_config)
+        self.base_url = base_url
+
+    def run(self, arguments):
+        query = arguments.get("query")
+        limit = int(arguments.get("limit", 10))
+        # sort_by: relevance | lastUpdatedDate | submittedDate
+        sort_by = arguments.get("sort_by", "relevance")
+        # sort_order: ascending | descending
+        sort_order = arguments.get("sort_order", "descending")
+
+        if not query:
+            return {"error": "`query` parameter is required."}
+
+        return self._search(query, limit, sort_by, sort_order)
+
+    def _search(self, query, limit, sort_by, sort_order):
+        params = {
+            "search_query": f"all:{query}",
+            "start": 0,
+            "max_results": max(1, min(limit, 200)),
+            "sortBy": sort_by,
+            "sortOrder": sort_order,
+        }
+
+        try:
+            response = requests.get(self.base_url, params=params, timeout=20)
+        except requests.RequestException as e:
+            return {
+                "error": "Network error calling arXiv API",
+                "reason": str(e),
+            }
+
+        if response.status_code != 200:
+            return {
+                "error": f"arXiv API error {response.status_code}",
+                "reason": response.reason,
+            }
+
+        # Parse Atom XML
+        try:
+            root = ET.fromstring(response.text)
+        except ET.ParseError as e:
+            return {
+                "error": "Failed to parse arXiv response",
+                "reason": str(e),
+            }
+
+        ns = {"atom": "http://www.w3.org/2005/Atom"}
+        entries = []
+        for entry in root.findall("atom:entry", ns):
+            title_text = entry.findtext(
+                "atom:title",
+                default="",
+                namespaces=ns,
+            )
+            title = (title_text or "").strip()
+            summary_text = entry.findtext(
+                "atom:summary",
+                default="",
+                namespaces=ns,
+            )
+            summary = (summary_text or "").strip()
+            link_el = entry.find("atom:link[@type='text/html']", ns)
+            if link_el is not None:
+                link = link_el.get("href")
+            else:
+                link = entry.findtext("atom:id", default="", namespaces=ns)
+            published = entry.findtext(
+                "atom:published", default="", namespaces=ns
+            )
+            updated = entry.findtext("atom:updated", default="", namespaces=ns)
+            authors = [
+                a.findtext("atom:name", default="", namespaces=ns)
+                for a in entry.findall("atom:author", ns)
+            ]
+            primary_category = ""
+            cat_el = entry.find(
+                "{http://arxiv.org/schemas/atom}primary_category"
+            )
+            if cat_el is not None:
+                primary_category = cat_el.get("term", "")
+
+            entries.append(
+                {
+                    "title": title,
+                    "abstract": summary,
+                    "authors": authors,
+                    "published": published,
+                    "updated": updated,
+                    "category": primary_category,
+                    "url": link,
+                }
+            )
+
+        return entries
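
A usage sketch for the new tool; the tool_config below is a hypothetical placeholder. On success run returns a list of entry dicts, on failure a single dict with an "error" key, so callers should branch on the type:

# Hedged example; config contents beyond the constructor signature are assumptions.
tool = ArXivTool(tool_config={"name": "arxiv_search"})
result = tool.run({
    "query": "diffusion models",
    "limit": 5,
    "sort_by": "submittedDate",   # relevance | lastUpdatedDate | submittedDate
    "sort_order": "descending",   # ascending | descending
})

if isinstance(result, dict):      # error shape
    print(result["error"])
else:
    for paper in result:
        print(paper["published"][:10], paper["title"], paper["url"])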
tooluniverse/biorxiv_tool.py ADDED
@@ -0,0 +1,97 @@
+import requests
+from .base_tool import BaseTool
+from .tool_registry import register_tool
+
+
+@register_tool("BioRxivTool")
+class BioRxivTool(BaseTool):
+    """
+    Search bioRxiv preprints using the public bioRxiv API.
+
+    Arguments:
+        query (str): Search term
+        max_results (int): Max results to return (default 10, max 200)
+    """
+
+    def __init__(
+        self,
+        tool_config,
+        base_url="https://api.medrxiv.org/details",
+    ):
+        super().__init__(tool_config)
+        self.base_url = base_url
+
+    def run(self, arguments=None):
+        arguments = arguments or {}
+        query = arguments.get("query")
+        max_results = int(arguments.get("max_results", 10))
+        if not query:
+            return {"error": "`query` parameter is required."}
+        return self._search(query, max_results)
+
+    def _search(self, query, max_results):
+        # Use date range search for recent preprints
+        # Format: /biorxiv/{start_date}/{end_date}/{cursor}/json
+        from datetime import datetime, timedelta
+
+        # Search last 30 days
+        end_date = datetime.now()
+        start_date = end_date - timedelta(days=30)
+
+        url = (f"{self.base_url}/biorxiv/"
+               f"{start_date.strftime('%Y-%m-%d')}/"
+               f"{end_date.strftime('%Y-%m-%d')}/0/json")
+
+        try:
+            resp = requests.get(url, timeout=20)
+            resp.raise_for_status()
+            data = resp.json()
+        except requests.RequestException as e:
+            return {
+                "error": "Network/API error calling bioRxiv",
+                "reason": str(e),
+            }
+        except ValueError:
+            return {"error": "Failed to decode bioRxiv response as JSON"}
+
+        results = []
+        # The API returns a dictionary with a 'collection' key
+        collection = data.get("collection", [])
+        if not isinstance(collection, list):
+            return {"error": "Unexpected API response format"}
+
+        for item in collection:
+            title = item.get("title")
+            authors = item.get("authors", "")
+            if isinstance(authors, str):
+                authors = [a.strip() for a in authors.split(";") if a.strip()]
+            elif isinstance(authors, list):
+                authors = [str(a).strip() for a in authors if str(a).strip()]
+            else:
+                authors = []
+
+            year = None
+            date = item.get("date")
+            if date and len(date) >= 4 and date[:4].isdigit():
+                year = int(date[:4])
+
+            doi = item.get("doi")
+            url = f"https://www.biorxiv.org/content/{doi}" if doi else None
+
+            # Filter by query if provided
+            if query and query.lower() not in (title or "").lower():
+                continue
+
+            results.append(
+                {
+                    "title": title,
+                    "authors": authors,
+                    "year": year,
+                    "doi": doi,
+                    "url": url,
+                    "abstract": item.get("abstract", ""),
+                    "source": "bioRxiv",
+                }
+            )
+
+        return results[:max_results]
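
Two behaviors are worth knowing before calling this tool: the query is never sent to the server (the last 30 days of the collection are fetched and titles are filtered locally, and only the first cursor page is read), and the default base_url points at the medrxiv host even though the /biorxiv path segment selects the bioRxiv server. A usage sketch with a hypothetical config:

# Hedged example; only recent titles can match, so empty results are common.
tool = BioRxivTool(tool_config={"name": "biorxiv_search"})
hits = tool.run({"query": "CRISPR", "max_results": 5})

# Success is a (possibly empty) list; error shapes are dicts.
if isinstance(hits, list):
    for h in hits:
        print(h["year"], h["title"], h["url"])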
tooluniverse/core_tool.py ADDED
@@ -0,0 +1,153 @@
+#!/usr/bin/env python3
+"""
+CORE API Tool for searching open access academic papers.
+
+CORE is the world's largest collection of open access research papers.
+This tool provides access to over 200 million open access papers from
+repositories and journals worldwide.
+"""
+
+import requests
+from typing import Dict, List, Any, Optional
+from .base_tool import BaseTool
+from .tool_registry import register_tool
+
+
+@register_tool("CoreTool")
+class CoreTool(BaseTool):
+    """Tool for searching CORE open access academic papers."""
+
+    def __init__(self, tool_config=None):
+        super().__init__(tool_config)
+        self.base_url = "https://api.core.ac.uk/v3"
+        self.session = requests.Session()
+        self.session.headers.update({
+            'User-Agent': 'ToolUniverse/1.0',
+            'Accept': 'application/json'
+        })
+
+    def _search(self, query: str, limit: int = 10,
+                year_from: Optional[int] = None,
+                year_to: Optional[int] = None,
+                language: Optional[str] = None) -> List[Dict[str, Any]]:
+        """
+        Search for papers using CORE API.
+
+        Args:
+            query: Search query
+            limit: Maximum number of results
+            year_from: Start year filter
+            year_to: End year filter
+            language: Language filter (e.g., 'en', 'es', 'fr')
+
+        Returns:
+            List of paper dictionaries
+        """
+        try:
+            # Build search parameters
+            params = {
+                'q': query,
+                'limit': min(limit, 100),  # CORE API max limit is 100
+                'page': 1
+            }
+
+            # Add year filters if provided
+            if year_from or year_to:
+                year_filter = []
+                if year_from:
+                    year_filter.append(f"year:>={year_from}")
+                if year_to:
+                    year_filter.append(f"year:<={year_to}")
+                params['q'] += f" {' '.join(year_filter)}"
+
+            # Add language filter if provided
+            if language:
+                params['q'] += f" language:{language}"
+
+            # Make API request
+            response = self.session.get(
+                f"{self.base_url}/search/works",
+                params=params,
+                timeout=30
+            )
+            response.raise_for_status()
+
+            data = response.json()
+            results = []
+
+            # Parse results
+            for item in data.get('results', []):
+                paper = {
+                    'title': item.get('title', 'No title'),
+                    'abstract': item.get('abstract', 'No abstract available'),
+                    'authors': self._extract_authors(item.get('authors', [])),
+                    'year': self._extract_year(item.get('publishedDate')),
+                    'doi': item.get('doi'),
+                    'url': (item.get('downloadUrl') or
+                            item.get('links', [{}])[0].get('url')),
+                    'venue': item.get('publisher'),
+                    'language': item.get('language', {}).get('code', 'Unknown'),
+                    'open_access': True,  # CORE only contains open access papers
+                    'source': 'CORE',
+                    'citations': item.get('citationCount', 0),
+                    'downloads': item.get('downloadCount', 0)
+                }
+                results.append(paper)
+
+            return results
+
+        except requests.exceptions.RequestException as e:
+            return [{'error': f'CORE API request failed: {str(e)}'}]
+        except Exception as e:
+            return [{'error': f'CORE API error: {str(e)}'}]
+
+    def _extract_authors(self, authors: List[Dict]) -> List[str]:
+        """Extract author names from CORE API response."""
+        if not authors:
+            return []
+
+        author_names = []
+        for author in authors:
+            name = author.get('name', '')
+            if name:
+                author_names.append(name)
+
+        return author_names
+
+    def _extract_year(self, published_date: str) -> str:
+        """Extract year from published date."""
+        if not published_date:
+            return 'Unknown'
+
+        try:
+            # CORE API returns dates in ISO format
+            return published_date[:4]
+        except Exception:
+            return 'Unknown'
+
+    def run(self, tool_arguments) -> List[Dict[str, Any]]:
+        """
+        Execute the CORE search.
+
+        Args:
+            tool_arguments: Dictionary containing search parameters
+
+        Returns:
+            List of paper dictionaries
+        """
+        query = tool_arguments.get('query', '')
+        if not query:
+            return [{'error': 'Query parameter is required'}]
+
+        limit = tool_arguments.get('limit', 10)
+        year_from = tool_arguments.get('year_from')
+        year_to = tool_arguments.get('year_to')
+        language = tool_arguments.get('language')
+
+        return self._search(
+            query=query,
+            limit=limit,
+            year_from=year_from,
+            year_to=year_to,
+            language=language
+        )
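
A usage sketch for CoreTool. Note that the year and language filters are folded into the q string (e.g. "graph neural networks year:>=2020 language:en") rather than sent as separate parameters, and that errors come back as a one-element list holding an error dict. The tool sends no API key; whether unauthenticated CORE v3 requests get a useful rate limit is an assumption here:

tool = CoreTool()
papers = tool.run({
    "query": "graph neural networks",
    "limit": 5,
    "year_from": 2020,   # appended to q as "year:>=2020"
    "language": "en",    # appended to q as "language:en"
})

for p in papers:
    if "error" in p:
        print("CORE error:", p["error"])
    else:
        print(p["year"], p["title"], f'{p["citations"]} citations')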