tooluniverse-1.0.5-py3-none-any.whl → tooluniverse-1.0.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of tooluniverse might be problematic.

Files changed (45)
  1. tooluniverse/__init__.py +39 -0
  2. tooluniverse/agentic_tool.py +82 -12
  3. tooluniverse/arxiv_tool.py +113 -0
  4. tooluniverse/biorxiv_tool.py +97 -0
  5. tooluniverse/core_tool.py +153 -0
  6. tooluniverse/crossref_tool.py +73 -0
  7. tooluniverse/data/arxiv_tools.json +87 -0
  8. tooluniverse/data/biorxiv_tools.json +70 -0
  9. tooluniverse/data/core_tools.json +105 -0
  10. tooluniverse/data/crossref_tools.json +70 -0
  11. tooluniverse/data/dblp_tools.json +73 -0
  12. tooluniverse/data/doaj_tools.json +94 -0
  13. tooluniverse/data/fatcat_tools.json +72 -0
  14. tooluniverse/data/hal_tools.json +70 -0
  15. tooluniverse/data/medrxiv_tools.json +70 -0
  16. tooluniverse/data/openaire_tools.json +85 -0
  17. tooluniverse/data/osf_preprints_tools.json +77 -0
  18. tooluniverse/data/pmc_tools.json +109 -0
  19. tooluniverse/data/pubmed_tools.json +65 -0
  20. tooluniverse/data/unpaywall_tools.json +86 -0
  21. tooluniverse/data/wikidata_sparql_tools.json +42 -0
  22. tooluniverse/data/zenodo_tools.json +82 -0
  23. tooluniverse/dblp_tool.py +62 -0
  24. tooluniverse/default_config.py +17 -0
  25. tooluniverse/doaj_tool.py +124 -0
  26. tooluniverse/execute_function.py +70 -9
  27. tooluniverse/fatcat_tool.py +66 -0
  28. tooluniverse/hal_tool.py +77 -0
  29. tooluniverse/llm_clients.py +286 -0
  30. tooluniverse/medrxiv_tool.py +97 -0
  31. tooluniverse/openaire_tool.py +145 -0
  32. tooluniverse/osf_preprints_tool.py +67 -0
  33. tooluniverse/pmc_tool.py +181 -0
  34. tooluniverse/pubmed_tool.py +110 -0
  35. tooluniverse/smcp.py +109 -79
  36. tooluniverse/test/test_claude_sdk.py +11 -4
  37. tooluniverse/unpaywall_tool.py +63 -0
  38. tooluniverse/wikidata_sparql_tool.py +61 -0
  39. tooluniverse/zenodo_tool.py +74 -0
  40. {tooluniverse-1.0.5.dist-info → tooluniverse-1.0.6.dist-info}/METADATA +2 -1
  41. {tooluniverse-1.0.5.dist-info → tooluniverse-1.0.6.dist-info}/RECORD +45 -13
  42. {tooluniverse-1.0.5.dist-info → tooluniverse-1.0.6.dist-info}/entry_points.txt +1 -0
  43. {tooluniverse-1.0.5.dist-info → tooluniverse-1.0.6.dist-info}/WHEEL +0 -0
  44. {tooluniverse-1.0.5.dist-info → tooluniverse-1.0.6.dist-info}/licenses/LICENSE +0 -0
  45. {tooluniverse-1.0.5.dist-info → tooluniverse-1.0.6.dist-info}/top_level.txt +0 -0
tooluniverse/__init__.py CHANGED
@@ -224,6 +224,19 @@ if not LAZY_LOADING_ENABLED:
          ODPHPTopicSearch,
          ODPHPOutlinkFetch,
      )
+     # Literature search tools
+     from .arxiv_tool import ArXivTool
+     from .crossref_tool import CrossrefTool
+     from .dblp_tool import DBLPTool
+     from .pubmed_tool import PubMedTool
+     from .doaj_tool import DOAJTool
+     from .unpaywall_tool import UnpaywallTool
+     from .biorxiv_tool import BioRxivTool
+     from .medrxiv_tool import MedRxivTool
+     from .hal_tool import HALTool
+     from .core_tool import CoreTool
+     from .pmc_tool import PMCTool
+     from .zenodo_tool import ZenodoTool
  else:
      # With lazy loading, create lazy import proxies that import modules only when accessed
      MonarchTool = _LazyImportProxy("restful_tool", "MonarchTool")
@@ -305,6 +318,19 @@ else:
      ODPHPMyHealthfinder = _LazyImportProxy("odphp_tool", "ODHPHPMyHealthfinder")
      ODPHPTopicSearch = _LazyImportProxy("odphp_tool", "ODPHPTopicSearch")
      ODPHPOutlinkFetch = _LazyImportProxy("odphp_tool", "ODPHPOutlinkFetch")
+     # Literature search tools
+     ArXivTool = _LazyImportProxy("arxiv_tool", "ArXivTool")
+     CrossrefTool = _LazyImportProxy("crossref_tool", "CrossrefTool")
+     DBLPTool = _LazyImportProxy("dblp_tool", "DBLPTool")
+     PubMedTool = _LazyImportProxy("pubmed_tool", "PubMedTool")
+     DOAJTool = _LazyImportProxy("doaj_tool", "DOAJTool")
+     UnpaywallTool = _LazyImportProxy("unpaywall_tool", "UnpaywallTool")
+     BioRxivTool = _LazyImportProxy("biorxiv_tool", "BioRxivTool")
+     MedRxivTool = _LazyImportProxy("medrxiv_tool", "MedRxivTool")
+     HALTool = _LazyImportProxy("hal_tool", "HALTool")
+     CoreTool = _LazyImportProxy("core_tool", "CoreTool")
+     PMCTool = _LazyImportProxy("pmc_tool", "PMCTool")
+     ZenodoTool = _LazyImportProxy("zenodo_tool", "ZenodoTool")

  __all__ = [
      "__version__",
@@ -376,4 +402,17 @@ __all__ = [
      "ODPHPItemList",
      "ODPHPTopicSearch",
      "ODPHPOutlinkFetch",
+     # Literature search tools
+     "ArXivTool",
+     "CrossrefTool",
+     "DBLPTool",
+     "PubMedTool",
+     "DOAJTool",
+     "UnpaywallTool",
+     "BioRxivTool",
+     "MedRxivTool",
+     "HALTool",
+     "CoreTool",
+     "PMCTool",
+     "ZenodoTool",
  ]
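The lazy branch mirrors the eager one name-for-name; per the comment in the diff, each proxy imports its module only when accessed. A minimal sketch of the intended effect (the `tool_config` value is a hypothetical placeholder; this assumes the proxy resolves on first use, as that comment describes):

from tooluniverse import ZenodoTool   # binds the proxy; zenodo_tool is not yet imported when lazy

tool = ZenodoTool(tool_config={"name": "zenodo_search"})  # first use triggers the real import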
tooluniverse/agentic_tool.py CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
  import os
  import json
  from datetime import datetime
- from typing import Any, Dict, List, Optional
+ from typing import Any, Callable, Dict, List, Optional

  from .base_tool import BaseTool
  from .tool_registry import register_tool
@@ -30,6 +30,8 @@ API_KEY_ENV_VARS = {
  class AgenticTool(BaseTool):
      """Generic wrapper around LLM prompting supporting JSON-defined configs with prompts and input arguments."""

+     STREAM_FLAG_KEY = "_tooluniverse_stream"
+
      @staticmethod
      def has_any_api_keys() -> bool:
          """
@@ -250,9 +252,18 @@ class AgenticTool(BaseTool):
              raise ValueError("max_new_tokens must be positive or None")

      # ------------------------------------------------------------------ public API --------------
-     def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+     def run(
+         self,
+         arguments: Dict[str, Any],
+         stream_callback: Optional[Callable[[str], None]] = None,
+     ) -> Dict[str, Any]:
          start_time = datetime.now()

+         # Work on a copy so we can remove control flags without mutating caller data
+         arguments = dict(arguments or {})
+         stream_flag = bool(arguments.pop("_tooluniverse_stream", False))
+         streaming_requested = stream_flag or stream_callback is not None
+
          # Check if tool is available before attempting to run
          if not self._is_available:
              error_msg = f"Tool '{self.name}' is not available due to initialization error: {self._initialization_error}"
@@ -300,16 +311,52 @@ class AgenticTool(BaseTool):
              custom_format = arguments.get("response_format", None)

              # Delegate to client; client handles provider-specific logic
-             response = self._llm_client.infer(
-                 messages=messages,
-                 temperature=self._temperature,
-                 max_tokens=None,  # client resolves per-model defaults/env
-                 return_json=self._return_json,
-                 custom_format=custom_format,
-                 max_retries=self._max_retries,
-                 retry_delay=self._retry_delay,
+             response = None
+
+             streaming_permitted = (
+                 streaming_requested and not self._return_json and custom_format is None
              )

+             if streaming_permitted and hasattr(self._llm_client, "infer_stream"):
+                 try:
+                     chunks_collected: List[str] = []
+                     stream_iter = self._llm_client.infer_stream(
+                         messages=messages,
+                         temperature=self._temperature,
+                         max_tokens=None,
+                         return_json=self._return_json,
+                         custom_format=custom_format,
+                         max_retries=self._max_retries,
+                         retry_delay=self._retry_delay,
+                     )
+                     for chunk in stream_iter:
+                         if not chunk:
+                             continue
+                         chunks_collected.append(chunk)
+                         self._emit_stream_chunk(chunk, stream_callback)
+                     if chunks_collected:
+                         response = "".join(chunks_collected)
+                 except Exception as stream_error:  # noqa: BLE001
+                     self.logger.warning(
+                         f"Streaming failed for tool '{self.name}': {stream_error}. Falling back to buffered response."
+                     )
+                     response = None
+
+             if response is None:
+                 response = self._llm_client.infer(
+                     messages=messages,
+                     temperature=self._temperature,
+                     max_tokens=None,  # client resolves per-model defaults/env
+                     return_json=self._return_json,
+                     custom_format=custom_format,
+                     max_retries=self._max_retries,
+                     retry_delay=self._retry_delay,
+                 )
+
+             if streaming_requested and response:
+                 for chunk in self._iter_chunks(response):
+                     self._emit_stream_chunk(chunk, stream_callback)
+
              end_time = datetime.now()
              execution_time = (end_time - start_time).total_seconds()

@@ -338,7 +385,8 @@
                  }
              else:
                  return response
-         except Exception as e:
+
+         except Exception as e:  # noqa: BLE001
              end_time = datetime.now()
              execution_time = (end_time - start_time).total_seconds()
              self.logger.error(f"Error executing {self.name}: {str(e)}")
@@ -359,13 +407,35 @@ class AgenticTool(BaseTool):
                          "model_info": {
                              "api_type": self._api_type,
                              "model_id": self._model_id,
+                             "temperature": self._temperature,
+                             "max_new_tokens": self._max_new_tokens,
                          },
                          "execution_time_seconds": execution_time,
                          "timestamp": start_time.isoformat(),
                      },
                  }
              else:
-                 return "error: " + str(e) + " error_type: " + type(e).__name__
+                 return f"error: {str(e)} error_type: {type(e).__name__}"
+
+     @staticmethod
+     def _iter_chunks(text: str, size: int = 800):
+         if not text:
+             return
+         for idx in range(0, len(text), size):
+             yield text[idx : idx + size]
+
+     def _emit_stream_chunk(
+         self, chunk: Optional[str], stream_callback: Optional[Callable[[str], None]]
+     ) -> None:
+         if not stream_callback or not chunk:
+             return
+         try:
+             stream_callback(chunk)
+         except Exception as callback_error:  # noqa: BLE001
+             # Streaming callbacks should not break tool execution; log and continue
+             self.logger.debug(
+                 f"Stream callback for tool '{self.name}' raised an exception: {callback_error}"
+             )

      # ------------------------------------------------------------------ helpers -----------------
      def _validate_arguments(self, arguments: Dict[str, Any]):
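Taken together, `run()` now supports streaming two ways: an explicit `stream_callback`, or the `_tooluniverse_stream` flag that is popped from a copy of the arguments before validation. Streaming is skipped when `return_json` is set or a custom `response_format` is supplied, and if the client lacks `infer_stream` (or it fails), the buffered `infer()` result is re-chunked into 800-character slices so callers still see chunk events. A caller-side sketch (the `agentic_tool` instance and its prompt argument names are hypothetical):

collected = []

def on_chunk(text: str) -> None:
    # Invoked once per chunk; exceptions raised here are logged and swallowed
    # by _emit_stream_chunk rather than aborting the tool call.
    collected.append(text)
    print(text, end="", flush=True)

result = agentic_tool.run(
    {"question": "Summarize the findings."},   # hypothetical prompt argument
    stream_callback=on_chunk,
)

# The flag form requests streaming without a callback; run() pops the key
# so argument validation never sees it.
result = agentic_tool.run({"question": "Summarize the findings.", "_tooluniverse_stream": True})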
tooluniverse/arxiv_tool.py ADDED
@@ -0,0 +1,113 @@
+ import requests
+ import xml.etree.ElementTree as ET
+ from .base_tool import BaseTool
+ from .tool_registry import register_tool
+
+
+ @register_tool("ArXivTool")
+ class ArXivTool(BaseTool):
+     """
+     Search arXiv for papers by keyword using the public arXiv API.
+     """
+
+     def __init__(
+         self,
+         tool_config,
+         base_url="http://export.arxiv.org/api/query",
+     ):
+         super().__init__(tool_config)
+         self.base_url = base_url
+
+     def run(self, arguments):
+         query = arguments.get("query")
+         limit = int(arguments.get("limit", 10))
+         # sort_by: relevance | lastUpdatedDate | submittedDate
+         sort_by = arguments.get("sort_by", "relevance")
+         # sort_order: ascending | descending
+         sort_order = arguments.get("sort_order", "descending")
+
+         if not query:
+             return {"error": "`query` parameter is required."}
+
+         return self._search(query, limit, sort_by, sort_order)
+
+     def _search(self, query, limit, sort_by, sort_order):
+         params = {
+             "search_query": f"all:{query}",
+             "start": 0,
+             "max_results": max(1, min(limit, 200)),
+             "sortBy": sort_by,
+             "sortOrder": sort_order,
+         }
+
+         try:
+             response = requests.get(self.base_url, params=params, timeout=20)
+         except requests.RequestException as e:
+             return {
+                 "error": "Network error calling arXiv API",
+                 "reason": str(e),
+             }
+
+         if response.status_code != 200:
+             return {
+                 "error": f"arXiv API error {response.status_code}",
+                 "reason": response.reason,
+             }
+
+         # Parse Atom XML
+         try:
+             root = ET.fromstring(response.text)
+         except ET.ParseError as e:
+             return {
+                 "error": "Failed to parse arXiv response",
+                 "reason": str(e),
+             }
+
+         ns = {"atom": "http://www.w3.org/2005/Atom"}
+         entries = []
+         for entry in root.findall("atom:entry", ns):
+             title_text = entry.findtext(
+                 "atom:title",
+                 default="",
+                 namespaces=ns,
+             )
+             title = (title_text or "").strip()
+             summary_text = entry.findtext(
+                 "atom:summary",
+                 default="",
+                 namespaces=ns,
+             )
+             summary = (summary_text or "").strip()
+             link_el = entry.find("atom:link[@type='text/html']", ns)
+             if link_el is not None:
+                 link = link_el.get("href")
+             else:
+                 link = entry.findtext("atom:id", default="", namespaces=ns)
+             published = entry.findtext(
+                 "atom:published", default="", namespaces=ns
+             )
+             updated = entry.findtext("atom:updated", default="", namespaces=ns)
+             authors = [
+                 a.findtext("atom:name", default="", namespaces=ns)
+                 for a in entry.findall("atom:author", ns)
+             ]
+             primary_category = ""
+             cat_el = entry.find(
+                 "{http://arxiv.org/schemas/atom}primary_category"
+             )
+             if cat_el is not None:
+                 primary_category = cat_el.get("term", "")
+
+             entries.append(
+                 {
+                     "title": title,
+                     "abstract": summary,
+                     "authors": authors,
+                     "published": published,
+                     "updated": updated,
+                     "category": primary_category,
+                     "url": link,
+                 }
+             )
+
+         return entries
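A usage sketch for the class above (the `tool_config` is a hypothetical placeholder; the tool wraps the keyword in an `all:` field query and clamps `max_results` to the 1–200 range):

tool = ArXivTool(tool_config={"name": "arxiv_search"})   # hypothetical config
papers = tool.run({
    "query": "diffusion models",
    "limit": 5,
    "sort_by": "submittedDate",    # relevance | lastUpdatedDate | submittedDate
    "sort_order": "descending",    # ascending | descending
})
if isinstance(papers, list):       # error cases return a dict instead
    for p in papers:
        print(p["published"][:10], p["title"])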
tooluniverse/biorxiv_tool.py ADDED
@@ -0,0 +1,97 @@
+ import requests
+ from .base_tool import BaseTool
+ from .tool_registry import register_tool
+
+
+ @register_tool("BioRxivTool")
+ class BioRxivTool(BaseTool):
+     """
+     Search bioRxiv preprints using the public bioRxiv API.
+
+     Arguments:
+         query (str): Search term
+         max_results (int): Max results to return (default 10, max 200)
+     """
+
+     def __init__(
+         self,
+         tool_config,
+         base_url="https://api.medrxiv.org/details",
+     ):
+         super().__init__(tool_config)
+         self.base_url = base_url
+
+     def run(self, arguments=None):
+         arguments = arguments or {}
+         query = arguments.get("query")
+         max_results = int(arguments.get("max_results", 10))
+         if not query:
+             return {"error": "`query` parameter is required."}
+         return self._search(query, max_results)
+
+     def _search(self, query, max_results):
+         # Use date range search for recent preprints
+         # Format: /biorxiv/{start_date}/{end_date}/{cursor}/json
+         from datetime import datetime, timedelta
+
+         # Search last 30 days
+         end_date = datetime.now()
+         start_date = end_date - timedelta(days=30)
+
+         url = (f"{self.base_url}/biorxiv/"
+                f"{start_date.strftime('%Y-%m-%d')}/"
+                f"{end_date.strftime('%Y-%m-%d')}/0/json")
+
+         try:
+             resp = requests.get(url, timeout=20)
+             resp.raise_for_status()
+             data = resp.json()
+         except requests.RequestException as e:
+             return {
+                 "error": "Network/API error calling bioRxiv",
+                 "reason": str(e),
+             }
+         except ValueError:
+             return {"error": "Failed to decode bioRxiv response as JSON"}
+
+         results = []
+         # The API returns a dictionary with a 'collection' key
+         collection = data.get("collection", [])
+         if not isinstance(collection, list):
+             return {"error": "Unexpected API response format"}
+
+         for item in collection:
+             title = item.get("title")
+             authors = item.get("authors", "")
+             if isinstance(authors, str):
+                 authors = [a.strip() for a in authors.split(";") if a.strip()]
+             elif isinstance(authors, list):
+                 authors = [str(a).strip() for a in authors if str(a).strip()]
+             else:
+                 authors = []
+
+             year = None
+             date = item.get("date")
+             if date and len(date) >= 4 and date[:4].isdigit():
+                 year = int(date[:4])
+
+             doi = item.get("doi")
+             url = f"https://www.biorxiv.org/content/{doi}" if doi else None
+
+             # Filter by query if provided
+             if query and query.lower() not in (title or "").lower():
+                 continue
+
+             results.append(
+                 {
+                     "title": title,
+                     "authors": authors,
+                     "year": year,
+                     "doi": doi,
+                     "url": url,
+                     "abstract": item.get("abstract", ""),
+                     "source": "bioRxiv",
+                 }
+             )
+
+         return results[:max_results]
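One behavior worth flagging for callers: despite the docstring, `query` is never sent to the API. The tool fetches the last 30 days of the `details` feed and keeps only items whose title contains the query as a case-insensitive substring, so abstract-only matches and older preprints are missed. Note also that the default `base_url` points at api.medrxiv.org even though the path segment and result links target bioRxiv. A sketch (hypothetical `tool_config`):

tool = BioRxivTool(tool_config={"name": "biorxiv_search"})
hits = tool.run({"query": "organoid", "max_results": 5})
if isinstance(hits, list):         # error cases return a dict instead
    for h in hits:
        print(h["year"], h["doi"], h["title"])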
tooluniverse/core_tool.py ADDED
@@ -0,0 +1,153 @@
+ #!/usr/bin/env python3
+ """
+ CORE API Tool for searching open access academic papers.
+
+ CORE is the world's largest collection of open access research papers.
+ This tool provides access to over 200 million open access papers from
+ repositories and journals worldwide.
+ """
+
+ import requests
+ from typing import Dict, List, Any, Optional
+ from .base_tool import BaseTool
+ from .tool_registry import register_tool
+
+
+ @register_tool("CoreTool")
+ class CoreTool(BaseTool):
+     """Tool for searching CORE open access academic papers."""
+
+     def __init__(self, tool_config=None):
+         super().__init__(tool_config)
+         self.base_url = "https://api.core.ac.uk/v3"
+         self.session = requests.Session()
+         self.session.headers.update({
+             'User-Agent': 'ToolUniverse/1.0',
+             'Accept': 'application/json'
+         })
+
+     def _search(self, query: str, limit: int = 10,
+                 year_from: Optional[int] = None,
+                 year_to: Optional[int] = None,
+                 language: Optional[str] = None) -> List[Dict[str, Any]]:
+         """
+         Search for papers using CORE API.
+
+         Args:
+             query: Search query
+             limit: Maximum number of results
+             year_from: Start year filter
+             year_to: End year filter
+             language: Language filter (e.g., 'en', 'es', 'fr')
+
+         Returns:
+             List of paper dictionaries
+         """
+         try:
+             # Build search parameters
+             params = {
+                 'q': query,
+                 'limit': min(limit, 100),  # CORE API max limit is 100
+                 'page': 1
+             }
+
+             # Add year filters if provided
+             if year_from or year_to:
+                 year_filter = []
+                 if year_from:
+                     year_filter.append(f"year:>={year_from}")
+                 if year_to:
+                     year_filter.append(f"year:<={year_to}")
+                 params['q'] += f" {' '.join(year_filter)}"
+
+             # Add language filter if provided
+             if language:
+                 params['q'] += f" language:{language}"
+
+             # Make API request
+             response = self.session.get(
+                 f"{self.base_url}/search/works",
+                 params=params,
+                 timeout=30
+             )
+             response.raise_for_status()
+
+             data = response.json()
+             results = []
+
+             # Parse results
+             for item in data.get('results', []):
+                 paper = {
+                     'title': item.get('title', 'No title'),
+                     'abstract': item.get('abstract', 'No abstract available'),
+                     'authors': self._extract_authors(item.get('authors', [])),
+                     'year': self._extract_year(item.get('publishedDate')),
+                     'doi': item.get('doi'),
+                     'url': (item.get('downloadUrl') or
+                             item.get('links', [{}])[0].get('url')),
+                     'venue': item.get('publisher'),
+                     'language': item.get('language', {}).get('code', 'Unknown'),
+                     'open_access': True,  # CORE only contains open access papers
+                     'source': 'CORE',
+                     'citations': item.get('citationCount', 0),
+                     'downloads': item.get('downloadCount', 0)
+                 }
+                 results.append(paper)
+
+             return results
+
+         except requests.exceptions.RequestException as e:
+             return [{'error': f'CORE API request failed: {str(e)}'}]
+         except Exception as e:
+             return [{'error': f'CORE API error: {str(e)}'}]
+
+     def _extract_authors(self, authors: List[Dict]) -> List[str]:
+         """Extract author names from CORE API response."""
+         if not authors:
+             return []
+
+         author_names = []
+         for author in authors:
+             name = author.get('name', '')
+             if name:
+                 author_names.append(name)
+
+         return author_names
+
+     def _extract_year(self, published_date: str) -> str:
+         """Extract year from published date."""
+         if not published_date:
+             return 'Unknown'
+
+         try:
+             # CORE API returns dates in ISO format
+             return published_date[:4]
+         except Exception:
+             return 'Unknown'
+
+     def run(self, tool_arguments) -> List[Dict[str, Any]]:
+         """
+         Execute the CORE search.
+
+         Args:
+             tool_arguments: Dictionary containing search parameters
+
+         Returns:
+             List of paper dictionaries
+         """
+         query = tool_arguments.get('query', '')
+         if not query:
+             return [{'error': 'Query parameter is required'}]
+
+         limit = tool_arguments.get('limit', 10)
+         year_from = tool_arguments.get('year_from')
+         year_to = tool_arguments.get('year_to')
+         language = tool_arguments.get('language')
+
+         return self._search(
+             query=query,
+             limit=limit,
+             year_from=year_from,
+             year_to=year_to,
+             language=language
+         )
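A sketch of driving `run()` (hypothetical `tool_config`). Two things to note: year and language filters are folded into the `q` string as field queries (e.g. `cancer year:>=2020 language:en`), and the session sends no API key; CORE's v3 API generally expects one, so unauthenticated calls may be throttled or rejected and surface as the error dictionaries above:

tool = CoreTool(tool_config={"name": "core_search"})
papers = tool.run({
    "query": "single-cell RNA sequencing",
    "limit": 5,
    "year_from": 2020,
    "language": "en",
})
for p in papers:
    # Error results are dicts carrying only an 'error' key.
    print(p.get("year"), p.get("title"))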
tooluniverse/crossref_tool.py ADDED
@@ -0,0 +1,73 @@
+ import requests
+ from .base_tool import BaseTool
+ from .tool_registry import register_tool
+
+
+ @register_tool("CrossrefTool")
+ class CrossrefTool(BaseTool):
+     """
+     Search Crossref Works API for articles by keyword.
+     """
+
+     def __init__(
+         self,
+         tool_config,
+         base_url="https://api.crossref.org/works",
+     ):
+         super().__init__(tool_config)
+         self.base_url = base_url
+
+     def run(self, arguments):
+         query = arguments.get("query")
+         rows = int(arguments.get("limit", 10))
+         # e.g., 'type:journal-article,from-pub-date:2020-01-01'
+         filter_str = arguments.get("filter")
+         if not query:
+             return {"error": "`query` parameter is required."}
+         return self._search(query, rows, filter_str)
+
+     def _search(self, query, rows, filter_str):
+         params = {"query": query, "rows": max(1, min(rows, 100))}
+         if filter_str:
+             params["filter"] = filter_str
+
+         try:
+             response = requests.get(self.base_url, params=params, timeout=20)
+         except requests.RequestException as e:
+             return {
+                 "error": "Network error calling Crossref API",
+                 "reason": str(e),
+             }
+
+         if response.status_code != 200:
+             return {
+                 "error": f"Crossref API error {response.status_code}",
+                 "reason": response.reason,
+             }
+
+         data = response.json().get("message", {}).get("items", [])
+         results = []
+         for item in data:
+             title_list = item.get("title") or []
+             title = title_list[0] if title_list else None
+             abstract = item.get("abstract")
+             year = None
+             issued = item.get("issued", {}).get("date-parts") or []
+             if issued and issued[0]:
+                 year = issued[0][0]
+             url = item.get("URL")
+             doi = item.get("DOI")
+             container_title = item.get("container-title") or []
+             journal = container_title[0] if container_title else None
+             results.append(
+                 {
+                     "title": title,
+                     "abstract": abstract,
+                     "journal": journal,
+                     "year": year,
+                     "doi": doi,
+                     "url": url,
+                 }
+             )
+
+         return results
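A closing sketch for the Crossref tool (hypothetical `tool_config`). The `filter` argument is passed through verbatim to the Works API and `rows` is clamped to 1–100; note that Crossref abstracts, when present, arrive as JATS-flavored XML strings rather than plain text:

tool = CrossrefTool(tool_config={"name": "crossref_search"})
works = tool.run({
    "query": "transformer language models",
    "limit": 5,
    "filter": "type:journal-article,from-pub-date:2020-01-01",
})
if isinstance(works, list):        # error cases return a dict instead
    for w in works:
        print(w["year"], w["journal"], w["title"])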