tooluniverse 1.0.5__py3-none-any.whl → 1.0.6__py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between versions as they appear in their respective public registries.

Note: this release of tooluniverse has been flagged as potentially problematic.

Files changed (45)
  1. tooluniverse/__init__.py +39 -0
  2. tooluniverse/agentic_tool.py +82 -12
  3. tooluniverse/arxiv_tool.py +113 -0
  4. tooluniverse/biorxiv_tool.py +97 -0
  5. tooluniverse/core_tool.py +153 -0
  6. tooluniverse/crossref_tool.py +73 -0
  7. tooluniverse/data/arxiv_tools.json +87 -0
  8. tooluniverse/data/biorxiv_tools.json +70 -0
  9. tooluniverse/data/core_tools.json +105 -0
  10. tooluniverse/data/crossref_tools.json +70 -0
  11. tooluniverse/data/dblp_tools.json +73 -0
  12. tooluniverse/data/doaj_tools.json +94 -0
  13. tooluniverse/data/fatcat_tools.json +72 -0
  14. tooluniverse/data/hal_tools.json +70 -0
  15. tooluniverse/data/medrxiv_tools.json +70 -0
  16. tooluniverse/data/openaire_tools.json +85 -0
  17. tooluniverse/data/osf_preprints_tools.json +77 -0
  18. tooluniverse/data/pmc_tools.json +109 -0
  19. tooluniverse/data/pubmed_tools.json +65 -0
  20. tooluniverse/data/unpaywall_tools.json +86 -0
  21. tooluniverse/data/wikidata_sparql_tools.json +42 -0
  22. tooluniverse/data/zenodo_tools.json +82 -0
  23. tooluniverse/dblp_tool.py +62 -0
  24. tooluniverse/default_config.py +17 -0
  25. tooluniverse/doaj_tool.py +124 -0
  26. tooluniverse/execute_function.py +70 -9
  27. tooluniverse/fatcat_tool.py +66 -0
  28. tooluniverse/hal_tool.py +77 -0
  29. tooluniverse/llm_clients.py +286 -0
  30. tooluniverse/medrxiv_tool.py +97 -0
  31. tooluniverse/openaire_tool.py +145 -0
  32. tooluniverse/osf_preprints_tool.py +67 -0
  33. tooluniverse/pmc_tool.py +181 -0
  34. tooluniverse/pubmed_tool.py +110 -0
  35. tooluniverse/smcp.py +109 -79
  36. tooluniverse/test/test_claude_sdk.py +11 -4
  37. tooluniverse/unpaywall_tool.py +63 -0
  38. tooluniverse/wikidata_sparql_tool.py +61 -0
  39. tooluniverse/zenodo_tool.py +74 -0
  40. {tooluniverse-1.0.5.dist-info → tooluniverse-1.0.6.dist-info}/METADATA +2 -1
  41. {tooluniverse-1.0.5.dist-info → tooluniverse-1.0.6.dist-info}/RECORD +45 -13
  42. {tooluniverse-1.0.5.dist-info → tooluniverse-1.0.6.dist-info}/entry_points.txt +1 -0
  43. {tooluniverse-1.0.5.dist-info → tooluniverse-1.0.6.dist-info}/WHEEL +0 -0
  44. {tooluniverse-1.0.5.dist-info → tooluniverse-1.0.6.dist-info}/licenses/LICENSE +0 -0
  45. {tooluniverse-1.0.5.dist-info → tooluniverse-1.0.6.dist-info}/top_level.txt +0 -0
tooluniverse/llm_clients.py
@@ -21,6 +21,29 @@ class BaseLLMClient:
     ) -> Optional[str]:
         raise NotImplementedError
 
+    def infer_stream(
+        self,
+        messages: List[Dict[str, str]],
+        temperature: Optional[float],
+        max_tokens: Optional[int],
+        return_json: bool,
+        custom_format: Any = None,
+        max_retries: int = 5,
+        retry_delay: int = 5,
+    ):
+        """Default streaming implementation falls back to regular inference."""
+        result = self.infer(
+            messages=messages,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            return_json=return_json,
+            custom_format=custom_format,
+            max_retries=max_retries,
+            retry_delay=retry_delay,
+        )
+        if result is not None:
+            yield result
+
 
 class AzureOpenAIClient(BaseLLMClient):
     # Built-in defaults for model families (can be overridden by env)
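
The base-class fallback above lets every client expose a uniform streaming interface even when it only implements blocking inference: the whole response is yielded as a single chunk. A minimal consumption sketch (the client object and prompt are illustrative, not part of the diff):

    # Sketch: stream tokens from any BaseLLMClient subclass and
    # reassemble the full response. Assumes `client` is an already
    # constructed subclass instance from this module.
    def print_stream(client, prompt: str) -> str:
        pieces = []
        for piece in client.infer_stream(
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2,
            max_tokens=256,
            return_json=False,
        ):
            print(piece, end="", flush=True)  # render incrementally
            pieces.append(piece)
        return "".join(pieces)  # full response text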
@@ -305,6 +328,179 @@ class AzureOpenAIClient(BaseLLMClient):
         self.logger.error("Max retries exceeded. Unable to complete the request.")
         return None
 
+    def infer_stream(
+        self,
+        messages: List[Dict[str, str]],
+        temperature: Optional[float],
+        max_tokens: Optional[int],
+        return_json: bool,
+        custom_format: Any = None,
+        max_retries: int = 5,
+        retry_delay: int = 5,
+    ):
+        if return_json or custom_format is not None:
+            yield from super().infer_stream(
+                messages,
+                temperature,
+                max_tokens,
+                return_json,
+                custom_format,
+                max_retries,
+                retry_delay,
+            )
+            return
+
+        retries = 0
+        eff_max = (
+            max_tokens
+            if max_tokens is not None
+            else self._resolve_default_max_tokens(self.model_name)
+        )
+
+        while retries < max_retries:
+            try:
+                kwargs: Dict[str, Any] = {
+                    "model": self.model_name,
+                    "messages": messages,
+                    "stream": True,
+                }
+                if temperature is not None:
+                    kwargs["temperature"] = temperature
+                if eff_max is not None:
+                    kwargs["max_tokens"] = eff_max
+
+                stream = self.client.chat.completions.create(**kwargs)
+                for chunk in stream:
+                    text = AzureOpenAIClient._extract_text_from_chunk(chunk)  # type: ignore[attr-defined]
+                    if text:
+                        yield text
+                return
+            except self._openai.RateLimitError:  # type: ignore[attr-defined]
+                self.logger.warning(
+                    f"OpenRouter streaming rate limit hit. Retrying in {retry_delay} seconds..."
+                )
+                retries += 1
+                time.sleep(retry_delay * retries)
+            except Exception as e:  # noqa: BLE001
+                self.logger.error(f"OpenRouter streaming error: {e}")
+                break
+
+        yield from super().infer_stream(
+            messages,
+            temperature,
+            max_tokens,
+            return_json,
+            custom_format,
+            max_retries,
+            retry_delay,
+        )
+
+    @staticmethod
+    def _extract_text_from_chunk(chunk) -> Optional[str]:
+        try:
+            choices = getattr(chunk, "choices", None)
+        except Exception:
+            choices = None
+        if not choices:
+            return None
+
+        first_choice = choices[0]
+        delta = getattr(first_choice, "delta", None)
+        if delta is None and isinstance(first_choice, dict):
+            delta = first_choice.get("delta")
+        if delta is None:
+            return None
+
+        content = getattr(delta, "content", None)
+        if content is None and isinstance(delta, dict):
+            content = delta.get("content")
+        if not content:
+            return None
+
+        if isinstance(content, str):
+            return content
+
+        if isinstance(content, list):
+            fragments: List[str] = []
+            for item in content:
+                text = getattr(item, "text", None)
+                if text is None and isinstance(item, dict):
+                    text = item.get("text")
+                if text:
+                    fragments.append(text)
+            return "".join(fragments) if fragments else None
+
+        return None
+
+    def infer_stream(
+        self,
+        messages: List[Dict[str, str]],
+        temperature: Optional[float],
+        max_tokens: Optional[int],
+        return_json: bool,
+        custom_format: Any = None,
+        max_retries: int = 5,
+        retry_delay: int = 5,
+    ):
+        if return_json or custom_format is not None:
+            yield from super().infer_stream(
+                messages,
+                temperature,
+                max_tokens,
+                return_json,
+                custom_format,
+                max_retries,
+                retry_delay,
+            )
+            return
+
+        retries = 0
+        eff_temp = self._normalize_temperature(self.model_name, temperature)
+        eff_max = (
+            max_tokens
+            if max_tokens is not None
+            else self._resolve_default_max_tokens(self.model_name)
+        )
+
+        while retries < max_retries:
+            try:
+                kwargs: Dict[str, Any] = {
+                    "model": self.model_name,
+                    "messages": messages,
+                    "stream": True,
+                }
+                if eff_temp is not None:
+                    kwargs["temperature"] = eff_temp
+                if eff_max is not None:
+                    kwargs["max_tokens"] = eff_max
+
+                stream = self.client.chat.completions.create(**kwargs)
+                for chunk in stream:
+                    text = self._extract_text_from_chunk(chunk)
+                    if text:
+                        yield text
+                return
+            except self._openai.RateLimitError:  # type: ignore[attr-defined]
+                self.logger.warning(
+                    f"Rate limit exceeded. Retrying in {retry_delay} seconds (streaming)..."
+                )
+                retries += 1
+                time.sleep(retry_delay * retries)
+            except Exception as e:  # noqa: BLE001
+                self.logger.error(f"Streaming error: {e}")
+                break
+
+        # Fallback to non-streaming if streaming fails
+        yield from super().infer_stream(
+            messages,
+            temperature,
+            max_tokens,
+            return_json,
+            custom_format,
+            max_retries,
+            retry_delay,
+        )
+
 
 class GeminiClient(BaseLLMClient):
     def __init__(self, model_name: str, logger):
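
Three details worth flagging in the Azure hunk: the retry loop uses a linear backoff (time.sleep(retry_delay * retries), i.e. 5 s, 10 s, 15 s, ... with the defaults); the hunk adds two infer_stream definitions to the same class, so under Python semantics the second definition is the one that ends up bound; and the first definition logs its errors with an "OpenRouter" label even though it lives in AzureOpenAIClient. As a quick illustration of what _extract_text_from_chunk accepts, using SimpleNamespace as a stand-in for the SDK's streaming chunk objects (shapes assumed for illustration, not captured from a live API):

    from types import SimpleNamespace
    # Assumes AzureOpenAIClient is importable from tooluniverse.llm_clients.

    # Plain string delta, the common case.
    chunk = SimpleNamespace(
        choices=[SimpleNamespace(delta=SimpleNamespace(content="Hel"))]
    )
    assert AzureOpenAIClient._extract_text_from_chunk(chunk) == "Hel"

    # List-of-parts delta: each part's text fragment is concatenated.
    chunk2 = SimpleNamespace(
        choices=[SimpleNamespace(delta={"content": [{"text": "lo"}, {"text": "!"}]})]
    )
    assert AzureOpenAIClient._extract_text_from_chunk(chunk2) == "lo!"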
@@ -368,6 +564,96 @@ class GeminiClient(BaseLLMClient):
                 time.sleep(retry_delay * retries)
         return None
 
+    @staticmethod
+    def _extract_text_from_stream_chunk(chunk) -> Optional[str]:
+        if chunk is None:
+            return None
+        text = getattr(chunk, "text", None)
+        if text:
+            return text
+
+        candidates = getattr(chunk, "candidates", None)
+        if not candidates and isinstance(chunk, dict):
+            candidates = chunk.get("candidates")
+        if not candidates:
+            return None
+
+        candidate = candidates[0]
+        content = getattr(candidate, "content", None)
+        if content is None and isinstance(candidate, dict):
+            content = candidate.get("content")
+        if not content:
+            return None
+
+        parts = getattr(content, "parts", None)
+        if parts is None and isinstance(content, dict):
+            parts = content.get("parts")
+        if parts and isinstance(parts, list):
+            fragments: List[str] = []
+            for part in parts:
+                piece = getattr(part, "text", None)
+                if piece is None and isinstance(part, dict):
+                    piece = part.get("text")
+                if piece:
+                    fragments.append(piece)
+            return "".join(fragments) if fragments else None
+
+        final_text = getattr(content, "text", None)
+        if final_text is None and isinstance(content, dict):
+            final_text = content.get("text")
+        return final_text
+
+    def infer_stream(
+        self,
+        messages: List[Dict[str, str]],
+        temperature: Optional[float],
+        max_tokens: Optional[int],
+        return_json: bool,
+        custom_format: Any = None,
+        max_retries: int = 5,
+        retry_delay: int = 5,
+    ):
+        if return_json:
+            raise ValueError("Gemini JSON mode not supported here")
+
+        contents = ""
+        for m in messages:
+            if m["role"] in ("user", "system"):
+                contents += f"{m['content']}\n"
+
+        retries = 0
+        while retries < max_retries:
+            try:
+                gen_cfg: Dict[str, Any] = {
+                    "temperature": (temperature if temperature is not None else 0)
+                }
+                if max_tokens is not None:
+                    gen_cfg["max_output_tokens"] = max_tokens
+
+                model = self._build_model()
+                stream = model.generate_content(
+                    contents, generation_config=gen_cfg, stream=True
+                )
+                for chunk in stream:
+                    text = self._extract_text_from_stream_chunk(chunk)
+                    if text:
+                        yield text
+                return
+            except Exception as e:  # noqa: BLE001
+                self.logger.error(f"Gemini streaming error: {e}")
+                retries += 1
+                time.sleep(retry_delay * retries)
+
+        yield from super().infer_stream(
+            messages,
+            temperature,
+            max_tokens,
+            return_json,
+            custom_format,
+            max_retries,
+            retry_delay,
+        )
+
 
 class OpenRouterClient(BaseLLMClient):
     """
tooluniverse/medrxiv_tool.py (new file)
@@ -0,0 +1,97 @@
+import requests
+from .base_tool import BaseTool
+from .tool_registry import register_tool
+
+
+@register_tool("MedRxivTool")
+class MedRxivTool(BaseTool):
+    """
+    Search medRxiv preprints using medRxiv's API (same interface as bioRxiv).
+
+    Arguments:
+        query (str): Search term
+        max_results (int): Max results to return (default 10, max 200)
+    """
+
+    def __init__(
+        self,
+        tool_config,
+        base_url="https://api.medrxiv.org/details",
+    ):
+        super().__init__(tool_config)
+        self.base_url = base_url
+
+    def run(self, arguments=None):
+        arguments = arguments or {}
+        query = arguments.get("query")
+        max_results = int(arguments.get("max_results", 10))
+        if not query:
+            return {"error": "`query` parameter is required."}
+        return self._search(query, max_results)
+
+    def _search(self, query, max_results):
+        # Use date range search for recent preprints
+        # Format: /medrxiv/{start_date}/{end_date}/{cursor}/json
+        from datetime import datetime, timedelta
+
+        # Search last 30 days
+        end_date = datetime.now()
+        start_date = end_date - timedelta(days=30)
+
+        url = (f"{self.base_url}/medrxiv/"
+               f"{start_date.strftime('%Y-%m-%d')}/"
+               f"{end_date.strftime('%Y-%m-%d')}/0/json")
+
+        try:
+            resp = requests.get(url, timeout=20)
+            resp.raise_for_status()
+            data = resp.json()
+        except requests.RequestException as e:
+            return {
+                "error": "Network/API error calling medRxiv",
+                "reason": str(e),
+            }
+        except ValueError:
+            return {"error": "Failed to decode medRxiv response as JSON"}
+
+        results = []
+        # The API returns a dictionary with a 'collection' key
+        collection = data.get("collection", [])
+        if not isinstance(collection, list):
+            return {"error": "Unexpected API response format"}
+
+        for item in collection:
+            title = item.get("title")
+            authors = item.get("authors", "")
+            if isinstance(authors, str):
+                authors = [a.strip() for a in authors.split(";") if a.strip()]
+            elif isinstance(authors, list):
+                authors = [str(a).strip() for a in authors if str(a).strip()]
+            else:
+                authors = []
+
+            year = None
+            date = item.get("date")
+            if date and len(date) >= 4 and date[:4].isdigit():
+                year = int(date[:4])
+
+            doi = item.get("doi")
+            url = f"https://www.medrxiv.org/content/{doi}" if doi else None
+
+            # Filter by query if provided
+            if query and query.lower() not in (title or "").lower():
+                continue
+
+            results.append(
+                {
+                    "title": title,
+                    "authors": authors,
+                    "year": year,
+                    "doi": doi,
+                    "url": url,
+                    "abstract": item.get("abstract", ""),
+                    "source": "medRxiv",
+                }
+            )
+
+        return results[:max_results]
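
Because the underlying endpoint is a date-window listing rather than a search API, the query here only substring-matches the titles of preprints posted in the last 30 days. A usage sketch (the tool_config dict is a placeholder; the real schema ships in tooluniverse/data/medrxiv_tools.json):

    # Hypothetical invocation of the tool registered above.
    tool = MedRxivTool(tool_config={"name": "MedRxivTool"})
    hits = tool.run({"query": "influenza", "max_results": 5})
    if isinstance(hits, list):           # success path returns a list
        for h in hits:
            print(h["year"], h["title"], h["url"])
    else:                                # error path returns a dict
        print("lookup failed:", hits.get("error"))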
tooluniverse/openaire_tool.py (new file)
@@ -0,0 +1,145 @@
+import requests
+from .base_tool import BaseTool
+from .tool_registry import register_tool
+
+
+@register_tool("OpenAIRETool")
+class OpenAIRETool(BaseTool):
+    """
+    Search OpenAIRE Explore for research products (publications by default).
+
+    Parameters (arguments):
+        query (str): Query string
+        max_results (int): Max number of results (default 10, max 100)
+        type (str): product type filter: publications | datasets | software
+    """
+
+    def __init__(self, tool_config):
+        super().__init__(tool_config)
+        self.base_url = "https://api.openaire.eu/search/publications"
+
+    def run(self, arguments=None):
+        arguments = arguments or {}
+        query = arguments.get("query")
+        max_results = int(arguments.get("max_results", 10))
+        prod_type = arguments.get("type", "publications")
+
+        if not query:
+            return {"error": "`query` parameter is required."}
+
+        endpoint = self._endpoint_for_type(prod_type)
+        if endpoint is None:
+            return {
+                "error": (
+                    "Unsupported type. Use publications/datasets/software."
+                ),
+            }
+
+        params = {
+            "format": "json",
+            "size": max(1, min(max_results, 100)),
+            "query": query,
+        }
+        try:
+            resp = requests.get(endpoint, params=params, timeout=20)
+            resp.raise_for_status()
+            data = resp.json()
+        except requests.RequestException as e:
+            return {
+                "error": "Network/API error calling OpenAIRE",
+                "reason": str(e),
+            }
+        except ValueError:
+            return {"error": "Failed to decode OpenAIRE response as JSON"}
+
+        return self._normalize(data, prod_type)
+
+    def _endpoint_for_type(self, prod_type):
+        if prod_type == "publications":
+            return "https://api.openaire.eu/search/publications"
+        if prod_type == "datasets":
+            return "https://api.openaire.eu/search/datasets"
+        if prod_type == "software":
+            return "https://api.openaire.eu/search/software"
+        return None
+
+    def _normalize(self, data, prod_type):
+        results = []
+        # OpenAIRE JSON has a root 'response' with 'results' → 'result' list
+        try:
+            items = (
+                data.get("response", {})
+                .get("results", {})
+                .get("result", [])
+            )
+        except Exception:
+            items = []
+
+        for it in items:
+            # header may contain identifiers, not used presently
+            _ = (
+                it.get("header", {})
+                if isinstance(it.get("header"), dict)
+                else {}
+            )
+            metadata = (
+                it.get("metadata", {})
+                if isinstance(it.get("metadata"), dict)
+                else {}
+            )
+            title = None
+            authors = []
+            year = None
+            doi = None
+            url = None
+
+            # Titles can be nested in 'oaf:result' structure
+            result_obj = metadata.get("oaf:result", {})
+            if isinstance(result_obj, dict):
+                t = result_obj.get("title")
+                if isinstance(t, list) and t:
+                    title = t[0].get("$")
+                elif isinstance(t, dict):
+                    title = t.get("$")
+
+                # Authors
+                creators = result_obj.get("creator", [])
+                if isinstance(creators, list):
+                    for c in creators:
+                        name = c.get("$")
+                        if name:
+                            authors.append(name)
+
+                # Year
+                date_obj = (
+                    result_obj.get("dateofacceptance")
+                    or result_obj.get("date")
+                )
+                if isinstance(date_obj, dict):
+                    year = date_obj.get("year") or date_obj.get("$")
+
+                # DOI and URL
+                pid = result_obj.get("pid", [])
+                if isinstance(pid, list):
+                    for p in pid:
+                        if p.get("@classid") == "doi":
+                            doi = p.get("$")
+                bestaccessright = result_obj.get("bestaccessright", {})
+                if isinstance(bestaccessright, dict):
+                    url_value = bestaccessright.get("$")
+                    if url_value:
+                        url = url_value
+
+            results.append(
+                {
+                    "title": title,
+                    "authors": authors,
+                    "year": year,
+                    "doi": doi,
+                    "url": url,
+                    "type": prod_type,
+                    "source": "OpenAIRE",
+                }
+            )
+
+        return results
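
The normalizer expects OpenAIRE's XML-derived JSON shape, where scalar values sit under a "$" key and attributes under "@" keys. A sketch of the field mapping on a hand-made fragment (illustrative, not captured from the live API; the tool_config dict is a placeholder):

    sample = {
        "response": {"results": {"result": [{
            "metadata": {"oaf:result": {
                "title": [{"$": "An example paper"}],
                "creator": [{"$": "Ada Lovelace"}],
                "dateofacceptance": {"$": "2024-01-15"},
                "pid": [{"@classid": "doi", "$": "10.1234/example"}],
            }},
        }]}},
    }
    tool = OpenAIRETool(tool_config={"name": "OpenAIRETool"})
    rows = tool._normalize(sample, "publications")
    # rows[0]["title"] == "An example paper"; rows[0]["doi"] == "10.1234/example"
    # Note: "year" carries the raw "$" value ("2024-01-15") here, since the
    # code falls back to date_obj.get("$") when no "year" key is present.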
tooluniverse/osf_preprints_tool.py (new file)
@@ -0,0 +1,67 @@
+import requests
+from .base_tool import BaseTool
+from .tool_registry import register_tool
+
+
+@register_tool("OSFPreprintsTool")
+class OSFPreprintsTool(BaseTool):
+    """
+    Search OSF Preprints via OSF API v2 filters.
+
+    Parameters (arguments):
+        query (str): Query string
+        max_results (int): Max results (default 10, max 100)
+        provider (str): Optional preprint provider (e.g., 'osf', 'psyarxiv')
+    """
+
+    def __init__(self, tool_config):
+        super().__init__(tool_config)
+        self.base_url = "https://api.osf.io/v2/preprints/"
+
+    def run(self, arguments=None):
+        arguments = arguments or {}
+        query = arguments.get("query")
+        max_results = int(arguments.get("max_results", 10))
+        provider = arguments.get("provider")
+
+        if not query:
+            return {"error": "`query` parameter is required."}
+
+        params = {
+            "page[size]": max(1, min(max_results, 100)),
+            "filter[title]": query,
+        }
+        if provider:
+            params["filter[provider]"] = provider
+
+        try:
+            resp = requests.get(self.base_url, params=params, timeout=20)
+            resp.raise_for_status()
+            data = resp.json()
+        except requests.RequestException as e:
+            return {"error": "Network/API error calling OSF", "reason": str(e)}
+        except ValueError:
+            return {"error": "Failed to decode OSF response as JSON"}
+
+        results = []
+        for item in data.get("data", []):
+            attrs = item.get("attributes", {})
+            title = attrs.get("title")
+            date_published = attrs.get("date_published")
+            is_published = attrs.get("is_published")
+            doi = attrs.get("doi")
+            links_obj = item.get("links", {})
+            url = links_obj.get("html") or links_obj.get("self")
+
+            results.append(
+                {
+                    "title": title,
+                    "date_published": date_published,
+                    "published": is_published,
+                    "doi": doi,
+                    "url": url,
+                    "source": "OSF Preprints",
+                }
+            )
+
+        return results
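
A minimal usage sketch (placeholder tool_config; note the query is sent as OSF's filter[title] parameter, so only title matches come back):

    tool = OSFPreprintsTool(tool_config={"name": "OSFPreprintsTool"})
    hits = tool.run(
        {"query": "replication", "max_results": 3, "provider": "psyarxiv"}
    )
    if isinstance(hits, list):
        for h in hits:
            print(h["date_published"], h["title"], h["doi"] or h["url"])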