tooluniverse 1.0.5__py3-none-any.whl → 1.0.6__py3-none-any.whl
Potentially problematic release: this version of tooluniverse has been flagged as possibly problematic.
- tooluniverse/__init__.py +39 -0
- tooluniverse/agentic_tool.py +82 -12
- tooluniverse/arxiv_tool.py +113 -0
- tooluniverse/biorxiv_tool.py +97 -0
- tooluniverse/core_tool.py +153 -0
- tooluniverse/crossref_tool.py +73 -0
- tooluniverse/data/arxiv_tools.json +87 -0
- tooluniverse/data/biorxiv_tools.json +70 -0
- tooluniverse/data/core_tools.json +105 -0
- tooluniverse/data/crossref_tools.json +70 -0
- tooluniverse/data/dblp_tools.json +73 -0
- tooluniverse/data/doaj_tools.json +94 -0
- tooluniverse/data/fatcat_tools.json +72 -0
- tooluniverse/data/hal_tools.json +70 -0
- tooluniverse/data/medrxiv_tools.json +70 -0
- tooluniverse/data/openaire_tools.json +85 -0
- tooluniverse/data/osf_preprints_tools.json +77 -0
- tooluniverse/data/pmc_tools.json +109 -0
- tooluniverse/data/pubmed_tools.json +65 -0
- tooluniverse/data/unpaywall_tools.json +86 -0
- tooluniverse/data/wikidata_sparql_tools.json +42 -0
- tooluniverse/data/zenodo_tools.json +82 -0
- tooluniverse/dblp_tool.py +62 -0
- tooluniverse/default_config.py +17 -0
- tooluniverse/doaj_tool.py +124 -0
- tooluniverse/execute_function.py +70 -9
- tooluniverse/fatcat_tool.py +66 -0
- tooluniverse/hal_tool.py +77 -0
- tooluniverse/llm_clients.py +286 -0
- tooluniverse/medrxiv_tool.py +97 -0
- tooluniverse/openaire_tool.py +145 -0
- tooluniverse/osf_preprints_tool.py +67 -0
- tooluniverse/pmc_tool.py +181 -0
- tooluniverse/pubmed_tool.py +110 -0
- tooluniverse/smcp.py +109 -79
- tooluniverse/test/test_claude_sdk.py +11 -4
- tooluniverse/unpaywall_tool.py +63 -0
- tooluniverse/wikidata_sparql_tool.py +61 -0
- tooluniverse/zenodo_tool.py +74 -0
- {tooluniverse-1.0.5.dist-info → tooluniverse-1.0.6.dist-info}/METADATA +2 -1
- {tooluniverse-1.0.5.dist-info → tooluniverse-1.0.6.dist-info}/RECORD +45 -13
- {tooluniverse-1.0.5.dist-info → tooluniverse-1.0.6.dist-info}/entry_points.txt +1 -0
- {tooluniverse-1.0.5.dist-info → tooluniverse-1.0.6.dist-info}/WHEEL +0 -0
- {tooluniverse-1.0.5.dist-info → tooluniverse-1.0.6.dist-info}/licenses/LICENSE +0 -0
- {tooluniverse-1.0.5.dist-info → tooluniverse-1.0.6.dist-info}/top_level.txt +0 -0
tooluniverse/llm_clients.py
CHANGED
@@ -21,6 +21,29 @@ class BaseLLMClient:
     ) -> Optional[str]:
         raise NotImplementedError
 
+    def infer_stream(
+        self,
+        messages: List[Dict[str, str]],
+        temperature: Optional[float],
+        max_tokens: Optional[int],
+        return_json: bool,
+        custom_format: Any = None,
+        max_retries: int = 5,
+        retry_delay: int = 5,
+    ):
+        """Default streaming implementation falls back to regular inference."""
+        result = self.infer(
+            messages=messages,
+            temperature=temperature,
+            max_tokens=max_tokens,
+            return_json=return_json,
+            custom_format=custom_format,
+            max_retries=max_retries,
+            retry_delay=retry_delay,
+        )
+        if result is not None:
+            yield result
+
 
 class AzureOpenAIClient(BaseLLMClient):
     # Built-in defaults for model families (can be overridden by env)
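The base-class fallback above means every client exposes a working infer_stream: callers can always iterate, and a non-streaming client simply yields its whole answer as a single chunk. A minimal runnable sketch of that contract, with signatures condensed and a hypothetical EchoClient standing in for a real backend (neither is part of the package):

    from typing import Optional


    class BaseLLMClient:
        def infer(self, messages, **kwargs) -> Optional[str]:
            raise NotImplementedError

        def infer_stream(self, messages, **kwargs):
            # Same rule as the diff: fall back to one-shot inference,
            # yielding the complete result as a single "chunk".
            result = self.infer(messages, **kwargs)
            if result is not None:
                yield result


    class EchoClient(BaseLLMClient):
        def infer(self, messages, **kwargs) -> Optional[str]:
            return messages[-1]["content"]  # canned reply for the sketch


    for chunk in EchoClient().infer_stream([{"role": "user", "content": "hello"}]):
        print(chunk)  # prints "hello" exactly once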
@@ -305,6 +328,179 @@ class AzureOpenAIClient(BaseLLMClient):
         self.logger.error("Max retries exceeded. Unable to complete the request.")
         return None
 
+    def infer_stream(
+        self,
+        messages: List[Dict[str, str]],
+        temperature: Optional[float],
+        max_tokens: Optional[int],
+        return_json: bool,
+        custom_format: Any = None,
+        max_retries: int = 5,
+        retry_delay: int = 5,
+    ):
+        if return_json or custom_format is not None:
+            yield from super().infer_stream(
+                messages,
+                temperature,
+                max_tokens,
+                return_json,
+                custom_format,
+                max_retries,
+                retry_delay,
+            )
+            return
+
+        retries = 0
+        eff_max = (
+            max_tokens
+            if max_tokens is not None
+            else self._resolve_default_max_tokens(self.model_name)
+        )
+
+        while retries < max_retries:
+            try:
+                kwargs: Dict[str, Any] = {
+                    "model": self.model_name,
+                    "messages": messages,
+                    "stream": True,
+                }
+                if temperature is not None:
+                    kwargs["temperature"] = temperature
+                if eff_max is not None:
+                    kwargs["max_tokens"] = eff_max
+
+                stream = self.client.chat.completions.create(**kwargs)
+                for chunk in stream:
+                    text = AzureOpenAIClient._extract_text_from_chunk(chunk)  # type: ignore[attr-defined]
+                    if text:
+                        yield text
+                return
+            except self._openai.RateLimitError:  # type: ignore[attr-defined]
+                self.logger.warning(
+                    f"OpenRouter streaming rate limit hit. Retrying in {retry_delay} seconds..."
+                )
+                retries += 1
+                time.sleep(retry_delay * retries)
+            except Exception as e:  # noqa: BLE001
+                self.logger.error(f"OpenRouter streaming error: {e}")
+                break
+
+        yield from super().infer_stream(
+            messages,
+            temperature,
+            max_tokens,
+            return_json,
+            custom_format,
+            max_retries,
+            retry_delay,
+        )
+
+    @staticmethod
+    def _extract_text_from_chunk(chunk) -> Optional[str]:
+        try:
+            choices = getattr(chunk, "choices", None)
+        except Exception:
+            choices = None
+        if not choices:
+            return None
+
+        first_choice = choices[0]
+        delta = getattr(first_choice, "delta", None)
+        if delta is None and isinstance(first_choice, dict):
+            delta = first_choice.get("delta")
+        if delta is None:
+            return None
+
+        content = getattr(delta, "content", None)
+        if content is None and isinstance(delta, dict):
+            content = delta.get("content")
+        if not content:
+            return None
+
+        if isinstance(content, str):
+            return content
+
+        if isinstance(content, list):
+            fragments: List[str] = []
+            for item in content:
+                text = getattr(item, "text", None)
+                if text is None and isinstance(item, dict):
+                    text = item.get("text")
+                if text:
+                    fragments.append(text)
+            return "".join(fragments) if fragments else None
+
+        return None
+
+    def infer_stream(
+        self,
+        messages: List[Dict[str, str]],
+        temperature: Optional[float],
+        max_tokens: Optional[int],
+        return_json: bool,
+        custom_format: Any = None,
+        max_retries: int = 5,
+        retry_delay: int = 5,
+    ):
+        if return_json or custom_format is not None:
+            yield from super().infer_stream(
+                messages,
+                temperature,
+                max_tokens,
+                return_json,
+                custom_format,
+                max_retries,
+                retry_delay,
+            )
+            return
+
+        retries = 0
+        eff_temp = self._normalize_temperature(self.model_name, temperature)
+        eff_max = (
+            max_tokens
+            if max_tokens is not None
+            else self._resolve_default_max_tokens(self.model_name)
+        )
+
+        while retries < max_retries:
+            try:
+                kwargs: Dict[str, Any] = {
+                    "model": self.model_name,
+                    "messages": messages,
+                    "stream": True,
+                }
+                if eff_temp is not None:
+                    kwargs["temperature"] = eff_temp
+                if eff_max is not None:
+                    kwargs["max_tokens"] = eff_max
+
+                stream = self.client.chat.completions.create(**kwargs)
+                for chunk in stream:
+                    text = self._extract_text_from_chunk(chunk)
+                    if text:
+                        yield text
+                return
+            except self._openai.RateLimitError:  # type: ignore[attr-defined]
+                self.logger.warning(
+                    f"Rate limit exceeded. Retrying in {retry_delay} seconds (streaming)..."
+                )
+                retries += 1
+                time.sleep(retry_delay * retries)
+            except Exception as e:  # noqa: BLE001
+                self.logger.error(f"Streaming error: {e}")
+                break
+
+        # Fallback to non-streaming if streaming fails
+        yield from super().infer_stream(
+            messages,
+            temperature,
+            max_tokens,
+            return_json,
+            custom_format,
+            max_retries,
+            retry_delay,
+        )
+
 
 class GeminiClient(BaseLLMClient):
     def __init__(self, model_name: str, logger):
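Both streaming methods lean on the same extraction rule in _extract_text_from_chunk: prefer attribute access on SDK chunk objects, fall back to dict lookups, and join list-shaped content parts. A condensed, runnable re-sketch of that rule (not the method from the wheel itself; SimpleNamespace stands in for the SDK's chunk type):

    from types import SimpleNamespace
    from typing import Optional


    def extract_text(chunk) -> Optional[str]:
        choices = getattr(chunk, "choices", None)
        if not choices:
            return None
        first = choices[0]
        delta = getattr(first, "delta", None)
        if delta is None and isinstance(first, dict):
            delta = first.get("delta")
        content = getattr(delta, "content", None)
        if content is None and isinstance(delta, dict):
            content = delta.get("content")
        if isinstance(content, list):  # some SDKs split content into text parts
            content = "".join(
                p.get("text", "") if isinstance(p, dict) else (getattr(p, "text", "") or "")
                for p in content
            )
        return content or None


    print(extract_text(SimpleNamespace(choices=[{"delta": {"content": "Hel"}}])))  # Hel
    print(extract_text(SimpleNamespace(choices=[{"delta": {}}])))  # None (role-only chunk is skipped)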
@@ -368,6 +564,96 @@ class GeminiClient(BaseLLMClient):
             time.sleep(retry_delay * retries)
         return None
 
+    @staticmethod
+    def _extract_text_from_stream_chunk(chunk) -> Optional[str]:
+        if chunk is None:
+            return None
+        text = getattr(chunk, "text", None)
+        if text:
+            return text
+
+        candidates = getattr(chunk, "candidates", None)
+        if not candidates and isinstance(chunk, dict):
+            candidates = chunk.get("candidates")
+        if not candidates:
+            return None
+
+        candidate = candidates[0]
+        content = getattr(candidate, "content", None)
+        if content is None and isinstance(candidate, dict):
+            content = candidate.get("content")
+        if not content:
+            return None
+
+        parts = getattr(content, "parts", None)
+        if parts is None and isinstance(content, dict):
+            parts = content.get("parts")
+        if parts and isinstance(parts, list):
+            fragments: List[str] = []
+            for part in parts:
+                piece = getattr(part, "text", None)
+                if piece is None and isinstance(part, dict):
+                    piece = part.get("text")
+                if piece:
+                    fragments.append(piece)
+            return "".join(fragments) if fragments else None
+
+        final_text = getattr(content, "text", None)
+        if final_text is None and isinstance(content, dict):
+            final_text = content.get("text")
+        return final_text
+
+    def infer_stream(
+        self,
+        messages: List[Dict[str, str]],
+        temperature: Optional[float],
+        max_tokens: Optional[int],
+        return_json: bool,
+        custom_format: Any = None,
+        max_retries: int = 5,
+        retry_delay: int = 5,
+    ):
+        if return_json:
+            raise ValueError("Gemini JSON mode not supported here")
+
+        contents = ""
+        for m in messages:
+            if m["role"] in ("user", "system"):
+                contents += f"{m['content']}\n"
+
+        retries = 0
+        while retries < max_retries:
+            try:
+                gen_cfg: Dict[str, Any] = {
+                    "temperature": (temperature if temperature is not None else 0)
+                }
+                if max_tokens is not None:
+                    gen_cfg["max_output_tokens"] = max_tokens
+
+                model = self._build_model()
+                stream = model.generate_content(
+                    contents, generation_config=gen_cfg, stream=True
+                )
+                for chunk in stream:
+                    text = self._extract_text_from_stream_chunk(chunk)
+                    if text:
+                        yield text
+                return
+            except Exception as e:  # noqa: BLE001
+                self.logger.error(f"Gemini streaming error: {e}")
+                retries += 1
+                time.sleep(retry_delay * retries)
+
+        yield from super().infer_stream(
+            messages,
+            temperature,
+            max_tokens,
+            return_json,
+            custom_format,
+            max_retries,
+            retry_delay,
+        )
+
 
 class OpenRouterClient(BaseLLMClient):
     """
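The Gemini path flattens the chat history before streaming: user and system turns are concatenated into one prompt string, and assistant turns are dropped. A quick runnable check of that rule:

    messages = [
        {"role": "system", "content": "Be terse."},
        {"role": "user", "content": "Hi"},
        {"role": "assistant", "content": "Hello!"},
        {"role": "user", "content": "Stream, please."},
    ]
    contents = "".join(
        f"{m['content']}\n" for m in messages if m["role"] in ("user", "system")
    )
    print(repr(contents))  # 'Be terse.\nHi\nStream, please.\n' (assistant turn gone)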
tooluniverse/medrxiv_tool.py
ADDED
@@ -0,0 +1,97 @@
+import requests
+from .base_tool import BaseTool
+from .tool_registry import register_tool
+
+
+@register_tool("MedRxivTool")
+class MedRxivTool(BaseTool):
+    """
+    Search medRxiv preprints using medRxiv's API (same interface as bioRxiv).
+
+    Arguments:
+        query (str): Search term
+        max_results (int): Max results to return (default 10, max 200)
+    """
+
+    def __init__(
+        self,
+        tool_config,
+        base_url="https://api.medrxiv.org/details",
+    ):
+        super().__init__(tool_config)
+        self.base_url = base_url
+
+    def run(self, arguments=None):
+        arguments = arguments or {}
+        query = arguments.get("query")
+        max_results = int(arguments.get("max_results", 10))
+        if not query:
+            return {"error": "`query` parameter is required."}
+        return self._search(query, max_results)
+
+    def _search(self, query, max_results):
+        # Use date range search for recent preprints
+        # Format: /medrxiv/{start_date}/{end_date}/{cursor}/json
+        from datetime import datetime, timedelta
+
+        # Search last 30 days
+        end_date = datetime.now()
+        start_date = end_date - timedelta(days=30)
+
+        url = (f"{self.base_url}/medrxiv/"
+               f"{start_date.strftime('%Y-%m-%d')}/"
+               f"{end_date.strftime('%Y-%m-%d')}/0/json")
+
+        try:
+            resp = requests.get(url, timeout=20)
+            resp.raise_for_status()
+            data = resp.json()
+        except requests.RequestException as e:
+            return {
+                "error": "Network/API error calling medRxiv",
+                "reason": str(e),
+            }
+        except ValueError:
+            return {"error": "Failed to decode medRxiv response as JSON"}
+
+        results = []
+        # The API returns a dictionary with a 'collection' key
+        collection = data.get("collection", [])
+        if not isinstance(collection, list):
+            return {"error": "Unexpected API response format"}
+
+        for item in collection:
+            title = item.get("title")
+            authors = item.get("authors", "")
+            if isinstance(authors, str):
+                authors = [a.strip() for a in authors.split(";") if a.strip()]
+            elif isinstance(authors, list):
+                authors = [str(a).strip() for a in authors if str(a).strip()]
+            else:
+                authors = []
+
+            year = None
+            date = item.get("date")
+            if date and len(date) >= 4 and date[:4].isdigit():
+                year = int(date[:4])
+
+            doi = item.get("doi")
+            url = f"https://www.medrxiv.org/content/{doi}" if doi else None
+
+            # Filter by query if provided
+            if query and query.lower() not in (title or "").lower():
+                continue
+
+            results.append(
+                {
+                    "title": title,
+                    "authors": authors,
+                    "year": year,
+                    "doi": doi,
+                    "url": url,
+                    "abstract": item.get("abstract", ""),
+                    "source": "medRxiv",
+                }
+            )
+
+        return results[:max_results]
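A hypothetical invocation of the new tool; passing an empty tool_config is an assumption about what BaseTool accepts, and the call queries the live medRxiv API:

    # Hypothetical usage sketch; tool_config={} is an assumption.
    tool = MedRxivTool(tool_config={})
    papers = tool.run({"query": "vaccine", "max_results": 5})
    if isinstance(papers, list):
        for p in papers:
            print(p["year"], p["doi"], p["title"])
    else:
        print(papers["error"])

Note the tool fetches the last 30 days of preprints and only then filters titles by substring, so max_results caps the post-filter list.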
tooluniverse/openaire_tool.py
ADDED
@@ -0,0 +1,145 @@
+import requests
+from .base_tool import BaseTool
+from .tool_registry import register_tool
+
+
+@register_tool("OpenAIRETool")
+class OpenAIRETool(BaseTool):
+    """
+    Search OpenAIRE Explore for research products (publications by default).
+
+    Parameters (arguments):
+        query (str): Query string
+        max_results (int): Max number of results (default 10, max 100)
+        type (str): product type filter: publications | datasets | software
+    """
+
+    def __init__(self, tool_config):
+        super().__init__(tool_config)
+        self.base_url = "https://api.openaire.eu/search/publications"
+
+    def run(self, arguments=None):
+        arguments = arguments or {}
+        query = arguments.get("query")
+        max_results = int(arguments.get("max_results", 10))
+        prod_type = arguments.get("type", "publications")
+
+        if not query:
+            return {"error": "`query` parameter is required."}
+
+        endpoint = self._endpoint_for_type(prod_type)
+        if endpoint is None:
+            return {
+                "error": (
+                    "Unsupported type. Use publications/datasets/software."
+                ),
+            }
+
+        params = {
+            "format": "json",
+            "size": max(1, min(max_results, 100)),
+            "query": query,
+        }
+        try:
+            resp = requests.get(endpoint, params=params, timeout=20)
+            resp.raise_for_status()
+            data = resp.json()
+        except requests.RequestException as e:
+            return {
+                "error": "Network/API error calling OpenAIRE",
+                "reason": str(e),
+            }
+        except ValueError:
+            return {"error": "Failed to decode OpenAIRE response as JSON"}
+
+        return self._normalize(data, prod_type)
+
+    def _endpoint_for_type(self, prod_type):
+        if prod_type == "publications":
+            return "https://api.openaire.eu/search/publications"
+        if prod_type == "datasets":
+            return "https://api.openaire.eu/search/datasets"
+        if prod_type == "software":
+            return "https://api.openaire.eu/search/software"
+        return None
+
+    def _normalize(self, data, prod_type):
+        results = []
+        # OpenAIRE JSON has a root 'response' with 'results' → 'result' list
+        try:
+            items = (
+                data.get("response", {})
+                .get("results", {})
+                .get("result", [])
+            )
+        except Exception:
+            items = []
+
+        for it in items:
+            # header may contain identifiers, not used presently
+            _ = (
+                it.get("header", {})
+                if isinstance(it.get("header"), dict)
+                else {}
+            )
+            metadata = (
+                it.get("metadata", {})
+                if isinstance(it.get("metadata"), dict)
+                else {}
+            )
+            title = None
+            authors = []
+            year = None
+            doi = None
+            url = None
+
+            # Titles can be nested in 'oaf:result' structure
+            result_obj = metadata.get("oaf:result", {})
+            if isinstance(result_obj, dict):
+                t = result_obj.get("title")
+                if isinstance(t, list) and t:
+                    title = t[0].get("$")
+                elif isinstance(t, dict):
+                    title = t.get("$")
+
+                # Authors
+                creators = result_obj.get("creator", [])
+                if isinstance(creators, list):
+                    for c in creators:
+                        name = c.get("$")
+                        if name:
+                            authors.append(name)
+
+                # Year
+                date_obj = (
+                    result_obj.get("dateofacceptance")
+                    or result_obj.get("date")
+                )
+                if isinstance(date_obj, dict):
+                    year = date_obj.get("year") or date_obj.get("$")
+
+                # DOI and URL
+                pid = result_obj.get("pid", [])
+                if isinstance(pid, list):
+                    for p in pid:
+                        if p.get("@classid") == "doi":
+                            doi = p.get("$")
+                bestaccessright = result_obj.get("bestaccessright", {})
+                if isinstance(bestaccessright, dict):
+                    url_value = bestaccessright.get("$")
+                    if url_value:
+                        url = url_value
+
+            results.append(
+                {
+                    "title": title,
+                    "authors": authors,
+                    "year": year,
+                    "doi": doi,
+                    "url": url,
+                    "type": prod_type,
+                    "source": "OpenAIRE",
+                }
+            )
+
+        return results
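A hypothetical invocation against the datasets endpoint; tool_config={} is again an assumption, and results come from the live OpenAIRE API:

    # Hypothetical usage sketch; tool_config={} is an assumption.
    tool = OpenAIRETool(tool_config={})
    hits = tool.run({"query": "climate adaptation", "max_results": 3, "type": "datasets"})
    if isinstance(hits, list):
        for h in hits:
            print(h["year"], h["doi"], h["title"])
    else:
        print(hits["error"])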
tooluniverse/osf_preprints_tool.py
ADDED
@@ -0,0 +1,67 @@
+import requests
+from .base_tool import BaseTool
+from .tool_registry import register_tool
+
+
+@register_tool("OSFPreprintsTool")
+class OSFPreprintsTool(BaseTool):
+    """
+    Search OSF Preprints via OSF API v2 filters.
+
+    Parameters (arguments):
+        query (str): Query string
+        max_results (int): Max results (default 10, max 100)
+        provider (str): Optional preprint provider (e.g., 'osf', 'psyarxiv')
+    """
+
+    def __init__(self, tool_config):
+        super().__init__(tool_config)
+        self.base_url = "https://api.osf.io/v2/preprints/"
+
+    def run(self, arguments=None):
+        arguments = arguments or {}
+        query = arguments.get("query")
+        max_results = int(arguments.get("max_results", 10))
+        provider = arguments.get("provider")
+
+        if not query:
+            return {"error": "`query` parameter is required."}
+
+        params = {
+            "page[size]": max(1, min(max_results, 100)),
+            "filter[title]": query,
+        }
+        if provider:
+            params["filter[provider]"] = provider
+
+        try:
+            resp = requests.get(self.base_url, params=params, timeout=20)
+            resp.raise_for_status()
+            data = resp.json()
+        except requests.RequestException as e:
+            return {"error": "Network/API error calling OSF", "reason": str(e)}
+        except ValueError:
+            return {"error": "Failed to decode OSF response as JSON"}
+
+        results = []
+        for item in data.get("data", []):
+            attrs = item.get("attributes", {})
+            title = attrs.get("title")
+            date_published = attrs.get("date_published")
+            is_published = attrs.get("is_published")
+            doi = attrs.get("doi")
+            links_obj = item.get("links", {})
+            url = links_obj.get("html") or links_obj.get("self")
+
+            results.append(
+                {
+                    "title": title,
+                    "date_published": date_published,
+                    "published": is_published,
+                    "doi": doi,
+                    "url": url,
+                    "source": "OSF Preprints",
+                }
+            )
+
+        return results
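A hypothetical invocation; the 'psyarxiv' provider value comes from the tool's own docstring, tool_config={} is an assumption, and the call hits the live OSF API:

    # Hypothetical usage sketch; tool_config={} is an assumption.
    tool = OSFPreprintsTool(tool_config={})
    preprints = tool.run({"query": "working memory", "max_results": 5, "provider": "psyarxiv"})
    if isinstance(preprints, list):
        for p in preprints:
            print(p["date_published"], p["title"], p["url"])
    else:
        print(preprints["error"])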