tooluniverse-1.0.5-py3-none-any.whl → tooluniverse-1.0.6-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of tooluniverse might be problematic.

Files changed (45)
  1. tooluniverse/__init__.py +39 -0
  2. tooluniverse/agentic_tool.py +82 -12
  3. tooluniverse/arxiv_tool.py +113 -0
  4. tooluniverse/biorxiv_tool.py +97 -0
  5. tooluniverse/core_tool.py +153 -0
  6. tooluniverse/crossref_tool.py +73 -0
  7. tooluniverse/data/arxiv_tools.json +87 -0
  8. tooluniverse/data/biorxiv_tools.json +70 -0
  9. tooluniverse/data/core_tools.json +105 -0
  10. tooluniverse/data/crossref_tools.json +70 -0
  11. tooluniverse/data/dblp_tools.json +73 -0
  12. tooluniverse/data/doaj_tools.json +94 -0
  13. tooluniverse/data/fatcat_tools.json +72 -0
  14. tooluniverse/data/hal_tools.json +70 -0
  15. tooluniverse/data/medrxiv_tools.json +70 -0
  16. tooluniverse/data/openaire_tools.json +85 -0
  17. tooluniverse/data/osf_preprints_tools.json +77 -0
  18. tooluniverse/data/pmc_tools.json +109 -0
  19. tooluniverse/data/pubmed_tools.json +65 -0
  20. tooluniverse/data/unpaywall_tools.json +86 -0
  21. tooluniverse/data/wikidata_sparql_tools.json +42 -0
  22. tooluniverse/data/zenodo_tools.json +82 -0
  23. tooluniverse/dblp_tool.py +62 -0
  24. tooluniverse/default_config.py +17 -0
  25. tooluniverse/doaj_tool.py +124 -0
  26. tooluniverse/execute_function.py +70 -9
  27. tooluniverse/fatcat_tool.py +66 -0
  28. tooluniverse/hal_tool.py +77 -0
  29. tooluniverse/llm_clients.py +286 -0
  30. tooluniverse/medrxiv_tool.py +97 -0
  31. tooluniverse/openaire_tool.py +145 -0
  32. tooluniverse/osf_preprints_tool.py +67 -0
  33. tooluniverse/pmc_tool.py +181 -0
  34. tooluniverse/pubmed_tool.py +110 -0
  35. tooluniverse/smcp.py +109 -79
  36. tooluniverse/test/test_claude_sdk.py +11 -4
  37. tooluniverse/unpaywall_tool.py +63 -0
  38. tooluniverse/wikidata_sparql_tool.py +61 -0
  39. tooluniverse/zenodo_tool.py +74 -0
  40. {tooluniverse-1.0.5.dist-info → tooluniverse-1.0.6.dist-info}/METADATA +2 -1
  41. {tooluniverse-1.0.5.dist-info → tooluniverse-1.0.6.dist-info}/RECORD +45 -13
  42. {tooluniverse-1.0.5.dist-info → tooluniverse-1.0.6.dist-info}/entry_points.txt +1 -0
  43. {tooluniverse-1.0.5.dist-info → tooluniverse-1.0.6.dist-info}/WHEEL +0 -0
  44. {tooluniverse-1.0.5.dist-info → tooluniverse-1.0.6.dist-info}/licenses/LICENSE +0 -0
  45. {tooluniverse-1.0.5.dist-info → tooluniverse-1.0.6.dist-info}/top_level.txt +0 -0
tooluniverse/__init__.py CHANGED
@@ -224,6 +224,19 @@ if not LAZY_LOADING_ENABLED:
          ODPHPTopicSearch,
          ODPHPOutlinkFetch,
      )
+     # Literature search tools
+     from .arxiv_tool import ArXivTool
+     from .crossref_tool import CrossrefTool
+     from .dblp_tool import DBLPTool
+     from .pubmed_tool import PubMedTool
+     from .doaj_tool import DOAJTool
+     from .unpaywall_tool import UnpaywallTool
+     from .biorxiv_tool import BioRxivTool
+     from .medrxiv_tool import MedRxivTool
+     from .hal_tool import HALTool
+     from .core_tool import CoreTool
+     from .pmc_tool import PMCTool
+     from .zenodo_tool import ZenodoTool
  else:
      # With lazy loading, create lazy import proxies that import modules only when accessed
      MonarchTool = _LazyImportProxy("restful_tool", "MonarchTool")
@@ -305,6 +318,19 @@ else:
      ODPHPMyHealthfinder = _LazyImportProxy("odphp_tool", "ODHPHPMyHealthfinder")
      ODPHPTopicSearch = _LazyImportProxy("odphp_tool", "ODPHPTopicSearch")
      ODPHPOutlinkFetch = _LazyImportProxy("odphp_tool", "ODPHPOutlinkFetch")
+     # Literature search tools
+     ArXivTool = _LazyImportProxy("arxiv_tool", "ArXivTool")
+     CrossrefTool = _LazyImportProxy("crossref_tool", "CrossrefTool")
+     DBLPTool = _LazyImportProxy("dblp_tool", "DBLPTool")
+     PubMedTool = _LazyImportProxy("pubmed_tool", "PubMedTool")
+     DOAJTool = _LazyImportProxy("doaj_tool", "DOAJTool")
+     UnpaywallTool = _LazyImportProxy("unpaywall_tool", "UnpaywallTool")
+     BioRxivTool = _LazyImportProxy("biorxiv_tool", "BioRxivTool")
+     MedRxivTool = _LazyImportProxy("medrxiv_tool", "MedRxivTool")
+     HALTool = _LazyImportProxy("hal_tool", "HALTool")
+     CoreTool = _LazyImportProxy("core_tool", "CoreTool")
+     PMCTool = _LazyImportProxy("pmc_tool", "PMCTool")
+     ZenodoTool = _LazyImportProxy("zenodo_tool", "ZenodoTool")

  __all__ = [
      "__version__",
@@ -376,4 +402,17 @@ __all__ = [
      "ODPHPItemList",
      "ODPHPTopicSearch",
      "ODPHPOutlinkFetch",
+     # Literature search tools
+     "ArXivTool",
+     "CrossrefTool",
+     "DBLPTool",
+     "PubMedTool",
+     "DOAJTool",
+     "UnpaywallTool",
+     "BioRxivTool",
+     "MedRxivTool",
+     "HALTool",
+     "CoreTool",
+     "PMCTool",
+     "ZenodoTool",
  ]
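The lazy branch mirrors the eager one name-for-name; per the comment in the diff, each proxy imports its module only when accessed. A minimal sketch of the intended effect (the `tool_config` value is a hypothetical placeholder; this assumes the proxy resolves on first use, as that comment describes):

from tooluniverse import ZenodoTool   # binds the proxy; zenodo_tool is not yet imported when lazy

tool = ZenodoTool(tool_config={"name": "zenodo_search"})  # first use triggers the real import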
tooluniverse/agentic_tool.py CHANGED
@@ -3,7 +3,7 @@ from __future__ import annotations
  import os
  import json
  from datetime import datetime
- from typing import Any, Dict, List, Optional
+ from typing import Any, Callable, Dict, List, Optional

  from .base_tool import BaseTool
  from .tool_registry import register_tool
@@ -30,6 +30,8 @@ API_KEY_ENV_VARS = {
  class AgenticTool(BaseTool):
      """Generic wrapper around LLM prompting supporting JSON-defined configs with prompts and input arguments."""

+     STREAM_FLAG_KEY = "_tooluniverse_stream"
+
      @staticmethod
      def has_any_api_keys() -> bool:
          """
@@ -250,9 +252,18 @@ class AgenticTool(BaseTool):
              raise ValueError("max_new_tokens must be positive or None")

      # ------------------------------------------------------------------ public API --------------
-     def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+     def run(
+         self,
+         arguments: Dict[str, Any],
+         stream_callback: Optional[Callable[[str], None]] = None,
+     ) -> Dict[str, Any]:
          start_time = datetime.now()

+         # Work on a copy so we can remove control flags without mutating caller data
+         arguments = dict(arguments or {})
+         stream_flag = bool(arguments.pop("_tooluniverse_stream", False))
+         streaming_requested = stream_flag or stream_callback is not None
+
          # Check if tool is available before attempting to run
          if not self._is_available:
              error_msg = f"Tool '{self.name}' is not available due to initialization error: {self._initialization_error}"
@@ -300,16 +311,52 @@ class AgenticTool(BaseTool):
              custom_format = arguments.get("response_format", None)

              # Delegate to client; client handles provider-specific logic
-             response = self._llm_client.infer(
-                 messages=messages,
-                 temperature=self._temperature,
-                 max_tokens=None,  # client resolves per-model defaults/env
-                 return_json=self._return_json,
-                 custom_format=custom_format,
-                 max_retries=self._max_retries,
-                 retry_delay=self._retry_delay,
+             response = None
+
+             streaming_permitted = (
+                 streaming_requested and not self._return_json and custom_format is None
              )

+             if streaming_permitted and hasattr(self._llm_client, "infer_stream"):
+                 try:
+                     chunks_collected: List[str] = []
+                     stream_iter = self._llm_client.infer_stream(
+                         messages=messages,
+                         temperature=self._temperature,
+                         max_tokens=None,
+                         return_json=self._return_json,
+                         custom_format=custom_format,
+                         max_retries=self._max_retries,
+                         retry_delay=self._retry_delay,
+                     )
+                     for chunk in stream_iter:
+                         if not chunk:
+                             continue
+                         chunks_collected.append(chunk)
+                         self._emit_stream_chunk(chunk, stream_callback)
+                     if chunks_collected:
+                         response = "".join(chunks_collected)
+                 except Exception as stream_error:  # noqa: BLE001
+                     self.logger.warning(
+                         f"Streaming failed for tool '{self.name}': {stream_error}. Falling back to buffered response."
+                     )
+                     response = None
+
+             if response is None:
+                 response = self._llm_client.infer(
+                     messages=messages,
+                     temperature=self._temperature,
+                     max_tokens=None,  # client resolves per-model defaults/env
+                     return_json=self._return_json,
+                     custom_format=custom_format,
+                     max_retries=self._max_retries,
+                     retry_delay=self._retry_delay,
+                 )
+
+             if streaming_requested and response:
+                 for chunk in self._iter_chunks(response):
+                     self._emit_stream_chunk(chunk, stream_callback)
+
              end_time = datetime.now()
              execution_time = (end_time - start_time).total_seconds()

@@ -338,7 +385,8 @@
                  }
              else:
                  return response
-         except Exception as e:
+
+         except Exception as e:  # noqa: BLE001
              end_time = datetime.now()
              execution_time = (end_time - start_time).total_seconds()
              self.logger.error(f"Error executing {self.name}: {str(e)}")
@@ -359,13 +407,35 @@ class AgenticTool(BaseTool):
                          "model_info": {
                              "api_type": self._api_type,
                              "model_id": self._model_id,
+                             "temperature": self._temperature,
+                             "max_new_tokens": self._max_new_tokens,
                          },
                          "execution_time_seconds": execution_time,
                          "timestamp": start_time.isoformat(),
                      },
                  }
              else:
-                 return "error: " + str(e) + " error_type: " + type(e).__name__
+                 return f"error: {str(e)} error_type: {type(e).__name__}"
+
+     @staticmethod
+     def _iter_chunks(text: str, size: int = 800):
+         if not text:
+             return
+         for idx in range(0, len(text), size):
+             yield text[idx : idx + size]
+
+     def _emit_stream_chunk(
+         self, chunk: Optional[str], stream_callback: Optional[Callable[[str], None]]
+     ) -> None:
+         if not stream_callback or not chunk:
+             return
+         try:
+             stream_callback(chunk)
+         except Exception as callback_error:  # noqa: BLE001
+             # Streaming callbacks should not break tool execution; log and continue
+             self.logger.debug(
+                 f"Stream callback for tool '{self.name}' raised an exception: {callback_error}"
+             )

      # ------------------------------------------------------------------ helpers -----------------
      def _validate_arguments(self, arguments: Dict[str, Any]):
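Taken together, `run()` now supports streaming two ways: an explicit `stream_callback`, or the `_tooluniverse_stream` flag that is popped from a copy of the arguments before validation. Streaming is skipped when `return_json` is set or a custom `response_format` is supplied, and if the client lacks `infer_stream` (or it fails), the buffered `infer()` result is re-chunked into 800-character slices so callers still see chunk events. A caller-side sketch (the `agentic_tool` instance and its prompt argument names are hypothetical):

collected = []

def on_chunk(text: str) -> None:
    # Invoked once per chunk; exceptions raised here are logged and swallowed
    # by _emit_stream_chunk rather than aborting the tool call.
    collected.append(text)
    print(text, end="", flush=True)

result = agentic_tool.run(
    {"question": "Summarize the findings."},   # hypothetical prompt argument
    stream_callback=on_chunk,
)

# The flag form requests streaming without a callback; run() pops the key
# so argument validation never sees it.
result = agentic_tool.run({"question": "Summarize the findings.", "_tooluniverse_stream": True})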
tooluniverse/arxiv_tool.py ADDED
@@ -0,0 +1,113 @@
+ import requests
+ import xml.etree.ElementTree as ET
+ from .base_tool import BaseTool
+ from .tool_registry import register_tool
+
+
+ @register_tool("ArXivTool")
+ class ArXivTool(BaseTool):
+     """
+     Search arXiv for papers by keyword using the public arXiv API.
+     """
+
+     def __init__(
+         self,
+         tool_config,
+         base_url="http://export.arxiv.org/api/query",
+     ):
+         super().__init__(tool_config)
+         self.base_url = base_url
+
+     def run(self, arguments):
+         query = arguments.get("query")
+         limit = int(arguments.get("limit", 10))
+         # sort_by: relevance | lastUpdatedDate | submittedDate
+         sort_by = arguments.get("sort_by", "relevance")
+         # sort_order: ascending | descending
+         sort_order = arguments.get("sort_order", "descending")
+
+         if not query:
+             return {"error": "`query` parameter is required."}
+
+         return self._search(query, limit, sort_by, sort_order)
+
+     def _search(self, query, limit, sort_by, sort_order):
+         params = {
+             "search_query": f"all:{query}",
+             "start": 0,
+             "max_results": max(1, min(limit, 200)),
+             "sortBy": sort_by,
+             "sortOrder": sort_order,
+         }
+
+         try:
+             response = requests.get(self.base_url, params=params, timeout=20)
+         except requests.RequestException as e:
+             return {
+                 "error": "Network error calling arXiv API",
+                 "reason": str(e),
+             }
+
+         if response.status_code != 200:
+             return {
+                 "error": f"arXiv API error {response.status_code}",
+                 "reason": response.reason,
+             }
+
+         # Parse Atom XML
+         try:
+             root = ET.fromstring(response.text)
+         except ET.ParseError as e:
+             return {
+                 "error": "Failed to parse arXiv response",
+                 "reason": str(e),
+             }
+
+         ns = {"atom": "http://www.w3.org/2005/Atom"}
+         entries = []
+         for entry in root.findall("atom:entry", ns):
+             title_text = entry.findtext(
+                 "atom:title",
+                 default="",
+                 namespaces=ns,
+             )
+             title = (title_text or "").strip()
+             summary_text = entry.findtext(
+                 "atom:summary",
+                 default="",
+                 namespaces=ns,
+             )
+             summary = (summary_text or "").strip()
+             link_el = entry.find("atom:link[@type='text/html']", ns)
+             if link_el is not None:
+                 link = link_el.get("href")
+             else:
+                 link = entry.findtext("atom:id", default="", namespaces=ns)
+             published = entry.findtext(
+                 "atom:published", default="", namespaces=ns
+             )
+             updated = entry.findtext("atom:updated", default="", namespaces=ns)
+             authors = [
+                 a.findtext("atom:name", default="", namespaces=ns)
+                 for a in entry.findall("atom:author", ns)
+             ]
+             primary_category = ""
+             cat_el = entry.find(
+                 "{http://arxiv.org/schemas/atom}primary_category"
+             )
+             if cat_el is not None:
+                 primary_category = cat_el.get("term", "")
+
+             entries.append(
+                 {
+                     "title": title,
+                     "abstract": summary,
+                     "authors": authors,
+                     "published": published,
+                     "updated": updated,
+                     "category": primary_category,
+                     "url": link,
+                 }
+             )
+
+         return entries
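A usage sketch for the class above (the `tool_config` is a hypothetical placeholder; the tool wraps the keyword in an `all:` field query and clamps `max_results` to the 1–200 range):

tool = ArXivTool(tool_config={"name": "arxiv_search"})   # hypothetical config
papers = tool.run({
    "query": "diffusion models",
    "limit": 5,
    "sort_by": "submittedDate",    # relevance | lastUpdatedDate | submittedDate
    "sort_order": "descending",    # ascending | descending
})
if isinstance(papers, list):       # error cases return a dict instead
    for p in papers:
        print(p["published"][:10], p["title"])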
tooluniverse/biorxiv_tool.py ADDED
@@ -0,0 +1,97 @@
+ import requests
+ from .base_tool import BaseTool
+ from .tool_registry import register_tool
+
+
+ @register_tool("BioRxivTool")
+ class BioRxivTool(BaseTool):
+     """
+     Search bioRxiv preprints using the public bioRxiv API.
+
+     Arguments:
+         query (str): Search term
+         max_results (int): Max results to return (default 10, max 200)
+     """
+
+     def __init__(
+         self,
+         tool_config,
+         base_url="https://api.medrxiv.org/details",
+     ):
+         super().__init__(tool_config)
+         self.base_url = base_url
+
+     def run(self, arguments=None):
+         arguments = arguments or {}
+         query = arguments.get("query")
+         max_results = int(arguments.get("max_results", 10))
+         if not query:
+             return {"error": "`query` parameter is required."}
+         return self._search(query, max_results)
+
+     def _search(self, query, max_results):
+         # Use date range search for recent preprints
+         # Format: /biorxiv/{start_date}/{end_date}/{cursor}/json
+         from datetime import datetime, timedelta
+
+         # Search last 30 days
+         end_date = datetime.now()
+         start_date = end_date - timedelta(days=30)
+
+         url = (f"{self.base_url}/biorxiv/"
+                f"{start_date.strftime('%Y-%m-%d')}/"
+                f"{end_date.strftime('%Y-%m-%d')}/0/json")
+
+         try:
+             resp = requests.get(url, timeout=20)
+             resp.raise_for_status()
+             data = resp.json()
+         except requests.RequestException as e:
+             return {
+                 "error": "Network/API error calling bioRxiv",
+                 "reason": str(e),
+             }
+         except ValueError:
+             return {"error": "Failed to decode bioRxiv response as JSON"}
+
+         results = []
+         # The API returns a dictionary with a 'collection' key
+         collection = data.get("collection", [])
+         if not isinstance(collection, list):
+             return {"error": "Unexpected API response format"}
+
+         for item in collection:
+             title = item.get("title")
+             authors = item.get("authors", "")
+             if isinstance(authors, str):
+                 authors = [a.strip() for a in authors.split(";") if a.strip()]
+             elif isinstance(authors, list):
+                 authors = [str(a).strip() for a in authors if str(a).strip()]
+             else:
+                 authors = []
+
+             year = None
+             date = item.get("date")
+             if date and len(date) >= 4 and date[:4].isdigit():
+                 year = int(date[:4])
+
+             doi = item.get("doi")
+             url = f"https://www.biorxiv.org/content/{doi}" if doi else None
+
+             # Filter by query if provided
+             if query and query.lower() not in (title or "").lower():
+                 continue
+
+             results.append(
+                 {
+                     "title": title,
+                     "authors": authors,
+                     "year": year,
+                     "doi": doi,
+                     "url": url,
+                     "abstract": item.get("abstract", ""),
+                     "source": "bioRxiv",
+                 }
+             )
+
+         return results[:max_results]
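One behavior worth flagging for callers: despite the docstring, `query` is never sent to the API. The tool fetches the last 30 days of the `details` feed and keeps only items whose title contains the query as a case-insensitive substring, so abstract-only matches and older preprints are missed. Note also that the default `base_url` points at api.medrxiv.org even though the path segment and result links target bioRxiv. A sketch (hypothetical `tool_config`):

tool = BioRxivTool(tool_config={"name": "biorxiv_search"})
hits = tool.run({"query": "organoid", "max_results": 5})
if isinstance(hits, list):         # error cases return a dict instead
    for h in hits:
        print(h["year"], h["doi"], h["title"])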
tooluniverse/core_tool.py ADDED
@@ -0,0 +1,153 @@
+ #!/usr/bin/env python3
+ """
+ CORE API Tool for searching open access academic papers.
+
+ CORE is the world's largest collection of open access research papers.
+ This tool provides access to over 200 million open access papers from
+ repositories and journals worldwide.
+ """
+
+ import requests
+ from typing import Dict, List, Any, Optional
+ from .base_tool import BaseTool
+ from .tool_registry import register_tool
+
+
+ @register_tool("CoreTool")
+ class CoreTool(BaseTool):
+     """Tool for searching CORE open access academic papers."""
+
+     def __init__(self, tool_config=None):
+         super().__init__(tool_config)
+         self.base_url = "https://api.core.ac.uk/v3"
+         self.session = requests.Session()
+         self.session.headers.update({
+             'User-Agent': 'ToolUniverse/1.0',
+             'Accept': 'application/json'
+         })
+
+     def _search(self, query: str, limit: int = 10,
+                 year_from: Optional[int] = None,
+                 year_to: Optional[int] = None,
+                 language: Optional[str] = None) -> List[Dict[str, Any]]:
+         """
+         Search for papers using CORE API.
+
+         Args:
+             query: Search query
+             limit: Maximum number of results
+             year_from: Start year filter
+             year_to: End year filter
+             language: Language filter (e.g., 'en', 'es', 'fr')
+
+         Returns:
+             List of paper dictionaries
+         """
+         try:
+             # Build search parameters
+             params = {
+                 'q': query,
+                 'limit': min(limit, 100),  # CORE API max limit is 100
+                 'page': 1
+             }
+
+             # Add year filters if provided
+             if year_from or year_to:
+                 year_filter = []
+                 if year_from:
+                     year_filter.append(f"year:>={year_from}")
+                 if year_to:
+                     year_filter.append(f"year:<={year_to}")
+                 params['q'] += f" {' '.join(year_filter)}"
+
+             # Add language filter if provided
+             if language:
+                 params['q'] += f" language:{language}"
+
+             # Make API request
+             response = self.session.get(
+                 f"{self.base_url}/search/works",
+                 params=params,
+                 timeout=30
+             )
+             response.raise_for_status()
+
+             data = response.json()
+             results = []
+
+             # Parse results
+             for item in data.get('results', []):
+                 paper = {
+                     'title': item.get('title', 'No title'),
+                     'abstract': item.get('abstract', 'No abstract available'),
+                     'authors': self._extract_authors(item.get('authors', [])),
+                     'year': self._extract_year(item.get('publishedDate')),
+                     'doi': item.get('doi'),
+                     'url': (item.get('downloadUrl') or
+                             item.get('links', [{}])[0].get('url')),
+                     'venue': item.get('publisher'),
+                     'language': item.get('language', {}).get('code', 'Unknown'),
+                     'open_access': True,  # CORE only contains open access papers
+                     'source': 'CORE',
+                     'citations': item.get('citationCount', 0),
+                     'downloads': item.get('downloadCount', 0)
+                 }
+                 results.append(paper)
+
+             return results
+
+         except requests.exceptions.RequestException as e:
+             return [{'error': f'CORE API request failed: {str(e)}'}]
+         except Exception as e:
+             return [{'error': f'CORE API error: {str(e)}'}]
+
+     def _extract_authors(self, authors: List[Dict]) -> List[str]:
+         """Extract author names from CORE API response."""
+         if not authors:
+             return []
+
+         author_names = []
+         for author in authors:
+             name = author.get('name', '')
+             if name:
+                 author_names.append(name)
+
+         return author_names
+
+     def _extract_year(self, published_date: str) -> str:
+         """Extract year from published date."""
+         if not published_date:
+             return 'Unknown'
+
+         try:
+             # CORE API returns dates in ISO format
+             return published_date[:4]
+         except Exception:
+             return 'Unknown'
+
+     def run(self, tool_arguments) -> List[Dict[str, Any]]:
+         """
+         Execute the CORE search.
+
+         Args:
+             tool_arguments: Dictionary containing search parameters
+
+         Returns:
+             List of paper dictionaries
+         """
+         query = tool_arguments.get('query', '')
+         if not query:
+             return [{'error': 'Query parameter is required'}]
+
+         limit = tool_arguments.get('limit', 10)
+         year_from = tool_arguments.get('year_from')
+         year_to = tool_arguments.get('year_to')
+         language = tool_arguments.get('language')
+
+         return self._search(
+             query=query,
+             limit=limit,
+             year_from=year_from,
+             year_to=year_to,
+             language=language
+         )
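A sketch of driving `run()` (hypothetical `tool_config`). Two things to note: year and language filters are folded into the `q` string as field queries (e.g. `cancer year:>=2020 language:en`), and the session sends no API key; CORE's v3 API generally expects one, so unauthenticated calls may be throttled or rejected and surface as the error dictionaries above:

tool = CoreTool(tool_config={"name": "core_search"})
papers = tool.run({
    "query": "single-cell RNA sequencing",
    "limit": 5,
    "year_from": 2020,
    "language": "en",
})
for p in papers:
    # Error results are dicts carrying only an 'error' key.
    print(p.get("year"), p.get("title"))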
tooluniverse/crossref_tool.py ADDED
@@ -0,0 +1,73 @@
+ import requests
+ from .base_tool import BaseTool
+ from .tool_registry import register_tool
+
+
+ @register_tool("CrossrefTool")
+ class CrossrefTool(BaseTool):
+     """
+     Search Crossref Works API for articles by keyword.
+     """
+
+     def __init__(
+         self,
+         tool_config,
+         base_url="https://api.crossref.org/works",
+     ):
+         super().__init__(tool_config)
+         self.base_url = base_url
+
+     def run(self, arguments):
+         query = arguments.get("query")
+         rows = int(arguments.get("limit", 10))
+         # e.g., 'type:journal-article,from-pub-date:2020-01-01'
+         filter_str = arguments.get("filter")
+         if not query:
+             return {"error": "`query` parameter is required."}
+         return self._search(query, rows, filter_str)
+
+     def _search(self, query, rows, filter_str):
+         params = {"query": query, "rows": max(1, min(rows, 100))}
+         if filter_str:
+             params["filter"] = filter_str
+
+         try:
+             response = requests.get(self.base_url, params=params, timeout=20)
+         except requests.RequestException as e:
+             return {
+                 "error": "Network error calling Crossref API",
+                 "reason": str(e),
+             }
+
+         if response.status_code != 200:
+             return {
+                 "error": f"Crossref API error {response.status_code}",
+                 "reason": response.reason,
+             }
+
+         data = response.json().get("message", {}).get("items", [])
+         results = []
+         for item in data:
+             title_list = item.get("title") or []
+             title = title_list[0] if title_list else None
+             abstract = item.get("abstract")
+             year = None
+             issued = item.get("issued", {}).get("date-parts") or []
+             if issued and issued[0]:
+                 year = issued[0][0]
+             url = item.get("URL")
+             doi = item.get("DOI")
+             container_title = item.get("container-title") or []
+             journal = container_title[0] if container_title else None
+             results.append(
+                 {
+                     "title": title,
+                     "abstract": abstract,
+                     "journal": journal,
+                     "year": year,
+                     "doi": doi,
+                     "url": url,
+                 }
+             )
+
+         return results
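A closing sketch for the Crossref tool (hypothetical `tool_config`). The `filter` argument is passed through verbatim to the Works API and `rows` is clamped to 1–100; note that Crossref abstracts, when present, arrive as JATS-flavored XML strings rather than plain text:

tool = CrossrefTool(tool_config={"name": "crossref_search"})
works = tool.run({
    "query": "transformer language models",
    "limit": 5,
    "filter": "type:journal-article,from-pub-date:2020-01-01",
})
if isinstance(works, list):        # error cases return a dict instead
    for w in works:
        print(w["year"], w["journal"], w["title"])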