tooluniverse 1.0.11.1__py3-none-any.whl → 1.0.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tooluniverse might be problematic.

Files changed (65)
  1. tooluniverse/alphafold_tool.py +47 -7
  2. tooluniverse/base_tool.py +9 -1
  3. tooluniverse/build_optimizer.py +115 -22
  4. tooluniverse/data/alphafold_tools.json +7 -12
  5. tooluniverse/data/encode_tools.json +139 -0
  6. tooluniverse/data/gbif_tools.json +152 -0
  7. tooluniverse/data/gdc_tools.json +116 -0
  8. tooluniverse/data/gtex_tools.json +116 -0
  9. tooluniverse/data/icgc_tools.json +0 -0
  10. tooluniverse/data/mgnify_tools.json +121 -0
  11. tooluniverse/data/obis_tools.json +122 -0
  12. tooluniverse/data/optimizer_tools.json +275 -0
  13. tooluniverse/data/rnacentral_tools.json +99 -0
  14. tooluniverse/data/smolagent_tools.json +206 -0
  15. tooluniverse/data/uniprot_tools.json +13 -5
  16. tooluniverse/data/wikipathways_tools.json +106 -0
  17. tooluniverse/default_config.py +12 -0
  18. tooluniverse/encode_tool.py +245 -0
  19. tooluniverse/execute_function.py +185 -17
  20. tooluniverse/gbif_tool.py +166 -0
  21. tooluniverse/gdc_tool.py +175 -0
  22. tooluniverse/generate_tools.py +121 -9
  23. tooluniverse/gtex_tool.py +168 -0
  24. tooluniverse/mgnify_tool.py +181 -0
  25. tooluniverse/obis_tool.py +185 -0
  26. tooluniverse/pypi_package_inspector_tool.py +3 -2
  27. tooluniverse/python_executor_tool.py +43 -13
  28. tooluniverse/rnacentral_tool.py +124 -0
  29. tooluniverse/smcp.py +17 -25
  30. tooluniverse/smcp_server.py +1 -1
  31. tooluniverse/smolagent_tool.py +555 -0
  32. tooluniverse/tools/ArgumentDescriptionOptimizer.py +55 -0
  33. tooluniverse/tools/ENCODE_list_files.py +59 -0
  34. tooluniverse/tools/ENCODE_search_experiments.py +67 -0
  35. tooluniverse/tools/GBIF_search_occurrences.py +67 -0
  36. tooluniverse/tools/GBIF_search_species.py +55 -0
  37. tooluniverse/tools/GDC_list_files.py +55 -0
  38. tooluniverse/tools/GDC_search_cases.py +55 -0
  39. tooluniverse/tools/GTEx_get_expression_summary.py +49 -0
  40. tooluniverse/tools/GTEx_query_eqtl.py +59 -0
  41. tooluniverse/tools/MGnify_list_analyses.py +52 -0
  42. tooluniverse/tools/MGnify_search_studies.py +55 -0
  43. tooluniverse/tools/OBIS_search_occurrences.py +59 -0
  44. tooluniverse/tools/OBIS_search_taxa.py +52 -0
  45. tooluniverse/tools/RNAcentral_get_by_accession.py +46 -0
  46. tooluniverse/tools/RNAcentral_search.py +52 -0
  47. tooluniverse/tools/TestCaseGenerator.py +46 -0
  48. tooluniverse/tools/ToolDescriptionOptimizer.py +67 -0
  49. tooluniverse/tools/ToolDiscover.py +4 -0
  50. tooluniverse/tools/UniProt_search.py +14 -6
  51. tooluniverse/tools/WikiPathways_get_pathway.py +52 -0
  52. tooluniverse/tools/WikiPathways_search.py +52 -0
  53. tooluniverse/tools/__init__.py +43 -1
  54. tooluniverse/tools/advanced_literature_search_agent.py +46 -0
  55. tooluniverse/tools/alphafold_get_annotations.py +4 -10
  56. tooluniverse/tools/download_binary_file.py +3 -6
  57. tooluniverse/tools/open_deep_research_agent.py +46 -0
  58. tooluniverse/uniprot_tool.py +51 -4
  59. tooluniverse/wikipathways_tool.py +122 -0
  60. {tooluniverse-1.0.11.1.dist-info → tooluniverse-1.0.12.dist-info}/METADATA +3 -1
  61. {tooluniverse-1.0.11.1.dist-info → tooluniverse-1.0.12.dist-info}/RECORD +65 -24
  62. {tooluniverse-1.0.11.1.dist-info → tooluniverse-1.0.12.dist-info}/WHEEL +0 -0
  63. {tooluniverse-1.0.11.1.dist-info → tooluniverse-1.0.12.dist-info}/entry_points.txt +0 -0
  64. {tooluniverse-1.0.11.1.dist-info → tooluniverse-1.0.12.dist-info}/licenses/LICENSE +0 -0
  65. {tooluniverse-1.0.11.1.dist-info → tooluniverse-1.0.12.dist-info}/top_level.txt +0 -0
tooluniverse/alphafold_tool.py CHANGED
@@ -11,7 +11,7 @@ ALPHAFOLD_BASE_URL = "https://alphafold.ebi.ac.uk/api"
 class AlphaFoldRESTTool(BaseTool):
     """
     AlphaFold Protein Structure Database API tool.
-    Generic wrapper for AlphaFold API endpoints defined in alphafold_tools.json.
+    Generic wrapper for AlphaFold API endpoints from alphafold_tools.json.
     """
 
     def __init__(self, tool_config):
@@ -22,6 +22,7 @@ class AlphaFoldRESTTool(BaseTool):
         self.endpoint_template: str = fields["endpoint"]
         self.required: List[str] = parameter.get("required", [])
         self.output_format: str = fields.get("return_format", "JSON")
+        self.auto_query_params: Dict[str, Any] = fields.get("auto_query_params", {})
 
     def _build_url(self, arguments: Dict[str, Any]) -> str | Dict[str, Any]:
         # Example: endpoint_template = "/annotations/{qualifier}.json"
@@ -40,14 +41,18 @@ class AlphaFoldRESTTool(BaseTool):
         # Now url_path = "/annotations/P69905.json"
 
         # Treat all remaining args as query parameters
-        # "type" wasnt a placeholder, so it becomes a query param
+        # "type" wasn't a placeholder, so it becomes a query param
         query_args = {k: v for k, v in arguments.items() if k not in used}
+
+        # Add auto_query_params from config (e.g., type=MUTAGEN)
+        query_args.update(self.auto_query_params)
+
         if query_args:
             from urllib.parse import urlencode
 
             url_path += "?" + urlencode(query_args)
 
-        # Final result = "https://alphafold.ebi.ac.uk/api/annotations/P69905.json?type=MUTAGEN"
+        # Final example: annotations/P69905.json?type=MUTAGEN
         return ALPHAFOLD_BASE_URL + url_path
 
     def _make_request(self, url: str) -> Dict[str, Any]:
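The effect of the new `auto_query_params` field is that fixed query parameters now come from the tool config rather than the caller. A minimal standalone sketch of the merged URL-building logic (the `build_url` helper name is illustrative; the real code lives in `_build_url` above):

from urllib.parse import urlencode

ALPHAFOLD_BASE_URL = "https://alphafold.ebi.ac.uk/api"

def build_url(endpoint_template, arguments, auto_query_params):
    # Fill {placeholders} from caller arguments, remembering which were consumed
    url_path, used = endpoint_template, set()
    for key, value in arguments.items():
        placeholder = "{" + key + "}"
        if placeholder in url_path:
            url_path = url_path.replace(placeholder, str(value))
            used.add(key)
    # Leftover arguments become query params; config-level params layer on top
    query_args = {k: v for k, v in arguments.items() if k not in used}
    query_args.update(auto_query_params)
    if query_args:
        url_path += "?" + urlencode(query_args)
    return ALPHAFOLD_BASE_URL + url_path

# -> https://alphafold.ebi.ac.uk/api/annotations/P69905.json?type=MUTAGEN
print(build_url("/annotations/{qualifier}.json",
                {"qualifier": "P69905"}, {"type": "MUTAGEN"}))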
@@ -62,9 +67,37 @@ class AlphaFoldRESTTool(BaseTool):
                 },
             )
         except Exception as e:
-            return {"error": "Request to AlphaFold API failed", "detail": str(e)}
+            return {
+                "error": "Request to AlphaFold API failed",
+                "detail": str(e),
+            }
 
         if resp.status_code == 404:
+            # Try to provide more context about 404 errors
+            # Check if protein exists in AlphaFold DB
+            try:
+                qualifier_match = re.search(r"/annotations/([^/]+)\.json", url)
+                if qualifier_match:
+                    accession = qualifier_match.group(1)
+                    base = ALPHAFOLD_BASE_URL
+                    check_url = f"{base}/uniprot/summary/{accession}.json"
+                    check_resp = requests.get(check_url, timeout=10)
+                    if check_resp.status_code == 200:
+                        return {
+                            "error": "No MUTAGEN annotations available",
+                            "reason": (
+                                "Protein exists in AlphaFold DB but "
+                                "has no MUTAGEN annotations"
+                            ),
+                            "endpoint": url,
+                        }
+                    else:
+                        return {
+                            "error": "Protein not found in AlphaFold DB",
+                            "endpoint": url,
+                        }
+            except Exception:
+                pass  # Fall through to generic error
             return {"error": "Not found", "endpoint": url}
         if resp.status_code != 200:
             return {
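The new 404 branch distinguishes an unknown accession from a known protein that simply lacks MUTAGEN annotations by probing the `/uniprot/summary/{accession}.json` endpoint. A condensed, self-contained sketch of that disambiguation (the `explain_annotations_404` helper name is hypothetical):

import re
import requests

ALPHAFOLD_BASE_URL = "https://alphafold.ebi.ac.uk/api"

def explain_annotations_404(url: str) -> dict:
    # Hypothetical helper condensing the 404 branch above
    match = re.search(r"/annotations/([^/]+)\.json", url)
    if match:
        accession = match.group(1)
        check_url = f"{ALPHAFOLD_BASE_URL}/uniprot/summary/{accession}.json"
        try:
            if requests.get(check_url, timeout=10).status_code == 200:
                # Summary exists, so the protein is known but unannotated
                return {"error": "No MUTAGEN annotations available", "endpoint": url}
            return {"error": "Protein not found in AlphaFold DB", "endpoint": url}
        except requests.RequestException:
            pass  # fall through to the generic error, as in the diff
    return {"error": "Not found", "endpoint": url}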
@@ -98,9 +131,13 @@ class AlphaFoldRESTTool(BaseTool):
         if self.output_format.upper() == "JSON":
             try:
                 data = resp.json()
-                if not data:
+                if not data or (isinstance(data, dict) and not data):
                     return {
-                        "error": "AlphaFold returned an empty response",
+                        "error": "No MUTAGEN annotations available",
+                        "reason": (
+                            "Protein exists in AlphaFold DB but "
+                            "has no MUTAGEN annotations from UniProt"
+                        ),
                         "endpoint": url,
                         "query": arguments,
                     }
@@ -124,4 +161,7 @@ class AlphaFoldRESTTool(BaseTool):
             }
 
         # Fallback for non-JSON output
-        return {"data": resp.text, "metadata": {"endpoint": url, "query": arguments}}
+        return {
+            "data": resp.text,
+            "metadata": {"endpoint": url, "query": arguments},
+        }
tooluniverse/base_tool.py CHANGED
@@ -183,7 +183,15 @@ class BaseTool:
         try:
             import jsonschema
 
-            jsonschema.validate(arguments, schema)
+            # Filter out internal control parameters before validation
+            # Only filter known internal parameters, not all underscore-prefixed params,
+            # to allow the optional streaming parameter _tooluniverse_stream
+            internal_params = {"ctx", "_tooluniverse_stream"}
+            filtered_arguments = {
+                k: v for k, v in arguments.items() if k not in internal_params
+            }
+
+            jsonschema.validate(filtered_arguments, schema)
             return None
         except jsonschema.ValidationError as e:
             return ToolValidationError(
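To see why the filter is needed: with `additionalProperties: false` in a tool schema, an internal flag like `_tooluniverse_stream` would otherwise fail validation. A small illustration (the schema here is made up for the example):

import jsonschema

schema = {
    "type": "object",
    "properties": {"qualifier": {"type": "string"}},
    "required": ["qualifier"],
    "additionalProperties": False,  # illustrative strict schema
}
arguments = {"qualifier": "P69905", "_tooluniverse_stream": True}

# jsonschema.validate(arguments, schema) would raise ValidationError here,
# because _tooluniverse_stream is not a declared property.
internal_params = {"ctx", "_tooluniverse_stream"}
filtered = {k: v for k, v in arguments.items() if k not in internal_params}
jsonschema.validate(filtered, schema)  # passes
print("validation passed")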
tooluniverse/build_optimizer.py CHANGED
@@ -6,15 +6,52 @@ from pathlib import Path
 from typing import Dict, Any, Set, Tuple
 
 
-def calculate_tool_hash(tool_config: Dict[str, Any]) -> str:
-    """Calculate a hash for tool configuration to detect changes."""
+def _normalize_value(value: Any) -> Any:
+    """Recursively normalize values for consistent hashing."""
+    if isinstance(value, dict):
+        # Sort dictionary keys and normalize values
+        return {k: _normalize_value(v) for k, v in sorted(value.items())}
+    elif isinstance(value, list):
+        # Normalize list elements
+        return [_normalize_value(item) for item in value]
+    elif isinstance(value, (str, int, float, bool)) or value is None:
+        return value
+    else:
+        # Convert other types to string representation for hashing
+        return str(value)
+
+
+def calculate_tool_hash(tool_config: Dict[str, Any], verbose: bool = False) -> str:
+    """Calculate a hash for tool configuration to detect changes.
+
+    Args:
+        tool_config: Tool configuration dictionary
+        verbose: If True, print excluded fields (for debugging)
+
+    Returns:
+        MD5 hash string of the normalized configuration
+    """
+    # Fields to exclude from hash calculation (metadata/timestamp fields)
+    excluded_fields = {"timestamp", "last_updated", "created_at", "_cache", "_metadata"}
+
     # Create a normalized version of the config for hashing
     normalized_config = {}
-    for key, value in sorted(tool_config.items()):
-        if key not in ["timestamp", "last_updated", "created_at"]:
-            normalized_config[key] = value
+    excluded_values = []
 
-    config_str = json.dumps(normalized_config, sort_keys=True, separators=(",", ":"))
+    for key, value in sorted(tool_config.items()):
+        if key not in excluded_fields:
+            # Recursively normalize nested structures
+            normalized_config[key] = _normalize_value(value)
+        elif verbose:
+            excluded_values.append(key)
+
+    if verbose and excluded_values:
+        print(f"  Excluded fields from hash: {', '.join(excluded_values)}")
+
+    # Use consistent JSON serialization with sorted keys
+    config_str = json.dumps(
+        normalized_config, sort_keys=True, separators=(",", ":"), ensure_ascii=False
+    )
     return hashlib.md5(config_str.encode("utf-8")).hexdigest()
 
 
@@ -59,29 +96,85 @@ def cleanup_orphaned_files(tools_dir: Path, current_tool_names: Set[str]) -> int
     return cleaned_count
 
 
+def _compare_configs(old_config: Dict[str, Any], new_config: Dict[str, Any]) -> list:
+    """Compare two configs and return list of changed field paths."""
+    changes = []
+
+    all_keys = set(old_config.keys()) | set(new_config.keys())
+    excluded_fields = {"timestamp", "last_updated", "created_at", "_cache", "_metadata"}
+
+    for key in all_keys:
+        if key in excluded_fields:
+            continue
+
+        old_val = old_config.get(key)
+        new_val = new_config.get(key)
+
+        if old_val != new_val:
+            changes.append(key)
+
+    return changes
+
+
 def get_changed_tools(
-    current_tools: Dict[str, Any], metadata_file: Path
-) -> Tuple[list, list, list]:
-    """Get lists of new, changed, and unchanged tools."""
+    current_tools: Dict[str, Any],
+    metadata_file: Path,
+    force_regenerate: bool = False,
+    verbose: bool = False,
+) -> Tuple[list, list, list, Dict[str, list]]:
+    """Get lists of new, changed, and unchanged tools.
+
+    Args:
+        current_tools: Dictionary of current tool configurations
+        metadata_file: Path to metadata file storing previous hashes
+        force_regenerate: If True, mark all tools as changed
+        verbose: If True, provide detailed change information
+
+    Returns:
+        Tuple of (new_tools, changed_tools, unchanged_tools, change_details)
+        where change_details maps tool_name -> list of changed field names
+    """
     old_metadata = load_metadata(metadata_file)
     new_metadata = {}
     new_tools = []
     changed_tools = []
     unchanged_tools = []
-
-    for tool_name, tool_config in current_tools.items():
-        current_hash = calculate_tool_hash(tool_config)
-        new_metadata[tool_name] = current_hash
-
-        old_hash = old_metadata.get(tool_name)
-        if old_hash is None:
-            new_tools.append(tool_name)
-        elif old_hash != current_hash:
-            changed_tools.append(tool_name)
-        else:
-            unchanged_tools.append(tool_name)
+    change_details: Dict[str, list] = {}
+
+    if force_regenerate:
+        print("🔄 Force regeneration enabled - all tools will be regenerated")
+        for tool_name, tool_config in current_tools.items():
+            current_hash = calculate_tool_hash(tool_config, verbose=verbose)
+            new_metadata[tool_name] = current_hash
+            if tool_name in old_metadata:
+                changed_tools.append(tool_name)
+                change_details[tool_name] = ["force_regenerate"]
+            else:
+                new_tools.append(tool_name)
+    else:
+        for tool_name, tool_config in current_tools.items():
+            current_hash = calculate_tool_hash(tool_config, verbose=verbose)
+            new_metadata[tool_name] = current_hash
+
+            old_hash = old_metadata.get(tool_name)
+            if old_hash is None:
+                new_tools.append(tool_name)
+                if verbose:
+                    print(f"  ✨ New tool detected: {tool_name}")
+            elif old_hash != current_hash:
+                changed_tools.append(tool_name)
+                # Try to identify which fields changed (if we have the old config)
+                # Note: We only have hashes, so we can't do detailed field comparison
+                # This would require storing full configs, which we avoid for size reasons
+                change_details[tool_name] = ["hash_mismatch"]
+                if verbose:
+                    print(
+                        f"  🔄 Tool changed: {tool_name} (hash: {old_hash[:8]}... -> {current_hash[:8]}...)"
+                    )
+            else:
+                unchanged_tools.append(tool_name)
 
     # Save updated metadata
     save_metadata(new_metadata, metadata_file)
 
-    return new_tools, changed_tools, unchanged_tools
+    return new_tools, changed_tools, unchanged_tools, change_details
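Callers of `get_changed_tools` must now unpack a 4-tuple. A usage sketch, assuming the function is importable from `tooluniverse.build_optimizer` as the file path suggests; the config content and metadata path below are illustrative:

from pathlib import Path

from tooluniverse.build_optimizer import get_changed_tools

current_tools = {
    "UniProt_search": {"name": "UniProt_search", "type": "UniProtRESTTool"},
}

# Now a 4-tuple: change_details maps tool name -> reason ("hash_mismatch", ...)
new, changed, unchanged, details = get_changed_tools(
    current_tools,
    Path("build/.tool_hashes.json"),  # hypothetical metadata location
    force_regenerate=False,
    verbose=True,
)
print(new, changed, unchanged, details)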
tooluniverse/data/alphafold_tools.json CHANGED
@@ -387,31 +387,26 @@
   },
   {
     "name": "alphafold_get_annotations",
-    "description": "Retrieve AlphaFold variant annotations (e.g., missense mutations) for a given UniProt accession. IMPORTANT: The qualifier must be a UniProt ACCESSION (e.g., 'P69905'), along with an annotation type (currently only 'MUTAGEN'). Do NOT use UniProt entry names as they will cause API errors. To find UniProt accession from a gene/protein name, use `UniProt_search` (e.g., query='gene:TP53' organism='human') or `UniProt_id_mapping` for ID conversion. Use this tool to explore predicted pathogenicity or functional effects of substitutions. For experimentally curated variants, use `UniProt_get_disease_variants_by_accession`. To view the full 3D structure, call `alphafold_get_prediction`; for overall model metadata, use `alphafold_get_summary`.",
+    "description": "Retrieve AlphaFold MUTAGEN annotations for a given UniProt accession. Returns experimental mutagenesis data mapped onto protein structures from UniProt. The qualifier must be a UniProt ACCESSION (e.g., 'P69905'). Note: Not all proteins have MUTAGEN annotations available in the database.",
     "type": "AlphaFoldRESTTool",
     "parameter": {
       "type": "object",
       "properties": {
         "qualifier": {
           "type": "string",
-          "description": "Protein identifier: UniProt ACCESSION (e.g., 'P69905'). Do NOT use entry names. To find accession from gene name: use `UniProt_search` or `UniProt_id_mapping`."
-        },
-        "type": {
-          "type": "string",
-          "description": "Annotation type (currently only 'MUTAGEN' is supported).",
-          "enum": [
-            "MUTAGEN"
-          ]
+          "description": "UniProt ACCESSION (e.g., 'P69905'). Must be an accession number, not an entry name."
         }
       },
       "required": [
-        "qualifier",
-        "type"
+        "qualifier"
       ]
     },
     "fields": {
       "endpoint": "/annotations/{qualifier}.json",
-      "return_format": "JSON"
+      "return_format": "JSON",
+      "auto_query_params": {
+        "type": "MUTAGEN"
+      }
     },
     "return_schema": {
       "type": "object",
tooluniverse/data/encode_tools.json ADDED
@@ -0,0 +1,139 @@
+[
+  {
+    "name": "ENCODE_search_experiments",
+    "type": "ENCODESearchTool",
+    "description": "Search ENCODE functional genomics experiments (e.g., ChIP-seq, ATAC-seq) by assay/target/organism/status. Use to discover datasets and access experiment-level metadata.",
+    "parameter": {
+      "type": "object",
+      "properties": {
+        "assay_title": {
+          "type": "string",
+          "description": "Assay name filter (e.g., 'ChIP-seq', 'ATAC-seq')."
+        },
+        "target": {
+          "type": "string",
+          "description": "Target filter (e.g., 'CTCF')."
+        },
+        "organism": {
+          "type": "string",
+          "description": "Organism filter (e.g., 'Homo sapiens', 'Mus musculus')."
+        },
+        "status": {
+          "type": "string",
+          "default": "released",
+          "description": "Record status filter (default 'released')."
+        },
+        "limit": {
+          "type": "integer",
+          "default": 10,
+          "minimum": 1,
+          "maximum": 100,
+          "description": "Max number of results (1–100)."
+        }
+      }
+    },
+    "fields": {
+      "endpoint": "https://www.encodeproject.org/search/",
+      "format": "json"
+    },
+    "return_schema": {
+      "type": "object",
+      "description": "ENCODE experiments search response",
+      "properties": {
+        "status": {"type": "string"},
+        "data": {
+          "type": "object",
+          "properties": {
+            "total": {"type": "integer"},
+            "@graph": {
+              "type": "array",
+              "items": {
+                "type": "object",
+                "properties": {
+                  "accession": {"type": "string"},
+                  "assay_title": {"type": "string"},
+                  "target": {"type": "object"},
+                  "organism": {"type": "string"},
+                  "status": {"type": "string"}
+                }
+              }
+            }
+          }
+        },
+        "url": {"type": "string"}
+      }
+    },
+    "test_examples": [
+      {"assay_title": "ChIP-seq", "limit": 1},
+      {"assay_title": "ATAC-seq", "limit": 1}
+    ],
+    "label": ["ENCODE", "Experiment", "Search"],
+    "metadata": {
+      "tags": ["functional-genomics", "chip-seq", "atac-seq"],
+      "estimated_execution_time": "< 3 seconds"
+    }
+  },
+  {
+    "name": "ENCODE_list_files",
+    "type": "ENCODEFilesTool",
+    "description": "List ENCODE files with filters (file_format, output_type, assay). Use to programmatically retrieve downloadable artifact metadata (FASTQ, BAM, bigWig, peaks).",
+    "parameter": {
+      "type": "object",
+      "properties": {
+        "file_type": {
+          "type": "string",
+          "description": "File type filter (e.g., 'fastq', 'bam', 'bigWig')."
+        },
+        "assay_title": {
+          "type": "string",
+          "description": "Assay filter (e.g., 'ChIP-seq')."
+        },
+        "limit": {
+          "type": "integer",
+          "default": 10,
+          "minimum": 1,
+          "maximum": 100,
+          "description": "Max number of results (1–100)."
+        }
+      }
+    },
+    "fields": {
+      "endpoint": "https://www.encodeproject.org/search/",
+      "format": "json"
+    },
+    "return_schema": {
+      "type": "object",
+      "description": "ENCODE files search response",
+      "properties": {
+        "status": {"type": "string"},
+        "data": {
+          "type": "object",
+          "properties": {
+            "total": {"type": "integer"},
+            "@graph": {
+              "type": "array",
+              "items": {
+                "type": "object",
+                "properties": {
+                  "accession": {"type": "string"},
+                  "file_format": {"type": "string"},
+                  "output_type": {"type": "string"},
+                  "file_type": {"type": "string"}
+                }
+              }
+            }
+          }
+        },
+        "url": {"type": "string"}
+      }
+    },
+    "test_examples": [
+      {"file_type": "fastq", "limit": 1}
+    ],
+    "label": ["ENCODE", "File", "Search"],
+    "metadata": {
+      "tags": ["downloads", "artifacts", "metadata"],
+      "estimated_execution_time": "< 3 seconds"
+    }
+  }
+]
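For reference, a direct request approximating what `ENCODE_search_experiments` issues against the configured endpoint. The `type=Experiment` and `format=json` parameters are assumptions about the ENCODE search API; only the endpoint URL and filter names are taken from the config above:

import requests

params = {
    "type": "Experiment",   # assumption: ENCODE object-type filter
    "assay_title": "ChIP-seq",
    "status": "released",
    "format": "json",       # assumption: request JSON instead of HTML
    "limit": 10,
}
resp = requests.get("https://www.encodeproject.org/search/",
                    params=params, timeout=30)
for exp in resp.json().get("@graph", []):
    print(exp.get("accession"), exp.get("assay_title"), exp.get("status"))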
tooluniverse/data/gbif_tools.json ADDED
@@ -0,0 +1,152 @@
+[
+  {
+    "name": "GBIF_search_species",
+    "type": "GBIFTool",
+    "description": "Find taxa by keyword (scientific/common names) in GBIF. Use to resolve organism names to stable taxon keys (rank, lineage) for downstream biodiversity/occurrence queries.",
+    "parameter": {
+      "type": "object",
+      "properties": {
+        "query": {
+          "type": "string",
+          "description": "Search string for species/taxa (supports scientific/common names), e.g., 'Homo', 'Atlantic cod'."
+        },
+        "limit": {
+          "type": "integer",
+          "default": 10,
+          "minimum": 1,
+          "maximum": 300,
+          "description": "Maximum number of results to return (1–300)."
+        },
+        "offset": {
+          "type": "integer",
+          "default": 0,
+          "minimum": 0,
+          "description": "Result offset for pagination (0-based)."
+        }
+      },
+      "required": ["query"]
+    },
+    "fields": {
+      "endpoint": "https://api.gbif.org/v1/species/search",
+      "format": "json"
+    },
+    "return_schema": {
+      "type": "object",
+      "description": "GBIF species search response",
+      "properties": {
+        "status": {"type": "string"},
+        "data": {
+          "type": "object",
+          "properties": {
+            "count": {"type": "integer"},
+            "results": {
+              "type": "array",
+              "items": {
+                "type": "object",
+                "properties": {
+                  "key": {"type": "integer", "description": "taxonKey"},
+                  "scientificName": {"type": "string"},
+                  "rank": {"type": "string"},
+                  "kingdom": {"type": "string"},
+                  "phylum": {"type": "string"},
+                  "class": {"type": "string"},
+                  "order": {"type": "string"},
+                  "family": {"type": "string"},
+                  "genus": {"type": "string"}
+                }
+              }
+            }
+          }
+        },
+        "url": {"type": "string"}
+      }
+    },
+    "test_examples": [
+      {"query": "Homo", "limit": 1},
+      {"query": "Gadus", "limit": 1}
+    ],
+    "label": ["GBIF", "Taxonomy", "Search"],
+    "metadata": {
+      "tags": ["biodiversity", "taxonomy", "species", "search"],
+      "estimated_execution_time": "< 2 seconds"
+    }
+  },
+  {
+    "name": "GBIF_search_occurrences",
+    "type": "GBIFOccurrenceTool",
+    "description": "Retrieve species occurrence records from GBIF with optional filters (taxonKey, country, coordinates). Use for distribution mapping, presence-only modeling, and sampling context.",
+    "parameter": {
+      "type": "object",
+      "properties": {
+        "taxonKey": {
+          "type": "integer",
+          "description": "GBIF taxon key to filter occurrences by a specific taxon (from species search)."
+        },
+        "country": {
+          "type": "string",
+          "description": "ISO 3166-1 alpha-2 country code filter (e.g., 'US', 'CN')."
+        },
+        "hasCoordinate": {
+          "type": "boolean",
+          "default": true,
+          "description": "Only return records with valid latitude/longitude coordinates when true."
+        },
+        "limit": {
+          "type": "integer",
+          "default": 10,
+          "minimum": 1,
+          "maximum": 300,
+          "description": "Maximum number of results to return (1–300)."
+        },
+        "offset": {
+          "type": "integer",
+          "default": 0,
+          "minimum": 0,
+          "description": "Result offset for pagination (0-based)."
+        }
+      }
+    },
+    "fields": {
+      "endpoint": "https://api.gbif.org/v1/occurrence/search",
+      "format": "json"
+    },
+    "return_schema": {
+      "type": "object",
+      "description": "GBIF occurrence search response",
+      "properties": {
+        "status": {"type": "string"},
+        "data": {
+          "type": "object",
+          "properties": {
+            "count": {"type": "integer"},
+            "results": {
+              "type": "array",
+              "items": {
+                "type": "object",
+                "properties": {
+                  "key": {"type": "integer"},
+                  "speciesKey": {"type": "integer"},
+                  "scientificName": {"type": "string"},
+                  "decimalLatitude": {"type": "number"},
+                  "decimalLongitude": {"type": "number"},
+                  "eventDate": {"type": "string"},
+                  "countryCode": {"type": "string"}
+                }
+              }
+            }
+          }
+        },
+        "url": {"type": "string"}
+      }
+    },
+    "test_examples": [
+      {"hasCoordinate": true, "limit": 1},
+      {"country": "US", "limit": 1}
+    ],
+    "label": ["GBIF", "Occurrence", "Geospatial"],
+    "metadata": {
+      "tags": ["biodiversity", "occurrence", "distribution", "geospatial"],
+      "estimated_execution_time": "< 3 seconds"
+    }
+  }
+]
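The two GBIF tools are designed to chain: resolve a name to a `taxonKey`, then filter occurrences by it. A sketch against the configured endpoints (note the raw GBIF API takes the search string as `q`, while the tool parameter above is named `query`):

import requests

# Step 1: resolve a name to a taxonKey (GBIF_search_species endpoint)
species = requests.get(
    "https://api.gbif.org/v1/species/search",
    params={"q": "Gadus", "limit": 1},
    timeout=30,
).json()
taxon_key = species["results"][0]["key"]

# Step 2: pull coordinate-bearing occurrences for that taxon
occurrences = requests.get(
    "https://api.gbif.org/v1/occurrence/search",
    params={"taxonKey": taxon_key, "hasCoordinate": "true", "limit": 5},
    timeout=30,
).json()
for rec in occurrences.get("results", []):
    print(rec.get("scientificName"),
          rec.get("decimalLatitude"), rec.get("decimalLongitude"))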