tooluniverse 1.0.11.1__py3-none-any.whl → 1.0.11.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tooluniverse might be problematic. Click here for more details.

@@ -11,7 +11,7 @@ ALPHAFOLD_BASE_URL = "https://alphafold.ebi.ac.uk/api"
11
11
  class AlphaFoldRESTTool(BaseTool):
12
12
  """
13
13
  AlphaFold Protein Structure Database API tool.
14
- Generic wrapper for AlphaFold API endpoints defined in alphafold_tools.json.
14
+ Generic wrapper for AlphaFold API endpoints from alphafold_tools.json.
15
15
  """
16
16
 
17
17
  def __init__(self, tool_config):
@@ -22,6 +22,7 @@ class AlphaFoldRESTTool(BaseTool):
22
22
  self.endpoint_template: str = fields["endpoint"]
23
23
  self.required: List[str] = parameter.get("required", [])
24
24
  self.output_format: str = fields.get("return_format", "JSON")
25
+ self.auto_query_params: Dict[str, Any] = fields.get("auto_query_params", {})
25
26
 
26
27
  def _build_url(self, arguments: Dict[str, Any]) -> str | Dict[str, Any]:
27
28
  # Example: endpoint_template = "/annotations/{qualifier}.json"
@@ -40,14 +41,18 @@ class AlphaFoldRESTTool(BaseTool):
40
41
  # Now url_path = "/annotations/P69905.json"
41
42
 
42
43
  # Treat all remaining args as query parameters
43
- # "type" wasnt a placeholder, so it becomes a query param
44
+ # "type" wasn't a placeholder, so it becomes a query param
44
45
  query_args = {k: v for k, v in arguments.items() if k not in used}
46
+
47
+ # Add auto_query_params from config (e.g., type=MUTAGEN)
48
+ query_args.update(self.auto_query_params)
49
+
45
50
  if query_args:
46
51
  from urllib.parse import urlencode
47
52
 
48
53
  url_path += "?" + urlencode(query_args)
49
54
 
50
- # Final result = "https://alphafold.ebi.ac.uk/api/annotations/P69905.json?type=MUTAGEN"
55
+ # Final example: annotations/P69905.json?type=MUTAGEN
51
56
  return ALPHAFOLD_BASE_URL + url_path
52
57
 
53
58
  def _make_request(self, url: str) -> Dict[str, Any]:
@@ -62,9 +67,37 @@ class AlphaFoldRESTTool(BaseTool):
62
67
  },
63
68
  )
64
69
  except Exception as e:
65
- return {"error": "Request to AlphaFold API failed", "detail": str(e)}
70
+ return {
71
+ "error": "Request to AlphaFold API failed",
72
+ "detail": str(e),
73
+ }
66
74
 
67
75
  if resp.status_code == 404:
76
+ # Try to provide more context about 404 errors
77
+ # Check if protein exists in AlphaFold DB
78
+ try:
79
+ qualifier_match = re.search(r"/annotations/([^/]+)\.json", url)
80
+ if qualifier_match:
81
+ accession = qualifier_match.group(1)
82
+ base = ALPHAFOLD_BASE_URL
83
+ check_url = f"{base}/uniprot/summary/{accession}.json"
84
+ check_resp = requests.get(check_url, timeout=10)
85
+ if check_resp.status_code == 200:
86
+ return {
87
+ "error": "No MUTAGEN annotations available",
88
+ "reason": (
89
+ "Protein exists in AlphaFold DB but "
90
+ "has no MUTAGEN annotations"
91
+ ),
92
+ "endpoint": url,
93
+ }
94
+ else:
95
+ return {
96
+ "error": "Protein not found in AlphaFold DB",
97
+ "endpoint": url,
98
+ }
99
+ except Exception:
100
+ pass # Fall through to generic error
68
101
  return {"error": "Not found", "endpoint": url}
69
102
  if resp.status_code != 200:
70
103
  return {
@@ -98,9 +131,13 @@ class AlphaFoldRESTTool(BaseTool):
98
131
  if self.output_format.upper() == "JSON":
99
132
  try:
100
133
  data = resp.json()
101
- if not data:
134
+ if not data or (isinstance(data, dict) and not data):
102
135
  return {
103
- "error": "AlphaFold returned an empty response",
136
+ "error": "No MUTAGEN annotations available",
137
+ "reason": (
138
+ "Protein exists in AlphaFold DB but "
139
+ "has no MUTAGEN annotations from UniProt"
140
+ ),
104
141
  "endpoint": url,
105
142
  "query": arguments,
106
143
  }
@@ -124,4 +161,7 @@ class AlphaFoldRESTTool(BaseTool):
124
161
  }
125
162
 
126
163
  # Fallback for non-JSON output
127
- return {"data": resp.text, "metadata": {"endpoint": url, "query": arguments}}
164
+ return {
165
+ "data": resp.text,
166
+ "metadata": {"endpoint": url, "query": arguments},
167
+ }
tooluniverse/base_tool.py CHANGED
@@ -183,7 +183,15 @@ class BaseTool:
183
183
  try:
184
184
  import jsonschema
185
185
 
186
- jsonschema.validate(arguments, schema)
186
+ # Filter out internal control parameters before validation
187
+ # Only filter known internal parameters, not all underscore-prefixed params
188
+ # to allow optional streaming parameter _tooluniverse_stream
189
+ internal_params = {"ctx", "_tooluniverse_stream"}
190
+ filtered_arguments = {
191
+ k: v for k, v in arguments.items() if k not in internal_params
192
+ }
193
+
194
+ jsonschema.validate(filtered_arguments, schema)
187
195
  return None
188
196
  except jsonschema.ValidationError as e:
189
197
  return ToolValidationError(
@@ -387,31 +387,26 @@
387
387
  },
388
388
  {
389
389
  "name": "alphafold_get_annotations",
390
- "description": "Retrieve AlphaFold variant annotations (e.g., missense mutations) for a given UniProt accession. IMPORTANT: The qualifier must be a UniProt ACCESSION (e.g., 'P69905'), along with an annotation type (currently only 'MUTAGEN'). Do NOT use UniProt entry names as they will cause API errors. To find UniProt accession from a gene/protein name, use `UniProt_search` (e.g., query='gene:TP53' organism='human') or `UniProt_id_mapping` for ID conversion. Use this tool to explore predicted pathogenicity or functional effects of substitutions. For experimentally curated variants, use `UniProt_get_disease_variants_by_accession`. To view the full 3D structure, call `alphafold_get_prediction`; for overall model metadata, use `alphafold_get_summary`.",
390
+ "description": "Retrieve AlphaFold MUTAGEN annotations for a given UniProt accession. Returns experimental mutagenesis data mapped onto protein structures from UniProt. The qualifier must be a UniProt ACCESSION (e.g., 'P69905'). Note: Not all proteins have MUTAGEN annotations available in the database.",
391
391
  "type": "AlphaFoldRESTTool",
392
392
  "parameter": {
393
393
  "type": "object",
394
394
  "properties": {
395
395
  "qualifier": {
396
396
  "type": "string",
397
- "description": "Protein identifier: UniProt ACCESSION (e.g., 'P69905'). Do NOT use entry names. To find accession from gene name: use `UniProt_search` or `UniProt_id_mapping`."
398
- },
399
- "type": {
400
- "type": "string",
401
- "description": "Annotation type (currently only 'MUTAGEN' is supported).",
402
- "enum": [
403
- "MUTAGEN"
404
- ]
397
+ "description": "UniProt ACCESSION (e.g., 'P69905'). Must be an accession number, not an entry name."
405
398
  }
406
399
  },
407
400
  "required": [
408
- "qualifier",
409
- "type"
401
+ "qualifier"
410
402
  ]
411
403
  },
412
404
  "fields": {
413
405
  "endpoint": "/annotations/{qualifier}.json",
414
- "return_format": "JSON"
406
+ "return_format": "JSON",
407
+ "auto_query_params": {
408
+ "type": "MUTAGEN"
409
+ }
415
410
  },
416
411
  "return_schema": {
417
412
  "type": "object",
@@ -210,28 +210,36 @@
210
210
  },
211
211
  {
212
212
  "name": "UniProt_search",
213
- "description": "Search UniProtKB database using flexible query syntax. Supports gene names (e.g., 'gene:TP53'), protein names, organism filters (e.g., 'organism:9606'), and complex queries using AND/OR operators. Returns matching proteins with accession numbers and key information. Use this to find UniProt accessions from gene or protein names.",
213
+ "description": "Search UniProtKB database with flexible query syntax. Returns protein entries with accession numbers and metadata. Query syntax supports: field searches (gene:TP53, organism_id:9606, reviewed:true), ranges (length:[100 TO 500], mass:[20000 TO 50000]), wildcards (gene:MEIOB*), boolean operators (AND/OR/NOT), and parentheses for grouping. Examples: 'gene:TP53 AND organism_id:9606', 'length:[400 TO 500] AND reviewed:true', 'tissue:brain NOT organism_id:10090'.",
214
214
  "parameter": {
215
215
  "type": "object",
216
216
  "properties": {
217
217
  "query": {
218
218
  "type": "string",
219
- "description": "Search query. Can be simple (e.g., 'MEIOB') or advanced ('gene:TP53 AND organism:9606'). Keywords: gene, protein_name, organism, reviewed, etc."
219
+ "description": "Search query using UniProt syntax. Simple: 'MEIOB', 'insulin'. Field searches: 'gene:TP53', 'protein_name:insulin', 'organism_id:9606', 'reviewed:true'. Ranges: 'length:[100 TO 500]', 'mass:[20000 TO 50000]'. Wildcards: 'gene:MEIOB*'. Boolean: 'gene:TP53 AND organism_id:9606', 'tissue:brain OR tissue:liver', 'reviewed:true NOT fragment:true'. Use parentheses for grouping: '(organism_id:9606 OR organism_id:10090) AND gene:TP53'. Note: 'organism:' auto-converts to 'organism_id:'."
220
220
  },
221
221
  "organism": {
222
222
  "type": "string",
223
- "description": "Optional organism filter. Can use common name ('human') or taxonomy ID ('9606'). Automatically combined with query using AND."
223
+ "description": "Optional organism filter. Use common names ('human', 'mouse', 'rat', 'yeast') or taxonomy ID ('9606'). Automatically combined with query using AND. Will not duplicate if organism is already in query."
224
224
  },
225
225
  "limit": {
226
226
  "type": "integer",
227
- "description": "Maximum number of results to return (default: 25, max: 500)"
227
+ "description": "Maximum number of results to return (default: 25, max: 500). Accepts string or integer."
228
+ },
229
+ "min_length": {
230
+ "type": "integer",
231
+ "description": "Minimum sequence length. Auto-converts to 'length:[min TO *]' range query."
232
+ },
233
+ "max_length": {
234
+ "type": "integer",
235
+ "description": "Maximum sequence length. Auto-converts to 'length:[* TO max]' range query."
228
236
  },
229
237
  "fields": {
230
238
  "type": "array",
231
239
  "items": {
232
240
  "type": "string"
233
241
  },
234
- "description": "Optional list of fields to return. Default returns: accession, id, protein_name, gene_names, organism, length"
242
+ "description": "List of field names to return (e.g., ['accession','gene_primary','length','organism_name']). When specified, returns raw API response with requested fields. Common fields: accession, id, gene_names, gene_primary, protein_name, organism_name, organism_id, length, mass, sequence, reviewed, cc_function. See UniProt API docs for full list. Default (no fields): returns formatted response with accession, id, protein_name, gene_names, organism, length."
235
243
  }
236
244
  },
237
245
  "required": ["query"]
@@ -354,6 +354,12 @@ class ToolUniverse:
354
354
  "TOOLUNIVERSE_STRICT_VALIDATION", "false"
355
355
  ).lower() in ("true", "1", "yes")
356
356
 
357
+ # Initialize lenient type coercion feature
358
+ # Default: True for better user experience
359
+ self.lenient_type_coercion = os.getenv(
360
+ "TOOLUNIVERSE_COERCE_TYPES", "true"
361
+ ).lower() in ("true", "1", "yes")
362
+
357
363
  # Initialize dynamic tools namespace
358
364
  self.tools = ToolNamespace(self)
359
365
 
@@ -2113,21 +2119,28 @@ class ToolUniverse:
2113
2119
  )
2114
2120
  return cached_value
2115
2121
 
2122
+ # Coerce types if lenient coercion is enabled
2123
+ if self.lenient_type_coercion:
2124
+ arguments = self._coerce_arguments_to_schema(function_name, arguments)
2125
+ # Update the original dict so coerced arguments are used
2126
+ function_call_json["arguments"] = arguments
2127
+
2116
2128
  # Validate parameters if requested
2117
2129
  if validate:
2118
2130
  validation_error = self._validate_parameters(function_name, arguments)
2119
2131
  if validation_error:
2120
2132
  return self._create_dual_format_error(validation_error)
2121
-
2122
- # Check function call format (existing validation)
2123
- check_status, check_message = self.check_function_call(function_call_json)
2124
- if check_status is False:
2125
- error_msg = "Invalid function call: " + check_message
2126
- return self._create_dual_format_error(
2127
- ToolValidationError(
2128
- error_msg, details={"check_message": check_message}
2133
+ else:
2134
+ # When validate=False, perform lightweight checks:
2135
+ # 1. Verify tool exists in all_tool_dict
2136
+ # 2. No parameter validation (for performance)
2137
+ if function_name not in self.all_tool_dict:
2138
+ return self._create_dual_format_error(
2139
+ ToolValidationError(
2140
+ f"Tool '{function_name}' not found",
2141
+ details={"tool_name": function_name},
2142
+ )
2129
2143
  )
2130
- )
2131
2144
 
2132
2145
  # Execute the tool
2133
2146
  tool_arguments = arguments
@@ -2421,6 +2434,123 @@ class ToolUniverse:
2421
2434
  )
2422
2435
  return hashlib.md5(serialized.encode()).hexdigest()
2423
2436
 
2437
def _coerce_value_to_type(self, value: Any, schema: dict) -> Any:
    """
    Coerce a value to match the schema's expected type.

    String values are converted to integers, floats, or booleans when the
    schema expects those types, and lists are coerced element by element
    against the schema's ``items``. This makes the system more lenient
    with input from LLMs that provide numeric or boolean values as strings.

    Args:
        value: The value to coerce
        schema: The JSON schema definition for this value

    Returns:
        The coerced value, or the original value unchanged when coercion
        is not applicable or fails. For an array schema with ``items`` and
        a list value, a new list with each item coerced is returned.
    """
    # A schema that is not a mapping (e.g. a boolean schema, or the
    # list form of ``items``) gives no single type to coerce towards.
    if not isinstance(schema, dict):
        return value

    # Lists must be handled before the string-only guard below; otherwise
    # the guard returns early and array items are never coerced.
    if isinstance(value, list):
        if schema.get("type") == "array" and "items" in schema:
            items_schema = schema["items"]
            return [
                self._coerce_value_to_type(item, items_schema) for item in value
            ]
        return value

    # Beyond lists, only string values are candidates for coercion
    if not isinstance(value, str):
        return value

    # Handle anyOf/oneOf schemas by trying each option in order. The first
    # keyword present decides the outcome (anyOf takes precedence).
    for keyword in ("anyOf", "oneOf"):
        if keyword in schema:
            for option in schema[keyword]:
                coerced = self._coerce_value_to_type(value, option)
                if coerced is not value:  # Coercion succeeded
                    return coerced
            return value

    expected_type = schema.get("type")

    if expected_type == "integer":
        try:
            # Only parse as int if it represents an integer (not a float)
            if "." not in value:
                return int(value)
        except (ValueError, TypeError):
            # Coercion failed: fall through and return the original value
            pass
    elif expected_type == "number":
        try:
            return float(value)
        except (ValueError, TypeError):
            pass
    elif expected_type == "boolean":
        # Handle common boolean string representations
        lower_value = value.lower().strip()
        if lower_value in ("true", "1", "yes", "on"):
            return True
        if lower_value in ("false", "0", "no", "off"):
            return False

    # "string", "array" (with a string value), unknown or missing types:
    # leave the value untouched and let schema validation decide.
    return value
2512
+
2513
def _coerce_arguments_to_schema(self, function_name: str, arguments: dict) -> dict:
    """
    Coerce all arguments for a tool to match their schema expectations.

    Each argument that has an entry in the tool's ``parameter.properties``
    schema is passed through ``_coerce_value_to_type``; arguments without
    a schema entry are copied unchanged. The input dictionary is never
    mutated.

    Args:
        function_name: Name of the tool
        arguments: Dictionary of arguments to coerce

    Returns:
        New dictionary with coerced arguments. The original ``arguments``
        object is returned untouched when the tool is unknown, when it
        declares no properties, or when ``arguments`` is not a dictionary.
    """
    # Coercion runs before validation, so malformed input must pass
    # through untouched here and be reported by the validation step.
    if not isinstance(arguments, dict):
        return arguments

    if function_name not in self.all_tool_dict:
        return arguments

    tool_config = self.all_tool_dict[function_name]
    # ``or {}`` also covers configs that spell these keys out as null
    parameter_schema = tool_config.get("parameter") or {}
    properties = parameter_schema.get("properties") or {}

    if not properties:
        return arguments

    coerced_args = {}
    for param_name, param_value in arguments.items():
        param_schema = properties.get(param_name)
        if param_schema is None:
            # No schema for this argument: nothing to coerce towards
            coerced_args[param_name] = param_value
            continue

        coerced_value = self._coerce_value_to_type(param_value, param_schema)

        # Log when coercion occurs. The identity check comes first so that
        # untouched pass-through objects are never compared with ``!=``.
        if coerced_value is not param_value and coerced_value != param_value:
            self.logger.debug(
                f"Coerced parameter '{param_name}' from "
                f"{param_value!r} ({type(param_value).__name__}) "
                f"to {coerced_value!r} ({type(coerced_value).__name__})"
            )

        coerced_args[param_name] = coerced_value

    return coerced_args
2553
+
2424
2554
  def _validate_parameters(
2425
2555
  self, function_name: str, arguments: dict
2426
2556
  ) -> Optional[ToolError]:
@@ -206,20 +206,15 @@ class BasePythonExecutor:
206
206
  raise TimeoutError("Code execution timed out")
207
207
 
208
208
  def _execute_with_timeout(self, func, timeout_seconds: int, *args, **kwargs):
209
- """Execute function with timeout using signal (Unix only)."""
210
- if hasattr(signal, "SIGALRM"): # Unix systems
211
- old_handler = signal.signal(signal.SIGALRM, self._handle_timeout)
212
- signal.alarm(timeout_seconds)
213
- try:
214
- result = func(*args, **kwargs)
215
- return result
216
- finally:
217
- signal.alarm(0)
218
- signal.signal(signal.SIGALRM, old_handler)
219
- else: # Windows or other systems
220
- # Fallback to threading timeout (simpler but less reliable)
221
- import threading
209
+ """Execute function with timeout using signal or threading."""
210
+ import threading
211
+
212
+ # Check if we're in the main thread
213
+ is_main_thread = threading.current_thread() is threading.main_thread()
222
214
 
215
+ # Use threading timeout if not in main thread or on Windows
216
+ if not is_main_thread or not hasattr(signal, "SIGALRM"):
217
+ # Use threading timeout (works in all threads)
223
218
  result_container = [None]
224
219
  exception_container = [None]
225
220
 
@@ -242,6 +237,41 @@ class BasePythonExecutor:
242
237
 
243
238
  return result_container[0]
244
239
 
240
+ # Use signal timeout only in main thread on Unix systems
241
+ else:
242
+ try:
243
+ old_handler = signal.signal(signal.SIGALRM, self._handle_timeout)
244
+ signal.alarm(timeout_seconds)
245
+ try:
246
+ result = func(*args, **kwargs)
247
+ return result
248
+ finally:
249
+ signal.alarm(0)
250
+ signal.signal(signal.SIGALRM, old_handler)
251
+ except (ValueError, AttributeError):
252
+ # Fallback to threading if signal fails for any reason
253
+ result_container = [None]
254
+ exception_container = [None]
255
+
256
+ def target():
257
+ try:
258
+ result_container[0] = func(*args, **kwargs)
259
+ except Exception as e:
260
+ exception_container[0] = e
261
+
262
+ thread = threading.Thread(target=target)
263
+ thread.daemon = True
264
+ thread.start()
265
+ thread.join(timeout_seconds)
266
+
267
+ if thread.is_alive():
268
+ raise TimeoutError("Code execution timed out")
269
+
270
+ if exception_container[0]:
271
+ raise exception_container[0]
272
+
273
+ return result_container[0]
274
+
245
275
  def _format_error_response(
246
276
  self,
247
277
  error: Exception,
tooluniverse/smcp.py CHANGED
@@ -2230,13 +2230,16 @@ class SMCP(FastMCP):
2230
2230
  python_type = str
2231
2231
  # For string type, don't add json_schema_extra - let Pydantic handle it
2232
2232
  elif param_type == "integer":
2233
- python_type = int
2233
+ # Allow both string and int for lenient coercion
2234
+ python_type = Union[int, str]
2234
2235
  # For integer type, don't add json_schema_extra - let Pydantic handle it
2235
2236
  elif param_type == "number":
2236
- python_type = float
2237
+ # Allow both string and float for lenient coercion
2238
+ python_type = Union[float, str]
2237
2239
  # For number type, don't add json_schema_extra - let Pydantic handle it
2238
2240
  elif param_type == "boolean":
2239
- python_type = bool
2241
+ # Allow both string and bool for lenient coercion
2242
+ python_type = Union[bool, str]
2240
2243
  # For boolean type, don't add json_schema_extra - let Pydantic handle it
2241
2244
  elif param_type == "array":
2242
2245
  python_type = list
@@ -2334,32 +2337,21 @@ class SMCP(FastMCP):
2334
2337
  )
2335
2338
  )
2336
2339
 
2337
- # Add optional streaming parameter to signature
2338
- stream_field = Field(
2339
- description="Set to true to receive incremental streaming output (experimental)."
2340
- )
2341
- stream_annotation = Annotated[Union[bool, type(None)], stream_field]
2342
- param_annotations["_tooluniverse_stream"] = stream_annotation
2343
- func_params.append(
2344
- inspect.Parameter(
2345
- "_tooluniverse_stream",
2346
- inspect.Parameter.POSITIONAL_OR_KEYWORD,
2347
- default=None,
2348
- annotation=stream_annotation,
2349
- )
2350
- )
2351
-
2352
- # Note: ctx parameter removed as it causes Pydantic schema issues
2353
- # FastMCP context injection is handled internally by FastMCP
2340
+ # Add _tooluniverse_stream as an optional parameter for streaming support
2341
+ # This parameter is NOT exposed in the MCP schema (it's in kwargs but not in param_annotations)
2342
+ # Users can pass it to enable streaming, but it won't appear in the tool schema
2354
2343
 
2355
2344
  async def dynamic_tool_function(**kwargs) -> str:
2356
2345
  """Execute ToolUniverse tool with provided arguments."""
2357
2346
  try:
2358
2347
  # Remove ctx if present (legacy support)
2359
2348
  ctx = kwargs.pop("ctx", None) if "ctx" in kwargs else None
2360
- stream_flag = bool(kwargs.get("_tooluniverse_stream"))
2349
+ # Extract streaming flag (users can optionally pass this)
2350
+ stream_flag = bool(kwargs.pop("_tooluniverse_stream", False))
2361
2351
 
2362
- # Filter out None values for optional parameters (preserve streaming flag)
2352
+ # Filter out None values for optional parameters
2353
+ # Note: _tooluniverse_stream was extracted and popped above
2354
+ # so it won't be in args_dict, which is what we want
2363
2355
  args_dict = {k: v for k, v in kwargs.items() if v is not None}
2364
2356
 
2365
2357
  # Validate required parameters (check against args_dict, not filtered_args)
@@ -2409,9 +2401,9 @@ class SMCP(FastMCP):
2409
2401
  # Assign the function to stream_callback
2410
2402
  stream_callback = _stream_callback
2411
2403
 
2412
- # Ensure downstream tools see the streaming flag
2413
- if "_tooluniverse_stream" not in args_dict:
2414
- args_dict["_tooluniverse_stream"] = True
2404
+ # Note: _tooluniverse_stream was extracted from kwargs above
2405
+ # and is not passed to the tool. The stream_callback is sufficient
2406
+ # to enable streaming for downstream tools.
2415
2407
 
2416
2408
  run_callable = functools.partial(
2417
2409
  self.tooluniverse.run_one_function,
@@ -1,7 +1,9 @@
1
1
  """
2
2
  UniProt_search
3
3
 
4
- Search UniProtKB database using flexible query syntax. Supports gene names (e.g., 'gene:TP53'), p...
4
+ Search UniProtKB database using flexible query syntax.
5
+ Supports gene names (e.g., 'gene:TP53'), protein names,
6
+ organism filters, and complex queries.
5
7
  """
6
8
 
7
9
  from typing import Any, Optional, Callable
@@ -13,25 +15,48 @@ def UniProt_search(
13
15
  organism: Optional[str] = None,
14
16
  limit: Optional[int] = None,
15
17
  fields: Optional[list[Any]] = None,
18
+ min_length: Optional[int] = None,
19
+ max_length: Optional[int] = None,
16
20
  *,
17
21
  stream_callback: Optional[Callable[[str], None]] = None,
18
22
  use_cache: bool = False,
19
23
  validate: bool = True,
20
24
  ) -> Any:
21
25
  """
22
- Search UniProtKB database using flexible query syntax. Supports gene names (e.g., 'gene:TP53'), p...
26
+ Search UniProtKB database with flexible query syntax.
27
+
28
+ Search UniProtKB and return protein entries. Supports field searches,
29
+ ranges, wildcards, boolean operators, and parentheses for grouping.
23
30
 
24
31
  Parameters
25
32
  ----------
26
33
  query : str
27
- Search query. Can be simple (e.g., 'MEIOB') or advanced ('gene:TP53 AND organ...
28
- organism : str
29
- Optional organism filter. Can use common name ('human') or taxonomy ID ('9606...
30
- limit : int
31
- Maximum number of results to return (default: 25, max: 500)
32
- fields : list[Any]
33
- Optional list of fields to return. Default returns: accession, id, protein_na...
34
- stream_callback : Callable, optional
34
+ Search query. Examples:
35
+ - Simple: 'MEIOB', 'insulin'
36
+ - Field: 'gene:TP53', 'organism_id:9606', 'reviewed:true'
37
+ - Range: 'length:[100 TO 500]', 'mass:[20000 TO 50000]'
38
+ - Wildcard: 'gene:MEIOB*'
39
+ - Boolean: 'gene:TP53 AND organism_id:9606'
40
+ - Grouped: '(organism_id:9606 OR organism_id:10090) AND
41
+ gene:TP53'
42
+ organism : str, optional
43
+ Organism filter. Use 'human', 'mouse', 'rat', 'yeast' or
44
+ taxonomy ID like '9606'. Combined with query using AND.
45
+ limit : int, optional
46
+ Maximum results to return (default: 25, max: 500).
47
+ Accepts string or integer.
48
+ fields : list[str], optional
49
+ Field names to return. When specified, returns raw API response.
50
+ Common: accession, id, gene_names, gene_primary, protein_name,
51
+ organism_name, organism_id, length, mass, sequence, reviewed,
52
+ cc_function.
53
+ Default: formatted response with accession, id, protein_name,
54
+ gene_names, organism, length.
55
+ min_length : int, optional
56
+ Minimum sequence length. Converts to 'length:[min TO *]'.
57
+ max_length : int, optional
58
+ Maximum sequence length. Converts to 'length:[* TO max]'.
59
+ stream_callback : callable, optional
35
60
  Callback for streaming output
36
61
  use_cache : bool, default False
37
62
  Enable caching
@@ -40,7 +65,15 @@ def UniProt_search(
40
65
 
41
66
  Returns
42
67
  -------
43
- Any
68
+ dict
69
+ Search results with total_results, returned count, and results
70
+ list
71
+
72
+ Examples
73
+ --------
74
+ >>> UniProt_search("gene:TP53", organism="human", limit=5)
75
+ >>> UniProt_search("insulin", fields=['accession', 'length'])
76
+ >>> UniProt_search("gene:MEIOB", min_length=400, max_length=500)
44
77
  """
45
78
  # Handle mutable defaults to avoid B006 linting error
46
79
 
@@ -52,6 +85,8 @@ def UniProt_search(
52
85
  "organism": organism,
53
86
  "limit": limit,
54
87
  "fields": fields,
88
+ "min_length": min_length,
89
+ "max_length": max_length,
55
90
  },
56
91
  },
57
92
  stream_callback=stream_callback,
@@ -1,7 +1,7 @@
1
1
  import time
2
2
  import requests
3
- from typing import Any, Dict
4
- from .base_tool import BaseTool
3
+ from typing import Any, Dict, Optional
4
+ from .base_tool import BaseTool, ToolError
5
5
  from .tool_registry import register_tool
6
6
 
7
7
 
@@ -13,6 +13,21 @@ class UniProtRESTTool(BaseTool):
13
13
  self.extract_path = tool_config["fields"].get("extract_path")
14
14
  self.timeout = 15 # Increase timeout for large entries
15
15
 
16
def validate_parameters(self, arguments: Dict[str, Any]) -> Optional[ToolError]:
    """
    Run the parent validation after normalising a string ``limit``.

    When ``limit`` is supplied as a string it is converted to an integer
    in place inside ``arguments``. A string that cannot be parsed is left
    untouched so that the parent validation reports the problem.
    """
    raw_limit = arguments.get("limit")
    if isinstance(raw_limit, str):
        try:
            arguments["limit"] = int(raw_limit)
        except (ValueError, TypeError):
            # Unparseable: keep the original string for the parent
            # validation to reject.
            pass

    # Delegate the actual checks to the parent implementation
    return super().validate_parameters(arguments)
30
+
16
31
  def _build_url(self, args: Dict[str, Any]) -> str:
17
32
  url = self.endpoint
18
33
  for k, v in args.items():
@@ -107,8 +122,19 @@ class UniProtRESTTool(BaseTool):
107
122
  """Handle search queries with flexible parameters"""
108
123
  query = arguments.get("query", "")
109
124
  organism = arguments.get("organism", "")
110
- limit = min(arguments.get("limit", 25), 500)
111
125
  fields = arguments.get("fields")
126
+ min_length = arguments.get("min_length")
127
+ max_length = arguments.get("max_length")
128
+
129
+ # Coerce limit to integer if passed as string
130
+ limit_value = arguments.get("limit", 25)
131
+ if isinstance(limit_value, str):
132
+ limit_value = int(limit_value)
133
+ limit = min(limit_value, 500)
134
+
135
+ # Normalize query: replace 'organism:' with 'organism_id:'
136
+ # for UniProt API compatibility
137
+ query = query.replace("organism:", "organism_id:")
112
138
 
113
139
  # Build query string
114
140
  query_parts = [query]
@@ -121,7 +147,18 @@ class UniProtRESTTool(BaseTool):
121
147
  "yeast": "559292",
122
148
  }
123
149
  taxon_id = organism_map.get(organism.lower(), organism)
124
- query_parts.append(f"organism_id:{taxon_id}")
150
+
151
+ # Check if query already includes organism_id filter
152
+ # to avoid duplication
153
+ if "organism_id:" not in query.lower():
154
+ query_parts.append(f"organism_id:{taxon_id}")
155
+ # If it does, skip adding the organism filter
156
+
157
+ # Auto-convert length parameters to range syntax
158
+ if min_length or max_length:
159
+ min_val = min_length if min_length else "*"
160
+ max_val = max_length if max_length else "*"
161
+ query_parts.append(f"length:[{min_val} TO {max_val}]")
125
162
 
126
163
  full_query = " AND ".join(query_parts)
127
164
 
@@ -141,6 +178,16 @@ class UniProtRESTTool(BaseTool):
141
178
 
142
179
  # Extract results
143
180
  results = data.get("results", [])
181
+
182
+ # If custom fields requested, return raw API response for flexibility
183
+ if fields and isinstance(fields, list):
184
+ return {
185
+ "total_results": data.get("resultsFound", 0),
186
+ "returned": len(results),
187
+ "results": results, # Return raw results when custom fields used
188
+ }
189
+
190
+ # Otherwise, use formatted extraction logic
144
191
  formatted_results = []
145
192
 
146
193
  for entry in results:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tooluniverse
3
- Version: 1.0.11.1
3
+ Version: 1.0.11.2
4
4
  Summary: A comprehensive collection of scientific tools for Agentic AI, offering integration with the ToolUniverse SDK and MCP Server to support advanced scientific workflows.
5
5
  Author-email: Shanghua Gao <shanghuagao@gmail.com>
6
6
  Project-URL: Homepage, https://github.com/mims-harvard/ToolUniverse
@@ -1,9 +1,9 @@
1
1
  tooluniverse/__init__.py,sha256=RcmpZgvV5Olhnu24jhZ6KFmLMN6oHby-vS52yt4HIhc,19251
2
2
  tooluniverse/admetai_tool.py,sha256=k83iqy8_BeE2R_xPJlZ-xaEwaTwNvGc0u4ZUMV7ocwk,3469
3
3
  tooluniverse/agentic_tool.py,sha256=jN7UC6uQp7fxpDYqJf_2BWOb3b9z1oZufFQCRXBVeDM,27723
4
- tooluniverse/alphafold_tool.py,sha256=NkKQttC0MfuetCTTcWTFEG2MdhyKwy3jb5TR4odiqAw,4658
4
+ tooluniverse/alphafold_tool.py,sha256=c1XyFornqv1QYJkPcZd2Tmlevnm1PuwnNvB6sxjerMg,6350
5
5
  tooluniverse/arxiv_tool.py,sha256=60e5moGt3XnwZeKx_nrvRZrwAavE11ok0j78AI5F6Xs,3610
6
- tooluniverse/base_tool.py,sha256=B268ek7tmA3jH56tPhQEMmG2J_Q5xqhH_6GGES56lbk,12861
6
+ tooluniverse/base_tool.py,sha256=QsoCPiEND2zWzVa-zDc7MmtrF3grmdKkb1X4OjDvc-4,13293
7
7
  tooluniverse/biogrid_tool.py,sha256=tszza6Kf1-PKsoVw5GIqLak20FvhFY_PFSVY9XxDuIQ,4274
8
8
  tooluniverse/biorxiv_tool.py,sha256=uffhxXMOWR6b2jzxka-Qss1LjliQTuX-skIIc7RAaPs,4133
9
9
  tooluniverse/blast_tool.py,sha256=BPvkxeMWboLpwLJmO3yi4u0Dx1vvKOqKVO5tLa4xvGM,4960
@@ -33,7 +33,7 @@ tooluniverse/enrichr_tool.py,sha256=beM7xZiZ6FdHgYQUSNb_jfkcbrNC8CumXNJgPE9e1PI,
33
33
  tooluniverse/ensembl_tool.py,sha256=Fjn7dABWpZ55DOQi0sBH2j84nPwSmU4yog3D2pBHAT4,4970
34
34
  tooluniverse/europe_pmc_tool.py,sha256=-3CAq5tDhS5NoeHifevadBgP_MHDZJdwNmM5SvjbAa8,6080
35
35
  tooluniverse/exceptions.py,sha256=3svVCG7H_G_8E1mco74KWi44u1M7V1JojVycSEWdi_o,5386
36
- tooluniverse/execute_function.py,sha256=GatQ21Lqf5BXGwngF7YBD5s-wSsh9TaAvuSAvka6OVs,128407
36
+ tooluniverse/execute_function.py,sha256=Kg-f_jXFKqoEcxlvFzdIY7wN77nln9prdGP-2h9X4_I,133347
37
37
  tooluniverse/extended_hooks.py,sha256=bBI9SZM6IFiHblsZscBotOFNwlkUNi1hrp3cS7JVuZk,15372
38
38
  tooluniverse/fatcat_tool.py,sha256=-QEOYbv6KEcrOiOa27Ms1xm0sySB-vW4rWEinsS6EDw,2051
39
39
  tooluniverse/file_download_tool.py,sha256=jFH8cFNvcd6snO2lMw1NCdZAPoQ5zoEHi7MGIeTTut4,9354
@@ -82,7 +82,7 @@ tooluniverse/pubchem_tool.py,sha256=14C_5uz0RNzv5HaSAXHlq13TZLtlkhmJhr7RrjI91Yk,
82
82
  tooluniverse/pubmed_tool.py,sha256=EYPL_fsUdHuL-yFFULavWZu8lPp23wVKIJ-avDLlFeQ,6146
83
83
  tooluniverse/pubtator_tool.py,sha256=WdFufeT3hY2lCq6vb9SLF44UYJ48PiZ1mf4bSfpI7mg,7151
84
84
  tooluniverse/pypi_package_inspector_tool.py,sha256=EvBA9TY3s-FHKy6QNpl8b5Q_7dlFCO85AQ8BcKi5Xpw,21653
85
- tooluniverse/python_executor_tool.py,sha256=8h3sE7nu3WsTNswYZ1h56--fTi7ey1WeQxbHG7dcIA4,26661
85
+ tooluniverse/python_executor_tool.py,sha256=vBTvJagjUf4xPyppG90XRr8peCDGt-eZZfyFOVUNtiI,27773
86
86
  tooluniverse/rcsb_pdb_tool.py,sha256=8N6bvlD2KIHfNum_FvbXwIa96mKvHorno5ZdjAHgmrc,1550
87
87
  tooluniverse/reactome_tool.py,sha256=ngmJXCcQfsu5d8XcX258ZHW4DIBi45sSjYNwUBk8nxo,4481
88
88
  tooluniverse/regulomedb_tool.py,sha256=jPKd1rce0Yfn3Gns8MmcV0vgxmUTyjCH5sjfeQNyx0I,1117
@@ -91,7 +91,7 @@ tooluniverse/remote_tool.py,sha256=Sa7YNeitFaHyUS6P5gY79kqJdp4i6zHSr5VbM1SdQpI,3
91
91
  tooluniverse/restful_tool.py,sha256=H7alq_vO3G4iVgZIZrrWh9vq2ForJN52Fl7_QOq7yaM,4409
92
92
  tooluniverse/screen_tool.py,sha256=JuSj-qhFUSuYsHlyQzdXdOhVhEzVoJjSql1dWpLOp4A,1580
93
93
  tooluniverse/semantic_scholar_tool.py,sha256=gaDDL5Xy4Ij3qNqipZvwRfOBSyzGyIN8AANT-pYKHvI,3227
94
- tooluniverse/smcp.py,sha256=t7cGLI1rRPoPXv42NI-KLjbQjraVkGipH5k64S13Svg,107782
94
+ tooluniverse/smcp.py,sha256=USjjf86NPBmR7xSQ0mGhekcEmADPZcS2rlL73sB9Dkw,107771
95
95
  tooluniverse/smcp_server.py,sha256=kLJesbeWOSL1eBhVlv5k1WCTG4BRLTpDEkfySyaFG-8,35214
96
96
  tooluniverse/string_tool.py,sha256=3rio0Lt97FuU6fbpykq3ETqD9pA4h_gPR0uXKRb7Zu4,3936
97
97
  tooluniverse/tool_finder_embedding.py,sha256=JDSPlI51grK51aX3072qc4mhbYVrhKaOLnY3hA7hDjM,11274
@@ -100,7 +100,7 @@ tooluniverse/tool_finder_llm.py,sha256=Pg2IFG8icLMxDUlFdx-L6Fw5ntAbCYo4cbPi5f54a
100
100
  tooluniverse/tool_graph_web_ui.py,sha256=68s1MPXTcT6bzdFfuat9gvSNjSgwHxNC-TztfH3raeQ,27987
101
101
  tooluniverse/tool_registry.py,sha256=DYA97MzFgIZ_8oAy0CTqg99pfz_bIS-0A-gbyLkiQuw,15616
102
102
  tooluniverse/unified_guideline_tools.py,sha256=Vhnqis4je3JZDJb7KfynAMX2D2yvW9I71_9mIsG1COU,88376
103
- tooluniverse/uniprot_tool.py,sha256=SXhvFyO5HR0xb3LIBpUKWbrjQ-sk5vQKjptXHkR8-Zk,14362
103
+ tooluniverse/uniprot_tool.py,sha256=3aZSxKC75hz90UEjiEKxOAEekIIA3O3MsIj09xd7VEE,16336
104
104
  tooluniverse/unpaywall_tool.py,sha256=eqdsZZMRCOsMQjMzJRzkNk9xSwIWlsRrrPiTrIBDsFg,2070
105
105
  tooluniverse/url_tool.py,sha256=ofaAXpt5i6cUnAssM-jtJp73_cH1XaiByS9ZcGM9GI0,10023
106
106
  tooluniverse/uspto_tool.py,sha256=jvpCIQkNOI7v5F2CVttoHm8EKYnFgGUQzs9xXxZWEUE,8991
@@ -132,7 +132,7 @@ tooluniverse/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
132
132
  tooluniverse/data/admetai_tools.json,sha256=08LCNSCZJxL6ZuPF1mVfbdlh9f2X5tnEwIYU7RcLLSM,6497
133
133
  tooluniverse/data/adverse_event_tools.json,sha256=gmO2euqIvLqc9lfmf9h9W0_TR33AYpMg_M1tzBNKm7w,12084
134
134
  tooluniverse/data/agentic_tools.json,sha256=vRoYpQZKdzaE7QvumwM47T18plgaiiNgrymwYIQS6wU,58360
135
- tooluniverse/data/alphafold_tools.json,sha256=fXyHxv_FuCfEkaP9IAJx_bU8a5BzDD73djD5ZO-F0r8,14856
135
+ tooluniverse/data/alphafold_tools.json,sha256=G5C6GhU4rIesRaZbkuJAH3w__UtosvWedJ9XiJ0J54w,14177
136
136
  tooluniverse/data/arxiv_tools.json,sha256=v4FNgFMhNX2WBNajje1xpa5250kvifm_RXY1WzPGgFw,2591
137
137
  tooluniverse/data/biorxiv_tools.json,sha256=i7iNyLJDRCPlNd6QjQ9Ml0rUwkcBPjXLG3QOa6cT2sI,2007
138
138
  tooluniverse/data/blast_tools.json,sha256=8tMAZrXUpRGY_UxWAu3lGBdzZRSwy_D_huFMux747qk,3898
@@ -218,7 +218,7 @@ tooluniverse/data/tool_discovery_agents.json.backup,sha256=LXKpggu1F32PcyUHQQOTf
218
218
  tooluniverse/data/toolfinderkeyword_defaults.json,sha256=bydTVd0IUSDiilNQ6kZU-cn4lPqOfInfUAxhh8yMXqY,1050
219
219
  tooluniverse/data/txagent_client_tools.json,sha256=GdQk5H6uP_Uj9Ejz3nMdzNhZJV42t75aiRW8oWtkzRw,409
220
220
  tooluniverse/data/unified_guideline_tools.json,sha256=SzBhv6MCojmdtV6tGyZ5sUqskrr5zS_SQC5TXowVUAo,30688
221
- tooluniverse/data/uniprot_tools.json,sha256=nhJQ9E_kVKB16ehQNwRos8-QRbZJSXYODwgCr120xas,11101
221
+ tooluniverse/data/uniprot_tools.json,sha256=8Hx39uBXsXXKeBlcljDZw9wLTklcx32kh8QIuIFjaWY,12361
222
222
  tooluniverse/data/unpaywall_tools.json,sha256=7ciTSVOeaCwmIA88Lrm_t17mNDqCxPZMtI1pHYsaRco,3123
223
223
  tooluniverse/data/url_fetch_tools.json,sha256=Z_wFzpj9XLUnrELk5KLFpj5pvxifOy2JepPhvmhVTJs,2719
224
224
  tooluniverse/data/uspto_downloader_tools.json,sha256=hMHwvP_hvuFg-upZAAXufywYKx031iQ0xuMyHfzu0uA,349
@@ -639,7 +639,7 @@ tooluniverse/tools/UniProt_get_recommended_name_by_accession.py,sha256=IxvU5rSqA
639
639
  tooluniverse/tools/UniProt_get_sequence_by_accession.py,sha256=NTqhL5XoApu6BFNaPPXjlaDk33Vu6rDkVsdXLOByU38,1171
640
640
  tooluniverse/tools/UniProt_get_subcellular_location_by_accession.py,sha256=oxANci3oT5W7_EB6nCtIPA75QADMFMOihs8ihziO9dI,1315
641
641
  tooluniverse/tools/UniProt_id_mapping.py,sha256=_dnr9B3YZg3Uw2u4N95z2l5cBzMb8sVt5K0pXkqkzSA,1742
642
- tooluniverse/tools/UniProt_search.py,sha256=XFrxQnYTZLjUygEBpszaCtJTcPCqgQOuige0N8SfpIQ,1769
642
+ tooluniverse/tools/UniProt_search.py,sha256=ZhpY3rfdJb_mPFK3Kh6J4HjPWVlNPfeI5lfH58LYVPM,3213
643
643
  tooluniverse/tools/UnifiedToolGenerator.py,sha256=acNEyvmkFUxfrSLVC2hg6TOsa51gCdzmJGRCeNMYBPQ,1646
644
644
  tooluniverse/tools/Unpaywall_check_oa_status.py,sha256=ZQKQQ0f9dSQcEcI26-QCmSh3yCRmsGlQIGGHOMZuXz0,1409
645
645
  tooluniverse/tools/WHO_Guideline_Full_Text.py,sha256=GkM3_FjGc4t-VWYIdhW1t6zd_dRCyGv4ZQBKi78A5ZE,1236
@@ -974,9 +974,9 @@ tooluniverse/tools/visualize_molecule_3d.py,sha256=0fFkjGz0hrNyRBiXRV_FY8yWXR47j
974
974
  tooluniverse/tools/visualize_protein_structure_3d.py,sha256=ifTkgPvtNtvJ9q8d7zmSqSnd32oFmRcKkoO8p1SdVUU,2358
975
975
  tooluniverse/tools/web_api_documentation_search.py,sha256=AVw0w3LylreiKB6pCGooVUvZ1jydsuJnAzxD8HVf6vg,1707
976
976
  tooluniverse/tools/web_search.py,sha256=IjpR7Z6glY4DYX39YhSk6UP4bj5_f1LYAdr8WU9uaFk,1866
977
- tooluniverse-1.0.11.1.dist-info/licenses/LICENSE,sha256=0P0qDClpVzrnQUe3QAu7V-alszSBa-6hVHXyn9Xe60k,11351
978
- tooluniverse-1.0.11.1.dist-info/METADATA,sha256=UjiTcWroccK8NnbdwNdscNxRCuZ8yoP0Pdk8-SH4sFI,21364
979
- tooluniverse-1.0.11.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
980
- tooluniverse-1.0.11.1.dist-info/entry_points.txt,sha256=SAdqP4tU4mMOtp1YWaOHTwnIHhnoiD1wmfKGlSfNSy0,647
981
- tooluniverse-1.0.11.1.dist-info/top_level.txt,sha256=zZ8YeCJ5FAkEwdd_mxsFtSCQMBDgBdxrrmHo3RNBiWs,13
982
- tooluniverse-1.0.11.1.dist-info/RECORD,,
977
+ tooluniverse-1.0.11.2.dist-info/licenses/LICENSE,sha256=0P0qDClpVzrnQUe3QAu7V-alszSBa-6hVHXyn9Xe60k,11351
978
+ tooluniverse-1.0.11.2.dist-info/METADATA,sha256=a6BEnz8gyH38Qls_S_YtBbqFGJ3NI9JIghMOVAHX6GE,21364
979
+ tooluniverse-1.0.11.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
980
+ tooluniverse-1.0.11.2.dist-info/entry_points.txt,sha256=SAdqP4tU4mMOtp1YWaOHTwnIHhnoiD1wmfKGlSfNSy0,647
981
+ tooluniverse-1.0.11.2.dist-info/top_level.txt,sha256=zZ8YeCJ5FAkEwdd_mxsFtSCQMBDgBdxrrmHo3RNBiWs,13
982
+ tooluniverse-1.0.11.2.dist-info/RECORD,,