tooluniverse 1.0.9.1__py3-none-any.whl → 1.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of tooluniverse might be problematic.
- tooluniverse/__init__.py +57 -1
- tooluniverse/admetai_tool.py +1 -1
- tooluniverse/agentic_tool.py +65 -17
- tooluniverse/base_tool.py +19 -8
- tooluniverse/blast_tool.py +132 -0
- tooluniverse/boltz_tool.py +3 -3
- tooluniverse/cache/result_cache_manager.py +167 -12
- tooluniverse/cbioportal_tool.py +42 -0
- tooluniverse/clinvar_tool.py +268 -74
- tooluniverse/compose_scripts/drug_safety_analyzer.py +1 -1
- tooluniverse/compose_scripts/multi_agent_literature_search.py +1 -1
- tooluniverse/compose_scripts/output_summarizer.py +4 -4
- tooluniverse/compose_scripts/tool_discover.py +1941 -443
- tooluniverse/compose_scripts/tool_graph_composer.py +1 -1
- tooluniverse/compose_scripts/tool_metadata_generator.py +1 -1
- tooluniverse/compose_tool.py +9 -9
- tooluniverse/core_tool.py +2 -2
- tooluniverse/ctg_tool.py +4 -4
- tooluniverse/custom_tool.py +1 -1
- tooluniverse/data/agentic_tools.json +0 -370
- tooluniverse/data/alphafold_tools.json +6 -6
- tooluniverse/data/blast_tools.json +112 -0
- tooluniverse/data/cbioportal_tools.json +87 -0
- tooluniverse/data/clinvar_tools.json +235 -0
- tooluniverse/data/compose_tools.json +0 -89
- tooluniverse/data/dbsnp_tools.json +275 -0
- tooluniverse/data/emdb_tools.json +61 -0
- tooluniverse/data/ensembl_tools.json +259 -0
- tooluniverse/data/file_download_tools.json +275 -0
- tooluniverse/data/geo_tools.json +200 -48
- tooluniverse/data/gnomad_tools.json +109 -0
- tooluniverse/data/gtopdb_tools.json +68 -0
- tooluniverse/data/gwas_tools.json +32 -0
- tooluniverse/data/interpro_tools.json +199 -0
- tooluniverse/data/jaspar_tools.json +70 -0
- tooluniverse/data/kegg_tools.json +356 -0
- tooluniverse/data/mpd_tools.json +87 -0
- tooluniverse/data/ols_tools.json +314 -0
- tooluniverse/data/package_discovery_tools.json +64 -0
- tooluniverse/data/packages/categorized_tools.txt +0 -1
- tooluniverse/data/packages/machine_learning_tools.json +0 -47
- tooluniverse/data/paleobiology_tools.json +91 -0
- tooluniverse/data/pride_tools.json +62 -0
- tooluniverse/data/pypi_package_inspector_tools.json +158 -0
- tooluniverse/data/python_executor_tools.json +341 -0
- tooluniverse/data/regulomedb_tools.json +50 -0
- tooluniverse/data/remap_tools.json +89 -0
- tooluniverse/data/screen_tools.json +89 -0
- tooluniverse/data/tool_discovery_agents.json +428 -0
- tooluniverse/data/tool_discovery_agents.json.backup +1343 -0
- tooluniverse/data/uniprot_tools.json +77 -0
- tooluniverse/data/web_search_tools.json +250 -0
- tooluniverse/data/worms_tools.json +55 -0
- tooluniverse/dataset_tool.py +2 -2
- tooluniverse/dbsnp_tool.py +196 -58
- tooluniverse/default_config.py +36 -3
- tooluniverse/emdb_tool.py +30 -0
- tooluniverse/enrichr_tool.py +14 -14
- tooluniverse/ensembl_tool.py +140 -47
- tooluniverse/execute_function.py +594 -29
- tooluniverse/extended_hooks.py +4 -4
- tooluniverse/file_download_tool.py +269 -0
- tooluniverse/gene_ontology_tool.py +1 -1
- tooluniverse/generate_tools.py +3 -3
- tooluniverse/geo_tool.py +81 -28
- tooluniverse/gnomad_tool.py +100 -52
- tooluniverse/gtopdb_tool.py +41 -0
- tooluniverse/humanbase_tool.py +10 -10
- tooluniverse/interpro_tool.py +72 -0
- tooluniverse/jaspar_tool.py +30 -0
- tooluniverse/kegg_tool.py +230 -0
- tooluniverse/logging_config.py +2 -2
- tooluniverse/mcp_client_tool.py +57 -129
- tooluniverse/mcp_integration.py +52 -49
- tooluniverse/mcp_tool_registry.py +147 -528
- tooluniverse/mpd_tool.py +42 -0
- tooluniverse/ncbi_eutils_tool.py +96 -0
- tooluniverse/ols_tool.py +435 -0
- tooluniverse/openalex_tool.py +8 -8
- tooluniverse/openfda_tool.py +2 -2
- tooluniverse/output_hook.py +15 -15
- tooluniverse/package_discovery_tool.py +217 -0
- tooluniverse/package_tool.py +1 -1
- tooluniverse/paleobiology_tool.py +30 -0
- tooluniverse/pmc_tool.py +2 -2
- tooluniverse/pride_tool.py +30 -0
- tooluniverse/pypi_package_inspector_tool.py +593 -0
- tooluniverse/python_executor_tool.py +711 -0
- tooluniverse/regulomedb_tool.py +30 -0
- tooluniverse/remap_tool.py +44 -0
- tooluniverse/remote/boltz/boltz_mcp_server.py +1 -1
- tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +3 -3
- tooluniverse/remote/immune_compass/compass_tool.py +3 -3
- tooluniverse/remote/pinnacle/pinnacle_tool.py +2 -2
- tooluniverse/remote/transcriptformer/transcriptformer_tool.py +3 -3
- tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +3 -3
- tooluniverse/remote_tool.py +4 -4
- tooluniverse/screen_tool.py +44 -0
- tooluniverse/scripts/filter_tool_files.py +2 -2
- tooluniverse/smcp.py +93 -12
- tooluniverse/smcp_server.py +100 -21
- tooluniverse/space/__init__.py +46 -0
- tooluniverse/space/loader.py +133 -0
- tooluniverse/space/validator.py +353 -0
- tooluniverse/tool_finder_embedding.py +5 -3
- tooluniverse/tool_finder_keyword.py +12 -10
- tooluniverse/tool_finder_llm.py +12 -8
- tooluniverse/tools/{UCSC_get_genes_by_region.py → BLAST_nucleotide_search.py} +22 -26
- tooluniverse/tools/BLAST_protein_search.py +63 -0
- tooluniverse/tools/ClinVar_search_variants.py +26 -15
- tooluniverse/tools/CodeQualityAnalyzer.py +3 -3
- tooluniverse/tools/EMDB_get_structure.py +46 -0
- tooluniverse/tools/GtoPdb_get_targets.py +52 -0
- tooluniverse/tools/InterPro_get_domain_details.py +46 -0
- tooluniverse/tools/InterPro_get_protein_domains.py +49 -0
- tooluniverse/tools/InterPro_search_domains.py +52 -0
- tooluniverse/tools/JASPAR_get_transcription_factors.py +52 -0
- tooluniverse/tools/MPD_get_phenotype_data.py +59 -0
- tooluniverse/tools/PRIDE_search_proteomics.py +52 -0
- tooluniverse/tools/PackageAnalyzer.py +55 -0
- tooluniverse/tools/Paleobiology_get_fossils.py +52 -0
- tooluniverse/tools/PyPIPackageInspector.py +59 -0
- tooluniverse/tools/ReMap_get_transcription_factor_binding.py +59 -0
- tooluniverse/tools/ReferenceInfoAnalyzer.py +55 -0
- tooluniverse/tools/RegulomeDB_query_variant.py +46 -0
- tooluniverse/tools/SCREEN_get_regulatory_elements.py +59 -0
- tooluniverse/tools/{ArgumentDescriptionOptimizer.py → TestResultsAnalyzer.py} +13 -13
- tooluniverse/tools/ToolDiscover.py +11 -11
- tooluniverse/tools/UniProt_id_mapping.py +63 -0
- tooluniverse/tools/UniProt_search.py +63 -0
- tooluniverse/tools/UnifiedToolGenerator.py +59 -0
- tooluniverse/tools/WoRMS_search_species.py +49 -0
- tooluniverse/tools/XMLToolOptimizer.py +55 -0
- tooluniverse/tools/__init__.py +119 -29
- tooluniverse/tools/_shared_client.py +3 -3
- tooluniverse/tools/alphafold_get_annotations.py +3 -3
- tooluniverse/tools/alphafold_get_prediction.py +3 -3
- tooluniverse/tools/alphafold_get_summary.py +3 -3
- tooluniverse/tools/cBioPortal_get_cancer_studies.py +46 -0
- tooluniverse/tools/cBioPortal_get_mutations.py +52 -0
- tooluniverse/tools/{gnomAD_query_variant.py → clinvar_get_clinical_significance.py} +8 -11
- tooluniverse/tools/clinvar_get_variant_details.py +49 -0
- tooluniverse/tools/dbSNP_get_variant_by_rsid.py +7 -7
- tooluniverse/tools/dbsnp_get_frequencies.py +46 -0
- tooluniverse/tools/dbsnp_search_by_gene.py +52 -0
- tooluniverse/tools/download_binary_file.py +66 -0
- tooluniverse/tools/download_file.py +71 -0
- tooluniverse/tools/download_text_content.py +55 -0
- tooluniverse/tools/dynamic_package_discovery.py +59 -0
- tooluniverse/tools/ensembl_get_sequence.py +52 -0
- tooluniverse/tools/{Ensembl_lookup_gene_by_symbol.py → ensembl_get_variants.py} +11 -11
- tooluniverse/tools/ensembl_lookup_gene.py +46 -0
- tooluniverse/tools/geo_get_dataset_info.py +46 -0
- tooluniverse/tools/geo_get_sample_info.py +46 -0
- tooluniverse/tools/geo_search_datasets.py +67 -0
- tooluniverse/tools/gnomad_get_gene_constraints.py +49 -0
- tooluniverse/tools/kegg_find_genes.py +52 -0
- tooluniverse/tools/kegg_get_gene_info.py +46 -0
- tooluniverse/tools/kegg_get_pathway_info.py +46 -0
- tooluniverse/tools/kegg_list_organisms.py +44 -0
- tooluniverse/tools/kegg_search_pathway.py +46 -0
- tooluniverse/tools/ols_find_similar_terms.py +63 -0
- tooluniverse/tools/{get_hyperopt_info.py → ols_get_ontology_info.py} +13 -10
- tooluniverse/tools/ols_get_term_ancestors.py +67 -0
- tooluniverse/tools/ols_get_term_children.py +67 -0
- tooluniverse/tools/{TestCaseGenerator.py → ols_get_term_info.py} +12 -9
- tooluniverse/tools/{CodeOptimizer.py → ols_search_ontologies.py} +22 -14
- tooluniverse/tools/ols_search_terms.py +71 -0
- tooluniverse/tools/python_code_executor.py +79 -0
- tooluniverse/tools/python_script_runner.py +79 -0
- tooluniverse/tools/web_api_documentation_search.py +63 -0
- tooluniverse/tools/web_search.py +71 -0
- tooluniverse/uniprot_tool.py +219 -16
- tooluniverse/url_tool.py +19 -1
- tooluniverse/uspto_tool.py +1 -1
- tooluniverse/utils.py +12 -12
- tooluniverse/web_search_tool.py +229 -0
- tooluniverse/worms_tool.py +64 -0
- {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/METADATA +8 -3
- {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/RECORD +184 -92
- tooluniverse/data/genomics_tools.json +0 -174
- tooluniverse/tools/ToolDescriptionOptimizer.py +0 -67
- tooluniverse/tools/ToolImplementationGenerator.py +0 -67
- tooluniverse/tools/ToolOptimizer.py +0 -59
- tooluniverse/tools/ToolSpecificationGenerator.py +0 -67
- tooluniverse/tools/ToolSpecificationOptimizer.py +0 -63
- tooluniverse/ucsc_tool.py +0 -60
- {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/WHEEL +0 -0
- {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/entry_points.txt +0 -0
- {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/licenses/LICENSE +0 -0
- {tooluniverse-1.0.9.1.dist-info → tooluniverse-1.0.11.dist-info}/top_level.txt +0 -0
tooluniverse/space/validator.py
ADDED
@@ -0,0 +1,353 @@
+"""
+Space Configuration Validator
+
+Comprehensive validation for Space configurations using JSON Schema.
+Supports validation, default value filling, and structure checking for
+Space YAML files.
+
+The validation system is based on a comprehensive JSON Schema that defines:
+- All possible fields and their types
+- Default values for optional fields
+- Required fields and validation rules
+- Enum values for specific fields
+- Nested object structures and arrays
+
+This provides a robust, flexible, and maintainable validation system that can:
+1. Validate YAML structure and content
+2. Fill in missing default values automatically
+3. Provide detailed error messages for validation failures
+4. Support both simple tool collections and complex workspaces
+"""
+
+from typing import Any, Dict, List, Tuple
+import yaml
+import jsonschema
+from jsonschema import validate
+
+
+# Space JSON Schema Definition
+# ================================
+# This schema defines the complete structure and validation rules for
+# Space configurations. It serves as the single source of truth for:
+# - Field definitions and types
+# - Default values
+# - Required fields
+# - Validation constraints
+# - Enum values for specific fields
+#
+# The schema supports two main configuration types:
+# 1. Simple tool collections (e.g., literature-search.yaml) - minimal config
+# 2. Complete workspaces (e.g., full-workspace.yaml) - full config with LLM
+#
+# Key features:
+# - Automatic default value filling
+# - Comprehensive validation rules
+# - Support for nested objects and arrays
+# - Flexible tool selection (by name, category, type)
+# - LLM configuration with provider and model settings
+# - Hook system for output processing
+# - Environment variable requirements documentation
+SPACE_SCHEMA = {
+    "type": "object",
+    "properties": {
+        "name": {
+            "type": "string",
+            "description": "Space name - unique identifier for this configuration",
+        },
+        "version": {
+            "type": "string",
+            "default": "1.0.0",
+            "description": "Space version - follows semantic versioning "
+            "(e.g., 1.0.0, 1.2.3)",
+        },
+        "description": {
+            "type": "string",
+            "description": "Space description - explains what this "
+            "configuration does and its purpose",
+        },
+        "tags": {
+            "type": "array",
+            "items": {"type": "string"},
+            "default": [],
+            "description": "Space tags - keywords for categorization and "
+            'discovery (e.g., ["research", "biology", "literature"])',
+        },
+        "tools": {
+            "type": "object",
+            "properties": {
+                "include_tools": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "Tools to include by exact name - most precise "
+                    "way to select specific tools",
+                },
+                "categories": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "Tool categories to include - broader selection "
+                    'based on tool categories (e.g., ["literature", "clinical"])',
+                },
+                "exclude_tools": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "Tools to exclude by exact name - removes "
+                    "specific tools from the selection",
+                },
+                "include_tool_types": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "Tool types to include - filter by tool type "
+                    '(e.g., ["api", "local", "agentic"])',
+                },
+                "exclude_tool_types": {
+                    "type": "array",
+                    "items": {"type": "string"},
+                    "description": "Tool types to exclude - removes tools of "
+                    "specific types from the selection",
+                },
+            },
+            "additionalProperties": False,
+            "description": "Tool configuration - defines which tools to load "
+            "and how to filter them",
+        },
+        "llm_config": {
+            "type": "object",
+            "properties": {
+                "mode": {
+                    "type": "string",
+                    "enum": ["default", "fallback"],
+                    "default": "default",
+                    "description": 'LLM configuration mode - "default" uses this '
+                    'config as primary, "fallback" uses as backup '
+                    "when primary fails",
+                },
+                "default_provider": {
+                    "type": "string",
+                    "description": "Default LLM provider - must match AgenticTool "
+                    "API types (CHATGPT, GEMINI, OPENROUTER, VLLM, etc.)",
+                },
+                "models": {
+                    "type": "object",
+                    "additionalProperties": {"type": "string"},
+                    "description": "Task-specific model mappings - maps task names "
+                    'to model IDs (e.g., {"default": "gpt-4o", '
+                    '"analysis": "gpt-4-turbo"})',
+                },
+                "temperature": {
+                    "type": "number",
+                    "minimum": 0,
+                    "maximum": 2,
+                    "description": "LLM temperature - controls randomness in "
+                    "responses (0.0 = deterministic, 2.0 = very random)",
+                },
+            },
+            "additionalProperties": False,
+            "description": "LLM configuration - settings for AI-powered tools "
+            "(AgenticTool) - only needed for complete workspaces",
+        },
+        "hooks": {
+            "type": "array",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "type": {
+                        "type": "string",
+                        "description": "Hook type - identifies the hook implementation "
+                        '(e.g., "output_summarization", "file_save")',
+                    },
+                    "enabled": {
+                        "type": "boolean",
+                        "default": True,
+                        "description": "Whether hook is enabled - allows enabling/"
+                        "disabling hooks without removing them",
+                    },
+                    "config": {
+                        "type": "object",
+                        "description": "Hook configuration - specific settings for "
+                        "this hook instance",
+                    },
+                },
+                "required": ["type"],
+                "additionalProperties": False,
+            },
+            "description": "Hook configurations - post-processing functions for "
+            "tool outputs (e.g., summarization, file saving)",
+        },
+        "required_env": {
+            "type": "array",
+            "items": {"type": "string"},
+            "description": "Required environment variables - documents which "
+            "environment variables should be set (for documentation "
+            "purposes only)",
+        },
+    },
+    "required": ["name", "version"],
+    "additionalProperties": False,
+    "description": "Space Configuration Schema - defines the structure for "
+    "Space YAML configuration files",
+}
+
+
+class ValidationError(Exception):
+    """Raised when configuration validation fails."""
+
+
+def validate_space_config(config: Dict[str, Any]) -> Tuple[bool, List[str]]:
+    """
+    Validate a Space configuration using JSON Schema.
+
+    This is a legacy function that now uses the JSON Schema validation system.
+    For new code, use validate_with_schema() instead.
+
+    Args:
+        config: Configuration dictionary
+
+    Returns:
+        Tuple of (is_valid, list_of_errors)
+    """
+    # Convert dict to YAML string for validation
+    yaml_content = yaml.dump(config, default_flow_style=False, allow_unicode=True)
+    is_valid, errors, _ = validate_with_schema(yaml_content, fill_defaults_flag=False)
+    return is_valid, errors
+
+
+def validate_yaml_format_by_template(yaml_content: str) -> Tuple[bool, List[str]]:
+    """
+    Validate YAML format by comparing against default template format.
+
+    This method uses the JSON Schema as a reference to validate
+    the structure and content of Space YAML configurations.
+
+    Args:
+        yaml_content: YAML content string
+
+    Returns:
+        Tuple of (is_valid, list_of_errors)
+    """
+    # Use the new JSON Schema validation instead
+    is_valid, errors, _ = validate_with_schema(yaml_content, fill_defaults_flag=False)
+    return is_valid, errors
+
+
+def validate_yaml_file(file_path: str) -> Tuple[bool, List[str]]:
+    """
+    Validate a YAML file by comparing against default template format.
+
+    Args:
+        file_path: Path to YAML file
+
+    Returns:
+        Tuple of (is_valid, list_of_errors)
+    """
+    try:
+        with open(file_path, "r", encoding="utf-8") as f:
+            yaml_content = f.read()
+        return validate_yaml_format_by_template(yaml_content)
+    except FileNotFoundError:
+        return False, [f"File not found: {file_path}"]
+    except Exception as e:
+        return False, [f"Error reading file: {e}"]
+
+
+def fill_defaults(data: Dict[str, Any], schema: Dict[str, Any]) -> Dict[str, Any]:
+    """
+    Recursively fill default values from JSON schema.
+
+    Args:
+        data: Configuration data
+        schema: JSON schema with default values
+
+    Returns:
+        Configuration with default values filled
+    """
+    if not isinstance(data, dict) or not isinstance(schema, dict):
+        return data
+
+    result = data.copy()
+
+    for key, value in schema.get("properties", {}).items():
+        if key not in result and "default" in value:
+            result[key] = value["default"]
+        elif key in result and isinstance(value, dict) and "properties" in value:
+            result[key] = fill_defaults(result[key], value)
+        elif (
+            key in result
+            and isinstance(value, dict)
+            and value.get("type") == "array"
+            and "items" in value
+        ):
+            if isinstance(result[key], list) and value["items"].get("type") == "object":
+                result[key] = [
+                    fill_defaults(item, value["items"]) for item in result[key]
+                ]
+
+    return result
+
+
+def validate_with_schema(
+    yaml_content: str, fill_defaults_flag: bool = True
+) -> Tuple[bool, List[str], Dict[str, Any]]:
+    """
+    Validate YAML content using JSON Schema and optionally fill default values.
+
+    Args:
+        yaml_content: YAML content string
+        fill_defaults_flag: Whether to fill default values
+
+    Returns:
+        Tuple of (is_valid, list_of_errors, processed_config)
+    """
+    errors = []
+
+    try:
+        # Parse YAML
+        config = yaml.safe_load(yaml_content)
+        if not isinstance(config, dict):
+            return False, ["YAML content must be a dictionary"], {}
+
+        # Fill default values if requested
+        if fill_defaults_flag:
+            config = fill_defaults(config, SPACE_SCHEMA)
+
+        # Validate against schema
+        validate(instance=config, schema=SPACE_SCHEMA)
+
+        return True, [], config
+
+    except yaml.YAMLError as e:
+        return False, [f"YAML parsing error: {e}"], {}
+    except jsonschema.ValidationError as e:
+        return (
+            False,
+            [f"Schema validation error: {e.message}"],
+            (config if "config" in locals() else {}),
+        )
+    except Exception as e:
+        return (
+            False,
+            [f"Validation error: {e}"],
+            (config if "config" in locals() else {}),
+        )
+
+
+def validate_yaml_file_with_schema(
+    file_path: str, fill_defaults_flag: bool = True
+) -> Tuple[bool, List[str], Dict[str, Any]]:
+    """
+    Validate a YAML file using JSON Schema and optionally fill default values.
+
+    Args:
+        file_path: Path to YAML file
+        fill_defaults_flag: Whether to fill default values
+
+    Returns:
+        Tuple of (is_valid, list_of_errors, processed_config)
+    """
+    try:
+        with open(file_path, "r", encoding="utf-8") as f:
+            yaml_content = f.read()
+        return validate_with_schema(yaml_content, fill_defaults_flag)
+    except FileNotFoundError:
+        return False, [f"File not found: {file_path}"], {}
+    except Exception as e:
+        return False, [f"Error reading file: {e}"], {}
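
For orientation, the new validator can be exercised as in the following sketch, which only uses functions and schema fields shown in the diff above; the Space name, tool name, and YAML values are illustrative examples, not values shipped with the package.

# Minimal sketch (illustrative values only) of using the new Space validator.
from tooluniverse.space.validator import validate_with_schema

yaml_content = """
name: example-space              # hypothetical Space name
version: 1.0.0
tools:
  include_tools:
    - UniProt_search             # example tool name
"""

is_valid, errors, config = validate_with_schema(yaml_content)
print(is_valid)        # True when the config satisfies SPACE_SCHEMA
print(config["tags"])  # [] - filled in from the schema default for "tags"

With fill_defaults_flag left at True, optional fields such as "tags" and "version" are populated from the schema defaults before validation, so a minimal tool-collection YAML passes without listing every field.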
tooluniverse/tool_finder_embedding.py
CHANGED
@@ -161,7 +161,7 @@ class ToolFinderEmbedding(BaseTool):
             query (str): User query or description of desired functionality
             top_k (int, optional): Number of top tools to return. Defaults to 5.

-        Returns
+        Returns
             list: List of top-k tool names ranked by relevance to the query

         Raises:
@@ -203,7 +203,7 @@ class ToolFinderEmbedding(BaseTool):
             return_call_result (bool, optional): If True, returns both prompts and tool names. Defaults to False.
             categories (list, optional): List of tool categories to filter by. Currently not implemented for embedding-based search.

-        Returns
+        Returns
             str or tuple:
                 - If return_call_result is False: Tool prompts as a formatted string
                 - If return_call_result is True: Tuple of (tool_prompts, tool_names)
@@ -225,7 +225,9 @@ class ToolFinderEmbedding(BaseTool):
         picked_tool_names_no_special = picked_tool_names_no_special[:rag_num]
         picked_tool_names = picked_tool_names_no_special[:rag_num]

-        picked_tools = self.tooluniverse.
+        picked_tools = self.tooluniverse.get_tool_specification_by_names(
+            picked_tool_names
+        )
         picked_tools_prompt = self.tooluniverse.prepare_tool_prompts(picked_tools)
         if return_call_result:
             return picked_tools_prompt, picked_tool_names
tooluniverse/tool_finder_keyword.py
CHANGED
@@ -182,7 +182,7 @@ class ToolFinderKeyword(BaseTool):
         Args:
             text (str): Input text to tokenize

-        Returns
+        Returns
             List[str]: List of processed tokens
         """
         if not text:
@@ -210,7 +210,7 @@ class ToolFinderKeyword(BaseTool):
         Args:
             word (str): Word to stem

-        Returns
+        Returns
             str: Stemmed word
         """
         if len(word) <= 3:
@@ -232,7 +232,7 @@ class ToolFinderKeyword(BaseTool):
             tokens (List[str]): Tokenized words
             max_phrase_length (int): Maximum length of phrases to extract

-        Returns
+        Returns
             List[str]: List of phrases and individual tokens
         """
         phrases = []
@@ -305,7 +305,7 @@ class ToolFinderKeyword(BaseTool):
         Args:
             parameter_schema (Dict): Tool parameter schema

-        Returns
+        Returns
             List[str]: List of text elements from parameters
         """
         text_elements = []
@@ -329,7 +329,7 @@ class ToolFinderKeyword(BaseTool):
             query_terms (List[str]): Processed query terms and phrases
             tool_name (str): Name of the tool to score

-        Returns
+        Returns
             float: TF-IDF relevance score
         """
         if tool_name not in self._tool_index:
@@ -364,7 +364,7 @@ class ToolFinderKeyword(BaseTool):
             query (str): Original query string
             tool (Dict): Tool configuration

-        Returns
+        Returns
             float: Exact match bonus score
         """
         query_lower = query.lower()
@@ -414,7 +414,7 @@ class ToolFinderKeyword(BaseTool):
             return_call_result (bool, optional): If True, returns both prompts and tool names. Defaults to False.
             categories (list, optional): List of tool categories to filter by.

-        Returns
+        Returns
             str or tuple:
                 - If return_call_result is False: Tool prompts as a formatted string
                 - If return_call_result is True: Tuple of (tool_prompts, tool_names)
@@ -451,7 +451,9 @@ class ToolFinderKeyword(BaseTool):
         picked_tool_names = picked_tool_names_no_special[:rag_num]

         # Get tool objects and prepare prompts (matching original behavior)
-        picked_tools = self.tooluniverse.
+        picked_tools = self.tooluniverse.get_tool_specification_by_names(
+            picked_tool_names
+        )
         picked_tools_prompt = self.tooluniverse.prepare_tool_prompts(picked_tools)

         if return_call_result:
@@ -472,7 +474,7 @@ class ToolFinderKeyword(BaseTool):
             - picked_tool_names (list, optional): Pre-selected tool names to process
             - return_call_result (bool, optional): Whether to return both prompts and names. Defaults to False.

-        Returns
+        Returns
             str or tuple:
                 - If return_call_result is False: Tool prompts as a formatted string
                 - If return_call_result is True: Tuple of (tool_prompts, tool_names)
@@ -504,7 +506,7 @@ class ToolFinderKeyword(BaseTool):
         Args:
             arguments (dict): Search arguments

-        Returns
+        Returns
             str: JSON string containing search results with relevance scores
         """
         try:
tooluniverse/tool_finder_llm.py
CHANGED
@@ -167,7 +167,7 @@ Requirements:
         Args:
             force_refresh (bool): Whether to force refresh the cache

-        Returns
+        Returns
             list: List of tool dictionaries with names and descriptions
         """
         current_time = datetime.now()
@@ -220,7 +220,7 @@ Requirements:
             query (str): User query
             max_tools (int): Maximum number of tools to send to LLM

-        Returns
+        Returns
             list: Filtered list of tools
         """
         if len(available_tools) <= max_tools:
@@ -269,7 +269,7 @@ Requirements:
         Args:
             tools (list): List of tool dictionaries

-        Returns
+        Returns
             str: Compact formatted tool descriptions for the prompt
         """
         formatted_tools = []
@@ -296,7 +296,7 @@ Requirements:
             include_reasoning (bool): Whether to include selection reasoning
             categories (list, optional): List of tool categories to filter by

-        Returns
+        Returns
             dict: Dictionary containing selected tools and metadata
         """
         try:
@@ -387,7 +387,9 @@ Requirements:

         # Get actual tool objects
         if tool_names:
-            selected_tool_objects =
+            selected_tool_objects = (
+                self.tooluniverse.get_tool_specification_by_names(tool_names)
+            )
             tool_prompts = self.tooluniverse.prepare_tool_prompts(
                 selected_tool_objects
             )
@@ -452,7 +454,7 @@ Requirements:
             categories (list, optional): List of tool categories to filter by. Applied before LLM selection.
             return_list_only (bool, optional): If True, returns only a list of tool specifications. Overrides other return options.

-        Returns
+        Returns
             str, tuple, or list:
                 - If return_list_only is True: List of tool specifications
                 - If return_call_result is False: Tool prompts as a formatted string
@@ -495,7 +497,9 @@ Requirements:
         picked_tool_names = picked_tool_names_no_special[:rag_num]

         # Get tool objects and prepare prompts (needed for both list and other formats)
-        picked_tools = self.tooluniverse.
+        picked_tools = self.tooluniverse.get_tool_specification_by_names(
+            picked_tool_names
+        )
         picked_tools_prompt = self.tooluniverse.prepare_tool_prompts(picked_tools)

         # If only list format is requested, return the tool specifications as a list
@@ -532,7 +536,7 @@ Requirements:
             categories: Requested categories filter
             return_call_result: Whether return_call_result was True

-        Returns
+        Returns
             str: JSON formatted search results
         """
         import json
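
In all three tool finder files the substantive change is the same: an incomplete statement is replaced by a call to get_tool_specification_by_names, whose result is passed to prepare_tool_prompts. A hedged sketch of that shared pattern, assuming the package's usual ToolUniverse entry point and using made-up tool names:

# Hedged sketch of the pattern the three finders now share (names are examples).
from tooluniverse import ToolUniverse  # assumed entry point of the package

tu = ToolUniverse()
tu.load_tools()

picked_tool_names = ["UniProt_search", "BLAST_protein_search"]  # example names
picked_tools = tu.get_tool_specification_by_names(picked_tool_names)
picked_tools_prompt = tu.prepare_tool_prompts(picked_tools)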
tooluniverse/tools/{UCSC_get_genes_by_region.py → BLAST_nucleotide_search.py}
RENAMED
@@ -1,39 +1,36 @@
 """
-
+BLAST_nucleotide_search

-
+Search nucleotide sequences using NCBI BLAST blastn against nucleotide databases
 """

 from typing import Any, Optional, Callable
 from ._shared_client import get_shared_client


-def
-
-
-
-
-    track: Optional[str] = "knownGene",
+def BLAST_nucleotide_search(
+    sequence: str,
+    database: Optional[str] = "nt",
+    expect: Optional[float] = 10.0,
+    hitlist_size: Optional[int] = 50,
     *,
     stream_callback: Optional[Callable[[str], None]] = None,
     use_cache: bool = False,
     validate: bool = True,
 ) -> dict[str, Any]:
     """
-
+    Search nucleotide sequences using NCBI BLAST blastn against nucleotide databases

     Parameters
     ----------
-
-
-
-
-
-
-
-
-    track : str
-        Track name.
+    sequence : str
+        DNA sequence to search
+    database : str
+        Database (nt, est, etc.)
+    expect : float
+        E-value threshold
+    hitlist_size : int
+        Max hits to return
     stream_callback : Callable, optional
         Callback for streaming output
     use_cache : bool, default False
@@ -49,13 +46,12 @@ def UCSC_get_genes_by_region(

     return get_shared_client().run_one_function(
         {
-            "name": "
+            "name": "BLAST_nucleotide_search",
             "arguments": {
-                "
-                "
-                "
-                "
-                "track": track,
+                "sequence": sequence,
+                "database": database,
+                "expect": expect,
+                "hitlist_size": hitlist_size,
             },
         },
         stream_callback=stream_callback,
@@ -64,4 +60,4 @@ def UCSC_get_genes_by_region(
     )


-__all__ = ["
+__all__ = ["BLAST_nucleotide_search"]
tooluniverse/tools/BLAST_protein_search.py
ADDED
@@ -0,0 +1,63 @@
+"""
+BLAST_protein_search
+
+Search protein sequences using NCBI BLAST blastp against protein databases
+"""
+
+from typing import Any, Optional, Callable
+from ._shared_client import get_shared_client
+
+
+def BLAST_protein_search(
+    sequence: str,
+    database: Optional[str] = "nr",
+    expect: Optional[float] = 10.0,
+    hitlist_size: Optional[int] = 50,
+    *,
+    stream_callback: Optional[Callable[[str], None]] = None,
+    use_cache: bool = False,
+    validate: bool = True,
+) -> dict[str, Any]:
+    """
+    Search protein sequences using NCBI BLAST blastp against protein databases
+
+    Parameters
+    ----------
+    sequence : str
+        Protein sequence to search
+    database : str
+        Database (nr, swissprot, etc.)
+    expect : float
+
+    hitlist_size : int
+
+    stream_callback : Callable, optional
+        Callback for streaming output
+    use_cache : bool, default False
+        Enable caching
+    validate : bool, default True
+        Validate parameters
+
+    Returns
+    -------
+    dict[str, Any]
+    """
+    # Handle mutable defaults to avoid B006 linting error
+
+    return get_shared_client().run_one_function(
+        {
+            "name": "BLAST_protein_search",
+            "arguments": {
+                "sequence": sequence,
+                "database": database,
+                "expect": expect,
+                "hitlist_size": hitlist_size,
+            },
+        },
+        stream_callback=stream_callback,
+        use_cache=use_cache,
+        validate=validate,
+    )
+
+
+__all__ = ["BLAST_protein_search"]
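
As a usage illustration for the new BLAST wrappers, a hedged sketch that calls BLAST_protein_search directly from its module, using only the signature shown above; the sequence and parameter values are made-up examples.

# Illustrative call to the new wrapper; sequence and settings are examples only.
from tooluniverse.tools.BLAST_protein_search import BLAST_protein_search

result = BLAST_protein_search(
    sequence="MKTAYIAKQRQISFVKSHFSRQLEERLGLIEVQ",  # example protein sequence
    database="swissprot",
    expect=1e-5,
    hitlist_size=10,
)
print(result)  # dict returned by the shared client

BLAST_nucleotide_search follows the same shape, with a DNA sequence and a nucleotide database such as "nt".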