tooluniverse 1.0.10__py3-none-any.whl → 1.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tooluniverse might be problematic.

Files changed (150)
  1. tooluniverse/__init__.py +57 -1
  2. tooluniverse/blast_tool.py +132 -0
  3. tooluniverse/boltz_tool.py +2 -2
  4. tooluniverse/cbioportal_tool.py +42 -0
  5. tooluniverse/clinvar_tool.py +268 -74
  6. tooluniverse/compose_scripts/tool_discover.py +1941 -443
  7. tooluniverse/data/agentic_tools.json +0 -370
  8. tooluniverse/data/alphafold_tools.json +6 -6
  9. tooluniverse/data/blast_tools.json +112 -0
  10. tooluniverse/data/cbioportal_tools.json +87 -0
  11. tooluniverse/data/clinvar_tools.json +235 -0
  12. tooluniverse/data/compose_tools.json +0 -89
  13. tooluniverse/data/dbsnp_tools.json +275 -0
  14. tooluniverse/data/emdb_tools.json +61 -0
  15. tooluniverse/data/ensembl_tools.json +259 -0
  16. tooluniverse/data/file_download_tools.json +275 -0
  17. tooluniverse/data/geo_tools.json +200 -48
  18. tooluniverse/data/gnomad_tools.json +109 -0
  19. tooluniverse/data/gtopdb_tools.json +68 -0
  20. tooluniverse/data/gwas_tools.json +32 -0
  21. tooluniverse/data/interpro_tools.json +199 -0
  22. tooluniverse/data/jaspar_tools.json +70 -0
  23. tooluniverse/data/kegg_tools.json +356 -0
  24. tooluniverse/data/mpd_tools.json +87 -0
  25. tooluniverse/data/ols_tools.json +314 -0
  26. tooluniverse/data/package_discovery_tools.json +64 -0
  27. tooluniverse/data/packages/categorized_tools.txt +0 -1
  28. tooluniverse/data/packages/machine_learning_tools.json +0 -47
  29. tooluniverse/data/paleobiology_tools.json +91 -0
  30. tooluniverse/data/pride_tools.json +62 -0
  31. tooluniverse/data/pypi_package_inspector_tools.json +158 -0
  32. tooluniverse/data/python_executor_tools.json +341 -0
  33. tooluniverse/data/regulomedb_tools.json +50 -0
  34. tooluniverse/data/remap_tools.json +89 -0
  35. tooluniverse/data/screen_tools.json +89 -0
  36. tooluniverse/data/tool_discovery_agents.json +428 -0
  37. tooluniverse/data/tool_discovery_agents.json.backup +1343 -0
  38. tooluniverse/data/uniprot_tools.json +77 -0
  39. tooluniverse/data/web_search_tools.json +250 -0
  40. tooluniverse/data/worms_tools.json +55 -0
  41. tooluniverse/dbsnp_tool.py +196 -58
  42. tooluniverse/default_config.py +35 -2
  43. tooluniverse/emdb_tool.py +30 -0
  44. tooluniverse/ensembl_tool.py +140 -47
  45. tooluniverse/execute_function.py +74 -14
  46. tooluniverse/file_download_tool.py +269 -0
  47. tooluniverse/geo_tool.py +81 -28
  48. tooluniverse/gnomad_tool.py +100 -52
  49. tooluniverse/gtopdb_tool.py +41 -0
  50. tooluniverse/interpro_tool.py +72 -0
  51. tooluniverse/jaspar_tool.py +30 -0
  52. tooluniverse/kegg_tool.py +230 -0
  53. tooluniverse/mpd_tool.py +42 -0
  54. tooluniverse/ncbi_eutils_tool.py +96 -0
  55. tooluniverse/ols_tool.py +435 -0
  56. tooluniverse/package_discovery_tool.py +217 -0
  57. tooluniverse/paleobiology_tool.py +30 -0
  58. tooluniverse/pride_tool.py +30 -0
  59. tooluniverse/pypi_package_inspector_tool.py +593 -0
  60. tooluniverse/python_executor_tool.py +711 -0
  61. tooluniverse/regulomedb_tool.py +30 -0
  62. tooluniverse/remap_tool.py +44 -0
  63. tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +1 -1
  64. tooluniverse/screen_tool.py +44 -0
  65. tooluniverse/smcp_server.py +3 -3
  66. tooluniverse/tool_finder_embedding.py +3 -1
  67. tooluniverse/tool_finder_keyword.py +3 -1
  68. tooluniverse/tool_finder_llm.py +6 -2
  69. tooluniverse/tools/{UCSC_get_genes_by_region.py → BLAST_nucleotide_search.py} +22 -26
  70. tooluniverse/tools/BLAST_protein_search.py +63 -0
  71. tooluniverse/tools/ClinVar_search_variants.py +26 -15
  72. tooluniverse/tools/CodeQualityAnalyzer.py +3 -3
  73. tooluniverse/tools/EMDB_get_structure.py +46 -0
  74. tooluniverse/tools/GtoPdb_get_targets.py +52 -0
  75. tooluniverse/tools/InterPro_get_domain_details.py +46 -0
  76. tooluniverse/tools/InterPro_get_protein_domains.py +49 -0
  77. tooluniverse/tools/InterPro_search_domains.py +52 -0
  78. tooluniverse/tools/JASPAR_get_transcription_factors.py +52 -0
  79. tooluniverse/tools/MPD_get_phenotype_data.py +59 -0
  80. tooluniverse/tools/PRIDE_search_proteomics.py +52 -0
  81. tooluniverse/tools/PackageAnalyzer.py +55 -0
  82. tooluniverse/tools/Paleobiology_get_fossils.py +52 -0
  83. tooluniverse/tools/PyPIPackageInspector.py +59 -0
  84. tooluniverse/tools/ReMap_get_transcription_factor_binding.py +59 -0
  85. tooluniverse/tools/ReferenceInfoAnalyzer.py +55 -0
  86. tooluniverse/tools/RegulomeDB_query_variant.py +46 -0
  87. tooluniverse/tools/SCREEN_get_regulatory_elements.py +59 -0
  88. tooluniverse/tools/{ArgumentDescriptionOptimizer.py → TestResultsAnalyzer.py} +13 -13
  89. tooluniverse/tools/ToolDiscover.py +11 -11
  90. tooluniverse/tools/UniProt_id_mapping.py +63 -0
  91. tooluniverse/tools/UniProt_search.py +63 -0
  92. tooluniverse/tools/UnifiedToolGenerator.py +59 -0
  93. tooluniverse/tools/WoRMS_search_species.py +49 -0
  94. tooluniverse/tools/XMLToolOptimizer.py +55 -0
  95. tooluniverse/tools/__init__.py +119 -29
  96. tooluniverse/tools/alphafold_get_annotations.py +3 -3
  97. tooluniverse/tools/alphafold_get_prediction.py +3 -3
  98. tooluniverse/tools/alphafold_get_summary.py +3 -3
  99. tooluniverse/tools/cBioPortal_get_cancer_studies.py +46 -0
  100. tooluniverse/tools/cBioPortal_get_mutations.py +52 -0
  101. tooluniverse/tools/{gnomAD_query_variant.py → clinvar_get_clinical_significance.py} +8 -11
  102. tooluniverse/tools/clinvar_get_variant_details.py +49 -0
  103. tooluniverse/tools/dbSNP_get_variant_by_rsid.py +7 -7
  104. tooluniverse/tools/dbsnp_get_frequencies.py +46 -0
  105. tooluniverse/tools/dbsnp_search_by_gene.py +52 -0
  106. tooluniverse/tools/download_binary_file.py +66 -0
  107. tooluniverse/tools/download_file.py +71 -0
  108. tooluniverse/tools/download_text_content.py +55 -0
  109. tooluniverse/tools/dynamic_package_discovery.py +59 -0
  110. tooluniverse/tools/ensembl_get_sequence.py +52 -0
  111. tooluniverse/tools/{Ensembl_lookup_gene_by_symbol.py → ensembl_get_variants.py} +11 -11
  112. tooluniverse/tools/ensembl_lookup_gene.py +46 -0
  113. tooluniverse/tools/geo_get_dataset_info.py +46 -0
  114. tooluniverse/tools/geo_get_sample_info.py +46 -0
  115. tooluniverse/tools/geo_search_datasets.py +67 -0
  116. tooluniverse/tools/gnomad_get_gene_constraints.py +49 -0
  117. tooluniverse/tools/kegg_find_genes.py +52 -0
  118. tooluniverse/tools/kegg_get_gene_info.py +46 -0
  119. tooluniverse/tools/kegg_get_pathway_info.py +46 -0
  120. tooluniverse/tools/kegg_list_organisms.py +44 -0
  121. tooluniverse/tools/kegg_search_pathway.py +46 -0
  122. tooluniverse/tools/ols_find_similar_terms.py +63 -0
  123. tooluniverse/tools/{get_hyperopt_info.py → ols_get_ontology_info.py} +13 -10
  124. tooluniverse/tools/ols_get_term_ancestors.py +67 -0
  125. tooluniverse/tools/ols_get_term_children.py +67 -0
  126. tooluniverse/tools/{TestCaseGenerator.py → ols_get_term_info.py} +12 -9
  127. tooluniverse/tools/{CodeOptimizer.py → ols_search_ontologies.py} +22 -14
  128. tooluniverse/tools/ols_search_terms.py +71 -0
  129. tooluniverse/tools/python_code_executor.py +79 -0
  130. tooluniverse/tools/python_script_runner.py +79 -0
  131. tooluniverse/tools/web_api_documentation_search.py +63 -0
  132. tooluniverse/tools/web_search.py +71 -0
  133. tooluniverse/uniprot_tool.py +219 -16
  134. tooluniverse/url_tool.py +18 -0
  135. tooluniverse/utils.py +2 -2
  136. tooluniverse/web_search_tool.py +229 -0
  137. tooluniverse/worms_tool.py +64 -0
  138. {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/METADATA +3 -2
  139. {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/RECORD +143 -54
  140. tooluniverse/data/genomics_tools.json +0 -174
  141. tooluniverse/tools/ToolDescriptionOptimizer.py +0 -67
  142. tooluniverse/tools/ToolImplementationGenerator.py +0 -67
  143. tooluniverse/tools/ToolOptimizer.py +0 -59
  144. tooluniverse/tools/ToolSpecificationGenerator.py +0 -67
  145. tooluniverse/tools/ToolSpecificationOptimizer.py +0 -63
  146. tooluniverse/ucsc_tool.py +0 -60
  147. {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/WHEEL +0 -0
  148. {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/entry_points.txt +0 -0
  149. {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/licenses/LICENSE +0 -0
  150. {tooluniverse-1.0.10.dist-info → tooluniverse-1.0.11.dist-info}/top_level.txt +0 -0
@@ -207,5 +207,82 @@
       "output_description": "Returns a list containing all isoform ID strings."
     },
     "type": "UniProtRESTTool"
+  },
+  {
+    "name": "UniProt_search",
+    "description": "Search UniProtKB database using flexible query syntax. Supports gene names (e.g., 'gene:TP53'), protein names, organism filters (e.g., 'organism:9606'), and complex queries using AND/OR operators. Returns matching proteins with accession numbers and key information. Use this to find UniProt accessions from gene or protein names.",
+    "parameter": {
+      "type": "object",
+      "properties": {
+        "query": {
+          "type": "string",
+          "description": "Search query. Can be simple (e.g., 'MEIOB') or advanced ('gene:TP53 AND organism:9606'). Keywords: gene, protein_name, organism, reviewed, etc."
+        },
+        "organism": {
+          "type": "string",
+          "description": "Optional organism filter. Can use common name ('human') or taxonomy ID ('9606'). Automatically combined with query using AND."
+        },
+        "limit": {
+          "type": "integer",
+          "description": "Maximum number of results to return (default: 25, max: 500)"
+        },
+        "fields": {
+          "type": "array",
+          "items": {
+            "type": "string"
+          },
+          "description": "Optional list of fields to return. Default returns: accession, id, protein_name, gene_names, organism, length"
+        }
+      },
+      "required": ["query"]
+    },
+    "fields": {
+      "endpoint": "https://rest.uniprot.org/uniprotkb/search",
+      "search_type": "search",
+      "format": "json"
+    },
+    "type": "UniProtRESTTool"
+  },
+  {
+    "name": "UniProt_id_mapping",
+    "description": "Map IDs between different databases (e.g., Ensembl to UniProt, Gene Name to UniProt). Supports batch mapping and async task processing. Use this to convert between different database identifiers.",
+    "parameter": {
+      "type": "object",
+      "properties": {
+        "ids": {
+          "oneOf": [
+            {
+              "type": "array",
+              "items": {
+                "type": "string"
+              }
+            },
+            {
+              "type": "string"
+            }
+          ],
+          "description": "ID(s) to map. Can be single string or array of strings, e.g., 'ENSG00000141510' or ['MEIOB', 'TP53']"
+        },
+        "from_db": {
+          "type": "string",
+          "description": "Source database. Examples: 'Ensembl', 'Gene_Name', 'RefSeq_Protein', 'PDB', 'EMBL'"
+        },
+        "to_db": {
+          "type": "string",
+          "description": "Target database (default: 'UniProtKB')"
+        },
+        "max_wait_time": {
+          "type": "integer",
+          "description": "Maximum time to wait for async task completion in seconds (default: 30)"
+        }
+      },
+      "required": ["ids", "from_db"]
+    },
+    "fields": {
+      "endpoint": "https://rest.uniprot.org/idmapping",
+      "mapping_type": "async",
+      "format": "json"
+    },
+    "type": "UniProtRESTTool"
   }
 ]
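
The two specs added above (apparently tooluniverse/data/uniprot_tools.json, the +77 -0 entry in the file list) only declare an endpoint plus a JSON-schema parameter block; the HTTP handling lives in the reworked uniprot_tool.py, which this hunk does not show. As a rough sketch of what UniProt_search maps onto, the declared endpoint can be queried directly. The query, size, fields, and format parameter names below belong to the public UniProt REST API, not to tooluniverse, and the package may build its request differently.

import requests

# Minimal sketch mirroring the UniProt_search spec above.
# The endpoint comes from the spec's "fields.endpoint"; the parameter names
# are the public UniProt REST API's and are an assumption about what
# UniProtRESTTool sends under the hood.
resp = requests.get(
    "https://rest.uniprot.org/uniprotkb/search",
    params={
        "query": "gene:TP53 AND organism_id:9606",  # the spec example writes 'organism:9606'
        "size": 25,  # the spec's "limit" (default 25, max 500)
        "fields": "accession,id,protein_name,gene_names,organism_name,length",
        "format": "json",
    },
    timeout=30,
)
resp.raise_for_status()
for entry in resp.json().get("results", []):
    print(entry.get("primaryAccession"), entry.get("uniProtkbId"))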
@@ -0,0 +1,250 @@
+[
+  {
+    "type": "WebSearchTool",
+    "name": "web_search",
+    "description": "General web search using DDGS (Dux Distributed Global Search) supporting multiple search engines including Google, Bing, Brave, Yahoo, and DuckDuckGo. No API keys required.",
+    "parameter": {
+      "type": "object",
+      "properties": {
+        "query": {
+          "type": "string",
+          "description": "Search query string"
+        },
+        "max_results": {
+          "type": "integer",
+          "description": "Maximum number of results to return",
+          "default": 10,
+          "minimum": 1,
+          "maximum": 50
+        },
+        "search_type": {
+          "type": "string",
+          "description": "Type of search to perform",
+          "enum": ["general", "api_documentation", "python_packages", "github"],
+          "default": "general"
+        },
+        "backend": {
+          "type": "string",
+          "description": "Search engine backend to use",
+          "enum": ["auto", "google", "bing", "brave", "yahoo", "duckduckgo"],
+          "default": "auto"
+        },
+        "region": {
+          "type": "string",
+          "description": "Search region/locale",
+          "enum": ["us-en", "cn-zh", "uk-en", "de-de", "fr-fr", "ja-jp"],
+          "default": "us-en"
+        },
+        "safesearch": {
+          "type": "string",
+          "description": "Safe search level",
+          "enum": ["on", "moderate", "off"],
+          "default": "moderate"
+        }
+      },
+      "required": ["query"]
+    },
+    "return_schema": {
+      "type": "object",
+      "properties": {
+        "status": {
+          "type": "string",
+          "description": "Status of the search operation",
+          "enum": ["success", "error"]
+        },
+        "query": {
+          "type": "string",
+          "description": "The search query that was executed"
+        },
+        "search_type": {
+          "type": "string",
+          "description": "Type of search that was performed"
+        },
+        "total_results": {
+          "type": "integer",
+          "description": "Total number of results found"
+        },
+        "results": {
+          "type": "array",
+          "description": "Array of search results",
+          "items": {
+            "type": "object",
+            "properties": {
+              "title": {
+                "type": "string",
+                "description": "Title of the search result"
+              },
+              "url": {
+                "type": "string",
+                "description": "URL of the search result"
+              },
+              "snippet": {
+                "type": "string",
+                "description": "Snippet or description of the result"
+              },
+              "rank": {
+                "type": "integer",
+                "description": "Rank of the result in search results"
+              }
+            },
+            "required": ["title", "url", "snippet", "rank"]
+          }
+        },
+        "error": {
+          "type": "string",
+          "description": "Error message if status is error"
+        }
+      },
+      "required": ["status", "query", "total_results", "results"]
+    },
+    "test_examples": [
+      {
+        "input": {
+          "query": "python requests library",
+          "max_results": 5
+        },
+        "expected_output_type": "object",
+        "description": "Search for Python requests library information"
+      },
+      {
+        "input": {
+          "query": "machine learning APIs",
+          "search_type": "api_documentation",
+          "max_results": 3
+        },
+        "expected_output_type": "object",
+        "description": "Search for machine learning API documentation"
+      }
+    ],
+    "metadata": {
+      "tags": ["web-search", "multi-engine", "no-api-key", "ddgs"],
+      "difficulty_level": "easy",
+      "estimated_execution_time": "2-5 seconds"
+    }
+  },
+  {
+    "type": "WebAPIDocumentationSearchTool",
+    "name": "web_api_documentation_search",
+    "description": "Specialized web search for API documentation, Python packages, and technical resources using DDGS with multiple search engines. Optimized for finding official documentation and library information.",
+    "parameter": {
+      "type": "object",
+      "properties": {
+        "query": {
+          "type": "string",
+          "description": "Search query string (e.g., tool name, library name, API name)"
+        },
+        "max_results": {
+          "type": "integer",
+          "description": "Maximum number of results to return",
+          "default": 10,
+          "minimum": 1,
+          "maximum": 50
+        },
+        "focus": {
+          "type": "string",
+          "description": "Focus area for the search",
+          "enum": ["api_docs", "python_packages", "github_repos"],
+          "default": "api_docs"
+        },
+        "backend": {
+          "type": "string",
+          "description": "Search engine backend to use",
+          "enum": ["auto", "google", "bing", "brave", "yahoo", "duckduckgo"],
+          "default": "auto"
+        }
+      },
+      "required": ["query"]
+    },
+    "return_schema": {
+      "type": "object",
+      "properties": {
+        "status": {
+          "type": "string",
+          "description": "Status of the search operation",
+          "enum": ["success", "error"]
+        },
+        "query": {
+          "type": "string",
+          "description": "The search query that was executed"
+        },
+        "focus": {
+          "type": "string",
+          "description": "Focus area that was used for the search"
+        },
+        "enhanced_query": {
+          "type": "string",
+          "description": "Enhanced query that was actually executed"
+        },
+        "total_results": {
+          "type": "integer",
+          "description": "Total number of results found"
+        },
+        "results": {
+          "type": "array",
+          "description": "Array of search results",
+          "items": {
+            "type": "object",
+            "properties": {
+              "title": {
+                "type": "string",
+                "description": "Title of the search result"
+              },
+              "url": {
+                "type": "string",
+                "description": "URL of the search result"
+              },
+              "snippet": {
+                "type": "string",
+                "description": "Snippet or description of the result"
+              },
+              "rank": {
+                "type": "integer",
+                "description": "Rank of the result in search results"
+              }
+            },
+            "required": ["title", "url", "snippet", "rank"]
+          }
+        },
+        "error": {
+          "type": "string",
+          "description": "Error message if status is error"
+        }
+      },
+      "required": ["status", "query", "total_results", "results"]
+    },
+    "test_examples": [
+      {
+        "input": {
+          "query": "pandas",
+          "focus": "api_docs",
+          "max_results": 5
+        },
+        "expected_output_type": "object",
+        "description": "Search for pandas API documentation"
+      },
+      {
+        "input": {
+          "query": "requests",
+          "focus": "python_packages",
+          "max_results": 3
+        },
+        "expected_output_type": "object",
+        "description": "Search for requests Python package information"
+      },
+      {
+        "input": {
+          "query": "scikit-learn",
+          "focus": "github_repos",
+          "max_results": 3
+        },
+        "expected_output_type": "object",
+        "description": "Search for scikit-learn GitHub repositories"
+      }
+    ],
+    "metadata": {
+      "tags": ["web-search", "api-documentation", "python-packages", "technical-resources", "multi-engine", "ddgs"],
+      "difficulty_level": "easy",
+      "estimated_execution_time": "2-5 seconds"
+    }
+  }
+]
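
Both entries above (apparently the new tooluniverse/data/web_search_tools.json) pair a parameter schema with a return_schema and a set of test_examples. A small illustrative sketch, not part of the package and assuming the third-party jsonschema library purely for demonstration, shows that each test_examples input is just an argument object that validates against the declared parameter schema:

import json
from jsonschema import validate  # pip install jsonschema (assumed here for illustration only)

# Parameter schema trimmed from the web_search entry above, keeping the
# constraints relevant to validation.
web_search_params = {
    "type": "object",
    "properties": {
        "query": {"type": "string"},
        "max_results": {"type": "integer", "minimum": 1, "maximum": 50},
        "search_type": {"enum": ["general", "api_documentation", "python_packages", "github"]},
        "backend": {"enum": ["auto", "google", "bing", "brave", "yahoo", "duckduckgo"]},
        "region": {"enum": ["us-en", "cn-zh", "uk-en", "de-de", "fr-fr", "ja-jp"]},
        "safesearch": {"enum": ["on", "moderate", "off"]},
    },
    "required": ["query"],
}

# Second test_examples input from the spec, extended with an explicit backend.
arguments = {
    "query": "machine learning APIs",
    "search_type": "api_documentation",
    "max_results": 3,
    "backend": "auto",
}
validate(instance=arguments, schema=web_search_params)  # raises ValidationError if malformed
print(json.dumps(arguments, indent=2))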
@@ -0,0 +1,55 @@
+[
+  {
+    "type": "WoRMSRESTTool",
+    "name": "WoRMS_search_species",
+    "description": "Search marine species in World Register of Marine Species",
+    "parameter": {
+      "type": "object",
+      "properties": {
+        "query": {"type": "string", "description": "Species name or search term"},
+        "limit": {"type": "integer", "default": 20, "description": "Number of results"}
+      },
+      "required": ["query"]
+    },
+    "fields": {
+      "endpoint": "https://www.marinespecies.org/rest/AphiaIDByName/{query}",
+      "return_format": "JSON"
+    },
+    "return_schema": {
+      "type": "object",
+      "properties": {
+        "AphiaID": {"type": "integer"},
+        "url": {"type": "string"},
+        "scientificname": {"type": "string"},
+        "authority": {"type": "string"},
+        "status": {"type": "string"},
+        "unacceptreason": {"type": "string"},
+        "taxonRankID": {"type": "integer"},
+        "rank": {"type": "string"},
+        "valid_AphiaID": {"type": "integer"},
+        "valid_name": {"type": "string"},
+        "valid_authority": {"type": "string"},
+        "parentNameUsageID": {"type": "integer"},
+        "kingdom": {"type": "string"},
+        "phylum": {"type": "string"},
+        "class": {"type": "string"},
+        "order": {"type": "string"},
+        "family": {"type": "string"},
+        "genus": {"type": "string"},
+        "citation": {"type": "string"},
+        "lsid": {"type": "string"},
+        "isMarine": {"type": "boolean"},
+        "isBrackish": {"type": "boolean"},
+        "isFreshwater": {"type": "boolean"},
+        "isTerrestrial": {"type": "boolean"},
+        "isExtinct": {"type": "boolean"},
+        "match_type": {"type": "string"},
+        "modified": {"type": "string"}
+      }
+    },
+    "test_examples": [
+      {"query": "Actinia equina", "limit": 5},
+      {"query": "Scleractinia", "limit": 10}
+    ]
+  }
+]
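
Unlike the UniProt specs, this WoRMS entry (apparently tooluniverse/data/worms_tools.json) templates the query straight into the URL path via {query} in fields.endpoint. A minimal sketch of the underlying calls is below; WoRMS documents /AphiaIDByName/{name} as returning a bare AphiaID, so the fuller record described by return_schema would plausibly come from a follow-up record lookup. How WoRMSRESTTool in worms_tool.py actually handles this is not visible in the diff, so the second request here is an assumption.

import requests
from urllib.parse import quote

# Sketch of the call implied by the templated endpoint above, using the
# first test_example. The AphiaRecordByAphiaID follow-up is an assumption
# about how the record fields in return_schema would be populated.
query = "Actinia equina"
url = "https://www.marinespecies.org/rest/AphiaIDByName/" + quote(query)
aphia_id = requests.get(url, timeout=30).json()  # WoRMS returns a bare integer AphiaID here

record = requests.get(
    f"https://www.marinespecies.org/rest/AphiaRecordByAphiaID/{aphia_id}", timeout=30
).json()
print(record.get("scientificname"), record.get("rank"), record.get("status"), record.get("isMarine"))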
@@ -1,71 +1,209 @@
-import requests
-from .base_tool import BaseTool
+"""
+dbSNP REST API Tool
+
+This tool provides access to the dbSNP (Single Nucleotide Polymorphism) database
+for variant information, allele frequencies, and genomic coordinates.
+"""
+
+from typing import Dict, Any
+from .ncbi_eutils_tool import NCBIEUtilsTool
 from .tool_registry import register_tool


-@register_tool("DbSnpTool")
-class DbSnpTool(BaseTool):
-    """
-    Local tool wrapper for dbSNP via NCBI Variation Services.
-    Fetches variant by rsID using the refsnp endpoint.
-    """
+class dbSNPRESTTool(NCBIEUtilsTool):
+    """Base class for dbSNP REST API tools with rate limiting."""
+
+
+@register_tool("dbSNPGetVariantByRsID")
+class dbSNPGetVariantByRsID(dbSNPRESTTool):
+    """Get variant information by rsID."""

     def __init__(self, tool_config):
         super().__init__(tool_config)
-        self.base = "https://api.ncbi.nlm.nih.gov/variation/v0"
-        self.session = requests.Session()
+        self.endpoint = "/esummary.fcgi"

-    def run(self, arguments):
-        rsid = arguments.get("rsid")
+    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+        """Get variant by rsID using E-utilities."""
+        rsid = arguments.get("rsid", "")
         if not rsid:
-            return {"error": "Missing required parameter: rsid"}
+            return {"status": "error", "error": "rsid is required"}

-        # Clean rsid (remove 'rs' prefix if present)
+        # Remove 'rs' prefix if present
         if rsid.startswith("rs"):
             rsid = rsid[2:]

-        url = f"{self.base}/refsnp/{rsid}"
-        resp = self.session.get(url, timeout=20)
-        resp.raise_for_status()
-        data = resp.json()
-
-        # Extract key fields from primary snapshot
-        primary = data.get("primary_snapshot_data", {})
-        placements = primary.get("placements_with_allele", [])
-
-        chrom = ""
-        pos = None
-        alleles = []
-        hgvs = []
-
-        if placements:
-            placement = placements[0]
-            chrom = (
-                placement.get("seq_id", "").replace("NC_0000", "").replace(".11", "")
-            )
-            if chrom.startswith("0"):
-                chrom = chrom[1:]
-            chrom = f"chr{chrom}"
-
-            allele_data = placement.get("alleles", [])
-            for allele in allele_data:
-                spdi = allele.get("allele", {}).get("spdi", {})
-                if spdi:
-                    ref = spdi.get("deleted_sequence", "")
-                    alt = spdi.get("inserted_sequence", "")
-                    if ref and alt:
-                        alleles.append(f"{ref}>{alt}")
-                    elif ref:
-                        alleles.append(ref)
-
-                    hgvs_val = allele.get("hgvs", "")
-                    if hgvs_val:
-                        hgvs.append(hgvs_val)
-
-        return {
-            "refsnp_id": f"rs{rsid}",
-            "chrom": chrom,
-            "pos": pos,
-            "alleles": alleles,
-            "hgvs": hgvs,
+        params = {"db": "snp", "id": rsid, "retmode": "json"}
+
+        result = self._make_request(self.endpoint, params)
+
+        # Parse and extract useful data from NCBI response
+        if result.get("status") == "success":
+            data = result.get("data", {})
+            if isinstance(data, dict) and "result" in data:
+                result_data = data["result"]
+                if rsid in result_data:
+                    variant_data = result_data[rsid]
+
+                    # Extract key information
+                    parsed_data = {
+                        "refsnp_id": f"rs{rsid}",
+                        "snp_id": variant_data.get("snp_id"),
+                        "chromosome": variant_data.get("chr"),
+                        "position": variant_data.get("chrpos"),
+                        "allele": variant_data.get("allele"),
+                        "snp_class": variant_data.get("snp_class"),
+                        "clinical_significance": variant_data.get(
+                            "clinical_significance", ""
+                        ).split(","),
+                        "genes": [
+                            gene.get("name") for gene in variant_data.get("genes", [])
+                        ],
+                        "allele_frequencies": variant_data.get("global_mafs", []),
+                        "hgvs_notation": variant_data.get("docsum", ""),
+                        "spdi_notation": variant_data.get("spdi", ""),
+                        "function_class": variant_data.get("fxn_class", "").split(","),
+                        "validated": variant_data.get("validated", "").split(","),
+                        "created_date": variant_data.get("createdate"),
+                        "updated_date": variant_data.get("updatedate"),
+                    }
+
+                    result["data"] = parsed_data
+                    result["rsid"] = f"rs{rsid}"
+                else:
+                    result["status"] = "error"
+                    result["error"] = f"Variant rs{rsid} not found in dbSNP"
+            else:
+                result["status"] = "error"
+                result["error"] = "Invalid response format from NCBI E-utilities"
+
+        return result
+
+
+@register_tool("dbSNPSearchByGene")
+class dbSNPSearchByGene(dbSNPRESTTool):
+    """Search variants by gene symbol."""
+
+    def __init__(self, tool_config):
+        super().__init__(tool_config)
+        self.endpoint = "/esearch.fcgi"
+
+    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+        """Search variants by gene using E-utilities."""
+        gene_symbol = arguments.get("gene_symbol", "")
+        if not gene_symbol:
+            return {"status": "error", "error": "gene_symbol is required"}
+
+        params = {
+            "db": "snp",
+            "term": f"{gene_symbol}[gene]",
+            "retmode": "json",
+            "retmax": arguments.get("limit", 20),
         }
+
+        result = self._make_request(self.endpoint, params)
+
+        # Parse and extract useful data from NCBI response
+        if result.get("status") == "success":
+            data = result.get("data", {})
+            if isinstance(data, dict) and "esearchresult" in data:
+                esearch_data = data["esearchresult"]
+
+                # Extract variant IDs
+                variant_ids = esearch_data.get("idlist", [])
+                count = int(esearch_data.get("count", 0))
+
+                # Create variant list with basic info
+                variants = []
+                for variant_id in variant_ids:
+                    variants.append(
+                        {"refsnp_id": f"rs{variant_id}", "snp_id": int(variant_id)}
+                    )
+
+                parsed_data = {
+                    "variants": variants,
+                    "total_count": count,
+                    "returned_count": len(variants),
+                }
+
+                result["data"] = parsed_data
+                result["gene_symbol"] = gene_symbol
+            else:
+                result["status"] = "error"
+                result["error"] = "Invalid response format from NCBI E-utilities"
+
+        return result
+
+
+@register_tool("dbSNPGetFrequencies")
+class dbSNPGetFrequencies(dbSNPRESTTool):
+    """Get allele frequencies for a variant."""
+
+    def __init__(self, tool_config):
+        super().__init__(tool_config)
+        self.endpoint = "/esummary.fcgi"
+
+    def run(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
+        """Get allele frequencies by rsID using E-utilities."""
+        rsid = arguments.get("rsid", "")
+        if not rsid:
+            return {"status": "error", "error": "rsid is required"}
+
+        # Remove 'rs' prefix if present
+        if rsid.startswith("rs"):
+            rsid = rsid[2:]
+
+        params = {"db": "snp", "id": rsid, "retmode": "json"}
+
+        result = self._make_request(self.endpoint, params)
+
+        # Parse and extract frequency data from NCBI response
+        if result.get("status") == "success":
+            data = result.get("data", {})
+            if isinstance(data, dict) and "result" in data:
+                result_data = data["result"]
+                if rsid in result_data:
+                    variant_data = result_data[rsid]
+
+                    # Extract allele frequency data
+                    frequencies = []
+                    global_mafs = variant_data.get("global_mafs", [])
+
+                    for maf in global_mafs:
+                        study = maf.get("study", "Unknown")
+                        freq_str = maf.get("freq", "")
+
+                        # Parse frequency string (e.g., "C=0.1505591/754")
+                        if "=" in freq_str and "/" in freq_str:
+                            try:
+                                allele_part, count_part = freq_str.split("/")
+                                allele = allele_part.split("=")[0]
+                                frequency = float(allele_part.split("=")[1])
+                                sample_count = int(count_part)
+
+                                frequencies.append(
+                                    {
+                                        "study": study,
+                                        "allele": allele,
+                                        "frequency": frequency,
+                                        "sample_count": sample_count,
+                                    }
+                                )
+                            except (ValueError, IndexError):
+                                # Skip malformed frequency entries
+                                continue
+
+                    parsed_data = {
+                        "frequencies": frequencies,
+                        "total_studies": len(frequencies),
+                    }
+
+                    result["data"] = parsed_data
+                    result["rsid"] = f"rs{rsid}"
+                else:
+                    result["status"] = "error"
+                    result["error"] = f"Variant rs{rsid} not found in dbSNP"
+            else:
+                result["status"] = "error"
+                result["error"] = "Invalid response format from NCBI E-utilities"
+
+        return result
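
All three classes in this hunk (dbsnp_tool.py, the +196 -58 entry in the file list) delegate their HTTP work to _make_request, inherited from NCBIEUtilsTool in the new ncbi_eutils_tool.py, which is added in this release but not shown here. A standalone sketch of the request dbSNPGetVariantByRsID effectively issues, assuming the standard NCBI E-utilities base URL since the one configured inside NCBIEUtilsTool is not visible in this hunk:

import requests

# Standalone approximation of dbSNPGetVariantByRsID.run({"rsid": "rs7412"}).
# The E-utilities base URL is an assumption; inside tooluniverse the call
# goes through NCBIEUtilsTool._make_request (rate-limited, not shown here).
EUTILS = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
rsid = "7412"  # 'rs' prefix stripped, as in the code above

summary = requests.get(
    f"{EUTILS}/esummary.fcgi",
    params={"db": "snp", "id": rsid, "retmode": "json"},
    timeout=30,
).json()
variant = summary["result"][rsid]
print(variant.get("chr"), variant.get("chrpos"), variant.get("snp_class"))

# Frequency strings in global_mafs look like "C=0.1505591/754":
# allele, then frequency, then the sample count after the slash.
for maf in variant.get("global_mafs", []):
    allele_part, count_part = maf["freq"].split("/")
    allele, frequency = allele_part.split("=")
    print(maf.get("study"), allele, float(frequency), int(count_part))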