tooluniverse 0.2.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tooluniverse might be problematic. Click here for more details.

Files changed (190) hide show
  1. tooluniverse/__init__.py +340 -4
  2. tooluniverse/admetai_tool.py +84 -0
  3. tooluniverse/agentic_tool.py +563 -0
  4. tooluniverse/alphafold_tool.py +96 -0
  5. tooluniverse/base_tool.py +129 -6
  6. tooluniverse/boltz_tool.py +207 -0
  7. tooluniverse/chem_tool.py +192 -0
  8. tooluniverse/compose_scripts/__init__.py +1 -0
  9. tooluniverse/compose_scripts/biomarker_discovery.py +293 -0
  10. tooluniverse/compose_scripts/comprehensive_drug_discovery.py +186 -0
  11. tooluniverse/compose_scripts/drug_safety_analyzer.py +89 -0
  12. tooluniverse/compose_scripts/literature_tool.py +34 -0
  13. tooluniverse/compose_scripts/output_summarizer.py +279 -0
  14. tooluniverse/compose_scripts/tool_description_optimizer.py +681 -0
  15. tooluniverse/compose_scripts/tool_discover.py +705 -0
  16. tooluniverse/compose_scripts/tool_graph_composer.py +448 -0
  17. tooluniverse/compose_tool.py +371 -0
  18. tooluniverse/ctg_tool.py +1002 -0
  19. tooluniverse/custom_tool.py +81 -0
  20. tooluniverse/dailymed_tool.py +108 -0
  21. tooluniverse/data/admetai_tools.json +155 -0
  22. tooluniverse/data/agentic_tools.json +1156 -0
  23. tooluniverse/data/alphafold_tools.json +87 -0
  24. tooluniverse/data/boltz_tools.json +9 -0
  25. tooluniverse/data/chembl_tools.json +16 -0
  26. tooluniverse/data/clait_tools.json +108 -0
  27. tooluniverse/data/clinicaltrials_gov_tools.json +326 -0
  28. tooluniverse/data/compose_tools.json +202 -0
  29. tooluniverse/data/dailymed_tools.json +70 -0
  30. tooluniverse/data/dataset_tools.json +646 -0
  31. tooluniverse/data/disease_target_score_tools.json +712 -0
  32. tooluniverse/data/efo_tools.json +17 -0
  33. tooluniverse/data/embedding_tools.json +319 -0
  34. tooluniverse/data/enrichr_tools.json +31 -0
  35. tooluniverse/data/europe_pmc_tools.json +22 -0
  36. tooluniverse/data/expert_feedback_tools.json +10 -0
  37. tooluniverse/data/fda_drug_adverse_event_tools.json +491 -0
  38. tooluniverse/data/fda_drug_labeling_tools.json +1 -1
  39. tooluniverse/data/fda_drugs_with_brand_generic_names_for_tool.py +76929 -148860
  40. tooluniverse/data/finder_tools.json +209 -0
  41. tooluniverse/data/gene_ontology_tools.json +113 -0
  42. tooluniverse/data/gwas_tools.json +1082 -0
  43. tooluniverse/data/hpa_tools.json +333 -0
  44. tooluniverse/data/humanbase_tools.json +47 -0
  45. tooluniverse/data/idmap_tools.json +74 -0
  46. tooluniverse/data/mcp_client_tools_example.json +113 -0
  47. tooluniverse/data/mcpautoloadertool_defaults.json +28 -0
  48. tooluniverse/data/medlineplus_tools.json +141 -0
  49. tooluniverse/data/monarch_tools.json +1 -1
  50. tooluniverse/data/openalex_tools.json +36 -0
  51. tooluniverse/data/opentarget_tools.json +1 -1
  52. tooluniverse/data/output_summarization_tools.json +101 -0
  53. tooluniverse/data/packages/bioinformatics_core_tools.json +1756 -0
  54. tooluniverse/data/packages/categorized_tools.txt +206 -0
  55. tooluniverse/data/packages/cheminformatics_tools.json +347 -0
  56. tooluniverse/data/packages/earth_sciences_tools.json +74 -0
  57. tooluniverse/data/packages/genomics_tools.json +776 -0
  58. tooluniverse/data/packages/image_processing_tools.json +38 -0
  59. tooluniverse/data/packages/machine_learning_tools.json +789 -0
  60. tooluniverse/data/packages/neuroscience_tools.json +62 -0
  61. tooluniverse/data/packages/original_tools.txt +0 -0
  62. tooluniverse/data/packages/physics_astronomy_tools.json +62 -0
  63. tooluniverse/data/packages/scientific_computing_tools.json +560 -0
  64. tooluniverse/data/packages/single_cell_tools.json +453 -0
  65. tooluniverse/data/packages/software_tools.json +4954 -0
  66. tooluniverse/data/packages/structural_biology_tools.json +396 -0
  67. tooluniverse/data/packages/visualization_tools.json +399 -0
  68. tooluniverse/data/pubchem_tools.json +215 -0
  69. tooluniverse/data/pubtator_tools.json +68 -0
  70. tooluniverse/data/rcsb_pdb_tools.json +1332 -0
  71. tooluniverse/data/reactome_tools.json +19 -0
  72. tooluniverse/data/semantic_scholar_tools.json +26 -0
  73. tooluniverse/data/special_tools.json +2 -25
  74. tooluniverse/data/tool_composition_tools.json +88 -0
  75. tooluniverse/data/toolfinderkeyword_defaults.json +34 -0
  76. tooluniverse/data/txagent_client_tools.json +9 -0
  77. tooluniverse/data/uniprot_tools.json +211 -0
  78. tooluniverse/data/url_fetch_tools.json +94 -0
  79. tooluniverse/data/uspto_downloader_tools.json +9 -0
  80. tooluniverse/data/uspto_tools.json +811 -0
  81. tooluniverse/data/xml_tools.json +3275 -0
  82. tooluniverse/dataset_tool.py +296 -0
  83. tooluniverse/default_config.py +165 -0
  84. tooluniverse/efo_tool.py +42 -0
  85. tooluniverse/embedding_database.py +630 -0
  86. tooluniverse/embedding_sync.py +396 -0
  87. tooluniverse/enrichr_tool.py +266 -0
  88. tooluniverse/europe_pmc_tool.py +52 -0
  89. tooluniverse/execute_function.py +1775 -95
  90. tooluniverse/extended_hooks.py +444 -0
  91. tooluniverse/gene_ontology_tool.py +194 -0
  92. tooluniverse/graphql_tool.py +158 -36
  93. tooluniverse/gwas_tool.py +358 -0
  94. tooluniverse/hpa_tool.py +1645 -0
  95. tooluniverse/humanbase_tool.py +389 -0
  96. tooluniverse/logging_config.py +254 -0
  97. tooluniverse/mcp_client_tool.py +764 -0
  98. tooluniverse/mcp_integration.py +413 -0
  99. tooluniverse/mcp_tool_registry.py +925 -0
  100. tooluniverse/medlineplus_tool.py +337 -0
  101. tooluniverse/openalex_tool.py +228 -0
  102. tooluniverse/openfda_adv_tool.py +283 -0
  103. tooluniverse/openfda_tool.py +393 -160
  104. tooluniverse/output_hook.py +1122 -0
  105. tooluniverse/package_tool.py +195 -0
  106. tooluniverse/pubchem_tool.py +158 -0
  107. tooluniverse/pubtator_tool.py +168 -0
  108. tooluniverse/rcsb_pdb_tool.py +38 -0
  109. tooluniverse/reactome_tool.py +108 -0
  110. tooluniverse/remote/boltz/boltz_mcp_server.py +50 -0
  111. tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +442 -0
  112. tooluniverse/remote/expert_feedback/human_expert_mcp_tools.py +2013 -0
  113. tooluniverse/remote/expert_feedback/simple_test.py +23 -0
  114. tooluniverse/remote/expert_feedback/start_web_interface.py +188 -0
  115. tooluniverse/remote/expert_feedback/web_only_interface.py +0 -0
  116. tooluniverse/remote/expert_feedback_mcp/human_expert_mcp_server.py +1611 -0
  117. tooluniverse/remote/expert_feedback_mcp/simple_test.py +34 -0
  118. tooluniverse/remote/expert_feedback_mcp/start_web_interface.py +91 -0
  119. tooluniverse/remote/immune_compass/compass_tool.py +327 -0
  120. tooluniverse/remote/pinnacle/pinnacle_tool.py +328 -0
  121. tooluniverse/remote/transcriptformer/transcriptformer_tool.py +586 -0
  122. tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +61 -0
  123. tooluniverse/remote/uspto_downloader/uspto_downloader_tool.py +120 -0
  124. tooluniverse/remote_tool.py +99 -0
  125. tooluniverse/restful_tool.py +53 -30
  126. tooluniverse/scripts/generate_tool_graph.py +408 -0
  127. tooluniverse/scripts/visualize_tool_graph.py +829 -0
  128. tooluniverse/semantic_scholar_tool.py +62 -0
  129. tooluniverse/smcp.py +2452 -0
  130. tooluniverse/smcp_server.py +975 -0
  131. tooluniverse/test/mcp_server_test.py +0 -0
  132. tooluniverse/test/test_admetai_tool.py +370 -0
  133. tooluniverse/test/test_agentic_tool.py +129 -0
  134. tooluniverse/test/test_alphafold_tool.py +71 -0
  135. tooluniverse/test/test_chem_tool.py +37 -0
  136. tooluniverse/test/test_compose_lieraturereview.py +63 -0
  137. tooluniverse/test/test_compose_tool.py +448 -0
  138. tooluniverse/test/test_dailymed.py +69 -0
  139. tooluniverse/test/test_dataset_tool.py +200 -0
  140. tooluniverse/test/test_disease_target_score.py +56 -0
  141. tooluniverse/test/test_drugbank_filter_examples.py +179 -0
  142. tooluniverse/test/test_efo.py +31 -0
  143. tooluniverse/test/test_enrichr_tool.py +21 -0
  144. tooluniverse/test/test_europe_pmc_tool.py +20 -0
  145. tooluniverse/test/test_fda_adv.py +95 -0
  146. tooluniverse/test/test_fda_drug_labeling.py +91 -0
  147. tooluniverse/test/test_gene_ontology_tools.py +66 -0
  148. tooluniverse/test/test_gwas_tool.py +139 -0
  149. tooluniverse/test/test_hpa.py +625 -0
  150. tooluniverse/test/test_humanbase_tool.py +20 -0
  151. tooluniverse/test/test_idmap_tools.py +61 -0
  152. tooluniverse/test/test_mcp_server.py +211 -0
  153. tooluniverse/test/test_mcp_tool.py +247 -0
  154. tooluniverse/test/test_medlineplus.py +220 -0
  155. tooluniverse/test/test_openalex_tool.py +32 -0
  156. tooluniverse/test/test_opentargets.py +28 -0
  157. tooluniverse/test/test_pubchem_tool.py +116 -0
  158. tooluniverse/test/test_pubtator_tool.py +37 -0
  159. tooluniverse/test/test_rcsb_pdb_tool.py +86 -0
  160. tooluniverse/test/test_reactome.py +54 -0
  161. tooluniverse/test/test_semantic_scholar_tool.py +24 -0
  162. tooluniverse/test/test_software_tools.py +147 -0
  163. tooluniverse/test/test_tool_description_optimizer.py +49 -0
  164. tooluniverse/test/test_tool_finder.py +26 -0
  165. tooluniverse/test/test_tool_finder_llm.py +252 -0
  166. tooluniverse/test/test_tools_find.py +195 -0
  167. tooluniverse/test/test_uniprot_tools.py +74 -0
  168. tooluniverse/test/test_uspto_tool.py +72 -0
  169. tooluniverse/test/test_xml_tool.py +113 -0
  170. tooluniverse/tool_finder_embedding.py +267 -0
  171. tooluniverse/tool_finder_keyword.py +693 -0
  172. tooluniverse/tool_finder_llm.py +699 -0
  173. tooluniverse/tool_graph_web_ui.py +955 -0
  174. tooluniverse/tool_registry.py +416 -0
  175. tooluniverse/uniprot_tool.py +155 -0
  176. tooluniverse/url_tool.py +253 -0
  177. tooluniverse/uspto_tool.py +240 -0
  178. tooluniverse/utils.py +369 -41
  179. tooluniverse/xml_tool.py +369 -0
  180. tooluniverse-1.0.0.dist-info/METADATA +377 -0
  181. tooluniverse-1.0.0.dist-info/RECORD +186 -0
  182. tooluniverse-1.0.0.dist-info/entry_points.txt +9 -0
  183. tooluniverse/generate_mcp_tools.py +0 -113
  184. tooluniverse/mcp_server.py +0 -3340
  185. tooluniverse-0.2.0.dist-info/METADATA +0 -139
  186. tooluniverse-0.2.0.dist-info/RECORD +0 -21
  187. tooluniverse-0.2.0.dist-info/entry_points.txt +0 -4
  188. {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.0.dist-info}/WHEEL +0 -0
  189. {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.0.dist-info}/licenses/LICENSE +0 -0
  190. {tooluniverse-0.2.0.dist-info → tooluniverse-1.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,206 @@
1
+ "name": "ASE (Atomic Simulation Environment)"
2
+ "name": "AnnData"
3
+ "name": "Arboreto"
4
+ "name": "BioPandas"
5
+ "name": "Biopython"
6
+ "name": "Biotite"
7
+ "name": "COBRApy"
8
+ "name": "COBRApy"
9
+ "name": "Cellpose"
10
+ "name": "Clair3"
11
+ "name": "Cooler"
12
+ "name": "DScribe"
13
+ "name": "DeepChem"
14
+ "name": "DeepPurpose"
15
+ "name": "DeepXDE"
16
+ "name": "Descriptastorus"
17
+ "name": "DiffDock"
18
+ "name": "FAN-C"
19
+ "name": "Faiss"
20
+ "name": "Flask"
21
+ "name": "FlowIO"
22
+ "name": "FlowKit"
23
+ "name": "FlowUtils"
24
+ "name": "GSEApy"
25
+ "name": "Hyperopt"
26
+ "name": "JCVI"
27
+ "name": "MAGeCK"
28
+ "name": "MDAnalysis"
29
+ "name": "Matplotlib"
30
+ "name": "MuData"
31
+ "name": "NetworkX"
32
+ "name": "NumPy"
33
+ "name": "Numba"
34
+ "name": "OpenBabel"
35
+ "name": "OpenCV"
36
+ "name": "OpenChem"
37
+ "name": "OpenMM"
38
+ "name": "PDBFixer"
39
+ "name": "PLIP"
40
+ "name": "PlantCV"
41
+ "name": "ProDy"
42
+ "name": "PyDESeq2"
43
+ "name": "PyKalman"
44
+ "name": "PyLiftover"
45
+ "name": "PyMassSpec"
46
+ "name": "PyMed"
47
+ "name": "PyPDF2"
48
+ "name": "PyRanges"
49
+ "name": "PySCF"
50
+ "name": "PyScreener"
51
+ "name": "PyTDC"
52
+ "name": "PyTorch Geometric"
53
+ "name": "PyTorch"
54
+ "name": "RDKit"
55
+ "name": "ReportLab"
56
+ "name": "Requests"
57
+ "name": "Scanpy"
58
+ "name": "SchNetPack"
59
+ "name": "SciPy"
60
+ "name": "Scrublet"
61
+ "name": "Seaborn"
62
+ "name": "SymPy"
63
+ "name": "TileDB"
64
+ "name": "TileDB-SOMA"
65
+ "name": "UMAP"
66
+ "name": "ViennaRNA"
67
+ "name": "arxiv"
68
+ "name": "cellxgene-census"
69
+ "name": "cryosparc-tools"
70
+ "name": "cyvcf2"
71
+ "name": "deepTools"
72
+ "name": "get_anndata_info"
73
+ "name": "get_arboreto_info"
74
+ "name": "get_arxiv_info"
75
+ "name": "get_ase_info"
76
+ "name": "get_biopandas_info"
77
+ "name": "get_biopython_info"
78
+ "name": "get_biotite_info"
79
+ "name": "get_cellpose_info"
80
+ "name": "get_cellxgene_census_info"
81
+ "name": "get_clair3_info"
82
+ "name": "get_cobra_info"
83
+ "name": "get_cobrapy_info"
84
+ "name": "get_cooler_info"
85
+ "name": "get_cryosparc_tools_info"
86
+ "name": "get_cyvcf2_info"
87
+ "name": "get_deepchem_info"
88
+ "name": "get_deeppurpose_info"
89
+ "name": "get_deeptools_info"
90
+ "name": "get_deepxde_info"
91
+ "name": "get_descriptastorus_info"
92
+ "name": "get_diffdock_info"
93
+ "name": "get_dscribe_info"
94
+ "name": "get_faiss_info"
95
+ "name": "get_fanc_info"
96
+ "name": "get_flask_info"
97
+ "name": "get_flowio_info"
98
+ "name": "get_flowkit_info"
99
+ "name": "get_flowutils_info"
100
+ "name": "get_gget_info"
101
+ "name": "get_googlesearch_python_info"
102
+ "name": "get_gseapy_info"
103
+ "name": "get_h5py_info"
104
+ "name": "get_harmony_pytorch_info"
105
+ "name": "get_hmmlearn_info"
106
+ "name": "get_hyperopt_info"
107
+ "name": "get_igraph_info"
108
+ "name": "get_jcvi_info"
109
+ "name": "get_khmer_info"
110
+ "name": "get_lifelines_info"
111
+ "name": "get_loompy_info"
112
+ "name": "get_mageck_info"
113
+ "name": "get_matplotlib_info"
114
+ "name": "get_mdanalysis_info"
115
+ "name": "get_msprime_info"
116
+ "name": "get_mudata_info"
117
+ "name": "get_networkx_info"
118
+ "name": "get_numba_info"
119
+ "name": "get_numpy_info"
120
+ "name": "get_openbabel_info"
121
+ "name": "get_openchem_info"
122
+ "name": "get_opencv_info"
123
+ "name": "get_openmm_info"
124
+ "name": "get_optlang_info"
125
+ "name": "get_pandas_info"
126
+ "name": "get_pdbfixer_info"
127
+ "name": "get_plantcv_info"
128
+ "name": "get_plip_info"
129
+ "name": "get_poliastro_info"
130
+ "name": "get_prody_info"
131
+ "name": "get_pybedtools_info"
132
+ "name": "get_pybigwig_info"
133
+ "name": "get_pybigwig_info"
134
+ "name": "get_pydeseq2_info"
135
+ "name": "get_pyfaidx_info"
136
+ "name": "get_pykalman_info"
137
+ "name": "get_pyliftover_info"
138
+ "name": "get_pymassspec_info"
139
+ "name": "get_pymed_info"
140
+ "name": "get_pymzml_info"
141
+ "name": "get_pypdf2_info"
142
+ "name": "get_pyranges_info"
143
+ "name": "get_pysam_info"
144
+ "name": "get_pyscenic_info"
145
+ "name": "get_pyscf_info"
146
+ "name": "get_pyscreener_info"
147
+ "name": "get_pytdc_info"
148
+ "name": "get_python_libsbml_info"
149
+ "name": "get_pytorch_info"
150
+ "name": "get_rdkit_info"
151
+ "name": "get_reportlab_info"
152
+ "name": "get_requests_info"
153
+ "name": "get_ruptures_info"
154
+ "name": "get_scanpy_info"
155
+ "name": "get_schnetpack_info"
156
+ "name": "get_scholarly_info"
157
+ "name": "get_scikit_bio_info"
158
+ "name": "get_scikit_image_info"
159
+ "name": "get_scikit_learn_info"
160
+ "name": "get_scipy_info"
161
+ "name": "get_scrublet_info"
162
+ "name": "get_scvelo_info"
163
+ "name": "get_seaborn_info"
164
+ "name": "get_souporcell_info"
165
+ "name": "get_statsmodels_info"
166
+ "name": "get_sympy_info"
167
+ "name": "get_tiledb_info"
168
+ "name": "get_tiledbsoma_info"
169
+ "name": "get_torch_geometric_info"
170
+ "name": "get_tqdm_info"
171
+ "name": "get_trackpy_info"
172
+ "name": "get_tskit_info"
173
+ "name": "get_umap_learn_info"
174
+ "name": "get_viennarna_info"
175
+ "name": "gget"
176
+ "name": "googlesearch-python"
177
+ "name": "h5py"
178
+ "name": "harmony-pytorch"
179
+ "name": "hmmlearn"
180
+ "name": "igraph"
181
+ "name": "khmer"
182
+ "name": "lifelines"
183
+ "name": "loompy"
184
+ "name": "msprime"
185
+ "name": "optlang"
186
+ "name": "pandas"
187
+ "name": "poliastro"
188
+ "name": "pyBigWig"
189
+ "name": "pyBigWig"
190
+ "name": "pySCENIC"
191
+ "name": "pybedtools"
192
+ "name": "pyfaidx"
193
+ "name": "pymzML"
194
+ "name": "pysam"
195
+ "name": "python-libsbml"
196
+ "name": "ruptures"
197
+ "name": "scVelo"
198
+ "name": "scholarly"
199
+ "name": "scikit-bio"
200
+ "name": "scikit-image"
201
+ "name": "scikit-learn"
202
+ "name": "souporcell"
203
+ "name": "statsmodels"
204
+ "name": "tqdm"
205
+ "name": "trackpy"
206
+ "name": "tskit"
@@ -0,0 +1,347 @@
1
+ [
2
+ {
3
+ "type": "PackageTool",
4
+ "name": "get_rdkit_info",
5
+ "description": "Get comprehensive information about RDKit – cheminformatics and machine learning toolkit",
6
+ "parameter": {
7
+ "type": "object",
8
+ "properties": {
9
+ "include_examples": {
10
+ "type": "boolean",
11
+ "description": "Whether to include usage examples and quick start guide",
12
+ "default": true
13
+ }
14
+ }
15
+ },
16
+ "package_name": "rdkit",
17
+ "local_info": {
18
+ "name": "RDKit",
19
+ "description": "Collection of cheminformatics and machine learning tools. Provides functionality for molecular manipulation, descriptor calculation, molecular fingerprinting, and machine learning on chemical data.",
20
+ "category": "Cheminformatics",
21
+ "import_name": "rdkit",
22
+ "popularity": 90,
23
+ "keywords": [
24
+ "cheminformatics",
25
+ "molecular descriptors",
26
+ "SMILES",
27
+ "molecular fingerprints",
28
+ "drug discovery"
29
+ ],
30
+ "documentation": "https://www.rdkit.org/docs/",
31
+ "repository": "https://github.com/rdkit/rdkit",
32
+ "installation": {
33
+ "pip": "pip install rdkit",
34
+ "conda": "conda install -c conda-forge rdkit"
35
+ },
36
+ "usage_example": "from rdkit import Chem\nfrom rdkit.Chem import Descriptors, rdMolDescriptors\n\n# Create molecule from SMILES\nmol = Chem.MolFromSmiles('CCO') # Ethanol\n\n# Calculate molecular descriptors\nmw = Descriptors.MolWt(mol)\nlogp = Descriptors.MolLogP(mol)\nprint(f'Molecular weight: {mw:.2f}')\nprint(f'LogP: {logp:.2f}')\n\n# Generate molecular fingerprint\nfp = rdMolDescriptors.GetMorganFingerprintAsBitVect(mol, 2)\nprint(f'Fingerprint: {fp.ToBitString()[:20]}...')",
37
+ "quick_start": [
38
+ "Install: conda install -c conda-forge rdkit",
39
+ "Create molecule: mol = Chem.MolFromSmiles('CCO')",
40
+ "Descriptors: Descriptors.MolWt(mol), Descriptors.MolLogP(mol)",
41
+ "Fingerprints: rdMolDescriptors.GetMorganFingerprintAsBitVect()",
42
+ "Substructure: mol.HasSubstructMatch(pattern)"
43
+ ]
44
+ }
45
+ },
46
+ {
47
+ "type": "PackageTool",
48
+ "name": "get_openbabel_info",
49
+ "description": "Get comprehensive information about OpenBabel – chemical format conversion and analysis",
50
+ "parameter": {
51
+ "type": "object",
52
+ "properties": {
53
+ "include_examples": {
54
+ "type": "boolean",
55
+ "description": "Whether to include usage examples and quick start guide",
56
+ "default": true
57
+ }
58
+ }
59
+ },
60
+ "package_name": "openbabel",
61
+ "local_info": {
62
+ "name": "OpenBabel",
63
+ "description": "Chemical toolbox designed to convert, filter, and analyze chemical data. Supports reading and writing over 110 chemical file formats and provides extensive molecular manipulation capabilities.",
64
+ "category": "Cheminformatics",
65
+ "import_name": "openbabel",
66
+ "popularity": 75,
67
+ "keywords": [
68
+ "chemical file formats",
69
+ "format conversion",
70
+ "molecular structure",
71
+ "SMILES",
72
+ "SDF"
73
+ ],
74
+ "documentation": "http://openbabel.org/docs/dev/",
75
+ "repository": "https://github.com/openbabel/openbabel",
76
+ "installation": {
77
+ "pip": "pip install openbabel-wheel",
78
+ "conda": "conda install -c conda-forge openbabel"
79
+ },
80
+ "usage_example": "from openbabel import openbabel as ob\n\n# Create molecule\nmol = ob.OBMol()\n# Add atoms and bonds programmatically\n# or read from file\n\n# Convert between formats\nconv = ob.OBConversion()\nconv.SetInAndOutFormats('smi', 'sdf')\nconv.Convert(mol, mol) # Convert SMILES to SDF\n\n# Calculate properties\nprint(f'Molecular weight: {mol.GetMolWt()}')\nprint(f'Formula: {mol.GetFormula()}')",
81
+ "quick_start": [
82
+ "Install: conda install -c conda-forge openbabel",
83
+ "Import: from openbabel import openbabel as ob",
84
+ "Create: mol = ob.OBMol()",
85
+ "Convert: conv = ob.OBConversion(); conv.SetInAndOutFormats()",
86
+ "Properties: mol.GetMolWt(), mol.GetFormula()"
87
+ ]
88
+ }
89
+ },
90
+ {
91
+ "type": "PackageTool",
92
+ "name": "get_deepchem_info",
93
+ "description": "Get comprehensive information about DeepChem – an open-source toolkit that brings advanced AI/ML techniques to drug discovery, materials science and quantum chemistry.",
94
+ "parameter": {
95
+ "type": "object",
96
+ "properties": {
97
+ "include_examples": {
98
+ "type": "boolean",
99
+ "description": "Whether to include usage examples and quick start guide",
100
+ "default": true
101
+ }
102
+ }
103
+ },
104
+ "package_name": "deepchem",
105
+ "local_info": {
106
+ "name": "DeepChem",
107
+ "description": "A powerful Python library providing state-of-the-art deep-learning models, featurizers and workflows for chemistry, biology, materials science and beyond.",
108
+ "category": "AI for Science / Cheminformatics",
109
+ "import_name": "deepchem",
110
+ "popularity": 80,
111
+ "keywords": [
112
+ "drug discovery",
113
+ "molecular machine learning",
114
+ "graph neural networks",
115
+ "molecular descriptors",
116
+ "materials"
117
+ ],
118
+ "documentation": "https://deepchem.io/docs/",
119
+ "repository": "https://github.com/deepchem/deepchem",
120
+ "installation": {
121
+ "pip": "pip install deepchem",
122
+ "conda": "conda install -c conda-forge deepchem"
123
+ },
124
+ "usage_example": "import deepchem as dc\nfrom deepchem.models import GraphConvModel\n\ntasks, datasets, _ = dc.molnet.load_delaney(featurizer='GraphConv')\ntrain, valid, test = datasets\nmodel = GraphConvModel(len(tasks), mode='regression')\nmodel.fit(train, nb_epoch=50)\nprint(model.evaluate(test, metrics=[dc.metrics.Metric(dc.metrics.pearson_r)]))",
125
+ "quick_start": [
126
+ "Install: pip install deepchem",
127
+ "Load data: tasks, datasets, _ = dc.molnet.load_tox21()",
128
+ "Create model: model = dc.models.GraphConvModel(n_tasks)",
129
+ "Train: model.fit(train_dataset)",
130
+ "Evaluate: model.evaluate(test_dataset)"
131
+ ]
132
+ }
133
+ },
134
+ {
135
+ "type": "PackageTool",
136
+ "name": "get_dscribe_info",
137
+ "description": "Get comprehensive information about DScribe – a library for generating machine-learning descriptors for materials and molecules.",
138
+ "parameter": {
139
+ "type": "object",
140
+ "properties": {
141
+ "include_examples": {
142
+ "type": "boolean",
143
+ "description": "Whether to include usage examples and quick start guide",
144
+ "default": true
145
+ }
146
+ }
147
+ },
148
+ "package_name": "dscribe",
149
+ "local_info": {
150
+ "name": "DScribe",
151
+ "description": "Python package that turns atomic structures into fixed-size numerical fingerprints (descriptors) for machine learning, similarity analysis and visualisation.",
152
+ "category": "AI for Science / Materials Descriptors",
153
+ "import_name": "dscribe",
154
+ "popularity": 50,
155
+ "keywords": [
156
+ "molecular descriptors",
157
+ "materials descriptors",
158
+ "machine learning",
159
+ "SOAP",
160
+ "atomic structures"
161
+ ],
162
+ "documentation": "https://singroup.github.io/dscribe/latest/",
163
+ "repository": "https://github.com/SINGROUP/dscribe",
164
+ "installation": {
165
+ "pip": "pip install dscribe",
166
+ "conda": "conda install -c conda-forge dscribe"
167
+ },
168
+ "usage_example": "from ase.build import molecule\nfrom dscribe.descriptors import SOAP\nimport numpy as np\n\natoms = molecule('CH4')\nsoap = SOAP(species=['C', 'H'], rcut=5.0, nmax=8, lmax=6)\nvec = soap.create(atoms)\nprint(np.linalg.norm(vec))",
169
+ "quick_start": [
170
+ "Install: pip install dscribe",
171
+ "Import: from dscribe.descriptors import SOAP",
172
+ "Create descriptor: soap = SOAP(species=['C', 'H'])",
173
+ "Generate: vec = soap.create(atoms)",
174
+ "Use for ML: Train models with descriptor vectors"
175
+ ]
176
+ }
177
+ },
178
+ {
179
+ "type": "PackageTool",
180
+ "name": "get_cobra_info",
181
+ "description": "Get comprehensive information about COBRApy – constraint-based metabolic modeling",
182
+ "parameter": {
183
+ "type": "object",
184
+ "properties": {
185
+ "include_examples": {
186
+ "type": "boolean",
187
+ "description": "Whether to include usage examples and quick start guide",
188
+ "default": true
189
+ }
190
+ }
191
+ },
192
+ "package_name": "cobra",
193
+ "local_info": {
194
+ "name": "COBRApy",
195
+ "description": "Constraint-Based Reconstruction and Analysis package for Python. Provides tools for building, analyzing, and manipulating genome-scale metabolic models.",
196
+ "category": "Systems Biology",
197
+ "import_name": "cobra",
198
+ "popularity": 80,
199
+ "keywords": [
200
+ "metabolic modeling",
201
+ "systems biology",
202
+ "flux balance analysis",
203
+ "genome-scale models",
204
+ "constraint-based"
205
+ ],
206
+ "documentation": "https://cobrapy.readthedocs.io/",
207
+ "repository": "https://github.com/opencobra/cobrapy",
208
+ "installation": {
209
+ "pip": "pip install cobra",
210
+ "conda": "conda install -c bioconda cobra"
211
+ },
212
+ "usage_example": "import cobra\nfrom cobra.test import create_test_model\n\n# Load or create a model\nmodel = create_test_model('textbook')\n\n# Optimize the model\nsolution = model.optimize()\nprint(f'Objective value: {solution.objective_value}')\nprint(f'Status: {solution.status}')\n\n# Analyze reactions\nfor reaction in model.reactions[:5]:\n print(f'{reaction.id}: {reaction.reaction}')",
213
+ "quick_start": [
214
+ "Install: pip install cobra",
215
+ "Load model: model = cobra.io.read_sbml_model('model.xml')",
216
+ "Optimize: solution = model.optimize()",
217
+ "Analyze: Check fluxes, reactions, metabolites",
218
+ "Modify: Add/remove reactions, change bounds"
219
+ ]
220
+ }
221
+ },
222
+ {
223
+ "type": "PackageTool",
224
+ "name": "get_openchem_info",
225
+ "description": "Get comprehensive information about OpenChem – deep learning toolkit for drug discovery",
226
+ "parameter": {
227
+ "type": "object",
228
+ "properties": {
229
+ "info_type": {
230
+ "type": "string",
231
+ "enum": [
232
+ "overview",
233
+ "installation",
234
+ "usage",
235
+ "documentation"
236
+ ],
237
+ "description": "Type of information to retrieve about OpenChem"
238
+ }
239
+ },
240
+ "required": [
241
+ "info_type"
242
+ ]
243
+ },
244
+ "package_name": "openchem",
245
+ "local_info": {
246
+ "name": "OpenChem",
247
+ "description": "Deep learning toolkit for computational chemistry and drug discovery. Provides implementations of state-of-the-art molecular property prediction models, molecular generation, and optimization algorithms.",
248
+ "category": "Drug Discovery / Machine Learning",
249
+ "import_name": "openchem",
250
+ "popularity": 70,
251
+ "keywords": [
252
+ "drug discovery",
253
+ "molecular properties",
254
+ "deep learning",
255
+ "molecular generation",
256
+ "QSAR"
257
+ ],
258
+ "documentation": "https://github.com/Mariewelt/OpenChem",
259
+ "repository": "https://github.com/Mariewelt/OpenChem",
260
+ "installation": {
261
+ "pip": "pip install openchem",
262
+ "conda": "conda install -c conda-forge openchem"
263
+ },
264
+ "usage_example": "# OpenChem deep learning for drug discovery\n# This demonstrates molecular property prediction concepts\n\nimport numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt\nfrom sklearn.model_selection import train_test_split\nfrom sklearn.ensemble import RandomForestRegressor\nfrom sklearn.metrics import mean_squared_error, r2_score\nimport tempfile\nimport os\nfrom collections import defaultdict\n\nprint('OpenChem - Deep Learning for Drug Discovery')\nprint('=' * 45)\n\n# Overview of OpenChem capabilities\nprint('OpenChem Features:')\nprint('• Molecular property prediction (ADMET, bioactivity)')\nprint('• Graph neural networks for molecules')\nprint('• Molecular generation and optimization')\nprint('• Multi-task learning for drug discovery')\nprint('• Molecular descriptors and fingerprints')\nprint('• Integration with PyTorch and other ML frameworks')\n\nprint('\\nSupported Molecular Representations:')\nprint('• SMILES strings')\nprint('• Molecular graphs')\nprint('• 3D conformations')\nprint('• Molecular fingerprints (ECFP, MACCS, etc.)')\nprint('• Physicochemical descriptors')\n\n# Simulate molecular dataset\nprint('\\n=== Molecular Dataset Simulation ===')\n\nnp.random.seed(42)\n\n# Generate synthetic molecular data\nn_molecules = 1000\nprint(f'Generating {n_molecules} synthetic molecules...')\n\n# Molecular descriptors (simplified)\ndescriptor_names = [\n 'molecular_weight', 'logP', 'num_donors', 'num_acceptors',\n 'tpsa', 'num_rotatable_bonds', 'num_aromatic_rings',\n 'formal_charge', 'num_heteroatoms', 'fraction_csp3'\n]\n\n# Generate realistic molecular descriptors\nmolecular_data = []\nfor i in range(n_molecules):\n # Molecular weight: 150-600 Da (drug-like range)\n mw = np.random.normal(350, 100)\n mw = np.clip(mw, 150, 600)\n \n # LogP: -2 to 6 (lipophilicity)\n logP = np.random.normal(2.5, 1.5)\n logP = np.clip(logP, -2, 6)\n \n # Hydrogen bond donors: 0-5\n donors = np.random.poisson(2)\n donors = np.clip(donors, 0, 5)\n \n # Hydrogen bond acceptors: 0-10\n acceptors = np.random.poisson(4)\n acceptors = np.clip(acceptors, 0, 10)\n \n # Topological polar surface area: 0-200 Ų\n tpsa = np.random.gamma(2, 30)\n tpsa = np.clip(tpsa, 0, 200)\n \n # Rotatable bonds: 0-15\n rot_bonds = np.random.poisson(5)\n rot_bonds = np.clip(rot_bonds, 0, 15)\n \n # Aromatic rings: 0-4\n aromatic_rings = np.random.poisson(2)\n aromatic_rings = np.clip(aromatic_rings, 0, 4)\n \n # Formal charge: typically 0, sometimes ±1\n formal_charge = np.random.choice([0, 0, 0, 0, 1, -1])\n \n # Heteroatoms: 1-8\n heteroatoms = np.random.poisson(3) + 1\n heteroatoms = np.clip(heteroatoms, 1, 8)\n \n # Fraction sp3 carbons: 0-1\n frac_csp3 = np.random.beta(2, 2)\n \n molecule = {\n 'id': f'MOL_{i:04d}',\n 'molecular_weight': mw,\n 'logP': logP,\n 'num_donors': donors,\n 'num_acceptors': acceptors,\n 'tpsa': tpsa,\n 'num_rotatable_bonds': rot_bonds,\n 'num_aromatic_rings': aromatic_rings,\n 'formal_charge': formal_charge,\n 'num_heteroatoms': heteroatoms,\n 'fraction_csp3': frac_csp3\n }\n \n molecular_data.append(molecule)\n\n# Convert to DataFrame\ndf = pd.DataFrame(molecular_data)\nprint(f'Generated molecular dataset: {df.shape}')\nprint(f'Descriptors: {len(descriptor_names)}')\n\n# Show basic statistics\nprint('\\nDataset statistics:')\nprint(df[descriptor_names].describe().round(2))\n\n# Generate target properties\nprint('\\n=== Target Property Generation ===')\n\n# Simulate bioactivity (IC50 values)\nprint('Generating bioactivity data (IC50 values)...')\n\ndef calculate_bioactivity(row):\n \"\"\"Simulate bioactivity based on molecular descriptors\"\"\"\n # Lipinski's rule of five compliance\n lipinski_score = 0\n if row['molecular_weight'] <= 500: lipinski_score += 1\n if row['logP'] <= 5: lipinski_score += 1\n if row['num_donors'] <= 5: lipinski_score += 1\n if row['num_acceptors'] <= 10: lipinski_score += 1\n \n # Base activity influenced by Lipinski compliance\n base_activity = 5.0 + (lipinski_score - 2.0) * 1.5\n \n # Additional molecular factors\n tpsa_factor = -0.01 * max(0, row['tpsa'] - 90) # Penalty for high TPSA\n flexibility_factor = -0.1 * max(0, row['num_rotatable_bonds'] - 7) # Penalty for high flexibility\n aromatic_factor = 0.3 * min(row['num_aromatic_rings'], 3) # Bonus for aromatics (up to 3)\n \n # Combined activity (pIC50)\n activity = base_activity + tpsa_factor + flexibility_factor + aromatic_factor\n \n # Add some noise\n activity += np.random.normal(0, 0.5)\n \n # Ensure reasonable range (4-9 pIC50)\n activity = np.clip(activity, 4.0, 9.0)\n \n return activity\n\n# Calculate bioactivity\ndf['pIC50'] = df.apply(calculate_bioactivity, axis=1)\n\n# Generate additional properties\nprint('Generating ADMET properties...')\n\n# Solubility (LogS)\ndef calculate_solubility(row):\n \"\"\"Simulate aqueous solubility\"\"\"\n # Lipophilicity penalty\n logP_penalty = -0.5 * max(0, row['logP'] - 2)\n \n # Molecular weight penalty\n mw_penalty = -0.005 * max(0, row['molecular_weight'] - 300)\n \n # TPSA bonus (polar surface area helps solubility)\n tpsa_bonus = 0.01 * min(row['tpsa'], 100)\n \n # Base solubility\n base_solubility = -2.0\n \n solubility = base_solubility + logP_penalty + mw_penalty + tpsa_bonus\n solubility += np.random.normal(0, 0.3)\n \n return np.clip(solubility, -6.0, 1.0)\n\ndf['logS'] = df.apply(calculate_solubility, axis=1)\n\n# Permeability (Caco-2)\ndef calculate_permeability(row):\n \"\"\"Simulate cell permeability\"\"\"\n # LogP correlation\n logP_factor = 0.3 * row['logP']\n \n # TPSA penalty\n tpsa_penalty = -0.02 * row['tpsa']\n \n # Molecular weight penalty\n mw_penalty = -0.003 * row['molecular_weight']\n \n # Base permeability (log Papp)\n base_perm = -4.5\n \n permeability = base_perm + logP_factor + tpsa_penalty + mw_penalty\n permeability += np.random.normal(0, 0.4)\n \n return np.clip(permeability, -7.0, -3.0)\n\ndf['log_Papp'] = df.apply(calculate_permeability, axis=1)\n\n# Half-life (t1/2)\ndef calculate_half_life(row):\n \"\"\"Simulate metabolic stability\"\"\"\n # Molecular complexity factor\n complexity = row['num_rotatable_bonds'] + row['num_heteroatoms']\n complexity_factor = -0.05 * complexity\n \n # Aromatic stabilization\n aromatic_factor = 0.1 * row['num_aromatic_rings']\n \n # Base half-life (log hours)\n base_t_half = 1.0\n \n t_half = base_t_half + complexity_factor + aromatic_factor\n t_half += np.random.normal(0, 0.3)\n \n return np.clip(t_half, -1.0, 3.0)\n\ndf['log_t_half'] = df.apply(calculate_half_life, axis=1)\n\nprint(f'Generated properties: pIC50, logS, log_Papp, log_t_half')\nprint(f'Property value ranges:')\nfor prop in ['pIC50', 'logS', 'log_Papp', 'log_t_half']:\n print(f' {prop}: {df[prop].min():.2f} to {df[prop].max():.2f}')\n\n# Apply drug-likeness filters\nprint('\\n=== Drug-likeness Analysis ===')\n\ndef lipinski_filter(row):\n \"\"\"Apply Lipinski's Rule of Five\"\"\"\n violations = 0\n if row['molecular_weight'] > 500: violations += 1\n if row['logP'] > 5: violations += 1\n if row['num_donors'] > 5: violations += 1\n if row['num_acceptors'] > 10: violations += 1\n return violations\n\ndef veber_filter(row):\n \"\"\"Apply Veber's rules for oral bioavailability\"\"\"\n violations = 0\n if row['tpsa'] > 140: violations += 1\n if row['num_rotatable_bonds'] > 10: violations += 1\n return violations\n\ndf['lipinski_violations'] = df.apply(lipinski_filter, axis=1)\ndf['veber_violations'] = df.apply(veber_filter, axis=1)\ndf['drug_like'] = (df['lipinski_violations'] == 0) & (df['veber_violations'] == 0)\n\nprint(f'Drug-likeness analysis:')\nprint(f' Lipinski compliant (0 violations): {(df[\"lipinski_violations\"] == 0).sum()}')\nprint(f' Veber compliant (0 violations): {(df[\"veber_violations\"] == 0).sum()}')\nprint(f' Overall drug-like: {df[\"drug_like\"].sum()}')\nprint(f' Drug-like percentage: {df[\"drug_like\"].mean():.1%}')\n\n# Machine learning model training\nprint('\\n=== Machine Learning Model Training ===')\n\n# Prepare features and targets\nfeature_cols = descriptor_names\ntarget_cols = ['pIC50', 'logS', 'log_Papp', 'log_t_half']\n\nX = df[feature_cols].values\ny = df[target_cols].values\n\nprint(f'Feature matrix shape: {X.shape}')\nprint(f'Target matrix shape: {y.shape}')\n\n# Split data\nX_train, X_test, y_train, y_test = train_test_split(\n X, y, test_size=0.2, random_state=42\n)\n\nprint(f'Training set: {X_train.shape[0]} molecules')\nprint(f'Test set: {X_test.shape[0]} molecules')\n\n# Train models for each property\nmodels = {}\nperformance = {}\n\nfor i, target_name in enumerate(target_cols):\n print(f'\\nTraining model for {target_name}...')\n \n # Random Forest model\n model = RandomForestRegressor(n_estimators=100, random_state=42)\n model.fit(X_train, y_train[:, i])\n \n # Predictions\n y_pred_train = model.predict(X_train)\n y_pred_test = model.predict(X_test)\n \n # Performance metrics\n train_r2 = r2_score(y_train[:, i], y_pred_train)\n test_r2 = r2_score(y_test[:, i], y_pred_test)\n train_rmse = np.sqrt(mean_squared_error(y_train[:, i], y_pred_train))\n test_rmse = np.sqrt(mean_squared_error(y_test[:, i], y_pred_test))\n \n models[target_name] = model\n performance[target_name] = {\n 'train_r2': train_r2,\n 'test_r2': test_r2,\n 'train_rmse': train_rmse,\n 'test_rmse': test_rmse,\n 'predictions': y_pred_test,\n 'true_values': y_test[:, i]\n }\n \n print(f' Training R²: {train_r2:.3f}')\n print(f' Test R²: {test_r2:.3f}')\n print(f' Test RMSE: {test_rmse:.3f}')\n\n# Feature importance analysis\nprint('\\n=== Feature Importance Analysis ===')\n\nfeature_importance = pd.DataFrame({\n 'feature': feature_cols,\n **{target: models[target].feature_importances_ for target in target_cols}\n})\n\nprint('Top features for each property:')\nfor target in target_cols:\n top_features = feature_importance.nlargest(3, target)\n print(f'\\n{target}:')\n for _, row in top_features.iterrows():\n print(f' {row[\"feature\"]}: {row[target]:.3f}')\n\n# Virtual screening simulation\nprint('\\n=== Virtual Screening Simulation ===')\n\n# Generate new molecules for screening\nn_screening = 10000\nprint(f'Generating {n_screening} molecules for virtual screening...')\n\nscreening_data = []\nfor i in range(n_screening):\n # Generate random molecular descriptors\n mw = np.random.normal(350, 120)\n mw = np.clip(mw, 100, 700)\n \n logP = np.random.normal(2.5, 2.0)\n logP = np.clip(logP, -3, 7)\n \n donors = np.random.poisson(2)\n donors = np.clip(donors, 0, 8)\n \n acceptors = np.random.poisson(4)\n acceptors = np.clip(acceptors, 0, 15)\n \n tpsa = np.random.gamma(2, 35)\n tpsa = np.clip(tpsa, 0, 250)\n \n rot_bonds = np.random.poisson(6)\n rot_bonds = np.clip(rot_bonds, 0, 20)\n \n aromatic_rings = np.random.poisson(2)\n aromatic_rings = np.clip(aromatic_rings, 0, 5)\n \n formal_charge = np.random.choice([0, 0, 0, 0, 1, -1])\n \n heteroatoms = np.random.poisson(4) + 1\n heteroatoms = np.clip(heteroatoms, 1, 12)\n \n frac_csp3 = np.random.beta(2, 2)\n \n screening_data.append([\n mw, logP, donors, acceptors, tpsa, rot_bonds,\n aromatic_rings, formal_charge, heteroatoms, frac_csp3\n ])\n\nX_screening = np.array(screening_data)\n\n# Predict properties for screening library\nprint('Predicting properties for screening library...')\nscreening_predictions = {}\n\nfor target in target_cols:\n predictions = models[target].predict(X_screening)\n screening_predictions[target] = predictions\n\n# Apply filters for hit identification\nprint('\\nApplying screening filters...')\n\n# Create screening DataFrame\nscreening_df = pd.DataFrame(X_screening, columns=feature_cols)\nfor target in target_cols:\n screening_df[f'pred_{target}'] = screening_predictions[target]\n\n# Apply drug-likeness filters\nscreening_df['lipinski_violations'] = screening_df.apply(lipinski_filter, axis=1)\nscreening_df['veber_violations'] = screening_df.apply(veber_filter, axis=1)\nscreening_df['drug_like'] = (\n (screening_df['lipinski_violations'] == 0) & \n (screening_df['veber_violations'] == 0)\n)\n\n# Activity filters\nactivity_threshold = 6.5 # pIC50 > 6.5 (IC50 < 316 nM)\nsolubility_threshold = -4.0 # logS > -4 (> 0.1 mM)\npermeability_threshold = -5.5 # log Papp > -5.5\n\nhits = screening_df[\n (screening_df['pred_pIC50'] > activity_threshold) &\n (screening_df['pred_logS'] > solubility_threshold) &\n (screening_df['pred_log_Papp'] > permeability_threshold) &\n (screening_df['drug_like'] == True)\n]\n\nprint(f'Screening results:')\nprint(f' Total molecules: {len(screening_df):,}')\nprint(f' Drug-like molecules: {screening_df[\"drug_like\"].sum():,}')\nprint(f' Active hits (pIC50 > {activity_threshold}): {(screening_df[\"pred_pIC50\"] > activity_threshold).sum():,}')\nprint(f' Final hits (all criteria): {len(hits):,}')\nprint(f' Hit rate: {len(hits) / len(screening_df):.1%}')\n\nif len(hits) > 0:\n print(f'\\nTop 5 hits:')\n top_hits = hits.nlargest(5, 'pred_pIC50')\n for idx, hit in top_hits.iterrows():\n print(f' Hit {idx}: pIC50={hit[\"pred_pIC50\"]:.2f}, '\n f'logS={hit[\"pred_logS\"]:.2f}, MW={hit[\"molecular_weight\"]:.0f}')\n\n# Visualization\nprint('\\n=== Visualization ===')\n\nfig, axes = plt.subplots(2, 2, figsize=(15, 12))\n\n# 1. Property predictions vs true values\nfor i, target in enumerate(['pIC50', 'logS']):\n ax = axes[0, i]\n perf = performance[target]\n \n ax.scatter(perf['true_values'], perf['predictions'], alpha=0.6, s=20)\n \n # Perfect prediction line\n min_val = min(perf['true_values'].min(), perf['predictions'].min())\n max_val = max(perf['true_values'].max(), perf['predictions'].max())\n ax.plot([min_val, max_val], [min_val, max_val], 'r--', alpha=0.8)\n \n ax.set_xlabel(f'True {target}')\n ax.set_ylabel(f'Predicted {target}')\n ax.set_title(f'{target} Prediction (R² = {perf[\"test_r2\"]:.3f})')\n ax.grid(True, alpha=0.3)\n\n# 2. Feature importance heatmap\nax = axes[1, 0]\nimportance_matrix = feature_importance[target_cols].values.T\nim = ax.imshow(importance_matrix, cmap='viridis', aspect='auto')\nax.set_xticks(range(len(feature_cols)))\nax.set_xticklabels(feature_cols, rotation=45, ha='right')\nax.set_yticks(range(len(target_cols)))\nax.set_yticklabels(target_cols)\nax.set_title('Feature Importance Heatmap')\nplt.colorbar(im, ax=ax)\n\n# 3. Virtual screening results\nax = axes[1, 1]\nax.scatter(screening_df['pred_pIC50'], screening_df['pred_logS'], \n alpha=0.3, s=10, color='lightblue', label='All molecules')\nif len(hits) > 0:\n ax.scatter(hits['pred_pIC50'], hits['pred_logS'], \n alpha=0.8, s=30, color='red', label=f'Hits ({len(hits)})')\n\nax.axvline(x=activity_threshold, color='green', linestyle='--', alpha=0.7, \n label=f'pIC50 > {activity_threshold}')\nax.axhline(y=solubility_threshold, color='orange', linestyle='--', alpha=0.7, \n label=f'logS > {solubility_threshold}')\n\nax.set_xlabel('Predicted pIC50')\nax.set_ylabel('Predicted logS')\nax.set_title('Virtual Screening Results')\nax.legend()\nax.grid(True, alpha=0.3)\n\nplt.tight_layout()\n\n# Save visualization\nwith tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:\n plt.savefig(tmp.name, dpi=150, bbox_inches='tight')\n viz_file = tmp.name\n\nplt.close()\nprint(f'Analysis visualization saved to: {viz_file}')\n\n# Summary report\nprint('\\n' + '=' * 45)\nprint('OPENCHEM DRUG DISCOVERY SUMMARY')\nprint('=' * 45)\nprint(f'Molecules analyzed: {len(df):,}')\nprint(f'Properties predicted: {len(target_cols)}')\nprint(f'Drug-like molecules: {df[\"drug_like\"].sum():,} ({df[\"drug_like\"].mean():.1%})')\nprint(f'\\nModel performance (test R²):')\nfor target in target_cols:\n print(f' {target}: {performance[target][\"test_r2\"]:.3f}')\nprint(f'\\nVirtual screening:')\nprint(f' Molecules screened: {len(screening_df):,}')\nprint(f' Hits identified: {len(hits):,}')\nprint(f' Hit rate: {len(hits) / len(screening_df):.2%}')\n\n# Cleanup\nos.unlink(viz_file)\nprint('\\nDemo complete - temporary files cleaned up')\n\nprint('\\nOpenChem provides:')\nprint('• Multi-task molecular property prediction')\nprint('• Graph neural networks for molecules')\nprint('• Molecular generation and optimization')\nprint('• ADMET property prediction')\nprint('• Virtual screening capabilities')\nprint('• Integration with PyTorch')\nprint('• Pre-trained models and datasets')\n\nprint('\\nTypical OpenChem workflow:')\nprint('1. Load molecular dataset (SMILES, SDF)')\nprint('2. Generate molecular representations')\nprint('3. Train/load prediction models')\nprint('4. Predict properties for new molecules')\nprint('5. Apply filters for drug discovery')",
265
+ "quick_start": [
266
+ "Install: pip install openchem",
267
+ "Import: from openchem.models import build_model",
268
+ "Data: Load SMILES and properties",
269
+ "Model: Configure and train neural network",
270
+ "Predict: Apply model to new molecules",
271
+ "Filter: Apply drug-likeness criteria"
272
+ ]
273
+ }
274
+ },
275
+ {
276
+ "type": "PackageTool",
277
+ "name": "get_molvs_info",
278
+ "description": "Get information about the molvs package. Molecule validation and standardization",
279
+ "package_name": "molvs",
280
+ "parameter": {
281
+ "type": "object",
282
+ "properties": {},
283
+ "required": []
284
+ },
285
+ "required": []
286
+ },
287
+ {
288
+ "type": "PackageTool",
289
+ "name": "get_chembl_webresource_client_info",
290
+ "description": "Get information about the chembl-webresource-client package. Python client for ChEMBL web services",
291
+ "package_name": "chembl-webresource-client",
292
+ "parameter": {
293
+ "type": "object",
294
+ "properties": {},
295
+ "required": []
296
+ },
297
+ "required": []
298
+ },
299
+ {
300
+ "type": "PackageTool",
301
+ "name": "get_pubchempy_info",
302
+ "description": "Get information about the pubchempy package. Python interface for PubChem REST API",
303
+ "package_name": "pubchempy",
304
+ "parameter": {
305
+ "type": "object",
306
+ "properties": {},
307
+ "required": []
308
+ },
309
+ "required": []
310
+ },
311
+ {
312
+ "type": "PackageTool",
313
+ "name": "get_mordred_info",
314
+ "description": "Get information about the mordred package. Molecular descriptor calculator",
315
+ "package_name": "mordred",
316
+ "parameter": {
317
+ "type": "object",
318
+ "properties": {},
319
+ "required": []
320
+ },
321
+ "required": []
322
+ },
323
+ {
324
+ "type": "PackageTool",
325
+ "name": "get_datamol_info",
326
+ "description": "Get information about the datamol package. Molecular manipulation made easy",
327
+ "package_name": "datamol",
328
+ "parameter": {
329
+ "type": "object",
330
+ "properties": {},
331
+ "required": []
332
+ },
333
+ "required": []
334
+ },
335
+ {
336
+ "type": "PackageTool",
337
+ "name": "get_molfeat_info",
338
+ "description": "Get information about the molfeat package. Simple and robust molecular featurization",
339
+ "package_name": "molfeat",
340
+ "parameter": {
341
+ "type": "object",
342
+ "properties": {},
343
+ "required": []
344
+ },
345
+ "required": []
346
+ }
347
+ ]
@@ -0,0 +1,74 @@
1
+ [
2
+ {
3
+ "type": "PackageTool",
4
+ "name": "get_xesmf_info",
5
+ "description": "Get information about the xesmf package. Universal regridder for geospatial data",
6
+ "package_name": "xesmf",
7
+ "parameter": {
8
+ "type": "object",
9
+ "properties": {},
10
+ "required": []
11
+ },
12
+ "required": []
13
+ },
14
+ {
15
+ "type": "PackageTool",
16
+ "name": "get_rasterio_info",
17
+ "description": "Get information about the rasterio package. Access to geospatial raster data",
18
+ "package_name": "rasterio",
19
+ "parameter": {
20
+ "type": "object",
21
+ "properties": {},
22
+ "required": []
23
+ },
24
+ "required": []
25
+ },
26
+ {
27
+ "type": "PackageTool",
28
+ "name": "get_geopandas_info",
29
+ "description": "Get information about the geopandas package. Geospatial data manipulation and analysis",
30
+ "package_name": "geopandas",
31
+ "parameter": {
32
+ "type": "object",
33
+ "properties": {},
34
+ "required": []
35
+ },
36
+ "required": []
37
+ },
38
+ {
39
+ "type": "PackageTool",
40
+ "name": "get_cartopy_info",
41
+ "description": "Get information about the cartopy package. Cartographic projections and geospatial data processing",
42
+ "package_name": "cartopy",
43
+ "parameter": {
44
+ "type": "object",
45
+ "properties": {},
46
+ "required": []
47
+ },
48
+ "required": []
49
+ },
50
+ {
51
+ "type": "PackageTool",
52
+ "name": "get_netcdf4_info",
53
+ "description": "Get information about the netcdf4 package. Python interface to netCDF C library",
54
+ "package_name": "netcdf4",
55
+ "parameter": {
56
+ "type": "object",
57
+ "properties": {},
58
+ "required": []
59
+ },
60
+ "required": []
61
+ },
62
+ {
63
+ "type": "PackageTool",
64
+ "name": "get_cftime_info",
65
+ "description": "Get information about the cftime package. Time-handling functionality from netcdf4-python",
66
+ "package_name": "cftime",
67
+ "parameter": {
68
+ "type": "object",
69
+ "properties": {},
70
+ "required": []
71
+ },
72
+ "required": []
73
+ }
74
+ ]