tooluniverse 0.1.4__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff shows the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

Files changed (187)
  1. tooluniverse/__init__.py +340 -4
  2. tooluniverse/admetai_tool.py +84 -0
  3. tooluniverse/agentic_tool.py +563 -0
  4. tooluniverse/alphafold_tool.py +96 -0
  5. tooluniverse/base_tool.py +129 -6
  6. tooluniverse/boltz_tool.py +207 -0
  7. tooluniverse/chem_tool.py +192 -0
  8. tooluniverse/compose_scripts/__init__.py +1 -0
  9. tooluniverse/compose_scripts/biomarker_discovery.py +293 -0
  10. tooluniverse/compose_scripts/comprehensive_drug_discovery.py +186 -0
  11. tooluniverse/compose_scripts/drug_safety_analyzer.py +89 -0
  12. tooluniverse/compose_scripts/literature_tool.py +34 -0
  13. tooluniverse/compose_scripts/output_summarizer.py +279 -0
  14. tooluniverse/compose_scripts/tool_description_optimizer.py +681 -0
  15. tooluniverse/compose_scripts/tool_discover.py +705 -0
  16. tooluniverse/compose_scripts/tool_graph_composer.py +448 -0
  17. tooluniverse/compose_tool.py +371 -0
  18. tooluniverse/ctg_tool.py +1002 -0
  19. tooluniverse/custom_tool.py +81 -0
  20. tooluniverse/dailymed_tool.py +108 -0
  21. tooluniverse/data/admetai_tools.json +155 -0
  22. tooluniverse/data/agentic_tools.json +1156 -0
  23. tooluniverse/data/alphafold_tools.json +87 -0
  24. tooluniverse/data/boltz_tools.json +9 -0
  25. tooluniverse/data/chembl_tools.json +16 -0
  26. tooluniverse/data/clait_tools.json +108 -0
  27. tooluniverse/data/clinicaltrials_gov_tools.json +326 -0
  28. tooluniverse/data/compose_tools.json +202 -0
  29. tooluniverse/data/dailymed_tools.json +70 -0
  30. tooluniverse/data/dataset_tools.json +646 -0
  31. tooluniverse/data/disease_target_score_tools.json +712 -0
  32. tooluniverse/data/efo_tools.json +17 -0
  33. tooluniverse/data/embedding_tools.json +319 -0
  34. tooluniverse/data/enrichr_tools.json +31 -0
  35. tooluniverse/data/europe_pmc_tools.json +22 -0
  36. tooluniverse/data/expert_feedback_tools.json +10 -0
  37. tooluniverse/data/fda_drug_adverse_event_tools.json +491 -0
  38. tooluniverse/data/fda_drug_labeling_tools.json +544 -168
  39. tooluniverse/data/fda_drugs_with_brand_generic_names_for_tool.py +76929 -148860
  40. tooluniverse/data/finder_tools.json +209 -0
  41. tooluniverse/data/gene_ontology_tools.json +113 -0
  42. tooluniverse/data/gwas_tools.json +1082 -0
  43. tooluniverse/data/hpa_tools.json +333 -0
  44. tooluniverse/data/humanbase_tools.json +47 -0
  45. tooluniverse/data/idmap_tools.json +74 -0
  46. tooluniverse/data/mcp_client_tools_example.json +113 -0
  47. tooluniverse/data/mcpautoloadertool_defaults.json +28 -0
  48. tooluniverse/data/medlineplus_tools.json +141 -0
  49. tooluniverse/data/monarch_tools.json +1 -1
  50. tooluniverse/data/openalex_tools.json +36 -0
  51. tooluniverse/data/opentarget_tools.json +82 -58
  52. tooluniverse/data/output_summarization_tools.json +101 -0
  53. tooluniverse/data/packages/bioinformatics_core_tools.json +1756 -0
  54. tooluniverse/data/packages/categorized_tools.txt +206 -0
  55. tooluniverse/data/packages/cheminformatics_tools.json +347 -0
  56. tooluniverse/data/packages/earth_sciences_tools.json +74 -0
  57. tooluniverse/data/packages/genomics_tools.json +776 -0
  58. tooluniverse/data/packages/image_processing_tools.json +38 -0
  59. tooluniverse/data/packages/machine_learning_tools.json +789 -0
  60. tooluniverse/data/packages/neuroscience_tools.json +62 -0
  61. tooluniverse/data/packages/original_tools.txt +0 -0
  62. tooluniverse/data/packages/physics_astronomy_tools.json +62 -0
  63. tooluniverse/data/packages/scientific_computing_tools.json +560 -0
  64. tooluniverse/data/packages/single_cell_tools.json +453 -0
  65. tooluniverse/data/packages/software_tools.json +4954 -0
  66. tooluniverse/data/packages/structural_biology_tools.json +396 -0
  67. tooluniverse/data/packages/visualization_tools.json +399 -0
  68. tooluniverse/data/pubchem_tools.json +215 -0
  69. tooluniverse/data/pubtator_tools.json +68 -0
  70. tooluniverse/data/rcsb_pdb_tools.json +1332 -0
  71. tooluniverse/data/reactome_tools.json +19 -0
  72. tooluniverse/data/semantic_scholar_tools.json +26 -0
  73. tooluniverse/data/special_tools.json +2 -25
  74. tooluniverse/data/tool_composition_tools.json +88 -0
  75. tooluniverse/data/toolfinderkeyword_defaults.json +34 -0
  76. tooluniverse/data/txagent_client_tools.json +9 -0
  77. tooluniverse/data/uniprot_tools.json +211 -0
  78. tooluniverse/data/url_fetch_tools.json +94 -0
  79. tooluniverse/data/uspto_downloader_tools.json +9 -0
  80. tooluniverse/data/uspto_tools.json +811 -0
  81. tooluniverse/data/xml_tools.json +3275 -0
  82. tooluniverse/dataset_tool.py +296 -0
  83. tooluniverse/default_config.py +165 -0
  84. tooluniverse/efo_tool.py +42 -0
  85. tooluniverse/embedding_database.py +630 -0
  86. tooluniverse/embedding_sync.py +396 -0
  87. tooluniverse/enrichr_tool.py +266 -0
  88. tooluniverse/europe_pmc_tool.py +52 -0
  89. tooluniverse/execute_function.py +1775 -95
  90. tooluniverse/extended_hooks.py +444 -0
  91. tooluniverse/gene_ontology_tool.py +194 -0
  92. tooluniverse/graphql_tool.py +158 -36
  93. tooluniverse/gwas_tool.py +358 -0
  94. tooluniverse/hpa_tool.py +1645 -0
  95. tooluniverse/humanbase_tool.py +389 -0
  96. tooluniverse/logging_config.py +254 -0
  97. tooluniverse/mcp_client_tool.py +764 -0
  98. tooluniverse/mcp_integration.py +413 -0
  99. tooluniverse/mcp_tool_registry.py +925 -0
  100. tooluniverse/medlineplus_tool.py +337 -0
  101. tooluniverse/openalex_tool.py +228 -0
  102. tooluniverse/openfda_adv_tool.py +283 -0
  103. tooluniverse/openfda_tool.py +393 -160
  104. tooluniverse/output_hook.py +1122 -0
  105. tooluniverse/package_tool.py +195 -0
  106. tooluniverse/pubchem_tool.py +158 -0
  107. tooluniverse/pubtator_tool.py +168 -0
  108. tooluniverse/rcsb_pdb_tool.py +38 -0
  109. tooluniverse/reactome_tool.py +108 -0
  110. tooluniverse/remote/boltz/boltz_mcp_server.py +50 -0
  111. tooluniverse/remote/depmap_24q2/depmap_24q2_mcp_tool.py +442 -0
  112. tooluniverse/remote/expert_feedback/human_expert_mcp_tools.py +2013 -0
  113. tooluniverse/remote/expert_feedback/simple_test.py +23 -0
  114. tooluniverse/remote/expert_feedback/start_web_interface.py +188 -0
  115. tooluniverse/remote/expert_feedback/web_only_interface.py +0 -0
  116. tooluniverse/remote/expert_feedback_mcp/human_expert_mcp_server.py +1611 -0
  117. tooluniverse/remote/expert_feedback_mcp/simple_test.py +34 -0
  118. tooluniverse/remote/expert_feedback_mcp/start_web_interface.py +91 -0
  119. tooluniverse/remote/immune_compass/compass_tool.py +327 -0
  120. tooluniverse/remote/pinnacle/pinnacle_tool.py +328 -0
  121. tooluniverse/remote/transcriptformer/transcriptformer_tool.py +586 -0
  122. tooluniverse/remote/uspto_downloader/uspto_downloader_mcp_server.py +61 -0
  123. tooluniverse/remote/uspto_downloader/uspto_downloader_tool.py +120 -0
  124. tooluniverse/remote_tool.py +99 -0
  125. tooluniverse/restful_tool.py +53 -30
  126. tooluniverse/scripts/generate_tool_graph.py +408 -0
  127. tooluniverse/scripts/visualize_tool_graph.py +829 -0
  128. tooluniverse/semantic_scholar_tool.py +62 -0
  129. tooluniverse/smcp.py +2452 -0
  130. tooluniverse/smcp_server.py +975 -0
  131. tooluniverse/test/mcp_server_test.py +0 -0
  132. tooluniverse/test/test_admetai_tool.py +370 -0
  133. tooluniverse/test/test_agentic_tool.py +129 -0
  134. tooluniverse/test/test_alphafold_tool.py +71 -0
  135. tooluniverse/test/test_chem_tool.py +37 -0
  136. tooluniverse/test/test_compose_lieraturereview.py +63 -0
  137. tooluniverse/test/test_compose_tool.py +448 -0
  138. tooluniverse/test/test_dailymed.py +69 -0
  139. tooluniverse/test/test_dataset_tool.py +200 -0
  140. tooluniverse/test/test_disease_target_score.py +56 -0
  141. tooluniverse/test/test_drugbank_filter_examples.py +179 -0
  142. tooluniverse/test/test_efo.py +31 -0
  143. tooluniverse/test/test_enrichr_tool.py +21 -0
  144. tooluniverse/test/test_europe_pmc_tool.py +20 -0
  145. tooluniverse/test/test_fda_adv.py +95 -0
  146. tooluniverse/test/test_fda_drug_labeling.py +91 -0
  147. tooluniverse/test/test_gene_ontology_tools.py +66 -0
  148. tooluniverse/test/test_gwas_tool.py +139 -0
  149. tooluniverse/test/test_hpa.py +625 -0
  150. tooluniverse/test/test_humanbase_tool.py +20 -0
  151. tooluniverse/test/test_idmap_tools.py +61 -0
  152. tooluniverse/test/test_mcp_server.py +211 -0
  153. tooluniverse/test/test_mcp_tool.py +247 -0
  154. tooluniverse/test/test_medlineplus.py +220 -0
  155. tooluniverse/test/test_openalex_tool.py +32 -0
  156. tooluniverse/test/test_opentargets.py +28 -0
  157. tooluniverse/test/test_pubchem_tool.py +116 -0
  158. tooluniverse/test/test_pubtator_tool.py +37 -0
  159. tooluniverse/test/test_rcsb_pdb_tool.py +86 -0
  160. tooluniverse/test/test_reactome.py +54 -0
  161. tooluniverse/test/test_semantic_scholar_tool.py +24 -0
  162. tooluniverse/test/test_software_tools.py +147 -0
  163. tooluniverse/test/test_tool_description_optimizer.py +49 -0
  164. tooluniverse/test/test_tool_finder.py +26 -0
  165. tooluniverse/test/test_tool_finder_llm.py +252 -0
  166. tooluniverse/test/test_tools_find.py +195 -0
  167. tooluniverse/test/test_uniprot_tools.py +74 -0
  168. tooluniverse/test/test_uspto_tool.py +72 -0
  169. tooluniverse/test/test_xml_tool.py +113 -0
  170. tooluniverse/tool_finder_embedding.py +267 -0
  171. tooluniverse/tool_finder_keyword.py +693 -0
  172. tooluniverse/tool_finder_llm.py +699 -0
  173. tooluniverse/tool_graph_web_ui.py +955 -0
  174. tooluniverse/tool_registry.py +416 -0
  175. tooluniverse/uniprot_tool.py +155 -0
  176. tooluniverse/url_tool.py +253 -0
  177. tooluniverse/uspto_tool.py +240 -0
  178. tooluniverse/utils.py +369 -41
  179. tooluniverse/xml_tool.py +369 -0
  180. tooluniverse-1.0.0.dist-info/METADATA +377 -0
  181. tooluniverse-1.0.0.dist-info/RECORD +186 -0
  182. {tooluniverse-0.1.4.dist-info → tooluniverse-1.0.0.dist-info}/WHEEL +1 -1
  183. tooluniverse-1.0.0.dist-info/entry_points.txt +9 -0
  184. tooluniverse-0.1.4.dist-info/METADATA +0 -141
  185. tooluniverse-0.1.4.dist-info/RECORD +0 -18
  186. {tooluniverse-0.1.4.dist-info → tooluniverse-1.0.0.dist-info}/licenses/LICENSE +0 -0
  187. {tooluniverse-0.1.4.dist-info → tooluniverse-1.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,776 @@
+ [
+   {
+     "type": "PackageTool",
+     "name": "get_pysam_info",
+     "description": "Get comprehensive information about pysam – interface to SAM/BAM/CRAM files",
+     "parameter": {
+       "type": "object",
+       "properties": {
+         "include_examples": {
+           "type": "boolean",
+           "description": "Whether to include usage examples and quick start guide",
+           "default": true
+         }
+       }
+     },
+     "package_name": "pysam",
+     "local_info": {
+       "name": "pysam",
+       "description": "Python interface for reading, writing and manipulating SAM/BAM/CRAM files and other genomics file formats. Built on top of htslib and provides efficient access to sequencing data.",
+       "category": "Genomics File I/O",
+       "import_name": "pysam",
+       "popularity": 88,
+       "keywords": [
+         "SAM/BAM files",
+         "genomics",
+         "sequencing data",
+         "file I/O",
+         "htslib"
+       ],
+       "documentation": "https://pysam.readthedocs.io/",
+       "repository": "https://github.com/pysam-developers/pysam",
+       "installation": {
+         "pip": "pip install pysam",
+         "conda": "conda install -c bioconda pysam"
+       },
+       "usage_example": "import pysam\n\n# Read BAM file\nbamfile = pysam.AlignmentFile('example.bam', 'rb')\n\n# Iterate through reads\nfor read in bamfile.fetch('chr1', 1000, 2000):\n    print(f'{read.query_name}: {read.reference_start}-{read.reference_end}')\n\n# Get coverage\nfor pileupcolumn in bamfile.pileup('chr1', 1000, 2000):\n    print(f'Position {pileupcolumn.pos}: coverage {pileupcolumn.n}')\n\nbamfile.close()",
+       "quick_start": [
+         "Install: pip install pysam",
+         "Import: import pysam",
+         "Open file: bamfile = pysam.AlignmentFile('file.bam', 'rb')",
+         "Fetch reads: bamfile.fetch('chr1', start, end)",
+         "Analyze: get coverage, extract sequences"
+       ]
+     }
+   },
+   {
+     "type": "PackageTool",
+     "name": "get_pyfaidx_info",
+     "description": "Get comprehensive information about pyfaidx – efficient FASTA file indexing and random access",
+     "parameter": {
+       "type": "object",
+       "properties": {
+         "include_examples": {
+           "type": "boolean",
+           "description": "Whether to include usage examples and quick start guide",
+           "default": true
+         }
+       }
+     },
+     "package_name": "pyfaidx",
+     "local_info": {
+       "name": "pyfaidx",
+       "description": "Pure Python implementation of samtools faidx FASTA indexing for efficient random access to large FASTA files. Supports sequence slicing and extraction without loading entire files into memory.",
+       "category": "Genomics File I/O",
+       "import_name": "pyfaidx",
+       "popularity": 75,
+       "keywords": [
+         "FASTA files",
+         "indexing",
+         "random access",
+         "sequence extraction",
+         "samtools"
+       ],
+       "documentation": "https://github.com/mdshw5/pyfaidx",
+       "repository": "https://github.com/mdshw5/pyfaidx",
+       "installation": {
+         "pip": "pip install pyfaidx",
+         "conda": "conda install -c bioconda pyfaidx"
+       },
+       "usage_example": "from pyfaidx import Fasta\n\n# Open FASTA file\nfa = Fasta('genome.fa')\n\n# Extract sequences\nseq = fa['chr1'][1000:2000] # Get bases 1000-2000\nprint(seq)\n\n# Get chromosome names\nprint(list(fa.keys()))\n\n# Get sequence length\nprint(len(fa['chr1']))",
+       "quick_start": [
+         "Install: pip install pyfaidx",
+         "Open FASTA: fa = Fasta('genome.fa')",
+         "Extract sequence: fa['chr1'][start:end]",
+         "List contigs: list(fa.keys())",
+         "Command line: faidx genome.fa chr1:1000-2000"
+       ]
+     }
+   },
+   {
+     "type": "PackageTool",
+     "name": "get_pyranges_info",
+     "description": "Get comprehensive information about PyRanges – efficient genomic interval operations",
+     "parameter": {
+       "type": "object",
+       "properties": {
+         "include_examples": {
+           "type": "boolean",
+           "description": "Whether to include usage examples and quick start guide",
+           "default": true
+         }
+       }
+     },
+     "package_name": "pyranges",
+     "local_info": {
+       "name": "PyRanges",
+       "description": "Efficient and intuitive genomic interval operations in Python. Provides fast set operations, intersection, and manipulation of genomic ranges with pandas-like syntax.",
+       "category": "Genomic Intervals",
+       "import_name": "pyranges",
+       "popularity": 78,
+       "keywords": [
+         "genomic intervals",
+         "interval operations",
+         "BED files",
+         "GTF files",
+         "set operations"
+       ],
+       "documentation": "https://pyranges.readthedocs.io/",
+       "repository": "https://github.com/pyranges/pyranges",
+       "installation": {
+         "pip": "pip install pyranges",
+         "conda": "conda install -c bioconda pyranges"
+       },
+       "usage_example": "import pyranges as pr\n\n# Read BED file\nbedfile = pr.read_bed('genes.bed')\n\n# Create intervals\nintervals = pr.PyRanges(chromosomes=['chr1', 'chr2'],\n    starts=[100, 200],\n    ends=[200, 300])\n\n# Intersection\noverlaps = bedfile.intersect(intervals)\nprint(overlaps)\n\n# Nearest features\nnearest = bedfile.nearest(intervals)\nprint(nearest)",
+       "quick_start": [
+         "Install: pip install pyranges",
+         "Read files: pr.read_bed('file.bed'), pr.read_gtf('file.gtf')",
+         "Create ranges: pr.PyRanges(chromosomes, starts, ends)",
+         "Operations: intersect(), subtract(), nearest()",
+         "Filter and manipulate with pandas-like syntax"
+       ]
+     }
+   },
+   {
+     "type": "PackageTool",
+     "name": "get_pybedtools_info",
+     "description": "Get comprehensive information about pybedtools – Python wrapper for BEDTools",
+     "parameter": {
+       "type": "object",
+       "properties": {
+         "include_examples": {
+           "type": "boolean",
+           "description": "Whether to include usage examples and quick start guide",
+           "default": true
+         }
+       }
+     },
+     "package_name": "pybedtools",
+     "local_info": {
+       "name": "pybedtools",
+       "description": "Python wrapper for Aaron Quinlan's BEDTools suite. Provides intuitive Python interface for genomic interval operations, sequence analysis, and file format conversions.",
+       "category": "Genomic Intervals",
+       "import_name": "pybedtools",
+       "popularity": 82,
+       "keywords": [
+         "BEDTools",
+         "genomic intervals",
+         "interval operations",
+         "sequence analysis",
+         "file conversion"
+       ],
+       "documentation": "https://daler.github.io/pybedtools/",
+       "repository": "https://github.com/daler/pybedtools",
+       "installation": {
+         "pip": "pip install pybedtools",
+         "conda": "conda install -c bioconda pybedtools"
+       },
+       "usage_example": "import pybedtools\n\n# Create BedTool objects\na = pybedtools.BedTool('a.bed')\nb = pybedtools.BedTool('b.bed')\n\n# Intersection\nintersection = a.intersect(b)\n\n# Subtract\nsubtracted = a.subtract(b)\n\n# Closest features\nclosest = a.closest(b)\n\n# Save results\nintersection.saveas('intersection.bed')",
+       "quick_start": [
+         "Install: pip install pybedtools",
+         "Create objects: pybedtools.BedTool('file.bed')",
+         "Operations: intersect(), subtract(), closest()",
+         "Chain operations: a.intersect(b).subtract(c)",
+         "Save results: result.saveas('output.bed')"
+       ]
+     }
+   },
+   {
+     "type": "PackageTool",
+     "name": "get_pyliftover_info",
+     "description": "Get comprehensive information about PyLiftover – genomic coordinate conversion between assemblies",
+     "parameter": {
+       "type": "object",
+       "properties": {
+         "include_examples": {
+           "type": "boolean",
+           "description": "Whether to include usage examples and quick start guide",
+           "default": true
+         }
+       }
+     },
+     "package_name": "pyliftover",
+     "local_info": {
+       "name": "PyLiftover",
+       "description": "Pure Python implementation of UCSC liftOver for quick and easy conversion of genomic coordinates between different genome assemblies.",
+       "category": "Genomics Tools",
+       "import_name": "pyliftover",
+       "popularity": 65,
+       "keywords": [
+         "liftover",
+         "coordinate conversion",
+         "genome assemblies",
+         "UCSC",
+         "genomic coordinates"
+       ],
+       "documentation": "https://github.com/konstantint/pyliftover",
+       "repository": "https://github.com/konstantint/pyliftover",
+       "installation": {
+         "pip": "pip install pyliftover",
+         "conda": "conda install -c bioconda pyliftover"
+       },
+       "usage_example": "from pyliftover import LiftOver\n\n# Initialize liftover from hg19 to hg38\nlo = LiftOver('hg19', 'hg38')\n\n# Convert coordinates; each match is a (chrom, pos, strand, score) tuple\nresult = lo.convert_coordinate('chr1', 1000000)\nif result:\n    new_chr, new_pos, new_strand, score = result[0]\n    print(f'hg19 chr1:1000000 -> hg38 {new_chr}:{new_pos}')\nelse:\n    print('Coordinate could not be lifted over')",
+       "quick_start": [
+         "Install: pip install pyliftover",
+         "Initialize: lo = LiftOver('hg19', 'hg38')",
+         "Convert: result = lo.convert_coordinate('chr1', pos)",
+         "Check result: if result: new_chr, new_pos, strand, score = result[0]",
+         "Batch convert: Use loops for multiple coordinates"
+       ]
+     }
+   },
+   {
+     "type": "PackageTool",
+     "name": "get_deeptools_info",
+     "description": "Get comprehensive information about deepTools – deep sequencing data processing",
+     "parameter": {
+       "type": "object",
+       "properties": {
+         "info_type": {
+           "type": "string",
+           "enum": [
+             "overview",
+             "installation",
+             "usage",
+             "documentation"
+           ],
+           "description": "Type of information to retrieve about deepTools"
+         }
+       },
+       "required": [
+         "info_type"
+       ]
+     },
+     "package_name": "deeptools",
+     "local_info": {
+       "name": "deepTools",
+       "description": "Tools to process and analyze deep sequencing data, particularly for ChIP-seq, RNA-seq, and ATAC-seq experiments. Provides normalization, visualization, and quality control functions.",
+       "category": "Genomics / NGS Analysis",
+       "import_name": "deeptools",
+       "popularity": 80,
+       "keywords": [
+         "ChIP-seq",
+         "RNA-seq",
+         "ATAC-seq",
+         "NGS",
+         "normalization",
+         "visualization"
+       ],
+       "documentation": "https://deeptools.readthedocs.io/",
+       "repository": "https://github.com/deeptools/deepTools",
+       "installation": {
+         "pip": "pip install deeptools",
+         "conda": "conda install -c conda-forge deeptools"
+       },
+       "usage_example": "# Command line tools:\n# Convert BAM to bigWig\nbamCoverage -b input.bam -o output.bw\n\n# Compute correlation matrix\nmultiBigwigSummary bins -b file1.bw file2.bw -o matrix.npz\nplotCorrelation -in matrix.npz -o correlation.png\n\n# Create heatmap around TSS\ncomputeMatrix reference-point -S signals.bw -R genes.bed -o matrix.gz\nplotHeatmap -m matrix.gz -o heatmap.png",
+       "quick_start": [
+         "Install: pip install deeptools",
+         "BAM to bigWig: bamCoverage -b file.bam -o file.bw",
+         "Compute matrix: computeMatrix reference-point -S file.bw -R regions.bed",
+         "Plot heatmap: plotHeatmap -m matrix.gz -o heatmap.png",
+         "Quality control: plotFingerprint -b *.bam",
+         "Correlation: plotCorrelation -in matrix.npz"
+       ]
+     }
+   },
+   {
+     "type": "PackageTool",
+     "name": "get_clair3_info",
+     "description": "Get comprehensive information about Clair3 – variant calling for long-read sequencing",
+     "parameter": {
+       "type": "object",
+       "properties": {
+         "info_type": {
+           "type": "string",
+           "enum": [
+             "overview",
+             "installation",
+             "usage",
+             "documentation"
+           ],
+           "description": "Type of information to retrieve about Clair3"
+         }
+       },
+       "required": [
+         "info_type"
+       ]
+     },
+     "package_name": "clair3",
+     "local_info": {
+       "name": "Clair3",
+       "description": "Symphonizing pileup and full-alignment for high-performance long-read variant calling. Uses deep learning for accurate SNP and indel detection from PacBio and Oxford Nanopore sequencing data.",
+       "category": "Genomics / Variant Calling",
+       "import_name": "clair3",
+       "popularity": 75,
+       "keywords": [
+         "variant calling",
+         "long-read sequencing",
+         "deep learning",
+         "PacBio",
+         "Oxford Nanopore"
+       ],
+       "documentation": "https://github.com/HKU-BAL/Clair3",
+       "repository": "https://github.com/HKU-BAL/Clair3",
+       "installation": {
+         "pip": "pip install clair3",
+         "conda": "conda install -c conda-forge clair3"
+       },
+       "usage_example": "# Command line usage:\n# Run Clair3 for variant calling\nrun_clair3.sh \\\n    --bam_fn=input.bam \\\n    --ref_fn=reference.fa \\\n    --threads=8 \\\n    --platform=ont \\\n    --model_path=ont_guppy5 \\\n    --output=output_dir\n\n# For PacBio data:\nrun_clair3.sh \\\n    --bam_fn=input.bam \\\n    --ref_fn=reference.fa \\\n    --threads=8 \\\n    --platform=hifi \\\n    --model_path=hifi \\\n    --output=output_dir",
+       "quick_start": [
+         "Install: conda install -c conda-forge clair3",
+         "Prepare: BAM file, reference genome, choose platform",
+         "Run: run_clair3.sh --bam_fn input.bam --ref_fn ref.fa",
+         "Platform: --platform=ont (Nanopore) or hifi (PacBio)",
+         "Output: VCF files with variants and quality scores"
+       ]
+     }
+   },
+   {
+     "type": "PackageTool",
+     "name": "get_arboreto_info",
+     "description": "Get comprehensive information about Arboreto – gene regulatory network inference",
+     "parameter": {
+       "type": "object",
+       "properties": {
+         "info_type": {
+           "type": "string",
+           "enum": [
+             "overview",
+             "installation",
+             "usage",
+             "documentation"
+           ],
+           "description": "Type of information to retrieve about Arboreto"
+         }
+       },
+       "required": [
+         "info_type"
+       ]
+     },
+     "package_name": "arboreto",
+     "local_info": {
+       "name": "Arboreto",
+       "description": "Scalable gene regulatory network inference using tree-based ensemble methods. Implements GRNBoost2 and other algorithms for inferring gene regulatory networks from expression data, designed for single-cell and bulk RNA-seq.",
+       "category": "Gene Regulatory Networks",
+       "import_name": "arboreto",
+       "popularity": 72,
+       "keywords": [
+         "gene regulatory networks",
+         "GRNBoost2",
+         "tree ensemble",
+         "transcription factors",
+         "network inference"
+       ],
+       "documentation": "https://arboreto.readthedocs.io/",
+       "repository": "https://github.com/aertslab/arboreto",
+       "installation": {
+         "pip": "pip install arboreto",
+         "conda": "conda install -c bioconda arboreto"
+       },
+       "usage_example": "import pandas as pd\nimport numpy as np\nfrom arboreto.algo import grnboost2, genie3\nfrom arboreto.utils import load_tf_names\n\n# Create sample expression data (genes x samples)\nnp.random.seed(42)\ngenes = [f'Gene_{i}' for i in range(100)]\nsamples = [f'Sample_{i}' for i in range(50)]\nexpression_data = pd.DataFrame(\n    np.random.lognormal(1, 1, (100, 50)),\n    index=genes,\n    columns=samples\n)\n\nprint(f'Expression data shape: {expression_data.shape}')\n\n# Define transcription factors (subset of genes)\ntf_names = genes[:20] # First 20 genes as TFs\nprint(f'Number of TFs: {len(tf_names)}')\n\n# Run GRNBoost2 algorithm (expects observations as rows, genes as columns)\nnetwork = grnboost2(\n    expression_data=expression_data.T,\n    tf_names=tf_names,\n    verbose=True\n)\n\nprint(f'Inferred network shape: {network.shape}')\nprint('Top 10 regulatory interactions:')\nprint(network.head(10))\n\n# Filter network by importance threshold\nthreshold = network['importance'].quantile(0.95)\nfiltered_network = network[network['importance'] >= threshold]\nprint(f'High-confidence interactions: {len(filtered_network)}')\n\n# Alternative: Use GENIE3 algorithm\n# network_genie3 = genie3(\n#     expression_data=expression_data.T,\n#     tf_names=tf_names\n# )",
+       "quick_start": [
+         "Install: pip install arboreto",
+         "Prepare expression matrix (genes x samples)",
+         "Define TF list: tf_names = ['TF1', 'TF2', ...]",
+         "Run GRNBoost2: grnboost2(expression_data, tf_names)",
+         "Filter by importance: network[network['importance'] > threshold]",
+         "Export for downstream analysis (e.g., pySCENIC)"
+       ]
+     }
+   },
+   {
+     "type": "PackageTool",
+     "name": "get_cyvcf2_info",
+     "description": "Get comprehensive information about cyvcf2 – fast VCF/BCF file processing",
+     "parameter": {
+       "type": "object",
+       "properties": {
+         "include_examples": {
+           "type": "boolean",
+           "description": "Whether to include usage examples and quick start guide",
+           "default": true
+         }
+       }
+     },
+     "package_name": "cyvcf2",
+     "local_info": {
+       "name": "cyvcf2",
+       "description": "Fast Python library for reading and writing VCF and BCF files using Cython and htslib. Provides efficient access to variant call format data.",
+       "category": "Genomics File I/O",
+       "import_name": "cyvcf2",
+       "popularity": 75,
+       "keywords": [
+         "VCF",
+         "BCF",
+         "variant calls",
+         "genomics",
+         "file parsing"
+       ],
+       "documentation": "https://github.com/brentp/cyvcf2",
+       "repository": "https://github.com/brentp/cyvcf2",
+       "installation": {
+         "pip": "pip install cyvcf2",
+         "conda": "conda install -c bioconda cyvcf2"
+       },
+       "usage_example": "from cyvcf2 import VCF\n\n# Open VCF file\nvcf = VCF('variants.vcf.gz')\n\n# Iterate through variants\nfor variant in vcf:\n    print(f'Position: {variant.CHROM}:{variant.POS}')\n    print(f'REF: {variant.REF}, ALT: {variant.ALT}')\n    print(f'Quality: {variant.QUAL}')\n    print(f'Genotypes: {variant.gt_types}')\n\n    # Access INFO fields\n    if variant.INFO.get('AF') is not None:\n        print(f'Allele frequency: {variant.INFO.get(\"AF\")}')\n\n    # Filter by quality\n    if variant.QUAL > 30:\n        print('High quality variant')\n\n    break # Just show first variant",
+       "quick_start": [
+         "1. Install cyvcf2: pip install cyvcf2",
+         "2. Import: from cyvcf2 import VCF",
+         "3. Open file: vcf = VCF('variants.vcf.gz')",
+         "4. Iterate: for variant in vcf:",
+         "5. Access: variant.CHROM, variant.POS, variant.gt_types"
+       ]
+     }
+   },
+   {
+     "type": "PackageTool",
+     "name": "get_gseapy_info",
+     "description": "Get comprehensive information about GSEApy – Gene Set Enrichment Analysis in Python",
+     "parameter": {
+       "type": "object",
+       "properties": {
+         "include_examples": {
+           "type": "boolean",
+           "description": "Whether to include usage examples and quick start guide",
+           "default": true
+         }
+       }
+     },
+     "package_name": "gseapy",
+     "local_info": {
+       "name": "GSEApy",
+       "description": "Python implementation of Gene Set Enrichment Analysis (GSEA) and gene set analysis tools. Supports multiple databases and provides statistical testing for functional enrichment.",
+       "category": "Functional Genomics",
+       "import_name": "gseapy",
+       "popularity": 75,
+       "keywords": [
+         "gene set enrichment",
+         "GSEA",
+         "pathway analysis",
+         "functional genomics",
+         "GO terms"
+       ],
+       "documentation": "https://gseapy.readthedocs.io/",
+       "repository": "https://github.com/zqfang/GSEApy",
+       "installation": {
+         "pip": "pip install gseapy",
+         "conda": "conda install -c bioconda gseapy"
+       },
+       "usage_example": "import gseapy as gp\nimport pandas as pd\n\n# Gene list enrichment analysis\ngene_list = ['TP53', 'BRCA1', 'BRCA2', 'ATM', 'CHEK2']\nenr = gp.enrichr(gene_list=gene_list,\n    gene_sets='GO_Biological_Process_2021',\n    organism='Human')\nprint(enr.results.head())\n\n# GSEA preranked analysis\nranked_genes = pd.Series([3.2, 2.1, -1.5, -2.3],\n    index=['GENE1', 'GENE2', 'GENE3', 'GENE4'])\ngsea_res = gp.prerank(rnk=ranked_genes, gene_sets='KEGG_2021_Human')",
+       "quick_start": [
+         "1. Install GSEApy: pip install gseapy",
+         "2. Import: import gseapy as gp",
+         "3. Enrichment: gp.enrichr(gene_list, gene_sets)",
+         "4. GSEA: gp.prerank(ranked_genes, gene_sets)",
+         "5. Visualize: plot enrichment results"
+       ]
+     }
+   },
+   {
+     "type": "PackageTool",
+     "name": "get_jcvi_info",
+     "description": "Get comprehensive information about JCVI – genome assembly and comparative genomics",
+     "parameter": {
+       "type": "object",
+       "properties": {
+         "info_type": {
+           "type": "string",
+           "enum": [
+             "overview",
+             "installation",
+             "usage",
+             "documentation"
+           ],
+           "description": "Type of information to retrieve about JCVI"
+         }
+       },
+       "required": [
+         "info_type"
+       ]
+     },
+     "package_name": "jcvi",
+     "local_info": {
+       "name": "JCVI",
+       "description": "Python library to facilitate genome assembly, annotation, and comparative genomics. Provides tools for synteny analysis, genome visualization, and phylogenetic studies.",
+       "category": "Comparative Genomics / Assembly",
+       "import_name": "jcvi",
+       "popularity": 75,
+       "keywords": [
+         "genome assembly",
+         "comparative genomics",
+         "synteny",
+         "annotation",
+         "phylogenetics"
+       ],
+       "documentation": "https://github.com/tanghaibao/jcvi/wiki",
+       "repository": "https://github.com/tanghaibao/jcvi",
+       "installation": {
+         "pip": "pip install jcvi",
+         "conda": "conda install -c conda-forge jcvi"
+       },
+       "usage_example": "# JCVI is primarily a command-line tool suite\n# Here's how to use it programmatically\n\nimport tempfile\nimport os\nfrom jcvi.formats.fasta import Fasta\nfrom jcvi.formats.bed import Bed\n\n# Create sample data for demonstration\nwith tempfile.TemporaryDirectory() as temp_dir:\n    print(f'Working in temporary directory: {temp_dir}')\n\n    # Create sample FASTA files\n    fasta1_file = os.path.join(temp_dir, 'genome1.fasta')\n    fasta2_file = os.path.join(temp_dir, 'genome2.fasta')\n\n    # Sample sequences (simplified for demo)\n    sequences1 = {\n        'chr1': 'ATCGATCGATCGATCGATCGATCGATCGAAAAATTTTTGGGGCCCCATCGATCGATCG',\n        'chr2': 'GCTAGCTAGCTAGCTAGCTAGCTAGCTTTTTAAAAACCCCGGGGCTAGCTAGCTAGCT',\n        'chr3': 'TTTTAAAACCCCGGGGTTTTAAAACCCCGGGGAAAAATTTTCCCCGGGGAAAAATTTT'\n    }\n\n    sequences2 = {\n        'chr1': 'ATCGATCGATCGATCGATCGATCGATCGAAAAATTTTTGGGGCCCCATCGATCGATCG',\n        'chr2': 'GCTAGCTAGCTAGCTAGCTAGCTAGCTTTTTAAAAACCCCGGGGCTAGCTAGCTAGCT',\n        'chr3': 'TTTTAAAACCCCGGGGTTTTAAAACCCCGGGGAAAAATTTTCCCCGGGGAAAAATTTT'\n    }\n\n    with open(fasta1_file, 'w') as f:\n        for name, seq in sequences1.items():\n            f.write(f'>{name}\\n{seq}\\n')\n\n    with open(fasta2_file, 'w') as f:\n        for name, seq in sequences2.items():\n            f.write(f'>{name}\\n{seq}\\n')\n\n    print('Created sample genome FASTA files')\n\n    # Load FASTA files using JCVI\n    print('\\n=== FASTA Analysis ===')\n    fasta1 = Fasta(fasta1_file)\n    fasta2 = Fasta(fasta2_file)\n\n    print(f'Genome 1: {len(fasta1)} sequences')\n    print(f'Genome 2: {len(fasta2)} sequences')\n\n    # Analyze sequence statistics\n    for name in ['genome1', 'genome2']:\n        fasta = fasta1 if name == 'genome1' else fasta2\n        total_length = sum(len(seq) for seq in fasta.values())\n        avg_length = total_length / len(fasta) if fasta else 0\n\n        print(f'\\n{name.capitalize()} statistics:')\n        print(f' Total length: {total_length} bp')\n        print(f' Average sequence length: {avg_length:.1f} bp')\n        print(f' Sequences: {list(fasta.keys())}')\n\n    # Create sample BED files for features\n    bed1_file = os.path.join(temp_dir, 'features1.bed')\n    bed2_file = os.path.join(temp_dir, 'features2.bed')\n\n    # Sample features (genes/annotations)\n    features1 = [\n        ['chr1', '10', '30', 'gene1', '0', '+'],\n        ['chr1', '40', '55', 'gene2', '0', '-'],\n        ['chr2', '15', '35', 'gene3', '0', '+'],\n        ['chr3', '5', '25', 'gene4', '0', '+']\n    ]\n\n    features2 = [\n        ['chr1', '12', '32', 'gene1_ortho', '0', '+'],\n        ['chr1', '42', '57', 'gene2_ortho', '0', '-'],\n        ['chr2', '17', '37', 'gene3_ortho', '0', '+'],\n        ['chr3', '7', '27', 'gene4_ortho', '0', '+']\n    ]\n\n    with open(bed1_file, 'w') as f:\n        for feature in features1:\n            f.write('\\t'.join(feature) + '\\n')\n\n    with open(bed2_file, 'w') as f:\n        for feature in features2:\n            f.write('\\t'.join(feature) + '\\n')\n\n    print('\\n=== BED Analysis ===')\n    bed1 = Bed(bed1_file)\n    bed2 = Bed(bed2_file)\n\n    print(f'Features in genome1: {len(bed1)}')\n    print(f'Features in genome2: {len(bed2)}')\n\n    # Analyze feature distribution\n    for name, bed in [('genome1', bed1), ('genome2', bed2)]:\n        chr_counts = {}\n        for feature in bed:\n            chr_name = feature.seqid\n            chr_counts[chr_name] = chr_counts.get(chr_name, 0) + 1\n\n        print(f'\\n{name.capitalize()} feature distribution:')\n        for chr_name, count in chr_counts.items():\n            print(f' {chr_name}: {count} features')\n\n    # Compare sequences\n    print('\\n=== Sequence Comparison ===')\n    common_sequences = set(fasta1.keys()) & set(fasta2.keys())\n    print(f'Common sequences: {len(common_sequences)}')\n\n    for seq_name in common_sequences:\n        seq1 = fasta1[seq_name]\n        seq2 = fasta2[seq_name]\n\n        # Simple similarity calculation\n        matches = sum(1 for a, b in zip(seq1, seq2) if a == b)\n        similarity = matches / min(len(seq1), len(seq2)) * 100\n\n        print(f' {seq_name}: {similarity:.1f}% similarity')\n\n    # Feature comparison\n    print('\\n=== Feature Comparison ===')\n    total_features1 = len(bed1)\n    total_features2 = len(bed2)\n\n    print(f'Genome1 features: {total_features1}')\n    print(f'Genome2 features: {total_features2}')\n\n    # Check for overlapping regions (simplified)\n    overlaps = 0\n    for f1 in bed1:\n        for f2 in bed2:\n            if (f1.seqid == f2.seqid and\n                    not (f1.end < f2.start or f2.end < f1.start)):\n                overlaps += 1\n                break\n\n    print(f'Features with overlapping regions: {overlaps}')\n\nprint('\\nJCVI provides:')\nprint('- Genome assembly tools')\nprint('- Comparative genomics analysis')\nprint('- Synteny detection and visualization')\nprint('- Phylogenetic analysis')\nprint('- Format conversion utilities')\nprint('- Integration with common genomics formats')\n\nprint('\\nCommon JCVI command-line tools:')\nprint('- python -m jcvi.graphics.synteny: synteny plots')\nprint('- python -m jcvi.compara.catalog: ortholog identification')\nprint('- python -m jcvi.assembly.allmaps: genetic map integration')\nprint('- python -m jcvi.formats.fasta: FASTA utilities')",
+       "quick_start": [
+         "Install: pip install jcvi",
+         "Load FASTA: from jcvi.formats.fasta import Fasta",
+         "Load BED: from jcvi.formats.bed import Bed",
+         "Synteny analysis: jcvi.compara.synteny",
+         "Graphics: jcvi.graphics modules",
+         "Use command-line tools for complex analyses"
+       ]
+     }
+   },
+   {
+     "type": "PackageTool",
+     "name": "get_pydeseq2_info",
+     "description": "Get comprehensive information about PyDESeq2 – RNA-seq differential expression analysis",
+     "parameter": {
+       "type": "object",
+       "properties": {
+         "info_type": {
+           "type": "string",
+           "enum": [
+             "overview",
+             "installation",
+             "usage",
+             "documentation"
+           ],
+           "description": "Type of information to retrieve about PyDESeq2"
+         }
+       },
+       "required": [
+         "info_type"
+       ]
+     },
+     "package_name": "pydeseq2",
+     "local_info": {
+       "name": "PyDESeq2",
+       "description": "Python implementation of the DESeq2 pipeline for bulk RNA-seq differential expression analysis. Provides statistical methods for identifying differentially expressed genes between conditions.",
+       "category": "RNA-seq / Differential Expression",
+       "import_name": "pydeseq2",
+       "popularity": 70,
+       "keywords": [
+         "RNA-seq",
+         "differential expression",
+         "DESeq2",
+         "transcriptomics",
+         "statistics"
+       ],
+       "documentation": "https://pydeseq2.readthedocs.io/",
+       "repository": "https://github.com/owkin/PyDESeq2",
+       "installation": {
+         "pip": "pip install pydeseq2",
+         "conda": "conda install -c conda-forge pydeseq2"
+       },
+       "usage_example": "import pandas as pd\nimport numpy as np\nfrom pydeseq2.dds import DeseqDataSet\nfrom pydeseq2.ds import DeseqStats\nfrom pydeseq2.default_inference import DefaultInference\nimport matplotlib.pyplot as plt\n\nprint('PyDESeq2 - RNA-seq Differential Expression Analysis')\nprint('=' * 55)\n\n# Create synthetic RNA-seq count data\nprint('Creating synthetic RNA-seq count data...')\n\n# Set random seed for reproducibility\nnp.random.seed(42)\n\n# Parameters\nn_genes = 1000\nn_samples_per_condition = 6\nconditions = ['control', 'treatment']\ntotal_samples = n_samples_per_condition * len(conditions)\n\n# Generate gene names\ngene_names = [f'Gene_{i:04d}' for i in range(1, n_genes + 1)]\n\n# Generate sample names and metadata\nsample_names = []\ncondition_labels = []\nfor condition in conditions:\n    for i in range(n_samples_per_condition):\n        sample_names.append(f'{condition}_rep{i+1}')\n        condition_labels.append(condition)\n\n# Create metadata DataFrame\nmetadata = pd.DataFrame({\n    'sample_id': sample_names,\n    'condition': condition_labels\n})\nmetadata.set_index('sample_id', inplace=True)\n\nprint(f'Created metadata for {len(sample_names)} samples:')\nprint(metadata.groupby('condition').size())\n\n# Generate count matrix\nprint('\\nGenerating count matrix...')\n\n# Base expression levels (log scale)\nbase_expression = np.random.negative_binomial(n=5, p=0.3, size=n_genes)\n\n# Create count matrix\ncounts = np.zeros((n_genes, total_samples))\n\n# Generate counts for each sample\nfor i, condition in enumerate(condition_labels):\n    # Add some noise and condition-specific effects\n    if condition == 'control':\n        # Control condition - use base expression\n        sample_counts = np.random.negative_binomial(\n            n=base_expression,\n            p=0.1, # Dispersion parameter\n            size=n_genes\n        )\n    else:\n        # Treatment condition - add differential expression\n        # Select ~10% of genes to be differentially expressed\n        de_genes = np.random.choice(n_genes, size=int(0.1 * n_genes), replace=False)\n\n        modified_expression = base_expression.copy()\n\n        # Half upregulated, half downregulated\n        up_genes = de_genes[:len(de_genes)//2]\n        down_genes = de_genes[len(de_genes)//2:]\n\n        # Upregulate (2-4 fold)\n        modified_expression[up_genes] *= np.random.uniform(2, 4, len(up_genes))\n\n        # Downregulate (0.25-0.5 fold)\n        modified_expression[down_genes] *= np.random.uniform(0.25, 0.5, len(down_genes))\n\n        sample_counts = np.random.negative_binomial(\n            n=modified_expression.astype(int),\n            p=0.1,\n            size=n_genes\n        )\n\n    counts[:, i] = sample_counts\n\n# Create count DataFrame\ncount_df = pd.DataFrame(counts, index=gene_names, columns=sample_names)\ncount_df = count_df.astype(int)\n\nprint(f'Count matrix shape: {count_df.shape}')\nprint(f'Total reads per sample:')\nfor sample in count_df.columns:\n    total_reads = count_df[sample].sum()\n    print(f' {sample}: {total_reads:,} reads')\n\nprint(f'\\nCount statistics:')\nprint(f' Mean counts per gene: {count_df.mean(axis=1).mean():.1f}')\nprint(f' Median counts per gene: {count_df.median(axis=1).median():.1f}')\nprint(f' Genes with zero counts: {(count_df.sum(axis=1) == 0).sum()}')\n\n# Filter low-count genes\nprint('\\nFiltering low-count genes...')\nmin_count = 10\nmin_samples = 3\n\n# Keep genes with at least min_count reads in at least min_samples samples\nkeep_genes = (count_df >= min_count).sum(axis=1) >= min_samples\nfiltered_counts = count_df[keep_genes]\n\nprint(f'Genes before filtering: {len(count_df)}')\nprint(f'Genes after filtering: {len(filtered_counts)}')\nprint(f'Genes removed: {len(count_df) - len(filtered_counts)}')\n\n# Create DESeq2 dataset\nprint('\\n=== DESeq2 Analysis ===')\nprint('Creating DESeq2 dataset...')\n\n# Prepare data for PyDESeq2\ninference = DefaultInference(n_cpus=1)\n\n# Create DESeq2 dataset (counts must be samples x genes)\ndds = DeseqDataSet(\n    counts=filtered_counts.T,\n    metadata=metadata,\n    design_factors=['condition'],\n    refit_cooks=True,\n    inference=inference\n)\n\nprint(f'DESeq2 dataset created with {dds.n_obs} samples and {dds.n_vars} genes')\n\n# Run DESeq2 analysis\nprint('\\nRunning DESeq2 analysis...')\nprint('1. Estimating size factors...')\ndds.fit_size_factors()\n\nprint('2. Estimating dispersions...')\ndds.fit_genewise_dispersions()\ndds.fit_dispersion_trend()\ndds.fit_dispersion_prior()\ndds.fit_MAP_dispersions()\n\nprint('3. Fitting generalized linear model...')\ndds.fit_LFC()\n\nprint('4. Running statistical tests...')\nstat_res = DeseqStats(dds, inference=inference)\nstat_res.summary()\n\n# Get results\nprint('\\n=== Results Analysis ===')\nresults_df = stat_res.results_df\n\nprint(f'Results shape: {results_df.shape}')\nprint(f'Columns: {list(results_df.columns)}')\n\n# Filter for significant genes\nalpha = 0.05\nlog2fc_threshold = 1.0\n\nsignificant = (\n    (results_df['padj'] < alpha) &\n    (np.abs(results_df['log2FoldChange']) > log2fc_threshold)\n)\n\nupregulated = (\n    (results_df['padj'] < alpha) &\n    (results_df['log2FoldChange'] > log2fc_threshold)\n)\n\ndownregulated = (\n    (results_df['padj'] < alpha) &\n    (results_df['log2FoldChange'] < -log2fc_threshold)\n)\n\nprint(f'\\nDifferential expression results:')\nprint(f' Total genes tested: {len(results_df)}')\nprint(f' Significant genes (padj < {alpha}, |log2FC| > {log2fc_threshold}): {significant.sum()}')\nprint(f' Upregulated genes: {upregulated.sum()}')\nprint(f' Downregulated genes: {downregulated.sum()}')\n\n# Show top differentially expressed genes\nprint('\\nTop 10 upregulated genes:')\ntop_up = results_df[upregulated].nlargest(10, 'log2FoldChange')\nfor gene, row in top_up.iterrows():\n    print(f' {gene}: log2FC={row[\"log2FoldChange\"]:.2f}, padj={row[\"padj\"]:.2e}')\n\nprint('\\nTop 10 downregulated genes:')\ntop_down = results_df[downregulated].nsmallest(10, 'log2FoldChange')\nfor gene, row in top_down.iterrows():\n    print(f' {gene}: log2FC={row[\"log2FoldChange\"]:.2f}, padj={row[\"padj\"]:.2e}')\n\n# Quality control plots\nprint('\\n=== Quality Control Plots ===')\n\nfig, axes = plt.subplots(2, 2, figsize=(12, 10))\n\n# 1. MA plot\naxes[0, 0].scatter(results_df['baseMean'], results_df['log2FoldChange'],\n    alpha=0.5, s=1, color='gray')\naxes[0, 0].scatter(results_df.loc[significant, 'baseMean'],\n    results_df.loc[significant, 'log2FoldChange'],\n    alpha=0.7, s=2, color='red')\naxes[0, 0].axhline(y=0, color='blue', linestyle='--', alpha=0.7)\naxes[0, 0].axhline(y=log2fc_threshold, color='green', linestyle='--', alpha=0.7)\naxes[0, 0].axhline(y=-log2fc_threshold, color='green', linestyle='--', alpha=0.7)\naxes[0, 0].set_xlabel('Mean Expression')\naxes[0, 0].set_ylabel('Log2 Fold Change')\naxes[0, 0].set_title('MA Plot')\naxes[0, 0].set_xscale('log')\n\n# 2. Volcano plot\np_values = -np.log10(results_df['padj'].fillna(1))\naxes[0, 1].scatter(results_df['log2FoldChange'], p_values,\n    alpha=0.5, s=1, color='gray')\naxes[0, 1].scatter(results_df.loc[significant, 'log2FoldChange'],\n    p_values[significant],\n    alpha=0.7, s=2, color='red')\naxes[0, 1].axhline(y=-np.log10(alpha), color='green', linestyle='--', alpha=0.7)\naxes[0, 1].axvline(x=log2fc_threshold, color='green', linestyle='--', alpha=0.7)\naxes[0, 1].axvline(x=-log2fc_threshold, color='green', linestyle='--', alpha=0.7)\naxes[0, 1].set_xlabel('Log2 Fold Change')\naxes[0, 1].set_ylabel('-Log10 Adjusted P-value')\naxes[0, 1].set_title('Volcano Plot')\n\n# 3. P-value histogram\naxes[1, 0].hist(results_df['pvalue'].dropna(), bins=50, alpha=0.7, color='skyblue')\naxes[1, 0].set_xlabel('P-value')\naxes[1, 0].set_ylabel('Frequency')\naxes[1, 0].set_title('P-value Distribution')\n\n# 4. Dispersion plot\naxes[1, 1].scatter(dds.layers['normed_counts'].mean(axis=1),\n    dds.varm['dispersions'],\n    alpha=0.5, s=1, color='gray')\naxes[1, 1].set_xlabel('Mean Normalized Counts')\naxes[1, 1].set_ylabel('Dispersion')\naxes[1, 1].set_title('Dispersion Estimates')\naxes[1, 1].set_xscale('log')\naxes[1, 1].set_yscale('log')\n\nplt.tight_layout()\n\n# Save plots\nimport tempfile\nwith tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:\n    plt.savefig(tmp.name, dpi=150, bbox_inches='tight')\n    plot_file = tmp.name\n\nplt.close()\nprint(f'QC plots saved to: {plot_file}')\n\n# Summary statistics\nprint('\\n' + '=' * 55)\nprint('DIFFERENTIAL EXPRESSION ANALYSIS SUMMARY')\nprint('=' * 55)\nprint(f'Total genes analyzed: {len(results_df):,}')\nprint(f'Significant DE genes: {significant.sum():,} ({significant.sum()/len(results_df)*100:.1f}%)')\nprint(f'Upregulated genes: {upregulated.sum():,}')\nprint(f'Downregulated genes: {downregulated.sum():,}')\nprint(f'Significance threshold: padj < {alpha}')\nprint(f'Fold change threshold: |log2FC| > {log2fc_threshold}')\n\n# Effect size distribution\nif significant.sum() > 0:\n    sig_lfc = results_df.loc[significant, 'log2FoldChange']\n    print(f'\\nEffect size statistics (significant genes):')\n    print(f' Mean |log2FC|: {np.abs(sig_lfc).mean():.2f}')\n    print(f' Max upregulation: {sig_lfc.max():.2f} log2FC')\n    print(f' Max downregulation: {sig_lfc.min():.2f} log2FC')\n\n# Cleanup\nimport os\nos.unlink(plot_file)\nprint('\\nDemo complete - temporary files cleaned up')\n\nprint('\\nPyDESeq2 provides:')\nprint('• Python implementation of DESeq2')\nprint('• Differential expression analysis')\nprint('• Size factor normalization')\nprint('• Dispersion estimation')\nprint('• Statistical testing with multiple correction')\nprint('• Integration with pandas and numpy')\nprint('• Visualization and quality control')",
+       "quick_start": [
+         "Install: pip install pydeseq2",
+         "Create dataset: dds = DeseqDataSet(counts, metadata, design_factors)",
+         "Fit model: dds.fit_size_factors(); dds.fit_genewise_dispersions()",
+         "Run stats: stat_res = DeseqStats(dds)",
+         "Get results: results_df = stat_res.results_df",
+         "Filter significant: results_df[results_df['padj'] < 0.05]"
+       ]
+     }
+   },
+   {
+     "type": "PackageTool",
+     "name": "get_cellxgene_census_info",
+     "description": "Get comprehensive information about cellxgene-census – access to the CELLxGENE Census single-cell data",
+     "parameter": {
+       "type": "object",
+       "properties": {
+         "info_type": {
+           "type": "string",
+           "enum": [
+             "overview",
+             "installation",
+             "usage",
+             "documentation"
+           ],
+           "description": "Type of information to retrieve about cellxgene-census"
+         }
+       },
+       "required": [
+         "info_type"
+       ]
+     },
+     "package_name": "cellxgene-census",
+     "local_info": {
+       "name": "cellxgene-census",
+       "description": "Python API for querying and accessing the CELLxGENE Census, a unified collection of publicly available single-cell RNA sequencing datasets. Provides standardized access to millions of cells across thousands of datasets.",
+       "category": "Single-Cell Data Access",
+       "import_name": "cellxgene_census",
+       "popularity": 78,
+       "keywords": [
+         "single-cell",
+         "RNA-seq",
+         "census",
+         "public datasets",
+         "cell atlas"
+       ],
+       "documentation": "https://chanzuckerberg.github.io/cellxgene-census/",
+       "repository": "https://github.com/chanzuckerberg/cellxgene-census",
+       "installation": {
+         "pip": "pip install cellxgene-census",
+         "conda": "conda install -c conda-forge cellxgene-census"
+       },
+       "usage_example": "import cellxgene_census\nimport pandas as pd\n\n# Open the census\nwith cellxgene_census.open_soma() as census:\n    # Query human data\n    human = census['census_data']['homo_sapiens']\n\n    # Get cell metadata\n    cell_metadata = human.obs.read(\n        value_filter=\"tissue == 'lung'\",\n        column_names=['dataset_id', 'cell_type', 'disease']\n    ).concat().to_pandas()\n\n    print(f'Found {len(cell_metadata)} lung cells')\n    print(cell_metadata['cell_type'].value_counts().head())\n\n    # Query expression data for specific genes\n    var_df = human.var.read(\n        value_filter=\"feature_name in ['GAPDH', 'ACTB']\"\n    ).concat().to_pandas()\n\n    print(f'Gene IDs: {var_df[\"soma_joinid\"].tolist()}')",
+       "quick_start": [
+         "Install: pip install cellxgene-census",
+         "Open census: cellxgene_census.open_soma()",
+         "Query cell metadata by tissue, cell type, or disease",
+         "Access expression data for specific genes",
+         "Filter and download subsets of data",
+         "Integrate with scanpy and other analysis tools"
+       ]
+     }
+   },
+   {
+     "type": "PackageTool",
+     "name": "get_viennarna_info",
+     "description": "Get comprehensive information about ViennaRNA – RNA structure prediction and analysis",
+     "parameter": {
+       "type": "object",
+       "properties": {
+         "include_examples": {
+           "type": "boolean",
+           "description": "Whether to include usage examples and quick start guide",
+           "default": true
+         }
+       }
+     },
+     "package_name": "viennarna",
+     "local_info": {
+       "name": "ViennaRNA",
+       "description": "C library with Python bindings for RNA secondary structure prediction and comparison. Includes algorithms for minimum free energy folding, partition function calculations, and more.",
+       "category": "RNA Structure",
+       "import_name": "RNA",
+       "popularity": 75,
+       "keywords": [
+         "RNA structure",
+         "secondary structure",
+         "folding",
+         "thermodynamics",
+         "bioinformatics"
+       ],
+       "documentation": "https://www.tbi.univie.ac.at/RNA/",
+       "repository": "https://github.com/ViennaRNA/ViennaRNA",
+       "installation": {
+         "pip": "pip install viennarna",
+         "conda": "conda install -c bioconda viennarna"
+       },
+       "usage_example": "import RNA\n\n# RNA sequence\nsequence = 'GGGAAAUCC'\n\n# Predict minimum free energy structure\nstructure, mfe = RNA.fold(sequence)\nprint(f'Sequence: {sequence}')\nprint(f'Structure: {structure}')\nprint(f'MFE: {mfe:.2f} kcal/mol')\n\n# Calculate partition function\npf_structure, fe = RNA.pf_fold(sequence)\nprint(f'Partition function structure: {pf_structure}')\nprint(f'Free energy: {fe:.2f} kcal/mol')\n\n# pf_fold returns the ensemble free energy as its second element\nensemble_energy = RNA.pf_fold(sequence)[1]\nprint(f'Ensemble free energy: {ensemble_energy:.2f} kcal/mol')",
+       "quick_start": [
+         "1. Install ViennaRNA: conda install -c bioconda viennarna",
+         "2. Import: import RNA",
+         "3. Fold: structure, mfe = RNA.fold(sequence)",
+         "4. Partition function: RNA.pf_fold(sequence)",
+         "5. Analyze: base pair probabilities, energy"
+       ]
+     }
+   },
+   {
+     "type": "PackageTool",
+     "name": "get_reportlab_info",
+     "description": "Get comprehensive information about ReportLab – PDF generation library",
+     "parameter": {
+       "type": "object",
+       "properties": {
+         "include_examples": {
+           "type": "boolean",
+           "description": "Whether to include usage examples and quick start guide",
+           "default": true
+         }
+       }
+     },
+     "package_name": "reportlab",
+     "local_info": {
+       "name": "ReportLab",
+       "description": "Open-source Python library for generating PDF documents. Supports text, graphics, charts, and complex layouts for creating reports and documents programmatically.",
+       "category": "Document Generation",
+       "import_name": "reportlab",
+       "popularity": 80,
+       "keywords": [
+         "PDF generation",
+         "reports",
+         "documents",
+         "charts",
+         "graphics"
+       ],
+       "documentation": "https://docs.reportlab.com/",
+       "repository": "https://github.com/MrBitBucket/reportlab-mirror",
+       "installation": {
+         "pip": "pip install reportlab",
+         "conda": "conda install -c conda-forge reportlab"
+       },
+       "usage_example": "from reportlab.pdfgen import canvas\nfrom reportlab.lib.pagesizes import letter, A4\nfrom reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer\nfrom reportlab.lib.styles import getSampleStyleSheet\n\n# Simple PDF with canvas\nc = canvas.Canvas('hello.pdf', pagesize=letter)\nc.drawString(100, 750, 'Hello World!')\nc.showPage()\nc.save()\n\n# More complex document\ndoc = SimpleDocTemplate('report.pdf', pagesize=A4)\nstyles = getSampleStyleSheet()\nstory = []\n\n# Add title\ntitle = Paragraph('My Report', styles['Title'])\nstory.append(title)\nstory.append(Spacer(1, 12))\n\n# Add content\ncontent = Paragraph('This is the content of my report.', styles['Normal'])\nstory.append(content)\n\ndoc.build(story)",
+       "quick_start": [
+         "1. Install ReportLab: pip install reportlab",
+         "2. Import: from reportlab.pdfgen import canvas",
+         "3. Create: c = canvas.Canvas('output.pdf')",
+         "4. Add content: c.drawString(x, y, 'text')",
+         "5. Save: c.save()"
+       ]
+     }
+   },
+   {
+     "type": "PackageTool",
+     "name": "get_pyvcf_info",
+     "description": "Get information about the pyvcf package. Python library for parsing and manipulating VCF files",
+     "package_name": "pyvcf",
+     "parameter": {
+       "type": "object",
+       "properties": {},
+       "required": []
+     },
+     "required": []
+   },
+   {
+     "type": "PackageTool",
+     "name": "get_kipoiseq_info",
+     "description": "Get information about the kipoiseq package. Kipoi sequence utilities for genomics deep learning",
+     "package_name": "kipoiseq",
+     "parameter": {
+       "type": "object",
+       "properties": {},
+       "required": []
+     },
+     "required": []
+   },
+   {
+     "type": "PackageTool",
+     "name": "get_pyfasta_info",
+     "description": "Get information about the pyfasta package. Python library for efficient random access to fasta subsequences",
+     "package_name": "pyfasta",
+     "parameter": {
+       "type": "object",
+       "properties": {},
+       "required": []
+     },
+     "required": []
+   },
+   {
+     "type": "PackageTool",
+     "name": "get_pyensembl_info",
+     "description": "Get information about the pyensembl package. Python interface to Ensembl reference genome metadata",
+     "package_name": "pyensembl",
+     "parameter": {
+       "type": "object",
+       "properties": {},
+       "required": []
+     },
+     "required": []
+   },
+   {
+     "type": "PackageTool",
+     "name": "get_poretools_info",
+     "description": "Get information about the poretools package. Python package: poretools",
+     "package_name": "poretools",
+     "parameter": {
+       "type": "object",
+       "properties": {},
+       "required": []
+     },
+     "required": []
+   }
+ ]
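
Every entry in this new data file follows the same PackageTool shape: a tool name, a JSON-Schema "parameter" block, the PyPI "package_name", and, for the richer entries, a bundled "local_info" record with installation commands, a "usage_example", and a "quick_start" list. As a rough sketch of how such a file could be consumed downstream, the snippet below loads the JSON with the standard library only and summarizes each entry; the relative path is taken from the file list above, and using ToolUniverse's own loader instead of raw json is left to the package's documented API.

    import json
    from pathlib import Path

    # Path as it appears in this diff; adjust to the installed wheel location.
    data_file = Path('tooluniverse/data/packages/genomics_tools.json')

    tools = json.loads(data_file.read_text())
    print(f'{len(tools)} PackageTool entries')

    for tool in tools:
        # Minimal entries such as get_pyvcf_info carry no local_info block.
        has_local_info = 'local_info' in tool
        params = list(tool.get('parameter', {}).get('properties', {}))
        print(f"{tool['name']} -> package {tool['package_name']}, "
              f"params {params or 'none'}, bundled info: {has_local_info}")

The same pattern extends to the other files under tooluniverse/data/packages/ added in this release, since they share the PackageTool schema; filtering on the "type" field would handle files that mix tool types.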