tooluniverse 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of tooluniverse might be problematic.

Files changed (96)
  1. tooluniverse/__init__.py +37 -14
  2. tooluniverse/admetai_tool.py +16 -5
  3. tooluniverse/base_tool.py +36 -0
  4. tooluniverse/biogrid_tool.py +118 -0
  5. tooluniverse/build_optimizer.py +87 -0
  6. tooluniverse/cache/__init__.py +3 -0
  7. tooluniverse/cache/memory_cache.py +99 -0
  8. tooluniverse/cache/result_cache_manager.py +235 -0
  9. tooluniverse/cache/sqlite_backend.py +257 -0
  10. tooluniverse/clinvar_tool.py +90 -0
  11. tooluniverse/compose_scripts/output_summarizer.py +87 -33
  12. tooluniverse/compose_tool.py +2 -2
  13. tooluniverse/custom_tool.py +28 -0
  14. tooluniverse/data/adverse_event_tools.json +97 -98
  15. tooluniverse/data/agentic_tools.json +81 -162
  16. tooluniverse/data/arxiv_tools.json +1 -4
  17. tooluniverse/data/compose_tools.json +0 -54
  18. tooluniverse/data/core_tools.json +1 -4
  19. tooluniverse/data/dataset_tools.json +7 -7
  20. tooluniverse/data/doaj_tools.json +1 -3
  21. tooluniverse/data/drug_discovery_agents.json +282 -0
  22. tooluniverse/data/europe_pmc_tools.json +1 -2
  23. tooluniverse/data/genomics_tools.json +174 -0
  24. tooluniverse/data/geo_tools.json +86 -0
  25. tooluniverse/data/literature_search_tools.json +15 -35
  26. tooluniverse/data/markitdown_tools.json +51 -0
  27. tooluniverse/data/monarch_tools.json +1 -2
  28. tooluniverse/data/openalex_tools.json +1 -5
  29. tooluniverse/data/opentarget_tools.json +8 -16
  30. tooluniverse/data/output_summarization_tools.json +23 -20
  31. tooluniverse/data/packages/bioinformatics_core_tools.json +2 -2
  32. tooluniverse/data/packages/cheminformatics_tools.json +1 -1
  33. tooluniverse/data/packages/genomics_tools.json +1 -1
  34. tooluniverse/data/packages/single_cell_tools.json +1 -1
  35. tooluniverse/data/packages/structural_biology_tools.json +1 -1
  36. tooluniverse/data/pmc_tools.json +1 -4
  37. tooluniverse/data/ppi_tools.json +139 -0
  38. tooluniverse/data/pubmed_tools.json +1 -3
  39. tooluniverse/data/semantic_scholar_tools.json +1 -2
  40. tooluniverse/data/tool_composition_tools.json +2 -4
  41. tooluniverse/data/unified_guideline_tools.json +206 -4
  42. tooluniverse/data/xml_tools.json +15 -15
  43. tooluniverse/data/zenodo_tools.json +1 -2
  44. tooluniverse/dbsnp_tool.py +71 -0
  45. tooluniverse/default_config.py +6 -0
  46. tooluniverse/ensembl_tool.py +61 -0
  47. tooluniverse/execute_function.py +235 -76
  48. tooluniverse/generate_tools.py +303 -20
  49. tooluniverse/genomics_gene_search_tool.py +56 -0
  50. tooluniverse/geo_tool.py +116 -0
  51. tooluniverse/gnomad_tool.py +63 -0
  52. tooluniverse/logging_config.py +64 -2
  53. tooluniverse/markitdown_tool.py +159 -0
  54. tooluniverse/mcp_client_tool.py +10 -5
  55. tooluniverse/molecule_2d_tool.py +9 -3
  56. tooluniverse/molecule_3d_tool.py +9 -3
  57. tooluniverse/output_hook.py +217 -150
  58. tooluniverse/smcp.py +18 -10
  59. tooluniverse/smcp_server.py +89 -199
  60. tooluniverse/string_tool.py +112 -0
  61. tooluniverse/tools/{MultiAgentLiteratureSearch.py → ADMETAnalyzerAgent.py} +18 -18
  62. tooluniverse/tools/ArXiv_search_papers.py +3 -3
  63. tooluniverse/tools/CMA_Guidelines_Search.py +52 -0
  64. tooluniverse/tools/CORE_search_papers.py +3 -3
  65. tooluniverse/tools/ClinVar_search_variants.py +52 -0
  66. tooluniverse/tools/ClinicalTrialDesignAgent.py +63 -0
  67. tooluniverse/tools/CompoundDiscoveryAgent.py +59 -0
  68. tooluniverse/tools/DOAJ_search_articles.py +2 -2
  69. tooluniverse/tools/DiseaseAnalyzerAgent.py +52 -0
  70. tooluniverse/tools/DrugInteractionAnalyzerAgent.py +52 -0
  71. tooluniverse/tools/DrugOptimizationAgent.py +63 -0
  72. tooluniverse/tools/Ensembl_lookup_gene_by_symbol.py +52 -0
  73. tooluniverse/tools/EuropePMC_search_articles.py +1 -1
  74. tooluniverse/tools/GIN_Guidelines_Search.py +52 -0
  75. tooluniverse/tools/GWAS_search_associations_by_gene.py +52 -0
  76. tooluniverse/tools/LiteratureSynthesisAgent.py +59 -0
  77. tooluniverse/tools/PMC_search_papers.py +3 -3
  78. tooluniverse/tools/PubMed_search_articles.py +2 -2
  79. tooluniverse/tools/SemanticScholar_search_papers.py +1 -1
  80. tooluniverse/tools/UCSC_get_genes_by_region.py +67 -0
  81. tooluniverse/tools/Zenodo_search_records.py +1 -1
  82. tooluniverse/tools/__init__.py +33 -3
  83. tooluniverse/tools/convert_to_markdown.py +59 -0
  84. tooluniverse/tools/dbSNP_get_variant_by_rsid.py +46 -0
  85. tooluniverse/tools/gnomAD_query_variant.py +52 -0
  86. tooluniverse/tools/openalex_literature_search.py +4 -4
  87. tooluniverse/ucsc_tool.py +60 -0
  88. tooluniverse/unified_guideline_tools.py +1175 -57
  89. tooluniverse/utils.py +51 -4
  90. tooluniverse/zenodo_tool.py +2 -1
  91. {tooluniverse-1.0.7.dist-info → tooluniverse-1.0.9.dist-info}/METADATA +10 -3
  92. {tooluniverse-1.0.7.dist-info → tooluniverse-1.0.9.dist-info}/RECORD +96 -61
  93. {tooluniverse-1.0.7.dist-info → tooluniverse-1.0.9.dist-info}/entry_points.txt +0 -3
  94. {tooluniverse-1.0.7.dist-info → tooluniverse-1.0.9.dist-info}/WHEEL +0 -0
  95. {tooluniverse-1.0.7.dist-info → tooluniverse-1.0.9.dist-info}/licenses/LICENSE +0 -0
  96. {tooluniverse-1.0.7.dist-info → tooluniverse-1.0.9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,86 @@
+ [
+ {
+ "type": "GEORESTTool",
+ "name": "GEO_search_expression_data",
+ "description": "Search gene expression data from the GEO database. GEO is a public repository that archives and freely distributes microarray, next-generation sequencing, and other forms of high-throughput functional genomics data.",
+ "parameter": {
+ "type": "object",
+ "properties": {
+ "query": {
+ "type": "string",
+ "description": "Search query (e.g., 'cancer', 'diabetes', 'microarray')",
+ "minLength": 1
+ },
+ "organism": {
+ "type": "string",
+ "description": "Organism name (e.g., 'Homo sapiens', 'Mus musculus')",
+ "default": "Homo sapiens"
+ },
+ "study_type": {
+ "type": "string",
+ "description": "Type of study (e.g., 'expression', 'methylation', 'genome')",
+ "enum": ["expression", "methylation", "genome", "sequence", "other"]
+ },
+ "platform": {
+ "type": "string",
+ "description": "Platform used (e.g., 'GPL96', 'GPL570')"
+ },
+ "date_range": {
+ "type": "string",
+ "description": "Date range in format 'YYYY:YYYY' (e.g., '2020:2023')"
+ },
+ "limit": {
+ "type": "integer",
+ "description": "Maximum number of results to return (default: 50)",
+ "minimum": 1,
+ "maximum": 500,
+ "default": 50
+ },
+ "sort": {
+ "type": "string",
+ "description": "Sort order ('relevance', 'date', 'title')",
+ "enum": ["relevance", "date", "title"],
+ "default": "relevance"
+ }
+ },
+ "required": ["query"]
+ },
+ "fields": {
+ "endpoint": "/esearch.fcgi",
+ "return_format": "JSON"
+ },
+ "return_schema": {
+ "type": "object",
+ "properties": {
+ "success": {"type": "boolean"},
+ "count": {"type": "integer"},
+ "studies": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "id": {"type": "string"},
+ "title": {"type": "string"},
+ "summary": {"type": "string"},
+ "organism": {"type": "string"},
+ "platform": {"type": "string"},
+ "samples": {"type": "integer"},
+ "series_type": {"type": "string"},
+ "publication_date": {"type": "string"},
+ "submission_date": {"type": "string"},
+ "contact": {"type": "string"},
+ "citation": {"type": "string"}
+ }
+ }
+ },
+ "query_translation": {"type": "string"},
+ "error": {"type": "string"}
+ }
+ },
+ "implementation": {
+ "language": "python",
+ "dependencies": ["requests"],
+ "source_file": "geo_tool.py"
+ }
+ }
+ ]
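
The new GEO_search_expression_data entry only declares the parameter and return schemas; the HTTP call itself lives in the new geo_tool.py. As a rough sketch of what a backend for this schema might do, assuming the standard NCBI E-utilities esearch endpoint for the GEO DataSets database (the actual implementation may build the query differently):

import requests

# Minimal sketch of a GEORESTTool backend: query NCBI E-utilities esearch
# for GEO DataSets (db=gds) using the parameters defined above. The helper
# name and query construction are illustrative, not the shipped code.
def search_geo(query, organism="Homo sapiens", limit=50):
    term = f'{query} AND "{organism}"[Organism]'
    resp = requests.get(
        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
        params={"db": "gds", "term": term, "retmax": limit, "retmode": "json"},
        timeout=30,
    )
    resp.raise_for_status()
    result = resp.json()["esearchresult"]
    return {"success": True, "count": int(result["count"]), "ids": result["idlist"]}

print(search_geo("cancer", limit=5))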
@@ -13,13 +13,11 @@
  "properties": {
  "user_query": {
  "type": "string",
- "description": "The research query to analyze",
- "required": true
+ "description": "The research query to analyze"
  },
  "context": {
  "type": "string",
  "description": "Context information from previous steps",
- "required": false,
  "default": ""
  }
  },
@@ -51,23 +49,19 @@
  "properties": {
  "plan_title": {
  "type": "string",
- "description": "The title of the search plan",
- "required": true
+ "description": "The title of the search plan"
  },
  "plan_description": {
  "type": "string",
- "description": "The description of the search plan",
- "required": true
+ "description": "The description of the search plan"
  },
  "current_keywords": {
  "type": "string",
- "description": "Current keywords for the plan (comma-separated)",
- "required": true
+ "description": "Current keywords for the plan (comma-separated)"
  },
  "context": {
  "type": "string",
  "description": "Context information from previous steps",
- "required": false,
  "default": ""
  }
  },
@@ -102,28 +96,23 @@
  "properties": {
  "plan_title": {
  "type": "string",
- "description": "The title of the search plan",
- "required": true
+ "description": "The title of the search plan"
  },
  "plan_description": {
  "type": "string",
- "description": "The description of the search plan",
- "required": true
+ "description": "The description of the search plan"
  },
  "paper_count": {
  "type": "string",
- "description": "Number of papers found",
- "required": true
+ "description": "Number of papers found"
  },
  "papers_text": {
  "type": "string",
- "description": "Formatted text of the papers to summarize",
- "required": true
+ "description": "Formatted text of the papers to summarize"
  },
  "context": {
  "type": "string",
  "description": "Context information from previous steps",
- "required": false,
  "default": ""
  }
  },
@@ -156,13 +145,11 @@
  "properties": {
  "plans_analysis": {
  "type": "string",
- "description": "Analysis of current search plans and their quality scores",
- "required": true
+ "description": "Analysis of current search plans and their quality scores"
  },
  "context": {
  "type": "string",
  "description": "Context information from previous steps",
- "required": false,
  "default": ""
  }
  },
@@ -197,38 +184,31 @@
  "properties": {
  "user_query": {
  "type": "string",
- "description": "The original research query",
- "required": true
+ "description": "The original research query"
  },
  "user_intent": {
  "type": "string",
- "description": "The analyzed user intent",
- "required": true
+ "description": "The analyzed user intent"
  },
  "total_papers": {
  "type": "string",
- "description": "Total number of papers found",
- "required": true
+ "description": "Total number of papers found"
  },
  "total_plans": {
  "type": "string",
- "description": "Total number of search plans executed",
- "required": true
+ "description": "Total number of search plans executed"
  },
  "iterations": {
  "type": "string",
- "description": "Number of iterations performed",
- "required": true
+ "description": "Number of iterations performed"
  },
  "plan_summaries": {
  "type": "string",
- "description": "Summaries of all search plans",
- "required": true
+ "description": "Summaries of all search plans"
  },
  "context": {
  "type": "string",
  "description": "Context information from previous steps",
- "required": false,
  "default": ""
  }
  },
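
All of the removals in the hunks above follow one pattern: per-property "required": true/false flags are dropped, because JSON Schema expresses requiredness through the object-level "required" array that these schemas already carry. A minimal illustration using the third-party jsonschema package (not a dependency of these files, shown only to make the distinction concrete):

from jsonschema import ValidationError, validate

# The object-level "required" array is what validators honour; a per-property
# "required": true key is not part of JSON Schema and is simply ignored.
schema = {
    "type": "object",
    "properties": {
        "user_query": {"type": "string", "description": "The research query to analyze"},
        "context": {"type": "string", "default": ""},
    },
    "required": ["user_query"],
}

validate({"user_query": "EGFR inhibitors in NSCLC"}, schema)  # passes
try:
    validate({"context": "previous step output"}, schema)     # missing user_query
except ValidationError as err:
    print(err.message)  # 'user_query' is a required property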
@@ -0,0 +1,51 @@
+ [
+ {
+ "type": "MarkItDownTool",
+ "name": "convert_to_markdown",
+ "description": "Convert a resource described by an http:, https:, file: or data: URI to markdown.",
+ "parameter": {
+ "type": "object",
+ "properties": {
+ "uri": {
+ "type": "string",
+ "description": "URI of the resource to convert (supports http:, https:, file:, data: URIs)"
+ },
+ "output_path": {
+ "type": "string",
+ "description": "Optional output file path"
+ },
+ "enable_plugins": {
+ "type": "boolean",
+ "description": "Enable 3rd-party plugins",
+ "default": false
+ }
+ },
+ "required": ["uri"]
+ },
+ "return_schema": {
+ "type": "object",
+ "properties": {
+ "markdown_content": {
+ "type": "string",
+ "description": "The converted Markdown content"
+ },
+ "content": {
+ "type": "string",
+ "description": "The converted Markdown content (same as markdown_content, provided for convenience when no output_path is specified)"
+ },
+ "file_info": {
+ "type": "object",
+ "properties": {
+ "original_file": {"type": "string"},
+ "file_type": {"type": "string"},
+ "output_file": {"type": "string"}
+ }
+ },
+ "error": {
+ "type": "string",
+ "description": "Error message if conversion failed"
+ }
+ }
+ }
+ }
+ ]
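
The convert_to_markdown schema maps closely onto the upstream markitdown package. A hedged sketch of the call that markitdown_tool.py presumably wraps (the URI and output file below are placeholders, and the shipped code may handle them differently):

from markitdown import MarkItDown

# Convert a URL (or local file path) to Markdown text; enable_plugins mirrors
# the boolean parameter declared above.
md = MarkItDown(enable_plugins=False)
result = md.convert("https://en.wikipedia.org/wiki/Gene_expression")
markdown_content = result.text_content

# Optionally persist the result, mirroring the optional output_path parameter.
with open("output.md", "w", encoding="utf-8") as fh:
    fh.write(markdown_content)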
@@ -10,8 +10,7 @@
  "description": "List of phenotypes or symptoms",
  "items": {
  "type": "string",
- "description": "The HPO ID of the phenotype or symptom.",
- "required": true
+ "description": "The HPO ID of the phenotype or symptom."
  }
  },
  "limit": {
@@ -31,11 +31,7 @@
  }
  },
  "required": [
- "search_keywords",
- "max_results",
- "year_from",
- "year_to",
- "open_access"
+ "search_keywords"
  ]
  },
  "return_schema": {
@@ -728,13 +728,11 @@
  "properties": {
  "index": {
  "type": "integer",
- "description": "The index of the page to retrieve.",
- "required": true
+ "description": "The index of the page to retrieve."
  },
  "size": {
  "type": "integer",
- "description": "The number of items per page.",
- "required": true
+ "description": "The number of items per page."
  }
  },
  "description": "Pagination parameters."
@@ -906,13 +904,11 @@
  "properties": {
  "index": {
  "type": "integer",
- "description": "Index of the page to fetch, starting from 0.",
- "required": true
+ "description": "Index of the page to fetch, starting from 0."
  },
  "size": {
  "type": "integer",
- "description": "Number of entries per page.",
- "required": true
+ "description": "Number of entries per page."
  }
  },
  "description": "Pagination settings."
@@ -1152,13 +1148,11 @@
  "properties": {
  "index": {
  "type": "integer",
- "description": "Pagination index.",
- "required": true
+ "description": "Pagination index."
  },
  "size": {
  "type": "integer",
- "description": "Number of records to fetch per page.",
- "required": true
+ "description": "Number of records to fetch per page."
  }
  }
  }
@@ -1277,13 +1271,11 @@
  "properties": {
  "index": {
  "type": "integer",
- "description": "Pagination index.",
- "required": "True"
+ "description": "Pagination index."
  },
  "size": {
  "type": "integer",
- "description": "Pagination size.",
- "required": "True"
+ "description": "Pagination size."
  }
  },
  "description": "Pagination settings with index and size."
@@ -4,39 +4,44 @@
  "name": "ToolOutputSummarizer",
  "description": "AI-powered tool for summarizing long tool outputs, focusing on key information relevant to the original query",
  "prompt": "You are an expert at summarizing tool outputs. Your task is to analyze the provided tool output and create a concise summary that highlights the most important information relevant to the original query.\n\nTool Output to Summarize:\n{tool_output}\n\nOriginal Query Context:\n{query_context}\n\nTool Name: {tool_name}\nFocus Areas: {focus_areas}\nMaximum Summary Length: {max_length}\n\nPlease provide a well-structured summary that:\n1. Captures the key findings and results\n2. Highlights important data points and metrics\n3. Preserves critical technical details\n4. Maintains the essential structure of the original output\n5. Focuses on information most relevant to the query\n\nReturn the summary in a clear, organized format.",
- "input_arguments": ["tool_output", "query_context", "tool_name", "focus_areas", "max_length"],
+ "input_arguments": [
+ "tool_output",
+ "query_context",
+ "tool_name",
+ "focus_areas",
+ "max_length"
+ ],
  "parameter": {
  "type": "object",
  "properties": {
  "tool_output": {
  "type": "string",
- "description": "The original tool output to be summarized",
- "required": true
+ "description": "The original tool output to be summarized"
  },
  "query_context": {
  "type": "string",
- "description": "Context about the original query that triggered the tool",
- "required": true
+ "description": "Context about the original query that triggered the tool"
  },
  "tool_name": {
  "type": "string",
- "description": "Name of the tool that generated the output",
- "required": true
+ "description": "Name of the tool that generated the output"
  },
  "focus_areas": {
  "type": "string",
  "description": "Specific areas to focus on in the summary",
- "required": false,
  "default": "key_findings_and_results"
  },
  "max_length": {
  "type": "integer",
  "description": "Maximum length of the summary in characters",
- "required": false,
  "default": 32000
  }
  },
- "required": ["tool_output", "query_context", "tool_name"]
+ "required": [
+ "tool_output",
+ "query_context",
+ "tool_name"
+ ]
  },
  "configs": {
  "api_type": "CHATGPT",
@@ -56,39 +61,37 @@
  "properties": {
  "tool_output": {
  "type": "string",
- "description": "The original tool output to be summarized",
- "required": true
+ "description": "The original tool output to be summarized"
  },
  "query_context": {
  "type": "string",
- "description": "Context about the original query",
- "required": true
+ "description": "Context about the original query"
  },
  "tool_name": {
  "type": "string",
- "description": "Name of the tool that generated the output",
- "required": true
+ "description": "Name of the tool that generated the output"
  },
  "chunk_size": {
  "type": "integer",
  "description": "Size of each chunk for processing",
- "required": false,
  "default": 30000
  },
  "focus_areas": {
  "type": "string",
  "description": "Areas to focus on in summarization",
- "required": false,
  "default": "key_findings_and_results"
  },
  "max_summary_length": {
  "type": "integer",
  "description": "Maximum length of final summary",
- "required": false,
  "default": 10000
  },
  },
- "required": ["tool_output", "query_context", "tool_name"]
+ "required": [
+ "tool_output",
+ "query_context",
+ "tool_name"
+ ]
  },
  "auto_load_dependencies": true,
  "fail_on_missing_tools": false,
@@ -282,7 +282,7 @@
  "pip": "pip install numba",
  "conda": "conda install numba"
  },
- "usage_example": "import numba\nfrom numba import jit, njit, prange, cuda\nimport numpy as np\nimport time\nimport math\n\nprint('Numba - JIT Compiler for Python')\nprint('=' * 35)\n\n# Basic JIT compilation example\nprint('\\n=== Basic JIT Compilation ===')\n\n# Pure Python function\ndef python_function(x):\n total = 0\n for i in range(x):\n total += i * i\n return total\n\n# JIT compiled function\n@jit\ndef numba_function(x):\n total = 0\n for i in range(x):\n total += i * i\n return total\n\n# No-Python mode (faster)\n@njit\ndef numba_nopython(x):\n total = 0\n for i in range(x):\n total += i * i\n return total\n\n# Performance comparison\nn = 1000000\nprint(f'Computing sum of squares for {n:,} numbers')\n\n# Warm up JIT functions\nnumba_function(100)\nnumba_nopython(100)\n\n# Time Python function\nstart = time.time()\nresult_python = python_function(n)\ntime_python = time.time() - start\n\n# Time JIT function\nstart = time.time()\nresult_numba = numba_function(n)\ntime_numba = time.time() - start\n\n# Time no-Python JIT\nstart = time.time()\nresult_nopython = numba_nopython(n)\ntime_nopython = time.time() - start\n\nprint(f'Python result: {result_python}')\nprint(f'Numba result: {result_numba}')\nprint(f'No-Python result: {result_nopython}')\nprint(f'\\nPython time: {time_python:.4f} seconds')\nprint(f'Numba time: {time_numba:.4f} seconds')\nprint(f'No-Python time: {time_nopython:.4f} seconds')\nprint(f'Speedup (Numba): {time_python/time_numba:.1f}x')\nprint(f'Speedup (No-Python): {time_python/time_nopython:.1f}x')\n\n# NumPy array operations\nprint('\\n=== NumPy Array Operations ===')\n\n@njit\ndef matrix_multiply_numba(A, B):\n return np.dot(A, B)\n\n@njit\ndef element_wise_operation(arr):\n result = np.zeros_like(arr)\n for i in range(arr.shape[0]):\n for j in range(arr.shape[1]):\n result[i, j] = math.sqrt(arr[i, j]**2 + 1)\n return result\n\n# Create test arrays\nsize = 500\nA = np.random.random((size, size))\nB = np.random.random((size, size))\n\nprint(f'Matrix operations on {size}x{size} arrays')\n\n# Warm up\nmatrix_multiply_numba(A[:10, :10], B[:10, :10])\nelement_wise_operation(A[:10, :10])\n\n# Time NumPy operations\nstart = time.time()\nnumpy_result = np.dot(A, B)\ntime_numpy = time.time() - start\n\n# Time Numba operations\nstart = time.time()\nnumba_result = matrix_multiply_numba(A, B)\ntime_numba_matrix = time.time() - start\n\nprint(f'NumPy matrix multiply: {time_numpy:.4f} seconds')\nprint(f'Numba matrix multiply: {time_numba_matrix:.4f} seconds')\nprint(f'Results equal: {np.allclose(numpy_result, numba_result)}')\n\n# Parallel execution\nprint('\\n=== Parallel Execution ===')\n\n@njit(parallel=True)\ndef parallel_sum(arr):\n total = 0.0\n for i in prange(arr.shape[0]):\n total += arr[i]\n return total\n\n@njit\ndef serial_sum(arr):\n total = 0.0\n for i in range(arr.shape[0]):\n total += arr[i]\n return total\n\nlarge_array = np.random.random(10000000)\n\n# Warm up\nparallel_sum(large_array[:1000])\nserial_sum(large_array[:1000])\n\n# Time serial version\nstart = time.time()\nserial_result = serial_sum(large_array)\ntime_serial = time.time() - start\n\n# Time parallel version\nstart = time.time()\nparallel_result = parallel_sum(large_array)\ntime_parallel = time.time() - start\n\nprint(f'Array size: {len(large_array):,} elements')\nprint(f'Serial sum: {serial_result:.6f} ({time_serial:.4f} seconds)')\nprint(f'Parallel sum: {parallel_result:.6f} ({time_parallel:.4f} seconds)')\nprint(f'Parallel speedup: {time_serial/time_parallel:.1f}x')\n\n# Mathematical 
functions\nprint('\\n=== Mathematical Functions ===')\n\n@njit\ndef monte_carlo_pi(n_samples):\n count = 0\n for i in range(n_samples):\n x = np.random.random()\n y = np.random.random()\n if x*x + y*y <= 1.0:\n count += 1\n return 4.0 * count / n_samples\n\n@njit\ndef mandelbrot_point(c_real, c_imag, max_iter):\n z_real = 0.0\n z_imag = 0.0\n for i in range(max_iter):\n z_real_new = z_real*z_real - z_imag*z_imag + c_real\n z_imag_new = 2*z_real*z_imag + c_imag\n z_real = z_real_new\n z_imag = z_imag_new\n if z_real*z_real + z_imag*z_imag > 4:\n return i\n return max_iter\n\n# Monte Carlo Pi estimation\nn_samples = 1000000\nprint(f'Monte Carlo π estimation with {n_samples:,} samples')\n\nstart = time.time()\npi_estimate = monte_carlo_pi(n_samples)\ntime_mc = time.time() - start\n\nprint(f'Estimated π: {pi_estimate:.6f}')\nprint(f'Actual π: {math.pi:.6f}')\nprint(f'Error: {abs(pi_estimate - math.pi):.6f}')\nprint(f'Time: {time_mc:.4f} seconds')\n\n# Mandelbrot calculation\nprint(f'\\nMandelbrot set calculation')\nc_values = [-0.5 + 0.5j, -0.8 + 0.2j, 0.3 - 0.6j]\nmax_iterations = 1000\n\nfor c in c_values:\n iterations = mandelbrot_point(c.real, c.imag, max_iterations)\n if iterations == max_iterations:\n print(f'Point {c}: In set (>{max_iterations} iterations)')\n else:\n print(f'Point {c}: Escaped after {iterations} iterations')\n\n# Type signatures and compilation info\nprint('\\n=== Compilation Information ===')\nprint(f'Numba version: {numba.__version__}')\nprint(f'NumPy version: {np.__version__}')\n\n# Function signatures\nprint(f'\\nFunction signatures:')\nprint(f'numba_function: {numba_function.signatures}')\nprint(f'numba_nopython: {numba_nopython.signatures}')\nprint(f'parallel_sum: {parallel_sum.signatures}')\n\n# GPU example (if CUDA available)\nprint('\\n=== GPU Computing (CUDA) ===')\ntry:\n # Simple CUDA kernel example\n @cuda.jit\n def cuda_add(a, b, c):\n idx = cuda.grid(1)\n if idx < c.size:\n c[idx] = a[idx] + b[idx]\n \n # Check if CUDA is available\n if cuda.is_available():\n print('CUDA is available!')\n print(f'CUDA devices: {cuda.list_devices()}')\n \n # Small example\n n = 1000\n a = np.random.random(n).astype(np.float32)\n b = np.random.random(n).astype(np.float32)\n c = np.zeros(n, dtype=np.float32)\n \n # Configure grid and block dimensions\n threads_per_block = 128\n blocks_per_grid = (n + threads_per_block - 1) // threads_per_block\n \n print(f'Running CUDA kernel with {blocks_per_grid} blocks, {threads_per_block} threads each')\n cuda_add[blocks_per_grid, threads_per_block](a, b, c)\n \n # Verify result\n expected = a + b\n print(f'CUDA result matches NumPy: {np.allclose(c, expected)}')\n else:\n print('CUDA not available on this system')\nexcept Exception as e:\n print(f'CUDA example failed: {e}')\n\nprint('\\nNumba provides:')\nprint('• Just-in-time compilation for Python')\nprint('• Automatic parallelization with prange')\nprint('• GPU computing with CUDA support')\nprint('• NumPy array optimization')\nprint('• Minimal code changes for maximum speedup')\nprint('• Support for mathematical functions')\nprint('• Type inference and optimization')",
+ "usage_example": "import numba\nfrom numba import jit, njit, prange, cuda\nimport numpy as np\nimport time\nimport math\n\nprint('Numba - JIT Compiler for Python')\nprint('=' * 35)\n\n# Basic JIT compilation example\nprint('\\n=== Basic JIT Compilation ===')\n\n# Pure Python function\ndef python_function(x):\n total = 0\n for i in range(x):\n total += i * i\n return total\n\n# JIT compiled function\n@jit\ndef numba_function(x):\n total = 0\n for i in range(x):\n total += i * i\n return total\n\n# No-Python mode (faster)\n@njit\ndef numba_nopython(x):\n total = 0\n for i in range(x):\n total += i * i\n return total\n\n# Performance comparison\nn = 1000000\nprint(f'Computing sum of squares for {n:} numbers')\n\n# Warm up JIT functions\nnumba_function(100)\nnumba_nopython(100)\n\n# Time Python function\nstart = time.time()\nresult_python = python_function(n)\ntime_python = time.time() - start\n\n# Time JIT function\nstart = time.time()\nresult_numba = numba_function(n)\ntime_numba = time.time() - start\n\n# Time no-Python JIT\nstart = time.time()\nresult_nopython = numba_nopython(n)\ntime_nopython = time.time() - start\n\nprint(f'Python result: {result_python}')\nprint(f'Numba result: {result_numba}')\nprint(f'No-Python result: {result_nopython}')\nprint(f'\\nPython time: {time_python:.4f} seconds')\nprint(f'Numba time: {time_numba:.4f} seconds')\nprint(f'No-Python time: {time_nopython:.4f} seconds')\nprint(f'Speedup (Numba): {time_python/time_numba:.1f}x')\nprint(f'Speedup (No-Python): {time_python/time_nopython:.1f}x')\n\n# NumPy array operations\nprint('\\n=== NumPy Array Operations ===')\n\n@njit\ndef matrix_multiply_numba(A, B):\n return np.dot(A, B)\n\n@njit\ndef element_wise_operation(arr):\n result = np.zeros_like(arr)\n for i in range(arr.shape[0]):\n for j in range(arr.shape[1]):\n result[i, j] = math.sqrt(arr[i, j]**2 + 1)\n return result\n\n# Create test arrays\nsize = 500\nA = np.random.random((size, size))\nB = np.random.random((size, size))\n\nprint(f'Matrix operations on {size}x{size} arrays')\n\n# Warm up\nmatrix_multiply_numba(A[:10, :10], B[:10, :10])\nelement_wise_operation(A[:10, :10])\n\n# Time NumPy operations\nstart = time.time()\nnumpy_result = np.dot(A, B)\ntime_numpy = time.time() - start\n\n# Time Numba operations\nstart = time.time()\nnumba_result = matrix_multiply_numba(A, B)\ntime_numba_matrix = time.time() - start\n\nprint(f'NumPy matrix multiply: {time_numpy:.4f} seconds')\nprint(f'Numba matrix multiply: {time_numba_matrix:.4f} seconds')\nprint(f'Results equal: {np.allclose(numpy_result, numba_result)}')\n\n# Parallel execution\nprint('\\n=== Parallel Execution ===')\n\n@njit(parallel=True)\ndef parallel_sum(arr):\n total = 0.0\n for i in prange(arr.shape[0]):\n total += arr[i]\n return total\n\n@njit\ndef serial_sum(arr):\n total = 0.0\n for i in range(arr.shape[0]):\n total += arr[i]\n return total\n\nlarge_array = np.random.random(10000000)\n\n# Warm up\nparallel_sum(large_array[:1000])\nserial_sum(large_array[:1000])\n\n# Time serial version\nstart = time.time()\nserial_result = serial_sum(large_array)\ntime_serial = time.time() - start\n\n# Time parallel version\nstart = time.time()\nparallel_result = parallel_sum(large_array)\ntime_parallel = time.time() - start\n\nprint(f'Array size: {len(large_array):} elements')\nprint(f'Serial sum: {serial_result:.6f} ({time_serial:.4f} seconds)')\nprint(f'Parallel sum: {parallel_result:.6f} ({time_parallel:.4f} seconds)')\nprint(f'Parallel speedup: {time_serial/time_parallel:.1f}x')\n\n# Mathematical 
functions\nprint('\\n=== Mathematical Functions ===')\n\n@njit\ndef monte_carlo_pi(n_samples):\n count = 0\n for i in range(n_samples):\n x = np.random.random()\n y = np.random.random()\n if x*x + y*y <= 1.0:\n count += 1\n return 4.0 * count / n_samples\n\n@njit\ndef mandelbrot_point(c_real, c_imag, max_iter):\n z_real = 0.0\n z_imag = 0.0\n for i in range(max_iter):\n z_real_new = z_real*z_real - z_imag*z_imag + c_real\n z_imag_new = 2*z_real*z_imag + c_imag\n z_real = z_real_new\n z_imag = z_imag_new\n if z_real*z_real + z_imag*z_imag > 4:\n return i\n return max_iter\n\n# Monte Carlo Pi estimation\nn_samples = 1000000\nprint(f'Monte Carlo π estimation with {n_samples:} samples')\n\nstart = time.time()\npi_estimate = monte_carlo_pi(n_samples)\ntime_mc = time.time() - start\n\nprint(f'Estimated π: {pi_estimate:.6f}')\nprint(f'Actual π: {math.pi:.6f}')\nprint(f'Error: {abs(pi_estimate - math.pi):.6f}')\nprint(f'Time: {time_mc:.4f} seconds')\n\n# Mandelbrot calculation\nprint(f'\\nMandelbrot set calculation')\nc_values = [-0.5 + 0.5j, -0.8 + 0.2j, 0.3 - 0.6j]\nmax_iterations = 1000\n\nfor c in c_values:\n iterations = mandelbrot_point(c.real, c.imag, max_iterations)\n if iterations == max_iterations:\n print(f'Point {c}: In set (>{max_iterations} iterations)')\n else:\n print(f'Point {c}: Escaped after {iterations} iterations')\n\n# Type signatures and compilation info\nprint('\\n=== Compilation Information ===')\nprint(f'Numba version: {numba.__version__}')\nprint(f'NumPy version: {np.__version__}')\n\n# Function signatures\nprint(f'\\nFunction signatures:')\nprint(f'numba_function: {numba_function.signatures}')\nprint(f'numba_nopython: {numba_nopython.signatures}')\nprint(f'parallel_sum: {parallel_sum.signatures}')\n\n# GPU example (if CUDA available)\nprint('\\n=== GPU Computing (CUDA) ===')\ntry:\n # Simple CUDA kernel example\n @cuda.jit\n def cuda_add(a, b, c):\n idx = cuda.grid(1)\n if idx < c.size:\n c[idx] = a[idx] + b[idx]\n \n # Check if CUDA is available\n if cuda.is_available():\n print('CUDA is available!')\n print(f'CUDA devices: {cuda.list_devices()}')\n \n # Small example\n n = 1000\n a = np.random.random(n).astype(np.float32)\n b = np.random.random(n).astype(np.float32)\n c = np.zeros(n, dtype=np.float32)\n \n # Configure grid and block dimensions\n threads_per_block = 128\n blocks_per_grid = (n + threads_per_block - 1) // threads_per_block\n \n print(f'Running CUDA kernel with {blocks_per_grid} blocks, {threads_per_block} threads each')\n cuda_add[blocks_per_grid, threads_per_block](a, b, c)\n \n # Verify result\n expected = a + b\n print(f'CUDA result matches NumPy: {np.allclose(c, expected)}')\n else:\n print('CUDA not available on this system')\nexcept Exception as e:\n print(f'CUDA example failed: {e}')\n\nprint('\\nNumba provides:')\nprint('• Just-in-time compilation for Python')\nprint('• Automatic parallelization with prange')\nprint('• GPU computing with CUDA support')\nprint('• NumPy array optimization')\nprint('• Minimal code changes for maximum speedup')\nprint('• Support for mathematical functions')\nprint('• Type inference and optimization')",
  "quick_start": [
  "Install: pip install numba",
  "Import: from numba import jit, njit",
@@ -1119,7 +1119,7 @@
  "pip": "pip install ruptures",
  "conda": "conda install -c conda-forge ruptures"
  },
- "usage_example": "# ruptures change point detection demonstration\n\nimport numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt\nfrom scipy import signal\nfrom sklearn.preprocessing import StandardScaler\nimport tempfile\nimport os\n\n# Simulate ruptures functionality\ndef pelt_algorithm(data, penalty=10):\n \"\"\"Simplified PELT algorithm for change point detection\"\"\"\n n = len(data)\n F = np.full(n + 1, np.inf)\n F[0] = -penalty\n cp_candidates = [0]\n \n for t in range(1, n + 1):\n for s in cp_candidates:\n if s < t:\n segment_data = data[s:t]\n if len(segment_data) > 0:\n cost = np.var(segment_data) * len(segment_data)\n total_cost = F[s] + cost + penalty\n \n if total_cost < F[t]:\n F[t] = total_cost\n \n # Pruning step\n cp_candidates = [s for s in cp_candidates if F[s] <= F[t] - penalty]\n cp_candidates.append(t)\n \n # Backtrack to find change points\n change_points = []\n t = n\n while t > 0:\n for s in range(t):\n if s in cp_candidates:\n segment_data = data[s:t]\n if len(segment_data) > 0:\n cost = np.var(segment_data) * len(segment_data)\n if abs(F[t] - (F[s] + cost + penalty)) < 1e-10:\n if s > 0:\n change_points.append(s)\n t = s\n break\n else:\n break\n \n return sorted(change_points)\n\ndef binary_segmentation(data, max_changepoints=10):\n \"\"\"Simplified binary segmentation algorithm\"\"\"\n def find_best_split(segment_data, start_idx):\n n = len(segment_data)\n if n < 4: # Minimum segment size\n return None, -np.inf\n \n best_score = -np.inf\n best_split = None\n \n for split in range(2, n - 1):\n left = segment_data[:split]\n right = segment_data[split:]\n \n # Calculate score based on variance reduction\n total_var = np.var(segment_data) * n\n left_var = np.var(left) * len(left)\n right_var = np.var(right) * len(right)\n \n score = total_var - (left_var + right_var)\n \n if score > best_score:\n best_score = score\n best_split = start_idx + split\n \n return best_split, best_score\n \n change_points = []\n segments = [(data, 0)] # (segment_data, start_index)\n \n for _ in range(max_changepoints):\n if not segments:\n break\n \n best_segment = None\n best_split = None\n best_score = -np.inf\n \n # Find the best split among all segments\n for i, (segment_data, start_idx) in enumerate(segments):\n split, score = find_best_split(segment_data, start_idx)\n if split is not None and score > best_score:\n best_score = score\n best_split = split\n best_segment = i\n \n if best_split is None or best_score <= 0:\n break\n \n # Apply the best split\n segment_data, start_idx = segments.pop(best_segment)\n split_point = best_split - start_idx\n \n left_segment = segment_data[:split_point]\n right_segment = segment_data[split_point:]\n \n if len(left_segment) > 0:\n segments.append((left_segment, start_idx))\n if len(right_segment) > 0:\n segments.append((right_segment, best_split))\n \n change_points.append(best_split)\n \n return sorted(change_points)\n\nprint('ruptures - Change Point Detection Library')\nprint('=' * 45)\n\nprint('ruptures Features:')\nprint('• Multiple change point detection algorithms')\nprint('• PELT, Binary Segmentation, Window-based methods')\nprint('• Support for various cost functions')\nprint('• Multivariate time series analysis')\nprint('• Model selection and validation')\nprint('• Efficient implementations')\n\nprint('\\nApplications:')\nprint('• Signal processing and anomaly detection')\nprint('• Financial time series analysis')\nprint('• Genomic segmentation')\nprint('• Climate data analysis')\nprint('• Quality control in 
manufacturing')\n\n# Generate synthetic time series with change points\nprint('\\n=== Synthetic Time Series Generation ===')\n\nnp.random.seed(42)\n\n# Time series parameters\ntotal_length = 1000\ntrue_change_points = [200, 400, 650, 800]\nsegment_means = [1.0, 3.0, 0.5, 2.5, 1.8]\nsegment_stds = [0.5, 0.8, 0.3, 0.6, 0.4]\n\nprint(f'Generating time series with {len(true_change_points)} change points')\nprint(f'True change points: {true_change_points}')\nprint(f'Total length: {total_length} points')\n\n# Generate segments\ntime_series = []\ncurrent_pos = 0\n\nfor i, cp in enumerate(true_change_points + [total_length]):\n segment_length = cp - current_pos\n segment = np.random.normal(\n segment_means[i], \n segment_stds[i], \n segment_length\n )\n time_series.extend(segment)\n current_pos = cp\n\ntime_series = np.array(time_series)\ntime_points = np.arange(len(time_series))\n\nprint(f'Generated time series shape: {time_series.shape}')\nprint(f'Value range: {time_series.min():.2f} to {time_series.max():.2f}')\n\n# Add some noise and trends\nprint('\\nAdding noise and trends...')\n\n# Add noise\nnoise_level = 0.1\nnoise = np.random.normal(0, noise_level, len(time_series))\ntime_series_noisy = time_series + noise\n\n# Add slight trend\ntrend = 0.0005 * time_points\ntime_series_with_trend = time_series_noisy + trend\n\nprint(f'Noise level: {noise_level}')\nprint(f'Trend coefficient: 0.0005 per time unit')\n\n# Apply change point detection algorithms\nprint('\\n=== Change Point Detection ===')\n\n# Test different algorithms\nalgorithms = {\n 'PELT (penalty=5)': lambda x: pelt_algorithm(x, penalty=5),\n 'PELT (penalty=10)': lambda x: pelt_algorithm(x, penalty=10),\n 'PELT (penalty=20)': lambda x: pelt_algorithm(x, penalty=20),\n 'Binary Segmentation': lambda x: binary_segmentation(x, max_changepoints=8)\n}\n\nresults = {}\n\nfor algo_name, algo_func in algorithms.items():\n print(f'\\nRunning {algo_name}...')\n \n detected_cps = algo_func(time_series_with_trend)\n \n # Calculate performance metrics\n def calculate_metrics(true_cps, detected_cps, tolerance=50):\n \"\"\"Calculate precision, recall, and F1 score\"\"\"\n true_positives = 0\n \n for true_cp in true_cps:\n if any(abs(det_cp - true_cp) <= tolerance for det_cp in detected_cps):\n true_positives += 1\n \n precision = true_positives / len(detected_cps) if detected_cps else 0\n recall = true_positives / len(true_cps) if true_cps else 0\n f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0\n \n return precision, recall, f1\n \n precision, recall, f1 = calculate_metrics(true_change_points, detected_cps)\n \n results[algo_name] = {\n 'detected_cps': detected_cps,\n 'precision': precision,\n 'recall': recall,\n 'f1': f1\n }\n \n print(f' Detected change points: {detected_cps}')\n print(f' Precision: {precision:.3f}')\n print(f' Recall: {recall:.3f}')\n print(f' F1 Score: {f1:.3f}')\n\n# Compare algorithms\nprint('\\n=== Algorithm Comparison ===')\n\nperformance_df = pd.DataFrame({\n 'Algorithm': list(results.keys()),\n 'Precision': [results[algo]['precision'] for algo in results],\n 'Recall': [results[algo]['recall'] for algo in results],\n 'F1 Score': [results[algo]['f1'] for algo in results],\n 'Num Detected': [len(results[algo]['detected_cps']) for algo in results]\n})\n\nprint(performance_df.round(3))\n\n# Best algorithm\nbest_algo = performance_df.loc[performance_df['F1 Score'].idxmax(), 'Algorithm']\nprint(f'\\nBest performing algorithm: {best_algo}')\nprint(f'F1 Score: 
{performance_df.loc[performance_df[\"F1 Score\"].idxmax(), \"F1 Score\"]:.3f}')\n\n# Multivariate change point detection simulation\nprint('\\n=== Multivariate Change Point Detection ===')\n\n# Generate multivariate time series\nn_dims = 3\nmv_length = 500\nmv_change_points = [150, 300, 400]\n\nprint(f'Generating {n_dims}D time series with change points at {mv_change_points}')\n\nmv_time_series = []\ncurrent_pos = 0\n\n# Different correlation structures for each segment\ncorr_matrices = [\n np.array([[1.0, 0.2, 0.1], [0.2, 1.0, 0.3], [0.1, 0.3, 1.0]]), # Low correlation\n np.array([[1.0, 0.8, 0.6], [0.8, 1.0, 0.7], [0.6, 0.7, 1.0]]), # High correlation\n np.array([[1.0, -0.5, 0.2], [-0.5, 1.0, -0.3], [0.2, -0.3, 1.0]]), # Mixed correlation\n np.array([[1.0, 0.1, 0.9], [0.1, 1.0, 0.2], [0.9, 0.2, 1.0]]) # Selective correlation\n]\n\nfor i, cp in enumerate(mv_change_points + [mv_length]):\n segment_length = cp - current_pos\n \n # Generate correlated multivariate normal data\n mean = np.random.normal(0, 2, n_dims)\n cov = corr_matrices[i]\n \n segment = np.random.multivariate_normal(mean, cov, segment_length)\n mv_time_series.append(segment)\n \n current_pos = cp\n\nmv_time_series = np.vstack(mv_time_series)\nprint(f'Multivariate time series shape: {mv_time_series.shape}')\n\n# Detect change points in each dimension\nprint('\\nDetecting change points in each dimension:')\nmv_results = {}\n\nfor dim in range(n_dims):\n dim_data = mv_time_series[:, dim]\n detected_cps = binary_segmentation(dim_data, max_changepoints=5)\n \n precision, recall, f1 = calculate_metrics(mv_change_points, detected_cps, tolerance=25)\n \n mv_results[f'Dimension {dim}'] = {\n 'detected_cps': detected_cps,\n 'precision': precision,\n 'recall': recall,\n 'f1': f1\n }\n \n print(f' Dim {dim}: CPs = {detected_cps}, F1 = {f1:.3f}')\n\n# Aggregate multivariate detection (simple approach)\nprint('\\nAggregate multivariate detection:')\n\n# Sum of squared differences approach\nsum_sq_diff = np.sum(np.diff(mv_time_series, axis=0)**2, axis=1)\ndetected_cps_mv = binary_segmentation(sum_sq_diff, max_changepoints=5)\n\nprecision_mv, recall_mv, f1_mv = calculate_metrics(mv_change_points, detected_cps_mv, tolerance=25)\nprint(f' Aggregate CPs: {detected_cps_mv}')\nprint(f' Precision: {precision_mv:.3f}, Recall: {recall_mv:.3f}, F1: {f1_mv:.3f}')\n\n# Model selection simulation\nprint('\\n=== Model Selection ===')\n\n# Test different penalty values for PELT\npenalty_values = [1, 2, 5, 10, 15, 20, 30, 50]\nmodel_selection_results = []\n\nfor penalty in penalty_values:\n detected_cps = pelt_algorithm(time_series_with_trend, penalty=penalty)\n \n # Calculate BIC-like criterion\n n_segments = len(detected_cps) + 1\n n_params = n_segments * 2 # mean and variance for each segment\n \n # Calculate likelihood (simplified)\n log_likelihood = 0\n current_pos = 0\n \n for cp in detected_cps + [len(time_series_with_trend)]:\n segment_data = time_series_with_trend[current_pos:cp]\n if len(segment_data) > 0:\n segment_var = np.var(segment_data)\n if segment_var > 0:\n log_likelihood -= 0.5 * len(segment_data) * np.log(2 * np.pi * segment_var)\n log_likelihood -= 0.5 * len(segment_data)\n current_pos = cp\n \n bic = -2 * log_likelihood + n_params * np.log(len(time_series_with_trend))\n \n precision, recall, f1 = calculate_metrics(true_change_points, detected_cps)\n \n model_selection_results.append({\n 'penalty': penalty,\n 'n_changepoints': len(detected_cps),\n 'bic': bic,\n 'precision': precision,\n 'recall': recall,\n 'f1': f1\n })\n\nmodel_df = 
pd.DataFrame(model_selection_results)\n\nprint('Model selection results:')\nprint(model_df.round(3))\n\n# Best model by BIC\nbest_bic_idx = model_df['bic'].idxmin()\nbest_penalty = model_df.loc[best_bic_idx, 'penalty']\nprint(f'\\nBest penalty by BIC: {best_penalty}')\nprint(f'Corresponding F1 score: {model_df.loc[best_bic_idx, \"f1\"]:.3f}')\n\n# Visualization\nprint('\\n=== Visualization ===')\n\nfig, axes = plt.subplots(2, 2, figsize=(15, 10))\n\n# 1. Original time series with change points\nax1 = axes[0, 0]\nax1.plot(time_points, time_series_with_trend, 'b-', alpha=0.7, linewidth=1)\n\n# True change points\nfor cp in true_change_points:\n ax1.axvline(x=cp, color='red', linestyle='--', alpha=0.8, label='True CP' if cp == true_change_points[0] else '')\n\n# Best detected change points\nbest_detected = results[best_algo]['detected_cps']\nfor cp in best_detected:\n ax1.axvline(x=cp, color='green', linestyle=':', alpha=0.8, label='Detected CP' if cp == best_detected[0] else '')\n\nax1.set_xlabel('Time')\nax1.set_ylabel('Value')\nax1.set_title('Time Series with Change Points')\nax1.legend()\nax1.grid(True, alpha=0.3)\n\n# 2. Algorithm performance comparison\nax2 = axes[0, 1]\nmetrics = ['Precision', 'Recall', 'F1 Score']\nbar_width = 0.2\nx_pos = np.arange(len(metrics))\n\nfor i, algo in enumerate(results.keys()):\n values = [results[algo]['precision'], results[algo]['recall'], results[algo]['f1']]\n ax2.bar(x_pos + i*bar_width, values, bar_width, label=algo, alpha=0.8)\n\nax2.set_xlabel('Metrics')\nax2.set_ylabel('Score')\nax2.set_title('Algorithm Performance Comparison')\nax2.set_xticks(x_pos + bar_width * 1.5)\nax2.set_xticklabels(metrics)\nax2.legend(bbox_to_anchor=(1.05, 1), loc='upper left')\nax2.grid(True, alpha=0.3)\nax2.set_ylim(0, 1.1)\n\n# 3. Multivariate time series\nax3 = axes[1, 0]\nfor dim in range(min(n_dims, 3)):\n ax3.plot(mv_time_series[:, dim], label=f'Dimension {dim}', alpha=0.7)\n\nfor cp in mv_change_points:\n ax3.axvline(x=cp, color='red', linestyle='--', alpha=0.6)\n\nax3.set_xlabel('Time')\nax3.set_ylabel('Value')\nax3.set_title('Multivariate Time Series')\nax3.legend()\nax3.grid(True, alpha=0.3)\n\n# 4. 
Model selection (BIC vs penalty)\nax4 = axes[1, 1]\nax4.plot(model_df['penalty'], model_df['bic'], 'bo-', label='BIC')\nax4.axvline(x=best_penalty, color='red', linestyle='--', alpha=0.8, label=f'Best penalty ({best_penalty})')\n\n# Secondary y-axis for F1 score\nax4_twin = ax4.twinx()\nax4_twin.plot(model_df['penalty'], model_df['f1'], 'ro-', alpha=0.7, label='F1 Score')\n\nax4.set_xlabel('Penalty Value')\nax4.set_ylabel('BIC', color='blue')\nax4_twin.set_ylabel('F1 Score', color='red')\nax4.set_title('Model Selection: BIC vs Penalty')\nax4.legend(loc='upper left')\nax4_twin.legend(loc='upper right')\nax4.grid(True, alpha=0.3)\n\nplt.tight_layout()\n\n# Save visualization\nwith tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:\n plt.savefig(tmp.name, dpi=150, bbox_inches='tight')\n viz_file = tmp.name\n\nplt.close()\nprint(f'Change point detection visualization saved to: {viz_file}')\n\n# Summary report\nprint('\\n' + '=' * 45)\nprint('RUPTURES CHANGE POINT DETECTION SUMMARY')\nprint('=' * 45)\nprint(f'Time series length: {len(time_series_with_trend):,} points')\nprint(f'True change points: {len(true_change_points)}')\nprint(f'Best algorithm: {best_algo}')\nprint(f'Best F1 score: {max(results[algo][\"f1\"] for algo in results):.3f}')\nprint(f'\\nAlgorithm rankings by F1 score:')\nfor i, (algo, metrics) in enumerate(sorted(results.items(), key=lambda x: x[1]['f1'], reverse=True), 1):\n print(f' {i}. {algo}: {metrics[\"f1\"]:.3f}')\nprint(f'\\nMultivariate detection F1 score: {f1_mv:.3f}')\nprint(f'Optimal penalty (BIC): {best_penalty}')\n\n# Cleanup\nos.unlink(viz_file)\nprint('\\nDemo complete - temporary files cleaned up')\n\nprint('\\nruptures provides:')\nprint('• Multiple change point detection algorithms')\nprint('• PELT, Binary Segmentation, Window methods')\nprint('• Multivariate time series support')\nprint('• Model selection and validation')\nprint('• Custom cost functions')\nprint('• Efficient implementations')\nprint('• Extensive documentation and examples')\n\nprint('\\nTypical ruptures usage:')\nprint('import ruptures as rpt')\nprint('algo = rpt.Pelt(model=\"rbf\").fit(signal)')\nprint('result = algo.predict(pen=10)')",
+ "usage_example": "# ruptures change point detection demonstration\n\nimport numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt\nfrom scipy import signal\nfrom sklearn.preprocessing import StandardScaler\nimport tempfile\nimport os\n\n# Simulate ruptures functionality\ndef pelt_algorithm(data, penalty=10):\n \"\"\"Simplified PELT algorithm for change point detection\"\"\"\n n = len(data)\n F = np.full(n + 1, np.inf)\n F[0] = -penalty\n cp_candidates = [0]\n \n for t in range(1, n + 1):\n for s in cp_candidates:\n if s < t:\n segment_data = data[s:t]\n if len(segment_data) > 0:\n cost = np.var(segment_data) * len(segment_data)\n total_cost = F[s] + cost + penalty\n \n if total_cost < F[t]:\n F[t] = total_cost\n \n # Pruning step\n cp_candidates = [s for s in cp_candidates if F[s] <= F[t] - penalty]\n cp_candidates.append(t)\n \n # Backtrack to find change points\n change_points = []\n t = n\n while t > 0:\n for s in range(t):\n if s in cp_candidates:\n segment_data = data[s:t]\n if len(segment_data) > 0:\n cost = np.var(segment_data) * len(segment_data)\n if abs(F[t] - (F[s] + cost + penalty)) < 1e-10:\n if s > 0:\n change_points.append(s)\n t = s\n break\n else:\n break\n \n return sorted(change_points)\n\ndef binary_segmentation(data, max_changepoints=10):\n \"\"\"Simplified binary segmentation algorithm\"\"\"\n def find_best_split(segment_data, start_idx):\n n = len(segment_data)\n if n < 4: # Minimum segment size\n return None, -np.inf\n \n best_score = -np.inf\n best_split = None\n \n for split in range(2, n - 1):\n left = segment_data[:split]\n right = segment_data[split:]\n \n # Calculate score based on variance reduction\n total_var = np.var(segment_data) * n\n left_var = np.var(left) * len(left)\n right_var = np.var(right) * len(right)\n \n score = total_var - (left_var + right_var)\n \n if score > best_score:\n best_score = score\n best_split = start_idx + split\n \n return best_split, best_score\n \n change_points = []\n segments = [(data, 0)] # (segment_data, start_index)\n \n for _ in range(max_changepoints):\n if not segments:\n break\n \n best_segment = None\n best_split = None\n best_score = -np.inf\n \n # Find the best split among all segments\n for i, (segment_data, start_idx) in enumerate(segments):\n split, score = find_best_split(segment_data, start_idx)\n if split is not None and score > best_score:\n best_score = score\n best_split = split\n best_segment = i\n \n if best_split is None or best_score <= 0:\n break\n \n # Apply the best split\n segment_data, start_idx = segments.pop(best_segment)\n split_point = best_split - start_idx\n \n left_segment = segment_data[:split_point]\n right_segment = segment_data[split_point:]\n \n if len(left_segment) > 0:\n segments.append((left_segment, start_idx))\n if len(right_segment) > 0:\n segments.append((right_segment, best_split))\n \n change_points.append(best_split)\n \n return sorted(change_points)\n\nprint('ruptures - Change Point Detection Library')\nprint('=' * 45)\n\nprint('ruptures Features:')\nprint('• Multiple change point detection algorithms')\nprint('• PELT, Binary Segmentation, Window-based methods')\nprint('• Support for various cost functions')\nprint('• Multivariate time series analysis')\nprint('• Model selection and validation')\nprint('• Efficient implementations')\n\nprint('\\nApplications:')\nprint('• Signal processing and anomaly detection')\nprint('• Financial time series analysis')\nprint('• Genomic segmentation')\nprint('• Climate data analysis')\nprint('• Quality control in 
manufacturing')\n\n# Generate synthetic time series with change points\nprint('\\n=== Synthetic Time Series Generation ===')\n\nnp.random.seed(42)\n\n# Time series parameters\ntotal_length = 1000\ntrue_change_points = [200, 400, 650, 800]\nsegment_means = [1.0, 3.0, 0.5, 2.5, 1.8]\nsegment_stds = [0.5, 0.8, 0.3, 0.6, 0.4]\n\nprint(f'Generating time series with {len(true_change_points)} change points')\nprint(f'True change points: {true_change_points}')\nprint(f'Total length: {total_length} points')\n\n# Generate segments\ntime_series = []\ncurrent_pos = 0\n\nfor i, cp in enumerate(true_change_points + [total_length]):\n segment_length = cp - current_pos\n segment = np.random.normal(\n segment_means[i], \n segment_stds[i], \n segment_length\n )\n time_series.extend(segment)\n current_pos = cp\n\ntime_series = np.array(time_series)\ntime_points = np.arange(len(time_series))\n\nprint(f'Generated time series shape: {time_series.shape}')\nprint(f'Value range: {time_series.min():.2f} to {time_series.max():.2f}')\n\n# Add some noise and trends\nprint('\\nAdding noise and trends...')\n\n# Add noise\nnoise_level = 0.1\nnoise = np.random.normal(0, noise_level, len(time_series))\ntime_series_noisy = time_series + noise\n\n# Add slight trend\ntrend = 0.0005 * time_points\ntime_series_with_trend = time_series_noisy + trend\n\nprint(f'Noise level: {noise_level}')\nprint(f'Trend coefficient: 0.0005 per time unit')\n\n# Apply change point detection algorithms\nprint('\\n=== Change Point Detection ===')\n\n# Test different algorithms\nalgorithms = {\n 'PELT (penalty=5)': lambda x: pelt_algorithm(x, penalty=5),\n 'PELT (penalty=10)': lambda x: pelt_algorithm(x, penalty=10),\n 'PELT (penalty=20)': lambda x: pelt_algorithm(x, penalty=20),\n 'Binary Segmentation': lambda x: binary_segmentation(x, max_changepoints=8)\n}\n\nresults = {}\n\nfor algo_name, algo_func in algorithms.items():\n print(f'\\nRunning {algo_name}...')\n \n detected_cps = algo_func(time_series_with_trend)\n \n # Calculate performance metrics\n def calculate_metrics(true_cps, detected_cps, tolerance=50):\n \"\"\"Calculate precision, recall, and F1 score\"\"\"\n true_positives = 0\n \n for true_cp in true_cps:\n if any(abs(det_cp - true_cp) <= tolerance for det_cp in detected_cps):\n true_positives += 1\n \n precision = true_positives / len(detected_cps) if detected_cps else 0\n recall = true_positives / len(true_cps) if true_cps else 0\n f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0\n \n return precision, recall, f1\n \n precision, recall, f1 = calculate_metrics(true_change_points, detected_cps)\n \n results[algo_name] = {\n 'detected_cps': detected_cps,\n 'precision': precision,\n 'recall': recall,\n 'f1': f1\n }\n \n print(f' Detected change points: {detected_cps}')\n print(f' Precision: {precision:.3f}')\n print(f' Recall: {recall:.3f}')\n print(f' F1 Score: {f1:.3f}')\n\n# Compare algorithms\nprint('\\n=== Algorithm Comparison ===')\n\nperformance_df = pd.DataFrame({\n 'Algorithm': list(results.keys()),\n 'Precision': [results[algo]['precision'] for algo in results],\n 'Recall': [results[algo]['recall'] for algo in results],\n 'F1 Score': [results[algo]['f1'] for algo in results],\n 'Num Detected': [len(results[algo]['detected_cps']) for algo in results]\n})\n\nprint(performance_df.round(3))\n\n# Best algorithm\nbest_algo = performance_df.loc[performance_df['F1 Score'].idxmax(), 'Algorithm']\nprint(f'\\nBest performing algorithm: {best_algo}')\nprint(f'F1 Score: 
{performance_df.loc[performance_df[\"F1 Score\"].idxmax(), \"F1 Score\"]:.3f}')\n\n# Multivariate change point detection simulation\nprint('\\n=== Multivariate Change Point Detection ===')\n\n# Generate multivariate time series\nn_dims = 3\nmv_length = 500\nmv_change_points = [150, 300, 400]\n\nprint(f'Generating {n_dims}D time series with change points at {mv_change_points}')\n\nmv_time_series = []\ncurrent_pos = 0\n\n# Different correlation structures for each segment\ncorr_matrices = [\n np.array([[1.0, 0.2, 0.1], [0.2, 1.0, 0.3], [0.1, 0.3, 1.0]]), # Low correlation\n np.array([[1.0, 0.8, 0.6], [0.8, 1.0, 0.7], [0.6, 0.7, 1.0]]), # High correlation\n np.array([[1.0, -0.5, 0.2], [-0.5, 1.0, -0.3], [0.2, -0.3, 1.0]]), # Mixed correlation\n np.array([[1.0, 0.1, 0.9], [0.1, 1.0, 0.2], [0.9, 0.2, 1.0]]) # Selective correlation\n]\n\nfor i, cp in enumerate(mv_change_points + [mv_length]):\n segment_length = cp - current_pos\n \n # Generate correlated multivariate normal data\n mean = np.random.normal(0, 2, n_dims)\n cov = corr_matrices[i]\n \n segment = np.random.multivariate_normal(mean, cov, segment_length)\n mv_time_series.append(segment)\n \n current_pos = cp\n\nmv_time_series = np.vstack(mv_time_series)\nprint(f'Multivariate time series shape: {mv_time_series.shape}')\n\n# Detect change points in each dimension\nprint('\\nDetecting change points in each dimension:')\nmv_results = {}\n\nfor dim in range(n_dims):\n dim_data = mv_time_series[:, dim]\n detected_cps = binary_segmentation(dim_data, max_changepoints=5)\n \n precision, recall, f1 = calculate_metrics(mv_change_points, detected_cps, tolerance=25)\n \n mv_results[f'Dimension {dim}'] = {\n 'detected_cps': detected_cps,\n 'precision': precision,\n 'recall': recall,\n 'f1': f1\n }\n \n print(f' Dim {dim}: CPs = {detected_cps}, F1 = {f1:.3f}')\n\n# Aggregate multivariate detection (simple approach)\nprint('\\nAggregate multivariate detection:')\n\n# Sum of squared differences approach\nsum_sq_diff = np.sum(np.diff(mv_time_series, axis=0)**2, axis=1)\ndetected_cps_mv = binary_segmentation(sum_sq_diff, max_changepoints=5)\n\nprecision_mv, recall_mv, f1_mv = calculate_metrics(mv_change_points, detected_cps_mv, tolerance=25)\nprint(f' Aggregate CPs: {detected_cps_mv}')\nprint(f' Precision: {precision_mv:.3f}, Recall: {recall_mv:.3f}, F1: {f1_mv:.3f}')\n\n# Model selection simulation\nprint('\\n=== Model Selection ===')\n\n# Test different penalty values for PELT\npenalty_values = [1, 2, 5, 10, 15, 20, 30, 50]\nmodel_selection_results = []\n\nfor penalty in penalty_values:\n detected_cps = pelt_algorithm(time_series_with_trend, penalty=penalty)\n \n # Calculate BIC-like criterion\n n_segments = len(detected_cps) + 1\n n_params = n_segments * 2 # mean and variance for each segment\n \n # Calculate likelihood (simplified)\n log_likelihood = 0\n current_pos = 0\n \n for cp in detected_cps + [len(time_series_with_trend)]:\n segment_data = time_series_with_trend[current_pos:cp]\n if len(segment_data) > 0:\n segment_var = np.var(segment_data)\n if segment_var > 0:\n log_likelihood -= 0.5 * len(segment_data) * np.log(2 * np.pi * segment_var)\n log_likelihood -= 0.5 * len(segment_data)\n current_pos = cp\n \n bic = -2 * log_likelihood + n_params * np.log(len(time_series_with_trend))\n \n precision, recall, f1 = calculate_metrics(true_change_points, detected_cps)\n \n model_selection_results.append({\n 'penalty': penalty,\n 'n_changepoints': len(detected_cps),\n 'bic': bic,\n 'precision': precision,\n 'recall': recall,\n 'f1': f1\n })\n\nmodel_df = 
pd.DataFrame(model_selection_results)\n\nprint('Model selection results:')\nprint(model_df.round(3))\n\n# Best model by BIC\nbest_bic_idx = model_df['bic'].idxmin()\nbest_penalty = model_df.loc[best_bic_idx, 'penalty']\nprint(f'\\nBest penalty by BIC: {best_penalty}')\nprint(f'Corresponding F1 score: {model_df.loc[best_bic_idx, \"f1\"]:.3f}')\n\n# Visualization\nprint('\\n=== Visualization ===')\n\nfig, axes = plt.subplots(2, 2, figsize=(15, 10))\n\n# 1. Original time series with change points\nax1 = axes[0, 0]\nax1.plot(time_points, time_series_with_trend, 'b-', alpha=0.7, linewidth=1)\n\n# True change points\nfor cp in true_change_points:\n ax1.axvline(x=cp, color='red', linestyle='--', alpha=0.8, label='True CP' if cp == true_change_points[0] else '')\n\n# Best detected change points\nbest_detected = results[best_algo]['detected_cps']\nfor cp in best_detected:\n ax1.axvline(x=cp, color='green', linestyle=':', alpha=0.8, label='Detected CP' if cp == best_detected[0] else '')\n\nax1.set_xlabel('Time')\nax1.set_ylabel('Value')\nax1.set_title('Time Series with Change Points')\nax1.legend()\nax1.grid(True, alpha=0.3)\n\n# 2. Algorithm performance comparison\nax2 = axes[0, 1]\nmetrics = ['Precision', 'Recall', 'F1 Score']\nbar_width = 0.2\nx_pos = np.arange(len(metrics))\n\nfor i, algo in enumerate(results.keys()):\n values = [results[algo]['precision'], results[algo]['recall'], results[algo]['f1']]\n ax2.bar(x_pos + i*bar_width, values, bar_width, label=algo, alpha=0.8)\n\nax2.set_xlabel('Metrics')\nax2.set_ylabel('Score')\nax2.set_title('Algorithm Performance Comparison')\nax2.set_xticks(x_pos + bar_width * 1.5)\nax2.set_xticklabels(metrics)\nax2.legend(bbox_to_anchor=(1.05, 1), loc='upper left')\nax2.grid(True, alpha=0.3)\nax2.set_ylim(0, 1.1)\n\n# 3. Multivariate time series\nax3 = axes[1, 0]\nfor dim in range(min(n_dims, 3)):\n ax3.plot(mv_time_series[:, dim], label=f'Dimension {dim}', alpha=0.7)\n\nfor cp in mv_change_points:\n ax3.axvline(x=cp, color='red', linestyle='--', alpha=0.6)\n\nax3.set_xlabel('Time')\nax3.set_ylabel('Value')\nax3.set_title('Multivariate Time Series')\nax3.legend()\nax3.grid(True, alpha=0.3)\n\n# 4. 
Model selection (BIC vs penalty)\nax4 = axes[1, 1]\nax4.plot(model_df['penalty'], model_df['bic'], 'bo-', label='BIC')\nax4.axvline(x=best_penalty, color='red', linestyle='--', alpha=0.8, label=f'Best penalty ({best_penalty})')\n\n# Secondary y-axis for F1 score\nax4_twin = ax4.twinx()\nax4_twin.plot(model_df['penalty'], model_df['f1'], 'ro-', alpha=0.7, label='F1 Score')\n\nax4.set_xlabel('Penalty Value')\nax4.set_ylabel('BIC', color='blue')\nax4_twin.set_ylabel('F1 Score', color='red')\nax4.set_title('Model Selection: BIC vs Penalty')\nax4.legend(loc='upper left')\nax4_twin.legend(loc='upper right')\nax4.grid(True, alpha=0.3)\n\nplt.tight_layout()\n\n# Save visualization\nwith tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:\n plt.savefig(tmp.name, dpi=150, bbox_inches='tight')\n viz_file = tmp.name\n\nplt.close()\nprint(f'Change point detection visualization saved to: {viz_file}')\n\n# Summary report\nprint('\\n' + '=' * 45)\nprint('RUPTURES CHANGE POINT DETECTION SUMMARY')\nprint('=' * 45)\nprint(f'Time series length: {len(time_series_with_trend):} points')\nprint(f'True change points: {len(true_change_points)}')\nprint(f'Best algorithm: {best_algo}')\nprint(f'Best F1 score: {max(results[algo][\"f1\"] for algo in results):.3f}')\nprint(f'\\nAlgorithm rankings by F1 score:')\nfor i, (algo, metrics) in enumerate(sorted(results.items(), key=lambda x: x[1]['f1'], reverse=True), 1):\n print(f' {i}. {algo}: {metrics[\"f1\"]:.3f}')\nprint(f'\\nMultivariate detection F1 score: {f1_mv:.3f}')\nprint(f'Optimal penalty (BIC): {best_penalty}')\n\n# Cleanup\nos.unlink(viz_file)\nprint('\\nDemo complete - temporary files cleaned up')\n\nprint('\\nruptures provides:')\nprint('• Multiple change point detection algorithms')\nprint('• PELT, Binary Segmentation, Window methods')\nprint('• Multivariate time series support')\nprint('• Model selection and validation')\nprint('• Custom cost functions')\nprint('• Efficient implementations')\nprint('• Extensive documentation and examples')\n\nprint('\\nTypical ruptures usage:')\nprint('import ruptures as rpt')\nprint('algo = rpt.Pelt(model=\"rbf\").fit(signal)')\nprint('result = algo.predict(pen=10)')",
  "quick_start": [
  "Install: pip install ruptures",
  "Import: import ruptures as rpt",