tooluniverse 1.0.8__py3-none-any.whl → 1.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of tooluniverse might be problematic.
- tooluniverse/__init__.py +8 -0
- tooluniverse/admetai_tool.py +8 -1
- tooluniverse/compose_scripts/output_summarizer.py +87 -33
- tooluniverse/compose_tool.py +2 -2
- tooluniverse/data/adverse_event_tools.json +97 -98
- tooluniverse/data/agentic_tools.json +81 -162
- tooluniverse/data/compose_tools.json +0 -54
- tooluniverse/data/drug_discovery_agents.json +10 -20
- tooluniverse/data/literature_search_tools.json +15 -35
- tooluniverse/data/monarch_tools.json +1 -2
- tooluniverse/data/opentarget_tools.json +8 -16
- tooluniverse/data/output_summarization_tools.json +23 -20
- tooluniverse/data/packages/bioinformatics_core_tools.json +2 -2
- tooluniverse/data/packages/cheminformatics_tools.json +1 -1
- tooluniverse/data/packages/genomics_tools.json +1 -1
- tooluniverse/data/packages/single_cell_tools.json +1 -1
- tooluniverse/data/packages/structural_biology_tools.json +1 -1
- tooluniverse/data/tool_composition_tools.json +2 -4
- tooluniverse/execute_function.py +39 -1
- tooluniverse/logging_config.py +64 -2
- tooluniverse/molecule_2d_tool.py +9 -3
- tooluniverse/molecule_3d_tool.py +9 -3
- tooluniverse/output_hook.py +217 -150
- tooluniverse/smcp.py +8 -1
- tooluniverse/smcp_server.py +89 -199
- tooluniverse/tools/__init__.py +1 -3
- {tooluniverse-1.0.8.dist-info → tooluniverse-1.0.9.dist-info}/METADATA +2 -1
- {tooluniverse-1.0.8.dist-info → tooluniverse-1.0.9.dist-info}/RECORD +32 -33
- {tooluniverse-1.0.8.dist-info → tooluniverse-1.0.9.dist-info}/entry_points.txt +0 -3
- tooluniverse/tools/MultiAgentLiteratureSearch.py +0 -59
- {tooluniverse-1.0.8.dist-info → tooluniverse-1.0.9.dist-info}/WHEEL +0 -0
- {tooluniverse-1.0.8.dist-info → tooluniverse-1.0.9.dist-info}/licenses/LICENSE +0 -0
- {tooluniverse-1.0.8.dist-info → tooluniverse-1.0.9.dist-info}/top_level.txt +0 -0

@@ -314,59 +314,5 @@
     ],
     "composition_file": "tool_graph_generation.py",
     "composition_function": "compose"
-  },
-  {
-    "type": "ComposeTool",
-    "name": "MultiAgentLiteratureSearch",
-    "description": "Multi-agent literature search system that uses AI agents to analyze intent, extract keywords, execute parallel searches, summarize results, and check quality iteratively",
-    "parameter": {
-      "type": "object",
-      "properties": {
-        "query": {
-          "type": "string",
-          "description": "The research query to search for"
-        },
-        "max_iterations": {
-          "type": "integer",
-          "description": "Maximum number of iterations (default: 3)",
-          "default": 3
-        },
-        "quality_threshold": {
-          "type": "number",
-          "description": "Quality threshold for completion (default: 0.7)",
-          "default": 0.7
-        }
-      },
-      "required": [
-        "query",
-        "max_iterations",
-        "quality_threshold"
-      ]
-    },
-    "auto_load_dependencies": true,
-    "fail_on_missing_tools": false,
-    "required_tools": [
-      "IntentAnalyzerAgent",
-      "KeywordExtractorAgent",
-      "ResultSummarizerAgent",
-      "QualityCheckerAgent",
-      "OverallSummaryAgent",
-      "ArXiv_search_papers",
-      "BioRxiv_search_preprints",
-      "MedRxiv_search_preprints",
-      "HAL_search_archive",
-      "Crossref_search_works",
-      "PubMed_search_articles",
-      "EuropePMC_search_articles",
-      "SemanticScholar_search_papers",
-      "openalex_literature_search",
-      "DBLP_search_publications",
-      "DOAJ_search_articles",
-      "CORE_search_papers",
-      "PMC_search_papers",
-      "Zenodo_search_records"
-    ],
-    "composition_file": "multi_agent_literature_search.py",
-    "composition_function": "compose"
   }
 ]

@@ -48,13 +48,11 @@
       "properties": {
         "disease_name": {
           "type": "string",
-          "description": "Name of the disease"
-          "required": true
+          "description": "Name of the disease"
         },
         "targets": {
           "type": "string",
-          "description": "List of therapeutic targets (comma-separated)"
-          "required": true
+          "description": "List of therapeutic targets (comma-separated)"
         },
         "context": {
           "type": "string",

@@ -90,13 +88,11 @@
       "properties": {
         "compounds": {
           "type": "string",
-          "description": "List of compounds to analyze (comma-separated)"
-          "required": true
+          "description": "List of compounds to analyze (comma-separated)"
         },
         "admet_data": {
          "type": "string",
-          "description": "ADMET data from computational tools to analyze"
-          "required": true
+          "description": "ADMET data from computational tools to analyze"
         },
         "disease_context": {
           "type": "string",

@@ -131,8 +127,7 @@
       "properties": {
         "compounds": {
           "type": "string",
-          "description": "List of compounds to analyze for interactions (comma-separated)"
-          "required": true
+          "description": "List of compounds to analyze for interactions (comma-separated)"
         },
         "patient_context": {
           "type": "string",

@@ -167,13 +162,11 @@
       "properties": {
         "topic": {
           "type": "string",
-          "description": "Research topic or question"
-          "required": true
+          "description": "Research topic or question"
         },
         "literature_data": {
           "type": "string",
-          "description": "Literature findings or abstracts to synthesize"
-          "required": true
+          "description": "Literature findings or abstracts to synthesize"
         },
         "focus_area": {
           "type": "string",

@@ -210,8 +203,7 @@
       "properties": {
         "compounds": {
           "type": "string",
-          "description": "List of compounds to optimize (comma-separated)"
-          "required": true
+          "description": "List of compounds to optimize (comma-separated)"
         },
         "admet_data": {
           "type": "string",

@@ -257,13 +249,11 @@
       "properties": {
         "drug_name": {
           "type": "string",
-          "description": "Name of the drug candidate"
-          "required": true
+          "description": "Name of the drug candidate"
         },
         "indication": {
           "type": "string",
-          "description": "Disease indication"
-          "required": true
+          "description": "Disease indication"
         },
         "preclinical_data": {
           "type": "string",

@@ -13,13 +13,11 @@
       "properties": {
         "user_query": {
           "type": "string",
-          "description": "The research query to analyze"
-          "required": true
+          "description": "The research query to analyze"
         },
         "context": {
           "type": "string",
           "description": "Context information from previous steps",
-          "required": false,
           "default": ""
         }
       },

@@ -51,23 +49,19 @@
       "properties": {
         "plan_title": {
           "type": "string",
-          "description": "The title of the search plan"
-          "required": true
+          "description": "The title of the search plan"
         },
         "plan_description": {
           "type": "string",
-          "description": "The description of the search plan"
-          "required": true
+          "description": "The description of the search plan"
         },
         "current_keywords": {
           "type": "string",
-          "description": "Current keywords for the plan (comma-separated)"
-          "required": true
+          "description": "Current keywords for the plan (comma-separated)"
         },
         "context": {
           "type": "string",
           "description": "Context information from previous steps",
-          "required": false,
           "default": ""
         }
       },

@@ -102,28 +96,23 @@
       "properties": {
         "plan_title": {
           "type": "string",
-          "description": "The title of the search plan"
-          "required": true
+          "description": "The title of the search plan"
         },
         "plan_description": {
           "type": "string",
-          "description": "The description of the search plan"
-          "required": true
+          "description": "The description of the search plan"
         },
         "paper_count": {
           "type": "string",
-          "description": "Number of papers found"
-          "required": true
+          "description": "Number of papers found"
         },
         "papers_text": {
           "type": "string",
-          "description": "Formatted text of the papers to summarize"
-          "required": true
+          "description": "Formatted text of the papers to summarize"
         },
         "context": {
           "type": "string",
           "description": "Context information from previous steps",
-          "required": false,
           "default": ""
         }
       },

@@ -156,13 +145,11 @@
       "properties": {
         "plans_analysis": {
           "type": "string",
-          "description": "Analysis of current search plans and their quality scores"
-          "required": true
+          "description": "Analysis of current search plans and their quality scores"
         },
         "context": {
           "type": "string",
           "description": "Context information from previous steps",
-          "required": false,
           "default": ""
         }
       },

@@ -197,38 +184,31 @@
       "properties": {
         "user_query": {
           "type": "string",
-          "description": "The original research query"
-          "required": true
+          "description": "The original research query"
         },
         "user_intent": {
           "type": "string",
-          "description": "The analyzed user intent"
-          "required": true
+          "description": "The analyzed user intent"
         },
         "total_papers": {
           "type": "string",
-          "description": "Total number of papers found"
-          "required": true
+          "description": "Total number of papers found"
         },
         "total_plans": {
           "type": "string",
-          "description": "Total number of search plans executed"
-          "required": true
+          "description": "Total number of search plans executed"
         },
         "iterations": {
           "type": "string",
-          "description": "Number of iterations performed"
-          "required": true
+          "description": "Number of iterations performed"
         },
         "plan_summaries": {
           "type": "string",
-          "description": "Summaries of all search plans"
-          "required": true
+          "description": "Summaries of all search plans"
         },
         "context": {
           "type": "string",
           "description": "Context information from previous steps",
-          "required": false,
           "default": ""
         }
       },

@@ -728,13 +728,11 @@
       "properties": {
         "index": {
           "type": "integer",
-          "description": "The index of the page to retrieve."
-          "required": true
+          "description": "The index of the page to retrieve."
         },
         "size": {
           "type": "integer",
-          "description": "The number of items per page."
-          "required": true
+          "description": "The number of items per page."
         }
       },
       "description": "Pagination parameters."

@@ -906,13 +904,11 @@
       "properties": {
         "index": {
           "type": "integer",
-          "description": "Index of the page to fetch, starting from 0."
-          "required": true
+          "description": "Index of the page to fetch, starting from 0."
         },
         "size": {
           "type": "integer",
-          "description": "Number of entries per page."
-          "required": true
+          "description": "Number of entries per page."
         }
       },
       "description": "Pagination settings."

@@ -1152,13 +1148,11 @@
       "properties": {
         "index": {
           "type": "integer",
-          "description": "Pagination index."
-          "required": true
+          "description": "Pagination index."
         },
         "size": {
           "type": "integer",
-          "description": "Number of records to fetch per page."
-          "required": true
+          "description": "Number of records to fetch per page."
         }
       }
     }

@@ -1277,13 +1271,11 @@
       "properties": {
         "index": {
           "type": "integer",
-          "description": "Pagination index."
-          "required": "True"
+          "description": "Pagination index."
         },
         "size": {
           "type": "integer",
-          "description": "Pagination size."
-          "required": "True"
+          "description": "Pagination size."
         }
       },
       "description": "Pagination settings with index and size."

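Note on the recurring edit above: 1.0.9 drops the per-property "required": true (and, in the last hunk, "required": "True") keys that 1.0.8 carried inside "properties". In JSON Schema, "required" is an array of property names on the enclosing object, not a boolean on each property, so standard validators silently ignore the boolean form. The sketch below is a minimal illustration of the array form using the third-party jsonschema package; it is not code shipped in tooluniverse, and the index/size payload is only an example modeled on the pagination schemas above.

    from jsonschema import ValidationError, validate  # assumed external validator, not part of tooluniverse

    # 1.0.9-style parameter schema: "required" is an object-level array.
    schema = {
        "type": "object",
        "properties": {
            "index": {"type": "integer", "description": "Pagination index."},
            "size": {"type": "integer", "description": "Pagination size."},
        },
        "required": ["index", "size"],
    }

    validate(instance={"index": 0, "size": 25}, schema=schema)  # passes

    try:
        validate(instance={"index": 0}, schema=schema)  # "size" is missing
    except ValidationError as err:
        print(err.message)  # -> 'size' is a required property
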
@@ -4,39 +4,44 @@
     "name": "ToolOutputSummarizer",
     "description": "AI-powered tool for summarizing long tool outputs, focusing on key information relevant to the original query",
     "prompt": "You are an expert at summarizing tool outputs. Your task is to analyze the provided tool output and create a concise summary that highlights the most important information relevant to the original query.\n\nTool Output to Summarize:\n{tool_output}\n\nOriginal Query Context:\n{query_context}\n\nTool Name: {tool_name}\nFocus Areas: {focus_areas}\nMaximum Summary Length: {max_length}\n\nPlease provide a well-structured summary that:\n1. Captures the key findings and results\n2. Highlights important data points and metrics\n3. Preserves critical technical details\n4. Maintains the essential structure of the original output\n5. Focuses on information most relevant to the query\n\nReturn the summary in a clear, organized format.",
-    "input_arguments": [
+    "input_arguments": [
+      "tool_output",
+      "query_context",
+      "tool_name",
+      "focus_areas",
+      "max_length"
+    ],
     "parameter": {
       "type": "object",
       "properties": {
         "tool_output": {
           "type": "string",
-          "description": "The original tool output to be summarized"
-          "required": true
+          "description": "The original tool output to be summarized"
         },
         "query_context": {
           "type": "string",
-          "description": "Context about the original query that triggered the tool"
-          "required": true
+          "description": "Context about the original query that triggered the tool"
         },
         "tool_name": {
           "type": "string",
-          "description": "Name of the tool that generated the output"
-          "required": true
+          "description": "Name of the tool that generated the output"
         },
         "focus_areas": {
           "type": "string",
           "description": "Specific areas to focus on in the summary",
-          "required": false,
           "default": "key_findings_and_results"
         },
         "max_length": {
           "type": "integer",
           "description": "Maximum length of the summary in characters",
-          "required": false,
           "default": 32000
         }
       },
-      "required": [
+      "required": [
+        "tool_output",
+        "query_context",
+        "tool_name"
+      ]
     },
     "configs": {
       "api_type": "CHATGPT",

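The "input_arguments" list added here names exactly the {placeholders} used in this tool's "prompt" string. How ToolUniverse fills that template is not part of this diff; the snippet below is only a hypothetical illustration of the mapping, with an abbreviated prompt and made-up argument values (the defaults are the ones declared in the schema above).

    # Hypothetical illustration: input_arguments name the prompt placeholders.
    prompt_template = (  # abbreviated from the "prompt" field above
        "Tool Output to Summarize:\n{tool_output}\n\n"
        "Original Query Context:\n{query_context}\n\n"
        "Tool Name: {tool_name}\n"
        "Focus Areas: {focus_areas}\n"
        "Maximum Summary Length: {max_length}"
    )
    input_arguments = ["tool_output", "query_context", "tool_name", "focus_areas", "max_length"]

    arguments = {
        "tool_output": "...long raw tool output...",       # required
        "query_context": "...original user query...",      # required
        "tool_name": "ExampleTool",                         # required; hypothetical name
        "focus_areas": "key_findings_and_results",          # default from the schema
        "max_length": 32000,                                 # default from the schema
    }
    filled_prompt = prompt_template.format(**{name: arguments[name] for name in input_arguments})
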
@@ -56,39 +61,37 @@
       "properties": {
         "tool_output": {
           "type": "string",
-          "description": "The original tool output to be summarized"
-          "required": true
+          "description": "The original tool output to be summarized"
         },
         "query_context": {
           "type": "string",
-          "description": "Context about the original query"
-          "required": true
+          "description": "Context about the original query"
         },
         "tool_name": {
           "type": "string",
-          "description": "Name of the tool that generated the output"
-          "required": true
+          "description": "Name of the tool that generated the output"
         },
         "chunk_size": {
           "type": "integer",
           "description": "Size of each chunk for processing",
-          "required": false,
           "default": 30000
         },
         "focus_areas": {
           "type": "string",
           "description": "Areas to focus on in summarization",
-          "required": false,
           "default": "key_findings_and_results"
         },
         "max_summary_length": {
           "type": "integer",
           "description": "Maximum length of final summary",
-          "required": false,
           "default": 10000
         }
       },
-      "required": [
+      "required": [
+        "tool_output",
+        "query_context",
+        "tool_name"
+      ]
     },
     "auto_load_dependencies": true,
     "fail_on_missing_tools": false,

@@ -282,7 +282,7 @@
       "pip": "pip install numba",
       "conda": "conda install numba"
     },
-
"usage_example": "import numba\nfrom numba import jit, njit, prange, cuda\nimport numpy as np\nimport time\nimport math\n\nprint('Numba - JIT Compiler for Python')\nprint('=' * 35)\n\n# Basic JIT compilation example\nprint('\\n=== Basic JIT Compilation ===')\n\n# Pure Python function\ndef python_function(x):\n total = 0\n for i in range(x):\n total += i * i\n return total\n\n# JIT compiled function\n@jit\ndef numba_function(x):\n total = 0\n for i in range(x):\n total += i * i\n return total\n\n# No-Python mode (faster)\n@njit\ndef numba_nopython(x):\n total = 0\n for i in range(x):\n total += i * i\n return total\n\n# Performance comparison\nn = 1000000\nprint(f'Computing sum of squares for {n
+
"usage_example": "import numba\nfrom numba import jit, njit, prange, cuda\nimport numpy as np\nimport time\nimport math\n\nprint('Numba - JIT Compiler for Python')\nprint('=' * 35)\n\n# Basic JIT compilation example\nprint('\\n=== Basic JIT Compilation ===')\n\n# Pure Python function\ndef python_function(x):\n total = 0\n for i in range(x):\n total += i * i\n return total\n\n# JIT compiled function\n@jit\ndef numba_function(x):\n total = 0\n for i in range(x):\n total += i * i\n return total\n\n# No-Python mode (faster)\n@njit\ndef numba_nopython(x):\n total = 0\n for i in range(x):\n total += i * i\n return total\n\n# Performance comparison\nn = 1000000\nprint(f'Computing sum of squares for {n:} numbers')\n\n# Warm up JIT functions\nnumba_function(100)\nnumba_nopython(100)\n\n# Time Python function\nstart = time.time()\nresult_python = python_function(n)\ntime_python = time.time() - start\n\n# Time JIT function\nstart = time.time()\nresult_numba = numba_function(n)\ntime_numba = time.time() - start\n\n# Time no-Python JIT\nstart = time.time()\nresult_nopython = numba_nopython(n)\ntime_nopython = time.time() - start\n\nprint(f'Python result: {result_python}')\nprint(f'Numba result: {result_numba}')\nprint(f'No-Python result: {result_nopython}')\nprint(f'\\nPython time: {time_python:.4f} seconds')\nprint(f'Numba time: {time_numba:.4f} seconds')\nprint(f'No-Python time: {time_nopython:.4f} seconds')\nprint(f'Speedup (Numba): {time_python/time_numba:.1f}x')\nprint(f'Speedup (No-Python): {time_python/time_nopython:.1f}x')\n\n# NumPy array operations\nprint('\\n=== NumPy Array Operations ===')\n\n@njit\ndef matrix_multiply_numba(A, B):\n return np.dot(A, B)\n\n@njit\ndef element_wise_operation(arr):\n result = np.zeros_like(arr)\n for i in range(arr.shape[0]):\n for j in range(arr.shape[1]):\n result[i, j] = math.sqrt(arr[i, j]**2 + 1)\n return result\n\n# Create test arrays\nsize = 500\nA = np.random.random((size, size))\nB = np.random.random((size, size))\n\nprint(f'Matrix operations on {size}x{size} arrays')\n\n# Warm up\nmatrix_multiply_numba(A[:10, :10], B[:10, :10])\nelement_wise_operation(A[:10, :10])\n\n# Time NumPy operations\nstart = time.time()\nnumpy_result = np.dot(A, B)\ntime_numpy = time.time() - start\n\n# Time Numba operations\nstart = time.time()\nnumba_result = matrix_multiply_numba(A, B)\ntime_numba_matrix = time.time() - start\n\nprint(f'NumPy matrix multiply: {time_numpy:.4f} seconds')\nprint(f'Numba matrix multiply: {time_numba_matrix:.4f} seconds')\nprint(f'Results equal: {np.allclose(numpy_result, numba_result)}')\n\n# Parallel execution\nprint('\\n=== Parallel Execution ===')\n\n@njit(parallel=True)\ndef parallel_sum(arr):\n total = 0.0\n for i in prange(arr.shape[0]):\n total += arr[i]\n return total\n\n@njit\ndef serial_sum(arr):\n total = 0.0\n for i in range(arr.shape[0]):\n total += arr[i]\n return total\n\nlarge_array = np.random.random(10000000)\n\n# Warm up\nparallel_sum(large_array[:1000])\nserial_sum(large_array[:1000])\n\n# Time serial version\nstart = time.time()\nserial_result = serial_sum(large_array)\ntime_serial = time.time() - start\n\n# Time parallel version\nstart = time.time()\nparallel_result = parallel_sum(large_array)\ntime_parallel = time.time() - start\n\nprint(f'Array size: {len(large_array):} elements')\nprint(f'Serial sum: {serial_result:.6f} ({time_serial:.4f} seconds)')\nprint(f'Parallel sum: {parallel_result:.6f} ({time_parallel:.4f} seconds)')\nprint(f'Parallel speedup: {time_serial/time_parallel:.1f}x')\n\n# Mathematical 
functions\nprint('\\n=== Mathematical Functions ===')\n\n@njit\ndef monte_carlo_pi(n_samples):\n count = 0\n for i in range(n_samples):\n x = np.random.random()\n y = np.random.random()\n if x*x + y*y <= 1.0:\n count += 1\n return 4.0 * count / n_samples\n\n@njit\ndef mandelbrot_point(c_real, c_imag, max_iter):\n z_real = 0.0\n z_imag = 0.0\n for i in range(max_iter):\n z_real_new = z_real*z_real - z_imag*z_imag + c_real\n z_imag_new = 2*z_real*z_imag + c_imag\n z_real = z_real_new\n z_imag = z_imag_new\n if z_real*z_real + z_imag*z_imag > 4:\n return i\n return max_iter\n\n# Monte Carlo Pi estimation\nn_samples = 1000000\nprint(f'Monte Carlo π estimation with {n_samples:} samples')\n\nstart = time.time()\npi_estimate = monte_carlo_pi(n_samples)\ntime_mc = time.time() - start\n\nprint(f'Estimated π: {pi_estimate:.6f}')\nprint(f'Actual π: {math.pi:.6f}')\nprint(f'Error: {abs(pi_estimate - math.pi):.6f}')\nprint(f'Time: {time_mc:.4f} seconds')\n\n# Mandelbrot calculation\nprint(f'\\nMandelbrot set calculation')\nc_values = [-0.5 + 0.5j, -0.8 + 0.2j, 0.3 - 0.6j]\nmax_iterations = 1000\n\nfor c in c_values:\n iterations = mandelbrot_point(c.real, c.imag, max_iterations)\n if iterations == max_iterations:\n print(f'Point {c}: In set (>{max_iterations} iterations)')\n else:\n print(f'Point {c}: Escaped after {iterations} iterations')\n\n# Type signatures and compilation info\nprint('\\n=== Compilation Information ===')\nprint(f'Numba version: {numba.__version__}')\nprint(f'NumPy version: {np.__version__}')\n\n# Function signatures\nprint(f'\\nFunction signatures:')\nprint(f'numba_function: {numba_function.signatures}')\nprint(f'numba_nopython: {numba_nopython.signatures}')\nprint(f'parallel_sum: {parallel_sum.signatures}')\n\n# GPU example (if CUDA available)\nprint('\\n=== GPU Computing (CUDA) ===')\ntry:\n # Simple CUDA kernel example\n @cuda.jit\n def cuda_add(a, b, c):\n idx = cuda.grid(1)\n if idx < c.size:\n c[idx] = a[idx] + b[idx]\n \n # Check if CUDA is available\n if cuda.is_available():\n print('CUDA is available!')\n print(f'CUDA devices: {cuda.list_devices()}')\n \n # Small example\n n = 1000\n a = np.random.random(n).astype(np.float32)\n b = np.random.random(n).astype(np.float32)\n c = np.zeros(n, dtype=np.float32)\n \n # Configure grid and block dimensions\n threads_per_block = 128\n blocks_per_grid = (n + threads_per_block - 1) // threads_per_block\n \n print(f'Running CUDA kernel with {blocks_per_grid} blocks, {threads_per_block} threads each')\n cuda_add[blocks_per_grid, threads_per_block](a, b, c)\n \n # Verify result\n expected = a + b\n print(f'CUDA result matches NumPy: {np.allclose(c, expected)}')\n else:\n print('CUDA not available on this system')\nexcept Exception as e:\n print(f'CUDA example failed: {e}')\n\nprint('\\nNumba provides:')\nprint('• Just-in-time compilation for Python')\nprint('• Automatic parallelization with prange')\nprint('• GPU computing with CUDA support')\nprint('• NumPy array optimization')\nprint('• Minimal code changes for maximum speedup')\nprint('• Support for mathematical functions')\nprint('• Type inference and optimization')",
     "quick_start": [
       "Install: pip install numba",
       "Import: from numba import jit, njit",

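The only change in this hunk is the numba "usage_example" string: the 1.0.8 value shown above appears truncated mid f-string (it ends at "{n"), and 1.0.9 replaces it with the complete example. A minimal, runnable version of the JIT pattern that example demonstrates, independent of tooluniverse, is:

    import time

    from numba import njit  # pip install numba


    @njit  # compiled to machine code in nopython mode on first call
    def sum_of_squares(n):
        total = 0
        for i in range(n):
            total += i * i
        return total


    sum_of_squares(100)  # warm-up call triggers compilation
    start = time.time()
    print(sum_of_squares(1_000_000), f"({time.time() - start:.4f}s)")
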
@@ -1119,7 +1119,7 @@
       "pip": "pip install ruptures",
       "conda": "conda install -c conda-forge ruptures"
     },
-
"usage_example": "# ruptures change point detection demonstration\n\nimport numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt\nfrom scipy import signal\nfrom sklearn.preprocessing import StandardScaler\nimport tempfile\nimport os\n\n# Simulate ruptures functionality\ndef pelt_algorithm(data, penalty=10):\n \"\"\"Simplified PELT algorithm for change point detection\"\"\"\n n = len(data)\n F = np.full(n + 1, np.inf)\n F[0] = -penalty\n cp_candidates = [0]\n \n for t in range(1, n + 1):\n for s in cp_candidates:\n if s < t:\n segment_data = data[s:t]\n if len(segment_data) > 0:\n cost = np.var(segment_data) * len(segment_data)\n total_cost = F[s] + cost + penalty\n \n if total_cost < F[t]:\n F[t] = total_cost\n \n # Pruning step\n cp_candidates = [s for s in cp_candidates if F[s] <= F[t] - penalty]\n cp_candidates.append(t)\n \n # Backtrack to find change points\n change_points = []\n t = n\n while t > 0:\n for s in range(t):\n if s in cp_candidates:\n segment_data = data[s:t]\n if len(segment_data) > 0:\n cost = np.var(segment_data) * len(segment_data)\n if abs(F[t] - (F[s] + cost + penalty)) < 1e-10:\n if s > 0:\n change_points.append(s)\n t = s\n break\n else:\n break\n \n return sorted(change_points)\n\ndef binary_segmentation(data, max_changepoints=10):\n \"\"\"Simplified binary segmentation algorithm\"\"\"\n def find_best_split(segment_data, start_idx):\n n = len(segment_data)\n if n < 4: # Minimum segment size\n return None, -np.inf\n \n best_score = -np.inf\n best_split = None\n \n for split in range(2, n - 1):\n left = segment_data[:split]\n right = segment_data[split:]\n \n # Calculate score based on variance reduction\n total_var = np.var(segment_data) * n\n left_var = np.var(left) * len(left)\n right_var = np.var(right) * len(right)\n \n score = total_var - (left_var + right_var)\n \n if score > best_score:\n best_score = score\n best_split = start_idx + split\n \n return best_split, best_score\n \n change_points = []\n segments = [(data, 0)] # (segment_data, start_index)\n \n for _ in range(max_changepoints):\n if not segments:\n break\n \n best_segment = None\n best_split = None\n best_score = -np.inf\n \n # Find the best split among all segments\n for i, (segment_data, start_idx) in enumerate(segments):\n split, score = find_best_split(segment_data, start_idx)\n if split is not None and score > best_score:\n best_score = score\n best_split = split\n best_segment = i\n \n if best_split is None or best_score <= 0:\n break\n \n # Apply the best split\n segment_data, start_idx = segments.pop(best_segment)\n split_point = best_split - start_idx\n \n left_segment = segment_data[:split_point]\n right_segment = segment_data[split_point:]\n \n if len(left_segment) > 0:\n segments.append((left_segment, start_idx))\n if len(right_segment) > 0:\n segments.append((right_segment, best_split))\n \n change_points.append(best_split)\n \n return sorted(change_points)\n\nprint('ruptures - Change Point Detection Library')\nprint('=' * 45)\n\nprint('ruptures Features:')\nprint('• Multiple change point detection algorithms')\nprint('• PELT, Binary Segmentation, Window-based methods')\nprint('• Support for various cost functions')\nprint('• Multivariate time series analysis')\nprint('• Model selection and validation')\nprint('• Efficient implementations')\n\nprint('\\nApplications:')\nprint('• Signal processing and anomaly detection')\nprint('• Financial time series analysis')\nprint('• Genomic segmentation')\nprint('• Climate data analysis')\nprint('• Quality control in 
manufacturing')\n\n# Generate synthetic time series with change points\nprint('\\n=== Synthetic Time Series Generation ===')\n\nnp.random.seed(42)\n\n# Time series parameters\ntotal_length = 1000\ntrue_change_points = [200, 400, 650, 800]\nsegment_means = [1.0, 3.0, 0.5, 2.5, 1.8]\nsegment_stds = [0.5, 0.8, 0.3, 0.6, 0.4]\n\nprint(f'Generating time series with {len(true_change_points)} change points')\nprint(f'True change points: {true_change_points}')\nprint(f'Total length: {total_length} points')\n\n# Generate segments\ntime_series = []\ncurrent_pos = 0\n\nfor i, cp in enumerate(true_change_points + [total_length]):\n segment_length = cp - current_pos\n segment = np.random.normal(\n segment_means[i], \n segment_stds[i], \n segment_length\n )\n time_series.extend(segment)\n current_pos = cp\n\ntime_series = np.array(time_series)\ntime_points = np.arange(len(time_series))\n\nprint(f'Generated time series shape: {time_series.shape}')\nprint(f'Value range: {time_series.min():.2f} to {time_series.max():.2f}')\n\n# Add some noise and trends\nprint('\\nAdding noise and trends...')\n\n# Add noise\nnoise_level = 0.1\nnoise = np.random.normal(0, noise_level, len(time_series))\ntime_series_noisy = time_series + noise\n\n# Add slight trend\ntrend = 0.0005 * time_points\ntime_series_with_trend = time_series_noisy + trend\n\nprint(f'Noise level: {noise_level}')\nprint(f'Trend coefficient: 0.0005 per time unit')\n\n# Apply change point detection algorithms\nprint('\\n=== Change Point Detection ===')\n\n# Test different algorithms\nalgorithms = {\n 'PELT (penalty=5)': lambda x: pelt_algorithm(x, penalty=5),\n 'PELT (penalty=10)': lambda x: pelt_algorithm(x, penalty=10),\n 'PELT (penalty=20)': lambda x: pelt_algorithm(x, penalty=20),\n 'Binary Segmentation': lambda x: binary_segmentation(x, max_changepoints=8)\n}\n\nresults = {}\n\nfor algo_name, algo_func in algorithms.items():\n print(f'\\nRunning {algo_name}...')\n \n detected_cps = algo_func(time_series_with_trend)\n \n # Calculate performance metrics\n def calculate_metrics(true_cps, detected_cps, tolerance=50):\n \"\"\"Calculate precision, recall, and F1 score\"\"\"\n true_positives = 0\n \n for true_cp in true_cps:\n if any(abs(det_cp - true_cp) <= tolerance for det_cp in detected_cps):\n true_positives += 1\n \n precision = true_positives / len(detected_cps) if detected_cps else 0\n recall = true_positives / len(true_cps) if true_cps else 0\n f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0\n \n return precision, recall, f1\n \n precision, recall, f1 = calculate_metrics(true_change_points, detected_cps)\n \n results[algo_name] = {\n 'detected_cps': detected_cps,\n 'precision': precision,\n 'recall': recall,\n 'f1': f1\n }\n \n print(f' Detected change points: {detected_cps}')\n print(f' Precision: {precision:.3f}')\n print(f' Recall: {recall:.3f}')\n print(f' F1 Score: {f1:.3f}')\n\n# Compare algorithms\nprint('\\n=== Algorithm Comparison ===')\n\nperformance_df = pd.DataFrame({\n 'Algorithm': list(results.keys()),\n 'Precision': [results[algo]['precision'] for algo in results],\n 'Recall': [results[algo]['recall'] for algo in results],\n 'F1 Score': [results[algo]['f1'] for algo in results],\n 'Num Detected': [len(results[algo]['detected_cps']) for algo in results]\n})\n\nprint(performance_df.round(3))\n\n# Best algorithm\nbest_algo = performance_df.loc[performance_df['F1 Score'].idxmax(), 'Algorithm']\nprint(f'\\nBest performing algorithm: {best_algo}')\nprint(f'F1 Score: 
{performance_df.loc[performance_df[\"F1 Score\"].idxmax(), \"F1 Score\"]:.3f}')\n\n# Multivariate change point detection simulation\nprint('\\n=== Multivariate Change Point Detection ===')\n\n# Generate multivariate time series\nn_dims = 3\nmv_length = 500\nmv_change_points = [150, 300, 400]\n\nprint(f'Generating {n_dims}D time series with change points at {mv_change_points}')\n\nmv_time_series = []\ncurrent_pos = 0\n\n# Different correlation structures for each segment\ncorr_matrices = [\n np.array([[1.0, 0.2, 0.1], [0.2, 1.0, 0.3], [0.1, 0.3, 1.0]]), # Low correlation\n np.array([[1.0, 0.8, 0.6], [0.8, 1.0, 0.7], [0.6, 0.7, 1.0]]), # High correlation\n np.array([[1.0, -0.5, 0.2], [-0.5, 1.0, -0.3], [0.2, -0.3, 1.0]]), # Mixed correlation\n np.array([[1.0, 0.1, 0.9], [0.1, 1.0, 0.2], [0.9, 0.2, 1.0]]) # Selective correlation\n]\n\nfor i, cp in enumerate(mv_change_points + [mv_length]):\n segment_length = cp - current_pos\n \n # Generate correlated multivariate normal data\n mean = np.random.normal(0, 2, n_dims)\n cov = corr_matrices[i]\n \n segment = np.random.multivariate_normal(mean, cov, segment_length)\n mv_time_series.append(segment)\n \n current_pos = cp\n\nmv_time_series = np.vstack(mv_time_series)\nprint(f'Multivariate time series shape: {mv_time_series.shape}')\n\n# Detect change points in each dimension\nprint('\\nDetecting change points in each dimension:')\nmv_results = {}\n\nfor dim in range(n_dims):\n dim_data = mv_time_series[:, dim]\n detected_cps = binary_segmentation(dim_data, max_changepoints=5)\n \n precision, recall, f1 = calculate_metrics(mv_change_points, detected_cps, tolerance=25)\n \n mv_results[f'Dimension {dim}'] = {\n 'detected_cps': detected_cps,\n 'precision': precision,\n 'recall': recall,\n 'f1': f1\n }\n \n print(f' Dim {dim}: CPs = {detected_cps}, F1 = {f1:.3f}')\n\n# Aggregate multivariate detection (simple approach)\nprint('\\nAggregate multivariate detection:')\n\n# Sum of squared differences approach\nsum_sq_diff = np.sum(np.diff(mv_time_series, axis=0)**2, axis=1)\ndetected_cps_mv = binary_segmentation(sum_sq_diff, max_changepoints=5)\n\nprecision_mv, recall_mv, f1_mv = calculate_metrics(mv_change_points, detected_cps_mv, tolerance=25)\nprint(f' Aggregate CPs: {detected_cps_mv}')\nprint(f' Precision: {precision_mv:.3f}, Recall: {recall_mv:.3f}, F1: {f1_mv:.3f}')\n\n# Model selection simulation\nprint('\\n=== Model Selection ===')\n\n# Test different penalty values for PELT\npenalty_values = [1, 2, 5, 10, 15, 20, 30, 50]\nmodel_selection_results = []\n\nfor penalty in penalty_values:\n detected_cps = pelt_algorithm(time_series_with_trend, penalty=penalty)\n \n # Calculate BIC-like criterion\n n_segments = len(detected_cps) + 1\n n_params = n_segments * 2 # mean and variance for each segment\n \n # Calculate likelihood (simplified)\n log_likelihood = 0\n current_pos = 0\n \n for cp in detected_cps + [len(time_series_with_trend)]:\n segment_data = time_series_with_trend[current_pos:cp]\n if len(segment_data) > 0:\n segment_var = np.var(segment_data)\n if segment_var > 0:\n log_likelihood -= 0.5 * len(segment_data) * np.log(2 * np.pi * segment_var)\n log_likelihood -= 0.5 * len(segment_data)\n current_pos = cp\n \n bic = -2 * log_likelihood + n_params * np.log(len(time_series_with_trend))\n \n precision, recall, f1 = calculate_metrics(true_change_points, detected_cps)\n \n model_selection_results.append({\n 'penalty': penalty,\n 'n_changepoints': len(detected_cps),\n 'bic': bic,\n 'precision': precision,\n 'recall': recall,\n 'f1': f1\n })\n\nmodel_df = 
pd.DataFrame(model_selection_results)\n\nprint('Model selection results:')\nprint(model_df.round(3))\n\n# Best model by BIC\nbest_bic_idx = model_df['bic'].idxmin()\nbest_penalty = model_df.loc[best_bic_idx, 'penalty']\nprint(f'\\nBest penalty by BIC: {best_penalty}')\nprint(f'Corresponding F1 score: {model_df.loc[best_bic_idx, \"f1\"]:.3f}')\n\n# Visualization\nprint('\\n=== Visualization ===')\n\nfig, axes = plt.subplots(2, 2, figsize=(15, 10))\n\n# 1. Original time series with change points\nax1 = axes[0, 0]\nax1.plot(time_points, time_series_with_trend, 'b-', alpha=0.7, linewidth=1)\n\n# True change points\nfor cp in true_change_points:\n ax1.axvline(x=cp, color='red', linestyle='--', alpha=0.8, label='True CP' if cp == true_change_points[0] else '')\n\n# Best detected change points\nbest_detected = results[best_algo]['detected_cps']\nfor cp in best_detected:\n ax1.axvline(x=cp, color='green', linestyle=':', alpha=0.8, label='Detected CP' if cp == best_detected[0] else '')\n\nax1.set_xlabel('Time')\nax1.set_ylabel('Value')\nax1.set_title('Time Series with Change Points')\nax1.legend()\nax1.grid(True, alpha=0.3)\n\n# 2. Algorithm performance comparison\nax2 = axes[0, 1]\nmetrics = ['Precision', 'Recall', 'F1 Score']\nbar_width = 0.2\nx_pos = np.arange(len(metrics))\n\nfor i, algo in enumerate(results.keys()):\n values = [results[algo]['precision'], results[algo]['recall'], results[algo]['f1']]\n ax2.bar(x_pos + i*bar_width, values, bar_width, label=algo, alpha=0.8)\n\nax2.set_xlabel('Metrics')\nax2.set_ylabel('Score')\nax2.set_title('Algorithm Performance Comparison')\nax2.set_xticks(x_pos + bar_width * 1.5)\nax2.set_xticklabels(metrics)\nax2.legend(bbox_to_anchor=(1.05, 1), loc='upper left')\nax2.grid(True, alpha=0.3)\nax2.set_ylim(0, 1.1)\n\n# 3. Multivariate time series\nax3 = axes[1, 0]\nfor dim in range(min(n_dims, 3)):\n ax3.plot(mv_time_series[:, dim], label=f'Dimension {dim}', alpha=0.7)\n\nfor cp in mv_change_points:\n ax3.axvline(x=cp, color='red', linestyle='--', alpha=0.6)\n\nax3.set_xlabel('Time')\nax3.set_ylabel('Value')\nax3.set_title('Multivariate Time Series')\nax3.legend()\nax3.grid(True, alpha=0.3)\n\n# 4. 
Model selection (BIC vs penalty)\nax4 = axes[1, 1]\nax4.plot(model_df['penalty'], model_df['bic'], 'bo-', label='BIC')\nax4.axvline(x=best_penalty, color='red', linestyle='--', alpha=0.8, label=f'Best penalty ({best_penalty})')\n\n# Secondary y-axis for F1 score\nax4_twin = ax4.twinx()\nax4_twin.plot(model_df['penalty'], model_df['f1'], 'ro-', alpha=0.7, label='F1 Score')\n\nax4.set_xlabel('Penalty Value')\nax4.set_ylabel('BIC', color='blue')\nax4_twin.set_ylabel('F1 Score', color='red')\nax4.set_title('Model Selection: BIC vs Penalty')\nax4.legend(loc='upper left')\nax4_twin.legend(loc='upper right')\nax4.grid(True, alpha=0.3)\n\nplt.tight_layout()\n\n# Save visualization\nwith tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:\n plt.savefig(tmp.name, dpi=150, bbox_inches='tight')\n viz_file = tmp.name\n\nplt.close()\nprint(f'Change point detection visualization saved to: {viz_file}')\n\n# Summary report\nprint('\\n' + '=' * 45)\nprint('RUPTURES CHANGE POINT DETECTION SUMMARY')\nprint('=' * 45)\nprint(f'Time series length: {len(time_series_with_trend):,} points')\nprint(f'True change points: {len(true_change_points)}')\nprint(f'Best algorithm: {best_algo}')\nprint(f'Best F1 score: {max(results[algo][\"f1\"] for algo in results):.3f}')\nprint(f'\\nAlgorithm rankings by F1 score:')\nfor i, (algo, metrics) in enumerate(sorted(results.items(), key=lambda x: x[1]['f1'], reverse=True), 1):\n print(f' {i}. {algo}: {metrics[\"f1\"]:.3f}')\nprint(f'\\nMultivariate detection F1 score: {f1_mv:.3f}')\nprint(f'Optimal penalty (BIC): {best_penalty}')\n\n# Cleanup\nos.unlink(viz_file)\nprint('\\nDemo complete - temporary files cleaned up')\n\nprint('\\nruptures provides:')\nprint('• Multiple change point detection algorithms')\nprint('• PELT, Binary Segmentation, Window methods')\nprint('• Multivariate time series support')\nprint('• Model selection and validation')\nprint('• Custom cost functions')\nprint('• Efficient implementations')\nprint('• Extensive documentation and examples')\n\nprint('\\nTypical ruptures usage:')\nprint('import ruptures as rpt')\nprint('algo = rpt.Pelt(model=\"rbf\").fit(signal)')\nprint('result = algo.predict(pen=10)')",
+
"usage_example": "# ruptures change point detection demonstration\n\nimport numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt\nfrom scipy import signal\nfrom sklearn.preprocessing import StandardScaler\nimport tempfile\nimport os\n\n# Simulate ruptures functionality\ndef pelt_algorithm(data, penalty=10):\n \"\"\"Simplified PELT algorithm for change point detection\"\"\"\n n = len(data)\n F = np.full(n + 1, np.inf)\n F[0] = -penalty\n cp_candidates = [0]\n \n for t in range(1, n + 1):\n for s in cp_candidates:\n if s < t:\n segment_data = data[s:t]\n if len(segment_data) > 0:\n cost = np.var(segment_data) * len(segment_data)\n total_cost = F[s] + cost + penalty\n \n if total_cost < F[t]:\n F[t] = total_cost\n \n # Pruning step\n cp_candidates = [s for s in cp_candidates if F[s] <= F[t] - penalty]\n cp_candidates.append(t)\n \n # Backtrack to find change points\n change_points = []\n t = n\n while t > 0:\n for s in range(t):\n if s in cp_candidates:\n segment_data = data[s:t]\n if len(segment_data) > 0:\n cost = np.var(segment_data) * len(segment_data)\n if abs(F[t] - (F[s] + cost + penalty)) < 1e-10:\n if s > 0:\n change_points.append(s)\n t = s\n break\n else:\n break\n \n return sorted(change_points)\n\ndef binary_segmentation(data, max_changepoints=10):\n \"\"\"Simplified binary segmentation algorithm\"\"\"\n def find_best_split(segment_data, start_idx):\n n = len(segment_data)\n if n < 4: # Minimum segment size\n return None, -np.inf\n \n best_score = -np.inf\n best_split = None\n \n for split in range(2, n - 1):\n left = segment_data[:split]\n right = segment_data[split:]\n \n # Calculate score based on variance reduction\n total_var = np.var(segment_data) * n\n left_var = np.var(left) * len(left)\n right_var = np.var(right) * len(right)\n \n score = total_var - (left_var + right_var)\n \n if score > best_score:\n best_score = score\n best_split = start_idx + split\n \n return best_split, best_score\n \n change_points = []\n segments = [(data, 0)] # (segment_data, start_index)\n \n for _ in range(max_changepoints):\n if not segments:\n break\n \n best_segment = None\n best_split = None\n best_score = -np.inf\n \n # Find the best split among all segments\n for i, (segment_data, start_idx) in enumerate(segments):\n split, score = find_best_split(segment_data, start_idx)\n if split is not None and score > best_score:\n best_score = score\n best_split = split\n best_segment = i\n \n if best_split is None or best_score <= 0:\n break\n \n # Apply the best split\n segment_data, start_idx = segments.pop(best_segment)\n split_point = best_split - start_idx\n \n left_segment = segment_data[:split_point]\n right_segment = segment_data[split_point:]\n \n if len(left_segment) > 0:\n segments.append((left_segment, start_idx))\n if len(right_segment) > 0:\n segments.append((right_segment, best_split))\n \n change_points.append(best_split)\n \n return sorted(change_points)\n\nprint('ruptures - Change Point Detection Library')\nprint('=' * 45)\n\nprint('ruptures Features:')\nprint('• Multiple change point detection algorithms')\nprint('• PELT, Binary Segmentation, Window-based methods')\nprint('• Support for various cost functions')\nprint('• Multivariate time series analysis')\nprint('• Model selection and validation')\nprint('• Efficient implementations')\n\nprint('\\nApplications:')\nprint('• Signal processing and anomaly detection')\nprint('• Financial time series analysis')\nprint('• Genomic segmentation')\nprint('• Climate data analysis')\nprint('• Quality control in 
manufacturing')\n\n# Generate synthetic time series with change points\nprint('\\n=== Synthetic Time Series Generation ===')\n\nnp.random.seed(42)\n\n# Time series parameters\ntotal_length = 1000\ntrue_change_points = [200, 400, 650, 800]\nsegment_means = [1.0, 3.0, 0.5, 2.5, 1.8]\nsegment_stds = [0.5, 0.8, 0.3, 0.6, 0.4]\n\nprint(f'Generating time series with {len(true_change_points)} change points')\nprint(f'True change points: {true_change_points}')\nprint(f'Total length: {total_length} points')\n\n# Generate segments\ntime_series = []\ncurrent_pos = 0\n\nfor i, cp in enumerate(true_change_points + [total_length]):\n segment_length = cp - current_pos\n segment = np.random.normal(\n segment_means[i], \n segment_stds[i], \n segment_length\n )\n time_series.extend(segment)\n current_pos = cp\n\ntime_series = np.array(time_series)\ntime_points = np.arange(len(time_series))\n\nprint(f'Generated time series shape: {time_series.shape}')\nprint(f'Value range: {time_series.min():.2f} to {time_series.max():.2f}')\n\n# Add some noise and trends\nprint('\\nAdding noise and trends...')\n\n# Add noise\nnoise_level = 0.1\nnoise = np.random.normal(0, noise_level, len(time_series))\ntime_series_noisy = time_series + noise\n\n# Add slight trend\ntrend = 0.0005 * time_points\ntime_series_with_trend = time_series_noisy + trend\n\nprint(f'Noise level: {noise_level}')\nprint(f'Trend coefficient: 0.0005 per time unit')\n\n# Apply change point detection algorithms\nprint('\\n=== Change Point Detection ===')\n\n# Test different algorithms\nalgorithms = {\n 'PELT (penalty=5)': lambda x: pelt_algorithm(x, penalty=5),\n 'PELT (penalty=10)': lambda x: pelt_algorithm(x, penalty=10),\n 'PELT (penalty=20)': lambda x: pelt_algorithm(x, penalty=20),\n 'Binary Segmentation': lambda x: binary_segmentation(x, max_changepoints=8)\n}\n\nresults = {}\n\nfor algo_name, algo_func in algorithms.items():\n print(f'\\nRunning {algo_name}...')\n \n detected_cps = algo_func(time_series_with_trend)\n \n # Calculate performance metrics\n def calculate_metrics(true_cps, detected_cps, tolerance=50):\n \"\"\"Calculate precision, recall, and F1 score\"\"\"\n true_positives = 0\n \n for true_cp in true_cps:\n if any(abs(det_cp - true_cp) <= tolerance for det_cp in detected_cps):\n true_positives += 1\n \n precision = true_positives / len(detected_cps) if detected_cps else 0\n recall = true_positives / len(true_cps) if true_cps else 0\n f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0\n \n return precision, recall, f1\n \n precision, recall, f1 = calculate_metrics(true_change_points, detected_cps)\n \n results[algo_name] = {\n 'detected_cps': detected_cps,\n 'precision': precision,\n 'recall': recall,\n 'f1': f1\n }\n \n print(f' Detected change points: {detected_cps}')\n print(f' Precision: {precision:.3f}')\n print(f' Recall: {recall:.3f}')\n print(f' F1 Score: {f1:.3f}')\n\n# Compare algorithms\nprint('\\n=== Algorithm Comparison ===')\n\nperformance_df = pd.DataFrame({\n 'Algorithm': list(results.keys()),\n 'Precision': [results[algo]['precision'] for algo in results],\n 'Recall': [results[algo]['recall'] for algo in results],\n 'F1 Score': [results[algo]['f1'] for algo in results],\n 'Num Detected': [len(results[algo]['detected_cps']) for algo in results]\n})\n\nprint(performance_df.round(3))\n\n# Best algorithm\nbest_algo = performance_df.loc[performance_df['F1 Score'].idxmax(), 'Algorithm']\nprint(f'\\nBest performing algorithm: {best_algo}')\nprint(f'F1 Score: 
{performance_df.loc[performance_df[\"F1 Score\"].idxmax(), \"F1 Score\"]:.3f}')\n\n# Multivariate change point detection simulation\nprint('\\n=== Multivariate Change Point Detection ===')\n\n# Generate multivariate time series\nn_dims = 3\nmv_length = 500\nmv_change_points = [150, 300, 400]\n\nprint(f'Generating {n_dims}D time series with change points at {mv_change_points}')\n\nmv_time_series = []\ncurrent_pos = 0\n\n# Different correlation structures for each segment\ncorr_matrices = [\n np.array([[1.0, 0.2, 0.1], [0.2, 1.0, 0.3], [0.1, 0.3, 1.0]]), # Low correlation\n np.array([[1.0, 0.8, 0.6], [0.8, 1.0, 0.7], [0.6, 0.7, 1.0]]), # High correlation\n np.array([[1.0, -0.5, 0.2], [-0.5, 1.0, -0.3], [0.2, -0.3, 1.0]]), # Mixed correlation\n np.array([[1.0, 0.1, 0.9], [0.1, 1.0, 0.2], [0.9, 0.2, 1.0]]) # Selective correlation\n]\n\nfor i, cp in enumerate(mv_change_points + [mv_length]):\n segment_length = cp - current_pos\n \n # Generate correlated multivariate normal data\n mean = np.random.normal(0, 2, n_dims)\n cov = corr_matrices[i]\n \n segment = np.random.multivariate_normal(mean, cov, segment_length)\n mv_time_series.append(segment)\n \n current_pos = cp\n\nmv_time_series = np.vstack(mv_time_series)\nprint(f'Multivariate time series shape: {mv_time_series.shape}')\n\n# Detect change points in each dimension\nprint('\\nDetecting change points in each dimension:')\nmv_results = {}\n\nfor dim in range(n_dims):\n dim_data = mv_time_series[:, dim]\n detected_cps = binary_segmentation(dim_data, max_changepoints=5)\n \n precision, recall, f1 = calculate_metrics(mv_change_points, detected_cps, tolerance=25)\n \n mv_results[f'Dimension {dim}'] = {\n 'detected_cps': detected_cps,\n 'precision': precision,\n 'recall': recall,\n 'f1': f1\n }\n \n print(f' Dim {dim}: CPs = {detected_cps}, F1 = {f1:.3f}')\n\n# Aggregate multivariate detection (simple approach)\nprint('\\nAggregate multivariate detection:')\n\n# Sum of squared differences approach\nsum_sq_diff = np.sum(np.diff(mv_time_series, axis=0)**2, axis=1)\ndetected_cps_mv = binary_segmentation(sum_sq_diff, max_changepoints=5)\n\nprecision_mv, recall_mv, f1_mv = calculate_metrics(mv_change_points, detected_cps_mv, tolerance=25)\nprint(f' Aggregate CPs: {detected_cps_mv}')\nprint(f' Precision: {precision_mv:.3f}, Recall: {recall_mv:.3f}, F1: {f1_mv:.3f}')\n\n# Model selection simulation\nprint('\\n=== Model Selection ===')\n\n# Test different penalty values for PELT\npenalty_values = [1, 2, 5, 10, 15, 20, 30, 50]\nmodel_selection_results = []\n\nfor penalty in penalty_values:\n detected_cps = pelt_algorithm(time_series_with_trend, penalty=penalty)\n \n # Calculate BIC-like criterion\n n_segments = len(detected_cps) + 1\n n_params = n_segments * 2 # mean and variance for each segment\n \n # Calculate likelihood (simplified)\n log_likelihood = 0\n current_pos = 0\n \n for cp in detected_cps + [len(time_series_with_trend)]:\n segment_data = time_series_with_trend[current_pos:cp]\n if len(segment_data) > 0:\n segment_var = np.var(segment_data)\n if segment_var > 0:\n log_likelihood -= 0.5 * len(segment_data) * np.log(2 * np.pi * segment_var)\n log_likelihood -= 0.5 * len(segment_data)\n current_pos = cp\n \n bic = -2 * log_likelihood + n_params * np.log(len(time_series_with_trend))\n \n precision, recall, f1 = calculate_metrics(true_change_points, detected_cps)\n \n model_selection_results.append({\n 'penalty': penalty,\n 'n_changepoints': len(detected_cps),\n 'bic': bic,\n 'precision': precision,\n 'recall': recall,\n 'f1': f1\n })\n\nmodel_df = 
pd.DataFrame(model_selection_results)\n\nprint('Model selection results:')\nprint(model_df.round(3))\n\n# Best model by BIC\nbest_bic_idx = model_df['bic'].idxmin()\nbest_penalty = model_df.loc[best_bic_idx, 'penalty']\nprint(f'\\nBest penalty by BIC: {best_penalty}')\nprint(f'Corresponding F1 score: {model_df.loc[best_bic_idx, \"f1\"]:.3f}')\n\n# Visualization\nprint('\\n=== Visualization ===')\n\nfig, axes = plt.subplots(2, 2, figsize=(15, 10))\n\n# 1. Original time series with change points\nax1 = axes[0, 0]\nax1.plot(time_points, time_series_with_trend, 'b-', alpha=0.7, linewidth=1)\n\n# True change points\nfor cp in true_change_points:\n ax1.axvline(x=cp, color='red', linestyle='--', alpha=0.8, label='True CP' if cp == true_change_points[0] else '')\n\n# Best detected change points\nbest_detected = results[best_algo]['detected_cps']\nfor cp in best_detected:\n ax1.axvline(x=cp, color='green', linestyle=':', alpha=0.8, label='Detected CP' if cp == best_detected[0] else '')\n\nax1.set_xlabel('Time')\nax1.set_ylabel('Value')\nax1.set_title('Time Series with Change Points')\nax1.legend()\nax1.grid(True, alpha=0.3)\n\n# 2. Algorithm performance comparison\nax2 = axes[0, 1]\nmetrics = ['Precision', 'Recall', 'F1 Score']\nbar_width = 0.2\nx_pos = np.arange(len(metrics))\n\nfor i, algo in enumerate(results.keys()):\n values = [results[algo]['precision'], results[algo]['recall'], results[algo]['f1']]\n ax2.bar(x_pos + i*bar_width, values, bar_width, label=algo, alpha=0.8)\n\nax2.set_xlabel('Metrics')\nax2.set_ylabel('Score')\nax2.set_title('Algorithm Performance Comparison')\nax2.set_xticks(x_pos + bar_width * 1.5)\nax2.set_xticklabels(metrics)\nax2.legend(bbox_to_anchor=(1.05, 1), loc='upper left')\nax2.grid(True, alpha=0.3)\nax2.set_ylim(0, 1.1)\n\n# 3. Multivariate time series\nax3 = axes[1, 0]\nfor dim in range(min(n_dims, 3)):\n ax3.plot(mv_time_series[:, dim], label=f'Dimension {dim}', alpha=0.7)\n\nfor cp in mv_change_points:\n ax3.axvline(x=cp, color='red', linestyle='--', alpha=0.6)\n\nax3.set_xlabel('Time')\nax3.set_ylabel('Value')\nax3.set_title('Multivariate Time Series')\nax3.legend()\nax3.grid(True, alpha=0.3)\n\n# 4. 
Model selection (BIC vs penalty)\nax4 = axes[1, 1]\nax4.plot(model_df['penalty'], model_df['bic'], 'bo-', label='BIC')\nax4.axvline(x=best_penalty, color='red', linestyle='--', alpha=0.8, label=f'Best penalty ({best_penalty})')\n\n# Secondary y-axis for F1 score\nax4_twin = ax4.twinx()\nax4_twin.plot(model_df['penalty'], model_df['f1'], 'ro-', alpha=0.7, label='F1 Score')\n\nax4.set_xlabel('Penalty Value')\nax4.set_ylabel('BIC', color='blue')\nax4_twin.set_ylabel('F1 Score', color='red')\nax4.set_title('Model Selection: BIC vs Penalty')\nax4.legend(loc='upper left')\nax4_twin.legend(loc='upper right')\nax4.grid(True, alpha=0.3)\n\nplt.tight_layout()\n\n# Save visualization\nwith tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:\n plt.savefig(tmp.name, dpi=150, bbox_inches='tight')\n viz_file = tmp.name\n\nplt.close()\nprint(f'Change point detection visualization saved to: {viz_file}')\n\n# Summary report\nprint('\\n' + '=' * 45)\nprint('RUPTURES CHANGE POINT DETECTION SUMMARY')\nprint('=' * 45)\nprint(f'Time series length: {len(time_series_with_trend):} points')\nprint(f'True change points: {len(true_change_points)}')\nprint(f'Best algorithm: {best_algo}')\nprint(f'Best F1 score: {max(results[algo][\"f1\"] for algo in results):.3f}')\nprint(f'\\nAlgorithm rankings by F1 score:')\nfor i, (algo, metrics) in enumerate(sorted(results.items(), key=lambda x: x[1]['f1'], reverse=True), 1):\n print(f' {i}. {algo}: {metrics[\"f1\"]:.3f}')\nprint(f'\\nMultivariate detection F1 score: {f1_mv:.3f}')\nprint(f'Optimal penalty (BIC): {best_penalty}')\n\n# Cleanup\nos.unlink(viz_file)\nprint('\\nDemo complete - temporary files cleaned up')\n\nprint('\\nruptures provides:')\nprint('• Multiple change point detection algorithms')\nprint('• PELT, Binary Segmentation, Window methods')\nprint('• Multivariate time series support')\nprint('• Model selection and validation')\nprint('• Custom cost functions')\nprint('• Efficient implementations')\nprint('• Extensive documentation and examples')\n\nprint('\\nTypical ruptures usage:')\nprint('import ruptures as rpt')\nprint('algo = rpt.Pelt(model=\"rbf\").fit(signal)')\nprint('result = algo.predict(pen=10)')",
     "quick_start": [
       "Install: pip install ruptures",
       "Import: import ruptures as rpt",

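This hunk likewise updates the long ruptures "usage_example" string. Its closing lines quote the library's typical call pattern (rpt.Pelt(model="rbf").fit(signal) followed by algo.predict(pen=10)); a self-contained sketch of that pattern on synthetic data, not code from tooluniverse, is:

    import numpy as np
    import ruptures as rpt  # pip install ruptures

    # Synthetic piecewise-constant signal with two mean shifts.
    rng = np.random.default_rng(0)
    signal = np.concatenate([
        rng.normal(0.0, 1.0, 200),
        rng.normal(3.0, 1.0, 200),
        rng.normal(1.0, 1.0, 200),
    ])

    algo = rpt.Pelt(model="rbf").fit(signal)  # call pattern quoted in the usage_example above
    breakpoints = algo.predict(pen=10)        # change-point indices; the last entry is len(signal)
    print(breakpoints)                        # expected to be close to [200, 400, 600]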