tooluniverse 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of tooluniverse might be problematic.

Files changed (96)
  1. tooluniverse/__init__.py +37 -14
  2. tooluniverse/admetai_tool.py +16 -5
  3. tooluniverse/base_tool.py +36 -0
  4. tooluniverse/biogrid_tool.py +118 -0
  5. tooluniverse/build_optimizer.py +87 -0
  6. tooluniverse/cache/__init__.py +3 -0
  7. tooluniverse/cache/memory_cache.py +99 -0
  8. tooluniverse/cache/result_cache_manager.py +235 -0
  9. tooluniverse/cache/sqlite_backend.py +257 -0
  10. tooluniverse/clinvar_tool.py +90 -0
  11. tooluniverse/compose_scripts/output_summarizer.py +87 -33
  12. tooluniverse/compose_tool.py +2 -2
  13. tooluniverse/custom_tool.py +28 -0
  14. tooluniverse/data/adverse_event_tools.json +97 -98
  15. tooluniverse/data/agentic_tools.json +81 -162
  16. tooluniverse/data/arxiv_tools.json +1 -4
  17. tooluniverse/data/compose_tools.json +0 -54
  18. tooluniverse/data/core_tools.json +1 -4
  19. tooluniverse/data/dataset_tools.json +7 -7
  20. tooluniverse/data/doaj_tools.json +1 -3
  21. tooluniverse/data/drug_discovery_agents.json +282 -0
  22. tooluniverse/data/europe_pmc_tools.json +1 -2
  23. tooluniverse/data/genomics_tools.json +174 -0
  24. tooluniverse/data/geo_tools.json +86 -0
  25. tooluniverse/data/literature_search_tools.json +15 -35
  26. tooluniverse/data/markitdown_tools.json +51 -0
  27. tooluniverse/data/monarch_tools.json +1 -2
  28. tooluniverse/data/openalex_tools.json +1 -5
  29. tooluniverse/data/opentarget_tools.json +8 -16
  30. tooluniverse/data/output_summarization_tools.json +23 -20
  31. tooluniverse/data/packages/bioinformatics_core_tools.json +2 -2
  32. tooluniverse/data/packages/cheminformatics_tools.json +1 -1
  33. tooluniverse/data/packages/genomics_tools.json +1 -1
  34. tooluniverse/data/packages/single_cell_tools.json +1 -1
  35. tooluniverse/data/packages/structural_biology_tools.json +1 -1
  36. tooluniverse/data/pmc_tools.json +1 -4
  37. tooluniverse/data/ppi_tools.json +139 -0
  38. tooluniverse/data/pubmed_tools.json +1 -3
  39. tooluniverse/data/semantic_scholar_tools.json +1 -2
  40. tooluniverse/data/tool_composition_tools.json +2 -4
  41. tooluniverse/data/unified_guideline_tools.json +206 -4
  42. tooluniverse/data/xml_tools.json +15 -15
  43. tooluniverse/data/zenodo_tools.json +1 -2
  44. tooluniverse/dbsnp_tool.py +71 -0
  45. tooluniverse/default_config.py +6 -0
  46. tooluniverse/ensembl_tool.py +61 -0
  47. tooluniverse/execute_function.py +235 -76
  48. tooluniverse/generate_tools.py +303 -20
  49. tooluniverse/genomics_gene_search_tool.py +56 -0
  50. tooluniverse/geo_tool.py +116 -0
  51. tooluniverse/gnomad_tool.py +63 -0
  52. tooluniverse/logging_config.py +64 -2
  53. tooluniverse/markitdown_tool.py +159 -0
  54. tooluniverse/mcp_client_tool.py +10 -5
  55. tooluniverse/molecule_2d_tool.py +9 -3
  56. tooluniverse/molecule_3d_tool.py +9 -3
  57. tooluniverse/output_hook.py +217 -150
  58. tooluniverse/smcp.py +18 -10
  59. tooluniverse/smcp_server.py +89 -199
  60. tooluniverse/string_tool.py +112 -0
  61. tooluniverse/tools/{MultiAgentLiteratureSearch.py → ADMETAnalyzerAgent.py} +18 -18
  62. tooluniverse/tools/ArXiv_search_papers.py +3 -3
  63. tooluniverse/tools/CMA_Guidelines_Search.py +52 -0
  64. tooluniverse/tools/CORE_search_papers.py +3 -3
  65. tooluniverse/tools/ClinVar_search_variants.py +52 -0
  66. tooluniverse/tools/ClinicalTrialDesignAgent.py +63 -0
  67. tooluniverse/tools/CompoundDiscoveryAgent.py +59 -0
  68. tooluniverse/tools/DOAJ_search_articles.py +2 -2
  69. tooluniverse/tools/DiseaseAnalyzerAgent.py +52 -0
  70. tooluniverse/tools/DrugInteractionAnalyzerAgent.py +52 -0
  71. tooluniverse/tools/DrugOptimizationAgent.py +63 -0
  72. tooluniverse/tools/Ensembl_lookup_gene_by_symbol.py +52 -0
  73. tooluniverse/tools/EuropePMC_search_articles.py +1 -1
  74. tooluniverse/tools/GIN_Guidelines_Search.py +52 -0
  75. tooluniverse/tools/GWAS_search_associations_by_gene.py +52 -0
  76. tooluniverse/tools/LiteratureSynthesisAgent.py +59 -0
  77. tooluniverse/tools/PMC_search_papers.py +3 -3
  78. tooluniverse/tools/PubMed_search_articles.py +2 -2
  79. tooluniverse/tools/SemanticScholar_search_papers.py +1 -1
  80. tooluniverse/tools/UCSC_get_genes_by_region.py +67 -0
  81. tooluniverse/tools/Zenodo_search_records.py +1 -1
  82. tooluniverse/tools/__init__.py +33 -3
  83. tooluniverse/tools/convert_to_markdown.py +59 -0
  84. tooluniverse/tools/dbSNP_get_variant_by_rsid.py +46 -0
  85. tooluniverse/tools/gnomAD_query_variant.py +52 -0
  86. tooluniverse/tools/openalex_literature_search.py +4 -4
  87. tooluniverse/ucsc_tool.py +60 -0
  88. tooluniverse/unified_guideline_tools.py +1175 -57
  89. tooluniverse/utils.py +51 -4
  90. tooluniverse/zenodo_tool.py +2 -1
  91. {tooluniverse-1.0.7.dist-info → tooluniverse-1.0.9.dist-info}/METADATA +10 -3
  92. {tooluniverse-1.0.7.dist-info → tooluniverse-1.0.9.dist-info}/RECORD +96 -61
  93. {tooluniverse-1.0.7.dist-info → tooluniverse-1.0.9.dist-info}/entry_points.txt +0 -3
  94. {tooluniverse-1.0.7.dist-info → tooluniverse-1.0.9.dist-info}/WHEEL +0 -0
  95. {tooluniverse-1.0.7.dist-info → tooluniverse-1.0.9.dist-info}/licenses/LICENSE +0 -0
  96. {tooluniverse-1.0.7.dist-info → tooluniverse-1.0.9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,86 @@
+ [
+ {
+ "type": "GEORESTTool",
+ "name": "GEO_search_expression_data",
+ "description": "Search gene expression data from the GEO database. GEO is a public repository that archives and freely distributes microarray, next-generation sequencing, and other forms of high-throughput functional genomics data.",
+ "parameter": {
+ "type": "object",
+ "properties": {
+ "query": {
+ "type": "string",
+ "description": "Search query (e.g., 'cancer', 'diabetes', 'microarray')",
+ "minLength": 1
+ },
+ "organism": {
+ "type": "string",
+ "description": "Organism name (e.g., 'Homo sapiens', 'Mus musculus')",
+ "default": "Homo sapiens"
+ },
+ "study_type": {
+ "type": "string",
+ "description": "Type of study (e.g., 'expression', 'methylation', 'genome')",
+ "enum": ["expression", "methylation", "genome", "sequence", "other"]
+ },
+ "platform": {
+ "type": "string",
+ "description": "Platform used (e.g., 'GPL96', 'GPL570')"
+ },
+ "date_range": {
+ "type": "string",
+ "description": "Date range in format 'YYYY:YYYY' (e.g., '2020:2023')"
+ },
+ "limit": {
+ "type": "integer",
+ "description": "Maximum number of results to return (default: 50)",
+ "minimum": 1,
+ "maximum": 500,
+ "default": 50
+ },
+ "sort": {
+ "type": "string",
+ "description": "Sort order ('relevance', 'date', 'title')",
+ "enum": ["relevance", "date", "title"],
+ "default": "relevance"
+ }
+ },
+ "required": ["query"]
+ },
+ "fields": {
+ "endpoint": "/esearch.fcgi",
+ "return_format": "JSON"
+ },
+ "return_schema": {
+ "type": "object",
+ "properties": {
+ "success": {"type": "boolean"},
+ "count": {"type": "integer"},
+ "studies": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "id": {"type": "string"},
+ "title": {"type": "string"},
+ "summary": {"type": "string"},
+ "organism": {"type": "string"},
+ "platform": {"type": "string"},
+ "samples": {"type": "integer"},
+ "series_type": {"type": "string"},
+ "publication_date": {"type": "string"},
+ "submission_date": {"type": "string"},
+ "contact": {"type": "string"},
+ "citation": {"type": "string"}
+ }
+ }
+ },
+ "query_translation": {"type": "string"},
+ "error": {"type": "string"}
+ }
+ },
+ "implementation": {
+ "language": "python",
+ "dependencies": ["requests"],
+ "source_file": "geo_tool.py"
+ }
+ }
+ ]
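
The new GEO_search_expression_data entry only declares the parameter and return schemas; the HTTP call itself lives in the new geo_tool.py. As a rough sketch of what a backend for this schema might do, assuming the standard NCBI E-utilities esearch endpoint for the GEO DataSets database (the actual implementation may build the query differently):

import requests

# Minimal sketch of a GEORESTTool backend: query NCBI E-utilities esearch
# for GEO DataSets (db=gds) using the parameters defined above. The helper
# name and query construction are illustrative, not the shipped code.
def search_geo(query, organism="Homo sapiens", limit=50):
    term = f'{query} AND "{organism}"[Organism]'
    resp = requests.get(
        "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi",
        params={"db": "gds", "term": term, "retmax": limit, "retmode": "json"},
        timeout=30,
    )
    resp.raise_for_status()
    result = resp.json()["esearchresult"]
    return {"success": True, "count": int(result["count"]), "ids": result["idlist"]}

print(search_geo("cancer", limit=5))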
@@ -13,13 +13,11 @@
  "properties": {
  "user_query": {
  "type": "string",
- "description": "The research query to analyze",
- "required": true
+ "description": "The research query to analyze"
  },
  "context": {
  "type": "string",
  "description": "Context information from previous steps",
- "required": false,
  "default": ""
  }
  },
@@ -51,23 +49,19 @@
  "properties": {
  "plan_title": {
  "type": "string",
- "description": "The title of the search plan",
- "required": true
+ "description": "The title of the search plan"
  },
  "plan_description": {
  "type": "string",
- "description": "The description of the search plan",
- "required": true
+ "description": "The description of the search plan"
  },
  "current_keywords": {
  "type": "string",
- "description": "Current keywords for the plan (comma-separated)",
- "required": true
+ "description": "Current keywords for the plan (comma-separated)"
  },
  "context": {
  "type": "string",
  "description": "Context information from previous steps",
- "required": false,
  "default": ""
  }
  },
@@ -102,28 +96,23 @@
  "properties": {
  "plan_title": {
  "type": "string",
- "description": "The title of the search plan",
- "required": true
+ "description": "The title of the search plan"
  },
  "plan_description": {
  "type": "string",
- "description": "The description of the search plan",
- "required": true
+ "description": "The description of the search plan"
  },
  "paper_count": {
  "type": "string",
- "description": "Number of papers found",
- "required": true
+ "description": "Number of papers found"
  },
  "papers_text": {
  "type": "string",
- "description": "Formatted text of the papers to summarize",
- "required": true
+ "description": "Formatted text of the papers to summarize"
  },
  "context": {
  "type": "string",
  "description": "Context information from previous steps",
- "required": false,
  "default": ""
  }
  },
@@ -156,13 +145,11 @@
  "properties": {
  "plans_analysis": {
  "type": "string",
- "description": "Analysis of current search plans and their quality scores",
- "required": true
+ "description": "Analysis of current search plans and their quality scores"
  },
  "context": {
  "type": "string",
  "description": "Context information from previous steps",
- "required": false,
  "default": ""
  }
  },
@@ -197,38 +184,31 @@
  "properties": {
  "user_query": {
  "type": "string",
- "description": "The original research query",
- "required": true
+ "description": "The original research query"
  },
  "user_intent": {
  "type": "string",
- "description": "The analyzed user intent",
- "required": true
+ "description": "The analyzed user intent"
  },
  "total_papers": {
  "type": "string",
- "description": "Total number of papers found",
- "required": true
+ "description": "Total number of papers found"
  },
  "total_plans": {
  "type": "string",
- "description": "Total number of search plans executed",
- "required": true
+ "description": "Total number of search plans executed"
  },
  "iterations": {
  "type": "string",
- "description": "Number of iterations performed",
- "required": true
+ "description": "Number of iterations performed"
  },
  "plan_summaries": {
  "type": "string",
- "description": "Summaries of all search plans",
- "required": true
+ "description": "Summaries of all search plans"
  },
  "context": {
  "type": "string",
  "description": "Context information from previous steps",
- "required": false,
  "default": ""
  }
  },
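
All of the removals in the hunks above follow one pattern: per-property "required": true/false flags are dropped, because JSON Schema expresses requiredness through the object-level "required" array that these schemas already carry. A minimal illustration using the third-party jsonschema package (not a dependency of these files, shown only to make the distinction concrete):

from jsonschema import ValidationError, validate

# The object-level "required" array is what validators honour; a per-property
# "required": true key is not part of JSON Schema and is simply ignored.
schema = {
    "type": "object",
    "properties": {
        "user_query": {"type": "string", "description": "The research query to analyze"},
        "context": {"type": "string", "default": ""},
    },
    "required": ["user_query"],
}

validate({"user_query": "EGFR inhibitors in NSCLC"}, schema)  # passes
try:
    validate({"context": "previous step output"}, schema)     # missing user_query
except ValidationError as err:
    print(err.message)  # 'user_query' is a required property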
@@ -0,0 +1,51 @@
+ [
+ {
+ "type": "MarkItDownTool",
+ "name": "convert_to_markdown",
+ "description": "Convert a resource described by an http:, https:, file: or data: URI to markdown.",
+ "parameter": {
+ "type": "object",
+ "properties": {
+ "uri": {
+ "type": "string",
+ "description": "URI of the resource to convert (supports http:, https:, file:, data: URIs)"
+ },
+ "output_path": {
+ "type": "string",
+ "description": "Optional output file path"
+ },
+ "enable_plugins": {
+ "type": "boolean",
+ "description": "Enable 3rd-party plugins",
+ "default": false
+ }
+ },
+ "required": ["uri"]
+ },
+ "return_schema": {
+ "type": "object",
+ "properties": {
+ "markdown_content": {
+ "type": "string",
+ "description": "The converted Markdown content"
+ },
+ "content": {
+ "type": "string",
+ "description": "The converted Markdown content (same as markdown_content, provided for convenience when no output_path is specified)"
+ },
+ "file_info": {
+ "type": "object",
+ "properties": {
+ "original_file": {"type": "string"},
+ "file_type": {"type": "string"},
+ "output_file": {"type": "string"}
+ }
+ },
+ "error": {
+ "type": "string",
+ "description": "Error message if conversion failed"
+ }
+ }
+ }
+ }
+ ]
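
The convert_to_markdown schema maps closely onto the upstream markitdown package. A hedged sketch of the call that markitdown_tool.py presumably wraps (the URI and output file below are placeholders, and the shipped code may handle them differently):

from markitdown import MarkItDown

# Convert a URL (or local file path) to Markdown text; enable_plugins mirrors
# the boolean parameter declared above.
md = MarkItDown(enable_plugins=False)
result = md.convert("https://en.wikipedia.org/wiki/Gene_expression")
markdown_content = result.text_content

# Optionally persist the result, mirroring the optional output_path parameter.
with open("output.md", "w", encoding="utf-8") as fh:
    fh.write(markdown_content)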
@@ -10,8 +10,7 @@
  "description": "List of phenotypes or symptoms",
  "items": {
  "type": "string",
- "description": "The HPO ID of the phenotype or symptom.",
- "required": true
+ "description": "The HPO ID of the phenotype or symptom."
  }
  },
  "limit": {
@@ -31,11 +31,7 @@
  }
  },
  "required": [
- "search_keywords",
- "max_results",
- "year_from",
- "year_to",
- "open_access"
+ "search_keywords"
  ]
  },
  "return_schema": {
@@ -728,13 +728,11 @@
  "properties": {
  "index": {
  "type": "integer",
- "description": "The index of the page to retrieve.",
- "required": true
+ "description": "The index of the page to retrieve."
  },
  "size": {
  "type": "integer",
- "description": "The number of items per page.",
- "required": true
+ "description": "The number of items per page."
  }
  },
  "description": "Pagination parameters."
@@ -906,13 +904,11 @@
  "properties": {
  "index": {
  "type": "integer",
- "description": "Index of the page to fetch, starting from 0.",
- "required": true
+ "description": "Index of the page to fetch, starting from 0."
  },
  "size": {
  "type": "integer",
- "description": "Number of entries per page.",
- "required": true
+ "description": "Number of entries per page."
  }
  },
  "description": "Pagination settings."
@@ -1152,13 +1148,11 @@
  "properties": {
  "index": {
  "type": "integer",
- "description": "Pagination index.",
- "required": true
+ "description": "Pagination index."
  },
  "size": {
  "type": "integer",
- "description": "Number of records to fetch per page.",
- "required": true
+ "description": "Number of records to fetch per page."
  }
  }
  }
@@ -1277,13 +1271,11 @@
  "properties": {
  "index": {
  "type": "integer",
- "description": "Pagination index.",
- "required": "True"
+ "description": "Pagination index."
  },
  "size": {
  "type": "integer",
- "description": "Pagination size.",
- "required": "True"
+ "description": "Pagination size."
  }
  },
  "description": "Pagination settings with index and size."
@@ -4,39 +4,44 @@
  "name": "ToolOutputSummarizer",
  "description": "AI-powered tool for summarizing long tool outputs, focusing on key information relevant to the original query",
  "prompt": "You are an expert at summarizing tool outputs. Your task is to analyze the provided tool output and create a concise summary that highlights the most important information relevant to the original query.\n\nTool Output to Summarize:\n{tool_output}\n\nOriginal Query Context:\n{query_context}\n\nTool Name: {tool_name}\nFocus Areas: {focus_areas}\nMaximum Summary Length: {max_length}\n\nPlease provide a well-structured summary that:\n1. Captures the key findings and results\n2. Highlights important data points and metrics\n3. Preserves critical technical details\n4. Maintains the essential structure of the original output\n5. Focuses on information most relevant to the query\n\nReturn the summary in a clear, organized format.",
- "input_arguments": ["tool_output", "query_context", "tool_name", "focus_areas", "max_length"],
+ "input_arguments": [
+ "tool_output",
+ "query_context",
+ "tool_name",
+ "focus_areas",
+ "max_length"
+ ],
  "parameter": {
  "type": "object",
  "properties": {
  "tool_output": {
  "type": "string",
- "description": "The original tool output to be summarized",
- "required": true
+ "description": "The original tool output to be summarized"
  },
  "query_context": {
  "type": "string",
- "description": "Context about the original query that triggered the tool",
- "required": true
+ "description": "Context about the original query that triggered the tool"
  },
  "tool_name": {
  "type": "string",
- "description": "Name of the tool that generated the output",
- "required": true
+ "description": "Name of the tool that generated the output"
  },
  "focus_areas": {
  "type": "string",
  "description": "Specific areas to focus on in the summary",
- "required": false,
  "default": "key_findings_and_results"
  },
  "max_length": {
  "type": "integer",
  "description": "Maximum length of the summary in characters",
- "required": false,
  "default": 32000
  }
  },
- "required": ["tool_output", "query_context", "tool_name"]
+ "required": [
+ "tool_output",
+ "query_context",
+ "tool_name"
+ ]
  },
  "configs": {
  "api_type": "CHATGPT",
@@ -56,39 +61,37 @@
  "properties": {
  "tool_output": {
  "type": "string",
- "description": "The original tool output to be summarized",
- "required": true
+ "description": "The original tool output to be summarized"
  },
  "query_context": {
  "type": "string",
- "description": "Context about the original query",
- "required": true
+ "description": "Context about the original query"
  },
  "tool_name": {
  "type": "string",
- "description": "Name of the tool that generated the output",
- "required": true
+ "description": "Name of the tool that generated the output"
  },
  "chunk_size": {
  "type": "integer",
  "description": "Size of each chunk for processing",
- "required": false,
  "default": 30000
  },
  "focus_areas": {
  "type": "string",
  "description": "Areas to focus on in summarization",
- "required": false,
  "default": "key_findings_and_results"
  },
  "max_summary_length": {
  "type": "integer",
  "description": "Maximum length of final summary",
- "required": false,
  "default": 10000
  },
  },
- "required": ["tool_output", "query_context", "tool_name"]
+ "required": [
+ "tool_output",
+ "query_context",
+ "tool_name"
+ ]
  },
  "auto_load_dependencies": true,
  "fail_on_missing_tools": false,
@@ -282,7 +282,7 @@
  "pip": "pip install numba",
  "conda": "conda install numba"
  },
- "usage_example": "import numba\nfrom numba import jit, njit, prange, cuda\nimport numpy as np\nimport time\nimport math\n\nprint('Numba - JIT Compiler for Python')\nprint('=' * 35)\n\n# Basic JIT compilation example\nprint('\\n=== Basic JIT Compilation ===')\n\n# Pure Python function\ndef python_function(x):\n total = 0\n for i in range(x):\n total += i * i\n return total\n\n# JIT compiled function\n@jit\ndef numba_function(x):\n total = 0\n for i in range(x):\n total += i * i\n return total\n\n# No-Python mode (faster)\n@njit\ndef numba_nopython(x):\n total = 0\n for i in range(x):\n total += i * i\n return total\n\n# Performance comparison\nn = 1000000\nprint(f'Computing sum of squares for {n:,} numbers')\n\n# Warm up JIT functions\nnumba_function(100)\nnumba_nopython(100)\n\n# Time Python function\nstart = time.time()\nresult_python = python_function(n)\ntime_python = time.time() - start\n\n# Time JIT function\nstart = time.time()\nresult_numba = numba_function(n)\ntime_numba = time.time() - start\n\n# Time no-Python JIT\nstart = time.time()\nresult_nopython = numba_nopython(n)\ntime_nopython = time.time() - start\n\nprint(f'Python result: {result_python}')\nprint(f'Numba result: {result_numba}')\nprint(f'No-Python result: {result_nopython}')\nprint(f'\\nPython time: {time_python:.4f} seconds')\nprint(f'Numba time: {time_numba:.4f} seconds')\nprint(f'No-Python time: {time_nopython:.4f} seconds')\nprint(f'Speedup (Numba): {time_python/time_numba:.1f}x')\nprint(f'Speedup (No-Python): {time_python/time_nopython:.1f}x')\n\n# NumPy array operations\nprint('\\n=== NumPy Array Operations ===')\n\n@njit\ndef matrix_multiply_numba(A, B):\n return np.dot(A, B)\n\n@njit\ndef element_wise_operation(arr):\n result = np.zeros_like(arr)\n for i in range(arr.shape[0]):\n for j in range(arr.shape[1]):\n result[i, j] = math.sqrt(arr[i, j]**2 + 1)\n return result\n\n# Create test arrays\nsize = 500\nA = np.random.random((size, size))\nB = np.random.random((size, size))\n\nprint(f'Matrix operations on {size}x{size} arrays')\n\n# Warm up\nmatrix_multiply_numba(A[:10, :10], B[:10, :10])\nelement_wise_operation(A[:10, :10])\n\n# Time NumPy operations\nstart = time.time()\nnumpy_result = np.dot(A, B)\ntime_numpy = time.time() - start\n\n# Time Numba operations\nstart = time.time()\nnumba_result = matrix_multiply_numba(A, B)\ntime_numba_matrix = time.time() - start\n\nprint(f'NumPy matrix multiply: {time_numpy:.4f} seconds')\nprint(f'Numba matrix multiply: {time_numba_matrix:.4f} seconds')\nprint(f'Results equal: {np.allclose(numpy_result, numba_result)}')\n\n# Parallel execution\nprint('\\n=== Parallel Execution ===')\n\n@njit(parallel=True)\ndef parallel_sum(arr):\n total = 0.0\n for i in prange(arr.shape[0]):\n total += arr[i]\n return total\n\n@njit\ndef serial_sum(arr):\n total = 0.0\n for i in range(arr.shape[0]):\n total += arr[i]\n return total\n\nlarge_array = np.random.random(10000000)\n\n# Warm up\nparallel_sum(large_array[:1000])\nserial_sum(large_array[:1000])\n\n# Time serial version\nstart = time.time()\nserial_result = serial_sum(large_array)\ntime_serial = time.time() - start\n\n# Time parallel version\nstart = time.time()\nparallel_result = parallel_sum(large_array)\ntime_parallel = time.time() - start\n\nprint(f'Array size: {len(large_array):,} elements')\nprint(f'Serial sum: {serial_result:.6f} ({time_serial:.4f} seconds)')\nprint(f'Parallel sum: {parallel_result:.6f} ({time_parallel:.4f} seconds)')\nprint(f'Parallel speedup: {time_serial/time_parallel:.1f}x')\n\n# Mathematical 
functions\nprint('\\n=== Mathematical Functions ===')\n\n@njit\ndef monte_carlo_pi(n_samples):\n count = 0\n for i in range(n_samples):\n x = np.random.random()\n y = np.random.random()\n if x*x + y*y <= 1.0:\n count += 1\n return 4.0 * count / n_samples\n\n@njit\ndef mandelbrot_point(c_real, c_imag, max_iter):\n z_real = 0.0\n z_imag = 0.0\n for i in range(max_iter):\n z_real_new = z_real*z_real - z_imag*z_imag + c_real\n z_imag_new = 2*z_real*z_imag + c_imag\n z_real = z_real_new\n z_imag = z_imag_new\n if z_real*z_real + z_imag*z_imag > 4:\n return i\n return max_iter\n\n# Monte Carlo Pi estimation\nn_samples = 1000000\nprint(f'Monte Carlo π estimation with {n_samples:,} samples')\n\nstart = time.time()\npi_estimate = monte_carlo_pi(n_samples)\ntime_mc = time.time() - start\n\nprint(f'Estimated π: {pi_estimate:.6f}')\nprint(f'Actual π: {math.pi:.6f}')\nprint(f'Error: {abs(pi_estimate - math.pi):.6f}')\nprint(f'Time: {time_mc:.4f} seconds')\n\n# Mandelbrot calculation\nprint(f'\\nMandelbrot set calculation')\nc_values = [-0.5 + 0.5j, -0.8 + 0.2j, 0.3 - 0.6j]\nmax_iterations = 1000\n\nfor c in c_values:\n iterations = mandelbrot_point(c.real, c.imag, max_iterations)\n if iterations == max_iterations:\n print(f'Point {c}: In set (>{max_iterations} iterations)')\n else:\n print(f'Point {c}: Escaped after {iterations} iterations')\n\n# Type signatures and compilation info\nprint('\\n=== Compilation Information ===')\nprint(f'Numba version: {numba.__version__}')\nprint(f'NumPy version: {np.__version__}')\n\n# Function signatures\nprint(f'\\nFunction signatures:')\nprint(f'numba_function: {numba_function.signatures}')\nprint(f'numba_nopython: {numba_nopython.signatures}')\nprint(f'parallel_sum: {parallel_sum.signatures}')\n\n# GPU example (if CUDA available)\nprint('\\n=== GPU Computing (CUDA) ===')\ntry:\n # Simple CUDA kernel example\n @cuda.jit\n def cuda_add(a, b, c):\n idx = cuda.grid(1)\n if idx < c.size:\n c[idx] = a[idx] + b[idx]\n \n # Check if CUDA is available\n if cuda.is_available():\n print('CUDA is available!')\n print(f'CUDA devices: {cuda.list_devices()}')\n \n # Small example\n n = 1000\n a = np.random.random(n).astype(np.float32)\n b = np.random.random(n).astype(np.float32)\n c = np.zeros(n, dtype=np.float32)\n \n # Configure grid and block dimensions\n threads_per_block = 128\n blocks_per_grid = (n + threads_per_block - 1) // threads_per_block\n \n print(f'Running CUDA kernel with {blocks_per_grid} blocks, {threads_per_block} threads each')\n cuda_add[blocks_per_grid, threads_per_block](a, b, c)\n \n # Verify result\n expected = a + b\n print(f'CUDA result matches NumPy: {np.allclose(c, expected)}')\n else:\n print('CUDA not available on this system')\nexcept Exception as e:\n print(f'CUDA example failed: {e}')\n\nprint('\\nNumba provides:')\nprint('• Just-in-time compilation for Python')\nprint('• Automatic parallelization with prange')\nprint('• GPU computing with CUDA support')\nprint('• NumPy array optimization')\nprint('• Minimal code changes for maximum speedup')\nprint('• Support for mathematical functions')\nprint('• Type inference and optimization')",
+ "usage_example": "import numba\nfrom numba import jit, njit, prange, cuda\nimport numpy as np\nimport time\nimport math\n\nprint('Numba - JIT Compiler for Python')\nprint('=' * 35)\n\n# Basic JIT compilation example\nprint('\\n=== Basic JIT Compilation ===')\n\n# Pure Python function\ndef python_function(x):\n total = 0\n for i in range(x):\n total += i * i\n return total\n\n# JIT compiled function\n@jit\ndef numba_function(x):\n total = 0\n for i in range(x):\n total += i * i\n return total\n\n# No-Python mode (faster)\n@njit\ndef numba_nopython(x):\n total = 0\n for i in range(x):\n total += i * i\n return total\n\n# Performance comparison\nn = 1000000\nprint(f'Computing sum of squares for {n:} numbers')\n\n# Warm up JIT functions\nnumba_function(100)\nnumba_nopython(100)\n\n# Time Python function\nstart = time.time()\nresult_python = python_function(n)\ntime_python = time.time() - start\n\n# Time JIT function\nstart = time.time()\nresult_numba = numba_function(n)\ntime_numba = time.time() - start\n\n# Time no-Python JIT\nstart = time.time()\nresult_nopython = numba_nopython(n)\ntime_nopython = time.time() - start\n\nprint(f'Python result: {result_python}')\nprint(f'Numba result: {result_numba}')\nprint(f'No-Python result: {result_nopython}')\nprint(f'\\nPython time: {time_python:.4f} seconds')\nprint(f'Numba time: {time_numba:.4f} seconds')\nprint(f'No-Python time: {time_nopython:.4f} seconds')\nprint(f'Speedup (Numba): {time_python/time_numba:.1f}x')\nprint(f'Speedup (No-Python): {time_python/time_nopython:.1f}x')\n\n# NumPy array operations\nprint('\\n=== NumPy Array Operations ===')\n\n@njit\ndef matrix_multiply_numba(A, B):\n return np.dot(A, B)\n\n@njit\ndef element_wise_operation(arr):\n result = np.zeros_like(arr)\n for i in range(arr.shape[0]):\n for j in range(arr.shape[1]):\n result[i, j] = math.sqrt(arr[i, j]**2 + 1)\n return result\n\n# Create test arrays\nsize = 500\nA = np.random.random((size, size))\nB = np.random.random((size, size))\n\nprint(f'Matrix operations on {size}x{size} arrays')\n\n# Warm up\nmatrix_multiply_numba(A[:10, :10], B[:10, :10])\nelement_wise_operation(A[:10, :10])\n\n# Time NumPy operations\nstart = time.time()\nnumpy_result = np.dot(A, B)\ntime_numpy = time.time() - start\n\n# Time Numba operations\nstart = time.time()\nnumba_result = matrix_multiply_numba(A, B)\ntime_numba_matrix = time.time() - start\n\nprint(f'NumPy matrix multiply: {time_numpy:.4f} seconds')\nprint(f'Numba matrix multiply: {time_numba_matrix:.4f} seconds')\nprint(f'Results equal: {np.allclose(numpy_result, numba_result)}')\n\n# Parallel execution\nprint('\\n=== Parallel Execution ===')\n\n@njit(parallel=True)\ndef parallel_sum(arr):\n total = 0.0\n for i in prange(arr.shape[0]):\n total += arr[i]\n return total\n\n@njit\ndef serial_sum(arr):\n total = 0.0\n for i in range(arr.shape[0]):\n total += arr[i]\n return total\n\nlarge_array = np.random.random(10000000)\n\n# Warm up\nparallel_sum(large_array[:1000])\nserial_sum(large_array[:1000])\n\n# Time serial version\nstart = time.time()\nserial_result = serial_sum(large_array)\ntime_serial = time.time() - start\n\n# Time parallel version\nstart = time.time()\nparallel_result = parallel_sum(large_array)\ntime_parallel = time.time() - start\n\nprint(f'Array size: {len(large_array):} elements')\nprint(f'Serial sum: {serial_result:.6f} ({time_serial:.4f} seconds)')\nprint(f'Parallel sum: {parallel_result:.6f} ({time_parallel:.4f} seconds)')\nprint(f'Parallel speedup: {time_serial/time_parallel:.1f}x')\n\n# Mathematical 
functions\nprint('\\n=== Mathematical Functions ===')\n\n@njit\ndef monte_carlo_pi(n_samples):\n count = 0\n for i in range(n_samples):\n x = np.random.random()\n y = np.random.random()\n if x*x + y*y <= 1.0:\n count += 1\n return 4.0 * count / n_samples\n\n@njit\ndef mandelbrot_point(c_real, c_imag, max_iter):\n z_real = 0.0\n z_imag = 0.0\n for i in range(max_iter):\n z_real_new = z_real*z_real - z_imag*z_imag + c_real\n z_imag_new = 2*z_real*z_imag + c_imag\n z_real = z_real_new\n z_imag = z_imag_new\n if z_real*z_real + z_imag*z_imag > 4:\n return i\n return max_iter\n\n# Monte Carlo Pi estimation\nn_samples = 1000000\nprint(f'Monte Carlo π estimation with {n_samples:} samples')\n\nstart = time.time()\npi_estimate = monte_carlo_pi(n_samples)\ntime_mc = time.time() - start\n\nprint(f'Estimated π: {pi_estimate:.6f}')\nprint(f'Actual π: {math.pi:.6f}')\nprint(f'Error: {abs(pi_estimate - math.pi):.6f}')\nprint(f'Time: {time_mc:.4f} seconds')\n\n# Mandelbrot calculation\nprint(f'\\nMandelbrot set calculation')\nc_values = [-0.5 + 0.5j, -0.8 + 0.2j, 0.3 - 0.6j]\nmax_iterations = 1000\n\nfor c in c_values:\n iterations = mandelbrot_point(c.real, c.imag, max_iterations)\n if iterations == max_iterations:\n print(f'Point {c}: In set (>{max_iterations} iterations)')\n else:\n print(f'Point {c}: Escaped after {iterations} iterations')\n\n# Type signatures and compilation info\nprint('\\n=== Compilation Information ===')\nprint(f'Numba version: {numba.__version__}')\nprint(f'NumPy version: {np.__version__}')\n\n# Function signatures\nprint(f'\\nFunction signatures:')\nprint(f'numba_function: {numba_function.signatures}')\nprint(f'numba_nopython: {numba_nopython.signatures}')\nprint(f'parallel_sum: {parallel_sum.signatures}')\n\n# GPU example (if CUDA available)\nprint('\\n=== GPU Computing (CUDA) ===')\ntry:\n # Simple CUDA kernel example\n @cuda.jit\n def cuda_add(a, b, c):\n idx = cuda.grid(1)\n if idx < c.size:\n c[idx] = a[idx] + b[idx]\n \n # Check if CUDA is available\n if cuda.is_available():\n print('CUDA is available!')\n print(f'CUDA devices: {cuda.list_devices()}')\n \n # Small example\n n = 1000\n a = np.random.random(n).astype(np.float32)\n b = np.random.random(n).astype(np.float32)\n c = np.zeros(n, dtype=np.float32)\n \n # Configure grid and block dimensions\n threads_per_block = 128\n blocks_per_grid = (n + threads_per_block - 1) // threads_per_block\n \n print(f'Running CUDA kernel with {blocks_per_grid} blocks, {threads_per_block} threads each')\n cuda_add[blocks_per_grid, threads_per_block](a, b, c)\n \n # Verify result\n expected = a + b\n print(f'CUDA result matches NumPy: {np.allclose(c, expected)}')\n else:\n print('CUDA not available on this system')\nexcept Exception as e:\n print(f'CUDA example failed: {e}')\n\nprint('\\nNumba provides:')\nprint('• Just-in-time compilation for Python')\nprint('• Automatic parallelization with prange')\nprint('• GPU computing with CUDA support')\nprint('• NumPy array optimization')\nprint('• Minimal code changes for maximum speedup')\nprint('• Support for mathematical functions')\nprint('• Type inference and optimization')",
  "quick_start": [
  "Install: pip install numba",
  "Import: from numba import jit, njit",
@@ -1119,7 +1119,7 @@
  "pip": "pip install ruptures",
  "conda": "conda install -c conda-forge ruptures"
  },
- "usage_example": "# ruptures change point detection demonstration\n\nimport numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt\nfrom scipy import signal\nfrom sklearn.preprocessing import StandardScaler\nimport tempfile\nimport os\n\n# Simulate ruptures functionality\ndef pelt_algorithm(data, penalty=10):\n \"\"\"Simplified PELT algorithm for change point detection\"\"\"\n n = len(data)\n F = np.full(n + 1, np.inf)\n F[0] = -penalty\n cp_candidates = [0]\n \n for t in range(1, n + 1):\n for s in cp_candidates:\n if s < t:\n segment_data = data[s:t]\n if len(segment_data) > 0:\n cost = np.var(segment_data) * len(segment_data)\n total_cost = F[s] + cost + penalty\n \n if total_cost < F[t]:\n F[t] = total_cost\n \n # Pruning step\n cp_candidates = [s for s in cp_candidates if F[s] <= F[t] - penalty]\n cp_candidates.append(t)\n \n # Backtrack to find change points\n change_points = []\n t = n\n while t > 0:\n for s in range(t):\n if s in cp_candidates:\n segment_data = data[s:t]\n if len(segment_data) > 0:\n cost = np.var(segment_data) * len(segment_data)\n if abs(F[t] - (F[s] + cost + penalty)) < 1e-10:\n if s > 0:\n change_points.append(s)\n t = s\n break\n else:\n break\n \n return sorted(change_points)\n\ndef binary_segmentation(data, max_changepoints=10):\n \"\"\"Simplified binary segmentation algorithm\"\"\"\n def find_best_split(segment_data, start_idx):\n n = len(segment_data)\n if n < 4: # Minimum segment size\n return None, -np.inf\n \n best_score = -np.inf\n best_split = None\n \n for split in range(2, n - 1):\n left = segment_data[:split]\n right = segment_data[split:]\n \n # Calculate score based on variance reduction\n total_var = np.var(segment_data) * n\n left_var = np.var(left) * len(left)\n right_var = np.var(right) * len(right)\n \n score = total_var - (left_var + right_var)\n \n if score > best_score:\n best_score = score\n best_split = start_idx + split\n \n return best_split, best_score\n \n change_points = []\n segments = [(data, 0)] # (segment_data, start_index)\n \n for _ in range(max_changepoints):\n if not segments:\n break\n \n best_segment = None\n best_split = None\n best_score = -np.inf\n \n # Find the best split among all segments\n for i, (segment_data, start_idx) in enumerate(segments):\n split, score = find_best_split(segment_data, start_idx)\n if split is not None and score > best_score:\n best_score = score\n best_split = split\n best_segment = i\n \n if best_split is None or best_score <= 0:\n break\n \n # Apply the best split\n segment_data, start_idx = segments.pop(best_segment)\n split_point = best_split - start_idx\n \n left_segment = segment_data[:split_point]\n right_segment = segment_data[split_point:]\n \n if len(left_segment) > 0:\n segments.append((left_segment, start_idx))\n if len(right_segment) > 0:\n segments.append((right_segment, best_split))\n \n change_points.append(best_split)\n \n return sorted(change_points)\n\nprint('ruptures - Change Point Detection Library')\nprint('=' * 45)\n\nprint('ruptures Features:')\nprint('• Multiple change point detection algorithms')\nprint('• PELT, Binary Segmentation, Window-based methods')\nprint('• Support for various cost functions')\nprint('• Multivariate time series analysis')\nprint('• Model selection and validation')\nprint('• Efficient implementations')\n\nprint('\\nApplications:')\nprint('• Signal processing and anomaly detection')\nprint('• Financial time series analysis')\nprint('• Genomic segmentation')\nprint('• Climate data analysis')\nprint('• Quality control in 
manufacturing')\n\n# Generate synthetic time series with change points\nprint('\\n=== Synthetic Time Series Generation ===')\n\nnp.random.seed(42)\n\n# Time series parameters\ntotal_length = 1000\ntrue_change_points = [200, 400, 650, 800]\nsegment_means = [1.0, 3.0, 0.5, 2.5, 1.8]\nsegment_stds = [0.5, 0.8, 0.3, 0.6, 0.4]\n\nprint(f'Generating time series with {len(true_change_points)} change points')\nprint(f'True change points: {true_change_points}')\nprint(f'Total length: {total_length} points')\n\n# Generate segments\ntime_series = []\ncurrent_pos = 0\n\nfor i, cp in enumerate(true_change_points + [total_length]):\n segment_length = cp - current_pos\n segment = np.random.normal(\n segment_means[i], \n segment_stds[i], \n segment_length\n )\n time_series.extend(segment)\n current_pos = cp\n\ntime_series = np.array(time_series)\ntime_points = np.arange(len(time_series))\n\nprint(f'Generated time series shape: {time_series.shape}')\nprint(f'Value range: {time_series.min():.2f} to {time_series.max():.2f}')\n\n# Add some noise and trends\nprint('\\nAdding noise and trends...')\n\n# Add noise\nnoise_level = 0.1\nnoise = np.random.normal(0, noise_level, len(time_series))\ntime_series_noisy = time_series + noise\n\n# Add slight trend\ntrend = 0.0005 * time_points\ntime_series_with_trend = time_series_noisy + trend\n\nprint(f'Noise level: {noise_level}')\nprint(f'Trend coefficient: 0.0005 per time unit')\n\n# Apply change point detection algorithms\nprint('\\n=== Change Point Detection ===')\n\n# Test different algorithms\nalgorithms = {\n 'PELT (penalty=5)': lambda x: pelt_algorithm(x, penalty=5),\n 'PELT (penalty=10)': lambda x: pelt_algorithm(x, penalty=10),\n 'PELT (penalty=20)': lambda x: pelt_algorithm(x, penalty=20),\n 'Binary Segmentation': lambda x: binary_segmentation(x, max_changepoints=8)\n}\n\nresults = {}\n\nfor algo_name, algo_func in algorithms.items():\n print(f'\\nRunning {algo_name}...')\n \n detected_cps = algo_func(time_series_with_trend)\n \n # Calculate performance metrics\n def calculate_metrics(true_cps, detected_cps, tolerance=50):\n \"\"\"Calculate precision, recall, and F1 score\"\"\"\n true_positives = 0\n \n for true_cp in true_cps:\n if any(abs(det_cp - true_cp) <= tolerance for det_cp in detected_cps):\n true_positives += 1\n \n precision = true_positives / len(detected_cps) if detected_cps else 0\n recall = true_positives / len(true_cps) if true_cps else 0\n f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0\n \n return precision, recall, f1\n \n precision, recall, f1 = calculate_metrics(true_change_points, detected_cps)\n \n results[algo_name] = {\n 'detected_cps': detected_cps,\n 'precision': precision,\n 'recall': recall,\n 'f1': f1\n }\n \n print(f' Detected change points: {detected_cps}')\n print(f' Precision: {precision:.3f}')\n print(f' Recall: {recall:.3f}')\n print(f' F1 Score: {f1:.3f}')\n\n# Compare algorithms\nprint('\\n=== Algorithm Comparison ===')\n\nperformance_df = pd.DataFrame({\n 'Algorithm': list(results.keys()),\n 'Precision': [results[algo]['precision'] for algo in results],\n 'Recall': [results[algo]['recall'] for algo in results],\n 'F1 Score': [results[algo]['f1'] for algo in results],\n 'Num Detected': [len(results[algo]['detected_cps']) for algo in results]\n})\n\nprint(performance_df.round(3))\n\n# Best algorithm\nbest_algo = performance_df.loc[performance_df['F1 Score'].idxmax(), 'Algorithm']\nprint(f'\\nBest performing algorithm: {best_algo}')\nprint(f'F1 Score: 
{performance_df.loc[performance_df[\"F1 Score\"].idxmax(), \"F1 Score\"]:.3f}')\n\n# Multivariate change point detection simulation\nprint('\\n=== Multivariate Change Point Detection ===')\n\n# Generate multivariate time series\nn_dims = 3\nmv_length = 500\nmv_change_points = [150, 300, 400]\n\nprint(f'Generating {n_dims}D time series with change points at {mv_change_points}')\n\nmv_time_series = []\ncurrent_pos = 0\n\n# Different correlation structures for each segment\ncorr_matrices = [\n np.array([[1.0, 0.2, 0.1], [0.2, 1.0, 0.3], [0.1, 0.3, 1.0]]), # Low correlation\n np.array([[1.0, 0.8, 0.6], [0.8, 1.0, 0.7], [0.6, 0.7, 1.0]]), # High correlation\n np.array([[1.0, -0.5, 0.2], [-0.5, 1.0, -0.3], [0.2, -0.3, 1.0]]), # Mixed correlation\n np.array([[1.0, 0.1, 0.9], [0.1, 1.0, 0.2], [0.9, 0.2, 1.0]]) # Selective correlation\n]\n\nfor i, cp in enumerate(mv_change_points + [mv_length]):\n segment_length = cp - current_pos\n \n # Generate correlated multivariate normal data\n mean = np.random.normal(0, 2, n_dims)\n cov = corr_matrices[i]\n \n segment = np.random.multivariate_normal(mean, cov, segment_length)\n mv_time_series.append(segment)\n \n current_pos = cp\n\nmv_time_series = np.vstack(mv_time_series)\nprint(f'Multivariate time series shape: {mv_time_series.shape}')\n\n# Detect change points in each dimension\nprint('\\nDetecting change points in each dimension:')\nmv_results = {}\n\nfor dim in range(n_dims):\n dim_data = mv_time_series[:, dim]\n detected_cps = binary_segmentation(dim_data, max_changepoints=5)\n \n precision, recall, f1 = calculate_metrics(mv_change_points, detected_cps, tolerance=25)\n \n mv_results[f'Dimension {dim}'] = {\n 'detected_cps': detected_cps,\n 'precision': precision,\n 'recall': recall,\n 'f1': f1\n }\n \n print(f' Dim {dim}: CPs = {detected_cps}, F1 = {f1:.3f}')\n\n# Aggregate multivariate detection (simple approach)\nprint('\\nAggregate multivariate detection:')\n\n# Sum of squared differences approach\nsum_sq_diff = np.sum(np.diff(mv_time_series, axis=0)**2, axis=1)\ndetected_cps_mv = binary_segmentation(sum_sq_diff, max_changepoints=5)\n\nprecision_mv, recall_mv, f1_mv = calculate_metrics(mv_change_points, detected_cps_mv, tolerance=25)\nprint(f' Aggregate CPs: {detected_cps_mv}')\nprint(f' Precision: {precision_mv:.3f}, Recall: {recall_mv:.3f}, F1: {f1_mv:.3f}')\n\n# Model selection simulation\nprint('\\n=== Model Selection ===')\n\n# Test different penalty values for PELT\npenalty_values = [1, 2, 5, 10, 15, 20, 30, 50]\nmodel_selection_results = []\n\nfor penalty in penalty_values:\n detected_cps = pelt_algorithm(time_series_with_trend, penalty=penalty)\n \n # Calculate BIC-like criterion\n n_segments = len(detected_cps) + 1\n n_params = n_segments * 2 # mean and variance for each segment\n \n # Calculate likelihood (simplified)\n log_likelihood = 0\n current_pos = 0\n \n for cp in detected_cps + [len(time_series_with_trend)]:\n segment_data = time_series_with_trend[current_pos:cp]\n if len(segment_data) > 0:\n segment_var = np.var(segment_data)\n if segment_var > 0:\n log_likelihood -= 0.5 * len(segment_data) * np.log(2 * np.pi * segment_var)\n log_likelihood -= 0.5 * len(segment_data)\n current_pos = cp\n \n bic = -2 * log_likelihood + n_params * np.log(len(time_series_with_trend))\n \n precision, recall, f1 = calculate_metrics(true_change_points, detected_cps)\n \n model_selection_results.append({\n 'penalty': penalty,\n 'n_changepoints': len(detected_cps),\n 'bic': bic,\n 'precision': precision,\n 'recall': recall,\n 'f1': f1\n })\n\nmodel_df = 
pd.DataFrame(model_selection_results)\n\nprint('Model selection results:')\nprint(model_df.round(3))\n\n# Best model by BIC\nbest_bic_idx = model_df['bic'].idxmin()\nbest_penalty = model_df.loc[best_bic_idx, 'penalty']\nprint(f'\\nBest penalty by BIC: {best_penalty}')\nprint(f'Corresponding F1 score: {model_df.loc[best_bic_idx, \"f1\"]:.3f}')\n\n# Visualization\nprint('\\n=== Visualization ===')\n\nfig, axes = plt.subplots(2, 2, figsize=(15, 10))\n\n# 1. Original time series with change points\nax1 = axes[0, 0]\nax1.plot(time_points, time_series_with_trend, 'b-', alpha=0.7, linewidth=1)\n\n# True change points\nfor cp in true_change_points:\n ax1.axvline(x=cp, color='red', linestyle='--', alpha=0.8, label='True CP' if cp == true_change_points[0] else '')\n\n# Best detected change points\nbest_detected = results[best_algo]['detected_cps']\nfor cp in best_detected:\n ax1.axvline(x=cp, color='green', linestyle=':', alpha=0.8, label='Detected CP' if cp == best_detected[0] else '')\n\nax1.set_xlabel('Time')\nax1.set_ylabel('Value')\nax1.set_title('Time Series with Change Points')\nax1.legend()\nax1.grid(True, alpha=0.3)\n\n# 2. Algorithm performance comparison\nax2 = axes[0, 1]\nmetrics = ['Precision', 'Recall', 'F1 Score']\nbar_width = 0.2\nx_pos = np.arange(len(metrics))\n\nfor i, algo in enumerate(results.keys()):\n values = [results[algo]['precision'], results[algo]['recall'], results[algo]['f1']]\n ax2.bar(x_pos + i*bar_width, values, bar_width, label=algo, alpha=0.8)\n\nax2.set_xlabel('Metrics')\nax2.set_ylabel('Score')\nax2.set_title('Algorithm Performance Comparison')\nax2.set_xticks(x_pos + bar_width * 1.5)\nax2.set_xticklabels(metrics)\nax2.legend(bbox_to_anchor=(1.05, 1), loc='upper left')\nax2.grid(True, alpha=0.3)\nax2.set_ylim(0, 1.1)\n\n# 3. Multivariate time series\nax3 = axes[1, 0]\nfor dim in range(min(n_dims, 3)):\n ax3.plot(mv_time_series[:, dim], label=f'Dimension {dim}', alpha=0.7)\n\nfor cp in mv_change_points:\n ax3.axvline(x=cp, color='red', linestyle='--', alpha=0.6)\n\nax3.set_xlabel('Time')\nax3.set_ylabel('Value')\nax3.set_title('Multivariate Time Series')\nax3.legend()\nax3.grid(True, alpha=0.3)\n\n# 4. 
Model selection (BIC vs penalty)\nax4 = axes[1, 1]\nax4.plot(model_df['penalty'], model_df['bic'], 'bo-', label='BIC')\nax4.axvline(x=best_penalty, color='red', linestyle='--', alpha=0.8, label=f'Best penalty ({best_penalty})')\n\n# Secondary y-axis for F1 score\nax4_twin = ax4.twinx()\nax4_twin.plot(model_df['penalty'], model_df['f1'], 'ro-', alpha=0.7, label='F1 Score')\n\nax4.set_xlabel('Penalty Value')\nax4.set_ylabel('BIC', color='blue')\nax4_twin.set_ylabel('F1 Score', color='red')\nax4.set_title('Model Selection: BIC vs Penalty')\nax4.legend(loc='upper left')\nax4_twin.legend(loc='upper right')\nax4.grid(True, alpha=0.3)\n\nplt.tight_layout()\n\n# Save visualization\nwith tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:\n plt.savefig(tmp.name, dpi=150, bbox_inches='tight')\n viz_file = tmp.name\n\nplt.close()\nprint(f'Change point detection visualization saved to: {viz_file}')\n\n# Summary report\nprint('\\n' + '=' * 45)\nprint('RUPTURES CHANGE POINT DETECTION SUMMARY')\nprint('=' * 45)\nprint(f'Time series length: {len(time_series_with_trend):,} points')\nprint(f'True change points: {len(true_change_points)}')\nprint(f'Best algorithm: {best_algo}')\nprint(f'Best F1 score: {max(results[algo][\"f1\"] for algo in results):.3f}')\nprint(f'\\nAlgorithm rankings by F1 score:')\nfor i, (algo, metrics) in enumerate(sorted(results.items(), key=lambda x: x[1]['f1'], reverse=True), 1):\n print(f' {i}. {algo}: {metrics[\"f1\"]:.3f}')\nprint(f'\\nMultivariate detection F1 score: {f1_mv:.3f}')\nprint(f'Optimal penalty (BIC): {best_penalty}')\n\n# Cleanup\nos.unlink(viz_file)\nprint('\\nDemo complete - temporary files cleaned up')\n\nprint('\\nruptures provides:')\nprint('• Multiple change point detection algorithms')\nprint('• PELT, Binary Segmentation, Window methods')\nprint('• Multivariate time series support')\nprint('• Model selection and validation')\nprint('• Custom cost functions')\nprint('• Efficient implementations')\nprint('• Extensive documentation and examples')\n\nprint('\\nTypical ruptures usage:')\nprint('import ruptures as rpt')\nprint('algo = rpt.Pelt(model=\"rbf\").fit(signal)')\nprint('result = algo.predict(pen=10)')",
+ "usage_example": "# ruptures change point detection demonstration\n\nimport numpy as np\nimport pandas as pd\nimport matplotlib.pyplot as plt\nfrom scipy import signal\nfrom sklearn.preprocessing import StandardScaler\nimport tempfile\nimport os\n\n# Simulate ruptures functionality\ndef pelt_algorithm(data, penalty=10):\n \"\"\"Simplified PELT algorithm for change point detection\"\"\"\n n = len(data)\n F = np.full(n + 1, np.inf)\n F[0] = -penalty\n cp_candidates = [0]\n \n for t in range(1, n + 1):\n for s in cp_candidates:\n if s < t:\n segment_data = data[s:t]\n if len(segment_data) > 0:\n cost = np.var(segment_data) * len(segment_data)\n total_cost = F[s] + cost + penalty\n \n if total_cost < F[t]:\n F[t] = total_cost\n \n # Pruning step\n cp_candidates = [s for s in cp_candidates if F[s] <= F[t] - penalty]\n cp_candidates.append(t)\n \n # Backtrack to find change points\n change_points = []\n t = n\n while t > 0:\n for s in range(t):\n if s in cp_candidates:\n segment_data = data[s:t]\n if len(segment_data) > 0:\n cost = np.var(segment_data) * len(segment_data)\n if abs(F[t] - (F[s] + cost + penalty)) < 1e-10:\n if s > 0:\n change_points.append(s)\n t = s\n break\n else:\n break\n \n return sorted(change_points)\n\ndef binary_segmentation(data, max_changepoints=10):\n \"\"\"Simplified binary segmentation algorithm\"\"\"\n def find_best_split(segment_data, start_idx):\n n = len(segment_data)\n if n < 4: # Minimum segment size\n return None, -np.inf\n \n best_score = -np.inf\n best_split = None\n \n for split in range(2, n - 1):\n left = segment_data[:split]\n right = segment_data[split:]\n \n # Calculate score based on variance reduction\n total_var = np.var(segment_data) * n\n left_var = np.var(left) * len(left)\n right_var = np.var(right) * len(right)\n \n score = total_var - (left_var + right_var)\n \n if score > best_score:\n best_score = score\n best_split = start_idx + split\n \n return best_split, best_score\n \n change_points = []\n segments = [(data, 0)] # (segment_data, start_index)\n \n for _ in range(max_changepoints):\n if not segments:\n break\n \n best_segment = None\n best_split = None\n best_score = -np.inf\n \n # Find the best split among all segments\n for i, (segment_data, start_idx) in enumerate(segments):\n split, score = find_best_split(segment_data, start_idx)\n if split is not None and score > best_score:\n best_score = score\n best_split = split\n best_segment = i\n \n if best_split is None or best_score <= 0:\n break\n \n # Apply the best split\n segment_data, start_idx = segments.pop(best_segment)\n split_point = best_split - start_idx\n \n left_segment = segment_data[:split_point]\n right_segment = segment_data[split_point:]\n \n if len(left_segment) > 0:\n segments.append((left_segment, start_idx))\n if len(right_segment) > 0:\n segments.append((right_segment, best_split))\n \n change_points.append(best_split)\n \n return sorted(change_points)\n\nprint('ruptures - Change Point Detection Library')\nprint('=' * 45)\n\nprint('ruptures Features:')\nprint('• Multiple change point detection algorithms')\nprint('• PELT, Binary Segmentation, Window-based methods')\nprint('• Support for various cost functions')\nprint('• Multivariate time series analysis')\nprint('• Model selection and validation')\nprint('• Efficient implementations')\n\nprint('\\nApplications:')\nprint('• Signal processing and anomaly detection')\nprint('• Financial time series analysis')\nprint('• Genomic segmentation')\nprint('• Climate data analysis')\nprint('• Quality control in 
manufacturing')\n\n# Generate synthetic time series with change points\nprint('\\n=== Synthetic Time Series Generation ===')\n\nnp.random.seed(42)\n\n# Time series parameters\ntotal_length = 1000\ntrue_change_points = [200, 400, 650, 800]\nsegment_means = [1.0, 3.0, 0.5, 2.5, 1.8]\nsegment_stds = [0.5, 0.8, 0.3, 0.6, 0.4]\n\nprint(f'Generating time series with {len(true_change_points)} change points')\nprint(f'True change points: {true_change_points}')\nprint(f'Total length: {total_length} points')\n\n# Generate segments\ntime_series = []\ncurrent_pos = 0\n\nfor i, cp in enumerate(true_change_points + [total_length]):\n segment_length = cp - current_pos\n segment = np.random.normal(\n segment_means[i], \n segment_stds[i], \n segment_length\n )\n time_series.extend(segment)\n current_pos = cp\n\ntime_series = np.array(time_series)\ntime_points = np.arange(len(time_series))\n\nprint(f'Generated time series shape: {time_series.shape}')\nprint(f'Value range: {time_series.min():.2f} to {time_series.max():.2f}')\n\n# Add some noise and trends\nprint('\\nAdding noise and trends...')\n\n# Add noise\nnoise_level = 0.1\nnoise = np.random.normal(0, noise_level, len(time_series))\ntime_series_noisy = time_series + noise\n\n# Add slight trend\ntrend = 0.0005 * time_points\ntime_series_with_trend = time_series_noisy + trend\n\nprint(f'Noise level: {noise_level}')\nprint(f'Trend coefficient: 0.0005 per time unit')\n\n# Apply change point detection algorithms\nprint('\\n=== Change Point Detection ===')\n\n# Test different algorithms\nalgorithms = {\n 'PELT (penalty=5)': lambda x: pelt_algorithm(x, penalty=5),\n 'PELT (penalty=10)': lambda x: pelt_algorithm(x, penalty=10),\n 'PELT (penalty=20)': lambda x: pelt_algorithm(x, penalty=20),\n 'Binary Segmentation': lambda x: binary_segmentation(x, max_changepoints=8)\n}\n\nresults = {}\n\nfor algo_name, algo_func in algorithms.items():\n print(f'\\nRunning {algo_name}...')\n \n detected_cps = algo_func(time_series_with_trend)\n \n # Calculate performance metrics\n def calculate_metrics(true_cps, detected_cps, tolerance=50):\n \"\"\"Calculate precision, recall, and F1 score\"\"\"\n true_positives = 0\n \n for true_cp in true_cps:\n if any(abs(det_cp - true_cp) <= tolerance for det_cp in detected_cps):\n true_positives += 1\n \n precision = true_positives / len(detected_cps) if detected_cps else 0\n recall = true_positives / len(true_cps) if true_cps else 0\n f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0\n \n return precision, recall, f1\n \n precision, recall, f1 = calculate_metrics(true_change_points, detected_cps)\n \n results[algo_name] = {\n 'detected_cps': detected_cps,\n 'precision': precision,\n 'recall': recall,\n 'f1': f1\n }\n \n print(f' Detected change points: {detected_cps}')\n print(f' Precision: {precision:.3f}')\n print(f' Recall: {recall:.3f}')\n print(f' F1 Score: {f1:.3f}')\n\n# Compare algorithms\nprint('\\n=== Algorithm Comparison ===')\n\nperformance_df = pd.DataFrame({\n 'Algorithm': list(results.keys()),\n 'Precision': [results[algo]['precision'] for algo in results],\n 'Recall': [results[algo]['recall'] for algo in results],\n 'F1 Score': [results[algo]['f1'] for algo in results],\n 'Num Detected': [len(results[algo]['detected_cps']) for algo in results]\n})\n\nprint(performance_df.round(3))\n\n# Best algorithm\nbest_algo = performance_df.loc[performance_df['F1 Score'].idxmax(), 'Algorithm']\nprint(f'\\nBest performing algorithm: {best_algo}')\nprint(f'F1 Score: 
{performance_df.loc[performance_df[\"F1 Score\"].idxmax(), \"F1 Score\"]:.3f}')\n\n# Multivariate change point detection simulation\nprint('\\n=== Multivariate Change Point Detection ===')\n\n# Generate multivariate time series\nn_dims = 3\nmv_length = 500\nmv_change_points = [150, 300, 400]\n\nprint(f'Generating {n_dims}D time series with change points at {mv_change_points}')\n\nmv_time_series = []\ncurrent_pos = 0\n\n# Different correlation structures for each segment\ncorr_matrices = [\n np.array([[1.0, 0.2, 0.1], [0.2, 1.0, 0.3], [0.1, 0.3, 1.0]]), # Low correlation\n np.array([[1.0, 0.8, 0.6], [0.8, 1.0, 0.7], [0.6, 0.7, 1.0]]), # High correlation\n np.array([[1.0, -0.5, 0.2], [-0.5, 1.0, -0.3], [0.2, -0.3, 1.0]]), # Mixed correlation\n np.array([[1.0, 0.1, 0.9], [0.1, 1.0, 0.2], [0.9, 0.2, 1.0]]) # Selective correlation\n]\n\nfor i, cp in enumerate(mv_change_points + [mv_length]):\n segment_length = cp - current_pos\n \n # Generate correlated multivariate normal data\n mean = np.random.normal(0, 2, n_dims)\n cov = corr_matrices[i]\n \n segment = np.random.multivariate_normal(mean, cov, segment_length)\n mv_time_series.append(segment)\n \n current_pos = cp\n\nmv_time_series = np.vstack(mv_time_series)\nprint(f'Multivariate time series shape: {mv_time_series.shape}')\n\n# Detect change points in each dimension\nprint('\\nDetecting change points in each dimension:')\nmv_results = {}\n\nfor dim in range(n_dims):\n dim_data = mv_time_series[:, dim]\n detected_cps = binary_segmentation(dim_data, max_changepoints=5)\n \n precision, recall, f1 = calculate_metrics(mv_change_points, detected_cps, tolerance=25)\n \n mv_results[f'Dimension {dim}'] = {\n 'detected_cps': detected_cps,\n 'precision': precision,\n 'recall': recall,\n 'f1': f1\n }\n \n print(f' Dim {dim}: CPs = {detected_cps}, F1 = {f1:.3f}')\n\n# Aggregate multivariate detection (simple approach)\nprint('\\nAggregate multivariate detection:')\n\n# Sum of squared differences approach\nsum_sq_diff = np.sum(np.diff(mv_time_series, axis=0)**2, axis=1)\ndetected_cps_mv = binary_segmentation(sum_sq_diff, max_changepoints=5)\n\nprecision_mv, recall_mv, f1_mv = calculate_metrics(mv_change_points, detected_cps_mv, tolerance=25)\nprint(f' Aggregate CPs: {detected_cps_mv}')\nprint(f' Precision: {precision_mv:.3f}, Recall: {recall_mv:.3f}, F1: {f1_mv:.3f}')\n\n# Model selection simulation\nprint('\\n=== Model Selection ===')\n\n# Test different penalty values for PELT\npenalty_values = [1, 2, 5, 10, 15, 20, 30, 50]\nmodel_selection_results = []\n\nfor penalty in penalty_values:\n detected_cps = pelt_algorithm(time_series_with_trend, penalty=penalty)\n \n # Calculate BIC-like criterion\n n_segments = len(detected_cps) + 1\n n_params = n_segments * 2 # mean and variance for each segment\n \n # Calculate likelihood (simplified)\n log_likelihood = 0\n current_pos = 0\n \n for cp in detected_cps + [len(time_series_with_trend)]:\n segment_data = time_series_with_trend[current_pos:cp]\n if len(segment_data) > 0:\n segment_var = np.var(segment_data)\n if segment_var > 0:\n log_likelihood -= 0.5 * len(segment_data) * np.log(2 * np.pi * segment_var)\n log_likelihood -= 0.5 * len(segment_data)\n current_pos = cp\n \n bic = -2 * log_likelihood + n_params * np.log(len(time_series_with_trend))\n \n precision, recall, f1 = calculate_metrics(true_change_points, detected_cps)\n \n model_selection_results.append({\n 'penalty': penalty,\n 'n_changepoints': len(detected_cps),\n 'bic': bic,\n 'precision': precision,\n 'recall': recall,\n 'f1': f1\n })\n\nmodel_df = 
pd.DataFrame(model_selection_results)\n\nprint('Model selection results:')\nprint(model_df.round(3))\n\n# Best model by BIC\nbest_bic_idx = model_df['bic'].idxmin()\nbest_penalty = model_df.loc[best_bic_idx, 'penalty']\nprint(f'\\nBest penalty by BIC: {best_penalty}')\nprint(f'Corresponding F1 score: {model_df.loc[best_bic_idx, \"f1\"]:.3f}')\n\n# Visualization\nprint('\\n=== Visualization ===')\n\nfig, axes = plt.subplots(2, 2, figsize=(15, 10))\n\n# 1. Original time series with change points\nax1 = axes[0, 0]\nax1.plot(time_points, time_series_with_trend, 'b-', alpha=0.7, linewidth=1)\n\n# True change points\nfor cp in true_change_points:\n ax1.axvline(x=cp, color='red', linestyle='--', alpha=0.8, label='True CP' if cp == true_change_points[0] else '')\n\n# Best detected change points\nbest_detected = results[best_algo]['detected_cps']\nfor cp in best_detected:\n ax1.axvline(x=cp, color='green', linestyle=':', alpha=0.8, label='Detected CP' if cp == best_detected[0] else '')\n\nax1.set_xlabel('Time')\nax1.set_ylabel('Value')\nax1.set_title('Time Series with Change Points')\nax1.legend()\nax1.grid(True, alpha=0.3)\n\n# 2. Algorithm performance comparison\nax2 = axes[0, 1]\nmetrics = ['Precision', 'Recall', 'F1 Score']\nbar_width = 0.2\nx_pos = np.arange(len(metrics))\n\nfor i, algo in enumerate(results.keys()):\n values = [results[algo]['precision'], results[algo]['recall'], results[algo]['f1']]\n ax2.bar(x_pos + i*bar_width, values, bar_width, label=algo, alpha=0.8)\n\nax2.set_xlabel('Metrics')\nax2.set_ylabel('Score')\nax2.set_title('Algorithm Performance Comparison')\nax2.set_xticks(x_pos + bar_width * 1.5)\nax2.set_xticklabels(metrics)\nax2.legend(bbox_to_anchor=(1.05, 1), loc='upper left')\nax2.grid(True, alpha=0.3)\nax2.set_ylim(0, 1.1)\n\n# 3. Multivariate time series\nax3 = axes[1, 0]\nfor dim in range(min(n_dims, 3)):\n ax3.plot(mv_time_series[:, dim], label=f'Dimension {dim}', alpha=0.7)\n\nfor cp in mv_change_points:\n ax3.axvline(x=cp, color='red', linestyle='--', alpha=0.6)\n\nax3.set_xlabel('Time')\nax3.set_ylabel('Value')\nax3.set_title('Multivariate Time Series')\nax3.legend()\nax3.grid(True, alpha=0.3)\n\n# 4. 
Model selection (BIC vs penalty)\nax4 = axes[1, 1]\nax4.plot(model_df['penalty'], model_df['bic'], 'bo-', label='BIC')\nax4.axvline(x=best_penalty, color='red', linestyle='--', alpha=0.8, label=f'Best penalty ({best_penalty})')\n\n# Secondary y-axis for F1 score\nax4_twin = ax4.twinx()\nax4_twin.plot(model_df['penalty'], model_df['f1'], 'ro-', alpha=0.7, label='F1 Score')\n\nax4.set_xlabel('Penalty Value')\nax4.set_ylabel('BIC', color='blue')\nax4_twin.set_ylabel('F1 Score', color='red')\nax4.set_title('Model Selection: BIC vs Penalty')\nax4.legend(loc='upper left')\nax4_twin.legend(loc='upper right')\nax4.grid(True, alpha=0.3)\n\nplt.tight_layout()\n\n# Save visualization\nwith tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:\n plt.savefig(tmp.name, dpi=150, bbox_inches='tight')\n viz_file = tmp.name\n\nplt.close()\nprint(f'Change point detection visualization saved to: {viz_file}')\n\n# Summary report\nprint('\\n' + '=' * 45)\nprint('RUPTURES CHANGE POINT DETECTION SUMMARY')\nprint('=' * 45)\nprint(f'Time series length: {len(time_series_with_trend):} points')\nprint(f'True change points: {len(true_change_points)}')\nprint(f'Best algorithm: {best_algo}')\nprint(f'Best F1 score: {max(results[algo][\"f1\"] for algo in results):.3f}')\nprint(f'\\nAlgorithm rankings by F1 score:')\nfor i, (algo, metrics) in enumerate(sorted(results.items(), key=lambda x: x[1]['f1'], reverse=True), 1):\n print(f' {i}. {algo}: {metrics[\"f1\"]:.3f}')\nprint(f'\\nMultivariate detection F1 score: {f1_mv:.3f}')\nprint(f'Optimal penalty (BIC): {best_penalty}')\n\n# Cleanup\nos.unlink(viz_file)\nprint('\\nDemo complete - temporary files cleaned up')\n\nprint('\\nruptures provides:')\nprint('• Multiple change point detection algorithms')\nprint('• PELT, Binary Segmentation, Window methods')\nprint('• Multivariate time series support')\nprint('• Model selection and validation')\nprint('• Custom cost functions')\nprint('• Efficient implementations')\nprint('• Extensive documentation and examples')\n\nprint('\\nTypical ruptures usage:')\nprint('import ruptures as rpt')\nprint('algo = rpt.Pelt(model=\"rbf\").fit(signal)')\nprint('result = algo.predict(pen=10)')",
  "quick_start": [
  "Install: pip install ruptures",
  "Import: import ruptures as rpt",