victor-dataanalysis 0.5.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,27 @@
1
"""Data Analysis Vertical Package - Complete implementation with extensions.

Competitive use case: ChatGPT Data Analysis, Claude Artifacts, Jupyter AI, Code Interpreter.

This vertical provides:
- Data exploration and profiling
- Statistical analysis and visualization
- Machine learning model training
- Report generation with insights
- CSV/Excel/JSON data processing
"""

# Re-export the vertical's extension points so consumers can import them
# from the package root (``from victor_dataanalysis import ...``) instead of
# reaching into the individual submodules.
from victor_dataanalysis.assistant import DataAnalysisAssistant
from victor_dataanalysis.prompts import DataAnalysisPromptContributor
from victor_dataanalysis.mode_config import DataAnalysisModeConfigProvider
from victor_dataanalysis.safety import DataAnalysisSafetyExtension
from victor_dataanalysis.tool_dependencies import DataAnalysisToolDependencyProvider
from victor_dataanalysis.capabilities import DataAnalysisCapabilityProvider

# Explicit public API: exactly the six re-exported extension classes above.
__all__ = [
    "DataAnalysisAssistant",
    "DataAnalysisPromptContributor",
    "DataAnalysisModeConfigProvider",
    "DataAnalysisSafetyExtension",
    "DataAnalysisToolDependencyProvider",
    "DataAnalysisCapabilityProvider",
]
@@ -0,0 +1,189 @@
1
+ """Data Analysis Assistant - Complete vertical for data exploration and insights.
2
+
3
+ Competitive positioning: ChatGPT Data Analysis, Claude Artifacts, Jupyter AI.
4
+ """
5
+
6
+ from typing import Any, Dict, List, Optional
7
+
8
+ from victor.core.verticals.base import StageDefinition, VerticalBase
9
+ from victor.core.verticals.protocols import (
10
+ ModeConfigProviderProtocol,
11
+ PromptContributorProtocol,
12
+ SafetyExtensionProtocol,
13
+ TieredToolConfig,
14
+ ToolDependencyProviderProtocol,
15
+ )
16
+
17
+ # Phase 3: Import framework capabilities
18
+ from victor.framework.capabilities import FileOperationsCapability
19
+
20
+
21
class DataAnalysisAssistant(VerticalBase):
    """Data analysis assistant for exploration, visualization, and insights.

    Competitive with: ChatGPT Data Analysis, Claude Artifacts, Jupyter AI.

    The vertical is configured declaratively: a tool list (``get_tools``),
    a stage graph (``get_stages``), and a system prompt
    (``get_system_prompt``). All entry points are classmethods; no
    per-instance state is read by the methods defined here.
    """

    # Vertical identity used by the framework's registry.
    name = "dataanalysis"
    description = "Data exploration, statistical analysis, visualization, and ML insights"
    version = "1.0.0"

    # Phase 3: Framework file operations capability (read, write, edit, grep).
    # Shared at class level — treat as read-only configuration; never mutate
    # the list it hands out (see the defensive copy in get_tools).
    _file_ops = FileOperationsCapability()

    @classmethod
    def get_tools(cls) -> List[str]:
        """Get the list of tools for data analysis tasks.

        Phase 3: Uses framework FileOperationsCapability for common file operations
        to reduce code duplication and maintain consistency across verticals.

        Uses canonical tool names from victor.tools.tool_names.

        Returns:
            A fresh list of canonical tool names; callers may mutate it freely.
        """
        # Imported locally (mirrors get_stages) — presumably to avoid an
        # import cycle at module load time; TODO confirm.
        from victor.tools.tool_names import ToolNames

        # Start with framework file operations (read, write, edit, grep).
        # Copy defensively: if the capability returns a reference to its own
        # internal list, extending it in place would grow shared state on
        # every call. list() is a no-op cost when a copy is already returned.
        tools = list(cls._file_ops.get_tool_list())

        # Add data analysis-specific tools
        tools.extend(
            [
                # Directory listing for data file exploration
                ToolNames.LS,  # list_directory → ls
                # Python/Shell execution for analysis
                ToolNames.SHELL,  # bash → shell (for running Python scripts)
                # Code generation and search
                ToolNames.CODE_SEARCH,  # Semantic code search
                ToolNames.OVERVIEW,  # codebase_overview → overview
                ToolNames.GRAPH,  # Code graph analysis (PageRank, dependencies)
                # Web for datasets and documentation
                ToolNames.WEB_SEARCH,  # Web search (internet search)
                ToolNames.WEB_FETCH,  # Fetch URL content
            ]
        )

        return tools

    @classmethod
    def get_system_prompt(cls) -> str:
        """Get the system prompt for data analysis tasks."""
        # Thin public wrapper; the prompt text lives in _get_system_prompt.
        return cls._get_system_prompt()

    @classmethod
    def get_stages(cls) -> Dict[str, StageDefinition]:
        """Get Data Analysis-specific stage definitions.

        Uses canonical tool names from victor.tools.tool_names.

        Returns:
            Mapping of stage name → StageDefinition describing the analysis
            workflow graph: INITIAL → DATA_LOADING/EXPLORATION → CLEANING →
            ANALYSIS → VISUALIZATION → REPORTING → COMPLETION (terminal).
        """
        from victor.tools.tool_names import ToolNames

        return {
            "INITIAL": StageDefinition(
                name="INITIAL",
                description="Understanding the data and analysis goals",
                tools={ToolNames.READ, ToolNames.LS, ToolNames.OVERVIEW},
                keywords=["what", "data", "analyze", "understand", "explore"],
                next_stages={"DATA_LOADING", "EXPLORATION"},
            ),
            "DATA_LOADING": StageDefinition(
                name="DATA_LOADING",
                description="Loading and validating data files",
                tools={ToolNames.READ, ToolNames.SHELL, ToolNames.WRITE},
                keywords=["load", "import", "read", "open", "fetch"],
                next_stages={"EXPLORATION", "CLEANING"},
            ),
            "EXPLORATION": StageDefinition(
                name="EXPLORATION",
                description="Exploratory data analysis and profiling",
                tools={ToolNames.SHELL, ToolNames.READ, ToolNames.WRITE},
                keywords=["explore", "profile", "describe", "summary", "statistics"],
                next_stages={"CLEANING", "ANALYSIS"},
            ),
            "CLEANING": StageDefinition(
                name="CLEANING",
                description="Data cleaning and transformation",
                tools={ToolNames.SHELL, ToolNames.WRITE, ToolNames.EDIT},
                keywords=["clean", "transform", "fix", "handle", "remove"],
                next_stages={"ANALYSIS", "EXPLORATION"},
            ),
            "ANALYSIS": StageDefinition(
                name="ANALYSIS",
                description="Statistical analysis and modeling",
                tools={ToolNames.SHELL, ToolNames.WRITE, ToolNames.READ},
                keywords=["analyze", "model", "correlate", "test", "predict"],
                next_stages={"VISUALIZATION", "REPORTING"},
            ),
            "VISUALIZATION": StageDefinition(
                name="VISUALIZATION",
                description="Creating charts and visualizations",
                tools={ToolNames.SHELL, ToolNames.WRITE},
                keywords=["plot", "chart", "visualize", "graph", "figure"],
                next_stages={"REPORTING", "ANALYSIS"},
            ),
            "REPORTING": StageDefinition(
                name="REPORTING",
                description="Generating insights and reports",
                tools={ToolNames.WRITE, ToolNames.EDIT, ToolNames.READ},
                keywords=["report", "summarize", "document", "present"],
                next_stages={"COMPLETION"},
            ),
            "COMPLETION": StageDefinition(
                name="COMPLETION",
                description="Finalizing analysis deliverables",
                tools={ToolNames.WRITE, ToolNames.READ},
                keywords=["done", "complete", "finish", "final"],
                # Terminal stage: no outgoing transitions.
                next_stages=set(),
            ),
        }

    @classmethod
    def _get_system_prompt(cls) -> str:
        """Return the verbatim system prompt text for this vertical."""
        return """You are a data analysis assistant specializing in exploration, statistics, and visualization.

## Core Capabilities

1. **Data Loading**: CSV, Excel, JSON, Parquet, SQL databases
2. **Exploration**: Profiling, summary statistics, distribution analysis
3. **Cleaning**: Missing values, outliers, type conversion, normalization
4. **Analysis**: Correlation, regression, hypothesis testing, clustering
5. **Visualization**: matplotlib, seaborn, plotly for charts and dashboards
6. **ML**: scikit-learn for classification, regression, clustering

## Analysis Workflow

1. **LOAD**: Read data, check structure, identify types
2. **EXPLORE**: Summary stats, distributions, missing values
3. **CLEAN**: Handle nulls, outliers, type issues
4. **ANALYZE**: Apply statistical methods, test hypotheses
5. **VISUALIZE**: Create informative charts
6. **REPORT**: Summarize insights with evidence

## Code Standards

- Always use pandas for data manipulation
- Include comments explaining methodology
- Handle missing data explicitly
- Use descriptive variable names
- Save intermediate results for reproducibility

## Output Format

When presenting analysis:
1. Start with data overview (shape, types, missing)
2. Show key statistics with context
3. Include visualizations with captions
4. State insights with supporting evidence
5. Note limitations and assumptions
6. Provide reproducible code

## Privacy and Ethics

- Never expose personally identifiable information (PII)
- Anonymize sensitive columns before analysis
- Note potential biases in data
- Be transparent about limitations
"""

    # =========================================================================
    # New Framework Integrations (Workflows, RL, Teams)
    # =========================================================================