victor-dataanalysis 0.5.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- victor_dataanalysis/__init__.py +27 -0
- victor_dataanalysis/assistant.py +189 -0
- victor_dataanalysis/capabilities.py +846 -0
- victor_dataanalysis/enrichment.py +410 -0
- victor_dataanalysis/escape_hatches.py +280 -0
- victor_dataanalysis/handlers.py +971 -0
- victor_dataanalysis/mode_config.py +138 -0
- victor_dataanalysis/prompts.py +195 -0
- victor_dataanalysis/safety.py +433 -0
- victor_dataanalysis/tool_dependencies.py +277 -0
- victor_dataanalysis-0.5.6.dist-info/METADATA +97 -0
- victor_dataanalysis-0.5.6.dist-info/RECORD +16 -0
- victor_dataanalysis-0.5.6.dist-info/WHEEL +5 -0
- victor_dataanalysis-0.5.6.dist-info/entry_points.txt +2 -0
- victor_dataanalysis-0.5.6.dist-info/licenses/LICENSE +190 -0
- victor_dataanalysis-0.5.6.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""Data Analysis Vertical Package - Complete implementation with extensions.
|
|
2
|
+
|
|
3
|
+
Competitive use case: ChatGPT Data Analysis, Claude Artifacts, Jupyter AI, Code Interpreter.
|
|
4
|
+
|
|
5
|
+
This vertical provides:
|
|
6
|
+
- Data exploration and profiling
|
|
7
|
+
- Statistical analysis and visualization
|
|
8
|
+
- Machine learning model training
|
|
9
|
+
- Report generation with insights
|
|
10
|
+
- CSV/Excel/JSON data processing
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from victor_dataanalysis.assistant import DataAnalysisAssistant
|
|
14
|
+
from victor_dataanalysis.prompts import DataAnalysisPromptContributor
|
|
15
|
+
from victor_dataanalysis.mode_config import DataAnalysisModeConfigProvider
|
|
16
|
+
from victor_dataanalysis.safety import DataAnalysisSafetyExtension
|
|
17
|
+
from victor_dataanalysis.tool_dependencies import DataAnalysisToolDependencyProvider
|
|
18
|
+
from victor_dataanalysis.capabilities import DataAnalysisCapabilityProvider
|
|
19
|
+
|
|
20
|
+
__all__ = [
|
|
21
|
+
"DataAnalysisAssistant",
|
|
22
|
+
"DataAnalysisPromptContributor",
|
|
23
|
+
"DataAnalysisModeConfigProvider",
|
|
24
|
+
"DataAnalysisSafetyExtension",
|
|
25
|
+
"DataAnalysisToolDependencyProvider",
|
|
26
|
+
"DataAnalysisCapabilityProvider",
|
|
27
|
+
]
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
"""Data Analysis Assistant - Complete vertical for data exploration and insights.
|
|
2
|
+
|
|
3
|
+
Competitive positioning: ChatGPT Data Analysis, Claude Artifacts, Jupyter AI.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from typing import Any, Dict, List, Optional
|
|
7
|
+
|
|
8
|
+
from victor.core.verticals.base import StageDefinition, VerticalBase
|
|
9
|
+
from victor.core.verticals.protocols import (
|
|
10
|
+
ModeConfigProviderProtocol,
|
|
11
|
+
PromptContributorProtocol,
|
|
12
|
+
SafetyExtensionProtocol,
|
|
13
|
+
TieredToolConfig,
|
|
14
|
+
ToolDependencyProviderProtocol,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
# Phase 3: Import framework capabilities
|
|
18
|
+
from victor.framework.capabilities import FileOperationsCapability
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DataAnalysisAssistant(VerticalBase):
|
|
22
|
+
"""Data analysis assistant for exploration, visualization, and insights.
|
|
23
|
+
|
|
24
|
+
Competitive with: ChatGPT Data Analysis, Claude Artifacts, Jupyter AI.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
name = "dataanalysis"
|
|
28
|
+
description = "Data exploration, statistical analysis, visualization, and ML insights"
|
|
29
|
+
version = "1.0.0"
|
|
30
|
+
|
|
31
|
+
# Phase 3: Framework file operations capability (read, write, edit, grep)
|
|
32
|
+
_file_ops = FileOperationsCapability()
|
|
33
|
+
|
|
34
|
+
@classmethod
|
|
35
|
+
def get_tools(cls) -> List[str]:
|
|
36
|
+
"""Get the list of tools for data analysis tasks.
|
|
37
|
+
|
|
38
|
+
Phase 3: Uses framework FileOperationsCapability for common file operations
|
|
39
|
+
to reduce code duplication and maintain consistency across verticals.
|
|
40
|
+
|
|
41
|
+
Uses canonical tool names from victor.tools.tool_names.
|
|
42
|
+
"""
|
|
43
|
+
from victor.tools.tool_names import ToolNames
|
|
44
|
+
|
|
45
|
+
# Start with framework file operations (read, write, edit, grep)
|
|
46
|
+
tools = cls._file_ops.get_tool_list()
|
|
47
|
+
|
|
48
|
+
# Add data analysis-specific tools
|
|
49
|
+
tools.extend(
|
|
50
|
+
[
|
|
51
|
+
# Directory listing for data file exploration
|
|
52
|
+
ToolNames.LS, # list_directory → ls
|
|
53
|
+
# Python/Shell execution for analysis
|
|
54
|
+
ToolNames.SHELL, # bash → shell (for running Python scripts)
|
|
55
|
+
# Code generation and search
|
|
56
|
+
ToolNames.CODE_SEARCH, # Semantic code search
|
|
57
|
+
ToolNames.OVERVIEW, # codebase_overview → overview
|
|
58
|
+
ToolNames.GRAPH, # Code graph analysis (PageRank, dependencies)
|
|
59
|
+
# Web for datasets and documentation
|
|
60
|
+
ToolNames.WEB_SEARCH, # Web search (internet search)
|
|
61
|
+
ToolNames.WEB_FETCH, # Fetch URL content
|
|
62
|
+
]
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
return tools
|
|
66
|
+
|
|
67
|
+
@classmethod
|
|
68
|
+
def get_system_prompt(cls) -> str:
|
|
69
|
+
"""Get the system prompt for data analysis tasks."""
|
|
70
|
+
return cls._get_system_prompt()
|
|
71
|
+
|
|
72
|
+
@classmethod
|
|
73
|
+
def get_stages(cls) -> Dict[str, StageDefinition]:
|
|
74
|
+
"""Get Data Analysis-specific stage definitions.
|
|
75
|
+
|
|
76
|
+
Uses canonical tool names from victor.tools.tool_names.
|
|
77
|
+
"""
|
|
78
|
+
from victor.tools.tool_names import ToolNames
|
|
79
|
+
|
|
80
|
+
return {
|
|
81
|
+
"INITIAL": StageDefinition(
|
|
82
|
+
name="INITIAL",
|
|
83
|
+
description="Understanding the data and analysis goals",
|
|
84
|
+
tools={ToolNames.READ, ToolNames.LS, ToolNames.OVERVIEW},
|
|
85
|
+
keywords=["what", "data", "analyze", "understand", "explore"],
|
|
86
|
+
next_stages={"DATA_LOADING", "EXPLORATION"},
|
|
87
|
+
),
|
|
88
|
+
"DATA_LOADING": StageDefinition(
|
|
89
|
+
name="DATA_LOADING",
|
|
90
|
+
description="Loading and validating data files",
|
|
91
|
+
tools={ToolNames.READ, ToolNames.SHELL, ToolNames.WRITE},
|
|
92
|
+
keywords=["load", "import", "read", "open", "fetch"],
|
|
93
|
+
next_stages={"EXPLORATION", "CLEANING"},
|
|
94
|
+
),
|
|
95
|
+
"EXPLORATION": StageDefinition(
|
|
96
|
+
name="EXPLORATION",
|
|
97
|
+
description="Exploratory data analysis and profiling",
|
|
98
|
+
tools={ToolNames.SHELL, ToolNames.READ, ToolNames.WRITE},
|
|
99
|
+
keywords=["explore", "profile", "describe", "summary", "statistics"],
|
|
100
|
+
next_stages={"CLEANING", "ANALYSIS"},
|
|
101
|
+
),
|
|
102
|
+
"CLEANING": StageDefinition(
|
|
103
|
+
name="CLEANING",
|
|
104
|
+
description="Data cleaning and transformation",
|
|
105
|
+
tools={ToolNames.SHELL, ToolNames.WRITE, ToolNames.EDIT},
|
|
106
|
+
keywords=["clean", "transform", "fix", "handle", "remove"],
|
|
107
|
+
next_stages={"ANALYSIS", "EXPLORATION"},
|
|
108
|
+
),
|
|
109
|
+
"ANALYSIS": StageDefinition(
|
|
110
|
+
name="ANALYSIS",
|
|
111
|
+
description="Statistical analysis and modeling",
|
|
112
|
+
tools={ToolNames.SHELL, ToolNames.WRITE, ToolNames.READ},
|
|
113
|
+
keywords=["analyze", "model", "correlate", "test", "predict"],
|
|
114
|
+
next_stages={"VISUALIZATION", "REPORTING"},
|
|
115
|
+
),
|
|
116
|
+
"VISUALIZATION": StageDefinition(
|
|
117
|
+
name="VISUALIZATION",
|
|
118
|
+
description="Creating charts and visualizations",
|
|
119
|
+
tools={ToolNames.SHELL, ToolNames.WRITE},
|
|
120
|
+
keywords=["plot", "chart", "visualize", "graph", "figure"],
|
|
121
|
+
next_stages={"REPORTING", "ANALYSIS"},
|
|
122
|
+
),
|
|
123
|
+
"REPORTING": StageDefinition(
|
|
124
|
+
name="REPORTING",
|
|
125
|
+
description="Generating insights and reports",
|
|
126
|
+
tools={ToolNames.WRITE, ToolNames.EDIT, ToolNames.READ},
|
|
127
|
+
keywords=["report", "summarize", "document", "present"],
|
|
128
|
+
next_stages={"COMPLETION"},
|
|
129
|
+
),
|
|
130
|
+
"COMPLETION": StageDefinition(
|
|
131
|
+
name="COMPLETION",
|
|
132
|
+
description="Finalizing analysis deliverables",
|
|
133
|
+
tools={ToolNames.WRITE, ToolNames.READ},
|
|
134
|
+
keywords=["done", "complete", "finish", "final"],
|
|
135
|
+
next_stages=set(),
|
|
136
|
+
),
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
    @classmethod
    def _get_system_prompt(cls) -> str:
        """Return the static system prompt text for the data analysis vertical.

        The prompt covers capabilities, the six-step analysis workflow, code
        standards, output format, and privacy/ethics rules. The string below is
        runtime behavior — do not reword it casually.
        """
        return """You are a data analysis assistant specializing in exploration, statistics, and visualization.

## Core Capabilities

1. **Data Loading**: CSV, Excel, JSON, Parquet, SQL databases
2. **Exploration**: Profiling, summary statistics, distribution analysis
3. **Cleaning**: Missing values, outliers, type conversion, normalization
4. **Analysis**: Correlation, regression, hypothesis testing, clustering
5. **Visualization**: matplotlib, seaborn, plotly for charts and dashboards
6. **ML**: scikit-learn for classification, regression, clustering

## Analysis Workflow

1. **LOAD**: Read data, check structure, identify types
2. **EXPLORE**: Summary stats, distributions, missing values
3. **CLEAN**: Handle nulls, outliers, type issues
4. **ANALYZE**: Apply statistical methods, test hypotheses
5. **VISUALIZE**: Create informative charts
6. **REPORT**: Summarize insights with evidence

## Code Standards

- Always use pandas for data manipulation
- Include comments explaining methodology
- Handle missing data explicitly
- Use descriptive variable names
- Save intermediate results for reproducibility

## Output Format

When presenting analysis:
1. Start with data overview (shape, types, missing)
2. Show key statistics with context
3. Include visualizations with captions
4. State insights with supporting evidence
5. Note limitations and assumptions
6. Provide reproducible code

## Privacy and Ethics

- Never expose personally identifiable information (PII)
- Anonymize sensitive columns before analysis
- Note potential biases in data
- Be transparent about limitations
"""
|
|
186
|
+
|
|
187
|
+
# =========================================================================
|
|
188
|
+
# New Framework Integrations (Workflows, RL, Teams)
|
|
189
|
+
# =========================================================================
|