statchat-app 1.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- statchat_app-1.0.2/PKG-INFO +251 -0
- statchat_app-1.0.2/README.md +229 -0
- statchat_app-1.0.2/pyproject.toml +84 -0
- statchat_app-1.0.2/setup.cfg +4 -0
- statchat_app-1.0.2/statchat/__init__.py +0 -0
- statchat_app-1.0.2/statchat/__main__.py +172 -0
- statchat_app-1.0.2/statchat/assets/icon.ico +0 -0
- statchat_app-1.0.2/statchat/assets/icon.png +0 -0
- statchat_app-1.0.2/statchat/cli/__init__.py +0 -0
- statchat_app-1.0.2/statchat/cli/runner.py +197 -0
- statchat_app-1.0.2/statchat/core/__init__.py +0 -0
- statchat_app-1.0.2/statchat/core/adjuster.py +265 -0
- statchat_app-1.0.2/statchat/core/analyzer.py +181 -0
- statchat_app-1.0.2/statchat/core/cleaner.py +142 -0
- statchat_app-1.0.2/statchat/core/llm_backend.py +257 -0
- statchat_app-1.0.2/statchat/core/loader.py +59 -0
- statchat_app-1.0.2/statchat/core/reporter.py +605 -0
- statchat_app-1.0.2/statchat/gui/__init__.py +0 -0
- statchat_app-1.0.2/statchat/gui/app.py +678 -0
- statchat_app-1.0.2/statchat/gui/chat_panel.py +559 -0
- statchat_app-1.0.2/statchat/gui/settings_dialog.py +269 -0
- statchat_app-1.0.2/statchat/icon.ico +0 -0
- statchat_app-1.0.2/statchat_app.egg-info/PKG-INFO +251 -0
- statchat_app-1.0.2/statchat_app.egg-info/SOURCES.txt +26 -0
- statchat_app-1.0.2/statchat_app.egg-info/dependency_links.txt +1 -0
- statchat_app-1.0.2/statchat_app.egg-info/entry_points.txt +2 -0
- statchat_app-1.0.2/statchat_app.egg-info/requires.txt +17 -0
- statchat_app-1.0.2/statchat_app.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: statchat_app
|
|
3
|
+
Version: 1.0.2
|
|
4
|
+
Summary: Data cleaning, normalization, statistical analysis and AI-powered iterative adjustment
|
|
5
|
+
Author-email: Stat Chat <statchat@example.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Keywords: statistics,data-analysis,data-cleaning,gui,llm,normalization
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Science/Research
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
17
|
+
Classifier: Topic :: Scientific/Engineering :: Visualization
|
|
18
|
+
Classifier: Topic :: Utilities
|
|
19
|
+
Requires-Python: >=3.11
|
|
20
|
+
Description-Content-Type: text/markdown
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
|
|
23
|
+
# Stat Chat
|
|
24
|
+
|
|
25
|
+
**Data Cleaning · Normalization · Statistical Analysis · AI-Powered Iterative Adjustment · PDF Reports**
|
|
26
|
+
|
|
27
|
+
A Python tool with both a **GUI** (Tkinter) and a **CLI** for analysing tabular data from CSV or Excel files. Natural-language dataset adjustment and annotated-report parsing are powered by the Claude API or a local LM Studio model.
|
|
28
|
+
|
|
29
|
+
---
|
|
30
|
+
|
|
31
|
+
## Installation
|
|
32
|
+
|
|
33
|
+
```bash
|
|
34
|
+
pip install statchat-app
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
> Tkinter ships with standard Python on Windows/macOS. On Debian/Ubuntu Linux, install it with `sudo apt install python3-tk`.
|
|
38
|
+
|
|
39
|
+
---
|
|
40
|
+
|
|
41
|
+
## Launch
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
statchat # GUI
|
|
45
|
+
statchat --cli --help # CLI
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## GUI Walkthrough
|
|
51
|
+
|
|
52
|
+
1. **Load CSV / Excel** — opens a file picker; the Data Preview tab fills instantly.
|
|
53
|
+
2. **Data Cleaning** — tick any combination of:
|
|
54
|
+
- Drop duplicate rows
|
|
55
|
+
- Drop rows with null values
|
|
56
|
+
- Fill nulls (mean / median / mode / zero)
|
|
57
|
+
3. **Normalization** — choose one method:
|
|
58
|
+
- **Z-score** — standardise with custom target mean & std (default 0, 1)
|
|
59
|
+
- **Min-Max** — scale to [0, 1]
|
|
60
|
+
- **Robust** — median/IQR scaling (outlier-resistant)
|
|
61
|
+
4. **Analysis Metrics** — tick any combination:
|
|
62
|
+
- Measures of Central Tendency (mean, median, mode)
|
|
63
|
+
- Measures of Dispersion (std dev, variance, IQR, range, CV)
|
|
64
|
+
- Shape stats (skewness, kurtosis)
|
|
65
|
+
- Percentile Statistics (P5–P95)
|
|
66
|
+
- Normality Tests (Shapiro-Wilk)
|
|
67
|
+
- Correlation Matrix (heatmap in PDF)
|
|
68
|
+
- ROC-AUC (enter the binary target column name)
|
|
69
|
+
5. **Run Analysis** — results appear in the *Analysis Results* tab.
|
|
70
|
+
6. **Save Cleaned Data** — exports as CSV, Excel, or JSON.
|
|
71
|
+
7. **Export PDF Report** — full styled report with tables and charts.
|
|
72
|
+
8. **✦ Adjust Data** — iteratively modify the dataset using plain English or annotated images (see below).
|
|
73
|
+
|
|
74
|
+
---
|
|
75
|
+
|
|
76
|
+
## AI-Powered Data Adjustment
|
|
77
|
+
|
|
78
|
+
The **✦ Adjust Data** tab lets you modify the dataset conversationally after analysis, without writing any code.
|
|
79
|
+
|
|
80
|
+
### Text instructions
|
|
81
|
+
|
|
82
|
+
Type any natural-language instruction and press **Send**:
|
|
83
|
+
|
|
84
|
+
> *"Add $1000 to each value in spend"*
|
|
85
|
+
> *"Multiply income by 1.1"*
|
|
86
|
+
> *"Clip spend between 0 and 5000"*
|
|
87
|
+
> *"Remove rows where age < 18"*
|
|
88
|
+
> *"Log-transform spend"*
|
|
89
|
+
> *"Rename 'score' to 'risk_score'"*
|
|
90
|
+
> *"Fill nulls in income with 0"*
|
|
91
|
+
|
|
92
|
+
Stat Chat sends the instruction to the configured LLM, parses the response into typed operations, shows you a **diff preview** (columns affected, mean before/after), and waits for you to **Accept** or **Discard** before applying the change.
|
|
93
|
+
|
|
94
|
+
### Annotated report images (vision)
|
|
95
|
+
|
|
96
|
+
Print or screenshot your PDF report, annotate it with handwritten or typed notes (e.g. circle a column and write *"× 1.05"*), then click **📷 Image** to upload the photo. A vision-capable model reads the annotations and proposes the matching operations — same Accept/Discard flow.
|
|
97
|
+
|
|
98
|
+
### Version history
|
|
99
|
+
|
|
100
|
+
Every accepted change is saved as a numbered version in the sidebar. You can:
|
|
101
|
+
- **Revert** to any prior version
|
|
102
|
+
- **Save** any version to file (in the original file format)
|
|
103
|
+
- **Generate a PDF report** for any specific version
|
|
104
|
+
|
|
105
|
+
The PDF report includes a full **Iterative Adjustment History** section showing every version, what changed, and a mean-evolution table across versions.
|
|
106
|
+
|
|
107
|
+
---
|
|
108
|
+
|
|
109
|
+
## LLM Backend Configuration
|
|
110
|
+
|
|
111
|
+
Click **⚙ Settings** in the top-right to choose your backend.
|
|
112
|
+
|
|
113
|
+
| Provider | Use case | Setup |
|
|
114
|
+
|---|---|---|
|
|
115
|
+
| **Claude API** (default) | Cloud, no local setup | API key injected automatically |
|
|
116
|
+
| **LM Studio — Text** | Local, privacy-first | Load any text model in LM Studio |
|
|
117
|
+
| **LM Studio — Vision** | Local + image annotations | Load a multimodal model (LLaVA, Moondream, etc.) |
|
|
118
|
+
|
|
119
|
+
**LM Studio setup:**
|
|
120
|
+
1. Download [LM Studio](https://lmstudio.ai)
|
|
121
|
+
2. Load a text model (e.g. Llama 3, Mistral) for chat adjustments
|
|
122
|
+
3. Load a vision model (e.g. LLaVA, BakLLaVA) for image annotation parsing
|
|
123
|
+
4. Developer tab → Enable Local Server → Start Server
|
|
124
|
+
5. In Stat Chat → ⚙ Settings → select LM Studio → click **↻ Fetch loaded models** → **Test Connection** → Save
|
|
125
|
+
|
|
126
|
+
---
|
|
127
|
+
|
|
128
|
+
## CLI Reference
|
|
129
|
+
|
|
130
|
+
```
|
|
131
|
+
statchat --cli --input FILE [options]
|
|
132
|
+
|
|
133
|
+
File I/O:
|
|
134
|
+
--input, -i PATH Input CSV or Excel file (required)
|
|
135
|
+
--output, -o PATH Save processed data here
|
|
136
|
+
--output-format csv | xlsx | json (default: csv)
|
|
137
|
+
--report, -r PATH Save PDF report here
|
|
138
|
+
|
|
139
|
+
Cleaning:
|
|
140
|
+
--drop-duplicates Remove duplicate rows
|
|
141
|
+
--drop-nulls Drop rows containing any null
|
|
142
|
+
--fill-nulls STRATEGY mean | median | mode | zero
|
|
143
|
+
|
|
144
|
+
Normalization:
|
|
145
|
+
--normalize METHOD zscore | minmax | robust
|
|
146
|
+
--norm-mean FLOAT Z-score target mean (default 0.0)
|
|
147
|
+
--norm-std FLOAT Z-score target std (default 1.0)
|
|
148
|
+
|
|
149
|
+
Analysis:
|
|
150
|
+
--central-tendency Mean, median, mode
|
|
151
|
+
--dispersion Std dev, variance, IQR, range, CV
|
|
152
|
+
--shape Skewness & kurtosis
|
|
153
|
+
--percentiles P5–P95
|
|
154
|
+
--correlation Pearson correlation matrix
|
|
155
|
+
--roc-auc TARGET_COL ROC-AUC vs a binary target column
|
|
156
|
+
--all-metrics Enable all metrics above
|
|
157
|
+
|
|
158
|
+
Adjustment (requires LLM backend):
|
|
159
|
+
--adjust INSTRUCTION Natural-language adjustment (repeatable)
|
|
160
|
+
--adjust-image IMAGE_PATH Annotated report image (PNG/JPG)
|
|
161
|
+
|
|
162
|
+
LLM Backend:
|
|
163
|
+
--backend claude | lmstudio | lmstudio_vision (default: claude)
|
|
164
|
+
--lmstudio-url URL LM Studio server URL (default: http://localhost:1234)
|
|
165
|
+
--lmstudio-model ID LM Studio text model ID
|
|
166
|
+
--lmstudio-vision-model ID LM Studio vision model ID
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
### Examples
|
|
170
|
+
|
|
171
|
+
**Full pipeline with text adjustments:**
|
|
172
|
+
```bash
|
|
173
|
+
statchat --cli \
|
|
174
|
+
--input sales.csv \
|
|
175
|
+
--drop-duplicates --fill-nulls mean \
|
|
176
|
+
--normalize zscore \
|
|
177
|
+
--central-tendency --dispersion --correlation \
|
|
178
|
+
--adjust "Add 1000 to spend" \
|
|
179
|
+
--adjust "Multiply income by 1.1" \
|
|
180
|
+
--output adjusted_sales.xlsx \
|
|
181
|
+
--output-format xlsx \
|
|
182
|
+
--report report.pdf
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
**Parse an annotated report image with a local vision model:**
|
|
186
|
+
```bash
|
|
187
|
+
statchat --cli \
|
|
188
|
+
--input data.csv \
|
|
189
|
+
--adjust-image annotated_report.png \
|
|
190
|
+
--backend lmstudio_vision \
|
|
191
|
+
--lmstudio-url http://localhost:1234 \
|
|
192
|
+
--lmstudio-vision-model llava-1.5-7b \
|
|
193
|
+
--output adjusted.csv \
|
|
194
|
+
--report report.pdf
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
---
|
|
198
|
+
|
|
199
|
+
## Supported Metrics
|
|
200
|
+
|
|
201
|
+
| Category | Metrics |
|
|
202
|
+
|---|---|
|
|
203
|
+
| Central Tendency | Mean, Median, Mode, Count |
|
|
204
|
+
| Dispersion | Std Dev, Variance, Range, IQR, Min, Max, CV |
|
|
205
|
+
| Shape | Skewness, Kurtosis |
|
|
206
|
+
| Percentiles | P5, P10, P25, P50, P75, P90, P95 |
|
|
207
|
+
| Normality | Shapiro-Wilk statistic & p-value |
|
|
208
|
+
| Correlation | Pearson correlation matrix + heatmap |
|
|
209
|
+
| ROC-AUC | Per-feature AUC score + ROC curve plot |
|
|
210
|
+
|
|
211
|
+
---
|
|
212
|
+
|
|
213
|
+
## Project Structure
|
|
214
|
+
|
|
215
|
+
```
|
|
216
|
+
statchat/
|
|
217
|
+
├── __main__.py Entry point (GUI or CLI)
|
|
218
|
+
├── icon.ico
|
|
219
|
+
├── assets/
|
|
220
|
+
├── core/
|
|
221
|
+
│ ├── loader.py File I/O (CSV, Excel, JSON)
|
|
222
|
+
│ ├── cleaner.py Cleaning & normalization
|
|
223
|
+
│ ├── analyzer.py Statistical metrics
|
|
224
|
+
│ ├── adjuster.py NL instruction parser & executor
|
|
225
|
+
│ ├── llm_backend.py Claude API + LM Studio abstraction
|
|
226
|
+
│ └── reporter.py PDF report generation
|
|
227
|
+
├── gui/
|
|
228
|
+
│ ├── app.py Main Tkinter GUI
|
|
229
|
+
│ ├── chat_panel.py Adjust Data tab (chat + version history)
|
|
230
|
+
│ └── settings_dialog.py LLM backend settings
|
|
231
|
+
└── cli/
|
|
232
|
+
└── runner.py CLI runner
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
---
|
|
236
|
+
|
|
237
|
+
## Requirements
|
|
238
|
+
|
|
239
|
+
```
|
|
240
|
+
pandas >= 2.0
|
|
241
|
+
numpy >= 1.24
|
|
242
|
+
scipy >= 1.10
|
|
243
|
+
scikit-learn >= 1.3
|
|
244
|
+
openpyxl >= 3.1
|
|
245
|
+
reportlab >= 4.0
|
|
246
|
+
matplotlib >= 3.7
|
|
247
|
+
Pillow >= 9.0
|
|
248
|
+
requests >= 2.28
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
Tkinter is required for the GUI and ships with standard Python. Linux users may need `sudo apt install python3-tk`.
|
|
@@ -0,0 +1,229 @@
|
|
|
1
|
+
# Stat Chat
|
|
2
|
+
|
|
3
|
+
**Data Cleaning · Normalization · Statistical Analysis · AI-Powered Iterative Adjustment · PDF Reports**
|
|
4
|
+
|
|
5
|
+
A Python tool with both a **GUI** (Tkinter) and a **CLI** for analysing tabular data from CSV or Excel files. Natural-language dataset adjustment and annotated-report parsing are powered by the Claude API or a local LM Studio model.
|
|
6
|
+
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
## Installation
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
pip install statchat-app
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
> Tkinter ships with standard Python on Windows/macOS. On Debian/Ubuntu Linux, install it with `sudo apt install python3-tk`.
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## Launch
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
statchat # GUI
|
|
23
|
+
statchat --cli --help # CLI
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
---
|
|
27
|
+
|
|
28
|
+
## GUI Walkthrough
|
|
29
|
+
|
|
30
|
+
1. **Load CSV / Excel** — opens a file picker; the Data Preview tab fills instantly.
|
|
31
|
+
2. **Data Cleaning** — tick any combination of:
|
|
32
|
+
- Drop duplicate rows
|
|
33
|
+
- Drop rows with null values
|
|
34
|
+
- Fill nulls (mean / median / mode / zero)
|
|
35
|
+
3. **Normalization** — choose one method:
|
|
36
|
+
- **Z-score** — standardise with custom target mean & std (default 0, 1)
|
|
37
|
+
- **Min-Max** — scale to [0, 1]
|
|
38
|
+
- **Robust** — median/IQR scaling (outlier-resistant)
|
|
39
|
+
4. **Analysis Metrics** — tick any combination:
|
|
40
|
+
- Measures of Central Tendency (mean, median, mode)
|
|
41
|
+
- Measures of Dispersion (std dev, variance, IQR, range, CV)
|
|
42
|
+
- Shape stats (skewness, kurtosis)
|
|
43
|
+
- Percentile Statistics (P5–P95)
|
|
44
|
+
- Normality Tests (Shapiro-Wilk)
|
|
45
|
+
- Correlation Matrix (heatmap in PDF)
|
|
46
|
+
- ROC-AUC (enter the binary target column name)
|
|
47
|
+
5. **Run Analysis** — results appear in the *Analysis Results* tab.
|
|
48
|
+
6. **Save Cleaned Data** — exports as CSV, Excel, or JSON.
|
|
49
|
+
7. **Export PDF Report** — full styled report with tables and charts.
|
|
50
|
+
8. **✦ Adjust Data** — iteratively modify the dataset using plain English or annotated images (see below).
|
|
51
|
+
|
|
52
|
+
---
|
|
53
|
+
|
|
54
|
+
## AI-Powered Data Adjustment
|
|
55
|
+
|
|
56
|
+
The **✦ Adjust Data** tab lets you modify the dataset conversationally after analysis, without writing any code.
|
|
57
|
+
|
|
58
|
+
### Text instructions
|
|
59
|
+
|
|
60
|
+
Type any natural-language instruction and press **Send**:
|
|
61
|
+
|
|
62
|
+
> *"Add $1000 to each value in spend"*
|
|
63
|
+
> *"Multiply income by 1.1"*
|
|
64
|
+
> *"Clip spend between 0 and 5000"*
|
|
65
|
+
> *"Remove rows where age < 18"*
|
|
66
|
+
> *"Log-transform spend"*
|
|
67
|
+
> *"Rename 'score' to 'risk_score'"*
|
|
68
|
+
> *"Fill nulls in income with 0"*
|
|
69
|
+
|
|
70
|
+
Stat Chat sends the instruction to the configured LLM, parses the response into typed operations, shows you a **diff preview** (columns affected, mean before/after), and waits for you to **Accept** or **Discard** before applying the change.
|
|
71
|
+
|
|
72
|
+
### Annotated report images (vision)
|
|
73
|
+
|
|
74
|
+
Print or screenshot your PDF report, annotate it with handwritten or typed notes (e.g. circle a column and write *"× 1.05"*), then click **📷 Image** to upload the photo. A vision-capable model reads the annotations and proposes the matching operations — same Accept/Discard flow.
|
|
75
|
+
|
|
76
|
+
### Version history
|
|
77
|
+
|
|
78
|
+
Every accepted change is saved as a numbered version in the sidebar. You can:
|
|
79
|
+
- **Revert** to any prior version
|
|
80
|
+
- **Save** any version to file (in the original file format)
|
|
81
|
+
- **Generate a PDF report** for any specific version
|
|
82
|
+
|
|
83
|
+
The PDF report includes a full **Iterative Adjustment History** section showing every version, what changed, and a mean-evolution table across versions.
|
|
84
|
+
|
|
85
|
+
---
|
|
86
|
+
|
|
87
|
+
## LLM Backend Configuration
|
|
88
|
+
|
|
89
|
+
Click **⚙ Settings** in the top-right to choose your backend.
|
|
90
|
+
|
|
91
|
+
| Provider | Use case | Setup |
|
|
92
|
+
|---|---|---|
|
|
93
|
+
| **Claude API** (default) | Cloud, no local setup | API key injected automatically |
|
|
94
|
+
| **LM Studio — Text** | Local, privacy-first | Load any text model in LM Studio |
|
|
95
|
+
| **LM Studio — Vision** | Local + image annotations | Load a multimodal model (LLaVA, Moondream, etc.) |
|
|
96
|
+
|
|
97
|
+
**LM Studio setup:**
|
|
98
|
+
1. Download [LM Studio](https://lmstudio.ai)
|
|
99
|
+
2. Load a text model (e.g. Llama 3, Mistral) for chat adjustments
|
|
100
|
+
3. Load a vision model (e.g. LLaVA, BakLLaVA) for image annotation parsing
|
|
101
|
+
4. Developer tab → Enable Local Server → Start Server
|
|
102
|
+
5. In Stat Chat → ⚙ Settings → select LM Studio → click **↻ Fetch loaded models** → **Test Connection** → Save
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
## CLI Reference
|
|
107
|
+
|
|
108
|
+
```
|
|
109
|
+
statchat --cli --input FILE [options]
|
|
110
|
+
|
|
111
|
+
File I/O:
|
|
112
|
+
--input, -i PATH Input CSV or Excel file (required)
|
|
113
|
+
--output, -o PATH Save processed data here
|
|
114
|
+
--output-format csv | xlsx | json (default: csv)
|
|
115
|
+
--report, -r PATH Save PDF report here
|
|
116
|
+
|
|
117
|
+
Cleaning:
|
|
118
|
+
--drop-duplicates Remove duplicate rows
|
|
119
|
+
--drop-nulls Drop rows containing any null
|
|
120
|
+
--fill-nulls STRATEGY mean | median | mode | zero
|
|
121
|
+
|
|
122
|
+
Normalization:
|
|
123
|
+
--normalize METHOD zscore | minmax | robust
|
|
124
|
+
--norm-mean FLOAT Z-score target mean (default 0.0)
|
|
125
|
+
--norm-std FLOAT Z-score target std (default 1.0)
|
|
126
|
+
|
|
127
|
+
Analysis:
|
|
128
|
+
--central-tendency Mean, median, mode
|
|
129
|
+
--dispersion Std dev, variance, IQR, range, CV
|
|
130
|
+
--shape Skewness & kurtosis
|
|
131
|
+
--percentiles P5–P95
|
|
132
|
+
--correlation Pearson correlation matrix
|
|
133
|
+
--roc-auc TARGET_COL ROC-AUC vs a binary target column
|
|
134
|
+
--all-metrics Enable all metrics above
|
|
135
|
+
|
|
136
|
+
Adjustment (requires LLM backend):
|
|
137
|
+
--adjust INSTRUCTION Natural-language adjustment (repeatable)
|
|
138
|
+
--adjust-image IMAGE_PATH Annotated report image (PNG/JPG)
|
|
139
|
+
|
|
140
|
+
LLM Backend:
|
|
141
|
+
--backend claude | lmstudio | lmstudio_vision (default: claude)
|
|
142
|
+
--lmstudio-url URL LM Studio server URL (default: http://localhost:1234)
|
|
143
|
+
--lmstudio-model ID LM Studio text model ID
|
|
144
|
+
--lmstudio-vision-model ID LM Studio vision model ID
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### Examples
|
|
148
|
+
|
|
149
|
+
**Full pipeline with text adjustments:**
|
|
150
|
+
```bash
|
|
151
|
+
statchat --cli \
|
|
152
|
+
--input sales.csv \
|
|
153
|
+
--drop-duplicates --fill-nulls mean \
|
|
154
|
+
--normalize zscore \
|
|
155
|
+
--central-tendency --dispersion --correlation \
|
|
156
|
+
--adjust "Add 1000 to spend" \
|
|
157
|
+
--adjust "Multiply income by 1.1" \
|
|
158
|
+
--output adjusted_sales.xlsx \
|
|
159
|
+
--output-format xlsx \
|
|
160
|
+
--report report.pdf
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
**Parse an annotated report image with a local vision model:**
|
|
164
|
+
```bash
|
|
165
|
+
statchat --cli \
|
|
166
|
+
--input data.csv \
|
|
167
|
+
--adjust-image annotated_report.png \
|
|
168
|
+
--backend lmstudio_vision \
|
|
169
|
+
--lmstudio-url http://localhost:1234 \
|
|
170
|
+
--lmstudio-vision-model llava-1.5-7b \
|
|
171
|
+
--output adjusted.csv \
|
|
172
|
+
--report report.pdf
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
---
|
|
176
|
+
|
|
177
|
+
## Supported Metrics
|
|
178
|
+
|
|
179
|
+
| Category | Metrics |
|
|
180
|
+
|---|---|
|
|
181
|
+
| Central Tendency | Mean, Median, Mode, Count |
|
|
182
|
+
| Dispersion | Std Dev, Variance, Range, IQR, Min, Max, CV |
|
|
183
|
+
| Shape | Skewness, Kurtosis |
|
|
184
|
+
| Percentiles | P5, P10, P25, P50, P75, P90, P95 |
|
|
185
|
+
| Normality | Shapiro-Wilk statistic & p-value |
|
|
186
|
+
| Correlation | Pearson correlation matrix + heatmap |
|
|
187
|
+
| ROC-AUC | Per-feature AUC score + ROC curve plot |
|
|
188
|
+
|
|
189
|
+
---
|
|
190
|
+
|
|
191
|
+
## Project Structure
|
|
192
|
+
|
|
193
|
+
```
|
|
194
|
+
statchat/
|
|
195
|
+
├── __main__.py Entry point (GUI or CLI)
|
|
196
|
+
├── icon.ico
|
|
197
|
+
├── assets/
|
|
198
|
+
├── core/
|
|
199
|
+
│ ├── loader.py File I/O (CSV, Excel, JSON)
|
|
200
|
+
│ ├── cleaner.py Cleaning & normalization
|
|
201
|
+
│ ├── analyzer.py Statistical metrics
|
|
202
|
+
│ ├── adjuster.py NL instruction parser & executor
|
|
203
|
+
│ ├── llm_backend.py Claude API + LM Studio abstraction
|
|
204
|
+
│ └── reporter.py PDF report generation
|
|
205
|
+
├── gui/
|
|
206
|
+
│ ├── app.py Main Tkinter GUI
|
|
207
|
+
│ ├── chat_panel.py Adjust Data tab (chat + version history)
|
|
208
|
+
│ └── settings_dialog.py LLM backend settings
|
|
209
|
+
└── cli/
|
|
210
|
+
└── runner.py CLI runner
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
---
|
|
214
|
+
|
|
215
|
+
## Requirements
|
|
216
|
+
|
|
217
|
+
```
|
|
218
|
+
pandas >= 2.0
|
|
219
|
+
numpy >= 1.24
|
|
220
|
+
scipy >= 1.10
|
|
221
|
+
scikit-learn >= 1.3
|
|
222
|
+
openpyxl >= 3.1
|
|
223
|
+
reportlab >= 4.0
|
|
224
|
+
matplotlib >= 3.7
|
|
225
|
+
Pillow >= 9.0
|
|
226
|
+
requests >= 2.28
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
Tkinter is required for the GUI and ships with standard Python. Linux users may need `sudo apt install python3-tk`.
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "statchat_app"
|
|
7
|
+
version = "1.0.2"
|
|
8
|
+
description = "Data cleaning, normalization, statistical analysis and AI-powered iterative adjustment"
|
|
9
|
+
readme = {file = "README.md", content-type = "text/markdown"}
|
|
10
|
+
requires-python = ">=3.11"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [
|
|
13
|
+
{name = "Stat Chat", email = "statchat@example.com"},
|
|
14
|
+
]
|
|
15
|
+
keywords = ["statistics", "data-analysis", "data-cleaning", "gui", "llm", "normalization"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 4 - Beta",
|
|
18
|
+
"Intended Audience :: Science/Research",
|
|
19
|
+
"Intended Audience :: Developers",
|
|
20
|
+
"License :: OSI Approved :: MIT License",
|
|
21
|
+
"Operating System :: OS Independent",
|
|
22
|
+
"Programming Language :: Python :: 3",
|
|
23
|
+
"Programming Language :: Python :: 3.11",
|
|
24
|
+
"Programming Language :: Python :: 3.12",
|
|
25
|
+
"Topic :: Scientific/Engineering :: Information Analysis",
|
|
26
|
+
"Topic :: Scientific/Engineering :: Visualization",
|
|
27
|
+
"Topic :: Utilities",
|
|
28
|
+
]
|
|
29
|
+
dependencies = [
|
|
30
|
+
"pandas>=2.0",
|
|
31
|
+
"numpy>=1.24",
|
|
32
|
+
"scipy>=1.10",
|
|
33
|
+
"scikit-learn>=1.3",
|
|
34
|
+
"openpyxl>=3.1",
|
|
35
|
+
"xlrd>=2.0",
|
|
36
|
+
"reportlab>=4.0",
|
|
37
|
+
"matplotlib>=3.7",
|
|
38
|
+
"Pillow>=9.0",
|
|
39
|
+
"requests>=2.28",
|
|
40
|
+
]
|
|
41
|
+
|
|
42
|
+
[project.optional-dependencies]
|
|
43
|
+
dev = [
|
|
44
|
+
"pytest>=7.0",
|
|
45
|
+
"pytest-cov",
|
|
46
|
+
"black",
|
|
47
|
+
"ruff",
|
|
48
|
+
"mypy",
|
|
49
|
+
]
|
|
50
|
+
|
|
51
|
+
[project.scripts]
|
|
52
|
+
# This is the key line: installs a `statchat` command that calls main()
|
|
53
|
+
statchat = "statchat.__main__:main"
|
|
54
|
+
|
|
55
|
+
[project.urls]
|
|
56
|
+
# Update these to your real repo before publishing
|
|
57
|
+
# Repository = "https://github.com/your-username/statchat"
|
|
58
|
+
# "Bug Tracker" = "https://github.com/your-username/statchat/issues"
|
|
59
|
+
|
|
60
|
+
[tool.setuptools.packages.find]
|
|
61
|
+
# Automatically find statchat and all its sub-packages
|
|
62
|
+
where = ["."]
|
|
63
|
+
include = ["statchat*"]
|
|
64
|
+
|
|
65
|
+
[tool.setuptools.package-data]
|
|
66
|
+
# Bundle the icon files so they're available after pip install
|
|
67
|
+
statchat = [
|
|
68
|
+
"icon.ico",
|
|
69
|
+
"assets/icon.ico",
|
|
70
|
+
"assets/icon.png",
|
|
71
|
+
]
|
|
72
|
+
|
|
73
|
+
[tool.black]
|
|
74
|
+
line-length = 100
|
|
75
|
+
target-version = ["py311"]
|
|
76
|
+
|
|
77
|
+
[tool.ruff]
|
|
78
|
+
line-length = 100
|
|
79
|
+
select = ["E", "F", "W"]
|
|
80
|
+
ignore = ["E501"]
|
|
81
|
+
|
|
82
|
+
[tool.mypy]
|
|
83
|
+
python_version = "3.11"
|
|
84
|
+
ignore_missing_imports = true
|
|
File without changes
|