statchat-app 1.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,251 @@
1
+ Metadata-Version: 2.1
2
+ Name: statchat_app
3
+ Version: 1.0.2
4
+ Summary: Data cleaning, normalization, statistical analysis and AI-powered iterative adjustment
5
+ Author-email: Stat Chat <statchat@example.com>
6
+ License: MIT
7
+ Keywords: statistics,data-analysis,data-cleaning,gui,llm,normalization
8
+ Classifier: Development Status :: 4 - Beta
9
+ Classifier: Intended Audience :: Science/Research
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Operating System :: OS Independent
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
17
+ Classifier: Topic :: Scientific/Engineering :: Visualization
18
+ Classifier: Topic :: Utilities
19
+ Requires-Python: >=3.11
20
+ Description-Content-Type: text/markdown
21
+ Provides-Extra: dev
22
+
23
+ # Stat Chat
24
+
25
+ **Data Cleaning · Normalization · Statistical Analysis · AI-Powered Iterative Adjustment · PDF Reports**
26
+
27
+ A Python tool with both a **GUI** (Tkinter) and a **CLI** for analysing tabular data from CSV or Excel files. It uses the Claude API or a local LM Studio model for natural-language dataset adjustment and annotated-report parsing.
28
+
29
+ ---
30
+
31
+ ## Installation
32
+
33
+ ```bash
34
+ pip install statchat-app
35
+ ```
36
+
37
+ > Tkinter ships with standard Python on Windows/macOS. On Linux: `sudo apt install python3-tk`
38
+
39
+ ---
40
+
41
+ ## Launch
42
+
43
+ ```bash
44
+ statchat # GUI
45
+ statchat --cli --help # CLI
46
+ ```
47
+
48
+ ---
49
+
50
+ ## GUI Walkthrough
51
+
52
+ 1. **Load CSV / Excel** — opens a file picker; the Data Preview tab fills instantly.
53
+ 2. **Data Cleaning** — tick any combination of:
54
+ - Drop duplicate rows
55
+ - Drop rows with null values
56
+ - Fill nulls (mean / median / mode / zero)
57
+ 3. **Normalization** — choose one method:
58
+ - **Z-score** — standardise with custom target mean & std (default 0, 1)
59
+ - **Min-Max** — scale to [0, 1]
60
+ - **Robust** — median/IQR scaling (outlier-resistant)
61
+ 4. **Analysis Metrics** — tick any combination:
62
+ - Measures of Central Tendency (mean, median, mode)
63
+ - Measures of Dispersion (std dev, variance, IQR, range, CV)
64
+ - Shape stats (skewness, kurtosis)
65
+ - Percentile Statistics (P5–P95)
66
+ - Normality Tests (Shapiro-Wilk)
67
+ - Correlation Matrix (heatmap in PDF)
68
+ - ROC-AUC (enter the binary target column name)
69
+ 5. **Run Analysis** — results appear in the *Analysis Results* tab.
70
+ 6. **Save Cleaned Data** — exports as CSV, Excel, or JSON.
71
+ 7. **Export PDF Report** — full styled report with tables and charts.
72
+ 8. **✦ Adjust Data** — iteratively modify the dataset using plain English or annotated images (see below).
73
+
74
+ ---
75
+
76
+ ## AI-Powered Data Adjustment
77
+
78
+ The **✦ Adjust Data** tab lets you modify the dataset conversationally after analysis, without writing any code.
79
+
80
+ ### Text instructions
81
+
82
+ Type any natural-language instruction and press **Send**:
83
+
84
+ > *"Add $1000 to each value in spend"*
85
+ > *"Multiply income by 1.1"*
86
+ > *"Clip spend between 0 and 5000"*
87
+ > *"Remove rows where age < 18"*
88
+ > *"Log-transform spend"*
89
+ > *"Rename 'score' to 'risk_score'"*
90
+ > *"Fill nulls in income with 0"*
91
+
92
+ Stat Chat sends the instruction to the configured LLM, parses the response into typed operations, shows you a **diff preview** (columns affected, mean before/after), and waits for you to **Accept** or **Discard** before applying the change.
93
+
94
+ ### Annotated report images (vision)
95
+
96
+ Print or screenshot your PDF report, annotate it with handwritten or typed notes (e.g. circle a column and write *"× 1.05"*), then click **📷 Image** to upload the photo. A vision-capable model reads the annotations and proposes the matching operations — same Accept/Discard flow.
97
+
98
+ ### Version history
99
+
100
+ Every accepted change is saved as a numbered version in the sidebar. You can:
101
+ - **Revert** to any prior version
102
+ - **Save** any version to file (in the original file format)
103
+ - **Generate a PDF report** for any specific version
104
+
105
+ The PDF report includes a full **Iterative Adjustment History** section showing every version, what changed, and a mean-evolution table across versions.
106
+
107
+ ---
108
+
109
+ ## LLM Backend Configuration
110
+
111
+ Click **⚙ Settings** in the top-right to choose your backend.
112
+
113
+ | Provider | Use case | Setup |
114
+ |---|---|---|
115
+ | **Claude API** (default) | Cloud, no local setup | API key injected automatically |
116
+ | **LM Studio — Text** | Local, privacy-first | Load any text model in LM Studio |
117
+ | **LM Studio — Vision** | Local + image annotations | Load a multimodal model (LLaVA, Moondream, etc.) |
118
+
119
+ **LM Studio setup:**
120
+ 1. Download [LM Studio](https://lmstudio.ai)
121
+ 2. Load a text model (e.g. Llama 3, Mistral) for chat adjustments
122
+ 3. Load a vision model (e.g. LLaVA, BakLLaVA) for image annotation parsing
123
+ 4. Developer tab → Enable Local Server → Start Server
124
+ 5. In Stat Chat → ⚙ Settings → select LM Studio → click **↻ Fetch loaded models** → **Test Connection** → Save
125
+
126
+ ---
127
+
128
+ ## CLI Reference
129
+
130
+ ```
131
+ statchat --cli --input FILE [options]
132
+
133
+ File I/O:
134
+ --input, -i PATH Input CSV or Excel file (required)
135
+ --output, -o PATH Save processed data here
136
+ --output-format csv | xlsx | json (default: csv)
137
+ --report, -r PATH Save PDF report here
138
+
139
+ Cleaning:
140
+ --drop-duplicates Remove duplicate rows
141
+ --drop-nulls Drop rows containing any null
142
+ --fill-nulls STRATEGY mean | median | mode | zero
143
+
144
+ Normalization:
145
+ --normalize METHOD zscore | minmax | robust
146
+ --norm-mean FLOAT Z-score target mean (default 0.0)
147
+ --norm-std FLOAT Z-score target std (default 1.0)
148
+
149
+ Analysis:
150
+ --central-tendency Mean, median, mode
151
+   --dispersion           Std dev, variance, IQR, range, CV
152
+ --shape Skewness & kurtosis
153
+ --percentiles P5–P95
154
+ --correlation Pearson correlation matrix
155
+ --roc-auc TARGET_COL ROC-AUC vs a binary target column
156
+ --all-metrics Enable all metrics above
157
+
158
+ Adjustment (requires LLM backend):
159
+ --adjust INSTRUCTION Natural-language adjustment (repeatable)
160
+ --adjust-image IMAGE_PATH Annotated report image (PNG/JPG)
161
+
162
+ LLM Backend:
163
+ --backend claude | lmstudio | lmstudio_vision (default: claude)
164
+ --lmstudio-url URL LM Studio server URL (default: http://localhost:1234)
165
+ --lmstudio-model ID LM Studio text model ID
166
+ --lmstudio-vision-model ID LM Studio vision model ID
167
+ ```
168
+
169
+ ### Examples
170
+
171
+ **Full pipeline with text adjustments:**
172
+ ```bash
173
+ statchat --cli \
174
+ --input sales.csv \
175
+ --drop-duplicates --fill-nulls mean \
176
+ --normalize zscore \
177
+ --central-tendency --dispersion --correlation \
178
+ --adjust "Add 1000 to spend" \
179
+ --adjust "Multiply income by 1.1" \
180
+ --output adjusted_sales.xlsx \
181
+ --output-format xlsx \
182
+ --report report.pdf
183
+ ```
184
+
185
+ **Parse an annotated report image with a local vision model:**
186
+ ```bash
187
+ statchat --cli \
188
+ --input data.csv \
189
+ --adjust-image annotated_report.png \
190
+ --backend lmstudio_vision \
191
+ --lmstudio-url http://localhost:1234 \
192
+ --lmstudio-vision-model llava-1.5-7b \
193
+ --output adjusted.csv \
194
+ --report report.pdf
195
+ ```
196
+
197
+ ---
198
+
199
+ ## Supported Metrics
200
+
201
+ | Category | Metrics |
202
+ |---|---|
203
+ | Central Tendency | Mean, Median, Mode, Count |
204
+ | Dispersion | Std Dev, Variance, Range, IQR, Min, Max, CV |
205
+ | Shape | Skewness, Kurtosis |
206
+ | Percentiles | P5, P10, P25, P50, P75, P90, P95 |
207
+ | Normality | Shapiro-Wilk statistic & p-value |
208
+ | Correlation | Pearson correlation matrix + heatmap |
209
+ | ROC-AUC | Per-feature AUC score + ROC curve plot |
210
+
211
+ ---
212
+
213
+ ## Project Structure
214
+
215
+ ```
216
+ statchat/
217
+ ├── __main__.py Entry point (GUI or CLI)
218
+ ├── icon.ico
219
+ ├── assets/
220
+ ├── core/
221
+ │ ├── loader.py File I/O (CSV, Excel, JSON)
222
+ │ ├── cleaner.py Cleaning & normalization
223
+ │ ├── analyzer.py Statistical metrics
224
+ │ ├── adjuster.py NL instruction parser & executor
225
+ │ ├── llm_backend.py Claude API + LM Studio abstraction
226
+ │ └── reporter.py PDF report generation
227
+ ├── gui/
228
+ │ ├── app.py Main Tkinter GUI
229
+ │ ├── chat_panel.py Adjust Data tab (chat + version history)
230
+ │ └── settings_dialog.py LLM backend settings
231
+ └── cli/
232
+ └── runner.py CLI runner
233
+ ```
234
+
235
+ ---
236
+
237
+ ## Requirements
238
+
239
+ ```
240
+ pandas >= 2.0
241
+ numpy >= 1.24
242
+ scipy >= 1.10
243
+ scikit-learn >= 1.3
244
+ openpyxl >= 3.1
245
+ reportlab >= 4.0
246
+ matplotlib >= 3.7
247
+ Pillow >= 9.0
248
+ requests >= 2.28
249
+ ```
250
+
251
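+ Tkinter is required for the GUI and ships with standard Python on Windows and macOS. On Debian/Ubuntu-based Linux it may need to be installed separately with `sudo apt install python3-tk`; other distributions provide an equivalent package.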
@@ -0,0 +1,229 @@
1
+ # Stat Chat
2
+
3
+ **Data Cleaning · Normalization · Statistical Analysis · AI-Powered Iterative Adjustment · PDF Reports**
4
+
5
+ A Python tool with both a **GUI** (Tkinter) and a **CLI** for analysing tabular data from CSV or Excel files. It uses the Claude API or a local LM Studio model for natural-language dataset adjustment and annotated-report parsing.
6
+
7
+ ---
8
+
9
+ ## Installation
10
+
11
+ ```bash
12
+ pip install statchat-app
13
+ ```
14
+
15
+ > Tkinter ships with standard Python on Windows/macOS. On Linux: `sudo apt install python3-tk`
16
+
17
+ ---
18
+
19
+ ## Launch
20
+
21
+ ```bash
22
+ statchat # GUI
23
+ statchat --cli --help # CLI
24
+ ```
25
+
26
+ ---
27
+
28
+ ## GUI Walkthrough
29
+
30
+ 1. **Load CSV / Excel** — opens a file picker; the Data Preview tab fills instantly.
31
+ 2. **Data Cleaning** — tick any combination of:
32
+ - Drop duplicate rows
33
+ - Drop rows with null values
34
+ - Fill nulls (mean / median / mode / zero)
35
+ 3. **Normalization** — choose one method:
36
+ - **Z-score** — standardise with custom target mean & std (default 0, 1)
37
+ - **Min-Max** — scale to [0, 1]
38
+ - **Robust** — median/IQR scaling (outlier-resistant)
39
+ 4. **Analysis Metrics** — tick any combination:
40
+ - Measures of Central Tendency (mean, median, mode)
41
+ - Measures of Dispersion (std dev, variance, IQR, range, CV)
42
+ - Shape stats (skewness, kurtosis)
43
+ - Percentile Statistics (P5–P95)
44
+ - Normality Tests (Shapiro-Wilk)
45
+ - Correlation Matrix (heatmap in PDF)
46
+ - ROC-AUC (enter the binary target column name)
47
+ 5. **Run Analysis** — results appear in the *Analysis Results* tab.
48
+ 6. **Save Cleaned Data** — exports as CSV, Excel, or JSON.
49
+ 7. **Export PDF Report** — full styled report with tables and charts.
50
+ 8. **✦ Adjust Data** — iteratively modify the dataset using plain English or annotated images (see below).
51
+
52
+ ---
53
+
54
+ ## AI-Powered Data Adjustment
55
+
56
+ The **✦ Adjust Data** tab lets you modify the dataset conversationally after analysis, without writing any code.
57
+
58
+ ### Text instructions
59
+
60
+ Type any natural-language instruction and press **Send**:
61
+
62
+ > *"Add $1000 to each value in spend"*
63
+ > *"Multiply income by 1.1"*
64
+ > *"Clip spend between 0 and 5000"*
65
+ > *"Remove rows where age < 18"*
66
+ > *"Log-transform spend"*
67
+ > *"Rename 'score' to 'risk_score'"*
68
+ > *"Fill nulls in income with 0"*
69
+
70
+ Stat Chat sends the instruction to the configured LLM, parses the response into typed operations, shows you a **diff preview** (columns affected, mean before/after), and waits for you to **Accept** or **Discard** before applying the change.
71
+
72
+ ### Annotated report images (vision)
73
+
74
+ Print or screenshot your PDF report, annotate it with handwritten or typed notes (e.g. circle a column and write *"× 1.05"*), then click **📷 Image** to upload the photo. A vision-capable model reads the annotations and proposes the matching operations — same Accept/Discard flow.
75
+
76
+ ### Version history
77
+
78
+ Every accepted change is saved as a numbered version in the sidebar. You can:
79
+ - **Revert** to any prior version
80
+ - **Save** any version to file (in the original file format)
81
+ - **Generate a PDF report** for any specific version
82
+
83
+ The PDF report includes a full **Iterative Adjustment History** section showing every version, what changed, and a mean-evolution table across versions.
84
+
85
+ ---
86
+
87
+ ## LLM Backend Configuration
88
+
89
+ Click **⚙ Settings** in the top-right to choose your backend.
90
+
91
+ | Provider | Use case | Setup |
92
+ |---|---|---|
93
+ | **Claude API** (default) | Cloud, no local setup | API key injected automatically |
94
+ | **LM Studio — Text** | Local, privacy-first | Load any text model in LM Studio |
95
+ | **LM Studio — Vision** | Local + image annotations | Load a multimodal model (LLaVA, Moondream, etc.) |
96
+
97
+ **LM Studio setup:**
98
+ 1. Download [LM Studio](https://lmstudio.ai)
99
+ 2. Load a text model (e.g. Llama 3, Mistral) for chat adjustments
100
+ 3. Load a vision model (e.g. LLaVA, BakLLaVA) for image annotation parsing
101
+ 4. Developer tab → Enable Local Server → Start Server
102
+ 5. In Stat Chat → ⚙ Settings → select LM Studio → click **↻ Fetch loaded models** → **Test Connection** → Save
103
+
104
+ ---
105
+
106
+ ## CLI Reference
107
+
108
+ ```
109
+ statchat --cli --input FILE [options]
110
+
111
+ File I/O:
112
+ --input, -i PATH Input CSV or Excel file (required)
113
+ --output, -o PATH Save processed data here
114
+ --output-format csv | xlsx | json (default: csv)
115
+ --report, -r PATH Save PDF report here
116
+
117
+ Cleaning:
118
+ --drop-duplicates Remove duplicate rows
119
+ --drop-nulls Drop rows containing any null
120
+ --fill-nulls STRATEGY mean | median | mode | zero
121
+
122
+ Normalization:
123
+ --normalize METHOD zscore | minmax | robust
124
+ --norm-mean FLOAT Z-score target mean (default 0.0)
125
+ --norm-std FLOAT Z-score target std (default 1.0)
126
+
127
+ Analysis:
128
+ --central-tendency Mean, median, mode
129
+   --dispersion           Std dev, variance, IQR, range, CV
130
+ --shape Skewness & kurtosis
131
+ --percentiles P5–P95
132
+ --correlation Pearson correlation matrix
133
+ --roc-auc TARGET_COL ROC-AUC vs a binary target column
134
+ --all-metrics Enable all metrics above
135
+
136
+ Adjustment (requires LLM backend):
137
+ --adjust INSTRUCTION Natural-language adjustment (repeatable)
138
+ --adjust-image IMAGE_PATH Annotated report image (PNG/JPG)
139
+
140
+ LLM Backend:
141
+ --backend claude | lmstudio | lmstudio_vision (default: claude)
142
+ --lmstudio-url URL LM Studio server URL (default: http://localhost:1234)
143
+ --lmstudio-model ID LM Studio text model ID
144
+ --lmstudio-vision-model ID LM Studio vision model ID
145
+ ```
146
+
147
+ ### Examples
148
+
149
+ **Full pipeline with text adjustments:**
150
+ ```bash
151
+ statchat --cli \
152
+ --input sales.csv \
153
+ --drop-duplicates --fill-nulls mean \
154
+ --normalize zscore \
155
+ --central-tendency --dispersion --correlation \
156
+ --adjust "Add 1000 to spend" \
157
+ --adjust "Multiply income by 1.1" \
158
+ --output adjusted_sales.xlsx \
159
+ --output-format xlsx \
160
+ --report report.pdf
161
+ ```
162
+
163
+ **Parse an annotated report image with a local vision model:**
164
+ ```bash
165
+ statchat --cli \
166
+ --input data.csv \
167
+ --adjust-image annotated_report.png \
168
+ --backend lmstudio_vision \
169
+ --lmstudio-url http://localhost:1234 \
170
+ --lmstudio-vision-model llava-1.5-7b \
171
+ --output adjusted.csv \
172
+ --report report.pdf
173
+ ```
174
+
175
+ ---
176
+
177
+ ## Supported Metrics
178
+
179
+ | Category | Metrics |
180
+ |---|---|
181
+ | Central Tendency | Mean, Median, Mode, Count |
182
+ | Dispersion | Std Dev, Variance, Range, IQR, Min, Max, CV |
183
+ | Shape | Skewness, Kurtosis |
184
+ | Percentiles | P5, P10, P25, P50, P75, P90, P95 |
185
+ | Normality | Shapiro-Wilk statistic & p-value |
186
+ | Correlation | Pearson correlation matrix + heatmap |
187
+ | ROC-AUC | Per-feature AUC score + ROC curve plot |
188
+
189
+ ---
190
+
191
+ ## Project Structure
192
+
193
+ ```
194
+ statchat/
195
+ ├── __main__.py Entry point (GUI or CLI)
196
+ ├── icon.ico
197
+ ├── assets/
198
+ ├── core/
199
+ │ ├── loader.py File I/O (CSV, Excel, JSON)
200
+ │ ├── cleaner.py Cleaning & normalization
201
+ │ ├── analyzer.py Statistical metrics
202
+ │ ├── adjuster.py NL instruction parser & executor
203
+ │ ├── llm_backend.py Claude API + LM Studio abstraction
204
+ │ └── reporter.py PDF report generation
205
+ ├── gui/
206
+ │ ├── app.py Main Tkinter GUI
207
+ │ ├── chat_panel.py Adjust Data tab (chat + version history)
208
+ │ └── settings_dialog.py LLM backend settings
209
+ └── cli/
210
+ └── runner.py CLI runner
211
+ ```
212
+
213
+ ---
214
+
215
+ ## Requirements
216
+
217
+ ```
218
+ pandas >= 2.0
219
+ numpy >= 1.24
220
+ scipy >= 1.10
221
+ scikit-learn >= 1.3
222
+ openpyxl >= 3.1
223
+ reportlab >= 4.0
224
+ matplotlib >= 3.7
225
+ Pillow >= 9.0
226
+ requests >= 2.28
227
+ ```
228
+
229
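+ Tkinter is required for the GUI and ships with standard Python on Windows and macOS. On Debian/Ubuntu-based Linux it may need to be installed separately with `sudo apt install python3-tk`; other distributions provide an equivalent package.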
@@ -0,0 +1,84 @@
1
+ [build-system]
2
+ requires = ["setuptools>=68", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "statchat_app"
7
+ version = "1.0.2"
8
+ description = "Data cleaning, normalization, statistical analysis and AI-powered iterative adjustment"
9
+ readme = {file = "README.md", content-type = "text/markdown"}
10
+ requires-python = ">=3.11"
11
+ license = { text = "MIT" }
12
+ authors = [
13
+ {name = "Stat Chat", email = "statchat@example.com"},
14
+ ]
15
+ keywords = ["statistics", "data-analysis", "data-cleaning", "gui", "llm", "normalization"]
16
+ classifiers = [
17
+ "Development Status :: 4 - Beta",
18
+ "Intended Audience :: Science/Research",
19
+ "Intended Audience :: Developers",
20
+ "License :: OSI Approved :: MIT License",
21
+ "Operating System :: OS Independent",
22
+ "Programming Language :: Python :: 3",
23
+ "Programming Language :: Python :: 3.11",
24
+ "Programming Language :: Python :: 3.12",
25
+ "Topic :: Scientific/Engineering :: Information Analysis",
26
+ "Topic :: Scientific/Engineering :: Visualization",
27
+ "Topic :: Utilities",
28
+ ]
29
+ dependencies = [
30
+ "pandas>=2.0",
31
+ "numpy>=1.24",
32
+ "scipy>=1.10",
33
+ "scikit-learn>=1.3",
34
+ "openpyxl>=3.1",
35
+ "xlrd>=2.0",
36
+ "reportlab>=4.0",
37
+ "matplotlib>=3.7",
38
+ "Pillow>=9.0",
39
+ "requests>=2.28",
40
+ ]
41
+
42
+ [project.optional-dependencies]
43
+ dev = [
44
+ "pytest>=7.0",
45
+ "pytest-cov",
46
+ "black",
47
+ "ruff",
48
+ "mypy",
49
+ ]
50
+
51
+ [project.scripts]
52
+ # This is the key line: installs a `statchat` command that calls main()
53
+ statchat = "statchat.__main__:main"
54
+
55
+ [project.urls]
56
+ # Update these to your real repo before publishing
57
+ # Repository = "https://github.com/your-username/statchat"
58
+ # "Bug Tracker" = "https://github.com/your-username/statchat/issues"
59
+
60
+ [tool.setuptools.packages.find]
61
+ # Automatically find statchat and all its sub-packages
62
+ where = ["."]
63
+ include = ["statchat*"]
64
+
65
+ [tool.setuptools.package-data]
66
+ # Bundle the icon files so they're available after pip install
67
+ statchat = [
68
+ "icon.ico",
69
+ "assets/icon.ico",
70
+ "assets/icon.png",
71
+ ]
72
+
73
+ [tool.black]
74
+ line-length = 100
75
+ target-version = ["py311"]
76
+
77
+ [tool.ruff]
78
+ line-length = 100
79
+ select = ["E", "F", "W"]
80
+ ignore = ["E501"]
81
+
82
+ [tool.mypy]
83
+ python_version = "3.11"
84
+ ignore_missing_imports = true
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
File without changes