tedcheck 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tedcheck-0.1.0/PKG-INFO +270 -0
- tedcheck-0.1.0/README.md +252 -0
- tedcheck-0.1.0/pyproject.toml +28 -0
- tedcheck-0.1.0/setup.cfg +4 -0
- tedcheck-0.1.0/tedcheck/__init__.py +40 -0
- tedcheck-0.1.0/tedcheck/cli.py +206 -0
- tedcheck-0.1.0/tedcheck/config.py +78 -0
- tedcheck-0.1.0/tedcheck/exceptions.py +26 -0
- tedcheck-0.1.0/tedcheck/features.py +451 -0
- tedcheck-0.1.0/tedcheck/logger.py +43 -0
- tedcheck-0.1.0/tedcheck/utils.py +45 -0
- tedcheck-0.1.0/tedcheck.egg-info/PKG-INFO +270 -0
- tedcheck-0.1.0/tedcheck.egg-info/SOURCES.txt +14 -0
- tedcheck-0.1.0/tedcheck.egg-info/dependency_links.txt +1 -0
- tedcheck-0.1.0/tedcheck.egg-info/entry_points.txt +2 -0
- tedcheck-0.1.0/tedcheck.egg-info/top_level.txt +1 -0
tedcheck-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,270 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tedcheck
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: UMAP Segment Validation tool
|
|
5
|
+
Author-email: Tergel Munkhbaatar <tergelitu@example.com>
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/tergelitu/tedcheck
|
|
8
|
+
Project-URL: Repository, https://github.com/tergelitu/tedcheck.git
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Requires-Python: >=3.8
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
|
|
19
|
+
# TEDCHECK - UMAP Segment Validation Tool
|
|
20
|
+
|
|
21
|
+
A comprehensive Python package for UMAP-based customer segmentation visualization and validation.
|
|
22
|
+
|
|
23
|
+
## Features
|
|
24
|
+
|
|
25
|
+
- **Flexible Configuration**: Customize column names, exclude/include features dynamically
|
|
26
|
+
- **UMAP Dimensionality Reduction**: 2D visualization of customer segments
|
|
27
|
+
- **Quality Metrics**: Calculate silhouette scores, feature importance, and purity metrics
|
|
28
|
+
- **Interactive Visualizations**: Plotly-based interactive charts
|
|
29
|
+
- **Preset Configurations**: Built-in presets for different use cases
|
|
30
|
+
- **Package-Ready**: Usable both as a terminal CLI and as a Python library
|
|
31
|
+
|
|
32
|
+
## Installation
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
pip install -e .
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
## Quick Start
|
|
39
|
+
|
|
40
|
+
### Terminal Usage
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
# Using default configuration
|
|
44
|
+
tedcheck data.csv
|
|
45
|
+
|
|
46
|
+
# With custom columns
|
|
47
|
+
tedcheck data.csv --user-id customer_id --segment-col tier --time-col month
|
|
48
|
+
|
|
49
|
+
# Using presets
|
|
50
|
+
tedcheck data.csv --preset ecommerce --metrics
|
|
51
|
+
|
|
52
|
+
# Custom configuration file
|
|
53
|
+
tedcheck data.csv --config my_config.json
|
|
54
|
+
|
|
55
|
+
# Skip time column if not available
|
|
56
|
+
tedcheck data.csv --skip-time
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
### Python Usage
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
from tedcheck import Config, apply_umap_reduction, calculate_umap_metrics
|
|
63
|
+
import pandas as pd
|
|
64
|
+
|
|
65
|
+
# Load data
|
|
66
|
+
df = pd.read_csv('data.csv')
|
|
67
|
+
|
|
68
|
+
# Create config
|
|
69
|
+
config = Config(
|
|
70
|
+
user_id_col='customer_id',
|
|
71
|
+
segment_col='tier',
|
|
72
|
+
skip_time=False
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
# Apply UMAP reduction
|
|
76
|
+
df_umap, embedding, num_cols = apply_umap_reduction(df, config=config)
|
|
77
|
+
|
|
78
|
+
# Calculate metrics
|
|
79
|
+
metrics = calculate_umap_metrics(df_umap, embedding, df, config=config)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
## Configuration
|
|
83
|
+
|
|
84
|
+
### Default Configuration
|
|
85
|
+
|
|
86
|
+
```json
|
|
87
|
+
{
|
|
88
|
+
"user_id_col": "user_id",
|
|
89
|
+
"time_col": "base_month",
|
|
90
|
+
"segment_col": "segment",
|
|
91
|
+
"cluster_col": "cluster_kmeans",
|
|
92
|
+
"exclude_cols": ["user_id", "base_month", "segment"],
|
|
93
|
+
"include_cols": null,
|
|
94
|
+
"n_neighbors": 50,
|
|
95
|
+
"min_dist": 0.1,
|
|
96
|
+
"random_state": 42,
|
|
97
|
+
"skip_time": false
|
|
98
|
+
}
|
|
99
|
+
```
|
|
100
|
+
## Preset Configurations
|
|
101
|
+
|
|
102
|
+
### Default (General Purpose)
|
|
103
|
+
```bash
|
|
104
|
+
tedcheck data.csv --preset default
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
### E-commerce
|
|
108
|
+
```bash
|
|
109
|
+
tedcheck data.csv --preset ecommerce
|
|
110
|
+
# Uses: customer_id, purchase_month, customer_tier
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### SaaS
|
|
114
|
+
```bash
|
|
115
|
+
tedcheck data.csv --preset saas
|
|
116
|
+
# Uses: account_id, billing_month, account_segment
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## CLI Options
|
|
120
|
+
|
|
121
|
+
```
|
|
122
|
+
Usage: tedcheck <csv_file> [OPTIONS]
|
|
123
|
+
|
|
124
|
+
Options:
|
|
125
|
+
--base-month <value> Filter by specific month
|
|
126
|
+
--user-id <col> User ID column name
|
|
127
|
+
--time-col <col> Time column name
|
|
128
|
+
--segment-col <col> Segment column name
|
|
129
|
+
--cluster-col <col> Cluster column name
|
|
130
|
+
--exclude-cols <col1,col2> Columns to exclude
|
|
131
|
+
--include-cols <col1,col2> Columns to include only
|
|
132
|
+
--n-neighbors <int> UMAP n_neighbors
|
|
133
|
+
--min-dist <float> UMAP min_dist
|
|
134
|
+
--metrics Calculate and save metrics
|
|
135
|
+
--config <json_file> Load config from JSON
|
|
136
|
+
--preset <name> Load preset (default, ecommerce, saas)
|
|
137
|
+
--skip-time Skip time column if not available
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## Output Files
|
|
141
|
+
|
|
142
|
+
- `*_umap_results.csv` - UMAP coordinates with user IDs and segments
|
|
143
|
+
- `umap_segment_*.html` - Interactive visualizations by month (if time column exists)
|
|
144
|
+
- `umap_segment_all.html` - Single visualization (if no time column)
|
|
145
|
+
- `umap_metrics.json` - Quality metrics (with `--metrics` flag)
|
|
146
|
+
- `feature_importance.csv` - Feature importance scores (with `--metrics` flag)
|
|
147
|
+
|
|
148
|
+
## Metrics Explained
|
|
149
|
+
|
|
150
|
+
### Silhouette Score
|
|
151
|
+
- Range: -1 to 1
|
|
152
|
+
- 1: Well-separated clusters
|
|
153
|
+
- 0: Overlapping clusters
|
|
154
|
+
- -1: Incorrect assignment
|
|
155
|
+
|
|
156
|
+
### Feature Importance
|
|
157
|
+
- Importance of each feature in UMAP dimensions
|
|
158
|
+
- Higher = More influential
|
|
159
|
+
|
|
160
|
+
### Purity Metrics
|
|
161
|
+
- **Homogeneity**: Segmentation purity (0-1)
|
|
162
|
+
- **Completeness**: Cluster completeness (0-1)
|
|
163
|
+
- **V-Measure**: Harmonic mean of homogeneity and completeness (0-1)
|
|
164
|
+
|
|
165
|
+
## Package Structure
|
|
166
|
+
|
|
167
|
+
```
|
|
168
|
+
tedcheck/
|
|
169
|
+
├── __init__.py # Package initialization
|
|
170
|
+
├── config.py # Configuration class
|
|
171
|
+
├── features.py # Core UMAP functions
|
|
172
|
+
├── cli.py # Command-line interface
|
|
173
|
+
├── utils.py # Utility functions
|
|
174
|
+
├── exceptions.py # Custom exceptions
|
|
175
|
+
├── logger.py # Logging setup
|
|
176
|
+
└── configs/ # Preset configurations
|
|
177
|
+
├── default.json
|
|
178
|
+
├── ecommerce.json
|
|
179
|
+
└── saas.json
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
## API Reference
|
|
183
|
+
|
|
184
|
+
### `Config` Class
|
|
185
|
+
```python
|
|
186
|
+
from tedcheck import Config
|
|
187
|
+
|
|
188
|
+
config = Config(
|
|
189
|
+
user_id_col='id',
|
|
190
|
+
segment_col='tier',
|
|
191
|
+
skip_time=True
|
|
192
|
+
)
|
|
193
|
+
|
|
194
|
+
# Validate columns
|
|
195
|
+
missing = config.validate_columns(df)
|
|
196
|
+
|
|
197
|
+
# Load from file
|
|
198
|
+
config = Config.from_json('config.json')
|
|
199
|
+
|
|
200
|
+
# Load preset
|
|
201
|
+
config = Config.from_preset('ecommerce')
|
|
202
|
+
|
|
203
|
+
# Save config
|
|
204
|
+
config.to_json('my_config.json')
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
### `apply_umap_reduction()` Function
|
|
208
|
+
```python
|
|
209
|
+
from tedcheck import apply_umap_reduction, Config
|
|
210
|
+
|
|
211
|
+
df_umap, embedding, num_cols = apply_umap_reduction(
|
|
212
|
+
df,
|
|
213
|
+
config=config
|
|
214
|
+
)
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
### `calculate_umap_metrics()` Function
|
|
218
|
+
```python
|
|
219
|
+
from tedcheck import calculate_umap_metrics
|
|
220
|
+
|
|
221
|
+
metrics = calculate_umap_metrics(
|
|
222
|
+
df_umap,
|
|
223
|
+
embedding,
|
|
224
|
+
df,
|
|
225
|
+
config=config
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
print(metrics['silhouette_avg'])
|
|
229
|
+
print(metrics['feature_importance'])
|
|
230
|
+
print(metrics['purity_metrics'])
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
## Troubleshooting
|
|
234
|
+
|
|
235
|
+
### Missing Column Error
|
|
236
|
+
```bash
|
|
237
|
+
# Check available columns
|
|
238
|
+
python -c "import pandas as pd; print(pd.read_csv('data.csv').columns.tolist())"
|
|
239
|
+
|
|
240
|
+
# Use --skip-time if time column doesn't exist
|
|
241
|
+
tedcheck data.csv --skip-time
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
### Wrong Column Names
|
|
245
|
+
```bash
|
|
246
|
+
# Specify correct column names
|
|
247
|
+
tedcheck data.csv --user-id id --segment-col group --time-col month
|
|
248
|
+
```
|
|
249
|
+
|
|
250
|
+
### Memory Issues
|
|
251
|
+
```bash
|
|
252
|
+
# Use --include-cols to restrict the run to selected feature columns
|
|
253
|
+
tedcheck data.csv --include-cols feature1,feature2,feature3
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
## Contributing
|
|
257
|
+
|
|
258
|
+
Contributions welcome! Please submit pull requests or issues on GitHub.
|
|
259
|
+
|
|
260
|
+
## License
|
|
261
|
+
|
|
262
|
+
MIT License - See LICENSE file for details
|
|
263
|
+
|
|
264
|
+
## Author
|
|
265
|
+
|
|
266
|
+
Tergel Munkhbaatar
|
|
267
|
+
|
|
268
|
+
## Version
|
|
269
|
+
|
|
270
|
+
0.1.0
|
tedcheck-0.1.0/README.md
ADDED
|
@@ -0,0 +1,252 @@
|
|
|
1
|
+
# TEDCHECK - UMAP Segment Validation Tool
|
|
2
|
+
|
|
3
|
+
A comprehensive Python package for UMAP-based customer segmentation visualization and validation.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **Flexible Configuration**: Customize column names, exclude/include features dynamically
|
|
8
|
+
- **UMAP Dimensionality Reduction**: 2D visualization of customer segments
|
|
9
|
+
- **Quality Metrics**: Calculate silhouette scores, feature importance, and purity metrics
|
|
10
|
+
- **Interactive Visualizations**: Plotly-based interactive charts
|
|
11
|
+
- **Preset Configurations**: Built-in presets for different use cases
|
|
12
|
+
- **Package-Ready**: Usable both as a terminal CLI and as a Python library
|
|
13
|
+
|
|
14
|
+
## Installation
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
pip install -e .
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
## Quick Start
|
|
21
|
+
|
|
22
|
+
### Terminal Usage
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
# Using default configuration
|
|
26
|
+
tedcheck data.csv
|
|
27
|
+
|
|
28
|
+
# With custom columns
|
|
29
|
+
tedcheck data.csv --user-id customer_id --segment-col tier --time-col month
|
|
30
|
+
|
|
31
|
+
# Using presets
|
|
32
|
+
tedcheck data.csv --preset ecommerce --metrics
|
|
33
|
+
|
|
34
|
+
# Custom configuration file
|
|
35
|
+
tedcheck data.csv --config my_config.json
|
|
36
|
+
|
|
37
|
+
# Skip time column if not available
|
|
38
|
+
tedcheck data.csv --skip-time
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### Python Usage
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
from tedcheck import Config, apply_umap_reduction, calculate_umap_metrics
|
|
45
|
+
import pandas as pd
|
|
46
|
+
|
|
47
|
+
# Load data
|
|
48
|
+
df = pd.read_csv('data.csv')
|
|
49
|
+
|
|
50
|
+
# Create config
|
|
51
|
+
config = Config(
|
|
52
|
+
user_id_col='customer_id',
|
|
53
|
+
segment_col='tier',
|
|
54
|
+
skip_time=False
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
# Apply UMAP reduction
|
|
58
|
+
df_umap, embedding, num_cols = apply_umap_reduction(df, config=config)
|
|
59
|
+
|
|
60
|
+
# Calculate metrics
|
|
61
|
+
metrics = calculate_umap_metrics(df_umap, embedding, df, config=config)
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
## Configuration
|
|
65
|
+
|
|
66
|
+
### Default Configuration
|
|
67
|
+
|
|
68
|
+
```json
|
|
69
|
+
{
|
|
70
|
+
"user_id_col": "user_id",
|
|
71
|
+
"time_col": "base_month",
|
|
72
|
+
"segment_col": "segment",
|
|
73
|
+
"cluster_col": "cluster_kmeans",
|
|
74
|
+
"exclude_cols": ["user_id", "base_month", "segment"],
|
|
75
|
+
"include_cols": null,
|
|
76
|
+
"n_neighbors": 50,
|
|
77
|
+
"min_dist": 0.1,
|
|
78
|
+
"random_state": 42,
|
|
79
|
+
"skip_time": false
|
|
80
|
+
}
|
|
81
|
+
```
|
|
82
|
+
## Preset Configurations
|
|
83
|
+
|
|
84
|
+
### Default (General Purpose)
|
|
85
|
+
```bash
|
|
86
|
+
tedcheck data.csv --preset default
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### E-commerce
|
|
90
|
+
```bash
|
|
91
|
+
tedcheck data.csv --preset ecommerce
|
|
92
|
+
# Uses: customer_id, purchase_month, customer_tier
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### SaaS
|
|
96
|
+
```bash
|
|
97
|
+
tedcheck data.csv --preset saas
|
|
98
|
+
# Uses: account_id, billing_month, account_segment
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## CLI Options
|
|
102
|
+
|
|
103
|
+
```
|
|
104
|
+
Usage: tedcheck <csv_file> [OPTIONS]
|
|
105
|
+
|
|
106
|
+
Options:
|
|
107
|
+
--base-month <value> Filter by specific month
|
|
108
|
+
--user-id <col> User ID column name
|
|
109
|
+
--time-col <col> Time column name
|
|
110
|
+
--segment-col <col> Segment column name
|
|
111
|
+
--cluster-col <col> Cluster column name
|
|
112
|
+
--exclude-cols <col1,col2> Columns to exclude
|
|
113
|
+
--include-cols <col1,col2> Columns to include only
|
|
114
|
+
--n-neighbors <int> UMAP n_neighbors
|
|
115
|
+
--min-dist <float> UMAP min_dist
|
|
116
|
+
--metrics Calculate and save metrics
|
|
117
|
+
--config <json_file> Load config from JSON
|
|
118
|
+
--preset <name> Load preset (default, ecommerce, saas)
|
|
119
|
+
--skip-time Skip time column if not available
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## Output Files
|
|
123
|
+
|
|
124
|
+
- `*_umap_results.csv` - UMAP coordinates with user IDs and segments
|
|
125
|
+
- `umap_segment_*.html` - Interactive visualizations by month (if time column exists)
|
|
126
|
+
- `umap_segment_all.html` - Single visualization (if no time column)
|
|
127
|
+
- `umap_metrics.json` - Quality metrics (with `--metrics` flag)
|
|
128
|
+
- `feature_importance.csv` - Feature importance scores (with `--metrics` flag)
|
|
129
|
+
|
|
130
|
+
## Metrics Explained
|
|
131
|
+
|
|
132
|
+
### Silhouette Score
|
|
133
|
+
- Range: -1 to 1
|
|
134
|
+
- 1: Well-separated clusters
|
|
135
|
+
- 0: Overlapping clusters
|
|
136
|
+
- -1: Incorrect assignment
|
|
137
|
+
|
|
138
|
+
### Feature Importance
|
|
139
|
+
- Importance of each feature in UMAP dimensions
|
|
140
|
+
- Higher = More influential
|
|
141
|
+
|
|
142
|
+
### Purity Metrics
|
|
143
|
+
- **Homogeneity**: Segmentation purity (0-1)
|
|
144
|
+
- **Completeness**: Cluster completeness (0-1)
|
|
145
|
+
- **V-Measure**: Harmonic mean of homogeneity and completeness (0-1)
|
|
146
|
+
|
|
147
|
+
## Package Structure
|
|
148
|
+
|
|
149
|
+
```
|
|
150
|
+
tedcheck/
|
|
151
|
+
├── __init__.py # Package initialization
|
|
152
|
+
├── config.py # Configuration class
|
|
153
|
+
├── features.py # Core UMAP functions
|
|
154
|
+
├── cli.py # Command-line interface
|
|
155
|
+
├── utils.py # Utility functions
|
|
156
|
+
├── exceptions.py # Custom exceptions
|
|
157
|
+
├── logger.py # Logging setup
|
|
158
|
+
└── configs/ # Preset configurations
|
|
159
|
+
├── default.json
|
|
160
|
+
├── ecommerce.json
|
|
161
|
+
└── saas.json
|
|
162
|
+
```
|
|
163
|
+
|
|
164
|
+
## API Reference
|
|
165
|
+
|
|
166
|
+
### `Config` Class
|
|
167
|
+
```python
|
|
168
|
+
from tedcheck import Config
|
|
169
|
+
|
|
170
|
+
config = Config(
|
|
171
|
+
user_id_col='id',
|
|
172
|
+
segment_col='tier',
|
|
173
|
+
skip_time=True
|
|
174
|
+
)
|
|
175
|
+
|
|
176
|
+
# Validate columns
|
|
177
|
+
missing = config.validate_columns(df)
|
|
178
|
+
|
|
179
|
+
# Load from file
|
|
180
|
+
config = Config.from_json('config.json')
|
|
181
|
+
|
|
182
|
+
# Load preset
|
|
183
|
+
config = Config.from_preset('ecommerce')
|
|
184
|
+
|
|
185
|
+
# Save config
|
|
186
|
+
config.to_json('my_config.json')
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
### `apply_umap_reduction()` Function
|
|
190
|
+
```python
|
|
191
|
+
from tedcheck import apply_umap_reduction, Config
|
|
192
|
+
|
|
193
|
+
df_umap, embedding, num_cols = apply_umap_reduction(
|
|
194
|
+
df,
|
|
195
|
+
config=config
|
|
196
|
+
)
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
### `calculate_umap_metrics()` Function
|
|
200
|
+
```python
|
|
201
|
+
from tedcheck import calculate_umap_metrics
|
|
202
|
+
|
|
203
|
+
metrics = calculate_umap_metrics(
|
|
204
|
+
df_umap,
|
|
205
|
+
embedding,
|
|
206
|
+
df,
|
|
207
|
+
config=config
|
|
208
|
+
)
|
|
209
|
+
|
|
210
|
+
print(metrics['silhouette_avg'])
|
|
211
|
+
print(metrics['feature_importance'])
|
|
212
|
+
print(metrics['purity_metrics'])
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
## Troubleshooting
|
|
216
|
+
|
|
217
|
+
### Missing Column Error
|
|
218
|
+
```bash
|
|
219
|
+
# Check available columns
|
|
220
|
+
python -c "import pandas as pd; print(pd.read_csv('data.csv').columns.tolist())"
|
|
221
|
+
|
|
222
|
+
# Use --skip-time if time column doesn't exist
|
|
223
|
+
tedcheck data.csv --skip-time
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
### Wrong Column Names
|
|
227
|
+
```bash
|
|
228
|
+
# Specify correct column names
|
|
229
|
+
tedcheck data.csv --user-id id --segment-col group --time-col month
|
|
230
|
+
```
|
|
231
|
+
|
|
232
|
+
### Memory Issues
|
|
233
|
+
```bash
|
|
234
|
+
# Use --include-cols to restrict the run to selected feature columns
|
|
235
|
+
tedcheck data.csv --include-cols feature1,feature2,feature3
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
## Contributing
|
|
239
|
+
|
|
240
|
+
Contributions welcome! Please submit pull requests or issues on GitHub.
|
|
241
|
+
|
|
242
|
+
## License
|
|
243
|
+
|
|
244
|
+
MIT License - See LICENSE file for details
|
|
245
|
+
|
|
246
|
+
## Author
|
|
247
|
+
|
|
248
|
+
Tergel Munkhbaatar
|
|
249
|
+
|
|
250
|
+
## Version
|
|
251
|
+
|
|
252
|
+
0.1.0
|
tedcheck-0.1.0/pyproject.toml
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=45", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "tedcheck"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "UMAP Segment Validation tool"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.8"
|
|
11
|
+
license = {text = "MIT"}
|
|
12
|
+
authors = [{name = "Tergel Munkhbaatar", email = "tergelitu@example.com"}]
|
|
13
|
+
classifiers = [
|
|
14
|
+
"Development Status :: 3 - Alpha",
|
|
15
|
+
"Intended Audience :: Developers",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Programming Language :: Python :: 3.8",
|
|
19
|
+
"Programming Language :: Python :: 3.9",
|
|
20
|
+
"Programming Language :: Python :: 3.10",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[project.urls]
|
|
24
|
+
Homepage = "https://github.com/tergelitu/tedcheck"
|
|
25
|
+
Repository = "https://github.com/tergelitu/tedcheck.git"
|
|
26
|
+
|
|
27
|
+
[project.scripts]
|
|
28
|
+
tedcheck = "tedcheck.cli:main"
|
tedcheck-0.1.0/setup.cfg
ADDED
tedcheck-0.1.0/tedcheck/__init__.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""TEDCHECK - UMAP Segment Validation Tool"""
|
|
2
|
+
|
|
3
|
+
__version__ = "0.1.0"
|
|
4
|
+
__author__ = "Tergel Munkhbaatar"
|
|
5
|
+
|
|
6
|
+
from .config import Config
|
|
7
|
+
from .features import (
|
|
8
|
+
apply_umap_reduction,
|
|
9
|
+
calculate_umap_metrics,
|
|
10
|
+
plot_umap_by_segment,
|
|
11
|
+
)
|
|
12
|
+
from .utils import (
|
|
13
|
+
get_color_pool,
|
|
14
|
+
assign_colors_to_segments,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
from .exceptions import (
|
|
18
|
+
TEDCheckException,
|
|
19
|
+
ConfigError,
|
|
20
|
+
MissingColumnsError,
|
|
21
|
+
InvalidPresetError,
|
|
22
|
+
DataValidationError,
|
|
23
|
+
)
|
|
24
|
+
from .logger import setup_logger, get_logger
|
|
25
|
+
|
|
26
|
+
__all__ = [
|
|
27
|
+
"Config",
|
|
28
|
+
"apply_umap_reduction",
|
|
29
|
+
"calculate_umap_metrics",
|
|
30
|
+
"plot_umap_by_segment",
|
|
31
|
+
"get_color_pool",
|
|
32
|
+
"assign_colors_to_segments",
|
|
33
|
+
"TEDCheckException",
|
|
34
|
+
"ConfigError",
|
|
35
|
+
"MissingColumnsError",
|
|
36
|
+
"InvalidPresetError",
|
|
37
|
+
"DataValidationError",
|
|
38
|
+
"setup_logger",
|
|
39
|
+
"get_logger",
|
|
40
|
+
]
|