tscli-darts 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tscli_darts-0.1.0/PKG-INFO +204 -0
- tscli_darts-0.1.0/README.md +166 -0
- tscli_darts-0.1.0/pyproject.toml +69 -0
- tscli_darts-0.1.0/setup.cfg +4 -0
- tscli_darts-0.1.0/src/tscli/__init__.py +5 -0
- tscli_darts-0.1.0/src/tscli/__main__.py +5 -0
- tscli_darts-0.1.0/src/tscli/analysis.py +63 -0
- tscli_darts-0.1.0/src/tscli/data.py +64 -0
- tscli_darts-0.1.0/src/tscli/forecasting.py +531 -0
- tscli_darts-0.1.0/src/tscli/main.py +422 -0
- tscli_darts-0.1.0/src/tscli/preprocessing.py +113 -0
- tscli_darts-0.1.0/src/tscli_darts.egg-info/PKG-INFO +204 -0
- tscli_darts-0.1.0/src/tscli_darts.egg-info/SOURCES.txt +15 -0
- tscli_darts-0.1.0/src/tscli_darts.egg-info/dependency_links.txt +1 -0
- tscli_darts-0.1.0/src/tscli_darts.egg-info/entry_points.txt +2 -0
- tscli_darts-0.1.0/src/tscli_darts.egg-info/requires.txt +17 -0
- tscli_darts-0.1.0/src/tscli_darts.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tscli-darts
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A DARTS-first CLI for time series analysis, preprocessing, and forecasting
|
|
5
|
+
Author: Senhores do Tempo
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/Senhores-do-Tempo/tscli
|
|
8
|
+
Project-URL: Repository, https://github.com/Senhores-do-Tempo/tscli
|
|
9
|
+
Project-URL: Issues, https://github.com/Senhores-do-Tempo/tscli/issues
|
|
10
|
+
Keywords: time-series,forecasting,darts,cli,analytics
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: Operating System :: OS Independent
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
20
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
21
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Description-Content-Type: text/markdown
|
|
24
|
+
Requires-Dist: darts>=0.30.0
|
|
25
|
+
Requires-Dist: matplotlib>=3.9.0
|
|
26
|
+
Requires-Dist: numpy>=1.26.0
|
|
27
|
+
Requires-Dist: pandas>=2.2.0
|
|
28
|
+
Requires-Dist: rich>=13.7.0
|
|
29
|
+
Requires-Dist: tqdm>=4.66.0
|
|
30
|
+
Requires-Dist: typer>=0.12.3
|
|
31
|
+
Provides-Extra: classical
|
|
32
|
+
Requires-Dist: pytorch-lightning>=2.6.0; extra == "classical"
|
|
33
|
+
Provides-Extra: autoarima
|
|
34
|
+
Requires-Dist: statsforecast>=2.0.0; extra == "autoarima"
|
|
35
|
+
Provides-Extra: full
|
|
36
|
+
Requires-Dist: pytorch-lightning>=2.6.0; extra == "full"
|
|
37
|
+
Requires-Dist: statsforecast>=2.0.0; extra == "full"
|
|
38
|
+
|
|
39
|
+
# tscli
|
|
40
|
+
|
|
41
|
+
`tscli` is a command-line tool for time series analysis and forecasting built around [DARTS](https://unit8co.github.io/darts/).
|
|
42
|
+
|
|
43
|
+
It is designed for a simple workflow:
|
|
44
|
+
|
|
45
|
+
- load a CSV
|
|
46
|
+
- clean common formatting issues
|
|
47
|
+
- inspect and analyze the series
|
|
48
|
+
- compare models on a holdout window
|
|
49
|
+
- generate and export forecasts
|
|
50
|
+
|
|
51
|
+
## What It Does
|
|
52
|
+
|
|
53
|
+
- works directly from CSV files
|
|
54
|
+
- detects and fixes common time-series formatting issues
|
|
55
|
+
- supports a clean `date + target` workflow
|
|
56
|
+
- benchmarks multiple models with `MAE`, `RMSE`, and `MAPE`
|
|
57
|
+
- exports cleaned datasets, forecasts, benchmark tables, and plots
|
|
58
|
+
- provides an interactive terminal mode
|
|
59
|
+
|
|
60
|
+
## Install
|
|
61
|
+
|
|
62
|
+
Local development install:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
pip install -e .
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
When published to PyPI, the standard install will be:
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
pip install tscli-darts
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
Optional extras:
|
|
75
|
+
|
|
76
|
+
- Classical DARTS models such as `theta` and `exponential-smoothing`
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
pip install -e .[classical]
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
- AutoARIMA support
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
pip install -e .[autoarima]
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
- Everything
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
pip install -e .[full]
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
## Typical Workflow
|
|
95
|
+
|
|
96
|
+
### 1. Inspect the raw CSV
|
|
97
|
+
|
|
98
|
+
Use this first to confirm the time column, target column, inferred frequency, and any preprocessing fixes.
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
python -m tscli inspect ./sales.csv --time-col Month --target-col Sales
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### 2. Clean the dataset
|
|
105
|
+
|
|
106
|
+
If the CSV has shorthand dates, duplicate timestamps, formatted numeric values, or other simple issues, save a normalized version.
|
|
107
|
+
|
|
108
|
+
```bash
|
|
109
|
+
python -m tscli clean ./sales.csv --time-col Month --target-col Sales --output ./cleaned_sales.csv
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### 3. Analyze the time series
|
|
113
|
+
|
|
114
|
+
Get quick descriptive statistics and recent observations before forecasting.
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
python -m tscli analyze ./cleaned_sales.csv --time-col Month --target-col Sales
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### 4. Benchmark models
|
|
121
|
+
|
|
122
|
+
Run several models against a holdout window, compare metrics, and optionally export the score table, forecast, and plot.
|
|
123
|
+
|
|
124
|
+
```bash
|
|
125
|
+
python -m tscli benchmark ./cleaned_sales.csv --time-col Month --target-col Sales --horizon 12 --models all --scores-output ./scores.csv --forecast-output ./best_forecast.csv --plot-output ./benchmark.png
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### 5. Generate a forecast
|
|
129
|
+
|
|
130
|
+
Forecast future periods with a chosen model and optionally export the forecast and chart.
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
python -m tscli forecast ./cleaned_sales.csv --time-col Month --target-col Sales --model naive-drift --horizon 12 --output ./forecast.csv --plot-output ./forecast.png
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
### 6. Use interactive mode
|
|
137
|
+
|
|
138
|
+
Run the full workflow from a menu-driven terminal interface.
|
|
139
|
+
|
|
140
|
+
```bash
|
|
141
|
+
python -m tscli interactive ./cleaned_sales.csv --time-col Month --target-col Sales
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## Commands
|
|
145
|
+
|
|
146
|
+
- `inspect`: summarize the dataset and show preprocessing fixes
|
|
147
|
+
- `clean`: normalize and save a cleaned CSV
|
|
148
|
+
- `analyze`: print descriptive statistics and recent observations
|
|
149
|
+
- `forecast`: generate future values from one model
|
|
150
|
+
- `benchmark`: compare several models on a holdout window
|
|
151
|
+
- `models`: list supported forecasting models
|
|
152
|
+
- `interactive`: launch the terminal menu workflow
|
|
153
|
+
|
|
154
|
+
## Forecasting Models
|
|
155
|
+
|
|
156
|
+
Supported models:
|
|
157
|
+
|
|
158
|
+
- `naive-last`
|
|
159
|
+
- `naive-drift`
|
|
160
|
+
- `naive-seasonal`
|
|
161
|
+
- `moving-average`
|
|
162
|
+
- `weighted-moving-average`
|
|
163
|
+
- `exp-smoothing`
|
|
164
|
+
- `seasonal-average`
|
|
165
|
+
- `seasonal-median`
|
|
166
|
+
- `linear-trend`
|
|
167
|
+
- `quadratic-trend`
|
|
168
|
+
- `arima`
|
|
169
|
+
- `sarima`
|
|
170
|
+
- `theta`
|
|
171
|
+
- `exponential-smoothing`
|
|
172
|
+
- `auto-arima`
|
|
173
|
+
|
|
174
|
+
## Example Dataset
|
|
175
|
+
|
|
176
|
+
The bundled `examples/sales.csv` shows a shorthand monthly sales format like:
|
|
177
|
+
|
|
178
|
+
```csv
|
|
179
|
+
Month,Sales
|
|
180
|
+
1-01,266.0
|
|
181
|
+
1-02,145.9
|
|
182
|
+
1-03,183.1
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
`tscli` will detect and normalize that `Month` column into proper first-of-month datetimes.
|
|
186
|
+
|
|
187
|
+
## Notes
|
|
188
|
+
|
|
189
|
+
- The CSV should include a target column and optionally a time column.
|
|
190
|
+
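- If no time column is provided, `tscli` first looks for a column named `date`, `datetime`, `timestamp`, `ds`, or `time` (after column-name normalization); if none is found, it builds a synthetic integer index.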
|
|
191
|
+
- If DARTS cannot infer a frequency automatically, forecasting still uses the ordered observations.
|
|
192
|
+
- Some classical DARTS models depend on optional libraries; when unavailable, `forecast` explains the missing requirement and `benchmark` skips the model.
|
|
193
|
+
- `arima` and `sarima` remain DARTS-first models, with fallback behavior only when the DARTS classical path is unavailable.
|
|
194
|
+
|
|
195
|
+
## Packaging
|
|
196
|
+
|
|
197
|
+
To build distributable artifacts locally:
|
|
198
|
+
|
|
199
|
+
```bash
|
|
200
|
+
python -m pip install build
|
|
201
|
+
python -m build
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
This will generate source and wheel distributions in `dist/`.
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
# tscli
|
|
2
|
+
|
|
3
|
+
`tscli` is a command-line tool for time series analysis and forecasting built around [DARTS](https://unit8co.github.io/darts/).
|
|
4
|
+
|
|
5
|
+
It is designed for a simple workflow:
|
|
6
|
+
|
|
7
|
+
- load a CSV
|
|
8
|
+
- clean common formatting issues
|
|
9
|
+
- inspect and analyze the series
|
|
10
|
+
- compare models on a holdout window
|
|
11
|
+
- generate and export forecasts
|
|
12
|
+
|
|
13
|
+
## What It Does
|
|
14
|
+
|
|
15
|
+
- works directly from CSV files
|
|
16
|
+
- detects and fixes common time-series formatting issues
|
|
17
|
+
- supports a clean `date + target` workflow
|
|
18
|
+
- benchmarks multiple models with `MAE`, `RMSE`, and `MAPE`
|
|
19
|
+
- exports cleaned datasets, forecasts, benchmark tables, and plots
|
|
20
|
+
- provides an interactive terminal mode
|
|
21
|
+
|
|
22
|
+
## Install
|
|
23
|
+
|
|
24
|
+
Local development install:
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
pip install -e .
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
When published to PyPI, the standard install will be:
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
pip install tscli-darts
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Optional extras:
|
|
37
|
+
|
|
38
|
+
- Classical DARTS models such as `theta` and `exponential-smoothing`
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
pip install -e .[classical]
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
- AutoARIMA support
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pip install -e .[autoarima]
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
- Everything
|
|
51
|
+
|
|
52
|
+
```bash
|
|
53
|
+
pip install -e .[full]
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Typical Workflow
|
|
57
|
+
|
|
58
|
+
### 1. Inspect the raw CSV
|
|
59
|
+
|
|
60
|
+
Use this first to confirm the time column, target column, inferred frequency, and any preprocessing fixes.
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
python -m tscli inspect ./sales.csv --time-col Month --target-col Sales
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
### 2. Clean the dataset
|
|
67
|
+
|
|
68
|
+
If the CSV has shorthand dates, duplicate timestamps, formatted numeric values, or other simple issues, save a normalized version.
|
|
69
|
+
|
|
70
|
+
```bash
|
|
71
|
+
python -m tscli clean ./sales.csv --time-col Month --target-col Sales --output ./cleaned_sales.csv
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### 3. Analyze the time series
|
|
75
|
+
|
|
76
|
+
Get quick descriptive statistics and recent observations before forecasting.
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
python -m tscli analyze ./cleaned_sales.csv --time-col Month --target-col Sales
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
### 4. Benchmark models
|
|
83
|
+
|
|
84
|
+
Run several models against a holdout window, compare metrics, and optionally export the score table, forecast, and plot.
|
|
85
|
+
|
|
86
|
+
```bash
|
|
87
|
+
python -m tscli benchmark ./cleaned_sales.csv --time-col Month --target-col Sales --horizon 12 --models all --scores-output ./scores.csv --forecast-output ./best_forecast.csv --plot-output ./benchmark.png
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### 5. Generate a forecast
|
|
91
|
+
|
|
92
|
+
Forecast future periods with a chosen model and optionally export the forecast and chart.
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
python -m tscli forecast ./cleaned_sales.csv --time-col Month --target-col Sales --model naive-drift --horizon 12 --output ./forecast.csv --plot-output ./forecast.png
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### 6. Use interactive mode
|
|
99
|
+
|
|
100
|
+
Run the full workflow from a menu-driven terminal interface.
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
python -m tscli interactive ./cleaned_sales.csv --time-col Month --target-col Sales
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
## Commands
|
|
107
|
+
|
|
108
|
+
- `inspect`: summarize the dataset and show preprocessing fixes
|
|
109
|
+
- `clean`: normalize and save a cleaned CSV
|
|
110
|
+
- `analyze`: print descriptive statistics and recent observations
|
|
111
|
+
- `forecast`: generate future values from one model
|
|
112
|
+
- `benchmark`: compare several models on a holdout window
|
|
113
|
+
- `models`: list supported forecasting models
|
|
114
|
+
- `interactive`: launch the terminal menu workflow
|
|
115
|
+
|
|
116
|
+
## Forecasting Models
|
|
117
|
+
|
|
118
|
+
Supported models:
|
|
119
|
+
|
|
120
|
+
- `naive-last`
|
|
121
|
+
- `naive-drift`
|
|
122
|
+
- `naive-seasonal`
|
|
123
|
+
- `moving-average`
|
|
124
|
+
- `weighted-moving-average`
|
|
125
|
+
- `exp-smoothing`
|
|
126
|
+
- `seasonal-average`
|
|
127
|
+
- `seasonal-median`
|
|
128
|
+
- `linear-trend`
|
|
129
|
+
- `quadratic-trend`
|
|
130
|
+
- `arima`
|
|
131
|
+
- `sarima`
|
|
132
|
+
- `theta`
|
|
133
|
+
- `exponential-smoothing`
|
|
134
|
+
- `auto-arima`
|
|
135
|
+
|
|
136
|
+
## Example Dataset
|
|
137
|
+
|
|
138
|
+
The bundled `examples/sales.csv` shows a shorthand monthly sales format like:
|
|
139
|
+
|
|
140
|
+
```csv
|
|
141
|
+
Month,Sales
|
|
142
|
+
1-01,266.0
|
|
143
|
+
1-02,145.9
|
|
144
|
+
1-03,183.1
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
`tscli` will detect and normalize that `Month` column into proper first-of-month datetimes.
|
|
148
|
+
|
|
149
|
+
## Notes
|
|
150
|
+
|
|
151
|
+
- The CSV should include a target column and optionally a time column.
|
|
152
|
+
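- If no time column is provided, `tscli` first looks for a column named `date`, `datetime`, `timestamp`, `ds`, or `time` (after column-name normalization); if none is found, it builds a synthetic integer index.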
|
|
153
|
+
- If DARTS cannot infer a frequency automatically, forecasting still uses the ordered observations.
|
|
154
|
+
- Some classical DARTS models depend on optional libraries; when unavailable, `forecast` explains the missing requirement and `benchmark` skips the model.
|
|
155
|
+
- `arima` and `sarima` remain DARTS-first models, with fallback behavior only when the DARTS classical path is unavailable.
|
|
156
|
+
|
|
157
|
+
## Packaging
|
|
158
|
+
|
|
159
|
+
To build distributable artifacts locally:
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
python -m pip install build
|
|
163
|
+
python -m build
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
This will generate source and wheel distributions in `dist/`.
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "tscli-darts"
|
|
7
|
+
version = "0.1.0"
|
|
8
|
+
description = "A DARTS-first CLI for time series analysis, preprocessing, and forecasting"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "MIT"
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Senhores do Tempo" }
|
|
14
|
+
]
|
|
15
|
+
keywords = [
|
|
16
|
+
"time-series",
|
|
17
|
+
"forecasting",
|
|
18
|
+
"darts",
|
|
19
|
+
"cli",
|
|
20
|
+
"analytics",
|
|
21
|
+
]
|
|
22
|
+
classifiers = [
|
|
23
|
+
"Development Status :: 3 - Alpha",
|
|
24
|
+
"Intended Audience :: Developers",
|
|
25
|
+
"Intended Audience :: Science/Research",
|
|
26
|
+
"Operating System :: OS Independent",
|
|
27
|
+
"Programming Language :: Python :: 3",
|
|
28
|
+
"Programming Language :: Python :: 3.10",
|
|
29
|
+
"Programming Language :: Python :: 3.11",
|
|
30
|
+
"Programming Language :: Python :: 3.12",
|
|
31
|
+
"Programming Language :: Python :: 3.13",
|
|
32
|
+
"Topic :: Scientific/Engineering :: Information Analysis",
|
|
33
|
+
"Topic :: Software Development :: Libraries :: Python Modules",
|
|
34
|
+
]
|
|
35
|
+
dependencies = [
|
|
36
|
+
"darts>=0.30.0",
|
|
37
|
+
"matplotlib>=3.9.0",
|
|
38
|
+
"numpy>=1.26.0",
|
|
39
|
+
"pandas>=2.2.0",
|
|
40
|
+
"rich>=13.7.0",
|
|
41
|
+
"tqdm>=4.66.0",
|
|
42
|
+
"typer>=0.12.3",
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
[project.urls]
|
|
46
|
+
Homepage = "https://github.com/Senhores-do-Tempo/tscli"
|
|
47
|
+
Repository = "https://github.com/Senhores-do-Tempo/tscli"
|
|
48
|
+
Issues = "https://github.com/Senhores-do-Tempo/tscli/issues"
|
|
49
|
+
|
|
50
|
+
[project.optional-dependencies]
|
|
51
|
+
classical = [
|
|
52
|
+
"pytorch-lightning>=2.6.0",
|
|
53
|
+
]
|
|
54
|
+
autoarima = [
|
|
55
|
+
"statsforecast>=2.0.0",
|
|
56
|
+
]
|
|
57
|
+
full = [
|
|
58
|
+
"pytorch-lightning>=2.6.0",
|
|
59
|
+
"statsforecast>=2.0.0",
|
|
60
|
+
]
|
|
61
|
+
|
|
62
|
+
[project.scripts]
|
|
63
|
+
tscli = "tscli.main:main"
|
|
64
|
+
|
|
65
|
+
[tool.setuptools]
|
|
66
|
+
package-dir = {"" = "src"}
|
|
67
|
+
|
|
68
|
+
[tool.setuptools.packages.find]
|
|
69
|
+
where = ["src"]
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
import pandas as pd
|
|
6
|
+
|
|
7
|
+
from tscli.data import LoadedSeries
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class SeriesSummary:
    """Descriptive snapshot of one loaded time series.

    Field order is part of the constructor contract and must not change.
    """

    # Number of rows in the frame, including rows with a missing target.
    row_count: int
    # String form of the first and last values of the time column.
    start: str
    end: str
    # How many target values are missing; the statistics below ignore them.
    missing_target: int
    mean: float
    median: float
    minimum: float
    maximum: float
    # Population standard deviation (computed with ddof=0).
    std_dev: float
    # Frequency alias from pandas, or "not available" when none was inferred.
    inferred_frequency: str
    # "upward", "downward" or "flat": last non-missing value vs. the first.
    trend_direction: str
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def summarize_series(dataset: LoadedSeries) -> SeriesSummary:
    """Compute descriptive statistics for the target column of ``dataset``.

    Args:
        dataset: Loaded series exposing ``frame``, ``time_col`` and
            ``target_col``.

    Returns:
        A ``SeriesSummary`` with row count, time span, missing-value count,
        basic statistics, the inferred frequency and a coarse trend label.

    Raises:
        ValueError: If the target column has no non-missing values.
    """
    # The frame is only read here, so no defensive copy is needed.
    frame = dataset.frame
    target = frame[dataset.target_col]
    time_values = frame[dataset.time_col]

    inferred_frequency = "not available"
    # "__index__" marks the synthetic integer index, which has no frequency.
    if dataset.time_col != "__index__":
        try:
            inferred = pd.infer_freq(time_values)
        except (TypeError, ValueError):
            # pandas raises for fewer than three timestamps (ValueError) or
            # non-datetime values (TypeError); report "not available"
            # instead of crashing the whole summary.
            inferred = None
        if inferred:
            inferred_frequency = inferred

    clean_target = target.dropna()
    if clean_target.empty:
        raise ValueError("The target series is empty after dropping missing values.")

    # Coarse trend: compare the last non-missing value with the first one.
    trend_delta = clean_target.iloc[-1] - clean_target.iloc[0]
    if trend_delta > 0:
        trend_direction = "upward"
    elif trend_delta < 0:
        trend_direction = "downward"
    else:
        trend_direction = "flat"

    return SeriesSummary(
        row_count=len(frame),
        start=str(time_values.iloc[0]),
        end=str(time_values.iloc[-1]),
        missing_target=int(target.isna().sum()),
        mean=float(clean_target.mean()),
        median=float(clean_target.median()),
        minimum=float(clean_target.min()),
        maximum=float(clean_target.max()),
        # ddof=0: population standard deviation.
        std_dev=float(clean_target.std(ddof=0)),
        inferred_frequency=inferred_frequency,
        trend_direction=trend_direction,
    )
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def recent_observations(dataset: LoadedSeries, rows: int = 5) -> pd.DataFrame:
    """Return a copy of the last ``rows`` time/target pairs of ``dataset``.

    Only the time column and the target column are kept, in that order.
    """
    wanted_columns = [dataset.time_col, dataset.target_col]
    latest = dataset.frame[wanted_columns].tail(rows)
    # Hand back an independent frame so callers cannot mutate the dataset.
    return latest.copy()
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
import pandas as pd
|
|
7
|
+
|
|
8
|
+
from tscli.preprocessing import (
|
|
9
|
+
PreprocessingReport,
|
|
10
|
+
clean_numeric_column,
|
|
11
|
+
finalize_time_series,
|
|
12
|
+
normalize_columns,
|
|
13
|
+
parse_time_column,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class LoadedSeries:
    """A CSV-backed series together with the metadata gathered while loading.

    Field order is part of the constructor contract and must not change.
    """

    # Path of the CSV file the data was read from.
    source: Path
    # The loaded table, holding at least the time and target columns.
    frame: pd.DataFrame
    # Name of the time column; "__index__" when a synthetic index was created.
    time_col: str
    # Name of the column holding the values to analyse or forecast.
    target_col: str
    # Record of the fixes applied during loading.
    report: PreprocessingReport
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def load_csv(csv_path: Path, time_col: str | None, target_col: str) -> LoadedSeries:
    """Read a CSV file and prepare it as a ``LoadedSeries``.

    Args:
        csv_path: Location of the CSV file.
        time_col: Name of the time column. When ``None``, the first of
            ``date``, ``datetime``, ``timestamp``, ``ds`` or ``time`` present
            in the frame (and different from ``target_col``) is used; if none
            matches, a synthetic integer index named ``__index__`` is added.
        target_col: Name of the column holding the values of the series.

    Returns:
        The processed frame with the resolved column names and the
        preprocessing report.

    Raises:
        ValueError: If the target or time column is missing, if the time
            column has values that could not be parsed as datetime, or if
            the target column contains no numeric values.
    """
    report = PreprocessingReport()
    # Column lookups below run against the normalized column names.
    frame = normalize_columns(pd.read_csv(csv_path), report)
    if target_col not in frame.columns:
        raise ValueError(f"Target column '{target_col}' was not found in the CSV.")

    resolved_time_col = time_col
    if resolved_time_col is None:
        # Auto-detect a conventional time column, but never the target
        # itself: a target named e.g. "time" must not be parsed as dates.
        resolved_time_col = next(
            (
                candidate
                for candidate in ("date", "datetime", "timestamp", "ds", "time")
                if candidate != target_col and candidate in frame.columns
            ),
            None,
        )

    if resolved_time_col is None:
        # No usable time column: fall back to a 0..n-1 positional index.
        resolved_time_col = "__index__"
        frame[resolved_time_col] = pd.RangeIndex(start=0, stop=len(frame), step=1)
        report.add_fix("Created a synthetic integer time index because no time column was provided.")
    else:
        if resolved_time_col not in frame.columns:
            raise ValueError(f"Time column '{resolved_time_col}' was not found in the CSV.")
        frame = parse_time_column(frame, resolved_time_col, report)
        if frame[resolved_time_col].isna().any():
            raise ValueError(
                f"Time column '{resolved_time_col}' contains values that could not be parsed as datetime."
            )

    frame = clean_numeric_column(frame, target_col, report)
    if frame[target_col].isna().all():
        raise ValueError(f"Target column '{target_col}' does not contain numeric values.")
    frame = finalize_time_series(frame, resolved_time_col, target_col, report)

    return LoadedSeries(
        source=csv_path,
        frame=frame,
        time_col=resolved_time_col,
        target_col=target_col,
        report=report,
    )
|