tscli-darts 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,204 @@
1
+ Metadata-Version: 2.4
2
+ Name: tscli-darts
3
+ Version: 0.1.0
4
+ Summary: A DARTS-first CLI for time series analysis, preprocessing, and forecasting
5
+ Author: Senhores do Tempo
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/Senhores-do-Tempo/tscli
8
+ Project-URL: Repository, https://github.com/Senhores-do-Tempo/tscli
9
+ Project-URL: Issues, https://github.com/Senhores-do-Tempo/tscli/issues
10
+ Keywords: time-series,forecasting,darts,cli,analytics
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: Operating System :: OS Independent
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
21
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
22
+ Requires-Python: >=3.10
23
+ Description-Content-Type: text/markdown
24
+ Requires-Dist: darts>=0.30.0
25
+ Requires-Dist: matplotlib>=3.9.0
26
+ Requires-Dist: numpy>=1.26.0
27
+ Requires-Dist: pandas>=2.2.0
28
+ Requires-Dist: rich>=13.7.0
29
+ Requires-Dist: tqdm>=4.66.0
30
+ Requires-Dist: typer>=0.12.3
31
+ Provides-Extra: classical
32
+ Requires-Dist: pytorch-lightning>=2.6.0; extra == "classical"
33
+ Provides-Extra: autoarima
34
+ Requires-Dist: statsforecast>=2.0.0; extra == "autoarima"
35
+ Provides-Extra: full
36
+ Requires-Dist: pytorch-lightning>=2.6.0; extra == "full"
37
+ Requires-Dist: statsforecast>=2.0.0; extra == "full"
38
+
39
+ # tscli
40
+
41
+ `tscli` is a command-line tool for time series analysis and forecasting built around [DARTS](https://unit8co.github.io/darts/).
42
+
43
+ It is designed for a simple workflow:
44
+
45
+ - load a CSV
46
+ - clean common formatting issues
47
+ - inspect and analyze the series
48
+ - compare models on a holdout window
49
+ - generate and export forecasts
50
+
51
+ ## What It Does
52
+
53
+ - works directly from CSV files
54
+ - detects and fixes common time-series formatting issues
55
+ - supports a clean `date + target` workflow
56
+ - benchmarks multiple models with `MAE`, `RMSE`, and `MAPE`
57
+ - exports cleaned datasets, forecasts, benchmark tables, and plots
58
+ - provides an interactive terminal mode
59
+
60
+ ## Install
61
+
62
+ Local development install:
63
+
64
+ ```bash
65
+ pip install -e .
66
+ ```
67
+
68
+ Install the released package from PyPI:
69
+
70
+ ```bash
71
+ pip install tscli-darts
72
+ ```
73
+
74
+ Optional extras:
75
+
76
+ - Classical DARTS models such as `theta` and `exponential-smoothing`
77
+
78
+ ```bash
79
+ pip install -e .[classical]
80
+ ```
81
+
82
+ - AutoARIMA support
83
+
84
+ ```bash
85
+ pip install -e .[autoarima]
86
+ ```
87
+
88
+ - Everything
89
+
90
+ ```bash
91
+ pip install -e .[full]
92
+ ```
93
+
94
+ ## Typical Workflow
95
+
96
+ ### 1. Inspect the raw CSV
97
+
98
+ Use this first to confirm the time column, target column, inferred frequency, and any preprocessing fixes.
99
+
100
+ ```bash
101
+ python -m tscli inspect ./sales.csv --time-col Month --target-col Sales
102
+ ```
103
+
104
+ ### 2. Clean the dataset
105
+
106
+ If the CSV has shorthand dates, duplicate timestamps, formatted numeric values, or other simple issues, save a normalized version.
107
+
108
+ ```bash
109
+ python -m tscli clean ./sales.csv --time-col Month --target-col Sales --output ./cleaned_sales.csv
110
+ ```
111
+
112
+ ### 3. Analyze the time series
113
+
114
+ Get quick descriptive statistics and recent observations before forecasting.
115
+
116
+ ```bash
117
+ python -m tscli analyze ./cleaned_sales.csv --time-col Month --target-col Sales
118
+ ```
119
+
120
+ ### 4. Benchmark models
121
+
122
+ Run several models against a holdout window, compare metrics, and optionally export the score table, forecast, and plot.
123
+
124
+ ```bash
125
+ python -m tscli benchmark ./cleaned_sales.csv --time-col Month --target-col Sales --horizon 12 --models all --scores-output ./scores.csv --forecast-output ./best_forecast.csv --plot-output ./benchmark.png
126
+ ```
127
+
128
+ ### 5. Generate a forecast
129
+
130
+ Forecast future periods with a chosen model and optionally export the forecast and chart.
131
+
132
+ ```bash
133
+ python -m tscli forecast ./cleaned_sales.csv --time-col Month --target-col Sales --model naive-drift --horizon 12 --output ./forecast.csv --plot-output ./forecast.png
134
+ ```
135
+
136
+ ### 6. Use interactive mode
137
+
138
+ Run the full workflow from a menu-driven terminal interface.
139
+
140
+ ```bash
141
+ python -m tscli interactive ./cleaned_sales.csv --time-col Month --target-col Sales
142
+ ```
143
+
144
+ ## Commands
145
+
146
+ - `inspect`: summarize the dataset and show preprocessing fixes
147
+ - `clean`: normalize and save a cleaned CSV
148
+ - `analyze`: print descriptive statistics and recent observations
149
+ - `forecast`: generate future values from one model
150
+ - `benchmark`: compare several models on a holdout window
151
+ - `models`: list supported forecasting models
152
+ - `interactive`: launch the terminal menu workflow
153
+
154
+ ## Forecasting Models
155
+
156
+ Supported models:
157
+
158
+ - `naive-last`
159
+ - `naive-drift`
160
+ - `naive-seasonal`
161
+ - `moving-average`
162
+ - `weighted-moving-average`
163
+ - `exp-smoothing`
164
+ - `seasonal-average`
165
+ - `seasonal-median`
166
+ - `linear-trend`
167
+ - `quadratic-trend`
168
+ - `arima`
169
+ - `sarima`
170
+ - `theta`
171
+ - `exponential-smoothing`
172
+ - `auto-arima`
173
+
174
+ ## Example Dataset
175
+
176
+ The bundled `examples/sales.csv` shows a shorthand monthly sales format like:
177
+
178
+ ```csv
179
+ Month,Sales
180
+ 1-01,266.0
181
+ 1-02,145.9
182
+ 1-03,183.1
183
+ ```
184
+
185
+ `tscli` will detect and normalize that `Month` column into proper first-of-month datetimes.
186
+
187
+ ## Notes
188
+
189
+ - The CSV should include a target column and optionally a time column.
190
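+ - If no time column is provided, `tscli` looks for a column named `date`, `datetime`, `timestamp`, `ds`, or `time` (after column-name normalization); if none is present, it builds a synthetic integer index.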
191
+ - If DARTS cannot infer a frequency automatically, forecasting still uses the ordered observations.
192
+ - Some classical DARTS models depend on optional libraries; when unavailable, `forecast` explains the missing requirement and `benchmark` skips the model.
193
+ - `arima` and `sarima` remain DARTS-first models, with fallback behavior only when the DARTS classical path is unavailable.
194
+
195
+ ## Packaging
196
+
197
+ To build distributable artifacts locally:
198
+
199
+ ```bash
200
+ python -m pip install build
201
+ python -m build
202
+ ```
203
+
204
+ This will generate source and wheel distributions in `dist/`.
@@ -0,0 +1,166 @@
1
+ # tscli
2
+
3
+ `tscli` is a command-line tool for time series analysis and forecasting built around [DARTS](https://unit8co.github.io/darts/).
4
+
5
+ It is designed for a simple workflow:
6
+
7
+ - load a CSV
8
+ - clean common formatting issues
9
+ - inspect and analyze the series
10
+ - compare models on a holdout window
11
+ - generate and export forecasts
12
+
13
+ ## What It Does
14
+
15
+ - works directly from CSV files
16
+ - detects and fixes common time-series formatting issues
17
+ - supports a clean `date + target` workflow
18
+ - benchmarks multiple models with `MAE`, `RMSE`, and `MAPE`
19
+ - exports cleaned datasets, forecasts, benchmark tables, and plots
20
+ - provides an interactive terminal mode
21
+
22
+ ## Install
23
+
24
+ Local development install:
25
+
26
+ ```bash
27
+ pip install -e .
28
+ ```
29
+
30
+ Install the released package from PyPI:
31
+
32
+ ```bash
33
+ pip install tscli-darts
34
+ ```
35
+
36
+ Optional extras:
37
+
38
+ - Classical DARTS models such as `theta` and `exponential-smoothing`
39
+
40
+ ```bash
41
+ pip install -e .[classical]
42
+ ```
43
+
44
+ - AutoARIMA support
45
+
46
+ ```bash
47
+ pip install -e .[autoarima]
48
+ ```
49
+
50
+ - Everything
51
+
52
+ ```bash
53
+ pip install -e .[full]
54
+ ```
55
+
56
+ ## Typical Workflow
57
+
58
+ ### 1. Inspect the raw CSV
59
+
60
+ Use this first to confirm the time column, target column, inferred frequency, and any preprocessing fixes.
61
+
62
+ ```bash
63
+ python -m tscli inspect ./sales.csv --time-col Month --target-col Sales
64
+ ```
65
+
66
+ ### 2. Clean the dataset
67
+
68
+ If the CSV has shorthand dates, duplicate timestamps, formatted numeric values, or other simple issues, save a normalized version.
69
+
70
+ ```bash
71
+ python -m tscli clean ./sales.csv --time-col Month --target-col Sales --output ./cleaned_sales.csv
72
+ ```
73
+
74
+ ### 3. Analyze the time series
75
+
76
+ Get quick descriptive statistics and recent observations before forecasting.
77
+
78
+ ```bash
79
+ python -m tscli analyze ./cleaned_sales.csv --time-col Month --target-col Sales
80
+ ```
81
+
82
+ ### 4. Benchmark models
83
+
84
+ Run several models against a holdout window, compare metrics, and optionally export the score table, forecast, and plot.
85
+
86
+ ```bash
87
+ python -m tscli benchmark ./cleaned_sales.csv --time-col Month --target-col Sales --horizon 12 --models all --scores-output ./scores.csv --forecast-output ./best_forecast.csv --plot-output ./benchmark.png
88
+ ```
89
+
90
+ ### 5. Generate a forecast
91
+
92
+ Forecast future periods with a chosen model and optionally export the forecast and chart.
93
+
94
+ ```bash
95
+ python -m tscli forecast ./cleaned_sales.csv --time-col Month --target-col Sales --model naive-drift --horizon 12 --output ./forecast.csv --plot-output ./forecast.png
96
+ ```
97
+
98
+ ### 6. Use interactive mode
99
+
100
+ Run the full workflow from a menu-driven terminal interface.
101
+
102
+ ```bash
103
+ python -m tscli interactive ./cleaned_sales.csv --time-col Month --target-col Sales
104
+ ```
105
+
106
+ ## Commands
107
+
108
+ - `inspect`: summarize the dataset and show preprocessing fixes
109
+ - `clean`: normalize and save a cleaned CSV
110
+ - `analyze`: print descriptive statistics and recent observations
111
+ - `forecast`: generate future values from one model
112
+ - `benchmark`: compare several models on a holdout window
113
+ - `models`: list supported forecasting models
114
+ - `interactive`: launch the terminal menu workflow
115
+
116
+ ## Forecasting Models
117
+
118
+ Supported models:
119
+
120
+ - `naive-last`
121
+ - `naive-drift`
122
+ - `naive-seasonal`
123
+ - `moving-average`
124
+ - `weighted-moving-average`
125
+ - `exp-smoothing`
126
+ - `seasonal-average`
127
+ - `seasonal-median`
128
+ - `linear-trend`
129
+ - `quadratic-trend`
130
+ - `arima`
131
+ - `sarima`
132
+ - `theta`
133
+ - `exponential-smoothing`
134
+ - `auto-arima`
135
+
136
+ ## Example Dataset
137
+
138
+ The bundled `examples/sales.csv` shows a shorthand monthly sales format like:
139
+
140
+ ```csv
141
+ Month,Sales
142
+ 1-01,266.0
143
+ 1-02,145.9
144
+ 1-03,183.1
145
+ ```
146
+
147
+ `tscli` will detect and normalize that `Month` column into proper first-of-month datetimes.
148
+
149
+ ## Notes
150
+
151
+ - The CSV should include a target column and optionally a time column.
152
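+ - If no time column is provided, `tscli` looks for a column named `date`, `datetime`, `timestamp`, `ds`, or `time` (after column-name normalization); if none is present, it builds a synthetic integer index.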
153
+ - If DARTS cannot infer a frequency automatically, forecasting still uses the ordered observations.
154
+ - Some classical DARTS models depend on optional libraries; when unavailable, `forecast` explains the missing requirement and `benchmark` skips the model.
155
+ - `arima` and `sarima` remain DARTS-first models, with fallback behavior only when the DARTS classical path is unavailable.
156
+
157
+ ## Packaging
158
+
159
+ To build distributable artifacts locally:
160
+
161
+ ```bash
162
+ python -m pip install build
163
+ python -m build
164
+ ```
165
+
166
+ This will generate source and wheel distributions in `dist/`.
@@ -0,0 +1,69 @@
1
+ [build-system]
2
+ requires = ["setuptools>=77.0.3", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "tscli-darts"
7
+ version = "0.1.0"
8
+ description = "A DARTS-first CLI for time series analysis, preprocessing, and forecasting"
9
+ readme = "README.md"
10
+ license = "MIT"
11
+ requires-python = ">=3.10"
12
+ authors = [
13
+ { name = "Senhores do Tempo" }
14
+ ]
15
+ keywords = [
16
+ "time-series",
17
+ "forecasting",
18
+ "darts",
19
+ "cli",
20
+ "analytics",
21
+ ]
22
+ classifiers = [
23
+ "Development Status :: 3 - Alpha",
24
+ "Intended Audience :: Developers",
25
+ "Intended Audience :: Science/Research",
26
+ "Operating System :: OS Independent",
27
+ "Programming Language :: Python :: 3",
28
+ "Programming Language :: Python :: 3.10",
29
+ "Programming Language :: Python :: 3.11",
30
+ "Programming Language :: Python :: 3.12",
31
+ "Programming Language :: Python :: 3.13",
32
+ "Topic :: Scientific/Engineering :: Information Analysis",
33
+ "Topic :: Software Development :: Libraries :: Python Modules",
34
+ ]
35
+ dependencies = [
36
+ "darts>=0.30.0",
37
+ "matplotlib>=3.9.0",
38
+ "numpy>=1.26.0",
39
+ "pandas>=2.2.0",
40
+ "rich>=13.7.0",
41
+ "tqdm>=4.66.0",
42
+ "typer>=0.12.3",
43
+ ]
44
+
45
+ [project.urls]
46
+ Homepage = "https://github.com/Senhores-do-Tempo/tscli"
47
+ Repository = "https://github.com/Senhores-do-Tempo/tscli"
48
+ Issues = "https://github.com/Senhores-do-Tempo/tscli/issues"
49
+
50
+ [project.optional-dependencies]
51
+ classical = [
52
+ "pytorch-lightning>=2.6.0",
53
+ ]
54
+ autoarima = [
55
+ "statsforecast>=2.0.0",
56
+ ]
57
+ full = [
58
+ "pytorch-lightning>=2.6.0",
59
+ "statsforecast>=2.0.0",
60
+ ]
61
+
62
+ [project.scripts]
63
+ tscli = "tscli.main:main"
64
+
65
+ [tool.setuptools]
66
+ package-dir = {"" = "src"}
67
+
68
+ [tool.setuptools.packages.find]
69
+ where = ["src"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,5 @@
1
+ """tscli package."""
2
+
3
# Single source of truth for the package version exposed at runtime.
__version__ = "0.1.0"

# Only the version string is part of the public star-import surface.
__all__ = ["__version__"]
@@ -0,0 +1,5 @@
1
from tscli.main import main

# Dispatch to the CLI only when this module is executed directly
# (for example via ``python -m tscli``), never as a side effect of an import.
if __name__ == "__main__":
    main()
@@ -0,0 +1,63 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+
5
+ import pandas as pd
6
+
7
+ from tscli.data import LoadedSeries
8
+
9
+
10
@dataclass
class SeriesSummary:
    """Descriptive snapshot of a time series, one value per reported metric."""

    # Size and time span of the underlying frame (boundaries kept as strings).
    row_count: int
    start: str
    end: str

    # Number of missing entries in the target column.
    missing_target: int

    # Descriptive statistics of the target values.
    mean: float
    median: float
    minimum: float
    maximum: float
    std_dev: float

    # Frequency label and coarse trend label.
    inferred_frequency: str
    trend_direction: str
23
+
24
+
25
def summarize_series(dataset: LoadedSeries) -> SeriesSummary:
    """Compute descriptive statistics for the target column of ``dataset``.

    Args:
        dataset: Loaded series providing ``frame``, ``time_col`` and
            ``target_col``.

    Returns:
        A ``SeriesSummary`` with the row count, first/last time values,
        missing-value count, basic statistics of the non-missing target
        values, the inferred frequency (``"not available"`` when it cannot
        be inferred) and a trend label comparing the last non-missing value
        with the first.

    Raises:
        ValueError: If the target column has no non-missing values.
    """
    frame = dataset.frame
    time_values = frame[dataset.time_col]
    target = frame[dataset.target_col]

    inferred_frequency = "not available"
    if dataset.time_col != "__index__":
        # pd.infer_freq raises ValueError for fewer than three timestamps and
        # TypeError for values that are not datetime-like. A short or unusual
        # series should still be summarized, so treat both as "no frequency".
        try:
            inferred = pd.infer_freq(time_values)
        except (TypeError, ValueError):
            inferred = None
        if inferred:
            inferred_frequency = inferred

    clean_target = target.dropna()
    if clean_target.empty:
        raise ValueError("The target series is empty after dropping missing values.")

    # The trend is a simple endpoint comparison, not a fitted slope.
    trend_delta = clean_target.iloc[-1] - clean_target.iloc[0]
    if trend_delta > 0:
        trend_direction = "upward"
    elif trend_delta < 0:
        trend_direction = "downward"
    else:
        trend_direction = "flat"

    return SeriesSummary(
        row_count=len(frame),
        start=str(time_values.iloc[0]),
        end=str(time_values.iloc[-1]),
        missing_target=int(target.isna().sum()),
        mean=float(clean_target.mean()),
        median=float(clean_target.median()),
        minimum=float(clean_target.min()),
        maximum=float(clean_target.max()),
        # Population standard deviation (ddof=0), not the sample estimate.
        std_dev=float(clean_target.std(ddof=0)),
        inferred_frequency=inferred_frequency,
        trend_direction=trend_direction,
    )
60
+
61
+
62
def recent_observations(dataset: LoadedSeries, rows: int = 5) -> pd.DataFrame:
    """Return the trailing ``rows`` time/target pairs as an independent copy.

    Args:
        dataset: Loaded series providing ``frame``, ``time_col`` and
            ``target_col``.
        rows: Number of trailing rows to keep; passed straight to
            ``DataFrame.tail``.

    Returns:
        A new two-column frame (time column first, target column second).
    """
    wanted_columns = [dataset.time_col, dataset.target_col]
    trailing = dataset.frame[wanted_columns].tail(rows)
    return trailing.copy()
@@ -0,0 +1,64 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from pathlib import Path
5
+
6
+ import pandas as pd
7
+
8
+ from tscli.preprocessing import (
9
+ PreprocessingReport,
10
+ clean_numeric_column,
11
+ finalize_time_series,
12
+ normalize_columns,
13
+ parse_time_column,
14
+ )
15
+
16
+
17
@dataclass
class LoadedSeries:
    """Bundle returned by ``load_csv``: the frame plus how to read it."""

    # Path of the CSV file the frame was read from.
    source: Path
    # Frame after column normalization, parsing and cleaning.
    frame: pd.DataFrame
    # Name of the time column; "__index__" when a synthetic index was created.
    time_col: str
    # Name of the target column.
    target_col: str
    # Preprocessing report filled in while the CSV was loaded.
    report: PreprocessingReport
24
+
25
+
26
def load_csv(csv_path: Path, time_col: str | None, target_col: str) -> LoadedSeries:
    """Read a CSV file and prepare it as a ``LoadedSeries``.

    The frame is passed through ``normalize_columns``, the time column is
    resolved and parsed (or a synthetic integer index is created), the target
    column is cleaned with ``clean_numeric_column`` and the result is handed to
    ``finalize_time_series``. Every helper receives the same
    ``PreprocessingReport``.

    Args:
        csv_path: Location of the CSV file.
        time_col: Name of the time column, or ``None`` to look for a
            conventionally named one.
        target_col: Name of the column holding the values of the series.

    Returns:
        The prepared frame together with the resolved column names and the
        preprocessing report.

    Raises:
        ValueError: If the target or time column is missing, the time column
            contains values that could not be parsed, or the target column
            holds no numeric values.
    """
    raw_frame = pd.read_csv(csv_path)
    report = PreprocessingReport()
    frame = normalize_columns(raw_frame, report)
    if target_col not in frame.columns:
        raise ValueError(f"Target column '{target_col}' was not found in the CSV.")

    # Without an explicit time column, fall back to the first conventional
    # name that is present in the frame.
    chosen_time_col = time_col
    if chosen_time_col is None:
        conventional_names = ("date", "datetime", "timestamp", "ds", "time")
        chosen_time_col = next(
            (name for name in conventional_names if name in frame.columns),
            None,
        )

    if chosen_time_col is None:
        # No time information at all: number the rows instead.
        chosen_time_col = "__index__"
        frame[chosen_time_col] = pd.RangeIndex(start=0, stop=len(frame), step=1)
        report.add_fix("Created a synthetic integer time index because no time column was provided.")
    else:
        if chosen_time_col not in frame.columns:
            raise ValueError(f"Time column '{chosen_time_col}' was not found in the CSV.")
        frame = parse_time_column(frame, chosen_time_col, report)
        unparsed = frame[chosen_time_col].isna()
        if unparsed.any():
            raise ValueError(
                f"Time column '{chosen_time_col}' contains values that could not be parsed as datetime."
            )

    frame = clean_numeric_column(frame, target_col, report)
    if frame[target_col].isna().all():
        raise ValueError(f"Target column '{target_col}' does not contain numeric values.")
    frame = finalize_time_series(frame, chosen_time_col, target_col, report)

    return LoadedSeries(
        source=csv_path,
        frame=frame,
        time_col=chosen_time_col,
        target_col=target_col,
        report=report,
    )