tsagentkit-1.0.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. tsagentkit/__init__.py +126 -0
  2. tsagentkit/anomaly/__init__.py +130 -0
  3. tsagentkit/backtest/__init__.py +48 -0
  4. tsagentkit/backtest/engine.py +788 -0
  5. tsagentkit/backtest/metrics.py +244 -0
  6. tsagentkit/backtest/report.py +342 -0
  7. tsagentkit/calibration/__init__.py +136 -0
  8. tsagentkit/contracts/__init__.py +133 -0
  9. tsagentkit/contracts/errors.py +275 -0
  10. tsagentkit/contracts/results.py +418 -0
  11. tsagentkit/contracts/schema.py +44 -0
  12. tsagentkit/contracts/task_spec.py +300 -0
  13. tsagentkit/covariates/__init__.py +340 -0
  14. tsagentkit/eval/__init__.py +285 -0
  15. tsagentkit/features/__init__.py +20 -0
  16. tsagentkit/features/covariates.py +328 -0
  17. tsagentkit/features/extra/__init__.py +5 -0
  18. tsagentkit/features/extra/native.py +179 -0
  19. tsagentkit/features/factory.py +187 -0
  20. tsagentkit/features/matrix.py +159 -0
  21. tsagentkit/features/tsfeatures_adapter.py +115 -0
  22. tsagentkit/features/versioning.py +203 -0
  23. tsagentkit/hierarchy/__init__.py +39 -0
  24. tsagentkit/hierarchy/aggregation.py +62 -0
  25. tsagentkit/hierarchy/evaluator.py +400 -0
  26. tsagentkit/hierarchy/reconciliation.py +232 -0
  27. tsagentkit/hierarchy/structure.py +453 -0
  28. tsagentkit/models/__init__.py +182 -0
  29. tsagentkit/models/adapters/__init__.py +83 -0
  30. tsagentkit/models/adapters/base.py +321 -0
  31. tsagentkit/models/adapters/chronos.py +387 -0
  32. tsagentkit/models/adapters/moirai.py +256 -0
  33. tsagentkit/models/adapters/registry.py +171 -0
  34. tsagentkit/models/adapters/timesfm.py +440 -0
  35. tsagentkit/models/baselines.py +207 -0
  36. tsagentkit/models/sktime.py +307 -0
  37. tsagentkit/monitoring/__init__.py +51 -0
  38. tsagentkit/monitoring/alerts.py +302 -0
  39. tsagentkit/monitoring/coverage.py +203 -0
  40. tsagentkit/monitoring/drift.py +330 -0
  41. tsagentkit/monitoring/report.py +214 -0
  42. tsagentkit/monitoring/stability.py +275 -0
  43. tsagentkit/monitoring/triggers.py +423 -0
  44. tsagentkit/qa/__init__.py +347 -0
  45. tsagentkit/router/__init__.py +37 -0
  46. tsagentkit/router/bucketing.py +489 -0
  47. tsagentkit/router/fallback.py +132 -0
  48. tsagentkit/router/plan.py +23 -0
  49. tsagentkit/router/router.py +271 -0
  50. tsagentkit/series/__init__.py +26 -0
  51. tsagentkit/series/alignment.py +206 -0
  52. tsagentkit/series/dataset.py +449 -0
  53. tsagentkit/series/sparsity.py +261 -0
  54. tsagentkit/series/validation.py +393 -0
  55. tsagentkit/serving/__init__.py +39 -0
  56. tsagentkit/serving/orchestration.py +943 -0
  57. tsagentkit/serving/packaging.py +73 -0
  58. tsagentkit/serving/provenance.py +317 -0
  59. tsagentkit/serving/tsfm_cache.py +214 -0
  60. tsagentkit/skill/README.md +135 -0
  61. tsagentkit/skill/__init__.py +8 -0
  62. tsagentkit/skill/recipes.md +429 -0
  63. tsagentkit/skill/tool_map.md +21 -0
  64. tsagentkit/time/__init__.py +134 -0
  65. tsagentkit/utils/__init__.py +20 -0
  66. tsagentkit/utils/quantiles.py +83 -0
  67. tsagentkit/utils/signature.py +47 -0
  68. tsagentkit/utils/temporal.py +41 -0
  69. tsagentkit-1.0.2.dist-info/METADATA +371 -0
  70. tsagentkit-1.0.2.dist-info/RECORD +72 -0
  71. tsagentkit-1.0.2.dist-info/WHEEL +4 -0
  72. tsagentkit-1.0.2.dist-info/licenses/LICENSE +201 -0
tsagentkit/router/router.py
@@ -0,0 +1,271 @@
+ """Deterministic routing logic aligned to the PRD PlanSpec."""
+
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING
+
+ import numpy as np
+ import pandas as pd
+
+ from tsagentkit.contracts import PlanSpec, RouteDecision, RouterConfig, RouterThresholds, TaskSpec
+ from tsagentkit.time import normalize_pandas_freq
+
+ if TYPE_CHECKING:
+     from tsagentkit.qa import QAReport
+     from tsagentkit.series import TSDataset
+
+
+ def make_plan(
+     dataset: TSDataset,
+     task_spec: TaskSpec,
+     qa: QAReport | None = None,
+     router_config: RouterConfig | None = None,
+     use_tsfm: bool = True,
+     tsfm_preference: list[str] | None = None,
+ ) -> tuple[PlanSpec, RouteDecision]:
+     """Create a deterministic PlanSpec and RouteDecision for a dataset.
+
+     Returns:
+         Tuple of (PlanSpec, RouteDecision) containing the execution plan
+         and detailed routing decision information.
+     """
+     thresholds = (router_config or RouterConfig()).thresholds
+     stats, buckets = _compute_router_stats(dataset, task_spec, thresholds)
+
+     # Determine TSFM availability
+     available_tsfms: list[str] = []
+     if use_tsfm and _tsfm_allowed(dataset, thresholds):
+         from tsagentkit.models.adapters import AdapterRegistry
+
+         for name in (tsfm_preference or ["chronos", "moirai", "timesfm"]):
+             is_avail, _ = AdapterRegistry.check_availability(name)
+             if is_avail:
+                 available_tsfms.append(name)
+
+     # Build candidate model list
+     if "intermittent" in buckets:
+         candidates = ["Croston", "Naive"]
+     elif "short_history" in buckets:
+         candidates = ["HistoricAverage", "Naive"]
+     else:
+         candidates = ["SeasonalNaive", "HistoricAverage", "Naive"]
+
+     if available_tsfms and "intermittent" not in buckets:
+         tsfm_models = [f"tsfm-{name}" for name in available_tsfms]
+         candidates = tsfm_models + candidates
+
+     plan = PlanSpec(
+         plan_name="default",
+         candidate_models=candidates,
+         use_static=True,
+         use_past=True,
+         use_future_known=True,
+         min_train_size=thresholds.min_train_size,
+         max_train_size=thresholds.max_points_per_series_for_tsfm,
+         interval_mode=task_spec.forecast_contract.interval_mode,
+         levels=task_spec.forecast_contract.levels,
+         quantiles=task_spec.forecast_contract.quantiles,
+         allow_drop_covariates=True,
+         allow_baseline=True,
+     )
+
+     # Build RouteDecision for audit trail
+     reasons = [
+         f"selected_models: {candidates}",
+         f"buckets: {buckets}",
+         f"tsfm_available: {bool(available_tsfms)}",
+     ]
+     if available_tsfms:
+         reasons.append(f"tsfm_models: {available_tsfms}")
+
+     route_decision = RouteDecision(
+         stats=stats,
+         buckets=buckets,
+         selected_plan=plan,
+         reasons=reasons,
+     )
+
+     return plan, route_decision
+
+
+ def get_model_for_series(
+     unique_id: str,
+     dataset: TSDataset,
+     task_spec: TaskSpec,
+     thresholds: RouterThresholds | None = None,
+ ) -> str:
+     """Get recommended model for a specific series."""
+     thresholds = thresholds or RouterThresholds()
+     series_df = dataset.get_series(unique_id)
+     stats, buckets = _compute_series_stats(series_df, task_spec, thresholds)
+
+     if "intermittent" in buckets:
+         return "Croston"
+     if "short_history" in buckets:
+         return "HistoricAverage"
+     return "SeasonalNaive"
+
+
+ def _compute_router_stats(
+     dataset: TSDataset,
+     task_spec: TaskSpec,
+     thresholds: RouterThresholds,
+ ) -> tuple[dict[str, float], list[str]]:
+     df = dataset.df
+     stats: dict[str, float] = {}
+     buckets: list[str] = []
+
+     lengths = df.groupby("unique_id").size()
+     min_len = int(lengths.min()) if not lengths.empty else 0
+     stats["min_series_length"] = float(min_len)
+     if min_len < thresholds.min_train_size:
+         buckets.append("short_history")
+
+     missing_ratio = _compute_missing_ratio(df, task_spec)
+     stats["missing_ratio"] = float(missing_ratio)
+     if missing_ratio > thresholds.max_missing_ratio:
+         buckets.append("sparse")
+
+     uid_col = task_spec.panel_contract.unique_id_col
+     ds_col = task_spec.panel_contract.ds_col
+     y_col = task_spec.panel_contract.y_col
+
+     intermittency = _compute_intermittency(df, thresholds, uid_col, ds_col, y_col)
+     stats.update(intermittency)
+     if intermittency.get("intermittent_series_ratio", 0.0) > 0:
+         buckets.append("intermittent")
+
+     season_conf = _seasonality_confidence(df, task_spec, uid_col, y_col)
+     stats["seasonality_confidence"] = float(season_conf)
+     if season_conf >= thresholds.min_seasonality_conf:
+         buckets.append("seasonal_candidate")
+
+     return stats, buckets
+
+
+ def _compute_series_stats(
+     series_df: pd.DataFrame,
+     task_spec: TaskSpec,
+     thresholds: RouterThresholds,
+ ) -> tuple[dict[str, float], list[str]]:
+     stats: dict[str, float] = {}
+     buckets: list[str] = []
+
+     length = len(series_df)
+     stats["series_length"] = float(length)
+     if length < thresholds.min_train_size:
+         buckets.append("short_history")
+
+     missing_ratio = _compute_missing_ratio(series_df, task_spec)
+     stats["missing_ratio"] = float(missing_ratio)
+     if missing_ratio > thresholds.max_missing_ratio:
+         buckets.append("sparse")
+
+     uid_col = task_spec.panel_contract.unique_id_col
+     ds_col = task_spec.panel_contract.ds_col
+     y_col = task_spec.panel_contract.y_col
+
+     intermittency = _compute_intermittency(series_df, thresholds, uid_col, ds_col, y_col)
+     stats.update(intermittency)
+     if intermittency.get("intermittent_series_ratio", 0.0) > 0:
+         buckets.append("intermittent")
+
+     season_conf = _seasonality_confidence(series_df, task_spec, uid_col, y_col)
+     stats["seasonality_confidence"] = float(season_conf)
+     if season_conf >= thresholds.min_seasonality_conf:
+         buckets.append("seasonal_candidate")
+
+     return stats, buckets
+
+
+ def _compute_missing_ratio(df: pd.DataFrame, task_spec: TaskSpec) -> float:
+     if df.empty:
+         return 0.0
+     uid_col = task_spec.panel_contract.unique_id_col
+     ds_col = task_spec.panel_contract.ds_col
+
+     ratios = []
+     for uid in df[uid_col].unique():
+         series = df[df[uid_col] == uid].sort_values(ds_col)
+         if series.empty:
+             continue
+         full_range = pd.date_range(
+             start=series[ds_col].min(),
+             end=series[ds_col].max(),
+             freq=normalize_pandas_freq(task_spec.freq),
+         )
+         missing = len(full_range) - len(series)
+         ratio = missing / max(len(full_range), 1)
+         ratios.append(ratio)
+     return float(np.mean(ratios)) if ratios else 0.0
+
+
+ def _compute_intermittency(
+     df: pd.DataFrame,
+     thresholds: RouterThresholds,
+     uid_col: str,
+     ds_col: str,
+     y_col: str,
+ ) -> dict[str, float]:
+     intermittent = 0
+     total = 0
+
+     for uid in df[uid_col].unique():
+         series = df[df[uid_col] == uid].sort_values(ds_col)
+         y = series[y_col].values
+         total += 1
+
+         non_zero_idx = np.where(y > 0)[0]
+         if len(non_zero_idx) <= 1:
+             adi = float("inf")
+             cv2 = float("inf")
+         else:
+             intervals = np.diff(non_zero_idx)
+             adi = float(np.mean(intervals)) if len(intervals) > 0 else float("inf")
+             non_zero_vals = y[non_zero_idx]
+             mean = np.mean(non_zero_vals) if len(non_zero_vals) > 0 else 0.0
+             std = np.std(non_zero_vals) if len(non_zero_vals) > 0 else 0.0
+             cv2 = float((std / mean) ** 2) if mean != 0 else float("inf")
+
+         if adi >= thresholds.max_intermittency_adi and cv2 >= thresholds.max_intermittency_cv2:
+             intermittent += 1
+
+     ratio = intermittent / total if total > 0 else 0.0
+     return {
+         "intermittent_series_ratio": ratio,
+         "intermittent_series_count": float(intermittent),
+     }
+
+
+ def _seasonality_confidence(
+     df: pd.DataFrame,
+     task_spec: TaskSpec,
+     uid_col: str,
+     y_col: str,
+ ) -> float:
+     season_length = task_spec.season_length
+     if season_length is None or season_length <= 1:
+         return 0.0
+     confs: list[float] = []
+     for uid in df[uid_col].unique():
+         series = df[df[uid_col] == uid][y_col].values
+         if len(series) <= season_length:
+             continue
+         series = series - np.mean(series)
+         denom = np.dot(series, series)
+         if denom == 0:
+             continue
+         lagged = np.roll(series, season_length)
+         corr = np.dot(series[season_length:], lagged[season_length:]) / denom
+         confs.append(abs(float(corr)))
+     return float(np.mean(confs)) if confs else 0.0
+
+
+ def _tsfm_allowed(dataset: TSDataset, thresholds: RouterThresholds) -> bool:
+     if dataset.n_series > thresholds.max_series_count_for_tsfm:
+         return False
+     max_points = dataset.df.groupby("unique_id").size().max()
+     return max_points <= thresholds.max_points_per_series_for_tsfm
+
+
+ __all__ = ["make_plan", "get_model_for_series", "RouteDecision"]
tsagentkit/series/__init__.py
@@ -0,0 +1,26 @@
+ """Series module for tsagentkit.
+
+ Provides time series data structures and operations.
+ """
+
+ from .alignment import align_timezone, fill_gaps, resample_series
+ from .dataset import TSDataset, build_dataset
+ from .sparsity import SparsityClass, SparsityProfile, compute_sparsity_profile
+ from .validation import normalize_panel_columns, validate_contract
+
+ __all__ = [
+     # Dataset
+     "TSDataset",
+     "build_dataset",
+     # Sparsity
+     "SparsityProfile",
+     "SparsityClass",
+     "compute_sparsity_profile",
+     # Alignment
+     "align_timezone",
+     "resample_series",
+     "fill_gaps",
+     # Validation helpers (series layer)
+     "validate_contract",
+     "normalize_panel_columns",
+ ]
tsagentkit/series/alignment.py
@@ -0,0 +1,206 @@
+ """Time alignment and resampling utilities.
+
+ Provides timezone unification and resampling for time series data.
+ """
+
+ from __future__ import annotations
+
+ from typing import Literal
+
+ import pandas as pd
+
+
+ def align_timezone(
+     df: pd.DataFrame,
+     target_tz: str | None = "UTC",
+     ds_col: str = "ds",
+ ) -> pd.DataFrame:
+     """Unify timezones across a dataset.
+
+     Converts all datetime values to the target timezone. Handles
+     timezone-aware and timezone-naive datetimes appropriately.
+
+     Args:
+         df: DataFrame with datetime column
+         target_tz: Target timezone (default: "UTC", None for naive)
+         ds_col: Name of datetime column (default: "ds")
+
+     Returns:
+         DataFrame with unified timezone
+
+     Raises:
+         ValueError: If ds_col is not found or not datetime
+     """
+     if ds_col not in df.columns:
+         raise ValueError(f"Column '{ds_col}' not found in DataFrame")
+
+     if not pd.api.types.is_datetime64_any_dtype(df[ds_col]):
+         raise ValueError(f"Column '{ds_col}' must be datetime type")
+
+     result = df.copy()
+
+     # Handle timezone
+     if target_tz is None:
+         # Make timezone-naive
+         if result[ds_col].dt.tz is not None:
+             result[ds_col] = result[ds_col].dt.tz_localize(None)
+     else:
+         # Convert to target timezone
+         if result[ds_col].dt.tz is None:
+             # Assume UTC for naive datetimes, then convert
+             result[ds_col] = result[ds_col].dt.tz_localize("UTC").dt.tz_convert(target_tz)
+         else:
+             result[ds_col] = result[ds_col].dt.tz_convert(target_tz)
+
+     return result
+
+
+ def resample_series(
+     df: pd.DataFrame,
+     freq: str,
+     agg_func: Literal["sum", "mean", "last", "first", "max", "min"] = "sum",
+     ds_col: str = "ds",
+     unique_id_col: str = "unique_id",
+     y_col: str = "y",
+ ) -> pd.DataFrame:
+     """Resample time series to a new frequency.
+
+     Resamples each series independently to the target frequency using
+     the specified aggregation function.
+
+     Args:
+         df: DataFrame with time series data
+         freq: Target frequency (pandas freq string, e.g., 'D', 'H', 'M')
+         agg_func: Aggregation function (default: "sum")
+         ds_col: Name of datetime column (default: "ds")
+         unique_id_col: Name of series ID column (default: "unique_id")
+         y_col: Name of target column (default: "y")
+
+     Returns:
+         Resampled DataFrame
+
+     Raises:
+         ValueError: If required columns not found
+     """
+     required_cols = {ds_col, unique_id_col, y_col}
+     missing = required_cols - set(df.columns)
+     if missing:
+         raise ValueError(f"Missing required columns: {missing}")
+
+     # Ensure datetime
+     if not pd.api.types.is_datetime64_any_dtype(df[ds_col]):
+         raise ValueError(f"Column '{ds_col}' must be datetime type")
+
+     # Resample each series
+     resampled_frames: list[pd.DataFrame] = []
+
+     for uid in df[unique_id_col].unique():
+         series = df[df[unique_id_col] == uid].set_index(ds_col).sort_index()
+
+         # Select numeric columns for resampling
+         numeric_cols = series.select_dtypes(include=["number"]).columns.tolist()
+
+         if not numeric_cols:
+             continue
+
+         # Resample
+         resampler = series[numeric_cols].resample(freq)
+
+         # Apply aggregation
+         if agg_func == "sum":
+             resampled = resampler.sum()
+         elif agg_func == "mean":
+             resampled = resampler.mean()
+         elif agg_func == "last":
+             resampled = resampler.last()
+         elif agg_func == "first":
+             resampled = resampler.first()
+         elif agg_func == "max":
+             resampled = resampler.max()
+         elif agg_func == "min":
+             resampled = resampler.min()
+         else:
+             raise ValueError(f"Unknown aggregation function: {agg_func}")
+
+         # Add back unique_id
+         resampled[unique_id_col] = uid
+         resampled = resampled.reset_index()
+
+         resampled_frames.append(resampled)
+
+     if not resampled_frames:
+         # Return empty DataFrame with correct structure
+         return pd.DataFrame(columns=[unique_id_col, ds_col, y_col])
+
+     # Combine all series
+     result = pd.concat(resampled_frames, ignore_index=True)
+
+     # Reorder columns to standard order
+     cols = [unique_id_col, ds_col] + [c for c in result.columns if c not in {unique_id_col, ds_col}]
+
+     return result[cols]
+
+
+ def fill_gaps(
+     df: pd.DataFrame,
+     freq: str,
+     method: Literal["interpolate", "forward", "backward", "zero"] = "interpolate",
+     ds_col: str = "ds",
+     unique_id_col: str = "unique_id",
+ ) -> pd.DataFrame:
+     """Fill gaps in time series data.
+
+     Identifies missing timestamps and fills them using the specified method.
+
+     Args:
+         df: DataFrame with time series data
+         freq: Expected frequency (pandas freq string)
+         method: Fill method (default: "interpolate")
+         ds_col: Name of datetime column (default: "ds")
+         unique_id_col: Name of series ID column (default: "unique_id")
+
+     Returns:
+         DataFrame with gaps filled
+     """
+     filled_frames: list[pd.DataFrame] = []
+
+     for uid in df[unique_id_col].unique():
+         series = df[df[unique_id_col] == uid].set_index(ds_col).sort_index()
+
+         # Create complete date range
+         full_range = pd.date_range(start=series.index.min(), end=series.index.max(), freq=freq)
+
+         # Reindex to include gaps
+         series_filled = series.reindex(full_range)
+
+         # Select only numeric columns for filling
+         numeric_cols = series_filled.select_dtypes(include=["number"]).columns.tolist()
+
+         # Fill missing values in numeric columns only
+         if method == "interpolate":
+             series_filled[numeric_cols] = series_filled[numeric_cols].interpolate(method="linear")
+         elif method == "forward":
+             series_filled[numeric_cols] = series_filled[numeric_cols].ffill()
+         elif method == "backward":
+             series_filled[numeric_cols] = series_filled[numeric_cols].bfill()
+         elif method == "zero":
+             series_filled[numeric_cols] = series_filled[numeric_cols].fillna(0)
+
+         # Add back unique_id
+         series_filled[unique_id_col] = uid
+         series_filled = series_filled.reset_index()
+         # Rename the datetime column (could be "index" or the original index name)
+         if "index" in series_filled.columns:
+             series_filled = series_filled.rename(columns={"index": ds_col})
+
+         filled_frames.append(series_filled)
+
+     if not filled_frames:
+         return df.copy()
+
+     result = pd.concat(filled_frames, ignore_index=True)
+
+     # Reorder columns
+     cols = [unique_id_col, ds_col] + [c for c in result.columns if c not in {unique_id_col, ds_col}]
+
+     return result[cols]
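
As a quick check of the three helpers above, a self-contained sketch on a toy panel (column names follow the defaults; the data values are illustrative):

# Toy walkthrough of align_timezone / fill_gaps / resample_series.
import pandas as pd

from tsagentkit.series import align_timezone, fill_gaps, resample_series

panel = pd.DataFrame({
    "unique_id": ["a", "a", "a"],
    "ds": pd.to_datetime(["2024-01-01", "2024-01-02", "2024-01-04"]),  # 2024-01-03 missing
    "y": [1.0, 2.0, 4.0],
})

panel = align_timezone(panel, target_tz="UTC")  # naive timestamps are localized to UTC first
filled = fill_gaps(panel, freq="D")             # default "interpolate" fills 2024-01-03 with y = 3.0
weekly = resample_series(filled, freq="W", agg_func="sum")  # one row per week, y summed

Note that fill_gaps only touches numeric columns, so the string unique_id column is restored by assignment rather than interpolation, and resample_series drops any series that has no numeric columns at all.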