tsagentkit 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. tsagentkit/__init__.py +126 -0
  2. tsagentkit/anomaly/__init__.py +130 -0
  3. tsagentkit/backtest/__init__.py +48 -0
  4. tsagentkit/backtest/engine.py +788 -0
  5. tsagentkit/backtest/metrics.py +244 -0
  6. tsagentkit/backtest/report.py +342 -0
  7. tsagentkit/calibration/__init__.py +136 -0
  8. tsagentkit/contracts/__init__.py +133 -0
  9. tsagentkit/contracts/errors.py +275 -0
  10. tsagentkit/contracts/results.py +418 -0
  11. tsagentkit/contracts/schema.py +44 -0
  12. tsagentkit/contracts/task_spec.py +300 -0
  13. tsagentkit/covariates/__init__.py +340 -0
  14. tsagentkit/eval/__init__.py +285 -0
  15. tsagentkit/features/__init__.py +20 -0
  16. tsagentkit/features/covariates.py +328 -0
  17. tsagentkit/features/extra/__init__.py +5 -0
  18. tsagentkit/features/extra/native.py +179 -0
  19. tsagentkit/features/factory.py +187 -0
  20. tsagentkit/features/matrix.py +159 -0
  21. tsagentkit/features/tsfeatures_adapter.py +115 -0
  22. tsagentkit/features/versioning.py +203 -0
  23. tsagentkit/hierarchy/__init__.py +39 -0
  24. tsagentkit/hierarchy/aggregation.py +62 -0
  25. tsagentkit/hierarchy/evaluator.py +400 -0
  26. tsagentkit/hierarchy/reconciliation.py +232 -0
  27. tsagentkit/hierarchy/structure.py +453 -0
  28. tsagentkit/models/__init__.py +182 -0
  29. tsagentkit/models/adapters/__init__.py +83 -0
  30. tsagentkit/models/adapters/base.py +321 -0
  31. tsagentkit/models/adapters/chronos.py +387 -0
  32. tsagentkit/models/adapters/moirai.py +256 -0
  33. tsagentkit/models/adapters/registry.py +171 -0
  34. tsagentkit/models/adapters/timesfm.py +440 -0
  35. tsagentkit/models/baselines.py +207 -0
  36. tsagentkit/models/sktime.py +307 -0
  37. tsagentkit/monitoring/__init__.py +51 -0
  38. tsagentkit/monitoring/alerts.py +302 -0
  39. tsagentkit/monitoring/coverage.py +203 -0
  40. tsagentkit/monitoring/drift.py +330 -0
  41. tsagentkit/monitoring/report.py +214 -0
  42. tsagentkit/monitoring/stability.py +275 -0
  43. tsagentkit/monitoring/triggers.py +423 -0
  44. tsagentkit/qa/__init__.py +347 -0
  45. tsagentkit/router/__init__.py +37 -0
  46. tsagentkit/router/bucketing.py +489 -0
  47. tsagentkit/router/fallback.py +132 -0
  48. tsagentkit/router/plan.py +23 -0
  49. tsagentkit/router/router.py +271 -0
  50. tsagentkit/series/__init__.py +26 -0
  51. tsagentkit/series/alignment.py +206 -0
  52. tsagentkit/series/dataset.py +449 -0
  53. tsagentkit/series/sparsity.py +261 -0
  54. tsagentkit/series/validation.py +393 -0
  55. tsagentkit/serving/__init__.py +39 -0
  56. tsagentkit/serving/orchestration.py +943 -0
  57. tsagentkit/serving/packaging.py +73 -0
  58. tsagentkit/serving/provenance.py +317 -0
  59. tsagentkit/serving/tsfm_cache.py +214 -0
  60. tsagentkit/skill/README.md +135 -0
  61. tsagentkit/skill/__init__.py +8 -0
  62. tsagentkit/skill/recipes.md +429 -0
  63. tsagentkit/skill/tool_map.md +21 -0
  64. tsagentkit/time/__init__.py +134 -0
  65. tsagentkit/utils/__init__.py +20 -0
  66. tsagentkit/utils/quantiles.py +83 -0
  67. tsagentkit/utils/signature.py +47 -0
  68. tsagentkit/utils/temporal.py +41 -0
  69. tsagentkit-1.0.2.dist-info/METADATA +371 -0
  70. tsagentkit-1.0.2.dist-info/RECORD +72 -0
  71. tsagentkit-1.0.2.dist-info/WHEEL +4 -0
  72. tsagentkit-1.0.2.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,788 @@
1
+ """Rolling window backtest engine.
2
+
3
+ Implements expanding and sliding window backtesting with strict
4
+ temporal integrity (no random splits allowed).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from collections.abc import Callable
10
+ from typing import TYPE_CHECKING, Any, Literal
11
+
12
+ import numpy as np
13
+ import pandas as pd
14
+
15
+ from tsagentkit.contracts import CVFrame, ESplitRandomForbidden
16
+ from tsagentkit.covariates import AlignedDataset, align_covariates
17
+ from tsagentkit.eval import evaluate_forecasts
18
+ from tsagentkit.utils import drop_future_rows, normalize_quantile_columns
19
+
20
+ from .report import (
21
+ BacktestReport,
22
+ SegmentMetrics,
23
+ SeriesMetrics,
24
+ TemporalMetrics,
25
+ WindowResult,
26
+ )
27
+
28
+ if TYPE_CHECKING:
29
+ from tsagentkit.contracts import TaskSpec
30
+ from tsagentkit.hierarchy import HierarchyStructure, ReconciliationMethod
31
+ from tsagentkit.router import PlanSpec
32
+ from tsagentkit.series import TSDataset
33
+
34
+
35
def rolling_backtest(
    dataset: TSDataset,
    spec: TaskSpec,
    plan: PlanSpec,
    fit_func: Callable[[TSDataset, PlanSpec], Any] | None = None,
    predict_func: Callable[[TSDataset, Any, TaskSpec], Any] | None = None,  # Also accepts covariates: Optional kwarg
    n_windows: int = 5,
    window_strategy: Literal["expanding", "sliding"] = "expanding",
    min_train_size: int | None = None,
    step_size: int | None = None,
    reconcile: bool = True,
    route_decision: Any | None = None,
) -> BacktestReport:
    """Execute rolling window backtest.

    Performs temporal cross-validation using expanding or sliding windows.
    Random splits are strictly forbidden.

    For hierarchical datasets, applies forecast reconciliation to ensure
    coherence across the hierarchy (enabled by default).

    Per-window failures (covariate alignment, fit, predict) are recorded in
    the report's ``errors`` list rather than aborting the whole backtest;
    only insufficient data or a shuffled input raises.

    Args:
        dataset: TSDataset with time series data
        spec: Task specification
        plan: Execution plan with model configuration
        fit_func: Function to fit model: fit_func(train_dataset, plan) (defaults to models.fit)
        predict_func: Function to predict: predict_func(train_dataset, model_artifact, spec, covariates=None)
            (defaults to models.predict). Must accept covariates as optional kwarg.
        n_windows: Number of backtest windows (default: 5)
        window_strategy: "expanding" or "sliding" (default: "expanding")
        min_train_size: Minimum training observations per series
        step_size: Step size between windows (default: spec.horizon)
        reconcile: Whether to reconcile forecasts for hierarchical data (default: True)
        route_decision: Optional RouteDecision for including routing info in metadata (v1.0)

    Returns:
        BacktestReport with results from all windows

    Raises:
        ESplitRandomForbidden: If random splitting is detected
        EBacktestInsufficientData: If not enough data for requested windows
    """
    # Resolve default model functions lazily to avoid a hard import cycle
    # with tsagentkit.models at module load time.
    if fit_func is None or predict_func is None:
        from tsagentkit.models import fit as default_fit
        from tsagentkit.models import predict as default_predict

        fit_func = default_fit if fit_func is None else fit_func
        predict_func = default_predict if predict_func is None else predict_func

    # Drop future rows (y is null beyond last observed per series) before validation
    df, _ = drop_future_rows(dataset.df)

    # Validate temporal ordering (guardrail)
    _validate_temporal_ordering(df)

    # Set defaults
    horizon = spec.horizon
    step = step_size if step_size is not None else horizon
    if step != horizon:
        # NOTE(review): any caller-supplied step_size is silently coerced back
        # to the horizon here, so windows never overlap and never leave gaps.
        # Presumably intentional — confirm before honoring custom step sizes.
        step = horizon
    season_length = spec.season_length or 1

    if min_train_size is None:
        # Default: at least 2 seasons worth of data (floor of 10 observations)
        min_train_size = max(season_length * 2, 10)

    # Get date range (unique timestamps across the whole panel, sorted ascending)
    all_dates = pd.to_datetime(df["ds"].unique())
    all_dates = sorted(all_dates)

    if len(all_dates) < min_train_size + horizon * n_windows:
        from tsagentkit.contracts import EBacktestInsufficientData

        raise EBacktestInsufficientData(
            f"Insufficient data for {n_windows} windows. "
            f"Have {len(all_dates)} dates, need at least {min_train_size + horizon * n_windows}",
            context={
                "n_dates": len(all_dates),
                "min_required": min_train_size + horizon * n_windows,
                "n_windows_requested": n_windows,
            },
        )

    # Accumulators across windows
    window_results: list[WindowResult] = []
    series_metrics_agg: dict[str, list[dict]] = {}  # series_id -> per-window metric dicts
    cv_frames: list[pd.DataFrame] = []  # per-window prediction/actual frames for CVFrame
    errors: list[dict] = []  # structured error records, one per failure

    # Calculate window cutoffs
    cutoffs = _generate_cutoffs(
        all_dates,
        n_windows=n_windows,
        horizon=horizon,
        step=step,
        min_train_size=min_train_size,
        strategy=window_strategy,
    )

    for window_idx, (cutoff_date, test_dates) in enumerate(cutoffs):
        try:
            # Split data: train is strictly before the cutoff; test is the
            # exact set of dates produced by _generate_cutoffs.
            train_df = df[df["ds"] < cutoff_date].copy()
            test_df = df[df["ds"].isin(test_dates)].copy()

            if len(train_df) == 0 or len(test_df) == 0:
                errors.append(
                    {
                        "window": window_idx,
                        "error": "Empty train or test set",
                        "cutoff": str(cutoff_date),
                    }
                )
                continue

            from tsagentkit.series import TSDataset

            # validate=False: the full panel was already validated above.
            train_ds = TSDataset.from_dataframe(train_df, spec, validate=False)
            if dataset.is_hierarchical() and dataset.hierarchy:
                train_ds = train_ds.with_hierarchy(dataset.hierarchy)
            window_covariates = None
            try:
                window_covariates = _build_window_covariates(
                    dataset=dataset,
                    task_spec=spec,
                    cutoff_date=pd.Timestamp(cutoff_date),
                    panel_for_index=train_df,
                )
            except Exception as e:
                errors.append(
                    {
                        "window": window_idx,
                        "stage": "covariate_alignment",
                        "error": str(e),
                        "type": type(e).__name__,
                    }
                )
                # Only tolerate covariate failure if the plan allows dropping
                # covariates; otherwise escalate (caught by the outer handler).
                if not plan.allow_drop_covariates:
                    raise
            if window_covariates is not None:
                train_ds = train_ds.with_covariates(
                    window_covariates,
                    panel_with_covariates=dataset.panel_with_covariates,
                    covariate_bundle=dataset.covariate_bundle,
                )

            # Fit model (with fallback handled by fit_func)
            try:
                model = _call_with_optional_kwargs(
                    fit_func,
                    train_ds,
                    plan,
                    covariates=window_covariates,
                )
            except Exception as e:
                errors.append(
                    {
                        "window": window_idx,
                        "stage": "fit",
                        "error": str(e),
                        "type": type(e).__name__,
                        "model": plan.candidate_models[0] if plan.candidate_models else None,
                    }
                )
                continue

            # Resolve a display name for the fitted model, in priority order:
            # artifact attribute -> artifact metadata -> plan's first candidate.
            model_name = getattr(model, "model_name", None)
            if model_name is None and hasattr(model, "metadata"):
                model_name = model.metadata.get("model_name") if model.metadata else None
            if model_name is None:
                model_name = plan.candidate_models[0] if plan.candidate_models else "model"

            # Predict using training context
            try:
                predictions = _call_with_optional_kwargs(
                    predict_func,
                    train_ds,
                    model,
                    spec,
                    covariates=window_covariates,
                )
            except Exception as e:
                errors.append(
                    {
                        "window": window_idx,
                        "stage": "predict",
                        "error": str(e),
                        "type": type(e).__name__,
                        "model": model_name,
                    }
                )
                continue
            # A dict return is a contract violation; the raise is caught by the
            # outer handler and recorded as a window-level error.
            if isinstance(predictions, dict):
                raise ValueError("predict_func must return DataFrame or ForecastResult")
            if hasattr(predictions, "df"):
                # Unwrap ForecastResult-like objects to their DataFrame.
                predictions = predictions.df

            # Align predictions to test dates only (inner join drops any
            # horizon steps outside the test window).
            predictions = predictions.merge(
                test_df[["unique_id", "ds"]],
                on=["unique_id", "ds"],
                how="inner",
            )
            predictions["model"] = model_name

            # CV frame keeps the raw (pre-reconciliation) predictions + actuals.
            cv_frame = predictions.merge(
                test_df[["unique_id", "ds", "y"]],
                on=["unique_id", "ds"],
                how="left",
            )
            cv_frame["cutoff"] = pd.Timestamp(cutoff_date)
            cv_frames.append(cv_frame)

            # Apply reconciliation if hierarchical
            if reconcile and dataset.is_hierarchical() and dataset.hierarchy:
                predictions = _reconcile_forecast(
                    predictions,
                    dataset.hierarchy,
                    "bottom_up",
                )
                predictions = normalize_quantile_columns(predictions)
                # Reconciliation may drop the model column; restore it.
                predictions["model"] = model_name

            # Compute window + series metrics via eval utilities
            merged_metrics = predictions.merge(
                test_df[["unique_id", "ds", "y"]],
                on=["unique_id", "ds"],
                how="left",
            )
            metric_frame, summary = evaluate_forecasts(
                merged_metrics,
                train_df=train_df,
                season_length=season_length,
                id_col="unique_id",
                ds_col="ds",
                target_col="y",
                model_col="model",
                pred_col="yhat",
                cutoff_col=None,
            )

            window_metrics = _summary_to_metrics(summary.df, model_name)
            series_window_metrics = _series_metrics_from_frame(metric_frame.df, model_name)
            for uid, metrics in series_window_metrics.items():
                if not metrics:
                    continue
                if uid not in series_metrics_agg:
                    series_metrics_agg[uid] = []
                series_metrics_agg[uid].append(metrics)

            # Create window result
            window_result = WindowResult(
                window_index=window_idx,
                train_start=str(train_df["ds"].min()),
                train_end=str(train_df["ds"].max()),
                test_start=str(test_df["ds"].min()),
                test_end=str(test_df["ds"].max()),
                metrics=window_metrics,
                num_series=test_df["unique_id"].nunique(),
                num_observations=len(test_df),
            )
            window_results.append(window_result)

        except Exception as e:
            # Catch-all per window: one failed window must not abort the rest.
            errors.append(
                {
                    "window": window_idx,
                    "error": str(e),
                    "type": type(e).__name__,
                }
            )

    # Aggregate metrics across all windows
    aggregate_metrics = _aggregate_metrics(series_metrics_agg)

    # Create series metrics (mean per metric across this series' windows,
    # ignoring NaN entries)
    series_metrics: dict[str, SeriesMetrics] = {}
    for uid, metrics_list in series_metrics_agg.items():
        metric_names: set[str] = set()
        for metrics in metrics_list:
            metric_names.update(metrics.keys())
        avg_metrics = {
            k: np.mean([m[k] for m in metrics_list if not np.isnan(m.get(k, np.nan))])
            for k in metric_names
        }
        series_metrics[uid] = SeriesMetrics(
            series_id=uid,
            metrics=avg_metrics,
            num_windows=len(metrics_list),
        )

    # Compute segment metrics (by sparsity class) if sparsity profile available
    segment_metrics = _compute_segment_metrics(series_metrics, dataset)

    # Compute temporal metrics if datetime information available
    temporal_metrics = _compute_temporal_metrics(series_metrics_agg, df)

    return BacktestReport(
        n_windows=len(window_results),
        strategy=window_strategy,
        window_results=window_results,
        aggregate_metrics=aggregate_metrics,
        series_metrics=series_metrics,
        segment_metrics=segment_metrics,
        temporal_metrics=temporal_metrics,
        errors=errors,
        metadata={
            "horizon": horizon,
            "step_size": step,
            "min_train_size": min_train_size,
            "primary_model": plan.candidate_models[0] if plan.candidate_models else None,
            "decision_summary": {
                "plan_name": getattr(plan, "plan_name", None),
                "primary_model": plan.candidate_models[0] if plan.candidate_models else None,
                "reasons": route_decision.reasons if route_decision else ["rule_based_router"],
                "buckets": route_decision.buckets if route_decision else [],
                "stats": route_decision.stats if route_decision else {},
            },
        },
        cv_frame=CVFrame(df=pd.concat(cv_frames, ignore_index=True)) if cv_frames else None,
    )
357
+
358
+
359
+ def _summary_to_metrics(summary_df: pd.DataFrame, model_name: str | None) -> dict[str, float]:
360
+ if summary_df is None or summary_df.empty:
361
+ return {}
362
+ df = summary_df
363
+ if "model" in df.columns:
364
+ if model_name and model_name in df["model"].unique():
365
+ df = df[df["model"] == model_name]
366
+ else:
367
+ df = df[df["model"] == df["model"].iloc[0]]
368
+ if "metric" not in df.columns or "value" not in df.columns:
369
+ return {}
370
+ metrics = df.groupby("metric")["value"].mean()
371
+ return {metric: float(value) for metric, value in metrics.items()}
372
+
373
+
374
+ def _series_metrics_from_frame(
375
+ metrics_df: pd.DataFrame,
376
+ model_name: str | None,
377
+ ) -> dict[str, dict[str, float]]:
378
+ if metrics_df is None or metrics_df.empty:
379
+ return {}
380
+ df = metrics_df
381
+ if "model" in df.columns:
382
+ if model_name and model_name in df["model"].unique():
383
+ df = df[df["model"] == model_name]
384
+ else:
385
+ df = df[df["model"] == df["model"].iloc[0]]
386
+ if "unique_id" not in df.columns or "metric" not in df.columns or "value" not in df.columns:
387
+ return {}
388
+ grouped = df.groupby(["unique_id", "metric"])["value"].mean().reset_index()
389
+ result: dict[str, dict[str, float]] = {}
390
+ for uid, group in grouped.groupby("unique_id"):
391
+ result[uid] = {row["metric"]: float(row["value"]) for _, row in group.iterrows()}
392
+ return result
393
+
394
+
395
+ def _aggregate_metrics(
396
+ series_metrics_agg: dict[str, list[dict[str, float]]]
397
+ ) -> dict[str, float]:
398
+ """Aggregate metrics across all series and windows.
399
+
400
+ Args:
401
+ series_metrics_agg: Dict mapping series_id to list of metrics per window
402
+
403
+ Returns:
404
+ Dictionary of aggregated metrics
405
+ """
406
+ if not series_metrics_agg:
407
+ return {}
408
+
409
+ # Collect all metric names
410
+ all_metric_names: set[str] = set()
411
+ for metrics_list in series_metrics_agg.values():
412
+ for metrics in metrics_list:
413
+ all_metric_names.update(metrics.keys())
414
+
415
+ # Aggregate each metric
416
+ aggregated: dict[str, float] = {}
417
+ for metric_name in all_metric_names:
418
+ values = []
419
+ for metrics_list in series_metrics_agg.values():
420
+ for m in metrics_list:
421
+ if metric_name in m and not np.isnan(m[metric_name]):
422
+ values.append(m[metric_name])
423
+
424
+ if values:
425
+ aggregated[metric_name] = float(np.mean(values))
426
+ else:
427
+ aggregated[metric_name] = float("nan")
428
+
429
+ return aggregated
430
+
431
+
432
+ def _validate_temporal_ordering(df: pd.DataFrame) -> None:
433
+ """Validate that data is temporally ordered (no shuffling).
434
+
435
+ This is a critical guardrail to prevent data leakage.
436
+
437
+ Args:
438
+ df: DataFrame to validate
439
+
440
+ Raises:
441
+ ESplitRandomForbidden: If data appears to be randomly ordered
442
+ """
443
+ # Check if data is sorted by unique_id, ds
444
+ expected_order = df.sort_values(["unique_id", "ds"]).index
445
+ if not df.index.equals(expected_order):
446
+ raise ESplitRandomForbidden(
447
+ "Data must be sorted by (unique_id, ds). "
448
+ "Random splits or shuffling is strictly forbidden.",
449
+ context={
450
+ "suggestion": "Ensure data is sorted: df.sort_values(['unique_id', 'ds'])",
451
+ },
452
+ )
453
+
454
+ # Additional check: verify dates are monotonic within each series
455
+ for uid in df["unique_id"].unique():
456
+ series = df[df["unique_id"] == uid]
457
+ dates = pd.to_datetime(series["ds"])
458
+ if not dates.is_monotonic_increasing:
459
+ raise ESplitRandomForbidden(
460
+ f"Dates for series {uid} are not monotonically increasing. "
461
+ f"Data may be shuffled or contain time-travel.",
462
+ context={"series_id": uid},
463
+ )
464
+
465
+
466
+ def _generate_cutoffs(
467
+ all_dates: list[pd.Timestamp],
468
+ n_windows: int,
469
+ horizon: int,
470
+ step: int,
471
+ min_train_size: int,
472
+ strategy: Literal["expanding", "sliding"],
473
+ ) -> list[tuple[pd.Timestamp, list[pd.Timestamp]]]:
474
+ """Generate cutoff dates for backtest windows.
475
+
476
+ Args:
477
+ all_dates: Sorted list of all dates in dataset
478
+ n_windows: Number of windows
479
+ horizon: Forecast horizon
480
+ step: Step size between windows
481
+ min_train_size: Minimum training set size
482
+ strategy: "expanding" or "sliding"
483
+
484
+ Returns:
485
+ List of (cutoff_date, test_dates) tuples
486
+ """
487
+ cutoffs = []
488
+
489
+ # Calculate starting point
490
+ if strategy == "expanding":
491
+ # Expanding window: each window adds more training data
492
+ start_idx = min_train_size
493
+ for i in range(n_windows):
494
+ cutoff_idx = start_idx + i * step
495
+ if cutoff_idx + horizon > len(all_dates):
496
+ break
497
+
498
+ cutoff_date = all_dates[cutoff_idx]
499
+ test_dates = all_dates[cutoff_idx : cutoff_idx + horizon]
500
+ cutoffs.append((cutoff_date, test_dates))
501
+
502
+ elif strategy == "sliding":
503
+ # Sliding window: fixed training size, slides forward
504
+ total_needed = min_train_size + n_windows * step + horizon
505
+ if total_needed > len(all_dates):
506
+ # Adjust n_windows
507
+ n_windows = (len(all_dates) - min_train_size - horizon) // step
508
+
509
+ for i in range(n_windows):
510
+ train_end_idx = min_train_size + i * step
511
+ cutoff_date = all_dates[train_end_idx]
512
+ test_dates = all_dates[train_end_idx : train_end_idx + horizon]
513
+ cutoffs.append((cutoff_date, test_dates))
514
+
515
+ return cutoffs
516
+
517
+
518
def cross_validation_split(
    df: pd.DataFrame,
    n_splits: int = 5,
    horizon: int = 1,
    gap: int = 0,
) -> list[tuple[pd.DataFrame, pd.DataFrame]]:
    """Generate cross-validation splits with temporal validation.

    Random splits are strictly forbidden - this uses temporal splits only.

    Args:
        df: DataFrame with time series
        n_splits: Number of splits
        horizon: Forecast horizon
        gap: Gap between train and test

    Returns:
        List of (train_df, test_df) tuples

    Raises:
        ESplitRandomForbidden: If data is not temporally ordered
    """
    # Guardrail: refuse shuffled/unordered input before splitting.
    _validate_temporal_ordering(df)

    ordered_dates = sorted(df["ds"].unique())
    # Even fold size over the non-horizon portion of the timeline.
    fold_size = (len(ordered_dates) - horizon) // n_splits

    folds: list[tuple[pd.DataFrame, pd.DataFrame]] = []
    for fold in range(n_splits):
        boundary = (fold + 1) * fold_size
        test_begin = boundary + gap

        # Stop once a full horizon no longer fits after the gap.
        if test_begin + horizon > len(ordered_dates):
            break

        last_train_date = ordered_dates[boundary - 1]
        fold_test_dates = ordered_dates[test_begin : test_begin + horizon]

        folds.append(
            (
                df[df["ds"] <= last_train_date].copy(),
                df[df["ds"].isin(fold_test_dates)].copy(),
            )
        )

    return folds
566
+
567
+
568
def _reconcile_forecast(
    forecast_df: pd.DataFrame,
    hierarchy: HierarchyStructure,
    method: str | ReconciliationMethod,
) -> pd.DataFrame:
    """Reconcile forecast to ensure hierarchy coherence.

    Args:
        forecast_df: Forecast DataFrame with columns [unique_id, ds, yhat]
        hierarchy: Hierarchy structure
        method: Reconciliation method name or enum; unknown names fall back
            to bottom-up

    Returns:
        Reconciled forecast DataFrame
    """
    # Imported lazily to keep the hierarchy package an optional dependency
    # of this module at import time.
    from tsagentkit.hierarchy import ReconciliationMethod, reconcile_forecasts

    if isinstance(method, str):
        by_name = {
            "bottom_up": ReconciliationMethod.BOTTOM_UP,
            "top_down": ReconciliationMethod.TOP_DOWN,
            "middle_out": ReconciliationMethod.MIDDLE_OUT,
            "ols": ReconciliationMethod.OLS,
            "wls": ReconciliationMethod.WLS,
            "min_trace": ReconciliationMethod.MIN_TRACE,
        }
        method = by_name.get(method, ReconciliationMethod.BOTTOM_UP)

    return reconcile_forecasts(
        base_forecasts=forecast_df,
        structure=hierarchy,
        method=method,
    )
605
+
606
+
607
def _build_window_covariates(
    dataset: TSDataset,
    task_spec: TaskSpec,
    cutoff_date: pd.Timestamp,
    panel_for_index: pd.DataFrame,
) -> AlignedDataset | None:
    """Align covariates for a specific backtest window.

    Returns None when the dataset carries no covariate information at all.
    Target values at or after the cutoff are masked to NaN so the aligned
    window cannot leak future observations.
    """
    has_panel = dataset.panel_with_covariates is not None
    if not has_panel and dataset.covariate_bundle is None:
        return None

    ds_col = task_spec.panel_contract.ds_col
    y_col = task_spec.panel_contract.y_col

    # Prefer the dataset's covariate panel; fall back to the window's
    # training slice as the alignment index.
    source = dataset.panel_with_covariates if has_panel else panel_for_index
    panel = source.copy()

    if y_col in panel.columns:
        # Hide targets from the cutoff onward (leakage guard).
        panel.loc[panel[ds_col] >= cutoff_date, y_col] = np.nan

    return align_covariates(
        panel=panel,
        task_spec=task_spec,
        covariates=dataset.covariate_bundle,
    )
633
+
634
+
635
+ def _call_with_optional_kwargs(func: Any, *args: Any, **kwargs: Any) -> Any:
636
+ """Call a function with only supported keyword arguments."""
637
+ if not kwargs:
638
+ return func(*args)
639
+
640
+ try:
641
+ import inspect
642
+
643
+ params = inspect.signature(func).parameters
644
+ accepted = {k: v for k, v in kwargs.items() if k in params}
645
+ return func(*args, **accepted)
646
+ except Exception:
647
+ return func(*args, **kwargs)
648
+
649
+
650
def _compute_segment_metrics(
    series_metrics: dict[str, SeriesMetrics],
    dataset: TSDataset,
) -> dict[str, SegmentMetrics]:
    """Compute segment metrics grouped by sparsity class.

    Series are bucketed by the dataset's sparsity classification; without a
    sparsity profile every series lands in an "unknown" segment. Each
    segment's metrics are the NaN-ignoring means over its member series.

    Args:
        series_metrics: Dictionary of series_id to SeriesMetrics
        dataset: TSDataset with sparsity profile

    Returns:
        Dictionary of segment_name to SegmentMetrics
    """
    from collections import defaultdict

    members: dict[str, list[str]] = defaultdict(list)
    member_metrics: dict[str, list[dict[str, float]]] = defaultdict(list)

    profile = dataset.sparsity_profile
    for uid, sm in series_metrics.items():
        segment = profile.get_classification(uid).value if profile else "unknown"
        members[segment].append(uid)
        member_metrics[segment].append(sm.metrics)

    result: dict[str, SegmentMetrics] = {}
    for segment, ids in members.items():
        metric_dicts = member_metrics[segment]
        if not metric_dicts:
            continue

        # Union of metric names across the segment's series.
        names: set[str] = set()
        for metrics in metric_dicts:
            names.update(metrics)

        averaged: dict[str, float] = {}
        for name in names:
            finite = [m[name] for m in metric_dicts if not np.isnan(m.get(name, np.nan))]
            if finite:
                averaged[name] = float(np.mean(finite))

        result[segment] = SegmentMetrics(
            segment_name=segment,
            series_ids=ids,
            metrics=averaged,
            n_series=len(ids),
        )

    return result
706
+
707
+
708
+ def _select_primary_metric(
709
+ series_metrics_agg: dict[str, list[dict[str, float]]],
710
+ preferred: tuple[str, ...] = ("wape", "mae", "rmse", "smape", "mase"),
711
+ ) -> str | None:
712
+ metric_names: set[str] = set()
713
+ for metrics_list in series_metrics_agg.values():
714
+ for metrics in metrics_list:
715
+ metric_names.update(metrics.keys())
716
+ for name in preferred:
717
+ if name in metric_names:
718
+ return name
719
+ return next(iter(metric_names), None)
720
+
721
+
722
def _compute_temporal_metrics(
    series_metrics_agg: dict[str, list[dict[str, float]]],
    df: pd.DataFrame,
) -> dict[str, TemporalMetrics]:
    """Compute temporal metrics grouped by time dimensions.

    Produces hour-of-day and day-of-week slices of the primary metric.
    Refactored to share one bucketing helper (the hour and day-of-week
    bodies were duplicates) and to pre-average each series' metric across
    windows once instead of recomputing it for every bucket; results are
    unchanged.

    Args:
        series_metrics_agg: Dict mapping series_id to list of window metrics
        df: Original DataFrame with datetime information

    Returns:
        Dictionary of dimension to TemporalMetrics
    """
    result: dict[str, TemporalMetrics] = {}

    metric_name = _select_primary_metric(series_metrics_agg)
    if metric_name is None:
        return result

    # Parse dates on a copy so the caller's frame is untouched.
    df = df.copy()
    df["ds"] = pd.to_datetime(df["ds"])

    # Per-series mean of the primary metric across windows (NaN means drop
    # the series, matching the original per-bucket behavior).
    series_avg: dict[str, float] = {}
    for uid, windows in series_metrics_agg.items():
        if not windows:
            continue
        avg = np.mean([m.get(metric_name, np.nan) for m in windows])
        if not np.isnan(avg):
            series_avg[uid] = float(avg)

    dow_names = ["Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"]
    for dimension, bucket_values, label_of in (
        ("hour", df["ds"].dt.hour, str),
        ("dayofweek", df["ds"].dt.dayofweek, lambda d: dow_names[d]),
    ):
        by_bucket = _bucket_metric_averages(df, bucket_values, series_avg, metric_name, label_of)
        if by_bucket:
            result[dimension] = TemporalMetrics(dimension=dimension, metrics_by_value=by_bucket)

    return result


def _bucket_metric_averages(
    df: pd.DataFrame,
    bucket_values: pd.Series,
    series_avg: dict[str, float],
    metric_name: str,
    label_of: Callable[[Any], str],
) -> dict[str, dict[str, float]]:
    """Average the per-series metric over the series present in each bucket.

    Args:
        df: Panel frame providing the ``unique_id`` column.
        bucket_values: Per-row bucket key (e.g. hour or weekday), aligned with df.
        series_avg: Pre-computed per-series metric averages.
        metric_name: Metric being sliced.
        label_of: Maps a bucket key to its string label.

    Returns:
        ``{bucket_label: {metric_name: mean}}``, omitting empty buckets.
    """
    out: dict[str, dict[str, float]] = {}
    for bucket in sorted(bucket_values.unique()):
        uids = df.loc[bucket_values == bucket, "unique_id"].unique()
        finite = [series_avg[uid] for uid in uids if uid in series_avg]
        if finite:
            out[label_of(bucket)] = {metric_name: float(np.mean(finite))}
    return out