tsagentkit 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. tsagentkit/__init__.py +126 -0
  2. tsagentkit/anomaly/__init__.py +130 -0
  3. tsagentkit/backtest/__init__.py +48 -0
  4. tsagentkit/backtest/engine.py +788 -0
  5. tsagentkit/backtest/metrics.py +244 -0
  6. tsagentkit/backtest/report.py +342 -0
  7. tsagentkit/calibration/__init__.py +136 -0
  8. tsagentkit/contracts/__init__.py +133 -0
  9. tsagentkit/contracts/errors.py +275 -0
  10. tsagentkit/contracts/results.py +418 -0
  11. tsagentkit/contracts/schema.py +44 -0
  12. tsagentkit/contracts/task_spec.py +300 -0
  13. tsagentkit/covariates/__init__.py +340 -0
  14. tsagentkit/eval/__init__.py +285 -0
  15. tsagentkit/features/__init__.py +20 -0
  16. tsagentkit/features/covariates.py +328 -0
  17. tsagentkit/features/extra/__init__.py +5 -0
  18. tsagentkit/features/extra/native.py +179 -0
  19. tsagentkit/features/factory.py +187 -0
  20. tsagentkit/features/matrix.py +159 -0
  21. tsagentkit/features/tsfeatures_adapter.py +115 -0
  22. tsagentkit/features/versioning.py +203 -0
  23. tsagentkit/hierarchy/__init__.py +39 -0
  24. tsagentkit/hierarchy/aggregation.py +62 -0
  25. tsagentkit/hierarchy/evaluator.py +400 -0
  26. tsagentkit/hierarchy/reconciliation.py +232 -0
  27. tsagentkit/hierarchy/structure.py +453 -0
  28. tsagentkit/models/__init__.py +182 -0
  29. tsagentkit/models/adapters/__init__.py +83 -0
  30. tsagentkit/models/adapters/base.py +321 -0
  31. tsagentkit/models/adapters/chronos.py +387 -0
  32. tsagentkit/models/adapters/moirai.py +256 -0
  33. tsagentkit/models/adapters/registry.py +171 -0
  34. tsagentkit/models/adapters/timesfm.py +440 -0
  35. tsagentkit/models/baselines.py +207 -0
  36. tsagentkit/models/sktime.py +307 -0
  37. tsagentkit/monitoring/__init__.py +51 -0
  38. tsagentkit/monitoring/alerts.py +302 -0
  39. tsagentkit/monitoring/coverage.py +203 -0
  40. tsagentkit/monitoring/drift.py +330 -0
  41. tsagentkit/monitoring/report.py +214 -0
  42. tsagentkit/monitoring/stability.py +275 -0
  43. tsagentkit/monitoring/triggers.py +423 -0
  44. tsagentkit/qa/__init__.py +347 -0
  45. tsagentkit/router/__init__.py +37 -0
  46. tsagentkit/router/bucketing.py +489 -0
  47. tsagentkit/router/fallback.py +132 -0
  48. tsagentkit/router/plan.py +23 -0
  49. tsagentkit/router/router.py +271 -0
  50. tsagentkit/series/__init__.py +26 -0
  51. tsagentkit/series/alignment.py +206 -0
  52. tsagentkit/series/dataset.py +449 -0
  53. tsagentkit/series/sparsity.py +261 -0
  54. tsagentkit/series/validation.py +393 -0
  55. tsagentkit/serving/__init__.py +39 -0
  56. tsagentkit/serving/orchestration.py +943 -0
  57. tsagentkit/serving/packaging.py +73 -0
  58. tsagentkit/serving/provenance.py +317 -0
  59. tsagentkit/serving/tsfm_cache.py +214 -0
  60. tsagentkit/skill/README.md +135 -0
  61. tsagentkit/skill/__init__.py +8 -0
  62. tsagentkit/skill/recipes.md +429 -0
  63. tsagentkit/skill/tool_map.md +21 -0
  64. tsagentkit/time/__init__.py +134 -0
  65. tsagentkit/utils/__init__.py +20 -0
  66. tsagentkit/utils/quantiles.py +83 -0
  67. tsagentkit/utils/signature.py +47 -0
  68. tsagentkit/utils/temporal.py +41 -0
  69. tsagentkit-1.0.2.dist-info/METADATA +371 -0
  70. tsagentkit-1.0.2.dist-info/RECORD +72 -0
  71. tsagentkit-1.0.2.dist-info/WHEEL +4 -0
  72. tsagentkit-1.0.2.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,285 @@
1
+ """Evaluation utilities for forecast metrics and summaries."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from functools import partial
7
+ from typing import Any
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+
12
+ from tsagentkit.utils import parse_quantile_column
13
+
14
+
15
@dataclass(frozen=True)
class MetricFrame:
    """Container for metric results.

    Wraps the long-format metric table produced by ``evaluate_forecasts``
    (columns: id, optional cutoff, ``metric``, ``model``, ``value``).
    """

    # Long-format metrics table; empty when evaluation was skipped
    # (empty input or missing optional dependency).
    df: pd.DataFrame
20
+
21
+
22
@dataclass(frozen=True)
class ScoreSummary:
    """Aggregate metric summary.

    Wraps the per-(model, metric) mean of the long metric values computed
    in ``evaluate_forecasts``.
    """

    # Columns: ``model``, ``metric``, ``value`` (mean across series and,
    # when present, cutoffs); empty when evaluation was skipped.
    df: pd.DataFrame
27
+
28
+
29
+ def _maybe_import_utilsforecast():
30
+ try:
31
+ from utilsforecast import evaluation as ufeval
32
+ from utilsforecast import losses as uflosses
33
+ except Exception:
34
+ return None, None
35
+ return ufeval, uflosses
36
+
37
+
38
def _wide_predictions(
    df: pd.DataFrame,
    id_col: str,
    ds_col: str,
    target_col: str,
    model_col: str,
    pred_col: str,
    cutoff_col: str | None,
) -> tuple[pd.DataFrame, list[str], dict[str, dict[float, str]]]:
    """Pivot long-format predictions into one column per model.

    Args:
        df: Long frame with one row per (series, timestamp, model), the
            point prediction in ``pred_col``, the actual in ``target_col``,
            and optionally per-quantile prediction columns.
        id_col: Series-identifier column name.
        ds_col: Timestamp column name.
        target_col: Actuals column name.
        model_col: Column naming the model that produced each row.
        pred_col: Point-prediction column name.
        cutoff_col: Optional backtest-cutoff column; joined into the index
            keys when present in ``df``.

    Returns:
        Tuple of ``(wide, model_cols, quantile_map)`` where ``wide`` has one
        prediction column per model plus the actuals, ``model_cols`` lists
        those model columns, and ``quantile_map`` maps
        ``model -> {quantile_level -> wide_column_name}``.
    """
    index_cols = [id_col, ds_col]
    # Backtest outputs carry a cutoff per window; include it in the keys.
    if cutoff_col and cutoff_col in df.columns:
        index_cols.append(cutoff_col)
    # One column per model; aggfunc="mean" collapses duplicate
    # (key, model) rows if any exist.
    pivot = df.pivot_table(
        index=index_cols,
        columns=model_col,
        values=pred_col,
        aggfunc="mean",
    )
    wide = pivot.reset_index()
    # Re-attach actuals, keeping a single target row per key.
    actuals = df[index_cols + [target_col]].drop_duplicates(subset=index_cols)
    wide = wide.merge(actuals, on=index_cols, how="left")
    # Everything that is neither a key nor the target is a model column.
    # NOTE(review): order follows pandas pivot column ordering — callers
    # should not rely on any particular model order.
    model_cols = [c for c in wide.columns if c not in index_cols + [target_col]]
    # parse_quantile_column returns the quantile level for quantile-style
    # column names and None otherwise (project helper).
    quantile_cols = [c for c in df.columns if parse_quantile_column(c) is not None]
    quantile_map: dict[str, dict[float, str]] = {}

    if quantile_cols:
        for q_col in quantile_cols:
            q = parse_quantile_column(q_col)
            if q is None:
                continue
            # Pivot this quantile column per model, same keys as above.
            q_pivot = df.pivot_table(
                index=index_cols,
                columns=model_col,
                values=q_col,
                aggfunc="mean",
            ).reset_index()
            rename_map: dict[str, str] = {}
            for col in q_pivot.columns:
                if col in index_cols:
                    continue
                # col is a model name here; disambiguate with the quantile
                # column name so multiple models/quantiles can coexist.
                new_col = f"{col}__{q_col}"
                rename_map[col] = new_col
                quantile_map.setdefault(col, {})[q] = new_col
            if rename_map:
                q_pivot = q_pivot.rename(columns=rename_map)
                wide = wide.merge(q_pivot, on=index_cols, how="left")

    return wide, model_cols, quantile_map
86
+
87
+
88
+ def _wrap_metric_name(func: Any, name: str) -> Any:
89
+ func.__name__ = name
90
+ return func
91
+
92
+
93
def _make_wape_metric(uflosses: Any, cutoff_col: str) -> Any:
    """Build a WAPE metric callable that delegates to ``uflosses.nd``.

    Args:
        uflosses: The ``utilsforecast.losses`` module (or compatible object).
        cutoff_col: Column name forwarded as ``cutoff_col`` to the loss.

    Returns:
        A metric function exposed under the name ``"wape"``.
    """

    def _wape(
        df: pd.DataFrame,
        models: list[str],
        id_col: str = "unique_id",
        target_col: str = "y",
        **_: Any,
    ) -> pd.DataFrame:
        # Extra keyword arguments from the evaluation layer are ignored.
        return uflosses.nd(
            df=df,
            models=models,
            id_col=id_col,
            target_col=target_col,
            cutoff_col=cutoff_col,
        )

    return _wrap_metric_name(_wape, "wape")
110
+
111
+
112
def _make_quantile_loss_metric(
    uflosses: Any,
    q: float,
    quantile_models: dict[str, str],
    cutoff_col: str,
) -> Any:
    """Build a pinball-loss metric for a single quantile level.

    Args:
        uflosses: The ``utilsforecast.losses`` module (or compatible object).
        q: Quantile level.
        quantile_models: Mapping of model name to the wide column holding
            that model's prediction at level ``q``.
        cutoff_col: Column name forwarded to the underlying loss.

    Returns:
        A metric function named ``pinball_<q>`` (three decimal places).
    """
    metric_name = f"pinball_{q:.3f}"

    def _pinball(
        df: pd.DataFrame,
        models: list[str],
        id_col: str = "unique_id",
        target_col: str = "y",
        **_: Any,
    ) -> pd.DataFrame:
        # The closure ignores `models` and scores the per-quantile columns
        # captured in `quantile_models` instead.
        return uflosses.quantile_loss(
            df=df,
            models=quantile_models,
            q=q,
            id_col=id_col,
            target_col=target_col,
            cutoff_col=cutoff_col,
        )

    return _wrap_metric_name(_pinball, metric_name)
135
+
136
+
137
def _make_wql_metric(
    uflosses: Any,
    quantile_models: dict[str, list[str]],
    quantiles: np.ndarray,
    cutoff_col: str,
) -> Any:
    """Build a weighted-quantile-loss metric over a full quantile grid.

    Args:
        uflosses: The ``utilsforecast.losses`` module (or compatible object).
        quantile_models: Mapping of model name to the list of wide columns
            holding that model's predictions, one per level in ``quantiles``
            (same order).
        quantiles: Sorted array of quantile levels.
        cutoff_col: Column name forwarded to the underlying loss.

    Returns:
        A metric function exposed under the name ``"wql"``.
    """

    def _wql(
        df: pd.DataFrame,
        models: list[str],
        id_col: str = "unique_id",
        target_col: str = "y",
        **_: Any,
    ) -> pd.DataFrame:
        # `models` is ignored; the per-model quantile columns captured in
        # `quantile_models` drive the multi-quantile loss.
        return uflosses.mqloss(
            df=df,
            models=quantile_models,
            quantiles=quantiles,
            id_col=id_col,
            target_col=target_col,
            cutoff_col=cutoff_col,
        )

    return _wrap_metric_name(_wql, "wql")
160
+
161
+
162
def evaluate_forecasts(
    df: pd.DataFrame,
    train_df: pd.DataFrame | None = None,
    season_length: int | None = None,
    id_col: str = "unique_id",
    ds_col: str = "ds",
    target_col: str = "y",
    model_col: str = "model",
    pred_col: str = "yhat",
    cutoff_col: str | None = "cutoff",
) -> tuple[MetricFrame, ScoreSummary]:
    """Compute point + quantile metrics in a stable long schema.

    Pivots long predictions to wide, assembles the metric list
    (MAE/RMSE/sMAPE always; WAPE, MASE, per-quantile pinball and WQL when
    the corresponding ``utilsforecast`` losses and inputs are available),
    runs ``utilsforecast.evaluation.evaluate``, then melts the result back
    to a long ``(id, [cutoff], metric, model, value)`` frame plus a
    per-(model, metric) mean summary.

    Args:
        df: Long predictions with ``id_col``/``ds_col``/``target_col``, a
            ``model_col`` (a constant ``"model"`` column is added if
            missing), point predictions in ``pred_col``, and optional
            quantile columns.
        train_df: Optional training data forwarded to ``evaluate``;
            together with ``season_length`` it enables MASE.
        season_length: Seasonality for MASE; falsy disables MASE.
        id_col: Series-identifier column name.
        ds_col: Timestamp column name.
        target_col: Actuals column name.
        model_col: Model-name column in the long input.
        pred_col: Point-prediction column name.
        cutoff_col: Optional backtest-cutoff column; kept in the output
            index when present in the input.

    Returns:
        ``(MetricFrame, ScoreSummary)``. Both wrap empty frames when ``df``
        is empty, ``utilsforecast`` is unavailable, or no model columns
        were produced.
    """
    # No predictions: nothing to score.
    if df.empty:
        return MetricFrame(pd.DataFrame()), ScoreSummary(pd.DataFrame())

    # Single-model input without an explicit model column; copy first so
    # the caller's frame is not mutated.
    if model_col not in df.columns:
        df = df.copy()
        df[model_col] = "model"

    wide, model_cols, quantile_map = _wide_predictions(
        df,
        id_col=id_col,
        ds_col=ds_col,
        target_col=target_col,
        model_col=model_col,
        pred_col=pred_col,
        cutoff_col=cutoff_col,
    )

    # utilsforecast is an optional dependency; degrade to empty results.
    ufeval, uflosses = _maybe_import_utilsforecast()
    if ufeval is None or uflosses is None or not model_cols:
        return MetricFrame(pd.DataFrame()), ScoreSummary(pd.DataFrame())

    cutoff_present = cutoff_col is not None and cutoff_col in wide.columns
    # NOTE(review): cutoff_name is always passed to evaluate/losses below,
    # even when the cutoff column is absent from the data — presumably
    # tolerated by utilsforecast; confirm against its API.
    cutoff_name = cutoff_col if cutoff_col is not None else "cutoff"
    wide_eval = wide

    # Baseline point metrics, always available in utilsforecast.losses.
    metrics: list[Any] = [uflosses.mae, uflosses.rmse, uflosses.smape]
    if hasattr(uflosses, "nd"):
        metrics.append(_make_wape_metric(uflosses, cutoff_name))

    # MASE needs training history and a seasonality.
    if train_df is not None and season_length and hasattr(uflosses, "mase"):
        mase_metric = partial(uflosses.mase, seasonality=season_length)
        metrics.append(_wrap_metric_name(mase_metric, "mase"))

    if quantile_map and hasattr(uflosses, "quantile_loss"):
        # Only models that actually produced quantile columns participate.
        available_models = [model for model in model_cols if model in quantile_map]
        if available_models:
            # Quantile levels shared by every participating model.
            common_quantiles = set.intersection(
                *[
                    set(quantile_map[model].keys())
                    for model in available_models
                ]
            )
        else:
            common_quantiles = set()

        if common_quantiles:
            quantiles_sorted = sorted(common_quantiles)
            # One pinball metric per shared quantile level.
            for q in quantiles_sorted:
                per_q_models = {
                    model: quantile_map[model][q]
                    for model in available_models
                    if q in quantile_map[model]
                }
                if per_q_models:
                    metrics.append(
                        _make_quantile_loss_metric(
                            uflosses=uflosses,
                            q=q,
                            quantile_models=per_q_models,
                            cutoff_col=cutoff_name,
                        )
                    )

            # WQL over the full grid, only for models covering every level.
            per_model_quantiles = {
                model: [quantile_map[model][q] for q in quantiles_sorted]
                for model in available_models
                if all(q in quantile_map[model] for q in quantiles_sorted)
            }
            if per_model_quantiles and hasattr(uflosses, "mqloss"):
                metrics.append(
                    _make_wql_metric(
                        uflosses=uflosses,
                        quantile_models=per_model_quantiles,
                        quantiles=np.asarray(quantiles_sorted, dtype=float),
                        cutoff_col=cutoff_name,
                    )
                )

    metric_df = ufeval.evaluate(
        wide_eval,
        metrics=metrics,
        models=model_cols,
        train_df=train_df,
        id_col=id_col,
        time_col=ds_col,
        target_col=target_col,
        cutoff_col=cutoff_name,
    )

    # Melt the per-model columns back into the stable long schema.
    metrics_long = metric_df.copy()
    index_cols = [id_col]
    if cutoff_present and cutoff_col and cutoff_col in metrics_long.columns:
        index_cols.append(cutoff_col)

    metric_cols = [c for c in metrics_long.columns if c not in index_cols + ["metric"]]
    metrics_long = metrics_long.melt(
        id_vars=index_cols + ["metric"],
        value_vars=metric_cols,
        var_name="model",
        value_name="value",
    )

    # Unweighted mean over series (and cutoffs, when present).
    summary = (
        metrics_long.groupby(["model", "metric"])["value"]
        .mean()
        .reset_index()
    )

    return MetricFrame(metrics_long), ScoreSummary(summary)
283
+
284
+
285
+ __all__ = ["MetricFrame", "ScoreSummary", "evaluate_forecasts"]
@@ -0,0 +1,20 @@
1
+ """Feature engineering module for time series forecasting.
2
+
3
+ Provides point-in-time safe feature engineering with full versioning support.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from tsagentkit.features.covariates import CovariateManager, CovariatePolicy
9
+ from tsagentkit.features.factory import FeatureConfig, FeatureFactory
10
+ from tsagentkit.features.matrix import FeatureMatrix
11
+ from tsagentkit.features.versioning import compute_feature_hash
12
+
13
+ __all__ = [
14
+ "FeatureMatrix",
15
+ "FeatureFactory",
16
+ "FeatureConfig",
17
+ "CovariateManager",
18
+ "CovariatePolicy",
19
+ "compute_feature_hash",
20
+ ]
@@ -0,0 +1,328 @@
1
+ """Covariate management for known vs observed covariates with leakage protection."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from datetime import datetime
7
+ from enum import Enum
8
+ from typing import TYPE_CHECKING
9
+
10
+ import pandas as pd
11
+
12
+ from tsagentkit.contracts.errors import ECovariateLeakage
13
+
14
+ if TYPE_CHECKING:
15
+ pass
16
+
17
+
18
class CovariatePolicy(Enum):
    """Policy for handling different covariate types.

    - KNOWN: Covariates known for all time steps (e.g., holidays,
      promotions planned in advance)
    - OBSERVED: Covariates only observed up to current time (e.g., actual
      sales, weather)
    """

    # String values mirror the policy strings compared against in
    # infer_covariate_config ("known" / "observed").
    KNOWN = "known"
    OBSERVED = "observed"
27
+
28
+
29
@dataclass(frozen=True)
class CovariateConfig:
    """Configuration specifying covariate types.

    Attributes:
        known: List of column names for known covariates
        observed: List of column names for observed covariates

    Example:
        >>> config = CovariateConfig(
        ...     known=["holiday", "promotion_planned"],
        ...     observed=["competitor_price", "weather"],
        ... )
    """

    known: list[str] = field(default_factory=list)
    observed: list[str] = field(default_factory=list)

    def __post_init__(self) -> None:
        """Reject columns declared under both policies."""
        overlap = set(self.known).intersection(self.observed)
        if overlap:
            raise ValueError(f"Covariates cannot be both known and observed: {overlap}")

    def get_policy(self, column: str) -> CovariatePolicy | None:
        """Return the policy governing *column*.

        Args:
            column: Column name to check

        Returns:
            The matching :class:`CovariatePolicy`, or ``None`` when the
            column is not declared as a covariate at all.
        """
        if column in self.known:
            return CovariatePolicy.KNOWN
        if column in self.observed:
            return CovariatePolicy.OBSERVED
        return None

    def all_covariates(self) -> list[str]:
        """Return every covariate column name, known first then observed."""
        return [*self.known, *self.observed]
71
+
72
+
73
def infer_covariate_config(
    df: pd.DataFrame,
    policy: str,
    id_col: str = "unique_id",
    ds_col: str = "ds",
    target_col: str = "y",
) -> CovariateConfig:
    """Infer covariate configuration based on policy and data.

    Args:
        df: Frame whose columns other than ``id_col``/``ds_col``/``target_col``
            are treated as candidate covariates.
        policy: ``"ignore"`` yields an empty config; ``"known"``/``"observed"``
            assign every candidate accordingly; any other value triggers the
            auto heuristic based on future (null-target) rows.
        id_col: Series-identifier column name.
        ds_col: Timestamp column name.
        target_col: Target column name.

    Returns:
        A :class:`CovariateConfig` partitioning the candidate columns.
    """
    if policy == "ignore":
        return CovariateConfig()

    reserved = {id_col, ds_col, target_col}
    candidates = [c for c in df.columns if c not in reserved]
    if not candidates:
        return CovariateConfig()

    if policy == "known":
        return CovariateConfig(known=candidates, observed=[])
    if policy == "observed":
        return CovariateConfig(known=[], observed=candidates)

    # Auto heuristic: rows with a null target are treated as the "future";
    # a covariate with any non-null value there must be known in advance.
    future_mask = df[target_col].isna() if target_col in df.columns else None
    has_future_rows = future_mask is not None and bool(future_mask.any())

    known: list[str] = []
    observed: list[str] = []
    for column in candidates:
        if has_future_rows and df.loc[future_mask, column].notna().any():
            known.append(column)
        else:
            # Without visible future values, default to the safer "observed".
            observed.append(column)

    return CovariateConfig(known=known, observed=observed)
114
+
115
+
116
class CovariateManager:
    """Manage known vs observed covariates with leakage protection.

    This class ensures that observed covariates are properly handled to prevent
    future information from leaking into training or predictions.

    Example:
        >>> manager = CovariateManager(
        ...     known_covariates=["holiday"],
        ...     observed_covariates=["promotion"],
        ... )
        >>>
        >>> # Validate no leakage
        >>> manager.validate_for_prediction(
        ...     df, forecast_start=datetime(2024, 1, 1), horizon=7
        ... )
        >>>
        >>> # Mask observed covariates for training
        >>> train_df = manager.mask_observed_for_training(df, target_col="y")
    """

    def __init__(
        self,
        known_covariates: list[str] | None = None,
        observed_covariates: list[str] | None = None,
    ) -> None:
        """Initialize the covariate manager.

        Args:
            known_covariates: Columns known for all time steps
            observed_covariates: Columns only observed up to current time

        Raises:
            ValueError: If a column appears in both lists (same check as
                :class:`CovariateConfig`).
        """
        self.known_covariates = known_covariates or []
        self.observed_covariates = observed_covariates or []

        # Check for overlap
        overlap = set(self.known_covariates) & set(self.observed_covariates)
        if overlap:
            raise ValueError(f"Covariates cannot be both known and observed: {overlap}")

    def validate_for_prediction(
        self,
        df: pd.DataFrame,
        forecast_start: datetime,
        horizon: int,
        ds_col: str = "ds",
    ) -> None:
        """Validate that observed covariates don't leak future information.

        This checks that observed covariates do not have values beyond the
        forecast start time, which would indicate future information leakage.

        Args:
            df: DataFrame with covariates
            forecast_start: Start time of the forecast period
            horizon: Forecast horizon. NOTE(review): currently unused —
                validation keys off ``forecast_start`` only; kept for
                interface stability.
            ds_col: Name of the timestamp column

        Raises:
            ECovariateLeakage: If observed covariates extend beyond forecast_start

        Example:
            >>> manager = CovariateManager(observed_covariates=["promo"])
            >>> # This will raise if promo has values after 2024-01-01
            >>> manager.validate_for_prediction(df, datetime(2024, 1, 1), horizon=7)
        """
        # Nothing to validate without observed covariates.
        if not self.observed_covariates:
            return

        # Check each observed covariate
        for col in self.observed_covariates:
            # Declared-but-absent columns are tolerated.
            if col not in df.columns:
                continue

            # Find rows where observed covariate has non-null values beyond forecast_start
            # (">=" means a value exactly at forecast_start already counts as leakage).
            future_mask = (df[ds_col] >= forecast_start) & df[col].notna()
            future_count = future_mask.sum()

            if future_count > 0:
                # Raises on the FIRST offending covariate; later columns are
                # not inspected.
                raise ECovariateLeakage(
                    f"Observed covariate '{col}' has {future_count} values "
                    f"at or after forecast start time {forecast_start}. "
                    "Observed covariates cannot be known in advance.",
                    context={
                        "covariate": col,
                        "forecast_start": forecast_start.isoformat(),
                        "future_values_count": int(future_count),
                    },
                )

    def mask_observed_for_training(
        self,
        df: pd.DataFrame,
        target_col: str = "y",
        ds_col: str = "ds",
        unique_id_col: str = "unique_id",
    ) -> pd.DataFrame:
        """Mask observed covariates at time t to prevent leakage during training.

        For observed covariates, we should only use values that would be available
        at prediction time. This means observed covariates at time t should be
        lagged (using values from before t) to prevent leakage.

        By default, this sets observed covariates to null for the target timestamp
        to ensure proper training. The caller is responsible for creating lagged
        versions of observed covariates before calling this method.

        Args:
            df: DataFrame with covariates
            target_col: Name of target column. NOTE(review): unused here,
                as are ``ds_col`` and ``unique_id_col`` — kept for
                interface symmetry with the other methods.
            ds_col: Name of timestamp column
            unique_id_col: Name of unique_id column

        Returns:
            DataFrame (a copy) with observed covariate columns set entirely
            to null. Note this nulls the WHOLE column, not just rows at the
            prediction time.
        """
        if not self.observed_covariates:
            return df.copy()

        df = df.copy()

        # For training, we mask observed covariates at the prediction time
        # since they wouldn't be known yet. The model should use lagged versions.
        for col in self.observed_covariates:
            if col in df.columns:
                # Set to null - caller should create lagged features
                df[col] = None

        return df

    def create_lagged_observed_features(
        self,
        df: pd.DataFrame,
        lags: list[int],
        ds_col: str = "ds",
        unique_id_col: str = "unique_id",
    ) -> pd.DataFrame:
        """Create lagged versions of observed covariates.

        This creates lagged features for observed covariates to ensure
        point-in-time correctness. For a horizon h, observed covariates
        should be lagged by at least h to prevent leakage.

        Args:
            df: DataFrame with covariates. NOTE(review): the per-group shift
                assumes rows are already time-ordered within each
                ``unique_id`` — confirm upstream sorting; ``ds_col`` itself
                is not used here.
            lags: List of lag periods to create
            ds_col: Name of timestamp column
            unique_id_col: Name of unique_id column

        Returns:
            DataFrame (a copy) with added ``<col>_lag_<lag>`` columns.
        """
        if not self.observed_covariates or not lags:
            return df.copy()

        df = df.copy()

        for col in self.observed_covariates:
            if col not in df.columns:
                continue

            for lag in lags:
                lag_col = f"{col}_lag_{lag}"
                # groupby().shift() preserves row order, so assigning the
                # raw .values aligns positionally with df.
                df[lag_col] = (
                    df.groupby(unique_id_col)[col]
                    .shift(lag)
                    .values
                )

        return df

    def separate_covariates_for_prediction(
        self,
        df: pd.DataFrame,
        forecast_start: datetime,
        ds_col: str = "ds",
    ) -> tuple[pd.DataFrame, pd.DataFrame]:
        """Separate known and observed covariates for prediction setup.

        Returns two DataFrames:
        1. Known covariates: Can be used directly (values known for all time steps)
        2. Observed covariates: Should be masked/handle carefully

        Args:
            df: DataFrame with covariates
            forecast_start: Start of forecast period. NOTE(review): unused —
                no time filtering is applied; only column selection happens.
            ds_col: Name of timestamp column

        Returns:
            Tuple of (known_covariates_df, observed_covariates_df). Either
            element is an empty DataFrame when no covariates of that kind
            are present in ``df``.
        """
        # The id column name is hard-coded here, unlike other methods that
        # take unique_id_col as a parameter.
        all_cols = ["unique_id", ds_col]

        known_cols = all_cols + [
            col for col in self.known_covariates if col in df.columns
        ]
        observed_cols = all_cols + [
            col for col in self.observed_covariates if col in df.columns
        ]

        # len > 2 means at least one covariate column beyond the two keys.
        known_df = df[known_cols].copy() if len(known_cols) > 2 else pd.DataFrame()
        observed_df = (
            df[observed_cols].copy() if len(observed_cols) > 2 else pd.DataFrame()
        )

        return known_df, observed_df

    def get_config(self) -> CovariateConfig:
        """Return this manager's declarations as a :class:`CovariateConfig`."""
        return CovariateConfig(
            known=self.known_covariates,
            observed=self.observed_covariates,
        )
@@ -0,0 +1,5 @@
1
+ """Extra (non-default) feature engineering backends."""
2
+
3
+ from .native import build_native_feature_matrix
4
+
5
+ __all__ = ["build_native_feature_matrix"]