tsagentkit-1.0.2-py3-none-any.whl

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their public registries.
Files changed (72)
  1. tsagentkit/__init__.py +126 -0
  2. tsagentkit/anomaly/__init__.py +130 -0
  3. tsagentkit/backtest/__init__.py +48 -0
  4. tsagentkit/backtest/engine.py +788 -0
  5. tsagentkit/backtest/metrics.py +244 -0
  6. tsagentkit/backtest/report.py +342 -0
  7. tsagentkit/calibration/__init__.py +136 -0
  8. tsagentkit/contracts/__init__.py +133 -0
  9. tsagentkit/contracts/errors.py +275 -0
  10. tsagentkit/contracts/results.py +418 -0
  11. tsagentkit/contracts/schema.py +44 -0
  12. tsagentkit/contracts/task_spec.py +300 -0
  13. tsagentkit/covariates/__init__.py +340 -0
  14. tsagentkit/eval/__init__.py +285 -0
  15. tsagentkit/features/__init__.py +20 -0
  16. tsagentkit/features/covariates.py +328 -0
  17. tsagentkit/features/extra/__init__.py +5 -0
  18. tsagentkit/features/extra/native.py +179 -0
  19. tsagentkit/features/factory.py +187 -0
  20. tsagentkit/features/matrix.py +159 -0
  21. tsagentkit/features/tsfeatures_adapter.py +115 -0
  22. tsagentkit/features/versioning.py +203 -0
  23. tsagentkit/hierarchy/__init__.py +39 -0
  24. tsagentkit/hierarchy/aggregation.py +62 -0
  25. tsagentkit/hierarchy/evaluator.py +400 -0
  26. tsagentkit/hierarchy/reconciliation.py +232 -0
  27. tsagentkit/hierarchy/structure.py +453 -0
  28. tsagentkit/models/__init__.py +182 -0
  29. tsagentkit/models/adapters/__init__.py +83 -0
  30. tsagentkit/models/adapters/base.py +321 -0
  31. tsagentkit/models/adapters/chronos.py +387 -0
  32. tsagentkit/models/adapters/moirai.py +256 -0
  33. tsagentkit/models/adapters/registry.py +171 -0
  34. tsagentkit/models/adapters/timesfm.py +440 -0
  35. tsagentkit/models/baselines.py +207 -0
  36. tsagentkit/models/sktime.py +307 -0
  37. tsagentkit/monitoring/__init__.py +51 -0
  38. tsagentkit/monitoring/alerts.py +302 -0
  39. tsagentkit/monitoring/coverage.py +203 -0
  40. tsagentkit/monitoring/drift.py +330 -0
  41. tsagentkit/monitoring/report.py +214 -0
  42. tsagentkit/monitoring/stability.py +275 -0
  43. tsagentkit/monitoring/triggers.py +423 -0
  44. tsagentkit/qa/__init__.py +347 -0
  45. tsagentkit/router/__init__.py +37 -0
  46. tsagentkit/router/bucketing.py +489 -0
  47. tsagentkit/router/fallback.py +132 -0
  48. tsagentkit/router/plan.py +23 -0
  49. tsagentkit/router/router.py +271 -0
  50. tsagentkit/series/__init__.py +26 -0
  51. tsagentkit/series/alignment.py +206 -0
  52. tsagentkit/series/dataset.py +449 -0
  53. tsagentkit/series/sparsity.py +261 -0
  54. tsagentkit/series/validation.py +393 -0
  55. tsagentkit/serving/__init__.py +39 -0
  56. tsagentkit/serving/orchestration.py +943 -0
  57. tsagentkit/serving/packaging.py +73 -0
  58. tsagentkit/serving/provenance.py +317 -0
  59. tsagentkit/serving/tsfm_cache.py +214 -0
  60. tsagentkit/skill/README.md +135 -0
  61. tsagentkit/skill/__init__.py +8 -0
  62. tsagentkit/skill/recipes.md +429 -0
  63. tsagentkit/skill/tool_map.md +21 -0
  64. tsagentkit/time/__init__.py +134 -0
  65. tsagentkit/utils/__init__.py +20 -0
  66. tsagentkit/utils/quantiles.py +83 -0
  67. tsagentkit/utils/signature.py +47 -0
  68. tsagentkit/utils/temporal.py +41 -0
  69. tsagentkit-1.0.2.dist-info/METADATA +371 -0
  70. tsagentkit-1.0.2.dist-info/RECORD +72 -0
  71. tsagentkit-1.0.2.dist-info/WHEEL +4 -0
  72. tsagentkit-1.0.2.dist-info/licenses/LICENSE +201 -0
tsagentkit/features/extra/native.py
@@ -0,0 +1,179 @@
+ """Native (hand-rolled) feature engineering backend.
+
+ This module preserves the original feature-engineering logic but is treated
+ as a non-default backend for Phase 2+.
+ """
+
+ from __future__ import annotations
+
+ from datetime import datetime
+ from typing import Any
+
+ import pandas as pd
+
+ from tsagentkit.features.matrix import FeatureMatrix
+ from tsagentkit.features.versioning import FeatureConfig, compute_feature_hash
+
+
+ def extract_panel(dataset: Any) -> pd.DataFrame:
+     if hasattr(dataset, "df"):
+         return dataset.df.copy()
+     if hasattr(dataset, "data"):
+         return dataset.data.copy()
+     return dataset.copy()
+
+
+ def prepare_panel(df: pd.DataFrame, reference_time: datetime | None) -> pd.DataFrame:
+     required = ["unique_id", "ds", "y"]
+     missing = [col for col in required if col not in df.columns]
+     if missing:
+         raise ValueError(f"Missing required columns: {missing}")
+
+     if not pd.api.types.is_datetime64_any_dtype(df["ds"]):
+         df["ds"] = pd.to_datetime(df["ds"])
+
+     if reference_time is None:
+         reference_time = df["ds"].max()
+
+     df = df[df["ds"] <= reference_time].copy()
+     return df.sort_values(["unique_id", "ds"]).reset_index(drop=True)
+
+
+ def create_lag_features(df: pd.DataFrame, lags: list[int]) -> pd.DataFrame:
+     for lag in lags:
+         df[f"y_lag_{lag}"] = df.groupby("unique_id")["y"].shift(lag)
+     return df
+
+
+ def create_calendar_features(df: pd.DataFrame, features: list[str]) -> pd.DataFrame:
+     ds = pd.to_datetime(df["ds"])
+     feature_map = {
+         "dayofweek": lambda d: d.dt.dayofweek,
+         "month": lambda d: d.dt.month,
+         "quarter": lambda d: d.dt.quarter,
+         "year": lambda d: d.dt.year,
+         "dayofmonth": lambda d: d.dt.day,
+         "dayofyear": lambda d: d.dt.dayofyear,
+         "weekofyear": lambda d: d.dt.isocalendar().week,
+         "hour": lambda d: d.dt.hour,
+         "minute": lambda d: d.dt.minute,
+         "is_month_start": lambda d: d.dt.is_month_start.astype(int),
+         "is_month_end": lambda d: d.dt.is_month_end.astype(int),
+         "is_quarter_start": lambda d: d.dt.is_quarter_start.astype(int),
+         "is_quarter_end": lambda d: d.dt.is_quarter_end.astype(int),
+     }
+
+     for feature in features:
+         if feature in feature_map:
+             df[feature] = feature_map[feature](ds)
+
+     return df
+
+
+ def create_rolling_features(df: pd.DataFrame, windows: dict[int, list[str]]) -> pd.DataFrame:
+     for window, aggs in windows.items():
+         for agg in aggs:
+             if agg == "mean":
+                 series = df.groupby("unique_id")["y"].transform(
+                     lambda x, window=window: x.shift(1)
+                     .rolling(window=window, min_periods=1)
+                     .mean()
+                 )
+             elif agg == "std":
+                 series = df.groupby("unique_id")["y"].transform(
+                     lambda x, window=window: x.shift(1)
+                     .rolling(window=window, min_periods=1)
+                     .std()
+                 )
+             elif agg == "min":
+                 series = df.groupby("unique_id")["y"].transform(
+                     lambda x, window=window: x.shift(1)
+                     .rolling(window=window, min_periods=1)
+                     .min()
+                 )
+             elif agg == "max":
+                 series = df.groupby("unique_id")["y"].transform(
+                     lambda x, window=window: x.shift(1)
+                     .rolling(window=window, min_periods=1)
+                     .max()
+                 )
+             elif agg == "sum":
+                 series = df.groupby("unique_id")["y"].transform(
+                     lambda x, window=window: x.shift(1)
+                     .rolling(window=window, min_periods=1)
+                     .sum()
+                 )
+             elif agg == "median":
+                 series = df.groupby("unique_id")["y"].transform(
+                     lambda x, window=window: x.shift(1)
+                     .rolling(window=window, min_periods=1)
+                     .median()
+                 )
+             else:
+                 continue
+             df[f"y_rolling_{agg}_{window}"] = series
+
+     return df
+
+
+ def create_observed_covariate_features(
+     df: pd.DataFrame,
+     observed_covariates: list[str],
+ ) -> pd.DataFrame:
+     for col in observed_covariates:
+         if col not in df.columns:
+             continue
+         lag_col = f"{col}_lag_1"
+         df[lag_col] = df.groupby("unique_id")[col].shift(1)
+     return df
+
+
+ def build_native_feature_matrix(
+     dataset: Any,
+     config: FeatureConfig,
+     reference_time: datetime | None = None,
+ ) -> FeatureMatrix:
+     df = prepare_panel(extract_panel(dataset), reference_time)
+
+     feature_cols: list[str] = []
+
+     if config.lags:
+         df = create_lag_features(df, config.lags)
+         feature_cols.extend([f"y_lag_{lag}" for lag in config.lags])
+
+     if config.calendar_features:
+         df = create_calendar_features(df, config.calendar_features)
+         feature_cols.extend(config.calendar_features)
+
+     if config.rolling_windows:
+         df = create_rolling_features(df, config.rolling_windows)
+         for window, aggs in config.rolling_windows.items():
+             for agg in aggs:
+                 feature_cols.append(f"y_rolling_{agg}_{window}")
+
+     if config.observed_covariates:
+         df = create_observed_covariate_features(df, config.observed_covariates)
+         for col in config.observed_covariates:
+             lag_col = f"{col}_lag_1"
+             if lag_col in df.columns:
+                 feature_cols.append(lag_col)
+
+     if config.known_covariates:
+         for col in config.known_covariates:
+             if col in df.columns and col not in feature_cols:
+                 feature_cols.append(col)
+
+     if config.include_intercept:
+         df["intercept"] = 1.0
+         feature_cols.append("intercept")
+
+     config_hash = compute_feature_hash(config)
+
+     return FeatureMatrix(
+         data=df,
+         config_hash=config_hash,
+         target_col="y",
+         feature_cols=feature_cols,
+         known_covariates=config.known_covariates,
+         observed_covariates=config.observed_covariates,
+     )
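For orientation (not part of the packaged file above), a minimal sketch of how the native helpers behave on a toy panel; every engineered column applies shift(1) before aggregating, so the value at row t only uses observations strictly before t:

import pandas as pd

from tsagentkit.features.extra.native import create_lag_features, create_rolling_features

# Two short series in long format with the required unique_id / ds / y columns.
panel = pd.DataFrame(
    {
        "unique_id": ["a"] * 5 + ["b"] * 5,
        "ds": list(pd.date_range("2024-01-01", periods=5, freq="D")) * 2,
        "y": [1.0, 2.0, 3.0, 4.0, 5.0, 10.0, 20.0, 30.0, 40.0, 50.0],
    }
)

panel = create_lag_features(panel, lags=[1, 7])           # adds y_lag_1, y_lag_7 (NaN where history is short)
panel = create_rolling_features(panel, windows={3: ["mean", "std"]})  # adds y_rolling_mean_3, y_rolling_std_3
print(panel[["unique_id", "ds", "y", "y_lag_1", "y_rolling_mean_3"]])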
tsagentkit/features/factory.py
@@ -0,0 +1,187 @@
+ """Feature factory for point-in-time safe feature engineering."""
+
+ from __future__ import annotations
+
+ import warnings
+ from dataclasses import dataclass, replace
+ from datetime import datetime
+ from typing import TYPE_CHECKING
+
+ import pandas as pd
+
+ from tsagentkit.features.covariates import CovariateManager
+ from tsagentkit.features.extra.native import (
+     build_native_feature_matrix,
+     create_calendar_features,
+     create_lag_features,
+     create_observed_covariate_features,
+     create_rolling_features,
+ )
+ from tsagentkit.features.matrix import FeatureMatrix
+ from tsagentkit.features.tsfeatures_adapter import build_tsfeatures_matrix
+ from tsagentkit.features.versioning import FeatureConfig
+
+ if TYPE_CHECKING:
+     from tsagentkit.series import TSDataset
+
+
+ @dataclass
+ class FeatureFactory:
+     """Point-in-time safe feature engineering for time series.
+
+     This factory creates features ensuring no lookahead bias by strictly
+     enforcing that features at time t only use information available at time t.
+
+     Attributes:
+         config: Feature configuration specifying what features to create
+         covariate_manager: Manager for handling known vs observed covariates
+
+     Example:
+         >>> config = FeatureConfig(
+         ...     lags=[1, 7, 14],
+         ...     calendar_features=["dayofweek", "month"],
+         ...     rolling_windows={7: ["mean", "std"]},
+         ... )
+         >>> factory = FeatureFactory(config)
+         >>> matrix = factory.create_features(dataset)
+         >>> print(matrix.signature)
+         FeatureMatrix(c=abc123...,n=5)
+     """
+
+     config: FeatureConfig
+     covariate_manager: CovariateManager | None = None
+
+     def __post_init__(self) -> None:
+         """Initialize covariate manager if not provided."""
+         if self.covariate_manager is None:
+             self.covariate_manager = CovariateManager(
+                 known_covariates=self.config.known_covariates,
+                 observed_covariates=self.config.observed_covariates,
+             )
+
+     def create_features(
+         self,
+         dataset: TSDataset,
+         reference_time: datetime | None = None,
+     ) -> FeatureMatrix:
+         """Create features ensuring no lookahead bias."""
+         engine = self._resolve_engine()
+         config = self._resolved_config(engine)
+
+         if engine == "tsfeatures":
+             try:
+                 return build_tsfeatures_matrix(
+                     dataset=dataset,
+                     config=config,
+                     reference_time=reference_time,
+                 )
+             except ImportError as exc:
+                 if not config.allow_fallback:
+                     raise
+                 warnings.warn(
+                     f"tsfeatures unavailable ({exc}); falling back to native features.",
+                     RuntimeWarning,
+                     stacklevel=2,
+                 )
+                 config = self._resolved_config("native")
+                 return build_native_feature_matrix(
+                     dataset=dataset,
+                     config=config,
+                     reference_time=reference_time,
+                 )
+
+         return build_native_feature_matrix(
+             dataset=dataset,
+             config=config,
+             reference_time=reference_time,
+         )
+
+     def _resolve_engine(self) -> str:
+         """Resolve feature engine based on configuration.
+
+         When engine is "auto", defaults to tsfeatures. If tsfeatures is not
+         available, raises ImportError unless allow_fallback is True.
+         """
+         if self.config.engine == "auto":
+             try:
+                 import tsfeatures  # type: ignore # noqa: F401
+
+                 return "tsfeatures"
+             except Exception as exc:
+                 if not self.config.allow_fallback:
+                     raise ImportError(
+                         "tsfeatures is required but not installed. "
+                         "Install with: pip install tsfeatures "
+                         "Or set allow_fallback=True to use native features."
+                     ) from exc
+                 warnings.warn(
+                     "tsfeatures is not installed; falling back to native features. "
+                     "For reproducibility, install tsfeatures or explicitly set engine='native'.",
+                     RuntimeWarning,
+                     stacklevel=2,
+                 )
+                 return "native"
+         return self.config.engine
+
+     def _resolved_config(self, engine: str) -> FeatureConfig:
+         if engine == self.config.engine:
+             return self.config
+         return replace(self.config, engine=engine)
+
+     def _create_lag_features(
+         self,
+         df: pd.DataFrame,
+         lags: list[int],
+     ) -> pd.DataFrame:
+         return create_lag_features(df, lags)
+
+     def _create_calendar_features(
+         self,
+         df: pd.DataFrame,
+         features: list[str],
+     ) -> pd.DataFrame:
+         return create_calendar_features(df, features)
+
+     def _create_rolling_features(
+         self,
+         df: pd.DataFrame,
+         windows: dict[int, list[str]],
+     ) -> pd.DataFrame:
+         return create_rolling_features(df, windows)
+
+     def _create_observed_covariate_features(
+         self,
+         df: pd.DataFrame,
+     ) -> pd.DataFrame:
+         return create_observed_covariate_features(df, self.config.observed_covariates)
+
+     def get_feature_importance_template(
+         self,
+     ) -> dict[str, float]:
+         """Return a template for feature importance scores.
+
+         Returns:
+             Dict mapping feature names to 0.0 (template for importance scores)
+         """
+         importance: dict[str, float] = {}
+
+         for lag in self.config.lags:
+             importance[f"y_lag_{lag}"] = 0.0
+
+         for feature in self.config.calendar_features:
+             importance[feature] = 0.0
+
+         for window, aggs in self.config.rolling_windows.items():
+             for agg in aggs:
+                 importance[f"y_rolling_{agg}_{window}"] = 0.0
+
+         for col in self.config.known_covariates:
+             importance[col] = 0.0
+
+         for col in self.config.observed_covariates:
+             importance[f"{col}_lag_1"] = 0.0
+
+         if self.config.include_intercept:
+             importance["intercept"] = 0.0
+
+         return importance
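Putting the factory together, an illustrative sketch (not part of the wheel contents): the FeatureConfig keywords below mirror the class docstring above; engine="native" is an assumption based on the engine attribute read in _resolve_engine, and passing a plain DataFrame relies on extract_panel falling back to dataset.copy():

import pandas as pd

from tsagentkit.features.factory import FeatureFactory
from tsagentkit.features.versioning import FeatureConfig

panel = pd.DataFrame(
    {
        "unique_id": ["a"] * 30,
        "ds": pd.date_range("2024-01-01", periods=30, freq="D"),
        "y": [float(i) for i in range(30)],
    }
)

config = FeatureConfig(
    lags=[1, 7],
    calendar_features=["dayofweek", "month"],
    rolling_windows={7: ["mean", "std"]},
    engine="native",  # assumed keyword; skips the tsfeatures import path in _resolve_engine
)
factory = FeatureFactory(config)
matrix = factory.create_features(panel)

print(matrix.signature)     # FeatureMatrix(c=<config hash>,n=<number of feature columns>)
print(matrix.feature_cols)  # lag, calendar, and rolling columns (plus "intercept" if include_intercept defaults on)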
tsagentkit/features/matrix.py
@@ -0,0 +1,159 @@
+ """FeatureMatrix dataclass for storing engineered features with provenance."""
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass, field
+ from datetime import UTC, datetime
+ from typing import TYPE_CHECKING
+
+ import numpy as np
+ import pandas as pd
+
+ if TYPE_CHECKING:
+     pass
+
+
+ @dataclass(frozen=True)
+ class FeatureMatrix:
+     """Container for engineered features with provenance.
+
+     Attributes:
+         data: DataFrame with engineered features (includes unique_id, ds, target)
+         config_hash: Hash of the feature configuration used to create these features
+         target_col: Name of the target variable column
+         feature_cols: List of engineered feature column names
+         known_covariates: List of known covariate column names
+         observed_covariates: List of observed covariate column names
+         created_at: ISO 8601 timestamp of feature matrix creation
+
+     Example:
+         >>> matrix = FeatureMatrix(
+         ...     data=df_with_features,
+         ...     config_hash="abc123...",
+         ...     feature_cols=["lag_7", "rolling_mean_30", "dayofweek"],
+         ...     known_covariates=["holiday"],
+         ...     observed_covariates=["promotion"],
+         ... )
+         >>> print(matrix.signature)
+         FeatureMatrix(c=abc123...,n=3)
+     """
+
+     data: pd.DataFrame
+     config_hash: str
+     target_col: str = "y"
+     feature_cols: list[str] = field(default_factory=list)
+     known_covariates: list[str] = field(default_factory=list)
+     observed_covariates: list[str] = field(default_factory=list)
+     created_at: str = field(
+         default_factory=lambda: datetime.now(UTC).isoformat()
+     )
+
+     def __post_init__(self) -> None:
+         """Validate the feature matrix after creation."""
+         # Validate required columns exist
+         required = ["unique_id", "ds", self.target_col]
+         missing = [col for col in required if col not in self.data.columns]
+         if missing:
+             raise ValueError(f"Missing required columns: {missing}")
+
+         # Validate feature columns exist in data
+         invalid_features = [col for col in self.feature_cols if col not in self.data.columns]
+         if invalid_features:
+             raise ValueError(f"Feature columns not in data: {invalid_features}")
+
+         # Validate covariate columns exist
+         invalid_known = [col for col in self.known_covariates if col not in self.data.columns]
+         if invalid_known:
+             raise ValueError(f"Known covariates not in data: {invalid_known}")
+
+         invalid_observed = [
+             col for col in self.observed_covariates if col not in self.data.columns
+         ]
+         if invalid_observed:
+             raise ValueError(f"Observed covariates not in data: {invalid_observed}")
+
+     @property
+     def signature(self) -> str:
+         """Return feature matrix signature for provenance.
+
+         Returns:
+             String signature like "FeatureMatrix(c=abc123...,n=5)"
+         """
+         return f"FeatureMatrix(c={self.config_hash},n={len(self.feature_cols)})"
+
+     def to_pandas(self) -> pd.DataFrame:
+         """Return the feature matrix as a pandas DataFrame.
+
+         Returns:
+             Copy of the underlying DataFrame
+         """
+         return self.data.copy()
+
+     def get_feature_data(self) -> pd.DataFrame:
+         """Get only the feature columns (excluding id, timestamp, target).
+
+         Returns:
+             DataFrame with only feature columns
+         """
+         return self.data[self.feature_cols].copy()
+
+     def get_target_data(self) -> pd.Series:
+         """Get the target variable.
+
+         Returns:
+             Series with target values
+         """
+         return self.data[self.target_col].copy()
+
+     def get_covariate_data(self, covariate_type: str | None = None) -> pd.DataFrame:
+         """Get covariate columns.
+
+         Args:
+             covariate_type: "known", "observed", or None (all covariates)
+
+         Returns:
+             DataFrame with covariate columns
+         """
+         if covariate_type == "known":
+             cols = self.known_covariates
+         elif covariate_type == "observed":
+             cols = self.observed_covariates
+         else:
+             cols = self.known_covariates + self.observed_covariates
+
+         if not cols:
+             return pd.DataFrame(index=self.data.index)
+
+         return self.data[cols].copy()
+
+     def validate(self) -> list[str]:
+         """Validate the feature matrix and return any issues.
+
+         Returns:
+             List of validation error messages (empty if valid)
+         """
+         issues = []
+
+         # Check for nulls in features
+         if self.feature_cols:
+             null_counts = self.data[self.feature_cols].isnull().sum()
+             if null_counts.any():
+                 cols_with_nulls = null_counts[null_counts > 0].index.tolist()
+                 issues.append(f"Features contain nulls: {cols_with_nulls}")
+
+         # Check for infinite values
+         if self.feature_cols:
+             numeric_cols = self.data[self.feature_cols].select_dtypes(include=["number"])
+             if numeric_cols is not None and not numeric_cols.empty:
+                 inf_counts = np.isinf(numeric_cols).sum()
+                 if inf_counts.any():
+                     cols_with_inf = inf_counts[inf_counts > 0].index.tolist()
+                     issues.append(f"Features contain infinite values: {cols_with_inf}")
+
+         # Check target exists and is numeric
+         if self.target_col not in self.data.columns:
+             issues.append(f"Target column '{self.target_col}' not found")
+         elif not pd.api.types.is_numeric_dtype(self.data[self.target_col]):
+             issues.append(f"Target column '{self.target_col}' is not numeric")
+
+         return issues
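A small sketch (again illustrative, not part of the wheel) of constructing a FeatureMatrix directly and letting validate() flag the leading NaN that a lag feature produces:

import numpy as np
import pandas as pd

from tsagentkit.features.matrix import FeatureMatrix

frame = pd.DataFrame(
    {
        "unique_id": ["a", "a", "a"],
        "ds": pd.date_range("2024-01-01", periods=3, freq="D"),
        "y": [1.0, 2.0, 3.0],
        "y_lag_1": [np.nan, 1.0, 2.0],
    }
)

matrix = FeatureMatrix(data=frame, config_hash="abc123", feature_cols=["y_lag_1"])
print(matrix.signature)   # FeatureMatrix(c=abc123,n=1)
print(matrix.validate())  # ["Features contain nulls: ['y_lag_1']"]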
tsagentkit/features/tsfeatures_adapter.py
@@ -0,0 +1,115 @@
+ """tsfeatures adapter for statistical feature extraction."""
+
+ from __future__ import annotations
+
+ from collections.abc import Callable
+ from datetime import datetime
+ from typing import Any
+
+ import pandas as pd
+
+ from tsagentkit.features.extra.native import (
+     create_observed_covariate_features,
+     extract_panel,
+     prepare_panel,
+ )
+ from tsagentkit.features.matrix import FeatureMatrix
+ from tsagentkit.features.versioning import FeatureConfig, compute_feature_hash
+
+
+ def _import_tsfeatures():
+     import tsfeatures  # type: ignore
+
+     return tsfeatures
+
+
+ def _resolve_feature_fns(tsfeatures_mod: Any, names: list[str]) -> list[Callable] | None:
+     if not names:
+         return None
+     fns: list[Callable] = []
+     for name in names:
+         fn = getattr(tsfeatures_mod, name, None)
+         if fn is None or not callable(fn):
+             raise ValueError(f"Unknown tsfeatures function: {name}")
+         fns.append(fn)
+     return fns
+
+
+ def _resolve_tsfeatures_freq(dataset: Any, config: FeatureConfig) -> int | None:
+     if config.tsfeatures_freq is not None:
+         return config.tsfeatures_freq
+     task_spec = getattr(dataset, "task_spec", None)
+     if task_spec is None:
+         return None
+     return task_spec.season_length
+
+
+ def _prefix_if_conflict(df: pd.DataFrame, feature_cols: list[str]) -> tuple[pd.DataFrame, list[str]]:
+     reserved = {"unique_id", "ds", "y"}
+     conflicts = [col for col in feature_cols if col in reserved]
+     if not conflicts:
+         return df, feature_cols
+
+     rename_map = {col: f"tsf_{col}" for col in conflicts}
+     df = df.rename(columns=rename_map)
+     updated = [rename_map.get(col, col) for col in feature_cols]
+     return df, updated
+
+
+ def build_tsfeatures_matrix(
+     dataset: Any,
+     config: FeatureConfig,
+     reference_time: datetime | None = None,
+ ) -> FeatureMatrix:
+     df = prepare_panel(extract_panel(dataset), reference_time)
+
+     tsfeatures_mod = _import_tsfeatures()
+     fns = _resolve_feature_fns(tsfeatures_mod, config.tsfeatures_features)
+
+     freq = _resolve_tsfeatures_freq(dataset, config)
+     dict_freqs = config.tsfeatures_dict_freqs or None
+
+     features_df = tsfeatures_mod.tsfeatures(
+         df,
+         freq=freq,
+         features=fns,
+         dict_freqs=dict_freqs,
+     )
+
+     if "unique_id" not in features_df.columns:
+         if features_df.index.name == "unique_id":
+             features_df = features_df.reset_index()
+         else:
+             raise ValueError("tsfeatures output must include unique_id")
+
+     feature_cols = [c for c in features_df.columns if c != "unique_id"]
+     features_df, feature_cols = _prefix_if_conflict(features_df, feature_cols)
+
+     merged = df.merge(features_df, on="unique_id", how="left")
+
+     if config.observed_covariates:
+         merged = create_observed_covariate_features(merged, config.observed_covariates)
+         for col in config.observed_covariates:
+             lag_col = f"{col}_lag_1"
+             if lag_col in merged.columns:
+                 feature_cols.append(lag_col)
+
+     if config.known_covariates:
+         for col in config.known_covariates:
+             if col in merged.columns and col not in feature_cols:
+                 feature_cols.append(col)
+
+     if config.include_intercept:
+         merged["intercept"] = 1.0
+         feature_cols.append("intercept")
+
+     config_hash = compute_feature_hash(config)
+
+     return FeatureMatrix(
+         data=merged,
+         config_hash=config_hash,
+         target_col="y",
+         feature_cols=feature_cols,
+         known_covariates=config.known_covariates,
+         observed_covariates=config.observed_covariates,
+     )
+ )