tsagentkit-1.0.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. tsagentkit/__init__.py +126 -0
  2. tsagentkit/anomaly/__init__.py +130 -0
  3. tsagentkit/backtest/__init__.py +48 -0
  4. tsagentkit/backtest/engine.py +788 -0
  5. tsagentkit/backtest/metrics.py +244 -0
  6. tsagentkit/backtest/report.py +342 -0
  7. tsagentkit/calibration/__init__.py +136 -0
  8. tsagentkit/contracts/__init__.py +133 -0
  9. tsagentkit/contracts/errors.py +275 -0
  10. tsagentkit/contracts/results.py +418 -0
  11. tsagentkit/contracts/schema.py +44 -0
  12. tsagentkit/contracts/task_spec.py +300 -0
  13. tsagentkit/covariates/__init__.py +340 -0
  14. tsagentkit/eval/__init__.py +285 -0
  15. tsagentkit/features/__init__.py +20 -0
  16. tsagentkit/features/covariates.py +328 -0
  17. tsagentkit/features/extra/__init__.py +5 -0
  18. tsagentkit/features/extra/native.py +179 -0
  19. tsagentkit/features/factory.py +187 -0
  20. tsagentkit/features/matrix.py +159 -0
  21. tsagentkit/features/tsfeatures_adapter.py +115 -0
  22. tsagentkit/features/versioning.py +203 -0
  23. tsagentkit/hierarchy/__init__.py +39 -0
  24. tsagentkit/hierarchy/aggregation.py +62 -0
  25. tsagentkit/hierarchy/evaluator.py +400 -0
  26. tsagentkit/hierarchy/reconciliation.py +232 -0
  27. tsagentkit/hierarchy/structure.py +453 -0
  28. tsagentkit/models/__init__.py +182 -0
  29. tsagentkit/models/adapters/__init__.py +83 -0
  30. tsagentkit/models/adapters/base.py +321 -0
  31. tsagentkit/models/adapters/chronos.py +387 -0
  32. tsagentkit/models/adapters/moirai.py +256 -0
  33. tsagentkit/models/adapters/registry.py +171 -0
  34. tsagentkit/models/adapters/timesfm.py +440 -0
  35. tsagentkit/models/baselines.py +207 -0
  36. tsagentkit/models/sktime.py +307 -0
  37. tsagentkit/monitoring/__init__.py +51 -0
  38. tsagentkit/monitoring/alerts.py +302 -0
  39. tsagentkit/monitoring/coverage.py +203 -0
  40. tsagentkit/monitoring/drift.py +330 -0
  41. tsagentkit/monitoring/report.py +214 -0
  42. tsagentkit/monitoring/stability.py +275 -0
  43. tsagentkit/monitoring/triggers.py +423 -0
  44. tsagentkit/qa/__init__.py +347 -0
  45. tsagentkit/router/__init__.py +37 -0
  46. tsagentkit/router/bucketing.py +489 -0
  47. tsagentkit/router/fallback.py +132 -0
  48. tsagentkit/router/plan.py +23 -0
  49. tsagentkit/router/router.py +271 -0
  50. tsagentkit/series/__init__.py +26 -0
  51. tsagentkit/series/alignment.py +206 -0
  52. tsagentkit/series/dataset.py +449 -0
  53. tsagentkit/series/sparsity.py +261 -0
  54. tsagentkit/series/validation.py +393 -0
  55. tsagentkit/serving/__init__.py +39 -0
  56. tsagentkit/serving/orchestration.py +943 -0
  57. tsagentkit/serving/packaging.py +73 -0
  58. tsagentkit/serving/provenance.py +317 -0
  59. tsagentkit/serving/tsfm_cache.py +214 -0
  60. tsagentkit/skill/README.md +135 -0
  61. tsagentkit/skill/__init__.py +8 -0
  62. tsagentkit/skill/recipes.md +429 -0
  63. tsagentkit/skill/tool_map.md +21 -0
  64. tsagentkit/time/__init__.py +134 -0
  65. tsagentkit/utils/__init__.py +20 -0
  66. tsagentkit/utils/quantiles.py +83 -0
  67. tsagentkit/utils/signature.py +47 -0
  68. tsagentkit/utils/temporal.py +41 -0
  69. tsagentkit-1.0.2.dist-info/METADATA +371 -0
  70. tsagentkit-1.0.2.dist-info/RECORD +72 -0
  71. tsagentkit-1.0.2.dist-info/WHEEL +4 -0
  72. tsagentkit-1.0.2.dist-info/licenses/LICENSE +201 -0
tsagentkit/series/validation.py
@@ -0,0 +1,393 @@
"""Data validation schemas.

Provides validation functions to check input data against the required
schema for time series forecasting.
"""

from __future__ import annotations

from typing import Any

import pandas as pd

from tsagentkit.contracts.errors import (
    EContractDuplicateKey,
    EContractInvalid,
    EContractInvalidType,
    EContractMissingColumn,
    EContractUnsorted,
    EFreqInferFail,
)
from tsagentkit.contracts.results import ValidationReport
from tsagentkit.contracts.task_spec import PanelContract


def normalize_panel_columns(
    df: pd.DataFrame,
    contract: PanelContract,
) -> tuple[pd.DataFrame, dict[str, str] | None]:
    """Normalize panel columns to the canonical contract names.

    Returns:
        (normalized_df, column_map) where column_map maps original names to
        canonical names. If no normalization is needed, column_map is None.
    """
    default_contract = PanelContract()
    mapping = {
        contract.unique_id_col: default_contract.unique_id_col,
        contract.ds_col: default_contract.ds_col,
        contract.y_col: default_contract.y_col,
    }

    if mapping == {
        default_contract.unique_id_col: default_contract.unique_id_col,
        default_contract.ds_col: default_contract.ds_col,
        default_contract.y_col: default_contract.y_col,
    }:
        return df, None

    missing = [src for src in mapping if src not in df.columns]
    if missing:
        raise ValueError(f"Missing required columns: {missing}")

    return df.rename(columns=mapping), mapping


def validate_contract(
    data: Any,
    panel_contract: PanelContract | None = None,
    apply_aggregation: bool = False,
    return_data: bool = False,
) -> ValidationReport | tuple[ValidationReport, pd.DataFrame]:
    """Validate input data against the required schema.

    Args:
        data: Input data (DataFrame or convertible)
        panel_contract: Optional PanelContract specifying column names and aggregation
        apply_aggregation: Whether to aggregate duplicates when allowed by contract
        return_data: If True, return (ValidationReport, normalized_df)

    Returns:
        ValidationReport (and optionally normalized DataFrame)
    """
    errors: list[dict[str, Any]] = []
    warnings: list[dict[str, Any]] = []
    stats: dict[str, Any] = {}

    contract = panel_contract or PanelContract()
    uid_col = contract.unique_id_col
    ds_col = contract.ds_col
    y_col = contract.y_col

    # Convert to DataFrame if needed
    df = _convert_to_dataframe(data)
    if df is None:
        errors.append({
            "code": EContractInvalid.error_code,
            "message": "Data must be a DataFrame or convertible to DataFrame",
            "context": {"type": type(data).__name__},
        })
        report = ValidationReport(valid=False, errors=errors, warnings=warnings)
        return (report, pd.DataFrame()) if return_data else report

    # Check required columns
    required_cols = {uid_col, ds_col, y_col}
    missing_cols = required_cols - set(df.columns)
    if missing_cols:
        errors.append({
            "code": EContractMissingColumn.error_code,
            "message": f"Missing required columns: {sorted(missing_cols)}",
            "context": {
                "missing": sorted(missing_cols),
                "available": sorted(df.columns.tolist()),
            },
        })
        report = ValidationReport(
            valid=False,
            errors=errors,
            warnings=warnings,
            stats={"n_rows": len(df), "n_series": 0},
        )
        return (report, df) if return_data else report

    # Normalize types
    type_errors = _validate_column_types(df, uid_col, ds_col, y_col)
    errors.extend(type_errors)

    # Check for empty DataFrame
    if len(df) == 0:
        errors.append({
            "code": EContractInvalid.error_code,
            "message": "DataFrame is empty (no rows)",
            "context": {"n_rows": 0},
        })

    # Aggregate duplicates if allowed
    if contract.aggregation != "reject" and apply_aggregation:
        df, agg_warnings = _aggregate_duplicates(df, uid_col, ds_col, y_col, contract.aggregation)
        warnings.extend(agg_warnings)

    # Check for duplicates (post-aggregation if any)
    duplicate_errors = _validate_no_duplicates(df, uid_col, ds_col, contract.aggregation)
    errors.extend(duplicate_errors)

    # Check sorting
    sort_errors = _validate_sorted(df, uid_col, ds_col)
    errors.extend(sort_errors)

    # Try to infer frequency
    freq_warnings, freq_stats = _validate_frequency(df, uid_col, ds_col)
    warnings.extend(freq_warnings)
    stats.update(freq_stats)

    # Collect statistics (only if y is numeric)
    if not any(
        e["code"] == EContractInvalidType.error_code
        and e.get("context", {}).get("column") == y_col
        for e in errors
    ):
        stats.update(_compute_stats(df, uid_col, ds_col, y_col))

    valid = len(errors) == 0
    report = ValidationReport(valid=valid, errors=errors, warnings=warnings, stats=stats)

    return (report, df) if return_data else report


def _convert_to_dataframe(data: Any) -> pd.DataFrame | None:
    if isinstance(data, pd.DataFrame):
        return data.copy()

    try:
        if hasattr(data, "to_pandas"):
            return data.to_pandas()
        if isinstance(data, dict):
            return pd.DataFrame(data)
        if isinstance(data, list) and len(data) > 0:
            return pd.DataFrame(data)
    except Exception:
        pass

    return None


def _validate_column_types(
    df: pd.DataFrame,
    uid_col: str,
    ds_col: str,
    y_col: str,
) -> list[dict[str, Any]]:
    errors: list[dict[str, Any]] = []

    if not pd.api.types.is_string_dtype(df[uid_col]):
        try:
            df[uid_col] = df[uid_col].astype(str)
        except Exception as e:
            errors.append({
                "code": EContractInvalidType.error_code,
                "message": f"Column '{uid_col}' must be convertible to string",
                "context": {
                    "column": uid_col,
                    "actual_type": str(df[uid_col].dtype),
                    "error": str(e),
                },
            })

    if not pd.api.types.is_datetime64_any_dtype(df[ds_col]):
        try:
            df[ds_col] = pd.to_datetime(df[ds_col], format="mixed")
        except Exception as e:
            errors.append({
                "code": EContractInvalidType.error_code,
                "message": f"Column '{ds_col}' must be convertible to datetime",
                "context": {
                    "column": ds_col,
                    "actual_type": str(df[ds_col].dtype),
                    "error": str(e),
                },
            })

    if len(df) > 0 and not pd.api.types.is_numeric_dtype(df[y_col]):
        errors.append({
            "code": EContractInvalidType.error_code,
            "message": f"Column '{y_col}' must be numeric",
            "context": {
                "column": y_col,
                "actual_type": str(df[y_col].dtype),
            },
        })

    return errors


def _aggregate_duplicates(
    df: pd.DataFrame,
    uid_col: str,
    ds_col: str,
    y_col: str,
    aggregation: str,
) -> tuple[pd.DataFrame, list[dict[str, Any]]]:
    warnings: list[dict[str, Any]] = []

    duplicates = df.duplicated(subset=[uid_col, ds_col], keep=False)
    if not duplicates.any():
        return df, warnings

    if aggregation == "reject":
        return df, warnings

    agg_map = {
        "sum": "sum",
        "mean": "mean",
        "median": "median",
        "last": "last",
    }
    if aggregation not in agg_map:
        return df, warnings

    grouped = df.groupby([uid_col, ds_col], as_index=False)
    df = grouped.agg({y_col: agg_map[aggregation]})

    warnings.append({
        "code": "W_CONTRACT_AGGREGATED",
        "message": f"Aggregated duplicate keys using '{aggregation}'",
        "context": {"aggregation": aggregation},
    })

    return df, warnings


def _validate_no_duplicates(
    df: pd.DataFrame,
    uid_col: str,
    ds_col: str,
    aggregation: str,
) -> list[dict[str, Any]]:
    errors: list[dict[str, Any]] = []

    duplicates = df.duplicated(subset=[uid_col, ds_col], keep=False)
    if duplicates.any():
        dup_df = df[duplicates]
        dup_keys = dup_df[[uid_col, ds_col]].drop_duplicates()

        errors.append({
            "code": EContractDuplicateKey.error_code,
            "message": f"Found {len(dup_keys)} duplicate ({uid_col}, {ds_col}) pairs",
            "context": {
                "num_duplicates": int(duplicates.sum()),
                "duplicate_keys": dup_keys.head(10).to_dict("records"),
                "aggregation": aggregation,
            },
        })

    return errors


def _validate_sorted(
    df: pd.DataFrame,
    uid_col: str,
    ds_col: str,
) -> list[dict[str, Any]]:
    errors: list[dict[str, Any]] = []

    expected_order = df.sort_values([uid_col, ds_col]).index
    if not df.index.equals(expected_order):
        errors.append({
            "code": EContractUnsorted.error_code,
            "message": f"Data must be sorted by ({uid_col}, {ds_col})",
            "context": {
                "suggestion": f"Use df.sort_values(['{uid_col}', '{ds_col}']) to fix",
            },
        })

    return errors


def _validate_frequency(
    df: pd.DataFrame,
    uid_col: str,
    ds_col: str,
) -> tuple[list[dict[str, Any]], dict[str, Any]]:
    warnings: list[dict[str, Any]] = []
    stats: dict[str, Any] = {}

    freq_counts: dict[str, int] = {}

    for uid in df[uid_col].unique():
        series = df[df[uid_col] == uid].sort_values(ds_col)
        if len(series) < 2:
            continue

        try:
            freq = pd.infer_freq(series[ds_col])
            if freq:
                freq_counts[freq] = freq_counts.get(freq, 0) + 1
        except Exception:
            pass

    if freq_counts:
        inferred_freq = max(freq_counts, key=freq_counts.get)
        stats["inferred_freq"] = inferred_freq
        stats["freq_counts"] = freq_counts
    else:
        warnings.append({
            "code": EFreqInferFail.error_code,
            "message": "Could not infer frequency from data",
            "context": {
                "suggestion": "Specify frequency explicitly in TaskSpec",
            },
        })

    return warnings, stats


def _compute_stats(
    df: pd.DataFrame,
    uid_col: str,
    ds_col: str,
    y_col: str,
) -> dict[str, Any]:
    stats: dict[str, Any] = {
        "n_rows": len(df),
        "n_series": df[uid_col].nunique(),
    }

    if pd.api.types.is_datetime64_any_dtype(df[ds_col]):
        stats["date_range"] = {
            "start": df[ds_col].min().isoformat() if not df[ds_col].empty else None,
            "end": df[ds_col].max().isoformat() if not df[ds_col].empty else None,
        }
    else:
        stats["date_range"] = {"start": None, "end": None}

    if len(df) > 0 and pd.api.types.is_numeric_dtype(df[y_col]):
        stats["y_stats"] = {
            "mean": float(df[y_col].mean()),
            "std": float(df[y_col].std()),
            "min": float(df[y_col].min()),
            "max": float(df[y_col].max()),
            "missing": int(df[y_col].isna().sum()),
        }
    else:
        stats["y_stats"] = {
            "mean": None,
            "std": None,
            "min": None,
            "max": None,
            "missing": int(df[y_col].isna().sum()) if y_col in df.columns else 0,
        }

    if len(df) > 0:
        series_lengths = df.groupby(uid_col).size()
        stats["series_lengths"] = {
            "min": int(series_lengths.min()),
            "max": int(series_lengths.max()),
            "mean": float(series_lengths.mean()),
        }
    else:
        stats["series_lengths"] = {"min": 0, "max": 0, "mean": 0.0}

    return stats


__all__ = ["validate_contract", "normalize_panel_columns"]
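For reference, a minimal usage sketch of the two exported functions. It assumes the PanelContract defaults are unique_id / ds / y and that its constructor accepts the column names as keywords matching the attribute names used above; the class itself lives in tsagentkit/contracts/task_spec.py, which is not shown in this diff. Likewise, ValidationReport is assumed to expose valid and stats attributes, as its constructor calls above suggest.

import pandas as pd

from tsagentkit.contracts.task_spec import PanelContract
from tsagentkit.series.validation import normalize_panel_columns, validate_contract

# Three daily points per series: pd.infer_freq needs at least three
# timestamps, so two points would trigger the frequency warning instead.
df = pd.DataFrame({
    "unique_id": ["a"] * 3 + ["b"] * 3,
    "ds": pd.to_datetime(["2024-01-01", "2024-01-02", "2024-01-03"] * 2),
    "y": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
})

report = validate_contract(df)
assert report.valid                    # sorted, typed, no duplicate keys
print(report.stats["inferred_freq"])   # expected: "D"

# Non-canonical column names are renamed via a PanelContract.
raw = df.rename(columns={"unique_id": "store", "ds": "date", "y": "sales"})
contract = PanelContract(unique_id_col="store", ds_col="date", y_col="sales")
normalized, column_map = normalize_panel_columns(raw, contract)
# column_map maps original -> canonical names,
# e.g. {"store": "unique_id", "date": "ds", "sales": "y"} under the assumed defaults.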
tsagentkit/serving/__init__.py
@@ -0,0 +1,39 @@
"""Serving module for tsagentkit.

Provides batch inference orchestration and artifact packaging.
"""

from tsagentkit.contracts import RunArtifact

from .orchestration import MonitoringConfig, run_forecast
from .packaging import package_run
from .provenance import (
    StructuredLogger,
    compute_config_signature,
    compute_data_signature,
    create_provenance,
    format_event_json,
    log_event,
)
from .tsfm_cache import TSFMModelCache, clear_tsfm_cache, get_tsfm_model

__all__ = [
    # Orchestration
    "run_forecast",
    "MonitoringConfig",
    # Packaging
    "package_run",
    "RunArtifact",
    # Provenance
    "compute_data_signature",
    "compute_config_signature",
    "create_provenance",
    # Structured Logging
    "log_event",
    "format_event_json",
    "StructuredLogger",
    # TSFM Cache
    "TSFMModelCache",
    "get_tsfm_model",
    "clear_tsfm_cache",
]
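Because the package re-exports its public API at the top level, downstream code can import the serving surface directly from tsagentkit.serving rather than the submodules. A sketch of the import surface only; the names are taken from the __all__ above, while the call signatures of run_forecast, package_run, and the cache helpers live in the submodules and are not shown in this diff:

from tsagentkit.serving import (
    MonitoringConfig,
    RunArtifact,
    StructuredLogger,
    TSFMModelCache,
    clear_tsfm_cache,
    compute_config_signature,
    compute_data_signature,
    create_provenance,
    format_event_json,
    get_tsfm_model,
    log_event,
    package_run,
    run_forecast,
)

# Per the module docstring, run_forecast and package_run are the batch
# inference and artifact-packaging entry points respectively.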