tsam 2.3.8__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tsam/config.py ADDED
@@ -0,0 +1,852 @@
1
+ """Configuration classes for tsam aggregation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import warnings
6
+ from dataclasses import dataclass, field
7
+ from typing import TYPE_CHECKING, Any, Literal
8
+
9
+ import pandas as pd
10
+
11
+ if TYPE_CHECKING:
12
+ from tsam.result import AggregationResult
13
+
14
# Type aliases for clarity.
# These Literal aliases define the closed sets of accepted string options for
# the public configuration dataclasses below; static type checkers use them to
# reject unknown option strings at analysis time.

# Clustering algorithms accepted by ClusterConfig.method.
ClusterMethod = Literal[
    "averaging",
    "kmeans",
    "kmedoids",
    "kmaxoids",
    "hierarchical",
    "contiguous",
]

# Cluster/segment center representations accepted by
# ClusterConfig.representation and SegmentConfig.representation.
RepresentationMethod = Literal[
    "mean",
    "medoid",
    "maxoid",
    "distribution",
    "distribution_minmax",
    "minmax_mean",
]

# Strategies for incorporating extreme periods (ExtremeConfig.method).
ExtremeMethod = Literal[
    "append",
    "replace",
    "new_cluster",
]

# MILP solvers accepted by ClusterConfig.solver (used by the kmedoids method).
Solver = Literal["highs", "cbc", "gurobi", "cplex"]
40
+
41
+
42
@dataclass(frozen=True)
class ClusterConfig:
    """Immutable configuration for the clustering algorithm.

    Parameters
    ----------
    method : str, default "hierarchical"
        Clustering algorithm to use:
        - "averaging": Sequential averaging of periods
        - "kmeans": K-means clustering (fast, uses centroids)
        - "kmedoids": K-medoids using MILP optimization (uses actual periods)
        - "kmaxoids": K-maxoids (selects most dissimilar periods)
        - "hierarchical": Agglomerative hierarchical clustering
        - "contiguous": Hierarchical with temporal contiguity constraint

    representation : str, optional
        How cluster centers are represented ("mean", "medoid", "maxoid",
        "distribution", "distribution_minmax", or "minmax_mean").
        When omitted, a method-specific default is chosen; see
        :meth:`get_representation`.

    weights : dict[str, float], optional
        Per-column weights for the clustering distance calculation.
        A higher weight gives a column more influence on the clustering,
        e.g. ``{"demand": 2.0, "solar": 1.0}``.

    normalize_column_means : bool, default False
        Normalize all columns to the same mean before clustering.
        Useful when columns have very different scales.

    use_duration_curves : bool, default False
        Sort values within each period before clustering, so periods are
        matched by their value distribution rather than by timing.

    include_period_sums : bool, default False
        Add period totals as extra clustering features, which helps
        preserve total energy/load values.

    solver : str, default "highs"
        MILP solver used by the kmedoids method:
        "highs" (open source, default), "cbc", "gurobi", or "cplex".
    """

    method: ClusterMethod = "hierarchical"
    representation: RepresentationMethod | None = None
    weights: dict[str, float] | None = None
    normalize_column_means: bool = False
    use_duration_curves: bool = False
    include_period_sums: bool = False
    solver: Solver = "highs"

    def get_representation(self) -> RepresentationMethod:
        """Return the configured representation, or the method's default."""
        if self.representation is None:
            # Each clustering algorithm has a natural center representation;
            # unknown methods fall back to "mean".
            method_defaults: dict[ClusterMethod, RepresentationMethod] = {
                "averaging": "mean",
                "kmeans": "mean",
                "kmedoids": "medoid",
                "kmaxoids": "maxoid",
                "hierarchical": "medoid",
                "contiguous": "medoid",
            }
            return method_defaults.get(self.method, "mean")
        return self.representation

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a JSON-compatible dict, omitting default values."""
        serialized: dict[str, Any] = {"method": self.method}
        if self.representation is not None:
            serialized["representation"] = self.representation
        if self.weights is not None:
            serialized["weights"] = self.weights
        # Boolean feature flags are only written when enabled.
        for flag in (
            "normalize_column_means",
            "use_duration_curves",
            "include_period_sums",
        ):
            value = getattr(self, flag)
            if value:
                serialized[flag] = value
        if self.solver != "highs":
            serialized["solver"] = self.solver
        return serialized

    @classmethod
    def from_dict(cls, data: dict) -> ClusterConfig:
        """Reconstruct a ClusterConfig from a plain dict (e.g. parsed JSON)."""
        fallbacks: dict[str, Any] = {
            "method": "hierarchical",
            "representation": None,
            "weights": None,
            "normalize_column_means": False,
            "use_duration_curves": False,
            "include_period_sums": False,
            "solver": "highs",
        }
        return cls(**{key: data.get(key, default) for key, default in fallbacks.items()})
146
+
147
+
148
@dataclass(frozen=True)
class SegmentConfig:
    """Immutable configuration for temporal segmentation within periods.

    Segmentation reduces the temporal resolution inside each typical period
    by grouping consecutive timesteps into segments.

    Parameters
    ----------
    n_segments : int
        Number of segments per period. Must be at least 1 and no larger than
        the number of timesteps per period (e.g. with hourly data and
        ``period_duration=24`` there are 24 timesteps, so 1-24 segments).

    representation : str, default "mean"
        How each segment is represented:
        - "mean": Average value of timesteps in segment
        - "medoid": Actual timestep closest to segment mean
        - "distribution": Preserve distribution within segment
    """

    n_segments: int
    representation: RepresentationMethod = "mean"

    def __post_init__(self) -> None:
        # Only the lower bound can be checked here; the upper bound
        # (n_segments <= timesteps per period) is validated in
        # api.aggregate() once period_duration is known.
        if self.n_segments < 1:
            raise ValueError(f"n_segments must be positive, got {self.n_segments}")

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a JSON-compatible dict, omitting the default representation."""
        payload: dict[str, Any] = {"n_segments": self.n_segments}
        if self.representation == "mean":
            return payload
        payload["representation"] = self.representation
        return payload

    @classmethod
    def from_dict(cls, data: dict) -> SegmentConfig:
        """Reconstruct a SegmentConfig from a plain dict (e.g. parsed JSON)."""
        return cls(data["n_segments"], data.get("representation", "mean"))
193
+
194
+
195
@dataclass(frozen=True)
class ClusteringResult:
    """Clustering assignments that can be saved/loaded and applied to new data.

    This class bundles all clustering and segmentation assignments from an
    aggregation, enabling:
    - Simple IO via to_json()/from_json()
    - Applying the same clustering to different datasets via apply()
    - Preserving the parameters used to create the clustering

    Get this from `result.clustering` after running an aggregation.

    Transfer Fields (used by apply())
    ----------------------------------
    period_duration : float
        Length of each period in hours (e.g., 24 for daily periods).

    cluster_assignments : tuple[int, ...]
        Cluster assignments for each original period.
        Length equals the number of original periods in the data.

    n_timesteps_per_period : int
        Number of timesteps in each period. Used to validate that new data
        has compatible structure when calling apply().

    cluster_centers : tuple[int, ...], optional
        Indices of original periods used as cluster centers.
        If not provided, centers will be recalculated when applying.

    segment_assignments : tuple[tuple[int, ...], ...], optional
        Segment assignments per timestep, per typical period.
        Only present if segmentation was used.

    segment_durations : tuple[tuple[int, ...], ...], optional
        Duration (in timesteps) per segment, per typical period.
        Required if segment_assignments is present.

    segment_centers : tuple[tuple[int, ...], ...], optional
        Indices of timesteps used as segment centers, per typical period.
        Required for fully deterministic segment replication.

    preserve_column_means : bool, default True
        Whether to rescale typical periods to match original data means.

    rescale_exclude_columns : tuple[str, ...], optional
        Column names to exclude from rescaling. Useful for binary columns.

    representation : str, default "medoid"
        How to compute typical periods from cluster members.

    segment_representation : str, optional
        How to compute segment values. Only used if segmentation is present.

    temporal_resolution : float, optional
        Time resolution of input data in hours. If not provided, inferred.

    extreme_cluster_indices : tuple[int, ...], optional
        Cluster indices that represent extreme periods; forwarded to the
        aggregation backend by apply() so those clusters are reproduced.

    Reference Fields (for documentation, not used by apply())
    ---------------------------------------------------------
    cluster_config : ClusterConfig, optional
        Clustering configuration used to create this result.

    segment_config : SegmentConfig, optional
        Segmentation configuration used to create this result.

    extremes_config : ExtremeConfig, optional
        Extreme period configuration used to create this result.

    Examples
    --------
    >>> # Get clustering from a result
    >>> result = tsam.aggregate(df_wind, n_clusters=8)
    >>> clustering = result.clustering

    >>> # Save to file
    >>> clustering.to_json("clustering.json")

    >>> # Load from file
    >>> clustering = ClusteringResult.from_json("clustering.json")

    >>> # Apply to new data
    >>> result2 = clustering.apply(df_all)
    """

    # === Transfer fields (used by apply()) ===
    period_duration: float
    cluster_assignments: tuple[int, ...]
    n_timesteps_per_period: int
    cluster_centers: tuple[int, ...] | None = None
    segment_assignments: tuple[tuple[int, ...], ...] | None = None
    segment_durations: tuple[tuple[int, ...], ...] | None = None
    segment_centers: tuple[tuple[int, ...], ...] | None = None
    preserve_column_means: bool = True
    rescale_exclude_columns: tuple[str, ...] | None = None
    representation: RepresentationMethod = "medoid"
    segment_representation: RepresentationMethod | None = None
    temporal_resolution: float | None = None
    # Cluster indices that hold extreme periods (see class docstring).
    extreme_cluster_indices: tuple[int, ...] | None = None

    # === Reference fields (for documentation, not used by apply()) ===
    cluster_config: ClusterConfig | None = None
    segment_config: SegmentConfig | None = None
    extremes_config: ExtremeConfig | None = None

    def __post_init__(self) -> None:
        # The three segment_* fields are interdependent: assignments and
        # durations must come as a pair, and centers require assignments.
        if self.segment_assignments is not None and self.segment_durations is None:
            raise ValueError(
                "segment_durations must be provided when segment_assignments is specified"
            )
        if self.segment_durations is not None and self.segment_assignments is None:
            raise ValueError(
                "segment_assignments must be provided when segment_durations is specified"
            )
        if self.segment_centers is not None and self.segment_assignments is None:
            raise ValueError(
                "segment_assignments must be provided when segment_centers is specified"
            )

    @property
    def n_clusters(self) -> int:
        """Number of clusters (typical periods)."""
        # Derived from the distinct cluster labels, not stored separately.
        return len(set(self.cluster_assignments))

    @property
    def n_original_periods(self) -> int:
        """Number of original periods in the source data."""
        return len(self.cluster_assignments)

    @property
    def n_segments(self) -> int | None:
        """Number of segments per period, or None if no segmentation."""
        if self.segment_durations is None:
            return None
        # All typical periods share the same segment count; read the first.
        return len(self.segment_durations[0])

    def __repr__(self) -> str:
        # Compact multi-line summary; segment details only appear when
        # segmentation is present.
        has_centers = self.cluster_centers is not None
        has_segments = self.segment_assignments is not None

        lines = [
            "ClusteringResult(",
            f"  period_duration={self.period_duration},",
            f"  n_original_periods={self.n_original_periods},",
            f"  n_clusters={self.n_clusters},",
            f"  has_cluster_centers={has_centers},",
        ]

        if has_segments:
            n_segments = len(self.segment_durations[0]) if self.segment_durations else 0
            n_timesteps = (
                len(self.segment_assignments[0]) if self.segment_assignments else 0
            )
            has_seg_centers = self.segment_centers is not None
            lines.append(f"  n_segments={n_segments},")
            lines.append(f"  n_timesteps_per_period={n_timesteps},")
            lines.append(f"  has_segment_centers={has_seg_centers},")

        lines.append(")")
        return "\n".join(lines)

    def to_dataframe(self) -> pd.DataFrame:
        """Convert to a readable DataFrame.

        Returns a DataFrame with one row per original period showing
        cluster assignments.

        Returns
        -------
        pd.DataFrame
            DataFrame with cluster_assignments indexed by original period.
        """
        df = pd.DataFrame(
            {"cluster": list(self.cluster_assignments)},
            index=pd.RangeIndex(len(self.cluster_assignments), name="original_period"),
        )

        # Flag which original periods serve as cluster centers (if known).
        if self.cluster_centers is not None:
            center_set = set(self.cluster_centers)
            df["is_center"] = [
                i in center_set for i in range(len(self.cluster_assignments))
            ]

        return df

    def segment_dataframe(self) -> pd.DataFrame | None:
        """Get segment structure as a readable DataFrame.

        Returns a DataFrame showing segment durations per typical period.
        Returns None if no segmentation is defined.

        Returns
        -------
        pd.DataFrame | None
            DataFrame with typical periods as rows and segments as columns,
            values are segment durations in timesteps.
        """
        if self.segment_durations is None:
            return None

        n_clusters = len(self.segment_durations)
        n_segments = len(self.segment_durations[0])

        return pd.DataFrame(
            list(self.segment_durations),
            index=pd.RangeIndex(n_clusters, name="cluster"),
            columns=pd.RangeIndex(n_segments, name="segment"),
        )

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for JSON serialization."""
        # Transfer fields (always included)
        result: dict[str, Any] = {
            "period_duration": self.period_duration,
            "cluster_assignments": list(self.cluster_assignments),
            "n_timesteps_per_period": self.n_timesteps_per_period,
            "preserve_column_means": self.preserve_column_means,
            "representation": self.representation,
        }
        # Optional fields are only written when set; tuples are converted to
        # lists so the payload is JSON-serializable.
        if self.cluster_centers is not None:
            result["cluster_centers"] = list(self.cluster_centers)
        if self.segment_assignments is not None:
            result["segment_assignments"] = [list(s) for s in self.segment_assignments]
        if self.segment_durations is not None:
            result["segment_durations"] = [list(s) for s in self.segment_durations]
        if self.segment_centers is not None:
            result["segment_centers"] = [list(s) for s in self.segment_centers]
        if self.rescale_exclude_columns is not None:
            result["rescale_exclude_columns"] = list(self.rescale_exclude_columns)
        if self.segment_representation is not None:
            result["segment_representation"] = self.segment_representation
        if self.temporal_resolution is not None:
            result["temporal_resolution"] = self.temporal_resolution
        if self.extreme_cluster_indices is not None:
            result["extreme_cluster_indices"] = list(self.extreme_cluster_indices)
        # Reference fields (optional, for documentation)
        if self.cluster_config is not None:
            result["cluster_config"] = self.cluster_config.to_dict()
        if self.segment_config is not None:
            result["segment_config"] = self.segment_config.to_dict()
        if self.extremes_config is not None:
            result["extremes_config"] = self.extremes_config.to_dict()
        return result

    @classmethod
    def from_dict(cls, data: dict) -> ClusteringResult:
        """Create from dictionary (e.g., loaded from JSON)."""
        # Transfer fields; lists from JSON are converted back to tuples to
        # restore the immutable field types.
        kwargs: dict[str, Any] = {
            "period_duration": data["period_duration"],
            "cluster_assignments": tuple(data["cluster_assignments"]),
            "n_timesteps_per_period": data["n_timesteps_per_period"],
            "preserve_column_means": data.get("preserve_column_means", True),
            "representation": data.get("representation", "medoid"),
        }
        if "cluster_centers" in data:
            kwargs["cluster_centers"] = tuple(data["cluster_centers"])
        if "segment_assignments" in data:
            kwargs["segment_assignments"] = tuple(
                tuple(s) for s in data["segment_assignments"]
            )
        if "segment_durations" in data:
            kwargs["segment_durations"] = tuple(
                tuple(s) for s in data["segment_durations"]
            )
        if "segment_centers" in data:
            kwargs["segment_centers"] = tuple(tuple(s) for s in data["segment_centers"])
        if "rescale_exclude_columns" in data:
            kwargs["rescale_exclude_columns"] = tuple(data["rescale_exclude_columns"])
        if "segment_representation" in data:
            kwargs["segment_representation"] = data["segment_representation"]
        if "temporal_resolution" in data:
            kwargs["temporal_resolution"] = data["temporal_resolution"]
        if "extreme_cluster_indices" in data:
            kwargs["extreme_cluster_indices"] = tuple(data["extreme_cluster_indices"])
        # Reference fields
        if "cluster_config" in data:
            kwargs["cluster_config"] = ClusterConfig.from_dict(data["cluster_config"])
        if "segment_config" in data:
            kwargs["segment_config"] = SegmentConfig.from_dict(data["segment_config"])
        if "extremes_config" in data:
            kwargs["extremes_config"] = ExtremeConfig.from_dict(data["extremes_config"])
        return cls(**kwargs)

    def to_json(self, path: str) -> None:
        """Save clustering result to a JSON file.

        Parameters
        ----------
        path : str
            File path to save to.

        Notes
        -----
        If the clustering used the 'replace' extreme method, a warning will be
        issued because the saved clustering cannot be perfectly reproduced when
        loaded and applied later. See :meth:`apply` for details.

        Examples
        --------
        >>> result.clustering.to_json("clustering.json")
        """
        import json

        # Warn if using replace extreme method (transfer is not exact)
        if (
            self.extremes_config is not None
            and self.extremes_config.method == "replace"
        ):
            warnings.warn(
                "Saving a clustering that used the 'replace' extreme method. "
                "The 'replace' method creates a hybrid cluster representation "
                "(some columns from the medoid, some from the extreme period) that "
                "cannot be perfectly reproduced when loaded and applied later. "
                "For exact transfer, use 'append' or 'new_cluster' extreme methods.",
                UserWarning,
                stacklevel=2,
            )

        with open(path, "w") as f:
            json.dump(self.to_dict(), f, indent=2)

    @classmethod
    def from_json(cls, path: str) -> ClusteringResult:
        """Load clustering result from a JSON file.

        Parameters
        ----------
        path : str
            File path to load from.

        Returns
        -------
        ClusteringResult
            Loaded clustering result.

        Examples
        --------
        >>> clustering = ClusteringResult.from_json("clustering.json")
        >>> result = clustering.apply(new_data)
        """
        import json

        with open(path) as f:
            return cls.from_dict(json.load(f))

    def apply(
        self,
        data: pd.DataFrame,
        *,
        temporal_resolution: float | None = None,
        round_decimals: int | None = None,
        numerical_tolerance: float = 1e-13,
    ) -> AggregationResult:
        """Apply this clustering to new data.

        Uses the stored cluster assignments and transfer fields to aggregate
        a different dataset with the same clustering structure deterministically.

        Parameters
        ----------
        data : pd.DataFrame
            Input time series data with a datetime index.
            Must have the same number of periods as the original data.

        temporal_resolution : float, optional
            Time resolution of input data in hours.
            If not provided, uses stored temporal_resolution or infers from data index.

        round_decimals : int, optional
            Round output values to this many decimal places.

        numerical_tolerance : float, default 1e-13
            Tolerance for numerical precision issues.

        Returns
        -------
        AggregationResult
            Aggregation result using this clustering.

        Raises
        ------
        ValueError
            If the data's timesteps per period or period count do not match
            this clustering's structure.

        Notes
        -----
        **Extreme period transfer limitations:**

        The 'replace' extreme method creates a hybrid cluster representation where
        some columns use the medoid values and others use the extreme period values.
        This hybrid representation cannot be perfectly reproduced during transfer.
        When applying a clustering that used 'replace', a warning will be issued
        and the transferred result will use the medoid representation for all columns.

        For exact transfer with extreme periods, use 'append' or 'new_cluster'
        extreme methods instead.

        Examples
        --------
        >>> # Cluster on wind data, apply to full dataset
        >>> result_wind = tsam.aggregate(df_wind, n_clusters=8)
        >>> result_all = result_wind.clustering.apply(df_all)

        >>> # Load saved clustering and apply
        >>> clustering = ClusteringResult.from_json("clustering.json")
        >>> result = clustering.apply(df)
        """
        # Import here to avoid circular imports
        from tsam.api import _build_old_params
        from tsam.exceptions import LegacyAPIWarning
        from tsam.result import AccuracyMetrics, AggregationResult
        from tsam.timeseriesaggregation import TimeSeriesAggregation

        # Warn if using replace extreme method (transfer is not exact)
        if (
            self.extremes_config is not None
            and self.extremes_config.method == "replace"
        ):
            warnings.warn(
                "The 'replace' extreme method creates a hybrid cluster representation "
                "(some columns from the medoid, some from the extreme period) that cannot "
                "be perfectly reproduced during transfer. The transferred result will use "
                "the medoid representation for all columns instead of the hybrid values. "
                "For exact transfer, use 'append' or 'new_cluster' extreme methods.",
                UserWarning,
                stacklevel=2,
            )

        # Use stored temporal_resolution if not provided
        effective_temporal_resolution = (
            temporal_resolution
            if temporal_resolution is not None
            else self.temporal_resolution
        )

        # Validate n_timesteps_per_period matches data
        # Infer timestep duration from data if not provided
        if effective_temporal_resolution is None:
            if isinstance(data.index, pd.DatetimeIndex) and len(data.index) > 1:
                # Assumes a uniformly spaced index: the first gap is taken
                # as the resolution for the whole series.
                inferred = (data.index[1] - data.index[0]).total_seconds() / 3600
            else:
                inferred = 1.0  # Default to hourly
        else:
            inferred = effective_temporal_resolution

        inferred_timesteps = int(self.period_duration / inferred)
        if inferred_timesteps != self.n_timesteps_per_period:
            raise ValueError(
                f"Data has {inferred_timesteps} timesteps per period "
                f"(period_duration={self.period_duration}h, timestep={inferred}h), "
                f"but clustering expects {self.n_timesteps_per_period} timesteps per period"
            )

        # Validate number of periods matches
        n_periods_in_data = len(data) // self.n_timesteps_per_period
        if n_periods_in_data != self.n_original_periods:
            raise ValueError(
                f"Data has {n_periods_in_data} periods, "
                f"but clustering expects {self.n_original_periods} periods"
            )

        # Build minimal ClusterConfig with just the representation.
        # We intentionally ignore stored cluster_config.weights since:
        # 1. Weights were only used to compute the original assignments
        # 2. Assignments are now fixed, so weights are irrelevant
        # 3. New data may have different columns than the original
        cluster = ClusterConfig(representation=self.representation)

        # Use stored segment config if available, otherwise build from transfer fields
        segments: SegmentConfig | None = None
        n_segments: int | None = None
        if self.segment_assignments is not None and self.segment_durations is not None:
            n_segments = len(self.segment_durations[0])
            segments = self.segment_config or SegmentConfig(
                n_segments=n_segments,
                representation=self.segment_representation or "mean",
            )

        # Build old API parameters, passing predefined values directly
        # Note: Don't pass extremes config - extreme clusters are handled via
        # extreme_cluster_indices and representations are computed from
        # the periods assigned to those clusters in cluster_assignments
        old_params = _build_old_params(
            data=data,
            n_clusters=self.n_clusters,
            period_duration=self.period_duration,
            temporal_resolution=effective_temporal_resolution,
            cluster=cluster,
            segments=segments,
            extremes=None,
            preserve_column_means=self.preserve_column_means,
            rescale_exclude_columns=list(self.rescale_exclude_columns)
            if self.rescale_exclude_columns
            else None,
            round_decimals=round_decimals,
            numerical_tolerance=numerical_tolerance,
            # Predefined values from this ClusteringResult
            predef_cluster_assignments=self.cluster_assignments,
            predef_cluster_centers=self.cluster_centers,
            predef_extreme_cluster_indices=self.extreme_cluster_indices,
            predef_segment_assignments=self.segment_assignments,
            predef_segment_durations=self.segment_durations,
            predef_segment_centers=self.segment_centers,
        )

        # Run aggregation using old implementation (suppress deprecation warning)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", LegacyAPIWarning)
            agg = TimeSeriesAggregation(**old_params)
            cluster_representatives = agg.createTypicalPeriods()

        # Rename index levels for consistency with new API terminology
        cluster_representatives = cluster_representatives.rename_axis(
            index={"PeriodNum": "cluster", "TimeStep": "timestep"}
        )

        # Build accuracy metrics
        accuracy_df = agg.accuracyIndicators()

        # Build rescale deviations DataFrame
        rescale_deviations_dict = getattr(agg, "_rescaleDeviations", {})
        if rescale_deviations_dict:
            rescale_deviations = pd.DataFrame.from_dict(
                rescale_deviations_dict, orient="index"
            )
            rescale_deviations.index.name = "column"
        else:
            # No rescaling happened; provide an empty frame with the
            # expected column layout so downstream code can rely on it.
            rescale_deviations = pd.DataFrame(
                columns=["deviation_pct", "converged", "iterations"]
            )

        accuracy = AccuracyMetrics(
            rmse=accuracy_df["RMSE"],
            mae=accuracy_df["MAE"],
            rmse_duration=accuracy_df["RMSE_duration"],
            rescale_deviations=rescale_deviations,
        )

        # Build ClusteringResult - preserve stored values
        from tsam.api import _build_clustering_result

        clustering_result = _build_clustering_result(
            agg=agg,
            n_segments=n_segments,
            cluster_config=cluster,
            segment_config=segments,
            extremes_config=self.extremes_config,
            preserve_column_means=self.preserve_column_means,
            rescale_exclude_columns=list(self.rescale_exclude_columns)
            if self.rescale_exclude_columns
            else None,
            temporal_resolution=effective_temporal_resolution,
        )

        # Build result object
        return AggregationResult(
            cluster_representatives=cluster_representatives,
            cluster_weights=dict(agg.clusterPeriodNoOccur),
            n_timesteps_per_period=agg.timeStepsPerPeriod,
            segment_durations=self.segment_durations,
            accuracy=accuracy,
            clustering_duration=getattr(agg, "clusteringDuration", 0.0),
            clustering=clustering_result,
            is_transferred=True,
            _aggregation=agg,
        )
755
+
756
+
757
@dataclass(frozen=True)
class ExtremeConfig:
    """Immutable configuration for preserving extreme periods.

    Extreme periods contain critical peak values that must be preserved
    in the aggregated representation (e.g., peak demand for capacity sizing).

    Parameters
    ----------
    method : str, default "append"
        How extreme periods are incorporated:
        - "append": Add extreme periods as additional cluster centers
        - "replace": Replace the nearest cluster center with the extreme
        - "new_cluster": Add as new cluster and reassign affected periods

    max_value : list[str], optional
        Columns whose single maximum value must be preserved; the whole
        period containing that value becomes an extreme period
        (e.g. ``["electricity_demand"]`` for the peak demand hour).

    min_value : list[str], optional
        Columns whose single minimum value must be preserved
        (e.g. ``["temperature"]`` for the coldest hour).

    max_period : list[str], optional
        Columns whose period with the maximum total must be preserved
        (e.g. ``["solar_generation"]`` for the highest solar day).

    min_period : list[str], optional
        Columns whose period with the minimum total must be preserved
        (e.g. ``["wind_generation"]`` for the lowest wind day).
    """

    method: ExtremeMethod = "append"
    max_value: list[str] = field(default_factory=list)
    min_value: list[str] = field(default_factory=list)
    max_period: list[str] = field(default_factory=list)
    min_period: list[str] = field(default_factory=list)

    # Names of the four column-selection fields, shared by has_extremes,
    # to_dict and from_dict.
    _SELECTION_FIELDS = ("max_value", "min_value", "max_period", "min_period")

    def has_extremes(self) -> bool:
        """Return True if at least one extreme-period selection is configured."""
        return any(getattr(self, name) for name in self._SELECTION_FIELDS)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a JSON-compatible dict, omitting empty/default entries."""
        payload: dict[str, Any] = {}
        if self.method != "append":
            payload["method"] = self.method
        for name in self._SELECTION_FIELDS:
            columns = getattr(self, name)
            if columns:
                payload[name] = columns
        return payload

    @classmethod
    def from_dict(cls, data: dict) -> ExtremeConfig:
        """Reconstruct an ExtremeConfig from a plain dict (e.g. parsed JSON)."""
        selections = {name: data.get(name, []) for name in cls._SELECTION_FIELDS}
        return cls(method=data.get("method", "append"), **selections)
827
+
828
+
829
# Mapping from new API names to old API names.
# These tables translate the short option strings used by the new
# configuration dataclasses into the identifiers expected by the legacy
# TimeSeriesAggregation implementation.

# New-API clustering method name -> legacy clusterMethod name.
METHOD_MAPPING: dict[ClusterMethod, str] = {
    "averaging": "averaging",
    "kmeans": "k_means",
    "kmedoids": "k_medoids",
    "kmaxoids": "k_maxoids",
    "hierarchical": "hierarchical",
    "contiguous": "adjacent_periods",
}

# New-API representation name -> legacy representationMethod name.
REPRESENTATION_MAPPING: dict[RepresentationMethod, str] = {
    "mean": "meanRepresentation",
    "medoid": "medoidRepresentation",
    "maxoid": "maxoidRepresentation",
    "distribution": "distributionRepresentation",
    "distribution_minmax": "distributionAndMinMaxRepresentation",
    "minmax_mean": "minmaxmeanRepresentation",
}

# New-API extreme method name -> legacy extremePeriodMethod name.
EXTREME_METHOD_MAPPING: dict[ExtremeMethod, str] = {
    "append": "append",
    "replace": "replace_cluster_center",
    "new_cluster": "new_cluster_center",
}