tsam 2.3.8__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tsam/__init__.py CHANGED
@@ -0,0 +1,79 @@
+ """tsam - Time Series Aggregation Module.
+
+ A Python package for aggregating time series data using clustering algorithms.
+ Designed for reducing computational load in energy system optimization models.
+
+ Quick Start
+ -----------
+ >>> import pandas as pd
+ >>> import tsam
+ >>>
+ >>> # Load your time series data
+ >>> df = pd.read_csv("data.csv", index_col=0, parse_dates=True)
+ >>>
+ >>> # Aggregate to 8 typical days
+ >>> result = tsam.aggregate(df, n_clusters=8)
+ >>>
+ >>> # Access results
+ >>> cluster_representatives = result.cluster_representatives
+ >>> print(f"RMSE: {result.accuracy.rmse.mean():.4f}")
+
+ For more control, use configuration objects:
+
+ >>> from tsam import aggregate, ClusterConfig, SegmentConfig
+ >>>
+ >>> result = aggregate(
+ ...     df,
+ ...     n_clusters=8,
+ ...     cluster=ClusterConfig(method="hierarchical", representation="distribution"),
+ ...     segments=SegmentConfig(n_segments=12),
+ ... )
+
+ Legacy API
+ ----------
+ The original class-based API is still available:
+
+ >>> from tsam.timeseriesaggregation import TimeSeriesAggregation
+ >>> agg = TimeSeriesAggregation(df, noTypicalPeriods=8)
+ >>> typical = agg.createTypicalPeriods()
+ """
+
+ from tsam.api import aggregate, unstack_to_periods
+
+ # Optional modules loaded on-demand to avoid importing heavy dependencies (e.g., plotly)
+ _LAZY_MODULES = ("plot", "tuning")
+
+
+ def __getattr__(name: str):
+     """Lazy import handler for optional modules."""
+     import importlib
+
+     if name in _LAZY_MODULES:
+         return importlib.import_module(f".{name}", __name__)
+     raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
+
+
+ from tsam.config import ClusterConfig, ClusteringResult, ExtremeConfig, SegmentConfig
+ from tsam.exceptions import LegacyAPIWarning
+ from tsam.result import AccuracyMetrics, AggregationResult
+
+ # Legacy imports for backward compatibility
+ from tsam.timeseriesaggregation import TimeSeriesAggregation, unstackToPeriods
+
+ __version__ = "3.0.0"
+
+ __all__ = [
+     "AccuracyMetrics",
+     "AggregationResult",
+     "ClusterConfig",
+     "ClusteringResult",
+     "ExtremeConfig",
+     "LegacyAPIWarning",
+     "SegmentConfig",
+     "TimeSeriesAggregation",
+     "aggregate",
+     "plot",
+     "tuning",
+     "unstackToPeriods",  # Legacy alias
+     "unstack_to_periods",
+ ]
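
Editor's note: the module-level __getattr__ above implements PEP 562 lazy loading, so tsam.plot and tsam.tuning (and their heavy dependencies such as plotly) are only imported on first attribute access. A minimal sketch of the observable behavior, assuming the optional dependencies are installed:

    import sys
    import tsam

    "tsam.plot" in sys.modules   # False: nothing imported yet
    _ = tsam.plot                # first access triggers importlib.import_module
    "tsam.plot" in sys.modules   # True: submodule is now cached in sys.modules
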
tsam/api.py ADDED
@@ -0,0 +1,602 @@
+ """New simplified API for tsam aggregation."""
+
+ from __future__ import annotations
+
+ import warnings
+ from typing import cast
+
+ import pandas as pd
+
+ from tsam.config import (
+     EXTREME_METHOD_MAPPING,
+     METHOD_MAPPING,
+     REPRESENTATION_MAPPING,
+     ClusterConfig,
+     ClusteringResult,
+     ExtremeConfig,
+     SegmentConfig,
+ )
+ from tsam.exceptions import LegacyAPIWarning
+ from tsam.result import AccuracyMetrics, AggregationResult
+ from tsam.timeseriesaggregation import TimeSeriesAggregation, unstackToPeriods
+
+
+ def _parse_duration_hours(value: int | float | str, param_name: str) -> float:
+     """Parse a duration value to hours.
+
+     Accepts:
+     - int/float: interpreted as hours (e.g., 24 → 24.0 hours)
+     - str: pandas Timedelta string (e.g., '24h', '1d', '15min')
+
+     Returns duration in hours as float.
+     """
+     if isinstance(value, (int, float)):
+         return float(value)
+     if isinstance(value, str):
+         try:
+             td = pd.Timedelta(value)
+             return td.total_seconds() / 3600
+         except ValueError as e:
+             raise ValueError(
+                 f"{param_name}: invalid duration string '{value}': {e}"
+             ) from e
+     raise TypeError(
+         f"{param_name} must be int, float, or string, got {type(value).__name__}"
+     )
+
+
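Editor's note: _parse_duration_hours defers all string parsing to pandas.Timedelta, so any duration string pandas accepts is valid here. The conversion it performs, shown with plain pandas:

    import pandas as pd

    pd.Timedelta("15min").total_seconds() / 3600  # 0.25 hours
    pd.Timedelta("1d").total_seconds() / 3600     # 24.0 hours
    float(24)                                     # int/float inputs pass through as hours
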
+ def aggregate(
+     data: pd.DataFrame,
+     n_clusters: int,
+     *,
+     period_duration: int | float | str = 24,
+     temporal_resolution: float | str | None = None,
+     cluster: ClusterConfig | None = None,
+     segments: SegmentConfig | None = None,
+     extremes: ExtremeConfig | None = None,
+     preserve_column_means: bool = True,
+     rescale_exclude_columns: list[str] | None = None,
+     round_decimals: int | None = None,
+     numerical_tolerance: float = 1e-13,
+ ) -> AggregationResult:
+     """Aggregate time series data into typical periods.
+
+     This function reduces a time series dataset to a smaller set of
+     representative "typical periods" using clustering algorithms.
+
+     Parameters
+     ----------
+     data : pd.DataFrame
+         Input time series data with a datetime index.
+         Each column represents a different variable (e.g., solar, wind, demand).
+         The index should be a DatetimeIndex with regular intervals.
+
+     n_clusters : int
+         Number of clusters (typical periods) to create.
+         Higher values give more accuracy but less data reduction.
+         Typical range: 4-20 for energy system models.
+
+     period_duration : int, float, or str, default 24
+         Length of each period. Accepts:
+         - int/float: hours (e.g., 24 for daily, 168 for weekly)
+         - str: pandas Timedelta string (e.g., '24h', '1d', '1w')
+
+     temporal_resolution : float or str, optional
+         Time resolution of input data. Accepts:
+         - float: hours (e.g., 1.0 for hourly, 0.25 for 15-minute)
+         - str: pandas Timedelta string (e.g., '1h', '15min', '30min')
+         If not provided, inferred from the datetime index.
+
+     cluster : ClusterConfig, optional
+         Clustering configuration. If not provided, uses defaults:
+         - method: "hierarchical"
+         - representation: "medoid"
+
+     segments : SegmentConfig, optional
+         Segmentation configuration for reducing temporal resolution
+         within periods. If not provided, no segmentation is applied.
+
+     extremes : ExtremeConfig, optional
+         Configuration for preserving extreme periods.
+         If not provided, no extreme period handling is applied.
+
+     preserve_column_means : bool, default True
+         Rescale typical periods so each column's weighted mean matches
+         the original data's mean. Ensures total energy/load is preserved
+         when weights represent occurrence counts.
+
+     rescale_exclude_columns : list[str], optional
+         Column names to exclude from rescaling when preserve_column_means=True.
+         Useful for binary/indicator columns (0/1 values) that should not be
+         rescaled. If None (default), all columns are rescaled.
+
+     round_decimals : int, optional
+         Round output values to this many decimal places.
+         If not provided, no rounding is applied.
+
+     numerical_tolerance : float, default 1e-13
+         Tolerance for numerical precision issues.
+         Controls when warnings are raised for aggregated values exceeding
+         the original time series bounds. Increase this value to silence
+         warnings caused by floating-point precision errors.
+
+     Returns
+     -------
+     AggregationResult
+         Object containing:
+         - cluster_representatives: DataFrame with aggregated periods
+         - cluster_assignments: which cluster each original period belongs to
+         - cluster_weights: occurrence count per cluster
+         - accuracy: RMSE and MAE metrics
+         - a to_dict() method
+
+     Raises
+     ------
+     ValueError
+         If input data is invalid or parameters are inconsistent.
+     TypeError
+         If parameter types are incorrect.
+
+     Examples
+     --------
+     Basic usage with defaults:
+
+     >>> import tsam
+     >>> result = tsam.aggregate(df, n_clusters=8)
+     >>> typical = result.cluster_representatives
+
+     With custom clustering:
+
+     >>> from tsam import aggregate, ClusterConfig
+     >>> result = aggregate(
+     ...     df,
+     ...     n_clusters=8,
+     ...     cluster=ClusterConfig(method="kmeans", representation="mean"),
+     ... )
+
+     With segmentation (reduce to 12 timesteps per period):
+
+     >>> from tsam import aggregate, SegmentConfig
+     >>> result = aggregate(
+     ...     df,
+     ...     n_clusters=8,
+     ...     segments=SegmentConfig(n_segments=12),
+     ... )
+
+     Preserving peak demand periods:
+
+     >>> from tsam import aggregate, ExtremeConfig
+     >>> result = aggregate(
+     ...     df,
+     ...     n_clusters=8,
+     ...     extremes=ExtremeConfig(max_value=["demand"]),
+     ... )
+
+     Transferring assignments to new data:
+
+     >>> result1 = aggregate(df_wind, n_clusters=8)
+     >>> result2 = result1.clustering.apply(df_all)
+
+     See Also
+     --------
+     ClusterConfig : Clustering algorithm configuration
+     SegmentConfig : Temporal segmentation configuration
+     ExtremeConfig : Extreme period preservation configuration
+     AggregationResult : Result object with all outputs
+     """
+     # Validate input
+     if not isinstance(data, pd.DataFrame):
+         raise TypeError(f"data must be a pandas DataFrame, got {type(data).__name__}")
+
+     if not isinstance(n_clusters, int) or n_clusters < 1:
+         raise ValueError(f"n_clusters must be a positive integer, got {n_clusters}")
+
+     # Parse duration parameters to hours
+     period_duration = _parse_duration_hours(period_duration, "period_duration")
+     if period_duration <= 0:
+         raise ValueError(f"period_duration must be positive, got {period_duration}")
+
+     temporal_resolution = (
+         _parse_duration_hours(temporal_resolution, "temporal_resolution")
+         if temporal_resolution is not None
+         else None
+     )
+     if temporal_resolution is not None and temporal_resolution <= 0:
+         raise ValueError(
+             f"temporal_resolution must be positive, got {temporal_resolution}"
+         )
+
+     # Apply defaults
+     if cluster is None:
+         cluster = ClusterConfig()
+
+     # Validate segments against data
+     if segments is not None:
+         # Calculate timesteps per period
+         if temporal_resolution is not None:
+             timesteps_per_period = int(period_duration / temporal_resolution)
+         else:
+             # Infer resolution from data index
+             if isinstance(data.index, pd.DatetimeIndex) and len(data.index) > 1:
+                 inferred_resolution = (
+                     data.index[1] - data.index[0]
+                 ).total_seconds() / 3600
+                 timesteps_per_period = int(period_duration / inferred_resolution)
+             else:
+                 # Fall back to assuming hourly resolution
+                 timesteps_per_period = int(period_duration)
+
+         if segments.n_segments > timesteps_per_period:
+             raise ValueError(
+                 f"n_segments ({segments.n_segments}) cannot exceed "
+                 f"timesteps per period ({timesteps_per_period})"
+             )
+
+     # Validate extreme columns exist in data
+     if extremes is not None:
+         all_extreme_cols = (
+             extremes.max_value
+             + extremes.min_value
+             + extremes.max_period
+             + extremes.min_period
+         )
+         missing = set(all_extreme_cols) - set(data.columns)
+         if missing:
+             raise ValueError(f"Extreme period columns not found in data: {missing}")
+
+     # Validate weight columns exist
+     if cluster.weights is not None:
+         missing = set(cluster.weights.keys()) - set(data.columns)
+         if missing:
+             raise ValueError(f"Weight columns not found in data: {missing}")
+
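Editor's note: worked through with concrete numbers, the inference above turns a 24-hour period sampled at 15-minute resolution into 96 timesteps per period, which is the upper bound the n_segments check enforces:

    import pandas as pd

    idx = pd.date_range("2030-01-01", periods=4, freq="15min")
    inferred_resolution = (idx[1] - idx[0]).total_seconds() / 3600  # 0.25 hours
    timesteps_per_period = int(24 / inferred_resolution)            # 96
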
+     # Build old API parameters
+     old_params = _build_old_params(
+         data=data,
+         n_clusters=n_clusters,
+         period_duration=period_duration,
+         temporal_resolution=temporal_resolution,
+         cluster=cluster,
+         segments=segments,
+         extremes=extremes,
+         preserve_column_means=preserve_column_means,
+         rescale_exclude_columns=rescale_exclude_columns,
+         round_decimals=round_decimals,
+         numerical_tolerance=numerical_tolerance,
+     )
+
+     # Run aggregation using old implementation (suppress deprecation warning for internal use)
+     with warnings.catch_warnings():
+         warnings.simplefilter("ignore", LegacyAPIWarning)
+         agg = TimeSeriesAggregation(**old_params)
+         cluster_representatives = agg.createTypicalPeriods()
+
+     # Rename index levels for consistency with new API terminology
+     cluster_representatives = cluster_representatives.rename_axis(
+         index={"PeriodNum": "cluster", "TimeStep": "timestep"}
+     )
+
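Editor's note: rename_axis with an index mapping renames MultiIndex level names only; the labels themselves are untouched. A minimal pandas illustration of the rename performed above:

    import pandas as pd

    mi = pd.MultiIndex.from_product([[0, 1], [0, 1]], names=["PeriodNum", "TimeStep"])
    df = pd.DataFrame({"demand": [1, 2, 3, 4]}, index=mi)
    df = df.rename_axis(index={"PeriodNum": "cluster", "TimeStep": "timestep"})
    list(df.index.names)  # ['cluster', 'timestep']
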
+     # Build accuracy metrics
+     accuracy_df = agg.accuracyIndicators()
+
+     # Build rescale deviations DataFrame
+     rescale_deviations_dict = getattr(agg, "_rescaleDeviations", {})
+     if rescale_deviations_dict:
+         rescale_deviations = pd.DataFrame.from_dict(
+             rescale_deviations_dict, orient="index"
+         )
+         rescale_deviations.index.name = "column"
+     else:
+         rescale_deviations = pd.DataFrame(
+             columns=["deviation_pct", "converged", "iterations"]
+         )
+
+     accuracy = AccuracyMetrics(
+         rmse=accuracy_df["RMSE"],
+         mae=accuracy_df["MAE"],
+         rmse_duration=accuracy_df["RMSE_duration"],
+         rescale_deviations=rescale_deviations,
+     )
+
+     # Build ClusteringResult
+     clustering_result = _build_clustering_result(
+         agg=agg,
+         n_segments=segments.n_segments if segments else None,
+         cluster_config=cluster,
+         segment_config=segments,
+         extremes_config=extremes,
+         preserve_column_means=preserve_column_means,
+         rescale_exclude_columns=rescale_exclude_columns,
+         temporal_resolution=temporal_resolution,
+     )
+
+     # Compute segment_durations as tuple of tuples
+     segment_durations_tuple = None
+     if segments and hasattr(agg, "segmentedNormalizedTypicalPeriods"):
+         segmented_df = agg.segmentedNormalizedTypicalPeriods
+         segment_durations_tuple = tuple(
+             tuple(
+                 int(seg_dur)
+                 for _seg_step, seg_dur, _orig_start in segmented_df.loc[
+                     period_idx
+                 ].index
+             )
+             for period_idx in segmented_df.index.get_level_values(0).unique()
+         )
+
+     # Build result object
+     return AggregationResult(
+         cluster_representatives=cluster_representatives,
+         cluster_weights=dict(agg.clusterPeriodNoOccur),
+         n_timesteps_per_period=agg.timeStepsPerPeriod,
+         segment_durations=segment_durations_tuple,
+         accuracy=accuracy,
+         clustering_duration=getattr(agg, "clusteringDuration", 0.0),
+         clustering=clustering_result,
+         is_transferred=False,
+         _aggregation=agg,
+     )
+
+
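Editor's note: putting the pieces together, a minimal end-to-end call with synthetic data (the column name "demand" is illustrative); this mirrors the Quick Start in the package docstring:

    import numpy as np
    import pandas as pd
    import tsam

    idx = pd.date_range("2030-01-01", periods=8760, freq="h")
    df = pd.DataFrame({"demand": np.random.default_rng(0).random(8760)}, index=idx)

    result = tsam.aggregate(df, n_clusters=8)  # 8 typical days of 24 h each
    result.cluster_representatives             # MultiIndex (cluster, timestep)
    sum(result.cluster_weights.values())       # 365: every original day is assigned
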
+ def _build_clustering_result(
+     agg: TimeSeriesAggregation,
+     n_segments: int | None,
+     cluster_config: ClusterConfig,
+     segment_config: SegmentConfig | None,
+     extremes_config: ExtremeConfig | None,
+     preserve_column_means: bool,
+     rescale_exclude_columns: list[str] | None,
+     temporal_resolution: float | None,
+ ) -> ClusteringResult:
+     """Build ClusteringResult from a TimeSeriesAggregation object."""
+     # Get cluster centers (convert to Python ints for JSON serialization)
+     # Handle extreme periods based on method:
+     # - new_cluster/append: append extreme period indices (creates additional clusters)
+     # - replace: keep original cluster centers
+     # Note: replace creates a hybrid representation (some columns from medoid, some
+     # from extreme period) that cannot be perfectly reproduced during transfer
+     cluster_centers: tuple[int, ...] | None = None
+     if agg.clusterCenterIndices is not None:
+         center_indices = [int(x) for x in agg.clusterCenterIndices]
+
+         if (
+             hasattr(agg, "extremePeriods")
+             and agg.extremePeriods
+             and extremes_config is not None
+             and extremes_config.method in ("new_cluster", "append")
+         ):
+             # Add extreme period indices as new cluster centers
+             for period_type in agg.extremePeriods:
+                 center_indices.append(int(agg.extremePeriods[period_type]["stepNo"]))
+
+         cluster_centers = tuple(center_indices)
+
+     # Compute segment data if segmentation was used
+     segment_assignments: tuple[tuple[int, ...], ...] | None = None
+     segment_durations: tuple[tuple[int, ...], ...] | None = None
+     segment_centers: tuple[tuple[int, ...], ...] | None = None
+
+     if n_segments is not None and hasattr(agg, "segmentedNormalizedTypicalPeriods"):
+         segmented_df = agg.segmentedNormalizedTypicalPeriods
+         segment_assignments_list = []
+         segment_durations_list = []
+
+         for period_idx in segmented_df.index.get_level_values(0).unique():
+             period_data = segmented_df.loc[period_idx]
+             # Index levels: Segment Step, Segment Duration, Original Start Step
+             assignments = []
+             durations = []
+             for seg_step, seg_dur, _orig_start in period_data.index:
+                 assignments.extend([int(seg_step)] * int(seg_dur))
+                 durations.append(int(seg_dur))
+             segment_assignments_list.append(tuple(assignments))
+             segment_durations_list.append(tuple(durations))
+
+         segment_assignments = tuple(segment_assignments_list)
+         segment_durations = tuple(segment_durations_list)
+
+         # Extract segment center indices (only available for medoid/maxoid representations)
+         if (
+             hasattr(agg, "segmentCenterIndices")
+             and agg.segmentCenterIndices is not None
+         ):
+             # Use segment centers only if every period has them (None for mean representation)
+             if all(pc is not None for pc in agg.segmentCenterIndices):
+                 segment_centers = tuple(
+                     tuple(int(x) for x in period_centers)
+                     for period_centers in agg.segmentCenterIndices
+                 )
+
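Editor's note: the expansion loop above converts per-segment durations into one assignment per original timestep. Concretely, segment steps [0, 1, 2] with durations [2, 3, 1] expand as follows:

    assignments: list[int] = []
    for seg_step, seg_dur in [(0, 2), (1, 3), (2, 1)]:
        assignments.extend([seg_step] * seg_dur)
    assignments  # [0, 0, 1, 1, 1, 2] -- one segment label per original timestep
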
+     # Extract representation from configs
+     representation = cluster_config.get_representation()
+     segment_representation = segment_config.representation if segment_config else None
+
+     # Extract extreme cluster indices if extremes were used
+     extreme_cluster_indices: tuple[int, ...] | None = None
+     if hasattr(agg, "extremeClusterIdx") and agg.extremeClusterIdx:
+         extreme_cluster_indices = tuple(int(x) for x in agg.extremeClusterIdx)
+
+     return ClusteringResult(
+         period_duration=agg.hoursPerPeriod,
+         cluster_assignments=tuple(int(x) for x in agg.clusterOrder),
+         cluster_centers=cluster_centers,
+         segment_assignments=segment_assignments,
+         segment_durations=segment_durations,
+         segment_centers=segment_centers,
+         preserve_column_means=preserve_column_means,
+         rescale_exclude_columns=tuple(rescale_exclude_columns)
+         if rescale_exclude_columns
+         else None,
+         representation=representation,
+         segment_representation=segment_representation,
+         temporal_resolution=temporal_resolution,
+         n_timesteps_per_period=agg.timeStepsPerPeriod,
+         extreme_cluster_indices=extreme_cluster_indices,
+         cluster_config=cluster_config,
+         segment_config=segment_config,
+         extremes_config=extremes_config,
+     )
+
+
+ def _build_old_params(
+     data: pd.DataFrame,
+     n_clusters: int,
+     period_duration: float,
+     temporal_resolution: float | None,
+     cluster: ClusterConfig,
+     segments: SegmentConfig | None,
+     extremes: ExtremeConfig | None,
+     preserve_column_means: bool,
+     rescale_exclude_columns: list[str] | None,
+     round_decimals: int | None,
+     numerical_tolerance: float,
+     *,
+     # Predefined parameters (used internally by ClusteringResult.apply())
+     predef_cluster_assignments: tuple[int, ...] | None = None,
+     predef_cluster_centers: tuple[int, ...] | None = None,
+     predef_extreme_cluster_indices: tuple[int, ...] | None = None,
+     predef_segment_assignments: tuple[tuple[int, ...], ...] | None = None,
+     predef_segment_durations: tuple[tuple[int, ...], ...] | None = None,
+     predef_segment_centers: tuple[tuple[int, ...], ...] | None = None,
+ ) -> dict:
+     """Build parameters for the old TimeSeriesAggregation API."""
+     params: dict = {
+         "timeSeries": data,
+         "noTypicalPeriods": n_clusters,
+         "hoursPerPeriod": period_duration,
+         "rescaleClusterPeriods": preserve_column_means,
+         "rescaleExcludeColumns": rescale_exclude_columns,
+         "numericalTolerance": numerical_tolerance,
+     }
+
+     if temporal_resolution is not None:
+         params["resolution"] = temporal_resolution
+
+     if round_decimals is not None:
+         params["roundOutput"] = round_decimals
+
+     # Cluster config
+     method = METHOD_MAPPING.get(cluster.method)
+     if method is None:
+         raise ValueError(
+             f"Unknown cluster method: {cluster.method!r}. "
+             f"Valid options: {list(METHOD_MAPPING.keys())}"
+         )
+     params["clusterMethod"] = method
+
+     representation = cluster.get_representation()
+     rep_mapped = REPRESENTATION_MAPPING.get(representation)
+     if rep_mapped is None:
+         raise ValueError(
+             f"Unknown representation method: {representation!r}. "
+             f"Valid options: {list(REPRESENTATION_MAPPING.keys())}"
+         )
+     params["representationMethod"] = rep_mapped
+     params["sortValues"] = cluster.use_duration_curves
+     params["sameMean"] = cluster.normalize_column_means
+     params["evalSumPeriods"] = cluster.include_period_sums
+     params["solver"] = cluster.solver
+
+     if cluster.weights is not None:
+         params["weightDict"] = cluster.weights
+
+     if predef_cluster_assignments is not None:
+         params["predefClusterOrder"] = list(predef_cluster_assignments)
+
+     if predef_cluster_centers is not None:
+         params["predefClusterCenterIndices"] = list(predef_cluster_centers)
+
+     if predef_extreme_cluster_indices is not None:
+         params["predefExtremeClusterIdx"] = list(predef_extreme_cluster_indices)
+
+     # Segmentation config
+     if segments is not None:
+         params["segmentation"] = True
+         params["noSegments"] = segments.n_segments
+         params["segmentRepresentationMethod"] = REPRESENTATION_MAPPING.get(
+             segments.representation, "meanRepresentation"
+         )
+
+         # Predefined segment parameters (from ClusteringResult)
+         if predef_segment_assignments is not None:
+             params["predefSegmentOrder"] = [list(s) for s in predef_segment_assignments]
+         if predef_segment_durations is not None:
+             params["predefSegmentDurations"] = [
+                 list(s) for s in predef_segment_durations
+             ]
+         if predef_segment_centers is not None:
+             params["predefSegmentCenters"] = [list(s) for s in predef_segment_centers]
+     else:
+         params["segmentation"] = False
+
+     # Extreme config
+     if extremes is not None and extremes.has_extremes():
+         params["extremePeriodMethod"] = EXTREME_METHOD_MAPPING[extremes.method]
+         params["addPeakMax"] = extremes.max_value
+         params["addPeakMin"] = extremes.min_value
+         params["addMeanMax"] = extremes.max_period
+         params["addMeanMin"] = extremes.min_period
+     else:
+         params["extremePeriodMethod"] = "None"
+
+     return params
+
+
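Editor's note: the translation above is a pure renaming layer between the new and legacy parameter names. A hypothetical illustration of three of the mappings, taken directly from the params dict (clusterMethod and representationMethod values depend on METHOD_MAPPING and REPRESENTATION_MAPPING in tsam.config and are omitted here):

    new_api = {"n_clusters": 8, "period_duration": 24.0, "preserve_column_means": True}
    legacy = {
        "noTypicalPeriods": new_api["n_clusters"],
        "hoursPerPeriod": new_api["period_duration"],
        "rescaleClusterPeriods": new_api["preserve_column_means"],
    }
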
+ def unstack_to_periods(
+     data: pd.DataFrame,
+     period_duration: int | float | str = 24,
+ ) -> pd.DataFrame:
+     """Reshape time series data into a period structure for visualization.
+
+     Transforms a flat time series into a DataFrame with periods as rows and
+     timesteps as a MultiIndex level, suitable for creating heatmaps with plotly.
+
+     Parameters
+     ----------
+     data : pd.DataFrame
+         Time series data with a datetime index.
+     period_duration : int, float, or str, default 24
+         Length of each period. Accepts:
+         - int/float: hours (e.g., 24 for daily, 168 for weekly)
+         - str: pandas Timedelta string (e.g., '24h', '1d', '1w')
+
+     Returns
+     -------
+     pd.DataFrame
+         Reshaped data with shape (n_periods, n_timesteps_per_period) for each column.
+         Suitable for ``px.imshow(result["column"].values.T)`` to create heatmaps.
+
+     Examples
+     --------
+     >>> import tsam
+     >>> import plotly.express as px
+     >>>
+     >>> # Reshape data for heatmap visualization
+     >>> unstacked = tsam.unstack_to_periods(df, period_duration=24)
+     >>>
+     >>> # Create heatmap with plotly
+     >>> px.imshow(
+     ...     unstacked["Load"].values.T,
+     ...     labels={"x": "Day", "y": "Hour", "color": "Load"},
+     ...     title="Load Heatmap"
+     ... )
+     """
+     period_hours = _parse_duration_hours(period_duration, "period_duration")
+
+     # Infer timestep resolution from data index
+     timestep_hours = 1.0  # Default to hourly
+     if isinstance(data.index, pd.DatetimeIndex) and len(data.index) > 1:
+         timestep_hours = (data.index[1] - data.index[0]).total_seconds() / 3600
+
+     # Calculate timesteps per period
+     timesteps_per_period = round(period_hours / timestep_hours)
+     if timesteps_per_period < 1:
+         raise ValueError(
+             f"period_duration ({period_hours}h) is smaller than "
+             f"data timestep resolution ({timestep_hours}h)"
+         )
+
+     with warnings.catch_warnings():
+         warnings.simplefilter("ignore", LegacyAPIWarning)
+         unstacked, _ = unstackToPeriods(data.copy(), timesteps_per_period)
+     return cast("pd.DataFrame", unstacked)
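
Editor's note: as a sanity check on the reshaping arithmetic, a year of hourly data unstacked with period_duration=24 yields 365 periods of 24 timesteps each (shape per the docstring above):

    import numpy as np
    import pandas as pd
    import tsam

    idx = pd.date_range("2030-01-01", periods=8760, freq="h")
    df = pd.DataFrame({"Load": np.arange(8760.0)}, index=idx)

    unstacked = tsam.unstack_to_periods(df, period_duration=24)
    unstacked["Load"].shape  # (365, 24): rows are days, columns are hours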