tsam 2.3.9__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tsam/__init__.py +79 -0
- tsam/api.py +602 -0
- tsam/config.py +852 -0
- tsam/exceptions.py +17 -0
- tsam/hyperparametertuning.py +289 -245
- tsam/periodAggregation.py +140 -141
- tsam/plot.py +513 -0
- tsam/py.typed +0 -0
- tsam/representations.py +177 -167
- tsam/result.py +397 -0
- tsam/timeseriesaggregation.py +1446 -1361
- tsam/tuning.py +1038 -0
- tsam/utils/durationRepresentation.py +229 -223
- tsam/utils/k_maxoids.py +138 -145
- tsam/utils/k_medoids_contiguity.py +139 -140
- tsam/utils/k_medoids_exact.py +232 -239
- tsam/utils/segmentation.py +232 -118
- {tsam-2.3.9.dist-info → tsam-3.0.0.dist-info}/METADATA +124 -81
- tsam-3.0.0.dist-info/RECORD +23 -0
- {tsam-2.3.9.dist-info → tsam-3.0.0.dist-info}/WHEEL +1 -1
- {tsam-2.3.9.dist-info → tsam-3.0.0.dist-info}/licenses/LICENSE.txt +21 -21
- tsam-2.3.9.dist-info/RECORD +0 -16
- {tsam-2.3.9.dist-info → tsam-3.0.0.dist-info}/top_level.txt +0 -0
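The headline change in 3.0.0 is a functional entry point (tsam.aggregate, in the new tsam/api.py) layered over the existing class-based implementation. A minimal before/after sketch, drawn from the docstrings below; the input DataFrame is a hypothetical year of hourly data:

import pandas as pd
import tsam
from tsam.timeseriesaggregation import TimeSeriesAggregation

# Hypothetical input: one year of hourly data for a single column.
df = pd.DataFrame(
    {"demand": range(8760)},
    index=pd.date_range("2030-01-01", periods=8760, freq="h"),
)

# tsam 2.x, class-based (still available in 3.0.0):
agg = TimeSeriesAggregation(df, noTypicalPeriods=8)
typical_legacy = agg.createTypicalPeriods()

# tsam 3.0, functional wrapper returning a structured result object:
result = tsam.aggregate(df, n_clusters=8)
typical = result.cluster_representatives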
tsam/__init__.py
CHANGED
@@ -0,0 +1,79 @@
"""tsam - Time Series Aggregation Module.

A Python package for aggregating time series data using clustering algorithms.
Designed for reducing computational load in energy system optimization models.

Quick Start
-----------
>>> import pandas as pd
>>> import tsam
>>>
>>> # Load your time series data
>>> df = pd.read_csv("data.csv", index_col=0, parse_dates=True)
>>>
>>> # Aggregate to 8 typical days
>>> result = tsam.aggregate(df, n_clusters=8)
>>>
>>> # Access results
>>> cluster_representatives = result.cluster_representatives
>>> print(f"RMSE: {result.accuracy.rmse.mean():.4f}")

For more control, use configuration objects:

>>> from tsam import aggregate, ClusterConfig, SegmentConfig
>>>
>>> result = aggregate(
...     df,
...     n_clusters=8,
...     cluster=ClusterConfig(method="hierarchical", representation="distribution"),
...     segments=SegmentConfig(n_segments=12),
... )

Legacy API
----------
The original class-based API is still available:

>>> from tsam.timeseriesaggregation import TimeSeriesAggregation
>>> agg = TimeSeriesAggregation(df, noTypicalPeriods=8)
>>> typical = agg.createTypicalPeriods()
"""

from tsam.api import aggregate, unstack_to_periods

# Optional modules loaded on-demand to avoid importing heavy dependencies (e.g., plotly)
_LAZY_MODULES = ("plot", "tuning")


def __getattr__(name: str):
    """Lazy import handler for optional modules."""
    import importlib

    if name in _LAZY_MODULES:
        return importlib.import_module(f".{name}", __name__)
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


from tsam.config import ClusterConfig, ClusteringResult, ExtremeConfig, SegmentConfig
from tsam.exceptions import LegacyAPIWarning
from tsam.result import AccuracyMetrics, AggregationResult

# Legacy imports for backward compatibility
from tsam.timeseriesaggregation import TimeSeriesAggregation, unstackToPeriods

__version__ = "3.0.0"

__all__ = [
    "AccuracyMetrics",
    "AggregationResult",
    "ClusterConfig",
    "ClusteringResult",
    "ExtremeConfig",
    "LegacyAPIWarning",
    "SegmentConfig",
    "TimeSeriesAggregation",
    "aggregate",
    "plot",
    "tuning",
    "unstackToPeriods",  # Legacy alias
    "unstack_to_periods",
]
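The module-level __getattr__ above is PEP 562 lazy loading: tsam.plot and tsam.tuning are imported only on first attribute access, so a plain `import tsam` does not pull in heavy optional dependencies such as plotly. A sketch of the behavior, assuming plotly is installed:

import sys
import tsam

assert "tsam.plot" not in sys.modules  # not loaded by `import tsam`
_ = tsam.plot                          # __getattr__ runs importlib.import_module(".plot", "tsam")
assert "tsam.plot" in sys.modules      # loaded (and cached) on first access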
tsam/api.py
ADDED
@@ -0,0 +1,602 @@
"""New simplified API for tsam aggregation."""

from __future__ import annotations

import warnings
from typing import cast

import pandas as pd

from tsam.config import (
    EXTREME_METHOD_MAPPING,
    METHOD_MAPPING,
    REPRESENTATION_MAPPING,
    ClusterConfig,
    ClusteringResult,
    ExtremeConfig,
    SegmentConfig,
)
from tsam.exceptions import LegacyAPIWarning
from tsam.result import AccuracyMetrics, AggregationResult
from tsam.timeseriesaggregation import TimeSeriesAggregation, unstackToPeriods


def _parse_duration_hours(value: int | float | str, param_name: str) -> float:
    """Parse a duration value to hours.

    Accepts:
    - int/float: interpreted as hours (e.g., 24 → 24.0 hours)
    - str: pandas Timedelta string (e.g., '24h', '1d', '15min')

    Returns duration in hours as float.
    """
    if isinstance(value, (int, float)):
        return float(value)
    if isinstance(value, str):
        try:
            td = pd.Timedelta(value)
            return td.total_seconds() / 3600
        except ValueError as e:
            raise ValueError(
                f"{param_name}: invalid duration string '{value}': {e}"
            ) from e
    raise TypeError(
        f"{param_name} must be int, float, or string, got {type(value).__name__}"
    )
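
# A few illustrative calls for the helper above (editor's sketch; the values
# follow directly from pandas.Timedelta semantics):
#   _parse_duration_hours(24, "period_duration")           -> 24.0
#   _parse_duration_hours("1d", "period_duration")         -> 24.0
#   _parse_duration_hours("15min", "temporal_resolution")  -> 0.25
#   _parse_duration_hours("nonsense", "period_duration")   -> ValueError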
46
|
+
|
|
47
|
+
|
|
48
|
+
def aggregate(
|
|
49
|
+
data: pd.DataFrame,
|
|
50
|
+
n_clusters: int,
|
|
51
|
+
*,
|
|
52
|
+
period_duration: int | float | str = 24,
|
|
53
|
+
temporal_resolution: float | str | None = None,
|
|
54
|
+
cluster: ClusterConfig | None = None,
|
|
55
|
+
segments: SegmentConfig | None = None,
|
|
56
|
+
extremes: ExtremeConfig | None = None,
|
|
57
|
+
preserve_column_means: bool = True,
|
|
58
|
+
rescale_exclude_columns: list[str] | None = None,
|
|
59
|
+
round_decimals: int | None = None,
|
|
60
|
+
numerical_tolerance: float = 1e-13,
|
|
61
|
+
) -> AggregationResult:
|
|
62
|
+
"""Aggregate time series data into typical periods.
|
|
63
|
+
|
|
64
|
+
This function reduces a time series dataset to a smaller set of
|
|
65
|
+
representative "typical periods" using clustering algorithms.
|
|
66
|
+
|
|
67
|
+
Parameters
|
|
68
|
+
----------
|
|
69
|
+
data : pd.DataFrame
|
|
70
|
+
Input time series data with a datetime index.
|
|
71
|
+
Each column represents a different variable (e.g., solar, wind, demand).
|
|
72
|
+
The index should be a DatetimeIndex with regular intervals.
|
|
73
|
+
|
|
74
|
+
n_clusters : int
|
|
75
|
+
Number of clusters (typical periods) to create.
|
|
76
|
+
Higher values = more accuracy but less data reduction.
|
|
77
|
+
Typical range: 4-20 for energy system models.
|
|
78
|
+
|
|
79
|
+
period_duration : int, float, or str, default 24
|
|
80
|
+
Length of each period. Accepts:
|
|
81
|
+
- int/float: hours (e.g., 24 for daily, 168 for weekly)
|
|
82
|
+
- str: pandas Timedelta string (e.g., '24h', '1d', '1w')
|
|
83
|
+
|
|
84
|
+
temporal_resolution : float or str, optional
|
|
85
|
+
Time resolution of input data. Accepts:
|
|
86
|
+
- float: hours (e.g., 1.0 for hourly, 0.25 for 15-minute)
|
|
87
|
+
- str: pandas Timedelta string (e.g., '1h', '15min', '30min')
|
|
88
|
+
If not provided, inferred from the datetime index.
|
|
89
|
+
|
|
90
|
+
cluster : ClusterConfig, optional
|
|
91
|
+
Clustering configuration. If not provided, uses defaults:
|
|
92
|
+
- method: "hierarchical"
|
|
93
|
+
- representation: "medoid"
|
|
94
|
+
|
|
95
|
+
segments : SegmentConfig, optional
|
|
96
|
+
Segmentation configuration for reducing temporal resolution
|
|
97
|
+
within periods. If not provided, no segmentation is applied.
|
|
98
|
+
|
|
99
|
+
extremes : ExtremeConfig, optional
|
|
100
|
+
Configuration for preserving extreme periods.
|
|
101
|
+
If not provided, no extreme period handling is applied.
|
|
102
|
+
|
|
103
|
+
preserve_column_means : bool, default True
|
|
104
|
+
Rescale typical periods so each column's weighted mean matches
|
|
105
|
+
the original data's mean. Ensures total energy/load is preserved
|
|
106
|
+
when weights represent occurrence counts.
|
|
107
|
+
|
|
108
|
+
rescale_exclude_columns : list[str], optional
|
|
109
|
+
Column names to exclude from rescaling when preserve_column_means=True.
|
|
110
|
+
Useful for binary/indicator columns (0/1 values) that should not be
|
|
111
|
+
rescaled. If None (default), all columns are rescaled.
|
|
112
|
+
|
|
113
|
+
round_decimals : int, optional
|
|
114
|
+
Round output values to this many decimal places.
|
|
115
|
+
If not provided, no rounding is applied.
|
|
116
|
+
|
|
117
|
+
numerical_tolerance : float, default 1e-13
|
|
118
|
+
Tolerance for numerical precision issues.
|
|
119
|
+
Controls when warnings are raised for aggregated values exceeding
|
|
120
|
+
the original time series bounds. Increase this value to silence
|
|
121
|
+
warnings caused by floating-point precision errors.
|
|
122
|
+
|
|
123
|
+
Returns
|
|
124
|
+
-------
|
|
125
|
+
AggregationResult
|
|
126
|
+
Object containing:
|
|
127
|
+
- cluster_representatives: DataFrame with aggregated periods
|
|
128
|
+
- cluster_assignments: Which cluster each original period belongs to
|
|
129
|
+
- cluster_weights: Occurrence count per cluster
|
|
130
|
+
- accuracy: RMSE, MAE metrics
|
|
131
|
+
- Methods: to_dict()
|
|
132
|
+
|
|
133
|
+
Raises
|
|
134
|
+
------
|
|
135
|
+
ValueError
|
|
136
|
+
If input data is invalid or parameters are inconsistent.
|
|
137
|
+
TypeError
|
|
138
|
+
If parameter types are incorrect.
|
|
139
|
+
|
|
140
|
+
Examples
|
|
141
|
+
--------
|
|
142
|
+
Basic usage with defaults:
|
|
143
|
+
|
|
144
|
+
>>> import tsam
|
|
145
|
+
>>> result = tsam.aggregate(df, n_clusters=8)
|
|
146
|
+
>>> typical = result.cluster_representatives
|
|
147
|
+
|
|
148
|
+
With custom clustering:
|
|
149
|
+
|
|
150
|
+
>>> from tsam import aggregate, ClusterConfig
|
|
151
|
+
>>> result = aggregate(
|
|
152
|
+
... df,
|
|
153
|
+
... n_clusters=8,
|
|
154
|
+
... cluster=ClusterConfig(method="kmeans", representation="mean"),
|
|
155
|
+
... )
|
|
156
|
+
|
|
157
|
+
With segmentation (reduce to 12 timesteps per period):
|
|
158
|
+
|
|
159
|
+
>>> from tsam import aggregate, SegmentConfig
|
|
160
|
+
>>> result = aggregate(
|
|
161
|
+
... df,
|
|
162
|
+
... n_clusters=8,
|
|
163
|
+
... segments=SegmentConfig(n_segments=12),
|
|
164
|
+
... )
|
|
165
|
+
|
|
166
|
+
Preserving peak demand periods:
|
|
167
|
+
|
|
168
|
+
>>> from tsam import aggregate, ExtremeConfig
|
|
169
|
+
>>> result = aggregate(
|
|
170
|
+
... df,
|
|
171
|
+
... n_clusters=8,
|
|
172
|
+
... extremes=ExtremeConfig(max_value=["demand"]),
|
|
173
|
+
... )
|
|
174
|
+
|
|
175
|
+
Transferring assignments to new data:
|
|
176
|
+
|
|
177
|
+
>>> result1 = aggregate(df_wind, n_clusters=8)
|
|
178
|
+
>>> result2 = result1.clustering.apply(df_all)
|
|
179
|
+
|
|
180
|
+
See Also
|
|
181
|
+
--------
|
|
182
|
+
ClusterConfig : Clustering algorithm configuration
|
|
183
|
+
SegmentConfig : Temporal segmentation configuration
|
|
184
|
+
ExtremeConfig : Extreme period preservation configuration
|
|
185
|
+
AggregationResult : Result object with all outputs
|
|
186
|
+
"""
|
|
187
|
+
# Validate input
|
|
188
|
+
if not isinstance(data, pd.DataFrame):
|
|
189
|
+
raise TypeError(f"data must be a pandas DataFrame, got {type(data).__name__}")
|
|
190
|
+
|
|
191
|
+
if not isinstance(n_clusters, int) or n_clusters < 1:
|
|
192
|
+
raise ValueError(f"n_clusters must be a positive integer, got {n_clusters}")
|
|
193
|
+
|
|
194
|
+
# Parse duration parameters to hours
|
|
195
|
+
period_duration = _parse_duration_hours(period_duration, "period_duration")
|
|
196
|
+
if period_duration <= 0:
|
|
197
|
+
raise ValueError(f"period_duration must be positive, got {period_duration}")
|
|
198
|
+
|
|
199
|
+
temporal_resolution = (
|
|
200
|
+
_parse_duration_hours(temporal_resolution, "temporal_resolution")
|
|
201
|
+
if temporal_resolution is not None
|
|
202
|
+
else None
|
|
203
|
+
)
|
|
204
|
+
if temporal_resolution is not None and temporal_resolution <= 0:
|
|
205
|
+
raise ValueError(
|
|
206
|
+
f"temporal_resolution must be positive, got {temporal_resolution}"
|
|
207
|
+
)
|
|
208
|
+
|
|
209
|
+
# Apply defaults
|
|
210
|
+
if cluster is None:
|
|
211
|
+
cluster = ClusterConfig()
|
|
212
|
+
|
|
213
|
+
# Validate segments against data
|
|
214
|
+
if segments is not None:
|
|
215
|
+
# Calculate timesteps per period
|
|
216
|
+
if temporal_resolution is not None:
|
|
217
|
+
timesteps_per_period = int(period_duration / temporal_resolution)
|
|
218
|
+
else:
|
|
219
|
+
# Infer resolution from data index
|
|
220
|
+
if isinstance(data.index, pd.DatetimeIndex) and len(data.index) > 1:
|
|
221
|
+
inferred_resolution = (
|
|
222
|
+
data.index[1] - data.index[0]
|
|
223
|
+
).total_seconds() / 3600
|
|
224
|
+
timesteps_per_period = int(period_duration / inferred_resolution)
|
|
225
|
+
else:
|
|
226
|
+
# Fall back to assuming hourly resolution
|
|
227
|
+
timesteps_per_period = int(period_duration)
|
|
228
|
+
|
|
229
|
+
if segments.n_segments > timesteps_per_period:
|
|
230
|
+
raise ValueError(
|
|
231
|
+
f"n_segments ({segments.n_segments}) cannot exceed "
|
|
232
|
+
f"timesteps per period ({timesteps_per_period})"
|
|
233
|
+
)
|
|
234
|
+
|
|
235
|
+
# Validate extreme columns exist in data
|
|
236
|
+
if extremes is not None:
|
|
237
|
+
all_extreme_cols = (
|
|
238
|
+
extremes.max_value
|
|
239
|
+
+ extremes.min_value
|
|
240
|
+
+ extremes.max_period
|
|
241
|
+
+ extremes.min_period
|
|
242
|
+
)
|
|
243
|
+
missing = set(all_extreme_cols) - set(data.columns)
|
|
244
|
+
if missing:
|
|
245
|
+
raise ValueError(f"Extreme period columns not found in data: {missing}")
|
|
246
|
+
|
|
247
|
+
# Validate weight columns exist
|
|
248
|
+
if cluster.weights is not None:
|
|
249
|
+
missing = set(cluster.weights.keys()) - set(data.columns)
|
|
250
|
+
if missing:
|
|
251
|
+
raise ValueError(f"Weight columns not found in data: {missing}")
|
|
252
|
+
|
|
253
|
+
# Build old API parameters
|
|
254
|
+
old_params = _build_old_params(
|
|
255
|
+
data=data,
|
|
256
|
+
n_clusters=n_clusters,
|
|
257
|
+
period_duration=period_duration,
|
|
258
|
+
temporal_resolution=temporal_resolution,
|
|
259
|
+
cluster=cluster,
|
|
260
|
+
segments=segments,
|
|
261
|
+
extremes=extremes,
|
|
262
|
+
preserve_column_means=preserve_column_means,
|
|
263
|
+
rescale_exclude_columns=rescale_exclude_columns,
|
|
264
|
+
round_decimals=round_decimals,
|
|
265
|
+
numerical_tolerance=numerical_tolerance,
|
|
266
|
+
)
|
|
267
|
+
|
|
268
|
+
# Run aggregation using old implementation (suppress deprecation warning for internal use)
|
|
269
|
+
with warnings.catch_warnings():
|
|
270
|
+
warnings.simplefilter("ignore", LegacyAPIWarning)
|
|
271
|
+
agg = TimeSeriesAggregation(**old_params)
|
|
272
|
+
cluster_representatives = agg.createTypicalPeriods()
|
|
273
|
+
|
|
274
|
+
# Rename index levels for consistency with new API terminology
|
|
275
|
+
cluster_representatives = cluster_representatives.rename_axis(
|
|
276
|
+
index={"PeriodNum": "cluster", "TimeStep": "timestep"}
|
|
277
|
+
)
|
|
278
|
+
|
|
279
|
+
# Build accuracy metrics
|
|
280
|
+
accuracy_df = agg.accuracyIndicators()
|
|
281
|
+
|
|
282
|
+
# Build rescale deviations DataFrame
|
|
283
|
+
rescale_deviations_dict = getattr(agg, "_rescaleDeviations", {})
|
|
284
|
+
if rescale_deviations_dict:
|
|
285
|
+
rescale_deviations = pd.DataFrame.from_dict(
|
|
286
|
+
rescale_deviations_dict, orient="index"
|
|
287
|
+
)
|
|
288
|
+
rescale_deviations.index.name = "column"
|
|
289
|
+
else:
|
|
290
|
+
rescale_deviations = pd.DataFrame(
|
|
291
|
+
columns=["deviation_pct", "converged", "iterations"]
|
|
292
|
+
)
|
|
293
|
+
|
|
294
|
+
accuracy = AccuracyMetrics(
|
|
295
|
+
rmse=accuracy_df["RMSE"],
|
|
296
|
+
mae=accuracy_df["MAE"],
|
|
297
|
+
rmse_duration=accuracy_df["RMSE_duration"],
|
|
298
|
+
rescale_deviations=rescale_deviations,
|
|
299
|
+
)
|
|
300
|
+
|
|
301
|
+
# Build ClusteringResult
|
|
302
|
+
clustering_result = _build_clustering_result(
|
|
303
|
+
agg=agg,
|
|
304
|
+
n_segments=segments.n_segments if segments else None,
|
|
305
|
+
cluster_config=cluster,
|
|
306
|
+
segment_config=segments,
|
|
307
|
+
extremes_config=extremes,
|
|
308
|
+
preserve_column_means=preserve_column_means,
|
|
309
|
+
rescale_exclude_columns=rescale_exclude_columns,
|
|
310
|
+
temporal_resolution=temporal_resolution,
|
|
311
|
+
)
|
|
312
|
+
|
|
313
|
+
# Compute segment_durations as tuple of tuples
|
|
314
|
+
segment_durations_tuple = None
|
|
315
|
+
if segments and hasattr(agg, "segmentedNormalizedTypicalPeriods"):
|
|
316
|
+
segmented_df = agg.segmentedNormalizedTypicalPeriods
|
|
317
|
+
segment_durations_tuple = tuple(
|
|
318
|
+
tuple(
|
|
319
|
+
int(seg_dur)
|
|
320
|
+
for _seg_step, seg_dur, _orig_start in segmented_df.loc[
|
|
321
|
+
period_idx
|
|
322
|
+
].index
|
|
323
|
+
)
|
|
324
|
+
for period_idx in segmented_df.index.get_level_values(0).unique()
|
|
325
|
+
)
|
|
326
|
+
|
|
327
|
+
# Build result object
|
|
328
|
+
return AggregationResult(
|
|
329
|
+
cluster_representatives=cluster_representatives,
|
|
330
|
+
cluster_weights=dict(agg.clusterPeriodNoOccur),
|
|
331
|
+
n_timesteps_per_period=agg.timeStepsPerPeriod,
|
|
332
|
+
segment_durations=segment_durations_tuple,
|
|
333
|
+
accuracy=accuracy,
|
|
334
|
+
clustering_duration=getattr(agg, "clusteringDuration", 0.0),
|
|
335
|
+
clustering=clustering_result,
|
|
336
|
+
is_transferred=False,
|
|
337
|
+
_aggregation=agg,
|
|
338
|
+
)
|
|
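
# Consumption sketch for the result built above (editor's sketch; assumes an
# hourly df with a DatetimeIndex, as in the docstring examples):
#
#   result = aggregate(df, n_clusters=8)
#   typical = result.cluster_representatives     # (cluster, timestep) index after rename_axis
#   n_periods = sum(result.cluster_weights.values())  # occurrence counts sum to the
#                                                     # original period count (e.g. 365)
#   result.accuracy.rmse.mean()                  # per-column RMSE, averaged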
339
|
+
|
|
340
|
+
|
|
341
|
+
def _build_clustering_result(
|
|
342
|
+
agg: TimeSeriesAggregation,
|
|
343
|
+
n_segments: int | None,
|
|
344
|
+
cluster_config: ClusterConfig,
|
|
345
|
+
segment_config: SegmentConfig | None,
|
|
346
|
+
extremes_config: ExtremeConfig | None,
|
|
347
|
+
preserve_column_means: bool,
|
|
348
|
+
rescale_exclude_columns: list[str] | None,
|
|
349
|
+
temporal_resolution: float | None,
|
|
350
|
+
) -> ClusteringResult:
|
|
351
|
+
"""Build ClusteringResult from a TimeSeriesAggregation object."""
|
|
352
|
+
# Get cluster centers (convert to Python ints for JSON serialization)
|
|
353
|
+
# Handle extreme periods based on method:
|
|
354
|
+
# - new_cluster/append: append extreme period indices (creates additional clusters)
|
|
355
|
+
# - replace: keep original cluster centers
|
|
356
|
+
# Note: replace creates a hybrid representation (some columns from medoid, some
|
|
357
|
+
# from extreme period) that cannot be perfectly reproduced during transfer
|
|
358
|
+
cluster_centers: tuple[int, ...] | None = None
|
|
359
|
+
if agg.clusterCenterIndices is not None:
|
|
360
|
+
center_indices = [int(x) for x in agg.clusterCenterIndices]
|
|
361
|
+
|
|
362
|
+
if (
|
|
363
|
+
hasattr(agg, "extremePeriods")
|
|
364
|
+
and agg.extremePeriods
|
|
365
|
+
and extremes_config is not None
|
|
366
|
+
and extremes_config.method in ("new_cluster", "append")
|
|
367
|
+
):
|
|
368
|
+
# Add extreme period indices as new cluster centers
|
|
369
|
+
for period_type in agg.extremePeriods:
|
|
370
|
+
center_indices.append(int(agg.extremePeriods[period_type]["stepNo"]))
|
|
371
|
+
|
|
372
|
+
cluster_centers = tuple(center_indices)
|
|
373
|
+
|
|
374
|
+
# Compute segment data if segmentation was used
|
|
375
|
+
segment_assignments: tuple[tuple[int, ...], ...] | None = None
|
|
376
|
+
segment_durations: tuple[tuple[int, ...], ...] | None = None
|
|
377
|
+
segment_centers: tuple[tuple[int, ...], ...] | None = None
|
|
378
|
+
|
|
379
|
+
if n_segments is not None and hasattr(agg, "segmentedNormalizedTypicalPeriods"):
|
|
380
|
+
segmented_df = agg.segmentedNormalizedTypicalPeriods
|
|
381
|
+
segment_assignments_list = []
|
|
382
|
+
segment_durations_list = []
|
|
383
|
+
|
|
384
|
+
for period_idx in segmented_df.index.get_level_values(0).unique():
|
|
385
|
+
period_data = segmented_df.loc[period_idx]
|
|
386
|
+
# Index levels: Segment Step, Segment Duration, Original Start Step
|
|
387
|
+
assignments = []
|
|
388
|
+
durations = []
|
|
389
|
+
for seg_step, seg_dur, _orig_start in period_data.index:
|
|
390
|
+
assignments.extend([int(seg_step)] * int(seg_dur))
|
|
391
|
+
durations.append(int(seg_dur))
|
|
392
|
+
segment_assignments_list.append(tuple(assignments))
|
|
393
|
+
segment_durations_list.append(tuple(durations))
|
|
394
|
+
|
|
395
|
+
segment_assignments = tuple(segment_assignments_list)
|
|
396
|
+
segment_durations = tuple(segment_durations_list)
|
|
397
|
+
|
|
398
|
+
# Extract segment center indices (only available for medoid/maxoid representations)
|
|
399
|
+
if (
|
|
400
|
+
hasattr(agg, "segmentCenterIndices")
|
|
401
|
+
and agg.segmentCenterIndices is not None
|
|
402
|
+
):
|
|
403
|
+
# Check if any period has center indices (None for mean representation)
|
|
404
|
+
if all(pc is not None for pc in agg.segmentCenterIndices):
|
|
405
|
+
segment_centers = tuple(
|
|
406
|
+
tuple(int(x) for x in period_centers)
|
|
407
|
+
for period_centers in agg.segmentCenterIndices
|
|
408
|
+
)
|
|
409
|
+
|
|
410
|
+
# Extract representation from configs
|
|
411
|
+
representation = cluster_config.get_representation()
|
|
412
|
+
segment_representation = segment_config.representation if segment_config else None
|
|
413
|
+
|
|
414
|
+
# Extract extreme cluster indices if extremes were used
|
|
415
|
+
extreme_cluster_indices: tuple[int, ...] | None = None
|
|
416
|
+
if hasattr(agg, "extremeClusterIdx") and agg.extremeClusterIdx:
|
|
417
|
+
extreme_cluster_indices = tuple(int(x) for x in agg.extremeClusterIdx)
|
|
418
|
+
|
|
419
|
+
return ClusteringResult(
|
|
420
|
+
period_duration=agg.hoursPerPeriod,
|
|
421
|
+
cluster_assignments=tuple(int(x) for x in agg.clusterOrder),
|
|
422
|
+
cluster_centers=cluster_centers,
|
|
423
|
+
segment_assignments=segment_assignments,
|
|
424
|
+
segment_durations=segment_durations,
|
|
425
|
+
segment_centers=segment_centers,
|
|
426
|
+
preserve_column_means=preserve_column_means,
|
|
427
|
+
rescale_exclude_columns=tuple(rescale_exclude_columns)
|
|
428
|
+
if rescale_exclude_columns
|
|
429
|
+
else None,
|
|
430
|
+
representation=representation,
|
|
431
|
+
segment_representation=segment_representation,
|
|
432
|
+
temporal_resolution=temporal_resolution,
|
|
433
|
+
n_timesteps_per_period=agg.timeStepsPerPeriod,
|
|
434
|
+
extreme_cluster_indices=extreme_cluster_indices,
|
|
435
|
+
cluster_config=cluster_config,
|
|
436
|
+
segment_config=segment_config,
|
|
437
|
+
extremes_config=extremes_config,
|
|
438
|
+
)
|
|
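
# The ClusteringResult assembled above backs the transfer workflow from the
# aggregate() docstring: its stored assignments, centers, and segment data are
# replayed through the predef_* parameters of _build_old_params() below, e.g.
#
#   result_wind = aggregate(df_wind, n_clusters=8)
#   result_all = result_wind.clustering.apply(df_all)  # same cluster structure
#
# (apply() lives on ClusteringResult in tsam/config.py, which is not shown in
# this diff; aggregate() itself always sets is_transferred=False, so transferred
# results presumably carry is_transferred=True.)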


def _build_old_params(
    data: pd.DataFrame,
    n_clusters: int,
    period_duration: float,
    temporal_resolution: float | None,
    cluster: ClusterConfig,
    segments: SegmentConfig | None,
    extremes: ExtremeConfig | None,
    preserve_column_means: bool,
    rescale_exclude_columns: list[str] | None,
    round_decimals: int | None,
    numerical_tolerance: float,
    *,
    # Predefined parameters (used internally by ClusteringResult.apply())
    predef_cluster_assignments: tuple[int, ...] | None = None,
    predef_cluster_centers: tuple[int, ...] | None = None,
    predef_extreme_cluster_indices: tuple[int, ...] | None = None,
    predef_segment_assignments: tuple[tuple[int, ...], ...] | None = None,
    predef_segment_durations: tuple[tuple[int, ...], ...] | None = None,
    predef_segment_centers: tuple[tuple[int, ...], ...] | None = None,
) -> dict:
    """Build parameters for the old TimeSeriesAggregation API."""
    params: dict = {
        "timeSeries": data,
        "noTypicalPeriods": n_clusters,
        "hoursPerPeriod": period_duration,
        "rescaleClusterPeriods": preserve_column_means,
        "rescaleExcludeColumns": rescale_exclude_columns,
        "numericalTolerance": numerical_tolerance,
    }

    if temporal_resolution is not None:
        params["resolution"] = temporal_resolution

    if round_decimals is not None:
        params["roundOutput"] = round_decimals

    # Cluster config
    method = METHOD_MAPPING.get(cluster.method)
    if method is None:
        raise ValueError(
            f"Unknown cluster method: {cluster.method!r}. "
            f"Valid options: {list(METHOD_MAPPING.keys())}"
        )
    params["clusterMethod"] = method

    representation = cluster.get_representation()
    rep_mapped = REPRESENTATION_MAPPING.get(representation)
    if rep_mapped is None:
        raise ValueError(
            f"Unknown representation method: {representation!r}. "
            f"Valid options: {list(REPRESENTATION_MAPPING.keys())}"
        )
    params["representationMethod"] = rep_mapped
    params["sortValues"] = cluster.use_duration_curves
    params["sameMean"] = cluster.normalize_column_means
    params["evalSumPeriods"] = cluster.include_period_sums
    params["solver"] = cluster.solver

    if cluster.weights is not None:
        params["weightDict"] = cluster.weights

    if predef_cluster_assignments is not None:
        params["predefClusterOrder"] = list(predef_cluster_assignments)

    if predef_cluster_centers is not None:
        params["predefClusterCenterIndices"] = list(predef_cluster_centers)

    if predef_extreme_cluster_indices is not None:
        params["predefExtremeClusterIdx"] = list(predef_extreme_cluster_indices)

    # Segmentation config
    if segments is not None:
        params["segmentation"] = True
        params["noSegments"] = segments.n_segments
        params["segmentRepresentationMethod"] = REPRESENTATION_MAPPING.get(
            segments.representation, "meanRepresentation"
        )

        # Predefined segment parameters (from ClusteringResult)
        if predef_segment_assignments is not None:
            params["predefSegmentOrder"] = [list(s) for s in predef_segment_assignments]
        if predef_segment_durations is not None:
            params["predefSegmentDurations"] = [
                list(s) for s in predef_segment_durations
            ]
        if predef_segment_centers is not None:
            params["predefSegmentCenters"] = [list(s) for s in predef_segment_centers]
    else:
        params["segmentation"] = False

    # Extreme config
    if extremes is not None and extremes.has_extremes():
        params["extremePeriodMethod"] = EXTREME_METHOD_MAPPING[extremes.method]
        params["addPeakMax"] = extremes.max_value
        params["addPeakMin"] = extremes.min_value
        params["addMeanMax"] = extremes.max_period
        params["addMeanMin"] = extremes.min_period
    else:
        params["extremePeriodMethod"] = "None"

    return params
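
# Read together, the assignments above give the translation between the two
# APIs; for example, these calls should be equivalent (editor's sketch; the
# mapped value "hierarchical" for the default method is an assumption, since
# METHOD_MAPPING lives in tsam/config.py):
#
#   aggregate(df, n_clusters=8, period_duration=24)
#
#   TimeSeriesAggregation(
#       timeSeries=df,
#       noTypicalPeriods=8,          # n_clusters
#       hoursPerPeriod=24,           # period_duration
#       rescaleClusterPeriods=True,  # preserve_column_means (default)
#       clusterMethod="hierarchical",
#   ).createTypicalPeriods()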


def unstack_to_periods(
    data: pd.DataFrame,
    period_duration: int | float | str = 24,
) -> pd.DataFrame:
    """Reshape time series data into period structure for visualization.

    Transforms a flat time series into a DataFrame with periods as rows and
    timesteps as a MultiIndex level, suitable for creating heatmaps with plotly.

    Parameters
    ----------
    data : pd.DataFrame
        Time series data with datetime index.
    period_duration : int, float, or str, default 24
        Length of each period. Accepts:
        - int/float: hours (e.g., 24 for daily, 168 for weekly)
        - str: pandas Timedelta string (e.g., '24h', '1d', '1w')

    Returns
    -------
    pd.DataFrame
        Reshaped data with shape (n_periods, n_timesteps_per_period) for each column.
        Suitable for ``px.imshow(result["column"].values.T)`` to create heatmaps.

    Examples
    --------
    >>> import tsam
    >>> import plotly.express as px
    >>>
    >>> # Reshape data for heatmap visualization
    >>> unstacked = tsam.unstack_to_periods(df, period_duration=24)
    >>>
    >>> # Create heatmap with plotly
    >>> px.imshow(
    ...     unstacked["Load"].values.T,
    ...     labels={"x": "Day", "y": "Hour", "color": "Load"},
    ...     title="Load Heatmap"
    ... )
    """
    period_hours = _parse_duration_hours(period_duration, "period_duration")

    # Infer timestep resolution from data index
    timestep_hours = 1.0  # Default to hourly
    if isinstance(data.index, pd.DatetimeIndex) and len(data.index) > 1:
        timestep_hours = (data.index[1] - data.index[0]).total_seconds() / 3600

    # Calculate timesteps per period
    timesteps_per_period = round(period_hours / timestep_hours)
    if timesteps_per_period < 1:
        raise ValueError(
            f"period_duration ({period_hours}h) is smaller than "
            f"data timestep resolution ({timestep_hours}h)"
        )

    with warnings.catch_warnings():
        warnings.simplefilter("ignore", LegacyAPIWarning)
        unstacked, _ = unstackToPeriods(data.copy(), timesteps_per_period)
    return cast("pd.DataFrame", unstacked)
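Because the resolution is inferred from the first index step, sub-hourly data works without extra arguments. A sketch with hypothetical 15-minute data:

import pandas as pd
import tsam

# One week of 15-minute data: 96 timesteps per day, 7 days.
idx = pd.date_range("2030-01-01", periods=96 * 7, freq="15min")
df_15min = pd.DataFrame({"Load": range(96 * 7)}, index=idx)

# 24 h / 0.25 h = 96 timesteps per period, so unstacked["Load"]
# has shape (7 periods, 96 timesteps) per the docstring above.
unstacked = tsam.unstack_to_periods(df_15min, period_duration="1d")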