tsam 2.3.9__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tsam/__init__.py +79 -0
- tsam/api.py +602 -0
- tsam/config.py +852 -0
- tsam/exceptions.py +17 -0
- tsam/hyperparametertuning.py +289 -245
- tsam/periodAggregation.py +140 -141
- tsam/plot.py +513 -0
- tsam/py.typed +0 -0
- tsam/representations.py +177 -167
- tsam/result.py +397 -0
- tsam/timeseriesaggregation.py +1446 -1361
- tsam/tuning.py +1038 -0
- tsam/utils/durationRepresentation.py +229 -223
- tsam/utils/k_maxoids.py +138 -145
- tsam/utils/k_medoids_contiguity.py +139 -140
- tsam/utils/k_medoids_exact.py +232 -239
- tsam/utils/segmentation.py +232 -118
- {tsam-2.3.9.dist-info → tsam-3.0.0.dist-info}/METADATA +124 -81
- tsam-3.0.0.dist-info/RECORD +23 -0
- {tsam-2.3.9.dist-info → tsam-3.0.0.dist-info}/WHEEL +1 -1
- {tsam-2.3.9.dist-info → tsam-3.0.0.dist-info}/licenses/LICENSE.txt +21 -21
- tsam-2.3.9.dist-info/RECORD +0 -16
- {tsam-2.3.9.dist-info → tsam-3.0.0.dist-info}/top_level.txt +0 -0
tsam/result.py
ADDED
|
@@ -0,0 +1,397 @@
|
|
|
1
|
+
"""Result classes for tsam aggregation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from functools import cached_property
|
|
7
|
+
from typing import TYPE_CHECKING, cast
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pandas as pd
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from tsam.config import ClusteringResult
|
|
14
|
+
from tsam.plot import ResultPlotAccessor
|
|
15
|
+
from tsam.timeseriesaggregation import TimeSeriesAggregation
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class AccuracyMetrics:
    """Accuracy metrics comparing aggregated to original time series.

    Attributes
    ----------
    rmse : pd.Series
        Root Mean Square Error per column.
    mae : pd.Series
        Mean Absolute Error per column.
    rmse_duration : pd.Series
        RMSE on duration curves (sorted values) per column.
    rescale_deviations : pd.DataFrame
        Rescaling deviation information per column. Contains columns:

        - deviation_pct: Final deviation percentage after rescaling
        - converged: Whether rescaling converged within max iterations
        - iterations: Number of iterations used

        Only populated if rescaling was enabled, otherwise empty DataFrame.
    """

    rmse: pd.Series
    mae: pd.Series
    rmse_duration: pd.Series
    rescale_deviations: pd.DataFrame

    @property
    def summary(self) -> pd.DataFrame:
        """Summary DataFrame with all metrics per column.

        Returns
        -------
        pd.DataFrame
            DataFrame with columns: rmse, mae, rmse_duration, and deviation_pct
            (if rescaling was enabled). Index is the original column names.
        """
        df = pd.DataFrame(
            {
                "rmse": self.rmse,
                "mae": self.mae,
                "rmse_duration": self.rmse_duration,
            }
        )
        # The deviation column is only attached when rescaling information
        # exists; pandas aligns it with the metric rows by index label.
        if not self.rescale_deviations.empty:
            df["deviation_pct"] = self.rescale_deviations["deviation_pct"]
        return df

    def __repr__(self) -> str:
        """Return a multi-line summary showing the mean of every metric.

        When rescaling information is present and at least one column did not
        converge, the number of non-converged columns and the largest
        ``deviation_pct`` over all columns are appended.
        """
        rescale_info = ""
        if not self.rescale_deviations.empty:
            # Cast to bool before inverting: on an object-dtype column ``~``
            # is applied element-wise as an integer inversion
            # (``~True == -2``), which turns the count negative and would
            # silently hide the failures.
            converged = self.rescale_deviations["converged"].astype(bool)
            n_failed = int((~converged).sum())
            if n_failed > 0:
                max_dev = self.rescale_deviations["deviation_pct"].max()
                rescale_info = f",\n rescale_failures={n_failed} (max {max_dev:.2f}%)"
        return (
            f"AccuracyMetrics(\n"
            f" rmse={self.rmse.mean():.4f} (mean),\n"
            f" mae={self.mae.mean():.4f} (mean),\n"
            f" rmse_duration={self.rmse_duration.mean():.4f} (mean){rescale_info}\n"
            f")"
        )
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@dataclass
class AggregationResult:
    """Result of time series aggregation.

    This class holds all outputs from the aggregation process and provides
    convenient methods for accessing and exporting the results.

    Attributes
    ----------
    cluster_representatives : pd.DataFrame
        The aggregated typical periods with MultiIndex (cluster, timestep).
        Each row represents one timestep in one cluster representative.

    cluster_assignments : np.ndarray
        Which cluster each original period belongs to.
        Length equals the number of original periods.
        Values are cluster indices (0 to n_clusters-1).

    cluster_weights : dict[int, int]
        How many original periods each cluster represents.
        Keys are cluster indices, values are occurrence counts.

    n_clusters : int
        Number of clusters (typical periods).

    n_timesteps_per_period : int
        Number of timesteps in each period.

    n_segments : int | None
        Number of segments per period if segmentation was used, else None.

    segment_durations : tuple[tuple[int, ...], ...] | None
        Duration (in timesteps) for each segment in each typical period.
        Outer tuple has one entry per typical period, inner tuple has
        duration for each segment. Use for transferring to another aggregation.

    accuracy : AccuracyMetrics
        Accuracy metrics comparing reconstructed to original data.

    clustering_duration : float
        Time taken for clustering in seconds.

    clustering : ClusteringResult
        Clustering description from which ``n_segments`` and
        ``cluster_assignments`` are read and which is embedded in
        :meth:`to_dict` via its own ``to_dict()``.

    is_transferred : bool
        Whether this result was created by applying a transferred clustering
        (via ``ClusteringResult.apply()``) rather than by clustering this data directly.

    Examples
    --------
    >>> result = tsam.aggregate(df, n_clusters=8)
    >>> result.cluster_representatives
                      solar  wind  demand
    cluster timestep
    0       0          0.12  0.45    0.78
            1          0.15  0.42    0.82
    ...

    >>> result.cluster_weights
    {0: 45, 1: 52, 2: 38, ...}

    >>> result.accuracy.rmse
    solar     0.023
    wind      0.041
    demand    0.015
    dtype: float64
    """

    cluster_representatives: pd.DataFrame
    cluster_weights: dict[int, int]
    n_timesteps_per_period: int
    segment_durations: tuple[tuple[int, ...], ...] | None
    accuracy: AccuracyMetrics
    clustering_duration: float
    clustering: ClusteringResult
    is_transferred: bool
    # Backing aggregation object; excluded from repr and equality.
    _aggregation: TimeSeriesAggregation = field(repr=False, compare=False)

    @cached_property
    def n_clusters(self) -> int:
        """Number of clusters (typical periods).

        Derived from the cluster_representatives DataFrame index,
        which is the authoritative source. Note: cluster_weights may
        have more entries than actual cluster IDs due to tsam quirks.
        """
        return self.cluster_representatives.index.get_level_values(0).nunique()

    @cached_property
    def n_segments(self) -> int | None:
        """Number of segments per period if segmentation was used, else None."""
        return self.clustering.n_segments

    @cached_property
    def cluster_assignments(self) -> np.ndarray:
        """Which cluster each original period belongs to.

        Length equals the number of original periods.
        Values are cluster indices (0 to n_clusters-1).
        """
        return np.array(self.clustering.cluster_assignments)

    def __repr__(self) -> str:
        """Return a compact multi-line summary of the result."""
        # ``is not None`` (rather than truthiness) matches the documented
        # "else None" contract and the check used in ``assignments``.
        seg_info = (
            f", n_segments={self.n_segments}" if self.n_segments is not None else ""
        )
        transferred_info = ", is_transferred=True" if self.is_transferred else ""
        return (
            f"AggregationResult(\n"
            f" n_clusters={self.n_clusters},\n"
            f" n_timesteps_per_period={self.n_timesteps_per_period}{seg_info}{transferred_info},\n"
            f" accuracy={self.accuracy}\n"
            f")"
        )

    @cached_property
    def original(self) -> pd.DataFrame:
        """Original time series data.

        Returns
        -------
        pd.DataFrame
            The original input time series with datetime index.

        Examples
        --------
        >>> result = tsam.aggregate(df, n_clusters=8)
        >>> result.original.shape == df.shape
        True
        """
        return cast("pd.DataFrame", self._aggregation.timeSeries)

    @cached_property
    def reconstructed(self) -> pd.DataFrame:
        """Reconstructed time series from typical periods.

        Each original period is replaced by its assigned cluster representative.
        This is cached for performance since reconstruction can be expensive.

        Returns
        -------
        pd.DataFrame
            Reconstructed time series with same shape as original.

        Examples
        --------
        >>> result = tsam.aggregate(df, n_clusters=8)
        >>> result.reconstructed.shape == df.shape
        True
        """
        return cast("pd.DataFrame", self._aggregation.predictOriginalData())

    @cached_property
    def residuals(self) -> pd.DataFrame:
        """Residuals (original - reconstructed).

        Positive values indicate the original exceeded the reconstruction.

        Returns
        -------
        pd.DataFrame
            Residual time series with same shape as original.

        Examples
        --------
        >>> result = tsam.aggregate(df, n_clusters=8)
        >>> result.residuals.mean()  # Should be close to zero
        """
        return self.original - self.reconstructed

    def to_dict(self) -> dict:
        """Export results as a dictionary for serialization.

        Returns
        -------
        dict
            Dictionary containing all result data in serializable format.
            DataFrames and Series are converted with their ``to_dict()``
            methods, so index labels are kept as-is (tuples for the
            MultiIndex of ``cluster_representatives``).
        """
        return {
            "cluster_representatives": self.cluster_representatives.to_dict(),
            "cluster_assignments": self.cluster_assignments.tolist(),
            "cluster_weights": self.cluster_weights,
            "n_clusters": self.n_clusters,
            "n_timesteps_per_period": self.n_timesteps_per_period,
            "n_segments": self.n_segments,
            "segment_durations": self.segment_durations,
            "clustering": self.clustering.to_dict(),
            "accuracy": {
                "rmse": self.accuracy.rmse.to_dict(),
                "mae": self.accuracy.mae.to_dict(),
                "rmse_duration": self.accuracy.rmse_duration.to_dict(),
                "rescale_deviations": self.accuracy.rescale_deviations.to_dict(),
            },
            "clustering_duration": self.clustering_duration,
            # Public field that was previously missing from the export.
            "is_transferred": self.is_transferred,
        }

    @property
    def timestep_index(self) -> list[int]:
        """Get the timestep or segment indices.

        Returns
        -------
        list[int]
            List of indices [0, 1, ..., n-1] where n is n_segments
            if segmentation was used, otherwise n_timesteps_per_period.
        """
        n = (
            self.n_segments
            if self.n_segments is not None
            else self.n_timesteps_per_period
        )
        return list(range(n))

    @property
    def period_index(self) -> list[int]:
        """Get the period (cluster) indices.

        Returns the actual cluster IDs from the cluster_representatives
        DataFrame, which is the authoritative source.

        Returns
        -------
        list[int]
            Sorted list of cluster indices present in cluster_representatives.
        """
        return sorted(self.cluster_representatives.index.get_level_values(0).unique())

    @property
    def assignments(self) -> pd.DataFrame:
        """Get timestep-level assignment information.

        Returns a DataFrame with one row per original timestep containing
        assignment information for transferring results to another aggregation.

        If the original time index is shorter than
        ``n_periods * n_timesteps_per_period`` (i.e. the final period is only
        partially covered by the input), the surplus rows are dropped so that
        the frame lines up with the time index instead of raising a
        length-mismatch error.

        Columns
        -------
        period_idx : int
            Index of the original period (0-indexed, 0 to n_original_periods-1).
        timestep_idx : int
            Timestep index within the period (0 to n_timesteps_per_period-1).
        cluster_idx : int
            Which cluster this period is assigned to (0 to n_clusters-1).
        segment_idx : int (only if segmentation was used)
            Which segment this timestep belongs to within its period.

        Returns
        -------
        pd.DataFrame
            DataFrame indexed by original time index with assignment columns.

        Examples
        --------
        >>> result = tsam.aggregate(df, n_clusters=8)
        >>> result.assignments.head()
                             period_idx  timestep_idx  cluster_idx
        2010-01-01 00:00:00           0             0            3
        2010-01-01 01:00:00           0             1            3
        ...

        >>> # Save and reload assignments
        >>> result.assignments.to_csv("assignments.csv")
        """
        agg = self._aggregation
        labels = self.cluster_assignments
        n_steps = self.n_timesteps_per_period
        n_periods = len(labels)
        # Number of rows that actually exist in the original series.
        n_rows = len(agg.timeIndex)

        # One row per (period, timestep), in period-major order, trimmed to
        # the original index length.
        result_df = pd.DataFrame(
            {
                "period_idx": np.repeat(
                    np.arange(n_periods, dtype=np.int64), n_steps
                )[:n_rows],
                "timestep_idx": np.tile(
                    np.arange(n_steps, dtype=np.int64), n_periods
                )[:n_rows],
                "cluster_idx": np.repeat(labels, n_steps)[:n_rows],
            },
            index=agg.timeIndex,
        )

        # Add segment_idx if segmentation was used
        if self.n_segments is not None and hasattr(
            agg, "segmentedNormalizedTypicalPeriods"
        ):
            # The segment layout depends only on the cluster, so expand it
            # once per cluster and reuse it for every period assigned to it.
            layout_by_cluster: dict = {}
            segment_indices: list = []
            for cluster_idx in labels:
                if cluster_idx not in layout_by_cluster:
                    # Get segment structure for this cluster's typical period
                    segment_data = agg.segmentedNormalizedTypicalPeriods.loc[
                        cluster_idx
                    ]
                    # Segment Step is level 0, Segment Duration is level 1
                    segment_steps = segment_data.index.get_level_values(0)
                    segment_durations = segment_data.index.get_level_values(1)
                    # Repeat each segment index by its duration
                    layout_by_cluster[cluster_idx] = np.repeat(
                        segment_steps, segment_durations
                    ).tolist()
                segment_indices.extend(layout_by_cluster[cluster_idx])
            result_df["segment_idx"] = segment_indices[:n_rows]

        return result_df

    @property
    def plot(self) -> ResultPlotAccessor:
        """Access plotting methods.

        Returns a plotting accessor with methods for visualizing the results.

        Returns
        -------
        ResultPlotAccessor
            Accessor with plotting methods.

        Examples
        --------
        >>> result = tsam.aggregate(df, n_clusters=8)
        >>> result.plot.compare()  # Compare original vs reconstructed
        >>> result.plot.residuals()  # View reconstruction errors
        >>> result.plot.cluster_representatives()
        >>> result.plot.cluster_weights()
        >>> result.plot.accuracy()
        """
        # Imported lazily, only when plotting is requested.
        from tsam.plot import ResultPlotAccessor

        return ResultPlotAccessor(self)
|