tsam 2.3.8__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tsam/result.py ADDED
@@ -0,0 +1,397 @@
1
+ """Result classes for tsam aggregation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from functools import cached_property
7
+ from typing import TYPE_CHECKING, cast
8
+
9
+ import numpy as np
10
+ import pandas as pd
11
+
12
+ if TYPE_CHECKING:
13
+ from tsam.config import ClusteringResult
14
+ from tsam.plot import ResultPlotAccessor
15
+ from tsam.timeseriesaggregation import TimeSeriesAggregation
16
+
17
+
18
@dataclass
class AccuracyMetrics:
    """Per-column error measures of an aggregation versus the original data.

    Attributes
    ----------
    rmse : pd.Series
        Root Mean Square Error per column.
    mae : pd.Series
        Mean Absolute Error per column.
    rmse_duration : pd.Series
        RMSE on duration curves (sorted values) per column.
    rescale_deviations : pd.DataFrame
        Rescaling deviation information per column. Contains columns:
        - deviation_pct: Final deviation percentage after rescaling
        - converged: Whether rescaling converged within max iterations
        - iterations: Number of iterations used
        Only populated if rescaling was enabled, otherwise empty DataFrame.
    """

    rmse: pd.Series
    mae: pd.Series
    rmse_duration: pd.Series
    rescale_deviations: pd.DataFrame

    @property
    def summary(self) -> pd.DataFrame:
        """Collect every metric into one table.

        Returns
        -------
        pd.DataFrame
            One column each for rmse, mae and rmse_duration, plus
            deviation_pct when ``rescale_deviations`` is not empty.
        """
        metric_columns = {
            "rmse": self.rmse,
            "mae": self.mae,
            "rmse_duration": self.rmse_duration,
        }
        table = pd.DataFrame(metric_columns)
        if self.rescale_deviations.empty:
            return table
        table["deviation_pct"] = self.rescale_deviations["deviation_pct"]
        return table

    def _rescale_failure_note(self) -> str:
        """Return the repr suffix describing non-converged rescaling, or ""."""
        deviations = self.rescale_deviations
        if deviations.empty:
            return ""
        n_failed = (~deviations["converged"]).sum()
        if not n_failed > 0:
            return ""
        max_dev = deviations["deviation_pct"].max()
        return f",\n rescale_failures={n_failed} (max {max_dev:.2f}%)"

    def __repr__(self) -> str:
        # Only column means are shown; the failure note is appended to the
        # last metric line when at least one column did not converge.
        rescale_info = self._rescale_failure_note()
        header = f"AccuracyMetrics(\n"
        rmse_line = f" rmse={self.rmse.mean():.4f} (mean),\n"
        mae_line = f" mae={self.mae.mean():.4f} (mean),\n"
        duration_line = (
            f" rmse_duration={self.rmse_duration.mean():.4f} (mean){rescale_info}\n"
        )
        footer = f")"
        return header + rmse_line + mae_line + duration_line + footer
78
+
79
+
80
@dataclass
class AggregationResult:
    """Result of time series aggregation.

    This class holds all outputs from the aggregation process and provides
    convenient methods for accessing and exporting the results.

    Attributes
    ----------
    cluster_representatives : pd.DataFrame
        The aggregated typical periods with MultiIndex (cluster, timestep).
        Each row represents one timestep in one cluster representative.

    cluster_assignments : np.ndarray
        Which cluster each original period belongs to.
        Length equals the number of original periods.
        Values are cluster indices (0 to n_clusters-1).

    cluster_weights : dict[int, int]
        How many original periods each cluster represents.
        Keys are cluster indices, values are occurrence counts.

    n_clusters : int
        Number of clusters (typical periods).

    n_timesteps_per_period : int
        Number of timesteps in each period.

    n_segments : int | None
        Number of segments per period if segmentation was used, else None.

    segment_durations : tuple[tuple[int, ...], ...] | None
        Duration (in timesteps) for each segment in each typical period.
        Outer tuple has one entry per typical period, inner tuple has
        duration for each segment. Use for transferring to another aggregation.

    accuracy : AccuracyMetrics
        Accuracy metrics comparing reconstructed to original data.

    clustering_duration : float
        Time taken for clustering in seconds.

    is_transferred : bool
        Whether this result was created by applying a transferred clustering
        (via ``ClusteringResult.apply()``) rather than by clustering this data directly.

    Examples
    --------
    >>> result = tsam.aggregate(df, n_clusters=8)
    >>> result.cluster_representatives
                      solar  wind  demand
    cluster timestep
    0       0          0.12  0.45    0.78
            1          0.15  0.42    0.82
    ...

    >>> result.cluster_weights
    {0: 45, 1: 52, 2: 38, ...}

    >>> result.accuracy.rmse
    solar     0.023
    wind      0.041
    demand    0.015
    dtype: float64
    """

    cluster_representatives: pd.DataFrame
    cluster_weights: dict[int, int]
    n_timesteps_per_period: int
    segment_durations: tuple[tuple[int, ...], ...] | None
    accuracy: AccuracyMetrics
    clustering_duration: float
    clustering: ClusteringResult
    is_transferred: bool
    # Backing aggregation object; excluded from repr and equality.
    _aggregation: TimeSeriesAggregation = field(repr=False, compare=False)

    @cached_property
    def n_clusters(self) -> int:
        """Number of clusters (typical periods).

        Derived from the cluster_representatives DataFrame index,
        which is the authoritative source. Note: cluster_weights may
        have more entries than actual cluster IDs due to tsam quirks.
        """
        return self.cluster_representatives.index.get_level_values(0).nunique()

    @cached_property
    def n_segments(self) -> int | None:
        """Number of segments per period if segmentation was used, else None."""
        return self.clustering.n_segments

    @cached_property
    def cluster_assignments(self) -> np.ndarray:
        """Which cluster each original period belongs to.

        Length equals the number of original periods.
        Values are cluster indices (0 to n_clusters-1).
        """
        return np.array(self.clustering.cluster_assignments)

    def __repr__(self) -> str:
        # Optional parts are only shown when they carry information.
        seg_info = f", n_segments={self.n_segments}" if self.n_segments else ""
        transferred_info = ", is_transferred=True" if self.is_transferred else ""
        return (
            f"AggregationResult(\n"
            f" n_clusters={self.n_clusters},\n"
            f" n_timesteps_per_period={self.n_timesteps_per_period}{seg_info}{transferred_info},\n"
            f" accuracy={self.accuracy}\n"
            f")"
        )

    @cached_property
    def original(self) -> pd.DataFrame:
        """Original time series data.

        Returns
        -------
        pd.DataFrame
            The ``timeSeries`` attribute of the backing aggregation object.

        Examples
        --------
        >>> result = tsam.aggregate(df, n_clusters=8)
        >>> result.original.shape == df.shape
        True
        """
        return cast("pd.DataFrame", self._aggregation.timeSeries)

    @cached_property
    def reconstructed(self) -> pd.DataFrame:
        """Reconstructed time series from typical periods.

        Each original period is replaced by its assigned cluster representative.
        This is cached for performance since reconstruction can be expensive.

        Returns
        -------
        pd.DataFrame
            Reconstructed time series with same shape as original.

        Examples
        --------
        >>> result = tsam.aggregate(df, n_clusters=8)
        >>> result.reconstructed.shape == df.shape
        True
        """
        return cast("pd.DataFrame", self._aggregation.predictOriginalData())

    @cached_property
    def residuals(self) -> pd.DataFrame:
        """Residuals (original - reconstructed).

        Positive values indicate the original exceeded the reconstruction.

        Returns
        -------
        pd.DataFrame
            Residual time series with same shape as original.

        Examples
        --------
        >>> result = tsam.aggregate(df, n_clusters=8)
        >>> result.residuals.mean()  # Should be close to zero
        """
        return self.original - self.reconstructed

    def to_dict(self) -> dict:
        """Export results as a dictionary for serialization.

        Returns
        -------
        dict
            Dictionary with the representatives, assignments, weights,
            size information, clustering, accuracy metrics and clustering
            duration. Pandas objects are converted via their ``to_dict()``.
        """
        return {
            "cluster_representatives": self.cluster_representatives.to_dict(),
            "cluster_assignments": self.cluster_assignments.tolist(),
            "cluster_weights": self.cluster_weights,
            "n_clusters": self.n_clusters,
            "n_timesteps_per_period": self.n_timesteps_per_period,
            "n_segments": self.n_segments,
            "segment_durations": self.segment_durations,
            "clustering": self.clustering.to_dict(),
            "accuracy": {
                "rmse": self.accuracy.rmse.to_dict(),
                "mae": self.accuracy.mae.to_dict(),
                "rmse_duration": self.accuracy.rmse_duration.to_dict(),
                "rescale_deviations": self.accuracy.rescale_deviations.to_dict(),
            },
            "clustering_duration": self.clustering_duration,
        }

    @property
    def timestep_index(self) -> list[int]:
        """Get the timestep or segment indices.

        Returns
        -------
        list[int]
            List of indices [0, 1, ..., n-1] where n is n_segments
            if segmentation was used, otherwise n_timesteps_per_period.
        """
        n = self.n_segments if self.n_segments else self.n_timesteps_per_period
        return list(range(n))

    @property
    def period_index(self) -> list[int]:
        """Get the period (cluster) indices.

        Returns the actual cluster IDs from the cluster_representatives
        DataFrame, which is the authoritative source.

        Returns
        -------
        list[int]
            Sorted list of cluster indices present in cluster_representatives.
        """
        return sorted(self.cluster_representatives.index.get_level_values(0).unique())

    def _expand_segment_indices(self, segmented: pd.DataFrame) -> list:
        """Expand per-cluster segment structure to one segment index per timestep.

        ``segmented`` is indexed by cluster first; after selecting a cluster,
        index level 0 holds the segment step and level 1 its duration. The
        expansion is computed once per distinct cluster and reused for every
        original period assigned to that cluster.
        """
        per_cluster: dict = {}
        expanded: list = []
        for cluster_idx in self.cluster_assignments:
            if cluster_idx not in per_cluster:
                segment_data = segmented.loc[cluster_idx]
                segment_steps = segment_data.index.get_level_values(0).to_numpy()
                durations = segment_data.index.get_level_values(1).to_numpy()
                # Repeat each segment index by its duration
                per_cluster[cluster_idx] = np.repeat(segment_steps, durations).tolist()
            expanded.extend(per_cluster[cluster_idx])
        return expanded

    @property
    def assignments(self) -> pd.DataFrame:
        """Get timestep-level assignment information.

        Returns a DataFrame with one row per original timestep containing
        assignment information for transferring results to another aggregation.

        If the aggregation's time index is shorter than
        ``n_periods * n_timesteps_per_period``, the surplus trailing rows are
        dropped so the columns line up with the index.
        NOTE(review): this presumably occurs when the final period was padded
        because the series length is not a multiple of the period length -
        confirm against the aggregation code.

        Columns
        -------
        period_idx : int
            Index of the original period (0-indexed, 0 to n_original_periods-1).
        timestep_idx : int
            Timestep index within the period (0 to n_timesteps_per_period-1).
        cluster_idx : int
            Which cluster this period is assigned to (0 to n_clusters-1).
        segment_idx : int (only if segmentation was used)
            Which segment this timestep belongs to within its period.

        Returns
        -------
        pd.DataFrame
            DataFrame indexed by original time index with assignment columns.

        Raises
        ------
        ValueError
            If the time index is longer than the number of generated rows.

        Examples
        --------
        >>> result = tsam.aggregate(df, n_clusters=8)
        >>> result.assignments.head()
                             period_idx  timestep_idx  cluster_idx
        2010-01-01 00:00:00           0             0            3
        2010-01-01 01:00:00           0             1            3
        ...

        >>> # Save and reload assignments
        >>> result.assignments.to_csv("assignments.csv")
        """
        agg = self._aggregation
        time_index = agg.timeIndex
        n_rows = len(time_index)
        steps_per_period = self.n_timesteps_per_period
        cluster_of_period = self.cluster_assignments
        n_periods = len(cluster_of_period)

        # Vectorized equivalent of looping over every (period, timestep) pair;
        # the slice drops rows beyond the end of the time index.
        result_df = pd.DataFrame(
            {
                "period_idx": np.repeat(
                    np.arange(n_periods, dtype=np.int64), steps_per_period
                )[:n_rows],
                "timestep_idx": np.tile(
                    np.arange(steps_per_period, dtype=np.int64), n_periods
                )[:n_rows],
                "cluster_idx": np.repeat(cluster_of_period, steps_per_period)[:n_rows],
            },
            index=time_index,
        )

        # Add segment_idx if segmentation was used
        if self.n_segments is not None and hasattr(
            agg, "segmentedNormalizedTypicalPeriods"
        ):
            segment_indices = self._expand_segment_indices(
                agg.segmentedNormalizedTypicalPeriods
            )
            result_df["segment_idx"] = segment_indices[:n_rows]

        return result_df

    @property
    def plot(self) -> ResultPlotAccessor:
        """Access plotting methods.

        Returns a plotting accessor with methods for visualizing the results.

        Returns
        -------
        ResultPlotAccessor
            Accessor with plotting methods.

        Examples
        --------
        >>> result = tsam.aggregate(df, n_clusters=8)
        >>> result.plot.compare()  # Compare original vs reconstructed
        >>> result.plot.residuals()  # View reconstruction errors
        >>> result.plot.cluster_representatives()
        >>> result.plot.cluster_weights()
        >>> result.plot.accuracy()
        """
        # Imported lazily; at module level this name is only imported for type checking.
        from tsam.plot import ResultPlotAccessor

        return ResultPlotAccessor(self)