tsam 2.2.2__py3-none-any.whl → 2.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,1309 +1,1343 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- import copy
4
- import time
5
- import warnings
6
-
7
- import pandas as pd
8
- import numpy as np
9
-
10
- from sklearn.metrics import mean_squared_error, mean_absolute_error
11
- from sklearn.metrics.pairwise import euclidean_distances
12
- from sklearn import preprocessing
13
-
14
- from tsam.periodAggregation import aggregatePeriods
15
- from tsam.representations import representations
16
-
17
- pd.set_option("mode.chained_assignment", None)
18
-
19
- # max iterator while resacling cluster profiles
20
- MAX_ITERATOR = 20
21
-
22
- # tolerance while rescaling cluster periods to meet the annual sum of the original profile
23
- TOLERANCE = 1e-6
24
-
25
-
26
- # minimal weight that overwrites a weighting of zero in order to carry the profile through the aggregation process
27
- MIN_WEIGHT = 1e-6
28
-
29
-
30
- def unstackToPeriods(timeSeries, timeStepsPerPeriod):
31
- """
32
- Extend the timeseries to an integer multiple of the period length and
33
- groups the time series to the periods.
34
-
35
- :param timeSeries:
36
- :type timeSeries: pandas DataFrame
37
-
38
- :param timeStepsPerPeriod: The number of discrete timesteps which describe one period. required
39
- :type timeStepsPerPeriod: integer
40
-
41
- :returns: - **unstackedTimeSeries** (pandas DataFrame) -- is stacked such that each row represents a
42
- candidate period
43
- - **timeIndex** (pandas Series index) -- is the modification of the original
44
- timeseriesindex in case an integer multiple was created
45
- """
46
- # init new grouped timeindex
47
- unstackedTimeSeries = timeSeries.copy()
48
-
49
- # initialize new indices
50
- periodIndex = []
51
- stepIndex = []
52
-
53
- # extend to inger multiple of period length
54
- if len(timeSeries) % timeStepsPerPeriod == 0:
55
- attached_timesteps = 0
56
- else:
57
- # calculate number of timesteps which get attached
58
- attached_timesteps = timeStepsPerPeriod - len(timeSeries) % timeStepsPerPeriod
59
-
60
- # take these from the head of the original time series
61
- rep_data = unstackedTimeSeries.head(attached_timesteps)
62
-
63
- # append them at the end of the time series
64
- unstackedTimeSeries = unstackedTimeSeries.append(rep_data, ignore_index=False)
65
-
66
- # create period and step index
67
- for ii in range(0, len(unstackedTimeSeries)):
68
- periodIndex.append(int(ii / timeStepsPerPeriod))
69
- stepIndex.append(ii - int(ii / timeStepsPerPeriod) * timeStepsPerPeriod)
70
-
71
- # save old index
72
- timeIndex = copy.deepcopy(unstackedTimeSeries.index)
73
-
74
- # create new double index and unstack the time series
75
- unstackedTimeSeries.index = pd.MultiIndex.from_arrays(
76
- [stepIndex, periodIndex], names=["TimeStep", "PeriodNum"]
77
- )
78
- unstackedTimeSeries = unstackedTimeSeries.unstack(level="TimeStep")
79
-
80
- return unstackedTimeSeries, timeIndex
81
-
82
-
83
-
84
- class TimeSeriesAggregation(object):
85
- """
86
- Clusters time series data to typical periods.
87
- """
88
-
89
- CLUSTER_METHODS = [
90
- "averaging",
91
- "k_means",
92
- "k_medoids",
93
- "k_maxoids",
94
- "hierarchical",
95
- "adjacent_periods",
96
- ]
97
-
98
- REPRESENTATION_METHODS = [
99
- "meanRepresentation",
100
- "medoidRepresentation",
101
- "maxoidRepresentation",
102
- "minmaxmeanRepresentation",
103
- "durationRepresentation",
104
- "distributionRepresentation",
105
- "distributionAndMinMaxRepresentation",
106
- ]
107
-
108
- EXTREME_PERIOD_METHODS = [
109
- "None",
110
- "append",
111
- "new_cluster_center",
112
- "replace_cluster_center",
113
- ]
114
-
115
- def __init__(
116
- self,
117
- timeSeries,
118
- resolution=None,
119
- noTypicalPeriods=10,
120
- noSegments=10,
121
- hoursPerPeriod=24,
122
- clusterMethod="hierarchical",
123
- evalSumPeriods=False,
124
- sortValues=False,
125
- sameMean=False,
126
- rescaleClusterPeriods=True,
127
- weightDict=None,
128
- segmentation=False,
129
- extremePeriodMethod="None",
130
- representationMethod=None,
131
- representationDict=None,
132
- distributionPeriodWise=True,
133
- predefClusterOrder=None,
134
- predefClusterCenterIndices=None,
135
- solver="highs",
136
- roundOutput=None,
137
- addPeakMin=None,
138
- addPeakMax=None,
139
- addMeanMin=None,
140
- addMeanMax=None,
141
- ):
142
- """
143
- Initialize the periodly clusters.
144
-
145
- :param timeSeries: DataFrame with the datetime as index and the relevant
146
- time series parameters as columns. required
147
- :type timeSeries: pandas.DataFrame() or dict
148
-
149
- :param resolution: Resolution of the time series in hours [h]. If timeSeries is a
150
- pandas.DataFrame() the resolution is derived from the datetime
151
- index. optional, default: delta_T in timeSeries
152
- :type resolution: float
153
-
154
- :param hoursPerPeriod: Value which defines the length of a cluster period. optional, default: 24
155
- :type hoursPerPeriod: integer
156
-
157
- :param noTypicalPeriods: Number of typical Periods - equivalent to the number of clusters. optional, default: 10
158
- :type noTypicalPeriods: integer
159
-
160
- :param noSegments: Number of segments in which the typical periods shoul be subdivided - equivalent to the
161
- number of inner-period clusters. optional, default: 10
162
- :type noSegments: integer
163
-
164
- :param clusterMethod: Chosen clustering method. optional, default: 'hierarchical'
165
- |br| Options are:
166
-
167
- * 'averaging'
168
- * 'k_means'
169
- * 'k_medoids'
170
- * 'k_maxoids'
171
- * 'hierarchical'
172
- * 'adjacent_periods'
173
- :type clusterMethod: string
174
-
175
- :param evalSumPeriods: Boolean if in the clustering process also the averaged periodly values
176
- shall be integrated additional to the periodly profiles as parameters. optional, default: False
177
- :type evalSumPeriods: boolean
178
-
179
- :param sameMean: Boolean which is used in the normalization procedure. If true, all time series get normalized
180
- such that they have the same mean value. optional, default: False
181
- :type sameMean: boolean
182
-
183
- :param sortValues: Boolean if the clustering should be done by the periodly duration
184
- curves (true) or the original shape of the data. optional (default: False)
185
- :type sortValues: boolean
186
-
187
- :param rescaleClusterPeriods: Decides if the cluster Periods shall get rescaled such that their
188
- weighted mean value fits the mean value of the original time series. optional (default: True)
189
- :type rescaleClusterPeriods: boolean
190
-
191
- :param weightDict: Dictionary which weights the profiles. It is done by scaling
192
- the time series while the normalization process. Normally all time
193
- series have a scale from 0 to 1. By scaling them, the values get
194
- different distances to each other and with this, they are
195
- differently evaluated while the clustering process. optional (default: None )
196
- :type weightDict: dict
197
-
198
- :param extremePeriodMethod: Method how to integrate extreme Periods (peak demand, lowest temperature etc.)
199
- into to the typical period profiles. optional, default: 'None'
200
- |br| Options are:
201
-
202
- * None: No integration at all.
203
- * 'append': append typical Periods to cluster centers
204
- * 'new_cluster_center': add the extreme period as additional cluster center. It is checked then for all
205
- Periods if they fit better to the this new center or their original cluster center.
206
- * 'replace_cluster_center': replaces the cluster center of the
207
- cluster where the extreme period belongs to with the periodly profile of the extreme period. (Worst
208
- case system design)
209
- :type extremePeriodMethod: string
210
-
211
- :param representationMethod: Chosen representation. If specified, the clusters are represented in the chosen
212
- way. Otherwise, each clusterMethod has its own commonly used default representation method.
213
- |br| Options are:
214
-
215
- * 'meanRepresentation' (default of 'averaging' and 'k_means')
216
- * 'medoidRepresentation' (default of 'k_medoids', 'hierarchical' and 'adjacent_periods')
217
- * 'minmaxmeanRepresentation'
218
- * 'durationRepresentation'/ 'distributionRepresentation'
219
- * 'distribtionAndMinMaxRepresentation'
220
- :type representationMethod: string
221
-
222
- :param representationDict: Dictionary which states for each attribute whether the profiles in each cluster
223
- should be represented by the minimum value or maximum value of each time step. This enables estimations
224
- to the safe side. This dictionary is needed when 'minmaxmeanRepresentation' is chosen. If not specified, the
225
- dictionary is set to containing 'mean' values only.
226
- :type representationDict: dict
227
-
228
- :param distributionPeriodWise: If durationRepresentation is chosen, you can choose whether the distribution of
229
- each cluster should be separately preserved or that of the original time series only (default: True)
230
- :type distributionPeriodWise:
231
-
232
- :param predefClusterOrder: Instead of aggregating a time series, a predefined grouping is taken
233
- which is given by this list. optional (default: None)
234
- :type predefClusterOrder: list or array
235
-
236
- :param predefClusterCenterIndices: If predefClusterOrder is give, this list can define the representative
237
- cluster candidates. Otherwise the medoid is taken. optional (default: None)
238
- :type predefClusterCenterIndices: list or array
239
-
240
- :param solver: Solver that is used for k_medoids clustering. optional (default: 'cbc' )
241
- :type solver: string
242
-
243
- :param roundOutput: Decimals to what the output time series get round. optional (default: None )
244
- :type roundOutput: integer
245
-
246
- :param addPeakMin: List of column names which's minimal value shall be added to the
247
- typical periods. E.g.: ['Temperature']. optional, default: []
248
- :type addPeakMin: list
249
-
250
- :param addPeakMax: List of column names which's maximal value shall be added to the
251
- typical periods. E.g. ['EDemand', 'HDemand']. optional, default: []
252
- :type addPeakMax: list
253
-
254
- :param addMeanMin: List of column names where the period with the cumulative minimal value
255
- shall be added to the typical periods. E.g. ['Photovoltaic']. optional, default: []
256
- :type addMeanMin: list
257
-
258
- :param addMeanMax: List of column names where the period with the cumulative maximal value
259
- shall be added to the typical periods. optional, default: []
260
- :type addMeanMax: list
261
- """
262
- if addMeanMin is None:
263
- addMeanMin = []
264
- if addMeanMax is None:
265
- addMeanMax = []
266
- if addPeakMax is None:
267
- addPeakMax = []
268
- if addPeakMin is None:
269
- addPeakMin = []
270
- if weightDict is None:
271
- weightDict = {}
272
- self.timeSeries = timeSeries
273
-
274
- self.resolution = resolution
275
-
276
- self.hoursPerPeriod = hoursPerPeriod
277
-
278
- self.noTypicalPeriods = noTypicalPeriods
279
-
280
- self.noSegments = noSegments
281
-
282
- self.clusterMethod = clusterMethod
283
-
284
- self.extremePeriodMethod = extremePeriodMethod
285
-
286
- self.evalSumPeriods = evalSumPeriods
287
-
288
- self.sortValues = sortValues
289
-
290
- self.sameMean = sameMean
291
-
292
- self.rescaleClusterPeriods = rescaleClusterPeriods
293
-
294
- self.weightDict = weightDict
295
-
296
- self.representationMethod = representationMethod
297
-
298
- self.representationDict = representationDict
299
-
300
- self.distributionPeriodWise = distributionPeriodWise
301
-
302
- self.predefClusterOrder = predefClusterOrder
303
-
304
- self.predefClusterCenterIndices = predefClusterCenterIndices
305
-
306
- self.solver = solver
307
-
308
- self.segmentation = segmentation
309
-
310
- self.roundOutput = roundOutput
311
-
312
- self.addPeakMin = addPeakMin
313
-
314
- self.addPeakMax = addPeakMax
315
-
316
- self.addMeanMin = addMeanMin
317
-
318
- self.addMeanMax = addMeanMax
319
-
320
- self._check_init_args()
321
-
322
- # internal attributes
323
- self._normalizedMean = None
324
-
325
- return
326
-
327
- def _check_init_args(self):
328
-
329
- # check timeSeries and set it as pandas DataFrame
330
- if not isinstance(self.timeSeries, pd.DataFrame):
331
- if isinstance(self.timeSeries, dict):
332
- self.timeSeries = pd.DataFrame(self.timeSeries)
333
- elif isinstance(self.timeSeries, np.ndarray):
334
- self.timeSeries = pd.DataFrame(self.timeSeries)
335
- else:
336
- raise ValueError(
337
- "timeSeries has to be of type pandas.DataFrame() "
338
- + "or of type np.array() "
339
- "in initialization of object of class " + type(self).__name__
340
- )
341
-
342
- # check if extreme periods exist in the dataframe
343
- for peak in self.addPeakMin:
344
- if peak not in self.timeSeries.columns:
345
- raise ValueError(
346
- peak
347
- + ' listed in "addPeakMin"'
348
- + " does not occur as timeSeries column"
349
- )
350
- for peak in self.addPeakMax:
351
- if peak not in self.timeSeries.columns:
352
- raise ValueError(
353
- peak
354
- + ' listed in "addPeakMax"'
355
- + " does not occur as timeSeries column"
356
- )
357
- for peak in self.addMeanMin:
358
- if peak not in self.timeSeries.columns:
359
- raise ValueError(
360
- peak
361
- + ' listed in "addMeanMin"'
362
- + " does not occur as timeSeries column"
363
- )
364
- for peak in self.addMeanMax:
365
- if peak not in self.timeSeries.columns:
366
- raise ValueError(
367
- peak
368
- + ' listed in "addMeanMax"'
369
- + " does not occur as timeSeries column"
370
- )
371
-
372
- # derive resolution from date time index if not provided
373
- if self.resolution is None:
374
- try:
375
- timedelta = self.timeSeries.index[1] - self.timeSeries.index[0]
376
- self.resolution = float(timedelta.total_seconds()) / 3600
377
- except AttributeError:
378
- raise ValueError(
379
- "'resolution' argument has to be nonnegative float or int"
380
- + " or the given timeseries needs a datetime index"
381
- )
382
- except TypeError:
383
- try:
384
- self.timeSeries.index = pd.to_datetime(self.timeSeries.index)
385
- timedelta = self.timeSeries.index[1] - self.timeSeries.index[0]
386
- self.resolution = float(timedelta.total_seconds()) / 3600
387
- except:
388
- raise ValueError(
389
- "'resolution' argument has to be nonnegative float or int"
390
- + " or the given timeseries needs a datetime index"
391
- )
392
-
393
- if not (isinstance(self.resolution, int) or isinstance(self.resolution, float)):
394
- raise ValueError("resolution has to be nonnegative float or int")
395
-
396
- # check hoursPerPeriod
397
- if self.hoursPerPeriod is None or self.hoursPerPeriod <= 0:
398
- raise ValueError("hoursPerPeriod has to be nonnegative float or int")
399
-
400
- # check typical Periods
401
- if (
402
- self.noTypicalPeriods is None
403
- or self.noTypicalPeriods <= 0
404
- or not isinstance(self.noTypicalPeriods, int)
405
- ):
406
- raise ValueError("noTypicalPeriods has to be nonnegative integer")
407
- self.timeStepsPerPeriod = int(self.hoursPerPeriod / self.resolution)
408
- if not self.timeStepsPerPeriod == self.hoursPerPeriod / self.resolution:
409
- raise ValueError(
410
- "The combination of hoursPerPeriod and the "
411
- + "resulution does not result in an integer "
412
- + "number of time steps per period"
413
- )
414
- if self.segmentation:
415
- if self.noSegments > self.timeStepsPerPeriod:
416
- warnings.warn(
417
- "The number of segments must be less than or equal to the number of time steps per period. "
418
- "Segment number is decreased to number of time steps per period."
419
- )
420
- self.noSegments = self.timeStepsPerPeriod
421
-
422
- # check clusterMethod
423
- if self.clusterMethod not in self.CLUSTER_METHODS:
424
- raise ValueError(
425
- "clusterMethod needs to be one of "
426
- + "the following: "
427
- + "{}".format(self.CLUSTER_METHODS)
428
- )
429
-
430
- # check representationMethod
431
- if (
432
- self.representationMethod is not None
433
- and self.representationMethod not in self.REPRESENTATION_METHODS
434
- ):
435
- raise ValueError(
436
- "If specified, representationMethod needs to be one of "
437
- + "the following: "
438
- + "{}".format(self.REPRESENTATION_METHODS)
439
- )
440
-
441
- # if representationDict None, represent by maximum time steps in each cluster
442
- if self.representationDict is None:
443
- self.representationDict = {i: "mean" for i in list(self.timeSeries.columns)}
444
- # sort representationDict alphabetically to make sure that the min, max or mean function is applied to the right
445
- # column
446
- self.representationDict = (
447
- pd.Series(self.representationDict).sort_index(axis=0).to_dict()
448
- )
449
-
450
- # check extremePeriods
451
- if self.extremePeriodMethod not in self.EXTREME_PERIOD_METHODS:
452
- raise ValueError(
453
- "extremePeriodMethod needs to be one of "
454
- + "the following: "
455
- + "{}".format(self.EXTREME_PERIOD_METHODS)
456
- )
457
-
458
- # check evalSumPeriods
459
- if not isinstance(self.evalSumPeriods, bool):
460
- raise ValueError("evalSumPeriods has to be boolean")
461
- # check sortValues
462
- if not isinstance(self.sortValues, bool):
463
- raise ValueError("sortValues has to be boolean")
464
- # check sameMean
465
- if not isinstance(self.sameMean, bool):
466
- raise ValueError("sameMean has to be boolean")
467
- # check rescaleClusterPeriods
468
- if not isinstance(self.rescaleClusterPeriods, bool):
469
- raise ValueError("rescaleClusterPeriods has to be boolean")
470
-
471
- # check predefClusterOrder
472
- if self.predefClusterOrder is not None:
473
- if not isinstance(self.predefClusterOrder, (list, np.ndarray)):
474
- raise ValueError("predefClusterOrder has to be an array or list")
475
- if self.predefClusterCenterIndices is not None:
476
- # check predefClusterCenterIndices
477
- if not isinstance(self.predefClusterCenterIndices, (list, np.ndarray)):
478
- raise ValueError(
479
- "predefClusterCenterIndices has to be an array or list"
480
- )
481
- elif self.predefClusterCenterIndices is not None:
482
- raise ValueError(
483
- 'If "predefClusterCenterIndices" is defined, "predefClusterOrder" needs to be defined as well'
484
- )
485
-
486
- return
487
-
488
- def _normalizeTimeSeries(self, sameMean=False):
489
- """
490
- Normalizes each time series independently.
491
-
492
- :param sameMean: Decides if the time series should have all the same mean value.
493
- Relevant for weighting time series. optional (default: False)
494
- :type sameMean: boolean
495
-
496
- :returns: normalized time series
497
- """
498
- min_max_scaler = preprocessing.MinMaxScaler()
499
- normalizedTimeSeries = pd.DataFrame(
500
- min_max_scaler.fit_transform(self.timeSeries),
501
- columns=self.timeSeries.columns,
502
- index=self.timeSeries.index,
503
- )
504
-
505
- self._normalizedMean = normalizedTimeSeries.mean()
506
- if sameMean:
507
- normalizedTimeSeries /= self._normalizedMean
508
-
509
- return normalizedTimeSeries
510
-
511
- def _unnormalizeTimeSeries(self, normalizedTimeSeries, sameMean=False):
512
- """
513
- Equivalent to '_normalizeTimeSeries'. Just does the back
514
- transformation.
515
-
516
- :param normalizedTimeSeries: Time series which should get back transformated. required
517
- :type normalizedTimeSeries: pandas.DataFrame()
518
-
519
- :param sameMean: Has to have the same value as in _normalizeTimeSeries. optional (default: False)
520
- :type sameMean: boolean
521
-
522
- :returns: unnormalized time series
523
- """
524
- from sklearn import preprocessing
525
-
526
- min_max_scaler = preprocessing.MinMaxScaler()
527
- min_max_scaler.fit(self.timeSeries)
528
-
529
- if sameMean:
530
- normalizedTimeSeries *= self._normalizedMean
531
-
532
- unnormalizedTimeSeries = pd.DataFrame(
533
- min_max_scaler.inverse_transform(normalizedTimeSeries),
534
- columns=normalizedTimeSeries.columns,
535
- index=normalizedTimeSeries.index,
536
- )
537
-
538
- return unnormalizedTimeSeries
539
-
540
- def _preProcessTimeSeries(self):
541
- """
542
- Normalize the time series, weight them based on the weight dict and
543
- puts them into the correct matrix format.
544
- """
545
- # first sort the time series in order to avoid bug mention in #18
546
- self.timeSeries.sort_index(axis=1, inplace=True)
547
-
548
- # convert the dataframe to floats
549
- self.timeSeries = self.timeSeries.astype(float)
550
-
551
- # normalize the time series and group them to periodly profiles
552
- self.normalizedTimeSeries = self._normalizeTimeSeries(sameMean=self.sameMean)
553
-
554
- for column in self.weightDict:
555
- if self.weightDict[column] < MIN_WEIGHT:
556
- print(
557
- 'weight of "'
558
- + str(column)
559
- + '" set to the minmal tolerable weighting'
560
- )
561
- self.weightDict[column] = MIN_WEIGHT
562
- self.normalizedTimeSeries[column] = (
563
- self.normalizedTimeSeries[column] * self.weightDict[column]
564
- )
565
-
566
- self.normalizedPeriodlyProfiles, self.timeIndex = unstackToPeriods(
567
- self.normalizedTimeSeries, self.timeStepsPerPeriod
568
- )
569
-
570
- # check if no NaN is in the resulting profiles
571
- if self.normalizedPeriodlyProfiles.isnull().values.any():
572
- raise ValueError(
573
- "Pre processed data includes NaN. Please check the timeSeries input data."
574
- )
575
-
576
- def _postProcessTimeSeries(self, normalizedTimeSeries, applyWeighting=True):
577
- """
578
- Neutralizes the weighting the time series back and unnormalizes them.
579
- """
580
- if applyWeighting:
581
- for column in self.weightDict:
582
- normalizedTimeSeries[column] = (
583
- normalizedTimeSeries[column] / self.weightDict[column]
584
- )
585
-
586
- unnormalizedTimeSeries = self._unnormalizeTimeSeries(
587
- normalizedTimeSeries, sameMean=self.sameMean
588
- )
589
-
590
- if self.roundOutput is not None:
591
- unnormalizedTimeSeries = unnormalizedTimeSeries.round(
592
- decimals=self.roundOutput
593
- )
594
-
595
- return unnormalizedTimeSeries
596
-
597
- def _addExtremePeriods(
598
- self,
599
- groupedSeries,
600
- clusterCenters,
601
- clusterOrder,
602
- extremePeriodMethod="new_cluster_center",
603
- addPeakMin=None,
604
- addPeakMax=None,
605
- addMeanMin=None,
606
- addMeanMax=None,
607
- ):
608
- """
609
- Adds different extreme periods based on the to the clustered data,
610
- decribed by the clusterCenters and clusterOrder.
611
-
612
- :param groupedSeries: periodly grouped groupedSeries on which basis it should be decided,
613
- which period is an extreme period. required
614
- :type groupedSeries: pandas.DataFrame()
615
-
616
- :param clusterCenters: Output from clustering with sklearn. required
617
- :type clusterCenters: dict
618
-
619
- :param clusterOrder: Output from clsutering with sklearn. required
620
- :type clusterOrder: dict
621
-
622
- :param extremePeriodMethod: Chosen extremePeriodMethod. The method. optional(default: 'new_cluster_center' )
623
- :type extremePeriodMethod: string
624
-
625
- :returns: - **newClusterCenters** -- The new cluster centers extended with the extreme periods.
626
- - **newClusterOrder** -- The new cluster order including the extreme periods.
627
- - **extremeClusterIdx** -- A list of indices where in the newClusterCenters are the extreme
628
- periods located.
629
- """
630
-
631
- # init required dicts and lists
632
- self.extremePeriods = {}
633
- extremePeriodNo = []
634
-
635
- ccList = [center.tolist() for center in clusterCenters]
636
-
637
- # check which extreme periods exist in the profile and add them to
638
- # self.extremePeriods dict
639
- for column in self.timeSeries.columns:
640
-
641
- if column in addPeakMax:
642
- stepNo = groupedSeries[column].max(axis=1).idxmax()
643
- # add only if stepNo is not already in extremePeriods
644
- # if it is not already a cluster center
645
- if (
646
- stepNo not in extremePeriodNo
647
- and groupedSeries.loc[stepNo, :].values.tolist() not in ccList
648
- ):
649
- max_col = self._append_col_with(column, " max.")
650
- self.extremePeriods[max_col] = {
651
- "stepNo": stepNo,
652
- "profile": groupedSeries.loc[stepNo, :].values,
653
- "column": column,
654
- }
655
- extremePeriodNo.append(stepNo)
656
-
657
- if column in addPeakMin:
658
- stepNo = groupedSeries[column].min(axis=1).idxmin()
659
- # add only if stepNo is not already in extremePeriods
660
- # if it is not already a cluster center
661
- if (
662
- stepNo not in extremePeriodNo
663
- and groupedSeries.loc[stepNo, :].values.tolist() not in ccList
664
- ):
665
- min_col = self._append_col_with(column, " min.")
666
- self.extremePeriods[min_col] = {
667
- "stepNo": stepNo,
668
- "profile": groupedSeries.loc[stepNo, :].values,
669
- "column": column,
670
- }
671
- extremePeriodNo.append(stepNo)
672
-
673
- if column in addMeanMax:
674
- stepNo = groupedSeries[column].mean(axis=1).idxmax()
675
- # add only if stepNo is not already in extremePeriods
676
- # if it is not already a cluster center
677
- if (
678
- stepNo not in extremePeriodNo
679
- and groupedSeries.loc[stepNo, :].values.tolist() not in ccList
680
- ):
681
- mean_max_col = self._append_col_with(column, " daily max.")
682
- self.extremePeriods[mean_max_col] = {
683
- "stepNo": stepNo,
684
- "profile": groupedSeries.loc[stepNo, :].values,
685
- "column": column,
686
- }
687
- extremePeriodNo.append(stepNo)
688
-
689
- if column in addMeanMin:
690
- stepNo = groupedSeries[column].mean(axis=1).idxmin()
691
- # add only if stepNo is not already in extremePeriods and
692
- # if it is not already a cluster center
693
- if (
694
- stepNo not in extremePeriodNo
695
- and groupedSeries.loc[stepNo, :].values.tolist() not in ccList
696
- ):
697
- mean_min_col = self._append_col_with(column, " daily min.")
698
- self.extremePeriods[mean_min_col] = {
699
- "stepNo": stepNo,
700
- "profile": groupedSeries.loc[stepNo, :].values,
701
- "column": column,
702
- }
703
- extremePeriodNo.append(stepNo)
704
-
705
- for periodType in self.extremePeriods:
706
- # get current related clusters of extreme periods
707
- self.extremePeriods[periodType]["clusterNo"] = clusterOrder[
708
- self.extremePeriods[periodType]["stepNo"]
709
- ]
710
-
711
- # init new cluster structure
712
- newClusterCenters = []
713
- newClusterOrder = clusterOrder
714
- extremeClusterIdx = []
715
-
716
- # integrate extreme periods to clusters
717
- if extremePeriodMethod == "append":
718
- # attach extreme periods to cluster centers
719
- for i, cluster_center in enumerate(clusterCenters):
720
- newClusterCenters.append(cluster_center)
721
- for i, periodType in enumerate(self.extremePeriods):
722
- extremeClusterIdx.append(len(newClusterCenters))
723
- newClusterCenters.append(self.extremePeriods[periodType]["profile"])
724
- newClusterOrder[self.extremePeriods[periodType]["stepNo"]] = i + len(
725
- clusterCenters
726
- )
727
-
728
- elif extremePeriodMethod == "new_cluster_center":
729
- for i, cluster_center in enumerate(clusterCenters):
730
- newClusterCenters.append(cluster_center)
731
- # attach extrem periods to cluster centers and consider for all periods
732
- # if the fit better to the cluster or the extrem period
733
- for i, periodType in enumerate(self.extremePeriods):
734
- extremeClusterIdx.append(len(newClusterCenters))
735
- newClusterCenters.append(self.extremePeriods[periodType]["profile"])
736
- self.extremePeriods[periodType]["newClusterNo"] = i + len(
737
- clusterCenters
738
- )
739
-
740
- for i, cPeriod in enumerate(newClusterOrder):
741
- # caclulate euclidean distance to cluster center
742
- cluster_dist = sum(
743
- (groupedSeries.iloc[i].values - clusterCenters[cPeriod]) ** 2
744
- )
745
- for ii, extremPeriodType in enumerate(self.extremePeriods):
746
- # exclude other extreme periods from adding to the new
747
- # cluster center
748
- isOtherExtreme = False
749
- for otherExPeriod in self.extremePeriods:
750
- if (
751
- i == self.extremePeriods[otherExPeriod]["stepNo"]
752
- and otherExPeriod != extremPeriodType
753
- ):
754
- isOtherExtreme = True
755
- # calculate distance to extreme periods
756
- extperiod_dist = sum(
757
- (
758
- groupedSeries.iloc[i].values
759
- - self.extremePeriods[extremPeriodType]["profile"]
760
- )
761
- ** 2
762
- )
763
- # choose new cluster relation
764
- if extperiod_dist < cluster_dist and not isOtherExtreme:
765
- newClusterOrder[i] = self.extremePeriods[extremPeriodType][
766
- "newClusterNo"
767
- ]
768
-
769
- elif extremePeriodMethod == "replace_cluster_center":
770
- # Worst Case Clusterperiods
771
- newClusterCenters = clusterCenters
772
- for periodType in self.extremePeriods:
773
- index = groupedSeries.columns.get_loc(
774
- self.extremePeriods[periodType]["column"]
775
- )
776
- newClusterCenters[self.extremePeriods[periodType]["clusterNo"]][
777
- index
778
- ] = self.extremePeriods[periodType]["profile"][index]
779
- if (
780
- not self.extremePeriods[periodType]["clusterNo"]
781
- in extremeClusterIdx
782
- ):
783
- extremeClusterIdx.append(
784
- self.extremePeriods[periodType]["clusterNo"]
785
- )
786
-
787
- return newClusterCenters, newClusterOrder, extremeClusterIdx
788
-
789
- def _append_col_with(self, column, append_with=" max."):
790
- """Appends a string to the column name. For MultiIndexes, which turn out to be
791
- tuples when this method is called, only last level is changed"""
792
- if isinstance(column, str):
793
- return column + append_with
794
- elif isinstance(column, tuple):
795
- col = list(column)
796
- col[-1] = col[-1] + append_with
797
- return tuple(col)
798
-
799
- def _rescaleClusterPeriods(self, clusterOrder, clusterPeriods, extremeClusterIdx):
800
- """
801
- Rescale the values of the clustered Periods such that mean of each time
802
- series in the typical Periods fits the mean value of the original time
803
- series, without changing the values of the extremePeriods.
804
- """
805
- weightingVec = pd.Series(self._clusterPeriodNoOccur).values
806
- typicalPeriods = pd.DataFrame(
807
- clusterPeriods, columns=self.normalizedPeriodlyProfiles.columns
808
- )
809
- idx_wo_peak = np.delete(typicalPeriods.index, extremeClusterIdx)
810
- for column in self.timeSeries.columns:
811
- diff = 1
812
- sum_raw = self.normalizedPeriodlyProfiles[column].sum().sum()
813
- sum_peak = sum(
814
- weightingVec[extremeClusterIdx]
815
- * typicalPeriods[column].loc[extremeClusterIdx, :].sum(axis=1)
816
- )
817
- sum_clu_wo_peak = sum(
818
- weightingVec[idx_wo_peak]
819
- * typicalPeriods[column].loc[idx_wo_peak, :].sum(axis=1)
820
- )
821
-
822
- # define the upper scale dependent on the weighting of the series
823
- scale_ub = 1.0
824
- if self.sameMean:
825
- scale_ub = (
826
- scale_ub
827
- * self.timeSeries[column].max()
828
- / self.timeSeries[column].mean()
829
- )
830
- if column in self.weightDict:
831
- scale_ub = scale_ub * self.weightDict[column]
832
-
833
- # difference between predicted and original sum
834
- diff = abs(sum_raw - (sum_clu_wo_peak + sum_peak))
835
-
836
- # use while loop to rescale cluster periods
837
- a = 0
838
- while diff > sum_raw * TOLERANCE and a < MAX_ITERATOR:
839
- # rescale values
840
- typicalPeriods.loc[idx_wo_peak, column] = (
841
- typicalPeriods[column].loc[idx_wo_peak, :].values
842
- * (sum_raw - sum_peak)
843
- / sum_clu_wo_peak
844
- )
845
-
846
- # reset values higher than the upper sacle or less than zero
847
- typicalPeriods[column][typicalPeriods[column] > scale_ub] = scale_ub
848
- typicalPeriods[column][typicalPeriods[column] < 0.0] = 0.0
849
-
850
- typicalPeriods[column] = typicalPeriods[column].fillna(0.0)
851
-
852
- # calc new sum and new diff to orig data
853
- sum_clu_wo_peak = sum(
854
- weightingVec[idx_wo_peak]
855
- * typicalPeriods[column].loc[idx_wo_peak, :].sum(axis=1)
856
- )
857
- diff = abs(sum_raw - (sum_clu_wo_peak + sum_peak))
858
- a += 1
859
- if a == MAX_ITERATOR:
860
- deviation = str(round((diff / sum_raw) * 100, 2))
861
- warnings.warn(
862
- 'Max iteration number reached for "'
863
- + str(column)
864
- + '" while rescaling the cluster periods.'
865
- + " The integral of the aggregated time series deviates by: "
866
- + deviation
867
- + "%"
868
- )
869
- return typicalPeriods.values
870
-
871
- def _clusterSortedPeriods(self, candidates, n_init=20):
872
- """
873
- Runs the clustering algorithms for the sorted profiles within the period
874
- instead of the original profiles. (Duration curve clustering)
875
- """
876
- # initialize
877
- normalizedSortedPeriodlyProfiles = copy.deepcopy(
878
- self.normalizedPeriodlyProfiles
879
- )
880
- for column in self.timeSeries.columns:
881
- # sort each period individually
882
- df = normalizedSortedPeriodlyProfiles[column]
883
- values = df.values
884
- values.sort(axis=1)
885
- values = values[:, ::-1]
886
- normalizedSortedPeriodlyProfiles[column] = pd.DataFrame(
887
- values, df.index, df.columns
888
- )
889
- sortedClusterValues = normalizedSortedPeriodlyProfiles.values
890
-
891
- (
892
- altClusterCenters,
893
- self.clusterCenterIndices,
894
- clusterOrders_C,
895
- ) = aggregatePeriods(
896
- sortedClusterValues,
897
- n_clusters=self.noTypicalPeriods,
898
- n_iter=30,
899
- solver=self.solver,
900
- clusterMethod=self.clusterMethod,
901
- representationMethod=self.representationMethod,
902
- representationDict=self.representationDict,
903
- distributionPeriodWise=self.distributionPeriodWise,
904
- timeStepsPerPeriod=self.timeStepsPerPeriod,
905
- )
906
-
907
- clusterCenters_C = []
908
-
909
- # take the clusters and determine the most representative sorted
910
- # period as cluster center
911
- for clusterNum in np.unique(clusterOrders_C):
912
- indice = np.where(clusterOrders_C == clusterNum)[0]
913
- if len(indice) > 1:
914
- # mean value for each time step for each time series over
915
- # all Periods in the cluster
916
- currentMean_C = sortedClusterValues[indice].mean(axis=0)
917
- # index of the period with the lowest distance to the cluster
918
- # center
919
- mindistIdx_C = np.argmin(
920
- np.square(sortedClusterValues[indice] - currentMean_C).sum(axis=1)
921
- )
922
- # append original time series of this period
923
- medoid_C = candidates[indice][mindistIdx_C]
924
-
925
- # append to cluster center
926
- clusterCenters_C.append(medoid_C)
927
-
928
- else:
929
- # if only on period is part of the cluster, add this index
930
- clusterCenters_C.append(candidates[indice][0])
931
-
932
- return clusterCenters_C, clusterOrders_C
933
-
934
- def createTypicalPeriods(self):
935
- """
936
- Clusters the Periods.
937
-
938
- :returns: **self.typicalPeriods** -- All typical Periods in scaled form.
939
- """
940
- self._preProcessTimeSeries()
941
-
942
- # check for additional cluster parameters
943
- if self.evalSumPeriods:
944
- evaluationValues = (
945
- self.normalizedPeriodlyProfiles.stack(level=0)
946
- .sum(axis=1)
947
- .unstack(level=1)
948
- )
949
- # how many values have to get deleted later
950
- delClusterParams = -len(evaluationValues.columns)
951
- candidates = np.concatenate(
952
- (self.normalizedPeriodlyProfiles.values, evaluationValues.values),
953
- axis=1,
954
- )
955
- else:
956
- delClusterParams = None
957
- candidates = self.normalizedPeriodlyProfiles.values
958
-
959
- # skip aggregation procedure for the case of a predefined cluster sequence and get only the correct representation
960
- if not self.predefClusterOrder is None:
961
- self._clusterOrder = self.predefClusterOrder
962
- # check if representatives are defined
963
- if not self.predefClusterCenterIndices is None:
964
- self.clusterCenterIndices = self.predefClusterCenterIndices
965
- self.clusterCenters = candidates[self.predefClusterCenterIndices]
966
- else:
967
- # otherwise take the medoids
968
- self.clusterCenters, self.clusterCenterIndices = representations(
969
- candidates,
970
- self._clusterOrder,
971
- default="medoidRepresentation",
972
- representationMethod=self.representationMethod,
973
- representationDict=self.representationDict,
974
- timeStepsPerPeriod=self.timeStepsPerPeriod,
975
- )
976
- else:
977
- cluster_duration = time.time()
978
- if not self.sortValues:
979
- # cluster the data
980
- (
981
- self.clusterCenters,
982
- self.clusterCenterIndices,
983
- self._clusterOrder,
984
- ) = aggregatePeriods(
985
- candidates,
986
- n_clusters=self.noTypicalPeriods,
987
- n_iter=100,
988
- solver=self.solver,
989
- clusterMethod=self.clusterMethod,
990
- representationMethod=self.representationMethod,
991
- representationDict=self.representationDict,
992
- distributionPeriodWise=self.distributionPeriodWise,
993
- timeStepsPerPeriod=self.timeStepsPerPeriod,
994
- )
995
- else:
996
- self.clusterCenters, self._clusterOrder = self._clusterSortedPeriods(
997
- candidates
998
- )
999
- self.clusteringDuration = time.time() - cluster_duration
1000
-
1001
- # get cluster centers without additional evaluation values
1002
- self.clusterPeriods = []
1003
- for i, cluster_center in enumerate(self.clusterCenters):
1004
- self.clusterPeriods.append(cluster_center[:delClusterParams])
1005
-
1006
- if not self.extremePeriodMethod == "None":
1007
- # overwrite clusterPeriods and clusterOrder
1008
- (
1009
- self.clusterPeriods,
1010
- self._clusterOrder,
1011
- self.extremeClusterIdx,
1012
- ) = self._addExtremePeriods(
1013
- self.normalizedPeriodlyProfiles,
1014
- self.clusterPeriods,
1015
- self._clusterOrder,
1016
- extremePeriodMethod=self.extremePeriodMethod,
1017
- addPeakMin=self.addPeakMin,
1018
- addPeakMax=self.addPeakMax,
1019
- addMeanMin=self.addMeanMin,
1020
- addMeanMax=self.addMeanMax,
1021
- )
1022
- else:
1023
- self.extremeClusterIdx = []
1024
-
1025
- # get number of appearance of the the typical periods
1026
- nums, counts = np.unique(self._clusterOrder, return_counts=True)
1027
- self._clusterPeriodNoOccur = {num: counts[ii] for ii, num in enumerate(nums)}
1028
-
1029
- if self.rescaleClusterPeriods:
1030
- self.clusterPeriods = self._rescaleClusterPeriods(
1031
- self._clusterOrder, self.clusterPeriods, self.extremeClusterIdx
1032
- )
1033
-
1034
- # if additional time steps have been added, reduce the number of occurrence of the typical period
1035
- # which is related to these time steps
1036
- if not len(self.timeSeries) % self.timeStepsPerPeriod == 0:
1037
- self._clusterPeriodNoOccur[self._clusterOrder[-1]] -= (
1038
- 1
1039
- - float(len(self.timeSeries) % self.timeStepsPerPeriod)
1040
- / self.timeStepsPerPeriod
1041
- )
1042
-
1043
- # put the clustered data in pandas format and scale back
1044
- self.normalizedTypicalPeriods = pd.DataFrame(
1045
- self.clusterPeriods, columns=self.normalizedPeriodlyProfiles.columns
1046
- ).stack(level="TimeStep")
1047
-
1048
- if self.segmentation:
1049
- from tsam.utils.segmentation import segmentation
1050
-
1051
- (
1052
- self.segmentedNormalizedTypicalPeriods,
1053
- self.predictedSegmentedNormalizedTypicalPeriods,
1054
- ) = segmentation(
1055
- self.normalizedTypicalPeriods,
1056
- self.noSegments,
1057
- self.timeStepsPerPeriod,
1058
- self.solver,
1059
- representationMethod=self.representationMethod,
1060
- representationDict=self.representationDict,
1061
- distributionPeriodWise=self.distributionPeriodWise,
1062
- )
1063
- self.normalizedTypicalPeriods = (
1064
- self.segmentedNormalizedTypicalPeriods.reset_index(level=3, drop=True)
1065
- )
1066
-
1067
- self.typicalPeriods = self._postProcessTimeSeries(self.normalizedTypicalPeriods)
1068
-
1069
- # check if original time series boundaries are not exceeded
1070
- if np.array(
1071
- self.typicalPeriods.max(axis=0) > self.timeSeries.max(axis=0)
1072
- ).any():
1073
- warnings.warn(
1074
- "Something went wrong: At least one maximal value of the aggregated time series exceeds the maximal value the input time series"
1075
- )
1076
- if np.array(
1077
- self.typicalPeriods.min(axis=0) < self.timeSeries.min(axis=0)
1078
- ).any():
1079
- warnings.warn(
1080
- "Something went wrong: At least one minimal value of the aggregated time series exceeds the minimal value the input time series"
1081
- )
1082
- return self.typicalPeriods
1083
-
1084
- def prepareEnersysInput(self):
1085
- """
1086
- Creates all dictionaries and lists which are required for the energy system
1087
- optimization input.
1088
- """
1089
- warnings.warn(
1090
- '"prepareEnersysInput" is deprecated, since the created attributes can be directly accessed as properties',
1091
- DeprecationWarning,
1092
- )
1093
- return
1094
-
1095
- @property
1096
- def stepIdx(self):
1097
- """
1098
- Index inside a single cluster
1099
- """
1100
- if self.segmentation:
1101
- return [ix for ix in range(0, self.noSegments)]
1102
- else:
1103
- return [ix for ix in range(0, self.timeStepsPerPeriod)]
1104
-
1105
- @property
1106
- def clusterPeriodIdx(self):
1107
- """
1108
- Index of the clustered periods
1109
- """
1110
- if not hasattr(self, "clusterOrder"):
1111
- self.createTypicalPeriods()
1112
- return np.sort(np.unique(self._clusterOrder))
1113
-
1114
- @property
1115
- def clusterOrder(self):
1116
- """
1117
- How often does a typical period occur in the original time series
1118
- """
1119
- if not hasattr(self, "_clusterOrder"):
1120
- self.createTypicalPeriods()
1121
- return self._clusterOrder
1122
-
1123
- @property
1124
- def clusterPeriodNoOccur(self):
1125
- """
1126
- How often does a typical period occur in the original time series
1127
- """
1128
- if not hasattr(self, "clusterOrder"):
1129
- self.createTypicalPeriods()
1130
- return self._clusterPeriodNoOccur
1131
-
1132
- @property
1133
- def clusterPeriodDict(self):
1134
- """
1135
- Time series data for each period index as dictionary
1136
- """
1137
- if not hasattr(self, "_clusterOrder"):
1138
- self.createTypicalPeriods()
1139
- if not hasattr(self, "_clusterPeriodDict"):
1140
- self._clusterPeriodDict = {}
1141
- for column in self.typicalPeriods:
1142
- self._clusterPeriodDict[column] = self.typicalPeriods[column].to_dict()
1143
- return self._clusterPeriodDict
1144
-
1145
- @property
1146
- def segmentDurationDict(self):
1147
- """
1148
- Segment duration in time steps for each period index as dictionary
1149
- """
1150
- if not hasattr(self, "_clusterOrder"):
1151
- self.createTypicalPeriods()
1152
- if not hasattr(self, "_segmentDurationDict"):
1153
- if self.segmentation:
1154
- self._segmentDurationDict = (
1155
- self.segmentedNormalizedTypicalPeriods.drop(
1156
- self.segmentedNormalizedTypicalPeriods.columns, axis=1
1157
- )
1158
- .reset_index(level=3, drop=True)
1159
- .reset_index(2)
1160
- .to_dict()
1161
- )
1162
- else:
1163
- self._segmentDurationDict = self.typicalPeriods.drop(
1164
- self.typicalPeriods.columns, axis=1
1165
- )
1166
- self._segmentDurationDict["Segment Duration"] = 1
1167
- self._segmentDurationDict = self._segmentDurationDict.to_dict()
1168
- warnings.warn(
1169
- "Segmentation is turned off. All segments are consistent the time steps."
1170
- )
1171
- return self._segmentDurationDict
1172
-
1173
- def predictOriginalData(self):
1174
- """
1175
- Predicts the overall time series if every period would be placed in the
1176
- related cluster center
1177
-
1178
- :returns: **predictedData** (pandas.DataFrame) -- DataFrame which has the same shape as the original one.
1179
- """
1180
- if not hasattr(self, "_clusterOrder"):
1181
- self.createTypicalPeriods()
1182
-
1183
- # list up typical periods according to their order of occurrence using the _clusterOrder.
1184
- new_data = []
1185
- for label in self._clusterOrder:
1186
- # if segmentation is used, use the segmented typical periods with predicted time steps with the same number
1187
- # of time steps as unsegmented typical periods
1188
- if self.segmentation:
1189
- new_data.append(
1190
- self.predictedSegmentedNormalizedTypicalPeriods.loc[label, :]
1191
- .unstack()
1192
- .values
1193
- )
1194
- else:
1195
- # new_data.append(self.clusterPeriods[label])
1196
- new_data.append(
1197
- self.normalizedTypicalPeriods.loc[label, :].unstack().values
1198
- )
1199
-
1200
- # back in matrix
1201
- clustered_data_df = pd.DataFrame(
1202
- new_data,
1203
- columns=self.normalizedPeriodlyProfiles.columns,
1204
- index=self.normalizedPeriodlyProfiles.index,
1205
- )
1206
- clustered_data_df = clustered_data_df.stack(level="TimeStep")
1207
-
1208
- # back in form
1209
- self.normalizedPredictedData = pd.DataFrame(
1210
- clustered_data_df.values[: len(self.timeSeries)],
1211
- index=self.timeSeries.index,
1212
- columns=self.timeSeries.columns,
1213
- )
1214
- # normalize again if sameMean = True to avoid doubled unnormalization when using _postProcessTimeSeries after
1215
- # createTypicalPeriods has been called
1216
- if self.sameMean:
1217
- self.normalizedPredictedData /= self._normalizedMean
1218
- self.predictedData = self._postProcessTimeSeries(
1219
- self.normalizedPredictedData, applyWeighting=False
1220
- )
1221
-
1222
- return self.predictedData
1223
-
1224
- def indexMatching(self):
1225
- """
1226
- Relates the index of the original time series with the indices
1227
- represented by the clusters
1228
-
1229
- :returns: **timeStepMatching** (pandas.DataFrame) -- DataFrame which has the same shape as the original one.
1230
- """
1231
- if not hasattr(self, "_clusterOrder"):
1232
- self.createTypicalPeriods()
1233
-
1234
- # create aggregated period and time step index lists
1235
- periodIndex = []
1236
- stepIndex = []
1237
- for label in self._clusterOrder:
1238
- for step in range(self.timeStepsPerPeriod):
1239
- periodIndex.append(label)
1240
- stepIndex.append(step)
1241
-
1242
- # create a dataframe
1243
- timeStepMatching = pd.DataFrame(
1244
- [periodIndex, stepIndex],
1245
- index=["PeriodNum", "TimeStep"],
1246
- columns=self.timeIndex,
1247
- ).T
1248
-
1249
- # if segmentation is chosen, append another column stating which
1250
- if self.segmentation:
1251
- segmentIndex = []
1252
- for label in self._clusterOrder:
1253
- segmentIndex.extend(
1254
- np.repeat(
1255
- self.segmentedNormalizedTypicalPeriods.loc[
1256
- label, :
1257
- ].index.get_level_values(0),
1258
- self.segmentedNormalizedTypicalPeriods.loc[
1259
- label, :
1260
- ].index.get_level_values(1),
1261
- ).values
1262
- )
1263
- timeStepMatching = pd.DataFrame(
1264
- [periodIndex, stepIndex, segmentIndex],
1265
- index=["PeriodNum", "TimeStep", "SegmentIndex"],
1266
- columns=self.timeIndex,
1267
- ).T
1268
-
1269
- return timeStepMatching
1270
-
1271
- def accuracyIndicators(self):
1272
- """
1273
- Compares the predicted data with the original time series.
1274
-
1275
- :returns: **pd.DataFrame(indicatorRaw)** (pandas.DataFrame) -- Dataframe containing indicators evaluating the
1276
- accuracy of the
1277
- aggregation
1278
- """
1279
- if not hasattr(self, "predictedData"):
1280
- self.predictOriginalData()
1281
-
1282
- indicatorRaw = {
1283
- "RMSE": {},
1284
- "RMSE_duration": {},
1285
- "MAE": {},
1286
- } # 'Silhouette score':{},
1287
-
1288
- for column in self.normalizedTimeSeries.columns:
1289
- if self.weightDict:
1290
- origTS = self.normalizedTimeSeries[column] / self.weightDict[column]
1291
- else:
1292
- origTS = self.normalizedTimeSeries[column]
1293
- predTS = self.normalizedPredictedData[column]
1294
- indicatorRaw["RMSE"][column] = np.sqrt(mean_squared_error(origTS, predTS))
1295
- indicatorRaw["RMSE_duration"][column] = np.sqrt(
1296
- mean_squared_error(
1297
- origTS.sort_values(ascending=False).reset_index(drop=True),
1298
- predTS.sort_values(ascending=False).reset_index(drop=True),
1299
- )
1300
- )
1301
- indicatorRaw["MAE"][column] = mean_absolute_error(origTS, predTS)
1302
-
1303
- return pd.DataFrame(indicatorRaw)
1304
-
1305
- def totalAccuracyIndicators(self):
1306
- """
1307
- Derives the accuracy indicators over all time series
1308
- """
1309
- return np.sqrt(self.accuracyIndicators().pow(2).sum()/len(self.normalizedTimeSeries.columns))
1
+ # -*- coding: utf-8 -*-
2
+
3
+ import copy
4
+ import time
5
+ import warnings
6
+
7
+ import pandas as pd
8
+ import numpy as np
9
+
10
+ from sklearn.metrics import mean_squared_error, mean_absolute_error
11
+ from sklearn.metrics.pairwise import euclidean_distances
12
+ from sklearn import preprocessing
13
+
14
+ from tsam.periodAggregation import aggregatePeriods
15
+ from tsam.representations import representations
16
+
17
+ pd.set_option("mode.chained_assignment", None)
18
+
19
+ # max iterator while resacling cluster profiles
20
+ MAX_ITERATOR = 20
21
+
22
+ # tolerance while rescaling cluster periods to meet the annual sum of the original profile
23
+ TOLERANCE = 1e-6
24
+
25
+
26
+ # minimal weight that overwrites a weighting of zero in order to carry the profile through the aggregation process
27
+ MIN_WEIGHT = 1e-6
28
+
29
+
30
+ def unstackToPeriods(timeSeries, timeStepsPerPeriod):
31
+ """
32
+ Extend the timeseries to an integer multiple of the period length and
33
+ groups the time series to the periods.
34
+
35
+ :param timeSeries:
36
+ :type timeSeries: pandas DataFrame
37
+
38
+ :param timeStepsPerPeriod: The number of discrete timesteps which describe one period. required
39
+ :type timeStepsPerPeriod: integer
40
+
41
+ :returns: - **unstackedTimeSeries** (pandas DataFrame) -- is stacked such that each row represents a
42
+ candidate period
43
+ - **timeIndex** (pandas Series index) -- is the modification of the original
44
+ timeseriesindex in case an integer multiple was created
45
+ """
46
+ # init new grouped timeindex
47
+ unstackedTimeSeries = timeSeries.copy()
48
+
49
+ # initialize new indices
50
+ periodIndex = []
51
+ stepIndex = []
52
+
53
+ # extend to inger multiple of period length
54
+ if len(timeSeries) % timeStepsPerPeriod == 0:
55
+ attached_timesteps = 0
56
+ else:
57
+ # calculate number of timesteps which get attached
58
+ attached_timesteps = timeStepsPerPeriod - len(timeSeries) % timeStepsPerPeriod
59
+
60
+ # take these from the head of the original time series
61
+ rep_data = unstackedTimeSeries.head(attached_timesteps)
62
+
63
+ # append them at the end of the time series
64
+ unstackedTimeSeries = pd.concat([unstackedTimeSeries, rep_data])
65
+
66
+ # create period and step index
67
+ for ii in range(0, len(unstackedTimeSeries)):
68
+ periodIndex.append(int(ii / timeStepsPerPeriod))
69
+ stepIndex.append(ii - int(ii / timeStepsPerPeriod) * timeStepsPerPeriod)
70
+
71
+ # save old index
72
+ timeIndex = copy.deepcopy(unstackedTimeSeries.index)
73
+
74
+ # create new double index and unstack the time series
75
+ unstackedTimeSeries.index = pd.MultiIndex.from_arrays(
76
+ [stepIndex, periodIndex], names=["TimeStep", "PeriodNum"]
77
+ )
78
+ unstackedTimeSeries = unstackedTimeSeries.unstack(level="TimeStep")
79
+
80
+ return unstackedTimeSeries, timeIndex
81
+
82
+
83
+
84
+ class TimeSeriesAggregation(object):
85
+ """
86
+ Clusters time series data to typical periods.
87
+ """
88
+
89
+ CLUSTER_METHODS = [
90
+ "averaging",
91
+ "k_means",
92
+ "k_medoids",
93
+ "k_maxoids",
94
+ "hierarchical",
95
+ "adjacent_periods",
96
+ ]
97
+
98
+ REPRESENTATION_METHODS = [
99
+ "meanRepresentation",
100
+ "medoidRepresentation",
101
+ "maxoidRepresentation",
102
+ "minmaxmeanRepresentation",
103
+ "durationRepresentation",
104
+ "distributionRepresentation",
105
+ "distributionAndMinMaxRepresentation",
106
+ ]
107
+
108
+ EXTREME_PERIOD_METHODS = [
109
+ "None",
110
+ "append",
111
+ "new_cluster_center",
112
+ "replace_cluster_center",
113
+ ]
114
+
115
+ def __init__(
116
+ self,
117
+ timeSeries,
118
+ resolution=None,
119
+ noTypicalPeriods=10,
120
+ noSegments=10,
121
+ hoursPerPeriod=24,
122
+ clusterMethod="hierarchical",
123
+ evalSumPeriods=False,
124
+ sortValues=False,
125
+ sameMean=False,
126
+ rescaleClusterPeriods=True,
127
+ weightDict=None,
128
+ segmentation=False,
129
+ extremePeriodMethod="None",
130
+ representationMethod=None,
131
+ representationDict=None,
132
+ distributionPeriodWise=True,
133
+ segmentRepresentationMethod=None,
134
+ predefClusterOrder=None,
135
+ predefClusterCenterIndices=None,
136
+ solver="highs",
137
+ roundOutput=None,
138
+ addPeakMin=None,
139
+ addPeakMax=None,
140
+ addMeanMin=None,
141
+ addMeanMax=None,
142
+ ):
143
+ """
144
+ Initialize the periodly clusters.
145
+
146
+ :param timeSeries: DataFrame with the datetime as index and the relevant
147
+ time series parameters as columns. required
148
+ :type timeSeries: pandas.DataFrame() or dict
149
+
150
+ :param resolution: Resolution of the time series in hours [h]. If timeSeries is a
151
+ pandas.DataFrame() the resolution is derived from the datetime
152
+ index. optional, default: delta_T in timeSeries
153
+ :type resolution: float
154
+
155
+ :param hoursPerPeriod: Value which defines the length of a cluster period. optional, default: 24
156
+ :type hoursPerPeriod: integer
157
+
158
+ :param noTypicalPeriods: Number of typical Periods - equivalent to the number of clusters. optional, default: 10
159
+ :type noTypicalPeriods: integer
160
+
161
+ :param noSegments: Number of segments in which the typical periods shoul be subdivided - equivalent to the
162
+ number of inner-period clusters. optional, default: 10
163
+ :type noSegments: integer
164
+
165
+ :param clusterMethod: Chosen clustering method. optional, default: 'hierarchical'
166
+ |br| Options are:
167
+
168
+ * 'averaging'
169
+ * 'k_means'
170
+ * 'k_medoids'
171
+ * 'k_maxoids'
172
+ * 'hierarchical'
173
+ * 'adjacent_periods'
174
+ :type clusterMethod: string
175
+
176
+ :param evalSumPeriods: Boolean if in the clustering process also the averaged periodly values
177
+ shall be integrated additional to the periodly profiles as parameters. optional, default: False
178
+ :type evalSumPeriods: boolean
179
+
180
+ :param sameMean: Boolean which is used in the normalization procedure. If true, all time series get normalized
181
+ such that they have the same mean value. optional, default: False
182
+ :type sameMean: boolean
183
+
184
+ :param sortValues: Boolean if the clustering should be done by the periodly duration
185
+ curves (true) or the original shape of the data. optional (default: False)
186
+ :type sortValues: boolean
187
+
188
+ :param rescaleClusterPeriods: Decides if the cluster Periods shall get rescaled such that their
189
+ weighted mean value fits the mean value of the original time series. optional (default: True)
190
+ :type rescaleClusterPeriods: boolean
191
+
192
+ :param weightDict: Dictionary which weights the profiles. It is done by scaling
193
+ the time series while the normalization process. Normally all time
194
+ series have a scale from 0 to 1. By scaling them, the values get
195
+ different distances to each other and with this, they are
196
+ differently evaluated while the clustering process. optional (default: None )
197
+ :type weightDict: dict
198
+
199
+ :param extremePeriodMethod: Method how to integrate extreme Periods (peak demand, lowest temperature etc.)
200
+ into to the typical period profiles. optional, default: 'None'
201
+ |br| Options are:
202
+
203
+ * None: No integration at all.
204
+ * 'append': append typical Periods to cluster centers
205
+ * 'new_cluster_center': add the extreme period as additional cluster center. It is checked then for all
206
+ Periods if they fit better to the this new center or their original cluster center.
207
+ * 'replace_cluster_center': replaces the cluster center of the
208
+ cluster where the extreme period belongs to with the periodly profile of the extreme period. (Worst
209
+ case system design)
210
+ :type extremePeriodMethod: string
211
+
212
+ :param representationMethod: Chosen representation. If specified, the clusters are represented in the chosen
213
+ way. Otherwise, each clusterMethod has its own commonly used default representation method.
214
+ |br| Options are:
215
+
216
+ * 'meanRepresentation' (default of 'averaging' and 'k_means')
217
+ * 'medoidRepresentation' (default of 'k_medoids', 'hierarchical' and 'adjacent_periods')
218
+ * 'minmaxmeanRepresentation'
219
+ * 'durationRepresentation'/ 'distributionRepresentation'
220
+ * 'distribtionAndMinMaxRepresentation'
221
+ :type representationMethod: string
222
+
223
+ :param representationDict: Dictionary which states for each attribute whether the profiles in each cluster
224
+ should be represented by the minimum value or maximum value of each time step. This enables estimations
225
+ to the safe side. This dictionary is needed when 'minmaxmeanRepresentation' is chosen. If not specified, the
226
+ dictionary is set to containing 'mean' values only.
227
+ :type representationDict: dict
228
+
229
+ :param distributionPeriodWise: If durationRepresentation is chosen, you can choose whether the distribution of
230
+ each cluster should be separately preserved or that of the original time series only (default: True)
231
+ :type distributionPeriodWise:
232
+
233
+ :param segmentRepresentationMethod: Chosen representation for the segments. If specified, the segments are
234
+ represented in the chosen way. Otherwise, it is inherited from the representationMethod.
235
+ |br| Options are:
236
+
237
+ * 'meanRepresentation' (default of 'averaging' and 'k_means')
238
+ * 'medoidRepresentation' (default of 'k_medoids', 'hierarchical' and 'adjacent_periods')
239
+ * 'minmaxmeanRepresentation'
240
+ * 'durationRepresentation'/ 'distributionRepresentation'
241
+ * 'distribtionAndMinMaxRepresentation'
242
+ :type segmentRepresentationMethod: string
243
+
244
+ :param predefClusterOrder: Instead of aggregating a time series, a predefined grouping is taken
245
+ which is given by this list. optional (default: None)
246
+ :type predefClusterOrder: list or array
247
+
248
+ :param predefClusterCenterIndices: If predefClusterOrder is give, this list can define the representative
249
+ cluster candidates. Otherwise the medoid is taken. optional (default: None)
250
+ :type predefClusterCenterIndices: list or array
251
+
252
+ :param solver: Solver that is used for k_medoids clustering. optional (default: 'cbc' )
253
+ :type solver: string
254
+
255
+ :param roundOutput: Decimals to what the output time series get round. optional (default: None )
256
+ :type roundOutput: integer
257
+
258
+ :param addPeakMin: List of column names which's minimal value shall be added to the
259
+ typical periods. E.g.: ['Temperature']. optional, default: []
260
+ :type addPeakMin: list
261
+
262
+ :param addPeakMax: List of column names which's maximal value shall be added to the
263
+ typical periods. E.g. ['EDemand', 'HDemand']. optional, default: []
264
+ :type addPeakMax: list
265
+
266
+ :param addMeanMin: List of column names where the period with the cumulative minimal value
267
+ shall be added to the typical periods. E.g. ['Photovoltaic']. optional, default: []
268
+ :type addMeanMin: list
269
+
270
+ :param addMeanMax: List of column names where the period with the cumulative maximal value
271
+ shall be added to the typical periods. optional, default: []
272
+ :type addMeanMax: list
273
+ """
274
+ if addMeanMin is None:
275
+ addMeanMin = []
276
+ if addMeanMax is None:
277
+ addMeanMax = []
278
+ if addPeakMax is None:
279
+ addPeakMax = []
280
+ if addPeakMin is None:
281
+ addPeakMin = []
282
+ if weightDict is None:
283
+ weightDict = {}
284
+ self.timeSeries = timeSeries
285
+
286
+ self.resolution = resolution
287
+
288
+ self.hoursPerPeriod = hoursPerPeriod
289
+
290
+ self.noTypicalPeriods = noTypicalPeriods
291
+
292
+ self.noSegments = noSegments
293
+
294
+ self.clusterMethod = clusterMethod
295
+
296
+ self.extremePeriodMethod = extremePeriodMethod
297
+
298
+ self.evalSumPeriods = evalSumPeriods
299
+
300
+ self.sortValues = sortValues
301
+
302
+ self.sameMean = sameMean
303
+
304
+ self.rescaleClusterPeriods = rescaleClusterPeriods
305
+
306
+ self.weightDict = weightDict
307
+
308
+ self.representationMethod = representationMethod
309
+
310
+ self.representationDict = representationDict
311
+
312
+ self.distributionPeriodWise = distributionPeriodWise
313
+
314
+ self.segmentRepresentationMethod = segmentRepresentationMethod
315
+
316
+ self.predefClusterOrder = predefClusterOrder
317
+
318
+ self.predefClusterCenterIndices = predefClusterCenterIndices
319
+
320
+ self.solver = solver
321
+
322
+ self.segmentation = segmentation
323
+
324
+ self.roundOutput = roundOutput
325
+
326
+ self.addPeakMin = addPeakMin
327
+
328
+ self.addPeakMax = addPeakMax
329
+
330
+ self.addMeanMin = addMeanMin
331
+
332
+ self.addMeanMax = addMeanMax
333
+
334
+ self._check_init_args()
335
+
336
+ # internal attributes
337
+ self._normalizedMean = None
338
+
339
+ return
340
+
341
+ def _check_init_args(self):
342
+
343
+ # check timeSeries and set it as pandas DataFrame
344
+ if not isinstance(self.timeSeries, pd.DataFrame):
345
+ if isinstance(self.timeSeries, dict):
346
+ self.timeSeries = pd.DataFrame(self.timeSeries)
347
+ elif isinstance(self.timeSeries, np.ndarray):
348
+ self.timeSeries = pd.DataFrame(self.timeSeries)
349
+ else:
350
+ raise ValueError(
351
+ "timeSeries has to be of type pandas.DataFrame() "
352
+ + "or of type np.array() "
353
+ "in initialization of object of class " + type(self).__name__
354
+ )
355
+
356
+ # check if extreme periods exist in the dataframe
357
+ for peak in self.addPeakMin:
358
+ if peak not in self.timeSeries.columns:
359
+ raise ValueError(
360
+ peak
361
+ + ' listed in "addPeakMin"'
362
+ + " does not occur as timeSeries column"
363
+ )
364
+ for peak in self.addPeakMax:
365
+ if peak not in self.timeSeries.columns:
366
+ raise ValueError(
367
+ peak
368
+ + ' listed in "addPeakMax"'
369
+ + " does not occur as timeSeries column"
370
+ )
371
+ for peak in self.addMeanMin:
372
+ if peak not in self.timeSeries.columns:
373
+ raise ValueError(
374
+ peak
375
+ + ' listed in "addMeanMin"'
376
+ + " does not occur as timeSeries column"
377
+ )
378
+ for peak in self.addMeanMax:
379
+ if peak not in self.timeSeries.columns:
380
+ raise ValueError(
381
+ peak
382
+ + ' listed in "addMeanMax"'
383
+ + " does not occur as timeSeries column"
384
+ )
385
+
386
+ # derive resolution from date time index if not provided
387
+ if self.resolution is None:
388
+ try:
389
+ timedelta = self.timeSeries.index[1] - self.timeSeries.index[0]
390
+ self.resolution = float(timedelta.total_seconds()) / 3600
391
+ except AttributeError:
392
+ raise ValueError(
393
+ "'resolution' argument has to be nonnegative float or int"
394
+ + " or the given timeseries needs a datetime index"
395
+ )
396
+ except TypeError:
397
+ try:
398
+ self.timeSeries.index = pd.to_datetime(self.timeSeries.index)
399
+ timedelta = self.timeSeries.index[1] - self.timeSeries.index[0]
400
+ self.resolution = float(timedelta.total_seconds()) / 3600
401
+ except:
402
+ raise ValueError(
403
+ "'resolution' argument has to be nonnegative float or int"
404
+ + " or the given timeseries needs a datetime index"
405
+ )
406
+
407
+ if not (isinstance(self.resolution, int) or isinstance(self.resolution, float)):
408
+ raise ValueError("resolution has to be nonnegative float or int")
409
+
410
+ # check hoursPerPeriod
411
+ if self.hoursPerPeriod is None or self.hoursPerPeriod <= 0:
412
+ raise ValueError("hoursPerPeriod has to be nonnegative float or int")
413
+
414
+ # check typical Periods
415
+ if (
416
+ self.noTypicalPeriods is None
417
+ or self.noTypicalPeriods <= 0
418
+ or not isinstance(self.noTypicalPeriods, int)
419
+ ):
420
+ raise ValueError("noTypicalPeriods has to be nonnegative integer")
421
+ self.timeStepsPerPeriod = int(self.hoursPerPeriod / self.resolution)
422
+ if not self.timeStepsPerPeriod == self.hoursPerPeriod / self.resolution:
423
+ raise ValueError(
424
+ "The combination of hoursPerPeriod and the "
425
+ + "resulution does not result in an integer "
426
+ + "number of time steps per period"
427
+ )
428
+ if self.segmentation:
429
+ if self.noSegments > self.timeStepsPerPeriod:
430
+ warnings.warn(
431
+ "The number of segments must be less than or equal to the number of time steps per period. "
432
+ "Segment number is decreased to number of time steps per period."
433
+ )
434
+ self.noSegments = self.timeStepsPerPeriod
435
+
436
+ # check clusterMethod
437
+ if self.clusterMethod not in self.CLUSTER_METHODS:
438
+ raise ValueError(
439
+ "clusterMethod needs to be one of "
440
+ + "the following: "
441
+ + "{}".format(self.CLUSTER_METHODS)
442
+ )
443
+
444
+ # check representationMethod
445
+ if (
446
+ self.representationMethod is not None
447
+ and self.representationMethod not in self.REPRESENTATION_METHODS
448
+ ):
449
+ raise ValueError(
450
+ "If specified, representationMethod needs to be one of "
451
+ + "the following: "
452
+ + "{}".format(self.REPRESENTATION_METHODS)
453
+ )
454
+
455
+ # check representationMethod
456
+ if self.segmentRepresentationMethod is None:
457
+ self.segmentRepresentationMethod = self.representationMethod
458
+ else:
459
+ if self.segmentRepresentationMethod not in self.REPRESENTATION_METHODS:
460
+ raise ValueError(
461
+ "If specified, segmentRepresentationMethod needs to be one of "
462
+ + "the following: "
463
+ + "{}".format(self.REPRESENTATION_METHODS)
464
+ )
465
+
466
+ # if representationDict None, represent by maximum time steps in each cluster
467
+ if self.representationDict is None:
468
+ self.representationDict = {i: "mean" for i in list(self.timeSeries.columns)}
469
+ # sort representationDict alphabetically to make sure that the min, max or mean function is applied to the right
470
+ # column
471
+ self.representationDict = (
472
+ pd.Series(self.representationDict).sort_index(axis=0).to_dict()
473
+ )
474
+
475
+ # check extremePeriods
476
+ if self.extremePeriodMethod not in self.EXTREME_PERIOD_METHODS:
477
+ raise ValueError(
478
+ "extremePeriodMethod needs to be one of "
479
+ + "the following: "
480
+ + "{}".format(self.EXTREME_PERIOD_METHODS)
481
+ )
482
+
483
+ # check evalSumPeriods
484
+ if not isinstance(self.evalSumPeriods, bool):
485
+ raise ValueError("evalSumPeriods has to be boolean")
486
+ # check sortValues
487
+ if not isinstance(self.sortValues, bool):
488
+ raise ValueError("sortValues has to be boolean")
489
+ # check sameMean
490
+ if not isinstance(self.sameMean, bool):
491
+ raise ValueError("sameMean has to be boolean")
492
+ # check rescaleClusterPeriods
493
+ if not isinstance(self.rescaleClusterPeriods, bool):
494
+ raise ValueError("rescaleClusterPeriods has to be boolean")
495
+
496
+ # check predefClusterOrder
497
+ if self.predefClusterOrder is not None:
498
+ if not isinstance(self.predefClusterOrder, (list, np.ndarray)):
499
+ raise ValueError("predefClusterOrder has to be an array or list")
500
+ if self.predefClusterCenterIndices is not None:
501
+ # check predefClusterCenterIndices
502
+ if not isinstance(self.predefClusterCenterIndices, (list, np.ndarray)):
503
+ raise ValueError(
504
+ "predefClusterCenterIndices has to be an array or list"
505
+ )
506
+ elif self.predefClusterCenterIndices is not None:
507
+ raise ValueError(
508
+ 'If "predefClusterCenterIndices" is defined, "predefClusterOrder" needs to be defined as well'
509
+ )
510
+
511
+ return
512
+
513
def _normalizeTimeSeries(self, sameMean=False):
    """
    Scale every time series column independently to the [0, 1] range.

    :param sameMean: If True, additionally divide each column by its
        post-scaling mean so that all columns end up with the same mean.
        Relevant for weighting time series. optional (default: False)
    :type sameMean: boolean

    :returns: normalized time series as pandas.DataFrame
    """
    scaler = preprocessing.MinMaxScaler()
    scaled = scaler.fit_transform(self.timeSeries)
    normalized = pd.DataFrame(
        scaled,
        index=self.timeSeries.index,
        columns=self.timeSeries.columns,
    )

    # remember the per-column means for the back transformation
    self._normalizedMean = normalized.mean()

    return normalized / self._normalizedMean if sameMean else normalized
535
+
536
def _unnormalizeTimeSeries(self, normalizedTimeSeries, sameMean=False):
    """
    Inverse operation of '_normalizeTimeSeries': scales the normalized
    values back to the value range of the original time series.

    :param normalizedTimeSeries: Time series which should get back transformed. required
    :type normalizedTimeSeries: pandas.DataFrame()

    :param sameMean: Has to have the same value as in _normalizeTimeSeries. optional (default: False)
    :type sameMean: boolean

    :returns: unnormalized time series as pandas.DataFrame
    """
    # NOTE: the redundant function-local "from sklearn import preprocessing"
    # was removed; the module-level import already provides it.

    # refit the scaler on the original data to recover the per-column
    # min/max used by the forward transformation
    min_max_scaler = preprocessing.MinMaxScaler()
    min_max_scaler.fit(self.timeSeries)

    if sameMean:
        # NOTE(review): this multiplies the passed DataFrame in place, so the
        # caller's object is modified as well — existing callers rely on it.
        normalizedTimeSeries *= self._normalizedMean

    unnormalizedTimeSeries = pd.DataFrame(
        min_max_scaler.inverse_transform(normalizedTimeSeries),
        columns=normalizedTimeSeries.columns,
        index=normalizedTimeSeries.index,
    )

    return unnormalizedTimeSeries
564
+
565
def _preProcessTimeSeries(self):
    """
    Normalize the time series, apply the weighting from the weight dict
    and reshape them into the period-wise matrix format.
    """
    # sort columns alphabetically to avoid the ordering bug described in #18
    self.timeSeries.sort_index(axis=1, inplace=True)

    # make sure all values are floats
    self.timeSeries = self.timeSeries.astype(float)

    # scale the profiles to [0, 1] (and to a common mean if requested)
    self.normalizedTimeSeries = self._normalizeTimeSeries(sameMean=self.sameMean)

    # apply the weighting; lift near-zero weights to a minimal value so the
    # profile is still carried through the aggregation
    for column, weight in self.weightDict.items():
        if weight < MIN_WEIGHT:
            print(
                'weight of "'
                + str(column)
                + '" set to the minmal tolerable weighting'
            )
            weight = MIN_WEIGHT
            self.weightDict[column] = weight
        self.normalizedTimeSeries[column] = (
            self.normalizedTimeSeries[column] * weight
        )

    # group the profiles into one row per candidate period
    self.normalizedPeriodlyProfiles, self.timeIndex = unstackToPeriods(
        self.normalizedTimeSeries, self.timeStepsPerPeriod
    )

    # guard against NaN entering the clustering
    if self.normalizedPeriodlyProfiles.isnull().values.any():
        raise ValueError(
            "Pre processed data includes NaN. Please check the timeSeries input data."
        )
600
+
601
def _postProcessTimeSeries(self, normalizedTimeSeries, applyWeighting=True):
    """
    Undo the weighting of the time series, scale them back to the original
    value range and optionally round the result.

    :param normalizedTimeSeries: weighted, normalized time series. required
    :type normalizedTimeSeries: pandas.DataFrame()

    :param applyWeighting: whether the weighting has to be neutralized first.
        optional (default: True)
    :type applyWeighting: boolean

    :returns: unnormalized time series
    """
    if applyWeighting:
        # neutralize the weighting applied in _preProcessTimeSeries
        for column, weight in self.weightDict.items():
            normalizedTimeSeries[column] = normalizedTimeSeries[column] / weight

    result = self._unnormalizeTimeSeries(
        normalizedTimeSeries, sameMean=self.sameMean
    )

    if self.roundOutput is not None:
        result = result.round(decimals=self.roundOutput)

    return result
621
+
622
def _addExtremePeriods(
    self,
    groupedSeries,
    clusterCenters,
    clusterOrder,
    extremePeriodMethod="new_cluster_center",
    addPeakMin=None,
    addPeakMax=None,
    addMeanMin=None,
    addMeanMax=None,
):
    """
    Adds different extreme periods based on the clustered data, described
    by the clusterCenters and clusterOrder.

    :param groupedSeries: periodly grouped groupedSeries on which basis it should be decided,
        which period is an extreme period. required
    :type groupedSeries: pandas.DataFrame()

    :param clusterCenters: Output from clustering with sklearn. required
    :type clusterCenters: dict

    :param clusterOrder: Output from clustering with sklearn. required
        NOTE: this array is aliased and mutated in place below
        ('newClusterOrder = clusterOrder' followed by item assignment).
    :type clusterOrder: dict

    :param extremePeriodMethod: Chosen extremePeriodMethod. optional (default: 'new_cluster_center')
    :type extremePeriodMethod: string

    :param addPeakMin: columns whose periodly minimum is added as extreme period.
        NOTE(review): assumed to be iterables here — a literal None default
        would raise on the 'in' test; callers apparently pass lists. TODO confirm.
    :param addPeakMax: columns whose periodly maximum is added as extreme period.
    :param addMeanMin: columns whose minimal periodly mean is added as extreme period.
    :param addMeanMax: columns whose maximal periodly mean is added as extreme period.

    :returns: - **newClusterCenters** -- The new cluster centers extended with the extreme periods.
              - **newClusterOrder** -- The new cluster order including the extreme periods.
              - **extremeClusterIdx** -- A list of indices where in the newClusterCenters are the extreme
                periods located.
    """

    # init required dicts and lists
    self.extremePeriods = {}
    extremePeriodNo = []

    # plain-list view of the centers for the "is it already a center" test
    ccList = [center.tolist() for center in clusterCenters]

    # check which extreme periods exist in the profile and add them to
    # self.extremePeriods dict
    for column in self.timeSeries.columns:

        if column in addPeakMax:
            # period containing the overall maximum value of this column
            stepNo = groupedSeries[column].max(axis=1).idxmax()
            # add only if stepNo is not already in extremePeriods and
            # if it is not already a cluster center
            if (
                stepNo not in extremePeriodNo
                and groupedSeries.loc[stepNo, :].values.tolist() not in ccList
            ):
                max_col = self._append_col_with(column, " max.")
                self.extremePeriods[max_col] = {
                    "stepNo": stepNo,
                    "profile": groupedSeries.loc[stepNo, :].values,
                    "column": column,
                }
                extremePeriodNo.append(stepNo)

        if column in addPeakMin:
            # period containing the overall minimum value of this column
            stepNo = groupedSeries[column].min(axis=1).idxmin()
            # add only if stepNo is not already in extremePeriods and
            # if it is not already a cluster center
            if (
                stepNo not in extremePeriodNo
                and groupedSeries.loc[stepNo, :].values.tolist() not in ccList
            ):
                min_col = self._append_col_with(column, " min.")
                self.extremePeriods[min_col] = {
                    "stepNo": stepNo,
                    "profile": groupedSeries.loc[stepNo, :].values,
                    "column": column,
                }
                extremePeriodNo.append(stepNo)

        if column in addMeanMax:
            # period with the highest mean value of this column
            stepNo = groupedSeries[column].mean(axis=1).idxmax()
            # add only if stepNo is not already in extremePeriods and
            # if it is not already a cluster center
            if (
                stepNo not in extremePeriodNo
                and groupedSeries.loc[stepNo, :].values.tolist() not in ccList
            ):
                mean_max_col = self._append_col_with(column, " daily max.")
                self.extremePeriods[mean_max_col] = {
                    "stepNo": stepNo,
                    "profile": groupedSeries.loc[stepNo, :].values,
                    "column": column,
                }
                extremePeriodNo.append(stepNo)

        if column in addMeanMin:
            # period with the lowest mean value of this column
            stepNo = groupedSeries[column].mean(axis=1).idxmin()
            # add only if stepNo is not already in extremePeriods and
            # if it is not already a cluster center
            if (
                stepNo not in extremePeriodNo
                and groupedSeries.loc[stepNo, :].values.tolist() not in ccList
            ):
                mean_min_col = self._append_col_with(column, " daily min.")
                self.extremePeriods[mean_min_col] = {
                    "stepNo": stepNo,
                    "profile": groupedSeries.loc[stepNo, :].values,
                    "column": column,
                }
                extremePeriodNo.append(stepNo)

    for periodType in self.extremePeriods:
        # get current related clusters of extreme periods
        self.extremePeriods[periodType]["clusterNo"] = clusterOrder[
            self.extremePeriods[periodType]["stepNo"]
        ]

    # init new cluster structure; newClusterOrder aliases clusterOrder,
    # so assignments below modify the caller's array as well
    newClusterCenters = []
    newClusterOrder = clusterOrder
    extremeClusterIdx = []

    # integrate extreme periods to clusters
    if extremePeriodMethod == "append":
        # attach extreme periods to cluster centers; only the extreme
        # period itself is reassigned to the new cluster
        for i, cluster_center in enumerate(clusterCenters):
            newClusterCenters.append(cluster_center)
        for i, periodType in enumerate(self.extremePeriods):
            extremeClusterIdx.append(len(newClusterCenters))
            newClusterCenters.append(self.extremePeriods[periodType]["profile"])
            newClusterOrder[self.extremePeriods[periodType]["stepNo"]] = i + len(
                clusterCenters
            )

    elif extremePeriodMethod == "new_cluster_center":
        for i, cluster_center in enumerate(clusterCenters):
            newClusterCenters.append(cluster_center)
        # attach extreme periods to cluster centers and consider for all periods
        # if they fit better to the cluster or the extreme period
        for i, periodType in enumerate(self.extremePeriods):
            extremeClusterIdx.append(len(newClusterCenters))
            newClusterCenters.append(self.extremePeriods[periodType]["profile"])
            self.extremePeriods[periodType]["newClusterNo"] = i + len(
                clusterCenters
            )

        for i, cPeriod in enumerate(newClusterOrder):
            # squared euclidean distance to the assigned cluster center
            cluster_dist = sum(
                (groupedSeries.iloc[i].values - clusterCenters[cPeriod]) ** 2
            )
            for ii, extremPeriodType in enumerate(self.extremePeriods):
                # exclude other extreme periods from adding to the new
                # cluster center
                isOtherExtreme = False
                for otherExPeriod in self.extremePeriods:
                    if (
                        i == self.extremePeriods[otherExPeriod]["stepNo"]
                        and otherExPeriod != extremPeriodType
                    ):
                        isOtherExtreme = True
                # squared distance to the extreme period profile
                extperiod_dist = sum(
                    (
                        groupedSeries.iloc[i].values
                        - self.extremePeriods[extremPeriodType]["profile"]
                    )
                    ** 2
                )
                # reassign the period if the extreme period fits better
                if extperiod_dist < cluster_dist and not isOtherExtreme:
                    newClusterOrder[i] = self.extremePeriods[extremPeriodType][
                        "newClusterNo"
                    ]

    elif extremePeriodMethod == "replace_cluster_center":
        # overwrite the affected column of the assigned cluster center with
        # the extreme profile ("worst case" cluster periods)
        newClusterCenters = clusterCenters
        for periodType in self.extremePeriods:
            index = groupedSeries.columns.get_loc(
                self.extremePeriods[periodType]["column"]
            )
            newClusterCenters[self.extremePeriods[periodType]["clusterNo"]][
                index
            ] = self.extremePeriods[periodType]["profile"][index]
            if (
                not self.extremePeriods[periodType]["clusterNo"]
                in extremeClusterIdx
            ):
                extremeClusterIdx.append(
                    self.extremePeriods[periodType]["clusterNo"]
                )

    return newClusterCenters, newClusterOrder, extremeClusterIdx
813
+
814
+ def _append_col_with(self, column, append_with=" max."):
815
+ """Appends a string to the column name. For MultiIndexes, which turn out to be
816
+ tuples when this method is called, only last level is changed"""
817
+ if isinstance(column, str):
818
+ return column + append_with
819
+ elif isinstance(column, tuple):
820
+ col = list(column)
821
+ col[-1] = col[-1] + append_with
822
+ return tuple(col)
823
+
824
def _rescaleClusterPeriods(self, clusterOrder, clusterPeriods, extremeClusterIdx):
    """
    Rescale the values of the clustered Periods such that the mean of each
    time series in the typical Periods fits the mean value of the original
    time series, without changing the values of the extremePeriods.

    NOTE(review): the 'clusterOrder' and 'clusterPeriods' parameters are not
    used in the body — the rescaling reads self.clusterPeriods and
    self._clusterPeriodNoOccur instead. Verify against callers.

    :returns: rescaled typical periods as numpy array
    """
    # occurrence count of each typical period, used as weighting
    weightingVec = pd.Series(self._clusterPeriodNoOccur).values
    # one row per typical period, columns = (series, time step)
    typicalPeriods = pd.concat([
        pd.Series(s, index=self.normalizedPeriodlyProfiles.columns)
        for s in self.clusterPeriods
    ], axis=1).T
    # period indices that are NOT extreme periods and may be rescaled
    idx_wo_peak = np.delete(typicalPeriods.index, extremeClusterIdx)
    for column in self.timeSeries.columns:
        diff = 1
        # target: total sum of the original (normalized) profile
        sum_raw = self.normalizedPeriodlyProfiles[column].sum().sum()
        # contribution of the (fixed) extreme periods
        sum_peak = np.sum(
            weightingVec[extremeClusterIdx]
            * typicalPeriods[column].loc[extremeClusterIdx, :].sum(axis=1)
        )
        # contribution of the rescalable periods
        sum_clu_wo_peak = np.sum(
            weightingVec[idx_wo_peak]
            * typicalPeriods[column].loc[idx_wo_peak, :].sum(axis=1)
        )

        # define the upper scale dependent on the weighting of the series
        scale_ub = 1.0
        if self.sameMean:
            scale_ub = (
                scale_ub
                * self.timeSeries[column].max()
                / self.timeSeries[column].mean()
            )
        if column in self.weightDict:
            scale_ub = scale_ub * self.weightDict[column]

        # difference between predicted and original sum
        diff = abs(sum_raw - (sum_clu_wo_peak + sum_peak))

        # fixed-point iteration: scale, clip to [0, scale_ub], re-check,
        # until the sums match within TOLERANCE or MAX_ITERATOR is hit
        a = 0
        while diff > sum_raw * TOLERANCE and a < MAX_ITERATOR:
            # rescale values of the non-extreme periods
            typicalPeriods.loc[idx_wo_peak, column] = (
                typicalPeriods[column].loc[idx_wo_peak, :].values
                * (sum_raw - sum_peak)
                / sum_clu_wo_peak
            )

            # reset values higher than the upper scale or less than zero
            # NOTE(review): inplace clip/fillna on the column sub-frame relies
            # on chained-assignment semantics (silenced at module import);
            # may break on newer pandas — verify.
            typicalPeriods[column].clip(lower=0, upper=scale_ub, inplace=True)

            typicalPeriods[column].fillna(0.0, inplace=True)

            # calc new sum and new diff to orig data
            sum_clu_wo_peak = np.sum(
                weightingVec[idx_wo_peak]
                * typicalPeriods[column].loc[idx_wo_peak, :].sum(axis=1)
            )
            diff = abs(sum_raw - (sum_clu_wo_peak + sum_peak))
            a += 1
        if a == MAX_ITERATOR:
            # warn about the remaining deviation in percent
            deviation = str(round((diff / sum_raw) * 100, 2))
            warnings.warn(
                'Max iteration number reached for "'
                + str(column)
                + '" while rescaling the cluster periods.'
                + " The integral of the aggregated time series deviates by: "
                + deviation
                + "%"
            )
    return typicalPeriods.values
895
+
896
def _clusterSortedPeriods(self, candidates, n_init=20):
    """
    Runs the clustering algorithms for the sorted profiles within the period
    instead of the original profiles (duration curve clustering).

    :param candidates: original (unsorted) period profiles; the medoid
        returned for each cluster is taken from these. required
    :type candidates: numpy array

    :param n_init: NOTE(review): currently unused in the body — verify.
    :type n_init: integer

    :returns: - **clusterCenters_C** -- original profiles chosen as medoids
              - **clusterOrders_C** -- cluster assignment per period
    """
    # initialize: deep copy so sorting does not touch the original profiles
    normalizedSortedPeriodlyProfiles = copy.deepcopy(
        self.normalizedPeriodlyProfiles
    )
    for column in self.timeSeries.columns:
        # sort each period individually (descending duration curve);
        # note: values.sort sorts the underlying array in place
        df = normalizedSortedPeriodlyProfiles[column]
        values = df.values
        values.sort(axis=1)
        values = values[:, ::-1]
        normalizedSortedPeriodlyProfiles[column] = pd.DataFrame(
            values, df.index, df.columns
        )
    sortedClusterValues = normalizedSortedPeriodlyProfiles.values

    # cluster on the sorted (duration-curve) representation
    (
        altClusterCenters,
        self.clusterCenterIndices,
        clusterOrders_C,
    ) = aggregatePeriods(
        sortedClusterValues,
        n_clusters=self.noTypicalPeriods,
        n_iter=30,
        solver=self.solver,
        clusterMethod=self.clusterMethod,
        representationMethod=self.representationMethod,
        representationDict=self.representationDict,
        distributionPeriodWise=self.distributionPeriodWise,
        timeStepsPerPeriod=self.timeStepsPerPeriod,
    )

    clusterCenters_C = []

    # take the clusters and determine the most representative sorted
    # period as cluster center
    for clusterNum in np.unique(clusterOrders_C):
        indice = np.where(clusterOrders_C == clusterNum)[0]
        if len(indice) > 1:
            # mean value for each time step for each time series over
            # all Periods in the cluster
            currentMean_C = sortedClusterValues[indice].mean(axis=0)
            # index of the period with the lowest distance to the cluster
            # center
            mindistIdx_C = np.argmin(
                np.square(sortedClusterValues[indice] - currentMean_C).sum(axis=1)
            )
            # append original (unsorted) time series of this period
            medoid_C = candidates[indice][mindistIdx_C]

            # append to cluster center
            clusterCenters_C.append(medoid_C)

        else:
            # if only one period is part of the cluster, add this index
            clusterCenters_C.append(candidates[indice][0])

    return clusterCenters_C, clusterOrders_C
958
+
959
def createTypicalPeriods(self):
    """
    Clusters the Periods.

    :returns: **self.typicalPeriods** -- All typical Periods in scaled form.
    """
    self._preProcessTimeSeries()

    # check for additional cluster parameters: optionally append the
    # periodly sums of each series as extra clustering features
    if self.evalSumPeriods:
        evaluationValues = (
            self.normalizedPeriodlyProfiles.stack(level=0)
            .sum(axis=1)
            .unstack(level=1)
        )
        # how many values have to get deleted later
        delClusterParams = -len(evaluationValues.columns)
        candidates = np.concatenate(
            (self.normalizedPeriodlyProfiles.values, evaluationValues.values),
            axis=1,
        )
    else:
        delClusterParams = None
        candidates = self.normalizedPeriodlyProfiles.values

    # skip aggregation procedure for the case of a predefined cluster sequence and get only the correct representation
    if not self.predefClusterOrder is None:
        self._clusterOrder = self.predefClusterOrder
        # check if representatives are defined
        if not self.predefClusterCenterIndices is None:
            self.clusterCenterIndices = self.predefClusterCenterIndices
            self.clusterCenters = candidates[self.predefClusterCenterIndices]
        else:
            # otherwise take the medoids
            self.clusterCenters, self.clusterCenterIndices = representations(
                candidates,
                self._clusterOrder,
                default="medoidRepresentation",
                representationMethod=self.representationMethod,
                representationDict=self.representationDict,
                timeStepsPerPeriod=self.timeStepsPerPeriod,
            )
    else:
        # time the clustering itself
        cluster_duration = time.time()
        if not self.sortValues:
            # cluster the data
            (
                self.clusterCenters,
                self.clusterCenterIndices,
                self._clusterOrder,
            ) = aggregatePeriods(
                candidates,
                n_clusters=self.noTypicalPeriods,
                n_iter=100,
                solver=self.solver,
                clusterMethod=self.clusterMethod,
                representationMethod=self.representationMethod,
                representationDict=self.representationDict,
                distributionPeriodWise=self.distributionPeriodWise,
                timeStepsPerPeriod=self.timeStepsPerPeriod,
            )
        else:
            # duration-curve clustering
            self.clusterCenters, self._clusterOrder = self._clusterSortedPeriods(
                candidates
            )
        self.clusteringDuration = time.time() - cluster_duration

    # get cluster centers without additional evaluation values
    self.clusterPeriods = []
    for i, cluster_center in enumerate(self.clusterCenters):
        self.clusterPeriods.append(cluster_center[:delClusterParams])

    if not self.extremePeriodMethod == "None":
        # overwrite clusterPeriods and clusterOrder
        (
            self.clusterPeriods,
            self._clusterOrder,
            self.extremeClusterIdx,
        ) = self._addExtremePeriods(
            self.normalizedPeriodlyProfiles,
            self.clusterPeriods,
            self._clusterOrder,
            extremePeriodMethod=self.extremePeriodMethod,
            addPeakMin=self.addPeakMin,
            addPeakMax=self.addPeakMax,
            addMeanMin=self.addMeanMin,
            addMeanMax=self.addMeanMax,
        )
    else:
        self.extremeClusterIdx = []

    # get number of appearance of the typical periods
    nums, counts = np.unique(self._clusterOrder, return_counts=True)
    self._clusterPeriodNoOccur = {num: counts[ii] for ii, num in enumerate(nums)}

    if self.rescaleClusterPeriods:
        self.clusterPeriods = self._rescaleClusterPeriods(
            self._clusterOrder, self.clusterPeriods, self.extremeClusterIdx
        )

    # if additional time steps have been added, reduce the number of occurrence of the typical period
    # which is related to these time steps
    if not len(self.timeSeries) % self.timeStepsPerPeriod == 0:
        self._clusterPeriodNoOccur[self._clusterOrder[-1]] -= (
            1
            - float(len(self.timeSeries) % self.timeStepsPerPeriod)
            / self.timeStepsPerPeriod
        )

    # put the clustered data in pandas format and scale back
    self.normalizedTypicalPeriods = pd.concat([
        pd.Series(s, index=self.normalizedPeriodlyProfiles.columns)
        for s in self.clusterPeriods
    ], axis=1).unstack("TimeStep").T

    if self.segmentation:
        from tsam.utils.segmentation import segmentation

        # aggregate the time steps within each typical period into segments
        (
            self.segmentedNormalizedTypicalPeriods,
            self.predictedSegmentedNormalizedTypicalPeriods,
        ) = segmentation(
            self.normalizedTypicalPeriods,
            self.noSegments,
            self.timeStepsPerPeriod,
            representationMethod=self.segmentRepresentationMethod,
            representationDict=self.representationDict,
            distributionPeriodWise=self.distributionPeriodWise,
        )
        self.normalizedTypicalPeriods = (
            self.segmentedNormalizedTypicalPeriods.reset_index(level=3, drop=True)
        )

    self.typicalPeriods = self._postProcessTimeSeries(self.normalizedTypicalPeriods)

    # check if original time series boundaries are not exceeded
    if np.array(
        self.typicalPeriods.max(axis=0) > self.timeSeries.max(axis=0)
    ).any():
        # NOTE(review): '<' here lists the columns that do NOT exceed the
        # maximum — the offending columns would be '>'. Looks inverted; verify.
        warning_list = self.typicalPeriods.max(axis=0) < self.timeSeries.max(axis=0)
        warnings.warn(
            "Something went wrong... At least one maximal value of the " +
            "aggregated time series exceeds the maximal value " +
            "the input time series for: " +
            "{}".format(list(warning_list[warning_list>0].index))
        )
    if np.array(
        self.typicalPeriods.min(axis=0) < self.timeSeries.min(axis=0)
    ).any():
        # NOTE(review): same concern as above for the minimum check.
        warning_list = self.typicalPeriods.min(axis=0) < self.timeSeries.min(axis=0)
        warnings.warn(
            "Something went wrong... At least one minimal value of the " +
            "aggregated time series exceeds the minimal value " +
            "the input time series for: " +
            "{}".format(list(warning_list[warning_list>0].index))
        )
    return self.typicalPeriods
1116
+
1117
def prepareEnersysInput(self):
    """
    Deprecated no-op kept for backwards compatibility: the dictionaries and
    lists required for the energy system optimization input are exposed as
    properties and need no explicit preparation step.
    """
    warnings.warn(
        '"prepareEnersysInput" is deprecated, since the created attributes can be directly accessed as properties',
        DeprecationWarning,
    )
1127
+
1128
@property
def stepIdx(self):
    """
    Index inside a single cluster: one entry per segment if segmentation
    is active, otherwise one per time step of the period.
    """
    count = self.noSegments if self.segmentation else self.timeStepsPerPeriod
    return list(range(count))
1137
+
1138
@property
def clusterPeriodIdx(self):
    """
    Index of the clustered periods

    :returns: sorted unique period indices as numpy array
    """
    # Check the backing attribute "_clusterOrder" (consistent with the other
    # properties): probing hasattr(self, "clusterOrder") would invoke the
    # clusterOrder property getter and run the aggregation as a hidden side
    # effect, making the explicit call below dead code.
    if not hasattr(self, "_clusterOrder"):
        self.createTypicalPeriods()
    return np.sort(np.unique(self._clusterOrder))
1146
+
1147
@property
def clusterOrder(self):
    """
    Sequence/order in which the typical periods represent the original
    time series.
    """
    # lazily run the aggregation on first access
    if not hasattr(self, "_clusterOrder"):
        self.createTypicalPeriods()
    return self._clusterOrder
1156
+
1157
@property
def clusterPeriodNoOccur(self):
    """
    How often does a typical period occur in the original time series
    """
    # Check the backing attribute "_clusterOrder" (consistent with the other
    # properties): probing hasattr(self, "clusterOrder") would invoke the
    # clusterOrder property getter and run the aggregation as a hidden side
    # effect, making the explicit call below dead code.
    if not hasattr(self, "_clusterOrder"):
        self.createTypicalPeriods()
    return self._clusterPeriodNoOccur
1165
+
1166
@property
def clusterPeriodDict(self):
    """
    Time series data of the typical periods as a nested dictionary,
    keyed first by column and then by period/time-step index.
    """
    if not hasattr(self, "_clusterOrder"):
        self.createTypicalPeriods()
    if not hasattr(self, "_clusterPeriodDict"):
        # cache the dict representation of every column
        self._clusterPeriodDict = {
            column: self.typicalPeriods[column].to_dict()
            for column in self.typicalPeriods
        }
    return self._clusterPeriodDict
1178
+
1179
@property
def segmentDurationDict(self):
    """
    Segment duration in time steps for each period index as dictionary.
    Without segmentation, every "segment" is a single time step of
    duration 1.
    """
    if not hasattr(self, "_clusterOrder"):
        self.createTypicalPeriods()
    if not hasattr(self, "_segmentDurationDict"):
        if self.segmentation:
            # drop the data columns, keeping only the index, and expose the
            # segment-duration index level as the remaining column
            self._segmentDurationDict = (
                self.segmentedNormalizedTypicalPeriods.drop(
                    self.segmentedNormalizedTypicalPeriods.columns, axis=1
                )
                .reset_index(level=3, drop=True)
                .reset_index(2)
                .to_dict()
            )
        else:
            # build an empty frame over the typical-period index and assign
            # a constant duration of 1 time step
            self._segmentDurationDict = self.typicalPeriods.drop(
                self.typicalPeriods.columns, axis=1
            )
            self._segmentDurationDict["Segment Duration"] = 1
            self._segmentDurationDict = self._segmentDurationDict.to_dict()
            warnings.warn(
                "Segmentation is turned off. All segments are consistent the time steps."
            )
    return self._segmentDurationDict
1206
+
1207
def predictOriginalData(self):
    """
    Predicts the overall time series if every period would be placed in the
    related cluster center.

    :returns: **predictedData** (pandas.DataFrame) -- DataFrame which has the same shape as the original one.
    """
    if not hasattr(self, "_clusterOrder"):
        self.createTypicalPeriods()

    # list up typical periods according to their order of occurrence using the _clusterOrder.
    new_data = []
    for label in self._clusterOrder:
        # if segmentation is used, use the segmented typical periods with predicted time steps with the same number
        # of time steps as unsegmented typical periods
        if self.segmentation:
            new_data.append(
                self.predictedSegmentedNormalizedTypicalPeriods.loc[label, :]
                .unstack()
                .values
            )
        else:
            new_data.append(
                self.normalizedTypicalPeriods.loc[label, :].unstack().values
            )

    # back in matrix shape: one row per original candidate period
    clustered_data_df = pd.DataFrame(
        new_data,
        columns=self.normalizedPeriodlyProfiles.columns,
        index=self.normalizedPeriodlyProfiles.index,
    )
    clustered_data_df = clustered_data_df.stack(level="TimeStep")

    # back in form of the original time series; truncate the padding that
    # was added to reach an integer multiple of the period length
    self.normalizedPredictedData = pd.DataFrame(
        clustered_data_df.values[: len(self.timeSeries)],
        index=self.timeSeries.index,
        columns=self.timeSeries.columns,
    )
    # normalize again if sameMean = True to avoid doubled unnormalization when using _postProcessTimeSeries after
    # createTypicalPeriods has been called
    if self.sameMean:
        self.normalizedPredictedData /= self._normalizedMean
    self.predictedData = self._postProcessTimeSeries(
        self.normalizedPredictedData, applyWeighting=False
    )

    return self.predictedData
1257
+
1258
def indexMatching(self):
    """
    Relates the index of the original time series with the indices
    represented by the clusters.

    :returns: **timeStepMatching** (pandas.DataFrame) -- DataFrame which has the same shape as the original one.
    """
    if not hasattr(self, "_clusterOrder"):
        self.createTypicalPeriods()

    # repeat each period label once per time step within the period
    periodIndex = []
    stepIndex = []
    for label in self._clusterOrder:
        periodIndex.extend([label] * self.timeStepsPerPeriod)
        stepIndex.extend(range(self.timeStepsPerPeriod))

    rows = [periodIndex, stepIndex]
    rowNames = ["PeriodNum", "TimeStep"]

    # if segmentation is chosen, append another column stating which
    # segment each original time step belongs to
    if self.segmentation:
        segmentIndex = []
        for label in self._clusterOrder:
            periodSegments = self.segmentedNormalizedTypicalPeriods.loc[label, :]
            segmentIndex.extend(
                np.repeat(
                    periodSegments.index.get_level_values(0),
                    periodSegments.index.get_level_values(1),
                ).values
            )
        rows.append(segmentIndex)
        rowNames.append("SegmentIndex")

    return pd.DataFrame(rows, index=rowNames, columns=self.timeIndex).T
1304
+
1305
def accuracyIndicators(self):
    """
    Compares the predicted data with the original time series.

    :returns: **pd.DataFrame(indicatorRaw)** (pandas.DataFrame) -- DataFrame with
        RMSE, RMSE of the duration curve and MAE per column, evaluating the
        accuracy of the aggregation
    """
    if not hasattr(self, "predictedData"):
        self.predictOriginalData()

    indicators = {
        "RMSE": {},
        "RMSE_duration": {},
        "MAE": {},
    }

    for column in self.normalizedTimeSeries.columns:
        # undo the weighting so original and prediction are comparable
        origTS = self.normalizedTimeSeries[column]
        if self.weightDict:
            origTS = origTS / self.weightDict[column]
        predTS = self.normalizedPredictedData[column]

        # sort descending to compare the duration curves
        origDuration = origTS.sort_values(ascending=False).reset_index(drop=True)
        predDuration = predTS.sort_values(ascending=False).reset_index(drop=True)

        indicators["RMSE"][column] = np.sqrt(mean_squared_error(origTS, predTS))
        indicators["RMSE_duration"][column] = np.sqrt(
            mean_squared_error(origDuration, predDuration)
        )
        indicators["MAE"][column] = mean_absolute_error(origTS, predTS)

    return pd.DataFrame(indicators)
1338
+
1339
def totalAccuracyIndicators(self):
    """
    Aggregates the per-column accuracy indicators into one root-mean-square
    value per indicator over all time series.
    """
    perColumn = self.accuracyIndicators()
    nColumns = len(self.normalizedTimeSeries.columns)
    return np.sqrt(perColumn.pow(2).sum() / nColumns)