tsam 2.2.2__py3-none-any.whl → 2.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,1309 +1,1358 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- import copy
4
- import time
5
- import warnings
6
-
7
- import pandas as pd
8
- import numpy as np
9
-
10
- from sklearn.metrics import mean_squared_error, mean_absolute_error
11
- from sklearn.metrics.pairwise import euclidean_distances
12
- from sklearn import preprocessing
13
-
14
- from tsam.periodAggregation import aggregatePeriods
15
- from tsam.representations import representations
16
-
17
- pd.set_option("mode.chained_assignment", None)
18
-
19
- # maximum number of iterations while rescaling cluster profiles
20
- MAX_ITERATOR = 20
21
-
22
- # tolerance while rescaling cluster periods to meet the annual sum of the original profile
23
- TOLERANCE = 1e-6
24
-
25
-
26
- # minimal weight that overwrites a weighting of zero in order to carry the profile through the aggregation process
27
- MIN_WEIGHT = 1e-6
28
-
29
-
30
def unstackToPeriods(timeSeries, timeStepsPerPeriod):
    """
    Extend the time series to an integer multiple of the period length and
    group the time series into periods.

    If the number of rows is not a multiple of ``timeStepsPerPeriod``, the
    missing time steps are taken from the head of the series and attached to
    its end (keeping their original index labels).

    :param timeSeries: time series to regroup, one row per time step. required
    :type timeSeries: pandas DataFrame

    :param timeStepsPerPeriod: The number of discrete timesteps which describe one period. required
    :type timeStepsPerPeriod: integer

    :returns: - **unstackedTimeSeries** (pandas DataFrame) -- is stacked such that each row represents a
                candidate period ("PeriodNum" index, "TimeStep" as innermost column level)
              - **timeIndex** (pandas Series index) -- is the modification of the original
                timeseriesindex in case an integer multiple was created
    """
    # work on a copy so the caller's frame is left untouched
    unstackedTimeSeries = timeSeries.copy()

    # extend to an integer multiple of the period length
    remainder = len(timeSeries) % timeStepsPerPeriod
    if remainder != 0:
        # number of timesteps which get attached
        attached_timesteps = timeStepsPerPeriod - remainder

        # take these from the head of the original time series ...
        rep_data = unstackedTimeSeries.head(attached_timesteps)

        # ... and attach them at the end; DataFrame.append was removed in
        # pandas 2.0, pd.concat is the supported equivalent
        unstackedTimeSeries = pd.concat([unstackedTimeSeries, rep_data])

    # period number and step-within-period for every row position
    positions = np.arange(len(unstackedTimeSeries))
    periodIndex = positions // timeStepsPerPeriod
    stepIndex = positions % timeStepsPerPeriod

    # save old index
    timeIndex = copy.deepcopy(unstackedTimeSeries.index)

    # create new double index and unstack the time series
    unstackedTimeSeries.index = pd.MultiIndex.from_arrays(
        [stepIndex, periodIndex], names=["TimeStep", "PeriodNum"]
    )
    unstackedTimeSeries = unstackedTimeSeries.unstack(level="TimeStep")

    return unstackedTimeSeries, timeIndex
81
-
82
-
83
-
84
- class TimeSeriesAggregation(object):
85
- """
86
- Clusters time series data to typical periods.
87
- """
88
-
89
# Values accepted for the ``clusterMethod`` constructor argument;
# ``_check_init_args`` raises a ValueError for anything else.
CLUSTER_METHODS = [
    "averaging",
    "k_means",
    "k_medoids",
    "k_maxoids",
    "hierarchical",
    "adjacent_periods",
]

# Values accepted for ``representationMethod`` when it is not None.
REPRESENTATION_METHODS = [
    "meanRepresentation",
    "medoidRepresentation",
    "maxoidRepresentation",
    "minmaxmeanRepresentation",
    "durationRepresentation",
    "distributionRepresentation",
    "distributionAndMinMaxRepresentation",
]

# Values accepted for ``extremePeriodMethod``. Note that "None" is the
# string, not the Python ``None`` object.
EXTREME_PERIOD_METHODS = [
    "None",
    "append",
    "new_cluster_center",
    "replace_cluster_center",
]
114
-
115
- def __init__(
116
- self,
117
- timeSeries,
118
- resolution=None,
119
- noTypicalPeriods=10,
120
- noSegments=10,
121
- hoursPerPeriod=24,
122
- clusterMethod="hierarchical",
123
- evalSumPeriods=False,
124
- sortValues=False,
125
- sameMean=False,
126
- rescaleClusterPeriods=True,
127
- weightDict=None,
128
- segmentation=False,
129
- extremePeriodMethod="None",
130
- representationMethod=None,
131
- representationDict=None,
132
- distributionPeriodWise=True,
133
- predefClusterOrder=None,
134
- predefClusterCenterIndices=None,
135
- solver="highs",
136
- roundOutput=None,
137
- addPeakMin=None,
138
- addPeakMax=None,
139
- addMeanMin=None,
140
- addMeanMax=None,
141
- ):
142
- """
143
- Initialize the periodly clusters.
144
-
145
- :param timeSeries: DataFrame with the datetime as index and the relevant
146
- time series parameters as columns. required
147
- :type timeSeries: pandas.DataFrame() or dict
148
-
149
- :param resolution: Resolution of the time series in hours [h]. If timeSeries is a
150
- pandas.DataFrame() the resolution is derived from the datetime
151
- index. optional, default: delta_T in timeSeries
152
- :type resolution: float
153
-
154
- :param hoursPerPeriod: Value which defines the length of a cluster period. optional, default: 24
155
- :type hoursPerPeriod: integer
156
-
157
- :param noTypicalPeriods: Number of typical Periods - equivalent to the number of clusters. optional, default: 10
158
- :type noTypicalPeriods: integer
159
-
160
- :param noSegments: Number of segments in which the typical periods shoul be subdivided - equivalent to the
161
- number of inner-period clusters. optional, default: 10
162
- :type noSegments: integer
163
-
164
- :param clusterMethod: Chosen clustering method. optional, default: 'hierarchical'
165
- |br| Options are:
166
-
167
- * 'averaging'
168
- * 'k_means'
169
- * 'k_medoids'
170
- * 'k_maxoids'
171
- * 'hierarchical'
172
- * 'adjacent_periods'
173
- :type clusterMethod: string
174
-
175
- :param evalSumPeriods: Boolean if in the clustering process also the averaged periodly values
176
- shall be integrated additional to the periodly profiles as parameters. optional, default: False
177
- :type evalSumPeriods: boolean
178
-
179
- :param sameMean: Boolean which is used in the normalization procedure. If true, all time series get normalized
180
- such that they have the same mean value. optional, default: False
181
- :type sameMean: boolean
182
-
183
- :param sortValues: Boolean if the clustering should be done by the periodly duration
184
- curves (true) or the original shape of the data. optional (default: False)
185
- :type sortValues: boolean
186
-
187
- :param rescaleClusterPeriods: Decides if the cluster Periods shall get rescaled such that their
188
- weighted mean value fits the mean value of the original time series. optional (default: True)
189
- :type rescaleClusterPeriods: boolean
190
-
191
- :param weightDict: Dictionary which weights the profiles. It is done by scaling
192
- the time series while the normalization process. Normally all time
193
- series have a scale from 0 to 1. By scaling them, the values get
194
- different distances to each other and with this, they are
195
- differently evaluated while the clustering process. optional (default: None )
196
- :type weightDict: dict
197
-
198
- :param extremePeriodMethod: Method how to integrate extreme Periods (peak demand, lowest temperature etc.)
199
- into to the typical period profiles. optional, default: 'None'
200
- |br| Options are:
201
-
202
- * None: No integration at all.
203
- * 'append': append typical Periods to cluster centers
204
- * 'new_cluster_center': add the extreme period as additional cluster center. It is checked then for all
205
- Periods if they fit better to the this new center or their original cluster center.
206
- * 'replace_cluster_center': replaces the cluster center of the
207
- cluster where the extreme period belongs to with the periodly profile of the extreme period. (Worst
208
- case system design)
209
- :type extremePeriodMethod: string
210
-
211
- :param representationMethod: Chosen representation. If specified, the clusters are represented in the chosen
212
- way. Otherwise, each clusterMethod has its own commonly used default representation method.
213
- |br| Options are:
214
-
215
- * 'meanRepresentation' (default of 'averaging' and 'k_means')
216
- * 'medoidRepresentation' (default of 'k_medoids', 'hierarchical' and 'adjacent_periods')
217
- * 'minmaxmeanRepresentation'
218
- * 'durationRepresentation'/ 'distributionRepresentation'
219
- * 'distribtionAndMinMaxRepresentation'
220
- :type representationMethod: string
221
-
222
- :param representationDict: Dictionary which states for each attribute whether the profiles in each cluster
223
- should be represented by the minimum value or maximum value of each time step. This enables estimations
224
- to the safe side. This dictionary is needed when 'minmaxmeanRepresentation' is chosen. If not specified, the
225
- dictionary is set to containing 'mean' values only.
226
- :type representationDict: dict
227
-
228
- :param distributionPeriodWise: If durationRepresentation is chosen, you can choose whether the distribution of
229
- each cluster should be separately preserved or that of the original time series only (default: True)
230
- :type distributionPeriodWise:
231
-
232
- :param predefClusterOrder: Instead of aggregating a time series, a predefined grouping is taken
233
- which is given by this list. optional (default: None)
234
- :type predefClusterOrder: list or array
235
-
236
- :param predefClusterCenterIndices: If predefClusterOrder is give, this list can define the representative
237
- cluster candidates. Otherwise the medoid is taken. optional (default: None)
238
- :type predefClusterCenterIndices: list or array
239
-
240
- :param solver: Solver that is used for k_medoids clustering. optional (default: 'cbc' )
241
- :type solver: string
242
-
243
- :param roundOutput: Decimals to what the output time series get round. optional (default: None )
244
- :type roundOutput: integer
245
-
246
- :param addPeakMin: List of column names which's minimal value shall be added to the
247
- typical periods. E.g.: ['Temperature']. optional, default: []
248
- :type addPeakMin: list
249
-
250
- :param addPeakMax: List of column names which's maximal value shall be added to the
251
- typical periods. E.g. ['EDemand', 'HDemand']. optional, default: []
252
- :type addPeakMax: list
253
-
254
- :param addMeanMin: List of column names where the period with the cumulative minimal value
255
- shall be added to the typical periods. E.g. ['Photovoltaic']. optional, default: []
256
- :type addMeanMin: list
257
-
258
- :param addMeanMax: List of column names where the period with the cumulative maximal value
259
- shall be added to the typical periods. optional, default: []
260
- :type addMeanMax: list
261
- """
262
- if addMeanMin is None:
263
- addMeanMin = []
264
- if addMeanMax is None:
265
- addMeanMax = []
266
- if addPeakMax is None:
267
- addPeakMax = []
268
- if addPeakMin is None:
269
- addPeakMin = []
270
- if weightDict is None:
271
- weightDict = {}
272
- self.timeSeries = timeSeries
273
-
274
- self.resolution = resolution
275
-
276
- self.hoursPerPeriod = hoursPerPeriod
277
-
278
- self.noTypicalPeriods = noTypicalPeriods
279
-
280
- self.noSegments = noSegments
281
-
282
- self.clusterMethod = clusterMethod
283
-
284
- self.extremePeriodMethod = extremePeriodMethod
285
-
286
- self.evalSumPeriods = evalSumPeriods
287
-
288
- self.sortValues = sortValues
289
-
290
- self.sameMean = sameMean
291
-
292
- self.rescaleClusterPeriods = rescaleClusterPeriods
293
-
294
- self.weightDict = weightDict
295
-
296
- self.representationMethod = representationMethod
297
-
298
- self.representationDict = representationDict
299
-
300
- self.distributionPeriodWise = distributionPeriodWise
301
-
302
- self.predefClusterOrder = predefClusterOrder
303
-
304
- self.predefClusterCenterIndices = predefClusterCenterIndices
305
-
306
- self.solver = solver
307
-
308
- self.segmentation = segmentation
309
-
310
- self.roundOutput = roundOutput
311
-
312
- self.addPeakMin = addPeakMin
313
-
314
- self.addPeakMax = addPeakMax
315
-
316
- self.addMeanMin = addMeanMin
317
-
318
- self.addMeanMax = addMeanMax
319
-
320
- self._check_init_args()
321
-
322
- # internal attributes
323
- self._normalizedMean = None
324
-
325
- return
326
-
327
- def _check_init_args(self):
328
-
329
- # check timeSeries and set it as pandas DataFrame
330
- if not isinstance(self.timeSeries, pd.DataFrame):
331
- if isinstance(self.timeSeries, dict):
332
- self.timeSeries = pd.DataFrame(self.timeSeries)
333
- elif isinstance(self.timeSeries, np.ndarray):
334
- self.timeSeries = pd.DataFrame(self.timeSeries)
335
- else:
336
- raise ValueError(
337
- "timeSeries has to be of type pandas.DataFrame() "
338
- + "or of type np.array() "
339
- "in initialization of object of class " + type(self).__name__
340
- )
341
-
342
- # check if extreme periods exist in the dataframe
343
- for peak in self.addPeakMin:
344
- if peak not in self.timeSeries.columns:
345
- raise ValueError(
346
- peak
347
- + ' listed in "addPeakMin"'
348
- + " does not occur as timeSeries column"
349
- )
350
- for peak in self.addPeakMax:
351
- if peak not in self.timeSeries.columns:
352
- raise ValueError(
353
- peak
354
- + ' listed in "addPeakMax"'
355
- + " does not occur as timeSeries column"
356
- )
357
- for peak in self.addMeanMin:
358
- if peak not in self.timeSeries.columns:
359
- raise ValueError(
360
- peak
361
- + ' listed in "addMeanMin"'
362
- + " does not occur as timeSeries column"
363
- )
364
- for peak in self.addMeanMax:
365
- if peak not in self.timeSeries.columns:
366
- raise ValueError(
367
- peak
368
- + ' listed in "addMeanMax"'
369
- + " does not occur as timeSeries column"
370
- )
371
-
372
- # derive resolution from date time index if not provided
373
- if self.resolution is None:
374
- try:
375
- timedelta = self.timeSeries.index[1] - self.timeSeries.index[0]
376
- self.resolution = float(timedelta.total_seconds()) / 3600
377
- except AttributeError:
378
- raise ValueError(
379
- "'resolution' argument has to be nonnegative float or int"
380
- + " or the given timeseries needs a datetime index"
381
- )
382
- except TypeError:
383
- try:
384
- self.timeSeries.index = pd.to_datetime(self.timeSeries.index)
385
- timedelta = self.timeSeries.index[1] - self.timeSeries.index[0]
386
- self.resolution = float(timedelta.total_seconds()) / 3600
387
- except:
388
- raise ValueError(
389
- "'resolution' argument has to be nonnegative float or int"
390
- + " or the given timeseries needs a datetime index"
391
- )
392
-
393
- if not (isinstance(self.resolution, int) or isinstance(self.resolution, float)):
394
- raise ValueError("resolution has to be nonnegative float or int")
395
-
396
- # check hoursPerPeriod
397
- if self.hoursPerPeriod is None or self.hoursPerPeriod <= 0:
398
- raise ValueError("hoursPerPeriod has to be nonnegative float or int")
399
-
400
- # check typical Periods
401
- if (
402
- self.noTypicalPeriods is None
403
- or self.noTypicalPeriods <= 0
404
- or not isinstance(self.noTypicalPeriods, int)
405
- ):
406
- raise ValueError("noTypicalPeriods has to be nonnegative integer")
407
- self.timeStepsPerPeriod = int(self.hoursPerPeriod / self.resolution)
408
- if not self.timeStepsPerPeriod == self.hoursPerPeriod / self.resolution:
409
- raise ValueError(
410
- "The combination of hoursPerPeriod and the "
411
- + "resulution does not result in an integer "
412
- + "number of time steps per period"
413
- )
414
- if self.segmentation:
415
- if self.noSegments > self.timeStepsPerPeriod:
416
- warnings.warn(
417
- "The number of segments must be less than or equal to the number of time steps per period. "
418
- "Segment number is decreased to number of time steps per period."
419
- )
420
- self.noSegments = self.timeStepsPerPeriod
421
-
422
- # check clusterMethod
423
- if self.clusterMethod not in self.CLUSTER_METHODS:
424
- raise ValueError(
425
- "clusterMethod needs to be one of "
426
- + "the following: "
427
- + "{}".format(self.CLUSTER_METHODS)
428
- )
429
-
430
- # check representationMethod
431
- if (
432
- self.representationMethod is not None
433
- and self.representationMethod not in self.REPRESENTATION_METHODS
434
- ):
435
- raise ValueError(
436
- "If specified, representationMethod needs to be one of "
437
- + "the following: "
438
- + "{}".format(self.REPRESENTATION_METHODS)
439
- )
440
-
441
- # if representationDict None, represent by maximum time steps in each cluster
442
- if self.representationDict is None:
443
- self.representationDict = {i: "mean" for i in list(self.timeSeries.columns)}
444
- # sort representationDict alphabetically to make sure that the min, max or mean function is applied to the right
445
- # column
446
- self.representationDict = (
447
- pd.Series(self.representationDict).sort_index(axis=0).to_dict()
448
- )
449
-
450
- # check extremePeriods
451
- if self.extremePeriodMethod not in self.EXTREME_PERIOD_METHODS:
452
- raise ValueError(
453
- "extremePeriodMethod needs to be one of "
454
- + "the following: "
455
- + "{}".format(self.EXTREME_PERIOD_METHODS)
456
- )
457
-
458
- # check evalSumPeriods
459
- if not isinstance(self.evalSumPeriods, bool):
460
- raise ValueError("evalSumPeriods has to be boolean")
461
- # check sortValues
462
- if not isinstance(self.sortValues, bool):
463
- raise ValueError("sortValues has to be boolean")
464
- # check sameMean
465
- if not isinstance(self.sameMean, bool):
466
- raise ValueError("sameMean has to be boolean")
467
- # check rescaleClusterPeriods
468
- if not isinstance(self.rescaleClusterPeriods, bool):
469
- raise ValueError("rescaleClusterPeriods has to be boolean")
470
-
471
- # check predefClusterOrder
472
- if self.predefClusterOrder is not None:
473
- if not isinstance(self.predefClusterOrder, (list, np.ndarray)):
474
- raise ValueError("predefClusterOrder has to be an array or list")
475
- if self.predefClusterCenterIndices is not None:
476
- # check predefClusterCenterIndices
477
- if not isinstance(self.predefClusterCenterIndices, (list, np.ndarray)):
478
- raise ValueError(
479
- "predefClusterCenterIndices has to be an array or list"
480
- )
481
- elif self.predefClusterCenterIndices is not None:
482
- raise ValueError(
483
- 'If "predefClusterCenterIndices" is defined, "predefClusterOrder" needs to be defined as well'
484
- )
485
-
486
- return
487
-
488
def _normalizeTimeSeries(self, sameMean=False):
    """
    Scale every column of ``self.timeSeries`` independently with a
    min-max scaler.

    The column means of the scaled data are stored in
    ``self._normalizedMean``.

    :param sameMean: If True, every scaled column is additionally divided by its
        mean so that all columns share the same mean value.
        Relevant for weighting time series. optional (default: False)
    :type sameMean: boolean

    :returns: normalized time series (same index and columns as ``self.timeSeries``)
    """
    scaledValues = preprocessing.MinMaxScaler().fit_transform(self.timeSeries)
    scaledFrame = pd.DataFrame(
        scaledValues,
        index=self.timeSeries.index,
        columns=self.timeSeries.columns,
    )

    # remembered for the back transformation
    self._normalizedMean = scaledFrame.mean()

    if not sameMean:
        return scaledFrame

    scaledFrame /= self._normalizedMean
    return scaledFrame
510
-
511
def _unnormalizeTimeSeries(self, normalizedTimeSeries, sameMean=False):
    """
    Back transformation of '_normalizeTimeSeries'.

    A min-max scaler is fitted on ``self.timeSeries`` again and its inverse
    transformation is applied to the given data.

    :param normalizedTimeSeries: Time series which should get back transformed. With
        ``sameMean=True`` it is multiplied in place by the stored column means. required
    :type normalizedTimeSeries: pandas.DataFrame()

    :param sameMean: Has to have the same value as in _normalizeTimeSeries. optional (default: False)
    :type sameMean: boolean

    :returns: unnormalized time series
    """
    from sklearn import preprocessing

    scaler = preprocessing.MinMaxScaler()
    scaler.fit(self.timeSeries)

    # undo the division by the column means (in place, as before)
    if sameMean:
        normalizedTimeSeries *= self._normalizedMean

    restoredValues = scaler.inverse_transform(normalizedTimeSeries)
    return pd.DataFrame(
        restoredValues,
        index=normalizedTimeSeries.index,
        columns=normalizedTimeSeries.columns,
    )
539
-
540
def _preProcessTimeSeries(self):
    """
    Normalize the time series, weight them based on the weight dict and
    put them into the correct matrix format.

    Sets ``self.normalizedTimeSeries``, ``self.normalizedPeriodlyProfiles``
    and ``self.timeIndex``. Weights in ``self.weightDict`` that are smaller
    than ``MIN_WEIGHT`` are raised to ``MIN_WEIGHT`` (with a warning).

    :raises ValueError: if the pre-processed profiles contain NaN
    """
    # first sort the time series columns in order to avoid the bug mentioned
    # in #18; not done in place, so a DataFrame handed in by the caller
    # keeps its column order
    self.timeSeries = self.timeSeries.sort_index(axis=1)

    # convert the dataframe to floats
    self.timeSeries = self.timeSeries.astype(float)

    # normalize the time series and group them to periodly profiles
    self.normalizedTimeSeries = self._normalizeTimeSeries(sameMean=self.sameMean)

    for column in self.weightDict:
        if self.weightDict[column] < MIN_WEIGHT:
            # a weight of (almost) zero would remove the profile from the
            # aggregation, so it is lifted to the minimal weight
            warnings.warn(
                'weight of "'
                + str(column)
                + '" set to the minmal tolerable weighting'
            )
            self.weightDict[column] = MIN_WEIGHT
        self.normalizedTimeSeries[column] = (
            self.normalizedTimeSeries[column] * self.weightDict[column]
        )

    self.normalizedPeriodlyProfiles, self.timeIndex = unstackToPeriods(
        self.normalizedTimeSeries, self.timeStepsPerPeriod
    )

    # check if no NaN is in the resulting profiles
    if self.normalizedPeriodlyProfiles.isnull().values.any():
        raise ValueError(
            "Pre processed data includes NaN. Please check the timeSeries input data."
        )
575
-
576
- def _postProcessTimeSeries(self, normalizedTimeSeries, applyWeighting=True):
577
- """
578
- Neutralizes the weighting the time series back and unnormalizes them.
579
- """
580
- if applyWeighting:
581
- for column in self.weightDict:
582
- normalizedTimeSeries[column] = (
583
- normalizedTimeSeries[column] / self.weightDict[column]
584
- )
585
-
586
- unnormalizedTimeSeries = self._unnormalizeTimeSeries(
587
- normalizedTimeSeries, sameMean=self.sameMean
588
- )
589
-
590
- if self.roundOutput is not None:
591
- unnormalizedTimeSeries = unnormalizedTimeSeries.round(
592
- decimals=self.roundOutput
593
- )
594
-
595
- return unnormalizedTimeSeries
596
-
597
- def _addExtremePeriods(
598
- self,
599
- groupedSeries,
600
- clusterCenters,
601
- clusterOrder,
602
- extremePeriodMethod="new_cluster_center",
603
- addPeakMin=None,
604
- addPeakMax=None,
605
- addMeanMin=None,
606
- addMeanMax=None,
607
- ):
608
- """
609
- Adds different extreme periods based on the to the clustered data,
610
- decribed by the clusterCenters and clusterOrder.
611
-
612
- :param groupedSeries: periodly grouped groupedSeries on which basis it should be decided,
613
- which period is an extreme period. required
614
- :type groupedSeries: pandas.DataFrame()
615
-
616
- :param clusterCenters: Output from clustering with sklearn. required
617
- :type clusterCenters: dict
618
-
619
- :param clusterOrder: Output from clsutering with sklearn. required
620
- :type clusterOrder: dict
621
-
622
- :param extremePeriodMethod: Chosen extremePeriodMethod. The method. optional(default: 'new_cluster_center' )
623
- :type extremePeriodMethod: string
624
-
625
- :returns: - **newClusterCenters** -- The new cluster centers extended with the extreme periods.
626
- - **newClusterOrder** -- The new cluster order including the extreme periods.
627
- - **extremeClusterIdx** -- A list of indices where in the newClusterCenters are the extreme
628
- periods located.
629
- """
630
-
631
- # init required dicts and lists
632
- self.extremePeriods = {}
633
- extremePeriodNo = []
634
-
635
- ccList = [center.tolist() for center in clusterCenters]
636
-
637
- # check which extreme periods exist in the profile and add them to
638
- # self.extremePeriods dict
639
- for column in self.timeSeries.columns:
640
-
641
- if column in addPeakMax:
642
- stepNo = groupedSeries[column].max(axis=1).idxmax()
643
- # add only if stepNo is not already in extremePeriods
644
- # if it is not already a cluster center
645
- if (
646
- stepNo not in extremePeriodNo
647
- and groupedSeries.loc[stepNo, :].values.tolist() not in ccList
648
- ):
649
- max_col = self._append_col_with(column, " max.")
650
- self.extremePeriods[max_col] = {
651
- "stepNo": stepNo,
652
- "profile": groupedSeries.loc[stepNo, :].values,
653
- "column": column,
654
- }
655
- extremePeriodNo.append(stepNo)
656
-
657
- if column in addPeakMin:
658
- stepNo = groupedSeries[column].min(axis=1).idxmin()
659
- # add only if stepNo is not already in extremePeriods
660
- # if it is not already a cluster center
661
- if (
662
- stepNo not in extremePeriodNo
663
- and groupedSeries.loc[stepNo, :].values.tolist() not in ccList
664
- ):
665
- min_col = self._append_col_with(column, " min.")
666
- self.extremePeriods[min_col] = {
667
- "stepNo": stepNo,
668
- "profile": groupedSeries.loc[stepNo, :].values,
669
- "column": column,
670
- }
671
- extremePeriodNo.append(stepNo)
672
-
673
- if column in addMeanMax:
674
- stepNo = groupedSeries[column].mean(axis=1).idxmax()
675
- # add only if stepNo is not already in extremePeriods
676
- # if it is not already a cluster center
677
- if (
678
- stepNo not in extremePeriodNo
679
- and groupedSeries.loc[stepNo, :].values.tolist() not in ccList
680
- ):
681
- mean_max_col = self._append_col_with(column, " daily max.")
682
- self.extremePeriods[mean_max_col] = {
683
- "stepNo": stepNo,
684
- "profile": groupedSeries.loc[stepNo, :].values,
685
- "column": column,
686
- }
687
- extremePeriodNo.append(stepNo)
688
-
689
- if column in addMeanMin:
690
- stepNo = groupedSeries[column].mean(axis=1).idxmin()
691
- # add only if stepNo is not already in extremePeriods and
692
- # if it is not already a cluster center
693
- if (
694
- stepNo not in extremePeriodNo
695
- and groupedSeries.loc[stepNo, :].values.tolist() not in ccList
696
- ):
697
- mean_min_col = self._append_col_with(column, " daily min.")
698
- self.extremePeriods[mean_min_col] = {
699
- "stepNo": stepNo,
700
- "profile": groupedSeries.loc[stepNo, :].values,
701
- "column": column,
702
- }
703
- extremePeriodNo.append(stepNo)
704
-
705
- for periodType in self.extremePeriods:
706
- # get current related clusters of extreme periods
707
- self.extremePeriods[periodType]["clusterNo"] = clusterOrder[
708
- self.extremePeriods[periodType]["stepNo"]
709
- ]
710
-
711
- # init new cluster structure
712
- newClusterCenters = []
713
- newClusterOrder = clusterOrder
714
- extremeClusterIdx = []
715
-
716
- # integrate extreme periods to clusters
717
- if extremePeriodMethod == "append":
718
- # attach extreme periods to cluster centers
719
- for i, cluster_center in enumerate(clusterCenters):
720
- newClusterCenters.append(cluster_center)
721
- for i, periodType in enumerate(self.extremePeriods):
722
- extremeClusterIdx.append(len(newClusterCenters))
723
- newClusterCenters.append(self.extremePeriods[periodType]["profile"])
724
- newClusterOrder[self.extremePeriods[periodType]["stepNo"]] = i + len(
725
- clusterCenters
726
- )
727
-
728
- elif extremePeriodMethod == "new_cluster_center":
729
- for i, cluster_center in enumerate(clusterCenters):
730
- newClusterCenters.append(cluster_center)
731
- # attach extrem periods to cluster centers and consider for all periods
732
- # if the fit better to the cluster or the extrem period
733
- for i, periodType in enumerate(self.extremePeriods):
734
- extremeClusterIdx.append(len(newClusterCenters))
735
- newClusterCenters.append(self.extremePeriods[periodType]["profile"])
736
- self.extremePeriods[periodType]["newClusterNo"] = i + len(
737
- clusterCenters
738
- )
739
-
740
- for i, cPeriod in enumerate(newClusterOrder):
741
- # caclulate euclidean distance to cluster center
742
- cluster_dist = sum(
743
- (groupedSeries.iloc[i].values - clusterCenters[cPeriod]) ** 2
744
- )
745
- for ii, extremPeriodType in enumerate(self.extremePeriods):
746
- # exclude other extreme periods from adding to the new
747
- # cluster center
748
- isOtherExtreme = False
749
- for otherExPeriod in self.extremePeriods:
750
- if (
751
- i == self.extremePeriods[otherExPeriod]["stepNo"]
752
- and otherExPeriod != extremPeriodType
753
- ):
754
- isOtherExtreme = True
755
- # calculate distance to extreme periods
756
- extperiod_dist = sum(
757
- (
758
- groupedSeries.iloc[i].values
759
- - self.extremePeriods[extremPeriodType]["profile"]
760
- )
761
- ** 2
762
- )
763
- # choose new cluster relation
764
- if extperiod_dist < cluster_dist and not isOtherExtreme:
765
- newClusterOrder[i] = self.extremePeriods[extremPeriodType][
766
- "newClusterNo"
767
- ]
768
-
769
- elif extremePeriodMethod == "replace_cluster_center":
770
- # Worst Case Clusterperiods
771
- newClusterCenters = clusterCenters
772
- for periodType in self.extremePeriods:
773
- index = groupedSeries.columns.get_loc(
774
- self.extremePeriods[periodType]["column"]
775
- )
776
- newClusterCenters[self.extremePeriods[periodType]["clusterNo"]][
777
- index
778
- ] = self.extremePeriods[periodType]["profile"][index]
779
- if (
780
- not self.extremePeriods[periodType]["clusterNo"]
781
- in extremeClusterIdx
782
- ):
783
- extremeClusterIdx.append(
784
- self.extremePeriods[periodType]["clusterNo"]
785
- )
786
-
787
- return newClusterCenters, newClusterOrder, extremeClusterIdx
788
-
789
- def _append_col_with(self, column, append_with=" max."):
790
- """Appends a string to the column name. For MultiIndexes, which turn out to be
791
- tuples when this method is called, only last level is changed"""
792
- if isinstance(column, str):
793
- return column + append_with
794
- elif isinstance(column, tuple):
795
- col = list(column)
796
- col[-1] = col[-1] + append_with
797
- return tuple(col)
798
-
799
def _rescaleClusterPeriods(self, clusterOrder, clusterPeriods, extremeClusterIdx):
    """
    Rescale the values of the clustered Periods such that mean of each time
    series in the typical Periods fits the mean value of the original time
    series, without changing the values of the extremePeriods.

    The rescaling is repeated per column until the weighted sum of the
    typical periods matches the sum of the original profiles within
    ``TOLERANCE`` or ``MAX_ITERATOR`` iterations are reached; in the latter
    case a warning with the remaining deviation is issued.

    :param clusterOrder: not used by this method
    :param clusterPeriods: profiles of the typical periods, one row per cluster. required
    :param extremeClusterIdx: row positions in ``clusterPeriods`` that hold extreme periods;
        these rows are excluded from the scaling. required
    :type extremeClusterIdx: list

    :returns: the rescaled typical periods as numpy array
    """
    # number of occurrences of every typical period
    weightingVec = pd.Series(self._clusterPeriodNoOccur).values
    typicalPeriods = pd.DataFrame(
        clusterPeriods, columns=self.normalizedPeriodlyProfiles.columns
    )
    idx_wo_peak = np.delete(typicalPeriods.index, extremeClusterIdx)
    for column in self.timeSeries.columns:
        sum_raw = self.normalizedPeriodlyProfiles[column].sum().sum()
        sum_peak = sum(
            weightingVec[extremeClusterIdx]
            * typicalPeriods[column].loc[extremeClusterIdx, :].sum(axis=1)
        )
        sum_clu_wo_peak = sum(
            weightingVec[idx_wo_peak]
            * typicalPeriods[column].loc[idx_wo_peak, :].sum(axis=1)
        )

        # define the upper scale dependent on the weighting of the series
        scale_ub = 1.0
        if self.sameMean:
            scale_ub = (
                scale_ub
                * self.timeSeries[column].max()
                / self.timeSeries[column].mean()
            )
        if column in self.weightDict:
            scale_ub = scale_ub * self.weightDict[column]

        # difference between predicted and original sum
        diff = abs(sum_raw - (sum_clu_wo_peak + sum_peak))

        # use while loop to rescale cluster periods
        a = 0
        while diff > sum_raw * TOLERANCE and a < MAX_ITERATOR:
            # rescale values
            typicalPeriods.loc[idx_wo_peak, column] = (
                typicalPeriods[column].loc[idx_wo_peak, :].values
                * (sum_raw - sum_peak)
                / sum_clu_wo_peak
            )

            # reset values higher than the upper scale or less than zero
            # and replace NaN (e.g. from a division by zero above) by zero.
            # This is done on a separate frame which is assigned back once:
            # a chained assignment (frame[column][mask] = value) does not
            # reach the frame when pandas Copy-on-Write is active.
            columnProfiles = typicalPeriods[column]
            columnProfiles = columnProfiles.mask(columnProfiles > scale_ub, scale_ub)
            columnProfiles = columnProfiles.mask(columnProfiles < 0.0, 0.0)
            typicalPeriods[column] = columnProfiles.fillna(0.0)

            # calc new sum and new diff to orig data
            sum_clu_wo_peak = sum(
                weightingVec[idx_wo_peak]
                * typicalPeriods[column].loc[idx_wo_peak, :].sum(axis=1)
            )
            diff = abs(sum_raw - (sum_clu_wo_peak + sum_peak))
            a += 1
        if a == MAX_ITERATOR:
            deviation = str(round((diff / sum_raw) * 100, 2))
            warnings.warn(
                'Max iteration number reached for "'
                + str(column)
                + '" while rescaling the cluster periods.'
                + " The integral of the aggregated time series deviates by: "
                + deviation
                + "%"
            )
    return typicalPeriods.values
870
-
871
def _clusterSortedPeriods(self, candidates, n_init=20):
    """
    Cluster the periods by their sorted profiles (duration curve clustering).

    Every period of every column in ``self.timeSeries`` is sorted in
    descending order, the sorted profiles are clustered with
    ``aggregatePeriods`` and, for every resulting cluster, the unsorted
    candidate whose sorted profile has the smallest squared distance to the
    cluster mean of the sorted profiles is chosen as representative.

    Side effect: ``self.clusterCenterIndices`` is overwritten with the
    indices returned by ``aggregatePeriods``.

    :param candidates: Candidate periods, one row per period, in the same row
        order as ``self.normalizedPeriodlyProfiles``. It is indexed with
        integer arrays, so it is presumably a numpy array.
    :type candidates: numpy.ndarray

    :param n_init: Not used by this method; kept so that existing callers
        do not break.
    :type n_init: integer

    :returns: - **clusterCenters_C** (list) -- one representative candidate row per cluster
              - **clusterOrders_C** -- cluster label of every period as returned by ``aggregatePeriods``
    """
    # work on a deep copy so the unsorted profiles stay untouched
    normalizedSortedPeriodlyProfiles = copy.deepcopy(self.normalizedPeriodlyProfiles)
    for column in self.timeSeries.columns:
        df = normalizedSortedPeriodlyProfiles[column]
        # np.sort returns a new array instead of sorting ``df.values`` in
        # place; the array handed out by pandas may be read-only.
        # Reversing the ascending result yields the descending duration curve.
        values = np.sort(df.values, axis=1)[:, ::-1]
        normalizedSortedPeriodlyProfiles[column] = pd.DataFrame(
            values, df.index, df.columns
        )
    sortedClusterValues = normalizedSortedPeriodlyProfiles.values

    # the cluster centers of the sorted profiles themselves are not needed
    (
        _,
        self.clusterCenterIndices,
        clusterOrders_C,
    ) = aggregatePeriods(
        sortedClusterValues,
        n_clusters=self.noTypicalPeriods,
        n_iter=30,
        solver=self.solver,
        clusterMethod=self.clusterMethod,
        representationMethod=self.representationMethod,
        representationDict=self.representationDict,
        distributionPeriodWise=self.distributionPeriodWise,
        timeStepsPerPeriod=self.timeStepsPerPeriod,
    )

    clusterCenters_C = []

    # pick, per cluster, the original period whose sorted profile is the
    # most representative one
    for clusterNum in np.unique(clusterOrders_C):
        indice = np.where(clusterOrders_C == clusterNum)[0]
        if len(indice) > 1:
            # mean of the sorted profiles over all periods in the cluster
            currentMean_C = sortedClusterValues[indice].mean(axis=0)
            # position (within the cluster) of the period closest to that mean
            mindistIdx_C = np.argmin(
                np.square(sortedClusterValues[indice] - currentMean_C).sum(axis=1)
            )
            # use the original, unsorted profile of that period
            clusterCenters_C.append(candidates[indice][mindistIdx_C])
        else:
            # a cluster with a single period is represented by that period
            clusterCenters_C.append(candidates[indice][0])

    return clusterCenters_C, clusterOrders_C
933
-
934
def createTypicalPeriods(self):
    """
    Cluster the periods and derive the typical periods.

    The method preprocesses the time series, assembles the clustering
    candidates (optionally extended by the per-period sums), clusters them or
    applies the predefined cluster order, optionally integrates extreme
    periods, counts how often each cluster occurs, optionally rescales and
    segments the cluster periods and finally post-processes the result.

    Among others, the attributes ``clusterCenters``, ``clusterCenterIndices``,
    ``_clusterOrder``, ``clusterPeriods``, ``extremeClusterIdx``,
    ``_clusterPeriodNoOccur``, ``normalizedTypicalPeriods`` and
    ``typicalPeriods`` are set.

    :returns: **self.typicalPeriods** -- All typical Periods in scaled form.
    """
    self._preProcessTimeSeries()

    # assemble the candidates, optionally with additional cluster parameters
    if self.evalSumPeriods:
        evaluationValues = (
            self.normalizedPeriodlyProfiles.stack(level=0).sum(axis=1).unstack(level=1)
        )
        # negative slice end: number of trailing values to strip again later
        delClusterParams = -len(evaluationValues.columns)
        candidates = np.concatenate(
            (self.normalizedPeriodlyProfiles.values, evaluationValues.values),
            axis=1,
        )
    else:
        delClusterParams = None
        candidates = self.normalizedPeriodlyProfiles.values

    if self.predefClusterOrder is not None:
        # a predefined cluster sequence skips the aggregation; only the
        # representation has to be determined
        self._clusterOrder = self.predefClusterOrder
        if self.predefClusterCenterIndices is not None:
            # representatives are given explicitly
            self.clusterCenterIndices = self.predefClusterCenterIndices
            self.clusterCenters = candidates[self.predefClusterCenterIndices]
        else:
            # fall back to the medoids
            self.clusterCenters, self.clusterCenterIndices = representations(
                candidates,
                self._clusterOrder,
                default="medoidRepresentation",
                representationMethod=self.representationMethod,
                representationDict=self.representationDict,
                timeStepsPerPeriod=self.timeStepsPerPeriod,
            )
    else:
        cluster_duration = time.time()
        if self.sortValues:
            # duration curve clustering
            self.clusterCenters, self._clusterOrder = self._clusterSortedPeriods(
                candidates
            )
        else:
            # cluster the original profiles
            (
                self.clusterCenters,
                self.clusterCenterIndices,
                self._clusterOrder,
            ) = aggregatePeriods(
                candidates,
                n_clusters=self.noTypicalPeriods,
                n_iter=100,
                solver=self.solver,
                clusterMethod=self.clusterMethod,
                representationMethod=self.representationMethod,
                representationDict=self.representationDict,
                distributionPeriodWise=self.distributionPeriodWise,
                timeStepsPerPeriod=self.timeStepsPerPeriod,
            )
        self.clusteringDuration = time.time() - cluster_duration

    # cluster centers without the additional evaluation values
    self.clusterPeriods = [
        cluster_center[:delClusterParams] for cluster_center in self.clusterCenters
    ]

    if self.extremePeriodMethod == "None":
        self.extremeClusterIdx = []
    else:
        # overwrite clusterPeriods and clusterOrder
        (
            self.clusterPeriods,
            self._clusterOrder,
            self.extremeClusterIdx,
        ) = self._addExtremePeriods(
            self.normalizedPeriodlyProfiles,
            self.clusterPeriods,
            self._clusterOrder,
            extremePeriodMethod=self.extremePeriodMethod,
            addPeakMin=self.addPeakMin,
            addPeakMax=self.addPeakMax,
            addMeanMin=self.addMeanMin,
            addMeanMax=self.addMeanMax,
        )

    # number of appearances of every typical period
    nums, counts = np.unique(self._clusterOrder, return_counts=True)
    self._clusterPeriodNoOccur = dict(zip(nums, counts))

    if self.rescaleClusterPeriods:
        self.clusterPeriods = self._rescaleClusterPeriods(
            self._clusterOrder, self.clusterPeriods, self.extremeClusterIdx
        )

    # if time steps were attached to complete the last period, the cluster of
    # that last period occurs only fractionally
    remainder = len(self.timeSeries) % self.timeStepsPerPeriod
    if remainder != 0:
        self._clusterPeriodNoOccur[self._clusterOrder[-1]] -= (
            1 - float(remainder) / self.timeStepsPerPeriod
        )

    # put the clustered data in pandas format
    self.normalizedTypicalPeriods = pd.DataFrame(
        self.clusterPeriods, columns=self.normalizedPeriodlyProfiles.columns
    ).stack(level="TimeStep")

    if self.segmentation:
        from tsam.utils.segmentation import segmentation

        (
            self.segmentedNormalizedTypicalPeriods,
            self.predictedSegmentedNormalizedTypicalPeriods,
        ) = segmentation(
            self.normalizedTypicalPeriods,
            self.noSegments,
            self.timeStepsPerPeriod,
            self.solver,
            representationMethod=self.representationMethod,
            representationDict=self.representationDict,
            distributionPeriodWise=self.distributionPeriodWise,
        )
        self.normalizedTypicalPeriods = (
            self.segmentedNormalizedTypicalPeriods.reset_index(level=3, drop=True)
        )

    # scale back
    self.typicalPeriods = self._postProcessTimeSeries(self.normalizedTypicalPeriods)

    # warn if the boundaries of the original time series are exceeded
    exceeds_max = np.array(
        self.typicalPeriods.max(axis=0) > self.timeSeries.max(axis=0)
    ).any()
    if exceeds_max:
        warnings.warn(
            "Something went wrong: At least one maximal value of the aggregated time series exceeds the maximal value the input time series"
        )
    falls_below_min = np.array(
        self.typicalPeriods.min(axis=0) < self.timeSeries.min(axis=0)
    ).any()
    if falls_below_min:
        warnings.warn(
            "Something went wrong: At least one minimal value of the aggregated time series exceeds the minimal value the input time series"
        )
    return self.typicalPeriods
1083
-
1084
def prepareEnersysInput(self):
    """
    Deprecated no-op kept for backward compatibility.

    The values this method used to prepare for the energy system
    optimization input are available directly as properties, so the method
    only emits a ``DeprecationWarning``.

    :returns: None
    """
    warnings.warn(
        '"prepareEnersysInput" is deprecated, since the created attributes can be directly accessed as properties',
        DeprecationWarning,
        # attribute the warning to the caller: Python's default filters only
        # show DeprecationWarning when it is triggered from user code
        stacklevel=2,
    )
    return
1094
-
1095
@property
def stepIdx(self):
    """
    Index inside a single cluster.

    :returns: list counting from 0 up to ``noSegments - 1`` if segmentation is
        active, otherwise up to ``timeStepsPerPeriod - 1``.
    """
    step_count = self.noSegments if self.segmentation else self.timeStepsPerPeriod
    return list(range(step_count))
1104
-
1105
@property
def clusterPeriodIdx(self):
    """
    Index of the clustered periods.

    Runs ``createTypicalPeriods`` first if no cluster order exists yet.

    :returns: sorted numpy array with the unique labels in ``_clusterOrder``.
    """
    # check the backing attribute, like the other properties do; probing the
    # public ``clusterOrder`` property only worked through its side effect
    if not hasattr(self, "_clusterOrder"):
        self.createTypicalPeriods()
    # np.unique already returns the labels in sorted order
    return np.unique(self._clusterOrder)
1113
-
1114
@property
def clusterOrder(self):
    """
    Cluster label of every period, in the order of the periods.

    Runs ``createTypicalPeriods`` first if no cluster order exists yet.
    """
    aggregation_missing = not hasattr(self, "_clusterOrder")
    if aggregation_missing:
        self.createTypicalPeriods()
    return self._clusterOrder
1122
-
1123
@property
def clusterPeriodNoOccur(self):
    """
    How often does a typical period occur in the original time series.

    Runs ``createTypicalPeriods`` first if no cluster order exists yet.

    :returns: dictionary mapping each cluster label to its number of occurrences.
    """
    # check the backing attribute, like the other properties do; probing the
    # public ``clusterOrder`` property only worked through its side effect
    if not hasattr(self, "_clusterOrder"):
        self.createTypicalPeriods()
    return self._clusterPeriodNoOccur
1131
-
1132
@property
def clusterPeriodDict(self):
    """
    Time series data for each period index as dictionary.

    The result maps every column of ``typicalPeriods`` to the ``to_dict()``
    of that column. It is built on first access and cached in
    ``_clusterPeriodDict``. ``createTypicalPeriods`` is run first if no
    cluster order exists yet.
    """
    if not hasattr(self, "_clusterOrder"):
        self.createTypicalPeriods()
    if not hasattr(self, "_clusterPeriodDict"):
        self._clusterPeriodDict = {
            column: self.typicalPeriods[column].to_dict()
            for column in self.typicalPeriods
        }
    return self._clusterPeriodDict
1144
-
1145
@property
def segmentDurationDict(self):
    """
    Segment duration in time steps for each period index as dictionary.

    The result is built on first access and cached in
    ``_segmentDurationDict``. Without segmentation every entry gets the
    duration 1 and a warning is emitted when the dictionary is built.
    ``createTypicalPeriods`` is run first if no cluster order exists yet.
    """
    if not hasattr(self, "_clusterOrder"):
        self.createTypicalPeriods()
    if hasattr(self, "_segmentDurationDict"):
        return self._segmentDurationDict

    if self.segmentation:
        # keep only the index: drop all value columns and index level 3,
        # then turn index level 2 into a regular column
        durations = (
            self.segmentedNormalizedTypicalPeriods.drop(
                self.segmentedNormalizedTypicalPeriods.columns, axis=1
            )
            .reset_index(level=3, drop=True)
            .reset_index(2)
        )
        self._segmentDurationDict = durations.to_dict()
    else:
        # every time step forms its own segment of length one
        durations = self.typicalPeriods.drop(self.typicalPeriods.columns, axis=1)
        durations["Segment Duration"] = 1
        self._segmentDurationDict = durations.to_dict()
        warnings.warn(
            "Segmentation is turned off. All segments are consistent the time steps."
        )
    return self._segmentDurationDict
1172
-
1173
def predictOriginalData(self):
    """
    Predicts the overall time series if every period would be placed in the
    related cluster center.

    Runs ``createTypicalPeriods`` first if no cluster order exists yet. Sets
    ``self.normalizedPredictedData`` and ``self.predictedData``.

    :returns: **predictedData** (pandas.DataFrame) -- DataFrame which has the same shape as the original one.
    """
    if not hasattr(self, "_clusterOrder"):
        self.createTypicalPeriods()

    # with segmentation, the segmented typical periods expanded to the full
    # number of time steps are used, otherwise the plain typical periods
    if self.segmentation:
        typical = self.predictedSegmentedNormalizedTypicalPeriods
    else:
        typical = self.normalizedTypicalPeriods

    # line up the typical periods in their order of occurrence
    new_data = [
        typical.loc[label, :].unstack().values for label in self._clusterOrder
    ]

    # back in matrix
    clustered_data_df = pd.DataFrame(
        new_data,
        columns=self.normalizedPeriodlyProfiles.columns,
        index=self.normalizedPeriodlyProfiles.index,
    ).stack(level="TimeStep")

    # back in form; rows beyond the length of the original series are cut off
    self.normalizedPredictedData = pd.DataFrame(
        clustered_data_df.values[: len(self.timeSeries)],
        index=self.timeSeries.index,
        columns=self.timeSeries.columns,
    )
    # normalize again if sameMean = True to avoid doubled unnormalization when
    # using _postProcessTimeSeries after createTypicalPeriods has been called
    if self.sameMean:
        self.normalizedPredictedData /= self._normalizedMean
    self.predictedData = self._postProcessTimeSeries(
        self.normalizedPredictedData, applyWeighting=False
    )

    return self.predictedData
1223
-
1224
def indexMatching(self):
    """
    Relates the index of the original time series with the indices
    represented by the clusters.

    Runs ``createTypicalPeriods`` first if no cluster order exists yet.

    :returns: **timeStepMatching** (pandas.DataFrame) -- indexed by
        ``self.timeIndex`` with the columns ``PeriodNum`` and ``TimeStep``
        and, if segmentation is active, ``SegmentIndex``.
    """
    if not hasattr(self, "_clusterOrder"):
        self.createTypicalPeriods()

    # aggregated period label and inner time step for every time step
    periodIndex = [
        label for label in self._clusterOrder for _ in range(self.timeStepsPerPeriod)
    ]
    stepIndex = [
        step for _ in self._clusterOrder for step in range(self.timeStepsPerPeriod)
    ]

    rows = [periodIndex, stepIndex]
    row_names = ["PeriodNum", "TimeStep"]

    # with segmentation, add the segment each time step belongs to
    if self.segmentation:
        segmentIndex = []
        for label in self._clusterOrder:
            segments = self.segmentedNormalizedTypicalPeriods.loc[label, :].index
            # repeat every level-0 value as often as its level-1 value states
            segmentIndex.extend(
                np.repeat(
                    segments.get_level_values(0), segments.get_level_values(1)
                ).values
            )
        rows.append(segmentIndex)
        row_names.append("SegmentIndex")

    timeStepMatching = pd.DataFrame(
        rows,
        index=row_names,
        columns=self.timeIndex,
    ).T

    return timeStepMatching
1270
-
1271
def accuracyIndicators(self):
    """
    Compares the predicted data with the original time series.

    Runs ``predictOriginalData`` first if no predicted data exists yet. For
    every column of ``self.normalizedTimeSeries`` the RMSE, the RMSE of the
    descending sorted series (duration curves) and the MAE are computed
    against ``self.normalizedPredictedData``.

    :returns: **pd.DataFrame(indicatorRaw)** (pandas.DataFrame) -- Dataframe containing indicators evaluating the
        accuracy of the aggregation, with the columns ``RMSE``,
        ``RMSE_duration`` and ``MAE`` and one row per time series.
    """
    if not hasattr(self, "predictedData"):
        self.predictOriginalData()

    indicatorRaw = {
        "RMSE": {},
        "RMSE_duration": {},
        "MAE": {},
    }

    for column in self.normalizedTimeSeries.columns:
        origTS = self.normalizedTimeSeries[column]
        # divide by the weight only for columns that have one; a weightDict
        # covering just some of the columns raised a KeyError here before
        if self.weightDict and column in self.weightDict:
            origTS = origTS / self.weightDict[column]
        predTS = self.normalizedPredictedData[column]
        indicatorRaw["RMSE"][column] = np.sqrt(mean_squared_error(origTS, predTS))
        # compare the duration curves: both series sorted in descending order
        indicatorRaw["RMSE_duration"][column] = np.sqrt(
            mean_squared_error(
                origTS.sort_values(ascending=False).reset_index(drop=True),
                predTS.sort_values(ascending=False).reset_index(drop=True),
            )
        )
        indicatorRaw["MAE"][column] = mean_absolute_error(origTS, predTS)

    return pd.DataFrame(indicatorRaw)
1304
-
1305
def totalAccuracyIndicators(self):
    """
    Derives the accuracy indicators over all time series.

    For every indicator returned by ``accuracyIndicators`` the squared values
    are summed over the time series, divided by the number of columns of
    ``self.normalizedTimeSeries`` and the square root is taken.
    """
    squared_sums = self.accuracyIndicators().pow(2).sum()
    column_count = len(self.normalizedTimeSeries.columns)
    return np.sqrt(squared_sums / column_count)
1
+ # -*- coding: utf-8 -*-
2
+
3
+ import copy
4
+ import time
5
+ import warnings
6
+
7
+ import pandas as pd
8
+ import numpy as np
9
+
10
+ from sklearn.metrics import mean_squared_error, mean_absolute_error
11
+ from sklearn import preprocessing
12
+
13
+ from tsam.periodAggregation import aggregatePeriods
14
+ from tsam.representations import representations
15
+
16
+ pd.set_option("mode.chained_assignment", None)
17
+
18
+ # maximum number of iterations while rescaling cluster profiles
19
+ MAX_ITERATOR = 20
20
+
21
+ # tolerance while rescaling cluster periods to meet the annual sum of the original profile
22
+ TOLERANCE = 1e-6
23
+
24
+
25
+ # minimal weight that overwrites a weighting of zero in order to carry the profile through the aggregation process
26
+ MIN_WEIGHT = 1e-6
27
+
28
+
29
+
30
+
31
+
32
def unstackToPeriods(timeSeries, timeStepsPerPeriod):
    """
    Extend the time series to an integer multiple of the period length and
    group it into periods.

    If the length is not a multiple of ``timeStepsPerPeriod``, the missing
    time steps are copied from the head of the series and appended at its end.

    :param timeSeries: Time series to group; it is copied, not modified.
    :type timeSeries: pandas DataFrame

    :param timeStepsPerPeriod: The number of discrete timesteps which describe one period. required
    :type timeStepsPerPeriod: integer

    :returns: - **unstackedTimeSeries** (pandas DataFrame) -- one row per candidate period
                (index ``PeriodNum``), with ``TimeStep`` as additional column level
              - **timeIndex** (pandas Index) -- index of the, possibly extended, time series
    """
    unstackedTimeSeries = timeSeries.copy()

    # extend to an integer multiple of the period length
    remainder = len(timeSeries) % timeStepsPerPeriod
    if remainder != 0:
        # number of time steps taken from the head and attached at the end
        attached_timesteps = timeStepsPerPeriod - remainder
        rep_data = unstackedTimeSeries.head(attached_timesteps)
        unstackedTimeSeries = pd.concat([unstackedTimeSeries, rep_data])

    # period number and position inside the period for every row
    periodIndex = [
        int(position / timeStepsPerPeriod)
        for position in range(len(unstackedTimeSeries))
    ]
    stepIndex = [
        position - period * timeStepsPerPeriod
        for position, period in enumerate(periodIndex)
    ]

    # keep the old index before it is replaced
    timeIndex = copy.deepcopy(unstackedTimeSeries.index)

    # new double index, then move the time steps into the columns
    unstackedTimeSeries.index = pd.MultiIndex.from_arrays(
        [stepIndex, periodIndex], names=["TimeStep", "PeriodNum"]
    )
    unstackedTimeSeries = unstackedTimeSeries.unstack(level="TimeStep")

    return unstackedTimeSeries, timeIndex
83
+
84
+
85
+
86
+ class TimeSeriesAggregation(object):
87
+ """
88
+ Clusters time series data to typical periods.
89
+ """
90
+
91
+ CLUSTER_METHODS = [
92
+ "averaging",
93
+ "k_means",
94
+ "k_medoids",
95
+ "k_maxoids",
96
+ "hierarchical",
97
+ "adjacent_periods",
98
+ ]
99
+
100
+ REPRESENTATION_METHODS = [
101
+ "meanRepresentation",
102
+ "medoidRepresentation",
103
+ "maxoidRepresentation",
104
+ "minmaxmeanRepresentation",
105
+ "durationRepresentation",
106
+ "distributionRepresentation",
107
+ "distributionAndMinMaxRepresentation",
108
+ ]
109
+
110
+ EXTREME_PERIOD_METHODS = [
111
+ "None",
112
+ "append",
113
+ "new_cluster_center",
114
+ "replace_cluster_center",
115
+ ]
116
+
117
def __init__(
    self,
    timeSeries,
    resolution=None,
    noTypicalPeriods=10,
    noSegments=10,
    hoursPerPeriod=24,
    clusterMethod="hierarchical",
    evalSumPeriods=False,
    sortValues=False,
    sameMean=False,
    rescaleClusterPeriods=True,
    weightDict=None,
    segmentation=False,
    extremePeriodMethod="None",
    representationMethod=None,
    representationDict=None,
    distributionPeriodWise=True,
    segmentRepresentationMethod=None,
    predefClusterOrder=None,
    predefClusterCenterIndices=None,
    solver="highs",
    numericalTolerance=1e-13,
    roundOutput=None,
    addPeakMin=None,
    addPeakMax=None,
    addMeanMin=None,
    addMeanMax=None,
):
    """
    Initialize the periodly clusters.

    All arguments are stored as attributes of the same name and then
    validated by ``_check_init_args``.

    :param timeSeries: DataFrame with the datetime as index and the relevant
        time series parameters as columns. required
    :type timeSeries: pandas.DataFrame() or dict

    :param resolution: Resolution of the time series in hours [h]. If it is not
        given, it is derived from the datetime index of the time series.
        optional, default: delta_T in timeSeries
    :type resolution: float

    :param hoursPerPeriod: Value which defines the length of a cluster period. optional, default: 24
    :type hoursPerPeriod: integer

    :param noTypicalPeriods: Number of typical Periods - equivalent to the number of clusters. optional, default: 10
    :type noTypicalPeriods: integer

    :param noSegments: Number of segments in which the typical periods should be subdivided - equivalent to the
        number of inner-period clusters. optional, default: 10
    :type noSegments: integer

    :param clusterMethod: Chosen clustering method. optional, default: 'hierarchical'
        |br| Options are:

        * 'averaging'
        * 'k_means'
        * 'k_medoids'
        * 'k_maxoids'
        * 'hierarchical'
        * 'adjacent_periods'
    :type clusterMethod: string

    :param evalSumPeriods: Boolean if in the clustering process also the averaged periodly values
        shall be integrated additional to the periodly profiles as parameters. optional, default: False
    :type evalSumPeriods: boolean

    :param sameMean: Boolean which is used in the normalization procedure. If true, all time series get normalized
        such that they have the same mean value. optional, default: False
    :type sameMean: boolean

    :param sortValues: Boolean if the clustering should be done by the periodly duration
        curves (true) or the original shape of the data. optional (default: False)
    :type sortValues: boolean

    :param rescaleClusterPeriods: Decides if the cluster Periods shall get rescaled such that their
        weighted mean value fits the mean value of the original time series. optional (default: True)
    :type rescaleClusterPeriods: boolean

    :param weightDict: Dictionary which weights the profiles. It is done by scaling
        the time series during the normalization process. Normally all time
        series have a scale from 0 to 1. By scaling them, the values get
        different distances to each other and with this, they are
        differently evaluated during the clustering process. optional (default: None,
        which is stored as an empty dictionary)
    :type weightDict: dict

    :param extremePeriodMethod: Method how to integrate extreme Periods (peak demand, lowest temperature etc.)
        into the typical period profiles. optional, default: 'None'
        |br| Options are:

        * 'None': No integration at all.
        * 'append': append typical Periods to cluster centers
        * 'new_cluster_center': add the extreme period as additional cluster center. It is checked then for all
          Periods if they fit better to this new center or their original cluster center.
        * 'replace_cluster_center': replaces the cluster center of the
          cluster where the extreme period belongs to with the periodly profile of the extreme period. (Worst
          case system design)
    :type extremePeriodMethod: string

    :param representationMethod: Chosen representation. If specified, the clusters are represented in the chosen
        way. Otherwise, each clusterMethod has its own commonly used default representation method.
        |br| Options are:

        * 'meanRepresentation' (default of 'averaging' and 'k_means')
        * 'medoidRepresentation' (default of 'k_medoids', 'hierarchical' and 'adjacent_periods')
        * 'maxoidRepresentation'
        * 'minmaxmeanRepresentation'
        * 'durationRepresentation'/ 'distributionRepresentation'
        * 'distributionAndMinMaxRepresentation'
    :type representationMethod: string

    :param representationDict: Dictionary which states for each attribute whether the profiles in each cluster
        should be represented by the minimum value or maximum value of each time step. This enables estimations
        to the safe side. This dictionary is needed when 'minmaxmeanRepresentation' is chosen. If not specified, the
        dictionary is set to containing 'mean' values only.
    :type representationDict: dict

    :param distributionPeriodWise: If durationRepresentation is chosen, you can choose whether the distribution of
        each cluster should be separately preserved or that of the original time series only (default: True)
    :type distributionPeriodWise: boolean

    :param segmentRepresentationMethod: Chosen representation for the segments. If specified, the segments are
        represented in the chosen way. Otherwise, it is inherited from the representationMethod.
        The options are the same as for representationMethod.
    :type segmentRepresentationMethod: string

    :param predefClusterOrder: Instead of aggregating a time series, a predefined grouping is taken
        which is given by this list. optional (default: None)
    :type predefClusterOrder: list or array

    :param predefClusterCenterIndices: If predefClusterOrder is given, this list can define the representative
        cluster candidates. Otherwise the medoid is taken. optional (default: None)
    :type predefClusterCenterIndices: list or array

    :param solver: Solver that is used for k_medoids clustering. optional (default: 'highs' )
    :type solver: string

    :param numericalTolerance: Tolerance for numerical issues. Silences the warning for exceeding upper or lower bounds
        of the time series. optional (default: 1e-13 )
    :type numericalTolerance: float

    :param roundOutput: Decimals to what the output time series get round. optional (default: None )
    :type roundOutput: integer

    :param addPeakMin: List of column names whose minimal value shall be added to the
        typical periods. E.g.: ['Temperature']. optional, default: []
    :type addPeakMin: list

    :param addPeakMax: List of column names whose maximal value shall be added to the
        typical periods. E.g. ['EDemand', 'HDemand']. optional, default: []
    :type addPeakMax: list

    :param addMeanMin: List of column names where the period with the cumulative minimal value
        shall be added to the typical periods. E.g. ['Photovoltaic']. optional, default: []
    :type addMeanMin: list

    :param addMeanMax: List of column names where the period with the cumulative maximal value
        shall be added to the typical periods. optional, default: []
    :type addMeanMax: list
    """
    # input data and its temporal structure
    self.timeSeries = timeSeries
    self.resolution = resolution
    self.hoursPerPeriod = hoursPerPeriod
    self.noTypicalPeriods = noTypicalPeriods
    self.noSegments = noSegments

    # clustering configuration
    self.clusterMethod = clusterMethod
    self.extremePeriodMethod = extremePeriodMethod
    self.evalSumPeriods = evalSumPeriods
    self.sortValues = sortValues
    self.sameMean = sameMean
    self.rescaleClusterPeriods = rescaleClusterPeriods
    # None stands for "no weighting"; a fresh dict is created per instance
    self.weightDict = {} if weightDict is None else weightDict

    # representation configuration
    self.representationMethod = representationMethod
    self.representationDict = representationDict
    self.distributionPeriodWise = distributionPeriodWise
    self.segmentRepresentationMethod = segmentRepresentationMethod

    # predefined clustering results
    self.predefClusterOrder = predefClusterOrder
    self.predefClusterCenterIndices = predefClusterCenterIndices

    # numerics and output
    self.solver = solver
    self.numericalTolerance = numericalTolerance
    self.segmentation = segmentation
    self.roundOutput = roundOutput

    # extreme period selection; None stands for an empty list, created per
    # instance so that instances never share one list
    self.addPeakMin = [] if addPeakMin is None else addPeakMin
    self.addPeakMax = [] if addPeakMax is None else addPeakMax
    self.addMeanMin = [] if addMeanMin is None else addMeanMin
    self.addMeanMax = [] if addMeanMax is None else addMeanMax

    self._check_init_args()

    # internal attributes
    self._normalizedMean = None

    return
349
+
350
+ def _check_init_args(self):
351
+
352
+ # check timeSeries and set it as pandas DataFrame
353
+ if not isinstance(self.timeSeries, pd.DataFrame):
354
+ if isinstance(self.timeSeries, dict):
355
+ self.timeSeries = pd.DataFrame(self.timeSeries)
356
+ elif isinstance(self.timeSeries, np.ndarray):
357
+ self.timeSeries = pd.DataFrame(self.timeSeries)
358
+ else:
359
+ raise ValueError(
360
+ "timeSeries has to be of type pandas.DataFrame() "
361
+ + "or of type np.array() "
362
+ "in initialization of object of class " + type(self).__name__
363
+ )
364
+
365
+ # check if extreme periods exist in the dataframe
366
+ for peak in self.addPeakMin:
367
+ if peak not in self.timeSeries.columns:
368
+ raise ValueError(
369
+ peak
370
+ + ' listed in "addPeakMin"'
371
+ + " does not occur as timeSeries column"
372
+ )
373
+ for peak in self.addPeakMax:
374
+ if peak not in self.timeSeries.columns:
375
+ raise ValueError(
376
+ peak
377
+ + ' listed in "addPeakMax"'
378
+ + " does not occur as timeSeries column"
379
+ )
380
+ for peak in self.addMeanMin:
381
+ if peak not in self.timeSeries.columns:
382
+ raise ValueError(
383
+ peak
384
+ + ' listed in "addMeanMin"'
385
+ + " does not occur as timeSeries column"
386
+ )
387
+ for peak in self.addMeanMax:
388
+ if peak not in self.timeSeries.columns:
389
+ raise ValueError(
390
+ peak
391
+ + ' listed in "addMeanMax"'
392
+ + " does not occur as timeSeries column"
393
+ )
394
+
395
+ # derive resolution from date time index if not provided
396
+ if self.resolution is None:
397
+ try:
398
+ timedelta = self.timeSeries.index[1] - self.timeSeries.index[0]
399
+ self.resolution = float(timedelta.total_seconds()) / 3600
400
+ except AttributeError as exc:
401
+ raise ValueError(
402
+ "'resolution' argument has to be nonnegative float or int"
403
+ + " or the given timeseries needs a datetime index"
404
+ ) from exc
405
+ except TypeError:
406
+ try:
407
+ self.timeSeries.index = pd.to_datetime(self.timeSeries.index)
408
+ timedelta = self.timeSeries.index[1] - self.timeSeries.index[0]
409
+ self.resolution = float(timedelta.total_seconds()) / 3600
410
+ except Exception as exc:
411
+ raise ValueError(
412
+ "'resolution' argument has to be nonnegative float or int"
413
+ + " or the given timeseries needs a datetime index"
414
+ ) from exc
415
+
416
+ if not (isinstance(self.resolution, int) or isinstance(self.resolution, float)):
417
+ raise ValueError("resolution has to be nonnegative float or int")
418
+
419
+ # check hoursPerPeriod
420
+ if self.hoursPerPeriod is None or self.hoursPerPeriod <= 0:
421
+ raise ValueError("hoursPerPeriod has to be nonnegative float or int")
422
+
423
+ # check typical Periods
424
+ if (
425
+ self.noTypicalPeriods is None
426
+ or self.noTypicalPeriods <= 0
427
+ or not isinstance(self.noTypicalPeriods, int)
428
+ ):
429
+ raise ValueError("noTypicalPeriods has to be nonnegative integer")
430
+ self.timeStepsPerPeriod = int(self.hoursPerPeriod / self.resolution)
431
+ if not self.timeStepsPerPeriod == self.hoursPerPeriod / self.resolution:
432
+ raise ValueError(
433
+ "The combination of hoursPerPeriod and the "
434
+ + "resulution does not result in an integer "
435
+ + "number of time steps per period"
436
+ )
437
+ if self.segmentation:
438
+ if self.noSegments > self.timeStepsPerPeriod:
439
+ warnings.warn(
440
+ "The number of segments must be less than or equal to the number of time steps per period. "
441
+ "Segment number is decreased to number of time steps per period."
442
+ )
443
+ self.noSegments = self.timeStepsPerPeriod
444
+
445
+ # check clusterMethod
446
+ if self.clusterMethod not in self.CLUSTER_METHODS:
447
+ raise ValueError(
448
+ "clusterMethod needs to be one of "
449
+ + "the following: "
450
+ + "{}".format(self.CLUSTER_METHODS)
451
+ )
452
+
453
+ # check representationMethod
454
+ if (
455
+ self.representationMethod is not None
456
+ and self.representationMethod not in self.REPRESENTATION_METHODS
457
+ ):
458
+ raise ValueError(
459
+ "If specified, representationMethod needs to be one of "
460
+ + "the following: "
461
+ + "{}".format(self.REPRESENTATION_METHODS)
462
+ )
463
+
464
+ # check representationMethod
465
+ if self.segmentRepresentationMethod is None:
466
+ self.segmentRepresentationMethod = self.representationMethod
467
+ else:
468
+ if self.segmentRepresentationMethod not in self.REPRESENTATION_METHODS:
469
+ raise ValueError(
470
+ "If specified, segmentRepresentationMethod needs to be one of "
471
+ + "the following: "
472
+ + "{}".format(self.REPRESENTATION_METHODS)
473
+ )
474
+
475
+ # if representationDict None, represent by maximum time steps in each cluster
476
+ if self.representationDict is None:
477
+ self.representationDict = {i: "mean" for i in list(self.timeSeries.columns)}
478
+ # sort representationDict alphabetically to make sure that the min, max or mean function is applied to the right
479
+ # column
480
+ self.representationDict = (
481
+ pd.Series(self.representationDict).sort_index(axis=0).to_dict()
482
+ )
483
+
484
+ # check extremePeriods
485
+ if self.extremePeriodMethod not in self.EXTREME_PERIOD_METHODS:
486
+ raise ValueError(
487
+ "extremePeriodMethod needs to be one of "
488
+ + "the following: "
489
+ + "{}".format(self.EXTREME_PERIOD_METHODS)
490
+ )
491
+
492
+ # check evalSumPeriods
493
+ if not isinstance(self.evalSumPeriods, bool):
494
+ raise ValueError("evalSumPeriods has to be boolean")
495
+ # check sortValues
496
+ if not isinstance(self.sortValues, bool):
497
+ raise ValueError("sortValues has to be boolean")
498
+ # check sameMean
499
+ if not isinstance(self.sameMean, bool):
500
+ raise ValueError("sameMean has to be boolean")
501
+ # check rescaleClusterPeriods
502
+ if not isinstance(self.rescaleClusterPeriods, bool):
503
+ raise ValueError("rescaleClusterPeriods has to be boolean")
504
+
505
+ # check predefClusterOrder
506
+ if self.predefClusterOrder is not None:
507
+ if not isinstance(self.predefClusterOrder, (list, np.ndarray)):
508
+ raise ValueError("predefClusterOrder has to be an array or list")
509
+ if self.predefClusterCenterIndices is not None:
510
+ # check predefClusterCenterIndices
511
+ if not isinstance(self.predefClusterCenterIndices, (list, np.ndarray)):
512
+ raise ValueError(
513
+ "predefClusterCenterIndices has to be an array or list"
514
+ )
515
+ elif self.predefClusterCenterIndices is not None:
516
+ raise ValueError(
517
+ 'If "predefClusterCenterIndices" is defined, "predefClusterOrder" needs to be defined as well'
518
+ )
519
+
520
+ return
521
+
522
def _normalizeTimeSeries(self, sameMean=False):
    """
    Scales every column of ``self.timeSeries`` with a min-max scaler.

    The column means of the scaled data are stored in
    ``self._normalizedMean``.

    :param sameMean: If True, every scaled column is additionally divided by
        its mean value. Relevant for weighting time series. optional (default: False)
    :type sameMean: boolean

    :returns: normalized time series (pandas.DataFrame with the index and
        columns of ``self.timeSeries``)
    """
    scaler = preprocessing.MinMaxScaler()
    scaled = pd.DataFrame(
        scaler.fit_transform(self.timeSeries),
        index=self.timeSeries.index,
        columns=self.timeSeries.columns,
    )

    # keep the means; '_unnormalizeTimeSeries' multiplies by them again
    self._normalizedMean = scaled.mean()

    return scaled / self._normalizedMean if sameMean else scaled
544
+
545
def _unnormalizeTimeSeries(self, normalizedTimeSeries, sameMean=False):
    """
    Back transformation of '_normalizeTimeSeries'.

    NOTE: with ``sameMean=True`` the passed frame is multiplied by
    ``self._normalizedMean`` in place, i.e. the caller's object changes too.

    :param normalizedTimeSeries: Time series which should get back transformed. required
    :type normalizedTimeSeries: pandas.DataFrame()

    :param sameMean: Has to have the same value as in _normalizeTimeSeries. optional (default: False)
    :type sameMean: boolean

    :returns: unnormalized time series
    """
    if sameMean:
        # in-place multiplication, see the note in the docstring
        normalizedTimeSeries *= self._normalizedMean

    # the scaler is re-fitted on the original data to recover min and max
    scaler = preprocessing.MinMaxScaler()
    scaler.fit(self.timeSeries)
    restored = scaler.inverse_transform(normalizedTimeSeries)

    return pd.DataFrame(
        restored,
        index=normalizedTimeSeries.index,
        columns=normalizedTimeSeries.columns,
    )
573
+
574
def _preProcessTimeSeries(self):
    """
    Normalizes the time series, applies the weights of ``self.weightDict``
    and groups the result into period-wise profiles.

    Sets ``self.normalizedTimeSeries``, ``self.normalizedPeriodlyProfiles``
    and ``self.timeIndex``. Weights below MIN_WEIGHT are overwritten with
    MIN_WEIGHT in ``self.weightDict``.

    :raises ValueError: if the period-wise profiles contain NaN
    """
    # first sort the time series in order to avoid bug mention in #18
    self.timeSeries.sort_index(axis=1, inplace=True)

    # all further steps work on floats
    self.timeSeries = self.timeSeries.astype(float)

    self.normalizedTimeSeries = self._normalizeTimeSeries(sameMean=self.sameMean)

    for column in self.weightDict:
        weight = self.weightDict[column]
        if weight < MIN_WEIGHT:
            print(
                'weight of "' + str(column) + '" set to the minmal tolerable weighting'
            )
            weight = MIN_WEIGHT
            self.weightDict[column] = weight
        self.normalizedTimeSeries[column] = (
            self.normalizedTimeSeries[column] * weight
        )

    periodlyProfiles, timeIndex = unstackToPeriods(
        self.normalizedTimeSeries, self.timeStepsPerPeriod
    )
    self.normalizedPeriodlyProfiles = periodlyProfiles
    self.timeIndex = timeIndex

    # NaN values would break the clustering, so stop here
    if periodlyProfiles.isnull().values.any():
        raise ValueError(
            "Pre processed data includes NaN. Please check the timeSeries input data."
        )
609
+
610
def _postProcessTimeSeries(self, normalizedTimeSeries, applyWeighting=True):
    """
    Neutralizes the weighting of the time series and unnormalizes them.

    NOTE: the weighting is removed in place, i.e. the columns of the passed
    frame are overwritten.

    :param normalizedTimeSeries: Normalized (and weighted) time series. required
    :type normalizedTimeSeries: pandas.DataFrame()

    :param applyWeighting: If True, every column listed in ``self.weightDict``
        is divided by its weight first. optional (default: True)
    :type applyWeighting: boolean

    :returns: unnormalized time series, rounded to ``self.roundOutput``
        decimals if that attribute is not None
    """
    if applyWeighting:
        for name, weight in self.weightDict.items():
            normalizedTimeSeries[name] = normalizedTimeSeries[name] / weight

    restored = self._unnormalizeTimeSeries(
        normalizedTimeSeries, sameMean=self.sameMean
    )

    if self.roundOutput is None:
        return restored
    return restored.round(decimals=self.roundOutput)
630
+
631
def _addExtremePeriods(
    self,
    groupedSeries,
    clusterCenters,
    clusterOrder,
    extremePeriodMethod="new_cluster_center",
    addPeakMin=None,
    addPeakMax=None,
    addMeanMin=None,
    addMeanMax=None,
):
    """
    Adds different extreme periods to the clustered data,
    described by the clusterCenters and clusterOrder.

    The detected extreme periods are stored in ``self.extremePeriods``.
    NOTE: ``clusterOrder`` is modified in place and returned; with
    'replace_cluster_center' the entries of ``clusterCenters`` are modified
    in place as well.

    :param groupedSeries: periodly grouped groupedSeries on which basis it should be decided,
        which period is an extreme period. required
    :type groupedSeries: pandas.DataFrame()

    :param clusterCenters: Cluster centers, one array per cluster. required
    :type clusterCenters: list

    :param clusterOrder: Cluster number of every period. required
    :type clusterOrder: list or numpy array

    :param extremePeriodMethod: Chosen extremePeriodMethod ('append', 'new_cluster_center' or
        'replace_cluster_center'). optional (default: 'new_cluster_center')
    :type extremePeriodMethod: string

    :param addPeakMin: Columns whose period with the minimal value is added. optional (default: None)
    :param addPeakMax: Columns whose period with the maximal value is added. optional (default: None)
    :param addMeanMin: Columns whose period with the minimal mean is added. optional (default: None)
    :param addMeanMax: Columns whose period with the maximal mean is added. optional (default: None)

    :returns: - **newClusterCenters** -- The new cluster centers extended with the extreme periods.
              - **newClusterOrder** -- The new cluster order including the extreme periods.
              - **extremeClusterIdx** -- A list of indices where in the newClusterCenters are the extreme
                periods located.
    """
    # the signature defaults are None, which means "no column requested"
    addPeakMin = [] if addPeakMin is None else addPeakMin
    addPeakMax = [] if addPeakMax is None else addPeakMax
    addMeanMin = [] if addMeanMin is None else addMeanMin
    addMeanMax = [] if addMeanMax is None else addMeanMax

    # init required dicts and lists
    self.extremePeriods = {}
    extremePeriodNo = []

    ccList = [center.tolist() for center in clusterCenters]

    # (requested columns, key suffix, function picking the extreme period);
    # the order defines the insertion order into self.extremePeriods
    criteria = (
        (addPeakMax, " max.", lambda values: values.max(axis=1).idxmax()),
        (addPeakMin, " min.", lambda values: values.min(axis=1).idxmin()),
        (addMeanMax, " daily max.", lambda values: values.mean(axis=1).idxmax()),
        (addMeanMin, " daily min.", lambda values: values.mean(axis=1).idxmin()),
    )

    # check which extreme periods exist in the profile and add them to
    # self.extremePeriods dict
    for column in self.timeSeries.columns:
        for requested, suffix, pickPeriod in criteria:
            if column not in requested:
                continue
            stepNo = pickPeriod(groupedSeries[column])
            profile = groupedSeries.loc[stepNo, :].values
            # add only if stepNo is not already in extremePeriods and
            # if it is not already a cluster center
            if stepNo in extremePeriodNo or profile.tolist() in ccList:
                continue
            self.extremePeriods[self._append_col_with(column, suffix)] = {
                "stepNo": stepNo,
                "profile": profile,
                "column": column,
            }
            extremePeriodNo.append(stepNo)

    # get current related clusters of extreme periods
    for extreme in self.extremePeriods.values():
        extreme["clusterNo"] = clusterOrder[extreme["stepNo"]]

    # init new cluster structure
    newClusterCenters = []
    newClusterOrder = clusterOrder
    extremeClusterIdx = []

    # integrate extreme periods to clusters
    if extremePeriodMethod == "append":
        # attach extreme periods to cluster centers
        newClusterCenters.extend(clusterCenters)
        for offset, extreme in enumerate(self.extremePeriods.values()):
            extremeClusterIdx.append(len(newClusterCenters))
            newClusterCenters.append(extreme["profile"])
            newClusterOrder[extreme["stepNo"]] = offset + len(clusterCenters)

    elif extremePeriodMethod == "new_cluster_center":
        newClusterCenters.extend(clusterCenters)
        # attach extreme periods to cluster centers and consider for all periods
        # if they fit better to the cluster or the extreme period
        for offset, extreme in enumerate(self.extremePeriods.values()):
            extremeClusterIdx.append(len(newClusterCenters))
            newClusterCenters.append(extreme["profile"])
            extreme["newClusterNo"] = offset + len(clusterCenters)

        for i, cPeriod in enumerate(newClusterOrder):
            periodProfile = groupedSeries.iloc[i].values
            # squared euclidean distance to the original cluster center
            cluster_dist = sum((periodProfile - clusterCenters[cPeriod]) ** 2)
            for periodType, extreme in self.extremePeriods.items():
                # exclude other extreme periods from adding to the new
                # cluster center
                isOtherExtreme = any(
                    i == other["stepNo"] and otherType != periodType
                    for otherType, other in self.extremePeriods.items()
                )
                # squared distance to the extreme period
                extperiod_dist = sum((periodProfile - extreme["profile"]) ** 2)
                # choose new cluster relation
                if extperiod_dist < cluster_dist and not isOtherExtreme:
                    newClusterOrder[i] = extreme["newClusterNo"]

    elif extremePeriodMethod == "replace_cluster_center":
        # Worst Case Clusterperiods: overwrite the affected column of the
        # related cluster center with the extreme profile
        newClusterCenters = clusterCenters
        for extreme in self.extremePeriods.values():
            index = groupedSeries.columns.get_loc(extreme["column"])
            newClusterCenters[extreme["clusterNo"]][index] = extreme["profile"][index]
            if extreme["clusterNo"] not in extremeClusterIdx:
                extremeClusterIdx.append(extreme["clusterNo"])

    return newClusterCenters, newClusterOrder, extremeClusterIdx
822
+
823
def _append_col_with(self, column, append_with=" max."):
    """Appends a string to the column name. For MultiIndexes, which turn out to be
    tuples when this method is called, only last level is changed.

    Labels that are not strings (e.g. integer column names) are converted
    with ``str`` first, so that every column gets its own key instead of
    ``None``.

    :param column: Column label. required
    :type column: string, tuple or any other label type

    :param append_with: Suffix to append. optional (default: ' max.')
    :type append_with: string

    :returns: new label (a tuple if a tuple was passed, otherwise a string)
    """
    if isinstance(column, str):
        return column + append_with
    if isinstance(column, tuple):
        return column[:-1] + (str(column[-1]) + append_with,)
    return str(column) + append_with
832
+
833
def _rescaleClusterPeriods(self, clusterOrder, clusterPeriods, extremeClusterIdx):
    """
    Rescale the values of the clustered Periods such that mean of each time
    series in the typical Periods fits the mean value of the original time
    series, without changing the values of the extremePeriods.

    :param clusterOrder: Not read by this method.
    :param clusterPeriods: Not read by this method; ``self.clusterPeriods`` is used instead.
    :param extremeClusterIdx: Positions of the extreme periods, which are excluded from the rescaling. required
    :type extremeClusterIdx: list

    :returns: rescaled typical periods as numpy array, one row per typical period
    """
    occurrences = pd.Series(self._clusterPeriodNoOccur).values
    profileColumns = self.normalizedPeriodlyProfiles.columns
    typicalPeriods = pd.concat(
        [pd.Series(period, index=profileColumns) for period in self.clusterPeriods],
        axis=1,
    ).T
    regularIdx = np.delete(typicalPeriods.index, extremeClusterIdx)

    def weightedSum(column, rows):
        # occurrence-weighted sum of one time series over the given periods
        return np.sum(
            occurrences[rows] * typicalPeriods[column].loc[rows, :].sum(axis=1)
        )

    for column in self.timeSeries.columns:
        targetSum = self.normalizedPeriodlyProfiles[column].sum().sum()
        extremeSum = weightedSum(column, extremeClusterIdx)
        regularSum = weightedSum(column, regularIdx)

        # define the upper scale dependent on the weighting of the series
        upperBound = 1.0
        if self.sameMean:
            upperBound = (
                upperBound
                * self.timeSeries[column].max()
                / self.timeSeries[column].mean()
            )
        if column in self.weightDict:
            upperBound = upperBound * self.weightDict[column]

        # difference between predicted and original sum
        gap = abs(targetSum - (regularSum + extremeSum))

        # clipping can undo part of the scaling, hence iterate
        iteration = 0
        while gap > targetSum * TOLERANCE and iteration < MAX_ITERATOR:
            # rescale values of the non-extreme periods
            typicalPeriods.loc[regularIdx, column] = (
                typicalPeriods[column].loc[regularIdx, :].values
                * (targetSum - extremeSum)
                / regularSum
            )

            # reset values higher than the upper scale or less than zero
            # and replace NaN by zero
            typicalPeriods[column] = typicalPeriods[column].clip(
                lower=0, upper=upperBound
            )
            typicalPeriods[column] = typicalPeriods[column].fillna(0.0)

            # calc new sum and new diff to orig data
            regularSum = weightedSum(column, regularIdx)
            gap = abs(targetSum - (regularSum + extremeSum))
            iteration += 1

        if iteration == MAX_ITERATOR:
            deviation = str(round((gap / targetSum) * 100, 2))
            warnings.warn(
                'Max iteration number reached for "'
                + str(column)
                + '" while rescaling the cluster periods.'
                + " The integral of the aggregated time series deviates by: "
                + deviation
                + "%"
            )
    return typicalPeriods.values
904
+
905
def _clusterSortedPeriods(self, candidates, n_init=20):
    """
    Runs the clustering algorithms for the sorted profiles within the period
    instead of the original profiles. (Duration curve clustering)

    Sets ``self.clusterCenterIndices`` to the indices returned by
    'aggregatePeriods' for the sorted profiles.

    :param candidates: Period-wise values in original time order; the returned
        cluster centers are rows of this array. required
    :type candidates: numpy array

    :param n_init: Not used by the current implementation. optional (default: 20)
    :type n_init: integer

    :returns: - **clusterCenters_C** (list) -- for every cluster the candidate whose sorted
                profile is closest to the mean sorted profile of the cluster
              - **clusterOrders_C** -- cluster number of every period
    """
    # initialize
    normalizedSortedPeriodlyProfiles = copy.deepcopy(
        self.normalizedPeriodlyProfiles
    )
    for column in self.timeSeries.columns:
        # sort each period individually in descending order; np.sort works on
        # a copy, since the array behind ``.values`` can be read-only
        # (pandas copy-on-write) and must not be sorted in place
        df = normalizedSortedPeriodlyProfiles[column]
        values = np.sort(df.values, axis=1)[:, ::-1]
        normalizedSortedPeriodlyProfiles[column] = pd.DataFrame(
            values, df.index, df.columns
        )
    sortedClusterValues = normalizedSortedPeriodlyProfiles.values

    # the centers of the sorted profiles are not needed, only the order
    (
        _,
        self.clusterCenterIndices,
        clusterOrders_C,
    ) = aggregatePeriods(
        sortedClusterValues,
        n_clusters=self.noTypicalPeriods,
        n_iter=30,
        solver=self.solver,
        clusterMethod=self.clusterMethod,
        representationMethod=self.representationMethod,
        representationDict=self.representationDict,
        distributionPeriodWise=self.distributionPeriodWise,
        timeStepsPerPeriod=self.timeStepsPerPeriod,
    )

    clusterCenters_C = []

    # take the clusters and determine the most representative sorted
    # period as cluster center
    for clusterNum in np.unique(clusterOrders_C):
        indice = np.where(clusterOrders_C == clusterNum)[0]
        if len(indice) > 1:
            # mean value for each time step for each time series over
            # all Periods in the cluster
            currentMean_C = sortedClusterValues[indice].mean(axis=0)
            # index of the period with the lowest distance to the cluster
            # center
            mindistIdx_C = np.argmin(
                np.square(sortedClusterValues[indice] - currentMean_C).sum(axis=1)
            )
            # use the original (unsorted) time series of this period
            clusterCenters_C.append(candidates[indice][mindistIdx_C])
        else:
            # if only one period is part of the cluster, add this period
            clusterCenters_C.append(candidates[indice][0])

    return clusterCenters_C, clusterOrders_C
967
+
968
def createTypicalPeriods(self):
    """
    Clusters the Periods.

    Runs the pre-processing, determines cluster centers and cluster order
    (from the predefined order if given, otherwise by clustering),
    optionally integrates extreme periods, rescales and segments the typical
    periods and transforms them back with '_postProcessTimeSeries'.
    A warning is issued if a column of the result exceeds the value range of
    the input time series by more than ``self.numericalTolerance``.

    :returns: **self.typicalPeriods** -- All typical Periods in scaled form.
    """
    self._preProcessTimeSeries()

    # check for additional cluster parameters
    if self.evalSumPeriods:
        evaluationValues = (
            self.normalizedPeriodlyProfiles.stack(future_stack=True, level=0)
            .sum(axis=1)
            .unstack(level=1)
        )
        # how many values have to get deleted later
        delClusterParams = -len(evaluationValues.columns)
        candidates = np.concatenate(
            (self.normalizedPeriodlyProfiles.values, evaluationValues.values),
            axis=1,
        )
    else:
        delClusterParams = None
        candidates = self.normalizedPeriodlyProfiles.values

    # skip aggregation procedure for the case of a predefined cluster sequence and get only the correct representation
    if self.predefClusterOrder is not None:
        self._clusterOrder = self.predefClusterOrder
        # check if representatives are defined
        if self.predefClusterCenterIndices is not None:
            self.clusterCenterIndices = self.predefClusterCenterIndices
            self.clusterCenters = candidates[self.predefClusterCenterIndices]
        else:
            # otherwise take the medoids
            self.clusterCenters, self.clusterCenterIndices = representations(
                candidates,
                self._clusterOrder,
                default="medoidRepresentation",
                representationMethod=self.representationMethod,
                representationDict=self.representationDict,
                timeStepsPerPeriod=self.timeStepsPerPeriod,
            )
    else:
        cluster_duration = time.time()
        if not self.sortValues:
            # cluster the data
            (
                self.clusterCenters,
                self.clusterCenterIndices,
                self._clusterOrder,
            ) = aggregatePeriods(
                candidates,
                n_clusters=self.noTypicalPeriods,
                n_iter=100,
                solver=self.solver,
                clusterMethod=self.clusterMethod,
                representationMethod=self.representationMethod,
                representationDict=self.representationDict,
                distributionPeriodWise=self.distributionPeriodWise,
                timeStepsPerPeriod=self.timeStepsPerPeriod,
            )
        else:
            self.clusterCenters, self._clusterOrder = self._clusterSortedPeriods(
                candidates
            )
        self.clusteringDuration = time.time() - cluster_duration

    # get cluster centers without additional evaluation values
    self.clusterPeriods = [
        cluster_center[:delClusterParams] for cluster_center in self.clusterCenters
    ]

    if not self.extremePeriodMethod == "None":
        # overwrite clusterPeriods and clusterOrder
        (
            self.clusterPeriods,
            self._clusterOrder,
            self.extremeClusterIdx,
        ) = self._addExtremePeriods(
            self.normalizedPeriodlyProfiles,
            self.clusterPeriods,
            self._clusterOrder,
            extremePeriodMethod=self.extremePeriodMethod,
            addPeakMin=self.addPeakMin,
            addPeakMax=self.addPeakMax,
            addMeanMin=self.addMeanMin,
            addMeanMax=self.addMeanMax,
        )
    else:
        self.extremeClusterIdx = []

    # get number of appearance of the the typical periods
    nums, counts = np.unique(self._clusterOrder, return_counts=True)
    self._clusterPeriodNoOccur = {num: counts[ii] for ii, num in enumerate(nums)}

    if self.rescaleClusterPeriods:
        self.clusterPeriods = self._rescaleClusterPeriods(
            self._clusterOrder, self.clusterPeriods, self.extremeClusterIdx
        )

    # if additional time steps have been added, reduce the number of occurrence of the typical period
    # which is related to these time steps
    if not len(self.timeSeries) % self.timeStepsPerPeriod == 0:
        self._clusterPeriodNoOccur[self._clusterOrder[-1]] -= (
            1
            - float(len(self.timeSeries) % self.timeStepsPerPeriod)
            / self.timeStepsPerPeriod
        )

    # put the clustered data in pandas format and scale back
    self.normalizedTypicalPeriods = pd.concat([
        pd.Series(s, index=self.normalizedPeriodlyProfiles.columns)
        for s in self.clusterPeriods
    ], axis=1).unstack("TimeStep").T

    if self.segmentation:
        from tsam.utils.segmentation import segmentation

        (
            self.segmentedNormalizedTypicalPeriods,
            self.predictedSegmentedNormalizedTypicalPeriods,
        ) = segmentation(
            self.normalizedTypicalPeriods,
            self.noSegments,
            self.timeStepsPerPeriod,
            representationMethod=self.segmentRepresentationMethod,
            representationDict=self.representationDict,
            distributionPeriodWise=self.distributionPeriodWise,
        )
        self.normalizedTypicalPeriods = (
            self.segmentedNormalizedTypicalPeriods.reset_index(level=3, drop=True)
        )

    self.typicalPeriods = self._postProcessTimeSeries(self.normalizedTypicalPeriods)

    # check if original time series boundaries are not exceeded; only the
    # columns which actually violate a bound are compared with the tolerance,
    # otherwise a column that stays far inside its bounds would trigger the
    # warning for a negligible violation of another column
    typicalMax = self.typicalPeriods.max(axis=0)
    originalMax = self.timeSeries.max(axis=0)
    exceedsMax = typicalMax > originalMax
    if np.array(exceedsMax).any():
        overshoot = (typicalMax - originalMax)[exceedsMax]
        if overshoot.max() > self.numericalTolerance:
            warnings.warn(
                "At least one maximal value of the " +
                "aggregated time series exceeds the maximal value " +
                "the input time series for: " +
                "{}".format(overshoot.to_dict()) +
                ". To silence the warning set the 'numericalTolerance' to a higher value."
            )
    typicalMin = self.typicalPeriods.min(axis=0)
    originalMin = self.timeSeries.min(axis=0)
    belowMin = typicalMin < originalMin
    if np.array(belowMin).any():
        undershoot = (typicalMin - originalMin)[belowMin]
        if abs(undershoot).max() > self.numericalTolerance:
            warnings.warn(
                "Something went wrong... At least one minimal value of the " +
                "aggregated time series exceeds the minimal value " +
                "the input time series for: " +
                "{}".format(undershoot.to_dict()) +
                ". To silence the warning set the 'numericalTolerance' to a higher value."
            )
    return self.typicalPeriods
1131
+
1132
def prepareEnersysInput(self):
    """
    Deprecated. Formerly created the dictionaries and lists required as
    energy system optimization input; now it only issues a
    DeprecationWarning and returns None.
    """
    message = '"prepareEnersysInput" is deprecated, since the created attributes can be directly accessed as properties'
    warnings.warn(message, DeprecationWarning)
1142
+
1143
@property
def stepIdx(self):
    """
    Index inside a single cluster: 0 .. noSegments - 1 if segmentation is
    active, otherwise 0 .. timeStepsPerPeriod - 1.
    """
    noSteps = self.noSegments if self.segmentation else self.timeStepsPerPeriod
    return list(range(noSteps))
1152
+
1153
@property
def clusterPeriodIdx(self):
    """
    Index of the clustered periods (sorted unique values of the cluster
    order). Triggers 'createTypicalPeriods' if no cluster order exists yet.
    """
    # check the private attribute: probing the public 'clusterOrder' property
    # with hasattr would run the clustering inside hasattr and hide any
    # AttributeError raised there
    if not hasattr(self, "_clusterOrder"):
        self.createTypicalPeriods()
    return np.sort(np.unique(self._clusterOrder))
1161
+
1162
@property
def clusterOrder(self):
    """
    The sequence/order of the typical period to represent
    the original time series. Triggers 'createTypicalPeriods' if it has not
    been determined yet.
    """
    alreadyClustered = hasattr(self, "_clusterOrder")
    if not alreadyClustered:
        self.createTypicalPeriods()
    return self._clusterOrder
1171
+
1172
@property
def clusterPeriodNoOccur(self):
    """
    How often does a typical period occur in the original time series.
    Triggers 'createTypicalPeriods' if no cluster order exists yet.
    """
    # check the private attribute: probing the public 'clusterOrder' property
    # with hasattr would run the clustering inside hasattr and hide any
    # AttributeError raised there
    if not hasattr(self, "_clusterOrder"):
        self.createTypicalPeriods()
    return self._clusterPeriodNoOccur
1180
+
1181
@property
def clusterPeriodDict(self):
    """
    Time series data for each period index as dictionary
    (column -> {index of the typical periods -> value}). The result is
    cached in ``self._clusterPeriodDict``.
    """
    if not hasattr(self, "_clusterOrder"):
        self.createTypicalPeriods()
    if hasattr(self, "_clusterPeriodDict"):
        return self._clusterPeriodDict
    self._clusterPeriodDict = {
        column: self.typicalPeriods[column].to_dict()
        for column in self.typicalPeriods
    }
    return self._clusterPeriodDict
1193
+
1194
@property
def segmentDurationDict(self):
    """
    Segment duration in time steps for each period index as dictionary.
    Without segmentation every entry gets the duration 1 and a warning is
    issued. The result is cached in ``self._segmentDurationDict``.
    """
    if not hasattr(self, "_clusterOrder"):
        self.createTypicalPeriods()
    if hasattr(self, "_segmentDurationDict"):
        return self._segmentDurationDict

    if self.segmentation:
        segmented = self.segmentedNormalizedTypicalPeriods
        # drop all data columns and keep only the index information
        self._segmentDurationDict = (
            segmented.drop(segmented.columns, axis=1)
            .reset_index(level=3, drop=True)
            .reset_index(2)
            .to_dict()
        )
    else:
        durations = self.typicalPeriods.drop(self.typicalPeriods.columns, axis=1)
        durations["Segment Duration"] = 1
        self._segmentDurationDict = durations.to_dict()
        warnings.warn(
            "Segmentation is turned off. All segments are consistent the time steps."
        )
    return self._segmentDurationDict
1221
+
1222
+ def predictOriginalData(self):
1223
+ """
1224
+ Predicts the overall time series if every period would be placed in the
1225
+ related cluster center
1226
+
1227
+ :returns: **predictedData** (pandas.DataFrame) -- DataFrame which has the same shape as the original one.
1228
+ """
1229
+ if not hasattr(self, "_clusterOrder"):
1230
+ self.createTypicalPeriods()
1231
+
1232
+ # list up typical periods according to their order of occurrence using the _clusterOrder.
1233
+ new_data = []
1234
+ for label in self._clusterOrder:
1235
+ # if segmentation is used, use the segmented typical periods with predicted time steps with the same number
1236
+ # of time steps as unsegmented typical periods
1237
+ if self.segmentation:
1238
+ new_data.append(
1239
+ self.predictedSegmentedNormalizedTypicalPeriods.loc[label, :]
1240
+ .unstack()
1241
+ .values
1242
+ )
1243
+ else:
1244
+ # new_data.append(self.clusterPeriods[label])
1245
+ new_data.append(
1246
+ self.normalizedTypicalPeriods.loc[label, :].unstack().values
1247
+ )
1248
+
1249
+ # back in matrix
1250
+ clustered_data_df = pd.DataFrame(
1251
+ new_data,
1252
+ columns=self.normalizedPeriodlyProfiles.columns,
1253
+ index=self.normalizedPeriodlyProfiles.index,
1254
+ )
1255
+ clustered_data_df = clustered_data_df.stack(future_stack=True,level="TimeStep")
1256
+
1257
+ # back in form
1258
+ self.normalizedPredictedData = pd.DataFrame(
1259
+ clustered_data_df.values[: len(self.timeSeries)],
1260
+ index=self.timeSeries.index,
1261
+ columns=self.timeSeries.columns,
1262
+ )
1263
+ # normalize again if sameMean = True to avoid doubled unnormalization when using _postProcessTimeSeries after
1264
+ # createTypicalPeriods has been called
1265
+ if self.sameMean:
1266
+ self.normalizedPredictedData /= self._normalizedMean
1267
+ self.predictedData = self._postProcessTimeSeries(
1268
+ self.normalizedPredictedData, applyWeighting=False
1269
+ )
1270
+
1271
+ return self.predictedData
1272
+
1273
def indexMatching(self):
    """
    Relates the index of the original time series with the indices
    represented by the clusters.

    :returns: **timeStepMatching** (pandas.DataFrame) -- one row per entry of ``self.timeIndex``
        with the columns 'PeriodNum' and 'TimeStep' and, if segmentation is used, 'SegmentIndex'.
    """
    if not hasattr(self, "_clusterOrder"):
        self.createTypicalPeriods()

    # aggregated period and time step index lists
    steps = range(self.timeStepsPerPeriod)
    rows = [
        [label for label in self._clusterOrder for _ in steps],
        [step for _ in self._clusterOrder for step in steps],
    ]
    rowNames = ["PeriodNum", "TimeStep"]

    # if segmentation is chosen, append another row stating the segment of
    # every time step
    if self.segmentation:
        segmentIndex = []
        for label in self._clusterOrder:
            segments = self.segmentedNormalizedTypicalPeriods.loc[label, :].index
            # repeat every value of index level 0 as often as stated in level 1
            segmentIndex.extend(
                np.repeat(
                    segments.get_level_values(0),
                    segments.get_level_values(1),
                ).values
            )
        rows.append(segmentIndex)
        rowNames.append("SegmentIndex")

    return pd.DataFrame(rows, index=rowNames, columns=self.timeIndex).T
1319
+
1320
+ def accuracyIndicators(self):
1321
+ """
1322
+ Compares the predicted data with the original time series.
1323
+
1324
+ :returns: **pd.DataFrame(indicatorRaw)** (pandas.DataFrame) -- Dataframe containing indicators evaluating the
1325
+ accuracy of the
1326
+ aggregation
1327
+ """
1328
+ if not hasattr(self, "predictedData"):
1329
+ self.predictOriginalData()
1330
+
1331
+ indicatorRaw = {
1332
+ "RMSE": {},
1333
+ "RMSE_duration": {},
1334
+ "MAE": {},
1335
+ } # 'Silhouette score':{},
1336
+
1337
+ for column in self.normalizedTimeSeries.columns:
1338
+ if self.weightDict:
1339
+ origTS = self.normalizedTimeSeries[column] / self.weightDict[column]
1340
+ else:
1341
+ origTS = self.normalizedTimeSeries[column]
1342
+ predTS = self.normalizedPredictedData[column]
1343
+ indicatorRaw["RMSE"][column] = np.sqrt(mean_squared_error(origTS, predTS))
1344
+ indicatorRaw["RMSE_duration"][column] = np.sqrt(
1345
+ mean_squared_error(
1346
+ origTS.sort_values(ascending=False).reset_index(drop=True),
1347
+ predTS.sort_values(ascending=False).reset_index(drop=True),
1348
+ )
1349
+ )
1350
+ indicatorRaw["MAE"][column] = mean_absolute_error(origTS, predTS)
1351
+
1352
+ return pd.DataFrame(indicatorRaw)
1353
+
1354
+ def totalAccuracyIndicators(self):
1355
+ """
1356
+ Derives the accuracy indicators over all time series
1357
+ """
1358
+ return np.sqrt(self.accuracyIndicators().pow(2).sum()/len(self.normalizedTimeSeries.columns))