tsam 2.3.8__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,118 +1,232 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- import numpy as np
4
- import pandas as pd
5
- from sklearn.cluster import AgglomerativeClustering
6
- from tsam.representations import representations
7
-
8
-
9
def segmentation(
    normalizedTypicalPeriods,
    noSegments,
    timeStepsPerPeriod,
    representationMethod=None,
    representationDict=None,
    distributionPeriodWise=True,
):
    """
    Merge adjacent time steps of every typical period into ``noSegments`` segments.

    Each period is clustered on its own with Ward-linkage agglomerative clustering
    whose connectivity only links a time step to its direct predecessor and
    successor. ``representations`` then supplies one value row per segment.

    :param normalizedTypicalPeriods: MultiIndex DataFrame with the typical periods as first index level, the time
        steps within the periods as second index level and the attributes as columns.
    :type normalizedTypicalPeriods: pandas DataFrame

    :param noSegments: Number of segments per typical period (number of inner-period clusters).
    :type noSegments: integer

    :param timeStepsPerPeriod: Number of time steps per period.
    :type timeStepsPerPeriod: integer

    :param representationMethod: Forwarded unchanged to ``representations``.
    :param representationDict: Forwarded unchanged to ``representations``.
    :param distributionPeriodWise: Forwarded unchanged to ``representations``.

    :returns: - **segmentedNormalizedTypicalPeriods** (pandas DataFrame) -- One row per segment. Below the period
                level the index carries "Segment Step", "Segment Duration" and "Original Start Step".
              - **predictedSegmentedNormalizedTypicalPeriods** (pandas DataFrame) -- One row per original time
                step, holding the values of the segment that time step was assigned to.
    """
    segmentedFrames = []
    predictedFrames = []

    for periodLabel in normalizedTypicalPeriods.index.get_level_values(0).unique():
        # rows: the time steps of this period (segmentation candidates); columns: the attributes
        candidates = np.asarray(normalizedTypicalPeriods.loc[periodLabel, :])

        # connectivity: a time step is linked only to the one before and the one after it
        upperNeighbours = np.eye(timeStepsPerPeriod, k=1)
        lowerNeighbours = np.eye(timeStepsPerPeriod, k=-1)
        neighbourLinks = upperNeighbours + lowerNeighbours

        # a single segment needs no clustering: every time step gets label 0
        if noSegments != 1:
            model = AgglomerativeClustering(
                n_clusters=noSegments, linkage="ward", connectivity=neighbourLinks
            )
            labels = model.fit_predict(candidates)
        else:
            labels = np.asarray([0] * len(candidates))

        # per label: first position at which it occurs and how many time steps carry it
        segNo, firstPositions, labelCounts = np.unique(
            labels, return_index=True, return_counts=True
        )
        startSteps = np.sort(firstPositions)
        # labels listed in the order in which their segments appear in time
        labelsInTime = [labels[pos] for pos in sorted(firstPositions)]

        # one value row per segment label
        centers, _centerIndices = representations(
            candidates,
            labels,
            default="meanRepresentation",
            representationMethod=representationMethod,
            representationDict=representationDict,
            distributionPeriodWise=distributionPeriodWise,
            timeStepsPerPeriod=1,
        )

        # every time step is predicted by the values of its own segment
        perStep = pd.DataFrame(centers, columns=normalizedTypicalPeriods.columns)
        perStep = perStep.reindex(labels)
        perStep = perStep.reset_index(drop=True)

        # one row per segment, arranged in temporal order
        perSegment = pd.DataFrame(centers, columns=normalizedTypicalPeriods.columns)
        perSegment = perSegment.reindex(labelsInTime)
        perSegment = perSegment.set_index(startSteps)

        # segment lengths, arranged in the same temporal order
        durations = pd.DataFrame(labelCounts, columns=["Segment Duration"])
        durations = durations.reindex(labelsInTime)
        durations = durations.set_index(np.sort(firstPositions))

        # index levels per period: segment number, segment duration and the
        # original time step at which the segment starts
        indexLevels = [
            pd.Index(segNo, name="Segment Step"),
            durations["Segment Duration"],
            pd.Index(np.sort(firstPositions), name="Original Start Step"),
        ]
        segmentedFrames.append(perSegment.set_index(indexLevels))
        predictedFrames.append(perStep)

    # stack the per-period frames, keyed by the period labels
    predictedSegmentedNormalizedTypicalPeriods = pd.concat(
        predictedFrames,
        keys=normalizedTypicalPeriods.index.get_level_values(0).unique(),
    ).rename_axis(["", "TimeStep"])
    segmentedNormalizedTypicalPeriods = pd.concat(
        segmentedFrames,
        keys=normalizedTypicalPeriods.index.get_level_values(0).unique(),
    )
    return segmentedNormalizedTypicalPeriods, predictedSegmentedNormalizedTypicalPeriods
1
+ import numpy as np
2
+ import pandas as pd
3
+ from sklearn.cluster import AgglomerativeClustering
4
+
5
+ from tsam.representations import representations
6
+
7
+
8
def segmentation(
    normalizedTypicalPeriods,
    noSegments,
    timeStepsPerPeriod,
    representationMethod=None,
    representationDict=None,
    distributionPeriodWise=True,
    predefSegmentOrder=None,
    predefSegmentDurations=None,
    predefSegmentCenters=None,
):
    """
    Agglomerative clustering of adjacent time steps within a set of typical periods in order to further reduce the
    temporal resolution within typical periods and to further reduce complexity of input data.

    When ``predefSegmentOrder`` is given, no clustering is run: the supplied assignments and durations are used
    as they are, and segments are taken to be numbered ``0 .. noSegments - 1`` in temporal order.

    :param normalizedTypicalPeriods: MultiIndex DataFrame containing the typical periods as first index, the time steps
        within the periods as second index and the attributes as columns.
    :type normalizedTypicalPeriods: pandas DataFrame

    :param noSegments: Number of segments in which the typical periods should be subdivided - equivalent to the number of
        inner-period clusters.
    :type noSegments: integer

    :param timeStepsPerPeriod: Number of time steps per period
    :type timeStepsPerPeriod: integer

    :param representationMethod: Forwarded unchanged to ``representations``.
    :param representationDict: Forwarded unchanged to ``representations``.
    :param distributionPeriodWise: Forwarded unchanged to ``representations``.

    :param predefSegmentOrder: Predefined segment assignments per timestep, per typical period.
        If provided, skips clustering and uses these assignments directly.
        List of lists/arrays, one per typical period.
    :type predefSegmentOrder: list or None

    :param predefSegmentDurations: Predefined durations per segment, per typical period.
        Required if predefSegmentOrder is provided.
        List of lists/arrays, one per typical period.
    :type predefSegmentDurations: list or None

    :param predefSegmentCenters: Predefined center indices per segment, per typical period.
        If provided with predefSegmentOrder, uses these as segment centers
        instead of calculating representations.
        List of lists/arrays, one per typical period.
    :type predefSegmentCenters: list or None

    :returns: - **segmentedNormalizedTypicalPeriods** (pandas DataFrame) -- MultiIndex DataFrame similar to
                normalizedTypicalPeriods but with segments instead of time steps. Three additional index
                levels give the segment number, the length of each segment and the time step index at which
                each segment starts.
              - **predictedSegmentedNormalizedTypicalPeriods** (pandas DataFrame) -- MultiIndex DataFrame with the same
                shape of normalizedTypicalPeriods, but with overwritten values derived from segmentation used for
                prediction of the original periods and accuracy indicators.
              - **segmentCenterIndicesList** (list) -- One entry per typical period: the segment center indices in
                temporal segment order, or whatever ``representations`` returned when that is ``None``.

    :raises ValueError: If a predefined list does not have one entry per period, if ``predefSegmentOrder`` is
        given without ``predefSegmentDurations``, if the durations of a period do not sum to
        ``timeStepsPerPeriod`` or do not describe exactly ``noSegments`` segments, or if a predefined center
        index lies outside ``[0, timeStepsPerPeriod)``.
    """
    # Initialize lists for predicted and segmented DataFrame
    segmentedNormalizedTypicalPeriodsList = []
    predictedSegmentedNormalizedTypicalPeriodsList = []
    segmentCenterIndicesList = []

    # Get unique period indices
    period_indices = normalizedTypicalPeriods.index.get_level_values(0).unique()
    n_clusters = len(period_indices)
    usePredefinedSegments = predefSegmentOrder is not None

    # Validate predefined segment array lengths
    if usePredefinedSegments:
        if len(predefSegmentOrder) != n_clusters:
            raise ValueError(
                f"predefSegmentOrder has {len(predefSegmentOrder)} entries "
                f"but data has {n_clusters} periods"
            )
        # The durations are indexed unconditionally further down, so fail here with a
        # clear message instead of a TypeError on None inside the loop.
        if predefSegmentDurations is None:
            raise ValueError(
                "predefSegmentDurations is required when predefSegmentOrder is provided"
            )
        if len(predefSegmentDurations) != n_clusters:
            raise ValueError(
                f"predefSegmentDurations has {len(predefSegmentDurations)} entries "
                f"but data has {n_clusters} periods"
            )
        if predefSegmentCenters is not None and len(predefSegmentCenters) != n_clusters:
            raise ValueError(
                f"predefSegmentCenters has {len(predefSegmentCenters)} entries "
                f"but data has {n_clusters} periods"
            )

    # Validate segment durations sum to timesteps per period
    if predefSegmentDurations is not None:
        for i, durations in enumerate(predefSegmentDurations):
            duration_sum = sum(durations)
            if duration_sum != timeStepsPerPeriod:
                raise ValueError(
                    f"predefSegmentDurations for period {i} sum to {duration_sum} "
                    f"but timeStepsPerPeriod is {timeStepsPerPeriod}"
                )

    # Validate segment center indices are within bounds
    if predefSegmentCenters is not None:
        for i, centers in enumerate(predefSegmentCenters):
            for idx in centers:
                if idx < 0 or idx >= timeStepsPerPeriod:
                    raise ValueError(
                        f"predefSegmentCenters index {idx} for period {i} "
                        f"is out of bounds [0, {timeStepsPerPeriod})"
                    )

    # The segment numbers below are generated as 0 .. noSegments - 1, so every period
    # must supply exactly that many durations.
    if usePredefinedSegments:
        for i, durations in enumerate(predefSegmentDurations):
            if len(durations) != noSegments:
                raise ValueError(
                    f"predefSegmentDurations for period {i} has {len(durations)} entries "
                    f"but noSegments is {noSegments}"
                )

    # Adjacency matrix: each time step is only connected to its preceding and succeeding
    # one. It is the same for every period, so build it once, and only if clustering runs.
    adjacencyMatrix = None
    if not usePredefinedSegments and noSegments != 1:
        adjacencyMatrix = np.eye(timeStepsPerPeriod, k=1) + np.eye(
            timeStepsPerPeriod, k=-1
        )

    # do for each typical period
    for period_i, period_label in enumerate(period_indices):
        # numpy array with the segmentation candidates (time steps) as rows
        # and the attributes as columns
        segmentationCandidates = np.asarray(
            normalizedTypicalPeriods.loc[period_label, :]
        )

        if usePredefinedSegments:
            clusterOrder = np.asarray(predefSegmentOrder[period_i])
            segmentNoOccur = np.asarray(predefSegmentDurations[period_i])
            # segment numbers and start steps follow directly from the durations
            segNo = np.arange(noSegments)
            startSteps = np.sort(
                np.concatenate([[0], np.cumsum(segmentNoOccur)[:-1]])
            )
            # predefined segments are numbered in temporal order already
            clusterOrderUnique = list(range(noSegments))
        else:
            # execute clustering of adjacent time steps
            if noSegments == 1:
                clusterOrder = np.zeros(len(segmentationCandidates), dtype=int)
            else:
                clustering = AgglomerativeClustering(
                    n_clusters=noSegments, linkage="ward", connectivity=adjacencyMatrix
                )
                clusterOrder = clustering.fit_predict(segmentationCandidates)
            # determine the indices where the segments change and the number of time steps in each segment
            segNo, firstOccurrences, segmentNoOccur = np.unique(
                clusterOrder, return_index=True, return_counts=True
            )
            startSteps = np.sort(firstOccurrences)
            # cluster labels in the order in which their segments appear in time
            clusterOrderUnique = [clusterOrder[step] for step in startSteps]

        # determine the segments' values
        if usePredefinedSegments and predefSegmentCenters is not None:
            # use the predefined center time steps directly
            segmentCenterIndices = list(predefSegmentCenters[period_i])
            clusterCenters = segmentationCandidates[segmentCenterIndices]
        else:
            clusterCenters, segmentCenterIndices = representations(
                segmentationCandidates,
                clusterOrder,
                default="meanRepresentation",
                representationMethod=representationMethod,
                representationDict=representationDict,
                distributionPeriodWise=distributionPeriodWise,
                timeStepsPerPeriod=1,
            )
            # Clustering labels are not in temporal order, so reorder the center indices
            # to match clusterOrderUnique.
            if not usePredefinedSegments and segmentCenterIndices is not None:
                segmentCenterIndices = [
                    segmentCenterIndices[c] for c in clusterOrderUnique
                ]

        centersFrame = pd.DataFrame(
            clusterCenters, columns=normalizedTypicalPeriods.columns
        )
        # predict each time step of the period by representing it with the corresponding segment's values
        predictedSegmentedNormalizedTypicalPeriods = centersFrame.reindex(
            clusterOrder
        ).reset_index(drop=True)
        # keep additional information on the lengths of the segments in the right order
        segmentDuration = pd.Series(segmentNoOccur, name="Segment Duration").reindex(
            clusterOrderUnique
        )
        # represent the period by the segments in the right order only instead of each time step,
        # indexed per period by:
        # 1. The segment number
        # 2. The segment duration
        # 3. The index of the original time step, at which the segment starts
        result = centersFrame.reindex(clusterOrderUnique).set_index(
            [
                pd.Index(segNo, name="Segment Step"),
                segmentDuration,
                pd.Index(startSteps, name="Original Start Step"),
            ]
        )
        # append predicted and segmented DataFrame to list to create a big DataFrame for all periods
        predictedSegmentedNormalizedTypicalPeriodsList.append(
            predictedSegmentedNormalizedTypicalPeriods
        )
        segmentedNormalizedTypicalPeriodsList.append(result)
        segmentCenterIndicesList.append(segmentCenterIndices)

    # create a big DataFrame for all periods for predicted segmented time steps and segments and return
    predictedSegmentedNormalizedTypicalPeriods = pd.concat(
        predictedSegmentedNormalizedTypicalPeriodsList,
        keys=period_indices,
    ).rename_axis(["", "TimeStep"])
    segmentedNormalizedTypicalPeriods = pd.concat(
        segmentedNormalizedTypicalPeriodsList,
        keys=period_indices,
    )
    return (
        segmentedNormalizedTypicalPeriods,
        predictedSegmentedNormalizedTypicalPeriods,
        segmentCenterIndicesList,
    )