tsam 2.3.9__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tsam/__init__.py +79 -0
- tsam/api.py +602 -0
- tsam/config.py +852 -0
- tsam/exceptions.py +17 -0
- tsam/hyperparametertuning.py +289 -245
- tsam/periodAggregation.py +140 -141
- tsam/plot.py +513 -0
- tsam/py.typed +0 -0
- tsam/representations.py +177 -167
- tsam/result.py +397 -0
- tsam/timeseriesaggregation.py +1446 -1361
- tsam/tuning.py +1038 -0
- tsam/utils/durationRepresentation.py +229 -223
- tsam/utils/k_maxoids.py +138 -145
- tsam/utils/k_medoids_contiguity.py +139 -140
- tsam/utils/k_medoids_exact.py +232 -239
- tsam/utils/segmentation.py +232 -118
- {tsam-2.3.9.dist-info → tsam-3.0.0.dist-info}/METADATA +124 -81
- tsam-3.0.0.dist-info/RECORD +23 -0
- {tsam-2.3.9.dist-info → tsam-3.0.0.dist-info}/WHEEL +1 -1
- {tsam-2.3.9.dist-info → tsam-3.0.0.dist-info}/licenses/LICENSE.txt +21 -21
- tsam-2.3.9.dist-info/RECORD +0 -16
- {tsam-2.3.9.dist-info → tsam-3.0.0.dist-info}/top_level.txt +0 -0
tsam/representations.py
CHANGED
|
@@ -1,167 +1,177 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
from
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
"""
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
"""
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
"""
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
:
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
:
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
for
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
]
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
1
|
+
import numpy as np
|
|
2
|
+
from sklearn.metrics.pairwise import euclidean_distances
|
|
3
|
+
|
|
4
|
+
from tsam.utils.durationRepresentation import durationRepresentation
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def representations(
    candidates,
    clusterOrder,
    default,
    representationMethod=None,
    representationDict=None,
    distributionPeriodWise=True,
    timeStepsPerPeriod=None,
):
    """
    Selects a representation routine by name and returns the cluster
    centers it computes.

    :param candidates: Array where each row represents a candidate; handed
        unchanged to the selected routine. required
    :type candidates: np.ndarray

    :param clusterOrder: Integer array where the index refers to the candidate and the
        Integer entry to the group. required
    :type clusterOrder: np.array

    :param default: Name of the representation that is used when
        ``representationMethod`` is None. required
    :type default: string

    :param representationMethod: Name of the representation to use. One of
        "meanRepresentation", "medoidRepresentation", "maxoidRepresentation",
        "minmaxmeanRepresentation", "durationRepresentation",
        "distributionRepresentation" or "distributionAndMinMaxRepresentation".
        optional (default: None)
    :type representationMethod: string

    :param representationDict: Only forwarded to ``minmaxmeanRepresentation``.
        optional (default: None)
    :type representationDict: dictionary

    :param distributionPeriodWise: Only forwarded to ``durationRepresentation``.
        optional (default: True)
    :type distributionPeriodWise: boolean

    :param timeStepsPerPeriod: Forwarded to ``minmaxmeanRepresentation`` and
        ``durationRepresentation``. optional (default: None)
    :type timeStepsPerPeriod: integer

    :returns: Tuple ``(clusterCenters, clusterCenterIndices)``. The second entry
        is None unless the medoid or maxoid representation was selected.

    :raises ValueError: If the selected name matches none of the known
        representations.
    """
    method = default if representationMethod is None else representationMethod

    # Only the medoid and maxoid routines report which candidates they picked;
    # for every other representation the indices stay None.
    centerIndices = None

    if method == "meanRepresentation":
        centers = meanRepresentation(candidates, clusterOrder)
    elif method == "medoidRepresentation":
        centers, centerIndices = medoidRepresentation(candidates, clusterOrder)
    elif method == "maxoidRepresentation":
        centers, centerIndices = maxoidRepresentation(candidates, clusterOrder)
    elif method == "minmaxmeanRepresentation":
        centers = minmaxmeanRepresentation(
            candidates, clusterOrder, representationDict, timeStepsPerPeriod
        )
    elif method in (
        "durationRepresentation",
        "distributionRepresentation",
        "distributionAndMinMaxRepresentation",
    ):
        # The three names share one routine and differ only in the
        # representMinMax flag.
        centers = durationRepresentation(
            candidates,
            clusterOrder,
            distributionPeriodWise,
            timeStepsPerPeriod,
            representMinMax=(method == "distributionAndMinMaxRepresentation"),
        )
    else:
        raise ValueError("Chosen 'representationMethod' does not exist.")

    return centers, centerIndices
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def maxoidRepresentation(candidates, clusterOrder):
    """
    Represents the candidates of a given cluster group (clusterOrder)
    by its maxoid: the member of the group whose summed euclidean distance
    to all candidates (of every group) is the largest.

    :param candidates: Array where each row represents a candidate. required
    :type candidates: np.ndarray

    :param clusterOrder: Integer array where the index refers to the candidate and the
        Integer entry to the group. required
    :type clusterOrder: np.array

    :returns: Tuple ``(clusterCenters, clusterCenterIndices)``: the chosen rows
        and their row positions in ``candidates``, one entry per distinct
        group in the order given by ``np.unique(clusterOrder)``.
    """
    maxoids = []
    maxoidIndices = []
    for groupLabel in np.unique(clusterOrder):
        memberIdx = np.where(clusterOrder == groupLabel)
        members = candidates[memberIdx]
        # Rows are all candidates, columns are the members of this group, so
        # summing over axis 0 yields one total distance per member.
        totalDistPerMember = euclidean_distances(candidates, members).sum(axis=0)
        farthest = np.argmax(totalDistPerMember)
        maxoids.append(members[farthest])
        # memberIdx[0] maps the position within the group back to the row
        # position in candidates.
        maxoidIndices.append(memberIdx[0][farthest])

    return maxoids, maxoidIndices
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def medoidRepresentation(candidates, clusterOrder):
    """
    Represents the candidates of a given cluster group (clusterOrder)
    by its medoid: the member of the group whose summed euclidean distance
    to the members of the same group is the smallest.

    :param candidates: Array where each row represents a candidate. required
    :type candidates: np.ndarray

    :param clusterOrder: Integer array where the index refers to the candidate and the
        Integer entry to the group. required
    :type clusterOrder: np.array

    :returns: Tuple ``(clusterCenters, clusterCenterIndices)``: the chosen rows
        and their row positions in ``candidates``, one entry per distinct
        group in the order given by ``np.unique(clusterOrder)``.
    """
    medoids = []
    medoidIndices = []
    for groupLabel in np.unique(clusterOrder):
        memberIdx = np.where(clusterOrder == groupLabel)
        members = candidates[memberIdx]
        # Pairwise distances inside the group only; the column sums give each
        # member's total distance to the members of its group.
        totalDistPerMember = euclidean_distances(members).sum(axis=0)
        closest = np.argmin(totalDistPerMember)
        medoids.append(members[closest])
        # memberIdx[0] maps the position within the group back to the row
        # position in candidates.
        medoidIndices.append(memberIdx[0][closest])

    return medoids, medoidIndices
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def meanRepresentation(candidates, clusterOrder):
|
|
109
|
+
"""
|
|
110
|
+
Represents the candidates of a given cluster group (clusterOrder)
|
|
111
|
+
by its mean.
|
|
112
|
+
|
|
113
|
+
:param candidates: Dissimilarity matrix where each row represents a candidate. required
|
|
114
|
+
:type candidates: np.ndarray
|
|
115
|
+
|
|
116
|
+
:param clusterOrder: Integer array where the index refers to the candidate and the
|
|
117
|
+
Integer entry to the group. required
|
|
118
|
+
:type clusterOrder: np.array
|
|
119
|
+
"""
|
|
120
|
+
# set cluster centers as means of the group candidates
|
|
121
|
+
clusterCenters = []
|
|
122
|
+
for clusterNum in np.unique(clusterOrder):
|
|
123
|
+
indice = np.where(clusterOrder == clusterNum)
|
|
124
|
+
currentMean = candidates[indice].mean(axis=0)
|
|
125
|
+
clusterCenters.append(currentMean)
|
|
126
|
+
return clusterCenters
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def minmaxmeanRepresentation(
    candidates, clusterOrder, representationDict, timeStepsPerPeriod
):
    """
    Represents the candidates of a given cluster group (clusterOrder)
    by either the minimum, the maximum or the mean values of each time step for
    all periods in that cluster depending on the command for each attribute.

    The columns of ``candidates`` are read as consecutive blocks of
    ``timeStepsPerPeriod`` columns, one block per entry of
    ``representationDict`` in the dictionary's iteration order.

    :param candidates: Array where each row represents a candidate. required
    :type candidates: np.ndarray

    :param clusterOrder: Integer array where the index refers to the candidate and the
        Integer entry to the group. required
    :type clusterOrder: np.array

    :param representationDict: A dictionary which defines for each attribute whether the typical
        period should be represented by the minimum ("min"), maximum ("max") or
        mean ("mean") values within each cluster. required
    :type representationDict: dictionary

    :param timeStepsPerPeriod: The number of discrete timesteps which describe one period. required
    :type timeStepsPerPeriod: integer

    :returns: List with one 1-D array of length
        ``len(representationDict) * timeStepsPerPeriod`` per distinct group, in
        the order given by ``np.unique(clusterOrder)``.

    :raises TypeError: If ``representationDict`` is None.
    :raises ValueError: If a value of ``representationDict`` is neither "min",
        "max" nor "mean".
    """
    if representationDict is None:
        # Previously surfaced as an opaque len(None) TypeError; keep the
        # exception type but say what is actually missing.
        raise TypeError(
            "'representationDict' must be a dictionary for the "
            "minmaxmeanRepresentation, but None was given."
        )

    # Read the per-attribute commands once instead of rebuilding the value
    # list for every attribute of every cluster.
    commands = list(representationDict.values())
    if any(command not in ("min", "max", "mean") for command in commands):
        raise ValueError(
            'At least one value in the representationDict is neither "min", "max" nor "mean".'
        )

    centerLength = len(commands) * timeStepsPerPeriod

    # set cluster center depending of the representationDict
    clusterCenters = []
    for clusterNum in np.unique(clusterOrder):
        indice = np.where(clusterOrder == clusterNum)
        currentClusterCenter = np.zeros(centerLength)
        for attributeNum, command in enumerate(commands):
            columns = slice(
                attributeNum * timeStepsPerPeriod,
                (attributeNum + 1) * timeStepsPerPeriod,
            )
            # Indexing with the tuple returned by np.where adds a leading axis
            # of length one (for a 1-D clusterOrder), so the group's members
            # lie on axis 1 and are reduced along it.
            memberValues = candidates[indice, columns]
            if command == "min":
                reduced = memberValues.min(axis=1)
            elif command == "max":
                reduced = memberValues.max(axis=1)
            else:
                reduced = memberValues.mean(axis=1)
            currentClusterCenter[columns] = reduced
        clusterCenters.append(currentClusterCenter)
    return clusterCenters
|