tsam 2.1.0__py3-none-any.whl → 2.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tsam/__init__.py +11 -0
- tsam/hyperparametertuning.py +245 -225
- tsam/periodAggregation.py +141 -141
- tsam/representations.py +167 -167
- tsam/timeseriesaggregation.py +1343 -1309
- tsam/utils/durationRepresentation.py +204 -128
- tsam/utils/k_maxoids.py +145 -148
- tsam/utils/k_medoids_contiguity.py +133 -133
- tsam/utils/k_medoids_exact.py +234 -230
- tsam/utils/segmentation.py +118 -119
- {tsam-2.1.0.dist-info → tsam-2.3.2.dist-info}/LICENSE.txt +20 -20
- {tsam-2.1.0.dist-info → tsam-2.3.2.dist-info}/METADATA +168 -165
- tsam-2.3.2.dist-info/RECORD +16 -0
- {tsam-2.1.0.dist-info → tsam-2.3.2.dist-info}/WHEEL +1 -1
- tsam-2.1.0.dist-info/RECORD +0 -16
- {tsam-2.1.0.dist-info → tsam-2.3.2.dist-info}/top_level.txt +0 -0
tsam/utils/k_medoids_exact.py
CHANGED
|
@@ -1,230 +1,234 @@
|
|
|
1
|
-
# -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
|
-
import numpy as np
|
|
4
|
-
|
|
5
|
-
from sklearn.base import BaseEstimator, ClusterMixin, TransformerMixin
|
|
6
|
-
from sklearn.metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS
|
|
7
|
-
from sklearn.utils import check_array
|
|
8
|
-
import pyomo.environ as pyomo
|
|
9
|
-
import pyomo.opt as opt
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
:
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
:
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
:
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
:
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
:
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
or
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
+ "
|
|
71
|
-
+ "
|
|
72
|
-
+ "
|
|
73
|
-
+ "
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
:
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
#
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
self.
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
# the number of
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
+ "
|
|
127
|
-
+ "
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
M.
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
#
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
#
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
#
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
|
|
5
|
+
from sklearn.base import BaseEstimator, ClusterMixin, TransformerMixin
|
|
6
|
+
from sklearn.metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS
|
|
7
|
+
from sklearn.utils import check_array
|
|
8
|
+
import pyomo.environ as pyomo
|
|
9
|
+
import pyomo.opt as opt
|
|
10
|
+
from pyomo.contrib import appsi
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class KMedoids(BaseEstimator, ClusterMixin, TransformerMixin):
    """
    k-medoids class solved to optimality as a mixed-integer program.

    :param n_clusters: How many medoids. Must be positive. optional, default: 8
    :type n_clusters: integer

    :param distance_metric: What distance metric to use. Either a callable or
        a key of sklearn's ``PAIRWISE_DISTANCE_FUNCTIONS``. optional,
        default: 'euclidean'
    :type distance_metric: string or callable

    :param timelimit: Specify the time limit of the solver. optional, default: 100
    :type timelimit: integer

    :param threads: Threads to use by the optimization solver. optional, default: 7
    :type threads: integer

    :param solver: Specifies the solver. optional, default: 'highs'
    :type solver: string
    """

    def __init__(
        self,
        n_clusters=8,
        distance_metric="euclidean",
        timelimit=100,
        threads=7,
        solver="highs",
    ):
        # Parameters are only stored here; validation happens in ``fit`` via
        # ``_check_init_args``.
        self.n_clusters = n_clusters
        self.distance_metric = distance_metric
        self.solver = solver
        self.timelimit = timelimit
        self.threads = threads

    def _check_init_args(self):
        """Validate the constructor arguments and resolve the distance function.

        Sets ``self.distance_func`` to the callable used to build the
        distance matrix.

        :raises ValueError: if ``n_clusters`` is not a positive integer or
            ``distance_metric`` is neither callable nor a known metric name.
        """
        # Check n_clusters. The type test must come before the comparison so
        # that non-numeric input (e.g. a string) yields the ValueError below
        # instead of a TypeError raised by ``<=``.
        if (
            self.n_clusters is None
            or not isinstance(self.n_clusters, int)
            or self.n_clusters <= 0
        ):
            raise ValueError("n_clusters has to be a positive integer")

        # Check distance_metric
        if callable(self.distance_metric):
            self.distance_func = self.distance_metric
        elif self.distance_metric in PAIRWISE_DISTANCE_FUNCTIONS:
            self.distance_func = PAIRWISE_DISTANCE_FUNCTIONS[self.distance_metric]
        else:
            raise ValueError(
                "distance_metric needs to be callable or one of the "
                "following strings: {}. Instead, '{}' was given.".format(
                    PAIRWISE_DISTANCE_FUNCTIONS.keys(), self.distance_metric
                )
            )

    def fit(self, X, y=None):
        """Fit K-Medoids to the provided data.

        After fitting, ``self.labels_`` holds one cluster index per sample
        (a list, numbered in order of increasing medoid row index) and
        ``self.cluster_centers_`` holds the rows of ``X`` chosen as medoids.

        :param X: shape=(n_samples, n_features)
        :type X: array-like or sparse matrix

        :param y: unused; present for scikit-learn API compatibility.

        :returns: self
        """
        self._check_init_args()

        # check that the array is good and attempt to convert it to
        # Numpy array if possible
        X = self._check_array(X)

        # apply distance metric to get the distance matrix
        D = self.distance_func(X)

        # run exact optimization
        r_y, r_x, best_inertia = self._k_medoids_exact(D, self.n_clusters)

        # r_x[i, j] == 1 means sample j is assigned to medoid i, so the
        # arg-max over axis 0 gives the medoid row index of every sample.
        labels_raw = r_x.argmax(axis=0)

        # Row indices of the selected medoids, in increasing order.
        medoid_rows = [ix for ix, val in enumerate(r_y) if val > 0]

        # Map medoid row index -> consecutive cluster label 0..k-1.
        translator = {ix: count for count, ix in enumerate(medoid_rows)}

        self.labels_ = [translator[label] for label in labels_raw]
        self.cluster_centers_ = [X[ix] for ix in medoid_rows]

        return self

    def _check_array(self, X):
        """Validate ``X`` with sklearn's ``check_array`` and check its size.

        :param X: input samples.
        :returns: the validated array.
        :raises ValueError: if ``n_clusters`` exceeds the number of samples.
        """
        X = check_array(X)

        # Check that the number of clusters is less than or equal to
        # the number of samples
        if self.n_clusters > X.shape[0]:
            raise ValueError(
                "The number of medoids ({}) must not be larger than the "
                "number of samples ({})".format(self.n_clusters, X.shape[0])
            )

        return X

    def _k_medoids_exact(self, distances, n_clusters):
        """Build and solve the exact k-medoids optimization model.

        Parameters
        ----------
        distances : array of shape (n_samples, n_samples), required
            Pairwise distances between each row.
        n_clusters : int, required
            Number of clusters.

        Returns
        -------
        tuple
            ``(r_y, r_x, r_obj)``: the medoid indicator per sample, the
            assignment matrix transposed so that entry ``[i, j]`` refers to
            medoid ``i`` and sample ``j``, and the objective value.
        """
        # Create pyomo model
        M = _setup_k_medoids(distances, n_clusters)

        # And solve.
        # NOTE(review): only ``solver`` is forwarded; ``self.timelimit`` and
        # ``self.threads`` are stored on the estimator but not passed on here.
        r_x, r_y, r_obj = _solve_given_pyomo_model(M, solver=self.solver)

        return (r_y, r_x.T, r_obj)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
def _setup_k_medoids(distances, n_clusters):
    """Build the pyomo formulation of the k-medoids problem.

    The model uses one binary variable ``z[i, j]`` per ordered pair of
    samples. The objective is the sum of ``distances[i, j] * z[i, j]``,
    subject to three constraint families: every ``j`` has exactly one ``i``
    with ``z[i, j] == 1``, the diagonal entries ``z[i, i]`` sum to
    ``n_clusters``, and ``z[i, j] <= z[i, i]`` for every pair.

    In the spatial aggregation community, it is referred to as Hess Model for
    political districting with an additional constraint of
    cluster-sizes/populations.
    (W Hess, JB Weaver, HJ Siegfeldt, JN Whelan, and PA Zitlau. Nonpartisan
    political redistricting by computer. Operations Research,
    13(6):998–1006, 1965.)

    :param distances: square pairwise distance matrix (indexed as
        ``distances[i, j]``; its first dimension gives the sample count).
    :param n_clusters: number of medoids to select.
    :returns: the assembled ``pyomo.ConcreteModel``.
    """
    model = pyomo.ConcreteModel()

    # Plain data attached to the model for use inside the rules below.
    model.d = distances
    model.no_k = n_clusters

    # Both index lists run over all samples; the matrix is square.
    n_samples = distances.shape[0]
    model.i = list(range(n_samples))
    model.j = list(range(n_samples))

    # z[i, j]: binary decision linking candidate j to candidate i as its
    # cluster center.
    model.z = pyomo.Var(model.i, model.j, within=pyomo.Binary)

    # Objective: total distance of every candidate to its cluster center.
    def total_distance(m):
        return sum(m.d[i, j] * m.z[i, j] for i in m.i for j in m.j)

    model.obj = pyomo.Objective(rule=total_distance)

    # Each candidate j is assigned to exactly one cluster.
    def one_center_per_candidate(m, j):
        return sum(m.z[i, j] for i in m.i) == 1

    model.candToClusterCon = pyomo.Constraint(
        model.j, rule=one_center_per_candidate
    )

    # The number of selected centers (diagonal entries) is fixed.
    def fixed_number_of_centers(m):
        return sum(m.z[i, i] for i in m.i) == m.no_k

    model.noClustersCon = pyomo.Constraint(rule=fixed_number_of_centers)

    # A candidate can only be assigned to i if i itself is a center.
    def assign_only_to_centers(m, i, j):
        return m.z[i, j] <= m.z[i, i]

    model.clusterRelationCon = pyomo.Constraint(
        model.i, model.j, rule=assign_only_to_centers
    )
    return model
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def _solve_given_pyomo_model(M, solver="highs"):
    """Solve a given pyomo clustering model and return the clusters.

    Args:
        M (pyomo.ConcreteModel): Concrete model instance that gets solved.
            It must provide the index lists ``M.i`` and ``M.j``, the
            variable ``M.z`` indexed by ``(i, j)`` and the objective
            ``M.obj``.
        solver (str, optional): Name of the solver for the pyomo model.
            ``"highs"`` uses the APPSI HiGHS interface; any other name is
            passed to ``pyomo.opt.SolverFactory``. Defaults to "highs".

    Raises:
        ValueError: If the solver returned without assigning a value to
            every entry of ``M.z``.

    Returns:
        tuple: ``(r_x, r_y, r_obj)`` where ``r_x`` is a 2-D integer array
        with ``r_x[j][i] == round(M.z[i, j].value)``, ``r_y`` is the 1-D
        array of rounded diagonal entries ``M.z[j, j]`` and ``r_obj`` is
        the objective value.
    """
    # create optimization problem
    if solver == "highs":
        solver_instance = appsi.solvers.Highs()
    else:
        solver_instance = opt.SolverFactory(solver)
    solver_instance.solve(M)

    # check that it does not fail: without a loaded solution the variable
    # values stay None and rounding them would fail with an opaque TypeError.
    if any(M.z[i, j].value is None for i in M.i for j in M.j):
        raise ValueError(
            "The solver '{}' did not return a solution for the "
            "k-medoids model.".format(solver)
        )

    # Get results; rounding removes solver tolerance noise on the binaries.
    r_x = np.array([[round(M.z[i, j].value) for i in M.i] for j in M.j])

    r_y = np.array([round(M.z[j, j].value) for j in M.j])

    r_obj = pyomo.value(M.obj)

    return (r_x, r_y, r_obj)
|