tsam-2.3.8-py3-none-any.whl → tsam-3.0.0-py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- tsam/__init__.py +79 -0
- tsam/api.py +602 -0
- tsam/config.py +852 -0
- tsam/exceptions.py +17 -0
- tsam/hyperparametertuning.py +289 -245
- tsam/periodAggregation.py +140 -141
- tsam/plot.py +513 -0
- tsam/py.typed +0 -0
- tsam/representations.py +177 -167
- tsam/result.py +397 -0
- tsam/timeseriesaggregation.py +1446 -1361
- tsam/tuning.py +1038 -0
- tsam/utils/durationRepresentation.py +229 -231
- tsam/utils/k_maxoids.py +138 -145
- tsam/utils/k_medoids_contiguity.py +139 -140
- tsam/utils/k_medoids_exact.py +232 -239
- tsam/utils/segmentation.py +232 -118
- {tsam-2.3.8.dist-info → tsam-3.0.0.dist-info}/METADATA +124 -81
- tsam-3.0.0.dist-info/RECORD +23 -0
- {tsam-2.3.8.dist-info → tsam-3.0.0.dist-info}/WHEEL +1 -1
- {tsam-2.3.8.dist-info → tsam-3.0.0.dist-info}/licenses/LICENSE.txt +21 -21
- tsam-2.3.8.dist-info/RECORD +0 -16
- {tsam-2.3.8.dist-info → tsam-3.0.0.dist-info}/top_level.txt +0 -0
tsam/utils/k_maxoids.py CHANGED

@@ -1,145 +1,138 @@
- [2.3.8 version of the file: 145 lines removed (contents not shown)]
+"""Exact K-maxoids clustering"""
+
+import numpy as np
+import numpy.random as rnd
+from sklearn.base import BaseEstimator, ClusterMixin, TransformerMixin
+from sklearn.metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS
+from sklearn.utils import check_array
+
+
+class KMaxoids(BaseEstimator, ClusterMixin, TransformerMixin):
+    """
+    k-maxoids class.
+
+    :param n_clusters: How many maxoids. Must be positive. optional, default: 8
+    :type n_clusters: integer
+
+    :param distance_metric: What distance metric to use. optional, default: 'euclidean'
+    :type distance_metric: string
+    """
+
+    def __init__(
+        self,
+        n_clusters=8,
+        distance_metric="euclidean",
+    ):
+        self.n_clusters = n_clusters
+
+        self.distance_metric = distance_metric
+
+    def _check_init_args(self):
+        # Check n_clusters
+        if (
+            self.n_clusters is None
+            or self.n_clusters <= 0
+            or not isinstance(self.n_clusters, int)
+        ):
+            raise ValueError("n_clusters has to be a positive integer")
+
+        # Check distance_metric
+        if callable(self.distance_metric):
+            self.distance_func = self.distance_metric
+        elif self.distance_metric in PAIRWISE_DISTANCE_FUNCTIONS:
+            self.distance_func = PAIRWISE_DISTANCE_FUNCTIONS[self.distance_metric]
+        else:
+            raise ValueError(
+                "distance_metric needs to be "
+                + "callable or one of the "
+                + "following strings: "
+                + f"{PAIRWISE_DISTANCE_FUNCTIONS.keys()}"
+                + f". Instead, '{self.distance_metric}' "
+                + "was given."
+            )
+
+    def fit(self, X, y=None):
+        """Fit K-Maxoids to the provided data.
+
+        :param X: shape=(n_samples, n_features)
+        :type X: array-like or sparse matrix
+
+        :returns: self
+        """
+
+        self._check_init_args()
+
+        # check that the array is good and attempt to convert it to
+        # a Numpy array if possible
+        X = self._check_array(X)
+
+        # apply the distance metric to get the distance matrix (kept for potential debugging)
+        _D = self.distance_func(X)
+
+        # run k-maxoids clustering
+        self.cluster_centers_, self.labels_ = self.k_maxoids(X, self.n_clusters)
+
+        return self
+
+    def _check_array(self, X):
+        X = check_array(X)
+
+        # Check that the number of clusters is less than or equal to
+        # the number of samples
+        if self.n_clusters > X.shape[0]:
+            raise ValueError(
+                "The number of maxoids "
+                + f"({self.n_clusters}) "
+                + "must not exceed the number "
+                + f"of samples ({X.shape[0]})"
+            )
+
+        return X
+
+    def k_maxoids(self, X, k, numpasses=5, doLogarithmic=False, n_init=100):
+        X_old = X
+        n, _m = X.shape
+        inertiaTempPrime = None
+
+        for i in range(n_init):
+            inds = rnd.permutation(np.arange(n))
+
+            X = X[inds]
+            M = np.copy(X[:k])
+            for t in range(numpasses):
+                for j in range(n):
+                    x = X[j]
+                    D = np.sum((M - x) ** 2, axis=1)
+                    i = np.argmin(D)
+                    d = np.sum((M - M[i]) ** 2, axis=1)
+
+                    if doLogarithmic:
+                        D[i] = 1.0
+                        d[i] = 1.0
+                        valx = np.prod(D)
+                        valm = np.prod(d)
+                    else:
+                        D[i] = 0.0
+                        d[i] = 0.0
+                        valx = np.sum(D)
+                        valm = np.sum(d)
+
+                    if valx > valm:
+                        M[i] = x
+
+            dTemp = self.distance_func(X_old, Y=list(M))
+            inertiaTemp = np.sum(np.min(dTemp, axis=1))
+
+            if inertiaTempPrime is None:
+                mFinal = M
+                inertiaTempPrime = inertiaTemp
+            else:
+                if inertiaTemp < inertiaTempPrime:
+                    mFinal = M
+                    inertiaTempPrime = inertiaTemp
+
+        D = self.distance_func(X_old, Y=list(mFinal))
+
+        I = np.argmin(D, axis=1)
+
+        return list(mFinal), I
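For orientation, a minimal usage sketch of the new `KMaxoids` estimator; the random demo data and printed shapes are illustrative, not part of the package diff:

```python
import numpy as np

from tsam.utils.k_maxoids import KMaxoids

# hypothetical demo data: 50 samples with 4 features
rng = np.random.default_rng(42)
X = rng.random((50, 4))

# pick 5 maxoids; fit() stores the selected extreme points in
# cluster_centers_ and each sample's nearest-maxoid index in labels_
km = KMaxoids(n_clusters=5).fit(X)

print(len(km.cluster_centers_))  # 5 maxoids
print(km.labels_.shape)          # (50,) cluster assignments
```

Unlike k-medoids, which picks central representatives, the swap rule (`if valx > valm: M[i] = x`) keeps whichever of the two points is farther from the rest of the maxoid set, so the representatives gravitate toward extreme points; the best of the `n_init` random restarts is kept by total inertia.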
tsam/utils/k_medoids_contiguity.py CHANGED

@@ -1,140 +1,139 @@
- [2.3.8 version of the file: 140 lines removed (contents not shown)]
+import time
+
+import numpy as np
+
+# switch to numpy 2.0 (restore deprecated aliases for backward compatibility)
+np.float_ = np.float64  # type: ignore[attr-defined]
+np.complex_ = np.complex128  # type: ignore[attr-defined]
+
+import networkx as nx
+import pyomo.environ as pyomo
+
+from tsam.utils.k_medoids_exact import (
+    _setup_k_medoids,
+    _solve_given_pyomo_model,
+)
+
+# class KMedoids_contiguity(KMedoids):
+
+
+def k_medoids_contiguity(
+    distances, n_clusters, adjacency, max_iter=500, solver="highs"
+):
+    """Declares a k-medoids model and iteratively adds cutting planes to enforce adjacency/contiguity.
+
+    The algorithm is based on Oehrlein and Haunert (2017): A cutting-plane method for adjacency-constrained spatial aggregation.
+    """
+    # First transform the network to a networkx instance, which is required for cut generation
+    G = _contiguity_to_graph(adjacency, distances=distances)
+
+    # check if the inputs are correct
+    assert np.size(distances) == np.size(adjacency), (
+        "distances and adjacency must have the same size"
+    )
+
+    # and test for connectivity
+    if not nx.is_connected(G):
+        raise ValueError("The given adjacency matrix is not connected.")
+
+    # Initial setup of the k-medoids model
+    M = _setup_k_medoids(distances, n_clusters)
+
+    M.adjacency = adjacency
+
+    # Add a constraint list for the cuts added later
+    M.cuts = pyomo.ConstraintList()
+
+    # Loop over the relaxed k-medoids problem and add cuts until every cluster is contiguous
+    _all_cluster_connected = False
+    _iter = 0
+    _cuts_added = []
+    while not _all_cluster_connected and _iter < max_iter:
+        # first solve the instance
+        t_presolve = time.time()
+        print(str(_iter) + " iteration: Solving instance")
+        r_x, r_y, obj = _solve_given_pyomo_model(M, solver=solver)
+        t_aftersolve = time.time()
+        print(
+            "Total distance: "
+            + str(obj)
+            + " with solving time: "
+            + str(t_aftersolve - t_presolve)
+        )
+
+        _candidates, labels = np.where(r_x == 1)
+        # claim that the resulting clusters are connected
+        _all_cluster_connected = True
+        _new_cuts_added = []
+        for label in np.unique(labels):
+            # extract the cluster
+            cluster = G.subgraph(np.where(labels == label)[0])
+            # Check whether the cluster is contiguous, instead of validating the constraints as Validi and Oehrlein do.
+            if not nx.is_connected(cluster):
+                _all_cluster_connected = False
+                # if not, add contiguity constraints based on c-v (Oehrlein) or a-b (Validi) separators
+                for candidate in cluster.nodes:
+                    # It is not clear in Validi and Oehrlein whether cuts shall be made between all cluster candidates or just between the center and the candidates. The latter does not converge for the test system, wherefore the former is chosen.
+                    for node in cluster.nodes:
+                        # different from Validi et al. (2021) and Oehrlein and Haunert (2017), check first and add contiguity constraints only for the candidates that are not connected, to increase performance
+                        if nx.node_connectivity(cluster, node, candidate) == 0:
+                            # check that the cut has not been added for the cluster so far
+                            if (label, candidate, node) not in _cuts_added:
+                                # include the cut in the cut list
+                                _new_cuts_added.append((label, candidate, node))
+                                # Cuts to separators - Appendix A: Minimum-weight vertex separators (Oehrlein and Haunert, 2017)
+                                # Validi uses an own cut generator and Oehrlein falls back to a Java library; here we use simple max-flow cutting
+                                # TODO: Check performance for large networks
+                                cut_set = nx.minimum_node_cut(G, node, candidate)
+                                # (Eq. 13 - Oehrlein and Haunert, 2017)
+                                M.cuts.add(
+                                    sum(M.z[u, node] for u in cut_set)
+                                    >= M.z[candidate, node]
+                                )
+                            else:
+                                raise ValueError(
+                                    "Minimal cluster-candidate separation/minimum cut does not seem sufficient. Adding additional separators could help."
+                                )
+        # Total cuts
+        _cuts_added.extend(_new_cuts_added)
+        _iter += 1
+        t_afteradding = time.time()
+
+        print(
+            str(len(_new_cuts_added))
+            + " contiguity constraints/cuts added, adding to a total number of "
+            + str(len(_cuts_added))
+            + " cuts within time: "
+            + str(t_afteradding - t_aftersolve)
+        )
+
+    labels = np.where(r_x == 1)
+
+    return (r_y, r_x.T, obj)
+
+
+def _contiguity_to_graph(adjacency, distances=None):
+    """Transforms an adjacency matrix to a networkx.Graph.
+
+    Args:
+        adjacency (np.ndarray): 2-dimensional adjacency matrix
+        distances (np.ndarray, optional): If provided, delivers the distances between the nodes. Defaults to None.
+
+    Returns:
+        nx.Graph: Graph with every index as node name.
+    """
+    rows, cols = np.where(adjacency == 1)
+    G = nx.Graph()
+    if distances is None:
+        edges = zip(rows.tolist(), cols.tolist())
+        G.add_edges_from(edges)
+    else:
+        normed_distances = distances / np.max(distances)
+        weights = 1 - normed_distances
+        if np.any(weights < 0) or np.any(weights > 1):
+            raise ValueError("Weight calculation went wrong.")
+
+        edge_weights = weights[rows, cols]
+        edges = zip(rows.tolist(), cols.tolist(), edge_weights.tolist())
+        G.add_weighted_edges_from(edges)
+    return G
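The core of the cutting-plane loop is the minimum vertex separator that `nx.minimum_node_cut` returns for a disconnected candidate/node pair (Eq. 13 above). A minimal sketch on an invented 4-node path adjacency, using the module's `_contiguity_to_graph` helper, without invoking a MILP solver:

```python
import networkx as nx
import numpy as np

from tsam.utils.k_medoids_contiguity import _contiguity_to_graph

# hypothetical adjacency of a path graph 0 - 1 - 2 - 3 (symmetric, zero diagonal)
adjacency = np.array(
    [
        [0, 1, 0, 0],
        [1, 0, 1, 0],
        [0, 1, 0, 1],
        [0, 0, 1, 0],
    ]
)

G = _contiguity_to_graph(adjacency)

# minimum vertex separator between nodes 0 and 3: any contiguous cluster
# that contains both 0 and 3 must also contain one of these nodes
cut_set = nx.minimum_node_cut(G, 0, 3)
print(cut_set)  # e.g. {1}
```

In `k_medoids_contiguity`, this separator set feeds the pyomo cut `sum(M.z[u, node] for u in cut_set) >= M.z[candidate, node]`, which forbids assigning `candidate` and `node` to the same medoid unless a separator node joins the cluster.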