tsam 2.3.8__py3-none-any.whl → 3.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tsam/utils/k_maxoids.py CHANGED
@@ -1,145 +1,138 @@
1
- # -*- coding: utf-8 -*-
2
- """Exact K-maxoids clustering"""
3
-
4
-
5
- import numpy as np
6
- import numpy.random as rnd
7
-
8
- from sklearn.base import BaseEstimator, ClusterMixin, TransformerMixin
9
- from sklearn.metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS
10
- from sklearn.utils import check_array
11
-
12
-
13
- class KMaxoids(BaseEstimator, ClusterMixin, TransformerMixin):
14
- """
15
- k-maxoids class.
16
-
17
- :param n_clusters: How many maxoids. Must be positive. optional, default: 8
18
- :type n_clusters: integer
19
-
20
- :param distance_metric: What distance metric to use. optional, default: 'euclidean'
21
- :type distance_metric: string
22
- """
23
-
24
- def __init__(
25
- self,
26
- n_clusters=8,
27
- distance_metric="euclidean",
28
- ):
29
-
30
- self.n_clusters = n_clusters
31
-
32
- self.distance_metric = distance_metric
33
-
34
- def _check_init_args(self):
35
-
36
- # Check n_clusters
37
- if (
38
- self.n_clusters is None
39
- or self.n_clusters <= 0
40
- or not isinstance(self.n_clusters, int)
41
- ):
42
- raise ValueError("n_clusters has to be nonnegative integer")
43
-
44
- # Check distance_metric
45
- if callable(self.distance_metric):
46
- self.distance_func = self.distance_metric
47
- elif self.distance_metric in PAIRWISE_DISTANCE_FUNCTIONS:
48
- self.distance_func = PAIRWISE_DISTANCE_FUNCTIONS[self.distance_metric]
49
- else:
50
- raise ValueError(
51
- "distance_metric needs to be "
52
- + "callable or one of the "
53
- + "following strings: "
54
- + "{}".format(PAIRWISE_DISTANCE_FUNCTIONS.keys())
55
- + ". Instead, '{}' ".format(self.distance_metric)
56
- + "was given."
57
- )
58
-
59
- def fit(self, X, y=None):
60
- """Fit K-Maxoids to the provided data.
61
-
62
- :param X: shape=(n_samples, n_features)
63
- :type X: array-like or sparse matrix
64
-
65
- :returns: self
66
- """
67
-
68
- self._check_init_args()
69
-
70
- # check that the array is good and attempt to convert it to
71
- # Numpy array if possible
72
- X = self._check_array(X)
73
-
74
- # apply distance metric to get the distance matrix
75
- D = self.distance_func(X)
76
-
77
- # run mk-maxoids clustering
78
- self.cluster_centers_, self.labels_ = self.k_maxoids(X, self.n_clusters)
79
-
80
- return self
81
-
82
- def _check_array(self, X):
83
-
84
- X = check_array(X)
85
-
86
- # Check that the number of clusters is less than or equal to
87
- # the number of samples
88
- if self.n_clusters > X.shape[0]:
89
- raise ValueError(
90
- "The number of medoids "
91
- + "({}) ".format(self.n_clusters)
92
- + "must be larger than the number "
93
- + "of samples ({})".format(X.shape[0])
94
- )
95
-
96
- return X
97
-
98
- def k_maxoids(self, X, k, numpasses=5, doLogarithmic=False, n_init=100):
99
-
100
- X_old = X
101
- n, m = X.shape
102
- inertiaTempPrime = None
103
-
104
- for i in range(n_init):
105
- inds = rnd.permutation(np.arange(n))
106
-
107
- X = X[inds]
108
- M = np.copy(X[:k])
109
- for t in range(numpasses):
110
- for j in range(n):
111
- x = X[j]
112
- D = np.sum((M - x) ** 2, axis=1)
113
- i = np.argmin(D)
114
- d = np.sum((M - M[i]) ** 2, axis=1)
115
-
116
- if doLogarithmic:
117
- D[i] = 1.0
118
- d[i] = 1.0
119
- valx = np.prod(D)
120
- valm = np.prod(d)
121
- else:
122
- D[i] = 0.0
123
- d[i] = 0.0
124
- valx = np.sum(D)
125
- valm = np.sum(d)
126
-
127
- if valx > valm:
128
- M[i] = x
129
-
130
- dTemp = self.distance_func(X_old, Y=list(M))
131
- inertiaTemp = np.sum(np.min(dTemp, axis=1))
132
-
133
- if inertiaTempPrime is None:
134
- mFinal = M
135
- inertiaTempPrime = inertiaTemp
136
- else:
137
- if inertiaTemp < inertiaTempPrime:
138
- mFinal = M
139
- inertiaTempPrime = inertiaTemp
140
-
141
- D = self.distance_func(X_old, Y=list(mFinal))
142
-
143
- I = np.argmin(D, axis=1)
144
-
145
- return list(mFinal), I
1
+ """Exact K-maxoids clustering"""
2
+
3
+ import numpy as np
4
+ import numpy.random as rnd
5
+ from sklearn.base import BaseEstimator, ClusterMixin, TransformerMixin
6
+ from sklearn.metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS
7
+ from sklearn.utils import check_array
8
+
9
+
10
+ class KMaxoids(BaseEstimator, ClusterMixin, TransformerMixin):
11
+ """
12
+ k-maxoids class.
13
+
14
+ :param n_clusters: How many maxoids. Must be positive. optional, default: 8
15
+ :type n_clusters: integer
16
+
17
+ :param distance_metric: What distance metric to use. optional, default: 'euclidean'
18
+ :type distance_metric: string
19
+ """
20
+
21
+ def __init__(
22
+ self,
23
+ n_clusters=8,
24
+ distance_metric="euclidean",
25
+ ):
26
+ self.n_clusters = n_clusters
27
+
28
+ self.distance_metric = distance_metric
29
+
30
+ def _check_init_args(self):
31
+ # Check n_clusters
32
+ if (
33
+ self.n_clusters is None
34
+ or self.n_clusters <= 0
35
+ or not isinstance(self.n_clusters, int)
36
+ ):
37
+ raise ValueError("n_clusters has to be nonnegative integer")
38
+
39
+ # Check distance_metric
40
+ if callable(self.distance_metric):
41
+ self.distance_func = self.distance_metric
42
+ elif self.distance_metric in PAIRWISE_DISTANCE_FUNCTIONS:
43
+ self.distance_func = PAIRWISE_DISTANCE_FUNCTIONS[self.distance_metric]
44
+ else:
45
+ raise ValueError(
46
+ "distance_metric needs to be "
47
+ + "callable or one of the "
48
+ + "following strings: "
49
+ + f"{PAIRWISE_DISTANCE_FUNCTIONS.keys()}"
50
+ + f". Instead, '{self.distance_metric}' "
51
+ + "was given."
52
+ )
53
+
54
+ def fit(self, X, y=None):
55
+ """Fit K-Maxoids to the provided data.
56
+
57
+ :param X: shape=(n_samples, n_features)
58
+ :type X: array-like or sparse matrix
59
+
60
+ :returns: self
61
+ """
62
+
63
+ self._check_init_args()
64
+
65
+ # check that the array is good and attempt to convert it to
66
+ # Numpy array if possible
67
+ X = self._check_array(X)
68
+
69
+ # apply distance metric to get the distance matrix (kept for potential debugging)
70
+ _D = self.distance_func(X)
71
+
72
+ # run mk-maxoids clustering
73
+ self.cluster_centers_, self.labels_ = self.k_maxoids(X, self.n_clusters)
74
+
75
+ return self
76
+
77
+ def _check_array(self, X):
78
+ X = check_array(X)
79
+
80
+ # Check that the number of clusters is less than or equal to
81
+ # the number of samples
82
+ if self.n_clusters > X.shape[0]:
83
+ raise ValueError(
84
+ "The number of medoids "
85
+ + f"({self.n_clusters}) "
86
+ + "must be larger than the number "
87
+ + f"of samples ({X.shape[0]})"
88
+ )
89
+
90
+ return X
91
+
92
+ def k_maxoids(self, X, k, numpasses=5, doLogarithmic=False, n_init=100):
93
+ X_old = X
94
+ n, _m = X.shape
95
+ inertiaTempPrime = None
96
+
97
+ for i in range(n_init):
98
+ inds = rnd.permutation(np.arange(n))
99
+
100
+ X = X[inds]
101
+ M = np.copy(X[:k])
102
+ for t in range(numpasses):
103
+ for j in range(n):
104
+ x = X[j]
105
+ D = np.sum((M - x) ** 2, axis=1)
106
+ i = np.argmin(D)
107
+ d = np.sum((M - M[i]) ** 2, axis=1)
108
+
109
+ if doLogarithmic:
110
+ D[i] = 1.0
111
+ d[i] = 1.0
112
+ valx = np.prod(D)
113
+ valm = np.prod(d)
114
+ else:
115
+ D[i] = 0.0
116
+ d[i] = 0.0
117
+ valx = np.sum(D)
118
+ valm = np.sum(d)
119
+
120
+ if valx > valm:
121
+ M[i] = x
122
+
123
+ dTemp = self.distance_func(X_old, Y=list(M))
124
+ inertiaTemp = np.sum(np.min(dTemp, axis=1))
125
+
126
+ if inertiaTempPrime is None:
127
+ mFinal = M
128
+ inertiaTempPrime = inertiaTemp
129
+ else:
130
+ if inertiaTemp < inertiaTempPrime:
131
+ mFinal = M
132
+ inertiaTempPrime = inertiaTemp
133
+
134
+ D = self.distance_func(X_old, Y=list(mFinal))
135
+
136
+ I = np.argmin(D, axis=1)
137
+
138
+ return list(mFinal), I
@@ -1,140 +1,139 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- import numpy as np
4
-
5
-
6
- import time
7
-
8
- # switch to numpy 2.0
9
- np.float_ = np.float64
10
- np.complex_=np.complex128
11
-
12
- import pyomo.environ as pyomo
13
- import pyomo.opt as opt
14
- import networkx as nx
15
- from tsam.utils.k_medoids_exact import (
16
- _setup_k_medoids,
17
- KMedoids,
18
- _solve_given_pyomo_model,
19
- )
20
-
21
-
22
- # class KMedoids_contiguity(KMedoids):
23
-
24
-
25
- def k_medoids_contiguity(distances, n_clusters, adjacency, max_iter=500, solver="highs"):
26
- """Declares a k-medoids model and iteratively adds cutting planes to hold on adjacency/contiguity
27
-
28
- The algorithm is based on: Oehrlein and Hauner (2017): A cutting-plane method for adjacency-constrained spatial aggregation
29
- """
30
- # First transform the network to a networkx instance which is required for cut generation
31
- G = _contiguity_to_graph(adjacency, distances=distances)
32
-
33
- # check if inputs are correct
34
- np.size(distances) == np.size(adjacency)
35
-
36
- # and test for connectivity
37
- if not nx.is_connected(G):
38
- raise ValueError("The give adjacency matrix is not connected.")
39
-
40
- # Initial setup of k medoids
41
- M = _setup_k_medoids(distances, n_clusters)
42
-
43
- M.adjacency = adjacency
44
-
45
- # Add constraintlist for the cuts later added
46
- M.cuts = pyomo.ConstraintList()
47
-
48
- # Loop over the relaxed k-medoids problem and add cuts until the problem fits
49
- _all_cluster_connected = False
50
- _iter = 0
51
- _cuts_added = []
52
- while not _all_cluster_connected and _iter < max_iter:
53
- # first solve instance
54
- t_presolve = time.time()
55
- print(str(_iter) + " iteration: Solving instance")
56
- r_x, r_y, obj = _solve_given_pyomo_model(M, solver=solver)
57
- t_aftersolve = time.time()
58
- print(
59
- "Total distance: "
60
- + str(obj)
61
- + " with solving time: "
62
- + str(t_aftersolve - t_presolve)
63
- )
64
-
65
- candidates, labels = np.where(r_x == 1)
66
- # claim that the resulting clusters are connected
67
- _all_cluster_connected = True
68
- _new_cuts_added = []
69
- for label in np.unique(labels):
70
- # extract the cluster
71
- cluster = G.subgraph(np.where(labels == label)[0])
72
- # Identify if the cluster is contineous, instead of validating the constraints such as Validi and Oehrlein.
73
- if not nx.is_connected(cluster):
74
- _all_cluster_connected = False
75
- # if not add contiguity constraints based on c-v (Oehrlein) o a-b (Validi) separators
76
- for candidate in cluster.nodes:
77
- # It is not clear in Validi and Oehrlein, if cuts between all cluster candidates or just the center and the candidates shall be made. The latter one does not converge for the test system wherefore the first one is chosen.
78
- for node in cluster.nodes:
79
- # different to Validi et al. (2021) and Oehrlein and Haunert (2017), check first and just add continuity constraints for the not connected candidates to increase performance
80
- if nx.node_connectivity(cluster, node, candidate) == 0:
81
- # check that the cut was not added so far for the cluster
82
- if (label, candidate, node) not in _cuts_added:
83
- # include the cut in the cut list
84
- _new_cuts_added.append((label, candidate, node))
85
- # Cuts to Separators - Appendix A Minimum-weight vertex separators (Oehrlein and Haunert, 2017)
86
- # Validi uses an own cut generator and Oehrlein falls back to a Java library, here we use simple max flow cutting
87
- # TODO: Check performance for large networks
88
- cut_set = nx.minimum_node_cut(G, node, candidate)
89
- # (Eq. 13 - Oehrlein and Haunert, 2017)
90
- M.cuts.add(
91
- sum(M.z[u, node] for u in cut_set)
92
- >= M.z[candidate, node]
93
- )
94
- else:
95
- raise ValueError(
96
- "Minimal cluster,candidate separation/minimum cut does not seem sufficient. Adding additional separators is could help."
97
- )
98
- # Total cuts
99
- _cuts_added.extend(_new_cuts_added)
100
- _iter += 1
101
- t_afteradding = time.time()
102
-
103
- print(
104
- str(len(_new_cuts_added))
105
- + " contiguity constraints/cuts added, adding to a total number of "
106
- + str(len(_cuts_added))
107
- + " cuts within time: "
108
- + str(t_afteradding - t_aftersolve)
109
- )
110
-
111
- labels = np.where(r_x == 1)
112
-
113
- return (r_y, r_x.T, obj)
114
-
115
-
116
- def _contiguity_to_graph(adjacency, distances=None):
117
- """Transforms a adjacency matrix to a networkx.Graph
118
-
119
- Args:
120
- adjacency (np.ndarray): 2-diimensional adjacency matrix
121
- distances (np.ndarray, optional): If provided, delivers the distances between the nodes. Defaults to None.
122
-
123
- Returns:
124
- nx.Graph: Graph with every index as node name.
125
- """
126
- rows, cols = np.where(adjacency == 1)
127
- G = nx.Graph()
128
- if distances is None:
129
- edges = zip(rows.tolist(), cols.tolist())
130
- G.add_edges_from(edges)
131
- else:
132
- normed_distances = distances / np.max(distances)
133
- weights = 1 - normed_distances
134
- if np.any(weights < 0) or np.any(weights > 1):
135
- raise ValueError("Weight calculation went wrong.")
136
-
137
- edge_weights = weights[rows, cols]
138
- edges = zip(rows.tolist(), cols.tolist(), edge_weights.tolist())
139
- G.add_weighted_edges_from(edges)
140
- return G
1
+ import time
2
+
3
+ import numpy as np
4
+
5
+ # switch to numpy 2.0 (restore deprecated aliases for backward compatibility)
6
+ np.float_ = np.float64 # type: ignore[attr-defined]
7
+ np.complex_ = np.complex128 # type: ignore[attr-defined]
8
+
9
+ import networkx as nx
10
+ import pyomo.environ as pyomo
11
+
12
+ from tsam.utils.k_medoids_exact import (
13
+ _setup_k_medoids,
14
+ _solve_given_pyomo_model,
15
+ )
16
+
17
+ # class KMedoids_contiguity(KMedoids):
18
+
19
+
20
+ def k_medoids_contiguity(
21
+ distances, n_clusters, adjacency, max_iter=500, solver="highs"
22
+ ):
23
+ """Declares a k-medoids model and iteratively adds cutting planes to hold on adjacency/contiguity
24
+
25
+ The algorithm is based on: Oehrlein and Haunert (2017): A cutting-plane method for adjacency-constrained spatial aggregation
26
+ """
27
+ # First transform the network to a networkx instance which is required for cut generation
28
+ G = _contiguity_to_graph(adjacency, distances=distances)
29
+
30
+ # check if inputs are correct
31
+ assert np.size(distances) == np.size(adjacency), (
32
+ "distances and adjacency must have the same size"
33
+ )
34
+
35
+ # and test for connectivity
36
+ if not nx.is_connected(G):
37
+ raise ValueError("The give adjacency matrix is not connected.")
38
+
39
+ # Initial setup of k medoids
40
+ M = _setup_k_medoids(distances, n_clusters)
41
+
42
+ M.adjacency = adjacency
43
+
44
+ # Add constraintlist for the cuts later added
45
+ M.cuts = pyomo.ConstraintList()
46
+
47
+ # Loop over the relaxed k-medoids problem and add cuts until the problem fits
48
+ _all_cluster_connected = False
49
+ _iter = 0
50
+ _cuts_added = []
51
+ while not _all_cluster_connected and _iter < max_iter:
52
+ # first solve instance
53
+ t_presolve = time.time()
54
+ print(str(_iter) + " iteration: Solving instance")
55
+ r_x, r_y, obj = _solve_given_pyomo_model(M, solver=solver)
56
+ t_aftersolve = time.time()
57
+ print(
58
+ "Total distance: "
59
+ + str(obj)
60
+ + " with solving time: "
61
+ + str(t_aftersolve - t_presolve)
62
+ )
63
+
64
+ _candidates, labels = np.where(r_x == 1)
65
+ # claim that the resulting clusters are connected
66
+ _all_cluster_connected = True
67
+ _new_cuts_added = []
68
+ for label in np.unique(labels):
69
+ # extract the cluster
70
+ cluster = G.subgraph(np.where(labels == label)[0])
71
+ # Identify if the cluster is contiguous, instead of validating the constraints such as Validi and Oehrlein.
72
+ if not nx.is_connected(cluster):
73
+ _all_cluster_connected = False
74
+ # if not, add contiguity constraints based on c-v (Oehrlein) or a-b (Validi) separators
75
+ for candidate in cluster.nodes:
76
+ # It is not clear in Validi and Oehrlein, if cuts between all cluster candidates or just the center and the candidates shall be made. The latter one does not converge for the test system wherefore the first one is chosen.
77
+ for node in cluster.nodes:
78
+ # different to Validi et al. (2021) and Oehrlein and Haunert (2017), check first and just add contiguity constraints for the not connected candidates to increase performance
79
+ if nx.node_connectivity(cluster, node, candidate) == 0:
80
+ # check that the cut was not added so far for the cluster
81
+ if (label, candidate, node) not in _cuts_added:
82
+ # include the cut in the cut list
83
+ _new_cuts_added.append((label, candidate, node))
84
+ # Cuts to Separators - Appendix A Minimum-weight vertex separators (Oehrlein and Haunert, 2017)
85
+ # Validi uses an own cut generator and Oehrlein falls back to a Java library, here we use simple max flow cutting
86
+ # TODO: Check performance for large networks
87
+ cut_set = nx.minimum_node_cut(G, node, candidate)
88
+ # (Eq. 13 - Oehrlein and Haunert, 2017)
89
+ M.cuts.add(
90
+ sum(M.z[u, node] for u in cut_set)
91
+ >= M.z[candidate, node]
92
+ )
93
+ else:
94
+ raise ValueError(
95
+ "Minimal cluster,candidate separation/minimum cut does not seem sufficient. Adding additional separators is could help."
96
+ )
97
+ # Total cuts
98
+ _cuts_added.extend(_new_cuts_added)
99
+ _iter += 1
100
+ t_afteradding = time.time()
101
+
102
+ print(
103
+ str(len(_new_cuts_added))
104
+ + " contiguity constraints/cuts added, adding to a total number of "
105
+ + str(len(_cuts_added))
106
+ + " cuts within time: "
107
+ + str(t_afteradding - t_aftersolve)
108
+ )
109
+
110
+ labels = np.where(r_x == 1)
111
+
112
+ return (r_y, r_x.T, obj)
113
+
114
+
115
+ def _contiguity_to_graph(adjacency, distances=None):
116
+ """Transforms a adjacency matrix to a networkx.Graph
117
+
118
+ Args:
119
+ adjacency (np.ndarray): 2-dimensional adjacency matrix
120
+ distances (np.ndarray, optional): If provided, delivers the distances between the nodes. Defaults to None.
121
+
122
+ Returns:
123
+ nx.Graph: Graph with every index as node name.
124
+ """
125
+ rows, cols = np.where(adjacency == 1)
126
+ G = nx.Graph()
127
+ if distances is None:
128
+ edges = zip(rows.tolist(), cols.tolist())
129
+ G.add_edges_from(edges)
130
+ else:
131
+ normed_distances = distances / np.max(distances)
132
+ weights = 1 - normed_distances
133
+ if np.any(weights < 0) or np.any(weights > 1):
134
+ raise ValueError("Weight calculation went wrong.")
135
+
136
+ edge_weights = weights[rows, cols]
137
+ edges = zip(rows.tolist(), cols.tolist(), edge_weights.tolist())
138
+ G.add_weighted_edges_from(edges)
139
+ return G