ugtm 2.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ugtm/__init__.py +25 -0
- ugtm/ugtm_classes.py +287 -0
- ugtm/ugtm_core.py +536 -0
- ugtm/ugtm_crossvalidate.py +1106 -0
- ugtm/ugtm_gtm.py +427 -0
- ugtm/ugtm_kgtm.py +227 -0
- ugtm/ugtm_landscape.py +166 -0
- ugtm/ugtm_predictions.py +678 -0
- ugtm/ugtm_preprocess.py +155 -0
- ugtm/ugtm_sklearn.py +673 -0
- ugtm-2.2.0.dist-info/METADATA +100 -0
- ugtm-2.2.0.dist-info/RECORD +15 -0
- ugtm-2.2.0.dist-info/WHEEL +5 -0
- ugtm-2.2.0.dist-info/licenses/LICENSE.txt +7 -0
- ugtm-2.2.0.dist-info/top_level.txt +1 -0
ugtm/__init__.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""ugtm: a python package for Generative Topographic Mapping (GTM)
|
|
2
|
+
|
|
3
|
+
Submodules
|
|
4
|
+
==========
|
|
5
|
+
|
|
6
|
+
.. autosummary::
|
|
7
|
+
:toctree: _autosummary
|
|
8
|
+
|
|
9
|
+
ugtm_sklearn
|
|
10
|
+
ugtm_gtm
|
|
11
|
+
ugtm_kgtm
|
|
12
|
+
ugtm_classes
|
|
13
|
+
ugtm_landscape
|
|
14
|
+
ugtm_predictions
|
|
15
|
+
ugtm_crossvalidate
|
|
16
|
+
ugtm_preprocess
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from .ugtm_landscape import *
|
|
20
|
+
from .ugtm_gtm import *
|
|
21
|
+
from .ugtm_kgtm import *
|
|
22
|
+
from .ugtm_predictions import *
|
|
23
|
+
from .ugtm_crossvalidate import *
|
|
24
|
+
from .ugtm_preprocess import *
|
|
25
|
+
from .ugtm_sklearn import *
|
ugtm/ugtm_classes.py
ADDED
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
"""Defines classes for initial and optimized GTM model.
|
|
2
|
+
"""
|
|
3
|
+
# Authors: Helena A. Gaspar <hagax8@gmail.com>
|
|
4
|
+
# License: MIT
|
|
5
|
+
|
|
6
|
+
from __future__ import print_function
|
|
7
|
+
import numpy as np
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ReturnU(object):
    """Simple container pairing a ``matU`` value with a ``betaInv`` value.

    Attributes
    ----------
    matU : object
        Stored unchanged from the constructor argument of the same name.
    betaInv : object
        Stored unchanged from the constructor argument of the same name.
    """

    def __init__(self, matU, betaInv):
        """Store both arguments as instance attributes, without copying."""
        self.matU, self.betaInv = matU, betaInv
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class InitialGTM(object):
    r"""Container for the initial (not yet optimized) GTM model.

    Attributes
    ----------
    matX : array of shape (n_nodes, 2)
        Coordinates of the nodes forming a grid in the 2D space.
    matM : array of shape (n_rbf_centers, 2)
        Coordinates of the radial basis function (RBF) centers,
        which also form a grid in the 2D space.
    n_nodes : int
        Number of nodes in the 2D grid.
    n_rbf_centers : int
        Number of RBF centers.
    rbfWidth : float
        Initial RBF width, set to the average of the minimum distance
        between RBF centers:
        :math:`rbfWidth=\sigma \times average(\mathbf{distances(rbf)}_{min})`,
        where :math:`\sigma` is the GTM hyperparameter s.
        NB: if the GTM hyperparameter s = 0 (not recommended),
        rbfWidth is set to the maximum distance between RBF centers.
    matPhiMPlusOne : array of shape (n_nodes, n_rbf_centers+1)
        RBF matrix with one extra dimension holding the bias term.
    matW : array of shape (n_dimensions, n_rbf_centers+1)
        Parameter matrix (PCA-initialized).
    matY : array of shape (n_dimensions, n_nodes)
        Manifold in the n-dimensional data space (projection of matX);
        a point matY[:,i] is the center of a Gaussian component.
        :math:`\mathbf{Y}=\mathbf{W}\mathbf{\Phi}^T`
    betaInv : float
        Noise variance parameter of the data distribution,
        written :math:`\beta^{-1}` in the original paper.
        Initialized to the larger of:
        (1) the 3rd eigenvalue of the data covariance matrix,
        (2) half the average distance between Gaussian component centers
        in the data space (matY matrix).
    n_dimensions : int
        Data space dimensionality (number of variables).
    """

    def __init__(self, matX, matM, n_nodes, n_rbf_centers, rbfWidth,
                 matPhiMPlusOne, matW, matY, betaInv, n_dimensions):
        r"""Build an InitialGTM by storing every argument as an attribute.

        Parameters
        ----------
        matX : array of shape (n_nodes, 2)
            Node coordinates (grid in the 2D space).
        matM : array of shape (n_rbf_centers, 2)
            RBF center coordinates (grid in the 2D space).
        n_nodes : int
            Number of nodes.
        n_rbf_centers : int
            Number of RBF centers.
        rbfWidth : float
            Initial RBF width.
        matPhiMPlusOne : array of shape (n_nodes, n_rbf_centers+1)
            RBF matrix including the bias column.
        matW : array of shape (n_dimensions, n_rbf_centers+1)
            Parameter matrix (PCA-initialized).
        matY : array of shape (n_dimensions, n_nodes)
            Manifold in data space,
            :math:`\mathbf{Y}=\mathbf{W}\mathbf{\Phi}^T`.
        betaInv : float
            Noise variance parameter (:math:`\beta^{-1}`).
        n_dimensions : int
            Data space dimensionality (number of variables).
        """
        # Latent-space grids and their sizes.
        self.matX, self.matM = matX, matM
        self.n_rbf_centers, self.n_nodes = n_rbf_centers, n_nodes
        # Basis functions.
        self.rbfWidth, self.matPhiMPlusOne = rbfWidth, matPhiMPlusOne
        # Mapping parameters and the resulting manifold.
        self.matW, self.matY = matW, matY
        # Noise model and data dimensionality.
        self.betaInv, self.n_dimensions = betaInv, n_dimensions
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class OptimizedGTM(object):
    r"""Class for optimized GTM model.

    Attributes
    ----------
    matX : array of shape (n_nodes, 2)
        Coordinates of nodes defining a grid in the 2D space.
    matW : array of shape (n_dimensions, n_rbf_centers+1)
        Parameter matrix (PCA-initialized).
    matY : array of shape (n_dimensions, n_nodes)
        Manifold in n-dimensional space (projection of matX in data space).
        matY = np.dot(matW, np.transpose(matPhiMPlusOne))
    matP : array of shape (n_individuals, n_nodes)
        Data distribution with variance betaInv.
    matR : array of shape (n_individuals, n_nodes)
        Responsibilities (posterior probabilities),
        used to compute data representations:
        means (matMeans) and modes (matModes).
        Responsibilities are the main output of GTM.
        matR[i,:] represents the responsibility vector for an instance i.
        The columns in matR correspond to rows in matX (nodes).
    betaInv : float
        Noise variance parameter for the data distribution.
        Written as :math:`\beta^{-1}` in the original paper.
    matMeans : array of shape (n_individuals, 2)
        Data representation in 2D space: means (most commonly used for GTM).
    matModes : array of shape (n_individuals, 2)
        Data representation in 2D space: modes
        (for each instance, coordinate with highest responsibility).
    n_dimensions : int
        Data space dimensionality (number of variables).
    converged : bool
        True if the model has converged; otherwise False.
    """

    def __init__(self, matW, matY, matP, matR, betaInv, matMeans,
                 matModes, matX, n_dimensions, converged):
        r"""Constructor for OptimizedGTM class.

        Every argument is stored unchanged as the attribute of the same name.

        Parameters
        ----------
        matW : array of shape (n_dimensions, n_rbf_centers+1)
            Parameter matrix (PCA-initialized).
        matY : array of shape (n_dimensions, n_nodes)
            Manifold in n-dimensional space (projection of matX in data space).
            matY = np.dot(matW, np.transpose(matPhiMPlusOne))
        matP : array of shape (n_individuals, n_nodes)
            Data distribution with variance betaInv.
        matR : array of shape (n_individuals, n_nodes)
            Responsibilities (posterior probabilities),
            used to compute data representations:
            means (matMeans) and modes (matModes).
            matR[i,:] represents the responsibility vector for an instance i.
            The columns in matR correspond to rows in matX (nodes).
        betaInv : float
            Noise variance parameter for the data distribution.
            Written as :math:`\beta^{-1}` in the original paper.
        matMeans : array of shape (n_individuals, 2)
            Data representation in 2D space: means.
        matModes : array of shape (n_individuals, 2)
            Data representation in 2D space: modes
            (for each instance, coordinate with highest responsibility).
        matX : array of shape (n_nodes, 2)
            Coordinates of nodes defining a grid in the 2D space.
        n_dimensions : int
            Data space dimensionality (number of variables).
        converged : bool
            True if the model has converged; otherwise False.
        """
        self.matW = matW
        self.matY = matY
        self.matP = matP
        self.matR = matR
        self.betaInv = betaInv
        self.matMeans = matMeans
        self.matModes = matModes
        self.matX = matX
        self.n_dimensions = n_dimensions
        self.converged = converged

    def _save_map_outputs(self, output):
        """Save responsibilities, mean coordinates and modes as CSV files."""
        np.savetxt(fname=output+"_responsibilities.csv",
                   X=self.matR, delimiter=",")
        np.savetxt(fname=output+"_coordinates.csv",
                   X=self.matMeans, delimiter=",")
        np.savetxt(fname=output+"_modes.csv", X=self.matModes, delimiter=",")

    def _report_map_outputs(self, output):
        """Print the report header and the description of the three map files."""
        print("")
        print("Wrote to disk:")
        print("")
        print("%s: responsibilities, which represent "
              "each individual's encoding "
              "on the map (dimensions=n_individuals*n_nodes_on_the_map)"
              % (output+"_responsibilities.csv"))
        print("")
        print("%s: coordinates to plot, which represent each individual's "
              "mean position on the map (dimensions = "
              "n_individuals*n_latent_dimensions)"
              % (output+"_coordinates.csv"))
        print("")
        print("%s: modes positions for each individual on the map "
              "(node with max probability for the individual; "
              "dimensions = n_individuals*n_latent_dimensions)"
              % (output+"_modes.csv"))
        print("")

    def write(self, output="output"):
        """Write optimized GTM model: means, modes and responsibilities.

        Parameters
        ----------
        output : str, optional (default = 'output')
            Output path prefix; file-specific suffixes are appended to it.

        Returns
        -------
        None
            Three comma-delimited files are written to disk:
            (1) ``<output>_coordinates.csv``: means
            (mean position for each data point),
            (2) ``<output>_modes.csv``: modes
            (node with max. responsibility for each data point),
            (3) ``<output>_responsibilities.csv``: responsibilities
            (posterior probabilities for each data point).
            A summary of the written files is printed to stdout.
        """
        self._save_map_outputs(output)
        self._report_map_outputs(output)
        print("")

    def write_all(self, output="output"):
        """Write optimized GTM model and optimized parameters.

        Parameters
        ----------
        output : str, optional (default = 'output')
            Output path prefix; file-specific suffixes are appended to it.

        Returns
        -------
        None
            Six files are written to disk:
            (1) ``<output>_coordinates.csv``: means
            (mean position for each data point),
            (2) ``<output>_modes.csv``: modes
            (node with max. responsibility for each data point),
            (3) ``<output>_responsibilities.csv``: responsibilities
            (posterior probabilities for each data point),
            (4) ``<output>_dimensionsAndVariance.csv``: initial space
            dimension and data distribution variance, as two
            ``key:value`` text lines,
            (5) ``<output>_manifold.csv``: manifold coordinates (matY),
            (6) ``<output>_parametersMatrix.csv``: parameter matrix (matW).
            A summary of the written files is printed to stdout.
        """
        outparams = "n_dimensions:"+str(self.n_dimensions) + \
            "\n"+"variance:"+str(self.betaInv)
        self._save_map_outputs(output)
        np.savetxt(fname=output+"_manifold.csv", X=self.matY, delimiter=",")
        np.savetxt(fname=output+"_parametersMatrix.csv",
                   X=self.matW, delimiter=",")
        # outparams is plain text, not an array: np.savetxt rejects a string
        # (it becomes a 0-D array), so write it with a regular file handle.
        with open(output+"_dimensionsAndVariance.csv", "w") as handle:
            handle.write(outparams + "\n")
        self._report_map_outputs(output)
        print("%s: manifold coordinates in the initial data space "
              "(dimensions: n_data_dimensions*n_points_on_manifold"
              % (output+"_manifold.csv"))
        print("")
        print("%s: parameters matrix"
              % (output+"_parametersMatrix.csv"))
        print("")
        print("%s: initial space and variance"
              % (output+"_dimensionsAndVariance.csv"))
        print("")
        print("")
|
|
287
|
+
|