ugtm 2.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ugtm/__init__.py ADDED
@@ -0,0 +1,25 @@
1
+ """ugtm: a python package for Generative Topographic Mapping (GTM)
2
+
3
+ Submodules
4
+ ==========
5
+
6
+ .. autosummary::
7
+ :toctree: _autosummary
8
+
9
+ ugtm_sklearn
10
+ ugtm_gtm
11
+ ugtm_kgtm
12
+ ugtm_classes
13
+ ugtm_landscape
14
+ ugtm_predictions
15
+ ugtm_crossvalidate
16
+ ugtm_preprocess
17
+ """
18
+
19
+ from .ugtm_landscape import *
20
+ from .ugtm_gtm import *
21
+ from .ugtm_kgtm import *
22
+ from .ugtm_predictions import *
23
+ from .ugtm_crossvalidate import *
24
+ from .ugtm_preprocess import *
25
+ from .ugtm_sklearn import *
ugtm/ugtm_classes.py ADDED
@@ -0,0 +1,287 @@
1
+ """Defines classes for initial and optimized GTM model.
2
+ """
3
+ # Authors: Helena A. Gaspar <hagax8@gmail.com>
4
+ # License: MIT
5
+
6
+ from __future__ import print_function
7
+ import numpy as np
8
+
9
+
10
class ReturnU(object):
    """Simple container pairing a ``matU`` value with a ``betaInv`` value.

    Attributes
    ----------
    matU : object
        Stored exactly as passed to the constructor.
    betaInv : object
        Stored exactly as passed to the constructor.
    """

    def __init__(self, matU, betaInv):
        """Store both arguments unchanged as instance attributes."""
        # No validation or copying: the caller's objects are kept by reference.
        self.matU, self.betaInv = matU, betaInv
14
+
15
+
16
class InitialGTM(object):
    r"""Container for an initial (not yet optimized) GTM model.

    The class performs no computation: it only keeps the values handed
    to its constructor.

    Attributes
    ----------
    matX : array of shape (n_nodes, 2)
        Node coordinates; the nodes define a grid in the 2D space.
    matM : array of shape (n_rbf_centers, 2)
        Coordinates of the radial basis function (RBF) centers,
        which also define a grid in the 2D space.
    n_nodes : int
        Number of nodes in the 2D grid.
    n_rbf_centers : int
        Number of RBF centers.
    rbfWidth : float
        Initial RBF width, set to the average of the minimum distance
        between RBF centers:
        :math:`rbfWidth=\sigma \times average(\mathbf{distances(rbf)}_{min})`,
        where :math:`\sigma` is the GTM hyperparameter s.
        NB: if GTM hyperparameter s = 0 (not recommended),
        rbfWidth is set to the maximum distance between RBF centers.
    matPhiMPlusOne : array of shape (n_nodes, n_rbf_centers+1)
        RBF matrix with one extra dimension holding the bias term.
    matW : array of shape (n_dimensions, n_rbf_centers+1)
        Parameter matrix (PCA-initialized).
    matY : array of shape (n_dimensions, n_nodes)
        Manifold in n-dimensional space (projection of matX in data space);
        a point matY[:,i] is the center of a Gaussian component
        in data space.
        :math:`\mathbf{Y}=\mathbf{W}\mathbf{\Phi}^T`
    betaInv : float
        Noise variance parameter for the data distribution,
        written as :math:`\beta^{-1}` in the original paper.
        Initialized to the larger of:
        (1) the 3rd eigenvalue of the data covariance matrix,
        (2) half the average distance between Gaussian component centers
        in the data space (matY matrix).
    n_dimensions : int
        Data space dimensionality (number of variables).
    """

    def __init__(self, matX, matM, n_nodes, n_rbf_centers, rbfWidth,
                 matPhiMPlusOne, matW, matY, betaInv, n_dimensions):
        r"""Build an InitialGTM from precomputed quantities.

        Parameters
        ----------
        matX : array of shape (n_nodes, 2)
            Node coordinates; the nodes define a grid in the 2D space.
        matM : array of shape (n_rbf_centers, 2)
            Coordinates of the radial basis function (RBF) centers,
            which also define a grid in the 2D space.
        n_nodes : int
            Number of nodes in the 2D grid.
        n_rbf_centers : int
            Number of RBF centers.
        rbfWidth : float
            Initial RBF width, set to the average of the minimum distance
            between RBF centers:
            :math:`rbfWidth=\sigma \times average(\mathbf{distances(rbf)}_{min})`,
            where :math:`\sigma` is the GTM hyperparameter s.
            NB: if GTM hyperparameter s = 0 (not recommended),
            rbfWidth is set to the maximum distance between RBF centers.
        matPhiMPlusOne : array of shape (n_nodes, n_rbf_centers+1)
            RBF matrix with one extra dimension holding the bias term.
        matW : array of shape (n_dimensions, n_rbf_centers+1)
            Parameter matrix (PCA-initialized).
        matY : array of shape (n_dimensions, n_nodes)
            Manifold in n-dimensional space
            (projection of matX in data space);
            a point matY[:,i] is a Gaussian component center in data space.
            :math:`\mathbf{Y}=\mathbf{W}\mathbf{\Phi}^T`
        betaInv : float
            Noise variance parameter for the data distribution,
            written as :math:`\beta^{-1}` in the original paper.
            Initialized to the larger of:
            (1) the 3rd eigenvalue of the data covariance matrix,
            (2) half the average distance between Gaussian component
            centers in the data space (matY matrix).
        n_dimensions : int
            Data space dimensionality (number of variables).
        """
        # Sizes of the latent grid, the RBF network and the data space.
        self.n_nodes = n_nodes
        self.n_rbf_centers = n_rbf_centers
        self.n_dimensions = n_dimensions

        # Latent-space geometry: node grid, RBF centers and RBF width.
        self.matX = matX
        self.matM = matM
        self.rbfWidth = rbfWidth

        # Initial mapping to data space and its noise variance.
        self.matPhiMPlusOne = matPhiMPlusOne
        self.matW = matW
        self.matY = matY
        self.betaInv = betaInv
106
+
107
+
108
class OptimizedGTM(object):
    r"""Class for optimized GTM model.

    Attributes
    ----------
    matX : array of shape (n_nodes, 2)
        Coordinates of nodes defining a grid in the 2D space.
    matW : array of shape (n_dimensions, n_rbf_centers+1)
        Parameter matrix (PCA-initialized).
    matY : array of shape (n_dimensions, n_nodes)
        Manifold in n-dimensional space (projection of matX in data space).
        matY = np.dot(matW, np.transpose(matPhiMPlusOne))
    matP : array of shape (n_individuals, n_nodes)
        Data distribution with variance betaInv.
    matR : array of shape (n_individuals, n_nodes)
        Responsibilities (posterior probabilities),
        used to compute data representations:
        means (matMeans) and modes (matModes).
        Responsibilities are the main output of GTM.
        matR[i,:] represents the responsibility vector for an instance i.
        The columns in matR correspond to rows in matX (nodes).
    betaInv : float
        Noise variance parameter for the data distribution.
        Written as :math:`\beta^{-1}` in the original paper.
    matMeans : array of shape (n_individuals, 2)
        Data representation in 2D space: means (most commonly used for GTM).
    matModes : array of shape (n_individuals, 2)
        Data representation in 2D space: modes
        (for each instance, coordinate with highest responsibility).
    n_dimensions : int
        Data space dimensionality (number of variables).
    converged : bool
        True if the model has converged; otherwise False.
    """

    def __init__(self, matW, matY, matP, matR, betaInv, matMeans,
                 matModes, matX, n_dimensions, converged):
        r"""Constructor for OptimizedGTM class.

        All arguments are stored unchanged as attributes of the same name.

        Parameters
        ----------
        matW : array of shape (n_dimensions, n_rbf_centers+1)
            Parameter matrix (PCA-initialized).
        matY : array of shape (n_dimensions, n_nodes)
            Manifold in n-dimensional space
            (projection of matX in data space).
            matY = np.dot(matW, np.transpose(matPhiMPlusOne))
        matP : array of shape (n_individuals, n_nodes)
            Data distribution with variance betaInv.
        matR : array of shape (n_individuals, n_nodes)
            Responsibilities (posterior probabilities),
            used to compute data representations:
            means (matMeans) and modes (matModes).
            Responsibilities are the main output of GTM.
            matR[i,:] represents the responsibility vector for an instance i.
            The columns in matR correspond to rows in matX (nodes).
        betaInv : float
            Noise variance parameter for the data distribution.
            Written as :math:`\beta^{-1}` in the original paper.
        matMeans : array of shape (n_individuals, 2)
            Data representation in 2D space: means
            (most commonly used for GTM).
        matModes : array of shape (n_individuals, 2)
            Data representation in 2D space: modes
            (for each instance, coordinate with highest responsibility).
        matX : array of shape (n_nodes, 2)
            Coordinates of nodes defining a grid in the 2D space.
        n_dimensions : int
            Data space dimensionality (number of variables).
        converged : bool
            True if the model has converged; otherwise False.
        """
        self.matW = matW
        self.matY = matY
        self.matP = matP
        self.matR = matR
        self.betaInv = betaInv
        self.matMeans = matMeans
        self.matModes = matModes
        self.matX = matX
        self.n_dimensions = n_dimensions
        self.converged = converged

    def _save_core_matrices(self, output):
        """Save responsibilities, mean coordinates and modes as CSV files."""
        np.savetxt(fname=output+"_responsibilities.csv",
                   X=self.matR, delimiter=",")
        np.savetxt(fname=output+"_coordinates.csv",
                   X=self.matMeans, delimiter=",")
        np.savetxt(fname=output+"_modes.csv", X=self.matModes, delimiter=",")

    def _print_core_report(self, output):
        """Print the report header and the three core file descriptions."""
        print("")
        print("Wrote to disk:")
        print("")
        print("%s: responsibilities, which represent "
              "each individual's encoding "
              "on the map (dimensions=n_individuals*n_nodes_on_the_map)"
              % (output+"_responsibilities.csv"))
        print("")
        print("%s: coordinates to plot, which represent each individual's "
              "mean position on the map (dimensions = "
              "n_individuals*n_latent_dimensions)"
              % (output+"_coordinates.csv"))
        print("")
        print("%s: modes positions for each individual on the map "
              "(node with max probability for the individual; "
              "dimensions = n_individuals*n_latent_dimensions)"
              % (output+"_modes.csv"))
        print("")

    def write(self, output="output"):
        """Write optimized GTM model: means, modes and responsibilities.

        Parameters
        ----------
        output : str, optional (default = 'output')
            Output path prefix; a suffix is appended for each file.

        Returns
        -------
        None
            Three comma-delimited files are written as a side effect:
            (1) ``<output>_coordinates.csv``: means
            (mean position for each data point),
            (2) ``<output>_modes.csv``: modes
            (node with max. responsibility for each data point),
            (3) ``<output>_responsibilities.csv``: responsibilities
            (posterior probabilities for each data point).
            A summary of the written files is printed to stdout.
        """
        self._save_core_matrices(output)
        self._print_core_report(output)
        print("")

    def write_all(self, output="output"):
        """Write optimized GTM model and optimized parameters.

        Parameters
        ----------
        output : str, optional (default = 'output')
            Output path prefix; a suffix is appended for each file.

        Returns
        -------
        None
            Six files are written as a side effect:
            (1) ``<output>_coordinates.csv``: means
            (mean position for each data point),
            (2) ``<output>_modes.csv``: modes
            (node with max. responsibility for each data point),
            (3) ``<output>_responsibilities.csv``: responsibilities
            (posterior probabilities for each data point),
            (4) ``<output>_dimensionsAndVariance.csv``: initial space
            dimension and data distribution variance, as two
            ``key:value`` text lines,
            (5) ``<output>_manifold.csv``: manifold coordinates (matY),
            (6) ``<output>_parametersMatrix.csv``: parameter matrix (matW).
            A summary of the written files is printed to stdout.
        """
        outparams = "n_dimensions:"+str(self.n_dimensions) + \
            "\n"+"variance:"+str(self.betaInv)
        self._save_core_matrices(output)
        np.savetxt(fname=output+"_manifold.csv", X=self.matY, delimiter=",")
        np.savetxt(fname=output+"_parametersMatrix.csv",
                   X=self.matW, delimiter=",")
        # outparams is plain text, not an array: np.savetxt rejects a 0-D
        # string input with ValueError, so write it as an ordinary text file.
        with open(output+"_dimensionsAndVariance.csv", "w") as handle:
            handle.write(outparams+"\n")
        self._print_core_report(output)
        print("%s: manifold coordinates in the initial data space "
              "(dimensions: n_data_dimensions*n_points_on_manifold)"
              % (output+"_manifold.csv"))
        print("")
        print("%s: parameters matrix"
              % (output+"_parametersMatrix.csv"))
        print("")
        print("%s: initial space and variance"
              % (output+"_dimensionsAndVariance.csv"))
        print("")
        print("")
287
+