topologicpy 0.4.8__py3-none-any.whl → 0.4.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. topologicpy/Aperture.py +46 -0
  2. topologicpy/Cell.py +1780 -0
  3. topologicpy/CellComplex.py +791 -0
  4. topologicpy/Cluster.py +591 -0
  5. topologicpy/Color.py +157 -0
  6. topologicpy/Context.py +56 -0
  7. topologicpy/DGL.py +2661 -0
  8. topologicpy/Dictionary.py +470 -0
  9. topologicpy/Edge.py +855 -0
  10. topologicpy/EnergyModel.py +1052 -0
  11. topologicpy/Face.py +1810 -0
  12. topologicpy/Graph.py +3526 -0
  13. topologicpy/Graph_Export.py +858 -0
  14. topologicpy/Grid.py +338 -0
  15. topologicpy/Helper.py +182 -0
  16. topologicpy/Honeybee.py +424 -0
  17. topologicpy/Matrix.py +255 -0
  18. topologicpy/Neo4jGraph.py +311 -0
  19. topologicpy/Plotly.py +1396 -0
  20. topologicpy/Polyskel.py +524 -0
  21. topologicpy/Process.py +1368 -0
  22. topologicpy/SQL.py +48 -0
  23. topologicpy/Shell.py +1418 -0
  24. topologicpy/Speckle.py +433 -0
  25. topologicpy/Topology.py +5854 -0
  26. topologicpy/UnitTest.py +29 -0
  27. topologicpy/Vector.py +555 -0
  28. topologicpy/Vertex.py +714 -0
  29. topologicpy/Wire.py +2346 -0
  30. topologicpy/__init__.py +20 -0
  31. topologicpy/bin/linux/topologic/__init__.py +2 -0
  32. topologicpy/bin/linux/topologic/topologic.cpython-310-x86_64-linux-gnu.so +0 -0
  33. topologicpy/bin/linux/topologic/topologic.cpython-311-x86_64-linux-gnu.so +0 -0
  34. topologicpy/bin/linux/topologic/topologic.cpython-38-x86_64-linux-gnu.so +0 -0
  35. topologicpy/bin/linux/topologic/topologic.cpython-39-x86_64-linux-gnu.so +0 -0
  36. topologicpy/bin/linux/topologic.libs/libTKBO-6bdf205d.so.7.7.0 +0 -0
  37. topologicpy/bin/linux/topologic.libs/libTKBRep-2960a069.so.7.7.0 +0 -0
  38. topologicpy/bin/linux/topologic.libs/libTKBool-c44b74bd.so.7.7.0 +0 -0
  39. topologicpy/bin/linux/topologic.libs/libTKFillet-9a670ba0.so.7.7.0 +0 -0
  40. topologicpy/bin/linux/topologic.libs/libTKG2d-8f31849e.so.7.7.0 +0 -0
  41. topologicpy/bin/linux/topologic.libs/libTKG3d-4c6bce57.so.7.7.0 +0 -0
  42. topologicpy/bin/linux/topologic.libs/libTKGeomAlgo-26066fd9.so.7.7.0 +0 -0
  43. topologicpy/bin/linux/topologic.libs/libTKGeomBase-2116cabe.so.7.7.0 +0 -0
  44. topologicpy/bin/linux/topologic.libs/libTKMath-72572fa8.so.7.7.0 +0 -0
  45. topologicpy/bin/linux/topologic.libs/libTKMesh-2a060427.so.7.7.0 +0 -0
  46. topologicpy/bin/linux/topologic.libs/libTKOffset-6cab68ff.so.7.7.0 +0 -0
  47. topologicpy/bin/linux/topologic.libs/libTKPrim-eb1262b3.so.7.7.0 +0 -0
  48. topologicpy/bin/linux/topologic.libs/libTKShHealing-e67e5cc7.so.7.7.0 +0 -0
  49. topologicpy/bin/linux/topologic.libs/libTKTopAlgo-e4c96c33.so.7.7.0 +0 -0
  50. topologicpy/bin/linux/topologic.libs/libTKernel-fb7fe3b7.so.7.7.0 +0 -0
  51. topologicpy/bin/linux/topologic.libs/libgcc_s-32c1665e.so.1 +0 -0
  52. topologicpy/bin/linux/topologic.libs/libstdc++-672d7b41.so.6.0.30 +0 -0
  53. topologicpy/bin/windows/topologic/TKBO-f6b191de.dll +0 -0
  54. topologicpy/bin/windows/topologic/TKBRep-e56a600e.dll +0 -0
  55. topologicpy/bin/windows/topologic/TKBool-7b8d47ae.dll +0 -0
  56. topologicpy/bin/windows/topologic/TKFillet-0ddbf0a8.dll +0 -0
  57. topologicpy/bin/windows/topologic/TKG2d-2e2dee3d.dll +0 -0
  58. topologicpy/bin/windows/topologic/TKG3d-6674513d.dll +0 -0
  59. topologicpy/bin/windows/topologic/TKGeomAlgo-d240e370.dll +0 -0
  60. topologicpy/bin/windows/topologic/TKGeomBase-df87aba5.dll +0 -0
  61. topologicpy/bin/windows/topologic/TKMath-45bd625a.dll +0 -0
  62. topologicpy/bin/windows/topologic/TKMesh-d6e826b1.dll +0 -0
  63. topologicpy/bin/windows/topologic/TKOffset-79b9cc94.dll +0 -0
  64. topologicpy/bin/windows/topologic/TKPrim-aa430a86.dll +0 -0
  65. topologicpy/bin/windows/topologic/TKShHealing-bb48be89.dll +0 -0
  66. topologicpy/bin/windows/topologic/TKTopAlgo-7d0d1e22.dll +0 -0
  67. topologicpy/bin/windows/topologic/TKernel-08c8cfbb.dll +0 -0
  68. topologicpy/bin/windows/topologic/__init__.py +2 -0
  69. topologicpy/bin/windows/topologic/topologic.cp310-win_amd64.pyd +0 -0
  70. topologicpy/bin/windows/topologic/topologic.cp311-win_amd64.pyd +0 -0
  71. topologicpy/bin/windows/topologic/topologic.cp38-win_amd64.pyd +0 -0
  72. topologicpy/bin/windows/topologic/topologic.cp39-win_amd64.pyd +0 -0
  73. {topologicpy-0.4.8.dist-info → topologicpy-0.4.9.dist-info}/METADATA +1 -1
  74. topologicpy-0.4.9.dist-info/RECORD +77 -0
  75. topologicpy-0.4.9.dist-info/top_level.txt +1 -0
  76. topologicpy-0.4.8.dist-info/RECORD +0 -5
  77. topologicpy-0.4.8.dist-info/top_level.txt +0 -1
  78. {topologicpy-0.4.8.dist-info → topologicpy-0.4.9.dist-info}/LICENSE +0 -0
  79. {topologicpy-0.4.8.dist-info → topologicpy-0.4.9.dist-info}/WHEEL +0 -0
topologicpy/DGL.py ADDED
@@ -0,0 +1,2661 @@
import topologicpy
import topologic
from topologicpy.Dictionary import Dictionary
import os
import random
import time
from datetime import datetime
import copy
import sys
import subprocess

# Import third-party dependencies. If one is missing, attempt a one-shot pip
# install into the interpreter's first search path and retry the import once.
try:
    import numpy as np
except:
    call = [sys.executable, '-m', 'pip', 'install', 'numpy', '-t', sys.path[0]]
    subprocess.run(call)
    try:
        import numpy as np
    except:
        print("DGL - Error: Could not import numpy.")
try:
    import pandas as pd
except:
    call = [sys.executable, '-m', 'pip', 'install', 'pandas', '-t', sys.path[0]]
    subprocess.run(call)
    try:
        import pandas as pd
    except:
        print("DGL - Error: Could not import pandas")
try:
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    from torch.utils.data.sampler import SubsetRandomSampler
    from torch.utils.data import DataLoader, ConcatDataset
except:
    call = [sys.executable, '-m', 'pip', 'install', 'torch', '-t', sys.path[0]]
    subprocess.run(call)
    try:
        import torch
        import torch.nn as nn
        import torch.nn.functional as F
        from torch.utils.data.sampler import SubsetRandomSampler
        from torch.utils.data import DataLoader, ConcatDataset
    except:
        print("DGL - Error: Could not import torch")
try:
    import dgl
    from dgl.data import DGLDataset
    from dgl.dataloading import GraphDataLoader
    from dgl.nn import GINConv, GraphConv, SAGEConv, TAGConv
    from dgl import save_graphs, load_graphs
except:
    call = [sys.executable, '-m', 'pip', 'install', 'dgl', 'dglgo', '-f', 'https://data.dgl.ai/wheels/repo.html', '--upgrade', '-t', sys.path[0]]
    subprocess.run(call)
    try:
        import dgl
        from dgl.data import DGLDataset
        from dgl.nn import GraphConv
        from dgl import save_graphs, load_graphs
    except:
        print("DGL - Error: Could not import dgl")
try:
    import sklearn
    from sklearn.model_selection import KFold
    from sklearn.metrics import accuracy_score
except:
    call = [sys.executable, '-m', 'pip', 'install', 'scikit-learn', '-t', sys.path[0]]
    subprocess.run(call)
    try:
        import sklearn
        from sklearn.model_selection import KFold
        from sklearn.metrics import accuracy_score
    except:
        print("DGL - Error: Could not import sklearn")
try:
    from tqdm.auto import tqdm
except:
    call = [sys.executable, '-m', 'pip', 'install', 'tqdm', '-t', sys.path[0]]
    subprocess.run(call)
    try:
        from tqdm.auto import tqdm
    except:
        print("DGL - Error: Could not import tqdm")

class _Dataset(DGLDataset):
    def __init__(self, graphs, labels, node_attr_key):
        super().__init__(name='GraphDGL')
        self.graphs = graphs
        self.labels = torch.LongTensor(labels)
        self.node_attr_key = node_attr_key
        # All graphs share the same node feature width, so dim_nfeats is read
        # from the first graph in the list.
        self.dim_nfeats = graphs[0].ndata[node_attr_key].shape[1]
        # The number of distinct labels gives the number of graph classes.
        self.gclasses = len(set(labels))

    def __getitem__(self, i):
        return self.graphs[i], self.labels[i]

    def __len__(self):
        return len(self.graphs)

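# Minimal usage sketch (illustrative only; assumes each graph already stores a
# float feature matrix under `node_attr_key`):
#
#   g = dgl.graph(([0, 1], [1, 2]), num_nodes=3)
#   g.ndata["node_attr"] = torch.ones(3, 4)           # 3 nodes, 4 features each
#   ds = _Dataset(graphs=[g], labels=[0], node_attr_key="node_attr")
#   print(ds.dim_nfeats, ds.gclasses)                 # -> 4 1
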
class _Hparams:
    def __init__(self, model_type="ClassifierHoldout", optimizer_str="Adam", amsgrad=False, betas=(0.9, 0.999), eps=1e-6, lr=0.001, lr_decay=0, maximize=False, rho=0.9, weight_decay=0, cv_type="Holdout", split=[0.8, 0.1, 0.1], k_folds=5, hl_widths=[32], conv_layer_type='SAGEConv', pooling="AvgPooling", batch_size=32, epochs=1,
                 use_gpu=False, loss_function="Cross Entropy"):
        """
        Parameters
        ----------
        cv_type : str
            The method of cross-validation:
            "Holdout": holdout
            "K-Fold": k-fold cross-validation
        k_folds : int
            The number of folds (2 or more) for k-fold cross-validation. The default is 5.
        split : list
            A list of three values in the range 0 to 1 defining the train,
            validate, and test split. The default of [0.8, 0.1, 0.1] means 80% of the data
            is used for training, 10% for validation, and the remaining 10% for testing.
        hl_widths : list
            The width of each hidden layer. For example, [32] means one hidden
            layer with 32 neurons.
        optimizer_str : str
            The name of the torch.optim optimizer to use. The default is "Adam".
        lr : float
            The learning rate: the step size the optimizer uses when applying gradients.
        batch_size : int
            The number of samples used for training and testing in each step of an epoch.
        epochs : int
            The number of training cycles over the full training set. In an epoch, all of the data is used exactly once; a forward pass and a backward pass together count as one pass.
        use_gpu : bool
            If True, use the GPU; otherwise, use the CPU.

        Returns
        -------
        None

        """

        self.model_type = model_type
        self.optimizer_str = optimizer_str
        self.amsgrad = amsgrad
        self.betas = betas
        self.eps = eps
        self.lr = lr
        self.lr_decay = lr_decay
        self.maximize = maximize
        self.rho = rho
        self.weight_decay = weight_decay
        self.cv_type = cv_type
        self.split = split
        self.k_folds = k_folds
        self.hl_widths = hl_widths
        self.conv_layer_type = conv_layer_type
        self.pooling = pooling
        self.batch_size = batch_size
        self.epochs = epochs
        self.use_gpu = use_gpu
        self.loss_function = loss_function

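# Minimal usage sketch (illustrative only): hyper-parameters for a holdout
# classifier with two hidden layers of 32 neurons each.
#
#   hparams = _Hparams(model_type="ClassifierHoldout", optimizer_str="Adam",
#                      lr=0.001, cv_type="Holdout", split=[0.8, 0.1, 0.1],
#                      hl_widths=[32, 32], conv_layer_type="SAGEConv",
#                      pooling="AvgPooling", batch_size=32, epochs=10)
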
class _Classic(nn.Module):
    def __init__(self, in_feats, h_feats, num_classes):
        """

        Parameters
        ----------
        in_feats : int
            Input dimension in the form of an integer
        h_feats : list
            List of hidden neurons for each hidden layer
        num_classes : int
            Number of output classes

        Returns
        -------
        None.

        """
        super(_Classic, self).__init__()
        assert isinstance(h_feats, list), "h_feats must be a list"
        h_feats = [x for x in h_feats if x is not None]
        assert len(h_feats) != 0, "h_feats is empty. unable to add hidden layers"
        self.list_of_layers = nn.ModuleList()
        dim = [in_feats] + h_feats
        for i in range(1, len(dim)):
            self.list_of_layers.append(GraphConv(dim[i-1], dim[i]))
        self.final = GraphConv(dim[-1], num_classes)

    def forward(self, g, in_feat):
        h = in_feat
        for i in range(len(self.list_of_layers)):
            h = self.list_of_layers[i](g, h)
            h = F.relu(h)
        h = self.final(g, h)
        g.ndata['h'] = h
        return dgl.mean_nodes(g, 'h')

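# Note on the readout above: dgl.mean_nodes(g, 'h') averages the per-node
# logits stored under g.ndata['h'] into one row per graph, so _Classic returns
# a tensor of shape (num_graphs_in_batch, num_classes). Minimal sketch:
#
#   g = dgl.graph(([0, 1], [1, 0]))
#   g.ndata['h'] = torch.tensor([[1.0, 3.0], [3.0, 5.0]])
#   dgl.mean_nodes(g, 'h')                            # -> tensor([[2., 4.]])
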
class _ClassicReg(nn.Module):
    def __init__(self, in_feats, h_feats):
        super(_ClassicReg, self).__init__()
        assert isinstance(h_feats, list), "h_feats must be a list"
        h_feats = [x for x in h_feats if x is not None]
        assert len(h_feats) != 0, "h_feats is empty. unable to add hidden layers"
        self.list_of_layers = nn.ModuleList()
        dim = [in_feats] + h_feats
        for i in range(1, len(dim)):
            self.list_of_layers.append(GraphConv(dim[i-1], dim[i]))
        self.final = nn.Linear(dim[-1], 1)

    def forward(self, g, in_feat):
        h = in_feat
        for i in range(len(self.list_of_layers)):
            h = self.list_of_layers[i](g, h)
            h = F.relu(h)
        h = self.final(h)
        g.ndata['h'] = h
        return dgl.mean_nodes(g, 'h')

class _GINConv(nn.Module):
    def __init__(self, in_feats, h_feats, num_classes, pooling):
        super(_GINConv, self).__init__()
        assert isinstance(h_feats, list), "h_feats must be a list"
        h_feats = [x for x in h_feats if x is not None]
        assert len(h_feats) != 0, "h_feats is empty. unable to add hidden layers"
        self.list_of_layers = nn.ModuleList()
        dim = [in_feats] + h_feats

        # Convolution (Hidden) Layers
        for i in range(1, len(dim)):
            lin = nn.Linear(dim[i-1], dim[i])
            self.list_of_layers.append(GINConv(lin, 'sum'))

        # Final Layer
        self.final = nn.Linear(dim[-1], num_classes)

        # Pooling layer
        if pooling.lower() == "avgpooling":
            self.pooling_layer = dgl.nn.AvgPooling()
        elif pooling.lower() == "maxpooling":
            self.pooling_layer = dgl.nn.MaxPooling()
        elif pooling.lower() == "sumpooling":
            self.pooling_layer = dgl.nn.SumPooling()
        else:
            raise NotImplementedError

    def forward(self, g, in_feat):
        h = in_feat
        # Generate node features
        for i in range(len(self.list_of_layers)):  # Aim for about 2 to 3 layers
            h = self.list_of_layers[i](g, h)
            h = F.relu(h)
        # h is now a matrix of dimension num_nodes by h_feats[-1]
        h = self.final(h)
        g.ndata['h'] = h
        # Go from node-level features to graph-level features by pooling
        h = self.pooling_layer(g, h)
        # h is now a vector of dimension num_classes
        return h

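# The three supported readouts differ only in the reduction they apply over
# each graph's node features. Minimal sketch (illustrative only):
#
#   g = dgl.graph(([0, 1], [1, 0]))                   # one graph, two nodes
#   h = torch.tensor([[1.0], [3.0]])
#   dgl.nn.AvgPooling()(g, h)                         # -> tensor([[2.]])
#   dgl.nn.MaxPooling()(g, h)                         # -> tensor([[3.]])
#   dgl.nn.SumPooling()(g, h)                         # -> tensor([[4.]])
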
class _GraphConv(nn.Module):
    def __init__(self, in_feats, h_feats, num_classes, pooling):
        super(_GraphConv, self).__init__()
        assert isinstance(h_feats, list), "h_feats must be a list"
        h_feats = [x for x in h_feats if x is not None]
        assert len(h_feats) != 0, "h_feats is empty. unable to add hidden layers"
        self.list_of_layers = nn.ModuleList()
        dim = [in_feats] + h_feats

        # Convolution (Hidden) Layers
        for i in range(1, len(dim)):
            self.list_of_layers.append(GraphConv(dim[i-1], dim[i]))

        # Final Layer
        # Followed example at: https://docs.dgl.ai/tutorials/blitz/5_graph_classification.html#sphx-glr-tutorials-blitz-5-graph-classification-py
        self.final = GraphConv(dim[-1], num_classes)

        # Pooling layer
        if pooling.lower() == "avgpooling":
            self.pooling_layer = dgl.nn.AvgPooling()
        elif pooling.lower() == "maxpooling":
            self.pooling_layer = dgl.nn.MaxPooling()
        elif pooling.lower() == "sumpooling":
            self.pooling_layer = dgl.nn.SumPooling()
        else:
            raise NotImplementedError

    def forward(self, g, in_feat):
        h = in_feat
        # Generate node features
        for i in range(len(self.list_of_layers)):  # Aim for about 2 to 3 layers
            h = self.list_of_layers[i](g, h)
            h = F.relu(h)
        # h is now a matrix of dimension num_nodes by h_feats[-1]
        h = self.final(g, h)
        g.ndata['h'] = h
        # Go from node-level features to graph-level features by pooling
        h = self.pooling_layer(g, h)
        # h is now a vector of dimension num_classes
        return h

class _SAGEConv(nn.Module):
    def __init__(self, in_feats, h_feats, num_classes, pooling):
        super(_SAGEConv, self).__init__()
        assert isinstance(h_feats, list), "h_feats must be a list"
        h_feats = [x for x in h_feats if x is not None]
        assert len(h_feats) != 0, "h_feats is empty. unable to add hidden layers"
        self.list_of_layers = nn.ModuleList()
        dim = [in_feats] + h_feats

        # Convolution (Hidden) Layers
        for i in range(1, len(dim)):
            self.list_of_layers.append(SAGEConv(dim[i-1], dim[i], aggregator_type='pool'))

        # Final Layer
        self.final = nn.Linear(dim[-1], num_classes)

        # Pooling layer
        if pooling.lower() == "avgpooling":
            self.pooling_layer = dgl.nn.AvgPooling()
        elif pooling.lower() == "maxpooling":
            self.pooling_layer = dgl.nn.MaxPooling()
        elif pooling.lower() == "sumpooling":
            self.pooling_layer = dgl.nn.SumPooling()
        else:
            raise NotImplementedError

    def forward(self, g, in_feat):
        h = in_feat
        # Generate node features
        for i in range(len(self.list_of_layers)):  # Aim for about 2 to 3 layers
            h = self.list_of_layers[i](g, h)
            h = F.relu(h)
        # h is now a matrix of dimension num_nodes by h_feats[-1]
        h = self.final(h)
        g.ndata['h'] = h
        # Go from node-level features to graph-level features by pooling
        h = self.pooling_layer(g, h)
        # h is now a vector of dimension num_classes
        return h

class _TAGConv(nn.Module):
    def __init__(self, in_feats, h_feats, num_classes, pooling):
        super(_TAGConv, self).__init__()
        assert isinstance(h_feats, list), "h_feats must be a list"
        h_feats = [x for x in h_feats if x is not None]
        assert len(h_feats) != 0, "h_feats is empty. unable to add hidden layers"
        self.list_of_layers = nn.ModuleList()
        dim = [in_feats] + h_feats

        # Convolution (Hidden) Layers
        for i in range(1, len(dim)):
            self.list_of_layers.append(TAGConv(dim[i-1], dim[i], k=2))

        # Final Layer
        self.final = nn.Linear(dim[-1], num_classes)

        # Pooling layer
        if pooling.lower() == "avgpooling":
            self.pooling_layer = dgl.nn.AvgPooling()
        elif pooling.lower() == "maxpooling":
            self.pooling_layer = dgl.nn.MaxPooling()
        elif pooling.lower() == "sumpooling":
            self.pooling_layer = dgl.nn.SumPooling()
        else:
            raise NotImplementedError

    def forward(self, g, in_feat):
        h = in_feat
        # Generate node features
        for i in range(len(self.list_of_layers)):  # Aim for about 2 to 3 layers
            h = self.list_of_layers[i](g, h)
            h = F.relu(h)
        # h is now a matrix of dimension num_nodes by h_feats[-1]
        h = self.final(h)
        g.ndata['h'] = h
        # Go from node-level features to graph-level features by pooling
        h = self.pooling_layer(g, h)
        # h is now a vector of dimension num_classes
        return h

class _GraphConvReg(nn.Module):
    def __init__(self, in_feats, h_feats, pooling):
        super(_GraphConvReg, self).__init__()
        assert isinstance(h_feats, list), "h_feats must be a list"
        h_feats = [x for x in h_feats if x is not None]
        assert len(h_feats) != 0, "h_feats is empty. unable to add hidden layers"
        self.list_of_layers = nn.ModuleList()
        dim = [in_feats] + h_feats

        # Convolution (Hidden) Layers
        for i in range(1, len(dim)):
            self.list_of_layers.append(GraphConv(dim[i-1], dim[i]))

        # Final Layer
        self.final = nn.Linear(dim[-1], 1)

        # Pooling layer
        if pooling.lower() == "avgpooling":
            self.pooling_layer = dgl.nn.AvgPooling()
        elif pooling.lower() == "maxpooling":
            self.pooling_layer = dgl.nn.MaxPooling()
        elif pooling.lower() == "sumpooling":
            self.pooling_layer = dgl.nn.SumPooling()
        else:
            raise NotImplementedError

    def forward(self, g, in_feat):
        h = in_feat
        # Generate node features
        for i in range(len(self.list_of_layers)):  # Aim for about 2 to 3 layers
            h = self.list_of_layers[i](g, h)
            h = F.relu(h)
        # h is now a matrix of dimension num_nodes by h_feats[-1]
        h = self.final(h)
        g.ndata['h'] = h
        # Go from node-level features to graph-level features by pooling
        h = self.pooling_layer(g, h)
        # h is now a vector of dimension 1 (the regression output)
        return h

class _RegressorHoldout:
    def __init__(self, hparams, trainingDataset, validationDataset=None, testingDataset=None):
        #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        device = torch.device("cpu")
        self.trainingDataset = trainingDataset
        self.validationDataset = validationDataset
        self.testingDataset = testingDataset
        self.hparams = hparams
        if hparams.conv_layer_type.lower() == 'classic':
            self.model = _ClassicReg(trainingDataset.dim_nfeats, hparams.hl_widths).to(device)
        elif hparams.conv_layer_type.lower() == 'ginconv':
            self.model = _GINConv(trainingDataset.dim_nfeats, hparams.hl_widths,
                                  1, hparams.pooling).to(device)
        elif hparams.conv_layer_type.lower() == 'graphconv':
            self.model = _GraphConvReg(trainingDataset.dim_nfeats, hparams.hl_widths, hparams.pooling).to(device)
        elif hparams.conv_layer_type.lower() == 'sageconv':
            self.model = _SAGEConv(trainingDataset.dim_nfeats, hparams.hl_widths,
                                   1, hparams.pooling).to(device)
        elif hparams.conv_layer_type.lower() == 'tagconv':
            self.model = _TAGConv(trainingDataset.dim_nfeats, hparams.hl_widths,
                                  1, hparams.pooling).to(device)
        elif hparams.conv_layer_type.lower() == 'gcn':
            self.model = _ClassicReg(trainingDataset.dim_nfeats, hparams.hl_widths).to(device)
        else:
            raise NotImplementedError

        if hparams.optimizer_str.lower() == "adadelta":
            self.optimizer = torch.optim.Adadelta(self.model.parameters(), eps=hparams.eps,
                                                  lr=hparams.lr, rho=hparams.rho, weight_decay=hparams.weight_decay)
        elif hparams.optimizer_str.lower() == "adagrad":
            self.optimizer = torch.optim.Adagrad(self.model.parameters(), eps=hparams.eps,
                                                 lr=hparams.lr, lr_decay=hparams.lr_decay, weight_decay=hparams.weight_decay)
        elif hparams.optimizer_str.lower() == "adam":
            self.optimizer = torch.optim.Adam(self.model.parameters(), amsgrad=hparams.amsgrad, betas=hparams.betas, eps=hparams.eps,
                                              lr=hparams.lr, maximize=hparams.maximize, weight_decay=hparams.weight_decay)

        self.use_gpu = hparams.use_gpu
        self.training_loss_list = []
        self.validation_loss_list = []
        self.node_attr_key = trainingDataset.node_attr_key

        # train, validate, test split
        num_train = int(len(trainingDataset) * (hparams.split[0]))
        num_validate = int(len(trainingDataset) * (hparams.split[1]))
        num_test = len(trainingDataset) - num_train - num_validate
        idx = torch.randperm(len(trainingDataset))
        train_sampler = SubsetRandomSampler(idx[:num_train])
        validate_sampler = SubsetRandomSampler(idx[num_train:num_train+num_validate])
        test_sampler = SubsetRandomSampler(idx[num_train+num_validate:num_train+num_validate+num_test])

        if validationDataset:
            self.train_dataloader = GraphDataLoader(trainingDataset,
                                                    batch_size=hparams.batch_size,
                                                    drop_last=False)
            self.validate_dataloader = GraphDataLoader(validationDataset,
                                                       batch_size=hparams.batch_size,
                                                       drop_last=False)
        else:
            self.train_dataloader = GraphDataLoader(trainingDataset, sampler=train_sampler,
                                                    batch_size=hparams.batch_size,
                                                    drop_last=False)
            self.validate_dataloader = GraphDataLoader(trainingDataset, sampler=validate_sampler,
                                                       batch_size=hparams.batch_size,
                                                       drop_last=False)

        if testingDataset:
            self.test_dataloader = GraphDataLoader(testingDataset,
                                                   batch_size=len(testingDataset),
                                                   drop_last=False)
        else:
            self.test_dataloader = GraphDataLoader(trainingDataset, sampler=test_sampler,
                                                   batch_size=hparams.batch_size,
                                                   drop_last=False)

    def train(self):
        #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        device = torch.device("cpu")
        # Init the loss and accuracy reporting lists
        self.training_loss_list = []
        self.validation_loss_list = []

        # Run the training loop for the defined number of epochs
        for _ in tqdm(range(self.hparams.epochs), desc='Epochs', total=self.hparams.epochs, leave=False):
            # Iterate over the DataLoader for training data
            for batched_graph, labels in tqdm(self.train_dataloader, desc='Training', leave=False):
                # Make sure the model is in training mode
                self.model.train()
                # Zero the gradients
                self.optimizer.zero_grad()

                # Perform forward pass
                pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float()).to(device)
                # Compute loss
                loss = F.mse_loss(torch.flatten(pred), labels.float())

                # Perform backward pass
                loss.backward()

                # Perform optimization
                self.optimizer.step()

            self.training_loss_list.append(torch.sqrt(loss).item())
            self.validate()
            self.validation_loss_list.append(torch.sqrt(self.validation_loss).item())

    def validate(self):
        device = torch.device("cpu")
        self.model.eval()
        for batched_graph, labels in tqdm(self.validate_dataloader, desc='Validating', leave=False):
            pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float()).to(device)
            loss = F.mse_loss(torch.flatten(pred), labels.float())
        self.validation_loss = loss

    def test(self):
        #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        device = torch.device("cpu")
        self.model.eval()
        for batched_graph, labels in tqdm(self.test_dataloader, desc='Testing', leave=False):
            pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float()).to(device)
            loss = F.mse_loss(torch.flatten(pred), labels.float())
        self.testing_loss = torch.sqrt(loss).item()

    def save(self, path):
        if path:
            # Make sure the file extension is .pt
            ext = path[len(path)-3:len(path)]
            if ext.lower() != ".pt":
                path = path + ".pt"
            torch.save(self.model, path)

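# Minimal end-to-end sketch (illustrative only; assumes `ds` is a _Dataset
# whose labels are numeric regression targets):
#
#   hparams = _Hparams(model_type="RegressorHoldout", conv_layer_type="GraphConv",
#                      hl_widths=[32], epochs=5)
#   regressor = _RegressorHoldout(hparams, ds)
#   regressor.train()
#   regressor.test()
#   print(regressor.testing_loss)                     # RMSE on the test split
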
class _RegressorKFold:
    def __init__(self, hparams, trainingDataset, testingDataset=None):
        self.trainingDataset = trainingDataset
        self.testingDataset = testingDataset
        self.hparams = hparams
        self.losses = []
        self.min_loss = 0
        # at beginning of the script
        #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        device = torch.device("cpu")
        if hparams.conv_layer_type.lower() == 'classic':
            self.model = _ClassicReg(trainingDataset.dim_nfeats, hparams.hl_widths).to(device)
        elif hparams.conv_layer_type.lower() == 'ginconv':
            self.model = _GINConv(trainingDataset.dim_nfeats, hparams.hl_widths,
                                  1, hparams.pooling).to(device)
        elif hparams.conv_layer_type.lower() == 'graphconv':
            self.model = _GraphConvReg(trainingDataset.dim_nfeats, hparams.hl_widths, hparams.pooling).to(device)
        elif hparams.conv_layer_type.lower() == 'sageconv':
            self.model = _SAGEConv(trainingDataset.dim_nfeats, hparams.hl_widths,
                                   1, hparams.pooling).to(device)
        elif hparams.conv_layer_type.lower() == 'tagconv':
            self.model = _TAGConv(trainingDataset.dim_nfeats, hparams.hl_widths,
                                  1, hparams.pooling).to(device)
        elif hparams.conv_layer_type.lower() == 'gcn':
            self.model = _ClassicReg(trainingDataset.dim_nfeats, hparams.hl_widths).to(device)
        else:
            raise NotImplementedError

        if hparams.optimizer_str.lower() == "adadelta":
            self.optimizer = torch.optim.Adadelta(self.model.parameters(), eps=hparams.eps,
                                                  lr=hparams.lr, rho=hparams.rho, weight_decay=hparams.weight_decay)
        elif hparams.optimizer_str.lower() == "adagrad":
            self.optimizer = torch.optim.Adagrad(self.model.parameters(), eps=hparams.eps,
                                                 lr=hparams.lr, lr_decay=hparams.lr_decay, weight_decay=hparams.weight_decay)
        elif hparams.optimizer_str.lower() == "adam":
            self.optimizer = torch.optim.Adam(self.model.parameters(), amsgrad=hparams.amsgrad, betas=hparams.betas, eps=hparams.eps,
                                              lr=hparams.lr, maximize=hparams.maximize, weight_decay=hparams.weight_decay)

        self.use_gpu = hparams.use_gpu
        self.training_loss_list = []
        self.validation_loss_list = []
        self.node_attr_key = trainingDataset.node_attr_key

        # train, validate, test split
        num_train = int(len(trainingDataset) * (hparams.split[0]))
        num_validate = int(len(trainingDataset) * (hparams.split[1]))
        num_test = len(trainingDataset) - num_train - num_validate
        idx = torch.randperm(len(trainingDataset))
        test_sampler = SubsetRandomSampler(idx[num_train+num_validate:num_train+num_validate+num_test])

        if testingDataset:
            self.test_dataloader = GraphDataLoader(testingDataset,
                                                   batch_size=len(testingDataset),
                                                   drop_last=False)
        else:
            self.test_dataloader = GraphDataLoader(trainingDataset, sampler=test_sampler,
                                                   batch_size=hparams.batch_size,
                                                   drop_last=False)

    def reset_weights(self):
        '''
        Reset the model weights between folds to avoid weight leakage.
        '''
        device = torch.device("cpu")
        if self.hparams.conv_layer_type.lower() == 'classic':
            self.model = _ClassicReg(self.trainingDataset.dim_nfeats, self.hparams.hl_widths).to(device)
        elif self.hparams.conv_layer_type.lower() == 'ginconv':
            self.model = _GINConv(self.trainingDataset.dim_nfeats, self.hparams.hl_widths,
                                  1, self.hparams.pooling).to(device)
        elif self.hparams.conv_layer_type.lower() == 'graphconv':
            self.model = _GraphConvReg(self.trainingDataset.dim_nfeats, self.hparams.hl_widths, self.hparams.pooling).to(device)
        elif self.hparams.conv_layer_type.lower() == 'sageconv':
            self.model = _SAGEConv(self.trainingDataset.dim_nfeats, self.hparams.hl_widths,
                                   1, self.hparams.pooling).to(device)
        elif self.hparams.conv_layer_type.lower() == 'tagconv':
            self.model = _TAGConv(self.trainingDataset.dim_nfeats, self.hparams.hl_widths,
                                  1, self.hparams.pooling).to(device)
        elif self.hparams.conv_layer_type.lower() == 'gcn':
            self.model = _ClassicReg(self.trainingDataset.dim_nfeats, self.hparams.hl_widths).to(device)
        else:
            raise NotImplementedError

        if self.hparams.optimizer_str.lower() == "adadelta":
            self.optimizer = torch.optim.Adadelta(self.model.parameters(), eps=self.hparams.eps,
                                                  lr=self.hparams.lr, rho=self.hparams.rho, weight_decay=self.hparams.weight_decay)
        elif self.hparams.optimizer_str.lower() == "adagrad":
            self.optimizer = torch.optim.Adagrad(self.model.parameters(), eps=self.hparams.eps,
                                                 lr=self.hparams.lr, lr_decay=self.hparams.lr_decay, weight_decay=self.hparams.weight_decay)
        elif self.hparams.optimizer_str.lower() == "adam":
            self.optimizer = torch.optim.Adam(self.model.parameters(), amsgrad=self.hparams.amsgrad, betas=self.hparams.betas, eps=self.hparams.eps,
                                              lr=self.hparams.lr, maximize=self.hparams.maximize, weight_decay=self.hparams.weight_decay)

    def train(self):
        device = torch.device("cpu")

        # The number of folds (This should come from the hparams)
        k_folds = self.hparams.k_folds

        # Init the loss and accuracy reporting lists
        self.training_loss_list = []
        self.validation_loss_list = []

        # Set fixed random number seed
        torch.manual_seed(42)

        # Define the K-fold Cross Validator
        kfold = KFold(n_splits=k_folds, shuffle=True)

        models = []
        weights = []
        losses = []
        train_dataloaders = []
        validate_dataloaders = []

        # K-fold Cross-validation model evaluation
        for fold, (train_ids, validate_ids) in tqdm(enumerate(kfold.split(self.trainingDataset)), desc="Fold", initial=1, total=k_folds, leave=False):
            epoch_training_loss_list = []
            epoch_validation_loss_list = []
            # Sample elements randomly from a given list of ids, no replacement.
            train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
            validate_subsampler = torch.utils.data.SubsetRandomSampler(validate_ids)

            # Define data loaders for the training and validation data in this fold
            self.train_dataloader = GraphDataLoader(self.trainingDataset, sampler=train_subsampler,
                                                    batch_size=self.hparams.batch_size,
                                                    drop_last=False)
            self.validate_dataloader = GraphDataLoader(self.trainingDataset, sampler=validate_subsampler,
                                                       batch_size=self.hparams.batch_size,
                                                       drop_last=False)
            # Init the neural network
            self.reset_weights()

            # Run the training loop for the defined number of epochs
            best_rmse = np.inf
            for _ in tqdm(range(self.hparams.epochs), desc='Epochs', total=self.hparams.epochs, initial=1, leave=False):
                # Iterate over the DataLoader for training data
                for batched_graph, labels in tqdm(self.train_dataloader, desc='Training', leave=False):
                    # Make sure the model is in training mode
                    self.model.train()
                    # Zero the gradients
                    self.optimizer.zero_grad()

                    # Perform forward pass
                    pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float()).to(device)
                    # Compute loss
                    loss = F.mse_loss(torch.flatten(pred), labels.float())

                    # Perform backward pass
                    loss.backward()

                    # Perform optimization
                    self.optimizer.step()

                epoch_training_loss_list.append(torch.sqrt(loss).item())
                self.validate()
                epoch_validation_loss_list.append(torch.sqrt(self.validation_loss).item())

            models.append(self.model)
            weights.append(copy.deepcopy(self.model.state_dict()))
            losses.append(torch.sqrt(self.validation_loss).item())
            train_dataloaders.append(self.train_dataloader)
            validate_dataloaders.append(self.validate_dataloader)
            self.training_loss_list.append(epoch_training_loss_list)
            self.validation_loss_list.append(epoch_validation_loss_list)
        self.losses = losses
        min_loss = min(losses)
        self.min_loss = min_loss
        ind = losses.index(min_loss)
        self.model = models[ind]
        self.model.load_state_dict(weights[ind])
        self.model.eval()
        self.training_loss_list = self.training_loss_list[ind]
        self.validation_loss_list = self.validation_loss_list[ind]

    def validate(self):
        device = torch.device("cpu")
        self.model.eval()
        for batched_graph, labels in tqdm(self.validate_dataloader, desc='Validating', leave=False):
            pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float()).to(device)
            loss = F.mse_loss(torch.flatten(pred), labels.float())
        self.validation_loss = loss

    def test(self):
        #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        device = torch.device("cpu")
        #self.model.eval()
        for batched_graph, labels in tqdm(self.test_dataloader, desc='Testing', leave=False):
            pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float()).to(device)
            loss = F.mse_loss(torch.flatten(pred), labels.float())
        self.testing_loss = torch.sqrt(loss).item()

    def save(self, path):
        if path:
            # Make sure the file extension is .pt
            ext = path[len(path)-3:len(path)]
            if ext.lower() != ".pt":
                path = path + ".pt"
            torch.save(self.model, path)

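# How the fold loop above partitions the dataset (minimal sketch, illustrative
# only): each fold yields disjoint train/validate index arrays.
#
#   kfold = KFold(n_splits=3, shuffle=True)
#   for fold, (train_ids, validate_ids) in enumerate(kfold.split(list(range(9)))):
#       print(fold, train_ids, validate_ids)
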
class _ClassifierHoldout:
    def __init__(self, hparams, trainingDataset, validationDataset=None, testingDataset=None):
        #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        device = torch.device("cpu")
        self.trainingDataset = trainingDataset
        self.validationDataset = validationDataset
        self.testingDataset = testingDataset
        self.hparams = hparams
        if hparams.conv_layer_type.lower() == 'classic':
            self.model = _Classic(trainingDataset.dim_nfeats, hparams.hl_widths,
                                  trainingDataset.gclasses).to(device)
        elif hparams.conv_layer_type.lower() == 'ginconv':
            self.model = _GINConv(trainingDataset.dim_nfeats, hparams.hl_widths,
                                  trainingDataset.gclasses, hparams.pooling).to(device)
        elif hparams.conv_layer_type.lower() == 'graphconv':
            self.model = _GraphConv(trainingDataset.dim_nfeats, hparams.hl_widths,
                                    trainingDataset.gclasses, hparams.pooling).to(device)
        elif hparams.conv_layer_type.lower() == 'sageconv':
            self.model = _SAGEConv(trainingDataset.dim_nfeats, hparams.hl_widths,
                                   trainingDataset.gclasses, hparams.pooling).to(device)
        elif hparams.conv_layer_type.lower() == 'tagconv':
            self.model = _TAGConv(trainingDataset.dim_nfeats, hparams.hl_widths,
                                  trainingDataset.gclasses, hparams.pooling).to(device)
        elif hparams.conv_layer_type.lower() == 'gcn':
            self.model = _Classic(trainingDataset.dim_nfeats, hparams.hl_widths,
                                  trainingDataset.gclasses).to(device)
        else:
            raise NotImplementedError

        if hparams.optimizer_str.lower() == "adadelta":
            self.optimizer = torch.optim.Adadelta(self.model.parameters(), eps=hparams.eps,
                                                  lr=hparams.lr, rho=hparams.rho, weight_decay=hparams.weight_decay)
        elif hparams.optimizer_str.lower() == "adagrad":
            self.optimizer = torch.optim.Adagrad(self.model.parameters(), eps=hparams.eps,
                                                 lr=hparams.lr, lr_decay=hparams.lr_decay, weight_decay=hparams.weight_decay)
        elif hparams.optimizer_str.lower() == "adam":
            self.optimizer = torch.optim.Adam(self.model.parameters(), amsgrad=hparams.amsgrad, betas=hparams.betas, eps=hparams.eps,
                                              lr=hparams.lr, maximize=hparams.maximize, weight_decay=hparams.weight_decay)
        self.use_gpu = hparams.use_gpu
        self.training_loss_list = []
        self.validation_loss_list = []
        self.training_accuracy_list = []
        self.validation_accuracy_list = []
        self.node_attr_key = trainingDataset.node_attr_key

        # train, validate, test split
        num_train = int(len(trainingDataset) * (hparams.split[0]))
        num_validate = int(len(trainingDataset) * (hparams.split[1]))
        num_test = len(trainingDataset) - num_train - num_validate
        idx = torch.randperm(len(trainingDataset))
        train_sampler = SubsetRandomSampler(idx[:num_train])
        validate_sampler = SubsetRandomSampler(idx[num_train:num_train+num_validate])
        test_sampler = SubsetRandomSampler(idx[num_train+num_validate:num_train+num_validate+num_test])

        if validationDataset:
            self.train_dataloader = GraphDataLoader(trainingDataset,
                                                    batch_size=hparams.batch_size,
                                                    drop_last=False)
            self.validate_dataloader = GraphDataLoader(validationDataset,
                                                       batch_size=hparams.batch_size,
                                                       drop_last=False)
        else:
            self.train_dataloader = GraphDataLoader(trainingDataset, sampler=train_sampler,
                                                    batch_size=hparams.batch_size,
                                                    drop_last=False)
            self.validate_dataloader = GraphDataLoader(trainingDataset, sampler=validate_sampler,
                                                       batch_size=hparams.batch_size,
                                                       drop_last=False)

        if testingDataset:
            self.test_dataloader = GraphDataLoader(testingDataset,
                                                   batch_size=len(testingDataset),
                                                   drop_last=False)
        else:
            self.test_dataloader = GraphDataLoader(trainingDataset, sampler=test_sampler,
                                                   batch_size=hparams.batch_size,
                                                   drop_last=False)

    def train(self):
        #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        device = torch.device("cpu")
        # Init the loss and accuracy reporting lists
        self.training_accuracy_list = []
        self.training_loss_list = []
        self.validation_accuracy_list = []
        self.validation_loss_list = []

        # Run the training loop for the defined number of epochs
        for _ in tqdm(range(self.hparams.epochs), desc='Epochs', initial=1, leave=False):
            temp_loss_list = []
            temp_acc_list = []
            # Iterate over the DataLoader for training data
            for batched_graph, labels in tqdm(self.train_dataloader, desc='Training', leave=False):
                # Make sure the model is in training mode
                self.model.train()

                # Zero the gradients
                self.optimizer.zero_grad()

                # Perform forward pass
                pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float()).to(device)
                # Compute loss
                if self.hparams.loss_function.lower() == "negative log likelihood":
                    logp = F.log_softmax(pred, 1)
                    loss = F.nll_loss(logp, labels)
                elif self.hparams.loss_function.lower() == "cross entropy":
                    loss = F.cross_entropy(pred, labels)

                # Save loss information for reporting
                temp_loss_list.append(loss.item())
                temp_acc_list.append(accuracy_score(labels, pred.argmax(1)))

                # Perform backward pass
                loss.backward()

                # Perform optimization
                self.optimizer.step()

            self.training_accuracy_list.append(np.mean(temp_acc_list).item())
            self.training_loss_list.append(np.mean(temp_loss_list).item())
            self.validate()
            self.validation_accuracy_list.append(self.validation_accuracy)
            self.validation_loss_list.append(self.validation_loss)

    def validate(self):
        #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        device = torch.device("cpu")
        temp_loss_list = []
        temp_acc_list = []
        self.model.eval()
        for batched_graph, labels in tqdm(self.validate_dataloader, desc='Validating', leave=False):
            pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float()).to(device)
            if self.hparams.loss_function.lower() == "negative log likelihood":
                logp = F.log_softmax(pred, 1)
                loss = F.nll_loss(logp, labels)
            elif self.hparams.loss_function.lower() == "cross entropy":
                loss = F.cross_entropy(pred, labels)
            temp_loss_list.append(loss.item())
            temp_acc_list.append(accuracy_score(labels, pred.argmax(1)))
        self.validation_accuracy = np.mean(temp_acc_list).item()
        self.validation_loss = np.mean(temp_loss_list).item()

    def test(self):
        if self.test_dataloader:
            temp_loss_list = []
            temp_acc_list = []
            self.model.eval()
            for batched_graph, labels in tqdm(self.test_dataloader, desc='Testing', leave=False):
                pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float())
                if self.hparams.loss_function.lower() == "negative log likelihood":
                    logp = F.log_softmax(pred, 1)
                    loss = F.nll_loss(logp, labels)
                elif self.hparams.loss_function.lower() == "cross entropy":
                    loss = F.cross_entropy(pred, labels)
                temp_loss_list.append(loss.item())
                temp_acc_list.append(accuracy_score(labels, pred.argmax(1)))
            self.testing_accuracy = np.mean(temp_acc_list).item()
            self.testing_loss = np.mean(temp_loss_list).item()

    def save(self, path):
        if path:
            # Make sure the file extension is .pt
            ext = path[len(path)-3:len(path)]
            if ext.lower() != ".pt":
                path = path + ".pt"
            torch.save(self.model, path)

class _ClassifierKFold:
    def __init__(self, hparams, trainingDataset, testingDataset=None):
        self.trainingDataset = trainingDataset
        self.testingDataset = testingDataset
        self.hparams = hparams
        self.testing_accuracy = 0
        self.accuracies = []
        self.max_accuracy = 0
        # at beginning of the script
        #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        device = torch.device("cpu")
        if hparams.conv_layer_type.lower() == 'classic':
            self.model = _Classic(trainingDataset.dim_nfeats, hparams.hl_widths,
                                  trainingDataset.gclasses).to(device)
        elif hparams.conv_layer_type.lower() == 'ginconv':
            self.model = _GINConv(trainingDataset.dim_nfeats, hparams.hl_widths,
                                  trainingDataset.gclasses, hparams.pooling).to(device)
        elif hparams.conv_layer_type.lower() == 'graphconv':
            self.model = _GraphConv(trainingDataset.dim_nfeats, hparams.hl_widths,
                                    trainingDataset.gclasses, hparams.pooling).to(device)
        elif hparams.conv_layer_type.lower() == 'sageconv':
            self.model = _SAGEConv(trainingDataset.dim_nfeats, hparams.hl_widths,
                                   trainingDataset.gclasses, hparams.pooling).to(device)
        elif hparams.conv_layer_type.lower() == 'tagconv':
            self.model = _TAGConv(trainingDataset.dim_nfeats, hparams.hl_widths,
                                  trainingDataset.gclasses, hparams.pooling).to(device)
        else:
            raise NotImplementedError

        if hparams.optimizer_str.lower() == "adadelta":
            self.optimizer = torch.optim.Adadelta(self.model.parameters(), eps=hparams.eps,
                                                  lr=hparams.lr, rho=hparams.rho, weight_decay=hparams.weight_decay)
        elif hparams.optimizer_str.lower() == "adagrad":
            self.optimizer = torch.optim.Adagrad(self.model.parameters(), eps=hparams.eps,
                                                 lr=hparams.lr, lr_decay=hparams.lr_decay, weight_decay=hparams.weight_decay)
        elif hparams.optimizer_str.lower() == "adam":
            self.optimizer = torch.optim.Adam(self.model.parameters(), amsgrad=hparams.amsgrad, betas=hparams.betas, eps=hparams.eps,
                                              lr=hparams.lr, maximize=hparams.maximize, weight_decay=hparams.weight_decay)
        self.use_gpu = hparams.use_gpu
        self.training_loss_list = []
        self.validation_loss_list = []
        self.training_accuracy_list = []
        self.validation_accuracy_list = []
        self.node_attr_key = trainingDataset.node_attr_key

    def reset_weights(self):
        '''
        Reset the model weights between folds to avoid weight leakage.
        '''
        device = torch.device("cpu")
        if self.hparams.conv_layer_type.lower() == 'classic':
            self.model = _Classic(self.trainingDataset.dim_nfeats, self.hparams.hl_widths,
                                  self.trainingDataset.gclasses).to(device)
        elif self.hparams.conv_layer_type.lower() == 'ginconv':
            self.model = _GINConv(self.trainingDataset.dim_nfeats, self.hparams.hl_widths,
                                  self.trainingDataset.gclasses, self.hparams.pooling).to(device)
        elif self.hparams.conv_layer_type.lower() == 'graphconv':
            self.model = _GraphConv(self.trainingDataset.dim_nfeats, self.hparams.hl_widths,
                                    self.trainingDataset.gclasses, self.hparams.pooling).to(device)
        elif self.hparams.conv_layer_type.lower() == 'sageconv':
            self.model = _SAGEConv(self.trainingDataset.dim_nfeats, self.hparams.hl_widths,
                                   self.trainingDataset.gclasses, self.hparams.pooling).to(device)
        elif self.hparams.conv_layer_type.lower() == 'tagconv':
            self.model = _TAGConv(self.trainingDataset.dim_nfeats, self.hparams.hl_widths,
                                  self.trainingDataset.gclasses, self.hparams.pooling).to(device)
        else:
            raise NotImplementedError
        if self.hparams.optimizer_str.lower() == "adadelta":
            self.optimizer = torch.optim.Adadelta(self.model.parameters(), eps=self.hparams.eps,
                                                  lr=self.hparams.lr, rho=self.hparams.rho, weight_decay=self.hparams.weight_decay)
        elif self.hparams.optimizer_str.lower() == "adagrad":
            self.optimizer = torch.optim.Adagrad(self.model.parameters(), eps=self.hparams.eps,
                                                 lr=self.hparams.lr, lr_decay=self.hparams.lr_decay, weight_decay=self.hparams.weight_decay)
        elif self.hparams.optimizer_str.lower() == "adam":
            self.optimizer = torch.optim.Adam(self.model.parameters(), amsgrad=self.hparams.amsgrad, betas=self.hparams.betas, eps=self.hparams.eps,
                                              lr=self.hparams.lr, maximize=self.hparams.maximize, weight_decay=self.hparams.weight_decay)

    def train(self):
        # The number of folds (This should come from the hparams)
        k_folds = self.hparams.k_folds

        # Init the loss and accuracy reporting lists
        self.training_accuracy_list = []
        self.training_loss_list = []
        self.validation_accuracy_list = []
        self.validation_loss_list = []

        # Set fixed random number seed
        torch.manual_seed(42)

        # Define the K-fold Cross Validator
        kfold = KFold(n_splits=k_folds, shuffle=True)

        models = []
        weights = []
        accuracies = []
        train_dataloaders = []
        validate_dataloaders = []

        # K-fold Cross-validation model evaluation
        for fold, (train_ids, validate_ids) in tqdm(enumerate(kfold.split(self.trainingDataset)), desc="Fold", initial=1, total=k_folds, leave=False):
            epoch_training_loss_list = []
            epoch_training_accuracy_list = []
            epoch_validation_loss_list = []
            epoch_validation_accuracy_list = []
            # Sample elements randomly from a given list of ids, no replacement.
            train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
            validate_subsampler = torch.utils.data.SubsetRandomSampler(validate_ids)

            # Define data loaders for the training and validation data in this fold
            self.train_dataloader = GraphDataLoader(self.trainingDataset, sampler=train_subsampler,
                                                    batch_size=self.hparams.batch_size,
                                                    drop_last=False)
            self.validate_dataloader = GraphDataLoader(self.trainingDataset, sampler=validate_subsampler,
                                                       batch_size=self.hparams.batch_size,
                                                       drop_last=False)
            # Init the neural network
            self.reset_weights()

            # Run the training loop for the defined number of epochs
            for _ in tqdm(range(0, self.hparams.epochs), desc='Epochs', initial=1, total=self.hparams.epochs, leave=False):
                temp_loss_list = []
                temp_acc_list = []

                # Iterate over the DataLoader for training data
                for batched_graph, labels in tqdm(self.train_dataloader, desc='Training', leave=False):

                    # Make sure the model is in training mode
                    self.model.train()

                    # Zero the gradients
                    self.optimizer.zero_grad()

                    # Perform forward pass
                    pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float())

                    # Compute loss
                    if self.hparams.loss_function.lower() == "negative log likelihood":
                        logp = F.log_softmax(pred, 1)
                        loss = F.nll_loss(logp, labels)
                    elif self.hparams.loss_function.lower() == "cross entropy":
                        loss = F.cross_entropy(pred, labels)

                    # Save loss information for reporting
                    temp_loss_list.append(loss.item())
                    temp_acc_list.append(accuracy_score(labels, pred.argmax(1)))

                    # Perform backward pass
                    loss.backward()

                    # Perform optimization
                    self.optimizer.step()

                epoch_training_accuracy_list.append(np.mean(temp_acc_list).item())
                epoch_training_loss_list.append(np.mean(temp_loss_list).item())
                self.validate()
                epoch_validation_accuracy_list.append(self.validation_accuracy)
                epoch_validation_loss_list.append(self.validation_loss)
            models.append(self.model)
            weights.append(copy.deepcopy(self.model.state_dict()))
            accuracies.append(self.validation_accuracy)
            train_dataloaders.append(self.train_dataloader)
            validate_dataloaders.append(self.validate_dataloader)
            self.training_accuracy_list.append(epoch_training_accuracy_list)
            self.training_loss_list.append(epoch_training_loss_list)
            self.validation_accuracy_list.append(epoch_validation_accuracy_list)
            self.validation_loss_list.append(epoch_validation_loss_list)
        self.accuracies = accuracies
        max_accuracy = max(accuracies)
        self.max_accuracy = max_accuracy
        ind = accuracies.index(max_accuracy)
        self.model = models[ind]
        self.model.load_state_dict(weights[ind])
        self.model.eval()
        self.training_accuracy_list = self.training_accuracy_list[ind]
        self.training_loss_list = self.training_loss_list[ind]
        self.validation_accuracy_list = self.validation_accuracy_list[ind]
        self.validation_loss_list = self.validation_loss_list[ind]

    def validate(self):
        temp_loss_list = []
        temp_acc_list = []
        self.model.eval()
        for batched_graph, labels in tqdm(self.validate_dataloader, desc='Validating', leave=False):
            pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float())
            if self.hparams.loss_function.lower() == "negative log likelihood":
                logp = F.log_softmax(pred, 1)
                loss = F.nll_loss(logp, labels)
            elif self.hparams.loss_function.lower() == "cross entropy":
                loss = F.cross_entropy(pred, labels)
            temp_loss_list.append(loss.item())
            temp_acc_list.append(accuracy_score(labels, pred.argmax(1)))
        self.validation_accuracy = np.mean(temp_acc_list).item()
        self.validation_loss = np.mean(temp_loss_list).item()

    def test(self):
        if self.testingDataset:
            self.test_dataloader = GraphDataLoader(self.testingDataset,
                                                   batch_size=len(self.testingDataset),
                                                   drop_last=False)
            temp_loss_list = []
            temp_acc_list = []
            self.model.eval()
            for batched_graph, labels in tqdm(self.test_dataloader, desc='Testing', leave=False):
                pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float())
                if self.hparams.loss_function.lower() == "negative log likelihood":
                    logp = F.log_softmax(pred, 1)
                    loss = F.nll_loss(logp, labels)
                elif self.hparams.loss_function.lower() == "cross entropy":
                    loss = F.cross_entropy(pred, labels)
                temp_loss_list.append(loss.item())
                temp_acc_list.append(accuracy_score(labels, pred.argmax(1)))
            self.testing_accuracy = np.mean(temp_acc_list).item()
            self.testing_loss = np.mean(temp_loss_list).item()

    def save(self, path):
        if path:
            # Make sure the file extension is .pt
            ext = path[len(path)-3:len(path)]
            if ext.lower() != ".pt":
                path = path + ".pt"
            torch.save(self.model, path)

class DGL:
    @staticmethod
    def Accuracy(actual, predicted, mantissa=4):
        """
        Computes the accuracy of the input predictions based on the input labels. This is to be used only with classification, not regression.

        Parameters
        ----------
        actual : list
            The input list of actual values.
        predicted : list
            The input list of predicted values.
        mantissa : int , optional
            The desired length of the mantissa. The default is 4.

        Returns
        -------
        dict
            A dictionary containing the accuracy information, with the following keys and values:
            - "accuracy" (float): The number of correct predictions divided by the length of the list.
            - "correct" (int): The number of correct predictions.
            - "mask" (list): A boolean mask for correct vs. wrong predictions, which can be used to filter the list of predictions.
            - "size" (int): The size of the predictions list.
            - "wrong" (int): The number of wrong predictions.

        """
        if len(predicted) < 1 or len(actual) < 1 or not len(predicted) == len(actual):
            return None
        correct = 0
        mask = []
        for i in range(len(predicted)):
            if predicted[i] == actual[i]:
                correct = correct + 1
                mask.append(True)
            else:
                mask.append(False)
        size = len(predicted)
        wrong = len(predicted) - correct
        accuracy = round(float(correct) / float(len(predicted)), mantissa)
        return {"accuracy": accuracy, "correct": correct, "mask": mask, "size": size, "wrong": wrong}

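    # Worked example: three of the four predictions below match, so the
    # accuracy is 3/4 = 0.75.
    #
    #   DGL.Accuracy(actual=[0, 1, 1, 2], predicted=[0, 1, 2, 2])
    #   # -> {'accuracy': 0.75, 'correct': 3, 'mask': [True, True, False, True],
    #   #     'size': 4, 'wrong': 1}
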
+     @staticmethod
+     def RMSE(actual, predicted, mantissa=4):
+         """
+         Computes the root mean squared error (RMSE) of the input predictions based on the input actual values. This is to be used only with regression, not with classification.
+
+         Parameters
+         ----------
+         actual : list
+             The input list of actual values.
+         predicted : list
+             The input list of predicted values.
+         mantissa : int , optional
+             The desired length of the mantissa. The default is 4.
+
+         Returns
+         -------
+         dict
+             A dictionary returning the error information. This contains the following keys and values:
+             - "rmse" (float): Root Mean Square Error.
+             - "size" (int): The size of the predictions list.
+         """
+         if len(predicted) < 1 or len(actual) < 1 or not len(predicted) == len(actual):
+             return None
+         size = len(predicted)
+         mse = F.mse_loss(torch.tensor(predicted), torch.tensor(actual))
+         rmse = round(torch.sqrt(mse).item(), mantissa)
+         return {"rmse":rmse, "size":size}
+
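+     # A minimal usage sketch of RMSE: with actual = [1.0, 5.0] and
+     # predicted = [2.0, 3.0], the squared errors are 1.0 and 4.0, the mean
+     # squared error is 2.5, and the RMSE is sqrt(2.5):
+     #   DGL.RMSE([1.0, 5.0], [2.0, 3.0])
+     #   # {'rmse': 1.5811, 'size': 2}
+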
+     @staticmethod
+     def BalanceDataset(dataset, labels, method="undersampling", key="node_attr"):
+         """
+         Balances the input dataset using the specified method.
+
+         Parameters
+         ----------
+         dataset : DGLDataset
+             The input dataset.
+         labels : list
+             The input list of labels.
+         method : str , optional
+             The method of sampling. This can be "undersampling" or "oversampling". It is case insensitive. The default is "undersampling".
+         key : str , optional
+             The key used for the node attributes. The default is "node_attr".
+
+         Returns
+         -------
+         DGLDataset
+             The balanced dataset.
+
+         """
+         df = pd.DataFrame({'graph_index': range(len(labels)), 'label': labels})
+
+         if method.lower() == 'undersampling':
+             min_distribution = df['label'].value_counts().min()
+             df = df.groupby('label').sample(n=min_distribution)
+         elif method.lower() == 'oversampling':
+             max_distribution = df['label'].value_counts().max()
+             df = df.groupby('label').sample(n=max_distribution, replace=True)
+         else:
+             raise NotImplementedError
+
+         list_idx = df['graph_index'].tolist()
+         graphs = []
+         labels = []
+         for index in list_idx:
+             graph, label = dataset[index]
+             graphs.append(graph)
+             labels.append(label)
+         return DGL.DatasetByGraphs({'graphs': graphs, 'labels': labels}, key=key)
+
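+     # A minimal usage sketch of BalanceDataset, assuming `dataset` is an
+     # existing DGLDataset built with the default "node_attr" key:
+     #   labels = DGL.DatasetLabels(dataset)
+     #   balanced = DGL.BalanceDataset(dataset, labels, method="undersampling")
+     # Undersampling trims every class down to the size of the rarest class;
+     # oversampling resamples (with replacement) up to the most common class.
+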
+     @staticmethod
+     def GraphByTopologicGraph(topologicGraph, bidirectional=True, key=None, categories=[], node_attr_key="node_attr", tolerance=0.0001):
+         """
+         Returns a DGL graph by the input topologic graph.
+
+         Parameters
+         ----------
+         topologicGraph : topologic.Graph
+             The input topologic graph.
+         bidirectional : bool , optional
+             If set to True, the output DGL graph is forced to be bidirectional. The default is True.
+         key : str
+             The dictionary key where the node label is stored.
+         categories : list
+             The list of categories of node features.
+         node_attr_key : str
+             The dictionary key of the node attributes.
+         tolerance : float , optional
+             The desired tolerance. The default is 0.0001.
+
+         Returns
+         -------
+         DGL Graph
+             The created DGL graph.
+
+         """
+         from topologicpy.Vertex import Vertex
+         from topologicpy.Graph import Graph
+         from topologicpy.Dictionary import Dictionary
+         from topologicpy.Topology import Topology
+
+         graph_dict = {}
+         vertices = Graph.Vertices(topologicGraph)
+         edges = Graph.Edges(topologicGraph)
+         graph_dict["num_nodes"] = len(vertices)
+         graph_dict["src"] = []
+         graph_dict["dst"] = []
+         graph_dict["node_labels"] = {}
+         graph_dict["node_features"] = []
+         nodes = []
+         graph_edges = []
+
+         for i in range(len(vertices)):
+             vDict = Topology.Dictionary(vertices[i])
+             if key:
+                 vLabel = Dictionary.ValueAtKey(vDict, key)
+             else:
+                 vLabel = ""
+             graph_dict["node_labels"][i] = vLabel
+             # Append the tensor of the one-hot-encoded feature for the node at index i
+             graph_dict["node_features"].append(torch.tensor(DGL.OneHotEncode(vLabel, categories)))
+             nodes.append(i)
+
+         for i in range(len(edges)):
+             e = edges[i]
+             sv = e.StartVertex()
+             ev = e.EndVertex()
+             sn = nodes[Vertex.Index(vertex=sv, vertices=vertices, strict=False, tolerance=tolerance)]
+             en = nodes[Vertex.Index(vertex=ev, vertices=vertices, strict=False, tolerance=tolerance)]
+             if [sn, en] not in graph_edges and [en, sn] not in graph_edges:
+                 graph_edges.append([sn, en])
+
+         for anEdge in graph_edges:
+             graph_dict["src"].append(anEdge[0])
+             graph_dict["dst"].append(anEdge[1])
+
+         # Create the DGL graph
+         src = np.array(graph_dict["src"])
+         dst = np.array(graph_dict["dst"])
+         num_nodes = graph_dict["num_nodes"]
+         dgl_graph = dgl.graph((src, dst), num_nodes=num_nodes)
+
+         # Set the node features at node_attr_key using the one-hot encoding of vLabel
+         dgl_graph.ndata[node_attr_key] = torch.stack(graph_dict["node_features"])
+
+         if bidirectional:
+             dgl_graph = dgl.add_reverse_edges(dgl_graph)
+         return dgl_graph
+
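+     # A minimal usage sketch, assuming `g` is an existing topologic graph whose
+     # vertices carry a dictionary entry under the (illustrative) key "type",
+     # with values drawn from the (illustrative) category list below:
+     #   dgl_graph = DGL.GraphByTopologicGraph(g, key="type",
+     #                                         categories=["room", "wall", "door"])
+     # Each vertex label is one-hot encoded against `categories` and stored
+     # under "node_attr" on the resulting DGL graph.
+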
+     @staticmethod
+     def GraphsByImportedCSV(graphs_file_path, edges_file_path,
+                             nodes_file_path, graph_id_header="graph_id",
+                             graph_label_header="label", num_nodes_header="num_nodes", src_header="src",
+                             dst_header="dst", node_label_header="label", node_attr_key="node_attr",
+                             categories=[], bidirectional=True):
+         """
+         Returns DGL graphs according to the input CSV file paths.
+
+         Parameters
+         ----------
+         graphs_file_path : str
+             The file path to the graphs CSV file.
+         edges_file_path : str
+             The file path to the edges CSV file.
+         nodes_file_path : str
+             The file path to the nodes CSV file.
+         graph_id_header : str , optional
+             The header string used to specify the graph id. The default is "graph_id".
+         graph_label_header : str , optional
+             The header string used to specify the graph label. The default is "label".
+         num_nodes_header : str , optional
+             The header string used to specify the number of nodes. The default is "num_nodes".
+         src_header : str , optional
+             The header string used to specify the source of edges. The default is "src".
+         dst_header : str , optional
+             The header string used to specify the destination of edges. The default is "dst".
+         node_label_header : str , optional
+             The header string used to specify the node label. The default is "label".
+         node_attr_key : str , optional
+             The key string used to specify the node attributes. The default is "node_attr".
+         categories : list
+             The list of categories.
+         bidirectional : bool , optional
+             If set to True, the output DGL graph is forced to be bi-directional. The default is True.
+
+         Returns
+         -------
+         dict
+             A dictionary with the DGL graphs and labels found in the input CSV files. The keys are "graphs" and "labels".
+
+         """
+
+         graphs = pd.read_csv(graphs_file_path)
+         edges = pd.read_csv(edges_file_path)
+         nodes = pd.read_csv(nodes_file_path)
+         dgl_graphs = []
+         labels = []
+
+         # Create a graph for each graph ID from the edges table.
+         # First process the graphs table into two dictionaries with graph IDs as keys.
+         # The label and number of nodes are values.
+         label_dict = {}
+         num_nodes_dict = {}
+         for _, row in graphs.iterrows():
+             label_dict[row[graph_id_header]] = row[graph_label_header]
+             num_nodes_dict[row[graph_id_header]] = row[num_nodes_header]
+         # For the edges, first group the table by graph IDs.
+         edges_group = edges.groupby(graph_id_header)
+         # For the nodes, first group the table by graph IDs.
+         nodes_group = nodes.groupby(graph_id_header)
+         # For each graph ID...
+         for graph_id in edges_group.groups:
+             graph_dict = {}
+             graph_dict[src_header] = []
+             graph_dict[dst_header] = []
+             graph_dict[node_label_header] = {}
+             graph_dict["node_features"] = []
+             num_nodes = num_nodes_dict[graph_id]
+             graph_label = label_dict[graph_id]
+             labels.append(graph_label)
+
+             # Find the edges as well as the number of nodes and its label.
+             edges_of_id = edges_group.get_group(graph_id)
+             src = edges_of_id[src_header].to_numpy()
+             dst = edges_of_id[dst_header].to_numpy()
+
+             # Find the nodes and their labels and features
+             nodes_of_id = nodes_group.get_group(graph_id)
+             node_labels = nodes_of_id[node_label_header]
+
+             for node_label in node_labels:
+                 graph_dict["node_features"].append(torch.tensor(DGL.OneHotEncode(node_label, categories)))
+             # Create a graph and add it to the list of graphs and labels.
+             dgl_graph = dgl.graph((src, dst), num_nodes=num_nodes)
+             # Set the node features at node_attr_key using the one-hot encoding of node_label
+             dgl_graph.ndata[node_attr_key] = torch.stack(graph_dict["node_features"])
+             if bidirectional:
+                 dgl_graph = dgl.add_reverse_edges(dgl_graph)
+             dgl_graphs.append(dgl_graph)
+         return {"graphs":dgl_graphs, "labels":labels}
+
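+     # A sketch of the minimal CSV schema this parser expects with the default
+     # headers (file names are illustrative):
+     #   graphs.csv:  graph_id,label,num_nodes      e.g.  0,1,3
+     #   edges.csv:   graph_id,src,dst              e.g.  0,0,1
+     #   nodes.csv:   graph_id,label                e.g.  0,2
+     # Node rows should appear in node-index order per graph, and every node
+     # label should be present in `categories` for a meaningful one-hot encoding.
+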
+     @staticmethod
+     def GraphsByImportedDGCNN(file_path, categories=[], bidirectional=True):
+         """
+         Returns the graphs from the imported DGCNN file.
+
+         Parameters
+         ----------
+         file_path : str
+             The file path to the DGCNN text file.
+         categories : list
+             The list of node categories expected in the imported DGCNN file. This is used to one-hot-encode the node features.
+         bidirectional : bool , optional
+             If set to True, the output DGL graph is forced to be bi-directional. The default is True.
+
+         Returns
+         -------
+         dict
+             A dictionary object that contains the imported graphs and their corresponding labels. The dictionary has the following keys and values:
+             - "graphs" (list): The list of DGL graphs
+             - "labels" (list): The list of graph labels
+
+         """
+         graphs = []
+         labels = []
+         file = open(file_path)
+         if file:
+             lines = file.readlines()
+             n_graphs = int(lines[0])
+             index = 1
+             for i in range(n_graphs):
+                 graph_dict = {}
+                 graph_dict["src"] = []
+                 graph_dict["dst"] = []
+                 graph_dict["node_labels"] = {}
+                 graph_dict["node_features"] = []
+                 line = lines[index].split()
+                 n_nodes = int(line[0])
+                 graph_dict["num_nodes"] = n_nodes
+                 graph_label = int(line[1])
+                 labels.append(graph_label)
+                 index += 1
+                 for j in range(n_nodes):
+                     line = lines[index+j].split()
+                     node_label = int(line[0])
+                     graph_dict["node_labels"][j] = node_label
+                     graph_dict["node_features"].append(torch.tensor(DGL.OneHotEncode(node_label, categories)))
+                     adj_vertices = line[2:]
+                     for adj_vertex in adj_vertices:
+                         graph_dict["src"].append(j)
+                         graph_dict["dst"].append(int(adj_vertex))
+
+                 # Create the DGL graph
+                 src = np.array(graph_dict["src"])
+                 dst = np.array(graph_dict["dst"])
+                 dgl_graph = dgl.graph((src, dst), num_nodes=graph_dict["num_nodes"])
+                 # Set the node features at 'node_attr' using the one-hot encoding of the node label
+                 dgl_graph.ndata['node_attr'] = torch.stack(graph_dict["node_features"])
+                 if bidirectional:
+                     dgl_graph = dgl.add_reverse_edges(dgl_graph)
+                 graphs.append(dgl_graph)
+                 index += n_nodes
+             file.close()
+         return {"graphs":graphs, "labels":labels}
+
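+     # A sketch of the DGCNN text layout as read by this parser: the first line
+     # gives the number of graphs; each graph starts with "n_nodes graph_label";
+     # each of the following n_nodes lines is "node_label count adj1 adj2 ..."
+     # (the second column is skipped by the parser). For example, a single
+     # 3-node triangle graph with label 1:
+     #   1
+     #   3 1
+     #   0 2 1 2
+     #   0 2 0 2
+     #   1 2 0 1
+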
+     @staticmethod
+     def CategoryDistribution(labels, categories=None, mantissa=4):
+         """
+         Returns the category distribution in the input list of labels. This is useful to determine whether the dataset is balanced.
+
+         Parameters
+         ----------
+         labels : list
+             The input list of labels.
+         categories : list , optional
+             The list of expected categories. If not specified, the categories are computed directly from the labels. The default is None.
+         mantissa : int , optional
+             The desired length of the mantissa. The default is 4.
+
+         Returns
+         -------
+         dict
+             A dictionary object that contains the categories and their corresponding ratios. The dictionary has the following keys and values:
+             - "categories" (list): The list of categories.
+             - "ratios" (list): The list of ratios of each category as found in the input list of labels.
+
+         """
+         if not categories:
+             categories = list(set(labels))
+         ratios = []
+         for category in categories:
+             ratios.append(round(float(labels.count(category))/float(len(labels)), mantissa))
+         return {"categories":[categories], "ratios":[ratios]}
+
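+     # A minimal usage sketch of CategoryDistribution: with labels [0, 0, 1],
+     # the categories are [0, 1] and the ratios are [0.6667, 0.3333]. Note that
+     # both values are wrapped in an outer list:
+     #   DGL.CategoryDistribution([0, 0, 1])
+     #   # {'categories': [[0, 1]], 'ratios': [[0.6667, 0.3333]]}
+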
+     @staticmethod
+     def ModelByFilePath(path):
+         """
+         Returns the model found at the input file path.
+
+         Parameters
+         ----------
+         path : str
+             The file path of the saved model.
+
+         Returns
+         -------
+         Model
+             The loaded model.
+
+         """
+         if not path:
+             return None
+         return torch.load(path)
+
+     @staticmethod
+     def ConfusionMatrix(actual, predicted, normalize=False):
+         """
+         Returns the confusion matrix for the input actual and predicted labels. This is to be used with classification tasks only, not regression.
+
+         Parameters
+         ----------
+         actual : list
+             The input list of actual labels.
+         predicted : list
+             The input list of predicted labels.
+         normalize : bool , optional
+             If set to True, the returned data will be normalized (proportions rather than counts). Otherwise, actual counts are returned. The default is False.
+
+         Returns
+         -------
+         list
+             The created confusion matrix.
+
+         """
+         from sklearn import metrics
+         import numpy
+         if normalize:
+             cm = numpy.transpose(metrics.confusion_matrix(y_true=actual, y_pred=predicted, normalize="true"))
+         else:
+             cm = numpy.transpose(metrics.confusion_matrix(y_true=actual, y_pred=predicted))
+         return cm
+
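+     # A minimal usage sketch of ConfusionMatrix: with actual = [0, 1, 1, 0]
+     # and predicted = [0, 1, 0, 0], sklearn builds a matrix with actual
+     # classes as rows, which this method then transposes so that rows
+     # correspond to predicted classes and columns to actual classes:
+     #   DGL.ConfusionMatrix([0, 1, 1, 0], [0, 1, 0, 0])
+     #   # array([[2, 1],
+     #   #        [0, 1]])
+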
+     @staticmethod
+     def DatasetByGraphs(dictionary, key="node_attr"):
+         """
+         Returns a DGL dataset from the input DGL graphs.
+
+         Parameters
+         ----------
+         dictionary : dict
+             The input dictionary of graphs and labels. This dictionary must have the keys "graphs" and "labels".
+         key : str
+             The key used for the node attributes.
+
+         Returns
+         -------
+         DGL.Dataset
+             The created DGL dataset.
+
+         """
+         graphs = dictionary['graphs']
+         labels = dictionary['labels']
+         return _Dataset(graphs, labels, key)
+
+     @staticmethod
+     def DatasetByImportedCSV_NC(folderPath):
+         """
+         UNDER CONSTRUCTION. DO NOT USE.
+
+         Parameters
+         ----------
+         folderPath : str
+             The path to the folder containing the input CSV files. That folder should contain graphs.csv, edges.csv, and vertices.csv.
+
+         Returns
+         -------
+         DGLDataset
+             The returned DGL dataset.
+
+         """
+         return dgl.data.CSVDataset(folderPath, force_reload=True)
+
+     @staticmethod
+     def DatasetBySample(name="ENZYMES"):
+         """
+         Returns a dataset from the samples database.
+
+         Parameters
+         ----------
+         name : str
+             The name of the sample dataset. This can be "ENZYMES", "DD", "COLLAB", or "MUTAG". It is case insensitive. The default is "ENZYMES".
+
+         Returns
+         -------
+         GraphDGL
+             The created DGL dataset.
+
+         """
+         name = name.upper()
+         dataset = dgl.data.TUDataset(name)
+         dgl_graphs, dgl_labels = zip(*[dataset[i] for i in range(len(dataset.graph_lists))])
+         if name == 'ENZYMES':
+             node_attr_key = 'node_attr'
+         elif name == 'DD':
+             node_attr_key = 'node_labels'
+         elif name == 'COLLAB':
+             node_attr_key = '_ID'
+         elif name == 'MUTAG':
+             node_attr_key = 'node_labels'
+         else:
+             raise NotImplementedError
+         return _Dataset(dgl_graphs, dgl_labels, node_attr_key)
+
+     @staticmethod
+     def DatasetBySample_NC(name="Cora"):
+         """
+         Returns the sample dataset as specified by the input sample name.
+
+         Parameters
+         ----------
+         name : str
+             The name of the sample dataset to load. This can be "Cora", "Citeseer", or "Pubmed". It is case insensitive. The default is "Cora".
+
+         Raises
+         ------
+         NotImplementedError
+             If the input name is not one of the supported sample datasets.
+
+         Returns
+         -------
+         list
+             A list of two elements: the loaded dataset and its number of node classes.
+
+         """
+         if name.lower() == 'cora':
+             return [dgl.data.CoraGraphDataset(), 7]
+         elif name.lower() == 'citeseer':
+             return [dgl.data.CiteseerGraphDataset(), 6]
+         elif name.lower() == 'pubmed':
+             return [dgl.data.PubmedGraphDataset(), 3]
+         else:
+             raise NotImplementedError
+
+     @staticmethod
+     def DatasetGraphs(dataset):
+         """
+         Returns the DGL graphs found in the input dataset.
+
+         Parameters
+         ----------
+         dataset : DGLDataset
+             The input dataset.
+
+         Returns
+         -------
+         list
+             The list of DGL graphs found in the input dataset.
+
+         """
+         try:
+             _ = dataset[1]
+         except:
+             dataset = [dataset[0]]
+         graphs = []
+         for aGraph in dataset:
+             if isinstance(aGraph, tuple):
+                 aGraph = aGraph[0]
+             graphs.append(aGraph)
+         return graphs
+
+     @staticmethod
+     def GraphEdgeData(graph):
+         """
+         Returns the edge data found in the input DGL graph.
+
+         Parameters
+         ----------
+         graph : DGL Graph
+             The input DGL graph.
+
+         Returns
+         -------
+         edge data
+             The edge data.
+
+         """
+         return graph.edata
+
+     @staticmethod
+     def Hyperparameters(optimizer, model_type="classifier", cv_type="Holdout", split=[0.8,0.1,0.1], k_folds=5,
+                         hl_widths=[32], conv_layer_type="SAGEConv", pooling="AvgPooling",
+                         batch_size=1, epochs=1, use_gpu=False, loss_function="Cross Entropy"):
+         """
+         Creates a hyperparameters object based on the input settings.
+
+         Parameters
+         ----------
+         optimizer : Optimizer
+             The desired optimizer. See DGL.Optimizer().
+         model_type : str , optional
+             The desired type of model. The options are:
+             - "Classifier"
+             - "Regressor"
+             The option is case insensitive. The default is "classifier".
+         cv_type : str , optional
+             The desired cross-validation method. This can be "Holdout" or "K-Fold". It is case insensitive. The default is "Holdout".
+         split : list , optional
+             The desired split between training, validation, and testing. [0.8, 0.1, 0.1] means that 80% of the data is used for training, 10% of the data is used for validation, and 10% is used for testing. The default is [0.8, 0.1, 0.1].
+         k_folds : int , optional
+             The desired number of k-folds. The default is 5.
+         hl_widths : list , optional
+             The list of hidden layer widths. A list of [16, 32, 16] means that the model will have 3 hidden layers with the number of neurons in each being 16, 32, and 16 respectively from input to output. The default is [32].
+         conv_layer_type : str , optional
+             The desired type of the convolution layer. The options are "Classic", "GraphConv", "GINConv", "SAGEConv", "TAGConv", "DGN". It is case insensitive. The default is "SAGEConv".
+         pooling : str , optional
+             The desired type of pooling. The options are "AvgPooling", "MaxPooling", or "SumPooling". It is case insensitive. The default is "AvgPooling".
+         batch_size : int , optional
+             The desired batch size. The default is 1.
+         epochs : int , optional
+             The desired number of epochs. The default is 1.
+         use_gpu : bool , optional
+             If set to True, the model will attempt to use the GPU. The default is False.
+         loss_function : str , optional
+             The desired loss function. The options are "Cross Entropy" or "Negative Log Likelihood". It is case insensitive. The default is "Cross Entropy".
+
+         Returns
+         -------
+         Hyperparameters
+             The created hyperparameters object.
+
+         """
+
+         if optimizer['name'].lower() == "adadelta":
+             optimizer_str = "Adadelta"
+         elif optimizer['name'].lower() == "adagrad":
+             optimizer_str = "Adagrad"
+         elif optimizer['name'].lower() == "adam":
+             optimizer_str = "Adam"
+         else:
+             raise NotImplementedError
+         return _Hparams(model_type,
+                         optimizer_str,
+                         optimizer['amsgrad'],
+                         optimizer['betas'],
+                         optimizer['eps'],
+                         optimizer['lr'],
+                         optimizer['lr_decay'],
+                         optimizer['maximize'],
+                         optimizer['rho'],
+                         optimizer['weight_decay'],
+                         cv_type,
+                         split,
+                         k_folds,
+                         hl_widths,
+                         conv_layer_type,
+                         pooling,
+                         batch_size,
+                         epochs,
+                         use_gpu,
+                         loss_function)
+
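+     # A minimal sketch of building hyperparameters: DGL.Optimizer() returns a
+     # dictionary carrying every key read above ('amsgrad', 'betas', 'eps',
+     # 'lr', 'lr_decay', 'maximize', 'rho', 'weight_decay'), so it can feed
+     # any of the three supported optimizer names:
+     #   optimizer = DGL.Optimizer(name="Adam", lr=0.001)
+     #   hparams = DGL.Hyperparameters(optimizer, model_type="classifier",
+     #                                 cv_type="Holdout", hl_widths=[32],
+     #                                 batch_size=1, epochs=10)
+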
+     @staticmethod
+     def OneHotEncode(item, categories):
+         """
+         One-hot encodes the input item according to the input categories. One-hot encoding is a method to encode categorical variables as numerical data that machine learning algorithms can deal with. It is most often used during feature engineering for an ML model. It converts categorical values into new categorical columns and assigns a binary value of 1 or 0 to those columns.
+
+         Parameters
+         ----------
+         item : any
+             The input item.
+         categories : list
+             The input list of categories.
+
+         Returns
+         -------
+         list
+             A one-hot encoded list of the input item according to the input categories.
+
+         """
+         returnList = []
+         for i in range(len(categories)):
+             if item == categories[i]:
+                 returnList.append(1)
+             else:
+                 returnList.append(0)
+         return returnList
+
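+     # A minimal usage sketch of OneHotEncode: the item's position in the
+     # category list receives a 1 and every other position receives a 0:
+     #   DGL.OneHotEncode("wall", ["floor", "wall", "roof"])
+     #   # [0, 1, 0]
+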
+     @staticmethod
+     def DatasetLabels(dataset):
+         """
+         Returns the labels of the graphs in the input dataset.
+
+         Parameters
+         ----------
+         dataset : DGLDataset
+             The input dataset.
+
+         Returns
+         -------
+         list
+             The list of labels.
+         """
+         return [int(g[1]) for g in dataset]
+
+     @staticmethod
+     def DatasetMerge(datasets, key="node_attr"):
+         """
+         Merges the input list of datasets into one dataset.
+
+         Parameters
+         ----------
+         datasets : list
+             The input list of DGL datasets.
+         key : str , optional
+             The key used for the node attributes. The default is "node_attr".
+
+         Returns
+         -------
+         DGLDataset
+             The merged dataset.
+         """
+
+         graphs = []
+         labels = []
+         for ds in datasets:
+             graphs += DGL.DatasetGraphs(ds)
+             labels += DGL.DatasetLabels(ds)
+         return DGL.DatasetByGraphs({'graphs': graphs, 'labels': labels}, key=key)
+
+     @staticmethod
+     def GraphNodeData(graph):
+         """
+         Returns the node data found in the input DGL graph.
+
+         Parameters
+         ----------
+         graph : DGL Graph
+             The input DGL graph.
+
+         Returns
+         -------
+         node data
+             The node data.
+
+         """
+         return graph.ndata
+
+     @staticmethod
+     def DatasetRemoveCategory(dataset, label, key="node_attr"):
+         """
+         Removes the graphs that have the input label from the input dataset.
+
+         Parameters
+         ----------
+         dataset : DGLDataset
+             The input dataset.
+         label : int
+             The input label.
+         key : str , optional
+             The input node attribute key. The default is "node_attr".
+
+         Returns
+         -------
+         DGLDataset
+             The resulting dataset.
+
+         """
+
+         graphs = DGL.DatasetGraphs(dataset)
+         labels = DGL.DatasetLabels(dataset)
+         new_graphs = []
+         new_labels = []
+         for i in range(len(labels)):
+             if not labels[i] == label:
+                 new_graphs.append(graphs[i])
+                 new_labels.append(labels[i])
+         return DGL.DatasetByGraphs({'graphs': new_graphs, 'labels': new_labels}, key=key)
+
+     @staticmethod
+     def DatasetSplit(dataset, fracList=[0.8, 0.1, 0.1], shuffle=False, randomState=None, key="node_attr"):
+         """
+         Splits the dataset into training, validation, and testing datasets.
+
+         Parameters
+         ----------
+         dataset : DGLDataset
+             The input dataset.
+         fracList : list , optional
+             A list of length 3 containing the fractions to use for training, validation, and testing. The default is [0.8, 0.1, 0.1].
+         shuffle : bool , optional
+             If set to True, the dataset is shuffled before splitting. The default is False.
+         randomState : int or array_like , optional
+             Random seed used to initialize the pseudo-random number generator. Can be any integer between 0 and 2**32 - 1 inclusive, an array (or other sequence) of such integers, or None (the default). If seed is None, then RandomState will try to read data from /dev/urandom (or the Windows analogue) if available or seed from the clock otherwise.
+         key : str , optional
+             The key used for the node attributes. The default is "node_attr".
+
+         Returns
+         -------
+         dict
+             A dictionary of the split datasets. The dictionary contains the following keys and values:
+             - "train_ds" (DGLDataset)
+             - "validate_ds" (DGLDataset)
+             - "test_ds" (DGLDataset)
+
+         """
+
+         if not 0 <= fracList[0] <= 1:
+             return None
+         if not 0 <= fracList[1] <= 1:
+             return None
+         if not 0 <= fracList[2] <= 1:
+             return None
+         if sum(fracList) > 1:
+             return None
+         datasets = dgl.data.utils.split_dataset(dataset, frac_list=fracList, shuffle=shuffle, random_state=randomState)
+         if fracList[0] > 0:
+             train_ds = DGL.DatasetByGraphs({'graphs': DGL.DatasetGraphs(datasets[0]), 'labels': DGL.DatasetLabels(datasets[0])}, key=key)
+         else:
+             train_ds = None
+         if fracList[1] > 0:
+             validate_ds = DGL.DatasetByGraphs({'graphs': DGL.DatasetGraphs(datasets[1]), 'labels': DGL.DatasetLabels(datasets[1])}, key=key)
+         else:
+             validate_ds = None
+         if fracList[2] > 0:
+             test_ds = DGL.DatasetByGraphs({'graphs': DGL.DatasetGraphs(datasets[2]), 'labels': DGL.DatasetLabels(datasets[2])}, key=key)
+         else:
+             test_ds = None
+
+         return {
+             "train_ds" : train_ds,
+             "validate_ds" : validate_ds,
+             "test_ds" : test_ds
+         }
+
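+     # A minimal usage sketch of DatasetSplit, assuming `dataset` is an
+     # existing DGLDataset; the fractions must each lie in [0, 1] and sum to
+     # at most 1:
+     #   split = DGL.DatasetSplit(dataset, fracList=[0.8, 0.1, 0.1], shuffle=True)
+     #   train_ds = split["train_ds"]
+     #   validate_ds = split["validate_ds"]
+     #   test_ds = split["test_ds"]
+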
+     @staticmethod
+     def Optimizer(name="Adam", amsgrad=True, betas=(0.9,0.999), eps=0.000001, lr=0.001, maximize=False, weightDecay=0.0, rho=0.9, lr_decay=0.0):
+         """
+         Returns the parameters of the optimizer.
+
+         Parameters
+         ----------
+         name : str , optional
+             The name of the optimizer. This can be "Adadelta", "Adagrad", or "Adam". The default is "Adam".
+         amsgrad : bool , optional
+             amsgrad is an extension to the Adam version of gradient descent that attempts to improve the convergence properties of the algorithm, avoiding large abrupt changes in the learning rate for each input variable. The default is True.
+         betas : tuple , optional
+             Betas are used for smoothing the path to convergence and also provide some momentum to cross a local minimum or saddle point. The default is (0.9, 0.999).
+         eps : float , optional
+             eps is a term added to the denominator to improve numerical stability. The default is 0.000001.
+         lr : float , optional
+             The learning rate (lr) defines the adjustment in the weights of the network with respect to the loss gradient. The default is 0.001.
+         maximize : bool , optional
+             If set to True, the parameters are maximized based on the objective, instead of minimized. The default is False.
+         weightDecay : float , optional
+             weightDecay (L2 penalty) is a regularization technique applied to the weights of a neural network. The default is 0.0.
+         rho : float , optional
+             The rho coefficient used by Adadelta. The default is 0.9.
+         lr_decay : float , optional
+             The learning rate decay used by Adagrad. The default is 0.0.
+
+         Returns
+         -------
+         dict
+             The dictionary of the optimizer parameters. The dictionary contains the following keys and values:
+             - "name" (str): The name of the optimizer
+             - "amsgrad" (bool)
+             - "betas" (tuple)
+             - "eps" (float)
+             - "lr" (float)
+             - "maximize" (bool)
+             - "weight_decay" (float)
+             - "rho" (float)
+             - "lr_decay" (float)
+
+         """
+         return {"name":name, "amsgrad":amsgrad, "betas":betas, "eps":eps, "lr": lr, "maximize":maximize, "weight_decay":weightDecay, "rho":rho, "lr_decay":lr_decay}
+
+     @staticmethod
+     def ModelClassify(model, dataset, node_attr_key="node_attr"):
+         """
+         Predicts the classification labels of the input dataset.
+
+         Parameters
+         ----------
+         model : Model
+             The input trained model.
+         dataset : DGLDataset
+             The input DGL dataset.
+         node_attr_key : str , optional
+             The key used for node attributes. The default is "node_attr".
+
+         Returns
+         -------
+         dict
+             A dictionary containing labels and probabilities. The included keys and values are:
+             - "predictions" (list): The list of predicted labels.
+             - "probabilities" (list): The list of probabilities that the label is one of the categories.
+
+         """
+         labels = []
+         probabilities = []
+         for item in tqdm(dataset, desc='Classifying', leave=False):
+             graph = item[0]
+             pred = model(graph, graph.ndata[node_attr_key].float())
+             labels.append(pred.argmax(1).item())
+             probability = (torch.nn.functional.softmax(pred, dim=1).tolist())
+             probability = probability[0]
+             temp_probability = []
+             for p in probability:
+                 temp_probability.append(round(p, 3))
+             probabilities.append(temp_probability)
+         return {"predictions":labels, "probabilities":probabilities}
+
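+     # A minimal usage sketch of ModelClassify, assuming `model` is a trained
+     # classifier (see DGL.ModelTrain) and `dataset` was built with the default
+     # "node_attr" key:
+     #   results = DGL.ModelClassify(model, dataset)
+     #   predictions = results["predictions"]      # one label per graph
+     #   probabilities = results["probabilities"]  # softmax scores per class
+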
+     @staticmethod
+     def ModelPredict(model, dataset, node_attr_key="node_attr"):
+         """
+         Predicts the values of the input dataset.
+
+         Parameters
+         ----------
+         model : Model
+             The input trained model.
+         dataset : DGLDataset
+             The input DGL dataset.
+         node_attr_key : str , optional
+             The key used for node attributes. The default is "node_attr".
+
+         Returns
+         -------
+         list
+             The list of predictions.
+         """
+         values = []
+         for item in tqdm(dataset, desc='Predicting', leave=False):
+             graph = item[0]
+             pred = model(graph, graph.ndata[node_attr_key].float())
+             values.append(round(pred.item(), 3))
+         return values
+
+     @staticmethod
+     def ModelClassifyNodes(model, dataset):
+         """
+         Predicts the classification of the node labels found in the input dataset using the input model.
+
+         Parameters
+         ----------
+         model : Model
+             The input model.
+         dataset : DGLDataset
+             The input DGL dataset.
+
+         Returns
+         -------
+         dict
+             A dictionary containing all the results. The keys in this dictionary are:
+             - "alllabels"
+             - "allpredictions"
+             - "trainlabels"
+             - "trainpredictions"
+             - "validationlabels"
+             - "validationpredictions"
+             - "testlabels"
+             - "testpredictions"
+
+         """
+         from topologicpy.Helper import Helper
+
+         allLabels = []
+         allPredictions = []
+         trainLabels = []
+         trainPredictions = []
+         valLabels = []
+         valPredictions = []
+         testLabels = []
+         testPredictions = []
+
+         graphs = DGL.DatasetGraphs(dataset)
+         for g in graphs:
+             if not g.ndata:
+                 continue
+             train_mask = g.ndata['train_mask']
+             val_mask = g.ndata['val_mask']
+             test_mask = g.ndata['test_mask']
+             features = g.ndata['feat']
+             labels = g.ndata['label']
+             train_labels = labels[train_mask]
+             val_labels = labels[val_mask]
+             test_labels = labels[test_mask]
+             allLabels.append(labels.tolist())
+             trainLabels.append(train_labels.tolist())
+             valLabels.append(val_labels.tolist())
+             testLabels.append(test_labels.tolist())
+
+             # Forward
+             logits = model(g, features)
+             train_logits = logits[train_mask]
+             val_logits = logits[val_mask]
+             test_logits = logits[test_mask]
+
+             # Compute predictions
+             predictions = logits.argmax(1)
+             train_predictions = train_logits.argmax(1)
+             val_predictions = val_logits.argmax(1)
+             test_predictions = test_logits.argmax(1)
+             allPredictions.append(predictions.tolist())
+             trainPredictions.append(train_predictions.tolist())
+             valPredictions.append(val_predictions.tolist())
+             testPredictions.append(test_predictions.tolist())
+
+         return {
+             "alllabels": Helper.Flatten(allLabels),
+             "allpredictions" : Helper.Flatten(allPredictions),
+             "trainlabels" : Helper.Flatten(trainLabels),
+             "trainpredictions" : Helper.Flatten(trainPredictions),
+             "validationlabels" : Helper.Flatten(valLabels),
+             "validationpredictions" : Helper.Flatten(valPredictions),
+             "testlabels" : Helper.Flatten(testLabels),
+             "testpredictions" : Helper.Flatten(testPredictions)
+         }
+
+     @staticmethod
+     def Show(data,
+              labels,
+              title="Training/Validation",
+              xTitle="Epochs",
+              xSpacing=1,
+              yTitle="Accuracy and Loss",
+              ySpacing=0.1,
+              useMarkers=False,
+              chartType="Line",
+              width=950,
+              height=500,
+              backgroundColor='rgba(0,0,0,0)',
+              gridColor='lightgray',
+              marginLeft=0,
+              marginRight=0,
+              marginTop=40,
+              marginBottom=0,
+              renderer="notebook"):
+         """
+         Shows the data in a plotly graph.
+
+         Parameters
+         ----------
+         data : list
+             The data to display.
+         labels : list
+             The labels to use for the data.
+         title : str , optional
+             The chart title. The default is "Training/Validation".
+         xTitle : str , optional
+             The X-axis title. The default is "Epochs".
+         xSpacing : float , optional
+             The X-axis spacing. The default is 1.0.
+         yTitle : str , optional
+             The Y-axis title. The default is "Accuracy and Loss".
+         ySpacing : float , optional
+             The Y-axis spacing. The default is 0.1.
+         useMarkers : bool , optional
+             If set to True, markers will be displayed. The default is False.
+         chartType : str , optional
+             The desired type of chart. The options are "Line", "Bar", or "Scatter". It is case insensitive. The default is "Line".
+         width : int , optional
+             The desired width of the figure. The default is 950.
+         height : int , optional
+             The desired height of the figure. The default is 500.
+         backgroundColor : str , optional
+             The desired background color. This can be any plotly color string and may be specified as:
+             - A hex string (e.g. '#ff0000')
+             - An rgb/rgba string (e.g. 'rgb(255,0,0)')
+             - An hsl/hsla string (e.g. 'hsl(0,100%,50%)')
+             - An hsv/hsva string (e.g. 'hsv(0,100%,100%)')
+             - A named CSS color.
+             The default is 'rgba(0,0,0,0)' (transparent).
+         gridColor : str , optional
+             The desired grid color. This can be any plotly color string and may be specified as:
+             - A hex string (e.g. '#ff0000')
+             - An rgb/rgba string (e.g. 'rgb(255,0,0)')
+             - An hsl/hsla string (e.g. 'hsl(0,100%,50%)')
+             - An hsv/hsva string (e.g. 'hsv(0,100%,100%)')
+             - A named CSS color.
+             The default is 'lightgray'.
+         marginLeft : int , optional
+             The desired left margin in pixels. The default is 0.
+         marginRight : int , optional
+             The desired right margin in pixels. The default is 0.
+         marginTop : int , optional
+             The desired top margin in pixels. The default is 40.
+         marginBottom : int , optional
+             The desired bottom margin in pixels. The default is 0.
+         renderer : str , optional
+             The desired plotly renderer. The default is "notebook".
+
+         Returns
+         -------
+         None.
+
+         """
+         from topologicpy.Plotly import Plotly
+
+         dataFrame = Plotly.DataByDGL(data, labels)
+         fig = Plotly.FigureByDataFrame(dataFrame,
+                                        labels=labels,
+                                        title=title,
+                                        xTitle=xTitle,
+                                        xSpacing=xSpacing,
+                                        yTitle=yTitle,
+                                        ySpacing=ySpacing,
+                                        useMarkers=useMarkers,
+                                        chartType=chartType,
+                                        width=width,
+                                        height=height,
+                                        backgroundColor=backgroundColor,
+                                        gridColor=gridColor,
+                                        marginRight=marginRight,
+                                        marginLeft=marginLeft,
+                                        marginTop=marginTop,
+                                        marginBottom=marginBottom)
+         Plotly.Show(fig, renderer=renderer)
+
+     @staticmethod
+     def Model(hparams, trainingDataset, validationDataset=None, testingDataset=None):
+         """
+         Creates a neural network model.
+
+         Parameters
+         ----------
+         hparams : HParams
+             The input hyperparameters.
+         trainingDataset : DGLDataset
+             The input training dataset.
+         validationDataset : DGLDataset , optional
+             The input validation dataset. If not specified, a portion of the trainingDataset will be used for validation according to the split list as specified in the hyperparameters.
+         testingDataset : DGLDataset , optional
+             The input testing dataset. If not specified, a portion of the trainingDataset will be used for testing according to the split list as specified in the hyperparameters.
+
+         Returns
+         -------
+         Model
+             The created model.
+
+         """
+
+         model = None
+         if hparams.model_type.lower() == "classifier":
+             if hparams.cv_type.lower() == "holdout":
+                 model = _ClassifierHoldout(hparams=hparams, trainingDataset=trainingDataset, validationDataset=validationDataset, testingDataset=testingDataset)
+             elif hparams.cv_type.lower() == "k-fold" or hparams.cv_type.lower() == "kfold":
+                 model = _ClassifierKFold(hparams=hparams, trainingDataset=trainingDataset, testingDataset=testingDataset)
+         elif hparams.model_type.lower() == "regressor":
+             if hparams.cv_type.lower() == "holdout":
+                 model = _RegressorHoldout(hparams=hparams, trainingDataset=trainingDataset, validationDataset=validationDataset, testingDataset=testingDataset)
+             elif hparams.cv_type.lower() == "k-fold" or hparams.cv_type.lower() == "kfold":
+                 model = _RegressorKFold(hparams=hparams, trainingDataset=trainingDataset, testingDataset=testingDataset)
+         else:
+             raise NotImplementedError
+         return model
+
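+     # A minimal end-to-end sketch, assuming `dataset` is an existing
+     # DGLDataset (e.g. from DGL.DatasetByGraphs) and the file path below is
+     # illustrative:
+     #   optimizer = DGL.Optimizer(name="Adam", lr=0.001)
+     #   hparams = DGL.Hyperparameters(optimizer, model_type="classifier", epochs=10)
+     #   model = DGL.Model(hparams, trainingDataset=dataset)
+     #   model = DGL.ModelTrain(model)
+     #   model = DGL.ModelTest(model)
+     #   DGL.ModelSave(model, path="classifier.pt")
+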
+     @staticmethod
+     def ModelTrain(model):
+         """
+         Trains the neural network model.
+
+         Parameters
+         ----------
+         model : Model
+             The input model.
+
+         Returns
+         -------
+         Model
+             The trained model.
+
+         """
+         if not model:
+             return None
+         model.train()
+         return model
+
+     @staticmethod
+     def ModelTest(model):
+         """
+         Tests the neural network model.
+
+         Parameters
+         ----------
+         model : Model
+             The input model.
+
+         Returns
+         -------
+         Model
+             The tested model.
+
+         """
+         if not model:
+             return None
+         model.test()
+         return model
+
+     @staticmethod
+     def ModelSave(model, path=None):
+         """
+         Saves the model.
+
+         Parameters
+         ----------
+         model : Model
+             The input model.
+         path : str , optional
+             The file path at which to save the model. The default is None.
+
+         Returns
+         -------
+         bool
+             True if the model is saved correctly. False otherwise.
+
+         """
+         if not model:
+             return None
+         if path:
+             # Make sure the file extension is .pt
+             if path[-3:].lower() != ".pt":
+                 path = path + ".pt"
+             model.save(path)
+             return True
+         return False
+
+     @staticmethod
+     def ModelData(model):
+         """
+         Returns the data of the model.
+
+         Parameters
+         ----------
+         model : Model
+             The input model.
+
+         Returns
+         -------
+         dict
+             A dictionary containing the model data.
+
+         """
+         from topologicpy.Helper import Helper
+
+         data = {'Model Type': [model.hparams.model_type],
+                 'Optimizer': [model.hparams.optimizer_str],
+                 'CV Type': [model.hparams.cv_type],
+                 'Split': model.hparams.split,
+                 'K-Folds': [model.hparams.k_folds],
+                 'HL Widths': model.hparams.hl_widths,
+                 'Conv Layer Type': [model.hparams.conv_layer_type],
+                 'Pooling': [model.hparams.pooling],
+                 'Learning Rate': [model.hparams.lr],
+                 'Batch Size': [model.hparams.batch_size],
+                 'Epochs': [model.hparams.epochs]
+                 }
+
+         if model.hparams.model_type.lower() == "classifier":
+             testing_accuracy_list = [model.testing_accuracy] * model.hparams.epochs
+             testing_loss_list = [model.testing_loss] * model.hparams.epochs
+             metrics_data = {
+                 'Training Accuracy': [model.training_accuracy_list],
+                 'Validation Accuracy': [model.validation_accuracy_list],
+                 'Testing Accuracy' : [testing_accuracy_list],
+                 'Training Loss': [model.training_loss_list],
+                 'Validation Loss': [model.validation_loss_list],
+                 'Testing Loss' : [testing_loss_list]
+             }
+             if model.hparams.cv_type.lower() == "k-fold":
+                 accuracy_data = {
+                     'Accuracies' : [model.accuracies],
+                     'Max Accuracy' : [model.max_accuracy]
+                 }
+                 metrics_data.update(accuracy_data)
+             data.update(metrics_data)
+
+         elif model.hparams.model_type.lower() == "regressor":
+             testing_loss_list = [model.testing_loss] * model.hparams.epochs
+             metrics_data = {
+                 'Training Loss': [model.training_loss_list],
+                 'Validation Loss': [model.validation_loss_list],
+                 'Testing Loss' : [testing_loss_list]
+             }
+             if model.hparams.cv_type.lower() == "k-fold":
+                 loss_data = {
+                     'Losses' : [model.losses],
+                     'Min Loss' : [model.min_loss]
+                 }
+                 metrics_data.update(loss_data)
+             data.update(metrics_data)
+
+         return data
+
+     @staticmethod
+     def GraphsByFilePath(path, labelKey="value", key='node_attr'):
+         """
+         Returns the DGL graphs and labels saved at the input file path.
+
+         Parameters
+         ----------
+         path : str
+             The file path of the saved DGL graphs.
+         labelKey : str , optional
+             The key under which the labels are stored in the saved label dictionary. The default is "value".
+         key : str , optional
+             The key used for the node attributes. The default is "node_attr".
+
+         Returns
+         -------
+         dict
+             A dictionary with the keys "graphs" and "labels".
+
+         """
+         graphs, label_dict = load_graphs(path)
+         labels = label_dict[labelKey].tolist()
+         return {"graphs" : graphs, "labels": labels}
+
+     @staticmethod
+     def DataExportToCSV(data, path, overwrite=True):
+         """
+         Exports the input data to a CSV file.
+
+         Parameters
+         ----------
+         data : dict
+             The input data. See ModelData(model).
+         path : str
+             The file path of the output CSV file.
+         overwrite : bool , optional
+             If set to True, previously saved results files are overwritten. Otherwise, the new results are appended to the previously saved files. The default is True.
+
+         Returns
+         -------
+         bool
+             True if the data is saved correctly to a CSV file. False otherwise.
+
+         """
+         from topologicpy.Helper import Helper
+
+         # Make sure the file extension is .csv
+         if path[-4:].lower() != ".csv":
+             path = path + ".csv"
+
+         epoch_list = list(range(1, data['Epochs'][0]+1))
+
+         d = [data['Model Type'], data['Optimizer'], data['CV Type'], [data['Split']], data['K-Folds'], data['HL Widths'], data['Conv Layer Type'], data['Pooling'], data['Learning Rate'], data['Batch Size'], epoch_list]
+         columns = ['Model Type', 'Optimizer', 'CV Type', 'Split', 'K-Folds', 'HL Widths', 'Conv Layer Type', 'Pooling', 'Learning Rate', 'Batch Size', 'Epochs']
+
+         if data['Model Type'][0].lower() == "classifier":
+             d.extend([data['Training Accuracy'][0], data['Validation Accuracy'][0], data['Testing Accuracy'][0], data['Training Loss'][0], data['Validation Loss'][0], data['Testing Loss'][0]])
+             columns.extend(['Training Accuracy', 'Validation Accuracy', 'Testing Accuracy', 'Training Loss', 'Validation Loss', 'Testing Loss'])
+             if data['CV Type'][0].lower() == "k-fold":
+                 d.extend([data['Accuracies'], data['Max Accuracy']])
+                 columns.extend(['Accuracies', 'Max Accuracy'])
+
+         elif data['Model Type'][0].lower() == "regressor":
+             d.extend([data['Training Loss'][0], data['Validation Loss'][0], data['Testing Loss'][0]])
+             columns.extend(['Training Loss', 'Validation Loss', 'Testing Loss'])
+             if data['CV Type'][0].lower() == "k-fold":
+                 d.extend([data['Losses'], data['Min Loss']])
+                 columns.extend(['Losses', 'Min Loss'])
+
+         d = Helper.Iterate(d)
+         d = Helper.Transpose(d)
+         df = pd.DataFrame(d, columns=columns)
+
+         status = False
+         if path:
+             if overwrite:
+                 mode = 'w+'
+             else:
+                 mode = 'a'
+             try:
+                 df.to_csv(path, mode=mode, index=False, header=True)
+                 status = True
+             except:
+                 status = False
+         return status
+
+     '''
+     @staticmethod
+     def TrainRegressor(hparams, trainingDataset, validationDataset=None, testingDataset=None, overwrite=True):
+         """
+         Trains a neural network regressor.
+
+         Parameters
+         ----------
+         hparams : HParams
+             The input hyperparameters
+         trainingDataset : DGLDataset
+             The input training dataset.
+         validationDataset : DGLDataset
+             The input validation dataset. If not specified, a portion of the trainingDataset will be used for validation according to the split list as specified in the hyperparameters.
+         testingDataset : DGLDataset
+             The input testing dataset. If not specified, a portion of the trainingDataset will be used for testing according to the split list as specified in the hyperparameters.
+         overwrite : bool , optional
+             If set to True, previously saved results files are overwritten. Otherwise, the new results are appended to the previously saved files. The default is True.
+
+         Returns
+         -------
+         dict
+             A dictionary containing all the results.
+
+         """
+
+         from topologicpy.Helper import Helper
+         import time
+         import datetime
+         start = time.time()
+         regressor = _RegressorHoldout(hparams, trainingDataset, validationDataset, testingDataset)
+         regressor.train()
+         accuracy = regressor.validate()
+
+         end = time.time()
+         duration = round(end - start, 3)
+         utcnow = datetime.datetime.utcnow()
+         timestamp_str = "UTC-"+str(utcnow.year)+"-"+str(utcnow.month)+"-"+str(utcnow.day)+"-"+str(utcnow.hour)+"-"+str(utcnow.minute)+"-"+str(utcnow.second)
+         epoch_list = list(range(1, regressor.hparams.epochs+1))
+         d2 = [[timestamp_str], [duration], [regressor.hparams.optimizer_str], [regressor.hparams.cv_type], [regressor.hparams.split], [regressor.hparams.k_folds], regressor.hparams.hl_widths, [regressor.hparams.conv_layer_type], [regressor.hparams.pooling], [regressor.hparams.lr], [regressor.hparams.batch_size], epoch_list, regressor.training_accuracy_list, regressor.validation_accuracy_list]
+         d2 = Helper.Iterate(d2)
+         d2 = Helper.Transpose(d2)
+
+         data = {'TimeStamp': timestamp_str,
+                 'Duration': [duration],
+                 'Optimizer': [regressor.hparams.optimizer_str],
+                 'CV Type': [regressor.hparams.cv_type],
+                 'Split': [regressor.hparams.split],
+                 'K-Folds': [regressor.hparams.k_folds],
+                 'HL Widths': [regressor.hparams.hl_widths],
+                 'Conv Layer Type': [regressor.hparams.conv_layer_type],
+                 'Pooling': [regressor.hparams.pooling],
+                 'Learning Rate': [regressor.hparams.lr],
+                 'Batch Size': [regressor.hparams.batch_size],
+                 'Epochs': [regressor.hparams.epochs],
+                 'Training Accuracy': [regressor.training_accuracy_list],
+                 'Validation Accuracy': [regressor.validation_accuracy_list]
+                 }
+
+         df = pd.DataFrame(d2, columns=['TimeStamp', 'Duration', 'Optimizer', 'CV Type', 'Split', 'K-Folds', 'HL Widths', 'Conv Layer Type', 'Pooling', 'Learning Rate', 'Batch Size', 'Epochs', 'Training Accuracy', 'Testing Accuracy'])
+         if regressor.hparams.results_path:
+             if overwrite:
+                 df.to_csv(regressor.hparams.results_path, mode='w+', index=False, header=True)
+             else:
+                 df.to_csv(regressor.hparams.results_path, mode='a', index=False, header=False)
+         return data
+     '''
+
+     @staticmethod
+     def _TrainClassifier_NC(graphs, model, hparams):
+         """
+         Parameters
+         ----------
+         graphs : list
+             The input list of graphs.
+         model : GCN Model
+             The input classifier model.
+         hparams : HParams
+             The input hyperparameters.
+
+         Returns
+         -------
+         list
+             The list of the trained model and the predictions.
+
+         """
+         # Default optimizer
+         optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
+         if hparams.optimizer_str.lower() == "adadelta":
+             optimizer = torch.optim.Adadelta(model.parameters(), eps=hparams.eps,
+                                              lr=hparams.lr, rho=hparams.rho, weight_decay=hparams.weight_decay)
+         elif hparams.optimizer_str.lower() == "adagrad":
+             optimizer = torch.optim.Adagrad(model.parameters(), eps=hparams.eps,
+                                             lr=hparams.lr, lr_decay=hparams.lr_decay, weight_decay=hparams.weight_decay)
+         elif hparams.optimizer_str.lower() == "adam":
+             optimizer = torch.optim.Adam(model.parameters(), amsgrad=hparams.amsgrad, betas=hparams.betas, eps=hparams.eps,
+                                          lr=hparams.lr, maximize=hparams.maximize, weight_decay=hparams.weight_decay)
+
+         for e in range(hparams.epochs):
+             best_val_acc = 0
+             best_test_acc = 0
+             for i in range(len(graphs)):
+                 g = graphs[i]
+                 if not g.ndata:
+                     continue
+                 features = g.ndata['feat']
+                 labels = g.ndata['label']
+                 train_mask = g.ndata['train_mask']
+                 val_mask = g.ndata['val_mask']
+                 test_mask = g.ndata['test_mask']
+                 # Forward
+                 logits = model(g, features)
+
+                 # Compute predictions
+                 pred = logits.argmax(1)
+
+                 # Compute the loss. Note that the loss is computed only over
+                 # the nodes in the training set.
+                 if hparams.loss_function.lower() == "negative log likelihood":
+                     logp = F.log_softmax(logits[train_mask], 1)
+                     loss = F.nll_loss(logp, labels[train_mask])
+                 elif hparams.loss_function.lower() == "cross entropy":
+                     loss = F.cross_entropy(logits[train_mask], labels[train_mask])
+                 # Compute the accuracy on the training/validation/test sets
+                 train_acc = (pred[train_mask] == labels[train_mask]).float().mean()
+                 val_acc = (pred[val_mask] == labels[val_mask]).float().mean()
+                 test_acc = (pred[test_mask] == labels[test_mask]).float().mean()
+
+                 # Save the best validation accuracy and the corresponding test accuracy.
+                 if val_acc > best_val_acc:
+                     best_val_acc = val_acc
+                 if test_acc > best_test_acc:
+                     best_test_acc = test_acc
+
+                 # Backward
+                 optimizer.zero_grad()
+                 loss.backward()
+                 optimizer.step()
+             if e % 1 == 0:
+                 print('In epoch {}, loss: {:.3f}, val acc: {:.3f} (best {:.3f}), test acc: {:.3f} (best {:.3f})'.format(
+                     e, loss, val_acc, best_val_acc, test_acc, best_test_acc))
+         return [model, pred]
+
+     @staticmethod
+     def TrainNodeClassifier(hparams, dataset, numLabels, sample):
+         """
+         Trains a node classifier on the input dataset.
+
+         Parameters
+         ----------
+         hparams : HParams
+             The input hyperparameters.
+         dataset : DGLDataset
+             The input dataset.
+         numLabels : int
+             The number of node labels (classes).
+         sample : int
+             The number of graphs to randomly sample from the dataset.
+
+         Returns
+         -------
+         Model
+             The trained node classifier model.
+
+         """
+
+         graphs = DGL.DatasetGraphs(dataset)
+         # Sample a random list from the graphs
+         if 0 < sample < len(graphs):
+             graphs = random.sample(graphs, sample)
+         if len(graphs) == 1:
+             i = 0
+         elif len(graphs) > 1:
+             i = random.randrange(0, len(graphs)-1)
+         else: # There are no graphs in the dataset, return None
+             return None
+         model = _Classic(graphs[i].ndata['feat'].shape[1], hparams.hl_widths, numLabels)
+         final_model, predictions = DGL._TrainClassifier_NC(graphs, model, hparams)
+         # Save the entire model
+         if hparams.checkpoint_path is not None:
+             torch.save(final_model, hparams.checkpoint_path)
+         return final_model