topologicpy 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- topologicpy/Aperture.py +3 -3
- topologicpy/Cluster.py +24 -6
- topologicpy/Context.py +19 -19
- topologicpy/DGL.py +620 -453
- topologicpy/Edge.py +36 -31
- topologicpy/Face.py +120 -73
- topologicpy/Graph.py +338 -64
- topologicpy/Plotly.py +122 -16
- topologicpy/Topology.py +26 -23
- topologicpy/Vertex.py +35 -22
- topologicpy/Wire.py +114 -44
- topologicpy/__init__.py +1 -1
- {topologicpy-0.3.0.dist-info → topologicpy-0.3.2.dist-info}/METADATA +1 -1
- {topologicpy-0.3.0.dist-info → topologicpy-0.3.2.dist-info}/RECORD +17 -17
- {topologicpy-0.3.0.dist-info → topologicpy-0.3.2.dist-info}/LICENSE +0 -0
- {topologicpy-0.3.0.dist-info → topologicpy-0.3.2.dist-info}/WHEEL +0 -0
- {topologicpy-0.3.0.dist-info → topologicpy-0.3.2.dist-info}/top_level.txt +0 -0
topologicpy/DGL.py
CHANGED
@@ -30,20 +30,24 @@ try:
     from dgl.data import DGLDataset
     from dgl.dataloading import GraphDataLoader
     from dgl.nn import GINConv, GraphConv, SAGEConv, TAGConv
+    from dgl import save_graphs, load_graphs
 except:
     call = [sys.executable, '-m', 'pip', 'install', 'dgl', 'dglgo', '-f', 'https://data.dgl.ai/wheels/repo.html', '--upgrade', '-t', sys.path[0]]
     subprocess.run(call)
     import dgl
     from dgl.data import DGLDataset
     from dgl.nn import GraphConv
+    from dgl import save_graphs, load_graphs
 try:
     import sklearn
     from sklearn.model_selection import KFold
+    from sklearn.metrics import accuracy_score
 except:
     call = [sys.executable, '-m', 'pip', 'install', 'scikit-learn', '-t', sys.path[0]]
     subprocess.run(call)
     import sklearn
     from sklearn.model_selection import KFold
+    from sklearn.metrics import accuracy_score
 try:
     from tqdm.auto import tqdm
 except:
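Both branches of each dependency block gain the new imports (save_graphs/load_graphs and accuracy_score), following the module's import-or-install pattern: import inside try, and on failure pip-install into sys.path[0] and import again. A minimal standalone sketch of that pattern, using scikit-learn as the example dependency:

import subprocess
import sys

try:
    from sklearn.metrics import accuracy_score
except ImportError:
    # Same recovery as DGL.py above: install into the first sys.path entry, then retry
    subprocess.run([sys.executable, '-m', 'pip', 'install', 'scikit-learn', '-t', sys.path[0]])
    from sklearn.metrics import accuracy_score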
@@ -57,16 +61,12 @@ import topologic
 from topologicpy.Dictionary import Dictionary
 import os
 
-
 import random
 import time
 from datetime import datetime
 import copy
 
-
-results_path = os.path.join(os.path.expanduser('~'), "dgl_results.csv")
-
-class _GraphDGL(DGLDataset):
+class _Dataset(DGLDataset):
     def __init__(self, graphs, labels, node_attr_key):
         super().__init__(name='GraphDGL')
         self.graphs = graphs
@@ -84,8 +84,8 @@ class _GraphDGL(DGLDataset):
         return len(self.graphs)
 
 class _Hparams:
-    def __init__(self, optimizer_str="Adam", amsgrad=False, betas=(0.9, 0.999), eps=1e-6, lr=0.001, lr_decay= 0, maximize=False, rho=0.9, weight_decay=0, cv_type="Holdout", split=[0.8,0.1, 0.1], k_folds=5, hl_widths=[32], conv_layer_type='SAGEConv', pooling="AvgPooling", batch_size=32, epochs=1,
-                 use_gpu=False, loss_function="Cross Entropy"
+    def __init__(self, model_type="ClassifierHoldout", optimizer_str="Adam", amsgrad=False, betas=(0.9, 0.999), eps=1e-6, lr=0.001, lr_decay= 0, maximize=False, rho=0.9, weight_decay=0, cv_type="Holdout", split=[0.8,0.1, 0.1], k_folds=5, hl_widths=[32], conv_layer_type='SAGEConv', pooling="AvgPooling", batch_size=32, epochs=1,
+                 use_gpu=False, loss_function="Cross Entropy"):
         """
         Parameters
         ----------
@@ -112,9 +112,6 @@ class _Hparams:
             each step of an epoch
         epochs : int
             An epoch means training the neural network with all the training data for one cycle. In an epoch, we use all of the data exactly once. A forward pass and a backward pass together are counted as one pass
-        checkpoint_path: str
-            Path to save the classifier after training. It is preferred for
-            the path to have .pt extension
         use_GPU : use the GPU. Otherwise, use the CPU
 
         Returns
@@ -123,6 +120,7 @@ class _Hparams:
 
         """
 
+        self.model_type = model_type
         self.optimizer_str = optimizer_str
         self.amsgrad = amsgrad
         self.betas = betas
@@ -142,8 +140,6 @@ class _Hparams:
         self.epochs = epochs
         self.use_gpu = use_gpu
         self.loss_function = loss_function
-        self.checkpoint_path = checkpoint_path
-        self.results_path = results_path
 
 class _Classic(nn.Module):
     def __init__(self, in_feats, h_feats, num_classes):
@@ -184,7 +180,7 @@ class _Classic(nn.Module):
 
 class _ClassicReg(nn.Module):
     def __init__(self, in_feats, h_feats):
-        super(
+        super(_ClassicReg, self).__init__()
         assert isinstance(h_feats, list), "h_feats must be a list"
         h_feats = [x for x in h_feats if x is not None]
         assert len(h_feats) !=0, "h_feats is empty. unable to add hidden layers"
@@ -417,13 +413,22 @@ class _RegressorHoldout:
         self.hparams = hparams
         if hparams.conv_layer_type.lower() == 'classic':
             self.model = _ClassicReg(trainingDataset.dim_nfeats, hparams.hl_widths).to(device)
+        elif hparams.conv_layer_type.lower() == 'ginconv':
+            self.model = _GINConv(trainingDataset.dim_nfeats, hparams.hl_widths,
+                                  1, hparams.pooling).to(device)
         elif hparams.conv_layer_type.lower() == 'graphconv':
             self.model = _GraphConvReg(trainingDataset.dim_nfeats, hparams.hl_widths, hparams.pooling).to(device)
+        elif hparams.conv_layer_type.lower() == 'sageconv':
+            self.model = _SAGEConv(trainingDataset.dim_nfeats, hparams.hl_widths,
+                                   1, hparams.pooling).to(device)
+        elif hparams.conv_layer_type.lower() == 'tagconv':
+            self.model = _TAGConv(trainingDataset.dim_nfeats, hparams.hl_widths,
+                                  1, hparams.pooling).to(device)
         elif hparams.conv_layer_type.lower() == 'gcn':
             self.model = _ClassicReg(trainingDataset.dim_nfeats, hparams.hl_widths).to(device)
         else:
             raise NotImplementedError
-
+
         if hparams.optimizer_str.lower() == "adadelta":
             self.optimizer = torch.optim.Adadelta(self.model.parameters(), eps=hparams.eps,
                                                   lr=hparams.lr, rho=hparams.rho, weight_decay=hparams.weight_decay)
@@ -433,13 +438,10 @@ class _RegressorHoldout:
         elif hparams.optimizer_str.lower() == "adam":
             self.optimizer = torch.optim.Adam(self.model.parameters(), amsgrad=hparams.amsgrad, betas=hparams.betas, eps=hparams.eps,
                                               lr=hparams.lr, maximize=hparams.maximize, weight_decay=hparams.weight_decay)
+
         self.use_gpu = hparams.use_gpu
-
         self.training_loss_list = []
         self.validation_loss_list = []
-        self.training_accuracy_list = []
-        self.validation_accuracy_list = []
-        self.testing_accuracy_list = []
         self.node_attr_key = trainingDataset.node_attr_key
 
         # train, validate, test split
@@ -479,19 +481,12 @@ class _RegressorHoldout:
         #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
         device = torch.device("cpu")
         # Init the loss and accuracy reporting lists
-        self.training_accuracy_list = []
         self.training_loss_list = []
-        self.validation_accuracy_list = []
         self.validation_loss_list = []
-        self.testing_accuracy_list = []
-
 
-
+
         # Run the training loop for defined number of epochs
-        for _ in tqdm(range(self.hparams.epochs), desc='Epochs'):
-            num_correct = 0
-            num_tests = 0
-            temp_loss_list = []
+        for _ in tqdm(range(self.hparams.epochs), desc='Epochs', total=self.hparams.epochs, leave=False):
             # Iterate over the DataLoader for training data
             for batched_graph, labels in tqdm(self.train_dataloader, desc='Training', leave=False):
                 # Make sure the model is in training mode
@@ -510,46 +505,241 @@ class _RegressorHoldout:
                 # Perform optimization
                 self.optimizer.step()
 
-            self.
-            self.training_accuracy_list.append(self.training_accuracy)
+            self.training_loss_list.append(torch.sqrt(loss).item())
             self.validate()
-            self.
-
-        best_rmse = self.validation_accuracy
-        best_weights = copy.deepcopy(self.model.state_dict())
-        self.test()
-        self.testing_accuracy_list.append(torch.sqrt(self.testing_accuracy).item())
-        if self.hparams.checkpoint_path is not None:
-            # Save the best model
-            self.model.load_state_dict(best_weights)
-            self.model.eval()
-            torch.save(self.model, self.hparams.checkpoint_path)
+            self.validation_loss_list.append(torch.sqrt(self.validation_loss).item())
+
 
     def validate(self):
-        #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
         device = torch.device("cpu")
-        temp_validation_loss = []
         self.model.eval()
         for batched_graph, labels in tqdm(self.validate_dataloader, desc='Validating', leave=False):
             pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float()).to(device)
             loss = F.mse_loss(torch.flatten(pred), labels.float())
-        self.
-        return self.validation_accuracy
+        self.validation_loss = loss
 
     def test(self):
         #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
         device = torch.device("cpu")
-        temp_validation_loss = []
         self.model.eval()
-        for batched_graph, labels in tqdm(self.
+        for batched_graph, labels in tqdm(self.test_dataloader, desc='Testing', leave=False):
             pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float()).to(device)
             loss = F.mse_loss(torch.flatten(pred), labels.float())
-        self.
-
+        self.testing_loss = torch.sqrt(loss).item()
+
+    def save(self, path):
+        if path:
+            # Make sure the file extension is .pt
+            ext = path[len(path)-3:len(path)]
+            if ext.lower() != ".pt":
+                path = path+".pt"
+            torch.save(self.model, path)
 
 
+class _RegressorKFold:
+    def __init__(self, hparams, trainingDataset, testingDataset=None):
+        self.trainingDataset = trainingDataset
+        self.testingDataset = testingDataset
+        self.hparams = hparams
+        self.losses = []
+        self.min_loss = 0
+        # at beginning of the script
+        #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        device = torch.device("cpu")
+        if hparams.conv_layer_type.lower() == 'classic':
+            self.model = _ClassicReg(trainingDataset.dim_nfeats, hparams.hl_widths).to(device)
+        elif hparams.conv_layer_type.lower() == 'ginconv':
+            self.model = _GINConv(trainingDataset.dim_nfeats, hparams.hl_widths,
+                                  1, hparams.pooling).to(device)
+        elif hparams.conv_layer_type.lower() == 'graphconv':
+            self.model = _GraphConvReg(trainingDataset.dim_nfeats, hparams.hl_widths, hparams.pooling).to(device)
+        elif hparams.conv_layer_type.lower() == 'sageconv':
+            self.model = _SAGEConv(trainingDataset.dim_nfeats, hparams.hl_widths,
+                                   1, hparams.pooling).to(device)
+        elif hparams.conv_layer_type.lower() == 'tagconv':
+            self.model = _TAGConv(trainingDataset.dim_nfeats, hparams.hl_widths,
+                                  1, hparams.pooling).to(device)
+        elif hparams.conv_layer_type.lower() == 'gcn':
+            self.model = _ClassicReg(trainingDataset.dim_nfeats, hparams.hl_widths).to(device)
+        else:
+            raise NotImplementedError
 
+        if hparams.optimizer_str.lower() == "adadelta":
+            self.optimizer = torch.optim.Adadelta(self.model.parameters(), eps=hparams.eps,
+                                                  lr=hparams.lr, rho=hparams.rho, weight_decay=hparams.weight_decay)
+        elif hparams.optimizer_str.lower() == "adagrad":
+            self.optimizer = torch.optim.Adagrad(self.model.parameters(), eps=hparams.eps,
+                                                 lr=hparams.lr, lr_decay=hparams.lr_decay, weight_decay=hparams.weight_decay)
+        elif hparams.optimizer_str.lower() == "adam":
+            self.optimizer = torch.optim.Adam(self.model.parameters(), amsgrad=hparams.amsgrad, betas=hparams.betas, eps=hparams.eps,
+                                              lr=hparams.lr, maximize=hparams.maximize, weight_decay=hparams.weight_decay)
+
+        self.use_gpu = hparams.use_gpu
+        self.training_loss_list = []
+        self.validation_loss_list = []
+        self.node_attr_key = trainingDataset.node_attr_key
 
+        # train, validate, test split
+        num_train = int(len(trainingDataset) * (hparams.split[0]))
+        num_validate = int(len(trainingDataset) * (hparams.split[1]))
+        num_test = len(trainingDataset) - num_train - num_validate
+        idx = torch.randperm(len(trainingDataset))
+        test_sampler = SubsetRandomSampler(idx[num_train+num_validate:num_train+num_validate+num_test])
+
+        if testingDataset:
+            self.test_dataloader = GraphDataLoader(testingDataset,
+                                                   batch_size=len(testingDataset),
+                                                   drop_last=False)
+        else:
+            self.test_dataloader = GraphDataLoader(trainingDataset, sampler=test_sampler,
+                                                   batch_size=hparams.batch_size,
+                                                   drop_last=False)
+
+    def reset_weights(self):
+        '''
+        Try resetting model weights to avoid
+        weight leakage.
+        '''
+        device = torch.device("cpu")
+        if self.hparams.conv_layer_type.lower() == 'classic':
+            self.model = _ClassicReg(self.trainingDataset.dim_nfeats, self.hparams.hl_widths).to(device)
+        elif self.hparams.conv_layer_type.lower() == 'ginconv':
+            self.model = _GINConv(self.trainingDataset.dim_nfeats, self.hparams.hl_widths,
+                                  1, self.hparams.pooling).to(device)
+        elif self.hparams.conv_layer_type.lower() == 'graphconv':
+            self.model = _GraphConvReg(self.trainingDataset.dim_nfeats, self.hparams.hl_widths, self.hparams.pooling).to(device)
+        elif self.hparams.conv_layer_type.lower() == 'sageconv':
+            self.model = _SAGEConv(self.trainingDataset.dim_nfeats, self.hparams.hl_widths,
+                                   1, self.hparams.pooling).to(device)
+        elif self.hparams.conv_layer_type.lower() == 'tagconv':
+            self.model = _TAGConv(self.trainingDataset.dim_nfeats, self.hparams.hl_widths,
+                                  1, self.hparams.pooling).to(device)
+        elif self.hparams.conv_layer_type.lower() == 'gcn':
+            self.model = _ClassicReg(self.trainingDataset.dim_nfeats, self.hparams.hl_widths).to(device)
+        else:
+            raise NotImplementedError
+
+        if self.hparams.optimizer_str.lower() == "adadelta":
+            self.optimizer = torch.optim.Adadelta(self.model.parameters(), eps=self.hparams.eps,
+                                                  lr=self.hparams.lr, rho=self.hparams.rho, weight_decay=self.hparams.weight_decay)
+        elif self.hparams.optimizer_str.lower() == "adagrad":
+            self.optimizer = torch.optim.Adagrad(self.model.parameters(), eps=self.hparams.eps,
+                                                 lr=self.hparams.lr, lr_decay=self.hparams.lr_decay, weight_decay=self.hparams.weight_decay)
+        elif self.hparams.optimizer_str.lower() == "adam":
+            self.optimizer = torch.optim.Adam(self.model.parameters(), amsgrad=self.hparams.amsgrad, betas=self.hparams.betas, eps=self.hparams.eps,
+                                              lr=self.hparams.lr, maximize=self.hparams.maximize, weight_decay=self.hparams.weight_decay)
+
+
+
+
+    def train(self):
+        device = torch.device("cpu")
+
+        # The number of folds (This should come from the hparams)
+        k_folds = self.hparams.k_folds
+
+        # Init the loss and accuracy reporting lists
+        self.training_loss_list = []
+        self.validation_loss_list = []
+
+        # Set fixed random number seed
+        torch.manual_seed(42)
+
+        # Define the K-fold Cross Validator
+        kfold = KFold(n_splits=k_folds, shuffle=True)
+
+        models = []
+        weights = []
+        losses = []
+        train_dataloaders = []
+        validate_dataloaders = []
+
+        # K-fold Cross-validation model evaluation
+        for fold, (train_ids, validate_ids) in tqdm(enumerate(kfold.split(self.trainingDataset)), desc="Fold", initial=1, total=k_folds, leave=False):
+            epoch_training_loss_list = []
+            epoch_validation_loss_list = []
+            # Sample elements randomly from a given list of ids, no replacement.
+            train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
+            validate_subsampler = torch.utils.data.SubsetRandomSampler(validate_ids)
+
+            # Define data loaders for training and testing data in this fold
+            self.train_dataloader = GraphDataLoader(self.trainingDataset, sampler=train_subsampler,
+                                                    batch_size=self.hparams.batch_size,
+                                                    drop_last=False)
+            self.validate_dataloader = GraphDataLoader(self.trainingDataset, sampler=validate_subsampler,
+                                                       batch_size=self.hparams.batch_size,
+                                                       drop_last=False)
+            # Init the neural network
+            self.reset_weights()
+
+            # Run the training loop for defined number of epochs
+            best_rmse = np.inf
+            # Run the training loop for defined number of epochs
+            for _ in tqdm(range(self.hparams.epochs), desc='Epochs', total=self.hparams.epochs, initial=1, leave=False):
+                # Iterate over the DataLoader for training data
+                for batched_graph, labels in tqdm(self.train_dataloader, desc='Training', leave=False):
+                    # Make sure the model is in training mode
+                    self.model.train()
+                    # Zero the gradients
+                    self.optimizer.zero_grad()
+
+                    # Perform forward pass
+                    pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float()).to(device)
+                    # Compute loss
+                    loss = F.mse_loss(torch.flatten(pred), labels.float())
+
+                    # Perform backward pass
+                    loss.backward()
+
+                    # Perform optimization
+                    self.optimizer.step()
+
+
+                epoch_training_loss_list.append(torch.sqrt(loss).item())
+                self.validate()
+                epoch_validation_loss_list.append(torch.sqrt(self.validation_loss).item())
+
+            models.append(self.model)
+            weights.append(copy.deepcopy(self.model.state_dict()))
+            losses.append(torch.sqrt(self.validation_loss).item())
+            train_dataloaders.append(self.train_dataloader)
+            validate_dataloaders.append(self.validate_dataloader)
+            self.training_loss_list.append(epoch_training_loss_list)
+            self.validation_loss_list.append(epoch_validation_loss_list)
+        self.losses = losses
+        min_loss = min(losses)
+        self.min_loss = min_loss
+        ind = losses.index(min_loss)
+        self.model = models[ind]
+        self.model.load_state_dict(weights[ind])
+        self.model.eval()
+        self.training_loss_list = self.training_loss_list[ind]
+        self.validation_loss_list = self.validation_loss_list[ind]
+
+    def validate(self):
+        device = torch.device("cpu")
+        self.model.eval()
+        for batched_graph, labels in tqdm(self.validate_dataloader, desc='Validating', leave=False):
+            pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float()).to(device)
+            loss = F.mse_loss(torch.flatten(pred), labels.float())
+        self.validation_loss = loss
+
+    def test(self):
+        #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        device = torch.device("cpu")
+        #self.model.eval()
+        for batched_graph, labels in tqdm(self.test_dataloader, desc='Testing', leave=False):
+            pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float()).to(device)
+            loss = F.mse_loss(torch.flatten(pred), labels.float())
+        self.testing_loss = torch.sqrt(loss).item()
+
+    def save(self, path):
+        if path:
+            # Make sure the file extension is .pt
+            ext = path[len(path)-3:len(path)]
+            if ext.lower() != ".pt":
+                path = path+".pt"
+            torch.save(self.model, path)
 
 class _ClassifierHoldout:
     def __init__(self, hparams, trainingDataset, validationDataset=None, testingDataset=None):
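The new save(path) methods replace the old hparams.checkpoint_path mechanism and normalize the file extension before calling torch.save. A minimal sketch of the extension check they all share (ensure_pt_extension is a hypothetical helper name):

def ensure_pt_extension(path):
    # Mirrors the save() methods above: append ".pt" unless already present
    if path[len(path)-3:len(path)].lower() != ".pt":
        path = path + ".pt"
    return path

assert ensure_pt_extension("regressor") == "regressor.pt"
assert ensure_pt_extension("regressor.PT") == "regressor.PT"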
@@ -594,8 +784,6 @@ class _ClassifierHoldout:
         self.validation_loss_list = []
         self.training_accuracy_list = []
         self.validation_accuracy_list = []
-        self.testing_accuracy_list = []
-        self.testing_loss_list = []
         self.node_attr_key = trainingDataset.node_attr_key
 
         # train, validate, test split
@@ -638,14 +826,11 @@ class _ClassifierHoldout:
         self.training_loss_list = []
         self.validation_accuracy_list = []
         self.validation_loss_list = []
-        self.testing_accuracy_list = []
-        self.testing_loss_list = []
 
         # Run the training loop for defined number of epochs
-        for _ in tqdm(range(self.hparams.epochs), desc='Epochs', leave=False):
-            num_correct = 0
-            num_tests = 0
+        for _ in tqdm(range(self.hparams.epochs), desc='Epochs', initial=1, leave=False):
             temp_loss_list = []
+            temp_acc_list = []
             # Iterate over the DataLoader for training data
             for batched_graph, labels in tqdm(self.train_dataloader, desc='Training', leave=False):
                 # Make sure the model is in training mode
@@ -665,8 +850,7 @@ class _ClassifierHoldout:
 
                 # Save loss information for reporting
                 temp_loss_list.append(loss.item())
-
-                num_tests += len(labels)
+                temp_acc_list.append(accuracy_score(labels, pred.argmax(1)))
 
                 # Perform backward pass
                 loss.backward()
@@ -674,27 +858,17 @@ class _ClassifierHoldout:
                 # Perform optimization
                 self.optimizer.step()
 
-            self.
-            self.
-            self.training_loss_list.append(sum(temp_loss_list) / len(temp_loss_list))
+            self.training_accuracy_list.append(np.mean(temp_acc_list).item())
+            self.training_loss_list.append(np.mean(temp_loss_list).item())
             self.validate()
             self.validation_accuracy_list.append(self.validation_accuracy)
             self.validation_loss_list.append(self.validation_loss)
-            self.test()
-            self.testing_accuracy_list.append(self.testing_accuracy)
-            self.testing_loss_list.append(self.testing_loss)
-        if self.hparams.checkpoint_path is not None:
-            # Save the best model
-            self.model.eval()
-            self.hparams.split = [1, 0, 0]
-            torch.save(self.model, self.hparams.checkpoint_path)
 
     def validate(self):
         #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
         device = torch.device("cpu")
-
-
-        temp_validation_loss = []
+        temp_loss_list = []
+        temp_acc_list = []
         self.model.eval()
         for batched_graph, labels in tqdm(self.validate_dataloader, desc='Validating', leave=False):
             pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float()).to(device)
@@ -703,60 +877,44 @@ class _ClassifierHoldout:
                 loss = F.nll_loss(logp, labels)
             elif self.hparams.loss_function.lower() == "cross entropy":
                 loss = F.cross_entropy(pred, labels)
-
-
-
-        self.validation_loss = (
-        self.validation_accuracy = num_correct / num_tests
-        return self.validation_accuracy
-
-    def accuracy(self, dictionary):
-        labels = dictionary['labels']
-        predictions = dictionary['predictions']
-        num_correct = 0
-        for i in range(len(predictions)):
-            if predictions[i] == labels[i]:
-                num_correct = num_correct + 1
-        return (num_correct / len(predictions))
+            temp_loss_list.append(loss.item())
+            temp_acc_list.append(accuracy_score(labels, pred.argmax(1)))
+        self.validation_accuracy = np.mean(temp_acc_list).item()
+        self.validation_loss = np.mean(temp_loss_list).item()
 
     def test(self):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        try:
-            self.model.eval()
-            torch.save(self.model, self.hparams.checkpoint_path)
-            return True
-        except:
-            return False
-        return False
+        if self.test_dataloader:
+            temp_loss_list = []
+            temp_acc_list = []
+            self.model.eval()
+            for batched_graph, labels in tqdm(self.test_dataloader, desc='Testing', leave=False):
+                pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float())
+                if self.hparams.loss_function.lower() == "negative log likelihood":
+                    logp = F.log_softmax(pred, 1)
+                    loss = F.nll_loss(logp, labels)
+                elif self.hparams.loss_function.lower() == "cross entropy":
+                    loss = F.cross_entropy(pred, labels)
+                temp_loss_list.append(loss.item())
+                temp_acc_list.append(accuracy_score(labels, pred.argmax(1)))
+            self.testing_accuracy = np.mean(temp_acc_list).item()
+            self.testing_loss = np.mean(temp_loss_list).item()
+
+    def save(self, path):
+        if path:
+            # Make sure the file extension is .pt
+            ext = path[len(path)-3:len(path)]
+            if ext.lower() != ".pt":
+                path = path+".pt"
+            torch.save(self.model, path)
 
 class _ClassifierKFold:
-    def __init__(self, hparams, trainingDataset,
+    def __init__(self, hparams, trainingDataset, testingDataset=None):
         self.trainingDataset = trainingDataset
-        self.validationDataset = validationDataset
         self.testingDataset = testingDataset
         self.hparams = hparams
+        self.testing_accuracy = 0
+        self.accuracies = []
+        self.max_accuracy = 0
         # at beginning of the script
         #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
         device = torch.device("cpu")
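Accuracy bookkeeping now goes through sklearn's accuracy_score instead of the old hand-counted num_correct/num_tests: each batch contributes one score, and np.mean averages them per epoch. A minimal sketch with illustrative dummy tensors:

import numpy as np
import torch
from sklearn.metrics import accuracy_score

labels = torch.tensor([0, 1, 2, 1])
pred = torch.tensor([[0.9, 0.1, 0.0],   # logits for 4 graphs, 3 classes
                     [0.2, 0.7, 0.1],
                     [0.1, 0.2, 0.7],
                     [0.8, 0.1, 0.1]])
temp_acc_list = [accuracy_score(labels, pred.argmax(1))]  # one entry per batch, as in validate()
epoch_accuracy = np.mean(temp_acc_list).item()            # per-epoch average
print(epoch_accuracy)  # 0.75 (3 of 4 predictions correct)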
@@ -792,8 +950,6 @@ class _ClassifierKFold:
         self.validation_loss_list = []
         self.training_accuracy_list = []
         self.validation_accuracy_list = []
-        self.testing_accuracy_list = []
-        self.testing_loss_list = []
         self.node_attr_key = trainingDataset.node_attr_key
 
 
@@ -839,8 +995,6 @@ class _ClassifierKFold:
         self.training_loss_list = []
         self.validation_accuracy_list = []
         self.validation_loss_list = []
-        self.testing_accuracy_list = []
-        self.testing_loss_list = []
 
         # Set fixed random number seed
         torch.manual_seed(42)
@@ -848,22 +1002,18 @@ class _ClassifierKFold:
         # Define the K-fold Cross Validator
         kfold = KFold(n_splits=k_folds, shuffle=True)
 
-        tqdm_list = [i for i in range(k_folds)]
-        t_folds = tqdm(tqdm_list, desc='Folds', leave=False)
         models = []
+        weights = []
         accuracies = []
         train_dataloaders = []
         validate_dataloaders = []
-        t_e = tqdm(range(1,self.hparams.epochs), desc='Epochs', leave=False)
 
         # K-fold Cross-validation model evaluation
-        for fold, (train_ids, validate_ids) in enumerate(kfold.split(self.trainingDataset)):
+        for fold, (train_ids, validate_ids) in tqdm(enumerate(kfold.split(self.trainingDataset)), desc="Fold", initial=1, total=k_folds, leave=False):
             epoch_training_loss_list = []
             epoch_training_accuracy_list = []
             epoch_validation_loss_list = []
             epoch_validation_accuracy_list = []
-            epoch_testing_accuracy_list = []
-            epoch_testing_loss_list = []
             # Sample elements randomly from a given list of ids, no replacement.
             train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
             validate_subsampler = torch.utils.data.SubsetRandomSampler(validate_ids)
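The rewritten loop now wraps kfold.split directly in tqdm rather than managing manual progress-bar objects. kfold.split only yields index arrays; a sketch of how those indices become per-fold dataloaders, assuming `dataset` is an existing indexable DGL dataset:

import torch
from sklearn.model_selection import KFold
from dgl.dataloading import GraphDataLoader

# dataset: any indexable DGL dataset, assumed prepared elsewhere
kfold = KFold(n_splits=5, shuffle=True)
for fold, (train_ids, validate_ids) in enumerate(kfold.split(dataset)):
    # Samplers restrict each loader to this fold's indices, no replacement
    train_sampler = torch.utils.data.SubsetRandomSampler(train_ids)
    validate_sampler = torch.utils.data.SubsetRandomSampler(validate_ids)
    train_loader = GraphDataLoader(dataset, sampler=train_sampler, batch_size=32, drop_last=False)
    validate_loader = GraphDataLoader(dataset, sampler=validate_sampler, batch_size=32, drop_last=False)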
@@ -878,14 +1028,10 @@ class _ClassifierKFold:
             # Init the neural network
             self.reset_weights()
 
-            t_e.reset(1)
-
             # Run the training loop for defined number of epochs
-            for _ in range(self.hparams.epochs):
-                t_e.update()
-                num_correct = 0
-                num_tests = 0
+            for _ in tqdm(range(0,self.hparams.epochs), desc='Epochs', initial=1, total=self.hparams.epochs, leave=False):
                 temp_loss_list = []
+                temp_acc_list = []
 
                 # Iterate over the DataLoader for training data
                 for batched_graph, labels in tqdm(self.train_dataloader, desc='Training', leave=False):
@@ -908,8 +1054,7 @@ class _ClassifierKFold:
 
                     # Save loss information for reporting
                     temp_loss_list.append(loss.item())
-
-                    num_tests += len(labels)
+                    temp_acc_list.append(accuracy_score(labels, pred.argmax(1)))
 
                     # Perform backward pass
                     loss.backward()
@@ -917,19 +1062,13 @@ class _ClassifierKFold:
                     # Perform optimization
                     self.optimizer.step()
 
-
-
-                epoch_training_loss_list.append(sum(temp_loss_list) / len(temp_loss_list))
+                epoch_training_accuracy_list.append(np.mean(temp_acc_list).item())
+                epoch_training_loss_list.append(np.mean(temp_loss_list).item())
                 self.validate()
                 epoch_validation_accuracy_list.append(self.validation_accuracy)
                 epoch_validation_loss_list.append(self.validation_loss)
-                self.test()
-                epoch_testing_accuracy_list.append(self.testing_accuracy)
-                epoch_testing_loss_list.append(self.testing_loss)
-                if self.hparams.checkpoint_path is not None:
-                    # Save the entire model
-                    torch.save(self.model, self.hparams.checkpoint_path+"-fold_"+str(fold))
             models.append(self.model)
+            weights.append(copy.deepcopy(self.model.state_dict()))
             accuracies.append(self.validation_accuracy)
             train_dataloaders.append(self.train_dataloader)
             validate_dataloaders.append(self.validate_dataloader)
@@ -937,22 +1076,21 @@ class _ClassifierKFold:
             self.training_loss_list.append(epoch_training_loss_list)
             self.validation_accuracy_list.append(epoch_validation_accuracy_list)
             self.validation_loss_list.append(epoch_validation_loss_list)
-
-            self.testing_loss_list.append(epoch_testing_loss_list)
-            t_folds.update()
+        self.accuracies = accuracies
         max_accuracy = max(accuracies)
+        self.max_accuracy = max_accuracy
         ind = accuracies.index(max_accuracy)
-        model = models[ind]
-        model.
-        model.
-        self.
-        self.
-
-
+        self.model = models[ind]
+        self.model.load_state_dict(weights[ind])
+        self.model.eval()
+        self.training_accuracy_list = self.training_accuracy_list[ind]
+        self.training_loss_list = self.training_loss_list[ind]
+        self.validation_accuracy_list = self.validation_accuracy_list[ind]
+        self.validation_loss_list = self.validation_loss_list[ind]
+
     def validate(self):
-
-
-        temp_validation_loss = []
+        temp_loss_list = []
+        temp_acc_list = []
         self.model.eval()
         for batched_graph, labels in tqdm(self.validate_dataloader, desc='Validating', leave=False):
             pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float())
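After the folds finish, the classifier now keeps every fold's model plus a deep copy of its weights, then restores the weights of the fold with the highest validation accuracy. The selection logic, reduced to its core (the list values are illustrative):

accuracies = [0.71, 0.84, 0.79]          # one validation accuracy per fold
ind = accuracies.index(max(accuracies))  # best fold, as in the hunk above
# self.model = models[ind]; self.model.load_state_dict(weights[ind])
print(ind)  # 1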
@@ -961,134 +1099,38 @@ class _ClassifierKFold:
                 loss = F.nll_loss(logp, labels)
             elif self.hparams.loss_function.lower() == "cross entropy":
                 loss = F.cross_entropy(pred, labels)
-
-
-
-        self.validation_loss = (
-        self.validation_accuracy = num_correct / num_tests
-        return self.validation_accuracy
-
-    def accuracy(self, dictionary):
-        labels = dictionary['labels']
-        predictions = dictionary['predictions']
-        num_correct = 0
-        for i in range(len(predictions)):
-            if predictions[i] == labels[i]:
-                num_correct = num_correct + 1
-        return (num_correct / len(predictions))
+            temp_loss_list.append(loss.item())
+            temp_acc_list.append(accuracy_score(labels, pred.argmax(1)))
+        self.validation_accuracy = np.mean(temp_acc_list).item()
+        self.validation_loss = np.mean(temp_loss_list).item()
 
     def test(self):
-
-
-
-
-        for batched_graph, labels in tqdm(self.test_dataloader, desc='Testing', leave=False):
-            pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float())
-            if self.hparams.loss_function.lower() == "negative log likelihood":
-                logp = F.log_softmax(pred, 1)
-                loss = F.nll_loss(logp, labels)
-            elif self.hparams.loss_function.lower() == "cross entropy":
-                loss = F.cross_entropy(pred, labels)
-            temp_testing_loss.append(loss.item())
-            num_correct += (pred.argmax(1) == labels).sum().item()
-            num_tests += len(labels)
-        self.testing_loss = (sum(temp_testing_loss) / len(temp_testing_loss))
-        self.testing_accuracy = num_correct / num_tests
-        return self.testing_accuracy
-
-    def predict(self):
-        #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-        device = torch.device("cpu")
-        predicted_labels = []
-        idx = torch.randperm(len(self.validationDataset))
-        num_train = int(len(self.validationDataset))
-        sampler = SubsetRandomSampler(idx[:num_train])
-        dataloader = GraphDataLoader(self.validationDataset, sampler=sampler,
-                                     batch_size=1,
-                                     drop_last=False)
-        num_correct = 0
-        num_tests = 0
-        for batched_graph, labels in dataloader:
-            pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float()).to(device)
-            num_correct += (pred.argmax(1) == labels).sum().item()
-            num_tests += len(labels)
-        accuracy = num_correct / num_tests
-        return accuracy
-
-    def train_final(self):
-        #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-        device = torch.device("cpu")
-        # Init the loss and accuracy reporting lists
-        self.training_accuracy_list = []
-        self.training_loss_list = []
-        self.validation_accuracy_list = []
-        self.validation_loss_list = []
-        self.testing_accuracy_list = []
-        self.testing_lost_list = []
-
-
-        # Set training to 100% of the data, validate, and save a final model
-        #idx = torch.randperm(len(self.trainingDataset))
-        #num_train = int(len(self.trainingDataset))
-        #sampler = SubsetRandomSampler(idx[:num_train])
-        #dataloader = GraphDataLoader(self.trainingDataset, sampler=sampler,
-        #batch_size=self.hparams.batch_size,
-        #drop_last=False)
-        print("Final Training/Validation/Testing")
-        for _ in tqdm(range(self.hparams.epochs), desc='Epochs', leave=False):
-            num_correct = 0
-            num_tests = 0
+        if self.testingDataset:
+            self.test_dataloader = GraphDataLoader(self.testingDataset,
+                                                   batch_size=len(self.testingDataset),
+                                                   drop_last=False)
             temp_loss_list = []
-
-
-            for batched_graph, labels in tqdm(self.
-
-                # Make sure the model is in training mode
-                self.model.train()
-
-                # Zero the gradients
-                self.optimizer.zero_grad()
-
-                # Perform forward pass
+            temp_acc_list = []
+            self.model.eval()
+            for batched_graph, labels in tqdm(self.test_dataloader, desc='Testing', leave=False):
                 pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float())
-
-                # Compute loss
                 if self.hparams.loss_function.lower() == "negative log likelihood":
                     logp = F.log_softmax(pred, 1)
                     loss = F.nll_loss(logp, labels)
                 elif self.hparams.loss_function.lower() == "cross entropy":
                     loss = F.cross_entropy(pred, labels)
-
-                # Save loss information for reporting
                 temp_loss_list.append(loss.item())
-
-
-
-
-
-
-
-
-
-
-
-            self.training_loss_list.append(sum(temp_loss_list) / len(temp_loss_list))
-            self.validate()
-            self.validation_accuracy_list.append(self.validation_accuracy)
-            self.validation_loss_list.append(self.validation_loss)
-            self.test()
-            self.testing_accuracy_list.append(self.testing_accuracy)
-            self.testing_loss_list.append(self.testing_loss)
-
-    def save(self):
-        if self.hparams.checkpoint_path is not None:
-            # Save the entire model
-            try:
-                torch.save(self.model, self.hparams.checkpoint_path)
-                return True
-            except:
-                return False
-        return False
+                temp_acc_list.append(accuracy_score(labels, pred.argmax(1)))
+            self.testing_accuracy = np.mean(temp_acc_list).item()
+            self.testing_loss = np.mean(temp_loss_list).item()
+
+    def save(self, path):
+        if path:
+            # Make sure the file extension is .pt
+            ext = path[len(path)-3:len(path)]
+            if ext.lower() != ".pt":
+                path = path+".pt"
+            torch.save(self.model, path)
 
 class DGL:
     @staticmethod
@@ -1193,22 +1235,22 @@ class DGL:
             raise NotImplementedError
 
         list_idx = df['graph_index'].tolist()
-
+        graphs = []
         labels = []
         for index in list_idx:
             graph, label = dataset[index]
-
+            graphs.append(graph)
             labels.append(label)
-        return DGL.
+        return DGL.DatasetByGraphs(graphs=graphs, labels=labels, key=key)
 
     @staticmethod
-    def
+    def GraphByTopologicGraph(topologicGraph, bidirectional=True, key=None, categories=[], node_attr_key="node_attr", tolerance=0.0001):
         """
         Returns a DGL graph by the input topologic graph.
 
         Parameters
         ----------
-
+        topologicGraph : topologic.Graph
             The input topologic graph.
         bidirectional : bool , optional
             If set to True, the output DGL graph is forced to be bidirectional. The defaul is True.
@@ -1233,8 +1275,8 @@ class DGL:
         from topologicpy.Topology import Topology
 
         graph_dict = {}
-        vertices = Graph.Vertices(
-        edges = Graph.Edges(
+        vertices = Graph.Vertices(topologicGraph)
+        edges = Graph.Edges(topologicGraph)
         graph_dict["num_nodes"] = len(vertices)
         graph_dict["src"] = []
         graph_dict["dst"] = []
@@ -1282,7 +1324,7 @@ class DGL:
         return dgl_graph
 
     @staticmethod
-    def
+    def GraphsByImportedCSV(graphs_file_path, edges_file_path,
                             nodes_file_path, graph_id_header="graph_id",
                             graph_label_header="label", num_nodes_header="num_nodes", src_header="src",
                             dst_header="dst", node_label_header="label", node_attr_key="node_attr",
@@ -1375,7 +1417,7 @@ class DGL:
         return {"graphs":dgl_graphs, "labels":labels}
 
     @staticmethod
-    def
+    def GraphsByImportedDGCNN(file_path, categories=[], bidirectional=True):
         """
         Returns the Graphs from the imported DGCNN file.
 
@@ -1469,9 +1511,9 @@ class DGL:
         return {"categories":[categories], "ratios":[ratios]}
 
     @staticmethod
-    def
+    def ModelByFilePath(path):
         """
-        Returns the
+        Returns the model found at the input file path.
         Parameters
         ----------
         path : str
@@ -1483,6 +1525,8 @@ class DGL:
             The classifier.
 
         """
+        if not path:
+            return None
         return torch.load(path)
 
     def ConfusionMatrix(actual, predicted, normalize=False):
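With the guard above, ModelByFilePath now returns None for an empty path instead of failing inside torch.load. A usage sketch (the file name is hypothetical):

from topologicpy.DGL import DGL

model = DGL.ModelByFilePath("my_classifier.pt")  # torch.load under the hood
if model is None:
    print("No path supplied")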
@@ -1512,16 +1556,14 @@ class DGL:
         return cm
 
     @staticmethod
-    def
+    def DatasetByGraphs(dictionary, key="node_attr"):
         """
         Returns a DGL Dataset from the input DGL graphs.
 
         Parameters
         ----------
-
-            The input
-        labels : list
-            The list of labels.
+        dictionary : dict
+            The input dictionary
         key : str
             The key used for the node attributes.
 
@@ -1531,11 +1573,9 @@ class DGL:
         The creatred DGL dataset.
 
         """
-
-
-
-        labels = [labels]
-        return _GraphDGL(DGLGraphs, labels, key)
+        graphs = dictionary['graphs']
+        labels = dictionary['labels']
+        return _Dataset(graphs, labels, key)
 
     @staticmethod
     def DatasetByImportedCSV_NC(folderPath):
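DatasetByGraphs now takes a single dictionary with 'graphs' and 'labels' keys, matching the {'graphs': ..., 'labels': ...} dictionaries built elsewhere in the module, and wraps them in the renamed _Dataset class. A usage sketch, assuming dgl_graphs and labels were prepared beforehand:

from topologicpy.DGL import DGL

# dgl_graphs: list of dgl.DGLGraph, labels: list of ints, both assumed prepared
dataset = DGL.DatasetByGraphs({'graphs': dgl_graphs, 'labels': labels}, key="node_attr")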
@@ -1584,7 +1624,7 @@ class DGL:
             node_attr_key = 'node_labels'
         else:
             raise NotImplementedError
-        return
+        return _Dataset(dgl_graphs, dgl_labels, node_attr_key)
 
     @staticmethod
     def DatasetBySample_NC(name="Cora"):
@@ -1617,7 +1657,7 @@ class DGL:
             raise NotImplementedError
 
     @staticmethod
-    def
+    def DatasetGraphs(dataset):
         """
         Returns the DGL graphs found the in the input dataset.
 
@@ -1644,7 +1684,7 @@ class DGL:
         return graphs
 
     @staticmethod
-    def
+    def GraphEdgeData(graph):
         """
         Returns the edge data found in the input DGL graph
         Parameters
@@ -1658,18 +1698,22 @@ class DGL:
         The edge data.
 
         """
-        return
+        return graph.edata
 
     @staticmethod
-    def Hyperparameters(optimizer, cv_type="Holdout", split=[0.8,0.1,0.1], k_folds=5,
+    def Hyperparameters(optimizer, model_type="classifier", cv_type="Holdout", split=[0.8,0.1,0.1], k_folds=5,
                         hl_widths=[32], conv_layer_type="SAGEConv", pooling="AvgPooling",
-                        batch_size=1, epochs=1, use_gpu=False, loss_function="Cross Entropy"
-                        classifier_path="", results_path=""):
+                        batch_size=1, epochs=1, use_gpu=False, loss_function="Cross Entropy"):
         """
         Creates a hyperparameters object based on the input settings.
 
         Parameters
         ----------
+        model_type : str , optional
+            The desired type of model. The options are:
+            - "Classifier"
+            - "Regressor"
+            The option is case insensitive. The default is "classifierholdout"
         optimizer : Optimizer
             The desired optimizer.
         cv_type : str , optional
@@ -1692,10 +1736,6 @@ class DGL:
             If set to True, the model will attempt to use the GPU. The default is False.
         loss_function : str , optional
             The desired loss function. The optionals are "Cross-Entropy" or "Negative Log Likelihood". It is case insensitive. The default is "Cross-Entropy".
-        classifier_path : str
-            The file path at which to save the trained classifier.
-        results_path : str
-            The file path at which to save the training and testing results.
 
         Returns
         -------
@@ -1705,40 +1745,31 @@ class DGL:
         """
 
         if optimizer['name'].lower() == "adadelta":
-
+            optimizer_str = "Adadelta"
         elif optimizer['name'].lower() == "adagrad":
-
+            optimizer_str = "Adagrad"
         elif optimizer['name'].lower() == "adam":
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                        hl_widths,
-                        conv_layer_type,
-                        pooling,
-                        batch_size,
-                        epochs,
-                        use_gpu,
-                        loss_function,
-                        classifier_path,
-                        results_path)
+            optimizer_str = "Adam"
+        return _Hparams(model_type,
+                        optimizer_str,
+                        optimizer['amsgrad'],
+                        optimizer['betas'],
+                        optimizer['eps'],
+                        optimizer['lr'],
+                        optimizer['lr_decay'],
+                        optimizer['maximize'],
+                        optimizer['rho'],
+                        optimizer['weight_decay'],
+                        cv_type,
+                        split,
+                        k_folds,
+                        hl_widths,
+                        conv_layer_type,
+                        pooling,
+                        batch_size,
+                        epochs,
+                        use_gpu,
+                        loss_function)
 
     @staticmethod
     def OneHotEncode(item, categories):
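Hyperparameters now takes model_type and maps the optimizer dictionary onto the extended _Hparams constructor; the checkpoint and results paths are gone in favor of the explicit save(path) methods. A usage sketch, building the optimizer dictionary by hand in the format DGL.Optimizer returns further below:

from topologicpy.DGL import DGL

optimizer = {"name": "Adam", "amsgrad": False, "betas": (0.9, 0.999), "eps": 1e-6,
             "lr": 0.001, "maximize": False, "weight_decay": 0, "rho": 0.9, "lr_decay": 0}
hparams = DGL.Hyperparameters(optimizer,
                              model_type="regressor",
                              cv_type="k-fold",
                              k_folds=5,
                              conv_layer_type="SAGEConv",
                              batch_size=32,
                              epochs=20)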
@@ -1767,7 +1798,7 @@ class DGL:
         return returnList
 
     @staticmethod
-    def
+    def DatasetLabels(dataset):
         """
         Returns the labels of the graphs in the input dataset
 
@@ -1784,7 +1815,7 @@ class DGL:
         return [int(g[1]) for g in dataset]
 
     @staticmethod
-    def
+    def DatasetMerge(datasets, key="node_attr"):
         """
         Merges the input list of datasets into one dataset
 
@@ -1802,12 +1833,12 @@ class DGL:
         graphs = []
         labels = []
         for ds in datasets:
-            graphs += DGL.
-            labels += DGL.
-        return DGL.
+            graphs += DGL.DatasetGraphs(ds)
+            labels += DGL.DatasetLabels(ds)
+        return DGL.DatasetByGraphs(graphs, labels, key=key)
 
     @staticmethod
-    def
+    def GraphNodeData(graph):
         """
         Returns the node data found in the input dgl_graph
 
@@ -1822,10 +1853,10 @@ class DGL:
         The node data.
 
         """
-        return
+        return graph.ndata
 
     @staticmethod
-    def
+    def DatasetRemoveCategory(dataset, label, key="node_attr"):
         """
         Removes graphs from the input dataset that have the input label
 
@@ -1845,18 +1876,18 @@ class DGL:
 
         """
 
-        graphs = DGL.
-        labels = DGL.
+        graphs = DGL.DatasetGraphs(dataset)
+        labels = DGL.DatasetLabels(dataset)
         new_graphs = []
         new_labels = []
         for i in range(len(labels)):
             if not labels[i] == label:
                 new_graphs.append(graphs[i])
                 new_labels.append(labels[i])
-        return DGL.
+        return DGL.DatasetByGraphs(new_graphs, new_labels, key)
 
     @staticmethod
-    def
+    def DatasetSplit(dataset, fracList=[0.8, 0.1, 0.1], shuffle=False, randomState=None, key="node_attr"):
         """
         Splits the dataset into training, validation, and testing datasets.
 
@@ -1888,15 +1919,15 @@ class DGL:
             return None
         datasets = dgl.data.utils.split_dataset(dataset, frac_list=fracList, shuffle=shuffle, random_state=randomState)
         if fracList[0] > 0:
-            train_ds = DGL.
+            train_ds = DGL.DatasetByGraphs({'graphs': DGL.DatasetGraphs(datasets[0]), 'labels' :DGL.DatasetLabels(datasets[0])}, key=key)
         else:
             train_ds = None
         if fracList[1] > 0:
-            validate_ds = DGL.
+            validate_ds = DGL.DatasetByGraphs({'graphs': DGL.DatasetGraphs(datasets[1]), 'labels' :DGL.DatasetLabels(datasets[1])}, key=key)
         else:
             validate_ds = None
         if fracList[2] > 0:
-            test_ds = DGL.
+            test_ds = DGL.DatasetByGraphs({'graphs': DGL.DatasetGraphs(datasets[2]), 'labels' :DGL.DatasetLabels(datasets[2])}, key=key)
         else:
             test_ds = None
 
@@ -1941,7 +1972,7 @@ class DGL:
         return {"name":name, "amsgrad":amsgrad, "betas":betas, "eps":eps, "lr": lr, "maximize":maximize, "weight_decay":weightDecay, "rho":rho, "lr_decay":lr_decay}
 
     @staticmethod
-    def
+    def ModelClassify(model, dataset, node_attr_key="node_attr"):
         """
         Predicts the classification the labels of the input dataset.
 
@@ -1949,8 +1980,8 @@ class DGL:
         ----------
         dataset : DGLDataset
             The input DGL dataset.
-
-            The input trained
+        model : Model
+            The input trained model.
         node_attr_key : str , optional
             The key used for node attributes. The default is "node_attr".
 
@@ -1966,7 +1997,7 @@ class DGL:
         probabilities = []
         for item in tqdm(dataset, desc='Classifying', leave=False):
             graph = item[0]
-            pred =
+            pred = model(graph, graph.ndata[node_attr_key].float())
             labels.append(pred.argmax(1).item())
             probability = (torch.nn.functional.softmax(pred, dim=1).tolist())
             probability = probability[0]
@@ -1977,16 +2008,16 @@ class DGL:
         return {"predictions":labels, "probabilities":probabilities}
 
     @staticmethod
-    def
+    def ModelPredict(model, dataset, node_attr_key="node_attr"):
         """
-        Predicts the
+        Predicts the value of the input dataset.
 
         Parameters
         ----------
         dataset : DGLDataset
             The input DGL dataset.
-
-            The input trained
+        model : Model
+            The input trained model.
         node_attr_key : str , optional
             The key used for node attributes. The default is "node_attr".
 
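ModelClassify and ModelPredict now take the trained model explicitly as their first argument. A sketch of classifying a dataset with a previously saved model (the file name and dataset are assumed):

from topologicpy.DGL import DGL

model = DGL.ModelByFilePath("my_classifier.pt")   # hypothetical path; dataset assumed prepared
results = DGL.ModelClassify(model, dataset, node_attr_key="node_attr")
print(results["predictions"])     # one class index per graph
print(results["probabilities"])   # flattened softmax probabilities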
@@ -1996,24 +2027,23 @@ class DGL:
             The list of predictions
         """
         values = []
-        for item in tqdm(dataset, desc='Predicting'):
+        for item in tqdm(dataset, desc='Predicting', leave=False):
             graph = item[0]
-            pred =
+            pred = model(graph, graph.ndata[node_attr_key].float())
             values.append(round(pred.item(), 3))
         return values
 
     @staticmethod
-    def
+    def ModelClassifyNodes(model, dataset):
         """
         Predicts the calssification of the node labels found in the input dataset using the input classifier.
 
         Parameters
         ----------
+        model : Model
+            The input model.
         dataset : DGLDataset
             The input DGL Dataset.
-
-        classifier : Classifier
-            The input classifier.
 
         Returns
         -------
@@ -2041,7 +2071,7 @@ class DGL:
         testLabels = []
         testPredictions = []
 
-        graphs = DGL.
+        graphs = DGL.DatasetGraphs(dataset)
         for g in graphs:
             if not g.ndata:
                 continue
@@ -2059,7 +2089,7 @@ class DGL:
             testLabels.append(test_labels.tolist())
 
             # Forward
-            logits =
+            logits = model(g, features)
             train_logits = logits[train_mask]
             val_logits = logits[val_mask]
             test_logits = logits[test_mask]
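In ModelClassifyNodes the forward pass now calls the model directly, and the resulting logits are sliced by the graph's train/validation/test node masks. A hedged sketch of deriving per-split accuracy from such logits (the `*_labels` variables follow the naming pattern visible in the diff and are otherwise hypothetical):

    from sklearn.metrics import accuracy_score

    # Predicted class per node, split by mask.
    train_pred = train_logits.argmax(1).tolist()
    val_pred = val_logits.argmax(1).tolist()
    test_pred = test_logits.argmax(1).tolist()

    # Accuracy per split against the ground-truth node labels.
    train_acc = accuracy_score(train_labels.tolist(), train_pred)
    val_acc = accuracy_score(val_labels.tolist(), val_pred)
    test_acc = accuracy_score(test_labels.tolist(), test_pred)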
@@ -2089,7 +2119,7 @@ class DGL:
     @staticmethod
     def Show(data,
              labels,
-             title="Training/Validation
+             title="Training/Validation",
              xTitle="Epochs",
              xSpacing=1,
              yTitle="Accuracy and Loss",
@@ -2186,11 +2216,11 @@ class DGL:
             marginBottom=marginBottom
         )
         Plotly.Show(fig, renderer=renderer)
-
+
     @staticmethod
-    def
+    def Model(hparams, trainingDataset, validationDataset=None, testingDataset=None):
         """
-
+        Creates a neural network classifier.
 
         Parameters
         ----------
@@ -2202,91 +2232,231 @@ class DGL:
             The input validation dataset. If not specified, a portion of the trainingDataset will be used for validation according to the split list as specified in the hyper-parameters.
         testingDataset : DGLDataset
             The input testing dataset. If not specified, a portion of the trainingDataset will be used for testing according to the split list as specified in the hyper-parameters.
-        overwrite : bool , optional
-            If set to True, previous saved results files are overwritten. Otherwise, the new results are appended to the previously saved files. The default is True.
 
         Returns
         -------
-
-
+        Classifier
+            The created classifier
+
+        """
+
+        model = None
+        if hparams.model_type.lower() == "classifier":
+            if hparams.cv_type.lower() == "holdout":
+                model = _ClassifierHoldout(hparams=hparams, trainingDataset=trainingDataset, validationDataset=validationDataset, testingDataset=testingDataset)
+            elif hparams.cv_type.lower() == "k-fold" or hparams.cv_type.lower() == "kfold":
+                model = _ClassifierKFold(hparams=hparams, trainingDataset=trainingDataset, testingDataset=testingDataset)
+        elif hparams.model_type.lower() == "regressor":
+            if hparams.cv_type.lower() == "holdout":
+                model = _RegressorHoldout(hparams=hparams, trainingDataset=trainingDataset, validationDataset=validationDataset, testingDataset=testingDataset)
+            elif hparams.cv_type.lower() == "k-fold" or hparams.cv_type.lower() == "kfold":
+                model = _RegressorKFold(hparams=hparams, trainingDataset=trainingDataset, testingDataset=testingDataset)
+        else:
+            raise NotImplementedError
+        return model
 
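Model() replaces the separate training entry points with a single factory that dispatches on hparams.model_type ("classifier" or "regressor") and hparams.cv_type ("holdout" or "k-fold"/"kfold") to one of four private wrappers; any other model_type raises NotImplementedError. Note the new docstring still says "classifier" even though the factory also builds regressors. A sketch of the intended call, assuming the module exposes a Hyperparameters(...) factory around _Hparams (otherwise _Hparams can be constructed directly with the same keyword arguments; all values below are illustrative):

    from topologicpy.DGL import DGL

    # model_type and cv_type drive the dispatch inside DGL.Model().
    hparams = DGL.Hyperparameters(model_type="classifier", cv_type="holdout",
                                  split=[0.8, 0.1, 0.1], hl_widths=[32],
                                  conv_layer_type="SAGEConv", epochs=10)
    model = DGL.Model(hparams, trainingDataset=dataset)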
+    @staticmethod
+    def ModelTrain(model):
         """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        #for l in temp_list:
-            #tr_a_l.append((sum(l) / len(l)))
-        #temp_list = Helper.Transpose(classifier.training_loss_list)
-        #tr_l_l = []
-        #for l in temp_list:
-            #tr_l_l.append((sum(l) / len(l)))
-        #temp_list = Helper.Transpose(classifier.validation_accuracy_list)
-        #te_a_l = []
-        #for l in temp_list:
-            #te_a_l.append((sum(l) / len(l)))
-        #temp_list = Helper.Transpose(classifier.validation_loss_list)
-        #te_l_l = []
-        #for l in temp_list:
-            #te_l_l.append((sum(l) / len(l)))
-
-        #classifier.training_accuracy_list = tr_a_l
-        #classifier.training_loss_list = tr_l_l
-        #classifier.validation_accuracy_list = te_a_l
-        #classifier.validation_loss_list = te_l_l
+        Trains the neural network model.
+
+        Parameters
+        ----------
+        model : Model
+            The input model.
+
+        Returns
+        -------
+        Model
+            The trained model
+
+        """
+        if not model:
+            return None
+        model.train()
+        return model
 
-
-
-
-
-
-
-
-
+    @staticmethod
+    def ModelTest(model):
+        """
+        Tests the neural network model.
+
+        Parameters
+        ----------
+        model : Model
+            The input model.
+
+        Returns
+        -------
+        Model
+            The tested model
+
+        """
+        if not model:
+            return None
+        model.test()
+        return model
 
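ModelTrain and ModelTest are thin guards over the wrapper objects' own train() and test() methods: each returns None for a missing model and otherwise hands back the same (now trained or tested) object, so the calls chain naturally after Model(). Continuing the sketch above:

    from topologicpy.DGL import DGL

    model = DGL.Model(hparams, trainingDataset=dataset)
    model = DGL.ModelTrain(model)   # runs the wrapper's training loop
    model = DGL.ModelTest(model)    # populates the testing metrics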
-
-
-
-
-        'Split': [classifier.hparams.split],
-        'K-Folds': [classifier.hparams.k_folds],
-        'HL Widths': [classifier.hparams.hl_widths],
-        'Conv Layer Type': [classifier.hparams.conv_layer_type],
-        'Pooling': [classifier.hparams.pooling],
-        'Learning Rate': [classifier.hparams.lr],
-        'Batch Size': [classifier.hparams.batch_size],
-        'Epochs': [classifier.hparams.epochs],
-        'Training Accuracy': [classifier.training_accuracy_list],
-        'Validation Accuracy': [classifier.validation_accuracy_list],
-        'Testing Accuracy': [classifier.testing_accuracy_list],
-        'Training Loss': [classifier.training_loss_list],
-        'Validation Loss': [classifier.validation_loss_list],
-        'Testing Loss': [classifier.testing_loss_list]
-        }
+    @staticmethod
+    def ModelSave(model, path=None):
+        """
+        Saves the model.
 
-
-
-
-
-
-
+        Parameters
+        ----------
+        model : Model
+            The input model.
+
+        Returns
+        -------
+        bool
+            True if the model is saved correctly. False otherwise.
+
+        """
+        if not model:
+            return None
+        if path:
+            # Make sure the file extension is .pt
+            ext = path[len(path)-3:len(path)]
+            if ext.lower() != ".pt":
+                path = path+".pt"
+        return model.save(path)
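ModelSave appends a ".pt" extension when the supplied path lacks one, then delegates to the wrapper's save(). Note that only the extension fix is guarded by `if path:`; a None path still reaches model.save(None). A sketch:

    import os
    from topologicpy.DGL import DGL

    # The extension is normalized, so this writes ~/my_model.pt.
    home = os.path.expanduser("~")
    status = DGL.ModelSave(model, path=os.path.join(home, "my_model"))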
+
+    @staticmethod
+    def ModelData(model):
+        """
+        Returns the data of the model
+
+        Parameters
+        ----------
+        model : Model
+            The input model.
+
+        Returns
+        -------
+        dict
+            A dictionary containing the model data.
+
+        """
+        from topologicpy.Helper import Helper
+
+        data = {'Model Type': [model.hparams.model_type],
+                'Optimizer': [model.hparams.optimizer_str],
+                'CV Type': [model.hparams.cv_type],
+                'Split': model.hparams.split,
+                'K-Folds': [model.hparams.k_folds],
+                'HL Widths': model.hparams.hl_widths,
+                'Conv Layer Type': [model.hparams.conv_layer_type],
+                'Pooling': [model.hparams.pooling],
+                'Learning Rate': [model.hparams.lr],
+                'Batch Size': [model.hparams.batch_size],
+                'Epochs': [model.hparams.epochs]
+                }
+
+        if model.hparams.model_type.lower() == "classifier":
+            testing_accuracy_list = [model.testing_accuracy] * model.hparams.epochs
+            testing_loss_list = [model.testing_loss] * model.hparams.epochs
+            metrics_data = {
+                'Training Accuracy': [model.training_accuracy_list],
+                'Validation Accuracy': [model.validation_accuracy_list],
+                'Testing Accuracy' : [testing_accuracy_list],
+                'Training Loss': [model.training_loss_list],
+                'Validation Loss': [model.validation_loss_list],
+                'Testing Loss' : [testing_loss_list]
+                }
+            if model.hparams.cv_type.lower() == "k-fold":
+                accuracy_data = {
+                    'Accuracies' : [model.accuracies],
+                    'Max Accuracy' : [model.max_accuracy]
+                    }
+                metrics_data.update(accuracy_data)
+            data.update(metrics_data)
+
+        elif model.hparams.model_type.lower() == "regressor":
+            testing_loss_list = [model.testing_loss] * model.hparams.epochs
+            metrics_data = {
+                'Training Loss': [model.training_loss_list],
+                'Validation Loss': [model.validation_loss_list],
+                'Testing Loss' : [testing_loss_list]
+                }
+            if model.hparams.cv_type.lower() == "k-fold":
+                loss_data = {
+                    'Losses' : [model.losses],
+                    'Min Loss' : [model.min_loss]
+                    }
+                metrics_data.update(loss_data)
+            data.update(metrics_data)
+
         return data
 
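ModelData gathers the hyper-parameters and per-epoch metric series into one dictionary; classifier runs add accuracy keys, regressor runs carry only loss keys, and k-fold runs additionally get 'Accuracies'/'Max Accuracy' or 'Losses'/'Min Loss'. A sketch of reading it back:

    data = DGL.ModelData(model)
    print(data['Model Type'][0])          # "classifier" or "regressor"
    print(data['Training Accuracy'][0])   # per-epoch series (classifier runs)
    if data['CV Type'][0].lower() == "k-fold":
        print(data['Max Accuracy'][0])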
+    @staticmethod
+    def GraphsByFilePath(path, labelKey="value", key='node_attr'):
+        graphs, label_dict = load_graphs(path)
+        labels = label_dict[labelKey].tolist()
+        return {"graphs" : graphs, "labels": labels}
+
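GraphsByFilePath wraps dgl.load_graphs: it returns the stored graphs together with the label tensor saved under labelKey, converted to a plain list. Note the key parameter is currently unused and the method ships without a docstring. A round-trip sketch, assuming the file was written with dgl.save_graphs:

    import dgl
    import torch

    # Hypothetical save side: a label tensor stored under "value".
    dgl.save_graphs("graphs.bin", graphs, {"value": torch.tensor(labels)})

    loaded = DGL.GraphsByFilePath("graphs.bin", labelKey="value")
    graphs, labels = loaded["graphs"], loaded["labels"]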
+    @staticmethod
+    def DataExportToCSV(data, path, overwrite=True):
+        """
+        Exports the input data to a CSV file
 
+        Parameters
+        ----------
+        data : dict
+            The input data. See Data(model)
+        overwrite : bool , optional
+            If set to True, previous saved results files are overwritten. Otherwise, the new results are appended to the previously saved files. The default is True.
+
+        Returns
+        -------
+        bool
+            True if the data is saved correctly to a CSV file. False otherwise.
 
+        """
+        from topologicpy.Helper import Helper
+
+        # Make sure the file extension is .csv
+        ext = path[len(path)-4:len(path)]
+        if ext.lower() != ".csv":
+            path = path+".csv"
+
+        epoch_list = list(range(1, data['Epochs'][0]+1))
+
+        d = [data['Model Type'], data['Optimizer'], data['CV Type'], [data['Split']], data['K-Folds'], data['HL Widths'], data['Conv Layer Type'], data['Pooling'], data['Learning Rate'], data['Batch Size'], epoch_list]
+        columns = ['Model Type', 'Optimizer', 'CV Type', 'Split', 'K-Folds', 'HL Widths', 'Conv Layer Type', 'Pooling', 'Learning Rate', 'Batch Size', 'Epochs']
+
+        if data['Model Type'][0].lower() == "classifier":
+            d.extend([data['Training Accuracy'][0], data['Validation Accuracy'][0], data['Testing Accuracy'][0], data['Training Loss'][0], data['Validation Loss'][0], data['Testing Loss'][0]])
+            columns.extend(['Training Accuracy', 'Validation Accuracy', 'Testing Accuracy', 'Training Loss', 'Validation Loss', 'Testing Loss'])
+            if data['CV Type'][0].lower() == "k-fold":
+                d.extend([data['Accuracies'], data['Max Accuracy']])
+                columns.extend(['Accuracies', 'Max Accuracy'])
+
+        elif data['Model Type'][0].lower() == "regressor":
+            d.extend([data['Training Loss'][0], data['Validation Loss'][0], data['Testing Loss'][0]])
+            columns.extend(['Training Loss', 'Validation Loss', 'Testing Loss'])
+            if data['CV Type'][0].lower() == "k-fold":
+                d.extend([data['Losses'], data['Min Loss']])
+                columns.extend(['Losses', 'Min Loss'])
+
+        d = Helper.Iterate(d)
+        d = Helper.Transpose(d)
+        df = pd.DataFrame(d, columns=columns)
+
+        status = False
+        if path:
+            if overwrite:
+                mode = 'w+'
+            else:
+                mode = 'a'
+            try:
+                df.to_csv(path, mode=mode, index = False, header=True)
+                status = True
+            except:
+                status = False
+        return status
+
+    '''
     @staticmethod
     def TrainRegressor(hparams, trainingDataset, validationDataset=None, testingDataset=None, overwrite=True):
         """
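DataExportToCSV flattens a ModelData dictionary into per-epoch rows with Helper.Iterate and Helper.Transpose, then writes them via pandas; overwrite=True opens the file in 'w+' mode, otherwise rows are appended (the header is written in both modes). The triple-quoted string added at the end of this block, closed in the next hunk, retires the old TrainRegressor path. A sketch chaining export after ModelData:

    import os
    from topologicpy.DGL import DGL

    data = DGL.ModelData(model)
    # ".csv" is appended automatically when missing; returns True on success.
    ok = DGL.DataExportToCSV(data, os.path.join(os.path.expanduser("~"), "dgl_results"))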
@@ -2352,10 +2522,7 @@ class DGL:
         else:
             df.to_csv(regressor.hparams.results_path, mode='a', index = False, header=False)
         return data
-
-
-
-
+    '''
 
     @staticmethod
     def _TrainClassifier_NC(graphs, model, hparams):
@@ -2456,7 +2623,7 @@ class DGL:
 
         # hparams, dataset, numLabels, sample = item
         # We will consider only the first graph in the dataset.
-        graphs = DGL.
+        graphs = DGL.DatasetGraphs(dataset)
         # Sample a random list from the graphs
         if sample < len(graphs) and sample > 0:
             graphs = random.sample(graphs, sample)
|