topologicpy 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

topologicpy/DGL.py CHANGED
@@ -30,20 +30,24 @@ try:
     from dgl.data import DGLDataset
     from dgl.dataloading import GraphDataLoader
     from dgl.nn import GINConv, GraphConv, SAGEConv, TAGConv
+    from dgl import save_graphs, load_graphs
 except:
     call = [sys.executable, '-m', 'pip', 'install', 'dgl', 'dglgo', '-f', 'https://data.dgl.ai/wheels/repo.html', '--upgrade', '-t', sys.path[0]]
     subprocess.run(call)
     import dgl
     from dgl.data import DGLDataset
     from dgl.nn import GraphConv
+    from dgl import save_graphs, load_graphs
 try:
     import sklearn
     from sklearn.model_selection import KFold
+    from sklearn.metrics import accuracy_score
 except:
     call = [sys.executable, '-m', 'pip', 'install', 'scikit-learn', '-t', sys.path[0]]
     subprocess.run(call)
     import sklearn
     from sklearn.model_selection import KFold
+    from sklearn.metrics import accuracy_score
 try:
     from tqdm.auto import tqdm
 except:
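Both import branches now also pull in dgl's graph serialization helpers, save_graphs and load_graphs. A minimal sketch of the round trip they enable (the file name is hypothetical):

```python
import torch
import dgl

# Sketch only: serialize a list of DGLGraphs plus per-graph label tensors
# to one binary file, then restore both.
g = dgl.graph(([0, 1], [1, 2]), num_nodes=3)        # toy graph
labels = {"glabel": torch.tensor([0])}               # one label tensor per graph
dgl.save_graphs("dataset.bin", [g], labels)          # hypothetical file name
graphs, label_dict = dgl.load_graphs("dataset.bin")  # list of graphs + label dict
```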
@@ -57,16 +61,12 @@ import topologic
 from topologicpy.Dictionary import Dictionary
 import os
 
-
 import random
 import time
 from datetime import datetime
 import copy
 
-checkpoint_path = os.path.join(os.path.expanduser('~'), "dgl_classifier.pt")
-results_path = os.path.join(os.path.expanduser('~'), "dgl_results.csv")
-
-class _GraphDGL(DGLDataset):
+class _Dataset(DGLDataset):
     def __init__(self, graphs, labels, node_attr_key):
         super().__init__(name='GraphDGL')
         self.graphs = graphs
@@ -84,8 +84,8 @@ class _GraphDGL(DGLDataset):
         return len(self.graphs)
 
 class _Hparams:
-    def __init__(self, optimizer_str="Adam", amsgrad=False, betas=(0.9, 0.999), eps=1e-6, lr=0.001, lr_decay= 0, maximize=False, rho=0.9, weight_decay=0, cv_type="Holdout", split=[0.8,0.1, 0.1], k_folds=5, hl_widths=[32], conv_layer_type='SAGEConv', pooling="AvgPooling", batch_size=32, epochs=1,
-                 use_gpu=False, loss_function="Cross Entropy", checkpoint_path=checkpoint_path, results_path=results_path):
+    def __init__(self, model_type="ClassifierHoldout", optimizer_str="Adam", amsgrad=False, betas=(0.9, 0.999), eps=1e-6, lr=0.001, lr_decay= 0, maximize=False, rho=0.9, weight_decay=0, cv_type="Holdout", split=[0.8,0.1, 0.1], k_folds=5, hl_widths=[32], conv_layer_type='SAGEConv', pooling="AvgPooling", batch_size=32, epochs=1,
+                 use_gpu=False, loss_function="Cross Entropy"):
         """
         Parameters
         ----------
@@ -112,9 +112,6 @@ class _Hparams:
             each step of an epoch
         epochs : int
             An epoch means training the neural network with all the training data for one cycle. In an epoch, we use all of the data exactly once. A forward pass and a backward pass together are counted as one pass
-        checkpoint_path: str
-            Path to save the classifier after training. It is preferred for
-            the path to have .pt extension
         use_GPU : use the GPU. Otherwise, use the CPU
 
         Returns
@@ -123,6 +120,7 @@ class _Hparams:
 
         """
 
+        self.model_type = model_type
         self.optimizer_str = optimizer_str
         self.amsgrad = amsgrad
         self.betas = betas
@@ -142,8 +140,6 @@ class _Hparams:
         self.epochs = epochs
         self.use_gpu = use_gpu
         self.loss_function = loss_function
-        self.checkpoint_path = checkpoint_path
-        self.results_path = results_path
 
 class _Classic(nn.Module):
     def __init__(self, in_feats, h_feats, num_classes):
@@ -184,7 +180,7 @@ class _Classic(nn.Module):
 
 class _ClassicReg(nn.Module):
     def __init__(self, in_feats, h_feats):
-        super(GCN_Classic_reg, self).__init__()
+        super(_ClassicReg, self).__init__()
         assert isinstance(h_feats, list), "h_feats must be a list"
         h_feats = [x for x in h_feats if x is not None]
         assert len(h_feats) !=0, "h_feats is empty. unable to add hidden layers"
@@ -417,13 +413,22 @@ class _RegressorHoldout:
         self.hparams = hparams
         if hparams.conv_layer_type.lower() == 'classic':
             self.model = _ClassicReg(trainingDataset.dim_nfeats, hparams.hl_widths).to(device)
+        elif hparams.conv_layer_type.lower() == 'ginconv':
+            self.model = _GINConv(trainingDataset.dim_nfeats, hparams.hl_widths,
+                                  1, hparams.pooling).to(device)
         elif hparams.conv_layer_type.lower() == 'graphconv':
             self.model = _GraphConvReg(trainingDataset.dim_nfeats, hparams.hl_widths, hparams.pooling).to(device)
+        elif hparams.conv_layer_type.lower() == 'sageconv':
+            self.model = _SAGEConv(trainingDataset.dim_nfeats, hparams.hl_widths,
+                                   1, hparams.pooling).to(device)
+        elif hparams.conv_layer_type.lower() == 'tagconv':
+            self.model = _TAGConv(trainingDataset.dim_nfeats, hparams.hl_widths,
+                                  1, hparams.pooling).to(device)
         elif hparams.conv_layer_type.lower() == 'gcn':
             self.model = _ClassicReg(trainingDataset.dim_nfeats, hparams.hl_widths).to(device)
         else:
             raise NotImplementedError
-
+
         if hparams.optimizer_str.lower() == "adadelta":
             self.optimizer = torch.optim.Adadelta(self.model.parameters(), eps=hparams.eps,
                                                   lr=hparams.lr, rho=hparams.rho, weight_decay=hparams.weight_decay)
@@ -433,13 +438,10 @@ class _RegressorHoldout:
         elif hparams.optimizer_str.lower() == "adam":
             self.optimizer = torch.optim.Adam(self.model.parameters(), amsgrad=hparams.amsgrad, betas=hparams.betas, eps=hparams.eps,
                                               lr=hparams.lr, maximize=hparams.maximize, weight_decay=hparams.weight_decay)
+
         self.use_gpu = hparams.use_gpu
-
         self.training_loss_list = []
         self.validation_loss_list = []
-        self.training_accuracy_list = []
-        self.validation_accuracy_list = []
-        self.testing_accuracy_list = []
         self.node_attr_key = trainingDataset.node_attr_key
 
         # train, validate, test split
@@ -479,19 +481,12 @@ class _RegressorHoldout:
         #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
         device = torch.device("cpu")
         # Init the loss and accuracy reporting lists
-        self.training_accuracy_list = []
         self.training_loss_list = []
-        self.validation_accuracy_list = []
         self.validation_loss_list = []
-        self.testing_accuracy_list = []
-
 
-        best_rmse = np.inf
+
         # Run the training loop for defined number of epochs
-        for _ in tqdm(range(self.hparams.epochs), desc='Epochs'):
-            num_correct = 0
-            num_tests = 0
-            temp_loss_list = []
+        for _ in tqdm(range(self.hparams.epochs), desc='Epochs', total=self.hparams.epochs, leave=False):
             # Iterate over the DataLoader for training data
             for batched_graph, labels in tqdm(self.train_dataloader, desc='Training', leave=False):
                 # Make sure the model is in training mode
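The holdout regressor's bookkeeping is simplified in this and the next hunk: the separate "accuracy" lists are dropped and the loop records the square root of the MSE loss, i.e. an RMSE, per epoch. For reference, the quantity appended is just:

```python
import torch
import torch.nn.functional as F

pred = torch.tensor([2.0, 3.0])     # toy predictions
target = torch.tensor([2.5, 2.5])   # toy targets
rmse = torch.sqrt(F.mse_loss(pred, target)).item()  # what the loop appends
print(rmse)  # 0.5
```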
@@ -510,46 +505,241 @@ class _RegressorHoldout:
                 # Perform optimization
                 self.optimizer.step()
 
-            self.training_accuracy = torch.sqrt(loss).item()
-            self.training_accuracy_list.append(self.training_accuracy)
+            self.training_loss_list.append(torch.sqrt(loss).item())
             self.validate()
-            self.validation_accuracy_list.append(torch.sqrt(self.validation_accuracy).item())
-            if self.validation_accuracy < best_rmse:
-                best_rmse = self.validation_accuracy
-                best_weights = copy.deepcopy(self.model.state_dict())
-            self.test()
-            self.testing_accuracy_list.append(torch.sqrt(self.testing_accuracy).item())
-        if self.hparams.checkpoint_path is not None:
-            # Save the best model
-            self.model.load_state_dict(best_weights)
-            self.model.eval()
-            torch.save(self.model, self.hparams.checkpoint_path)
+            self.validation_loss_list.append(torch.sqrt(self.validation_loss).item())
+
 
     def validate(self):
-        #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
         device = torch.device("cpu")
-        temp_validation_loss = []
         self.model.eval()
         for batched_graph, labels in tqdm(self.validate_dataloader, desc='Validating', leave=False):
             pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float()).to(device)
             loss = F.mse_loss(torch.flatten(pred), labels.float())
-        self.validation_accuracy = loss
-        return self.validation_accuracy
+        self.validation_loss = loss
 
     def test(self):
         #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
         device = torch.device("cpu")
-        temp_validation_loss = []
         self.model.eval()
-        for batched_graph, labels in tqdm(self.validate_dataloader, desc='Validating', leave=False):
+        for batched_graph, labels in tqdm(self.test_dataloader, desc='Testing', leave=False):
             pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float()).to(device)
             loss = F.mse_loss(torch.flatten(pred), labels.float())
-        self.testing_accuracy = loss
-        return self.testing_accuracy
+        self.testing_loss = torch.sqrt(loss).item()
+
+    def save(self, path):
+        if path:
+            # Make sure the file extension is .pt
+            ext = path[len(path)-3:len(path)]
+            if ext.lower() != ".pt":
+                path = path+".pt"
+            torch.save(self.model, path)
 
 
+class _RegressorKFold:
+    def __init__(self, hparams, trainingDataset, testingDataset=None):
+        self.trainingDataset = trainingDataset
+        self.testingDataset = testingDataset
+        self.hparams = hparams
+        self.losses = []
+        self.min_loss = 0
+        # at beginning of the script
+        #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        device = torch.device("cpu")
+        if hparams.conv_layer_type.lower() == 'classic':
+            self.model = _ClassicReg(trainingDataset.dim_nfeats, hparams.hl_widths).to(device)
+        elif hparams.conv_layer_type.lower() == 'ginconv':
+            self.model = _GINConv(trainingDataset.dim_nfeats, hparams.hl_widths,
+                                  1, hparams.pooling).to(device)
+        elif hparams.conv_layer_type.lower() == 'graphconv':
+            self.model = _GraphConvReg(trainingDataset.dim_nfeats, hparams.hl_widths, hparams.pooling).to(device)
+        elif hparams.conv_layer_type.lower() == 'sageconv':
+            self.model = _SAGEConv(trainingDataset.dim_nfeats, hparams.hl_widths,
+                                   1, hparams.pooling).to(device)
+        elif hparams.conv_layer_type.lower() == 'tagconv':
+            self.model = _TAGConv(trainingDataset.dim_nfeats, hparams.hl_widths,
+                                  1, hparams.pooling).to(device)
+        elif hparams.conv_layer_type.lower() == 'gcn':
+            self.model = _ClassicReg(trainingDataset.dim_nfeats, hparams.hl_widths).to(device)
+        else:
+            raise NotImplementedError
 
+        if hparams.optimizer_str.lower() == "adadelta":
+            self.optimizer = torch.optim.Adadelta(self.model.parameters(), eps=hparams.eps,
+                                                  lr=hparams.lr, rho=hparams.rho, weight_decay=hparams.weight_decay)
+        elif hparams.optimizer_str.lower() == "adagrad":
+            self.optimizer = torch.optim.Adagrad(self.model.parameters(), eps=hparams.eps,
+                                                 lr=hparams.lr, lr_decay=hparams.lr_decay, weight_decay=hparams.weight_decay)
+        elif hparams.optimizer_str.lower() == "adam":
+            self.optimizer = torch.optim.Adam(self.model.parameters(), amsgrad=hparams.amsgrad, betas=hparams.betas, eps=hparams.eps,
+                                              lr=hparams.lr, maximize=hparams.maximize, weight_decay=hparams.weight_decay)
+
+        self.use_gpu = hparams.use_gpu
+        self.training_loss_list = []
+        self.validation_loss_list = []
+        self.node_attr_key = trainingDataset.node_attr_key
 
+        # train, validate, test split
+        num_train = int(len(trainingDataset) * (hparams.split[0]))
+        num_validate = int(len(trainingDataset) * (hparams.split[1]))
+        num_test = len(trainingDataset) - num_train - num_validate
+        idx = torch.randperm(len(trainingDataset))
+        test_sampler = SubsetRandomSampler(idx[num_train+num_validate:num_train+num_validate+num_test])
+
+        if testingDataset:
+            self.test_dataloader = GraphDataLoader(testingDataset,
+                                                   batch_size=len(testingDataset),
+                                                   drop_last=False)
+        else:
+            self.test_dataloader = GraphDataLoader(trainingDataset, sampler=test_sampler,
+                                                   batch_size=hparams.batch_size,
+                                                   drop_last=False)
+
+    def reset_weights(self):
+        '''
+        Try resetting model weights to avoid
+        weight leakage.
+        '''
+        device = torch.device("cpu")
+        if self.hparams.conv_layer_type.lower() == 'classic':
+            self.model = _ClassicReg(self.trainingDataset.dim_nfeats, self.hparams.hl_widths).to(device)
+        elif self.hparams.conv_layer_type.lower() == 'ginconv':
+            self.model = _GINConv(self.trainingDataset.dim_nfeats, self.hparams.hl_widths,
+                                  1, self.hparams.pooling).to(device)
+        elif self.hparams.conv_layer_type.lower() == 'graphconv':
+            self.model = _GraphConvReg(self.trainingDataset.dim_nfeats, self.hparams.hl_widths, self.hparams.pooling).to(device)
+        elif self.hparams.conv_layer_type.lower() == 'sageconv':
+            self.model = _SAGEConv(self.trainingDataset.dim_nfeats, self.hparams.hl_widths,
+                                   1, self.hparams.pooling).to(device)
+        elif self.hparams.conv_layer_type.lower() == 'tagconv':
+            self.model = _TAGConv(self.trainingDataset.dim_nfeats, self.hparams.hl_widths,
+                                  1, self.hparams.pooling).to(device)
+        elif self.hparams.conv_layer_type.lower() == 'gcn':
+            self.model = _ClassicReg(self.trainingDataset.dim_nfeats, self.hparams.hl_widths).to(device)
+        else:
+            raise NotImplementedError
+
+        if self.hparams.optimizer_str.lower() == "adadelta":
+            self.optimizer = torch.optim.Adadelta(self.model.parameters(), eps=self.hparams.eps,
+                                                  lr=self.hparams.lr, rho=self.hparams.rho, weight_decay=self.hparams.weight_decay)
+        elif self.hparams.optimizer_str.lower() == "adagrad":
+            self.optimizer = torch.optim.Adagrad(self.model.parameters(), eps=self.hparams.eps,
+                                                 lr=self.hparams.lr, lr_decay=self.hparams.lr_decay, weight_decay=self.hparams.weight_decay)
+        elif self.hparams.optimizer_str.lower() == "adam":
+            self.optimizer = torch.optim.Adam(self.model.parameters(), amsgrad=self.hparams.amsgrad, betas=self.hparams.betas, eps=self.hparams.eps,
+                                              lr=self.hparams.lr, maximize=self.hparams.maximize, weight_decay=self.hparams.weight_decay)
+
+
+
+
+    def train(self):
+        device = torch.device("cpu")
+
+        # The number of folds (This should come from the hparams)
+        k_folds = self.hparams.k_folds
+
+        # Init the loss and accuracy reporting lists
+        self.training_loss_list = []
+        self.validation_loss_list = []
+
+        # Set fixed random number seed
+        torch.manual_seed(42)
+
+        # Define the K-fold Cross Validator
+        kfold = KFold(n_splits=k_folds, shuffle=True)
+
+        models = []
+        weights = []
+        losses = []
+        train_dataloaders = []
+        validate_dataloaders = []
+
+        # K-fold Cross-validation model evaluation
+        for fold, (train_ids, validate_ids) in tqdm(enumerate(kfold.split(self.trainingDataset)), desc="Fold", initial=1, total=k_folds, leave=False):
+            epoch_training_loss_list = []
+            epoch_validation_loss_list = []
+            # Sample elements randomly from a given list of ids, no replacement.
+            train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
+            validate_subsampler = torch.utils.data.SubsetRandomSampler(validate_ids)
+
+            # Define data loaders for training and testing data in this fold
+            self.train_dataloader = GraphDataLoader(self.trainingDataset, sampler=train_subsampler,
+                                                    batch_size=self.hparams.batch_size,
+                                                    drop_last=False)
+            self.validate_dataloader = GraphDataLoader(self.trainingDataset, sampler=validate_subsampler,
+                                                       batch_size=self.hparams.batch_size,
+                                                       drop_last=False)
+            # Init the neural network
+            self.reset_weights()
+
+            # Run the training loop for defined number of epochs
+            best_rmse = np.inf
+            # Run the training loop for defined number of epochs
+            for _ in tqdm(range(self.hparams.epochs), desc='Epochs', total=self.hparams.epochs, initial=1, leave=False):
+                # Iterate over the DataLoader for training data
+                for batched_graph, labels in tqdm(self.train_dataloader, desc='Training', leave=False):
+                    # Make sure the model is in training mode
+                    self.model.train()
+                    # Zero the gradients
+                    self.optimizer.zero_grad()
+
+                    # Perform forward pass
+                    pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float()).to(device)
+                    # Compute loss
+                    loss = F.mse_loss(torch.flatten(pred), labels.float())
+
+                    # Perform backward pass
+                    loss.backward()
+
+                    # Perform optimization
+                    self.optimizer.step()
+
+
+                epoch_training_loss_list.append(torch.sqrt(loss).item())
+                self.validate()
+                epoch_validation_loss_list.append(torch.sqrt(self.validation_loss).item())
+
+            models.append(self.model)
+            weights.append(copy.deepcopy(self.model.state_dict()))
+            losses.append(torch.sqrt(self.validation_loss).item())
+            train_dataloaders.append(self.train_dataloader)
+            validate_dataloaders.append(self.validate_dataloader)
+            self.training_loss_list.append(epoch_training_loss_list)
+            self.validation_loss_list.append(epoch_validation_loss_list)
+        self.losses = losses
+        min_loss = min(losses)
+        self.min_loss = min_loss
+        ind = losses.index(min_loss)
+        self.model = models[ind]
+        self.model.load_state_dict(weights[ind])
+        self.model.eval()
+        self.training_loss_list = self.training_loss_list[ind]
+        self.validation_loss_list = self.validation_loss_list[ind]
+
+    def validate(self):
+        device = torch.device("cpu")
+        self.model.eval()
+        for batched_graph, labels in tqdm(self.validate_dataloader, desc='Validating', leave=False):
+            pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float()).to(device)
+            loss = F.mse_loss(torch.flatten(pred), labels.float())
+        self.validation_loss = loss
+
+    def test(self):
+        #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        device = torch.device("cpu")
+        #self.model.eval()
+        for batched_graph, labels in tqdm(self.test_dataloader, desc='Testing', leave=False):
+            pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float()).to(device)
+            loss = F.mse_loss(torch.flatten(pred), labels.float())
+        self.testing_loss = torch.sqrt(loss).item()
+
+    def save(self, path):
+        if path:
+            # Make sure the file extension is .pt
+            ext = path[len(path)-3:len(path)]
+            if ext.lower() != ".pt":
+                path = path+".pt"
+            torch.save(self.model, path)
 
 class _ClassifierHoldout:
     def __init__(self, hparams, trainingDataset, validationDataset=None, testingDataset=None):
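Checkpointing moves off the hyperparameters object: each trainer now exposes save(path), which appends a .pt extension when missing and serializes the whole module with torch.save. A standalone sketch of the same logic (the helper name and paths are hypothetical):

```python
import torch
import torch.nn as nn

def save_model(model, path):
    # Mirrors the new save(path): append ".pt" if the extension is missing.
    if path:
        if path[-3:].lower() != ".pt":
            path = path + ".pt"
        torch.save(model, path)

save_model(nn.Linear(2, 1), "demo_model")  # writes demo_model.pt
restored = torch.load("demo_model.pt")     # the call DGL.ModelByFilePath wraps
```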
@@ -594,8 +784,6 @@ class _ClassifierHoldout:
         self.validation_loss_list = []
         self.training_accuracy_list = []
         self.validation_accuracy_list = []
-        self.testing_accuracy_list = []
-        self.testing_loss_list = []
         self.node_attr_key = trainingDataset.node_attr_key
 
         # train, validate, test split
@@ -638,14 +826,11 @@ class _ClassifierHoldout:
         self.training_loss_list = []
         self.validation_accuracy_list = []
         self.validation_loss_list = []
-        self.testing_accuracy_list = []
-        self.testing_loss_list = []
 
         # Run the training loop for defined number of epochs
-        for _ in tqdm(range(self.hparams.epochs), desc='Epochs', leave=False):
-            num_correct = 0
-            num_tests = 0
+        for _ in tqdm(range(self.hparams.epochs), desc='Epochs', initial=1, leave=False):
             temp_loss_list = []
+            temp_acc_list = []
             # Iterate over the DataLoader for training data
             for batched_graph, labels in tqdm(self.train_dataloader, desc='Training', leave=False):
                 # Make sure the model is in training mode
@@ -665,8 +850,7 @@ class _ClassifierHoldout:
 
                 # Save loss information for reporting
                 temp_loss_list.append(loss.item())
-                num_correct += (pred.argmax(1) == labels).sum().item()
-                num_tests += len(labels)
+                temp_acc_list.append(accuracy_score(labels, pred.argmax(1)))
 
                 # Perform backward pass
                 loss.backward()
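Manual correct/total counting is replaced with sklearn.metrics.accuracy_score on each batch; for integer class labels the two are equivalent:

```python
import torch
from sklearn.metrics import accuracy_score

labels = torch.tensor([0, 1, 1, 2])
pred = torch.tensor([[0.9, 0.1, 0.0],
                     [0.2, 0.7, 0.1],
                     [0.1, 0.8, 0.1],
                     [0.3, 0.3, 0.4]])
manual = (pred.argmax(1) == labels).sum().item() / len(labels)  # old style
sk = accuracy_score(labels, pred.argmax(1))                      # new style
assert manual == sk  # both 1.0 on this toy batch
```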
@@ -674,27 +858,17 @@ class _ClassifierHoldout:
                 # Perform optimization
                 self.optimizer.step()
 
-            self.training_accuracy = num_correct / num_tests
-            self.training_accuracy_list.append(self.training_accuracy)
-            self.training_loss_list.append(sum(temp_loss_list) / len(temp_loss_list))
+            self.training_accuracy_list.append(np.mean(temp_acc_list).item())
+            self.training_loss_list.append(np.mean(temp_loss_list).item())
             self.validate()
             self.validation_accuracy_list.append(self.validation_accuracy)
             self.validation_loss_list.append(self.validation_loss)
-            self.test()
-            self.testing_accuracy_list.append(self.testing_accuracy)
-            self.testing_loss_list.append(self.testing_loss)
-        if self.hparams.checkpoint_path is not None:
-            # Save the best model
-            self.model.eval()
-            self.hparams.split = [1, 0, 0]
-            torch.save(self.model, self.hparams.checkpoint_path)
 
     def validate(self):
         #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
         device = torch.device("cpu")
-        num_correct = 0
-        num_tests = 0
-        temp_validation_loss = []
+        temp_loss_list = []
+        temp_acc_list = []
         self.model.eval()
         for batched_graph, labels in tqdm(self.validate_dataloader, desc='Validating', leave=False):
             pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float()).to(device)
@@ -703,60 +877,44 @@ class _ClassifierHoldout:
                 loss = F.nll_loss(logp, labels)
             elif self.hparams.loss_function.lower() == "cross entropy":
                 loss = F.cross_entropy(pred, labels)
-            temp_validation_loss.append(loss.item())
-            num_correct += (pred.argmax(1) == labels).sum().item()
-            num_tests += len(labels)
-        self.validation_loss = (sum(temp_validation_loss) / len(temp_validation_loss))
-        self.validation_accuracy = num_correct / num_tests
-        return self.validation_accuracy
-
-    def accuracy(self, dictionary):
-        labels = dictionary['labels']
-        predictions = dictionary['predictions']
-        num_correct = 0
-        for i in range(len(predictions)):
-            if predictions[i] == labels[i]:
-                num_correct = num_correct + 1
-        return (num_correct / len(predictions))
+            temp_loss_list.append(loss.item())
+            temp_acc_list.append(accuracy_score(labels, pred.argmax(1)))
+        self.validation_accuracy = np.mean(temp_acc_list).item()
+        self.validation_loss = np.mean(temp_loss_list).item()
 
     def test(self):
-        #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-        device = torch.device("cpu")
-        num_correct = 0
-        num_tests = 0
-        temp_testing_loss = []
-        self.model.eval()
-        for batched_graph, labels in tqdm(self.test_dataloader, desc='Testing', leave=False):
-            pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float()).to(device)
-            if self.hparams.loss_function.lower() == "negative log likelihood":
-                logp = F.log_softmax(pred, 1)
-                loss = F.nll_loss(logp, labels)
-            elif self.hparams.loss_function.lower() == "cross entropy":
-                loss = F.cross_entropy(pred, labels)
-            temp_testing_loss.append(loss.item())
-            num_correct += (pred.argmax(1) == labels).sum().item()
-            num_tests += len(labels)
-        self.testing_loss = (sum(temp_testing_loss) / len(temp_testing_loss))
-        self.testing_accuracy = num_correct / num_tests
-        return self.testing_accuracy
-
-    def save(self):
-        if self.hparams.checkpoint_path is not None:
-            # Save the entire model
-            try:
-                self.model.eval()
-                torch.save(self.model, self.hparams.checkpoint_path)
-                return True
-            except:
-                return False
-        return False
+        if self.test_dataloader:
+            temp_loss_list = []
+            temp_acc_list = []
+            self.model.eval()
+            for batched_graph, labels in tqdm(self.test_dataloader, desc='Testing', leave=False):
+                pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float())
+                if self.hparams.loss_function.lower() == "negative log likelihood":
+                    logp = F.log_softmax(pred, 1)
+                    loss = F.nll_loss(logp, labels)
+                elif self.hparams.loss_function.lower() == "cross entropy":
+                    loss = F.cross_entropy(pred, labels)
+                temp_loss_list.append(loss.item())
+                temp_acc_list.append(accuracy_score(labels, pred.argmax(1)))
+            self.testing_accuracy = np.mean(temp_acc_list).item()
+            self.testing_loss = np.mean(temp_loss_list).item()
+
+    def save(self, path):
+        if path:
+            # Make sure the file extension is .pt
+            ext = path[len(path)-3:len(path)]
+            if ext.lower() != ".pt":
+                path = path+".pt"
+            torch.save(self.model, path)
 
 class _ClassifierKFold:
-    def __init__(self, hparams, trainingDataset, validationDataset=None, testingDataset=None):
+    def __init__(self, hparams, trainingDataset, testingDataset=None):
         self.trainingDataset = trainingDataset
-        self.validationDataset = validationDataset
         self.testingDataset = testingDataset
         self.hparams = hparams
+        self.testing_accuracy = 0
+        self.accuracies = []
+        self.max_accuracy = 0
         # at beginning of the script
         #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
         device = torch.device("cpu")
@@ -792,8 +950,6 @@ class _ClassifierKFold:
         self.validation_loss_list = []
         self.training_accuracy_list = []
         self.validation_accuracy_list = []
-        self.testing_accuracy_list = []
-        self.testing_loss_list = []
         self.node_attr_key = trainingDataset.node_attr_key
 
 
@@ -839,8 +995,6 @@ class _ClassifierKFold:
         self.training_loss_list = []
         self.validation_accuracy_list = []
         self.validation_loss_list = []
-        self.testing_accuracy_list = []
-        self.testing_loss_list = []
 
         # Set fixed random number seed
         torch.manual_seed(42)
@@ -848,22 +1002,18 @@ class _ClassifierKFold:
         # Define the K-fold Cross Validator
         kfold = KFold(n_splits=k_folds, shuffle=True)
 
-        tqdm_list = [i for i in range(k_folds)]
-        t_folds = tqdm(tqdm_list, desc='Folds', leave=False)
         models = []
+        weights = []
         accuracies = []
         train_dataloaders = []
         validate_dataloaders = []
-        t_e = tqdm(range(1,self.hparams.epochs), desc='Epochs', leave=False)
 
         # K-fold Cross-validation model evaluation
-        for fold, (train_ids, validate_ids) in enumerate(kfold.split(self.trainingDataset)):
+        for fold, (train_ids, validate_ids) in tqdm(enumerate(kfold.split(self.trainingDataset)), desc="Fold", initial=1, total=k_folds, leave=False):
             epoch_training_loss_list = []
             epoch_training_accuracy_list = []
             epoch_validation_loss_list = []
             epoch_validation_accuracy_list = []
-            epoch_testing_accuracy_list = []
-            epoch_testing_loss_list = []
             # Sample elements randomly from a given list of ids, no replacement.
             train_subsampler = torch.utils.data.SubsetRandomSampler(train_ids)
             validate_subsampler = torch.utils.data.SubsetRandomSampler(validate_ids)
@@ -878,14 +1028,10 @@ class _ClassifierKFold:
             # Init the neural network
             self.reset_weights()
 
-            t_e.reset(1)
-
             # Run the training loop for defined number of epochs
-            for _ in range(self.hparams.epochs):
-                t_e.update()
-                num_correct = 0
-                num_tests = 0
+            for _ in tqdm(range(0,self.hparams.epochs), desc='Epochs', initial=1, total=self.hparams.epochs, leave=False):
                 temp_loss_list = []
+                temp_acc_list = []
 
                 # Iterate over the DataLoader for training data
                 for batched_graph, labels in tqdm(self.train_dataloader, desc='Training', leave=False):
@@ -908,8 +1054,7 @@ class _ClassifierKFold:
 
                     # Save loss information for reporting
                     temp_loss_list.append(loss.item())
-                    num_correct += (pred.argmax(1) == labels).sum().item()
-                    num_tests += len(labels)
+                    temp_acc_list.append(accuracy_score(labels, pred.argmax(1)))
 
                     # Perform backward pass
                     loss.backward()
@@ -917,19 +1062,13 @@ class _ClassifierKFold:
                     # Perform optimization
                     self.optimizer.step()
 
-                self.training_accuracy = num_correct / num_tests
-                epoch_training_accuracy_list.append(self.training_accuracy)
-                epoch_training_loss_list.append(sum(temp_loss_list) / len(temp_loss_list))
+                epoch_training_accuracy_list.append(np.mean(temp_acc_list).item())
+                epoch_training_loss_list.append(np.mean(temp_loss_list).item())
                 self.validate()
                 epoch_validation_accuracy_list.append(self.validation_accuracy)
                 epoch_validation_loss_list.append(self.validation_loss)
-                self.test()
-                epoch_testing_accuracy_list.append(self.testing_accuracy)
-                epoch_testing_loss_list.append(self.testing_loss)
-                if self.hparams.checkpoint_path is not None:
-                    # Save the entire model
-                    torch.save(self.model, self.hparams.checkpoint_path+"-fold_"+str(fold))
             models.append(self.model)
+            weights.append(copy.deepcopy(self.model.state_dict()))
             accuracies.append(self.validation_accuracy)
             train_dataloaders.append(self.train_dataloader)
             validate_dataloaders.append(self.validate_dataloader)
@@ -937,22 +1076,21 @@ class _ClassifierKFold:
             self.training_loss_list.append(epoch_training_loss_list)
             self.validation_accuracy_list.append(epoch_validation_accuracy_list)
             self.validation_loss_list.append(epoch_validation_loss_list)
-            self.testing_accuracy_list.append(epoch_testing_accuracy_list)
-            self.testing_loss_list.append(epoch_testing_loss_list)
-            t_folds.update()
+        self.accuracies = accuracies
         max_accuracy = max(accuracies)
+        self.max_accuracy = max_accuracy
         ind = accuracies.index(max_accuracy)
-        model = models[ind]
-        model.train_dataloader = train_dataloaders[ind]
-        model.validate_dataloader = validate_dataloaders[ind]
-        self.model = model
-        self.train_final()
-
-
+        self.model = models[ind]
+        self.model.load_state_dict(weights[ind])
+        self.model.eval()
+        self.training_accuracy_list = self.training_accuracy_list[ind]
+        self.training_loss_list = self.training_loss_list[ind]
+        self.validation_accuracy_list = self.validation_accuracy_list[ind]
+        self.validation_loss_list = self.validation_loss_list[ind]
+
 
     def validate(self):
-        num_correct = 0
-        num_tests = 0
-        temp_validation_loss = []
+        temp_loss_list = []
+        temp_acc_list = []
         self.model.eval()
         for batched_graph, labels in tqdm(self.validate_dataloader, desc='Validating', leave=False):
             pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float())
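Rather than retraining a final model (the old train_final path, removed below), the K-fold loop now snapshots each fold's weights with copy.deepcopy and restores the best-scoring fold at the end. The core pattern, in isolation with hypothetical fold scores:

```python
import copy
import torch.nn as nn

model = nn.Linear(4, 2)            # stand-in for the GNN
weights, scores = [], []
for fold in range(5):
    # ... train on this fold's split ...
    weights.append(copy.deepcopy(model.state_dict()))  # snapshot, not a reference
    scores.append(0.5 + 0.1 * fold)                    # hypothetical fold accuracy
best = scores.index(max(scores))
model.load_state_dict(weights[best])  # restore the winning fold's weights
model.eval()
```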
@@ -961,134 +1099,38 @@ class _ClassifierKFold:
                 loss = F.nll_loss(logp, labels)
             elif self.hparams.loss_function.lower() == "cross entropy":
                 loss = F.cross_entropy(pred, labels)
-            temp_validation_loss.append(loss.item())
-            num_correct += (pred.argmax(1) == labels).sum().item()
-            num_tests += len(labels)
-        self.validation_loss = (sum(temp_validation_loss) / len(temp_validation_loss))
-        self.validation_accuracy = num_correct / num_tests
-        return self.validation_accuracy
-
-    def accuracy(self, dictionary):
-        labels = dictionary['labels']
-        predictions = dictionary['predictions']
-        num_correct = 0
-        for i in range(len(predictions)):
-            if predictions[i] == labels[i]:
-                num_correct = num_correct + 1
-        return (num_correct / len(predictions))
+            temp_loss_list.append(loss.item())
+            temp_acc_list.append(accuracy_score(labels, pred.argmax(1)))
+        self.validation_accuracy = np.mean(temp_acc_list).item()
+        self.validation_loss = np.mean(temp_loss_list).item()
 
     def test(self):
-        num_correct = 0
-        num_tests = 0
-        temp_testing_loss = []
-        self.model.eval()
-        for batched_graph, labels in tqdm(self.test_dataloader, desc='Testing', leave=False):
-            pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float())
-            if self.hparams.loss_function.lower() == "negative log likelihood":
-                logp = F.log_softmax(pred, 1)
-                loss = F.nll_loss(logp, labels)
-            elif self.hparams.loss_function.lower() == "cross entropy":
-                loss = F.cross_entropy(pred, labels)
-            temp_testing_loss.append(loss.item())
-            num_correct += (pred.argmax(1) == labels).sum().item()
-            num_tests += len(labels)
-        self.testing_loss = (sum(temp_testing_loss) / len(temp_testing_loss))
-        self.testing_accuracy = num_correct / num_tests
-        return self.testing_accuracy
-
-    def predict(self):
-        #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-        device = torch.device("cpu")
-        predicted_labels = []
-        idx = torch.randperm(len(self.validationDataset))
-        num_train = int(len(self.validationDataset))
-        sampler = SubsetRandomSampler(idx[:num_train])
-        dataloader = GraphDataLoader(self.validationDataset, sampler=sampler,
-                                     batch_size=1,
-                                     drop_last=False)
-        num_correct = 0
-        num_tests = 0
-        for batched_graph, labels in dataloader:
-            pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float()).to(device)
-            num_correct += (pred.argmax(1) == labels).sum().item()
-            num_tests += len(labels)
-        accuracy = num_correct / num_tests
-        return accuracy
-
-    def train_final(self):
-        #device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
-        device = torch.device("cpu")
-        # Init the loss and accuracy reporting lists
-        self.training_accuracy_list = []
-        self.training_loss_list = []
-        self.validation_accuracy_list = []
-        self.validation_loss_list = []
-        self.testing_accuracy_list = []
-        self.testing_lost_list = []
-
-
-        # Set training to 100% of the data, validate, and save a final model
-        #idx = torch.randperm(len(self.trainingDataset))
-        #num_train = int(len(self.trainingDataset))
-        #sampler = SubsetRandomSampler(idx[:num_train])
-        #dataloader = GraphDataLoader(self.trainingDataset, sampler=sampler,
-        #batch_size=self.hparams.batch_size,
-        #drop_last=False)
-        print("Final Training/Validation/Testing")
-        for _ in tqdm(range(self.hparams.epochs), desc='Epochs', leave=False):
-            num_correct = 0
-            num_tests = 0
+        if self.testingDataset:
+            self.test_dataloader = GraphDataLoader(self.testingDataset,
+                                                   batch_size=len(self.testingDataset),
+                                                   drop_last=False)
             temp_loss_list = []
-
-            # Iterate over the DataLoader for training data
-            for batched_graph, labels in tqdm(self.train_dataloader, desc='Training', leave=False):
-
-                # Make sure the model is in training mode
-                self.model.train()
-
-                # Zero the gradients
-                self.optimizer.zero_grad()
-
-                # Perform forward pass
+            temp_acc_list = []
+            self.model.eval()
+            for batched_graph, labels in tqdm(self.test_dataloader, desc='Testing', leave=False):
                 pred = self.model(batched_graph, batched_graph.ndata[self.node_attr_key].float())
-
-                # Compute loss
                 if self.hparams.loss_function.lower() == "negative log likelihood":
                     logp = F.log_softmax(pred, 1)
                     loss = F.nll_loss(logp, labels)
                 elif self.hparams.loss_function.lower() == "cross entropy":
                     loss = F.cross_entropy(pred, labels)
-
-                # Save loss information for reporting
                 temp_loss_list.append(loss.item())
-                num_correct += (pred.argmax(1) == labels).sum().item()
-                num_tests += len(labels)
-
-                # Perform backward pass
-                loss.backward()
-
-                # Perform optimization
-                self.optimizer.step()
-
-            self.training_accuracy = num_correct / num_tests
-            self.training_accuracy_list.append(self.training_accuracy)
-            self.training_loss_list.append(sum(temp_loss_list) / len(temp_loss_list))
-            self.validate()
-            self.validation_accuracy_list.append(self.validation_accuracy)
-            self.validation_loss_list.append(self.validation_loss)
-            self.test()
-            self.testing_accuracy_list.append(self.testing_accuracy)
-            self.testing_loss_list.append(self.testing_loss)
-
-    def save(self):
-        if self.hparams.checkpoint_path is not None:
-            # Save the entire model
-            try:
-                torch.save(self.model, self.hparams.checkpoint_path)
-                return True
-            except:
-                return False
-        return False
+                temp_acc_list.append(accuracy_score(labels, pred.argmax(1)))
+            self.testing_accuracy = np.mean(temp_acc_list).item()
+            self.testing_loss = np.mean(temp_loss_list).item()
+
+    def save(self, path):
+        if path:
+            # Make sure the file extension is .pt
+            ext = path[len(path)-3:len(path)]
+            if ext.lower() != ".pt":
+                path = path+".pt"
+            torch.save(self.model, path)
 
 class DGL:
     @staticmethod
@@ -1193,22 +1235,22 @@ class DGL:
             raise NotImplementedError
 
         list_idx = df['graph_index'].tolist()
-        DGLGraphs = []
+        graphs = []
         labels = []
         for index in list_idx:
             graph, label = dataset[index]
-            DGLGraphs.append(graph)
+            graphs.append(graph)
             labels.append(label)
-        return DGL.DatasetByDGLGraphs(DGLGraphs=DGLGraphs, labels=labels, key=key)
+        return DGL.DatasetByGraphs(graphs=graphs, labels=labels, key=key)
 
     @staticmethod
-    def ByGraph(graph, bidirectional=True, key=None, categories=[], node_attr_key="node_attr", tolerance=0.0001):
+    def GraphByTopologicGraph(topologicGraph, bidirectional=True, key=None, categories=[], node_attr_key="node_attr", tolerance=0.0001):
         """
         Returns a DGL graph by the input topologic graph.
 
         Parameters
         ----------
-        graph : topologic.Graph
+        topologicGraph : topologic.Graph
             The input topologic graph.
         bidirectional : bool , optional
             If set to True, the output DGL graph is forced to be bidirectional. The default is True.
@@ -1233,8 +1275,8 @@ class DGL:
         from topologicpy.Topology import Topology
 
         graph_dict = {}
-        vertices = Graph.Vertices(graph)
-        edges = Graph.Edges(graph)
+        vertices = Graph.Vertices(topologicGraph)
+        edges = Graph.Edges(topologicGraph)
         graph_dict["num_nodes"] = len(vertices)
         graph_dict["src"] = []
         graph_dict["dst"] = []
@@ -1282,7 +1324,7 @@ class DGL:
         return dgl_graph
 
     @staticmethod
-    def ByImportedCSV(graphs_file_path, edges_file_path,
+    def GraphsByImportedCSV(graphs_file_path, edges_file_path,
                       nodes_file_path, graph_id_header="graph_id",
                       graph_label_header="label", num_nodes_header="num_nodes", src_header="src",
                       dst_header="dst", node_label_header="label", node_attr_key="node_attr",
@@ -1375,7 +1417,7 @@ class DGL:
         return {"graphs":dgl_graphs, "labels":labels}
 
     @staticmethod
-    def ByImportedDGCNN(file_path, categories=[], bidirectional=True):
+    def GraphsByImportedDGCNN(file_path, categories=[], bidirectional=True):
         """
         Returns the Graphs from the imported DGCNN file.
 
@@ -1469,9 +1511,9 @@ class DGL:
         return {"categories":[categories], "ratios":[ratios]}
 
     @staticmethod
-    def ClassifierByFilePath(path):
+    def ModelByFilePath(path):
         """
-        Returns the classifier found at the input file path.
+        Returns the model found at the input file path.
         Parameters
         ----------
         path : str
@@ -1483,6 +1525,8 @@ class DGL:
             The classifier.
 
         """
+        if not path:
+            return None
         return torch.load(path)
 
     def ConfusionMatrix(actual, predicted, normalize=False):
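ClassifierByFilePath is renamed ModelByFilePath and gains a guard: an empty path now returns None rather than being handed to torch.load. The guard is equivalent to this standalone sketch (the function name is hypothetical):

```python
import torch

def load_model(path):
    """Mirror of the new behavior: a falsy path yields None instead of raising."""
    if not path:
        return None
    return torch.load(path)

assert load_model("") is None  # previously this raised inside torch.load
```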
@@ -1512,16 +1556,14 @@ class DGL:
         return cm
 
     @staticmethod
-    def DatasetByDGLGraphs(DGLGraphs, labels, key="node_attr"):
+    def DatasetByGraphs(dictionary, key="node_attr"):
         """
         Returns a DGL Dataset from the input DGL graphs.
 
         Parameters
         ----------
-        DGLGraphs : list
-            The input list of dgl graphs.
-        labels : list
-            The list of labels.
+        dictionary : dict
+            The input dictionary
         key : str
             The key used for the node attributes.
 
@@ -1531,11 +1573,9 @@ class DGL:
             The created DGL dataset.
 
         """
-        if isinstance(DGLGraphs, list) == False:
-            DGLGraphs = [DGLGraphs]
-        if isinstance(labels, list) == False:
-            labels = [labels]
-        return _GraphDGL(DGLGraphs, labels, key)
+        graphs = dictionary['graphs']
+        labels = dictionary['labels']
+        return _Dataset(graphs, labels, key)
 
     @staticmethod
     def DatasetByImportedCSV_NC(folderPath):
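DatasetByDGLGraphs(DGLGraphs, labels, key) becomes DatasetByGraphs(dictionary, key), taking the same {'graphs': ..., 'labels': ...} dictionary that GraphsByImportedDGCNN and GraphsByImportedCSV return. Note that several call sites in this same diff (the `graphs=graphs, labels=labels` keywords in DatasetByCSVPath above, plus DatasetMerge and DatasetRemoveCategory below) still use the old calling convention, which does not match the new signature. A migration sketch with a toy graph list:

```python
import torch
import dgl
from topologicpy.DGL import DGL

g = dgl.graph(([0, 1], [1, 0]), num_nodes=2)
g.ndata["node_attr"] = torch.zeros(2, 1)  # minimal node features for the dataset

# 0.3.0: DGL.DatasetByDGLGraphs(DGLGraphs=[g], labels=[0], key="node_attr")
dataset = DGL.DatasetByGraphs({"graphs": [g], "labels": [0]}, key="node_attr")
```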
@@ -1584,7 +1624,7 @@ class DGL:
             node_attr_key = 'node_labels'
         else:
             raise NotImplementedError
-        return _GraphDGL(dgl_graphs, dgl_labels, node_attr_key)
+        return _Dataset(dgl_graphs, dgl_labels, node_attr_key)
 
     @staticmethod
     def DatasetBySample_NC(name="Cora"):
@@ -1617,7 +1657,7 @@ class DGL:
             raise NotImplementedError
 
     @staticmethod
-    def Graphs(dataset):
+    def DatasetGraphs(dataset):
         """
         Returns the DGL graphs found in the input dataset.
 
@@ -1644,7 +1684,7 @@ class DGL:
         return graphs
 
     @staticmethod
-    def EdgeData(dgl_graph):
+    def GraphEdgeData(graph):
        """
        Returns the edge data found in the input DGL graph
        Parameters
@@ -1658,18 +1698,22 @@ class DGL:
             The edge data.
 
         """
-        return dgl_graph.edata
+        return graph.edata
 
     @staticmethod
-    def Hyperparameters(optimizer, cv_type="Holdout", split=[0.8,0.1,0.1], k_folds=5,
+    def Hyperparameters(optimizer, model_type="classifier", cv_type="Holdout", split=[0.8,0.1,0.1], k_folds=5,
                         hl_widths=[32], conv_layer_type="SAGEConv", pooling="AvgPooling",
-                        batch_size=1, epochs=1, use_gpu=False, loss_function="Cross Entropy",
-                        classifier_path="", results_path=""):
+                        batch_size=1, epochs=1, use_gpu=False, loss_function="Cross Entropy"):
         """
         Creates a hyperparameters object based on the input settings.
 
         Parameters
         ----------
+        model_type : str , optional
+            The desired type of model. The options are:
+            - "Classifier"
+            - "Regressor"
+            The option is case insensitive. The default is "classifier".
         optimizer : Optimizer
             The desired optimizer.
         cv_type : str , optional
@@ -1692,10 +1736,6 @@ class DGL:
             If set to True, the model will attempt to use the GPU. The default is False.
         loss_function : str , optional
             The desired loss function. The options are "Cross-Entropy" or "Negative Log Likelihood". It is case insensitive. The default is "Cross-Entropy".
-        classifier_path : str
-            The file path at which to save the trained classifier.
-        results_path : str
-            The file path at which to save the training and testing results.
 
         Returns
         -------
@@ -1705,40 +1745,31 @@ class DGL:
         """
 
         if optimizer['name'].lower() == "adadelta":
-            name = "Adadelta"
+            optimizer_str = "Adadelta"
         elif optimizer['name'].lower() == "adagrad":
-            name = "Adagrad"
+            optimizer_str = "Adagrad"
         elif optimizer['name'].lower() == "adam":
-            name = "Adam"
-        # Classifier: Make sure the file extension is .pt
-        ext = classifier_path[len(classifier_path)-3:len(classifier_path)]
-        if ext.lower() != ".pt":
-            classifier_path = classifier_path+".pt"
-        # Results: Make sure the file extension is .csv
-        ext = results_path[len(results_path)-4:len(results_path)]
-        if ext.lower() != ".csv":
-            results_path = results_path+".csv"
-        return _Hparams(name,
-                        optimizer['amsgrad'],
-                        optimizer['betas'],
-                        optimizer['eps'],
-                        optimizer['lr'],
-                        optimizer['lr_decay'],
-                        optimizer['maximize'],
-                        optimizer['rho'],
-                        optimizer['weight_decay'],
-                        cv_type,
-                        split,
-                        k_folds,
-                        hl_widths,
-                        conv_layer_type,
-                        pooling,
-                        batch_size,
-                        epochs,
-                        use_gpu,
-                        loss_function,
-                        classifier_path,
-                        results_path)
+            optimizer_str = "Adam"
+        return _Hparams(model_type,
+                        optimizer_str,
+                        optimizer['amsgrad'],
+                        optimizer['betas'],
+                        optimizer['eps'],
+                        optimizer['lr'],
+                        optimizer['lr_decay'],
+                        optimizer['maximize'],
+                        optimizer['rho'],
+                        optimizer['weight_decay'],
+                        cv_type,
+                        split,
+                        k_folds,
+                        hl_widths,
+                        conv_layer_type,
+                        pooling,
+                        batch_size,
+                        epochs,
+                        use_gpu,
+                        loss_function)
 
     @staticmethod
     def OneHotEncode(item, categories):
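With the path arguments removed and model_type added, a 0.3.2-style hyperparameters object is built as below. The DGL.Optimizer call is assumed from the optimizer dictionary this method unpacks; its exact keyword set is not shown in this diff:

```python
from topologicpy.DGL import DGL

# Assumed helper: returns the {'name': ..., 'lr': ..., ...} dict consumed above.
optimizer = DGL.Optimizer(name="Adam", lr=0.001)
hparams = DGL.Hyperparameters(optimizer,
                              model_type="classifier",  # new in 0.3.2
                              cv_type="Holdout",
                              split=[0.8, 0.1, 0.1],
                              hl_widths=[32],
                              conv_layer_type="SAGEConv",
                              batch_size=1,
                              epochs=10)
# Saving is now explicit (model.save(path)) rather than baked into hparams.
```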
@@ -1767,7 +1798,7 @@ class DGL:
         return returnList
 
     @staticmethod
-    def Labels(dataset):
+    def DatasetLabels(dataset):
         """
         Returns the labels of the graphs in the input dataset
 
@@ -1784,7 +1815,7 @@ class DGL:
         return [int(g[1]) for g in dataset]
 
     @staticmethod
-    def Merge(datasets, key="node_attr"):
+    def DatasetMerge(datasets, key="node_attr"):
         """
         Merges the input list of datasets into one dataset
 
@@ -1802,12 +1833,12 @@ class DGL:
         graphs = []
         labels = []
         for ds in datasets:
-            graphs += DGL.Graphs(ds)
-            labels += DGL.Labels(ds)
-        return DGL.DatasetByDGLGraphs(graphs, labels, key=key)
+            graphs += DGL.DatasetGraphs(ds)
+            labels += DGL.DatasetLabels(ds)
+        return DGL.DatasetByGraphs(graphs, labels, key=key)
 
     @staticmethod
-    def NodeData(dgl_graph):
+    def GraphNodeData(graph):
         """
         Returns the node data found in the input dgl_graph
 
@@ -1822,10 +1853,10 @@ class DGL:
             The node data.
 
         """
-        return dgl_graph.ndata
+        return graph.ndata
 
     @staticmethod
-    def RemoveCategory(dataset, label, key="node_attr"):
+    def DatasetRemoveCategory(dataset, label, key="node_attr"):
         """
         Removes graphs from the input dataset that have the input label
 
@@ -1845,18 +1876,18 @@ class DGL:
 
         """
 
-        graphs = DGL.Graphs(dataset)
-        labels = DGL.Labels(dataset)
+        graphs = DGL.DatasetGraphs(dataset)
+        labels = DGL.DatasetLabels(dataset)
         new_graphs = []
         new_labels = []
         for i in range(len(labels)):
             if not labels[i] == label:
                 new_graphs.append(graphs[i])
                 new_labels.append(labels[i])
-        return DGL.DatasetByDGLGraphs(new_graphs, new_labels, key)
+        return DGL.DatasetByGraphs(new_graphs, new_labels, key)
 
     @staticmethod
-    def Split(dataset, fracList=[0.8, 0.1, 0.1], shuffle=False, randomState=None, key="node_attr"):
+    def DatasetSplit(dataset, fracList=[0.8, 0.1, 0.1], shuffle=False, randomState=None, key="node_attr"):
         """
         Splits the dataset into training, validation, and testing datasets.
 
@@ -1888,15 +1919,15 @@ class DGL:
             return None
         datasets = dgl.data.utils.split_dataset(dataset, frac_list=fracList, shuffle=shuffle, random_state=randomState)
         if fracList[0] > 0:
-            train_ds = DGL.DatasetByDGLGraphs(DGLGraphs=DGL.Graphs(datasets[0]), labels=DGL.Labels(datasets[0]), key=key)
+            train_ds = DGL.DatasetByGraphs({'graphs': DGL.DatasetGraphs(datasets[0]), 'labels' :DGL.DatasetLabels(datasets[0])}, key=key)
         else:
             train_ds = None
         if fracList[1] > 0:
-            validate_ds = DGL.DatasetByDGLGraphs(DGLGraphs=DGL.Graphs(datasets[1]), labels=DGL.Labels(datasets[1]), key=key)
+            validate_ds = DGL.DatasetByGraphs({'graphs': DGL.DatasetGraphs(datasets[1]), 'labels' :DGL.DatasetLabels(datasets[1])}, key=key)
         else:
             validate_ds = None
         if fracList[2] > 0:
-            test_ds = DGL.DatasetByDGLGraphs(DGLGraphs=DGL.Graphs(datasets[2]), labels=DGL.Labels(datasets[2]), key=key)
+            test_ds = DGL.DatasetByGraphs({'graphs': DGL.DatasetGraphs(datasets[2]), 'labels' :DGL.DatasetLabels(datasets[2])}, key=key)
         else:
             test_ds = None
 
@@ -1941,7 +1972,7 @@ class DGL:
         return {"name":name, "amsgrad":amsgrad, "betas":betas, "eps":eps, "lr": lr, "maximize":maximize, "weight_decay":weightDecay, "rho":rho, "lr_decay":lr_decay}
 
     @staticmethod
-    def Classify(dataset, classifier, node_attr_key="node_attr"):
+    def ModelClassify(model, dataset, node_attr_key="node_attr"):
         """
         Predicts the classification labels of the input dataset.
 
@@ -1949,8 +1980,8 @@ class DGL:
         ----------
         dataset : DGLDataset
             The input DGL dataset.
-        classifier : Classifier
-            The input trained classifier.
+        model : Model
+            The input trained model.
         node_attr_key : str , optional
             The key used for node attributes. The default is "node_attr".
 
@@ -1966,7 +1997,7 @@ class DGL:
         probabilities = []
         for item in tqdm(dataset, desc='Classifying', leave=False):
             graph = item[0]
-            pred = classifier(graph, graph.ndata[node_attr_key].float())
+            pred = model(graph, graph.ndata[node_attr_key].float())
             labels.append(pred.argmax(1).item())
             probability = (torch.nn.functional.softmax(pred, dim=1).tolist())
             probability = probability[0]
@@ -1977,16 +2008,16 @@ class DGL:
         return {"predictions":labels, "probabilities":probabilities}
 
     @staticmethod
-    def Predict(dataset, regressor, node_attr_key="node_attr"):
+    def ModelPredict(model, dataset, node_attr_key="node_attr"):
         """
-        Predicts the label of the input dataset.
+        Predicts the value of the input dataset.
 
         Parameters
         ----------
         dataset : DGLDataset
             The input DGL dataset.
-        regressor : Classifier
-            The input trained regressor.
+        model : Model
+            The input trained model.
         node_attr_key : str , optional
             The key used for node attributes. The default is "node_attr".
 
@@ -1996,24 +2027,23 @@ class DGL:
             The list of predictions
         """
         values = []
-        for item in tqdm(dataset, desc='Predicting'):
+        for item in tqdm(dataset, desc='Predicting', leave=False):
             graph = item[0]
-            pred = regressor(graph, graph.ndata[node_attr_key].float())
+            pred = model(graph, graph.ndata[node_attr_key].float())
             values.append(round(pred.item(), 3))
         return values
 
     @staticmethod
-    def ClassifyNode(dataset, classifier):
+    def ModelClassifyNodes(model, dataset):
         """
         Predicts the classification of the node labels found in the input dataset using the input classifier.
 
         Parameters
         ----------
+        model : Model
+            The input model.
         dataset : DGLDataset
             The input DGL Dataset.
-
-        classifier : Classifier
-            The input classifier.
 
         Returns
         -------
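Classify and Predict are renamed ModelClassify and ModelPredict, and the trained model moves to the first argument. A migration sketch, with model and dataset assumed trained and built elsewhere:

```python
# 0.3.0: DGL.Classify(dataset, classifier, node_attr_key="node_attr")
results = DGL.ModelClassify(model, dataset, node_attr_key="node_attr")
predicted = results["predictions"]      # argmax class index per graph
confidence = results["probabilities"]   # softmax probability of each prediction

# 0.3.0: DGL.Predict(dataset, regressor)
values = DGL.ModelPredict(model, dataset)  # rounded scalar output per graph
```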
@@ -2041,7 +2071,7 @@ class DGL:
         testLabels = []
         testPredictions = []
 
-        graphs = DGL.Graphs(dataset)
+        graphs = DGL.DatasetGraphs(dataset)
         for g in graphs:
             if not g.ndata:
                 continue
@@ -2059,7 +2089,7 @@ class DGL:
             testLabels.append(test_labels.tolist())
 
             # Forward
-            logits = classifier(g, features)
+            logits = model(g, features)
             train_logits = logits[train_mask]
             val_logits = logits[val_mask]
             test_logits = logits[test_mask]
@@ -2089,7 +2119,7 @@ class DGL:
     @staticmethod
     def Show(data,
              labels,
-             title="Training/Validation/Testing",
+             title="Training/Validation",
              xTitle="Epochs",
              xSpacing=1,
              yTitle="Accuracy and Loss",
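The last two hunks replace TrainClassifier with a Model factory plus separate ModelTrain and ModelTest steps: DGL.Model only constructs the right trainer from hparams.model_type and hparams.cv_type, and training, testing, and saving become explicit calls. The intended workflow, sketched with objects assumed from the earlier examples (ModelTest presumably delegates to the trainer's test method; its body is truncated in this diff):

```python
model = DGL.Model(hparams, trainingDataset)  # picks a _Classifier*/_Regressor* trainer
model = DGL.ModelTrain(model)                # runs the training loop
DGL.ModelTest(model)                         # fills testing_accuracy / testing_loss
model.save("trained_model")                  # ".pt" appended automatically
```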
@@ -2186,11 +2216,11 @@ class DGL:
                          marginBottom=marginBottom
                          )
         Plotly.Show(fig, renderer=renderer)
-
+
     @staticmethod
-    def TrainClassifier(hparams, trainingDataset, validationDataset=None, testingDataset=None, overwrite=True):
+    def Model(hparams, trainingDataset, validationDataset=None, testingDataset=None):
         """
-        Trains a neural network classifier.
+        Creates a neural network classifier.
         Parameters
         ----------
         hparams : HParams
@@ -2202,91 +2232,231 @@ class DGL:
            The input validation dataset. If not specified, a portion of the trainingDataset will be used for validation according to the split list as specified in the hyper-parameters.
        testingDataset : DGLDataset
            The input testing dataset. If not specified, a portion of the trainingDataset will be used for testing according to the split list as specified in the hyper-parameters.
-       overwrite : bool , optional
-           If set to True, previous saved results files are overwritten. Otherwise, the new results are appended to the previously saved files. The default is True.
 
        Returns
        -------
-       dict
-           A dictionary containing all the results.
+       Classifier
+           The created classifier.
+
+       """
+
+        model = None
+        if hparams.model_type.lower() == "classifier":
+            if hparams.cv_type.lower() == "holdout":
+                model = _ClassifierHoldout(hparams=hparams, trainingDataset=trainingDataset, validationDataset=validationDataset, testingDataset=testingDataset)
+            elif hparams.cv_type.lower() == "k-fold" or hparams.cv_type.lower() == "kfold":
+                model = _ClassifierKFold(hparams=hparams, trainingDataset=trainingDataset, testingDataset=testingDataset)
+        elif hparams.model_type.lower() == "regressor":
+            if hparams.cv_type.lower() == "holdout":
+                model = _RegressorHoldout(hparams=hparams, trainingDataset=trainingDataset, validationDataset=validationDataset, testingDataset=testingDataset)
+            elif hparams.cv_type.lower() == "k-fold" or hparams.cv_type.lower() == "kfold":
+                model = _RegressorKFold(hparams=hparams, trainingDataset=trainingDataset, testingDataset=testingDataset)
+        else:
+            raise NotImplementedError
+        return model
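The factory only constructs a wrapper; training and testing become separate calls below. A minimal sketch of the dispatch, assuming hparams exposes the model_type and cv_type fields read above and trainingDataset is a prepared DGLDataset (placeholder names):

    from topologicpy.DGL import DGL

    # ("classifier", "holdout") -> _ClassifierHoldout; "k-fold"/"kfold" -> _ClassifierKFold.
    model = DGL.Model(hparams, trainingDataset)
    # model stays None when model_type matches but cv_type does not;
    # an unrecognized model_type raises NotImplementedError.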
 
+    @staticmethod
+    def ModelTrain(model):
         """
-        from topologicpy.Helper import Helper
-        import time
-        import datetime
-        start = time.time()
-        if hparams.cv_type.lower() == "holdout":
-            classifier = _ClassifierHoldout(hparams=hparams, trainingDataset=trainingDataset, validationDataset=validationDataset, testingDataset=testingDataset)
-            classifier.train()
-            classifier.save()
-        elif hparams.cv_type.lower() == "k-fold":
-            classifier = _ClassifierKFold(hparams=hparams, trainingDataset=trainingDataset, validationDataset=validationDataset, testingDataset=testingDataset)
-            classifier.train()
-            classifier.save()
-
-        #print(classifier.training_accuracy_list)
-        # Transpose the fold data
-        # temp_list = Helper.Transpose(classifier.training_accuracy_list)
-        #tr_a_l = []
-        #for l in temp_list:
-            #tr_a_l.append((sum(l) / len(l)))
-        #temp_list = Helper.Transpose(classifier.training_loss_list)
-        #tr_l_l = []
-        #for l in temp_list:
-            #tr_l_l.append((sum(l) / len(l)))
-        #temp_list = Helper.Transpose(classifier.validation_accuracy_list)
-        #te_a_l = []
-        #for l in temp_list:
-            #te_a_l.append((sum(l) / len(l)))
-        #temp_list = Helper.Transpose(classifier.validation_loss_list)
-        #te_l_l = []
-        #for l in temp_list:
-            #te_l_l.append((sum(l) / len(l)))
-
-        #classifier.training_accuracy_list = tr_a_l
-        #classifier.training_loss_list = tr_l_l
-        #classifier.validation_accuracy_list = te_a_l
-        #classifier.validation_loss_list = te_l_l
+        Trains the neural network model.
+
+        Parameters
+        ----------
+        model : Model
+            The input model.
+
+        Returns
+        -------
+        Model
+            The trained model.
+
+        """
+        if not model:
+            return None
+        model.train()
+        return model
 
-        end = time.time()
-        duration = round(end - start,3)
-        utcnow = datetime.datetime.utcnow()
-        timestamp_str = "UTC-"+str(utcnow.year)+"-"+str(utcnow.month)+"-"+str(utcnow.day)+"-"+str(utcnow.hour)+"-"+str(utcnow.minute)+"-"+str(utcnow.second)
-        epoch_list = list(range(1,classifier.hparams.epochs+1))
-        d2 = [[timestamp_str], [duration], [classifier.hparams.optimizer_str], [classifier.hparams.cv_type], [classifier.hparams.split], [classifier.hparams.k_folds], [classifier.hparams.hl_widths], [classifier.hparams.conv_layer_type], [classifier.hparams.pooling], [classifier.hparams.lr], [classifier.hparams.batch_size], epoch_list, classifier.training_accuracy_list, classifier.validation_accuracy_list, classifier.testing_accuracy_list, classifier.training_loss_list, classifier.validation_loss_list, classifier.testing_loss_list]
-        d2 = Helper.Iterate(d2)
-        d2 = Helper.Transpose(d2)
+    @staticmethod
+    def ModelTest(model):
+        """
+        Tests the neural network model.
+
+        Parameters
+        ----------
+        model : Model
+            The input model.
+
+        Returns
+        -------
+        Model
+            The tested model.
+
+        """
+        if not model:
+            return None
+        model.test()
+        return model
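Creation, training, and testing are now three explicit steps. A minimal sketch of the chain (placeholder names):

    model = DGL.Model(hparams, trainingDataset)
    model = DGL.ModelTrain(model)  # returns None if model is falsy, else trains it in place
    model = DGL.ModelTest(model)   # fills the testing metrics that ModelData reads back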
 
-        data = {'TimeStamp': "UTC-"+str(utcnow.year)+"-"+str(utcnow.month)+"-"+str(utcnow.day)+"-"+str(utcnow.hour)+"-"+str(utcnow.minute)+"-"+str(utcnow.second),
-                'Duration': [duration],
-                'Optimizer': [classifier.hparams.optimizer_str],
-                'CV Type': [classifier.hparams.cv_type],
-                'Split': [classifier.hparams.split],
-                'K-Folds': [classifier.hparams.k_folds],
-                'HL Widths': [classifier.hparams.hl_widths],
-                'Conv Layer Type': [classifier.hparams.conv_layer_type],
-                'Pooling': [classifier.hparams.pooling],
-                'Learning Rate': [classifier.hparams.lr],
-                'Batch Size': [classifier.hparams.batch_size],
-                'Epochs': [classifier.hparams.epochs],
-                'Training Accuracy': [classifier.training_accuracy_list],
-                'Validation Accuracy': [classifier.validation_accuracy_list],
-                'Testing Accuracy': [classifier.testing_accuracy_list],
-                'Training Loss': [classifier.training_loss_list],
-                'Validation Loss': [classifier.validation_loss_list],
-                'Testing Loss': [classifier.testing_loss_list]
-                }
+    @staticmethod
+    def ModelSave(model, path=None):
+        """
+        Saves the model.
 
-        df = pd.DataFrame(d2, columns= ['TimeStamp', 'Duration', 'Optimizer', 'CV Type', 'Split', 'K-Folds', 'HL Widths', 'Conv Layer Type', 'Pooling', 'Learning Rate', 'Batch Size', 'Epochs', 'Training Accuracy', 'Validation Accuracy', 'Testing Accuracy', 'Training Loss', 'Validation Loss', 'Testing Loss'])
-        if classifier.hparams.results_path:
-            if overwrite:
-                df.to_csv(classifier.hparams.results_path, mode='w+', index = False, header=True)
-            else:
-                df.to_csv(classifier.hparams.results_path, mode='a', index = False, header=False)
+        Parameters
+        ----------
+        model : Model
+            The input model.
+        path : str , optional
+            The file path at which to save the model. A .pt extension is appended if missing. The default is None.
+
+        Returns
+        -------
+        bool
+            True if the model is saved correctly. False otherwise.
+
+        """
+        if not model:
+            return None
+        if path:
+            # Make sure the file extension is .pt
+            ext = path[len(path)-3:len(path)]
+            if ext.lower() != ".pt":
+                path = path+".pt"
+        return model.save(path)
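The extension check slices the last three characters case-insensitively, so any path not already ending in .pt gets the suffix appended. A self-contained sketch mirroring that normalization (not part of the library):

    def ensure_pt(path: str) -> str:
        # Same check as ModelSave: compare the final three characters, ignoring case.
        if path[len(path)-3:len(path)].lower() != ".pt":
            path = path + ".pt"
        return path

    assert ensure_pt("dgl_classifier") == "dgl_classifier.pt"
    assert ensure_pt("dgl_classifier.PT") == "dgl_classifier.PT"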
+
+    @staticmethod
+    def ModelData(model):
+        """
+        Returns the data of the model.
+
+        Parameters
+        ----------
+        model : Model
+            The input model.
+
+        Returns
+        -------
+        dict
+            A dictionary containing the model data.
+
+        """
+        from topologicpy.Helper import Helper
+
+        data = {'Model Type': [model.hparams.model_type],
+                'Optimizer': [model.hparams.optimizer_str],
+                'CV Type': [model.hparams.cv_type],
+                'Split': model.hparams.split,
+                'K-Folds': [model.hparams.k_folds],
+                'HL Widths': model.hparams.hl_widths,
+                'Conv Layer Type': [model.hparams.conv_layer_type],
+                'Pooling': [model.hparams.pooling],
+                'Learning Rate': [model.hparams.lr],
+                'Batch Size': [model.hparams.batch_size],
+                'Epochs': [model.hparams.epochs]
+                }
+
+        if model.hparams.model_type.lower() == "classifier":
+            testing_accuracy_list = [model.testing_accuracy] * model.hparams.epochs
+            testing_loss_list = [model.testing_loss] * model.hparams.epochs
+            metrics_data = {
+                'Training Accuracy': [model.training_accuracy_list],
+                'Validation Accuracy': [model.validation_accuracy_list],
+                'Testing Accuracy' : [testing_accuracy_list],
+                'Training Loss': [model.training_loss_list],
+                'Validation Loss': [model.validation_loss_list],
+                'Testing Loss' : [testing_loss_list]
+                }
+            if model.hparams.cv_type.lower() == "k-fold":
+                accuracy_data = {
+                    'Accuracies' : [model.accuracies],
+                    'Max Accuracy' : [model.max_accuracy]
+                    }
+                metrics_data.update(accuracy_data)
+            data.update(metrics_data)
+
+        elif model.hparams.model_type.lower() == "regressor":
+            testing_loss_list = [model.testing_loss] * model.hparams.epochs
+            metrics_data = {
+                'Training Loss': [model.training_loss_list],
+                'Validation Loss': [model.validation_loss_list],
+                'Testing Loss' : [testing_loss_list]
+                }
+            if model.hparams.cv_type.lower() == "k-fold":
+                loss_data = {
+                    'Losses' : [model.losses],
+                    'Min Loss' : [model.min_loss]
+                    }
+                metrics_data.update(loss_data)
+            data.update(metrics_data)
+
         return data
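The dictionary always carries the hyper-parameter columns and merges in metric lists according to model_type and cv_type. A minimal reading sketch, assuming model is a trained and tested wrapper (placeholder name):

    data = DGL.ModelData(model)
    epochs = data['Epochs'][0]
    if data['Model Type'][0].lower() == "classifier":
        train_acc = data['Training Accuracy'][0]  # per-epoch list
        test_acc = data['Testing Accuracy'][0]    # single testing value repeated once per epoch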
 
+    @staticmethod
+    def GraphsByFilePath(path, labelKey="value", key='node_attr'):
+        graphs, label_dict = load_graphs(path)
+        labels = label_dict[labelKey].tolist()
+        return {"graphs" : graphs, "labels": labels}
+
+    @staticmethod
+    def DataExportToCSV(data, path, overwrite=True):
+        """
+        Exports the input data to a CSV file.
 
+        Parameters
+        ----------
+        data : dict
+            The input data. See ModelData(model).
+        path : str
+            The file path of the CSV file. A .csv extension is appended if missing.
+        overwrite : bool , optional
+            If set to True, previously saved results files are overwritten. Otherwise, the new results are appended to the previously saved files. The default is True.
+
+        Returns
+        -------
+        bool
+            True if the data is saved correctly to a CSV file. False otherwise.
 
+        """
+        from topologicpy.Helper import Helper
+
+        # Make sure the file extension is .csv
+        ext = path[len(path)-4:len(path)]
+        if ext.lower() != ".csv":
+            path = path+".csv"
+
+        epoch_list = list(range(1, data['Epochs'][0]+1))
+
+        d = [data['Model Type'], data['Optimizer'], data['CV Type'], [data['Split']], data['K-Folds'], data['HL Widths'], data['Conv Layer Type'], data['Pooling'], data['Learning Rate'], data['Batch Size'], epoch_list]
+        columns = ['Model Type', 'Optimizer', 'CV Type', 'Split', 'K-Folds', 'HL Widths', 'Conv Layer Type', 'Pooling', 'Learning Rate', 'Batch Size', 'Epochs']
+
+        if data['Model Type'][0].lower() == "classifier":
+            d.extend([data['Training Accuracy'][0], data['Validation Accuracy'][0], data['Testing Accuracy'][0], data['Training Loss'][0], data['Validation Loss'][0], data['Testing Loss'][0]])
+            columns.extend(['Training Accuracy', 'Validation Accuracy', 'Testing Accuracy', 'Training Loss', 'Validation Loss', 'Testing Loss'])
+            if data['CV Type'][0].lower() == "k-fold":
+                d.extend([data['Accuracies'], data['Max Accuracy']])
+                columns.extend(['Accuracies', 'Max Accuracy'])
+
+        elif data['Model Type'][0].lower() == "regressor":
+            d.extend([data['Training Loss'][0], data['Validation Loss'][0], data['Testing Loss'][0]])
+            columns.extend(['Training Loss', 'Validation Loss', 'Testing Loss'])
+            if data['CV Type'][0].lower() == "k-fold":
+                d.extend([data['Losses'], data['Min Loss']])
+                columns.extend(['Losses', 'Min Loss'])
+
+        d = Helper.Iterate(d)
+        d = Helper.Transpose(d)
+        df = pd.DataFrame(d, columns=columns)
+
+        status = False
+        if path:
+            if overwrite:
+                mode = 'w+'
+            else:
+                mode = 'a'
+            try:
+                df.to_csv(path, mode=mode, index = False, header=True)
+                status = True
+            except:
+                status = False
+        return status
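Together with ModelData, this replaces the fixed results_path plumbing of the removed trainer: collect the dictionary, then export it to any path. A minimal sketch (placeholder names); note that a missing .csv extension is appended, and that append mode still writes a header row, unlike the removed TrainClassifier writer:

    data = DGL.ModelData(model)
    ok = DGL.DataExportToCSV(data, "dgl_results", overwrite=True)  # writes dgl_results.csv
    if not ok:
        print("CSV export failed")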
+
+    '''
     @staticmethod
     def TrainRegressor(hparams, trainingDataset, validationDataset=None, testingDataset=None, overwrite=True):
         """
@@ -2352,10 +2522,7 @@ class DGL:
         else:
             df.to_csv(regressor.hparams.results_path, mode='a', index = False, header=False)
         return data
-
-
-
-
+    '''
 
     @staticmethod
     def _TrainClassifier_NC(graphs, model, hparams):
@@ -2456,7 +2623,7 @@ class DGL:
 
         # hparams, dataset, numLabels, sample = item
        # We will consider only the first graph in the dataset.
-       graphs = DGL.Graphs(dataset)
+       graphs = DGL.DatasetGraphs(dataset)
        # Sample a random list from the graphs
        if sample < len(graphs) and sample > 0:
            graphs = random.sample(graphs, sample)