topologicpy 0.7.17__py3-none-any.whl → 0.7.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- topologicpy/ANN.py +1265 -0
- topologicpy/Cell.py +3 -2
- topologicpy/CellComplex.py +1 -1
- topologicpy/Face.py +83 -86
- topologicpy/Graph.py +82 -0
- topologicpy/Plotly.py +2 -2
- topologicpy/Topology.py +11 -33
- topologicpy/Wire.py +98 -3
- topologicpy/version.py +1 -1
- {topologicpy-0.7.17.dist-info → topologicpy-0.7.19.dist-info}/METADATA +37 -1
- {topologicpy-0.7.17.dist-info → topologicpy-0.7.19.dist-info}/RECORD +14 -13
- {topologicpy-0.7.17.dist-info → topologicpy-0.7.19.dist-info}/WHEEL +1 -1
- {topologicpy-0.7.17.dist-info → topologicpy-0.7.19.dist-info}/LICENSE +0 -0
- {topologicpy-0.7.17.dist-info → topologicpy-0.7.19.dist-info}/top_level.txt +0 -0
topologicpy/ANN.py
ADDED
@@ -0,0 +1,1265 @@
# Copyright (C) 2024
# Wassim Jabi <wassim.jabi@gmail.com>
#
# This program is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along with
# this program. If not, see <https://www.gnu.org/licenses/>.

import os
import random
import copy
import warnings

try:
    import numpy as np
except:
    print("ANN - Installing required numpy library.")
    try:
        os.system("pip install numpy")
    except:
        os.system("pip install numpy --user")
    try:
        import numpy as np
        print("ANN - numpy library installed correctly.")
    except:
        warnings.warn("ANN - Error: Could not import numpy.")

try:
    import pandas as pd
except:
    print("ANN - Installing required pandas library.")
    try:
        os.system("pip install pandas")
    except:
        os.system("pip install pandas --user")
    try:
        import pandas as pd
        print("ANN - pandas library installed correctly.")
    except:
        warnings.warn("ANN - Error: Could not import pandas.")

try:
    import torch
    import torch.optim as optim
    import torch.nn as nn
    import torch.nn.functional as F
    from torch.utils.data import DataLoader, TensorDataset
except:
    print("ANN - Installing required torch library.")
    try:
        os.system("pip install torch")
    except:
        os.system("pip install torch --user")
    try:
        import torch
        import torch.optim as optim
        import torch.nn as nn
        import torch.nn.functional as F
        from torch.utils.data import DataLoader, TensorDataset
        print("ANN - torch library installed correctly.")
    except:
        warnings.warn("ANN - Error: Could not import torch.")

try:
    from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, mean_squared_error, mean_absolute_error, r2_score
    from sklearn.model_selection import KFold, train_test_split
    from sklearn.preprocessing import StandardScaler
    from sklearn.datasets import load_breast_cancer, load_iris, load_wine, load_digits, fetch_california_housing
except:
    print("ANN - Installing required scikit-learn library.")
    try:
        os.system("pip install -U scikit-learn")
    except:
        os.system("pip install -U scikit-learn --user")
    try:
        from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, mean_squared_error, mean_absolute_error, r2_score
        from sklearn.model_selection import KFold, train_test_split
        from sklearn.preprocessing import StandardScaler
        from sklearn.datasets import load_breast_cancer, load_iris, load_wine, load_digits, fetch_california_housing
        print("ANN - scikit-learn library installed correctly.")
    except:
        warnings.warn("ANN - Error: Could not import scikit-learn. Please install it manually.")
class _ANNModel(nn.Module):
    def __init__(self,
                 inputSize=1,
                 outputSize=1,
                 taskType='classification',
                 validationRatio=0.2,
                 hiddenLayers=[12, 12, 12],
                 learningRate=0.001,
                 epochs=10,
                 activation="relu",
                 batchSize=1,
                 patience=4,
                 earlyStopping=True,
                 randomState=42,
                 holdout=True,
                 kFolds=3,
                 ):

        super(_ANNModel, self).__init__()

        # Initialize parameters
        self.hidden_layers = hiddenLayers
        self.output_size = outputSize
        self.activation = activation
        self.learning_rate = learningRate
        self.epochs = epochs
        self.validation_ratio = validationRatio
        self.holdout = holdout
        self.k_folds = kFolds
        self.batch_size = batchSize
        self.patience = patience
        self.early_stopping = earlyStopping
        self.random_state = randomState
        self.task_type = taskType

        self.training_loss_list = []
        self.validation_loss_list = []
        self.training_accuracy_list = []
        self.validation_accuracy_list = []
        self.training_mae_list = []
        self.validation_mae_list = []
        self.labels = []
        self.predictions = []

        # Define layers
        layers = []
        previous_size = inputSize

        # Create hidden layers
        for h in self.hidden_layers:
            layers.append(nn.Linear(previous_size, h))
            if activation == 'relu':
                layers.append(nn.ReLU())
            elif activation == 'tanh':
                layers.append(nn.Tanh())
            elif activation == 'sigmoid':
                layers.append(nn.Sigmoid())
            else:
                raise ValueError(f"Unsupported activation function: {self.activation}")
            previous_size = h

        # Output layer
        layers.append(nn.Linear(previous_size, self.output_size))

        if self.task_type == 'classification':
            if self.output_size == 1:
                layers.append(nn.Sigmoid())  # Use Sigmoid for binary classification
            else:
                layers.append(nn.LogSoftmax(dim=1))  # Use LogSoftmax for multi-category classification
        elif self.task_type != 'regression':
            raise ValueError(f"Unsupported task type: {self.task_type}")

        self.model = nn.Sequential(*layers)

        # Define the optimizer
        self.optimizer = optim.Adam(self.parameters(), lr=self.learning_rate)

        # Define the loss function
        if self.task_type == 'classification':
            if self.output_size == 1:
                self.criterion = nn.BCELoss()  # Binary Cross Entropy Loss for binary classification
            else:
                self.criterion = nn.NLLLoss()  # Negative Log Likelihood Loss for multi-category classification
        elif self.task_type == 'regression':
            self.criterion = nn.MSELoss()

    def forward(self, x):
        return self.model(x)
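
    # NOTE (illustrative annotation, not part of the package source): for example,
    # inputSize=10, hiddenLayers=[8, 16, 4], outputSize=3, and the default 'relu'
    # activation would make the constructor above build roughly:
    #
    #     nn.Sequential(
    #         nn.Linear(10, 8),  nn.ReLU(),
    #         nn.Linear(8, 16),  nn.ReLU(),
    #         nn.Linear(16, 4),  nn.ReLU(),
    #         nn.Linear(4, 3),          # output layer
    #         nn.LogSoftmax(dim=1),     # multi-class head, paired with nn.NLLLoss
    #     )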

    def train(self, X, y):
        self.training_loss_list = []
        self.validation_loss_list = []
        self.training_accuracy_list = []
        self.validation_accuracy_list = []
        self.training_mae_list = []
        self.validation_mae_list = []
        if self.holdout or self.k_folds == 1:
            self._train_holdout(X, y)
        else:
            self._train_kfold(X, y)

    def _train_holdout(self, X, y):
        # Split data into training and validation sets
        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=self.validation_ratio, random_state=self.random_state)

        train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32))
        val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.float32))

        train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=self.batch_size, shuffle=False)

        self._train_epochs(train_loader, val_loader)

    def _train_kfold(self, X, y):
        kf = KFold(n_splits=self.k_folds, shuffle=True)
        fold = 0
        total_loss = 0.0
        for train_idx, val_idx in kf.split(X):
            fold += 1
            print(f"Fold {fold}/{self.k_folds}")

            X_train, X_val = X[train_idx], X[val_idx]
            y_train, y_val = y[train_idx], y[val_idx]

            train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32))
            val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.float32))

            train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)
            val_loader = DataLoader(val_dataset, batch_size=self.batch_size, shuffle=False)

            self._train_epochs(train_loader, val_loader)

    def _train_epochs(self, train_loader, val_loader):
        best_val_loss = float('inf')
        epochs_no_improve = 0
        best_model_state = None

        for epoch in range(self.epochs):
            self.model.train()
            running_loss = 0.0
            correct_train = 0
            total_train = 0

            for inputs, labels in train_loader:
                self.optimizer.zero_grad()
                outputs = self(inputs)

                # Ensure labels have the same shape as outputs
                labels = labels.view(-1, 1) if outputs.shape[-1] == 1 else labels

                loss = self.criterion(outputs, labels)
                loss.backward()
                self.optimizer.step()
                running_loss += loss.item()

                # Calculate training accuracy, or MAE for regression
                if self.task_type == 'classification':
                    if outputs.shape[-1] > 1:
                        _, predicted = torch.max(outputs, 1)
                    else:
                        predicted = (outputs > 0.5).float()
                    total_train += labels.size(0)
                    correct_train += (predicted == labels).sum().item()
                elif self.task_type == 'regression':
                    correct_train += torch.abs(outputs - labels).sum().item()
                    total_train += labels.size(0)

            train_loss = running_loss / len(train_loader)
            if self.task_type == 'classification':
                train_accuracy = 100 * correct_train / total_train
            elif self.task_type == 'regression':
                train_accuracy = correct_train / total_train

            # Calculate validation loss and accuracy/MAE
            val_loss, val_accuracy = self.evaluate_loss(val_loader)
            self.training_loss_list.append(train_loss)
            self.validation_loss_list.append(val_loss)
            if self.task_type == 'classification':
                # print(f"Epoch {epoch+1}, Training Loss: {train_loss:.4f}, Training Accuracy: {train_accuracy:.2f}%, "
                #       f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%")
                self.training_accuracy_list.append(train_accuracy)
                self.validation_accuracy_list.append(val_accuracy)
            elif self.task_type == 'regression':
                # print(f"Epoch {epoch+1}, Training Loss: {train_loss:.4f}, Training MAE: {train_accuracy:.4f}, "
                #       f"Validation Loss: {val_loss:.4f}, Validation MAE: {val_accuracy:.4f}")
                self.training_mae_list.append(train_accuracy)
                self.validation_mae_list.append(val_accuracy)

            # Early stopping
            if self.early_stopping:
                if val_loss < best_val_loss:
                    best_val_loss = val_loss
                    epochs_no_improve = 0
                    best_model_state = self.state_dict()
                else:
                    epochs_no_improve += 1
                    if epochs_no_improve >= self.patience:
                        # print(f'Early stopping! Best validation loss: {best_val_loss}')
                        break
            # Update the epochs parameter to reflect the number of epochs actually run.
            self.epochs = epoch + 1

        # Load the best model state
        if best_model_state:
            self.load_state_dict(best_model_state)
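
    # NOTE (illustrative annotation, not part of the package source): the training
    # methods above assume X and y are NumPy arrays (the K-fold path indexes them
    # as X[train_idx]), with X shaped (n_samples, n_features) and y shaped
    # (n_samples,); both are wrapped into float32 tensors before batching.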

    def evaluate_loss(self, data_loader):
        self.model.eval()
        total_loss = 0.0
        correct_val = 0
        total_val = 0

        with torch.no_grad():
            for inputs, labels in data_loader:
                outputs = self(inputs)
                labels = labels.view(-1, 1) if outputs.shape[-1] == 1 else labels

                loss = self.criterion(outputs, labels)
                total_loss += loss.item()

                # Calculate validation accuracy, or MAE for regression
                if self.task_type == 'classification':
                    if outputs.shape[-1] > 1:
                        _, predicted = torch.max(outputs, 1)
                    else:
                        predicted = (outputs > 0.5).float()
                    total_val += labels.size(0)
                    correct_val += (predicted == labels).sum().item()
                elif self.task_type == 'regression':
                    correct_val += torch.abs(outputs - labels).sum().item()
                    total_val += labels.size(0)

        avg_loss = total_loss / len(data_loader)
        if self.task_type == 'classification':
            accuracy = 100 * correct_val / total_val
        elif self.task_type == 'regression':
            accuracy = correct_val / total_val

        return avg_loss, accuracy

    def evaluate(self, X_test, y_test):
        X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
        y_test_tensor = torch.tensor(y_test, dtype=torch.float32)
        test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
        test_loader = DataLoader(test_dataset, batch_size=self.batch_size, shuffle=False)
        self.model.eval()
        all_preds = []
        all_labels = []
        with torch.no_grad():
            for inputs, labels in test_loader:
                outputs = self(inputs).view(-1) if self.task_type == 'classification' and self.model[-1].__class__.__name__ == 'Sigmoid' else self(inputs)
                if self.task_type == 'classification':
                    if self.model[-1].__class__.__name__ == 'Sigmoid':
                        # Convert probabilities to binary predictions (0 or 1)
                        preds = [1 if x >= 0.5 else 0 for x in outputs.cpu().numpy()]
                    else:
                        # Get predicted class indices
                        _, preds = torch.max(outputs.data, 1)
                        preds = preds.cpu().numpy()
                elif self.task_type == 'regression':
                    preds = outputs.cpu().numpy()

                all_labels.extend(labels.cpu().numpy())
                all_preds.extend(preds)
        self.labels = all_labels
        self.predictions = all_preds
        return all_labels, all_preds

    def metrics(self, labels, predictions):
        from sklearn import metrics
        if self.task_type == 'regression':
            results = {
                "Mean Squared Error": mean_squared_error(labels, predictions),
                "Mean Absolute Error": mean_absolute_error(labels, predictions),
                "R-squared": r2_score(labels, predictions)
            }
        elif self.task_type == 'classification':
            results = {
                "Accuracy": accuracy_score(labels, predictions),
                "Precision": precision_score(labels, predictions, average='weighted'),
                "Recall": recall_score(labels, predictions, average='weighted'),
                "F1 Score": f1_score(labels, predictions, average='weighted'),
                "Confusion Matrix": metrics.confusion_matrix(labels, predictions)
            }
        else:
            results = None
        return results

    def save(self, path):
        if path:
            # Make sure the file extension is .pt
            ext = path[len(path)-3:len(path)]
            if ext.lower() != ".pt":
                path = path+".pt"
            torch.save(self.state_dict(), path)

    def load(self, path):
        if path:
            self.load_state_dict(torch.load(path))


class ANN():
    @staticmethod
    def DatasetByCSVPath(path, taskType='classification', trainRatio=0.6, randomState=42):
        """
        Returns a dataset according to the input CSV file path.

        Parameters
        ----------
        path : str
            The path to the input CSV file.
        taskType : str , optional
            The type of evaluation task. This can be 'classification' or 'regression'. The default is 'classification'.
        trainRatio : float , optional
            The ratio of the data to use for training and validation vs. the ratio to use for testing. The default is 0.6,
            which means that 60% of the data will be used for training and validation while 40% of the data will be reserved for testing.
        randomState : int , optional
            The randomState parameter is used to ensure reproducibility of the results. When you set the randomState parameter to a specific integer value,
            it controls the shuffling of the data before splitting it into training and testing sets.
            This means that every time you run your code with the same randomState value and the same dataset, you will get the same split of the data.
            The default is 42, which is just a randomly picked integer number. Specify None for random sampling.

        Returns
        -------
        dict
            Returns a dictionary with the following keys:
            'XTrain' : the features used for training
            'XTest' : the features used for testing
            'yTrain' : the targets used for training
            'yTest' : the targets used for testing
            'inputSize' : the number of features
            'outputSize' : the number of outputs (1 for regression and binary classification; otherwise the number of classes)

        """
        import pandas as pd
        import numpy as np
        from sklearn.preprocessing import StandardScaler
        from sklearn.model_selection import train_test_split
        # Load the CSV file into a pandas DataFrame
        df = pd.read_csv(path)

        # Assume the last column is the target
        features = df.iloc[:, :-1].values
        target = df.iloc[:, -1].values

        scaler = StandardScaler()
        X = scaler.fit_transform(features)
        y = target

        # Ensure target is in the correct format
        if taskType == 'classification' and len(np.unique(y)) == 2:
            y = y.reshape(-1, 1)  # Reshape for binary classification
        elif taskType == 'classification':
            y = y.astype(np.int64)  # Convert to long for multi-class classification

        y = y.astype(np.float32)  # Convert to float32 for PyTorch

        input_size = X.shape[1]  # Number of features
        num_classes = len(np.unique(y))
        output_size = 1 if taskType == 'regression' or num_classes == 2 else num_classes

        # Split data into train and test sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=(1.0 - trainRatio), random_state=randomState)

        return {'XTrain': X_train,
                'XTest': X_test,
                'yTrain': y_train,
                'yTest': y_test,
                'inputSize': input_size,
                'outputSize': output_size}
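
    # NOTE (illustrative annotation with hypothetical data, not part of the package
    # source): DatasetByCSVPath reads the last CSV column as the target, so a file like
    #
    #     area,height,apertures,label
    #     12.5,3.0,2,0
    #     40.0,6.5,8,1
    #
    # yields three standardized features per sample (inputSize=3) and a binary
    # target (outputSize=1).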

    @staticmethod
    def DatasetBySampleName(name, trainRatio=0.6, randomState=42):
        """
        Returns a dataset from the scikit-learn dataset samples.

        Parameters
        ----------
        name : str
            The name of the dataset. This can be one of ['breast_cancer', 'california_housing', 'digits', 'iris', 'wine'].
        trainRatio : float , optional
            The ratio of the data to use for training and validation vs. the ratio to use for testing. The default is 0.6,
            which means that 60% of the data will be used for training and validation while 40% of the data will be reserved for testing.
        randomState : int , optional
            The randomState parameter is used to ensure reproducibility of the results. When you set the randomState parameter to a specific integer value,
            it controls the shuffling of the data before splitting it into training and testing sets.
            This means that every time you run your code with the same randomState value and the same dataset, you will get the same split of the data.
            The default is 42, which is just a randomly picked integer number. Specify None for random sampling.

        Returns
        -------
        dict
            Returns a dictionary with the following keys:
            'XTrain' : the features used for training
            'XTest' : the features used for testing
            'yTrain' : the targets used for training
            'yTest' : the targets used for testing
            'inputSize' : the number of features
            'outputSize' : the number of outputs (1 for regression and binary classification; otherwise the number of classes)

        """
        from sklearn.datasets import load_breast_cancer, load_iris, load_wine, load_digits, fetch_california_housing
        from sklearn.model_selection import train_test_split

        if name == 'breast_cancer':
            dataset = load_breast_cancer()
        elif name == 'california_housing':
            dataset = fetch_california_housing()
        elif name == 'digits':
            dataset = load_digits()
        elif name == 'iris':
            dataset = load_iris()
        elif name == 'wine':
            dataset = load_wine()
        else:
            print(f"ANN.DatasetBySampleName - Error: Unsupported dataset: {name}. Returning None.")
            return None

        # Standardize the features
        scaler = StandardScaler()
        X = scaler.fit_transform(dataset.data)
        y = dataset.target

        task_type = ANN.HyperparametersBySampleDatasetName(name)['taskType']
        # For binary classification, ensure the target is in the correct format (1D tensor)
        if task_type == 'classification' and len(np.unique(y)) == 2:
            y = y.astype(np.float32)
        elif task_type == 'classification':
            y = y.astype(np.int64)

        input_size = X.shape[1]  # Number of features
        num_classes = len(np.unique(y))
        output_size = 1 if task_type == 'regression' or num_classes == 2 else num_classes

        # Split data into train and test sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=(1.0 - trainRatio), random_state=randomState)

        return {'XTrain': X_train,
                'XTest': X_test,
                'yTrain': y_train,
                'yTest': y_test,
                'inputSize': input_size,
                'outputSize': output_size}

    @staticmethod
    def DatasetSamplesNames():
        """
        Returns the names of the available sample datasets from scikit-learn.

        Parameters
        ----------

        Returns
        -------
        list
            The list of names of available sample datasets.
        """
        return ['breast_cancer', 'california_housing', 'digits', 'iris', 'wine']

    @staticmethod
    def DatasetSplit(X, y, trainRatio=0.6, randomState=42):
        """
        Splits the input dataset according to the input ratios.

        Parameters
        ----------
        X : list
            The list of features.
        y : list
            The list of targets.
        trainRatio : float , optional
            The ratio of the data to use for training. The default is 0.6.
            This means that 60% of the data will be used for training and validation while 40% of the data will be reserved for testing.
        randomState : int , optional
            The randomState parameter is used to ensure reproducibility of the results. When you set the randomState parameter to a specific integer value,
            it controls the shuffling of the data before splitting it into training and testing sets.
            This means that every time you run your code with the same randomState value and the same dataset, you will get the same split of the data.
            The default is 42, which is just a randomly picked integer number. Specify None for random sampling.

        Returns
        -------
        list
            Returns the following list:
            X_train, X_test, y_train, y_test
            X_train is the list of features used for training
            X_test is the list of features used for testing
            y_train is the list of targets used for training
            y_test is the list of targets used for testing

        """
        # Split the data into train and test sets
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=(1.0 - trainRatio), random_state=randomState)

        return X_train, X_test, y_train, y_test

    @staticmethod
    def HyperparametersByInput(taskType='classification',
                               validationRatio=0.2,
                               hiddenLayers=[12, 12, 12],
                               learningRate=0.001,
                               epochs=10,
                               activation='relu',
                               batchSize=1,
                               patience=5,
                               earlyStopping=True,
                               randomState=42,
                               holdout=True,
                               kFolds=3):
        """
        Returns a hyperparameters dictionary built from the input values.

        Parameters
        ----------
        taskType : str , optional
            The desired task type. This can be either 'classification' or 'regression' (case insensitive).
            Classification is a type of supervised learning where the model is trained to predict categorical labels (classes) from input data.
            Regression is a type of supervised learning where the model is trained to predict continuous numerical values from input data.
        validationRatio : float , optional
            The split ratio between training and validation. The default is 0.2. This means that
            80% of the data will be used for training and 20% will be used for validation.
        hiddenLayers : list , optional
            The number of hidden layers and the number of nodes in each layer.
            If you wish to have 3 hidden layers with 8 nodes in the first,
            16 nodes in the second, and 4 nodes in the last layer, you specify [8,16,4].
            The default is [12,12,12].
        learningRate : float , optional
            The desired learning rate. The default is 0.001. See https://en.wikipedia.org/wiki/Learning_rate
        epochs : int , optional
            The desired number of epochs. The default is 10. See https://en.wikipedia.org/wiki/Neural_network_(machine_learning)
        activation : str , optional
            The type of activation layer. See https://en.wikipedia.org/wiki/Activation_function
            Some common alternatives include:
            'relu' : ReLU (Rectified Linear Unit) is an activation function that outputs the input directly if it is positive; otherwise, it outputs zero.
            'sigmoid' : The sigmoid activation function, which maps inputs to a range between 0 and 1.
            'tanh' : The hyperbolic tangent activation function, which maps inputs to a range between -1 and 1.
            'leaky_relu' : A variant of the ReLU that allows a small, non-zero gradient when the unit is not active.
            'elu' : Exponential Linear Unit, which improves learning characteristics by having a smooth curve.
            'swish' : An activation function defined as x * sigmoid(x).
            'softmax' : Often used in the output layer of a classification network, it normalizes the outputs to a probability distribution.
            'linear' : A linear activation function, which is often used in the output layer of regression networks.
            The default is 'relu'. Note that the current _ANNModel implementation accepts 'relu', 'tanh', and 'sigmoid' only.
        batchSize : int , optional
            The desired number of samples that will be propagated through the network at one time before the model's internal parameters are updated. Instead of updating the model parameters after every single training sample
            (stochastic gradient descent) or after the entire training dataset (batch gradient descent), mini-batch gradient descent updates the model parameters after
            a specified number of samples, which is determined by batchSize. The default is 1.
        patience : int , optional
            The desired number of epochs with no improvement in the validation loss after which training will be stopped if early stopping is enabled. The default is 5.
        earlyStopping : bool , optional
            If set to True, the training will stop if the validation loss does not improve after a certain number of epochs defined by patience. The default is True.
        randomState : int , optional
            The randomState parameter is used to ensure reproducibility of the results. When you set the randomState parameter to a specific integer value,
            it controls the shuffling of the data before splitting it into training and testing sets.
            This means that every time you run your code with the same randomState value and the same dataset, you will get the same split of the data.
            The default is 42, which is just a randomly picked integer number. Specify None for random sampling.
        holdout : bool , optional
            If set to True, the Holdout cross-validation method is used. Otherwise, the K-fold method is used. The default is True.
        kFolds : int , optional
            The number of splits (folds) to use if K-fold cross-validation is selected. The default is 3.

        Returns
        -------
        dict
            Returns a dictionary with the following keys:
            'taskType'
            'validationRatio'
            'hiddenLayers'
            'learningRate'
            'epochs'
            'activation'
            'batchSize'
            'patience'
            'earlyStopping'
            'randomState'
            'holdout'
            'kFolds'
        """
        return {
            'taskType': taskType,
            'validationRatio': validationRatio,
            'hiddenLayers': hiddenLayers,
            'learningRate': learningRate,
            'epochs': epochs,
            'activation': activation,
            'batchSize': batchSize,
            'patience': patience,
            'earlyStopping': earlyStopping,
            'randomState': randomState,
            'holdout': holdout,
            'kFolds': kFolds}

    @staticmethod
    def HyperparametersBySampleDatasetName(name):
        """
        Returns the suggested initial hyperparameters to use for the dataset named in the name input parameter.
        You can get a list of available sample datasets using ANN.DatasetSamplesNames().

        Parameters
        ----------
        name : str
            The input name of the sample dataset. This must be one of ['breast_cancer', 'california_housing', 'digits', 'iris', 'wine'].

        Returns
        -------
        dict
            Returns a dictionary with the following keys:
            'taskType'
            'validationRatio'
            'hiddenLayers'
            'learningRate'
            'epochs'
            'activation'
            'batchSize'
            'patience'
            'earlyStopping'
            'randomState'
            'holdout'
            'kFolds'

        """
        hyperparameters = {
            'breast_cancer': {
                'taskType': 'classification',
                'validationRatio': 0.2,
                'hiddenLayers': [30, 15],
                'learningRate': 0.001,
                'epochs': 100,
                'activation': 'relu',
                'batchSize': 32,
                'patience': 10,
                'earlyStopping': True,
                'randomState': 42,
                'holdout': True,
                'kFolds': 3
            },
            'california_housing': {
                'taskType': 'regression',
                'validationRatio': 0.2,
                'hiddenLayers': [50, 25],
                'learningRate': 0.001,
                'epochs': 150,
                'activation': 'relu',
                'batchSize': 32,
                'patience': 10,
                'earlyStopping': True,
                'randomState': 42,
                'holdout': True,
                'kFolds': 3
            },
            'digits': {
                'taskType': 'classification',
                'validationRatio': 0.2,
                'hiddenLayers': [64, 32],
                'learningRate': 0.001,
                'epochs': 50,
                'activation': 'relu',
                'batchSize': 32,
                'patience': 10,
                'earlyStopping': True,
                'randomState': 42,
                'holdout': True,
                'kFolds': 3
            },
            'iris': {
                'taskType': 'classification',
                'validationRatio': 0.2,
                'hiddenLayers': [10, 5],
                'learningRate': 0.001,
                'epochs': 100,
                'activation': 'relu',
                'batchSize': 16,
                'patience': 10,
                'earlyStopping': True,
                'randomState': 42,
                'holdout': True,
                'kFolds': 3
            },
            'wine': {
                'taskType': 'classification',
                'validationRatio': 0.2,
                'hiddenLayers': [50, 25],
                'learningRate': 0.001,
                'epochs': 100,
                'activation': 'relu',
                'batchSize': 16,
                'patience': 10,
                'earlyStopping': True,
                'randomState': 42,
                'holdout': True,
                'kFolds': 3
            }
        }

        if name in hyperparameters:
            return hyperparameters[name]
        else:
            print(f"ANN.HyperparametersBySampleDatasetName - Error: Dataset name '{name}' not recognized. Available datasets: {list(hyperparameters.keys())}. Returning None.")
            return None

    @staticmethod
    def ModelData(model):
        """
        Returns the data of the model.

        Parameters
        ----------
        model : Model
            The input model.

        Returns
        -------
        dict
            A dictionary containing the model data. The keys in the dictionary are:
            'epochs'
            'trainingLoss'
            'validationLoss'
            'trainingAccuracy' (for classification tasks only)
            'validationAccuracy' (for classification tasks only)
            'trainingMAE' (for regression tasks only)
            'validationMAE' (for regression tasks only)

        """
        return {
            'epochs': model.epochs,
            'trainingLoss': model.training_loss_list,
            'validationLoss': model.validation_loss_list,
            'trainingAccuracy': model.training_accuracy_list,
            'validationAccuracy': model.validation_accuracy_list,
            'trainingMAE': model.training_mae_list,
            'validationMAE': model.validation_mae_list
        }

    @staticmethod
    def ModelInitialize(inputSize, outputSize, hyperparameters=None):
        """
        Initializes an ANN model given the input parameters.

        Parameters
        ----------
        inputSize : int
            The number of initial inputs. This is usually computed directly from the dataset.
        outputSize : int
            The number of categories for classification tasks. This is usually computed directly from the dataset.
        hyperparameters : dict
            The hyperparameters dictionary. You can create one using ANN.HyperparametersByInput or, if you are using a sample dataset, you can get it from ANN.HyperparametersBySampleDatasetName.

        Returns
        -------
        _ANNModel
            Returns the initialized model.

        """
        if hyperparameters == None:
            print("ANN.ModelInitialize - Error: The input hyperparameters parameter is invalid. Returning None.")
            return None

        task_type = hyperparameters['taskType']
        validation_ratio = hyperparameters['validationRatio']
        hidden_layers = hyperparameters['hiddenLayers']
        learning_rate = hyperparameters['learningRate']
        epochs = hyperparameters['epochs']
        activation = hyperparameters['activation']
        batch_size = hyperparameters['batchSize']
        patience = hyperparameters['patience']
        early_stopping = hyperparameters['earlyStopping']
        random_state = hyperparameters['randomState']
        holdout = hyperparameters['holdout']
        k_folds = hyperparameters['kFolds']

        task_type = task_type.lower()
        if task_type not in ['classification', 'regression']:
            print("ANN.ModelInitialize - Error: The input parameter taskType is not recognized. It must be either 'classification' or 'regression'. Returning None.")
            return None

        model = _ANNModel(inputSize=inputSize,
                          outputSize=outputSize,
                          taskType=task_type,
                          validationRatio=validation_ratio,
                          hiddenLayers=hidden_layers,
                          learningRate=learning_rate,
                          epochs=epochs,
                          activation=activation,
                          batchSize=batch_size,
                          patience=patience,
                          earlyStopping=early_stopping,
                          randomState=random_state,
                          holdout=holdout,
                          kFolds=k_folds
                          )
        return model
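
    # NOTE (illustrative annotation, not part of the package source): a typical
    # initialization from custom hyperparameters looks like:
    #
    #     hp = ANN.HyperparametersByInput(taskType='regression', hiddenLayers=[50, 25], epochs=150)
    #     model = ANN.ModelInitialize(inputSize=8, outputSize=1, hyperparameters=hp)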

    @staticmethod
    def ModelTrain(model, X, y):
        """
        Trains the input model given the input features (X) and targets (y).

        Parameters
        ----------
        model : ANN Model
            The input model.
        X : list
            The input list of features.
        y : list
            The input list of targets.

        Returns
        -------
        _ANNModel
            Returns the trained model.

        """
        model.train(X, y)
        return model

    @staticmethod
    def ModelEvaluate(model, X, y):
        """
        Returns the labels (actual values) and predictions (predicted values) given the input model, features (X), and targets (y).

        Parameters
        ----------
        model : ANN Model
            The input model.
        X : list
            The input list of features.
        y : list
            The input list of targets.

        Returns
        -------
        list, list
            Returns two lists: labels and predictions.

        """
        labels, predictions = model.evaluate(X, y)
        return labels, predictions

    @staticmethod
    def ModelFigures(model, width=900, height=600, template="plotly", colorScale='viridis', colorSamples=10):
        """
        Creates Plotly figures from the model data. For classification tasks this includes
        a confusion matrix, loss, and accuracy figures. For regression tasks this includes
        loss and MAE figures.

        Parameters
        ----------
        model : ANN Model
            The input model.
        width : int , optional
            The desired figure width in pixels. The default is 900.
        height : int , optional
            The desired figure height in pixels. The default is 600.
        template : str , optional
            The desired Plotly template to use for the scatter plot.
            This can be one of ['ggplot2', 'seaborn', 'simple_white', 'plotly',
            'plotly_white', 'plotly_dark', 'presentation', 'xgridoff',
            'ygridoff', 'gridon', 'none']. The default is "plotly".
        colorScale : str , optional
            The desired type of plotly color scales to use (e.g. "viridis", "plasma"). The default is "viridis". For a full list of names, see https://plotly.com/python/builtin-colorscales/.
        colorSamples : int , optional
            The number of discrete color samples to use for displaying the data. The default is 10.

        Returns
        -------
        list, list
            Returns a list of Plotly figures and a corresponding list of file names.

        """
        import plotly.graph_objects as go
        from topologicpy.Plotly import Plotly
        figures = []
        filenames = []
        if model.task_type.lower() == 'classification':
            data_lists = [[model.training_loss_list, model.validation_loss_list], [model.training_accuracy_list, model.validation_accuracy_list]]
            label_lists = [['Training Loss', 'Validation Loss'], ['Training Accuracy', 'Validation Accuracy']]
            titles = ['Training and Validation Loss', 'Training and Validation Accuracy']
            legend_titles = ['Loss Type', 'Accuracy Type']
            xaxis_titles = ['Epoch', 'Epoch']
            yaxis_titles = ['Loss', 'Accuracy']
            filenames = yaxis_titles.copy()
            if len(model.labels) > 0 and len(model.labels) == len(model.predictions):
                confusion_matrix = ANN.ModelMetrics(model, labels=model.labels, predictions=model.predictions)['Confusion Matrix']
                confusion_matrix_figure = Plotly.FigureByConfusionMatrix(confusion_matrix, width=width, height=height, colorScale=colorScale, colorSamples=colorSamples)
                figures.append(confusion_matrix_figure)
                # Keep the file names aligned with the order in which figures are appended.
                filenames.insert(0, "ConfusionMatrix")
        elif model.task_type.lower() == 'regression':
            data_lists = [[model.training_loss_list, model.validation_loss_list], [model.training_mae_list, model.validation_mae_list]]
            label_lists = [['Training Loss', 'Validation Loss'], ['Training MAE', 'Validation MAE']]
            titles = ['Training and Validation Loss', 'Training and Validation MAE']
            legend_titles = ['Loss Type', 'MAE Type']
            xaxis_titles = ['Epoch', 'Epoch']
            yaxis_titles = ['Loss', 'MAE']
            filenames = yaxis_titles.copy()
        else:
            print("ANN.ModelFigures - Error: Could not recognize model task type. Returning None.")
            return None
        for i in range(2):
            data = data_lists[i]
            labels = label_lists[i]
            title = titles[i]
            legend_title = legend_titles[i]
            xaxis_title = xaxis_titles[i]
            yaxis_title = yaxis_titles[i]
            lengths = [len(d) for d in data]

            max_length = max(lengths)
            x_ticks = list(range(1, max_length + 1))

            figure = go.Figure()
            for j in range(len(data)):
                figure.add_trace(go.Scatter(x=x_ticks, y=data[j], mode='lines+markers', name=labels[j]))

            figure.update_layout(
                title=title,
                xaxis_title=xaxis_title,
                yaxis_title=yaxis_title,
                legend_title=legend_title,
                template=template,
                width=width,
                height=height
            )
            figures.append(figure)
        return figures, filenames

    @staticmethod
    def ModelMetrics(model, labels, predictions):
        """
        Returns the model performance metrics given the input labels and predictions, and the model's task type.

        Parameters
        ----------
        model : ANN Model
            The input model.
        labels : list
            The input list of labels (actual values).
        predictions : list
            The input list of predictions (predicted values).

        Returns
        -------
        dict
            If the task type is 'classification', this method returns a dictionary with the following keys:
            "Accuracy"
            "Precision"
            "Recall"
            "F1 Score"
            "Confusion Matrix"
            Else, if the task type is 'regression', this method returns a dictionary with the following keys:
            "Mean Squared Error"
            "Mean Absolute Error"
            "R-squared"

        """
        metrics = model.metrics(labels, predictions)
        return metrics

    @staticmethod
    def ModelSave(model, path, overwrite=False):
        """
        Saves the model.

        Parameters
        ----------
        model : Model
            The input model.
        path : str
            The file path at which to save the model.
        overwrite : bool , optional
            If set to True, any existing file will be overwritten. Otherwise, it won't. The default is False.

        Returns
        -------
        bool
            True if the model is saved correctly. None otherwise.

        """
        import os

        if model == None:
            print("ANN.ModelSave - Error: The input model parameter is invalid. Returning None.")
            return None
        if path == None:
            print("ANN.ModelSave - Error: The input path parameter is invalid. Returning None.")
            return None
        if not overwrite and os.path.exists(path):
            print("ANN.ModelSave - Error: a file already exists at the specified path and overwrite is set to False. Returning None.")
            return None
        if overwrite and os.path.exists(path):
            os.remove(path)
        # Make sure the file extension is .pt
        ext = path[len(path)-3:len(path)]
        if ext.lower() != ".pt":
            path = path+".pt"
        # Save the trained model
        torch.save(model.state_dict(), path)
        return True

    @staticmethod
    def ModelLoad(model, path):
        """
        Loads the model state dictionary found at the input file path. The model input parameter must be pre-initialized using the ModelInitialize method.

        Parameters
        ----------
        model : ANN object
            The input ANN model. The model must be pre-initialized using the ModelInitialize method.
        path : str
            The file path for the saved model state dictionary.

        Returns
        -------
        ANN model
            The model with the loaded state dictionary.

        """
        from os.path import exists

        if not exists(path):
            print("ANN.ModelLoad - Error: The specified path does not exist. Returning None.")
            return None
        model.load(path)
        return model

    @staticmethod
    def ConfusionMatrix(actual, predicted, normalize=False):
        """
        Returns the confusion matrix for the input actual and predicted labels. This is to be used with classification tasks only, not regression.

        Parameters
        ----------
        actual : list
            The input list of actual labels.
        predicted : list
            The input list of predicted labels.
        normalize : bool , optional
            If set to True, the returned data will be normalized (proportions summing to 1). Otherwise, actual counts are returned. The default is False.

        Returns
        -------
        list
            The created confusion matrix.

        """
        import os
        import warnings

        try:
            from sklearn import metrics
            from sklearn.metrics import accuracy_score
        except:
            print("ANN.ConfusionMatrix - Installing required scikit-learn (sklearn) library.")
            try:
                os.system("pip install scikit-learn")
            except:
                os.system("pip install scikit-learn --user")
            try:
                from sklearn import metrics
                from sklearn.metrics import accuracy_score
                print("ANN.ConfusionMatrix - scikit-learn (sklearn) library installed correctly.")
            except:
                warnings.warn("ANN.ConfusionMatrix - Error: Could not import scikit-learn (sklearn). Please try to install scikit-learn manually. Returning None.")
                return None

        if not isinstance(actual, list):
            print("ANN.ConfusionMatrix - Error: The actual input is not a list. Returning None.")
            return None
        if not isinstance(predicted, list):
            print("ANN.ConfusionMatrix - Error: The predicted input is not a list. Returning None.")
            return None
        if len(actual) != len(predicted):
            print("ANN.ConfusionMatrix - Error: The two input lists do not have the same length. Returning None.")
            return None
        if normalize:
            cm = np.transpose(metrics.confusion_matrix(y_true=actual, y_pred=predicted, normalize="true"))
        else:
            cm = np.transpose(metrics.confusion_matrix(y_true=actual, y_pred=predicted))
        return cm
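
    # NOTE (illustrative annotation, not part of the package source): sklearn's
    # confusion_matrix places actual classes on rows and predicted classes on
    # columns; the transpose above flips this, so the returned matrix has
    # predicted classes on rows and actual classes on columns. With
    # normalize=True, each actual class (now a column) sums to 1.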

    @staticmethod
    def Show(data,
             labels,
             title="Training/Validation",
             xTitle="Epochs",
             xSpacing=1,
             yTitle="Accuracy and Loss",
             ySpacing=0.1,
             useMarkers=False,
             chartType="Line",
             width=950,
             height=500,
             backgroundColor='rgba(0,0,0,0)',
             gridColor='lightgray',
             marginLeft=0,
             marginRight=0,
             marginTop=40,
             marginBottom=0,
             renderer="notebook"):
        """
        Shows the data in a plotly graph.

        Parameters
        ----------
        data : list
            The data to display.
        labels : list
            The labels to use for the data.
        title : str , optional
            The chart title. The default is "Training/Validation".
        xTitle : str , optional
            The X-axis title. The default is "Epochs".
        xSpacing : float , optional
            The X-axis spacing. The default is 1.0.
        yTitle : str , optional
            The Y-axis title. The default is "Accuracy and Loss".
        ySpacing : float , optional
            The Y-axis spacing. The default is 0.1.
        useMarkers : bool , optional
            If set to True, markers will be displayed. The default is False.
        chartType : str , optional
            The desired type of chart. The options are "Line", "Bar", or "Scatter". It is case insensitive. The default is "Line".
        width : int , optional
            The desired width of the figure. The default is 950.
        height : int , optional
            The desired height of the figure. The default is 500.
        backgroundColor : str , optional
            The desired background color. This can be any plotly color string and may be specified as:
            - A hex string (e.g. '#ff0000')
            - An rgb/rgba string (e.g. 'rgb(255,0,0)')
            - An hsl/hsla string (e.g. 'hsl(0,100%,50%)')
            - An hsv/hsva string (e.g. 'hsv(0,100%,100%)')
            - A named CSS color.
            The default is 'rgba(0,0,0,0)' (transparent).
        gridColor : str , optional
            The desired grid color. This can be any plotly color string and may be specified as:
            - A hex string (e.g. '#ff0000')
            - An rgb/rgba string (e.g. 'rgb(255,0,0)')
            - An hsl/hsla string (e.g. 'hsl(0,100%,50%)')
            - An hsv/hsva string (e.g. 'hsv(0,100%,100%)')
            - A named CSS color.
            The default is 'lightgray'.
        marginLeft : int , optional
            The desired left margin in pixels. The default is 0.
        marginRight : int , optional
            The desired right margin in pixels. The default is 0.
        marginTop : int , optional
            The desired top margin in pixels. The default is 40.
        marginBottom : int , optional
            The desired bottom margin in pixels. The default is 0.
        renderer : str , optional
            The desired plotly renderer. The default is "notebook".

        Returns
        -------
        None.

        """
        from topologicpy.Plotly import Plotly

        dataFrame = Plotly.DataByDGL(data, labels)
        fig = Plotly.FigureByDataFrame(dataFrame,
                                       labels=labels,
                                       title=title,
                                       xTitle=xTitle,
                                       xSpacing=xSpacing,
                                       yTitle=yTitle,
                                       ySpacing=ySpacing,
                                       useMarkers=useMarkers,
                                       chartType=chartType,
                                       width=width,
                                       height=height,
                                       backgroundColor=backgroundColor,
                                       gridColor=gridColor,
                                       marginRight=marginRight,
                                       marginLeft=marginLeft,
                                       marginTop=marginTop,
                                       marginBottom=marginBottom
                                       )
        Plotly.Show(fig, renderer=renderer)
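
Taken together, the new ANN class is driven entirely through its static methods. A minimal end-to-end sketch of that workflow, using only the signatures above (the sample dataset and its suggested hyperparameters ship with the module; treat this as an illustration rather than a prescribed recipe):

    from topologicpy.ANN import ANN

    # Load a bundled scikit-learn sample dataset (binary classification) and
    # the module's suggested hyperparameters for it.
    dataset = ANN.DatasetBySampleName('breast_cancer', trainRatio=0.6, randomState=42)
    hyperparameters = ANN.HyperparametersBySampleDatasetName('breast_cancer')

    # Initialize and train the model, then evaluate it on the held-back test split.
    model = ANN.ModelInitialize(dataset['inputSize'], dataset['outputSize'], hyperparameters=hyperparameters)
    model = ANN.ModelTrain(model, dataset['XTrain'], dataset['yTrain'])
    labels, predictions = ANN.ModelEvaluate(model, dataset['XTest'], dataset['yTest'])

    # Report accuracy, precision, recall, F1 score, and the confusion matrix.
    print(ANN.ModelMetrics(model, labels, predictions))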