topologicpy 0.7.19__py3-none-any.whl → 0.7.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- topologicpy/ANN.py +573 -705
- topologicpy/Cell.py +0 -12
- topologicpy/CellComplex.py +0 -2
- topologicpy/Face.py +21 -3
- topologicpy/Graph.py +5 -3
- topologicpy/Matrix.py +0 -14
- topologicpy/Shell.py +12 -1
- topologicpy/Wire.py +14 -5
- topologicpy/version.py +1 -1
- {topologicpy-0.7.19.dist-info → topologicpy-0.7.21.dist-info}/METADATA +1 -1
- {topologicpy-0.7.19.dist-info → topologicpy-0.7.21.dist-info}/RECORD +14 -14
- {topologicpy-0.7.19.dist-info → topologicpy-0.7.21.dist-info}/LICENSE +0 -0
- {topologicpy-0.7.19.dist-info → topologicpy-0.7.21.dist-info}/WHEEL +0 -0
- {topologicpy-0.7.19.dist-info → topologicpy-0.7.21.dist-info}/top_level.txt +0 -0
topologicpy/ANN.py
CHANGED
@@ -89,312 +89,334 @@ except:
 except:
     warnings.warn("ANN - Error: Could not import scikit. Please install it manually.")
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from sklearn.datasets import fetch_california_housing, load_breast_cancer, load_iris
+from sklearn.model_selection import train_test_split, KFold
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_error, r2_score, precision_score, recall_score, f1_score, confusion_matrix
+import numpy as np
+
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from sklearn.datasets import fetch_california_housing, load_breast_cancer, load_iris
+from sklearn.model_selection import train_test_split, KFold
+from sklearn.preprocessing import StandardScaler
+from sklearn.metrics import accuracy_score, mean_squared_error, mean_absolute_error, r2_score, precision_score, recall_score, f1_score, confusion_matrix
+import numpy as np
+
+class _ANN(nn.Module):
+    def __init__(self, input_size, hyperparameters, dataset=None):
+        super(_ANN, self).__init__()
+        self.title = hyperparameters['title']
+        self.task_type = hyperparameters['task_type']
+        self.cross_val_type = hyperparameters['cross_val_type']
+        self.k_folds = hyperparameters.get('k_folds', 5)
+        self.test_size = hyperparameters.get('test_size', 0.3)
+        self.validation_ratio = hyperparameters.get('validation_ratio', 0.1)
+        self.random_state = hyperparameters.get('random_state', 42)
+        self.batch_size = hyperparameters.get('batch_size', 32)
+        self.learning_rate = hyperparameters.get('learning_rate', 0.001)
+        self.epochs = hyperparameters.get('epochs', 100)
+        self.early_stopping = hyperparameters.get('early_stopping', False)
+        self.patience = hyperparameters.get('patience', 10)
+        self.interval = hyperparameters.get('interval',1)
+        self.mantissa = hyperparameters.get('mantissa', 4)
 
-
-        self.
-        self.output_size = outputSize
-        self.activation = activation
-        self.learning_rate = learningRate
-        self.epochs = epochs
-        self.validation_ratio = validationRatio
-        self.holdout = holdout
-        self.k_folds = kFolds
-        self.batch_size = batchSize
-        self.patience = patience
-        self.early_stopping = earlyStopping
-        self.random_state = randomState
-        self.task_type = taskType
-
-        self.training_loss_list = []
-        self.validation_loss_list = []
-        self.training_accuracy_list = []
-        self.validation_accuracy_list = []
-        self.training_mae_list = []
-        self.validation_mae_list = []
-        self.labels = []
-        self.predictions = []
+        self.train_loss_list = []
+        self.val_loss_list = []
 
+        self.train_accuracy_list = []
+        self.val_accuracy_list = []
+
+        self.train_mse_list = []
+        self.val_mse_list = []
+
+        self.train_mae_list = []
+        self.val_mae_list = []
+
+        self.train_r2_list = []
+        self.val_r2_list = []
+        self.epoch_list = []
+
+        self.metrics = {}
 
-        # Define layers
         layers = []
-
+        hidden_layers = hyperparameters['hidden_layers']
+
+        # Compute output_size based on task type and dataset
+        if self.task_type == 'regression':
+            output_size = 1
+        elif self.task_type == 'binary_classification':
+            output_size = 1
+        elif self.task_type == 'classification' and dataset is not None:
+            output_size = len(np.unique(dataset.target))
+        else:
+            print("ANN - Error: Invalid task type or dataset not provided for classification. Returning None.")
+            return None
 
         # Create hidden layers
-
-
-
-
-
-                layers.append(nn.Tanh())
-            elif activation == 'sigmoid':
-                layers.append(nn.Sigmoid())
-            else:
-                raise ValueError(f"Unsupported activation function: {self.activation}")
-            previous_size = h
+        in_features = input_size
+        for hidden_units in hidden_layers:
+            layers.append(nn.Linear(in_features, hidden_units))
+            layers.append(nn.ReLU())
+            in_features = hidden_units
 
         # Output layer
-        layers.append(nn.Linear(
-
-        if self.task_type == 'classification':
-            if self.output_size == 1:
-                layers.append(nn.Sigmoid()) # Use Sigmoid for binary classification
-            else:
-                layers.append(nn.LogSoftmax(dim=1)) # Use LogSoftmax for multi-category classification
-        elif self.task_type != 'regression':
-            raise ValueError(f"Unsupported task type: {self.task_type}")
-
+        layers.append(nn.Linear(in_features, output_size))
         self.model = nn.Sequential(*layers)
 
-        #
-        self.
+        # Loss function based on task type
+        if self.task_type == 'regression':
+            self.loss_fn = nn.MSELoss()
+        elif self.task_type == 'binary_classification':
+            self.loss_fn = nn.BCEWithLogitsLoss()
+        else: # multi-category classification
+            self.loss_fn = nn.CrossEntropyLoss()
 
-
-
-
-
-
-            self.criterion = nn.NLLLoss() # Negative Log Likelihood Loss for multi-category classification
-        elif self.task_type == 'regression':
-            self.criterion = nn.MSELoss()
+
+
+        # Initialize best model variables
+        self.best_model_state = None
+        self.best_val_loss = np.inf
 
     def forward(self, x):
         return self.model(x)
-
-    def 
-        self.
-        self.
-        self.training_accuracy_list = []
-        self.validation_accuracy_list = []
-        self.training_mae_list = []
-        self.validation_mae_list = []
-        if self.holdout == True or self.k_folds == 1:
-            self._train_holdout(X, y)
-        else:
-            self._train_kfold(X, y)
-
-    def _train_holdout(self, X, y):
-        # Split data into training and validation sets
-        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=self.validation_ratio, random_state=self.random_state)
+
+    def train_model(self, X_train, y_train, X_val=None, y_val=None):
+        self.train_loss_list = []
+        self.val_loss_list = []
 
-
-
+        self.train_accuracy_list = []
+        self.val_accuracy_list = []
 
-
-
+        self.train_mse_list = []
+        self.val_mse_list = []
 
-        self.
-
-    def _train_kfold(self, X, y):
-        kf = KFold(n_splits=self.k_folds, shuffle=True)
-        fold = 0
-        total_loss = 0.0
-        for train_idx, val_idx in kf.split(X):
-            fold += 1
-            print(f"Fold {fold}/{self.k_folds}")
-
-            X_train, X_val = X[train_idx], X[val_idx]
-            y_train, y_val = y[train_idx], y[val_idx]
-
-            train_dataset = TensorDataset(torch.tensor(X_train, dtype=torch.float32), torch.tensor(y_train, dtype=torch.float32))
-            val_dataset = TensorDataset(torch.tensor(X_val, dtype=torch.float32), torch.tensor(y_val, dtype=torch.float32))
-
-            train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=True)
-            val_loader = DataLoader(val_dataset, batch_size=self.batch_size, shuffle=False)
-
-            self._train_epochs(train_loader, val_loader)
-
-    def _train_epochs(self, train_loader, val_loader):
-        best_val_loss = float('inf')
-        epochs_no_improve = 0
-        best_model_state = None
+        self.train_mae_list = []
+        self.val_mae_list = []
 
+        self.train_r2_list = []
+        self.val_r2_list = []
+        self.epoch_list = []
+        optimizer = optim.Adam(self.parameters(), lr=self.learning_rate)
+        # Reinitialize optimizer for each fold
+        optimizer = optim.Adam(self.parameters(), lr=self.learning_rate)
+        current_patience = self.patience if self.early_stopping else self.epochs
+
+        # Convert to DataLoader for batching
+        train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
+        train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=self.batch_size, shuffle=True)
+
         for epoch in range(self.epochs):
-            self.
-
+            self.train()
+            epoch_loss = 0.0
             correct_train = 0
             total_train = 0
-
-            for inputs, labels in train_loader:
-                self.optimizer.zero_grad()
-                outputs = self(inputs)
-
-                # Ensure labels have the same shape as outputs
-                labels = labels.view(-1, 1) if outputs.shape[-1] == 1 else labels
 
-
-
-                self
-                running_loss += loss.item()
+            for inputs, targets in train_loader:
+                optimizer.zero_grad()
+                outputs = self(inputs)
 
-
-
-
-                    _, predicted = torch.max(outputs, 1)
-                else:
-                    predicted = (outputs > 0.5).float()
-                total_train += labels.size(0)
-                correct_train += (predicted == labels).sum().item()
+                if self.task_type == 'binary_classification':
+                    outputs = outputs.squeeze()
+                    targets = targets.float()
                 elif self.task_type == 'regression':
-
-                    total_train += labels.size(0)
-
-            train_loss = running_loss / len(train_loader)
-            if self.task_type == 'classification':
-                train_accuracy = 100 * correct_train / total_train
-            elif self.task_type == 'regression':
-                train_accuracy = correct_train / total_train
-
-            # Calculate validation loss and accuracy/MAE
-            val_loss, val_accuracy = self.evaluate_loss(val_loader)
-            self.training_loss_list.append(train_loss)
-            self.validation_loss_list.append(val_loss)
-            if self.task_type == 'classification':
-                # print(f"Epoch {epoch+1}, Training Loss: {train_loss:.4f}, Training Accuracy: {train_accuracy:.2f}%, "
-                #       f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%")
-                self.training_accuracy_list.append(train_accuracy)
-                self.validation_accuracy_list.append(val_accuracy)
-            elif self.task_type == 'regression':
-                # print(f"Epoch {epoch+1}, Training Loss: {train_loss:.4f}, Training MAE: {train_accuracy:.4f}, "
-                #       f"Validation Loss: {val_loss:.4f}, Validation MAE: {val_accuracy:.4f}")
-                self.training_mae_list.append(train_accuracy)
-                self.validation_mae_list.append(val_accuracy)
-
-            # Early stopping
-            if self.early_stopping:
-                if val_loss < best_val_loss:
-                    best_val_loss = val_loss
-                    epochs_no_improve = 0
-                    best_model_state = self.state_dict()
-                else:
-                    epochs_no_improve += 1
-                    if epochs_no_improve >= self.patience:
-                        # print(f'Early stopping! Best validation loss: {best_val_loss}')
-                        break
-        # Update the epochs parameter to reflect the actual epochs ran.
-        self.epochs = epoch + 1
+                    outputs = outputs.squeeze()
 
-
-
-        self.load_state_dict(best_model_state)
-
-    def evaluate_loss(self, data_loader):
-        self.model.eval()
-        total_loss = 0.0
-        correct_val = 0
-        total_val = 0
-
-        with torch.no_grad():
-            for inputs, labels in data_loader:
-                outputs = self(inputs)
-                labels = labels.view(-1, 1) if outputs.shape[-1] == 1 else labels
+                loss = self.loss_fn(outputs, targets)
+                epoch_loss += loss.item()
 
-                loss
-
+                loss.backward()
+                optimizer.step()
 
-                # Calculate
+                # Calculate metrics for training set
                 if self.task_type == 'classification':
-
-
+                    _, predicted = torch.max(outputs, 1)
+                    correct_train += (predicted == targets).sum().item()
+                    total_train += targets.size(0)
+                elif self.task_type == 'binary_classification':
+                    predicted = torch.round(torch.sigmoid(outputs))
+                    correct_train += (predicted == targets).sum().item()
+                    total_train += targets.size(0)
+
+            if X_val is not None and y_val is not None:
+                self.eval()
+                with torch.no_grad():
+                    val_outputs = self(X_val)
+                    if self.task_type == 'binary_classification':
+                        val_outputs = val_outputs.squeeze()
+                        y_val = y_val.float()
+                    elif self.task_type == 'regression':
+                        val_outputs = val_outputs.squeeze()
+
+                    val_loss = self.loss_fn(val_outputs, y_val)
+                    val_loss_item = val_loss.item()
+
+                # Track the best model state
+                if val_loss < self.best_val_loss:
+                    self.best_val_loss = val_loss
+                    self.best_model_state = self.state_dict()
+                    current_patience = self.patience if self.early_stopping else self.epochs
                 else:
-
-
-                    correct_val += (predicted == labels).sum().item()
-                elif self.task_type == 'regression':
-                    correct_val += torch.abs(outputs - labels).sum().item()
-                total_val += labels.size(0)
-
-        avg_loss = total_loss / len(data_loader)
-        if self.task_type == 'classification':
-            accuracy = 100 * correct_val / total_val
-        elif self.task_type == 'regression':
-            accuracy = correct_val / total_val
-
-        return avg_loss, accuracy
+                    if self.early_stopping:
+                        current_patience -= 1
 
+            if self.early_stopping and current_patience == 0:
+                print(f'ANN - Information: Early stopping after epoch {epoch + 1}')
+                break
 
-
-
-
-
-
-
-
-
-
-
-
-            if self.model[-1].__class__.__name__ == 'Sigmoid':
-                # Convert probabilities to binary predictions (0 or 1)
-                preds = [1 if x >= 0.5 else 0 for x in outputs.cpu().numpy()]
-            else:
-                # Get predicted class indices
-                _, preds = torch.max(outputs.data, 1)
-                preds = preds.cpu().numpy()
+            if (epoch + 1) % self.interval == 0:
+                self.epoch_list.append(epoch + 1)
+                avg_epoch_loss = epoch_loss / len(train_loader)
+                self.train_loss_list.append(round(avg_epoch_loss, self.mantissa))
+
+                if self.task_type == 'classification' or self.task_type == 'binary_classification':
+                    train_accuracy = round(correct_train / total_train, self.mantissa)
+                    self.train_accuracy_list.append(train_accuracy)
+                    if X_val is not None and y_val is not None:
+                        val_accuracy = (torch.round(torch.sigmoid(val_outputs)) if self.task_type == 'binary_classification' else torch.max(val_outputs, 1)[1] == y_val).float().mean().item()
+                        val_accuracy = round(val_accuracy, self.mantissa)
+                        self.val_accuracy_list.append(val_accuracy)
                 elif self.task_type == 'regression':
-
-
-
-
-
-
-
+                    train_preds = self(X_train).detach().numpy().squeeze()
+                    train_mse = round(mean_squared_error(y_train.numpy(), train_preds), self.mantissa)
+                    train_mae = round(mean_absolute_error(y_train.numpy(), train_preds), self.mantissa)
+                    train_r2 = round(r2_score(y_train.numpy(), train_preds), self.mantissa)
+                    self.train_mse_list.append(train_mse)
+                    self.train_mae_list.append(train_mae)
+                    self.train_r2_list.append(train_r2)
+                    if X_val is not None and y_val is not None:
+                        val_preds = val_outputs.numpy().squeeze()
+                        val_mse = round(mean_squared_error(y_val.numpy(), val_preds), self.mantissa)
+                        val_mae = round(mean_absolute_error(y_val.numpy(), val_preds), self.mantissa)
+                        val_r2 = round(r2_score(y_val.numpy(), val_preds), self.mantissa)
+                        self.val_mse_list.append(val_mse)
+                        self.val_mae_list.append(val_mae)
+                        self.val_r2_list.append(val_r2)
+
+                if X_val is not None and y_val is not None:
+                    self.val_loss_list.append(round(val_loss_item, self.mantissa))
+                    print(f'Epoch [{epoch + 1}/{self.epochs}], Loss: {avg_epoch_loss:.4f}, Val Loss: {val_loss_item:.4f}')
+                else:
+                    print(f'Epoch [{epoch + 1}/{self.epochs}], Loss: {avg_epoch_loss:.4f}')
 
-    def
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    def evaluate_model(self, X_test, y_test):
+        self.eval()
+        with torch.no_grad():
+            outputs = self(X_test)
+
+            if self.task_type == 'regression':
+                outputs = outputs.squeeze()
+                predictions = outputs.numpy()
+                mse = mean_squared_error(y_test.numpy(), outputs.numpy())
+                mae = mean_absolute_error(y_test.numpy(), outputs.numpy())
+                r2 = r2_score(y_test.numpy(), outputs.numpy())
+                #print(f'MSE: {mse:.4f}, MAE: {mae:.4f}, R^2: {r2:.4f}')
+                metrics = {'mae': round(mae, self.mantissa), 'mse': round(mse, self.mantissa), 'r2': round(r2, self.mantissa)}
+            elif self.task_type == 'binary_classification':
+                outputs = torch.sigmoid(outputs).squeeze()
+                predicted = (outputs > 0.5).int()
+                predictions = predicted.numpy()
+                accuracy = accuracy_score(y_test.numpy(), predicted.numpy())
+                precision = precision_score(y_test.numpy(), predicted.numpy(), zero_division=0)
+                recall = recall_score(y_test.numpy(), predicted.numpy(), zero_division=0)
+                f1 = f1_score(y_test.numpy(), predicted.numpy(), zero_division=0)
+                #print(f'Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}')
+                cm = self.confusion_matrix(y_test, predictions)
+                metrics = {'accuracy': round(accuracy, self.mantissa), 'precision': round(precision, self.mantissa), 'recall': round(recall, self.mantissa), 'f1': round(f1, self.mantissa), 'confusion_matrix': cm}
+            else: # multi-category classification
+                _, predicted = torch.max(outputs, 1)
+                predictions = predicted.numpy()
+                accuracy = accuracy_score(y_test.numpy(), predictions)
+                precision = precision_score(y_test.numpy(), predictions, average='macro', zero_division=0)
+                recall = recall_score(y_test.numpy(), predictions, average='macro', zero_division=0)
+                f1 = f1_score(y_test.numpy(), predictions, average='macro', zero_division=0)
+                cm = self.confusion_matrix(y_test, predicted.numpy())
+                #print(f'Accuracy: {accuracy:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1 Score: {f1:.4f}')
+                metrics = {'accuracy': round(accuracy, self.mantissa), 'precision': round(precision, self.mantissa), 'recall': round(recall, self.mantissa), 'f1': round(f1, self.mantissa), 'confusion_matrix': cm}
+            self.metrics = metrics
+
+        return metrics, predictions
+
+    def confusion_matrix(self, y_test, predictions):
+        if self.task_type != 'regression':
+            cm = confusion_matrix(y_test.numpy(), predictions)
+            return cm.tolist()
         else:
-
-
+            print("ANN - Error: Confusion matrix is not applicable for regression tasks. Returning None")
+            return None
+
+    def reset_parameters(self):
+        for layer in self.model:
+            if hasattr(layer, 'reset_parameters'):
+                layer.reset_parameters()
+
+    def cross_validate(self, X, y):
+        if 'hold' in self.cross_val_type:
+            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=self.test_size, random_state=self.random_state)
+            X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=self.validation_ratio, random_state=self.random_state)
+            self.train_model(X_train, y_train, X_val=X_val, y_val=y_val)
+            metrics, predictions = self.evaluate_model(X_test, y_test)
+            if self.task_type != 'regression':
+                self.confusion_matrix(y_test, predictions)
+            return metrics
+
+        elif 'fold' in self.cross_val_type:
+            kf = KFold(n_splits=self.k_folds, shuffle=True, random_state=self.random_state)
+            best_fold_index = -1
+            best_val_loss = np.inf
+            best_model_state = None
+
+            for fold_idx, (train_index, test_index) in enumerate(kf.split(X)):
+                # Reinitialize model parameters
+                self.reset_parameters()
+                print("Fold:", fold_idx+1)
+                X_train, X_test = X[train_index], X[test_index]
+                y_train, y_test = y[train_index], y[test_index]
+
+                X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=self.validation_ratio, random_state=self.random_state)
+
+                self.train_model(X_train, y_train, X_val=X_val, y_val=y_val)
+
+                print(f'Self Best Val Loss: {self.best_val_loss.item():.4f}')
+                if self.best_val_loss < best_val_loss:
+                    best_val_loss = self.best_val_loss
+                    best_fold_index = fold_idx
+                    best_model_state = self.best_model_state
+
+            if best_fold_index == -1:
+                raise ValueError("No best fold found. Check early stopping and validation handling.")
+
+            print(f'Selecting best fold: {best_fold_index + 1}')
+            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=self.test_size, random_state=self.random_state)
+            X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=self.validation_ratio, random_state=self.random_state)
+            self.load_state_dict(best_model_state)
+            #print("Training on Best fold.")
+            #self.train_model(X_train, y_train, X_val=X_val, y_val=y_val)
+
+            metrics, predictions = self.evaluate_model(X_val, y_val)
+            if self.task_type != 'regression':
+                self.confusion_matrix(y_val, predictions)
+
+            return metrics
 
     def save(self, path):
         if path:
-
-            ext = path[len(path)-3:len(path)]
+            ext = path[-3:]
             if ext.lower() != ".pt":
-                path = path+".pt"
+                path = path + ".pt"
             torch.save(self.state_dict(), path)
 
     def load(self, path):
         if path:
             self.load_state_dict(torch.load(path))
 
-
 class ANN():
     @staticmethod
-    def DatasetByCSVPath(path, taskType='classification',
+    def DatasetByCSVPath(path, taskType='classification', description=""):
         """
         Returns a dataset according to the input CSV file path.
 
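For orientation, a minimal sketch of how the reworked _ANN class above is driven (normally this happens inside ANN.Train(), shown later in this diff). The breast-cancer dataset and the hyperparameter values are illustrative assumptions, not part of the diff:

    import torch
    from sklearn.datasets import load_breast_cancer
    from sklearn.preprocessing import StandardScaler

    dataset = load_breast_cancer()   # illustrative sample dataset
    X = torch.tensor(StandardScaler().fit_transform(dataset.data), dtype=torch.float32)
    y = torch.tensor(dataset.target, dtype=torch.long)

    # _ANN.__init__ reads 'title', 'task_type', 'cross_val_type', and 'hidden_layers'
    # directly from the dict; the remaining keys fall back to .get() defaults.
    hp = {'title': 'Breast Cancer', 'task_type': 'classification',
          'cross_val_type': 'holdout', 'hidden_layers': [30, 15]}
    model = _ANN(input_size=X.shape[1], hyperparameters=hp, dataset=dataset)
    metrics = model.cross_validate(X, y)   # splits, trains, evaluates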
@@ -404,65 +426,44 @@ class ANN():
         The path to the folder containing the necessary CSV and YML files.
         taskType : str , optional
             The type of evaluation task. This can be 'classification' or 'regression'. The default is 'classification'.
-
-            The
-
-        randomState : int , optional
-            The randomState parameter is used to ensure reproducibility of the results. When you set the randomState parameter to a specific integer value,
-            it controls the shuffling of the data before splitting it into training and testing sets.
-            This means that every time you run your code with the same randomState value and the same dataset, you will get the same split of the data.
-            The default is 42 which is just a randomly picked integer number. Specify None for random sampling.
+        description : str , optional
+            The description of the dataset. In keeping with the scikit BUNCH class, this will be saved in the DESCR parameter.
+
         Returns
         -------
-
-
-            X_train, X_test, y_train, y_test, taskType
-            X_train is the list of features used for training
-            X_test is the list of features used for testing
-            y_train is the list of targets used for training
-            y_test is the list of targets used for testing
-            taskType is the type of task ('classification' or 'regression'). This is included for compatibility with DatasetBySample()
+        sklearn.utils._bunch.Bunch
+            The created dataset.
 
         """
         import pandas as pd
         import numpy as np
         from sklearn.preprocessing import StandardScaler
         from sklearn.model_selection import train_test_split
+        from sklearn.utils import Bunch
+
         # Load the CSV file into a pandas DataFrame
         df = pd.read_csv(path)
 
         # Assume the last column is the target
         features = df.iloc[:, :-1].values
         target = df.iloc[:, -1].values
-
-        scaler = StandardScaler()
-        X = scaler.fit_transform(features)
-        y = target
-
-        # Ensure target is in the correct format
-        if taskType == 'classification' and len(np.unique(y)) == 2:
-            y = y.reshape(-1, 1) # Reshape for binary classification
-        elif taskType == 'classification':
-            y = y.astype(np.int64) # Convert to long for multi-class classification
-
-        y = y.astype(np.float32) # Convert to float32 for PyTorch
 
-
-
-        output_size = 1 if taskType == 'regression' or num_classes == 2 else num_classes
+        # Set target_names based on the name of the target column
+        target_names = [df.columns[-1]]
 
-        #
-
+        # Create a Bunch object
+        dataset = Bunch(
+            data=features,
+            target=target,
+            feature_names=df.columns[:-1].tolist(),
+            target_names=target_names,
+            frame=df,
+            DESCR=description,
+        )
+        return dataset
 
-        return {'XTrain': X_train,
-                'XTest': X_test,
-                'yTrain': y_train,
-                'yTest': y_test,
-                'inputSize': input_size,
-                'outputSize': output_size}
-
     @staticmethod
-    def DatasetBySampleName(name
+    def DatasetBySampleName(name):
         """
         Returns a dataset from the scikit-learn dataset samples.
 
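A usage sketch for the new DatasetByCSVPath() (the file name and column layout are assumptions; per the code above, the last CSV column is treated as the target and the result is a scikit-learn Bunch):

    dataset = ANN.DatasetByCSVPath("features.csv", taskType='classification',
                                   description="My custom dataset")
    print(dataset.feature_names)   # every column except the last
    print(dataset.target_names)    # the name of the last (target) column
    print(dataset.DESCR)           # "My custom dataset"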
@@ -481,18 +482,17 @@ class ANN():
         The default is 42 which is just a randomly picked integer number. Specify None for random sampling.
         Returns
         -------
-
-        Returns the following
-
-
-
-
-
-
+        dict
+            Returns the following dictionary:
+            XTrain, XTest, yTrain, yTest, inputSize, outputSize
+            XTrain is the list of features used for training
+            XTest is the list of features used for testing
+            yTrain is the list of targets used for training
+            yTest is the list of targets used for testing
+            inputSize is the size (length) of the input
+            outputSize is the size (length) of the output
         """
-
-        from sklearn.model_selection import train_test_split
-
+        # Load dataset
         if name == 'breast_cancer':
             dataset = load_breast_cancer()
         elif name == 'california_housing':
@@ -506,35 +506,10 @@ class ANN():
         else:
             print(f"ANN.DatasetBySampleName - Error: Unsupported dataset: {name}. Returning None.")
             return None
-
-        # Standardize the features
-        scaler = StandardScaler()
-        X = scaler.fit_transform(dataset.data)
-        y = dataset.target
-
-        task_type = ANN.HyperparametersBySampleDatasetName(name)['taskType']
-        # For binary classification, ensure the target is in the correct format (1D tensor)
-        if task_type == 'classification' and len(np.unique(y)) == 2:
-            y = y.astype(np.float32)
-        elif task_type == 'classification':
-            y = y.astype(np.int64)
-
-        input_size = X.shape[1] # Number of features
-        num_classes = len(np.unique(y))
-        output_size = 1 if task_type == 'regression' or num_classes == 2 else num_classes
-
-        # First split: train and temp (remaining)
-        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=(1.0 - trainRatio), random_state=randomState)
+        return dataset
 
-        return {'XTrain': X_train,
-                'XTest': X_test,
-                'yTrain': y_train,
-                'yTest': y_test,
-                'inputSize': input_size,
-                'outputSize': output_size}
-
     @staticmethod
-    def
+    def DatasetSampleNames():
         """
         Returns the names of the available sample datasets from sci-kit learn.
 
@@ -549,7 +524,7 @@ class ANN():
         return ['breast_cancer', 'california_housing', 'digits', 'iris', 'wine']
 
     @staticmethod
-    def DatasetSplit(X, y,
+    def DatasetSplit(X, y, testRatio=0.3, randomState=42):
         """
         Splits the input dataset according to the input ratios.
 
@@ -559,9 +534,8 @@ class ANN():
             The list of features.
         y : list
             The list of targets.
-
-            The ratio of the
-            This means that 60% of the data will be used for training and validation while 40% of the data will be reserved for testing.
+        testRatio : float , optional
+            The ratio of the dataset to reserve as unseen data for testing. The default is 0.3
         randomState : int , optional
             The randomState parameter is used to ensure reproducibility of the results. When you set the randomState parameter to a specific integer value,
             it controls the shuffling of the data before splitting it into training and testing sets.
@@ -571,40 +545,48 @@ class ANN():
         Returns
         -------
         list
-            Returns the following list:
-            X_train, X_test, y_train,y_test
+            Returns the following list : [X_train, X_test, y_train,y_test]
             X_train is the list of features used for training
             X_test is the list of features used for testing
             y_train is the list of targets used for training
             y_test is the list of targets used for testing
 
         """
+        if testRatio < 0 or testRatio > 1:
+            print("ANN.DatasetSplit - Error: testRatio parameter cannot be outside the range [0,1]. Returning None.")
+            return None
         # First split: train and temp (remaining)
-
-
-        return X_train, X_test, y_train, y_test
+        return train_test_split(X, y, test_size=testRatio, random_state=randomState)
 
     @staticmethod
-    def
-
-
-
-
-
-
-
-
-
-
-
+    def Hyperparameters(title='Untitled',
+                        taskType='classification',
+                        testRatio = 0.3,
+                        validationRatio = 0.2,
+                        hiddenLayers = [12,12,12],
+                        learningRate = 0.001,
+                        epochs = 10,
+                        batchSize = 1,
+                        patience = 5,
+                        earlyStopping = True,
+                        randomState = 42,
+                        crossValidationType = "holdout",
+                        kFolds = 3,
+                        interval = 1,
+                        mantissa = 6):
         """
+        title : str , optional
+            The desired title for the dataset. The default is "Untitled".
         taskType : str , optional
             The desired task type. This can be either 'classification' or 'regression' (case insensitive).
             Classification is a type of supervised learning where the model is trained to predict categorical labels (classes) from input data.
             Regression is a type of supervised learning where the model is trained to predict continuous numerical values from input data.
+        testRatio : float , optional
+            The split ratio between training and testing. The default is 0.3. This means that
+            70% of the data will be used for training/validation and 30% will be reserved for testing as unseen data.
         validationRatio : float , optional
             The split ratio between training and validation. The default is 0.2. This means that
-            80% of the data will be used for training and 20% will be used for validation.
+            80% of the validation data (left over after reserving test data) will be used for training and 20% will be used for validation.
         hiddenLayers : list , optional
             The number of hidden layers and the number of nodes in each layer.
             If you wish to have 3hidden layers with 8 nodes in the first
@@ -614,18 +596,6 @@ class ANN():
             The desired learning rate. The default is 0.001. See https://en.wikipedia.org/wiki/Learning_rate
         epochs : int , optional
             The desired number of epochs. The default is 10. See https://en.wikipedia.org/wiki/Neural_network_(machine_learning)
-        activation : str , optional
-            The type of activation layer. See https://en.wikipedia.org/wiki/Activation_function
-            Some common alternatives include:
-            'relu' : ReLU (Rectified Linear Unit) is an activation function that outputs the input directly if it is positive; otherwise, it outputs zero.
-            'sigmoid' : The sigmoid activation function, which maps inputs to a range between 0 and 1.
-            'tanh' : The hyperbolic tangent activation function, which maps inputs to a range between -1 and 1.
-            'leaky_relu': A variant of the ReLU that allows a small, non-zero gradient when the unit is not active.
-            'elu' : Exponential Linear Unit, which improves learning characteristics by having a smooth curve.
-            'swish' : An activation function defined as x . sigmoid(x)
-            'softmax' : Often used in the output layer of a classification network, it normalizes the outputs to a probability distribution.
-            'linear' : A linear activation function, which is often used in the output layer of regression networks.
-            The default is 'relu'.
         batchSize : int , optional
             The desired number of samples that will be propagated through the network at one time before the model's internal parameters are updated. Instead of updating the model parameters after every single training sample
             (stochastic gradient descent) or after the entire training dataset (batch gradient descent), mini-batch gradient descent updates the model parameters after
@@ -639,44 +609,52 @@ class ANN():
         it controls the shuffling of the data before splitting it into training and testing sets.
         This means that every time you run your code with the same randomState value and the same dataset, you will get the same split of the data.
         The default is 42 which is just a randomly picked integer number. Specify None for random sampling.
-
-
+        crossValidationType : str , optional
+            The desired type of cross-validation. This can be one of 'holdout' or 'k-fold'. The default is 'holdout'
         kFolds : int , optional
             The number of splits (folds) to use if K-Fold cross validation is selected. The default is 5.
+        interval : int , optional
+            The desired epoch interval at which to report and save metrics data. This must be less than the total number of epochs. The default is 1.
+        mantissa : int , optional
+            The desired length of the mantissa. The default is 6.
 
         Returns
         -------
         dict
             Returns a dictionary with the following keys:
-            '
-            '
-            '
-            '
+            'task_type'
+            'test_ratio'
+            'validation_ratio'
+            'hidden_layers'
+            'learning_rate'
             'epochs'
-            '
-            '
+            'batch_size'
+            'early_stopping'
             'patience'
-            '
-            '
-            'holdout'
+            'random_tate'
+            'cross_val_type'
             'kFolds'
+            'interval'
+            'mantissa'
         """
         return {
-            '
-            '
-            '
-            '
+            'task_type': taskType,
+            'test_ratio': testRatio,
+            'validation_ratio': validationRatio,
+            'hidden_layers': hiddenLayers,
+            'learning_rate': learningRate,
             'epochs': epochs,
-            '
-            '
+            'batch_size': batchSize,
+            'early_stopping': earlyStopping,
             'patience': patience,
-            '
-            '
-            '
-            '
+            'random_state': randomState,
+            'cross_val_type': crossValidationType,
+            'k_folds': kFolds,
+            'interval': interval,
+            'mantissa': mantissa}
 
     @staticmethod
-    def
+    def HyperparametersBySampleName(name):
         """
         Returns the suggested initial hyperparameters to use for the dataset named in the name input parameter.
         You can get a list of available sample datasets using ANN.SampleDatasets().
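A sketch of building a hyperparameters dictionary with the new Hyperparameters() method (argument values are illustrative):

    hp = ANN.Hyperparameters(taskType='classification',
                             hiddenLayers=[10, 5],
                             epochs=100,
                             batchSize=16,
                             crossValidationType='holdout')
    # Note: as the return statement above shows, the dict has no 'title' key,
    # while _ANN.__init__ reads hyperparameters['title'], so it appears the
    # caller must add it:
    hp['title'] = 'Iris'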
@@ -690,90 +668,108 @@ class ANN():
         -------
         dict
             Returns a dictionary with the following keys:
-            '
-            '
-            '
-            '
+            'title'
+            'task_type'
+            'test_ratio'
+            'validation_ratio'
+            'hidden_layers'
+            'learning_rate'
             'epochs'
-            '
-            '
+            'batch_size'
+            'early_stopping'
             'patience'
-            '
-            '
-            '
-            '
+            'random_state'
+            'cross_val_type'
+            'k_folds'
+            'interval'
+            'mantissa'
 
         """
         hyperparameters = {
             'breast_cancer': {
-                '
-                '
-                '
-                '
+                'title': 'Breast Cancer',
+                'task_type': 'classification',
+                'test_ratio': 0.3,
+                'validation_ratio': 0.2,
+                'hidden_layers': [30, 15],
+                'learning_rate': 0.001,
                 'epochs': 100,
-                '
-                '
+                'batch_size': 32,
+                'early_stopping': True,
                 'patience': 10,
-                '
-                '
-                '
-                '
+                'random_state': 42,
+                'cross_val_type': "holdout",
+                'k_folds': 3,
+                'interval': 10,
+                'mantissa': 6
             },
             'california_housing': {
-                '
-                '
-                '
-                '
-                '
-                '
-                '
+                'title': 'California Housing',
+                'task_type': 'regression',
+                'test_ratio': 0.3,
+                'validation_atio': 0.2,
+                'hidden_layers': [50, 25],
+                'learning_rate': 0.001,
+                'epochs': 50,
+                'batch_size': 16,
+                'early_stopping': False,
                 'patience': 10,
-                '
-                '
-                '
-                '
+                'random_state': 42,
+                'cross_val_type': "k-fold",
+                'k_folds': 3,
+                'interval': 5,
+                'mantissa': 6
             },
             'digits': {
-                '
-                '
-                '
-                '
+                'title': 'Digits',
+                'task_type': 'classification',
+                'test_ratio': 0.3,
+                'validation_ratio': 0.2,
+                'hidden_layers': [64, 32],
+                'learning_rate': 0.001,
                 'epochs': 50,
-                '
-                '
+                'batch_size': 32,
+                'early_stopping': True,
                 'patience': 10,
-                '
-                '
-                '
-                '
+                'random_state': 42,
+                'cross_val_type': "holdout",
+                'kFolds': 3,
+                'interval': 5,
+                'mantissa': 6
             },
             'iris': {
-                '
-                '
-                '
-                '
+                'title': 'Iris',
+                'task_type': 'classification',
+                'test_ratio': 0.3,
+                'validation_ratio': 0.2,
+                'hidden_layers': [10, 5],
+                'learning_rate': 0.001,
                 'epochs': 100,
-                '
-                '
+                'batch_size': 16,
+                'early_stopping': False,
                 'patience': 10,
-                '
-                '
-                '
-                '
+                'random_state': 42,
+                'cross_val_type': "holdout",
+                'k_folds': 3,
+                'interval': 2,
+                'mantissa': 6
             },
             'wine': {
-                '
-                '
-                '
-                '
+                'title': 'Wine',
+                'task_type': 'classification',
+                'test_ratio': 0.3,
+                'validation_ratio': 0.2,
+                'hidden_layers': [50, 25],
+                'learning_rate': 0.001,
                 'epochs': 100,
-                '
-                '
+                'batch_size': 16,
+                'early_stopping': False,
                 'patience': 10,
-                '
-                '
-                '
-                '
+                'random_state': 42,
+                'cross_val_type': "holdout",
+                'k_folds': 3,
+                'interval': 2,
+                'mantissa': 6
             }
         }
 
@@ -797,95 +793,82 @@ class ANN():
         -------
         dict
             A dictionary containing the model data. The keys in the dictionary are:
-            'epochs'
-            '
-            '
-            '
-            '
-            '
-            '
+            'epochs' (list of epoch numbers at which metrics data was collected)
+            'training_loss' (LOSS)
+            'validation_loss' (VALIDATION LOSS)
+            'training_accuracy' (ACCURACY for classification tasks only)
+            'validation_accuracy' (ACCURACYfor classification tasks only)
+            'training_mae' (MAE for regression tasks only)
+            'validation_mae' (MAE for regression tasks only)
+            'training_mse' (MSE for regression tasks only)
+            'validation_mse' (MSE for regression tasks only)
+            'training_r2' (R^2 for regression tasks only)
+            'validation_r2' (R^2 for regression tasks only)
+
 
         """
 
         return {
-            '
-            '
-            '
-            '
-            '
-            '
-            '
+            'epochs': model.epoch_list,
+            'training_loss': model.training_loss_list,
+            'validation_loss': model.validation_loss_list,
+            'training_accuracy': model.training_accuracy_list,
+            'validation_accuracy': model.validation_accuracy_list,
+            'training_mae': model.training_mae_list,
+            'validation_mae': model.validation_mae_list,
+            'training_mse': model.training_mse_list,
+            'validation_mse': model.validation_mse_list,
+            'training_r2': model.training_r2_list,
+            'validation_r2': model.validation_r2_list
         }
 
     @staticmethod
-    def
+    def Initialize(hyperparameters, dataset):
         """
-        Initializes an ANN model
+        Initializes an ANN model with the input dataset and hyperparameters.
 
         Parameters
         ----------
-        inputSize : int
-            The number of initial inputs. This is usually computed directly from the dataset.
-        outputSize : int
-            The number of categories for classification tasks. This is usually computed directly from the dataset.
         hyperparameters : dict
-            The hyperparameters dictionary. You can create one using ANN.
-
+            The hyperparameters dictionary. You can create one using ANN.Hyperparameters() or, if you are using a sample Dataset, you can get it from ANN.HyperParametersBySampleName.
+        dataset : sklearn.utils._bunch.Bunch
+            The input dataset.
+
         Returns
         -------
         _ANNModel
             Returns the trained model.
 
         """
+        def prepare_data(dataset, task_type='classification'):
+            X, y = dataset.data, dataset.target
+
+            # Standardize features
+            scaler = StandardScaler()
+            X = scaler.fit_transform(X)
+            X = torch.tensor(X, dtype=torch.float32)
+            y = torch.tensor(y, dtype=torch.long if task_type != 'regression' else torch.float32)
+            return X, y
 
-        task_type = hyperparameters['
-        validation_ratio = hyperparameters['validationRatio']
-        hidden_layers = hyperparameters['hiddenLayers']
-        learning_rate = hyperparameters['learningRate']
-        epochs = hyperparameters['epochs']
-        activation = hyperparameters['activation']
-        batch_size = hyperparameters['batchSize']
-        patience = hyperparameters['patience']
-        early_stopping = hyperparameters['earlyStopping']
-        random_state = hyperparameters['randomState']
-        holdout = hyperparameters['holdout']
-        k_folds = hyperparameters['kFolds']
-
-        task_type = task_type.lower()
+        task_type = hyperparameters['task_type']
         if task_type not in ['classification', 'regression']:
-            print("ANN.ModelInitialize - Error: The input parameter
+            print("ANN.ModelInitialize - Error: The task type in the input hyperparameters parameter is not recognized. It must be either 'classification' or 'regression'. Returning None.")
             return None
-
-        model = 
-                    outputSize=outputSize,
-                    taskType=task_type,
-                    validationRatio=validation_ratio,
-                    hiddenLayers=hidden_layers,
-                    learningRate=learning_rate,
-                    epochs=epochs,
-                    activation=activation,
-                    batchSize=batch_size,
-                    patience=patience,
-                    earlyStopping = early_stopping,
-                    randomState = random_state,
-                    holdout=holdout,
-                    kFolds=k_folds
-                    )
+        X, y = prepare_data(dataset, task_type=task_type)
+        model = _ANN(input_size=X.shape[1], hyperparameters=hyperparameters, dataset=dataset)
         return model
 
     @staticmethod
-    def
+    def Train(hyperparameters, dataset):
         """
         Trains the input model given the input features (X), and target (y).
 
         Parameters
         ----------
-
-            The
-
-            The input
-        y : list
-            The input list of targets
+        hyperparameters : dict
+            The hyperparameters dictionary. You can create one using ANN.Hyperparameters() or, if you are using a sample Dataset, you can get it from ANN.HyperParametersBySampleName.
+        dataset : sklearn.utils._bunch.Bunch
+            The input dataset.
 
         Returns
         -------
@@ -893,11 +876,23 @@ class ANN():
         Returns the trained model.
 
         """
-
+        def prepare_data(dataset, task_type='classification'):
+            X, y = dataset.data, dataset.target
+
+            # Standardize features
+            scaler = StandardScaler()
+            X = scaler.fit_transform(X)
+            X = torch.tensor(X, dtype=torch.float32)
+            y = torch.tensor(y, dtype=torch.long if task_type != 'regression' else torch.float32)
+            return X, y
+
+        X, y = prepare_data(dataset, task_type=hyperparameters['task_type'])
+        model = _ANN(input_size=X.shape[1], hyperparameters=hyperparameters, dataset=dataset)
+        model.cross_validate(X, y)
         return model
 
     @staticmethod
-    def
+    def Test(model, hyperparameters, dataset):
         """
         Returns the labels (actual values) and predictions (predicted values) given the input model, features (X), and target (y).
 
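Putting the pieces together, a minimal end-to-end training sketch inferred from the signatures in this diff (not documented usage):

    dataset = ANN.DatasetBySampleName('iris')
    hyperparameters = ANN.HyperparametersBySampleName('iris')   # includes 'title', 'task_type', 'cross_val_type', ...
    model = ANN.Train(hyperparameters, dataset)   # standardizes, splits, and cross-validates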
@@ -913,14 +908,29 @@ class ANN():
         Returns
         -------
         list, list
-            Returns two lists:
+            Returns two lists: metrics, and predictions.
 
         """
-
-
+        def prepare_data(dataset, task_type='classification'):
+            X, y = dataset.data, dataset.target
+
+            # Standardize features
+            scaler = StandardScaler()
+            X = scaler.fit_transform(X)
+            X = torch.tensor(X, dtype=torch.float32)
+            y = torch.tensor(y, dtype=torch.long if task_type != 'regression' else torch.float32)
+            return X, y
+
+        X, y = prepare_data(dataset, task_type=hyperparameters['task_type'])
+        X_train, X_test, y_train,y_test = ANN.DatasetSplit(X, y, testRatio=hyperparameters['test_ratio'], randomState=hyperparameters['random_state'])
+        metrics, predictions = model.evaluate_model(X_test, y_test)
+        confusion_matrix = None
+        if hyperparameters['task_type'] != 'regression':
+            confusion_matrix = model.confusion_matrix(y_test, predictions)
+        return y_test, predictions, metrics, confusion_matrix
 
     @staticmethod
-    def
+    def Figures(model, width=900, height=600, template="plotly", colorScale='viridis', colorSamples=10):
         """
         Creates Plotly Figures from the model data. For classification tasks this includes
         a confusion matrix, loss, and accuracy figures. For regression tasks this includes
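Evaluation on the held-out split would then look like this (a sketch under the same assumptions as the training example above):

    labels, predictions, metrics, cm = ANN.Test(model, hyperparameters, dataset)
    print(metrics)   # accuracy/precision/recall/f1 for classification; mae/mse/r2 for regression
    # cm is None for regression tasks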
@@ -952,50 +962,62 @@ class ANN():
         """
         import plotly.graph_objects as go
         from topologicpy.Plotly import Plotly
+        import numpy as np
         figures = []
         filenames = []
-        if model.task_type
-
+        if model.task_type == 'classification':
+            confusion_matrix = model.metrics['confusion_matrix']
+            confusion_matrix_figure = Plotly.FigureByConfusionMatrix(confusion_matrix, width=width, height=height, colorScale=colorScale, colorSamples=colorSamples)
+            confusion_matrix_figure.update_layout(title=model.title+"<BR>Confusion Matrix")
+            figures.append(confusion_matrix_figure)
+            filenames.append("ConfusionMatrix")
+            data_lists = [[model.train_loss_list, model.val_loss_list], [model.train_accuracy_list, model.val_accuracy_list]]
             label_lists = [['Training Loss', 'Validation Loss'], ['Training Accuracy', 'Validation Accuracy']]
             titles = ['Training and Validation Loss', 'Training and Validation Accuracy']
+            titles = [model.title+"<BR>"+t for t in titles]
             legend_titles = ['Loss Type', 'Accuracy Type']
             xaxis_titles = ['Epoch', 'Epoch']
             yaxis_titles = ['Loss', 'Accuracy']
             filenames = yaxis_titles
-
-            confusion_matrix = ANN.ModelMetrics(model, labels = model.labels, predictions = model.predictions)['Confusion Matrix']
-            confusion_matrix_figure = Plotly.FigureByConfusionMatrix(confusion_matrix, width=width, height=height, colorScale=colorScale, colorSamples=colorSamples)
-            figures.append(confusion_matrix_figure)
-            filenames.append("ConfusionMatrix")
+
         elif model.task_type.lower() == 'regression':
-            data_lists = [[model.
-            label_lists = [['Training Loss', 'Validation Loss'], ['Training MAE', 'Validation MAE']]
-            titles = ['Training and Validation Loss', 'Training and Validation MAE']
-
-
-
+            data_lists = [[model.train_loss_list, model.val_loss_list], [model.train_mae_list, model.val_mae_list], [model.train_mse_list, model.val_mse_list], [model.train_r2_list, model.val_r2_list]]
+            label_lists = [['Training Loss', 'Validation Loss'], ['Training MAE', 'Validation MAE'], ['Training MSE', 'Validation MSE'],['Training R^2', 'Validation R^2']]
+            titles = ['Training and Validation Loss', 'Training and Validation MAE', 'Training and Validation MSE', 'Training and Validation R^2']
+            titles = [model.title+"<BR>"+t for t in titles]
+            legend_titles = ['Loss Type', 'MAE Type', 'MSE Type', 'R^2 Type']
+            xaxis_titles = ['Epoch', 'Epoch', 'Epoch', 'Epoch']
+            yaxis_titles = ['Loss', 'MAE', 'MSE', 'R^2']
             filenames = yaxis_titles
         else:
             print("ANN.ModelFigures - Error: Could not recognize model task type. Returning None.")
             return None
-        for i in range(
+        for i in range(len(data_lists)):
             data = data_lists[i]
             labels = label_lists[i]
             title = titles[i]
             legend_title = legend_titles[i]
             xaxis_title = xaxis_titles[i]
             yaxis_title = yaxis_titles[i]
-
-
-            max_length = max(lengths)
-            x_ticks = list(range(1, max_length + 1))
+            x = model.epoch_list
+
 
             figure = go.Figure()
+            min_x = np.inf
+            max_x = -np.inf
+            min_y = np.inf
+            max_y = -np.inf
             for j in range(len(data)):
-
-
+                y = data[j]
+                figure.add_trace(go.Scatter(x=x, y=y, mode='lines+markers', name=labels[j]))
+                min_x = min(min_x, min(x))
+                max_x = max(max_x, max(x))
+                min_y = min(min_y, min(y))
+                max_y = max(max_y, max(y))
 
             figure.update_layout(
+                xaxis=dict(range=[0, max_x+max_x*0.01]),
+                yaxis=dict(range=[min_y-min_y*0.01, max_y+max_y*0.01]),
                 title=title,
                 xaxis_title=xaxis_title,
                 yaxis_title=yaxis_title,
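The loop above assembles one Plotly figure per metric pair; rendering the results is then a one-liner per figure (sketch; figure.show() is the standard Plotly API, and the returned filenames — shown in the next hunk — are suggested export names):

    figures, filenames = ANN.Figures(model)
    for figure, filename in zip(figures, filenames):
        figure.show()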
@@ -1008,7 +1030,7 @@ class ANN():
         return figures, filenames
 
     @staticmethod
-    def
+    def Metrics(model):
         """
         Returns the model performance metrics given the input labels and predictions, and the model's task type.
 
@@ -1036,11 +1058,11 @@ class ANN():
         "R-squared"
 
         """
-        metrics = model.metrics
+        metrics = model.metrics
         return metrics
 
     @staticmethod
-    def
+    def Save(model, path, overwrite=False):
         """
         Saves the model.
 
@@ -1062,13 +1084,13 @@ class ANN():
         import os

         if model == None:
-            print("
+            print("ANN.Save - Error: The input model parameter is invalid. Returning None.")
             return None
         if path == None:
-            print("
+            print("ANN.Save - Error: The input path parameter is invalid. Returning None.")
             return None
         if not overwrite and os.path.exists(path):
-            print("
+            print("ANN.Save - Error: a file already exists at the specified path and overwrite is set to False. Returning None.")
             return None
         if overwrite and os.path.exists(path):
             os.remove(path)
@@ -1081,9 +1103,9 @@ class ANN():
         return True

     @staticmethod
-    def
+    def Load(model, path):
         """
-        Loads the model state dictionary found at the input file path. The model input parameter must be pre-initialized using the
+        Loads the model state dictionary found at the input file path. The model input parameter must be pre-initialized using the ANN.Initialize() method.

         Parameters
         ----------
@@ -1101,165 +1123,11 @@ class ANN():
         from os.path import exists

         if not exists(path):
-            print("ANN.
+            print("ANN.Load - Error: The specified path does not exist. Returning None.")
             return None
         model.load(path)
         return model

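Per the amended docstring, `Load` restores a state dictionary into a model that must already be constructed, so the typical round trip is initialize, then load. A hedged sketch; the arguments of `ANN.Initialize()` are not shown in this diff, so that call is schematic:

```python
from topologicpy.ANN import ANN

# Assumed workflow: rebuild an untrained model with the same architecture and
# hyperparameters used at save time, then restore its weights from disk.
model = ANN.Initialize(...)  # hypothetical arguments; see the ANN.Initialize docs
model = ANN.Load(model, "ann_model.pt")
if model is None:
    print("Load failed: check that the path exists.")
```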
-    @staticmethod
-    def ConfusionMatrix(actual, predicted, normalize=False):
-        """
-        Returns the confusion matrix for the input actual and predicted labels. This is to be used with classification tasks only not regression.
-
-        Parameters
-        ----------
-        actual : list
-            The input list of actual labels.
-        predicted : list
-            The input list of predicts labels.
-        normalized : bool , optional
-            If set to True, the returned data will be normalized (proportion of 1). Otherwise, actual numbers are returned. The default is False.
-
-        Returns
-        -------
-        list
-            The created confusion matrix.
-
-        """
-        import os
-        import warnings
-
-        try:
-            from sklearn import metrics
-            from sklearn.metrics import accuracy_score
-        except:
-            print("ANN.ConfusionMatrix - Installing required scikit-learn (sklearn) library.")
-            try:
-                os.system("pip install scikit-learn")
-            except:
-                os.system("pip install scikit-learn --user")
-            try:
-                from sklearn import metrics
-                from sklearn.metrics import accuracy_score
-                print("ANN.ConfusionMatrix - scikit-learn (sklearn) library installed correctly.")
-            except:
-                warnings.warn("ANN.ConfusionMatrix - Error: Could not import scikit-learn (sklearn). Please try to install scikit-learn manually. Returning None.")
-                return None
-
-        if not isinstance(actual, list):
-            print("ANN.ConfusionMatrix - ERROR: The actual input is not a list. Returning None")
-            return None
-        if not isinstance(predicted, list):
-            print("ANN.ConfusionMatrix - ERROR: The predicted input is not a list. Returning None")
-            return None
-        if len(actual) != len(predicted):
-            print("ANN.ConfusionMatrix - ERROR: The two input lists do not have the same length. Returning None")
-            return None
-        if normalize:
-            cm = np.transpose(metrics.confusion_matrix(y_true=actual, y_pred=predicted, normalize="true"))
-        else:
-            cm = np.transpose(metrics.confusion_matrix(y_true=actual, y_pred=predicted))
-        return cm
-
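The removed `ConfusionMatrix` helper was a thin wrapper around scikit-learn: it validated the two label lists, then returned the transpose of `sklearn.metrics.confusion_matrix`, so predicted labels index the rows rather than the columns. Code that relied on it can reproduce the same result directly, as in this sketch with example label lists:

```python
import numpy as np
from sklearn.metrics import confusion_matrix

actual    = [0, 0, 1, 1, 2, 2]   # example ground-truth labels
predicted = [0, 1, 1, 1, 2, 0]   # example predicted labels

# Transposing puts predicted labels on the rows, matching the removed helper.
cm = np.transpose(confusion_matrix(y_true=actual, y_pred=predicted))
# normalize="true" reproduces the helper's normalize=True behavior
# (each true-label column sums to 1 after the transpose).
cm_normalized = np.transpose(confusion_matrix(y_true=actual, y_pred=predicted,
                                              normalize="true"))
print(cm)
```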
-    @staticmethod
-    def Show(data,
-             labels,
-             title="Training/Validation",
-             xTitle="Epochs",
-             xSpacing=1,
-             yTitle="Accuracy and Loss",
-             ySpacing=0.1,
-             useMarkers=False,
-             chartType="Line",
-             width=950,
-             height=500,
-             backgroundColor='rgba(0,0,0,0)',
-             gridColor='lightgray',
-             marginLeft=0,
-             marginRight=0,
-             marginTop=40,
-             marginBottom=0,
-             renderer = "notebook"):
-        """
-        Shows the data in a plolty graph.
-
-        Parameters
-        ----------
-        data : list
-            The data to display.
-        labels : list
-            The labels to use for the data.
-        width : int , optional
-            The desired width of the figure. The default is 950.
-        height : int , optional
-            The desired height of the figure. The default is 500.
-        title : str , optional
-            The chart title. The default is "Training and Testing Results".
-        xTitle : str , optional
-            The X-axis title. The default is "Epochs".
-        xSpacing : float , optional
-            The X-axis spacing. The default is 1.0.
-        yTitle : str , optional
-            The Y-axis title. The default is "Accuracy and Loss".
-        ySpacing : float , optional
-            The Y-axis spacing. The default is 0.1.
-        useMarkers : bool , optional
-            If set to True, markers will be displayed. The default is False.
-        chartType : str , optional
-            The desired type of chart. The options are "Line", "Bar", or "Scatter". It is case insensitive. The default is "Line".
-        backgroundColor : str , optional
-            The desired background color. This can be any plotly color string and may be specified as:
-            - A hex string (e.g. '#ff0000')
-            - An rgb/rgba string (e.g. 'rgb(255,0,0)')
-            - An hsl/hsla string (e.g. 'hsl(0,100%,50%)')
-            - An hsv/hsva string (e.g. 'hsv(0,100%,100%)')
-            - A named CSS color.
-            The default is 'rgba(0,0,0,0)' (transparent).
-        gridColor : str , optional
-            The desired grid color. This can be any plotly color string and may be specified as:
-            - A hex string (e.g. '#ff0000')
-            - An rgb/rgba string (e.g. 'rgb(255,0,0)')
-            - An hsl/hsla string (e.g. 'hsl(0,100%,50%)')
-            - An hsv/hsva string (e.g. 'hsv(0,100%,100%)')
-            - A named CSS color.
-            The default is 'lightgray'.
-        marginLeft : int , optional
-            The desired left margin in pixels. The default is 0.
-        marginRight : int , optional
-            The desired right margin in pixels. The default is 0.
-        marginTop : int , optional
-            The desired top margin in pixels. The default is 40.
-        marginBottom : int , optional
-            The desired bottom margin in pixels. The default is 0.
-        renderer : str , optional
-            The desired plotly renderer. The default is "notebook".
-
-        Returns
-        -------
-        None.
-
-        """
-        from topologicpy.Plotly import Plotly

-        dataFrame = Plotly.DataByDGL(data, labels)
-        fig = Plotly.FigureByDataFrame(dataFrame,
-                                       labels=labels,
-                                       title=title,
-                                       xTitle=xTitle,
-                                       xSpacing=xSpacing,
-                                       yTitle=yTitle,
-                                       ySpacing=ySpacing,
-                                       useMarkers=useMarkers,
-                                       chartType=chartType,
-                                       width=width,
-                                       height=height,
-                                       backgroundColor=backgroundColor,
-                                       gridColor=gridColor,
-                                       marginRight=marginRight,
-                                       marginLeft=marginLeft,
-                                       marginTop=marginTop,
-                                       marginBottom=marginBottom
-                                       )
-        Plotly.Show(fig, renderer=renderer)


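With the module-level `Show` helper removed, displaying training curves goes through `ANN.ModelFigures`, which (per the `return figures, filenames` context earlier in this diff) yields one Plotly figure per tracked metric along with suggested filenames. A hedged sketch of consuming that pair; the call's exact signature is not shown in this diff, so `ANN.ModelFigures(model)` is assumed:

```python
from topologicpy.ANN import ANN

# Assumed usage: build the per-metric figures for a trained model, then show
# each one (or write it out under its suggested filename).
figures, filenames = ANN.ModelFigures(model)
for figure, filename in zip(figures, filenames):
    figure.show()  # or figure.write_html(filename + ".html")
```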