vanillanets 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,112 @@
1
+ """
2
+ VanillaNets v1.0.0 - A transparent, NumPy-only neural network library.
3
+
4
+ A from-scratch implementation of core neural network components using only Python and NumPy.
5
+ Every component is written explicitly with clarity prioritized over convenience.
6
+
7
+ Main Classes:
8
+ - Model: Sequential model for building neural networks
9
+ - DenseLayer: Fully connected layers
10
+ - Activation functions: Linear, ReLU, LeakyReLU, Tanh, Sigmoid, Softmax
11
+ - Loss functions: BinaryCrossEntropy, CategoricalCrossEntropy, SparseCategoricalCrossEntropy, MeanSquaredError
12
+ - Optimizers: Optimizer_SGD, Optimizer_Adam
13
+ - Metrics: Accuracy, Precision, Recall, F1Score, ConfusionMatrix, R2Score, MAE, RMSE
14
+
15
+ Example Usage:
16
+ >>> from vanillanets import Model, DenseLayer
17
+ >>> from vanillanets.activations import ReLU, Sigmoid
18
+ >>> from vanillanets.losses import BinaryCrossEntropy
19
+ >>> from vanillanets.optimizers import Optimizer_Adam
20
+ >>> from vanillanets.metrics import Accuracy
21
+ >>>
22
+ >>> model = Model()
23
+ >>> model.add(DenseLayer(30, 64))
24
+ >>> model.add(ReLU())
25
+ >>> model.add(DenseLayer(64, 1))
26
+ >>> model.add(Sigmoid())
27
+ >>>
28
+ >>> model.set(
29
+ ... loss=BinaryCrossEntropy(),
30
+ ... optimizer=Optimizer_Adam(learning_rate=0.01),
31
+ ... metrics={'accuracy': Accuracy()}
32
+ ... )
33
+ >>> model.finalize()
34
+ >>>
35
+ >>> model.fit(X_train, y_train, epochs=100, validation_data=(X_val, y_val))
36
+ >>> predictions = model.predict(X_test)
37
+ """
38
+
39
+ __version__ = "1.0.0"
40
+ __author__ = "Umar Balak"
41
+ __license__ = "MIT"
42
+
43
+ # Core model and layer
44
+ from .model import Model
45
+ from .layers import DenseLayer
46
+
47
+ # Activation functions
48
+ from .activations import (
49
+ Linear,
50
+ ReLU,
51
+ LeakyReLU,
52
+ Sigmoid,
53
+ Tanh,
54
+ Softmax,
55
+ )
56
+
57
+ # Loss functions
58
+ from .losses import (
59
+ BinaryCrossEntropy,
60
+ CategoricalCrossEntropy,
61
+ SparseCategoricalCrossEntropy,
62
+ MeanSquaredError,
63
+ )
64
+
65
+ # Optimizers
66
+ from .optimizers import Optimizer_SGD, Optimizer_Adam
67
+
68
+ # Metrics
69
+ from .metrics import (
70
+ Accuracy,
71
+ Precision,
72
+ Recall,
73
+ F1Score,
74
+ ConfusionMatrix,
75
+ R2Score,
76
+ MAE,
77
+ RMSE,
78
+ )
79
+
80
+ # Legacy accuracy
81
+ from .accuracy import Accuracy as Accuracy_Legacy
82
+
83
+ __all__ = [
84
+ # Core
85
+ "Model",
86
+ "DenseLayer",
87
+ # Activations
88
+ "Linear",
89
+ "ReLU",
90
+ "LeakyReLU",
91
+ "Sigmoid",
92
+ "Tanh",
93
+ "Softmax",
94
+ # Losses
95
+ "BinaryCrossEntropy",
96
+ "CategoricalCrossEntropy",
97
+ "SparseCategoricalCrossEntropy",
98
+ "MeanSquaredError",
99
+ # Optimizers
100
+ "Optimizer_SGD",
101
+ "Optimizer_Adam",
102
+ # Metrics
103
+ "Accuracy",
104
+ "Precision",
105
+ "Recall",
106
+ "F1Score",
107
+ "ConfusionMatrix",
108
+ "R2Score",
109
+ "MAE",
110
+ "RMSE",
111
+ "Accuracy_Legacy",
112
+ ]
@@ -0,0 +1,23 @@
1
+ import numpy as np
2
+
3
class Accuracy:
    """Legacy accuracy metric: fraction of predictions equal to the labels."""

    def calculate(self, predictions, y):
        """Return the mean of ``predicted_label == true_label``.

        predictions: 2-D model output. A single column is treated as a
            binary probability and thresholded at 0.5; several columns
            are reduced with argmax.
        y: labels given as (n,), as a column (n, 1) of class indices, or
            one-hot encoded (n, C) for the multiclass case.
        """
        binary_output = predictions.shape[1] == 1

        if binary_output:
            # Binary case: threshold at 0.5
            predictions = ((predictions > 0.5) * 1).flatten()
        else:
            # Multiclass case: pick the most probable class
            predictions = np.argmax(predictions, axis=1)

        if len(y.shape) == 2:
            if binary_output or y.shape[1] == 1:
                # A single label column holds class indices, not a
                # one-hot encoding; argmax over it would always give 0.
                y = y.flatten()
            else:
                # One-hot encoded labels
                y = np.argmax(y, axis=1)

        return np.mean(predictions == y)
@@ -0,0 +1,99 @@
1
+ import numpy as np
2
+
3
class Linear:
    """Identity activation: the output is the input, unchanged."""

    def forward(self, inputs):
        """Store ``inputs`` and expose the same object as ``output``."""
        self.inputs = self.output = inputs

    def backward(self, dvalues):
        """Pass the upstream gradient through as an independent copy."""
        # d(output)/d(input) == 1, so the gradient is dvalues itself.
        passthrough = dvalues.copy()
        self.dinputs = passthrough
12
+
13
class Sigmoid:
    """Logistic sigmoid activation, 1 / (1 + exp(-x))."""

    def forward(self, inputs):
        """Store ``inputs`` and compute the sigmoid into ``output``.

        The exponential is always taken of a non-positive number, so it
        cannot overflow for large-magnitude inputs:
          x >= 0: 1 / (1 + exp(-x))
          x <  0: exp(x) / (1 + exp(x))
        """
        self.inputs = inputs
        decay = np.exp(-np.abs(inputs))
        self.output = np.where(inputs >= 0, 1 / (1 + decay), decay / (1 + decay))

    def backward(self, dvalues):
        """Scale the upstream gradient by the sigmoid derivative."""
        # Derivative of Sigmoid is: output * (1 - output)
        self.dinputs = dvalues * (1 - self.output) * self.output
22
+
23
class ReLU:
    """Rectified linear unit: max(0, x) element-wise."""

    def forward(self, inputs):
        """Store ``inputs`` (needed by ``backward``) and clamp negatives to 0."""
        self.inputs = inputs
        self.output = np.maximum(0, inputs)

    def backward(self, dvalues):
        """Zero the upstream gradient wherever the forward input was <= 0."""
        # Work on a copy so the caller's gradient array is left untouched.
        gradient = dvalues.copy()
        inactive = self.inputs <= 0
        gradient[inactive] = 0
        self.dinputs = gradient
36
+
37
+
38
class LeakyReLU:
    """Leaky ReLU: x for x > 0, ``alpha * x`` otherwise."""

    def __init__(self, alpha=0.1):
        """alpha: slope applied to non-positive inputs (default 0.1)."""
        self.alpha = alpha

    def forward(self, inputs):
        """Store ``inputs`` for the backward pass and apply the activation."""
        self.inputs = inputs
        self.output = np.where(inputs > 0, inputs, self.alpha * inputs)

    def backward(self, dvalues):
        """Scale the upstream gradient by ``alpha`` where the input was <= 0."""
        # Copy first so the caller's gradient array is not modified.
        self.dinputs = dvalues.copy()
        self.dinputs[self.inputs <= 0] *= self.alpha
51
+
52
class Tanh:
    """
    Hyperbolic tangent activation.

    Uses NumPy's built-in ``np.tanh``, which is a standard mathematical
    function just like sine or cosine.
    """

    def forward(self, inputs):
        """Store ``inputs`` and compute ``np.tanh(inputs)`` into ``output``."""
        # The manual formula
        #   (exp(x) - exp(-x)) / (exp(x) + exp(-x))
        # is mathematically fine for typical inputs, but np.exp overflows
        # for large values and the quotient becomes nan. np.tanh is used
        # instead of that formula.
        self.inputs = inputs
        activated = np.tanh(inputs)
        self.output = activated

    def backward(self, dvalues):
        """Scale the upstream gradient by the tanh derivative."""
        # Derivative of Tanh is: 1 - (output)^2
        local_slope = 1 - self.output ** 2
        self.dinputs = dvalues * local_slope
75
+
76
class Softmax:
    """Row-wise softmax activation for 2-D inputs (samples x classes)."""

    def forward(self, inputs):
        """Store ``inputs`` and write per-row probabilities to ``output``."""
        self.inputs = inputs
        # Subtract the row maximum before exponentiating so np.exp never
        # receives a positive argument.
        exp_values = np.exp(inputs - np.max(inputs, axis=1, keepdims=True))
        # Normalize them for each sample
        self.output = exp_values / np.sum(exp_values, axis=1, keepdims=True)

    def backward(self, dvalues):
        """Apply the softmax Jacobian to the upstream gradient.

        For one sample with output s and upstream gradient d, the Jacobian
        is diag(s) - s s^T, so
            J @ d = s * d - s * (s . d) = s * (d - s . d)
        This is evaluated for all rows at once. The result takes its dtype
        from the float arithmetic, so integer ``dvalues`` no longer
        truncate the gradient (the previous ``np.empty_like(dvalues)``
        buffer inherited the dtype of ``dvalues``).
        """
        # s . d for every sample, kept as a column for broadcasting
        weighted_sum = np.sum(self.output * dvalues, axis=1, keepdims=True)
        self.dinputs = self.output * (dvalues - weighted_sum)
vanillanets/layers.py ADDED
@@ -0,0 +1,79 @@
1
+ import numpy as np
2
+
3
+
4
class DenseLayer:
    """Fully connected layer: ``output = inputs @ weights + biases``.

    ``weights`` has shape (n_inputs, n_neurons); ``biases`` has shape
    (1, n_neurons).
    """

    def __init__(self, n_inputs, n_neurons, *, activation='relu', init='auto',
                 distribution='normal', bias_init='zeros', seed=None):
        """
        n_inputs: fan-in
        n_neurons: fan-out
        activation: 'relu', 'leaky_relu', 'tanh', 'sigmoid', 'softmax', 'linear'
            (only used to pick the initializer when init='auto')
        init: 'auto' | 'he' | 'xavier'
        distribution: 'normal' or 'uniform'
        bias_init: 'zeros' or float (small constant)
        seed: optional int for reproducibility

        Raises ValueError for non-positive sizes or an unknown ``init``,
        ``distribution`` or ``bias_init``.
        """
        if n_inputs <= 0 or n_neurons <= 0:
            raise ValueError("n_inputs and n_neurons must be positive")

        fan_in, fan_out = n_inputs, n_neurons

        # choose initializer: He for ReLU-family activations, Xavier otherwise
        if init == 'auto':
            init = 'he' if activation in ('relu', 'leaky_relu') else 'xavier'

        if init == 'he':
            # normal: std = sqrt(2 / fan_in), uniform: limit = sqrt(6 / fan_in)
            denom = fan_in
        elif init == 'xavier':
            # normal: std = sqrt(2 / (fan_in + fan_out))
            # uniform: limit = sqrt(6 / (fan_in + fan_out))
            denom = fan_in + fan_out
        else:
            raise ValueError("init must be 'auto', 'he', or 'xavier'")

        # Previously any unknown value silently fell through to 'uniform'.
        if distribution not in ('normal', 'uniform'):
            raise ValueError("distribution must be 'normal' or 'uniform'")

        # default_rng(None) draws fresh entropy, so no separate branch is needed
        rng = np.random.default_rng(seed)

        if distribution == 'normal':
            std = np.sqrt(2.0 / denom)
            self.weights = rng.normal(0.0, std, size=(fan_in, fan_out))
        else:
            limit = np.sqrt(6.0 / denom)
            self.weights = rng.uniform(-limit, limit, size=(fan_in, fan_out))

        # biases: prefer zeros; optionally small positive for ReLU to reduce dead units
        if bias_init == 'zeros':
            self.biases = np.zeros((1, fan_out))
        elif isinstance(bias_init, (int, float)):
            self.biases = np.full((1, fan_out), float(bias_init))
        else:
            raise ValueError("bias_init must be 'zeros' or a numeric constant")

        self.output = None

    def forward(self, inputs):
        """Store ``inputs`` for the backward pass and compute the affine map."""
        self.inputs = inputs
        self.output = np.dot(inputs, self.weights) + self.biases

    def backward(self, dvalues):
        """Compute ``dweights``, ``dbiases`` and ``dinputs`` from ``dvalues``."""
        # Gradient on weights: transposed forward inputs times upstream gradient
        self.dweights = np.dot(self.inputs.T, dvalues)

        # Sum over the batch axis; keepdims=True keeps the (1, n_neurons)
        # shape of self.biases
        self.dbiases = np.sum(dvalues, axis=0, keepdims=True)

        # Gradient on values to pass to the previous layer
        self.dinputs = np.dot(dvalues, self.weights.T)
vanillanets/losses.py ADDED
@@ -0,0 +1,145 @@
1
+ import numpy as np
2
+
3
+ # Common loss class
4
class Loss:
    """
    Base class for losses.

    Subclasses provide ``forward(output, y)`` returning per-sample losses;
    ``calculate`` reduces those to their mean.
    """

    def calculate(self, output, y):
        """Return the mean of ``self.forward(output, y)``."""
        return np.mean(self.forward(output, y))
19
+
20
class BinaryCrossEntropy(Loss):
    """Binary cross-entropy between predicted probabilities and 0/1 targets."""

    @staticmethod
    def _match_shape(y_true, y_pred):
        """Reshape ``y_true`` to ``y_pred``'s shape when only the layout differs.

        Without this, (n,) targets against (n, 1) predictions broadcast to
        an (n, n) matrix and silently produce a wrong loss and gradient.
        """
        y_true = np.asarray(y_true)
        target_shape = np.shape(y_pred)
        if y_true.shape != target_shape and y_true.size == np.size(y_pred):
            y_true = y_true.reshape(target_shape)
        return y_true

    def forward(self, y_pred, y_true):
        """
        Return the element-wise loss, with the same shape as ``y_pred``.
        """
        y_true = self._match_shape(y_true, y_pred)

        # Clip both sides so neither log(0) nor log(1 - 1) is evaluated
        # and the mean is not dragged towards either value
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        # Binary cross-entropy formula
        return -(y_true * np.log(y_pred_clipped)
                 + (1 - y_true) * np.log(1 - y_pred_clipped))

    def backward(self, dvalues, y_true):
        """Store the gradient of the mean loss w.r.t. ``dvalues`` in ``dinputs``.

        ``dvalues`` are the predicted probabilities.
        """
        y_true = self._match_shape(y_true, dvalues)

        # Number of samples
        samples = len(dvalues)

        # Number of outputs in every sample (1 for a flat prediction vector)
        outputs = np.shape(dvalues)[1] if np.ndim(dvalues) > 1 else 1

        # Clip data to prevent division by 0
        clipped_dvalues = np.clip(dvalues, 1e-7, 1 - 1e-7)

        # Calculate gradient
        self.dinputs = -(y_true / clipped_dvalues
                         - (1 - y_true) / (1 - clipped_dvalues)) / outputs

        # Normalize gradient across the batch
        self.dinputs = self.dinputs / samples
53
+
54
+ # Cross-entropy loss
55
class CategoricalCrossEntropy(Loss):
    """Categorical cross-entropy for class-index or one-hot labels."""

    def forward(self, y_pred, y_true):
        """
        Return the negative log-likelihood per sample, shape (n_samples,).

        y_true: 1-D class indices or a 2-D one-hot matrix.
        Raises ValueError for labels that are neither 1-D nor 2-D.
        """
        y_true = np.asarray(y_true)

        # Clip both sides so log(0) is never evaluated and the mean is
        # not dragged towards either value
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        if y_true.ndim == 1:
            # Class indices: pick one probability per row in a single
            # indexing operation
            rows = np.arange(len(y_pred_clipped))
            correct_confidences = y_pred_clipped[rows, y_true]
        elif y_true.ndim == 2:
            # One-hot labels: mask out everything but the target class
            correct_confidences = np.sum(y_pred_clipped * y_true, axis=1)
        else:
            raise ValueError("y_true must be 1-D class indices or 2-D one-hot labels")

        return -np.log(correct_confidences)

    def backward(self, dvalues, y_true):
        """Store the gradient of the mean loss w.r.t. ``dvalues`` in ``dinputs``.

        ``dvalues`` are the predicted probabilities.
        """
        samples = len(dvalues)
        labels = len(dvalues[0])

        # Turn class indices into one-hot rows
        if len(y_true.shape) == 1:
            y_true = np.eye(labels)[y_true]

        # Clip data to prevent division by 0
        clipped_dvalues = np.clip(dvalues, 1e-7, 1 - 1e-7)

        # Calculate gradient using the clipped values
        self.dinputs = -y_true / clipped_dvalues

        # Normalize gradient across the batch
        self.dinputs = self.dinputs / samples
102
+
103
+ # Cross-entropy loss
104
class SparseCategoricalCrossEntropy(Loss):
    """Categorical cross-entropy for integer class-index labels."""

    def forward(self, y_pred, y_true):
        """
        Return the negative log-likelihood per sample, shape (n_samples,).

        y_true: integer class indices, flat or as a single column.
        """
        # Number of samples in a batch
        samples = len(y_pred)
        labels = np.asarray(y_true).reshape(-1)

        # Clip both sides so log(0) is never evaluated and the mean is
        # not dragged towards either value
        y_pred_clipped = np.clip(y_pred, 1e-7, 1 - 1e-7)

        # Probability assigned to the target class of every sample
        correct_confidences = y_pred_clipped[np.arange(samples), labels]

        return -np.log(correct_confidences)

    def backward(self, dvalues, y_true):
        """Store the gradient of the mean loss w.r.t. ``dvalues`` in ``dinputs``.

        ``dvalues`` are the predicted probabilities. The gradient is
        -1 / p at each sample's target class and 0 elsewhere, divided by
        the number of samples.
        """
        samples = len(dvalues)
        labels = np.asarray(y_true).reshape(-1)

        # Clip data to prevent division by 0
        clipped_dvalues = np.clip(dvalues, 1e-7, 1 - 1e-7)

        rows = np.arange(samples)
        self.dinputs = np.zeros_like(clipped_dvalues, dtype=float)
        self.dinputs[rows, labels] = -1.0 / clipped_dvalues[rows, labels]

        # Normalize gradient across the batch
        self.dinputs = self.dinputs / samples
125
+
126
+
127
class MeanSquaredError(Loss):
    """Mean squared error, averaged over the last axis per sample."""

    def forward(self, y_pred, y_true):
        """Return the squared error averaged over the last axis."""
        squared_error = (y_true - y_pred) ** 2
        return np.mean(squared_error, axis=-1)

    def backward(self, dvalues, y_true):
        """Store the gradient of the mean loss w.r.t. ``dvalues`` in ``dinputs``.

        ``dvalues`` are the predictions.
        """
        # Batch size and number of outputs per sample
        samples, outputs = len(dvalues), len(dvalues[0])

        # Gradient on values, averaged over the outputs of each sample
        per_sample_gradient = -2 * (y_true - dvalues) / outputs

        # Normalize gradient across the batch
        self.dinputs = per_sample_gradient / samples
vanillanets/metrics.py ADDED
@@ -0,0 +1,131 @@
1
+ import numpy as np
2
+
3
+
4
+ def _to_labels(predictions, y):
5
+ """
6
+ Convert raw model outputs and ground truth into 1D label arrays
7
+ suitable for classification metrics.
8
+
9
+ - predictions: (n, 1) -> threshold at 0.5 -> (n,)
10
+ (n, C) -> argmax over classes -> (n,)
11
+ - y: (n, 1) -> flatten -> (n,) [binary, NOT one-hot]
12
+ (n, C) -> argmax over classes -> (n,) [one-hot, multiclass]
13
+ (n,) -> unchanged
14
+ """
15
+ if predictions.shape[1] == 1:
16
+ predictions = (predictions > 0.5).astype(int).flatten()
17
+ if len(y.shape) == 2:
18
+ y = y.flatten()
19
+ else:
20
+ predictions = np.argmax(predictions, axis=1)
21
+ if len(y.shape) == 2:
22
+ y = np.argmax(y, axis=1)
23
+
24
+ return predictions, y.astype(int)
25
+
26
+
27
class Accuracy:
    """Share of samples whose predicted label equals the true label."""

    def calculate(self, predictions, y):
        """Reduce both arguments with ``_to_labels`` and return the hit rate."""
        predicted, actual = _to_labels(predictions, y)
        hits = predicted == actual
        return np.mean(hits)
33
+
34
+
35
class Precision:
    """
    Binary precision: TP / (TP + FP), with 1 as the positive class.
    For multiclass, computes macro-averaged precision (mean over the
    classes observed in the predictions or labels).
    """

    @staticmethod
    def _class_precision(predictions, y, label):
        """Precision of one class: TP / (TP + FP), with 1e-7 guarding 0/0."""
        predicted = predictions == label
        tp = np.sum(predicted & (y == label))
        fp = np.sum(predicted & (y != label))
        return tp / (tp + fp + 1e-7)

    def calculate(self, predictions, y):
        """Return binary or macro-averaged precision."""
        predictions, y = _to_labels(predictions, y)
        classes = np.unique(np.concatenate([predictions, y]))

        # Binary only when every observed label is 0 or 1. Counting the
        # observed labels instead would score a multiclass batch that
        # happens to contain e.g. only classes {0, 2} against positive
        # class 1.
        if np.all(np.isin(classes, (0, 1))):
            return self._class_precision(predictions, y, 1)

        # macro-average over classes
        scores = [self._class_precision(predictions, y, c) for c in classes]
        return np.mean(scores)
57
+
58
+
59
class Recall:
    """
    Binary recall: TP / (TP + FN), with 1 as the positive class.
    For multiclass, computes macro-averaged recall (mean over the
    classes observed in the predictions or labels).
    """

    @staticmethod
    def _class_recall(predictions, y, label):
        """Recall of one class: TP / (TP + FN), with 1e-7 guarding 0/0."""
        actual = y == label
        tp = np.sum((predictions == label) & actual)
        fn = np.sum((predictions != label) & actual)
        return tp / (tp + fn + 1e-7)

    def calculate(self, predictions, y):
        """Return binary or macro-averaged recall."""
        predictions, y = _to_labels(predictions, y)
        classes = np.unique(np.concatenate([predictions, y]))

        # Binary only when every observed label is 0 or 1. Counting the
        # observed labels instead would score a multiclass batch that
        # happens to contain e.g. only classes {0, 2} against positive
        # class 1.
        if np.all(np.isin(classes, (0, 1))):
            return self._class_recall(predictions, y, 1)

        # macro-average over classes
        scores = [self._class_recall(predictions, y, c) for c in classes]
        return np.mean(scores)
80
+
81
+
82
class F1Score:
    """Harmonic mean of the values returned by Precision and Recall."""

    def calculate(self, predictions, y):
        """Return 2 * P * R / (P + R + 1e-7) for the given batch."""
        precision = Precision().calculate(predictions, y)
        recall = Recall().calculate(predictions, y)
        numerator = 2 * precision * recall
        # 1e-7 keeps the division defined when both scores are zero
        denominator = precision + recall + 1e-7
        return numerator / denominator
89
+
90
+
91
class ConfusionMatrix:
    """
    Builds an (n_classes, n_classes) integer count matrix with true
    labels on the rows and predicted labels on the columns.

    Pass num_classes explicitly when possible. If omitted, the size is
    inferred as (largest label seen + 1), which may miss classes that
    do not occur in the given batch.
    """

    def calculate(self, predictions, y, num_classes=None):
        """Return the confusion matrix for one batch."""
        predicted, actual = _to_labels(predictions, y)

        if num_classes is None:
            size = int(max(predicted.max(), actual.max())) + 1
        else:
            size = num_classes

        matrix = np.zeros((size, size), dtype=int)
        # One count per sample at (true label, predicted label)
        for row, column in zip(actual, predicted):
            matrix[row, column] += 1
        return matrix
109
+
110
+
111
class R2Score:
    """Coefficient of determination (R^2) for regression; 1.0 means a perfect fit."""

    def calculate(self, y_pred, y_true):
        """Return 1 - SS_res / (SS_tot + 1e-7)."""
        residuals = y_true - y_pred
        deviations = y_true - np.mean(y_true)
        ss_res = np.sum(residuals ** 2)
        ss_tot = np.sum(deviations ** 2)
        # 1e-7 keeps the ratio defined when y_true is constant
        return 1 - ss_res / (ss_tot + 1e-7)
118
+
119
+
120
class MAE:
    """Mean absolute error for regression."""

    def calculate(self, y_pred, y_true):
        """Return the mean of |y_true - y_pred| over all elements."""
        absolute_errors = np.abs(y_true - y_pred)
        return np.mean(absolute_errors)
125
+
126
+
127
class RMSE:
    """Root mean squared error for regression."""

    def calculate(self, y_pred, y_true):
        """Return sqrt(mean((y_true - y_pred)^2)) over all elements."""
        squared_errors = (y_true - y_pred) ** 2
        mean_squared = np.mean(squared_errors)
        return np.sqrt(mean_squared)
vanillanets/model.py ADDED
@@ -0,0 +1,172 @@
1
+ import numpy as np
2
+ from vanillanets.activations import Softmax
3
+ from vanillanets.losses import CategoricalCrossEntropy
4
+ from vanillanets.softmax_loss import Activation_Softmax_Loss_CategoricalCrossentropy
5
+
6
+
7
class Model:
    """Sequential model: an ordered list of layers plus loss, optimizer and metrics."""

    def __init__(self):
        # Network objects, applied in insertion order
        self.layers = []
        # Fused Softmax+CrossEntropy object for the fast backward pass
        # (created by finalize() when applicable)
        self.softmax_classifier_output = None
        self.metrics = {}
        # Filled in by set(); initialised here so finalize()/fit() can
        # detect a model that has not been configured yet
        self.loss = None
        self.optimizer = None

    # Add objects to the model
    def add(self, layer):
        """Append ``layer`` to the end of the network."""
        self.layers.append(layer)

    # Set loss, optimizer and metrics
    def set(self, *, loss, optimizer, accuracy=None, metrics=None):
        """
        loss: a loss instance (CategoricalCrossEntropy, BinaryCrossEntropy, MeanSquaredError)
        optimizer: an optimizer instance
        accuracy: (legacy) single metric object with a .calculate(predictions, y) method.
            Kept for backward compatibility - stored in `metrics` under 'accuracy'.
        metrics: dict of {name: metric_object} or list of metric objects.
            Each metric object must implement .calculate(predictions, y).
            For a list, each name is the lower-cased class name.
        """
        self.loss = loss
        self.optimizer = optimizer

        self.metrics = {}

        # Backward-compatible single 'accuracy' metric
        if accuracy is not None:
            self.metrics['accuracy'] = accuracy

        # New flexible metrics interface
        if metrics is not None:
            if isinstance(metrics, dict):
                self.metrics.update(metrics)
            else:
                # list/tuple of metric objects -> derive names from class names
                for m in metrics:
                    self.metrics[type(m).__name__.lower()] = m

    # Finalize the model setup
    def finalize(self):
        """Enable the fused Softmax + CategoricalCrossEntropy backward pass.

        The fused object is created only when the last layer is a Softmax
        and the loss is a CategoricalCrossEntropy; otherwise it is reset
        to None. Safe to call on a model with no layers or no loss.
        """
        self.softmax_classifier_output = None
        loss = getattr(self, 'loss', None)
        if self.layers and isinstance(self.layers[-1], Softmax) and \
                isinstance(loss, CategoricalCrossEntropy):
            self.softmax_classifier_output = Activation_Softmax_Loss_CategoricalCrossentropy()

    # Run a forward pass through all layers and return final output
    def predict(self, X):
        """Feed ``X`` through every layer and return the last layer's output."""
        layer_input = X
        for layer in self.layers:
            layer.forward(layer_input)
            # The output of this layer becomes the input of the next layer
            layer_input = layer.output
        return layer_input

    # Compute loss and all configured metrics for a given dataset
    # without performing any backward pass / parameter updates
    def evaluate(self, X, y):
        """Return ``(loss_value, {metric_name: value})`` for ``X``, ``y``."""
        predictions = self.predict(X)
        loss_value = self.loss.calculate(predictions, y)
        results = {
            name: metric.calculate(predictions, y)
            for name, metric in self.metrics.items()
        }
        return loss_value, results

    def _backward(self, output, y):
        """Backpropagate from the loss through the layers."""
        if self.softmax_classifier_output is not None:
            # Fast path: the fused object yields the gradient w.r.t. the
            # Softmax inputs directly, so the standalone Softmax layer is
            # skipped in the backward loop.
            self.softmax_classifier_output.backward(output, y)
            dinputs = self.softmax_classifier_output.dinputs
            layers_to_backprop = self.layers[:-1]
        else:
            self.loss.backward(output, y)
            dinputs = self.loss.dinputs
            layers_to_backprop = self.layers

        for layer in reversed(layers_to_backprop):
            layer.backward(dinputs)
            dinputs = layer.dinputs

    def _update_parameters(self):
        """Run one optimizer step over every layer that has weights."""
        self.optimizer.pre_update_lr()
        for layer in self.layers:
            if hasattr(layer, 'weights'):
                self.optimizer.update_params(layer)
        # Increment iteration counter
        self.optimizer.post_update_params()

    @staticmethod
    def _format_value(value):
        """Format a metric for the status line.

        Scalars keep the '.3f' format. Array-valued metrics (for example
        a confusion matrix) cannot take that format spec, so they are
        rendered on a single line instead of raising TypeError.
        """
        if np.ndim(value) == 0:
            return f'{value:.3f}'
        return np.array2string(np.asarray(value), separator=', ').replace('\n', '')

    def _format_metrics(self, results, prefix=''):
        """Join metric results as 'name: value' pairs."""
        return ', '.join(
            f'{prefix}{name}: {self._format_value(value)}'
            for name, value in results.items()
        )

    # Train the model
    def fit(self, X, y, *, epochs=1, print_every=100, validation_data=None):
        """
        Train on the full batch ``X``, ``y`` for ``epochs`` iterations.

        print_every: print a status line every this many epochs; 0 or None
            disables printing.
        validation_data: optional (X_val, y_val) tuple. If provided, validation
            loss and metrics are computed (without affecting training) and printed
            alongside training stats.

        Raises RuntimeError if set() has not provided a loss and optimizer.
        """
        if getattr(self, 'loss', None) is None or getattr(self, 'optimizer', None) is None:
            raise RuntimeError("call set(loss=..., optimizer=...) before fit()")

        # Main training loop
        for epoch in range(1, epochs + 1):

            # --- FORWARD PASS ---
            output = self.predict(X)

            # Calculate loss from the output of the final layer
            data_loss = self.loss.calculate(output, y)

            # --- METRICS ---
            metric_results = {
                name: metric.calculate(output, y)
                for name, metric in self.metrics.items()
            }

            # --- BACKWARD PASS + OPTIMIZATION ---
            self._backward(output, y)
            self._update_parameters()

            # --- VALIDATION (optional) ---
            # Run after backward/update so this forward pass doesn't
            # clobber the cached layer state used during backprop.
            val_loss = None
            val_metric_results = {}
            if validation_data is not None:
                X_val, y_val = validation_data
                val_loss, val_metric_results = self.evaluate(X_val, y_val)

            # Print status updates (skipped entirely when print_every is falsy,
            # which also avoids a modulo by zero)
            if print_every and not epoch % print_every:
                line = f'epoch: {epoch}'
                metric_str = self._format_metrics(metric_results)
                if metric_str:
                    line += f', {metric_str}'
                line += f', loss: {data_loss:.3f}'
                line += f', lr: {self.optimizer.current_learning_rate:.6f}'

                if validation_data is not None:
                    line += f', val_loss: {val_loss:.3f}'
                    val_metric_str = self._format_metrics(val_metric_results, prefix='val_')
                    if val_metric_str:
                        line += f', {val_metric_str}'

                print(line)
@@ -0,0 +1,109 @@
1
+ import numpy as np
2
+
3
class Optimizer_SGD:
    """Stochastic gradient descent with optional learning-rate decay and momentum."""

    def __init__(self, learning_rate=1.0, decay=0., momentum=0.):
        """Store the hyper-parameters and reset the iteration counter."""
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate
        self.decay = decay
        self.iterations = 0
        self.momentum = momentum

    def pre_update_lr(self):
        """Refresh ``current_learning_rate``; call once before the updates."""
        if not self.decay:
            return
        # Inverse-time decay based on the number of completed iterations
        self.current_learning_rate = self.learning_rate * (1. / (1. + self.decay * self.iterations))

    def update_params(self, layer):
        """Apply one SGD step to ``layer.weights`` and ``layer.biases`` in place."""
        rate = self.current_learning_rate

        if self.momentum:
            # Lazily create zero-filled momentum buffers on first use
            if not hasattr(layer, 'weight_momentums'):
                layer.weight_momentums = np.zeros_like(layer.weights)
                layer.bias_momentums = np.zeros_like(layer.biases)

            # Previous step scaled by the retain factor, minus the current
            # gradient step; the result is saved for the next iteration.
            weight_step = self.momentum * layer.weight_momentums - rate * layer.dweights
            layer.weight_momentums = weight_step

            bias_step = self.momentum * layer.bias_momentums - rate * layer.dbiases
            layer.bias_momentums = bias_step
        else:
            # Vanilla SGD when momentum is 0
            weight_step = -rate * layer.dweights
            bias_step = -rate * layer.dbiases

        layer.weights += weight_step
        layer.biases += bias_step

    def post_update_params(self):
        """Advance the iteration counter; call once after the updates."""
        self.iterations += 1
57
+
58
+
59
class Optimizer_Adam:
    """Adam optimizer with optional learning-rate decay."""

    def __init__(self, learning_rate=0.001, decay=0., epsilon=1e-7, beta_1=0.9, beta_2=0.999):
        """Store the hyper-parameters and reset the iteration counter."""
        self.learning_rate = learning_rate
        self.current_learning_rate = learning_rate
        self.decay = decay
        self.iterations = 0
        self.epsilon = epsilon
        self.beta_1 = beta_1
        self.beta_2 = beta_2

    def pre_update_lr(self):
        """Refresh ``current_learning_rate``; call once before the updates."""
        if not self.decay:
            return
        self.current_learning_rate = self.learning_rate * (1. / (1. + self.decay * self.iterations))

    def _step(self, layer, prefix, param_name, grad_name):
        """Update one parameter array of ``layer`` and its momentum/cache buffers."""
        momentum_attr = prefix + '_momentums'
        cache_attr = prefix + '_cache'
        gradient = getattr(layer, grad_name)
        # self.iterations is 0 on the first pass, hence the + 1
        step_number = self.iterations + 1

        # Running mean of the gradients, then its bias-corrected value
        # (compensates for the zero-initialised buffer)
        momentum = self.beta_1 * getattr(layer, momentum_attr) + (1 - self.beta_1) * gradient
        setattr(layer, momentum_attr, momentum)
        momentum_corrected = momentum / (1 - self.beta_1 ** step_number)

        # Running mean of the squared gradients, then its corrected value
        cache = self.beta_2 * getattr(layer, cache_attr) + (1 - self.beta_2) * gradient**2
        setattr(layer, cache_attr, cache)
        cache_corrected = cache / (1 - self.beta_2 ** step_number)

        # In-place update followed by re-assignment, i.e. what `+=` on the
        # attribute does
        values = getattr(layer, param_name)
        values += -self.current_learning_rate * momentum_corrected / (np.sqrt(cache_corrected) + self.epsilon)
        setattr(layer, param_name, values)

    def update_params(self, layer):
        """Apply one Adam step to ``layer.weights`` and ``layer.biases``."""
        # Lazily create zero-filled momentum and cache buffers
        if not hasattr(layer, 'weight_cache'):
            layer.weight_momentums = np.zeros_like(layer.weights)
            layer.weight_cache = np.zeros_like(layer.weights)
            layer.bias_momentums = np.zeros_like(layer.biases)
            layer.bias_cache = np.zeros_like(layer.biases)

        self._step(layer, 'weight', 'weights', 'dweights')
        self._step(layer, 'bias', 'biases', 'dbiases')

    def post_update_params(self):
        """Advance the iteration counter; call once after the updates."""
        self.iterations += 1
@@ -0,0 +1,41 @@
1
+ import numpy as np
2
+ from vanillanets.activations import Softmax
3
+ from vanillanets.losses import CategoricalCrossEntropy
4
+
5
class Activation_Softmax_Loss_CategoricalCrossentropy:
    """
    Softmax activation fused with categorical cross-entropy so the
    backward step can use the combined gradient directly.
    """

    def __init__(self):
        self.activation = Softmax()
        self.loss = CategoricalCrossEntropy()

    def forward(self, inputs, y_true):
        """Run the softmax on ``inputs``, keep it in ``output``, return the loss."""
        self.activation.forward(inputs)
        self.output = self.activation.output
        return self.loss.calculate(self.output, y_true)

    def backward(self, dvalues, y_true):
        """Store (dvalues - one_hot(y_true)) / n_samples in ``dinputs``.

        ``dvalues`` is used as the predicted probabilities, from which the
        one-hot true labels are subtracted.
        """
        batch_size = len(dvalues)

        # One-hot labels are reduced to class indices
        if len(y_true.shape) == 2:
            class_indices = np.argmax(y_true, axis=1)
        else:
            class_indices = y_true

        # Work on a copy so the caller's array is not modified
        gradient = dvalues.copy()

        # Subtract 1 from the predicted probability of each true class
        gradient[np.arange(batch_size), class_indices] -= 1

        # Divide by the batch size so the gradient magnitude does not
        # grow with the number of samples
        self.dinputs = gradient / batch_size
@@ -0,0 +1,383 @@
1
+ Metadata-Version: 2.4
2
+ Name: vanillanets
3
+ Version: 1.0.0
4
+ Summary: A transparent, NumPy-only neural network library for learning and experimentation.
5
+ Home-page: https://github.com/UmarBalak/vanillanets
6
+ Author: Umar Balak
7
+ Author-email: Umar Balak <umarbalak35@gmail.com>
8
+ License: MIT
9
+ Project-URL: Homepage, https://github.com/UmarBalak/vanillanets
10
+ Project-URL: Documentation, https://github.com/UmarBalak/vanillanets#readme
11
+ Project-URL: Repository, https://github.com/UmarBalak/vanillanets
12
+ Project-URL: Bug Tracker, https://github.com/UmarBalak/vanillanets/issues
13
+ Keywords: neural-network,deep-learning,machine-learning,numpy,education,from-scratch
14
+ Classifier: Development Status :: 5 - Production/Stable
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: Intended Audience :: Education
17
+ Classifier: Intended Audience :: Science/Research
18
+ Classifier: License :: OSI Approved :: MIT License
19
+ Classifier: Natural Language :: English
20
+ Classifier: Operating System :: OS Independent
21
+ Classifier: Programming Language :: Python :: 3
22
+ Classifier: Programming Language :: Python :: 3.8
23
+ Classifier: Programming Language :: Python :: 3.9
24
+ Classifier: Programming Language :: Python :: 3.10
25
+ Classifier: Programming Language :: Python :: 3.11
26
+ Classifier: Programming Language :: Python :: 3.12
27
+ Classifier: Topic :: Education
28
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
29
+ Requires-Python: >=3.8
30
+ Description-Content-Type: text/markdown
31
+ License-File: LICENSE
32
+ Requires-Dist: numpy>=2.3.3
33
+ Provides-Extra: dev
34
+ Requires-Dist: pytest>=7.0; extra == "dev"
35
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
36
+ Dynamic: author
37
+ Dynamic: home-page
38
+ Dynamic: license-file
39
+ Dynamic: requires-python
40
+
41
+ # VanillaNets v1.0.0
42
+
43
+ [![Python 3.8+](https://img.shields.io/badge/python-3.8%2B-blue)](https://www.python.org/downloads/)
44
+ [![NumPy](https://img.shields.io/badge/dependency-numpy%202.3.3%2B-green)](https://numpy.org/)
45
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](LICENSE)
46
+ [![Status: Production Ready ✓](https://img.shields.io/badge/status-production%20ready-success)]()
47
+
48
+ **A transparent, NumPy-only neural network library designed for learning and experimentation.**
49
+
50
+ VanillaNets is a from-scratch implementation of core neural network components using only Python and NumPy. Every component is written explicitly with clarity prioritized over convenience, making the entire system transparent, easy to inspect, and perfect for understanding how neural networks operate under the hood.
51
+
52
+ Whether you're a student learning fundamentals, a researcher prototyping new ideas, or an educator building curriculum, VanillaNets provides a crystal-clear window into neural network mechanics without framework abstractions.
53
+
54
+ ---
55
+
56
+ ## Features
57
+
58
+ ### Core Architecture
59
+ * **Dense Layers** - Fully connected layers with efficient forward and backward passes
60
+ * **Advanced Weight Initialization** - He, Xavier (Glorot), normal and uniform distributions for optimized training
61
+ * **Flexible Bias Initialization** - Zeros or small positive constants to reduce dead units
62
+
63
+ ### Activation Functions (with derivatives)
64
+ * Linear, ReLU & LeakyReLU
65
+ * Tanh & Sigmoid
66
+ * Softmax (with fused Softmax+CrossEntropy backward pass optimization)
67
+
68
+ ### Loss Functions
69
+ * Binary Cross-Entropy (for binary classification)
70
+ * Categorical Cross-Entropy (for multiclass classification)
71
+ * Sparse Categorical Cross-Entropy (for integer-encoded labels)
72
+ * Mean Squared Error (for regression)
73
+
74
+ ### Optimizers
75
+ * **SGD** - Stochastic Gradient Descent with momentum and learning rate decay
76
+ * **Adam** - Adaptive Moment Estimation with adaptive learning rates per parameter
77
+
78
+ ### Metrics & Evaluation
79
+ * **Classification:** Accuracy, Precision, Recall, F1 Score, Confusion Matrix
80
+ * **Regression:** R² Score, Mean Absolute Error (MAE), Root Mean Squared Error (RMSE)
81
+
82
+ ### Model API
83
+ * Sequential model building (`model.add()`)
84
+ * Flexible metrics interface (single metric or multiple metrics as dict/list)
85
+ * Training with `fit()` and optional validation data
86
+ * Inference with `predict()`
87
+ * Batch evaluation with `evaluate()`
88
+
89
+ ### Performance Optimizations
90
+ * Fused Softmax + Categorical Cross-Entropy backward pass (faster training)
91
+ * Efficient NumPy vectorization throughout
92
+ * Memory-conscious layer implementations
93
+
94
+ ---
95
+
96
+ ## Installation
97
+
98
+ ### From PyPI (Recommended)
99
+
100
+ Install directly from PyPI:
101
+
102
+ ```bash
103
+ pip install vanillanets
104
+ ```
105
+
106
+ ### From Source
107
+
108
+ Clone the repository and install in development mode:
109
+
110
+ ```bash
111
+ git clone https://github.com/UmarBalak/vanillanets.git
112
+ cd vanillanets
113
+ pip install -e .
114
+ ```
115
+
116
+ Or install with development dependencies:
117
+
118
+ ```bash
119
+ pip install -e ".[dev]"
120
+ ```
121
+
122
+ ### Requirements
123
+
124
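+ - **Python:** 3.8 or higher according to the package metadata; note that the required NumPy 2.3.3+ itself only supports Python 3.11 and newer, so Python 3.11+ is the effective minimum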
125
+ - **NumPy:** 2.3.3+ (for efficient numerical computation)
126
+
127
+ ### Verify Installation
128
+
129
+ ```python
130
+ import vanillanets
131
+ print(f"VanillaNets {vanillanets.__version__} installed successfully!")
132
+
133
+ # Import core components
134
+ from vanillanets import Model, DenseLayer
135
+ from vanillanets.activations import ReLU, Sigmoid
136
+ from vanillanets.losses import BinaryCrossEntropy
137
+ from vanillanets.optimizers import Optimizer_Adam
138
+ from vanillanets.metrics import Accuracy
139
+
140
+ print("✓ All modules imported successfully!")
141
+ ```
142
+
143
+ ---
144
+
145
+ ## Quick Start
146
+
147
+ ### Example 1: Binary Classification
148
+
149
+ ```python
150
+ from vanillanets import Model, DenseLayer, Optimizer_Adam
151
+ from vanillanets.activations import ReLU, Sigmoid
152
+ from vanillanets.losses import BinaryCrossEntropy
153
+ from vanillanets.metrics import Accuracy
154
+
155
+ # Build model
156
+ model = Model()
157
+ model.add(DenseLayer(30, 64))
158
+ model.add(ReLU())
159
+ model.add(DenseLayer(64, 1))
160
+ model.add(Sigmoid())
161
+
162
+ # Compile with loss, optimizer, and metrics
163
+ model.set(
164
+ loss=BinaryCrossEntropy(),
165
+ optimizer=Optimizer_Adam(learning_rate=0.01),
166
+ metrics={'accuracy': Accuracy()}
167
+ )
168
+ model.finalize()
169
+
170
+ # Train the model
171
+ model.fit(X_train, y_train, epochs=100, print_every=10,
172
+ validation_data=(X_val, y_val))
173
+
174
+ # Evaluate on test set
175
+ loss, metrics = model.evaluate(X_test, y_test)
176
+ print(f"Test Loss: {loss:.4f}, Accuracy: {metrics['accuracy']:.4f}")
177
+
178
+ # Make predictions
179
+ predictions = model.predict(X_new)
180
+ ```
181
+
182
+ ### Example 2: Multiclass Classification
183
+
184
+ ```python
185
+ from vanillanets import Model, DenseLayer, Optimizer_Adam
186
+ from vanillanets.activations import ReLU, Softmax
187
+ from vanillanets.losses import CategoricalCrossEntropy
188
+ from vanillanets.metrics import Accuracy
189
+
190
+ # Build model
191
+ model = Model()
192
+ model.add(DenseLayer(784, 128))
193
+ model.add(ReLU())
194
+ model.add(DenseLayer(128, 64))
195
+ model.add(ReLU())
196
+ model.add(DenseLayer(64, 10))
197
+ model.add(Softmax())
198
+
199
+ # Compile
200
+ model.set(
201
+ loss=CategoricalCrossEntropy(),
202
+ optimizer=Optimizer_Adam(learning_rate=0.05),
203
+ metrics={'accuracy': Accuracy()}
204
+ )
205
+ model.finalize()
206
+
207
+ # Train
208
+ model.fit(X_train, y_train, epochs=50, print_every=5)
209
+ ```
210
+
211
+ ### Example 3: Regression
212
+
213
+ ```python
214
+ from vanillanets import Model, DenseLayer, Optimizer_Adam
215
+ from vanillanets.activations import Linear, ReLU
216
+ from vanillanets.losses import MeanSquaredError
217
+ from vanillanets.metrics import RMSE, MAE
218
+
219
+ # Build model
220
+ model = Model()
221
+ model.add(DenseLayer(8, 64))
222
+ model.add(ReLU())
223
+ model.add(DenseLayer(64, 1))
224
+ model.add(Linear())
225
+
226
+ # Compile with multiple metrics
227
+ model.set(
228
+ loss=MeanSquaredError(),
229
+ optimizer=Optimizer_Adam(learning_rate=0.01),
230
+ metrics={'rmse': RMSE(), 'mae': MAE()}
231
+ )
232
+ model.finalize()
233
+
234
+ # Train and evaluate
235
+ model.fit(X_train, y_train, epochs=100, validation_data=(X_val, y_val))
236
+ loss, metrics = model.evaluate(X_test, y_test)
237
+ print(f"Test RMSE: {metrics['rmse']:.4f}, MAE: {metrics['mae']:.4f}")
238
+ ```
239
+
240
+ ---
241
+
242
+ ## Examples
243
+
244
+ Full working examples included:
245
+ * `binary_classification.py` - Breast cancer classification
246
+ * `multiclass_classification.py` - Handwritten digit recognition
247
+ * `regression.py` - California housing price prediction
248
+
249
+ Run any example:
250
+ ```bash
251
+ python binary_classification.py
252
+ python multiclass_classification.py
253
+ python regression.py
254
+ ```
255
+
256
+ ---
257
+
258
+ ## Testing
259
+
260
+ Run the comprehensive test suite (requires pytest):
261
+
262
+ ```bash
263
+ pip install pytest
264
+ pytest tests/ -v
265
+ ```
266
+
267
+ Or run tests with coverage:
268
+
269
+ ```bash
270
+ pip install pytest pytest-cov
271
+ pytest tests/ -v --cov=vanillanets
272
+ ```
273
+
274
+ ### Test Coverage
275
+
276
+ Comprehensive unit and integration tests cover:
277
+ - ✓ All activation functions (Linear, Sigmoid, ReLU, LeakyReLU, Tanh, Softmax) and their derivatives
278
+ - ✓ All loss functions (BCE, CCE, SparseCCE, MSE) with gradient validation
279
+ - ✓ Dense layer forward/backward passes
280
+ - ✓ Optimizer updates (SGD momentum, Adam adaptive rates)
281
+ - ✓ Fused Softmax+CrossEntropy optimization
282
+ - ✓ All metrics (classification & regression)
283
+ - ✓ Model training, evaluation, and prediction workflows
284
+ - ✓ Edge cases and numerical stability
285
+
286
+ ---
287
+
288
+ ## Design Philosophy
289
+
290
+ VanillaNets is built on the principle that **understanding requires transparency**:
291
+
292
+ - **No magic** — Every computation is explicit; no hidden state or black-box frameworks
292
+ - **Learn by reading** — Source code is the primary documentation
293
+ - **Experimentation-friendly** — Modify any component without framework constraints
294
+ - **Pure NumPy** — No external dependencies beyond NumPy for core functionality
295
+ - **Production-ready** — Full test coverage, efficient implementations, stable API
297
+
298
+ ## Use Cases
299
+
300
+ - **Education:** Perfect for coursework on neural networks and deep learning
301
+ - **Research Prototyping:** Experiment with new loss functions, activations, or optimization strategies
302
+ - **Interview Prep:** Implement solutions from scratch during ML engineering interviews
303
+ - **Curriculum Development:** Build course materials with fully transparent implementations
304
+ - **Algorithmic Learning:** Understand backpropagation, gradient descent, and optimizer mechanics
305
+
306
+ ---
307
+
308
+ ## Project Status
309
+
310
+ ### v1.0.0 - Production Ready
311
+
312
+ **Fully Implemented & Tested:**
313
+ - ✓ Dense layer implementation with He, Xavier, normal, and uniform weight initialization
314
+ - ✓ All activation functions with proper gradient computation (Linear, ReLU, LeakyReLU, Tanh, Sigmoid, Softmax)
315
+ - ✓ All loss functions with backward passes (BCE, CCE, SparseCCE, MSE)
316
+ - ✓ SGD optimizer with momentum and learning rate decay
317
+ - ✓ Adam optimizer with adaptive learning rates
318
+ - ✓ Comprehensive metrics suite (Accuracy, Precision, Recall, F1, Confusion Matrix, R², MAE, RMSE)
319
+ - ✓ Full Model API (add, set, finalize, predict, evaluate, fit)
320
+ - ✓ Fused Softmax+CrossEntropy optimization for faster training
321
+ - ✓ Extensive test coverage (50+ test cases)
322
+ - ✓ Complete example applications (binary classification, multiclass classification, regression)
323
+ - ✓ Validation data support during training
324
+
325
+ ### Future Enhancements (Post-v1.0)
326
+ - Convolutional (Conv2D) layers with pooling
327
+ - Recurrent layers (LSTM, GRU)
328
+ - Batch normalization and layer normalization
329
+ - Dropout regularization
330
+ - Custom layer support through base class
331
+ - Learning rate scheduling
332
+ - Distributed training utilities (multi-GPU)
333
+ - Quantization and pruning support
334
+
335
+ ---
336
+
337
+ ## License
338
+
339
+ MIT License - See [LICENSE](LICENSE) for full details.
340
+
341
+ You are free to use, modify, and distribute this software for any purpose (commercial or personal) with proper attribution.
342
+
343
+ ---
344
+
345
+ ## Acknowledgments
346
+
347
+ VanillaNets was built with a singular mission: to demystify neural networks for learners everywhere. This library stands on the shoulders of foundational work in deep learning by pioneers like Yann LeCun, Geoffrey Hinton, Yoshua Bengio, and the broader machine learning community.
348
+
349
+ Special thanks to:
350
+ - The NumPy team for creating an incredible numerical computing foundation
351
+ - All educators who emphasize understanding over black-box frameworks
352
+ - Contributors and users who provide feedback and improvements
353
+
354
+ ## Citation
355
+
356
+ If you use VanillaNets in your research or teaching, please cite:
357
+
358
+ ```bibtex
359
+ @software{vanillanets2026,
360
+ title={VanillaNets: A Transparent Neural Network Library},
361
+ author={Umar Balak},
362
+ year={2026},
363
+ url={https://github.com/UmarBalak/vanillanets}
364
+ }
365
+ ```
366
+
367
+ ---
368
+
369
+ ## Resources & References
370
+
371
+ ### Recommended Reading
372
+ - **Neural Networks from Scratch in Python**, co-authored by Harrison Kinsley and Daniel Kukieła
373
+ - **Deep Learning** by Goodfellow, I., Bengio, Y., & Courville, A.
374
+ - **Hands-On Machine Learning with Scikit-Learn, Keras, and TensorFlow, 3rd Edition** by Aurélien Géron
375
+
376
+
377
+ ### Related Projects
378
+ - [NumPy](https://numpy.org/) - Our computational foundation
379
+ - [3Blue1Brown Neural Network Series](https://www.youtube.com/watch?v=aircAruvnKk) - Visual learning guide
380
+
381
+ ---
382
+
383
+ **Built with ❤️ for learners by learners.**
@@ -0,0 +1,14 @@
1
+ vanillanets/__init__.py,sha256=ywI6UnQa1AVEyGSxv2OKjmP1Obx54Fskh8XWKEU8jCk,2814
2
+ vanillanets/accuracy.py,sha256=x5Qb7XvTaojrY4Vd1OEz992ecup6LNMDCPXW7JH7DXM,806
3
+ vanillanets/activations.py,sha256=X028DOX9tKw3tMDHBOd0vtDfVyeXje1wu4LXY5YSR_M,3544
4
+ vanillanets/layers.py,sha256=kJ72oq2B_8cACxog6WAzRM2F7svgF9IwkgL_XyNVrPs,3074
5
+ vanillanets/losses.py,sha256=7GQ4L1sG_YUESPpLMgs5YIUqmrW3jOKAvhEgccBj6a0,4413
6
+ vanillanets/metrics.py,sha256=nAJ28aYXDGNr3HZTwAPwrQc-h69Zep63iX2lPycsQnc,4180
7
+ vanillanets/model.py,sha256=m_uRH3IxSY3iE6BSRQscwbR5mFAxPjBJ2MOrchfkQv4,7007
8
+ vanillanets/optimizers.py,sha256=dowIBxR0di44yF9DWTar9XKVA4AeO3X-OhnTV7UV7RA,4925
9
+ vanillanets/softmax_loss.py,sha256=MHctKr2e95f9FURuMihbXnZb0LiuunuF6_YaIjlcc5U,1498
10
+ vanillanets-1.0.0.dist-info/licenses/LICENSE,sha256=Pkze_eXRJG35hZ53fp3bvOwIEJdIZUUY7FHJFT1YMtQ,1086
11
+ vanillanets-1.0.0.dist-info/METADATA,sha256=9h6vbuR_uajR0BqDG84O6k6cy2Shx7YgglRxag39wJY,12836
12
+ vanillanets-1.0.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
13
+ vanillanets-1.0.0.dist-info/top_level.txt,sha256=6S4aQN1FXxrD2H-PBo2DVAPLgRhXnpYpJ_6X9mNgSnw,12
14
+ vanillanets-1.0.0.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (82.0.1)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Umar Balak
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ vanillanets