sorix 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sorix-1.0.0/LICENSE +21 -0
- sorix-1.0.0/PKG-INFO +150 -0
- sorix-1.0.0/README.md +136 -0
- sorix-1.0.0/pyproject.toml +28 -0
- sorix-1.0.0/setup.cfg +4 -0
- sorix-1.0.0/sorix/__init__.py +11 -0
- sorix-1.0.0/sorix/clustering/__init__.py +1 -0
- sorix-1.0.0/sorix/clustering/k_means.py +186 -0
- sorix-1.0.0/sorix/cuda/__init__.py +1 -0
- sorix-1.0.0/sorix/cuda/cuda.py +52 -0
- sorix-1.0.0/sorix/cupy/cupy.py +8 -0
- sorix-1.0.0/sorix/datasets/__init__.py +3 -0
- sorix-1.0.0/sorix/datasets/dataloader.py +17 -0
- sorix-1.0.0/sorix/datasets/dataset.py +21 -0
- sorix-1.0.0/sorix/datasets/train_test.py +48 -0
- sorix-1.0.0/sorix/metrics/__init__.py +1 -0
- sorix-1.0.0/sorix/metrics/metrics.py +87 -0
- sorix-1.0.0/sorix/nn/__init__.py +3 -0
- sorix-1.0.0/sorix/nn/layers.py +161 -0
- sorix-1.0.0/sorix/nn/loss.py +70 -0
- sorix-1.0.0/sorix/nn/net.py +94 -0
- sorix-1.0.0/sorix/optim/__init__.py +1 -0
- sorix-1.0.0/sorix/optim/optim.py +92 -0
- sorix-1.0.0/sorix/preprocessing/__init__.py +3 -0
- sorix-1.0.0/sorix/preprocessing/enconders.py +57 -0
- sorix-1.0.0/sorix/preprocessing/scalers.py +122 -0
- sorix-1.0.0/sorix/preprocessing/transformers.py +52 -0
- sorix-1.0.0/sorix/tensor/tensor.py +403 -0
- sorix-1.0.0/sorix/utils/math.py +148 -0
- sorix-1.0.0/sorix/utils/utils.py +227 -0
- sorix-1.0.0/sorix.egg-info/PKG-INFO +150 -0
- sorix-1.0.0/sorix.egg-info/SOURCES.txt +33 -0
- sorix-1.0.0/sorix.egg-info/dependency_links.txt +1 -0
- sorix-1.0.0/sorix.egg-info/requires.txt +5 -0
- sorix-1.0.0/sorix.egg-info/top_level.txt +1 -0
sorix-1.0.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2021 Mitchell-Mirano
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
sorix-1.0.0/PKG-INFO
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: sorix
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: A minimalistic framework for building neural networks in Python.
|
|
5
|
+
Author-email: Mitchell Mirano <mitchellmirano25@gmail.com>
|
|
6
|
+
Requires-Python: >=3.12
|
|
7
|
+
Description-Content-Type: text/markdown
|
|
8
|
+
License-File: LICENSE
|
|
9
|
+
Requires-Dist: numpy>=2.0
|
|
10
|
+
Provides-Extra: cp13
|
|
11
|
+
Requires-Dist: cupy-cuda13x>=13.0; extra == "cp13"
|
|
12
|
+
Requires-Dist: numpy<3.0,>=2.0; extra == "cp13"
|
|
13
|
+
Dynamic: license-file
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# **Sorix**
|
|
17
|
+
|
|
18
|
+
sorix is a library of Artificial Intelligence for
|
|
19
|
+
beginners.
|
|
20
|
+
|
|
21
|
+

|
|
22
|
+
|
|
23
|
+
It provides a **NumPy/CuPy-based backend** for handling tensors on both **CPU and GPU**, making it ideal for beginners who want to learn how frameworks like PyTorch work internally.
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## **Install**
|
|
28
|
+
- With pip
|
|
29
|
+
```bash
|
|
30
|
+
pip install sorix
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Or with [Poetry](https://python-poetry.org/):
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
poetry add sorix
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Or with [UV](https://docs.astral.sh/uv/guides/install-python/)
|
|
40
|
+
```bash
|
|
41
|
+
uv add sorix
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
---
|
|
45
|
+
|
|
46
|
+
## ⚡ **Quick Start**
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
### Autograd Example
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
from sorix import tensor
|
|
53
|
+
|
|
54
|
+
# Create tensors with gradient tracking
|
|
55
|
+
x = tensor([2.0], requires_grad=True)
|
|
56
|
+
w = tensor([3.0], requires_grad=True)
|
|
57
|
+
b = tensor([1.0], requires_grad=True)
|
|
58
|
+
|
|
59
|
+
# Define a simple function: y = w*x + b
|
|
60
|
+
y = w * x + b
|
|
61
|
+
|
|
62
|
+
# Compute gradients via backpropagation
|
|
63
|
+
y.backward()
|
|
64
|
+
|
|
65
|
+
print("dy/dx:", x.grad) # → should be w = 3
|
|
66
|
+
print("dy/dw:", w.grad) # → should be x = 2
|
|
67
|
+
print("dy/db:", b.grad) # → should be 1
|
|
68
|
+
```
|
|
69
|
+
---
|
|
70
|
+
|
|
71
|
+
### 🔢 Linear Regression Example
|
|
72
|
+
|
|
73
|
+
```python
|
|
74
|
+
import numpy as np
|
|
75
|
+
from sorix import tensor
|
|
76
|
+
from sorix.nn import Linear, MSELoss
|
|
77
|
+
from sorix.optim import SGD
|
|
78
|
+
|
|
79
|
+
# 🎯 Generate synthetic data (y = 3x + 2 + noise)
|
|
80
|
+
X = np.linspace(-1, 1, 100).reshape(-1, 1)
|
|
81
|
+
y = 3 * X + 2 + 0.1 * np.random.randn(*X.shape)
|
|
82
|
+
|
|
83
|
+
# Convert to sorix tensors (CPU, use device="cuda" for GPU)
|
|
84
|
+
X_tensor = tensor(X, device="cpu")
|
|
85
|
+
y_tensor = tensor(y, device="cpu")
|
|
86
|
+
|
|
87
|
+
# Define model, loss, and optimizer
|
|
88
|
+
features, outputs = 1, 1
|
|
89
|
+
model = Linear(features, outputs)
|
|
90
|
+
criterion = MSELoss()
|
|
91
|
+
optimizer = SGD(model.parameters(), lr=0.1)
|
|
92
|
+
|
|
93
|
+
# 🏋️ Training loop
|
|
94
|
+
for epoch in range(200):
|
|
95
|
+
# Forward pass
|
|
96
|
+
y_pred = model(X_tensor)
|
|
97
|
+
loss = criterion(y_pred, y_tensor)
|
|
98
|
+
|
|
99
|
+
# Backward pass
|
|
100
|
+
optimizer.zero_grad()
|
|
101
|
+
loss.backward()
|
|
102
|
+
optimizer.step()
|
|
103
|
+
|
|
104
|
+
# Print progress every 20 epochs
|
|
105
|
+
if (epoch + 1) % 20 == 0:
|
|
106
|
+
print(f"Epoch [{epoch+1}/200] - Loss: {loss.item():.4f}")
|
|
107
|
+
|
|
108
|
+
# ✅ Final learned parameters
|
|
109
|
+
print("Learned weight:", model.coef_)
|
|
110
|
+
print("Learned bias:", model.intercept_)
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
---
|
|
114
|
+
|
|
115
|
+
## 📖 **Documentation & Examples**
|
|
116
|
+
|
|
117
|
+
Explore the interactive examples:
|
|
118
|
+
|
|
119
|
+
* [1 - Tensor Basics](https://github.com/Mitchell-Mirano/sorix/blob/main/examples/basics/1-tensor.ipynb)
|
|
120
|
+
* [2 - Regression](https://github.com/Mitchell-Mirano/sorix/blob/main/examples/nn/1-regression.ipynb)
|
|
121
|
+
* [3 - Neural Network Layers](https://github.com/Mitchell-Mirano/sorix/blob/main/examples/basics/2-layers.ipynb)
|
|
122
|
+
|
|
123
|
+
👉 More examples available in the [examples folder](https://github.com/Mitchell-Mirano/sorix/tree/main/examples).
|
|
124
|
+
|
|
125
|
+
---
|
|
126
|
+
|
|
127
|
+
## 🛠️ **Project Status**
|
|
128
|
+
|
|
129
|
+
sorix is **under active development** 🚧.
|
|
130
|
+
New features are being added frequently, including:
|
|
131
|
+
|
|
132
|
+
* More neural network layers.
|
|
133
|
+
* Better GPU support.
|
|
134
|
+
* Extended autograd functionality.
|
|
135
|
+
|
|
136
|
+
You can contribute by:
|
|
137
|
+
|
|
138
|
+
* Reporting issues
|
|
139
|
+
* Adding new features
|
|
140
|
+
* Improving documentation
|
|
141
|
+
* Writing tests
|
|
142
|
+
|
|
143
|
+
---
|
|
144
|
+
|
|
145
|
+
## 📌 **Links**
|
|
146
|
+
|
|
147
|
+
* [PyPI Package](https://pypi.org/project/sorix/)
|
|
148
|
+
* [GitHub Repository](https://github.com/Mitchell-Mirano/sorix)
|
|
149
|
+
|
|
150
|
+
---
|
sorix-1.0.0/README.md
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
|
|
2
|
+
# **Sorix**
|
|
3
|
+
|
|
4
|
+
sorix is a library of Artificial Intelligence for
|
|
5
|
+
beginners.
|
|
6
|
+
|
|
7
|
+

|
|
8
|
+
|
|
9
|
+
It provides a **NumPy/CuPy-based backend** for handling tensors on both **CPU and GPU**, making it ideal for beginners who want to learn how frameworks like PyTorch work internally.
|
|
10
|
+
|
|
11
|
+
---
|
|
12
|
+
|
|
13
|
+
## **Install**
|
|
14
|
+
- With pip
|
|
15
|
+
```bash
|
|
16
|
+
pip install sorix
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Or with [Poetry](https://python-poetry.org/):
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
poetry add sorix
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
Or with [UV](https://docs.astral.sh/uv/guides/install-python/)
|
|
26
|
+
```bash
|
|
27
|
+
uv add sorix
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
---
|
|
31
|
+
|
|
32
|
+
## ⚡ **Quick Start**
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
### Autograd Example
|
|
36
|
+
|
|
37
|
+
```python
|
|
38
|
+
from sorix import tensor
|
|
39
|
+
|
|
40
|
+
# Create tensors with gradient tracking
|
|
41
|
+
x = tensor([2.0], requires_grad=True)
|
|
42
|
+
w = tensor([3.0], requires_grad=True)
|
|
43
|
+
b = tensor([1.0], requires_grad=True)
|
|
44
|
+
|
|
45
|
+
# Define a simple function: y = w*x + b
|
|
46
|
+
y = w * x + b
|
|
47
|
+
|
|
48
|
+
# Compute gradients via backpropagation
|
|
49
|
+
y.backward()
|
|
50
|
+
|
|
51
|
+
print("dy/dx:", x.grad) # → should be w = 3
|
|
52
|
+
print("dy/dw:", w.grad) # → should be x = 2
|
|
53
|
+
print("dy/db:", b.grad) # → should be 1
|
|
54
|
+
```
|
|
55
|
+
---
|
|
56
|
+
|
|
57
|
+
### 🔢 Linear Regression Example
|
|
58
|
+
|
|
59
|
+
```python
|
|
60
|
+
import numpy as np
|
|
61
|
+
from sorix import tensor
|
|
62
|
+
from sorix.nn import Linear, MSELoss
|
|
63
|
+
from sorix.optim import SGD
|
|
64
|
+
|
|
65
|
+
# 🎯 Generate synthetic data (y = 3x + 2 + noise)
|
|
66
|
+
X = np.linspace(-1, 1, 100).reshape(-1, 1)
|
|
67
|
+
y = 3 * X + 2 + 0.1 * np.random.randn(*X.shape)
|
|
68
|
+
|
|
69
|
+
# Convert to sorix tensors (CPU, use device="cuda" for GPU)
|
|
70
|
+
X_tensor = tensor(X, device="cpu")
|
|
71
|
+
y_tensor = tensor(y, device="cpu")
|
|
72
|
+
|
|
73
|
+
# Define model, loss, and optimizer
|
|
74
|
+
features, outputs = 1, 1
|
|
75
|
+
model = Linear(features, outputs)
|
|
76
|
+
criterion = MSELoss()
|
|
77
|
+
optimizer = SGD(model.parameters(), lr=0.1)
|
|
78
|
+
|
|
79
|
+
# 🏋️ Training loop
|
|
80
|
+
for epoch in range(200):
|
|
81
|
+
# Forward pass
|
|
82
|
+
y_pred = model(X_tensor)
|
|
83
|
+
loss = criterion(y_pred, y_tensor)
|
|
84
|
+
|
|
85
|
+
# Backward pass
|
|
86
|
+
optimizer.zero_grad()
|
|
87
|
+
loss.backward()
|
|
88
|
+
optimizer.step()
|
|
89
|
+
|
|
90
|
+
# Print progress every 20 epochs
|
|
91
|
+
if (epoch + 1) % 20 == 0:
|
|
92
|
+
print(f"Epoch [{epoch+1}/200] - Loss: {loss.item():.4f}")
|
|
93
|
+
|
|
94
|
+
# ✅ Final learned parameters
|
|
95
|
+
print("Learned weight:", model.coef_)
|
|
96
|
+
print("Learned bias:", model.intercept_)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
---
|
|
100
|
+
|
|
101
|
+
## 📖 **Documentation & Examples**
|
|
102
|
+
|
|
103
|
+
Explore the interactive examples:
|
|
104
|
+
|
|
105
|
+
* [1 - Tensor Basics](https://github.com/Mitchell-Mirano/sorix/blob/main/examples/basics/1-tensor.ipynb)
|
|
106
|
+
* [2 - Regression](https://github.com/Mitchell-Mirano/sorix/blob/main/examples/nn/1-regression.ipynb)
|
|
107
|
+
* [3 - Neural Network Layers](https://github.com/Mitchell-Mirano/sorix/blob/main/examples/basics/2-layers.ipynb)
|
|
108
|
+
|
|
109
|
+
👉 More examples available in the [examples folder](https://github.com/Mitchell-Mirano/sorix/tree/main/examples).
|
|
110
|
+
|
|
111
|
+
---
|
|
112
|
+
|
|
113
|
+
## 🛠️ **Project Status**
|
|
114
|
+
|
|
115
|
+
sorix is **under active development** 🚧.
|
|
116
|
+
New features are being added frequently, including:
|
|
117
|
+
|
|
118
|
+
* More neural network layers.
|
|
119
|
+
* Better GPU support.
|
|
120
|
+
* Extended autograd functionality.
|
|
121
|
+
|
|
122
|
+
You can contribute by:
|
|
123
|
+
|
|
124
|
+
* Reporting issues
|
|
125
|
+
* Adding new features
|
|
126
|
+
* Improving documentation
|
|
127
|
+
* Writing tests
|
|
128
|
+
|
|
129
|
+
---
|
|
130
|
+
|
|
131
|
+
## 📌 **Links**
|
|
132
|
+
|
|
133
|
+
* [PyPI Package](https://pypi.org/project/sorix/)
|
|
134
|
+
* [GitHub Repository](https://github.com/Mitchell-Mirano/sorix)
|
|
135
|
+
|
|
136
|
+
---
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "sorix"
|
|
3
|
+
version = "1.0.0"
|
|
4
|
+
description = "A minimalistic framework for building neural networks in Python."
|
|
5
|
+
authors = [
|
|
6
|
+
{name = "Mitchell Mirano", email = "mitchellmirano25@gmail.com"},
|
|
7
|
+
]
|
|
8
|
+
readme = "README.md"
|
|
9
|
+
requires-python = ">=3.12"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"numpy>=2.0",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
[project.optional-dependencies]
|
|
15
|
+
|
|
16
|
+
# Para CUDA 13.x
|
|
17
|
+
cp13 = [
|
|
18
|
+
"cupy-cuda13x>=13.0",
|
|
19
|
+
"numpy>=2.0,<3.0",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
[dependency-groups]
|
|
23
|
+
dev = [
|
|
24
|
+
"joblib>=1.5.2",
|
|
25
|
+
"jupyter>=1.1.1",
|
|
26
|
+
"matplotlib>=3.10.6",
|
|
27
|
+
"seaborn>=0.13.2",
|
|
28
|
+
]
|
sorix-1.0.0/setup.cfg
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
from .tensor.tensor import tensor,no_grad
|
|
2
|
+
from .cuda import cuda
|
|
3
|
+
from .utils.utils import sigmoid,softmax,argmax
|
|
4
|
+
from .utils.utils import (as_tensor,from_numpy,
|
|
5
|
+
zeros, ones,full,eye,diag,empty,
|
|
6
|
+
arange,linspace, logspace,
|
|
7
|
+
rand, randn,randint,randperm,
|
|
8
|
+
zeros_like,ones_like,empty_like,full_like,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
from .utils.math import (sin,cos,tanh,exp,log,sqrt,mean,sum)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .k_means import Kmeans
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
import numpy as np
|
|
3
|
+
import pandas as pd
|
|
4
|
+
from typing import Union
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class Kmeans:
    """K-means clustering algorithm.

    Parameters
    ----------
    n_centroids : int
        Number of centroids (clusters) to fit.

    Attributes
    ----------
    centroids : np.ndarray or None
        Current centroid positions, shape (n_centroids, n_features).
    features_names : list or None
        Column names captured when training on a DataFrame.
    history_train : dict
        Maps iteration number -> centroid snapshot (filled only when
        ``history_train=True`` is passed to :meth:`train`).
    labels : np.ndarray or None
        Cluster assignment of each training sample after :meth:`train`.
    """

    def __init__(self, n_centroids: int):
        self.n_centroids = n_centroids
        self.centroids = None
        self.features_names = None
        self.history_train = {}
        self.labels = None

    def _data_preprocessing_train(self,
                                  features: Union[pd.DataFrame, np.ndarray],
                                  history_train: bool) -> np.ndarray:
        """Convert the input to an ndarray and initialize the centroids."""
        if isinstance(features, pd.DataFrame):
            self.features_names = features.columns.to_list()
            features_train = features.to_numpy()
        else:
            features_train = features

        # Fix: sample initial centroids WITHOUT replacement so they are
        # distinct data points (np.random.randint could pick the same
        # row several times, starting with duplicated centroids).
        idx = np.random.choice(len(features_train), self.n_centroids, replace=False)
        self.centroids = features_train[idx]
        if history_train:
            self.history_train[0] = self.centroids
        return features_train

    def _distances(self, features: np.ndarray, centroids: np.ndarray) -> np.ndarray:
        """Euclidean distance of every sample to every centroid.

        Returns an array of shape (n_samples, n_centroids).
        Vectorized: replaces the original O(n*k) Python double loop.
        """
        diff = features[:, None, :] - centroids[None, :, :]
        return np.linalg.norm(diff, axis=2)

    def _new_labels(self, distances: np.ndarray) -> np.ndarray:
        """Index of the nearest centroid per sample (ties -> lowest index,
        same tie-break as the original np.where(...)[0][0] idiom)."""
        return np.argmin(distances, axis=1)

    def _new_centroids(self, features: np.ndarray, labels: np.ndarray) -> np.ndarray:
        """Recompute each centroid as the mean of its assigned samples.

        Fix: an empty cluster keeps its previous centroid instead of being
        silently dropped — the original shrank the centroid array, which
        desynchronized label values from centroid row indices (and broke
        get_inertia's ``self.centroids[label]`` lookup).
        """
        new_centroids = []
        for k in range(self.n_centroids):
            members = features[labels == k]
            if len(members):
                new_centroids.append(members.mean(axis=0))
            else:
                new_centroids.append(self.centroids[k])
        return np.array(new_centroids)

    def _moviment(self, centroids_before: np.ndarray, centroids_after: np.ndarray) -> float:
        """Mean Euclidean displacement of the centroids between iterations."""
        moviments = [np.linalg.norm(bf - af)
                     for bf, af in zip(centroids_before, centroids_after)]
        return np.mean(moviments)

    def train(self,
              features: Union[pd.DataFrame, np.ndarray],
              moviment_limit: float = 0.0001,
              max_iters: int = 300,
              history_train: bool = False) -> None:
        """
        Train the k-means algorithm.

        Parameters
        ----------
        features : Union[pd.DataFrame, np.ndarray]
            Features to train.
        moviment_limit : float, optional
            Convergence threshold on mean centroid movement.
            The default is 0.0001.
        max_iters : int, optional
            Maximum iterations. The default is 300.
        history_train : bool, optional
            Save history of train. The default is False.
        """
        features_train = self._data_preprocessing_train(features, history_train)
        iters = 0
        while True:
            iters += 1
            distances = self._distances(features_train, self.centroids)
            self.labels = self._new_labels(distances)
            centroids_before = self.centroids
            self.centroids = self._new_centroids(features_train, self.labels)
            moviment = self._moviment(centroids_before, self.centroids)
            if history_train:
                self.history_train[iters] = self.centroids
            print('Iter: {} \t {} \t moviment: {:.3f}'.format(iters, 50 * '=' + '>', moviment))
            if moviment < moviment_limit:
                break
            # Fix: '>' allowed max_iters + 1 iterations (off-by-one).
            if iters >= max_iters:
                break

    def predict(self, features: Union[pd.DataFrame, np.ndarray]) -> np.ndarray:
        """
        Predict the labels of features.

        Parameters
        ----------
        features : Union[pd.DataFrame, np.ndarray]
            Features to predict.

        Returns
        -------
        np.ndarray
            Predicted labels.
        """
        features = features.to_numpy() if isinstance(features, pd.DataFrame) else features

        distances = self._distances(features, self.centroids)
        return self._new_labels(distances)

    def get_distances(self, features: Union[pd.DataFrame, np.ndarray]) -> np.ndarray:
        """
        Get distances between features and centroids.

        Parameters
        ----------
        features : Union[pd.DataFrame, np.ndarray]
            Features to predict.

        Returns
        -------
        np.ndarray
            Distances between features and centroids,
            shape (n_samples, n_centroids).
        """
        features = features.to_numpy() if isinstance(features, pd.DataFrame) else features

        return self._distances(features, self.centroids)

    def get_inertia(self, features: Union[pd.DataFrame, np.ndarray]) -> float:
        """
        Get inertia (within-cluster sum of squares) of features.

        Parameters
        ----------
        features : Union[pd.DataFrame, np.ndarray]
            Features to predict.

        Returns
        -------
        float
            Inertia of features.
        """
        features = features.to_numpy() if isinstance(features, pd.DataFrame) else features
        distances = self._distances(features, self.centroids)
        labels = self._new_labels(distances)

        inertia = 0
        for label in np.unique(labels):
            mask = labels == label  # renamed from 'filter' (shadowed builtin)
            vectors = features[mask]
            inertia += np.sum((vectors - self.centroids[label]) ** 2)
        return inertia

    def __str__(self):
        text = f"""
        model: {self.__class__.__name__} \n
        n_centroids: {self.n_centroids} \n
        """
        return text

    def __repr__(self):
        return self.__str__()
|
|
186
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .cuda import is_available
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
from sorix.cupy.cupy import _cupy_available
|
|
2
|
+
|
|
3
|
+
def is_available(verbose: bool = True) -> bool:
    """Report whether a working CUDA GPU is reachable through CuPy.

    Runs a small smoke test: device enumeration, a basic arithmetic
    kernel, and a matmul (which exercises cuBLAS).

    Parameters
    ----------
    verbose : bool, optional
        Print diagnostic messages. The default is True.

    Returns
    -------
    bool
        True only if CuPy is installed, at least one CUDA device
        exists, and both smoke tests succeed.
    """
    if not _cupy_available:
        if verbose:
            print("❌ CuPy is not installed.")
        return False

    import cupy as cp

    try:
        n_gpus = cp.cuda.runtime.getDeviceCount()
        if n_gpus == 0:
            if verbose:
                print("❌ No CUDA devices found.")
            return False

        # Test 1: simple operation on the GPU (memory + basic arithmetic)
        try:
            _ = int((cp.arange(5) * 2).sum())
            # Fix: this success message was printed even with verbose=False,
            # unlike every other message in this function.
            if verbose:
                print("✅ GPU basic operation passed")
        except Exception as e:
            if verbose:
                print("❌ GPU basic operation failed:", e)
            return False

        # Test 2: operation that forces cuBLAS (matmul)
        try:
            A = cp.random.rand(4, 4)
            B = cp.random.rand(4, 4)
            _ = int((A @ B).sum())  # forces cuBLAS
        except Exception as e:
            if verbose:
                print("❌ cuBLAS/linear algebra operation failed:", e)
            return False

        if verbose:
            props = cp.cuda.runtime.getDeviceProperties(0)
            print(f"✅ GPU available: {props['name'].decode('utf-8')}")
            print(f"CUDA runtime version: {cp.cuda.runtime.runtimeGetVersion()}")
            print(f"CuPy version: {cp.__version__}")

        return True

    except Exception as e:
        if verbose:
            print("❌ Error while checking CuPy/CUDA:", e)
        return False
|
|
51
|
+
|
|
52
|
+
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
class DataLoader:
    """Iterate over a dataset in fixed-size batches.

    The dataset only needs to support ``len()`` and slice indexing;
    the final batch may be smaller than ``batch_size``.
    """

    def __init__(self, dataset, batch_size=16):
        self.dataset = dataset
        self.batch_size = batch_size

    def __iter__(self):
        """Yield consecutive slices of the dataset, one batch at a time."""
        start = 0
        total = len(self.dataset)
        while start < total:
            yield self.dataset[start:start + self.batch_size]
            start += self.batch_size

    def __len__(self):
        """Number of batches, counting a trailing partial batch as one."""
        full_batches, remainder = divmod(len(self.dataset), self.batch_size)
        return full_batches + 1 if remainder else full_batches
|
|
17
|
+
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
class Dataset:
    """Pair a feature container ``X`` with a target container ``y``.

    Indexing returns the ``(features, target)`` pair at a position;
    assignment writes both halves of such a pair back.
    """

    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        """Number of samples (length of the feature container)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the (features, target) pair at ``idx``."""
        return self.X[idx], self.y[idx]

    def __setitem__(self, idx, value):
        """Store a (features, target) pair at ``idx``."""
        features_part, target_part = value
        self.X[idx] = features_part
        self.y[idx] = target_part

    def __str__(self):
        return f"Dataset(\nX=\n{self.X}, y={self.y})"

    def __repr__(self):
        return str(self)
|