torch-device-manager 0.1.0 (tar.gz)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- torch_device_manager-0.1.0/LICENSE +21 -0
- torch_device_manager-0.1.0/MANIFEST.in +4 -0
- torch_device_manager-0.1.0/PKG-INFO +184 -0
- torch_device_manager-0.1.0/README.md +145 -0
- torch_device_manager-0.1.0/examples/training_example.py +148 -0
- torch_device_manager-0.1.0/requirements.txt +1 -0
- torch_device_manager-0.1.0/setup.cfg +4 -0
- torch_device_manager-0.1.0/setup.py +39 -0
- torch_device_manager-0.1.0/tests/__init__.py +0 -0
- torch_device_manager-0.1.0/torch_device_manager/__init__.py +119 -0
- torch_device_manager-0.1.0/torch_device_manager/py.typed +0 -0
- torch_device_manager-0.1.0/torch_device_manager.egg-info/PKG-INFO +184 -0
- torch_device_manager-0.1.0/torch_device_manager.egg-info/SOURCES.txt +14 -0
- torch_device_manager-0.1.0/torch_device_manager.egg-info/dependency_links.txt +1 -0
- torch_device_manager-0.1.0/torch_device_manager.egg-info/requires.txt +1 -0
- torch_device_manager-0.1.0/torch_device_manager.egg-info/top_level.txt +2 -0
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 Ali B.M.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@@ -0,0 +1,184 @@
Metadata-Version: 2.4
Name: torch-device-manager
Version: 0.1.0
Summary: A PyTorch device manager for automatic hardware detection and memory optimization
Home-page: https://github.com/yourusername/torch-device-manager
Author: Ali B.M.
Author-email: mainabukarali@gmail.com
Project-URL: Bug Reports, https://github.com/TempCoder82/torch-device-manager/issues
Project-URL: Source, https://github.com/TempCoder82/torch-device-manager
Keywords: pytorch,cuda,mps,device,memory,optimization,machine learning
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Programming Language :: Python :: 3.13
Classifier: Operating System :: OS Independent
Requires-Python: >=3.8
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: torch>=2.1.1
Dynamic: author
Dynamic: author-email
Dynamic: classifier
Dynamic: description
Dynamic: description-content-type
Dynamic: home-page
Dynamic: keywords
Dynamic: license-file
Dynamic: project-url
Dynamic: requires-dist
Dynamic: requires-python
Dynamic: summary

# Torch Device Manager

A lightweight PyTorch utility for automatic hardware detection and memory optimization across different devices (CPU, CUDA, MPS).

## Features

- 🔍 **Automatic Device Detection**: Detects the best available hardware (CUDA, Apple Silicon MPS, or CPU)
- 🧠 **Memory Optimization**: Automatically adjusts batch sizes and gradient accumulation based on available memory
- ⚡ **Mixed Precision Support**: Optional automatic mixed precision with gradient scaling
- 📊 **Memory Monitoring**: Real-time memory usage tracking and logging
- 🛡️ **Fallback Protection**: Graceful fallback to CPU when requested devices aren't available

## Installation

```bash
pip install torch-device-manager
```

## Quick Start

```python
from torch_device_manager import DeviceManager
import torch

# Initialize device manager (auto-detects best device)
device_manager = DeviceManager(device="auto", mixed_precision=True)

# Get the torch device
device = device_manager.get_device()

# Move your model to the optimal device
model = YourModel().to(device)

# Optimize batch size based on available memory
optimal_batch_size, gradient_steps = device_manager.optimize_for_memory(
    model=model,
    batch_size=32
)

print(f"Using device: {device}")
print(f"Optimized batch size: {optimal_batch_size}")
print(f"Gradient accumulation steps: {gradient_steps}")
```

## Usage in Training Scripts

### Basic Integration

```python
import torch
import torch.nn as nn
from torch_device_manager import DeviceManager

def train_model():
    # Initialize device manager
    device_manager = DeviceManager(device="auto", mixed_precision=True)
    device = device_manager.get_device()

    # Setup model
    model = YourModel().to(device)
    optimizer = torch.optim.Adam(model.parameters())

    # Optimize memory usage
    batch_size, gradient_steps = device_manager.optimize_for_memory(model, 32)

    # Training loop
    for epoch in range(num_epochs):
        for batch_idx, (data, target) in enumerate(dataloader):
            data, target = data.to(device), target.to(device)

            # Use mixed precision if available
            if device_manager.mixed_precision and device_manager.scaler:
                with torch.cuda.amp.autocast():
                    output = model(data)
                    loss = criterion(output, target)

                device_manager.scaler.scale(loss).backward()

                if (batch_idx + 1) % gradient_steps == 0:
                    device_manager.scaler.step(optimizer)
                    device_manager.scaler.update()
                    optimizer.zero_grad()
            else:
                output = model(data)
                loss = criterion(output, target)
                loss.backward()

                if (batch_idx + 1) % gradient_steps == 0:
                    optimizer.step()
                    optimizer.zero_grad()

        # Log memory usage
        device_manager.log_memory_usage()
```

### Advanced Usage

```python
from torch_device_manager import DeviceManager

# Force specific device
device_manager = DeviceManager(device="cuda", mixed_precision=False)

# Check memory info
memory_info = device_manager.get_memory_info()
print(f"Available memory: {memory_info}")

# Manual memory optimization
if memory_info.get("free_gb", 0) < 2.0:
    print("Low memory detected, reducing batch size")
    batch_size = 4
```

## API Reference

### DeviceManager

#### Constructor
- `device` (str, default="auto"): Device to use ("auto", "cuda", "mps", "cpu")
- `mixed_precision` (bool, default=True): Enable mixed precision training

#### Methods
- `get_device()`: Returns torch.device object
- `get_memory_info()`: Returns memory information dict
- `log_memory_usage()`: Logs current memory usage
- `optimize_for_memory(model, batch_size)`: Returns optimized (batch_size, gradient_steps)

## Device Support

- **CUDA**: Full support with memory optimization and mixed precision
- **Apple Silicon (MPS)**: Basic support with conservative memory settings
- **CPU**: Fallback support with optimized batch sizes

## Requirements

- Python >= 3.8
- PyTorch >= 2.1.1

## License

MIT License

## Contributing

Contributions are welcome!
@@ -0,0 +1,145 @@
# Torch Device Manager

A lightweight PyTorch utility for automatic hardware detection and memory optimization across different devices (CPU, CUDA, MPS).

## Features

- 🔍 **Automatic Device Detection**: Detects the best available hardware (CUDA, Apple Silicon MPS, or CPU)
- 🧠 **Memory Optimization**: Automatically adjusts batch sizes and gradient accumulation based on available memory
- ⚡ **Mixed Precision Support**: Optional automatic mixed precision with gradient scaling
- 📊 **Memory Monitoring**: Real-time memory usage tracking and logging
- 🛡️ **Fallback Protection**: Graceful fallback to CPU when requested devices aren't available

## Installation

```bash
pip install torch-device-manager
```

## Quick Start

```python
from torch_device_manager import DeviceManager
import torch

# Initialize device manager (auto-detects best device)
device_manager = DeviceManager(device="auto", mixed_precision=True)

# Get the torch device
device = device_manager.get_device()

# Move your model to the optimal device
model = YourModel().to(device)

# Optimize batch size based on available memory
optimal_batch_size, gradient_steps = device_manager.optimize_for_memory(
    model=model,
    batch_size=32
)

print(f"Using device: {device}")
print(f"Optimized batch size: {optimal_batch_size}")
print(f"Gradient accumulation steps: {gradient_steps}")
```

## Usage in Training Scripts

### Basic Integration

```python
import torch
import torch.nn as nn
from torch_device_manager import DeviceManager

def train_model():
    # Initialize device manager
    device_manager = DeviceManager(device="auto", mixed_precision=True)
    device = device_manager.get_device()

    # Setup model
    model = YourModel().to(device)
    optimizer = torch.optim.Adam(model.parameters())

    # Optimize memory usage
    batch_size, gradient_steps = device_manager.optimize_for_memory(model, 32)

    # Training loop
    for epoch in range(num_epochs):
        for batch_idx, (data, target) in enumerate(dataloader):
            data, target = data.to(device), target.to(device)

            # Use mixed precision if available
            if device_manager.mixed_precision and device_manager.scaler:
                with torch.cuda.amp.autocast():
                    output = model(data)
                    loss = criterion(output, target)

                device_manager.scaler.scale(loss).backward()

                if (batch_idx + 1) % gradient_steps == 0:
                    device_manager.scaler.step(optimizer)
                    device_manager.scaler.update()
                    optimizer.zero_grad()
            else:
                output = model(data)
                loss = criterion(output, target)
                loss.backward()

                if (batch_idx + 1) % gradient_steps == 0:
                    optimizer.step()
                    optimizer.zero_grad()

        # Log memory usage
        device_manager.log_memory_usage()
```

### Advanced Usage

```python
from torch_device_manager import DeviceManager

# Force specific device
device_manager = DeviceManager(device="cuda", mixed_precision=False)

# Check memory info
memory_info = device_manager.get_memory_info()
print(f"Available memory: {memory_info}")

# Manual memory optimization
if memory_info.get("free_gb", 0) < 2.0:
    print("Low memory detected, reducing batch size")
    batch_size = 4
```

## API Reference

### DeviceManager

#### Constructor
- `device` (str, default="auto"): Device to use ("auto", "cuda", "mps", "cpu")
- `mixed_precision` (bool, default=True): Enable mixed precision training

#### Methods
- `get_device()`: Returns torch.device object
- `get_memory_info()`: Returns memory information dict
- `log_memory_usage()`: Logs current memory usage
- `optimize_for_memory(model, batch_size)`: Returns optimized (batch_size, gradient_steps)

## Device Support

- **CUDA**: Full support with memory optimization and mixed precision
- **Apple Silicon (MPS)**: Basic support with conservative memory settings
- **CPU**: Fallback support with optimized batch sizes

## Requirements

- Python >= 3.8
- PyTorch >= 2.1.1

## License

MIT License

## Contributing

Contributions are welcome!
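
A note on the accumulation loop in the README above: stepping the optimizer every `gradient_steps` batches keeps the effective batch size equal to the one originally requested. A minimal standalone sketch of that arithmetic (not part of the package), using the micro-batch caps from the package's own heuristics:

```python
# Standalone sketch: the invariant behind optimize_for_memory's return value.
def effective_batch(micro_batch: int, grad_steps: int) -> int:
    # One optimizer step consumes grad_steps micro-batches.
    return micro_batch * grad_steps

# A requested batch of 32 capped at a micro-batch of 8 (the package's CPU cap)
# needs 4 accumulation steps to keep the same effective batch size.
assert effective_batch(8, 4) == 32
# With the MPS cap of 4, the same request needs 8 accumulation steps.
assert effective_batch(4, 8) == 32
```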
@@ -0,0 +1,148 @@
"""
Example training script showing how to use torch-device-manager in a real training scenario
"""

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import logging
from torch_device_manager import DeviceManager

# Setup logging
logging.basicConfig(level=logging.INFO)

class SimpleModel(nn.Module):
    """Simple neural network for demonstration"""
    def __init__(self, input_size=784, hidden_size=256, num_classes=10):
        super(SimpleModel, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, num_classes)
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc3(x)
        return x

def create_dummy_data(num_samples=1000, input_size=784, num_classes=10):
    """Create dummy data for demonstration"""
    X = torch.randn(num_samples, input_size)
    y = torch.randint(0, num_classes, (num_samples,))
    return TensorDataset(X, y)

def train_with_device_manager():
    """Training function using DeviceManager"""

    # Initialize device manager
    print("Initializing Device Manager...")
    device_manager = DeviceManager(device="auto", mixed_precision=True)
    device = device_manager.get_device()

    # Create model and move to device
    print("Setting up model...")
    model = SimpleModel().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Create dummy dataset
    print("Creating dataset...")
    dataset = create_dummy_data()

    # Optimize batch size for available memory
    original_batch_size = 64
    optimized_batch_size, gradient_accumulation_steps = device_manager.optimize_for_memory(
        model, original_batch_size
    )

    dataloader = DataLoader(
        dataset,
        batch_size=optimized_batch_size,
        shuffle=True
    )

    print("Training configuration:")
    print(f"  - Device: {device}")
    print(f"  - Original batch size: {original_batch_size}")
    print(f"  - Optimized batch size: {optimized_batch_size}")
    print(f"  - Gradient accumulation steps: {gradient_accumulation_steps}")
    print(f"  - Mixed precision: {device_manager.mixed_precision}")

    # Training loop
    num_epochs = 5
    model.train()

    for epoch in range(num_epochs):
        total_loss = 0.0
        accumulation_count = 0

        for batch_idx, (data, target) in enumerate(dataloader):
            data, target = data.to(device), target.to(device)

            # Use mixed precision if available
            if device_manager.mixed_precision and device_manager.scaler is not None:
                with torch.cuda.amp.autocast():
                    output = model(data)
                    loss = criterion(output, target) / gradient_accumulation_steps

                device_manager.scaler.scale(loss).backward()
                accumulation_count += 1

                # Update weights after accumulating gradients
                if accumulation_count >= gradient_accumulation_steps:
                    device_manager.scaler.step(optimizer)
                    device_manager.scaler.update()
                    optimizer.zero_grad()
                    accumulation_count = 0

            else:
                output = model(data)
                loss = criterion(output, target) / gradient_accumulation_steps
                loss.backward()
                accumulation_count += 1

                # Update weights after accumulating gradients
                if accumulation_count >= gradient_accumulation_steps:
                    optimizer.step()
                    optimizer.zero_grad()
                    accumulation_count = 0

            total_loss += loss.item() * gradient_accumulation_steps

            # Log progress every 10 batches
            if batch_idx % 10 == 0:
                print(f'Epoch {epoch+1}/{num_epochs}, Batch {batch_idx}, '
                      f'Loss: {loss.item() * gradient_accumulation_steps:.4f}')

        # Handle any remaining accumulated gradients
        if accumulation_count > 0:
            if device_manager.mixed_precision and device_manager.scaler is not None:
                device_manager.scaler.step(optimizer)
                device_manager.scaler.update()
            else:
                optimizer.step()
            optimizer.zero_grad()

        avg_loss = total_loss / len(dataloader)
        print(f'Epoch {epoch+1}/{num_epochs} completed. Average Loss: {avg_loss:.4f}')

        # Log memory usage after each epoch
        device_manager.log_memory_usage()

    print("Training completed!")

    # Final memory check
    print("\nFinal memory state:")
    memory_info = device_manager.get_memory_info()
    print(memory_info)

if __name__ == "__main__":
    train_with_device_manager()
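
The example above pins autocast to `torch.cuda.amp.autocast()`, which is only meaningful on CUDA even though the scaler is also created for MPS. One possible device-agnostic variant (a sketch, not part of the released package) is to select the context from the resolved device string via `torch.autocast`; the MPS fallback to full precision is an assumption here, since autocast coverage for that backend varies across PyTorch releases:

```python
import contextlib
import torch

def autocast_for(device_str: str):
    """Return an autocast context matching the resolved device string."""
    if device_str == "cuda":
        return torch.autocast(device_type="cuda", dtype=torch.float16)
    if device_str == "cpu":
        # CPU autocast supports bfloat16.
        return torch.autocast(device_type="cpu", dtype=torch.bfloat16)
    # Other backends (e.g. MPS): fall back to full precision rather than
    # assuming autocast support.
    return contextlib.nullcontext()

# Usage inside the training loop sketched above:
# with autocast_for(device_manager.device):
#     output = model(data)
#     loss = criterion(output, target)
```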
@@ -0,0 +1 @@
torch>=2.1.1
@@ -0,0 +1,39 @@
from setuptools import setup, find_packages

with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

setup(
    name="torch-device-manager",
    version="0.1.0",
    author="Ali B.M.",
    author_email="mainabukarali@gmail.com",
    description="A PyTorch device manager for automatic hardware detection and memory optimization",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/yourusername/torch-device-manager",
    packages=find_packages(),
    classifiers=[
        "Development Status :: 4 - Beta",
        "Intended Audience :: Developers",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "License :: OSI Approved :: MIT License",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Programming Language :: Python :: 3.12",
        "Programming Language :: Python :: 3.13",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.8",
    install_requires=[
        "torch>=2.1.1",
    ],
    keywords="pytorch, cuda, mps, device, memory, optimization, machine learning",
    project_urls={
        "Bug Reports": "https://github.com/TempCoder82/torch-device-manager/issues",
        "Source": "https://github.com/TempCoder82/torch-device-manager",
    },
)
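
Since setup.py declares the distribution name, version, and dependency, an installed copy can be checked against them at runtime with the standard library. A quick sketch, assuming the package is installed; `importlib.metadata` ships with Python >= 3.8, matching `python_requires`:

```python
from importlib.metadata import requires, version

# The PyPI distribution name from setup.py uses dashes; the importable
# module is torch_device_manager.
assert version("torch-device-manager") == "0.1.0"

# install_requires surfaces as the distribution's requirement list.
print(requires("torch-device-manager"))  # expected: ['torch>=2.1.1']
```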
File without changes
@@ -0,0 +1,119 @@
"""
Torch Device Manager - Automatic hardware detection and memory optimization for PyTorch
"""

import logging
from typing import Tuple
import torch

__version__ = "0.1.0"
__author__ = "Ali B.M."

# Set up logging
logger = logging.getLogger(__name__)

class DeviceManager:
    """Manage device selection and memory optimization for different hardware"""

    def __init__(self, device: str = "auto", mixed_precision: bool = True):
        self.device = self._detect_device(device)
        self.mixed_precision = mixed_precision
        self.scaler = None

        logger.info("Device Manager initialized:")
        logger.info(f"  - Device: {self.device}")
        logger.info(f"  - Mixed Precision: {self.mixed_precision}")

        if self.mixed_precision and self.device != "cpu":
            self.scaler = torch.cuda.amp.GradScaler()
            logger.info("  - Gradient Scaler: Enabled")

    def _detect_device(self, device: str) -> str:
        """Detect the best available device"""
        if device == "auto":
            if torch.cuda.is_available():
                device = "cuda"
                logger.info(f"CUDA detected: {torch.cuda.get_device_name()}")
                logger.info(f"CUDA memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
            elif hasattr(torch.backends, 'mps') and torch.backends.mps.is_available():
                device = "mps"
                logger.info("Apple Silicon MPS detected")
            else:
                device = "cpu"
                logger.info("Using CPU")
        else:
            if device == "cuda" and not torch.cuda.is_available():
                logger.warning("CUDA requested but not available, falling back to CPU")
                device = "cpu"
            elif device == "mps" and not (hasattr(torch.backends, 'mps') and torch.backends.mps.is_available()):
                logger.warning("MPS requested but not available, falling back to CPU")
                device = "cpu"

        return device

    def get_device(self):
        """Get the torch device object"""
        return torch.device(self.device)

    def get_memory_info(self):
        """Get memory information for the current device"""
        if self.device == "cuda":
            allocated = torch.cuda.memory_allocated() / 1e9
            reserved = torch.cuda.memory_reserved() / 1e9
            total = torch.cuda.get_device_properties(0).total_memory / 1e9
            return {
                "allocated_gb": allocated,
                "reserved_gb": reserved,
                "total_gb": total,
                "free_gb": total - reserved
            }
        elif self.device == "mps":
            # MPS doesn't provide detailed memory info like CUDA
            return {"device": "mps", "info": "Memory info not available for MPS"}
        else:
            return {"device": "cpu", "info": "Memory info not available for CPU"}

    def log_memory_usage(self):
        """Log current memory usage"""
        memory_info = self.get_memory_info()
        if "allocated_gb" in memory_info:
            logger.info(f"Memory Usage: {memory_info['allocated_gb']:.2f}GB allocated, "
                        f"{memory_info['free_gb']:.2f}GB free")

    def optimize_for_memory(self, model, batch_size: int) -> Tuple[int, int]:
        """Optimize batch size and gradient accumulation for available memory"""

        if self.device == "cpu":
            # CPU: Use smaller batches
            optimized_batch_size = min(batch_size, 8)
            gradient_steps = max(1, batch_size // optimized_batch_size)
            logger.info(f"CPU optimization: batch_size={optimized_batch_size}, gradient_steps={gradient_steps}")

        elif self.device == "mps":
            # Apple Silicon: Conservative settings
            optimized_batch_size = min(batch_size, 4)
            gradient_steps = max(1, batch_size // optimized_batch_size)
            logger.info(f"MPS optimization: batch_size={optimized_batch_size}, gradient_steps={gradient_steps}")

        elif self.device == "cuda":
            # CUDA: Check available memory
            memory_info = self.get_memory_info()
            total_memory = memory_info["total_gb"]

            if total_memory < 8:  # Less than 8GB
                optimized_batch_size = min(batch_size, 4)
                gradient_steps = max(1, batch_size // optimized_batch_size)
                logger.info(f"CUDA <8GB optimization: batch_size={optimized_batch_size}, gradient_steps={gradient_steps}")
            elif total_memory < 16:  # Less than 16GB
                optimized_batch_size = min(batch_size, 8)
                gradient_steps = max(1, batch_size // optimized_batch_size)
                logger.info(f"CUDA <16GB optimization: batch_size={optimized_batch_size}, gradient_steps={gradient_steps}")
            else:  # 16GB or more
                optimized_batch_size = batch_size
                gradient_steps = 1
                logger.info(f"CUDA >=16GB: using full batch_size={optimized_batch_size}")

        return optimized_batch_size, gradient_steps

# Make DeviceManager easily importable
__all__ = ["DeviceManager"]
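
The `optimize_for_memory` heuristic above reduces to a small lookup: a per-device micro-batch cap, with accumulation steps making up the difference. A table-driven restatement of the same tiers (a standalone sketch for illustration, not the package code):

```python
from typing import Optional, Tuple

# Caps taken from optimize_for_memory above: CPU -> 8, MPS -> 4,
# CUDA -> 4 below 8 GB, 8 below 16 GB, no cap at 16 GB or more.
def micro_batch_cap(device: str, total_gb: float = 0.0) -> Optional[int]:
    if device == "cpu":
        return 8
    if device == "mps":
        return 4
    # device == "cuda"
    if total_gb < 8:
        return 4
    if total_gb < 16:
        return 8
    return None

def split(batch_size: int, cap: Optional[int]) -> Tuple[int, int]:
    if cap is None:
        return batch_size, 1  # enough memory: no accumulation needed
    micro = min(batch_size, cap)
    return micro, max(1, batch_size // micro)

# Mirrors the logged outcomes for a requested batch of 64:
assert split(64, micro_batch_cap("cuda", total_gb=6.0)) == (4, 16)
assert split(64, micro_batch_cap("cpu")) == (8, 8)
assert split(64, micro_batch_cap("cuda", total_gb=24.0)) == (64, 1)
```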
File without changes
@@ -0,0 +1,14 @@
LICENSE
MANIFEST.in
README.md
requirements.txt
setup.py
examples/training_example.py
tests/__init__.py
torch_device_manager/__init__.py
torch_device_manager/py.typed
torch_device_manager.egg-info/PKG-INFO
torch_device_manager.egg-info/SOURCES.txt
torch_device_manager.egg-info/dependency_links.txt
torch_device_manager.egg-info/requires.txt
torch_device_manager.egg-info/top_level.txt
@@ -0,0 +1 @@
@@ -0,0 +1 @@
torch>=2.1.1