torchada-0.1.0.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- torchada-0.1.0/LICENSE +21 -0
- torchada-0.1.0/PKG-INFO +249 -0
- torchada-0.1.0/README.md +215 -0
- torchada-0.1.0/pyproject.toml +51 -0
- torchada-0.1.0/setup.cfg +4 -0
- torchada-0.1.0/setup.py +12 -0
- torchada-0.1.0/src/torchada/__init__.py +88 -0
- torchada-0.1.0/src/torchada/_mapping.py +396 -0
- torchada-0.1.0/src/torchada/_patch.py +578 -0
- torchada-0.1.0/src/torchada/_platform.py +116 -0
- torchada-0.1.0/src/torchada/cuda/__init__.py +166 -0
- torchada-0.1.0/src/torchada/cuda/amp.py +120 -0
- torchada-0.1.0/src/torchada/cuda/nvtx.py +111 -0
- torchada-0.1.0/src/torchada/utils/__init__.py +10 -0
- torchada-0.1.0/src/torchada/utils/cpp_extension.py +524 -0
- torchada-0.1.0/src/torchada.egg-info/PKG-INFO +249 -0
- torchada-0.1.0/src/torchada.egg-info/SOURCES.txt +24 -0
- torchada-0.1.0/src/torchada.egg-info/dependency_links.txt +1 -0
- torchada-0.1.0/src/torchada.egg-info/requires.txt +11 -0
- torchada-0.1.0/src/torchada.egg-info/top_level.txt +1 -0
- torchada-0.1.0/tests/test_cpp_extension.py +161 -0
- torchada-0.1.0/tests/test_cuda_patching.py +574 -0
- torchada-0.1.0/tests/test_device_strings.py +201 -0
- torchada-0.1.0/tests/test_extension_build.py +202 -0
- torchada-0.1.0/tests/test_mappings.py +436 -0
- torchada-0.1.0/tests/test_platform.py +66 -0
torchada-0.1.0/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 R0CKSTAR

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

torchada-0.1.0/PKG-INFO
ADDED
@@ -0,0 +1,249 @@
Metadata-Version: 2.4
Name: torchada
Version: 0.1.0
Summary: Adapter package for torch_musa to act exactly like PyTorch CUDA
Author: torchada contributors
License: MIT
Project-URL: Homepage, https://github.com/yeahdongcn/torchada
Project-URL: Repository, https://github.com/yeahdongcn/torchada
Keywords: pytorch,cuda,musa,moore-threads,gpu,adapter
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Science/Research
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Requires-Python: >=3.8
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: torch
Provides-Extra: musa
Requires-Dist: torch_musa; extra == "musa"
Provides-Extra: dev
Requires-Dist: pytest; extra == "dev"
Requires-Dist: pytest-cov; extra == "dev"
Requires-Dist: black; extra == "dev"
Requires-Dist: isort; extra == "dev"
Requires-Dist: mypy; extra == "dev"
Dynamic: license-file

# torchada

**Adapter package for torch_musa to act exactly like PyTorch CUDA**

torchada provides a unified interface that works transparently on both NVIDIA GPUs (CUDA) and Moore Threads GPUs (MUSA). Write your code once using standard PyTorch CUDA APIs, and it will run on MUSA hardware without any changes.

## Features

- **Zero Code Changes**: Just `import torchada` once, then use standard `torch.cuda.*` APIs
- **Automatic Platform Detection**: Detects whether you're running on CUDA or MUSA
- **Transparent Device Mapping**: `tensor.cuda()` and `tensor.to("cuda")` work on MUSA
- **Extension Building**: Standard `torch.utils.cpp_extension` works on MUSA after importing torchada
- **Source Code Porting**: Automatic CUDA → MUSA symbol mapping for C++/CUDA extensions

## Installation

```bash
pip install torchada

# Or install from source
git clone https://github.com/yeahdongcn/torchada.git
cd torchada
pip install -e .
```

## Quick Start

### Basic Usage

```python
import torchada  # Import once to apply patches - that's it!
import torch

# Use standard torch.cuda APIs - they work on both CUDA and MUSA:
if torch.cuda.is_available():
    device = torch.device("cuda")
    tensor = torch.randn(10, 10).cuda()
    model = MyModel().cuda()

    # All torch.cuda.* APIs work transparently
    print(f"Device count: {torch.cuda.device_count()}")
    print(f"Device name: {torch.cuda.get_device_name()}")
    torch.cuda.synchronize()
```

### Building C++ Extensions

```python
# setup.py - Use standard torch imports!
import torchada  # Import first to apply patches
from setuptools import setup
from torch.utils.cpp_extension import CUDAExtension, BuildExtension, CUDA_HOME

print(f"Building with CUDA/MUSA home: {CUDA_HOME}")

ext_modules = [
    CUDAExtension(
        name="my_extension",
        sources=[
            "my_extension.cpp",
            "my_extension_kernel.cu",
        ],
        extra_compile_args={
            "cxx": ["-O3"],
            "nvcc": ["-O3"],  # Automatically mapped to mcc on MUSA
        },
    ),
]

setup(
    name="my_package",
    ext_modules=ext_modules,
    cmdclass={"build_ext": BuildExtension.with_options(use_ninja=True)},
)
```

### JIT Compilation

```python
import torchada  # Import first to apply patches
from torch.utils.cpp_extension import load

# Load extension at runtime (works on both CUDA and MUSA)
my_extension = load(
    name="my_extension",
    sources=["my_extension.cpp", "my_extension_kernel.cu"],
    verbose=True,
)
```

### Mixed Precision Training

```python
import torchada  # Import first to apply patches
import torch

model = MyModel().cuda()
optimizer = torch.optim.Adam(model.parameters())
scaler = torch.cuda.amp.GradScaler()

for data, target in dataloader:
    data, target = data.cuda(), target.cuda()

    with torch.cuda.amp.autocast():
        output = model(data)
        loss = criterion(output, target)

    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()
    optimizer.zero_grad()
```

### Distributed Training

```python
import torchada  # Import first to apply patches
import torch.distributed as dist

# Use 'nccl' backend as usual - torchada maps it to 'mccl' on MUSA
dist.init_process_group(backend='nccl')
```

### CUDA Graphs

```python
import torchada  # Import first to apply patches
import torch

# Use standard torch.cuda.CUDAGraph - works on MUSA too
g = torch.cuda.CUDAGraph()
with torch.cuda.graph(g):
    y = model(x)
```

## Platform Detection

torchada automatically detects the platform:

```python
import torchada
from torchada import detect_platform, Platform

platform = detect_platform()
if platform == Platform.MUSA:
    print("Running on Moore Threads GPU")
elif platform == Platform.CUDA:
    print("Running on NVIDIA GPU")

# Or use convenience functions
if torchada.is_musa_platform():
    print("MUSA platform detected")
```

## What Gets Patched

After `import torchada`, the following standard PyTorch APIs work on MUSA:

| Standard Import | Works On MUSA |
|----------------|---------------|
| `torch.cuda.*` | ✅ All APIs |
| `torch.cuda.amp.*` | ✅ autocast, GradScaler |
| `torch.cuda.CUDAGraph` | ✅ Maps to MUSAGraph |
| `torch.distributed` (backend='nccl') | ✅ Uses MCCL |
| `torch.utils.cpp_extension.*` | ✅ CUDAExtension, BuildExtension |

## API Reference

### torchada

| Function | Description |
|----------|-------------|
| `detect_platform()` | Returns the detected platform (CUDA, MUSA, or CPU) |
| `is_musa_platform()` | Check if running on MUSA |
| `is_cuda_platform()` | Check if running on CUDA |
| `get_device_name()` | Get device name string ("cuda", "musa", or "cpu") |

### torch.cuda (after importing torchada)

All standard `torch.cuda` APIs work, including:

- `is_available()`, `device_count()`, `current_device()`, `set_device()`
- `memory_allocated()`, `memory_reserved()`, `empty_cache()`
- `synchronize()`, `Stream`, `Event`, `CUDAGraph`
- `amp.autocast()`, `amp.GradScaler()`

### torch.utils.cpp_extension (after importing torchada)

| Symbol | Description |
|--------|-------------|
| `CUDAExtension` | Creates CUDA or MUSA extension based on platform |
| `CppExtension` | Creates C++ extension (no GPU code) |
| `BuildExtension` | Build command for extensions |
| `CUDA_HOME` | Path to CUDA/MUSA installation |
| `load()` | JIT compile and load extension |

## Symbol Mapping

torchada automatically maps CUDA symbols to MUSA equivalents when building extensions:

| CUDA | MUSA |
|------|------|
| `cudaMalloc` | `musaMalloc` |
| `cudaMemcpy` | `musaMemcpy` |
| `cudaStream_t` | `musaStream_t` |
| `cublasHandle_t` | `mublasHandle_t` |
| `curandState` | `murandState` |
| `at::cuda` | `at::musa` |
| `c10::cuda` | `c10::musa` |
| ... | ... |

See `src/torchada/_mapping.py` for the complete mapping table.

## License

MIT License

torchada-0.1.0/README.md
ADDED
@@ -0,0 +1,215 @@
# torchada

**Adapter package for torch_musa to act exactly like PyTorch CUDA**

torchada provides a unified interface that works transparently on both NVIDIA GPUs (CUDA) and Moore Threads GPUs (MUSA). Write your code once using standard PyTorch CUDA APIs, and it will run on MUSA hardware without any changes.

## Features

- **Zero Code Changes**: Just `import torchada` once, then use standard `torch.cuda.*` APIs
- **Automatic Platform Detection**: Detects whether you're running on CUDA or MUSA
- **Transparent Device Mapping**: `tensor.cuda()` and `tensor.to("cuda")` work on MUSA
- **Extension Building**: Standard `torch.utils.cpp_extension` works on MUSA after importing torchada
- **Source Code Porting**: Automatic CUDA → MUSA symbol mapping for C++/CUDA extensions

## Installation

```bash
pip install torchada

# Or install from source
git clone https://github.com/yeahdongcn/torchada.git
cd torchada
pip install -e .
```

## Quick Start

### Basic Usage

```python
import torchada  # Import once to apply patches - that's it!
import torch

# Use standard torch.cuda APIs - they work on both CUDA and MUSA:
if torch.cuda.is_available():
    device = torch.device("cuda")
    tensor = torch.randn(10, 10).cuda()
    model = MyModel().cuda()

    # All torch.cuda.* APIs work transparently
    print(f"Device count: {torch.cuda.device_count()}")
    print(f"Device name: {torch.cuda.get_device_name()}")
    torch.cuda.synchronize()
```

### Building C++ Extensions

```python
# setup.py - Use standard torch imports!
import torchada  # Import first to apply patches
from setuptools import setup
from torch.utils.cpp_extension import CUDAExtension, BuildExtension, CUDA_HOME

print(f"Building with CUDA/MUSA home: {CUDA_HOME}")

ext_modules = [
    CUDAExtension(
        name="my_extension",
        sources=[
            "my_extension.cpp",
            "my_extension_kernel.cu",
        ],
        extra_compile_args={
            "cxx": ["-O3"],
            "nvcc": ["-O3"],  # Automatically mapped to mcc on MUSA
        },
    ),
]

setup(
    name="my_package",
    ext_modules=ext_modules,
    cmdclass={"build_ext": BuildExtension.with_options(use_ninja=True)},
)
```

### JIT Compilation

```python
import torchada  # Import first to apply patches
from torch.utils.cpp_extension import load

# Load extension at runtime (works on both CUDA and MUSA)
my_extension = load(
    name="my_extension",
    sources=["my_extension.cpp", "my_extension_kernel.cu"],
    verbose=True,
)
```

### Mixed Precision Training

```python
import torchada  # Import first to apply patches
import torch

model = MyModel().cuda()
optimizer = torch.optim.Adam(model.parameters())
scaler = torch.cuda.amp.GradScaler()

for data, target in dataloader:
    data, target = data.cuda(), target.cuda()

    with torch.cuda.amp.autocast():
        output = model(data)
        loss = criterion(output, target)

    scaler.scale(loss).backward()
    scaler.step(optimizer)
    scaler.update()
    optimizer.zero_grad()
```

### Distributed Training

```python
import torchada  # Import first to apply patches
import torch.distributed as dist

# Use 'nccl' backend as usual - torchada maps it to 'mccl' on MUSA
dist.init_process_group(backend='nccl')
```

### CUDA Graphs

```python
import torchada  # Import first to apply patches
import torch

# Use standard torch.cuda.CUDAGraph - works on MUSA too
g = torch.cuda.CUDAGraph()
with torch.cuda.graph(g):
    y = model(x)
```

## Platform Detection

torchada automatically detects the platform:

```python
import torchada
from torchada import detect_platform, Platform

platform = detect_platform()
if platform == Platform.MUSA:
    print("Running on Moore Threads GPU")
elif platform == Platform.CUDA:
    print("Running on NVIDIA GPU")

# Or use convenience functions
if torchada.is_musa_platform():
    print("MUSA platform detected")
```

## What Gets Patched

After `import torchada`, the following standard PyTorch APIs work on MUSA:

| Standard Import | Works On MUSA |
|----------------|---------------|
| `torch.cuda.*` | ✅ All APIs |
| `torch.cuda.amp.*` | ✅ autocast, GradScaler |
| `torch.cuda.CUDAGraph` | ✅ Maps to MUSAGraph |
| `torch.distributed` (backend='nccl') | ✅ Uses MCCL |
| `torch.utils.cpp_extension.*` | ✅ CUDAExtension, BuildExtension |
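
Under the hood, `apply_patches()` (run automatically on import, per `src/torchada/__init__.py`) rewires these entry points; the real implementation lives in `src/torchada/_patch.py`. As a loose, hypothetical sketch of the core idea only — not torchada's actual code — a MUSA build can alias the `torch.musa` module over `torch.cuda`:

```python
# Hypothetical sketch of the aliasing idea; torchada's real patching in
# src/torchada/_patch.py covers device strings, amp, graphs, and more.
import sys

import torch


def alias_musa_as_cuda() -> None:
    """Route torch.cuda.* lookups to torch.musa when torch_musa is loaded."""
    musa = getattr(torch, "musa", None)  # importing torch_musa registers torch.musa
    if musa is not None:
        torch.cuda = musa                 # torch.cuda.synchronize() -> torch.musa.synchronize()
        sys.modules["torch.cuda"] = musa  # make `from torch.cuda import ...` resolve too
```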

## API Reference

### torchada

| Function | Description |
|----------|-------------|
| `detect_platform()` | Returns the detected platform (CUDA, MUSA, or CPU) |
| `is_musa_platform()` | Check if running on MUSA |
| `is_cuda_platform()` | Check if running on CUDA |
| `get_device_name()` | Get device name string ("cuda", "musa", or "cpu") |

### torch.cuda (after importing torchada)

All standard `torch.cuda` APIs work, including:

- `is_available()`, `device_count()`, `current_device()`, `set_device()`
- `memory_allocated()`, `memory_reserved()`, `empty_cache()`
- `synchronize()`, `Stream`, `Event`, `CUDAGraph`
- `amp.autocast()`, `amp.GradScaler()`

### torch.utils.cpp_extension (after importing torchada)

| Symbol | Description |
|--------|-------------|
| `CUDAExtension` | Creates CUDA or MUSA extension based on platform |
| `CppExtension` | Creates C++ extension (no GPU code) |
| `BuildExtension` | Build command for extensions |
| `CUDA_HOME` | Path to CUDA/MUSA installation |
| `load()` | JIT compile and load extension |

## Symbol Mapping

torchada automatically maps CUDA symbols to MUSA equivalents when building extensions:

| CUDA | MUSA |
|------|------|
| `cudaMalloc` | `musaMalloc` |
| `cudaMemcpy` | `musaMemcpy` |
| `cudaStream_t` | `musaStream_t` |
| `cublasHandle_t` | `mublasHandle_t` |
| `curandState` | `murandState` |
| `at::cuda` | `at::musa` |
| `c10::cuda` | `c10::musa` |
| ... | ... |

See `src/torchada/_mapping.py` for the complete mapping table.
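
Conceptually this is a text-level substitution over extension sources. A minimal sketch of the idea — the dict is a small excerpt of the table above, and `port_source()` is an illustrative name, not torchada's API:

```python
# Illustrative sketch of text-level CUDA -> MUSA porting.
CUDA_TO_MUSA = {
    "cudaMalloc": "musaMalloc",
    "cudaMemcpy": "musaMemcpy",
    "cudaStream_t": "musaStream_t",
    "cublasHandle_t": "mublasHandle_t",
}


def port_source(src: str) -> str:
    """Rewrite CUDA symbols to their MUSA equivalents in extension source text."""
    # Replace longer symbols first so shorter prefixes don't clobber them.
    for cuda_sym in sorted(CUDA_TO_MUSA, key=len, reverse=True):
        src = src.replace(cuda_sym, CUDA_TO_MUSA[cuda_sym])
    return src


print(port_source("cudaStream_t s; cudaMalloc(&p, n);"))
# -> "musaStream_t s; musaMalloc(&p, n);"
```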

## License

MIT License

torchada-0.1.0/pyproject.toml
ADDED
@@ -0,0 +1,51 @@
[build-system]
requires = ["setuptools>=64.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "torchada"
version = "0.1.0"
description = "Adapter package for torch_musa to act exactly like PyTorch CUDA"
readme = "README.md"
license = {text = "MIT"}
requires-python = ">=3.8"
authors = [
    {name = "torchada contributors"}
]
keywords = ["pytorch", "cuda", "musa", "moore-threads", "gpu", "adapter"]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
dependencies = [
    "torch",
]

[project.optional-dependencies]
musa = ["torch_musa"]
dev = ["pytest", "pytest-cov", "black", "isort", "mypy"]

[project.urls]
Homepage = "https://github.com/yeahdongcn/torchada"
Repository = "https://github.com/yeahdongcn/torchada"

[tool.setuptools.packages.find]
where = ["src"]

[tool.black]
line-length = 100
target-version = ["py38", "py39", "py310", "py311", "py312"]

[tool.isort]
profile = "black"
line_length = 100
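
The optional-dependency groups above surface as standard pip extras, e.g.:

```bash
# Extras defined in [project.optional-dependencies] above:
pip install "torchada[musa]"  # additionally pulls in torch_musa
pip install "torchada[dev]"   # pytest, pytest-cov, black, isort, mypy
```
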
torchada-0.1.0/setup.cfg
ADDED
torchada-0.1.0/setup.py
ADDED
torchada-0.1.0/src/torchada/__init__.py
ADDED
@@ -0,0 +1,88 @@
"""
torchada - Adapter package for torch_musa to act exactly like PyTorch CUDA.

torchada provides a unified interface that works transparently on both
NVIDIA GPUs (CUDA) and Moore Threads GPUs (MUSA).

Usage:
    Just import torchada at the top of your script, then use standard
    torch.cuda.* and torch.utils.cpp_extension APIs as you normally would.
    torchada patches PyTorch to transparently redirect to MUSA on
    Moore Threads hardware.

    # Add this at the top of your script:
    import torchada  # noqa: F401

    # Then use standard torch APIs - they work on MUSA too!
    import torch
    torch.cuda.is_available()
    x = torch.randn(3, 3).cuda()
    from torch.cuda.amp import autocast, GradScaler
    from torch.utils.cpp_extension import CUDAExtension, BuildExtension, CUDA_HOME
"""

__version__ = "0.1.0"

from . import cuda
from . import utils
from ._platform import (
    Platform,
    detect_platform,
    is_musa_platform,
    is_cuda_platform,
    is_cpu_platform,
    get_device_name,
    get_torch_device_module,
)
from ._patch import apply_patches, is_patched, get_original_init_process_group
from .utils.cpp_extension import CUDA_HOME


# Automatically apply patches on import
apply_patches()


def get_version() -> str:
    """Return the version of torchada."""
    return __version__


def get_platform() -> Platform:
    """Return the detected platform."""
    return detect_platform()


def get_backend():
    """
    Get the underlying torch device module (torch.cuda or torch.musa).

    Returns:
        The torch.cuda or torch.musa module.
    """
    return get_torch_device_module()


__all__ = [
    # Version
    "__version__",
    "get_version",
    # Modules
    "cuda",
    "utils",
    # Platform detection
    "Platform",
    "detect_platform",
    "is_musa_platform",
    "is_cuda_platform",
    "is_cpu_platform",
    "get_device_name",
    "get_platform",
    "get_backend",
    # Patching
    "apply_patches",
    "is_patched",
    "get_original_init_process_group",
    # C++ Extension building
    "CUDA_HOME",
]
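
For reference, a minimal usage sketch of the helpers this module exports; the commented values are what the code above suggests, not verified output:

```python
import torchada  # patches are applied automatically at import time

print(torchada.get_version())     # "0.1.0"
print(torchada.get_platform())    # Platform.CUDA, Platform.MUSA, or Platform.CPU
print(torchada.is_patched())      # expected True after import (see apply_patches above)

backend = torchada.get_backend()  # torch.cuda on NVIDIA, torch.musa on Moore Threads
print(backend.is_available())
```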