torchada-0.1.19.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- torchada-0.1.19/LICENSE +21 -0
- torchada-0.1.19/PKG-INFO +296 -0
- torchada-0.1.19/README.md +263 -0
- torchada-0.1.19/pyproject.toml +51 -0
- torchada-0.1.19/setup.cfg +4 -0
- torchada-0.1.19/setup.py +12 -0
- torchada-0.1.19/src/torchada/__init__.py +92 -0
- torchada-0.1.19/src/torchada/_mapping.py +494 -0
- torchada-0.1.19/src/torchada/_patch.py +1084 -0
- torchada-0.1.19/src/torchada/_platform.py +170 -0
- torchada-0.1.19/src/torchada/cuda/__init__.py +167 -0
- torchada-0.1.19/src/torchada/cuda/amp.py +134 -0
- torchada-0.1.19/src/torchada/cuda/nvtx.py +109 -0
- torchada-0.1.19/src/torchada/utils/__init__.py +9 -0
- torchada-0.1.19/src/torchada/utils/cpp_extension.py +739 -0
- torchada-0.1.19/src/torchada.egg-info/PKG-INFO +296 -0
- torchada-0.1.19/src/torchada.egg-info/SOURCES.txt +25 -0
- torchada-0.1.19/src/torchada.egg-info/dependency_links.txt +1 -0
- torchada-0.1.19/src/torchada.egg-info/requires.txt +11 -0
- torchada-0.1.19/src/torchada.egg-info/top_level.txt +1 -0
- torchada-0.1.19/tests/test_cpp_extension.py +173 -0
- torchada-0.1.19/tests/test_cuda_patching.py +1269 -0
- torchada-0.1.19/tests/test_device_strings.py +441 -0
- torchada-0.1.19/tests/test_extension_build.py +210 -0
- torchada-0.1.19/tests/test_mappings.py +669 -0
- torchada-0.1.19/tests/test_platform.py +115 -0
- torchada-0.1.19/tests/test_python_compat.py +191 -0
torchada-0.1.19/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2025 Moore Threads Technology Co., Ltd ("Moore Threads"). All rights reserved.

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
torchada-0.1.19/PKG-INFO
ADDED
@@ -0,0 +1,296 @@
Metadata-Version: 2.1
Name: torchada
Version: 0.1.19
Summary: Adapter package for torch_musa to act exactly like PyTorch CUDA
Author: torchada contributors
License: MIT
Project-URL: Homepage, https://github.com/MooreThreads/torchada
Project-URL: Repository, https://github.com/MooreThreads/torchada
Keywords: pytorch,cuda,musa,moore-threads,gpu,adapter
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Science/Research
Classifier: License :: OSI Approved :: MIT License
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Requires-Python: >=3.8
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: torch
Provides-Extra: musa
Requires-Dist: torch_musa; extra == "musa"
Provides-Extra: dev
Requires-Dist: pytest; extra == "dev"
Requires-Dist: pytest-cov; extra == "dev"
Requires-Dist: black; extra == "dev"
Requires-Dist: isort; extra == "dev"
Requires-Dist: mypy; extra == "dev"
[The remaining 263 lines of PKG-INFO are the package long description, identical to the README.md shown next.]
torchada-0.1.19/README.md
ADDED
@@ -0,0 +1,263 @@
<div align="center">
<img src="https://raw.githubusercontent.com/MooreThreads/torchada/main/assets/logo.png" alt="logo" width="250"/>
</div>

--------------------------------------------------------------------------------

# torchada

English | [中文](README_CN.md)

**Run your CUDA code on Moore Threads GPUs — zero code changes required**

torchada is an adapter that makes [torch_musa](https://github.com/MooreThreads/torch_musa) (Moore Threads GPU support for PyTorch) compatible with standard PyTorch CUDA APIs. Import it once, and your existing `torch.cuda.*` code works on MUSA hardware.

## Why torchada?

Many PyTorch projects are written for NVIDIA GPUs using `torch.cuda.*` APIs. To run these on Moore Threads GPUs, you would normally need to change every `cuda` reference to `musa`. torchada eliminates this by automatically translating CUDA API calls to their MUSA equivalents at runtime.

## Prerequisites

- **torch_musa**: You must have [torch_musa](https://github.com/MooreThreads/torch_musa) installed (it provides MUSA support for PyTorch)
- **Moore Threads GPU**: A Moore Threads GPU with the proper driver installed

## Installation

```bash
pip install torchada

# Or install from source
git clone https://github.com/MooreThreads/torchada.git
cd torchada
pip install -e .
```

## Quick Start

```python
import torchada  # ← Add this one line at the top
import torch

# Your existing CUDA code works unchanged:
x = torch.randn(10, 10).cuda()
print(torch.cuda.device_count())
torch.cuda.synchronize()
```

That's it! All `torch.cuda.*` APIs are automatically redirected to `torch.musa.*`.
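
To see the redirection on a MUSA machine, a quick check (a sketch of observable behavior; the patching itself lives in `src/torchada/_patch.py`):

```python
import torchada
import torch

# Both calls go through the MUSA backend once torchada is imported:
print(torch.cuda.device_count())  # reports the Moore Threads device count
print(torch.device("cuda:0"))     # prints "musa:0" (see Known Limitation below)
```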

## What Works

| Feature | Example |
|---------|---------|
| Device operations | `tensor.cuda()`, `model.cuda()`, `torch.device("cuda")` |
| Memory management | `torch.cuda.memory_allocated()`, `empty_cache()` |
| Synchronization | `torch.cuda.synchronize()`, `Stream`, `Event` |
| Mixed precision | `torch.cuda.amp.autocast()`, `GradScaler()` |
| CUDA Graphs | `torch.cuda.CUDAGraph`, `torch.cuda.graph()` |
| Profiler | `ProfilerActivity.CUDA` → uses PrivateUse1 |
| Custom Ops | `Library.impl(..., "CUDA")` → uses PrivateUse1 |
| Distributed | `dist.init_process_group(backend='nccl')` → uses MCCL |
| torch.compile | `torch.compile(model)` with all backends |
| C++ Extensions | `CUDAExtension`, `BuildExtension`, `load()` |

## Examples

### Mixed Precision Training

```python
import torchada
import torch

model = MyModel().cuda()
scaler = torch.cuda.amp.GradScaler()

with torch.cuda.amp.autocast():
    output = model(data.cuda())
    loss = criterion(output, target.cuda())

scaler.scale(loss).backward()
scaler.step(optimizer)
scaler.update()
```

### Distributed Training

```python
import torchada
import torch.distributed as dist

# 'nccl' is automatically mapped to 'mccl' on MUSA
dist.init_process_group(backend='nccl')
```

### CUDA Graphs

```python
import torchada
import torch

g = torch.cuda.CUDAGraph()
with torch.cuda.graph(cuda_graph=g):  # the cuda_graph= keyword works on MUSA
    y = model(x)
```

### torch.compile

```python
import torchada
import torch

compiled_model = torch.compile(model.cuda(), backend='inductor')
```

### Building C++ Extensions

```python
import torchada  # Must be imported before torch.utils.cpp_extension
from torch.utils.cpp_extension import CUDAExtension, BuildExtension

# The standard CUDAExtension works — torchada handles CUDA→MUSA translation
ext = CUDAExtension("my_ext", sources=["kernel.cu"])
```

### Custom Ops

```python
import torchada
import torch

my_lib = torch.library.Library("my_lib", "DEF")
my_lib.define("my_op(Tensor x) -> Tensor")
my_lib.impl("my_op", my_func, "CUDA")  # Works on MUSA!
```

### Profiler

```python
import torchada
import torch

# ProfilerActivity.CUDA works on MUSA
with torch.profiler.profile(
    activities=[torch.profiler.ProfilerActivity.CPU, torch.profiler.ProfilerActivity.CUDA]
) as prof:
    model(x)
```

## Platform Detection

```python
import torchada
from torchada import detect_platform, Platform

platform = detect_platform()
if platform == Platform.MUSA:
    print("Running on Moore Threads GPU")
elif platform == Platform.CUDA:
    print("Running on NVIDIA GPU")

# Or use torch.version-based detection
def is_musa():
    import torch
    return hasattr(torch.version, 'musa') and torch.version.musa is not None
```

## Known Limitation

**Device type string comparisons fail on MUSA:**

```python
device = torch.device("cuda:0")  # On MUSA, this becomes musa:0
device.type == "cuda"  # Returns False!
```

**Solution:** Use `torchada.is_gpu_device()`:

```python
import torchada

if torchada.is_gpu_device(device):  # Works on both CUDA and MUSA
    ...
# Or: device.type in ("cuda", "musa")
```

## API Reference

| Function | Description |
|----------|-------------|
| `detect_platform()` | Returns `Platform.CUDA`, `Platform.MUSA`, or `Platform.CPU` |
| `is_musa_platform()` | Returns True if running on MUSA |
| `is_cuda_platform()` | Returns True if running on CUDA |
| `is_gpu_device(device)` | Returns True if the device is CUDA or MUSA |
| `CUDA_HOME` | Path to the CUDA/MUSA installation |

**Note**: `torch.cuda.is_available()` is intentionally NOT redirected — it returns `False` on MUSA, which allows proper platform detection. Use `torch.musa.is_available()` or the `is_musa()` helper shown above instead.
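
For example, a backend-agnostic availability check built on these helpers might look like the following sketch (`gpu_available` is an illustrative name, not part of the torchada API):

```python
import torchada
import torch
from torchada import detect_platform, Platform

def gpu_available() -> bool:
    # torch.cuda.is_available() is deliberately left unpatched, so it
    # reports False on MUSA; detect_platform() covers both backends.
    return detect_platform() in (Platform.CUDA, Platform.MUSA)

# With torchada imported, "cuda" resolves to the MUSA backend on
# Moore Threads GPUs, so the same string works on both platforms.
device = torch.device("cuda" if gpu_available() else "cpu")
```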

## C++ Extension Symbol Mapping

When building C++ extensions, torchada automatically translates CUDA symbols to MUSA:

| CUDA | MUSA |
|------|------|
| `cudaMalloc` | `musaMalloc` |
| `cudaStream_t` | `musaStream_t` |
| `cublasHandle_t` | `mublasHandle_t` |
| `at::cuda` | `at::musa` |
| `c10::cuda` | `c10::musa` |
| `#include <cuda/*>` | `#include <musa/*>` |

See `src/torchada/_mapping.py` for the complete mapping table (380+ mappings).
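
The same translation applies to extensions JIT-compiled with `load()`. A minimal sketch, assuming a hypothetical `add_kernel.cu` written against the ordinary CUDA APIs:

```python
import torchada  # must be imported before torch.utils.cpp_extension
from torch.utils.cpp_extension import load

# On MUSA, torchada rewrites the CUDA symbols in add_kernel.cu
# (cudaMalloc, cudaStream_t, at::cuda, ...) to their MUSA
# equivalents before the extension is compiled.
my_ext = load(name="my_ext", sources=["add_kernel.cu"], verbose=True)
```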

## Integrating torchada into Your Project

### Step 1: Add Dependency

```
# pyproject.toml or requirements.txt
torchada>=0.1.19
```

### Step 2: Conditional Import

```python
# At your application entry point
def is_musa():
    import torch
    return hasattr(torch.version, "musa") and torch.version.musa is not None

if is_musa():
    import torchada  # noqa: F401

# Rest of your code uses torch.cuda.* as normal
```

### Step 3: Extend Feature Flags (if applicable)

```python
# Include MUSA in GPU capability checks
if is_nvidia() or is_musa():
    ENABLE_FLASH_ATTENTION = True
```

### Step 4: Fix Device Type Checks (if applicable)

```python
# Instead of: device.type == "cuda"
# Use: device.type in ("cuda", "musa")
# Or: torchada.is_gpu_device(device)
```

## Projects Using torchada

| Project | Category | Status |
|---------|----------|--------|
| [Xinference](https://github.com/xorbitsai/inference) | Model Serving | ✅ Merged |
| [LightLLM](https://github.com/ModelTC/LightLLM) | Model Serving | ✅ Merged |
| [LightX2V](https://github.com/ModelTC/LightX2V) | Image/Video Generation | ✅ Merged |
| [SGLang](https://github.com/sgl-project/sglang) | Model Serving | In Progress |
| [ComfyUI](https://github.com/comfyanonymous/ComfyUI) | Image/Video Generation | In Progress |

## License

MIT License
torchada-0.1.19/pyproject.toml
ADDED
@@ -0,0 +1,51 @@
[build-system]
requires = ["setuptools>=64.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "torchada"
version = "0.1.19"
description = "Adapter package for torch_musa to act exactly like PyTorch CUDA"
readme = "README.md"
license = {text = "MIT"}
requires-python = ">=3.8"
authors = [
    {name = "torchada contributors"}
]
keywords = ["pytorch", "cuda", "musa", "moore-threads", "gpu", "adapter"]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Intended Audience :: Science/Research",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.8",
    "Programming Language :: Python :: 3.9",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
]
dependencies = [
    "torch",
]

[project.optional-dependencies]
musa = ["torch_musa"]
dev = ["pytest", "pytest-cov", "black", "isort", "mypy"]

[project.urls]
Homepage = "https://github.com/MooreThreads/torchada"
Repository = "https://github.com/MooreThreads/torchada"

[tool.setuptools.packages.find]
where = ["src"]

[tool.black]
line-length = 100
target-version = ["py38", "py39", "py310", "py311", "py312"]

[tool.isort]
profile = "black"
line_length = 100
torchada-0.1.19/setup.py
ADDED