torchgl 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- torchgl-0.1.0/PKG-INFO +106 -0
- torchgl-0.1.0/README.md +93 -0
- torchgl-0.1.0/pyproject.toml +37 -0
- torchgl-0.1.0/src/torchgl/__init__.py +528 -0
- torchgl-0.1.0/src/torchgl/py.typed +0 -0
torchgl-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: torchgl
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Interop between ModernGL and PyTorch CUDA
|
|
5
|
+
Author: Adam Alcolado
|
|
6
|
+
Author-email: Adam Alcolado <adam.alcolado@mtl.ai>
|
|
7
|
+
Requires-Dist: cuda-python
|
|
8
|
+
Requires-Dist: moderngl
|
|
9
|
+
Requires-Dist: numpy
|
|
10
|
+
Requires-Dist: torch
|
|
11
|
+
Requires-Python: >=3.10
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
|
|
14
|
+
# torchgl
|
|
15
|
+
`torchgl` is a simple library for sharing data between PyTorch and [ModernGL](https://github.com/moderngl/moderngl)
|
|
16
|
+
directly on the GPU.
|
|
17
|
+
|
|
18
|
+
It wraps CUDA graphics interoperability calls, which avoids slow CPU round-trips. No C++ compiling is needed
|
|
19
|
+
since we use the python CUDA API.
|
|
20
|
+
|
|
21
|
+
# Use Cases
|
|
22
|
+
- Render visualizations in real-time
|
|
23
|
+
- Use GLSL shaders in pre- or post-processing
|
|
24
|
+
- Combine ML with traditional graphics pipelines
|
|
25
|
+
|
|
26
|
+
# Basic Usage
|
|
27
|
+
## Write a tensor into a texture
|
|
28
|
+
```python
|
|
29
|
+
import moderngl
|
|
30
|
+
import torch
|
|
31
|
+
import torchgl
|
|
32
|
+
|
|
33
|
+
# need a moderngl context, e.g.
|
|
34
|
+
moderngl.create_context(standalone=True)
|
|
35
|
+
|
|
36
|
+
# float16
|
|
37
|
+
tensor = torch.rand(1080, 1920, 4, device="cuda", dtype=torch.float16)
|
|
38
|
+
texture = torchgl.to_texture(tensor)
|
|
39
|
+
print(texture.size, texture.components, texture.dtype) # (1920, 1080) 4 f2
|
|
40
|
+
|
|
41
|
+
# uint8 with 2 channels
|
|
42
|
+
tensor = (255 * tensor[:, :, :2]).to(torch.uint8)
|
|
43
|
+
texture = torchgl.to_texture(tensor)
|
|
44
|
+
print(texture.size, texture.components, texture.dtype) # (1920, 1080) 2 f1
|
|
45
|
+
```
|
|
46
|
+
## Read a texture into a tensor
|
|
47
|
+
```python
|
|
48
|
+
import moderngl
|
|
49
|
+
import torchgl
|
|
50
|
+
|
|
51
|
+
ctx = moderngl.create_context(standalone=True)
|
|
52
|
+
texture = ctx.texture((1920, 1080), 1, dtype="f1")
|
|
53
|
+
tensor = torchgl.to_tensor(texture)
|
|
54
|
+
print(tensor.shape, tensor.dtype) # torch.Size([1080, 1920, 1]) torch.uint8
|
|
55
|
+
```
|
|
56
|
+
## Other examples
|
|
57
|
+
See [other examples](examples) here.
|
|
58
|
+
|
|
59
|
+
# Installation
|
|
60
|
+
`torchgl` is (will be) published on PyPI and can be installed with pip
|
|
61
|
+
|
|
62
|
+
```commandline
|
|
63
|
+
pip install torchgl
|
|
64
|
+
```
|
|
65
|
+
Before installing, make sure you have PyTorch with CUDA support.
|
|
66
|
+
|
|
67
|
+
This package depends on [CUDA Python](https://nvidia.github.io/cuda-python/latest/) bindings. It is recommended to
|
|
68
|
+
match the version you are using with PyTorch, e.g. if you want to use CUDA 12.8
|
|
69
|
+
```bash
|
|
70
|
+
pip install cuda-python==12.8
|
|
71
|
+
pip install torch --index-url https://download.pytorch.org/whl/cu128
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
# Texture Formats
|
|
75
|
+
`torchgl` supports 1, 2, or 4 component Textures (3 component textures are not supported by CUDA).
|
|
76
|
+
For most ModernGL dtypes, the converted Tensor type matches what you would use to supply pixel data.
|
|
77
|
+
|
|
78
|
+
| ModernGL dtype | PyTorch dtype |
|
|
79
|
+
|----------------|----------------|
|
|
80
|
+
| f1 | uint8 |
|
|
81
|
+
| f2 | half |
|
|
82
|
+
| f4 | float32 |
|
|
83
|
+
| u1 | uint8 |
|
|
84
|
+
| u2 | uint16 |
|
|
85
|
+
| u4 | uint32 |
|
|
86
|
+
| i1 | int8 |
|
|
87
|
+
| i2 | int16 |
|
|
88
|
+
| i4 | int32 |
|
|
89
|
+
|
|
90
|
+
The exceptions are "ni1", and "ni2" which are really unsigned internally, but the pixel data
|
|
91
|
+
is expected to be signed (GL_BYTE or GL_SHORT), see https://github.com/moderngl/moderngl/blob/main/src/moderngl.cpp#L800.
|
|
92
|
+
We suggest avoiding these types unless you really know what you are doing.
|
|
93
|
+
|
|
94
|
+
Also, if you override the ModernGL internal format, conversion may not behave as expected.
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
# Advanced Usage
|
|
98
|
+
For more control of the resource management and synchronization between CUDA and OpenGL,
|
|
99
|
+
you can allocate your textures and buffers ahead of time and register them once for interop using `register()`. Then, use
|
|
100
|
+
`map()` and `unmap()` to efficiently pipeline the CUDA and OpenGL work and reduce unnecessary synchronization points.
|
|
101
|
+
|
|
102
|
+
Streams are also supported, although you should be aware of how they work with the PyTorch allocation of CUDA tensors,
|
|
103
|
+
see [docs here](https://docs.pytorch.org/docs/stable/notes/cuda.html#cuda-semantics).
|
|
104
|
+
|
|
105
|
+
# Related Packages
|
|
106
|
+
[torch2moderngl](https://github.com/geospaitial-lab/torch2moderngl) provides similar basic usage for Textures only.
|
torchgl-0.1.0/README.md
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# torchgl
|
|
2
|
+
`torchgl` is a simple library for sharing data between PyTorch and [ModernGL](https://github.com/moderngl/moderngl)
|
|
3
|
+
directly on the GPU.
|
|
4
|
+
|
|
5
|
+
It wraps CUDA graphics interoperability calls, which avoids slow CPU round-trips. No C++ compiling is needed
|
|
6
|
+
since we use the python CUDA API.
|
|
7
|
+
|
|
8
|
+
# Use Cases
|
|
9
|
+
- Render visualizations in real-time
|
|
10
|
+
- Use GLSL shaders in pre- or post-processing
|
|
11
|
+
- Combine ML with traditional graphics pipelines
|
|
12
|
+
|
|
13
|
+
# Basic Usage
|
|
14
|
+
## Write a tensor into a texture
|
|
15
|
+
```python
|
|
16
|
+
import moderngl
|
|
17
|
+
import torch
|
|
18
|
+
import torchgl
|
|
19
|
+
|
|
20
|
+
# need a moderngl context, e.g.
|
|
21
|
+
moderngl.create_context(standalone=True)
|
|
22
|
+
|
|
23
|
+
# float16
|
|
24
|
+
tensor = torch.rand(1080, 1920, 4, device="cuda", dtype=torch.float16)
|
|
25
|
+
texture = torchgl.to_texture(tensor)
|
|
26
|
+
print(texture.size, texture.components, texture.dtype) # (1920, 1080) 4 f2
|
|
27
|
+
|
|
28
|
+
# uint8 with 2 channels
|
|
29
|
+
tensor = (255 * tensor[:, :, :2]).to(torch.uint8)
|
|
30
|
+
texture = torchgl.to_texture(tensor)
|
|
31
|
+
print(texture.size, texture.components, texture.dtype) # (1920, 1080) 2 f1
|
|
32
|
+
```
|
|
33
|
+
## Read a texture into a tensor
|
|
34
|
+
```python
|
|
35
|
+
import moderngl
|
|
36
|
+
import torchgl
|
|
37
|
+
|
|
38
|
+
ctx = moderngl.create_context(standalone=True)
|
|
39
|
+
texture = ctx.texture((1920, 1080), 1, dtype="f1")
|
|
40
|
+
tensor = torchgl.to_tensor(texture)
|
|
41
|
+
print(tensor.shape, tensor.dtype) # torch.Size([1080, 1920, 1]) torch.uint8
|
|
42
|
+
```
|
|
43
|
+
## Other examples
|
|
44
|
+
See [other examples](examples) here.
|
|
45
|
+
|
|
46
|
+
# Installation
|
|
47
|
+
`torchgl` is (will be) published on PyPI and can be installed with pip
|
|
48
|
+
|
|
49
|
+
```commandline
|
|
50
|
+
pip install torchgl
|
|
51
|
+
```
|
|
52
|
+
Before installing, make sure you have PyTorch with CUDA support.
|
|
53
|
+
|
|
54
|
+
This package depends on [CUDA Python](https://nvidia.github.io/cuda-python/latest/) bindings. It is recommended to
|
|
55
|
+
match the version you are using with PyTorch, e.g. if you want to use CUDA 12.8
|
|
56
|
+
```bash
|
|
57
|
+
pip install cuda-python==12.8
|
|
58
|
+
pip install torch --index-url https://download.pytorch.org/whl/cu128
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
# Texture Formats
|
|
62
|
+
`torchgl` supports 1, 2, or 4 component Textures (3 component textures are not supported by CUDA).
|
|
63
|
+
For most ModernGL dtypes, the converted Tensor type matches what you would use to supply pixel data.
|
|
64
|
+
|
|
65
|
+
| ModernGL dtype | PyTorch dtype |
|
|
66
|
+
|----------------|----------------|
|
|
67
|
+
| f1 | uint8 |
|
|
68
|
+
| f2 | half |
|
|
69
|
+
| f4 | float32 |
|
|
70
|
+
| u1 | uint8 |
|
|
71
|
+
| u2 | uint16 |
|
|
72
|
+
| u4 | uint32 |
|
|
73
|
+
| i1 | int8 |
|
|
74
|
+
| i2 | int16 |
|
|
75
|
+
| i4 | int32 |
|
|
76
|
+
|
|
77
|
+
The exceptions are "ni1", and "ni2" which are really unsigned internally, but the pixel data
|
|
78
|
+
is expected to be signed (GL_BYTE or GL_SHORT), see https://github.com/moderngl/moderngl/blob/main/src/moderngl.cpp#L800.
|
|
79
|
+
We suggest avoiding these types unless you really know what you are doing.
|
|
80
|
+
|
|
81
|
+
Also, if you override the ModernGL internal format, conversion may not behave as expected.
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
# Advanced Usage
|
|
85
|
+
For more control of the resource management and synchronization between CUDA and OpenGL,
|
|
86
|
+
you can allocate your textures and buffers ahead of time and register them once for interop using `register()`. Then, use
|
|
87
|
+
`map()` and `unmap()` to efficiently pipeline the CUDA and OpenGL work and reduce unnecessary synchronization points.
|
|
88
|
+
|
|
89
|
+
Streams are also supported, although you should be aware of how they work with the PyTorch allocation of CUDA tensors,
|
|
90
|
+
see [docs here](https://docs.pytorch.org/docs/stable/notes/cuda.html#cuda-semantics).
|
|
91
|
+
|
|
92
|
+
# Related Packages
|
|
93
|
+
[torch2moderngl](https://github.com/geospaitial-lab/torch2moderngl) provides similar basic usage for Textures only.
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "torchgl"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "Interop between ModernGL and PyTorch CUDA"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
|
|
8
|
+
authors = [
|
|
9
|
+
{ name = "Adam Alcolado", email = "adam.alcolado@mtl.ai" }
|
|
10
|
+
]
|
|
11
|
+
|
|
12
|
+
dependencies = [
|
|
13
|
+
"cuda-python",
|
|
14
|
+
"moderngl",
|
|
15
|
+
"numpy",
|
|
16
|
+
"torch",
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
[dependency-groups]
|
|
20
|
+
dev = [
|
|
21
|
+
"pytest>=9.0.2",
|
|
22
|
+
"ruff>=0.15.0",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
[tool.uv.sources]
|
|
26
|
+
torch = [
|
|
27
|
+
{ index = "pytorch-cu130", marker = "sys_platform == 'linux' or sys_platform == 'win32'" },
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[[tool.uv.index]]
|
|
31
|
+
name = "pytorch-cu130"
|
|
32
|
+
url = "https://download.pytorch.org/whl/cu130"
|
|
33
|
+
explicit = true
|
|
34
|
+
|
|
35
|
+
[build-system]
|
|
36
|
+
requires = ["uv_build>=0.9.7,<0.10.0"]
|
|
37
|
+
build-backend = "uv_build"
|
|
@@ -0,0 +1,528 @@
|
|
|
1
|
+
from typing import Any, Literal
|
|
2
|
+
|
|
3
|
+
import cuda.bindings.runtime as cudart
|
|
4
|
+
import moderngl
|
|
5
|
+
import torch
|
|
6
|
+
|
|
7
|
+
if not torch.cuda.is_available():
|
|
8
|
+
raise RuntimeError("PyTorch with CUDA backend is required")
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _check_cuda_error(result: Any):
|
|
12
|
+
# cuda calls should return a tuple with the error has the first item
|
|
13
|
+
assert isinstance(result, tuple)
|
|
14
|
+
err: cudart.cudaError_t = result[0]
|
|
15
|
+
if err != 0:
|
|
16
|
+
_, msg = cudart.cudaGetErrorString(err)
|
|
17
|
+
raise RuntimeError(f"CUDA error: {msg.decode('ascii')}")
|
|
18
|
+
if len(result) == 1:
|
|
19
|
+
return None
|
|
20
|
+
elif len(result) == 2:
|
|
21
|
+
return result[1]
|
|
22
|
+
else:
|
|
23
|
+
return result[1:]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# Access mode for a registered object: "r" read-only, "w" write-only, "rw" both.
Mode = Literal["r", "w", "rw"]

# preferred ModernGL dtype for a given torch tensor dtype
# NOTE(review): uint8 maps to "f1" (normalized color) rather than "u1" —
# `to_texture()` uses this table to pick the format when it creates a texture.
_torch_to_gl_dtype = {
    torch.uint8: "f1",
    torch.uint16: "u2",
    torch.uint32: "u4",
    torch.int8: "i1",
    torch.int16: "i2",
    torch.int32: "i4",
    torch.float16: "f2",
    torch.float32: "f4",
}
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _create_cuda_descriptor(
    c: int, dtype: torch.dtype
) -> tuple[int, int, int, int, cudart.cudaChannelFormatKind]:
    """
    Build the cudaChannelFormatDesc fields matching `c` channels of `dtype`.

    Returns the per-component bit depths (x, y, z, w) followed by the format
    kind (float, signed, or unsigned). Components beyond `c` get depth 0.
    """
    if c not in (1, 2, 4):
        raise ValueError(f"channels must be 1, 2, or 4, got {c}")

    # pick the CUDA format kind; floats first, since is_signed is also
    # true for floating-point dtypes
    if dtype.is_floating_point:
        kind = cudart.cudaChannelFormatKind.cudaChannelFormatKindFloat
        depth = torch.finfo(dtype).bits
    else:
        depth = torch.iinfo(dtype).bits
        if dtype.is_signed:
            kind = cudart.cudaChannelFormatKind.cudaChannelFormatKindSigned
        else:
            kind = cudart.cudaChannelFormatKind.cudaChannelFormatKindUnsigned

    # first `c` components carry the dtype's bit depth, the rest are unused
    x, y, z, w = (depth if i < c else 0 for i in range(4))

    return x, y, z, w, kind
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
# inverse lookup: cudaChannelFormatDesc tuple (x, y, z, w bit depths + kind)
# -> (components, torch dtype); enumerates every supported combination so
# `to_tensor()` can recover the tensor layout from a mapped CUDA array
_cuda_descriptor_to_torch = {
    _create_cuda_descriptor(c, dtype): (c, dtype)
    for dtype in _torch_to_gl_dtype.keys()
    for c in (1, 2, 4)
}
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _object_key(
    obj: moderngl.Texture | moderngl.Buffer,
) -> tuple[type[moderngl.Texture | moderngl.Buffer], int]:
    """Return a hashable registry key (ModernGL type, GL object name) for `obj`."""
    for kind in (moderngl.Texture, moderngl.Buffer):
        if isinstance(obj, kind):
            return kind, obj.glo
    raise ValueError("Object is not a Texture or a Buffer")
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
# module-level registry of objects currently registered for CUDA interop:
# (ModernGL type, GL object name) -> (CUDA graphics resource, access mode)
_registered_objects: dict[
    tuple[type[moderngl.Texture | moderngl.Buffer], int],
    tuple[cudart.cudaGraphicsResource_t, Mode],
] = dict()
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def register(obj: moderngl.Texture | moderngl.Buffer, mode: Mode):
    """
    Register a ModernGL Texture or Buffer for CUDA interoperability.

    This function is provided for advanced usage. For basic usage, the object will automatically be registered
    and unregistered as needed.

    If you manually register an object, you are also responsible for mapping and unmapping the object as required.

    Parameters
    ----------
    obj : moderngl.Texture | moderngl.Buffer
        The ModernGL object to register. For textures, only 1, 2, or 4 components
        are supported.
    mode : {"r", "w", "rw"}
        Access mode describing how PyTorch-CUDA will use the object:
        - "r" : read-only (usable with `to_tensor()`)
        - "w" : write-only (usable with `to_texture() or to_buffer()`)
        - "rw" : read–write

    Raises
    ------
    ValueError
        If the object is already registered, the mode is invalid, or the
        texture has 3 components.
    """
    key = _object_key(obj)
    if key in _registered_objects:
        raise ValueError("Object already registered")

    if mode not in (
        "r",
        "w",
        "rw",
    ):
        raise ValueError(f"Mode {mode} is not one of 'r', 'w', 'rw'")

    # translate the access mode into the corresponding CUDA register flag;
    # "w" uses WriteDiscard, which tells CUDA the previous contents may be dropped
    flags = {
        "r": cudart.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsReadOnly,
        "w": cudart.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsWriteDiscard,
        "rw": cudart.cudaGraphicsRegisterFlags.cudaGraphicsRegisterFlagsNone,
    }[mode]

    if isinstance(obj, moderngl.Texture):
        # CUDA graphics interop cannot map 3-component (RGB) GL textures
        if obj.components == 3:
            raise ValueError("Textures with 3 components are not supported")

        # GL_TEXTURE_2D, hard-coded so no GL binding import is needed
        # see https://raw.githubusercontent.com/KhronosGroup/OpenGL-Registry/refs/heads/main/xml/gl.xml
        _GL_TEXTURE_2D = 0x0DE1

        resource = _check_cuda_error(
            cudart.cudaGraphicsGLRegisterImage(obj.glo, _GL_TEXTURE_2D, flags)
        )

    elif isinstance(obj, moderngl.Buffer):
        resource = _check_cuda_error(
            cudart.cudaGraphicsGLRegisterBuffer(obj.glo, flags)
        )
    else:
        # _object_key() above already rejected anything else
        assert False  # unreachable

    _registered_objects[key] = (resource, mode)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def unregister(obj: moderngl.Texture | moderngl.Buffer):
    """
    Unregister a ModernGL Texture or Buffer from CUDA interoperability.

    Provided for advanced usage; the basic `to_*` helpers register and
    unregister automatically.

    Parameters
    ----------
    obj : moderngl.Texture | moderngl.Buffer
        The ModernGL object to unregister. Must have been registered previously with `register()`.

    Raises
    ------
    ValueError
        If the object was never registered.
    """
    key = _object_key(obj)
    if key not in _registered_objects:
        raise ValueError("Object not registered")

    resource = _registered_objects[key][0]

    # release the CUDA side first; only drop the registry entry on success
    _check_cuda_error(cudart.cudaGraphicsUnregisterResource(resource))
    _registered_objects.pop(key)
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
# NOTE: shadows the builtin `map` within this module (public API name, kept as-is)
def map(obj: moderngl.Texture | moderngl.Buffer):
    """
    Map a registered ModernGL Texture or Buffer for access by CUDA.

    Mapping ensures all pending OpenGL operations on the object complete before
    work begins in the current CUDA stream (`torch.cuda.current_stream()`).

    Provided for advanced usage; the basic `to_*` helpers map and unmap
    automatically.

    Parameters
    ----------
    obj : moderngl.Texture | moderngl.Buffer
        The object to map. It must have been previously registered using `register()`.

    Raises
    ------
    ValueError
        If the object was never registered.
    """
    key = _object_key(obj)
    if key not in _registered_objects:
        raise ValueError("Object not registered")

    resource = _registered_objects[key][0]

    # map on PyTorch's current stream so ordering is relative to torch work
    current = cudart.cudaStream_t(torch.cuda.current_stream().cuda_stream)
    _check_cuda_error(cudart.cudaGraphicsMapResources(1, resource, current))
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def unmap(obj: moderngl.Texture | moderngl.Buffer):
    """
    Unmap a ModernGL Texture or Buffer from CUDA access.

    Unmapping ensures all work queued on the current CUDA stream
    (`torch.cuda.current_stream()`) completes before any OpenGL work starts.

    Provided for advanced usage; the basic `to_*` helpers map and unmap
    automatically.

    Parameters
    ----------
    obj : moderngl.Texture | moderngl.Buffer
        The object to unmap. It must have been previously mapped using `map()`.

    Raises
    ------
    ValueError
        If the object was never registered.
    """
    key = _object_key(obj)
    if key not in _registered_objects:
        raise ValueError("Object not registered")

    resource = _registered_objects[key][0]

    # unmap on PyTorch's current stream so GL waits for queued torch work
    current = cudart.cudaStream_t(torch.cuda.current_stream().cuda_stream)
    _check_cuda_error(cudart.cudaGraphicsUnmapResources(1, resource, current))
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def to_tensor(
    obj: moderngl.Texture | moderngl.Buffer, tensor: torch.Tensor | None = None
) -> torch.Tensor:
    """
    Copy a ModernGL Texture or Buffer to a CUDA Tensor.

    In the case of a Texture object, the returned Tensor will have shape (H, W, C), where
    (W, H) is the size of the texture and C is the number of components (1, 2, or 4).
    An optional output tensor can be provided, but it must match the expected shape and dtype. Note that with the
    current implementation, there is still an intermediate tensor created.

    In the case of a Buffer object, the returned Tensor will have shape (N,) where N is the Buffer size in bytes,
    and dtype uint8. If an output Tensor is provided, it must be contiguous and have the same size as
    the buffer in bytes, but may have any shape or dtype. Unlike the case for Textures, no intermediate Tensors
    will need to be created when an output Tensor is provided.

    If the Texture or Buffer is not registered, it will temporarily be registered and mapped for the copy.

    Parameters
    ----------
    obj : moderngl.Texture | moderngl.Buffer
        The ModernGL object to read from.
    tensor : torch.Tensor, optional
        Output tensor to copy into; see above for shape/dtype requirements.

    Returns
    -------
    torch.Tensor
        A CUDA tensor containing the object's data.

    Raises
    ------
    ValueError
        If the output tensor is invalid, or the object was registered without read access.
    """
    device = torch.device(f"cuda:{torch.cuda.current_device()}")
    stream = cudart.cudaStream_t(torch.cuda.current_stream().cuda_stream)

    if tensor is not None:
        if not tensor.is_cuda:
            raise ValueError("Tensor must be on CUDA device")

        if not tensor.is_contiguous():
            raise ValueError("Tensor must be contiguous")

    key = _object_key(obj)
    is_registered_by_user = key in _registered_objects
    if not is_registered_by_user:
        register(obj, "r")
        map(obj)

    # BUG FIX: the temporary registration was previously leaked whenever an
    # error was raised mid-copy; try/finally guarantees cleanup
    try:
        resource, mode = _registered_objects[key]
        if mode not in ("r", "rw"):
            raise ValueError(f"Invalid access mode '{mode}' (need 'r' or 'rw')")

        if isinstance(obj, moderngl.Texture):
            array = _check_cuda_error(
                cudart.cudaGraphicsSubResourceGetMappedArray(resource, 0, 0)
            )

            desc, extent, _flags = _check_cuda_error(cudart.cudaArrayGetInfo(array))

            descriptor = (desc.x, desc.y, desc.z, desc.w, desc.f)
            # every supported (channels, dtype) combination is enumerated in
            # _cuda_descriptor_to_torch, so an unknown descriptor is a bug
            assert descriptor in _cuda_descriptor_to_torch
            c, dtype = _cuda_descriptor_to_torch[descriptor]
            w, h = extent.width, extent.height
            # sanity: ModernGL's metadata must agree with the underlying CUDA array
            assert ((w, h), c) == (obj.size, obj.components)

            _tensor = torch.empty((h, w, c), dtype=dtype, device=device)
            b = _tensor.dtype.itemsize

            _check_cuda_error(
                cudart.cudaMemcpy2DFromArrayAsync(
                    _tensor.data_ptr(),
                    w * c * b,  # destination pitch in bytes
                    array,
                    0,
                    0,
                    w * c * b,  # copied row width in bytes
                    h,
                    cudart.cudaMemcpyKind.cudaMemcpyDeviceToDevice,
                    stream,
                )
            )
            # flip vertically: GL texture row order is bottom-up relative to
            # the tensor convention used here
            _tensor = _tensor.flip(dims=[0]).contiguous()

            if tensor is not None:
                if tensor.shape != _tensor.shape:
                    raise ValueError(
                        f"Expected tensor to have shape {tuple(_tensor.shape)}, but found {(tuple(tensor.shape))}"
                    )
                if tensor.dtype != _tensor.dtype:
                    raise ValueError(
                        f"Expected tensor to have dtype {_tensor.dtype}, but found {tensor.dtype}"
                    )
                tensor.copy_(_tensor)
            else:
                tensor = _tensor

        elif isinstance(obj, moderngl.Buffer):
            ptr, size = _check_cuda_error(
                cudart.cudaGraphicsResourceGetMappedPointer(resource)
            )
            assert size == obj.size

            if tensor is None:
                # BUG FIX: was `torch.empty(size=size, ...)` with a bare int —
                # pass the shape as a 1-tuple, which torch always accepts for
                # the `size` keyword
                tensor = torch.empty((size,), device=device, dtype=torch.uint8)

            tensor_size = tensor.nelement() * tensor.element_size()

            if tensor_size != size:
                raise ValueError(
                    f"Buffer size {size} and tensor size {tensor_size} in bytes don't match"
                )

            _check_cuda_error(
                cudart.cudaMemcpyAsync(
                    tensor.data_ptr(),
                    ptr,
                    size,
                    cudart.cudaMemcpyKind.cudaMemcpyDeviceToDevice,
                    stream,
                )
            )

        else:
            assert False  # unreachable: _object_key() rejected anything else

    finally:
        # only release registrations this call created itself
        if not is_registered_by_user:
            unmap(obj)
            unregister(obj)

    return tensor
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
def to_texture(
    tensor: torch.Tensor, texture: moderngl.Texture | None = None
) -> moderngl.Texture:
    """
    Copy a CUDA tensor into a ModernGL texture.

    If no texture is provided, a new one is created (from the current context) with dimensions and format
    inferred from the tensor. The tensor must have shape (H, W, C) with
    1, 2, or 4 channels, and its dtype must correspond with the ModernGL format of the texture.

    If the texture is not registered, it will temporarily be registered and mapped for the copy.

    Parameters
    ----------
    tensor : torch.Tensor
        A CUDA tensor containing the pixel data.
    texture : moderngl.Texture, optional
        The ModernGL texture to store the pixel data in.

    Returns
    -------
    moderngl.Texture
        The texture containing the tensor's pixel data (the provided one, or a
        newly created one).

    Raises
    ------
    ValueError
        If the tensor is not a supported CUDA image tensor, or the texture
        does not match it, or the texture was registered without write access.
    """
    if not tensor.is_cuda:
        raise ValueError("Tensor must be on CUDA device")

    if tensor.ndim != 3:
        raise ValueError("Tensor must have 3 dims")

    h, w, c = tensor.shape
    if c not in (1, 2, 4):
        raise ValueError(
            f"Only tensors with 1, 2, or 4 channels are supported, got {c}"
        )

    if tensor.dtype not in _torch_to_gl_dtype:
        raise ValueError(f"Tensor dtype must be in {list(_torch_to_gl_dtype.keys())}")
    b = tensor.dtype.itemsize  # bytes per element

    if texture is None:
        ctx = moderngl.get_context()
        texture = ctx.texture(
            (w, h), components=c, dtype=_torch_to_gl_dtype[tensor.dtype]
        )  # assume the first format is preferred

    if (texture.size, texture.components) != ((w, h), c):
        raise ValueError(
            f"Texture with size {texture.size} and components {texture.components} does not correspond to tensor with shape {tuple(tensor.shape)}"
        )

    key = _object_key(texture)
    is_already_registered = key in _registered_objects
    if not is_already_registered:
        register(texture, "w")
        map(texture)

    # BUG FIX: the temporary registration was previously leaked when a CUDA
    # call raised mid-copy; try/finally guarantees cleanup
    try:
        resource, mode = _registered_objects[key]

        if mode not in ("w", "rw"):
            raise ValueError(f"Invalid texture access mode '{mode}' (need 'w' or 'rw')")

        array = _check_cuda_error(
            cudart.cudaGraphicsSubResourceGetMappedArray(resource, 0, 0)
        )

        stream = cudart.cudaStream_t(torch.cuda.current_stream().cuda_stream)

        # flip vertically to match GL's bottom-up row order, and make the
        # source one dense run of bytes for the 2D copy
        tensor = tensor.flip(dims=[0]).contiguous()
        _check_cuda_error(
            cudart.cudaMemcpy2DToArrayAsync(
                array,
                0,
                0,
                tensor.data_ptr(),
                w * c * b,  # source pitch in bytes
                w * c * b,  # copied row width in bytes
                h,
                cudart.cudaMemcpyKind.cudaMemcpyDeviceToDevice,
                stream,
            )
        )
    finally:
        # only release registrations this call created itself
        if not is_already_registered:
            unmap(texture)
            unregister(texture)

    return texture
|
|
458
|
+
|
|
459
|
+
|
|
460
|
+
def to_buffer(tensor: torch.Tensor, buffer: moderngl.Buffer | None = None) -> moderngl.Buffer:
    """
    Copy a CUDA Tensor into a ModernGL Buffer.

    If no Buffer is provided, a new one is created (from the current context) with the same size in bytes as
    the incoming Tensor. If a Buffer is provided, it must match the size in bytes of the Tensor.

    If the Buffer is not registered, it will temporarily be registered and mapped for the copy.

    Parameters
    ----------
    tensor : torch.Tensor
        A CUDA tensor containing the data to copy.
    buffer : moderngl.Buffer, optional
        The ModernGL buffer to store the data in.

    Returns
    -------
    moderngl.Buffer
        The buffer containing the tensor's data (the provided one, or a newly
        created one).

    Raises
    ------
    ValueError
        If the tensor is not on CUDA, the sizes don't match, or the buffer was
        registered without write access.
    """
    if not tensor.is_cuda:
        raise ValueError("Tensor must be on CUDA device")

    # flatten + contiguous so the copy source is one dense run of bytes
    tensor = tensor.flatten().contiguous()
    tensor_size = tensor.nelement() * tensor.element_size()

    if buffer is None:
        ctx = moderngl.get_context()
        buffer = ctx.buffer(reserve=tensor_size)

    if tensor_size != buffer.size:
        raise ValueError(
            f"Tensor has size {tensor_size} in bytes, but buffer has size {buffer.size}"
        )

    key = _object_key(buffer)
    is_already_registered = key in _registered_objects
    if not is_already_registered:
        register(buffer, "w")
        map(buffer)

    # BUG FIX: the temporary registration was previously leaked when an error
    # was raised mid-copy; try/finally guarantees cleanup
    try:
        resource, mode = _registered_objects[key]

        if mode not in ("w", "rw"):
            raise ValueError(f"Invalid access mode '{mode}' (need 'w' or 'rw')")

        ptr, size = _check_cuda_error(cudart.cudaGraphicsResourceGetMappedPointer(resource))
        assert size == buffer.size

        stream = cudart.cudaStream_t(torch.cuda.current_stream().cuda_stream)

        _check_cuda_error(
            cudart.cudaMemcpyAsync(
                ptr,
                tensor.data_ptr(),
                size,
                cudart.cudaMemcpyKind.cudaMemcpyDeviceToDevice,
                stream,
            )
        )
    finally:
        # only release registrations this call created itself
        if not is_already_registered:
            unmap(buffer)
            unregister(buffer)

    return buffer
|
|
File without changes
|