warp-lang 1.6.0-py3-none-manylinux2014_aarch64.whl → 1.6.1-py3-none-manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic.
- warp/bin/warp-clang.so +0 -0
- warp/bin/warp.so +0 -0
- warp/builtins.py +1 -1
- warp/codegen.py +10 -3
- warp/config.py +65 -21
- warp/context.py +202 -65
- warp/examples/core/example_marching_cubes.py +1 -1
- warp/examples/core/example_mesh.py +1 -1
- warp/examples/core/example_wave.py +1 -1
- warp/examples/sim/example_cloth_self_contact.py +81 -27
- warp/examples/tile/example_tile_nbody.py +26 -15
- warp/native/clang/clang.cpp +1 -1
- warp/native/crt.h +1 -0
- warp/native/mat.h +16 -3
- warp/native/tile.h +12 -8
- warp/render/render_opengl.py +23 -15
- warp/render/render_usd.py +10 -2
- warp/sim/collide.py +29 -16
- warp/sim/import_urdf.py +20 -5
- warp/sim/integrator_featherstone.py +4 -11
- warp/sim/model.py +62 -59
- warp/sim/render.py +2 -2
- warp/stubs.py +1 -1
- warp/tests/test_array.py +26 -0
- warp/tests/test_collision.py +6 -6
- warp/tests/test_examples.py +7 -1
- warp/tests/test_launch.py +77 -26
- warp/tests/test_mat.py +75 -1
- warp/tests/test_overwrite.py +4 -3
- warp/tests/test_tile_load.py +44 -1
- warp/thirdparty/unittest_parallel.py +3 -0
- warp/types.py +66 -68
- {warp_lang-1.6.0.dist-info → warp_lang-1.6.1.dist-info}/METADATA +34 -17
- {warp_lang-1.6.0.dist-info → warp_lang-1.6.1.dist-info}/RECORD +37 -37
- {warp_lang-1.6.0.dist-info → warp_lang-1.6.1.dist-info}/WHEEL +1 -1
- {warp_lang-1.6.0.dist-info → warp_lang-1.6.1.dist-info}/LICENSE.md +0 -0
- {warp_lang-1.6.0.dist-info → warp_lang-1.6.1.dist-info}/top_level.txt +0 -0
warp/bin/warp-clang.so
CHANGED
Binary file

warp/bin/warp.so
CHANGED
Binary file

warp/builtins.py
CHANGED
@@ -4173,7 +4173,7 @@ add_builtin(
     input_types={"state": uint32},
     value_type=int,
     group="Random",
-    doc="Return a random integer in the range [
+    doc="Return a random integer in the range [-2^31, 2^31).",
 )
 add_builtin(
     "randi",

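The corrected docstring above describes the single-argument overload of wp.randi(). A minimal usage sketch (this kernel is illustrative and not part of the package):

    import warp as wp

    @wp.kernel
    def sample_ints(seed: int, out: wp.array(dtype=int)):
        tid = wp.tid()
        state = wp.rand_init(seed, tid)
        # without explicit bounds, randi(state) returns a signed 32-bit value in [-2^31, 2^31)
        out[tid] = wp.randi(state)
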
warp/codegen.py
CHANGED
@@ -2278,15 +2278,22 @@ class Adjoint:
         out = adj.add_call(func, args, kwargs, type_args, min_outputs=min_outputs)
 
         if warp.config.verify_autograd_array_access:
+            # Extract the types and values passed as arguments to the function call.
+            arg_types = tuple(strip_reference(get_arg_type(x)) for x in args)
+            kwarg_types = {k: strip_reference(get_arg_type(v)) for k, v in kwargs.items()}
+
+            # Resolve the exact function signature among any existing overload.
+            resolved_func = adj.resolve_func(func, arg_types, kwarg_types, min_outputs)
+
             # update arg read/write states according to what happens to that arg in the called function
-            if hasattr(
+            if hasattr(resolved_func, "adj"):
                 for i, arg in enumerate(args):
-                    if
+                    if resolved_func.adj.args[i].is_write:
                         kernel_name = adj.fun_name
                         filename = adj.filename
                         lineno = adj.lineno + adj.fun_lineno
                         arg.mark_write(kernel_name=kernel_name, filename=filename, lineno=lineno)
-                    if
+                    if resolved_func.adj.args[i].is_read:
                         arg.mark_read()
 
         return out

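The change above resolves the exact overload of a called function before reading its per-argument read/write flags, which is only exercised when wp.config.verify_autograd_array_access is enabled. A minimal sketch of the user-facing setting (the function and kernel are illustrative):

    import warp as wp

    # opt in to array overwrite tracking before kernels are compiled
    wp.config.verify_autograd_array_access = True

    @wp.func
    def scale_in_place(x: wp.array(dtype=float), i: int, s: float):
        # this function writes to its array argument, so the verifier marks it as written
        x[i] = x[i] * s

    @wp.kernel
    def scale_kernel(x: wp.array(dtype=float), s: float):
        tid = wp.tid()
        scale_in_place(x, tid, s)
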
warp/config.py
CHANGED
@@ -7,64 +7,108 @@
 
 from typing import Optional
 
-version: str = "1.6.
+version: str = "1.6.1"
 """Warp version string"""
 
 verify_fp: bool = False
-"""
-
+"""Enable floating-point verification for inputs and outputs.
+
+When enabled, checks if all values are finite before and after operations.
+
+Note: Enabling this flag impacts performance.
 """
 
 verify_cuda: bool = False
-"""
-
+"""Enable CUDA error checking after kernel launches.
+
+This setting cannot be used during graph capture
+
+Note: Enabling this flag impacts performance
 """
 
 print_launches: bool = False
-"""
-
+"""Enable detailed kernel launch logging.
+
+Prints information about each kernel launch including:
+
+- Launch dimensions
+- Input/output parameters
+- Target device
+
+Note: Enabling this flag impacts performance.
 """
 
 mode: str = "release"
-"""
-
+"""Compilation mode for Warp kernels.
+
+Args:
+    mode: Either ``"release"`` or ``"debug"``.
+
+Note: Debug mode may impact performance.
 """
 
 verbose: bool = False
-"""
+"""Enable detailed logging during code generation and compilation."""
 
 verbose_warnings: bool = False
-"""
+"""Enable extended warning messages with source location information."""
 
 quiet: bool = False
-"""
+"""Disable Warp module initialization messages.
+
+Error messages and warnings remain unaffected.
+"""
 
 verify_autograd_array_access: bool = False
-"""
+"""Enable warnings for array overwrites that may affect gradient computation."""
+
+enable_vector_component_overwrites: bool = False
+"""Allow multiple writes to vector/matrix/quaternion components.
+
+Note: Enabling this may significantly increase kernel compilation time.
+"""
 
 cache_kernels: bool = True
-"""
+"""Enable kernel caching between application launches."""
 
 kernel_cache_dir: Optional[str] = None
-"""
+"""Directory path for storing compiled kernel cache.
+
+If ``None``, the path is determined in the following order:
+
+1. ``WARP_CACHE_PATH`` environment variable.
+2. System's user cache directory (via ``appdirs.user_cache_directory``).
+
+Note: Subdirectories prefixed with ``wp_`` will be created in this location.
+"""
 
 cuda_output: Optional[str] = None
-"""Preferred CUDA output format for
+"""Preferred CUDA output format for kernel compilation.
+
+Args:
+    cuda_output: One of {``None``, ``"ptx"``, ``"cubin"``}. If ``None``, format is auto-determined.
+"""
 
 ptx_target_arch: int = 75
-"""Target architecture for PTX generation
+"""Target architecture version for PTX generation.
+
+Defaults to minimum architecture version supporting all Warp features.
+"""
 
 enable_backward: bool = True
-"""
+"""Enable compilation of kernel backward passes."""
 
 llvm_cuda: bool = False
-"""Use Clang/LLVM instead of NVRTC
+"""Use Clang/LLVM compiler instead of NVRTC for CUDA compilation."""
 
 enable_graph_capture_module_load_by_default: bool = True
-"""
+"""Enable automatic module loading before graph capture.
+
+Only affects systems with CUDA driver versions below 12.3.
+"""
 
 enable_mempools_at_init: bool = True
-"""
+"""Enable CUDA memory pools during device initialization when supported."""
 
 max_unroll: int = 16
 """Maximum unroll factor for loops."""

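The flags documented above are plain module-level attributes of warp.config. A minimal sketch of how they are typically set before any kernels are compiled or loaded (the chosen values are illustrative):

    import warp as wp

    wp.config.mode = "debug"           # or "release" (the default)
    wp.config.verify_fp = True         # check that inputs/outputs are finite (slower)
    wp.config.print_launches = True    # log dimensions, parameters, and device per launch
    wp.config.kernel_cache_dir = None  # fall back to WARP_CACHE_PATH, then the user cache dir

    wp.init()
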
warp/context.py
CHANGED
@@ -34,6 +34,7 @@ import warp
 import warp.build
 import warp.codegen
 import warp.config
+from warp.types import launch_bounds_t
 
 # represents either a built-in or user-defined function
 
@@ -5187,8 +5188,23 @@ def pack_arg(kernel, arg_type, arg_name, value, device, adjoint=False):
 # represents all data required for a kernel launch
 # so that launches can be replayed quickly, use `wp.launch(..., record_cmd=True)`
 class Launch:
+    """Represents all data required for a kernel launch so that launches can be replayed quickly.
+
+    Users should not directly instantiate this class, instead use
+    ``wp.launch(..., record_cmd=True)`` to record a launch.
+    """
+
     def __init__(
-        self,
+        self,
+        kernel,
+        device: Device,
+        hooks: Optional[KernelHooks] = None,
+        params: Optional[Sequence[Any]] = None,
+        params_addr: Optional[Sequence[ctypes.c_void_p]] = None,
+        bounds: Optional[launch_bounds_t] = None,
+        max_blocks: int = 0,
+        block_dim: int = 256,
+        adjoint: bool = False,
     ):
         # retain the module executable so it doesn't get unloaded
         self.module_exec = kernel.module.load(device)
@@ -5201,13 +5217,14 @@ class Launch:
 
         # if not specified set a zero bound
         if not bounds:
-            bounds =
+            bounds = launch_bounds_t(0)
 
         # if not specified then build a list of default value params for args
         if not params:
             params = []
             params.append(bounds)
 
+            # Pack forward parameters
             for a in kernel.adj.args:
                 if isinstance(a.type, warp.types.array):
                     params.append(a.type.__ctype__())
@@ -5216,6 +5233,18 @@ class Launch:
                 else:
                     params.append(pack_arg(kernel, a.type, a.label, 0, device, False))
 
+            # Pack adjoint parameters if adjoint=True
+            if adjoint:
+                for a in kernel.adj.args:
+                    if isinstance(a.type, warp.types.array):
+                        params.append(a.type.__ctype__())
+                    elif isinstance(a.type, warp.codegen.Struct):
+                        params.append(a.type().__ctype__())
+                    else:
+                        # For primitive types in adjoint mode, initialize with 0
+                        params.append(pack_arg(kernel, a.type, a.label, 0, device, True))
+
+        # Create array of parameter addresses
         kernel_args = [ctypes.c_void_p(ctypes.addressof(x)) for x in params]
         kernel_params = (ctypes.c_void_p * len(kernel_args))(*kernel_args)
 
@@ -5225,13 +5254,30 @@ class Launch:
         self.hooks = hooks
         self.params = params
         self.params_addr = params_addr
-        self.device = device
-
-
-
+        self.device: Device = device
+        """The device to launch on.
+        This should not be changed after the launch object is created.
+        """
+
+        self.bounds: launch_bounds_t = bounds
+        """The launch bounds. Update with :meth:`set_dim`."""
+
+        self.max_blocks: int = max_blocks
+        """The maximum number of CUDA thread blocks to use."""
+
+        self.block_dim: int = block_dim
+        """The number of threads per block."""
 
-
-
+        self.adjoint: bool = adjoint
+        """Whether to run the adjoint kernel instead of the forward kernel."""
+
+    def set_dim(self, dim: Union[int, List[int], Tuple[int, ...]]):
+        """Set the launch dimensions.
+
+        Args:
+            dim: The dimensions of the launch.
+        """
+        self.bounds = launch_bounds_t(dim)
 
         # launch bounds always at index 0
         self.params[0] = self.bounds
@@ -5240,22 +5286,36 @@ class Launch:
         if self.params_addr:
             self.params_addr[0] = ctypes.c_void_p(ctypes.addressof(self.bounds))
 
-
-
+    def set_param_at_index(self, index: int, value: Any, adjoint: bool = False):
+        """Set a kernel parameter at an index.
+
+        Args:
+            index: The index of the param to set.
+            value: The value to set the param to.
+        """
         arg_type = self.kernel.adj.args[index].type
         arg_name = self.kernel.adj.args[index].label
 
-        carg = pack_arg(self.kernel, arg_type, arg_name, value, self.device,
+        carg = pack_arg(self.kernel, arg_type, arg_name, value, self.device, adjoint)
+
+        if adjoint:
+            params_index = index + len(self.kernel.adj.args) + 1
+        else:
+            params_index = index + 1
 
-        self.params[
+        self.params[params_index] = carg
 
         # for CUDA kernels we need to update the address to each arg
        if self.params_addr:
-            self.params_addr[
+            self.params_addr[params_index] = ctypes.c_void_p(ctypes.addressof(carg))
 
-
-
-
+    def set_param_at_index_from_ctype(self, index: int, value: Union[ctypes.Structure, int, float]):
+        """Set a kernel parameter at an index without any type conversion.
+
+        Args:
+            index: The index of the param to set.
+            value: The value to set the param to.
+        """
         if isinstance(value, ctypes.Structure):
             # not sure how to directly assign struct->struct without reallocating using ctypes
             self.params[index + 1] = value
@@ -5267,32 +5327,62 @@ class Launch:
         else:
             self.params[index + 1].__init__(value)
 
-
-
+    def set_param_by_name(self, name: str, value: Any, adjoint: bool = False):
+        """Set a kernel parameter by argument name.
+
+        Args:
+            name: The name of the argument to set.
+            value: The value to set the argument to.
+            adjoint: If ``True``, set the adjoint of this parameter instead of the forward parameter.
+        """
         for i, arg in enumerate(self.kernel.adj.args):
             if arg.label == name:
-                self.set_param_at_index(i, value)
+                self.set_param_at_index(i, value, adjoint)
+                return
+
+        raise ValueError(f"Argument '{name}' not found in kernel '{self.kernel.key}'")
 
-
-
+    def set_param_by_name_from_ctype(self, name: str, value: ctypes.Structure):
+        """Set a kernel parameter by argument name with no type conversions.
+
+        Args:
+            name: The name of the argument to set.
+            value: The value to set the argument to.
+        """
         # lookup argument index
         for i, arg in enumerate(self.kernel.adj.args):
             if arg.label == name:
                 self.set_param_at_index_from_ctype(i, value)
 
-
-
+    def set_params(self, values: Sequence[Any]):
+        """Set all parameters.
+
+        Args:
+            values: A list of values to set the params to.
+        """
         for i, v in enumerate(values):
             self.set_param_at_index(i, v)
 
-
-
+    def set_params_from_ctypes(self, values: Sequence[ctypes.Structure]):
+        """Set all parameters without performing type-conversions.
+
+        Args:
+            values: A list of ctypes or basic int / float types.
+        """
         for i, v in enumerate(values):
             self.set_param_at_index_from_ctype(i, v)
 
-    def launch(self, stream=None) ->
+    def launch(self, stream: Optional[Stream] = None) -> None:
+        """Launch the kernel.
+
+        Args:
+            stream: The stream to launch on.
+        """
         if self.device.is_cpu:
-            self.
+            if self.adjoint:
+                self.hooks.backward(*self.params)
+            else:
+                self.hooks.forward(*self.params)
         else:
             if stream is None:
                 stream = self.device.stream
@@ -5305,32 +5395,44 @@ class Launch:
             if graph is not None:
                 graph.retain_module_exec(self.module_exec)
 
-
-
-
-
-
-
-
-
-
-
+            if self.adjoint:
+                runtime.core.cuda_launch_kernel(
+                    self.device.context,
+                    self.hooks.backward,
+                    self.bounds.size,
+                    self.max_blocks,
+                    self.block_dim,
+                    self.hooks.backward_smem_bytes,
+                    self.params_addr,
+                    stream.cuda_stream,
+                )
+            else:
+                runtime.core.cuda_launch_kernel(
+                    self.device.context,
+                    self.hooks.forward,
+                    self.bounds.size,
+                    self.max_blocks,
+                    self.block_dim,
+                    self.hooks.forward_smem_bytes,
+                    self.params_addr,
+                    stream.cuda_stream,
+                )
 
 
 def launch(
     kernel,
-    dim:
+    dim: Union[int, Sequence[int]],
     inputs: Sequence = [],
     outputs: Sequence = [],
     adj_inputs: Sequence = [],
     adj_outputs: Sequence = [],
     device: Devicelike = None,
-    stream: Stream = None,
-    adjoint=False,
-    record_tape=True,
-    record_cmd=False,
-    max_blocks=0,
-    block_dim=256,
+    stream: Optional[Stream] = None,
+    adjoint: bool = False,
+    record_tape: bool = True,
+    record_cmd: bool = False,
+    max_blocks: int = 0,
+    block_dim: int = 256,
 ):
     """Launch a Warp kernel on the target device
 
@@ -5338,18 +5440,23 @@ def launch(
 
     Args:
         kernel: The name of a Warp kernel function, decorated with the ``@wp.kernel`` decorator
-        dim: The number of threads to launch the kernel, can be an integer
+        dim: The number of threads to launch the kernel, can be an integer or a
+            sequence of integers with a maximum of 4 dimensions.
         inputs: The input parameters to the kernel (optional)
         outputs: The output parameters (optional)
         adj_inputs: The adjoint inputs (optional)
         adj_outputs: The adjoint outputs (optional)
-        device: The device to launch on
-        stream: The stream to launch on
-        adjoint: Whether to run forward or backward pass (typically use False)
-        record_tape: When
-
-
-
+        device: The device to launch on.
+        stream: The stream to launch on.
+        adjoint: Whether to run forward or backward pass (typically use ``False``).
+        record_tape: When ``True``, the launch will be recorded the global
+            :class:`wp.Tape() <warp.Tape>` object when present.
+        record_cmd: When ``True``, the launch will return a :class:`Launch`
+            object. The launch will not occur until the user calls
+            :meth:`Launch.launch()`.
+        max_blocks: The maximum number of CUDA thread blocks to use.
+            Only has an effect for CUDA kernel launches.
+            If negative or zero, the maximum hardware value will be used.
        block_dim: The number of threads per block.
     """
 
@@ -5370,7 +5477,7 @@ def launch(
        print(f"kernel: {kernel.key} dim: {dim} inputs: {inputs} outputs: {outputs} device: {device}")
 
     # construct launch bounds
-    bounds =
+    bounds = launch_bounds_t(dim)
 
     if bounds.size > 0:
         # first param is the number of threads
@@ -5427,6 +5534,17 @@ def launch(
                    f"Failed to find backward kernel '{kernel.key}' from module '{kernel.module.name}' for device '{device}'"
                )
 
+            if record_cmd:
+                launch = Launch(
+                    kernel=kernel,
+                    hooks=hooks,
+                    params=params,
+                    params_addr=None,
+                    bounds=bounds,
+                    device=device,
+                    adjoint=adjoint,
+                )
+                return launch
             hooks.backward(*params)
 
         else:
@@ -5437,7 +5555,13 @@ def launch(
 
             if record_cmd:
                launch = Launch(
-                    kernel=kernel,
+                    kernel=kernel,
+                    hooks=hooks,
+                    params=params,
+                    params_addr=None,
+                    bounds=bounds,
+                    device=device,
+                    adjoint=adjoint,
                )
                return launch
            else:
@@ -5464,16 +5588,30 @@ def launch(
                        f"Failed to find backward kernel '{kernel.key}' from module '{kernel.module.name}' for device '{device}'"
                    )
 
-
-
-
-
-
-
-
-
-
-
+                if record_cmd:
+                    launch = Launch(
+                        kernel=kernel,
+                        hooks=hooks,
+                        params=params,
+                        params_addr=kernel_params,
+                        bounds=bounds,
+                        device=device,
+                        max_blocks=max_blocks,
+                        block_dim=block_dim,
+                        adjoint=adjoint,
+                    )
+                    return launch
+                else:
+                    runtime.core.cuda_launch_kernel(
+                        device.context,
+                        hooks.backward,
+                        bounds.size,
+                        max_blocks,
+                        block_dim,
+                        hooks.backward_smem_bytes,
+                        kernel_params,
+                        stream.cuda_stream,
+                    )
 
        else:
            if hooks.forward is None:
@@ -5493,7 +5631,6 @@ def launch(
                    block_dim=block_dim,
                )
                return launch
-
            else:
                # launch
                runtime.core.cuda_launch_kernel(

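The Launch object documented above is obtained by recording a launch with wp.launch(..., record_cmd=True). A minimal sketch of recording and replaying a launch using the methods shown in this diff (the kernel itself is illustrative):

    import warp as wp

    @wp.kernel
    def scale(x: wp.array(dtype=float), s: float):
        tid = wp.tid()
        x[tid] = x[tid] * s

    x = wp.zeros(1024, dtype=float)

    # record the launch instead of executing it immediately
    cmd = wp.launch(scale, dim=x.shape[0], inputs=[x, 2.0], record_cmd=True)

    # replay it, updating parameters between replays as needed
    cmd.launch()
    cmd.set_param_by_name("s", 4.0)
    cmd.launch()
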
warp/examples/core/example_mesh.py
CHANGED
@@ -138,7 +138,7 @@ class Example:
                 name="mesh",
                 points=self.mesh.points.numpy(),
                 indices=self.mesh.indices.numpy(),
-                colors=(
+                colors=(0.35, 0.55, 0.9),
             )
             self.renderer.render_points(
                 name="points", points=self.positions.numpy(), radius=self.sim_margin, colors=(0.8, 0.3, 0.2)

warp/examples/core/example_wave.py
CHANGED
@@ -223,7 +223,7 @@ class Example:
             vertices = self.sim_verts.numpy()
 
             self.renderer.begin_frame(self.sim_time)
-            self.renderer.render_mesh("surface", vertices, self.indices, colors=(
+            self.renderer.render_mesh("surface", vertices, self.indices, colors=(0.35, 0.55, 0.9))
             self.renderer.render_sphere(
                 "sphere",
                 (self.cx * self.grid_size, 0.0, self.cy * self.grid_size),