warp-lang 1.6.0-py3-none-manylinux2014_aarch64.whl → 1.6.1-py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


warp/bin/warp-clang.so CHANGED
Binary file
warp/bin/warp.so CHANGED
Binary file
warp/builtins.py CHANGED
@@ -4173,7 +4173,7 @@ add_builtin(
     input_types={"state": uint32},
     value_type=int,
    group="Random",
-    doc="Return a random integer in the range [0, 2^32).",
+    doc="Return a random integer in the range [-2^31, 2^31).",
 )
 add_builtin(
     "randi",
warp/codegen.py CHANGED
@@ -2278,15 +2278,22 @@ class Adjoint:
         out = adj.add_call(func, args, kwargs, type_args, min_outputs=min_outputs)
 
         if warp.config.verify_autograd_array_access:
+            # Extract the types and values passed as arguments to the function call.
+            arg_types = tuple(strip_reference(get_arg_type(x)) for x in args)
+            kwarg_types = {k: strip_reference(get_arg_type(v)) for k, v in kwargs.items()}
+
+            # Resolve the exact function signature among any existing overload.
+            resolved_func = adj.resolve_func(func, arg_types, kwarg_types, min_outputs)
+
             # update arg read/write states according to what happens to that arg in the called function
-            if hasattr(func, "adj"):
+            if hasattr(resolved_func, "adj"):
                 for i, arg in enumerate(args):
-                    if func.adj.args[i].is_write:
+                    if resolved_func.adj.args[i].is_write:
                         kernel_name = adj.fun_name
                         filename = adj.filename
                         lineno = adj.lineno + adj.fun_lineno
                         arg.mark_write(kernel_name=kernel_name, filename=filename, lineno=lineno)
-                    if func.adj.args[i].is_read:
+                    if resolved_func.adj.args[i].is_read:
                         arg.mark_read()
 
         return out
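
Context for this change: when wp.config.verify_autograd_array_access is enabled, the read/write flags used for overwrite tracking are now taken from the overload that actually matches the call, rather than from the generic function object. A minimal sketch of the kind of code this affects, assuming Warp's user-function overloading (function and kernel names are illustrative):

    import warp as wp

    wp.config.verify_autograd_array_access = True

    @wp.func
    def accumulate(dst: wp.array(dtype=float), value: float):
        dst[0] = dst[0] + value            # writes to dst

    @wp.func
    def accumulate(dst: wp.array(dtype=wp.vec3), value: wp.vec3):
        dst[0] = dst[0] + value            # overload for vec3 arrays

    @wp.kernel
    def k(a: wp.array(dtype=wp.vec3)):
        # read/write marking now comes from the vec3 overload that this call resolves to
        accumulate(a, wp.vec3(1.0, 0.0, 0.0))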
warp/config.py CHANGED
@@ -7,64 +7,108 @@
 
 from typing import Optional
 
-version: str = "1.6.0"
+version: str = "1.6.1"
 """Warp version string"""
 
 verify_fp: bool = False
-"""If `True`, Warp will check that inputs and outputs are finite before and/or after various operations.
-Has performance implications.
+"""Enable floating-point verification for inputs and outputs.
+
+When enabled, checks if all values are finite before and after operations.
+
+Note: Enabling this flag impacts performance.
 """
 
 verify_cuda: bool = False
-"""If `True`, Warp will check for CUDA errors after every launch operation.
-CUDA error verification cannot be used during graph capture. Has performance implications.
+"""Enable CUDA error checking after kernel launches.
+
+This setting cannot be used during graph capture
+
+Note: Enabling this flag impacts performance
 """
 
 print_launches: bool = False
-"""If `True`, Warp will print details of every kernel launch to standard out
-(e.g. launch dimensions, inputs, outputs, device, etc.). Has performance implications
+"""Enable detailed kernel launch logging.
+
+Prints information about each kernel launch including:
+
+- Launch dimensions
+- Input/output parameters
+- Target device
+
+Note: Enabling this flag impacts performance.
 """
 
 mode: str = "release"
-"""Controls whether to compile Warp kernels in debug or release mode.
-Valid choices are `"release"` or `"debug"`. Has performance implications.
+"""Compilation mode for Warp kernels.
+
+Args:
+    mode: Either ``"release"`` or ``"debug"``.
+
+Note: Debug mode may impact performance.
 """
 
 verbose: bool = False
-"""If `True`, additional information will be printed to standard out during code generation, compilation, etc."""
+"""Enable detailed logging during code generation and compilation."""
 
 verbose_warnings: bool = False
-"""If `True`, Warp warnings will include extra information such as the source file and line number."""
+"""Enable extended warning messages with source location information."""
 
 quiet: bool = False
-"""Suppress all output except errors and warnings."""
+"""Disable Warp module initialization messages.
+
+Error messages and warnings remain unaffected.
+"""
 
 verify_autograd_array_access: bool = False
-"""print warnings related to array overwrites that may result in incorrect gradients"""
+"""Enable warnings for array overwrites that may affect gradient computation."""
+
+enable_vector_component_overwrites: bool = False
+"""Allow multiple writes to vector/matrix/quaternion components.
+
+Note: Enabling this may significantly increase kernel compilation time.
+"""
 
 cache_kernels: bool = True
-"""If `True`, kernels that have already been compiled from previous application launches will not be recompiled."""
+"""Enable kernel caching between application launches."""
 
 kernel_cache_dir: Optional[str] = None
-"""Path to kernel cache directory, if `None`, a default path will be used."""
+"""Directory path for storing compiled kernel cache.
+
+If ``None``, the path is determined in the following order:
+
+1. ``WARP_CACHE_PATH`` environment variable.
+2. System's user cache directory (via ``appdirs.user_cache_directory``).
+
+Note: Subdirectories prefixed with ``wp_`` will be created in this location.
+"""
 
 cuda_output: Optional[str] = None
-"""Preferred CUDA output format for kernels (`"ptx"` or `"cubin"`), determined automatically if unspecified"""
+"""Preferred CUDA output format for kernel compilation.
+
+Args:
+    cuda_output: One of {``None``, ``"ptx"``, ``"cubin"``}. If ``None``, format is auto-determined.
+"""
 
 ptx_target_arch: int = 75
-"""Target architecture for PTX generation, defaults to the lowest architecture that supports all of Warp's features."""
+"""Target architecture version for PTX generation.
+
+Defaults to minimum architecture version supporting all Warp features.
+"""
 
 enable_backward: bool = True
-"""Whether to compiler the backward passes of the kernels."""
+"""Enable compilation of kernel backward passes."""
 
 llvm_cuda: bool = False
-"""Use Clang/LLVM instead of NVRTC to compile CUDA."""
+"""Use Clang/LLVM compiler instead of NVRTC for CUDA compilation."""
 
 enable_graph_capture_module_load_by_default: bool = True
-"""Default value of `force_module_load` for `capture_begin()` if CUDA driver does not support at least CUDA 12.3."""
+"""Enable automatic module loading before graph capture.
+
+Only affects systems with CUDA driver versions below 12.3.
+"""
 
 enable_mempools_at_init: bool = True
-"""Whether CUDA devices will be initialized with mempools enabled (if supported)."""
+"""Enable CUDA memory pools during device initialization when supported."""
 
 max_unroll: int = 16
 """Maximum unroll factor for loops."""
warp/context.py CHANGED
@@ -34,6 +34,7 @@ import warp
 import warp.build
 import warp.codegen
 import warp.config
+from warp.types import launch_bounds_t
 
 # represents either a built-in or user-defined function
 
@@ -5187,8 +5188,23 @@ def pack_arg(kernel, arg_type, arg_name, value, device, adjoint=False):
 # represents all data required for a kernel launch
 # so that launches can be replayed quickly, use `wp.launch(..., record_cmd=True)`
 class Launch:
+    """Represents all data required for a kernel launch so that launches can be replayed quickly.
+
+    Users should not directly instantiate this class, instead use
+    ``wp.launch(..., record_cmd=True)`` to record a launch.
+    """
+
     def __init__(
-        self, kernel, device, hooks=None, params=None, params_addr=None, bounds=None, max_blocks=0, block_dim=256
+        self,
+        kernel,
+        device: Device,
+        hooks: Optional[KernelHooks] = None,
+        params: Optional[Sequence[Any]] = None,
+        params_addr: Optional[Sequence[ctypes.c_void_p]] = None,
+        bounds: Optional[launch_bounds_t] = None,
+        max_blocks: int = 0,
+        block_dim: int = 256,
+        adjoint: bool = False,
     ):
         # retain the module executable so it doesn't get unloaded
         self.module_exec = kernel.module.load(device)
@@ -5201,13 +5217,14 @@ class Launch:
 
         # if not specified set a zero bound
         if not bounds:
-            bounds = warp.types.launch_bounds_t(0)
+            bounds = launch_bounds_t(0)
 
         # if not specified then build a list of default value params for args
         if not params:
             params = []
             params.append(bounds)
 
+            # Pack forward parameters
             for a in kernel.adj.args:
                 if isinstance(a.type, warp.types.array):
                     params.append(a.type.__ctype__())
@@ -5216,6 +5233,18 @@ class Launch:
                 else:
                     params.append(pack_arg(kernel, a.type, a.label, 0, device, False))
 
+            # Pack adjoint parameters if adjoint=True
+            if adjoint:
+                for a in kernel.adj.args:
+                    if isinstance(a.type, warp.types.array):
+                        params.append(a.type.__ctype__())
+                    elif isinstance(a.type, warp.codegen.Struct):
+                        params.append(a.type().__ctype__())
+                    else:
+                        # For primitive types in adjoint mode, initialize with 0
+                        params.append(pack_arg(kernel, a.type, a.label, 0, device, True))
+
+        # Create array of parameter addresses
         kernel_args = [ctypes.c_void_p(ctypes.addressof(x)) for x in params]
         kernel_params = (ctypes.c_void_p * len(kernel_args))(*kernel_args)
@@ -5225,13 +5254,30 @@ class Launch:
         self.hooks = hooks
         self.params = params
         self.params_addr = params_addr
-        self.device = device
-        self.bounds = bounds
-        self.max_blocks = max_blocks
-        self.block_dim = block_dim
+        self.device: Device = device
+        """The device to launch on.
+        This should not be changed after the launch object is created.
+        """
+
+        self.bounds: launch_bounds_t = bounds
+        """The launch bounds. Update with :meth:`set_dim`."""
+
+        self.max_blocks: int = max_blocks
+        """The maximum number of CUDA thread blocks to use."""
+
+        self.block_dim: int = block_dim
+        """The number of threads per block."""
 
-    def set_dim(self, dim):
-        self.bounds = warp.types.launch_bounds_t(dim)
+        self.adjoint: bool = adjoint
+        """Whether to run the adjoint kernel instead of the forward kernel."""
+
+    def set_dim(self, dim: Union[int, List[int], Tuple[int, ...]]):
+        """Set the launch dimensions.
+
+        Args:
+            dim: The dimensions of the launch.
+        """
+        self.bounds = launch_bounds_t(dim)
 
         # launch bounds always at index 0
         self.params[0] = self.bounds
@@ -5240,22 +5286,36 @@ class Launch:
         if self.params_addr:
             self.params_addr[0] = ctypes.c_void_p(ctypes.addressof(self.bounds))
 
-    # set kernel param at an index, will convert to ctype as necessary
-    def set_param_at_index(self, index, value):
+    def set_param_at_index(self, index: int, value: Any, adjoint: bool = False):
+        """Set a kernel parameter at an index.
+
+        Args:
+            index: The index of the param to set.
+            value: The value to set the param to.
+        """
         arg_type = self.kernel.adj.args[index].type
         arg_name = self.kernel.adj.args[index].label
 
-        carg = pack_arg(self.kernel, arg_type, arg_name, value, self.device, False)
+        carg = pack_arg(self.kernel, arg_type, arg_name, value, self.device, adjoint)
+
+        if adjoint:
+            params_index = index + len(self.kernel.adj.args) + 1
+        else:
+            params_index = index + 1
 
-        self.params[index + 1] = carg
+        self.params[params_index] = carg
 
         # for CUDA kernels we need to update the address to each arg
         if self.params_addr:
-            self.params_addr[index + 1] = ctypes.c_void_p(ctypes.addressof(carg))
+            self.params_addr[params_index] = ctypes.c_void_p(ctypes.addressof(carg))
 
-    # set kernel param at an index without any type conversion
-    # args must be passed as ctypes or basic int / float types
-    def set_param_at_index_from_ctype(self, index, value):
+    def set_param_at_index_from_ctype(self, index: int, value: Union[ctypes.Structure, int, float]):
+        """Set a kernel parameter at an index without any type conversion.
+
+        Args:
+            index: The index of the param to set.
+            value: The value to set the param to.
+        """
         if isinstance(value, ctypes.Structure):
             # not sure how to directly assign struct->struct without reallocating using ctypes
             self.params[index + 1] = value
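
The new adjoint argument changes where a parameter lives in self.params: index 0 is always the launch bounds, the forward arguments follow, and (when the launch was recorded with adjoint=True) the adjoint slots come after all forward slots. A small illustrative check of the index math used above (the argument count is hypothetical):

    num_args = 3   # hypothetical kernel with 3 arguments
    # params layout: [bounds, arg0, arg1, arg2, adj_arg0, adj_arg1, adj_arg2]
    for index in range(num_args):
        forward_slot = index + 1
        adjoint_slot = index + num_args + 1
        print(f"arg {index}: forward -> params[{forward_slot}], adjoint -> params[{adjoint_slot}]")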
@@ -5267,32 +5327,62 @@ class Launch:
         else:
             self.params[index + 1].__init__(value)
 
-    # set kernel param by argument name
-    def set_param_by_name(self, name, value):
+    def set_param_by_name(self, name: str, value: Any, adjoint: bool = False):
+        """Set a kernel parameter by argument name.
+
+        Args:
+            name: The name of the argument to set.
+            value: The value to set the argument to.
+            adjoint: If ``True``, set the adjoint of this parameter instead of the forward parameter.
+        """
         for i, arg in enumerate(self.kernel.adj.args):
             if arg.label == name:
-                self.set_param_at_index(i, value)
+                self.set_param_at_index(i, value, adjoint)
+                return
+
+        raise ValueError(f"Argument '{name}' not found in kernel '{self.kernel.key}'")
 
-    # set kernel param by argument name with no type conversions
-    def set_param_by_name_from_ctype(self, name, value):
+    def set_param_by_name_from_ctype(self, name: str, value: ctypes.Structure):
+        """Set a kernel parameter by argument name with no type conversions.
+
+        Args:
+            name: The name of the argument to set.
+            value: The value to set the argument to.
+        """
         # lookup argument index
         for i, arg in enumerate(self.kernel.adj.args):
             if arg.label == name:
                 self.set_param_at_index_from_ctype(i, value)
 
-    # set all params
-    def set_params(self, values):
+    def set_params(self, values: Sequence[Any]):
+        """Set all parameters.
+
+        Args:
+            values: A list of values to set the params to.
+        """
         for i, v in enumerate(values):
             self.set_param_at_index(i, v)
 
-    # set all params without performing type-conversions
-    def set_params_from_ctypes(self, values):
+    def set_params_from_ctypes(self, values: Sequence[ctypes.Structure]):
+        """Set all parameters without performing type-conversions.
+
+        Args:
+            values: A list of ctypes or basic int / float types.
+        """
         for i, v in enumerate(values):
             self.set_param_at_index_from_ctype(i, v)
 
-    def launch(self, stream=None) -> Any:
+    def launch(self, stream: Optional[Stream] = None) -> None:
+        """Launch the kernel.
+
+        Args:
+            stream: The stream to launch on.
+        """
         if self.device.is_cpu:
-            self.hooks.forward(*self.params)
+            if self.adjoint:
+                self.hooks.backward(*self.params)
+            else:
+                self.hooks.forward(*self.params)
         else:
             if stream is None:
                 stream = self.device.stream
@@ -5305,32 +5395,44 @@ class Launch:
             if graph is not None:
                 graph.retain_module_exec(self.module_exec)
 
-            runtime.core.cuda_launch_kernel(
-                self.device.context,
-                self.hooks.forward,
-                self.bounds.size,
-                self.max_blocks,
-                self.block_dim,
-                self.hooks.forward_smem_bytes,
-                self.params_addr,
-                stream.cuda_stream,
-            )
+            if self.adjoint:
+                runtime.core.cuda_launch_kernel(
+                    self.device.context,
+                    self.hooks.backward,
+                    self.bounds.size,
+                    self.max_blocks,
+                    self.block_dim,
+                    self.hooks.backward_smem_bytes,
+                    self.params_addr,
+                    stream.cuda_stream,
+                )
+            else:
+                runtime.core.cuda_launch_kernel(
+                    self.device.context,
+                    self.hooks.forward,
+                    self.bounds.size,
+                    self.max_blocks,
+                    self.block_dim,
+                    self.hooks.forward_smem_bytes,
+                    self.params_addr,
+                    stream.cuda_stream,
+                )
 
 
 def launch(
     kernel,
-    dim: Tuple[int],
+    dim: Union[int, Sequence[int]],
     inputs: Sequence = [],
     outputs: Sequence = [],
     adj_inputs: Sequence = [],
     adj_outputs: Sequence = [],
     device: Devicelike = None,
-    stream: Stream = None,
-    adjoint=False,
-    record_tape=True,
-    record_cmd=False,
-    max_blocks=0,
-    block_dim=256,
+    stream: Optional[Stream] = None,
+    adjoint: bool = False,
+    record_tape: bool = True,
+    record_cmd: bool = False,
+    max_blocks: int = 0,
+    block_dim: int = 256,
 ):
     """Launch a Warp kernel on the target device
 
@@ -5338,18 +5440,23 @@ def launch(
 
     Args:
         kernel: The name of a Warp kernel function, decorated with the ``@wp.kernel`` decorator
-        dim: The number of threads to launch the kernel, can be an integer, or a Tuple of ints with max of 4 dimensions
+        dim: The number of threads to launch the kernel, can be an integer or a
+            sequence of integers with a maximum of 4 dimensions.
         inputs: The input parameters to the kernel (optional)
         outputs: The output parameters (optional)
         adj_inputs: The adjoint inputs (optional)
         adj_outputs: The adjoint outputs (optional)
-        device: The device to launch on (optional)
-        stream: The stream to launch on (optional)
-        adjoint: Whether to run forward or backward pass (typically use False)
-        record_tape: When true the launch will be recorded the global wp.Tape() object when present
-        record_cmd: When True the launch will be returned as a ``Launch`` command object, the launch will not occur until the user calls ``cmd.launch()``
-        max_blocks: The maximum number of CUDA thread blocks to use. Only has an effect for CUDA kernel launches.
-            If negative or zero, the maximum hardware value will be used.
+        device: The device to launch on.
+        stream: The stream to launch on.
+        adjoint: Whether to run forward or backward pass (typically use ``False``).
+        record_tape: When ``True``, the launch will be recorded the global
+            :class:`wp.Tape() <warp.Tape>` object when present.
+        record_cmd: When ``True``, the launch will return a :class:`Launch`
+            object. The launch will not occur until the user calls
+            :meth:`Launch.launch()`.
+        max_blocks: The maximum number of CUDA thread blocks to use.
+            Only has an effect for CUDA kernel launches.
+            If negative or zero, the maximum hardware value will be used.
         block_dim: The number of threads per block.
     """
@@ -5370,7 +5477,7 @@ def launch(
         print(f"kernel: {kernel.key} dim: {dim} inputs: {inputs} outputs: {outputs} device: {device}")
 
     # construct launch bounds
-    bounds = warp.types.launch_bounds_t(dim)
+    bounds = launch_bounds_t(dim)
 
     if bounds.size > 0:
         # first param is the number of threads
@@ -5427,6 +5534,17 @@ def launch(
                     f"Failed to find backward kernel '{kernel.key}' from module '{kernel.module.name}' for device '{device}'"
                 )
 
+            if record_cmd:
+                launch = Launch(
+                    kernel=kernel,
+                    hooks=hooks,
+                    params=params,
+                    params_addr=None,
+                    bounds=bounds,
+                    device=device,
+                    adjoint=adjoint,
+                )
+                return launch
             hooks.backward(*params)
 
         else:
@@ -5437,7 +5555,13 @@ def launch(
 
             if record_cmd:
                 launch = Launch(
-                    kernel=kernel, hooks=hooks, params=params, params_addr=None, bounds=bounds, device=device
+                    kernel=kernel,
+                    hooks=hooks,
+                    params=params,
+                    params_addr=None,
+                    bounds=bounds,
+                    device=device,
+                    adjoint=adjoint,
                 )
                 return launch
             else:
@@ -5464,16 +5588,30 @@ def launch(
                     f"Failed to find backward kernel '{kernel.key}' from module '{kernel.module.name}' for device '{device}'"
                 )
 
-                runtime.core.cuda_launch_kernel(
-                    device.context,
-                    hooks.backward,
-                    bounds.size,
-                    max_blocks,
-                    block_dim,
-                    hooks.backward_smem_bytes,
-                    kernel_params,
-                    stream.cuda_stream,
-                )
+                if record_cmd:
+                    launch = Launch(
+                        kernel=kernel,
+                        hooks=hooks,
+                        params=params,
+                        params_addr=kernel_params,
+                        bounds=bounds,
+                        device=device,
+                        max_blocks=max_blocks,
+                        block_dim=block_dim,
+                        adjoint=adjoint,
+                    )
+                    return launch
+                else:
+                    runtime.core.cuda_launch_kernel(
+                        device.context,
+                        hooks.backward,
+                        bounds.size,
+                        max_blocks,
+                        block_dim,
+                        hooks.backward_smem_bytes,
+                        kernel_params,
+                        stream.cuda_stream,
+                    )
 
         else:
             if hooks.forward is None:
@@ -5493,7 +5631,6 @@ def launch(
                     block_dim=block_dim,
                 )
                 return launch
-
             else:
                 # launch
                 runtime.core.cuda_launch_kernel(
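
Taken together, these changes let a backward (adjoint) launch be recorded and replayed just like a forward one. A minimal sketch, assuming a kernel with gradient-enabled arrays (names and dimensions are illustrative):

    import warp as wp

    @wp.kernel
    def scale(x: wp.array(dtype=float), s: float, y: wp.array(dtype=float)):
        tid = wp.tid()
        y[tid] = x[tid] * s

    n = 1024
    x = wp.zeros(n, dtype=float, requires_grad=True)
    y = wp.zeros(n, dtype=float, requires_grad=True)

    # record the adjoint launch as a Launch object instead of running it immediately
    cmd = wp.launch(
        scale, dim=n,
        inputs=[x, 2.0], outputs=[y],
        adj_inputs=[x.grad, 0.0], adj_outputs=[y.grad],
        adjoint=True, record_cmd=True,
    )

    # update an adjoint parameter by name, then replay the recorded launch
    cmd.set_param_by_name("y", y.grad, adjoint=True)
    cmd.launch()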
@@ -153,7 +153,7 @@ class Example:
             "surface",
             self.mc.verts.numpy(),
             self.mc.indices.numpy(),
-            colors=((0.35, 0.55, 0.9),) * len(self.mc.verts),
+            colors=(0.35, 0.55, 0.9),
             update_topology=True,
         )
         self.renderer.end_frame()
@@ -138,7 +138,7 @@ class Example:
             name="mesh",
             points=self.mesh.points.numpy(),
             indices=self.mesh.indices.numpy(),
-            colors=((0.35, 0.55, 0.9),) * len(self.mesh.points),
+            colors=(0.35, 0.55, 0.9),
         )
         self.renderer.render_points(
             name="points", points=self.positions.numpy(), radius=self.sim_margin, colors=(0.8, 0.3, 0.2)
@@ -223,7 +223,7 @@ class Example:
         vertices = self.sim_verts.numpy()
 
         self.renderer.begin_frame(self.sim_time)
-        self.renderer.render_mesh("surface", vertices, self.indices, colors=((0.35, 0.55, 0.9),) * len(vertices))
+        self.renderer.render_mesh("surface", vertices, self.indices, colors=(0.35, 0.55, 0.9))
         self.renderer.render_sphere(
             "sphere",
             (self.cx * self.grid_size, 0.0, self.cy * self.grid_size),
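
The example updates above reflect that render_mesh accepts a single RGB tuple for a uniform color, so replicating the color per vertex is no longer needed. Illustrative call (variable names are hypothetical):

    renderer.render_mesh(
        "surface",
        vertices,                    # (N, 3) vertex positions
        indices,                     # flat triangle index array
        colors=(0.35, 0.55, 0.9),    # one color applied to the whole mesh
    )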