warp-lang 1.6.0-py3-none-manylinux2014_x86_64.whl → 1.6.1-py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


warp/tests/test_launch.py CHANGED
@@ -46,6 +46,12 @@ def kernel4d(a: wp.array(dtype=int, ndim=4)):
     wp.expect_eq(a[i, j, k, l], i * dim_y * dim_z * dim_w + j * dim_z * dim_w + k * dim_w + l)
 
 
+@wp.kernel
+def square_kernel(input: wp.array(dtype=float), output: wp.array(dtype=float)):
+    i = wp.tid()
+    output[i] = input[i] * input[i]
+
+
 def test1d(test, device):
     a = np.arange(0, dim_x).reshape(dim_x)
 
@@ -98,8 +104,19 @@ def kernel_cmd(params: Params, i: int, f: float, v: wp.vec3, m: wp.mat33, out: w
 
 
 def test_launch_cmd(test, device):
+    """Tests recording and executing a kernel launch command.
+
+    Verifies that:
+    - A kernel can be recorded as a command without immediate execution
+    - The recorded command can be launched later
+    - Parameters are correctly passed to the kernel
+    - Output matches expected results for both immediate and delayed launches
+
+    Args:
+        test: Test context
+        device: Device to run the test on
+    """
     n = 1
-
     ref = np.arange(0, n)
     out = wp.zeros(n, dtype=int, device=device)
 
@@ -274,12 +291,62 @@ def test_launch_cmd_empty(test, device):
     assert_np_equal(out.numpy(), ref)
 
 
+def test_launch_cmd_adjoint(test, device):
+    """Test recording an adjoint launch with record_cmd=True."""
+    input_arr = wp.array([1.0, 2.0, 3.0], dtype=float, requires_grad=True, device=device)
+    output_arr = wp.empty_like(input_arr)
+
+    output_arr.grad.fill_(1.0)
+
+    cmd = wp.launch(
+        square_kernel,
+        dim=input_arr.size,
+        inputs=[input_arr, output_arr],
+        adj_inputs=[None, None],
+        adjoint=True,
+        device=device,
+        record_cmd=True,
+    )
+
+    cmd.launch()
+
+    assert_np_equal(input_arr.grad.numpy(), np.array([2.0, 4.0, 6.0]))
+
+
+def test_launch_cmd_adjoint_empty(test, device):
+    """Test constructing a Launch object for an adjoint kernel."""
+    input_arr = wp.array([1.0, 2.0, 3.0], dtype=float, requires_grad=True, device=device)
+    output_arr = wp.empty_like(input_arr)
+    output_arr.grad.fill_(1.0)
+
+    cmd = wp.Launch(square_kernel, device, adjoint=True)
+    cmd.set_param_by_name("input", input_arr)
+    cmd.set_param_by_name("output", output_arr)
+    cmd.set_dim(input_arr.size)
+    cmd.launch()
+
+    assert_np_equal(input_arr.grad.numpy(), np.array([2.0, 4.0, 6.0]))
+
+    # Now update the launch object's parameters with arrays of different sizes and values
+    # and check that the adjoints are correctly computed
+    input_arr_updated = wp.array([4.0, 5.0, 6.0, 7.0], dtype=float, device=device)
+    input_arr_updated_grad = wp.zeros_like(input_arr_updated)
+
+    output_arr_updated = wp.empty_like(input_arr_updated)
+    output_arr_updated_grad = wp.full_like(output_arr_updated, 1.0)
+
+    cmd.set_param_by_name("input", input_arr_updated)
+    cmd.set_param_by_name("output", output_arr_updated)
+    cmd.set_param_by_name("input", input_arr_updated_grad, adjoint=True)
+    cmd.set_param_by_name("output", output_arr_updated_grad, adjoint=True)
+    cmd.set_dim(input_arr_updated.size)
+    cmd.launch()
+
+    assert_np_equal(input_arr_updated_grad.numpy(), np.array([8.0, 10.0, 12.0, 14.0]))
+
+
 @wp.kernel
-def kernel_mul(
-    values: wp.array(dtype=int),
-    coeff: int,
-    out: wp.array(dtype=int),
-):
+def kernel_mul(values: wp.array(dtype=int), coeff: int, out: wp.array(dtype=int)):
     tid = wp.tid()
     out[tid] = values[tid] * coeff
 
@@ -301,28 +368,10 @@ def test_launch_tuple_args(test, device):
     )
     assert_np_equal(out.numpy(), np.array((0, 3, 6, 9)))
 
-    wp.launch(
-        kernel_mul,
-        dim=len(values),
-        inputs=(
-            values,
-            coeff,
-            out,
-        ),
-        device=device,
-    )
+    wp.launch(kernel_mul, dim=len(values), inputs=(values, coeff, out), device=device)
     assert_np_equal(out.numpy(), np.array((0, 3, 6, 9)))
 
-    wp.launch(
-        kernel_mul,
-        dim=len(values),
-        outputs=(
-            values,
-            coeff,
-            out,
-        ),
-        device=device,
-    )
+    wp.launch(kernel_mul, dim=len(values), outputs=(values, coeff, out), device=device)
     assert_np_equal(out.numpy(), np.array((0, 3, 6, 9)))
 
 
@@ -343,6 +392,8 @@ add_function_test(TestLaunch, "test_launch_cmd_set_param", test_launch_cmd_set_p
 add_function_test(TestLaunch, "test_launch_cmd_set_ctype", test_launch_cmd_set_ctype, devices=devices)
 add_function_test(TestLaunch, "test_launch_cmd_set_dim", test_launch_cmd_set_dim, devices=devices)
 add_function_test(TestLaunch, "test_launch_cmd_empty", test_launch_cmd_empty, devices=devices)
+add_function_test(TestLaunch, "test_launch_cmd_adjoint", test_launch_cmd_adjoint, devices=devices)
+add_function_test(TestLaunch, "test_launch_cmd_adjoint_empty", test_launch_cmd_adjoint_empty, devices=devices)
 
 add_function_test(TestLaunch, "test_launch_tuple_args", test_launch_tuple_args, devices=devices)
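
The new `test_launch_cmd_adjoint*` tests above exercise Warp's record-and-replay launch path in adjoint (backward) mode. As a condensed sketch of the same pattern outside the test harness (a minimal illustration, assuming Warp 1.6.1 and the default device; `square` is a hypothetical stand-in for `square_kernel`):

```python
import numpy as np
import warp as wp


@wp.kernel
def square(x: wp.array(dtype=float), y: wp.array(dtype=float)):
    i = wp.tid()
    y[i] = x[i] * x[i]


x = wp.array([1.0, 2.0, 3.0], dtype=float, requires_grad=True)
y = wp.empty_like(x)
y.grad.fill_(1.0)  # seed the upstream gradient dL/dy = 1

# Record the backward (adjoint) launch without executing it,
# then replay it later; x.grad receives dL/dx = 2 * x.
cmd = wp.launch(
    square,
    dim=x.size,
    inputs=[x, y],
    adj_inputs=[None, None],
    adjoint=True,
    record_cmd=True,
)
cmd.launch()

print(x.grad.numpy())  # expected: [2. 4. 6.]
```
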
warp/tests/test_mat.py CHANGED
@@ -384,6 +384,77 @@ def test_negation(test, device, dtype, register_kernels=False):
             idx = idx + 1
 
 
+def test_matmul(test, device, dtype, register_kernels=False):
+    rng = np.random.default_rng(123)
+
+    tol = {
+        np.float16: 5.0e-3,
+        np.float32: 1.0e-6,
+        np.float64: 1.0e-12,
+    }.get(dtype, 0)
+
+    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
+    mat22 = wp.types.matrix(shape=(2, 2), dtype=wptype)
+    mat33 = wp.types.matrix(shape=(3, 3), dtype=wptype)
+    mat23 = wp.types.matrix(shape=(2, 3), dtype=wptype)
+    mat32 = wp.types.matrix(shape=(3, 2), dtype=wptype)
+    mat44 = wp.types.matrix(shape=(4, 4), dtype=wptype)
+
+    output_select_kernel = get_select_kernel(wptype)
+
+    def check_mat_mul(
+        i23: wp.array(dtype=mat23),
+        i32: wp.array(dtype=mat32),
+        i44: wp.array(dtype=mat44),
+        o22: wp.array(dtype=mat22),
+        o33: wp.array(dtype=mat33),
+        o44: wp.array(dtype=mat44),
+    ):
+        i = wp.tid()
+        o22[i] = i23[i] @ i32[i]
+        o33[i] = i32[i] @ i23[i]
+        o44[i] = i44[i] @ i44[i]
+
+    kernel = getkernel(check_mat_mul, suffix=dtype.__name__)
+
+    if register_kernels:
+        return
+
+    test_adj = dtype in np_float_types
+
+    i23 = wp.array(randvals(rng, [1, 2, 3], dtype), dtype=mat23, requires_grad=test_adj, device=device)
+    i32 = wp.array(randvals(rng, [1, 3, 2], dtype), dtype=mat32, requires_grad=test_adj, device=device)
+    i44 = wp.array(randvals(rng, [1, 4, 4], dtype), dtype=mat44, requires_grad=test_adj, device=device)
+    o22 = wp.array(randvals(rng, [1, 2, 2], dtype), dtype=mat22, requires_grad=test_adj, device=device)
+    o33 = wp.array(randvals(rng, [1, 3, 3], dtype), dtype=mat33, requires_grad=test_adj, device=device)
+    o44 = wp.array(randvals(rng, [1, 4, 4], dtype), dtype=mat44, requires_grad=test_adj, device=device)
+
+    tape = wp.Tape()
+    with tape:
+        wp.launch(
+            kernel,
+            dim=1,
+            inputs=[i23, i32, i44],
+            outputs=[o22, o33, o44],
+            device=device,
+        )
+
+    assert_np_equal(o22.numpy(), i23.numpy() @ i32.numpy(), tol=tol)
+    assert_np_equal(o33.numpy(), i32.numpy() @ i23.numpy(), tol=tol)
+    assert_np_equal(o44.numpy(), i44.numpy() @ i44.numpy(), tol=tol)
+
+    if test_adj:
+        o22.grad.assign([np.eye(2)])
+        o33.grad.assign([np.eye(3)])
+        o44.grad.assign([np.eye(4)])
+
+        tape.backward()
+
+        assert_np_equal(i23.grad.numpy(), 2.0 * i32.numpy().T, tol=tol)
+        assert_np_equal(i32.grad.numpy(), 2.0 * i23.numpy().T, tol=tol)
+        assert_np_equal(i44.grad.numpy(), 2.0 * i44.numpy().T, tol=tol)
+
+
 def test_subtraction(test, device, dtype, register_kernels=False):
     rng = np.random.default_rng(123)
 
@@ -874,7 +945,7 @@ def test_svd(test, device, dtype, register_kernels=False):
     tol = {
         np.float16: 1.0e-3,
         np.float32: 1.0e-6,
-        np.float64: 1.0e-6,
+        np.float64: 1.0e-12,
     }.get(dtype, 0)
 
     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
@@ -1765,6 +1836,9 @@ for dtype in np_signed_int_types + np_float_types:
     add_function_test_register_kernel(
         TestMat, f"test_subtraction_{dtype.__name__}", test_subtraction, devices=devices, dtype=dtype
     )
+    add_function_test_register_kernel(
+        TestMat, f"test_matmul_{dtype.__name__}", test_matmul, devices=devices, dtype=dtype
+    )
 
 add_function_test(
     TestMat,
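
A note on the gradient expectations in `test_matmul`: for C = A @ B with upstream gradient G, the standard matmul adjoints are dL/dA = G @ B.T and dL/dB = A.T @ G. Each input matrix above feeds two products and every output is seeded with the identity, so the contributions accumulate to twice a transpose. A quick NumPy check of that arithmetic (the names here are illustrative, not part of the test file):

```python
import numpy as np

rng = np.random.default_rng(0)
A = rng.random((2, 3))  # plays the role of i23
B = rng.random((3, 2))  # plays the role of i32

G22 = np.eye(2)  # seed on o22 = A @ B
G33 = np.eye(3)  # seed on o33 = B @ A

# A contributes to both products, so its gradient contributions accumulate:
grad_A = G22 @ B.T + B.T @ G33  # = 2 * B.T
grad_B = A.T @ G22 + G33 @ A.T  # = 2 * A.T

assert np.allclose(grad_A, 2.0 * B.T)
assert np.allclose(grad_B, 2.0 * A.T)
```

The `o44 = i44 @ i44` case works the same way: the single input appears as both factors, giving 2 * i44.T under an identity seed.
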
@@ -1,6 +1,7 @@
 import contextlib
 import io
 import unittest
+from typing import Any
 
 import numpy as np
 
@@ -164,18 +165,18 @@ def test_kernel_writeread_kernel_write(test, device):
 
 
 @wp.func
-def read_func(a: wp.array(dtype=float), idx: int):
+def read_func(a: wp.array(dtype=Any), idx: int):
     x = a[idx]
     return x
 
 
 @wp.func
-def read_return_func(b: wp.array(dtype=float), idx: int):
+def read_return_func(b: wp.array(dtype=Any), idx: int):
     return 1.0, b[idx]
 
 
 @wp.func
-def write_func(c: wp.array(dtype=float), idx: int):
+def write_func(c: wp.array(dtype=Any), idx: int):
     c[idx] = 1.0
 
@@ -133,6 +133,49 @@ def test_tile_load(kernel, ndim):
     return test
 
 
+@wp.kernel
+def tile_load_unaligned_kernel(
+    input: wp.array2d(dtype=float),
+    output: wp.array2d(dtype=float),
+):
+    t = wp.tile_load(input, shape=(TILE_M, TILE_N), offset=(1, 1), storage="shared")
+    wp.tile_store(output, t, offset=(1, 1))
+
+
+def test_tile_load_unaligned(test, device):
+    rng = np.random.default_rng(42)
+
+    shape = [TILE_M + 1, TILE_N + 1]
+
+    input = wp.array(rng.random(shape), dtype=float, requires_grad=True, device=device)
+    output = wp.zeros(shape, dtype=float, device=device)
+
+    with wp.Tape() as tape:
+        wp.launch_tiled(
+            tile_load_unaligned_kernel,
+            dim=[1],
+            inputs=[input, output],
+            block_dim=TILE_DIM,
+            device=device,
+        )
+
+    # first row and column should be zero
+    assert_np_equal(output.numpy()[0, :], np.zeros(TILE_N + 1))
+    assert_np_equal(output.numpy()[:, 0], np.zeros(TILE_M + 1))
+
+    # check output elements
+    assert_np_equal(output.numpy()[1:, 1:], input.numpy()[1:, 1:])
+
+    output.grad = wp.ones_like(output)
+    tape.backward()
+
+    expected_grad = np.ones_like(input.grad.numpy())
+    expected_grad[0, :] = 0.0
+    expected_grad[:, 0] = 0.0
+
+    assert_np_equal(input.grad.numpy(), expected_grad)
+
+
 # ----------------------------------------------------------------------------------------
 
 TILE_SIZE = 4
@@ -336,7 +379,7 @@ add_function_test(TestTileLoad, "test_tile_load_1d", test_tile_load(tile_load_1d
 add_function_test(TestTileLoad, "test_tile_load_2d", test_tile_load(tile_load_2d_kernel, 2), devices=devices)
 add_function_test(TestTileLoad, "test_tile_load_3d", test_tile_load(tile_load_3d_kernel, 3), devices=devices)
 add_function_test(TestTileLoad, "test_tile_load_4d", test_tile_load(tile_load_4d_kernel, 4), devices=devices)
-
+add_function_test(TestTileLoad, "test_tile_load_unaligned", test_tile_load_unaligned, devices=devices)
 
 add_function_test(TestTileLoad, "test_tile_extract_1d", test_tile_extract(tile_extract_1d_kernel, 1), devices=devices)
 add_function_test(TestTileLoad, "test_tile_extract_2d", test_tile_extract(tile_extract_2d_kernel, 2), devices=devices)
@@ -554,6 +554,9 @@ def initialize_test_process(lock, shared_index, args, temp_dir):
         wp.config.kernel_cache_dir = cache_root_dir
 
         wp.build.clear_kernel_cache()
+    elif "WARP_CACHE_ROOT" in os.environ:
+        # Using a shared cache for all test processes
+        wp.config.kernel_cache_dir = os.path.join(os.getenv("WARP_CACHE_ROOT"), wp.config.version)
 
 
 if __name__ == "__main__":  # pragma: no cover
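
This lets every test process share one kernel cache rooted at an externally supplied directory, namespaced by the Warp version so caches from different releases cannot collide. A hypothetical illustration of the resolution logic added above (the directory path is made up for the example):

```python
import os

import warp as wp

# Hypothetical: a CI job exports WARP_CACHE_ROOT before running the suite.
os.environ["WARP_CACHE_ROOT"] = "/tmp/warp-shared-cache"

# Mirrors the resolution added above: every process derives the same
# version-scoped cache directory, so compiled kernels are reused across runs.
cache_dir = os.path.join(os.getenv("WARP_CACHE_ROOT"), wp.config.version)
print(cache_dir)  # e.g. /tmp/warp-shared-cache/1.6.1
```
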
warp/types.py CHANGED
@@ -1768,77 +1768,75 @@ class array(Array):
             dtype_ndim = 0
             scalar_dtype = dtype
 
-        if hasattr(data, "__cuda_array_interface__"):
-            try:
-                # Performance note: try first, ask questions later
-                device = warp.context.runtime.get_device(device)
-            except Exception:
-                # Fallback to using the public API for retrieving the device,
-                # which takes care of initializing Warp if needed.
-                device = warp.context.get_device(device)
-
-            if device.is_cuda:
-                desc = data.__cuda_array_interface__
-                data_shape = desc.get("shape")
-                data_strides = desc.get("strides")
-                data_dtype = np.dtype(desc.get("typestr"))
-                data_ptr = desc.get("data")[0]
-
-                if dtype == Any:
-                    dtype = np_dtype_to_warp_type[data_dtype]
-
-                data_ndim = len(data_shape)
-
-                # determine whether the input needs reshaping
-                target_npshape = None
-                if shape is not None:
-                    target_npshape = (*shape, *dtype_shape)
-                elif dtype_ndim > 0:
-                    # prune inner dimensions of length 1
-                    while data_ndim > 1 and data_shape[-1] == 1:
-                        data_shape = data_shape[:-1]
-                    # if the inner dims don't match exactly, check if the innermost dim is a multiple of type length
-                    if data_ndim < dtype_ndim or data_shape[-dtype_ndim:] != dtype_shape:
-                        if data_shape[-1] == dtype._length_:
-                            target_npshape = (*data_shape[:-1], *dtype_shape)
-                        elif data_shape[-1] % dtype._length_ == 0:
-                            target_npshape = (*data_shape[:-1], data_shape[-1] // dtype._length_, *dtype_shape)
+        try:
+            # Performance note: try first, ask questions later
+            device = warp.context.runtime.get_device(device)
+        except Exception:
+            # Fallback to using the public API for retrieving the device,
+            # which takes care of initializing Warp if needed.
+            device = warp.context.get_device(device)
+
+        if device.is_cuda and hasattr(data, "__cuda_array_interface__"):
+            desc = data.__cuda_array_interface__
+            data_shape = desc.get("shape")
+            data_strides = desc.get("strides")
+            data_dtype = np.dtype(desc.get("typestr"))
+            data_ptr = desc.get("data")[0]
+
+            if dtype == Any:
+                dtype = np_dtype_to_warp_type[data_dtype]
+
+            if data_strides is None:
+                data_strides = strides_from_shape(data_shape, dtype)
+
+            data_ndim = len(data_shape)
+
+            # determine whether the input needs reshaping
+            target_npshape = None
+            if shape is not None:
+                target_npshape = (*shape, *dtype_shape)
+            elif dtype_ndim > 0:
+                # prune inner dimensions of length 1
+                while data_ndim > 1 and data_shape[-1] == 1:
+                    data_shape = data_shape[:-1]
+                # if the inner dims don't match exactly, check if the innermost dim is a multiple of type length
+                if data_ndim < dtype_ndim or data_shape[-dtype_ndim:] != dtype_shape:
+                    if data_shape[-1] == dtype._length_:
+                        target_npshape = (*data_shape[:-1], *dtype_shape)
+                    elif data_shape[-1] % dtype._length_ == 0:
+                        target_npshape = (*data_shape[:-1], data_shape[-1] // dtype._length_, *dtype_shape)
+                    else:
+                        if dtype_ndim == 1:
+                            raise RuntimeError(
+                                f"The inner dimensions of the input data are not compatible with the requested vector type {warp.context.type_str(dtype)}: expected an inner dimension that is a multiple of {dtype._length_}"
+                            )
                         else:
-                            if dtype_ndim == 1:
-                                raise RuntimeError(
-                                    f"The inner dimensions of the input data are not compatible with the requested vector type {warp.context.type_str(dtype)}: expected an inner dimension that is a multiple of {dtype._length_}"
-                                )
-                            else:
-                                raise RuntimeError(
-                                    f"The inner dimensions of the input data are not compatible with the requested matrix type {warp.context.type_str(dtype)}: expected inner dimensions {dtype._shape_} or a multiple of {dtype._length_}"
-                                )
-
-                if target_npshape is None:
-                    target_npshape = data_shape if shape is None else shape
-
-                # determine final shape and strides
-                if dtype_ndim > 0:
-                    # make sure the inner dims are contiguous for vector/matrix types
-                    scalar_size = type_size_in_bytes(dtype._wp_scalar_type_)
-                    inner_contiguous = data_strides[-1] == scalar_size
-                    if inner_contiguous and dtype_ndim > 1:
-                        inner_contiguous = data_strides[-2] == scalar_size * dtype_shape[-1]
-
-                    shape = target_npshape[:-dtype_ndim] or (1,)
-                    strides = data_strides if shape == data_shape else strides_from_shape(shape, dtype)
-                else:
-                    shape = target_npshape or (1,)
-                    strides = data_strides if shape == data_shape else strides_from_shape(shape, dtype)
+                            raise RuntimeError(
+                                f"The inner dimensions of the input data are not compatible with the requested matrix type {warp.context.type_str(dtype)}: expected inner dimensions {dtype._shape_} or a multiple of {dtype._length_}"
+                            )
+
+            if target_npshape is None:
+                target_npshape = data_shape if shape is None else shape
+
+            # determine final shape and strides
+            if dtype_ndim > 0:
+                # make sure the inner dims are contiguous for vector/matrix types
+                scalar_size = type_size_in_bytes(dtype._wp_scalar_type_)
+                inner_contiguous = data_strides[-1] == scalar_size
+                if inner_contiguous and dtype_ndim > 1:
+                    inner_contiguous = data_strides[-2] == scalar_size * dtype_shape[-1]
+
+                shape = target_npshape[:-dtype_ndim] or (1,)
+                strides = data_strides if shape == data_shape else strides_from_shape(shape, dtype)
+            else:
+                shape = target_npshape or (1,)
+                strides = data_strides if shape == data_shape else strides_from_shape(shape, dtype)
 
-                self._init_from_ptr(data_ptr, dtype, shape, strides, None, device, False, None)
+            self._init_from_ptr(data_ptr, dtype, shape, strides, None, device, False, None)
 
-                # keep a ref to the source data to keep allocation alive
-                self._ref = data
-                return
-            else:
-                raise RuntimeError(
-                    f"Trying to construct a Warp array from data argument's __cuda_array_interface__ but {device} is not CUDA-capable"
-                )
+            # keep a ref to the source data to keep allocation alive
+            self._ref = data
+            return
 
         # convert input data to ndarray (handles lists, tuples, etc.) and determine dtype
         if dtype == Any:
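
The net effect of this restructuring: the device is resolved up front, `__cuda_array_interface__` is consulted only when the target device is CUDA, and strides are now synthesized when the producer omits them. A non-CUDA target no longer raises immediately; the data instead falls through to the generic ndarray conversion path (which may still fail if the object cannot be converted to a NumPy array). A minimal sketch of the zero-copy path, assuming PyTorch with a CUDA device is available (PyTorch tensors expose `__cuda_array_interface__`):

```python
import torch
import warp as wp

t = torch.arange(4, dtype=torch.float32, device="cuda")

# Target device is CUDA, so the tensor's __cuda_array_interface__ is used
# and the Warp array aliases the tensor's memory (no copy is made).
a = wp.array(t, dtype=wp.float32, device="cuda")

a.fill_(7.0)
print(t)  # t now reads 7.0 everywhere, since the storage is shared
```
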
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: warp-lang
-Version: 1.6.0
+Version: 1.6.1
 Summary: A Python framework for high-performance simulation and graphics programming
 Author-email: NVIDIA Corporation <mmacklin@nvidia.com>
 License: NVIDIA Software License
@@ -78,12 +78,24 @@ the `pip install` command, e.g.
 
 | Platform        | Install Command                                                                                                                |
 | --------------- | ------------------------------------------------------------------------------------------------------------------------------ |
-| Linux aarch64   | `pip install https://github.com/NVIDIA/warp/releases/download/v1.6.0/warp_lang-1.6.0+cu11-py3-none-manylinux2014_aarch64.whl` |
-| Linux x86-64    | `pip install https://github.com/NVIDIA/warp/releases/download/v1.6.0/warp_lang-1.6.0+cu11-py3-none-manylinux2014_x86_64.whl`  |
-| Windows x86-64  | `pip install https://github.com/NVIDIA/warp/releases/download/v1.6.0/warp_lang-1.6.0+cu11-py3-none-win_amd64.whl`             |
+| Linux aarch64   | `pip install https://github.com/NVIDIA/warp/releases/download/v1.6.1/warp_lang-1.6.1+cu11-py3-none-manylinux2014_aarch64.whl` |
+| Linux x86-64    | `pip install https://github.com/NVIDIA/warp/releases/download/v1.6.1/warp_lang-1.6.1+cu11-py3-none-manylinux2014_x86_64.whl`  |
+| Windows x86-64  | `pip install https://github.com/NVIDIA/warp/releases/download/v1.6.1/warp_lang-1.6.1+cu11-py3-none-win_amd64.whl`             |
 
 The `--force-reinstall` option may need to be used to overwrite a previous installation.
 
+### Nightly Builds
+
+Nightly builds of Warp from the `main` branch are available on the [NVIDIA Package Index](https://pypi.nvidia.com/warp-lang/).
+
+To install the latest nightly build, use the following command:
+
+```text
+pip install -U --pre warp-lang --extra-index-url=https://pypi.nvidia.com/
+```
+
+Note that the nightly builds are built with the CUDA 12 runtime and are not published for macOS.
+
 ### CUDA Requirements
 
 * Warp packages built with CUDA Toolkit 11.x require NVIDIA driver 470 or newer.
@@ -300,25 +312,13 @@ python -m warp.tests
     <td><a href="https://github.com/NVIDIA/warp/tree/main/warp/examples/optim/example_inverse_kinematics.py"><img src="https://media.githubusercontent.com/media/NVIDIA/warp/refs/heads/main/docs/img/examples/optim_inverse_kinematics.png"></a></td>
     <td><a href="https://github.com/NVIDIA/warp/tree/main/warp/examples/optim/example_spring_cage.py"><img src="https://media.githubusercontent.com/media/NVIDIA/warp/refs/heads/main/docs/img/examples/optim_spring_cage.png"></a></td>
     <td><a href="https://github.com/NVIDIA/warp/tree/main/warp/examples/optim/example_trajectory.py"><img src="https://media.githubusercontent.com/media/NVIDIA/warp/refs/heads/main/docs/img/examples/optim_trajectory.png"></a></td>
-    <td><a href="https://github.com/NVIDIA/warp/tree/main/warp/examples/optim/example_walker.py"><img src="https://media.githubusercontent.com/media/NVIDIA/warp/refs/heads/main/docs/img/examples/optim_walker.png"></a></td>
+    <td><a href="https://github.com/NVIDIA/warp/tree/main/warp/examples/optim/example_softbody_properties.py"><img src="https://media.githubusercontent.com/media/NVIDIA/warp/refs/heads/main/docs/img/examples/optim_softbody_properties.png"></a></td>
   </tr>
   <tr>
     <td align="center">inverse kinematics</td>
     <td align="center">spring cage</td>
     <td align="center">trajectory</td>
-    <td align="center">walker</td>
-  </tr>
-  <tr>
-    <td><a href="https://github.com/NVIDIA/warp/tree/main/warp/examples/optim/example_softbody_properties.py"><img src="https://media.githubusercontent.com/media/NVIDIA/warp/refs/heads/main/docs/img/examples/optim_softbody_properties.png"></a></td>
-    <td></td>
-    <td></td>
-    <td></td>
-  </tr>
-  <tr>
     <td align="center">soft body properties</td>
-    <td align="center"></td>
-    <td align="center"></td>
-    <td align="center"></td>
   </tr>
   </tbody>
 </table>
@@ -378,6 +378,23 @@ python -m warp.tests
   </tbody>
 </table>
 
+### warp/examples/tile
+
+<table>
+  <tbody>
+  <tr>
+    <td><a href="https://github.com/NVIDIA/warp/tree/main/warp/examples/tile/example_tile_mlp.py"><img src="https://media.githubusercontent.com/media/NVIDIA/warp/refs/heads/main/docs/img/examples/tile_mlp.png"></a></td>
+    <td><a href="https://github.com/NVIDIA/warp/tree/main/warp/examples/tile/example_tile_nbody.py"><img src="https://media.githubusercontent.com/media/NVIDIA/warp/refs/heads/main/docs/img/examples/tile_nbody.png"></a></td>
+    <td><a href="https://github.com/NVIDIA/warp/tree/main/warp/examples/tile/example_tile_walker.py"><img src="https://media.githubusercontent.com/media/NVIDIA/warp/refs/heads/main/docs/img/examples/tile_walker.png"></a></td>
+  </tr>
+  <tr>
+    <td align="center">mlp</td>
+    <td align="center">nbody</td>
+    <td align="center">walker</td>
+  </tr>
+  </tbody>
+</table>
+
 ## Building
 
 For developers who want to build the library themselves, the following tools are required: