warp-lang 1.4.1__py3-none-manylinux2014_x86_64.whl → 1.5.0__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (164)
  1. warp/__init__.py +4 -0
  2. warp/autograd.py +43 -8
  3. warp/bin/warp-clang.so +0 -0
  4. warp/bin/warp.so +0 -0
  5. warp/build.py +21 -2
  6. warp/build_dll.py +23 -6
  7. warp/builtins.py +1920 -111
  8. warp/codegen.py +186 -62
  9. warp/config.py +2 -2
  10. warp/context.py +322 -73
  11. warp/examples/assets/pixel.jpg +0 -0
  12. warp/examples/benchmarks/benchmark_cloth_paddle.py +86 -0
  13. warp/examples/benchmarks/benchmark_gemm.py +121 -0
  14. warp/examples/benchmarks/benchmark_interop_paddle.py +158 -0
  15. warp/examples/benchmarks/benchmark_tile.py +179 -0
  16. warp/examples/core/example_dem.py +2 -1
  17. warp/examples/core/example_mesh_intersect.py +3 -3
  18. warp/examples/fem/example_adaptive_grid.py +37 -10
  19. warp/examples/fem/example_apic_fluid.py +3 -2
  20. warp/examples/fem/example_convection_diffusion_dg.py +4 -5
  21. warp/examples/fem/example_deformed_geometry.py +1 -1
  22. warp/examples/fem/example_diffusion_3d.py +47 -4
  23. warp/examples/fem/example_distortion_energy.py +220 -0
  24. warp/examples/fem/example_magnetostatics.py +127 -85
  25. warp/examples/fem/example_nonconforming_contact.py +5 -5
  26. warp/examples/fem/example_stokes.py +3 -1
  27. warp/examples/fem/example_streamlines.py +12 -19
  28. warp/examples/fem/utils.py +38 -15
  29. warp/examples/optim/example_walker.py +2 -2
  30. warp/examples/sim/example_cloth.py +2 -25
  31. warp/examples/sim/example_jacobian_ik.py +6 -2
  32. warp/examples/sim/example_quadruped.py +2 -1
  33. warp/examples/tile/example_tile_convolution.py +58 -0
  34. warp/examples/tile/example_tile_fft.py +47 -0
  35. warp/examples/tile/example_tile_filtering.py +105 -0
  36. warp/examples/tile/example_tile_matmul.py +79 -0
  37. warp/examples/tile/example_tile_mlp.py +375 -0
  38. warp/fem/__init__.py +8 -0
  39. warp/fem/cache.py +16 -12
  40. warp/fem/dirichlet.py +1 -1
  41. warp/fem/domain.py +44 -1
  42. warp/fem/field/__init__.py +1 -2
  43. warp/fem/field/field.py +31 -19
  44. warp/fem/field/nodal_field.py +101 -49
  45. warp/fem/field/virtual.py +794 -0
  46. warp/fem/geometry/__init__.py +2 -2
  47. warp/fem/geometry/deformed_geometry.py +3 -105
  48. warp/fem/geometry/element.py +13 -0
  49. warp/fem/geometry/geometry.py +165 -5
  50. warp/fem/geometry/grid_2d.py +3 -6
  51. warp/fem/geometry/grid_3d.py +31 -28
  52. warp/fem/geometry/hexmesh.py +3 -46
  53. warp/fem/geometry/nanogrid.py +3 -2
  54. warp/fem/geometry/{quadmesh_2d.py → quadmesh.py} +280 -159
  55. warp/fem/geometry/tetmesh.py +2 -43
  56. warp/fem/geometry/{trimesh_2d.py → trimesh.py} +354 -186
  57. warp/fem/integrate.py +683 -261
  58. warp/fem/linalg.py +404 -0
  59. warp/fem/operator.py +101 -18
  60. warp/fem/polynomial.py +5 -5
  61. warp/fem/quadrature/quadrature.py +45 -21
  62. warp/fem/space/__init__.py +45 -11
  63. warp/fem/space/basis_function_space.py +451 -0
  64. warp/fem/space/basis_space.py +58 -11
  65. warp/fem/space/function_space.py +146 -5
  66. warp/fem/space/grid_2d_function_space.py +80 -66
  67. warp/fem/space/grid_3d_function_space.py +113 -68
  68. warp/fem/space/hexmesh_function_space.py +96 -108
  69. warp/fem/space/nanogrid_function_space.py +62 -110
  70. warp/fem/space/quadmesh_function_space.py +208 -0
  71. warp/fem/space/shape/__init__.py +45 -7
  72. warp/fem/space/shape/cube_shape_function.py +328 -54
  73. warp/fem/space/shape/shape_function.py +10 -1
  74. warp/fem/space/shape/square_shape_function.py +328 -60
  75. warp/fem/space/shape/tet_shape_function.py +269 -19
  76. warp/fem/space/shape/triangle_shape_function.py +238 -19
  77. warp/fem/space/tetmesh_function_space.py +69 -37
  78. warp/fem/space/topology.py +38 -0
  79. warp/fem/space/trimesh_function_space.py +179 -0
  80. warp/fem/utils.py +6 -331
  81. warp/jax_experimental.py +3 -1
  82. warp/native/array.h +55 -40
  83. warp/native/builtin.h +124 -43
  84. warp/native/bvh.h +4 -0
  85. warp/native/coloring.cpp +600 -0
  86. warp/native/cuda_util.cpp +14 -0
  87. warp/native/cuda_util.h +2 -1
  88. warp/native/fabric.h +8 -0
  89. warp/native/hashgrid.h +4 -0
  90. warp/native/marching.cu +8 -0
  91. warp/native/mat.h +14 -3
  92. warp/native/mathdx.cpp +59 -0
  93. warp/native/mesh.h +4 -0
  94. warp/native/range.h +13 -1
  95. warp/native/reduce.cpp +9 -1
  96. warp/native/reduce.cu +7 -0
  97. warp/native/runlength_encode.cpp +9 -1
  98. warp/native/runlength_encode.cu +7 -1
  99. warp/native/scan.cpp +8 -0
  100. warp/native/scan.cu +8 -0
  101. warp/native/scan.h +8 -1
  102. warp/native/sparse.cpp +8 -0
  103. warp/native/sparse.cu +8 -0
  104. warp/native/temp_buffer.h +7 -0
  105. warp/native/tile.h +1857 -0
  106. warp/native/tile_gemm.h +341 -0
  107. warp/native/tile_reduce.h +210 -0
  108. warp/native/volume_builder.cu +8 -0
  109. warp/native/volume_builder.h +8 -0
  110. warp/native/warp.cpp +10 -2
  111. warp/native/warp.cu +369 -15
  112. warp/native/warp.h +12 -2
  113. warp/optim/adam.py +39 -4
  114. warp/paddle.py +29 -12
  115. warp/render/render_opengl.py +137 -65
  116. warp/sim/graph_coloring.py +292 -0
  117. warp/sim/integrator_euler.py +4 -2
  118. warp/sim/integrator_featherstone.py +115 -44
  119. warp/sim/integrator_vbd.py +6 -0
  120. warp/sim/model.py +90 -17
  121. warp/stubs.py +651 -85
  122. warp/tape.py +12 -7
  123. warp/tests/assets/pixel.npy +0 -0
  124. warp/tests/aux_test_instancing_gc.py +18 -0
  125. warp/tests/test_array.py +207 -48
  126. warp/tests/test_closest_point_edge_edge.py +8 -8
  127. warp/tests/test_codegen.py +120 -1
  128. warp/tests/test_codegen_instancing.py +30 -0
  129. warp/tests/test_collision.py +110 -0
  130. warp/tests/test_coloring.py +241 -0
  131. warp/tests/test_context.py +34 -0
  132. warp/tests/test_examples.py +18 -4
  133. warp/tests/test_fabricarray.py +33 -0
  134. warp/tests/test_fem.py +453 -113
  135. warp/tests/test_func.py +48 -1
  136. warp/tests/test_generics.py +52 -0
  137. warp/tests/test_iter.py +68 -0
  138. warp/tests/test_mat_scalar_ops.py +1 -1
  139. warp/tests/test_mesh_query_point.py +5 -4
  140. warp/tests/test_module_hashing.py +23 -0
  141. warp/tests/test_paddle.py +27 -87
  142. warp/tests/test_print.py +191 -1
  143. warp/tests/test_spatial.py +1 -1
  144. warp/tests/test_tile.py +700 -0
  145. warp/tests/test_tile_mathdx.py +144 -0
  146. warp/tests/test_tile_mlp.py +383 -0
  147. warp/tests/test_tile_reduce.py +374 -0
  148. warp/tests/test_tile_shared_memory.py +190 -0
  149. warp/tests/test_vbd.py +12 -20
  150. warp/tests/test_volume.py +43 -0
  151. warp/tests/unittest_suites.py +23 -2
  152. warp/tests/unittest_utils.py +4 -0
  153. warp/types.py +339 -73
  154. warp/utils.py +22 -1
  155. {warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/METADATA +33 -7
  156. {warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/RECORD +159 -132
  157. {warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/WHEEL +1 -1
  158. warp/fem/field/test.py +0 -180
  159. warp/fem/field/trial.py +0 -183
  160. warp/fem/space/collocated_function_space.py +0 -102
  161. warp/fem/space/quadmesh_2d_function_space.py +0 -261
  162. warp/fem/space/trimesh_2d_function_space.py +0 -153
  163. {warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/LICENSE.md +0 -0
  164. {warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/top_level.txt +0 -0
warp/tape.py CHANGED
@@ -15,7 +15,7 @@ class Tape:
15
15
  """
16
16
  Record kernel launches within a Tape scope to enable automatic differentiation.
17
17
  Gradients can be computed after the operations have been recorded on the tape via
18
- ``tape.backward()``.
18
+ :meth:`Tape.backward()`.
19
19
 
20
20
  Example
21
21
  -------
@@ -131,6 +131,7 @@ class Tape:
131
131
  inputs = launch[3]
132
132
  outputs = launch[4]
133
133
  device = launch[5]
134
+ block_dim = launch[6]
134
135
 
135
136
  adj_inputs = []
136
137
  adj_outputs = []
@@ -153,13 +154,14 @@ class Tape:
153
154
  device=device,
154
155
  adjoint=True,
155
156
  max_blocks=max_blocks,
157
+ block_dim=block_dim,
156
158
  )
157
159
 
158
160
  # record a kernel launch on the tape
159
- def record_launch(self, kernel, dim, max_blocks, inputs, outputs, device, metadata=None):
161
+ def record_launch(self, kernel, dim, max_blocks, inputs, outputs, device, block_dim=0, metadata=None):
160
162
  if metadata is None:
161
163
  metadata = {}
162
- self.launches.append([kernel, dim, max_blocks, inputs, outputs, device, metadata])
164
+ self.launches.append([kernel, dim, max_blocks, inputs, outputs, device, block_dim, metadata])
163
165
 
164
166
  def record_func(self, backward, arrays):
165
167
  """
@@ -614,7 +616,9 @@ class ArrayStatsVisitor(TapeVisitor):
614
616
  self.array_grad_stats.insert(0, grad_stats)
615
617
 
616
618
 
617
- Launch = namedtuple("Launch", ["id", "kernel", "dim", "max_blocks", "inputs", "outputs", "device", "metadata"])
619
+ Launch = namedtuple(
620
+ "Launch", ["id", "kernel", "dim", "max_blocks", "inputs", "outputs", "device", "block_dim", "metadata"]
621
+ )
618
622
  RepeatedSequence = namedtuple("RepeatedSequence", ["start", "end", "repetitions"])
619
623
 
620
624
 
@@ -645,8 +649,8 @@ def visit_tape(
645
649
  def get_launch_id(launch):
646
650
  kernel = launch[0]
647
651
  suffix = ""
648
- if len(launch) > 6:
649
- metadata = launch[6]
652
+ if len(launch) > 7:
653
+ metadata = launch[7]
650
654
  # calling function helps to identify unique launches
651
655
  if "caller" in metadata:
652
656
  caller = metadata["caller"]
@@ -680,7 +684,8 @@ def visit_tape(
680
684
  inputs=launch[3],
681
685
  outputs=launch[4],
682
686
  device=launch[5],
683
- metadata=launch[6] if len(launch) > 6 else {},
687
+ block_dim=launch[6],
688
+ metadata=launch[7] if len(launch) > 7 else {},
684
689
  )
685
690
  for launch in kernel_launches
686
691
  ]
warp/tests/assets/pixel.npy
Binary file
warp/tests/aux_test_instancing_gc.py ADDED
@@ -0,0 +1,18 @@
1
+ # Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
2
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
3
+ # and proprietary rights in and to this software, related documentation
4
+ # and any modifications thereto. Any use, reproduction, disclosure or
5
+ # distribution of this software and related documentation without an express
6
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
7
+
8
+ """Helper module used in test_codegen_instancing.py"""
9
+
10
+ import warp as wp
11
+
12
+
13
+ def create_kernel_closure(value: int):
14
+ @wp.kernel
15
+ def k(a: wp.array(dtype=int)):
16
+ a[0] = value
17
+
18
+ return k
warp/tests/test_array.py CHANGED
@@ -6,6 +6,7 @@
6
6
  # license agreement from NVIDIA CORPORATION is strictly prohibited.
7
7
 
8
8
  import unittest
9
+ from typing import Any
9
10
 
10
11
  import numpy as np
11
12
 
@@ -2361,64 +2362,85 @@ def test_array_from_cai(test, device):
2361
2362
  assert_np_equal(arr_warp.numpy(), np.array([[2, 1, 1], [1, 0, 0], [1, 0, 0]]))
2362
2363
 
2363
2364
 
2364
- def test_array_inplace_ops(test, device):
2365
- @wp.kernel
2366
- def inplace_add_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
2367
- i = wp.tid()
2368
- x[i] += y[i]
2365
+ @wp.kernel
2366
+ def inplace_add_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
2367
+ i = wp.tid()
2368
+ x[i] += y[i]
2369
2369
 
2370
- @wp.kernel
2371
- def inplace_add_2d(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float)):
2372
- i, j = wp.tid()
2373
- x[i, j] += y[i, j]
2374
2370
 
2375
- @wp.kernel
2376
- def inplace_add_3d(x: wp.array3d(dtype=float), y: wp.array3d(dtype=float)):
2377
- i, j, k = wp.tid()
2378
- x[i, j, k] += y[i, j, k]
2371
+ @wp.kernel
2372
+ def inplace_add_2d(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float)):
2373
+ i, j = wp.tid()
2374
+ x[i, j] += y[i, j]
2379
2375
 
2380
- @wp.kernel
2381
- def inplace_add_4d(x: wp.array4d(dtype=float), y: wp.array4d(dtype=float)):
2382
- i, j, k, l = wp.tid()
2383
- x[i, j, k, l] += y[i, j, k, l]
2384
2376
 
2385
- @wp.kernel
2386
- def inplace_sub_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
2387
- i = wp.tid()
2388
- x[i] -= y[i]
2377
+ @wp.kernel
2378
+ def inplace_add_3d(x: wp.array3d(dtype=float), y: wp.array3d(dtype=float)):
2379
+ i, j, k = wp.tid()
2380
+ x[i, j, k] += y[i, j, k]
2389
2381
 
2390
- @wp.kernel
2391
- def inplace_sub_2d(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float)):
2392
- i, j = wp.tid()
2393
- x[i, j] -= y[i, j]
2394
2382
 
2395
- @wp.kernel
2396
- def inplace_sub_3d(x: wp.array3d(dtype=float), y: wp.array3d(dtype=float)):
2397
- i, j, k = wp.tid()
2398
- x[i, j, k] -= y[i, j, k]
2383
+ @wp.kernel
2384
+ def inplace_add_4d(x: wp.array4d(dtype=float), y: wp.array4d(dtype=float)):
2385
+ i, j, k, l = wp.tid()
2386
+ x[i, j, k, l] += y[i, j, k, l]
2399
2387
 
2400
- @wp.kernel
2401
- def inplace_sub_4d(x: wp.array4d(dtype=float), y: wp.array4d(dtype=float)):
2402
- i, j, k, l = wp.tid()
2403
- x[i, j, k, l] -= y[i, j, k, l]
2404
2388
 
2405
- @wp.kernel
2406
- def inplace_add_vecs(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3)):
2407
- i = wp.tid()
2408
- x[i] += y[i]
2389
+ @wp.kernel
2390
+ def inplace_sub_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
2391
+ i = wp.tid()
2392
+ x[i] -= y[i]
2409
2393
 
2410
- @wp.kernel
2411
- def inplace_add_mats(x: wp.array(dtype=wp.mat33), y: wp.array(dtype=wp.mat33)):
2412
- i = wp.tid()
2413
- x[i] += y[i]
2414
2394
 
2415
- @wp.kernel
2416
- def inplace_add_rhs(x: wp.array(dtype=float), y: wp.array(dtype=float), z: wp.array(dtype=float)):
2417
- i = wp.tid()
2418
- a = y[i]
2419
- a += x[i]
2420
- wp.atomic_add(z, 0, a)
2395
+ @wp.kernel
2396
+ def inplace_sub_2d(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float)):
2397
+ i, j = wp.tid()
2398
+ x[i, j] -= y[i, j]
2399
+
2400
+
2401
+ @wp.kernel
2402
+ def inplace_sub_3d(x: wp.array3d(dtype=float), y: wp.array3d(dtype=float)):
2403
+ i, j, k = wp.tid()
2404
+ x[i, j, k] -= y[i, j, k]
2405
+
2406
+
2407
+ @wp.kernel
2408
+ def inplace_sub_4d(x: wp.array4d(dtype=float), y: wp.array4d(dtype=float)):
2409
+ i, j, k, l = wp.tid()
2410
+ x[i, j, k, l] -= y[i, j, k, l]
2411
+
2421
2412
 
2413
+ @wp.kernel
2414
+ def inplace_add_vecs(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3)):
2415
+ i = wp.tid()
2416
+ x[i] += y[i]
2417
+
2418
+
2419
+ @wp.kernel
2420
+ def inplace_add_mats(x: wp.array(dtype=wp.mat33), y: wp.array(dtype=wp.mat33)):
2421
+ i = wp.tid()
2422
+ x[i] += y[i]
2423
+
2424
+
2425
+ @wp.kernel
2426
+ def inplace_add_rhs(x: wp.array(dtype=float), y: wp.array(dtype=float), z: wp.array(dtype=float)):
2427
+ i = wp.tid()
2428
+ a = y[i]
2429
+ a += x[i]
2430
+ wp.atomic_add(z, 0, a)
2431
+
2432
+
2433
+ vec9 = wp.vec(length=9, dtype=float)
2434
+
2435
+
2436
+ @wp.kernel
2437
+ def inplace_add_custom_vec(x: wp.array(dtype=vec9), y: wp.array(dtype=vec9)):
2438
+ i = wp.tid()
2439
+ x[i] += y[i]
2440
+ x[i] += y[i]
2441
+
2442
+
2443
+ def test_array_inplace_diff_ops(test, device):
2422
2444
  N = 3
2423
2445
  x1 = wp.ones(N, dtype=float, requires_grad=True, device=device)
2424
2446
  x2 = wp.ones((N, N), dtype=float, requires_grad=True, device=device)
@@ -2526,6 +2548,60 @@ def test_array_inplace_ops(test, device):
2526
2548
 
2527
2549
  assert_np_equal(x.grad.numpy(), np.ones(1, dtype=float))
2528
2550
  assert_np_equal(y.grad.numpy(), np.ones(1, dtype=float))
2551
+ tape.reset()
2552
+
2553
+ x = wp.zeros(1, dtype=vec9, requires_grad=True, device=device)
2554
+ y = wp.ones(1, dtype=vec9, requires_grad=True, device=device)
2555
+
2556
+ with tape:
2557
+ wp.launch(inplace_add_custom_vec, 1, inputs=[x, y], device=device)
2558
+
2559
+ tape.backward(grads={x: wp.ones_like(x)})
2560
+
2561
+ assert_np_equal(x.numpy(), np.full((1, 9), 2.0, dtype=float))
2562
+ assert_np_equal(y.grad.numpy(), np.full((1, 9), 2.0, dtype=float))
2563
+
2564
+
2565
+ @wp.kernel
2566
+ def inplace_mul_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
2567
+ i = wp.tid()
2568
+ x[i] *= y[i]
2569
+
2570
+
2571
+ @wp.kernel
2572
+ def inplace_div_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
2573
+ i = wp.tid()
2574
+ x[i] /= y[i]
2575
+
2576
+
2577
+ @wp.kernel
2578
+ def inplace_add_non_atomic_types(x: wp.array(dtype=Any), y: wp.array(dtype=Any)):
2579
+ i = wp.tid()
2580
+ x[i] += y[i]
2581
+
2582
+
2583
+ uint16vec3 = wp.vec(length=3, dtype=wp.uint16)
2584
+
2585
+
2586
+ def test_array_inplace_non_diff_ops(test, device):
2587
+ N = 3
2588
+ x1 = wp.full(N, value=10.0, dtype=float, device=device)
2589
+ y1 = wp.full(N, value=5.0, dtype=float, device=device)
2590
+
2591
+ wp.launch(inplace_mul_1d, N, inputs=[x1, y1], device=device)
2592
+ assert_np_equal(x1.numpy(), np.full(N, fill_value=50.0, dtype=float))
2593
+
2594
+ x1.fill_(10.0)
2595
+ y1.fill_(5.0)
2596
+ wp.launch(inplace_div_1d, N, inputs=[x1, y1], device=device)
2597
+ assert_np_equal(x1.numpy(), np.full(N, fill_value=2.0, dtype=float))
2598
+
2599
+ for dtype in wp.types.non_atomic_types + (wp.vec2b, wp.vec2ub, wp.vec2s, wp.vec2us, uint16vec3):
2600
+ x = wp.full(N, value=0, dtype=dtype, device=device)
2601
+ y = wp.full(N, value=1, dtype=dtype, device=device)
2602
+
2603
+ wp.launch(inplace_add_non_atomic_types, N, inputs=[x, y], device=device)
2604
+ assert_np_equal(x.numpy(), y.numpy())
2529
2605
 
2530
2606
 
2531
2607
  @wp.kernel
@@ -2609,6 +2685,87 @@ def test_numpy_array_interface(test, device):
2609
2685
  assert a1.strides == a2.strides
2610
2686
 
2611
2687
 
2688
+ @wp.kernel
2689
+ def kernel_indexing_types(
2690
+ arr_1d: wp.array(dtype=wp.int32, ndim=1),
2691
+ arr_2d: wp.array(dtype=wp.int32, ndim=2),
2692
+ arr_3d: wp.array(dtype=wp.int32, ndim=3),
2693
+ arr_4d: wp.array(dtype=wp.int32, ndim=4),
2694
+ ):
2695
+ x = arr_1d[wp.uint8(0)]
2696
+ y = arr_1d[wp.int16(1)]
2697
+ z = arr_1d[wp.uint32(2)]
2698
+ w = arr_1d[wp.int64(3)]
2699
+
2700
+ x = arr_2d[wp.uint8(0), wp.uint8(0)]
2701
+ y = arr_2d[wp.int16(1), wp.int16(1)]
2702
+ z = arr_2d[wp.uint32(2), wp.uint32(2)]
2703
+ w = arr_2d[wp.int64(3), wp.int64(3)]
2704
+
2705
+ x = arr_3d[wp.uint8(0), wp.uint8(0), wp.uint8(0)]
2706
+ y = arr_3d[wp.int16(1), wp.int16(1), wp.int16(1)]
2707
+ z = arr_3d[wp.uint32(2), wp.uint32(2), wp.uint32(2)]
2708
+ w = arr_3d[wp.int64(3), wp.int64(3), wp.int64(3)]
2709
+
2710
+ x = arr_4d[wp.uint8(0), wp.uint8(0), wp.uint8(0), wp.uint8(0)]
2711
+ y = arr_4d[wp.int16(1), wp.int16(1), wp.int16(1), wp.int16(1)]
2712
+ z = arr_4d[wp.uint32(2), wp.uint32(2), wp.uint32(2), wp.uint32(2)]
2713
+ w = arr_4d[wp.int64(3), wp.int64(3), wp.int64(3), wp.int64(3)]
2714
+
2715
+ arr_1d[wp.uint8(0)] = 123
2716
+ arr_1d[wp.int16(1)] = 123
2717
+ arr_1d[wp.uint32(2)] = 123
2718
+ arr_1d[wp.int64(3)] = 123
2719
+
2720
+ arr_2d[wp.uint8(0), wp.uint8(0)] = 123
2721
+ arr_2d[wp.int16(1), wp.int16(1)] = 123
2722
+ arr_2d[wp.uint32(2), wp.uint32(2)] = 123
2723
+ arr_2d[wp.int64(3), wp.int64(3)] = 123
2724
+
2725
+ arr_3d[wp.uint8(0), wp.uint8(0), wp.uint8(0)] = 123
2726
+ arr_3d[wp.int16(1), wp.int16(1), wp.int16(1)] = 123
2727
+ arr_3d[wp.uint32(2), wp.uint32(2), wp.uint32(2)] = 123
2728
+ arr_3d[wp.int64(3), wp.int64(3), wp.int64(3)] = 123
2729
+
2730
+ arr_4d[wp.uint8(0), wp.uint8(0), wp.uint8(0), wp.uint8(0)] = 123
2731
+ arr_4d[wp.int16(1), wp.int16(1), wp.int16(1), wp.int16(1)] = 123
2732
+ arr_4d[wp.uint32(2), wp.uint32(2), wp.uint32(2), wp.uint32(2)] = 123
2733
+ arr_4d[wp.int64(3), wp.int64(3), wp.int64(3), wp.int64(3)] = 123
2734
+
2735
+ wp.atomic_add(arr_1d, wp.uint8(0), 123)
2736
+ wp.atomic_sub(arr_1d, wp.int16(1), 123)
2737
+ wp.atomic_min(arr_1d, wp.uint32(2), 123)
2738
+ wp.atomic_max(arr_1d, wp.int64(3), 123)
2739
+
2740
+ wp.atomic_add(arr_2d, wp.uint8(0), wp.uint8(0), 123)
2741
+ wp.atomic_sub(arr_2d, wp.int16(1), wp.int16(1), 123)
2742
+ wp.atomic_min(arr_2d, wp.uint32(2), wp.uint32(2), 123)
2743
+ wp.atomic_max(arr_2d, wp.int64(3), wp.int64(3), 123)
2744
+
2745
+ wp.atomic_add(arr_3d, wp.uint8(0), wp.uint8(0), wp.uint8(0), 123)
2746
+ wp.atomic_sub(arr_3d, wp.int16(1), wp.int16(1), wp.int16(1), 123)
2747
+ wp.atomic_min(arr_3d, wp.uint32(2), wp.uint32(2), wp.uint32(2), 123)
2748
+ wp.atomic_max(arr_3d, wp.int64(3), wp.int64(3), wp.int64(3), 123)
2749
+
2750
+ wp.atomic_add(arr_4d, wp.uint8(0), wp.uint8(0), wp.uint8(0), wp.uint8(0), 123)
2751
+ wp.atomic_sub(arr_4d, wp.int16(1), wp.int16(1), wp.int16(1), wp.int16(1), 123)
2752
+ wp.atomic_min(arr_4d, wp.uint32(2), wp.uint32(2), wp.uint32(2), wp.uint32(2), 123)
2753
+ wp.atomic_max(arr_4d, wp.int64(3), wp.int64(3), wp.int64(3), wp.int64(3), 123)
2754
+
2755
+
2756
+ def test_indexing_types(test, device):
2757
+ arr_1d = wp.zeros(shape=(4,), dtype=wp.int32, device=device)
2758
+ arr_2d = wp.zeros(shape=(4, 4), dtype=wp.int32, device=device)
2759
+ arr_3d = wp.zeros(shape=(4, 4, 4), dtype=wp.int32, device=device)
2760
+ arr_4d = wp.zeros(shape=(4, 4, 4, 4), dtype=wp.int32, device=device)
2761
+ wp.launch(
2762
+ kernel=kernel_indexing_types,
2763
+ dim=1,
2764
+ inputs=(arr_1d, arr_2d, arr_3d, arr_4d),
2765
+ device=device,
2766
+ )
2767
+
2768
+
2612
2769
  devices = get_test_devices()
2613
2770
 
2614
2771
 
@@ -2669,12 +2826,14 @@ add_function_test(TestArray, "test_array_from_numpy", test_array_from_numpy, dev
2669
2826
  add_function_test(TestArray, "test_array_aliasing_from_numpy", test_array_aliasing_from_numpy, devices=["cpu"])
2670
2827
  add_function_test(TestArray, "test_numpy_array_interface", test_numpy_array_interface, devices=["cpu"])
2671
2828
 
2672
- add_function_test(TestArray, "test_array_inplace_ops", test_array_inplace_ops, devices=devices)
2829
+ add_function_test(TestArray, "test_array_inplace_diff_ops", test_array_inplace_diff_ops, devices=devices)
2830
+ add_function_test(TestArray, "test_array_inplace_non_diff_ops", test_array_inplace_non_diff_ops, devices=devices)
2673
2831
  add_function_test(TestArray, "test_direct_from_numpy", test_direct_from_numpy, devices=["cpu"])
2674
2832
  add_function_test(TestArray, "test_kernel_array_from_ptr", test_kernel_array_from_ptr, devices=devices)
2675
2833
 
2676
2834
  add_function_test(TestArray, "test_array_from_int32_domain", test_array_from_int32_domain, devices=devices)
2677
2835
  add_function_test(TestArray, "test_array_from_int64_domain", test_array_from_int64_domain, devices=devices)
2836
+ add_function_test(TestArray, "test_indexing_types", test_indexing_types, devices=devices)
2678
2837
 
2679
2838
  try:
2680
2839
  import torch
warp/tests/test_closest_point_edge_edge.py CHANGED
@@ -220,12 +220,12 @@ def check_edge_closest_point_sufficient_necessary_kernel(
220
220
 
221
221
  def check_edge_closest_point_random(test, device):
222
222
  num_tests = 100000
223
- np.random.seed(12345)
224
- p1 = wp.array(np.random.randn(num_tests, 3), dtype=wp.vec3, device=device)
225
- q1 = wp.array(np.random.randn(num_tests, 3), dtype=wp.vec3, device=device)
223
+ rng = np.random.default_rng(123)
224
+ p1 = wp.array(rng.standard_normal(size=(num_tests, 3)), dtype=wp.vec3, device=device)
225
+ q1 = wp.array(rng.standard_normal(size=(num_tests, 3)), dtype=wp.vec3, device=device)
226
226
 
227
- p2 = wp.array(np.random.randn(num_tests, 3), dtype=wp.vec3, device=device)
228
- q2 = wp.array(np.random.randn(num_tests, 3), dtype=wp.vec3, device=device)
227
+ p2 = wp.array(rng.standard_normal(size=(num_tests, 3)), dtype=wp.vec3, device=device)
228
+ q2 = wp.array(rng.standard_normal(size=(num_tests, 3)), dtype=wp.vec3, device=device)
229
229
 
230
230
  wp.launch(
231
231
  kernel=check_edge_closest_point_sufficient_necessary_kernel,
@@ -235,10 +235,10 @@ def check_edge_closest_point_random(test, device):
235
235
  )
236
236
 
237
237
  # parallel edges
238
- p1 = np.random.randn(num_tests, 3)
239
- q1 = np.random.randn(num_tests, 3)
238
+ p1 = rng.standard_normal(size=(num_tests, 3))
239
+ q1 = rng.standard_normal(size=(num_tests, 3))
240
240
 
241
- shifts = np.random.randn(num_tests, 3)
241
+ shifts = rng.standard_normal(size=(num_tests, 3))
242
242
 
243
243
  p2 = p1 + shifts
244
244
  q2 = q1 + shifts
warp/tests/test_codegen.py CHANGED
@@ -7,11 +7,27 @@
7
7
 
8
8
  import sys
9
9
  import unittest
10
+ from typing import Tuple
10
11
 
11
12
  import warp as wp
12
13
  from warp.tests.unittest_utils import *
13
14
 
14
15
 
16
+ @wp.kernel
17
+ def test_expect():
18
+ a = 1.0
19
+ a += 2.0
20
+
21
+ wp.expect_eq(123, 123)
22
+ wp.expect_neq(123, 234)
23
+
24
+ wp.expect_eq(wp.vec2(1.0, 2.0), wp.vec2(1.0, 2.0))
25
+ wp.expect_neq(wp.vec2(1.0, 2.0), wp.vec2(2.0, 3.0))
26
+
27
+ wp.expect_eq(wp.mat22(1.0, 2.0, 3.0, 4.0), wp.mat22(1.0, 2.0, 3.0, 4.0))
28
+ wp.expect_neq(wp.mat22(1.0, 2.0, 3.0, 4.0), wp.mat22(2.0, 3.0, 4.0, 5.0))
29
+
30
+
15
31
  @wp.kernel
16
32
  def test_rename():
17
33
  a = 0
@@ -534,6 +550,103 @@ def test_error_mutating_constant_in_dynamic_loop(test, device):
534
550
  )
535
551
  assert_np_equal(output.numpy(), np.ones([num_threads, const_a + const_b + dyn_a + dyn_b + dyn_c + 1]))
536
552
 
553
+ @wp.kernel
554
+ def static_then_dynamic_loop_kernel(mats: wp.array(dtype=wp.mat33d)):
555
+ tid = wp.tid()
556
+ mat = wp.mat33d()
557
+ for i in range(3):
558
+ for j in range(3):
559
+ mat[i, j] = wp.float64(0.0)
560
+
561
+ dim = 2
562
+ for i in range(dim + 1):
563
+ for j in range(dim + 1):
564
+ mat[i, j] = wp.float64(1.0)
565
+
566
+ mats[tid] = mat
567
+
568
+ mats = wp.empty(1, dtype=wp.mat33d, device=device)
569
+ wp.launch(static_then_dynamic_loop_kernel, dim=1, inputs=[mats], device=device)
570
+ assert_np_equal(mats.numpy(), np.ones((1, 3, 3)))
571
+
572
+ @wp.kernel
573
+ def dynamic_then_static_loop_kernel(mats: wp.array(dtype=wp.mat33d)):
574
+ tid = wp.tid()
575
+ mat = wp.mat33d()
576
+
577
+ dim = 2
578
+ for i in range(dim + 1):
579
+ for j in range(dim + 1):
580
+ mat[i, j] = wp.float64(1.0)
581
+
582
+ for i in range(3):
583
+ for j in range(3):
584
+ mat[i, j] = wp.float64(0.0)
585
+
586
+ mats[tid] = mat
587
+
588
+ mats = wp.empty(1, dtype=wp.mat33d, device=device)
589
+ wp.launch(dynamic_then_static_loop_kernel, dim=1, inputs=[mats], device=device)
590
+ assert_np_equal(mats.numpy(), np.zeros((1, 3, 3)))
591
+
592
+
593
+ def test_error_return_annotation_mismatch(test, device):
594
+ @wp.func
595
+ def foo_1(x: wp.int32) -> wp.int16:
596
+ return wp.int8(x)
597
+
598
+ def kernel_1_fn():
599
+ x = foo_1(123)
600
+
601
+ @wp.func
602
+ def foo_2(x: int) -> int:
603
+ return (x + x, x * x)
604
+
605
+ def kernel_2_fn():
606
+ x = foo_2(123)
607
+
608
+ @wp.func
609
+ def foo_3(x: int) -> Tuple[int, int]:
610
+ return (x, 1.23)
611
+
612
+ def kernel_3_fn():
613
+ x, y = foo_3(123)
614
+
615
+ @wp.func
616
+ def foo_4(x: int) -> Tuple[int, int, int]:
617
+ return (x + x, x * x)
618
+
619
+ def kernel_4_fn():
620
+ x, y, z = foo_4(123)
621
+
622
+ kernel = wp.Kernel(func=kernel_1_fn)
623
+ with test.assertRaisesRegex(
624
+ wp.codegen.WarpCodegenError,
625
+ r"The function `foo_1` has its return type annotated as `int16` but the code returns a value of type `int8`.",
626
+ ):
627
+ wp.launch(kernel, dim=1, device=device)
628
+
629
+ kernel = wp.Kernel(func=kernel_2_fn)
630
+ with test.assertRaisesRegex(
631
+ wp.codegen.WarpCodegenError,
632
+ r"The function `foo_2` has its return type annotated as `int` but the code returns 2 values.",
633
+ ):
634
+ wp.launch(kernel, dim=1, device=device)
635
+
636
+ kernel = wp.Kernel(func=kernel_3_fn)
637
+ with test.assertRaisesRegex(
638
+ wp.codegen.WarpCodegenError,
639
+ r"The function `foo_3` has its return type annotated as `Tuple\[int, int\]` but the code returns a tuple with types `\(int32, float32\)`.",
640
+ ):
641
+ wp.launch(kernel, dim=1, device=device)
642
+
643
+ kernel = wp.Kernel(func=kernel_4_fn)
644
+ with test.assertRaisesRegex(
645
+ wp.codegen.WarpCodegenError,
646
+ r"The function `foo_4` has its return type annotated as a tuple of 3 elements but the code returns 2 values.",
647
+ ):
648
+ wp.launch(kernel, dim=1, device=device)
649
+
537
650
 
538
651
  @wp.kernel
539
652
  def test_call_syntax():
@@ -583,6 +696,7 @@ class TestCodeGen(unittest.TestCase):
583
696
 
584
697
  devices = get_test_devices()
585
698
 
699
+ add_kernel_test(TestCodeGen, name="test_expect", kernel=test_expect, dim=1, devices=devices)
586
700
  add_kernel_test(TestCodeGen, name="test_inplace", kernel=test_inplace, dim=1, devices=devices)
587
701
  add_kernel_test(TestCodeGen, name="test_rename", kernel=test_rename, dim=1, devices=devices)
588
702
  add_kernel_test(TestCodeGen, name="test_constant", kernel=test_constant, inputs=[1.0], dim=1, devices=devices)
@@ -719,7 +833,12 @@ add_function_test(
719
833
  name="test_error_mutating_constant_in_dynamic_loop",
720
834
  devices=devices,
721
835
  )
722
-
836
+ add_function_test(
837
+ TestCodeGen,
838
+ func=test_error_return_annotation_mismatch,
839
+ name="test_error_return_annotation_mismatch",
840
+ devices=devices,
841
+ )
723
842
  add_kernel_test(TestCodeGen, name="test_call_syntax", kernel=test_call_syntax, dim=1, devices=devices)
724
843
  add_kernel_test(TestCodeGen, name="test_shadow_builtin", kernel=test_shadow_builtin, dim=1, devices=devices)
725
844
  add_kernel_test(TestCodeGen, name="test_while_condition_eval", kernel=test_while_condition_eval, dim=1, devices=devices)
warp/tests/test_codegen_instancing.py CHANGED
@@ -1287,6 +1287,35 @@ def test_module_mark_modified(test, device):
1287
1287
  # =======================================================================
1288
1288
 
1289
1289
 
1290
+ def test_garbage_collection(test, device):
1291
+ """Test that dynamically generated kernels without user references are not retained in the module."""
1292
+
1293
+ # use a helper module with a known kernel count
1294
+ import warp.tests.aux_test_instancing_gc as gc_test_module
1295
+
1296
+ with wp.ScopedDevice(device):
1297
+ a = wp.zeros(1, dtype=int)
1298
+
1299
+ for i in range(10):
1300
+ # create a unique kernel on each iteration
1301
+ k = gc_test_module.create_kernel_closure(i)
1302
+
1303
+ # import gc
1304
+ # gc.collect()
1305
+
1306
+ # since we don't keep references to the previous kernels,
1307
+ # they should be garbage-collected and not appear in the module
1308
+ k.module.load(device=device)
1309
+ test.assertEqual(len(k.module.live_kernels), 1)
1310
+
1311
+ # test the kernel
1312
+ wp.launch(k, dim=1, inputs=[a])
1313
+ test.assertEqual(a.numpy()[0], i)
1314
+
1315
+
1316
+ # =======================================================================
1317
+
1318
+
1290
1319
  class TestCodeGenInstancing(unittest.TestCase):
1291
1320
  pass
1292
1321
 
@@ -1450,6 +1479,7 @@ add_function_test(TestCodeGenInstancing, func=test_create_kernel_loop, name="tes
1450
1479
  add_function_test(
1451
1480
  TestCodeGenInstancing, func=test_module_mark_modified, name="test_module_mark_modified", devices=devices
1452
1481
  )
1482
+ add_function_test(TestCodeGenInstancing, func=test_garbage_collection, name="test_garbage_collection", devices=devices)
1453
1483
 
1454
1484
 
1455
1485
  if __name__ == "__main__":