warp-lang 1.6.2__py3-none-win_amd64.whl → 1.7.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (179)
  1. warp/__init__.py +7 -1
  2. warp/bin/warp-clang.dll +0 -0
  3. warp/bin/warp.dll +0 -0
  4. warp/build.py +410 -0
  5. warp/build_dll.py +6 -14
  6. warp/builtins.py +452 -362
  7. warp/codegen.py +179 -119
  8. warp/config.py +42 -6
  9. warp/context.py +490 -271
  10. warp/dlpack.py +8 -6
  11. warp/examples/assets/nonuniform.usd +0 -0
  12. warp/examples/assets/nvidia_logo.png +0 -0
  13. warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
  14. warp/examples/core/example_sample_mesh.py +300 -0
  15. warp/examples/fem/example_apic_fluid.py +1 -1
  16. warp/examples/fem/example_burgers.py +2 -2
  17. warp/examples/fem/example_deformed_geometry.py +1 -1
  18. warp/examples/fem/example_distortion_energy.py +1 -1
  19. warp/examples/fem/example_magnetostatics.py +6 -6
  20. warp/examples/fem/utils.py +9 -3
  21. warp/examples/interop/example_jax_callable.py +116 -0
  22. warp/examples/interop/example_jax_ffi_callback.py +132 -0
  23. warp/examples/interop/example_jax_kernel.py +205 -0
  24. warp/examples/optim/example_fluid_checkpoint.py +497 -0
  25. warp/examples/tile/example_tile_matmul.py +2 -4
  26. warp/fem/__init__.py +11 -1
  27. warp/fem/adaptivity.py +4 -4
  28. warp/fem/field/nodal_field.py +22 -68
  29. warp/fem/field/virtual.py +62 -23
  30. warp/fem/geometry/adaptive_nanogrid.py +9 -10
  31. warp/fem/geometry/closest_point.py +1 -1
  32. warp/fem/geometry/deformed_geometry.py +5 -2
  33. warp/fem/geometry/geometry.py +5 -0
  34. warp/fem/geometry/grid_2d.py +12 -12
  35. warp/fem/geometry/grid_3d.py +12 -15
  36. warp/fem/geometry/hexmesh.py +5 -7
  37. warp/fem/geometry/nanogrid.py +9 -11
  38. warp/fem/geometry/quadmesh.py +13 -13
  39. warp/fem/geometry/tetmesh.py +3 -4
  40. warp/fem/geometry/trimesh.py +3 -8
  41. warp/fem/integrate.py +262 -93
  42. warp/fem/linalg.py +5 -5
  43. warp/fem/quadrature/pic_quadrature.py +37 -22
  44. warp/fem/quadrature/quadrature.py +194 -25
  45. warp/fem/space/__init__.py +1 -1
  46. warp/fem/space/basis_function_space.py +4 -2
  47. warp/fem/space/basis_space.py +25 -18
  48. warp/fem/space/hexmesh_function_space.py +2 -2
  49. warp/fem/space/partition.py +6 -2
  50. warp/fem/space/quadmesh_function_space.py +8 -8
  51. warp/fem/space/shape/cube_shape_function.py +23 -23
  52. warp/fem/space/shape/square_shape_function.py +12 -12
  53. warp/fem/space/shape/triangle_shape_function.py +1 -1
  54. warp/fem/space/tetmesh_function_space.py +3 -3
  55. warp/fem/space/trimesh_function_space.py +2 -2
  56. warp/fem/utils.py +12 -6
  57. warp/jax.py +14 -1
  58. warp/jax_experimental/__init__.py +16 -0
  59. warp/{jax_experimental.py → jax_experimental/custom_call.py} +14 -27
  60. warp/jax_experimental/ffi.py +698 -0
  61. warp/jax_experimental/xla_ffi.py +602 -0
  62. warp/math.py +89 -0
  63. warp/native/array.h +13 -0
  64. warp/native/builtin.h +29 -3
  65. warp/native/bvh.cpp +3 -1
  66. warp/native/bvh.cu +42 -14
  67. warp/native/bvh.h +2 -1
  68. warp/native/clang/clang.cpp +30 -3
  69. warp/native/cuda_util.cpp +14 -0
  70. warp/native/cuda_util.h +2 -0
  71. warp/native/exports.h +68 -63
  72. warp/native/intersect.h +26 -26
  73. warp/native/intersect_adj.h +33 -33
  74. warp/native/marching.cu +1 -1
  75. warp/native/mat.h +513 -9
  76. warp/native/mesh.h +10 -10
  77. warp/native/quat.h +99 -11
  78. warp/native/rand.h +6 -0
  79. warp/native/sort.cpp +122 -59
  80. warp/native/sort.cu +152 -15
  81. warp/native/sort.h +8 -1
  82. warp/native/sparse.cpp +43 -22
  83. warp/native/sparse.cu +52 -17
  84. warp/native/svd.h +116 -0
  85. warp/native/tile.h +301 -105
  86. warp/native/tile_reduce.h +46 -3
  87. warp/native/vec.h +68 -7
  88. warp/native/volume.cpp +85 -113
  89. warp/native/volume_builder.cu +25 -10
  90. warp/native/volume_builder.h +6 -0
  91. warp/native/warp.cpp +5 -6
  92. warp/native/warp.cu +99 -10
  93. warp/native/warp.h +19 -10
  94. warp/optim/linear.py +10 -10
  95. warp/sim/articulation.py +4 -4
  96. warp/sim/collide.py +21 -10
  97. warp/sim/import_mjcf.py +449 -155
  98. warp/sim/import_urdf.py +32 -12
  99. warp/sim/integrator_euler.py +5 -5
  100. warp/sim/integrator_featherstone.py +3 -10
  101. warp/sim/integrator_vbd.py +207 -2
  102. warp/sim/integrator_xpbd.py +5 -5
  103. warp/sim/model.py +42 -13
  104. warp/sim/utils.py +2 -2
  105. warp/sparse.py +642 -555
  106. warp/stubs.py +216 -19
  107. warp/tests/__main__.py +0 -15
  108. warp/tests/cuda/__init__.py +0 -0
  109. warp/tests/{test_mempool.py → cuda/test_mempool.py} +39 -0
  110. warp/tests/{test_streams.py → cuda/test_streams.py} +71 -0
  111. warp/tests/geometry/__init__.py +0 -0
  112. warp/tests/{test_mesh_query_point.py → geometry/test_mesh_query_point.py} +66 -63
  113. warp/tests/{test_mesh_query_ray.py → geometry/test_mesh_query_ray.py} +1 -1
  114. warp/tests/{test_volume.py → geometry/test_volume.py} +41 -6
  115. warp/tests/interop/__init__.py +0 -0
  116. warp/tests/{test_dlpack.py → interop/test_dlpack.py} +28 -5
  117. warp/tests/sim/__init__.py +0 -0
  118. warp/tests/{disabled_kinematics.py → sim/disabled_kinematics.py} +9 -10
  119. warp/tests/{test_collision.py → sim/test_collision.py} +2 -2
  120. warp/tests/{test_model.py → sim/test_model.py} +40 -0
  121. warp/tests/{test_sim_kinematics.py → sim/test_sim_kinematics.py} +2 -1
  122. warp/tests/sim/test_vbd.py +597 -0
  123. warp/tests/test_bool.py +1 -1
  124. warp/tests/test_examples.py +28 -36
  125. warp/tests/test_fem.py +23 -4
  126. warp/tests/test_linear_solvers.py +0 -11
  127. warp/tests/test_mat.py +233 -79
  128. warp/tests/test_mat_scalar_ops.py +4 -4
  129. warp/tests/test_overwrite.py +0 -60
  130. warp/tests/test_quat.py +67 -46
  131. warp/tests/test_rand.py +44 -37
  132. warp/tests/test_sparse.py +47 -6
  133. warp/tests/test_spatial.py +75 -0
  134. warp/tests/test_static.py +1 -1
  135. warp/tests/test_utils.py +84 -4
  136. warp/tests/test_vec.py +46 -34
  137. warp/tests/tile/__init__.py +0 -0
  138. warp/tests/{test_tile.py → tile/test_tile.py} +136 -51
  139. warp/tests/{test_tile_load.py → tile/test_tile_load.py} +1 -1
  140. warp/tests/{test_tile_mathdx.py → tile/test_tile_mathdx.py} +9 -6
  141. warp/tests/{test_tile_mlp.py → tile/test_tile_mlp.py} +25 -14
  142. warp/tests/{test_tile_reduce.py → tile/test_tile_reduce.py} +60 -1
  143. warp/tests/{test_tile_view.py → tile/test_tile_view.py} +1 -1
  144. warp/tests/unittest_serial.py +1 -0
  145. warp/tests/unittest_suites.py +45 -59
  146. warp/tests/unittest_utils.py +2 -1
  147. warp/thirdparty/unittest_parallel.py +3 -1
  148. warp/types.py +110 -658
  149. warp/utils.py +137 -72
  150. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/METADATA +29 -7
  151. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/RECORD +172 -162
  152. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/WHEEL +1 -1
  153. warp/examples/optim/example_walker.py +0 -317
  154. warp/native/cutlass_gemm.cpp +0 -43
  155. warp/native/cutlass_gemm.cu +0 -382
  156. warp/tests/test_matmul.py +0 -511
  157. warp/tests/test_matmul_lite.py +0 -411
  158. warp/tests/test_vbd.py +0 -386
  159. warp/tests/unused_test_misc.py +0 -77
  160. /warp/tests/{test_async.py → cuda/test_async.py} +0 -0
  161. /warp/tests/{test_ipc.py → cuda/test_ipc.py} +0 -0
  162. /warp/tests/{test_multigpu.py → cuda/test_multigpu.py} +0 -0
  163. /warp/tests/{test_peer.py → cuda/test_peer.py} +0 -0
  164. /warp/tests/{test_pinned.py → cuda/test_pinned.py} +0 -0
  165. /warp/tests/{test_bvh.py → geometry/test_bvh.py} +0 -0
  166. /warp/tests/{test_hash_grid.py → geometry/test_hash_grid.py} +0 -0
  167. /warp/tests/{test_marching_cubes.py → geometry/test_marching_cubes.py} +0 -0
  168. /warp/tests/{test_mesh.py → geometry/test_mesh.py} +0 -0
  169. /warp/tests/{test_mesh_query_aabb.py → geometry/test_mesh_query_aabb.py} +0 -0
  170. /warp/tests/{test_volume_write.py → geometry/test_volume_write.py} +0 -0
  171. /warp/tests/{test_jax.py → interop/test_jax.py} +0 -0
  172. /warp/tests/{test_paddle.py → interop/test_paddle.py} +0 -0
  173. /warp/tests/{test_torch.py → interop/test_torch.py} +0 -0
  174. /warp/tests/{flaky_test_sim_grad.py → sim/flaky_test_sim_grad.py} +0 -0
  175. /warp/tests/{test_coloring.py → sim/test_coloring.py} +0 -0
  176. /warp/tests/{test_sim_grad_bounce_linear.py → sim/test_sim_grad_bounce_linear.py} +0 -0
  177. /warp/tests/{test_tile_shared_memory.py → tile/test_tile_shared_memory.py} +0 -0
  178. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info/licenses}/LICENSE.md +0 -0
  179. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/top_level.txt +0 -0
warp/tests/test_mat_scalar_ops.py CHANGED
@@ -334,19 +334,19 @@ def test_constructors(test, device, dtype, register_kernels=False):
334
334
  outcomponents: wp.array(dtype=wptype),
335
335
  ):
336
336
  # multiply outputs by 2 so we've got something to backpropagate:
337
- m2result = wptype(2) * mat22(vec2(input[0], input[2]), vec2(input[1], input[3]))
338
- m3result = wptype(2) * mat33(
337
+ m2result = wptype(2) * wp.matrix_from_cols(vec2(input[0], input[2]), vec2(input[1], input[3]))
338
+ m3result = wptype(2) * wp.matrix_from_cols(
339
339
  vec3(input[4], input[7], input[10]),
340
340
  vec3(input[5], input[8], input[11]),
341
341
  vec3(input[6], input[9], input[12]),
342
342
  )
343
- m4result = wptype(2) * mat44(
343
+ m4result = wptype(2) * wp.matrix_from_cols(
344
344
  vec4(input[13], input[17], input[21], input[25]),
345
345
  vec4(input[14], input[18], input[22], input[26]),
346
346
  vec4(input[15], input[19], input[23], input[27]),
347
347
  vec4(input[16], input[20], input[24], input[28]),
348
348
  )
349
- m5result = wptype(2) * mat55(
349
+ m5result = wptype(2) * wp.matrix_from_cols(
350
350
  vec5(input[29], input[34], input[39], input[44], input[49]),
351
351
  vec5(input[30], input[35], input[40], input[45], input[50]),
352
352
  vec5(input[31], input[36], input[41], input[46], input[51]),
warp/tests/test_overwrite.py CHANGED
@@ -23,8 +23,6 @@ import numpy as np
23
23
  import warp as wp
24
24
  from warp.tests.unittest_utils import *
25
25
 
26
- wp.init() # For wp.context.runtime.core.is_cutlass_enabled()
27
-
28
26
  # kernels are defined in the global scope, to ensure wp.Kernel objects are not GC'ed in the MGPU case
29
27
  # kernel args are assigned array modes during codegen, so wp.Kernel objects generated during codegen
30
28
  # must be preserved for overwrite tracking to function
@@ -378,62 +376,6 @@ def test_copy(test, device):
378
376
  wp.config.verify_autograd_array_access = saved_verify_autograd_array_access_setting
379
377
 
380
378
 
381
- # wp.matmul uses wp.record_func. Ensure array modes are propagated correctly.
382
- def test_matmul(test, device):
383
- if device.is_cuda and not wp.context.runtime.core.is_cutlass_enabled():
384
- test.skipTest("Warp was not built with CUTLASS support")
385
-
386
- saved_verify_autograd_array_access_setting = wp.config.verify_autograd_array_access
387
- try:
388
- wp.config.verify_autograd_array_access = True
389
-
390
- a = wp.ones((3, 3), dtype=float, requires_grad=True, device=device)
391
- b = wp.ones_like(a)
392
- c = wp.ones_like(a)
393
- d = wp.zeros_like(a)
394
-
395
- tape = wp.Tape()
396
-
397
- with tape:
398
- wp.matmul(a, b, c, d)
399
-
400
- test.assertEqual(a._is_read, True)
401
- test.assertEqual(b._is_read, True)
402
- test.assertEqual(c._is_read, True)
403
- test.assertEqual(d._is_read, False)
404
-
405
- finally:
406
- wp.config.verify_autograd_array_access = saved_verify_autograd_array_access_setting
407
-
408
-
409
- # wp.batched_matmul uses wp.record_func. Ensure array modes are propagated correctly.
410
- def test_batched_matmul(test, device):
411
- if device.is_cuda and not wp.context.runtime.core.is_cutlass_enabled():
412
- test.skipTest("Warp was not built with CUTLASS support")
413
-
414
- saved_verify_autograd_array_access_setting = wp.config.verify_autograd_array_access
415
- try:
416
- wp.config.verify_autograd_array_access = True
417
-
418
- a = wp.ones((1, 3, 3), dtype=float, requires_grad=True, device=device)
419
- b = wp.ones_like(a)
420
- c = wp.ones_like(a)
421
- d = wp.zeros_like(a)
422
-
423
- tape = wp.Tape()
424
-
425
- with tape:
426
- wp.batched_matmul(a, b, c, d)
427
-
428
- test.assertEqual(a._is_read, True)
429
- test.assertEqual(b._is_read, True)
430
- test.assertEqual(c._is_read, True)
431
- test.assertEqual(d._is_read, False)
432
-
433
- finally:
434
- wp.config.verify_autograd_array_access = saved_verify_autograd_array_access_setting
435
-
436
-
437
379
  # write after read warning with in-place operators within a kernel
438
380
  def test_in_place_operators_warning(test, device):
439
381
  saved_verify_autograd_array_access_setting = wp.config.verify_autograd_array_access
@@ -593,8 +535,6 @@ add_function_test(TestOverwrite, "test_views", test_views, devices=devices)
593
535
  add_function_test(TestOverwrite, "test_reset", test_reset, devices=devices)
594
536
 
595
537
  add_function_test(TestOverwrite, "test_copy", test_copy, devices=devices)
596
- add_function_test(TestOverwrite, "test_matmul", test_matmul, devices=devices, check_output=False)
597
- add_function_test(TestOverwrite, "test_batched_matmul", test_batched_matmul, devices=devices, check_output=False)
598
538
  add_function_test(TestOverwrite, "test_atomic_operations", test_atomic_operations, devices=devices)
599
539
 
600
540
  # Some warning are only issued during codegen, and codegen only runs on cuda_0 in the MGPU case.
warp/tests/test_quat.py CHANGED
@@ -1205,7 +1205,6 @@ def test_quat_to_matrix(test, device, dtype, register_kernels=False):
1205
1205
 
1206
1206
  wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
1207
1207
  quat = wp.types.quaternion(dtype=wptype)
1208
- mat3 = wp.types.matrix(shape=(3, 3), dtype=wptype)
1209
1208
  vec3 = wp.types.vector(length=3, dtype=wptype)
1210
1209
 
1211
1210
  def check_quat_to_matrix(
@@ -1239,7 +1238,7 @@ def test_quat_to_matrix(test, device, dtype, register_kernels=False):
1239
1238
  wptype(1),
1240
1239
  ),
1241
1240
  )
1242
- result_manual = mat3(xaxis, yaxis, zaxis)
1241
+ result_manual = wp.matrix_from_cols(xaxis, yaxis, zaxis)
1243
1242
 
1244
1243
  idx = 0
1245
1244
  for i in range(3):
@@ -1711,18 +1710,31 @@ def test_quat_rpy_grad(test, device, dtype, register_kernels=False):
1711
1710
  def test_quat_from_matrix(test, device, dtype, register_kernels=False):
1712
1711
  wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
1713
1712
  mat33 = wp.types.matrix((3, 3), wptype)
1713
+ mat44 = wp.types.matrix((4, 4), wptype)
1714
1714
  quat = wp.types.quaternion(wptype)
1715
1715
 
1716
1716
  def quat_from_matrix(m: wp.array2d(dtype=wptype), loss: wp.array(dtype=wptype), idx: int):
1717
1717
  tid = wp.tid()
1718
1718
 
1719
- matrix = mat33(
1720
- m[tid, 0], m[tid, 1], m[tid, 2], m[tid, 3], m[tid, 4], m[tid, 5], m[tid, 6], m[tid, 7], m[tid, 8]
1719
+ # fmt: off
1720
+ m3 = mat33(
1721
+ m[tid, 0], m[tid, 1], m[tid, 2],
1722
+ m[tid, 3], m[tid, 4], m[tid, 5],
1723
+ m[tid, 6], m[tid, 7], m[tid, 8],
1721
1724
  )
1725
+ q1 = wp.quat_from_matrix(m3)
1722
1726
 
1723
- q = wp.quat_from_matrix(matrix)
1727
+ m4 = mat44(
1728
+ m[tid, 0], m[tid, 1], m[tid, 2], wptype(0.0),
1729
+ m[tid, 3], m[tid, 4], m[tid, 5], wptype(0.0),
1730
+ m[tid, 6], m[tid, 7], m[tid, 8], wptype(0.0),
1731
+ wptype(0.0), wptype(0.0), wptype(0.0), wptype(1.0),
1732
+ )
1733
+ q2 = wp.quat_from_matrix(m4)
1734
+ # fmt: on
1724
1735
 
1725
- wp.atomic_add(loss, 0, q[idx])
1736
+ wp.expect_eq(q1, q2)
1737
+ wp.atomic_add(loss, 0, q1[idx])
1726
1738
 
1727
1739
  def quat_from_matrix_forward(mats: wp.array2d(dtype=wptype), loss: wp.array(dtype=wptype), idx: int):
1728
1740
  tid = wp.tid()
@@ -1894,7 +1906,7 @@ def test_quat_identity(test, device, dtype, register_kernels=False):
1894
1906
  ############################################################
1895
1907
 
1896
1908
 
1897
- def test_quat_assign(test, device, dtype, register_kernels=False):
1909
+ def test_quat_assign_inplace(test, device, dtype, register_kernels=False):
1898
1910
  np_type = np.dtype(dtype)
1899
1911
  wp_type = wp.types.np_dtype_to_warp_type[np_type]
1900
1912
 
@@ -1915,16 +1927,6 @@ def test_quat_assign(test, device, dtype, register_kernels=False):
1915
1927
  g = q[0] + wp_type(2.0) * q[1] + wp_type(3.0) * q[2] + wp_type(4.0) * q[3]
1916
1928
  x[tid] = g
1917
1929
 
1918
- def quattest_in_register_overwrite(x: wp.array(dtype=quat), a: wp.array(dtype=quat)):
1919
- tid = wp.tid()
1920
-
1921
- f = quat()
1922
- a_quat = a[tid]
1923
- f = a_quat
1924
- f[1] = wp_type(3.0)
1925
-
1926
- x[tid] = f
1927
-
1928
1930
  def quattest_component(x: wp.array(dtype=quat), y: wp.array(dtype=wp_type)):
1929
1931
  i = wp.tid()
1930
1932
 
@@ -1937,7 +1939,6 @@ def test_quat_assign(test, device, dtype, register_kernels=False):
1937
1939
 
1938
1940
  kernel_read_write_store = getkernel(quattest_read_write_store, suffix=dtype.__name__)
1939
1941
  kernel_in_register = getkernel(quattest_in_register, suffix=dtype.__name__)
1940
- kernel_in_register_overwrite = getkernel(quattest_in_register_overwrite, suffix=dtype.__name__)
1941
1942
  kernel_component = getkernel(quattest_component, suffix=dtype.__name__)
1942
1943
 
1943
1944
  if register_kernels:
@@ -1973,7 +1974,6 @@ def test_quat_assign(test, device, dtype, register_kernels=False):
1973
1974
  x = wp.zeros(1, dtype=quat, requires_grad=True)
1974
1975
  y = wp.ones(1, dtype=wp_type, requires_grad=True)
1975
1976
 
1976
- tape = wp.Tape()
1977
1977
  with tape:
1978
1978
  wp.launch(kernel_component, dim=1, inputs=[x, y])
1979
1979
 
@@ -1982,18 +1982,6 @@ def test_quat_assign(test, device, dtype, register_kernels=False):
1982
1982
  assert_np_equal(x.numpy(), np.array([[1.0, 2.0, 3.0, 4.0]], dtype=np_type))
1983
1983
  assert_np_equal(y.grad.numpy(), np.array([10.0], dtype=np_type))
1984
1984
 
1985
- x = wp.zeros(1, dtype=quat, device=device, requires_grad=True)
1986
- a = wp.ones(1, dtype=quat, device=device, requires_grad=True)
1987
-
1988
- tape = wp.Tape()
1989
- with tape:
1990
- wp.launch(kernel_in_register_overwrite, dim=1, inputs=[x, a], device=device)
1991
-
1992
- tape.backward(grads={x: wp.ones_like(x, requires_grad=False)})
1993
-
1994
- assert_np_equal(x.numpy(), np.array([[1.0, 3.0, 1.0, 1.0]], dtype=np_type))
1995
- assert_np_equal(a.grad.numpy(), np.array([[1.0, 0.0, 1.0, 1.0]], dtype=np_type))
1996
-
1997
1985
 
1998
1986
  ############################################################
1999
1987
 
@@ -2128,7 +2116,7 @@ def test_quat_len(test, device):
2128
2116
 
2129
2117
 
2130
2118
  @wp.kernel
2131
- def vector_augassign_kernel(
2119
+ def quat_augassign_kernel(
2132
2120
  a: wp.array(dtype=wp.quat), b: wp.array(dtype=wp.quat), c: wp.array(dtype=wp.quat), d: wp.array(dtype=wp.quat)
2133
2121
  ):
2134
2122
  i = wp.tid()
@@ -2146,26 +2134,26 @@ def vector_augassign_kernel(
2146
2134
  q3 = wp.quat()
2147
2135
  q4 = d[i]
2148
2136
 
2149
- q3[0] += q4[0]
2150
- q3[1] += q4[1]
2151
- q3[2] += q4[2]
2152
- q3[3] += q4[3]
2137
+ q3[0] -= q4[0]
2138
+ q3[1] -= q4[1]
2139
+ q3[2] -= q4[2]
2140
+ q3[3] -= q4[3]
2153
2141
 
2154
- c[i] = q1
2142
+ c[i] = q3
2155
2143
 
2156
2144
 
2157
- def test_vector_augassign(test, device):
2145
+ def test_quat_augassign(test, device):
2158
2146
  N = 3
2159
2147
 
2160
- a = wp.zeros(N, dtype=wp.quat, requires_grad=True)
2161
- b = wp.ones(N, dtype=wp.quat, requires_grad=True)
2148
+ a = wp.zeros(N, dtype=wp.quat, requires_grad=True, device=device)
2149
+ b = wp.ones(N, dtype=wp.quat, requires_grad=True, device=device)
2162
2150
 
2163
- c = wp.zeros(N, dtype=wp.quat, requires_grad=True)
2164
- d = wp.ones(N, dtype=wp.quat, requires_grad=True)
2151
+ c = wp.zeros(N, dtype=wp.quat, requires_grad=True, device=device)
2152
+ d = wp.ones(N, dtype=wp.quat, requires_grad=True, device=device)
2165
2153
 
2166
2154
  tape = wp.Tape()
2167
2155
  with tape:
2168
- wp.launch(vector_augassign_kernel, N, inputs=[a, b, c, d])
2156
+ wp.launch(quat_augassign_kernel, N, inputs=[a, b, c, d], device=device)
2169
2157
 
2170
2158
  tape.backward(grads={a: wp.ones_like(a), c: wp.ones_like(c)})
2171
2159
 
@@ -2178,6 +2166,38 @@ def test_vector_augassign(test, device):
2178
2166
  assert_np_equal(d.grad.numpy(), -wp.ones_like(d).numpy())
2179
2167
 
2180
2168
 
2169
+ def test_quat_assign_copy(test, device):
2170
+ saved_enable_vector_component_overwrites_setting = wp.config.enable_vector_component_overwrites
2171
+ try:
2172
+ wp.config.enable_vector_component_overwrites = True
2173
+
2174
+ @wp.kernel
2175
+ def quat_in_register_overwrite(x: wp.array(dtype=wp.quat), a: wp.array(dtype=wp.quat)):
2176
+ tid = wp.tid()
2177
+
2178
+ f = wp.quat()
2179
+ a_quat = a[tid]
2180
+ f = a_quat
2181
+ f[1] = 3.0
2182
+
2183
+ x[tid] = f
2184
+
2185
+ x = wp.zeros(1, dtype=wp.quat, device=device, requires_grad=True)
2186
+ a = wp.ones(1, dtype=wp.quat, device=device, requires_grad=True)
2187
+
2188
+ tape = wp.Tape()
2189
+ with tape:
2190
+ wp.launch(quat_in_register_overwrite, dim=1, inputs=[x, a], device=device)
2191
+
2192
+ tape.backward(grads={x: wp.ones_like(x, requires_grad=False)})
2193
+
2194
+ assert_np_equal(x.numpy(), np.array([[1.0, 3.0, 1.0, 1.0]], dtype=float))
2195
+ assert_np_equal(a.grad.numpy(), np.array([[1.0, 0.0, 1.0, 1.0]], dtype=float))
2196
+
2197
+ finally:
2198
+ wp.config.enable_vector_component_overwrites = saved_enable_vector_component_overwrites_setting
2199
+
2200
+
2181
2201
  devices = get_test_devices()
2182
2202
 
2183
2203
 
@@ -2277,8 +2297,8 @@ for dtype in np_float_types:
2277
2297
  )
2278
2298
  add_function_test_register_kernel(
2279
2299
  TestQuat,
2280
- f"test_quat_assign_{dtype.__name__}",
2281
- test_quat_assign,
2300
+ f"test_quat_assign_inplace_{dtype.__name__}",
2301
+ test_quat_assign_inplace,
2282
2302
  devices=devices,
2283
2303
  dtype=dtype,
2284
2304
  )
@@ -2287,7 +2307,8 @@ for dtype in np_float_types:
2287
2307
  )
2288
2308
 
2289
2309
  add_function_test(TestQuat, "test_quat_len", test_quat_len, devices=devices)
2290
-
2310
+ add_function_test(TestQuat, "test_quat_augassign", test_quat_augassign, devices=devices)
2311
+ add_function_test(TestQuat, "test_quat_assign_copy", test_quat_assign_copy, devices=devices)
2291
2312
 
2292
2313
  if __name__ == "__main__":
2293
2314
  wp.clear_kernel_cache()
warp/tests/test_rand.py CHANGED
@@ -26,6 +26,8 @@ def test_kernel(
26
26
  kernel_seed: int,
27
27
  int_a: wp.array(dtype=int),
28
28
  int_ab: wp.array(dtype=int),
29
+ uint_a: wp.array(dtype=wp.uint32),
30
+ uint_ab: wp.array(dtype=wp.uint32),
29
31
  float_01: wp.array(dtype=float),
30
32
  float_ab: wp.array(dtype=float),
31
33
  ):
@@ -35,6 +37,8 @@ def test_kernel(
35
37
 
36
38
  int_a[tid] = wp.randi(state)
37
39
  int_ab[tid] = wp.randi(state, 0, 100)
40
+ uint_a[tid] = wp.randu(state)
41
+ uint_ab[tid] = wp.randu(state, wp.uint32(0), wp.uint32(100))
38
42
  float_01[tid] = wp.randf(state)
39
43
  float_ab[tid] = wp.randf(state, 0.0, 100.0)
40
44
 
@@ -42,37 +46,25 @@ def test_kernel(
42
46
  def test_rand(test, device):
43
47
  N = 10
44
48
 
45
- int_a_device = wp.zeros(N, dtype=int, device=device)
46
- int_a_host = wp.zeros(N, dtype=int, device="cpu")
47
- int_ab_device = wp.zeros(N, dtype=int, device=device)
48
- int_ab_host = wp.zeros(N, dtype=int, device="cpu")
49
+ int_a = wp.zeros(N, dtype=int, device=device)
50
+ int_ab = wp.zeros(N, dtype=int, device=device)
49
51
 
50
- float_01_device = wp.zeros(N, dtype=float, device=device)
51
- float_01_host = wp.zeros(N, dtype=float, device="cpu")
52
- float_ab_device = wp.zeros(N, dtype=float, device=device)
53
- float_ab_host = wp.zeros(N, dtype=float, device="cpu")
52
+ uint_a = wp.zeros(N, dtype=wp.uint32, device=device)
53
+ uint_ab = wp.zeros(N, dtype=wp.uint32, device=device)
54
+
55
+ float_01 = wp.zeros(N, dtype=float, device=device)
56
+ float_ab = wp.zeros(N, dtype=float, device=device)
54
57
 
55
58
  seed = 42
56
59
 
57
60
  wp.launch(
58
61
  kernel=test_kernel,
59
62
  dim=N,
60
- inputs=[seed, int_a_device, int_ab_device, float_01_device, float_ab_device],
63
+ inputs=[seed, int_a, int_ab, uint_a, uint_ab, float_01, float_ab],
61
64
  outputs=[],
62
65
  device=device,
63
66
  )
64
67
 
65
- wp.copy(int_a_host, int_a_device)
66
- wp.copy(int_ab_host, int_ab_device)
67
- wp.copy(float_01_host, float_01_device)
68
- wp.copy(float_ab_host, float_ab_device)
69
- wp.synchronize_device(device)
70
-
71
- int_a = int_a_host.numpy()
72
- int_ab = int_ab_host.numpy()
73
- float_01 = float_01_host.numpy()
74
- float_ab = float_ab_host.numpy()
75
-
76
68
  int_a_true = np.array(
77
69
  [
78
70
  -575632308,
@@ -88,32 +80,47 @@ def test_rand(test, device):
88
80
  ]
89
81
  )
90
82
  int_ab_true = np.array([46, 58, 46, 83, 85, 39, 72, 99, 18, 41])
83
+ uint_a_true = np.array(
84
+ [
85
+ 3133687854,
86
+ 3702303309,
87
+ 1235698096,
88
+ 3516599792,
89
+ 800302729,
90
+ 2620462179,
91
+ 2423739693,
92
+ 3024873594,
93
+ 2783682377,
94
+ 1188846332,
95
+ ]
96
+ )
97
+ uint_ab_true = np.array([6, 55, 2, 92, 55, 93, 65, 23, 48, 0])
91
98
  float_01_true = np.array(
92
99
  [
93
- 0.72961855,
94
- 0.86200964,
95
- 0.28770837,
96
- 0.8187722,
97
- 0.186335,
98
- 0.6101239,
99
- 0.56432086,
100
- 0.70428324,
101
- 0.64812654,
102
- 0.27679986,
100
+ 0.8265858,
101
+ 0.5874614,
102
+ 0.1508659,
103
+ 0.9498008,
104
+ 0.02531803,
105
+ 0.8520948,
106
+ 0.0001185536,
107
+ 0.4855958,
108
+ 0.06277305,
109
+ 0.2214079,
103
110
  ]
104
111
  )
105
112
  float_ab_true = np.array(
106
- [96.04259, 73.33809, 63.601555, 38.647305, 71.813896, 64.65809, 77.79791, 46.579605, 94.614456, 91.921814]
113
+ [79.84678, 76.362206, 32.135242, 99.70866, 70.45863, 20.6523, 45.164482, 55.583008, 76.60291, 35.36277]
107
114
  )
108
115
 
109
- test.assertTrue((int_a == int_a_true).all())
110
- test.assertTrue((int_ab == int_ab_true).all())
116
+ assert_np_equal(int_a.numpy(), int_a_true)
117
+ assert_np_equal(int_ab.numpy(), int_ab_true)
111
118
 
112
- err = np.max(np.abs(float_01 - float_01_true))
113
- test.assertTrue(err < 1e-04)
119
+ assert_np_equal(uint_a.numpy(), uint_a_true)
120
+ assert_np_equal(uint_ab.numpy(), uint_ab_true)
114
121
 
115
- err = np.max(np.abs(float_ab - float_ab_true))
116
- test.assertTrue(err < 1e-04)
122
+ assert_np_equal(float_01.numpy(), float_01_true, 1e-04)
123
+ assert_np_equal(float_ab.numpy(), float_ab_true, 1e-04)
117
124
 
118
125
 
119
126
  @wp.kernel
warp/tests/test_sparse.py CHANGED
@@ -19,10 +19,12 @@ import numpy as np
19
19
 
20
20
  import warp as wp
21
21
  from warp.sparse import (
22
+ bsr_assign,
22
23
  bsr_axpy,
23
24
  bsr_axpy_work_arrays,
24
25
  bsr_copy,
25
26
  bsr_diag,
27
+ bsr_from_triplets,
26
28
  bsr_get_diag,
27
29
  bsr_identity,
28
30
  bsr_mm,
@@ -232,18 +234,43 @@ def test_bsr_split_merge(test, device):
232
234
  with test.assertRaisesRegex(ValueError, "Incompatible dest and src block shapes"):
233
235
  bsr_copy(bsr, block_shape=(3, 3))
234
236
 
235
- with test.assertRaisesRegex(
236
- ValueError, r"Dest block shape \(5, 5\) is not an exact multiple of src block shape \(4, 2\)"
237
- ):
237
+ with test.assertRaisesRegex(ValueError, "Incompatible dest and src block shapes"):
238
238
  bsr_copy(bsr, block_shape=(5, 5))
239
239
 
240
240
  with test.assertRaisesRegex(
241
241
  ValueError,
242
- "The total rows and columns of the src matrix cannot be evenly divided using the requested block shape",
242
+ "The requested block shape does not evenly divide the source matrix",
243
243
  ):
244
244
  bsr_copy(bsr, block_shape=(32, 32))
245
245
 
246
246
 
247
+ def test_bsr_assign_masked(test, device):
248
+ rng = np.random.default_rng(123)
249
+
250
+ block_shape = (1, 2)
251
+ nrow = 16
252
+ ncol = 8
253
+ shape = (block_shape[0] * nrow, block_shape[1] * ncol)
254
+ n = 20
255
+
256
+ rows = wp.array(rng.integers(0, high=nrow, size=n, dtype=int), dtype=int, device=device)
257
+ cols = wp.array(rng.integers(0, high=ncol, size=n, dtype=int), dtype=int, device=device)
258
+ vals = wp.array(rng.random(size=(n, block_shape[0], block_shape[1])), dtype=float, device=device)
259
+
260
+ A = bsr_from_triplets(nrow, ncol, rows, cols, vals)
261
+
262
+ # Extract coarse diagonal with copy + diag funcs, for reference
263
+ A_coarse = bsr_copy(A, block_shape=(4, 4))
264
+ ref = _bsr_to_dense(bsr_diag(bsr_get_diag(A_coarse)))
265
+
266
+ # Extract coarse diagonal with masked assign (more memory efficient)
267
+ diag_masked = bsr_diag(rows_of_blocks=shape[0] // 4, block_type=A_coarse.dtype, device=device)
268
+ bsr_assign(src=A, dest=diag_masked, masked=True)
269
+ res = _bsr_to_dense(diag_masked)
270
+
271
+ assert_np_equal(res, ref, 0.0001)
272
+
273
+
247
274
  def make_test_bsr_transpose(block_shape, scalar_type):
248
275
  def test_bsr_transpose(test, device):
249
276
  rng = np.random.default_rng(123)
@@ -316,6 +343,12 @@ def make_test_bsr_axpy(block_shape, scalar_type):
316
343
  res = _bsr_to_dense(y)
317
344
  assert_np_equal(res, ref, 0.0001)
318
345
 
346
+ # test masked
347
+ y_mask = bsr_from_triplets(nrow, ncol, y.uncompress_rows()[:1], y.columns[:1], y.values[:1])
348
+ bsr_axpy(y, y_mask, masked=True)
349
+ assert y_mask.nnz_sync() == 1
350
+ assert_np_equal(y_mask.values.numpy(), 2.0 * y.values[:1].numpy(), 0.0001)
351
+
319
352
  # test incompatible shapes
320
353
  y.ncol = y.ncol + 1
321
354
  with test.assertRaisesRegex(ValueError, "Matrices must have the same number of rows and columns"):
@@ -383,6 +416,13 @@ def make_test_bsr_mm(block_shape, scalar_type):
383
416
  bsr_mm(x, y, z, alpha, beta, work_arrays=work_arrays, reuse_topology=True)
384
417
  assert_np_equal(res, ref, 0.0001)
385
418
 
419
+ # test masked mm
420
+ z = bsr_diag(rows_of_blocks=z.nrow, block_type=z.dtype, device=z.device)
421
+ bsr_mm(x, y, z, masked=True)
422
+ res = _bsr_to_dense(z)
423
+ ref = _bsr_to_dense(bsr_diag(bsr_get_diag(x @ y)))
424
+ assert_np_equal(res, ref, 0.0001)
425
+
386
426
  # using overloaded operators
387
427
  x = (alpha * x) @ y
388
428
  assert_np_equal(res, ref, 0.0001)
@@ -479,12 +519,12 @@ def make_test_bsr_mv(block_shape, scalar_type):
479
519
  assert_np_equal(res, ref, 0.0001)
480
520
 
481
521
  A.ncol = A.ncol + 1
482
- with test.assertRaisesRegex(ValueError, "Number of columns"):
522
+ with test.assertRaisesRegex(ValueError, "Incompatible 'x'"):
483
523
  bsr_mv(A, x, y)
484
524
 
485
525
  A.ncol = A.ncol - 1
486
526
  A.nrow = A.nrow - 1
487
- with test.assertRaisesRegex(ValueError, "Number of rows"):
527
+ with test.assertRaisesRegex(ValueError, "Incompatible 'y'"):
488
528
  bsr_mv(A, x, y)
489
529
 
490
530
  return test_bsr_mv
@@ -518,6 +558,7 @@ add_function_test(TestSparse, "test_csr_from_triplets", test_csr_from_triplets,
518
558
  add_function_test(TestSparse, "test_bsr_from_triplets", test_bsr_from_triplets, devices=devices)
519
559
  add_function_test(TestSparse, "test_bsr_get_diag", test_bsr_get_set_diag, devices=devices)
520
560
  add_function_test(TestSparse, "test_bsr_split_merge", test_bsr_split_merge, devices=devices)
561
+ add_function_test(TestSparse, "test_bsr_assign_masked", test_bsr_assign_masked, devices=devices)
521
562
 
522
563
  add_function_test(TestSparse, "test_csr_transpose", make_test_bsr_transpose((1, 1), wp.float32), devices=devices)
523
564
  add_function_test(TestSparse, "test_bsr_transpose_1_3", make_test_bsr_transpose((1, 3), wp.float32), devices=devices)
warp/tests/test_spatial.py CHANGED
@@ -1969,6 +1969,67 @@ def test_transform_anon_type_instance(test, device, dtype, register_kernels=Fals
1969
1969
  tape.zero()
1970
1970
 
1971
1971
 
1972
+ def test_transform_from_matrix(test, device, dtype, register_kernels=False):
1973
+ wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
1974
+ mat44 = wp.types.matrix((4, 4), wptype)
1975
+ vec3 = wp.types.vector(3, wptype)
1976
+ quat = wp.types.quaternion(wptype)
1977
+
1978
+ def transform_from_matrix_kernel():
1979
+ # fmt: off
1980
+ m = mat44(
1981
+ wptype(0.6), wptype(0.48), wptype(0.64), wptype(1.0),
1982
+ wptype(-0.8), wptype(0.36), wptype(0.48), wptype(2.0),
1983
+ wptype(0.0), wptype(-0.8), wptype(0.6), wptype(3.0),
1984
+ wptype(0.0), wptype(0.0), wptype(0.0), wptype(1.0),
1985
+ )
1986
+ # fmt: on
1987
+ t = wp.transform_from_matrix(m)
1988
+ p = wp.transform_get_translation(t)
1989
+ q = wp.transform_get_rotation(t)
1990
+ wp.expect_near(p, vec3(wptype(1.0), wptype(2.0), wptype(3.0)), tolerance=wptype(1e-3))
1991
+ wp.expect_near(q, quat(wptype(-0.4), wptype(0.2), wptype(-0.4), wptype(0.8)), tolerance=wptype(1e-3))
1992
+
1993
+ kernel = getkernel(transform_from_matrix_kernel, suffix=dtype.__name__)
1994
+
1995
+ if register_kernels:
1996
+ return
1997
+
1998
+ wp.launch(kernel, dim=1, device=device)
1999
+
2000
+
2001
+ def test_transform_to_matrix(test, device, dtype, register_kernels=False):
2002
+ wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
2003
+ mat44 = wp.types.matrix((4, 4), wptype)
2004
+ vec3 = wp.types.vector(3, wptype)
2005
+ quat = wp.types.quaternion(wptype)
2006
+
2007
+ def transform_to_matrix_kernel():
2008
+ p = vec3(wptype(1.0), wptype(2.0), wptype(3.0))
2009
+ q = quat(wptype(-0.4), wptype(0.2), wptype(-0.4), wptype(0.8))
2010
+ t = wp.transformation(p, q)
2011
+ m = wp.transform_to_matrix(t)
2012
+ # fmt: off
2013
+ wp.expect_near(
2014
+ m,
2015
+ mat44(
2016
+ wptype(0.6), wptype(0.48), wptype(0.64), wptype(1.0),
2017
+ wptype(-0.8), wptype(0.36), wptype(0.48), wptype(2.0),
2018
+ wptype(0.0), wptype(-0.8), wptype(0.6), wptype(3.0),
2019
+ wptype(0.0), wptype(0.0), wptype(0.0), wptype(1.0),
2020
+ ),
2021
+ tolerance=wptype(1e-3),
2022
+ )
2023
+ # fmt: on
2024
+
2025
+ kernel = getkernel(transform_to_matrix_kernel, suffix=dtype.__name__)
2026
+
2027
+ if register_kernels:
2028
+ return
2029
+
2030
+ wp.launch(kernel, dim=1, device=device)
2031
+
2032
+
1972
2033
  devices = get_test_devices()
1973
2034
 
1974
2035
 
@@ -2145,6 +2206,20 @@ for dtype in np_float_types:
2145
2206
  add_function_test_register_kernel(
2146
2207
  TestSpatial, f"test_spatial_adjoint_{dtype.__name__}", test_spatial_adjoint, devices=devices, dtype=dtype
2147
2208
  )
2209
+ add_function_test_register_kernel(
2210
+ TestSpatial,
2211
+ f"test_transform_from_matrix_{dtype.__name__}",
2212
+ test_transform_from_matrix,
2213
+ devices=devices,
2214
+ dtype=dtype,
2215
+ )
2216
+ add_function_test_register_kernel(
2217
+ TestSpatial,
2218
+ f"test_transform_to_matrix_{dtype.__name__}",
2219
+ test_transform_to_matrix,
2220
+ devices=devices,
2221
+ dtype=dtype,
2222
+ )
2148
2223
 
2149
2224
  # \TODO: test spatial_mass and spatial_jacobian
2150
2225
 
warp/tests/test_static.py CHANGED
@@ -307,7 +307,7 @@ def test_function_lookup(test, device):
307
307
 
308
308
  def count_ssa_occurrences(kernel: wp.Kernel, ssas: List[str]) -> Dict[str, int]:
309
309
  # analyze the generated code
310
- counts = {ssa: 0 for ssa in ssas}
310
+ counts = dict.fromkeys(ssas, 0)
311
311
  for line in kernel.adj.blocks[0].body_forward:
312
312
  for ssa in ssas:
313
313
  if ssa in line: