warp-lang 1.6.2__py3-none-win_amd64.whl → 1.7.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of warp-lang might be problematic. (The original page links to more details.)

Files changed (179)
  1. warp/__init__.py +7 -1
  2. warp/bin/warp-clang.dll +0 -0
  3. warp/bin/warp.dll +0 -0
  4. warp/build.py +410 -0
  5. warp/build_dll.py +6 -14
  6. warp/builtins.py +452 -362
  7. warp/codegen.py +179 -119
  8. warp/config.py +42 -6
  9. warp/context.py +490 -271
  10. warp/dlpack.py +8 -6
  11. warp/examples/assets/nonuniform.usd +0 -0
  12. warp/examples/assets/nvidia_logo.png +0 -0
  13. warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
  14. warp/examples/core/example_sample_mesh.py +300 -0
  15. warp/examples/fem/example_apic_fluid.py +1 -1
  16. warp/examples/fem/example_burgers.py +2 -2
  17. warp/examples/fem/example_deformed_geometry.py +1 -1
  18. warp/examples/fem/example_distortion_energy.py +1 -1
  19. warp/examples/fem/example_magnetostatics.py +6 -6
  20. warp/examples/fem/utils.py +9 -3
  21. warp/examples/interop/example_jax_callable.py +116 -0
  22. warp/examples/interop/example_jax_ffi_callback.py +132 -0
  23. warp/examples/interop/example_jax_kernel.py +205 -0
  24. warp/examples/optim/example_fluid_checkpoint.py +497 -0
  25. warp/examples/tile/example_tile_matmul.py +2 -4
  26. warp/fem/__init__.py +11 -1
  27. warp/fem/adaptivity.py +4 -4
  28. warp/fem/field/nodal_field.py +22 -68
  29. warp/fem/field/virtual.py +62 -23
  30. warp/fem/geometry/adaptive_nanogrid.py +9 -10
  31. warp/fem/geometry/closest_point.py +1 -1
  32. warp/fem/geometry/deformed_geometry.py +5 -2
  33. warp/fem/geometry/geometry.py +5 -0
  34. warp/fem/geometry/grid_2d.py +12 -12
  35. warp/fem/geometry/grid_3d.py +12 -15
  36. warp/fem/geometry/hexmesh.py +5 -7
  37. warp/fem/geometry/nanogrid.py +9 -11
  38. warp/fem/geometry/quadmesh.py +13 -13
  39. warp/fem/geometry/tetmesh.py +3 -4
  40. warp/fem/geometry/trimesh.py +3 -8
  41. warp/fem/integrate.py +262 -93
  42. warp/fem/linalg.py +5 -5
  43. warp/fem/quadrature/pic_quadrature.py +37 -22
  44. warp/fem/quadrature/quadrature.py +194 -25
  45. warp/fem/space/__init__.py +1 -1
  46. warp/fem/space/basis_function_space.py +4 -2
  47. warp/fem/space/basis_space.py +25 -18
  48. warp/fem/space/hexmesh_function_space.py +2 -2
  49. warp/fem/space/partition.py +6 -2
  50. warp/fem/space/quadmesh_function_space.py +8 -8
  51. warp/fem/space/shape/cube_shape_function.py +23 -23
  52. warp/fem/space/shape/square_shape_function.py +12 -12
  53. warp/fem/space/shape/triangle_shape_function.py +1 -1
  54. warp/fem/space/tetmesh_function_space.py +3 -3
  55. warp/fem/space/trimesh_function_space.py +2 -2
  56. warp/fem/utils.py +12 -6
  57. warp/jax.py +14 -1
  58. warp/jax_experimental/__init__.py +16 -0
  59. warp/{jax_experimental.py → jax_experimental/custom_call.py} +14 -27
  60. warp/jax_experimental/ffi.py +698 -0
  61. warp/jax_experimental/xla_ffi.py +602 -0
  62. warp/math.py +89 -0
  63. warp/native/array.h +13 -0
  64. warp/native/builtin.h +29 -3
  65. warp/native/bvh.cpp +3 -1
  66. warp/native/bvh.cu +42 -14
  67. warp/native/bvh.h +2 -1
  68. warp/native/clang/clang.cpp +30 -3
  69. warp/native/cuda_util.cpp +14 -0
  70. warp/native/cuda_util.h +2 -0
  71. warp/native/exports.h +68 -63
  72. warp/native/intersect.h +26 -26
  73. warp/native/intersect_adj.h +33 -33
  74. warp/native/marching.cu +1 -1
  75. warp/native/mat.h +513 -9
  76. warp/native/mesh.h +10 -10
  77. warp/native/quat.h +99 -11
  78. warp/native/rand.h +6 -0
  79. warp/native/sort.cpp +122 -59
  80. warp/native/sort.cu +152 -15
  81. warp/native/sort.h +8 -1
  82. warp/native/sparse.cpp +43 -22
  83. warp/native/sparse.cu +52 -17
  84. warp/native/svd.h +116 -0
  85. warp/native/tile.h +301 -105
  86. warp/native/tile_reduce.h +46 -3
  87. warp/native/vec.h +68 -7
  88. warp/native/volume.cpp +85 -113
  89. warp/native/volume_builder.cu +25 -10
  90. warp/native/volume_builder.h +6 -0
  91. warp/native/warp.cpp +5 -6
  92. warp/native/warp.cu +99 -10
  93. warp/native/warp.h +19 -10
  94. warp/optim/linear.py +10 -10
  95. warp/sim/articulation.py +4 -4
  96. warp/sim/collide.py +21 -10
  97. warp/sim/import_mjcf.py +449 -155
  98. warp/sim/import_urdf.py +32 -12
  99. warp/sim/integrator_euler.py +5 -5
  100. warp/sim/integrator_featherstone.py +3 -10
  101. warp/sim/integrator_vbd.py +207 -2
  102. warp/sim/integrator_xpbd.py +5 -5
  103. warp/sim/model.py +42 -13
  104. warp/sim/utils.py +2 -2
  105. warp/sparse.py +642 -555
  106. warp/stubs.py +216 -19
  107. warp/tests/__main__.py +0 -15
  108. warp/tests/cuda/__init__.py +0 -0
  109. warp/tests/{test_mempool.py → cuda/test_mempool.py} +39 -0
  110. warp/tests/{test_streams.py → cuda/test_streams.py} +71 -0
  111. warp/tests/geometry/__init__.py +0 -0
  112. warp/tests/{test_mesh_query_point.py → geometry/test_mesh_query_point.py} +66 -63
  113. warp/tests/{test_mesh_query_ray.py → geometry/test_mesh_query_ray.py} +1 -1
  114. warp/tests/{test_volume.py → geometry/test_volume.py} +41 -6
  115. warp/tests/interop/__init__.py +0 -0
  116. warp/tests/{test_dlpack.py → interop/test_dlpack.py} +28 -5
  117. warp/tests/sim/__init__.py +0 -0
  118. warp/tests/{disabled_kinematics.py → sim/disabled_kinematics.py} +9 -10
  119. warp/tests/{test_collision.py → sim/test_collision.py} +2 -2
  120. warp/tests/{test_model.py → sim/test_model.py} +40 -0
  121. warp/tests/{test_sim_kinematics.py → sim/test_sim_kinematics.py} +2 -1
  122. warp/tests/sim/test_vbd.py +597 -0
  123. warp/tests/test_bool.py +1 -1
  124. warp/tests/test_examples.py +28 -36
  125. warp/tests/test_fem.py +23 -4
  126. warp/tests/test_linear_solvers.py +0 -11
  127. warp/tests/test_mat.py +233 -79
  128. warp/tests/test_mat_scalar_ops.py +4 -4
  129. warp/tests/test_overwrite.py +0 -60
  130. warp/tests/test_quat.py +67 -46
  131. warp/tests/test_rand.py +44 -37
  132. warp/tests/test_sparse.py +47 -6
  133. warp/tests/test_spatial.py +75 -0
  134. warp/tests/test_static.py +1 -1
  135. warp/tests/test_utils.py +84 -4
  136. warp/tests/test_vec.py +46 -34
  137. warp/tests/tile/__init__.py +0 -0
  138. warp/tests/{test_tile.py → tile/test_tile.py} +136 -51
  139. warp/tests/{test_tile_load.py → tile/test_tile_load.py} +1 -1
  140. warp/tests/{test_tile_mathdx.py → tile/test_tile_mathdx.py} +9 -6
  141. warp/tests/{test_tile_mlp.py → tile/test_tile_mlp.py} +25 -14
  142. warp/tests/{test_tile_reduce.py → tile/test_tile_reduce.py} +60 -1
  143. warp/tests/{test_tile_view.py → tile/test_tile_view.py} +1 -1
  144. warp/tests/unittest_serial.py +1 -0
  145. warp/tests/unittest_suites.py +45 -59
  146. warp/tests/unittest_utils.py +2 -1
  147. warp/thirdparty/unittest_parallel.py +3 -1
  148. warp/types.py +110 -658
  149. warp/utils.py +137 -72
  150. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/METADATA +29 -7
  151. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/RECORD +172 -162
  152. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/WHEEL +1 -1
  153. warp/examples/optim/example_walker.py +0 -317
  154. warp/native/cutlass_gemm.cpp +0 -43
  155. warp/native/cutlass_gemm.cu +0 -382
  156. warp/tests/test_matmul.py +0 -511
  157. warp/tests/test_matmul_lite.py +0 -411
  158. warp/tests/test_vbd.py +0 -386
  159. warp/tests/unused_test_misc.py +0 -77
  160. /warp/tests/{test_async.py → cuda/test_async.py} +0 -0
  161. /warp/tests/{test_ipc.py → cuda/test_ipc.py} +0 -0
  162. /warp/tests/{test_multigpu.py → cuda/test_multigpu.py} +0 -0
  163. /warp/tests/{test_peer.py → cuda/test_peer.py} +0 -0
  164. /warp/tests/{test_pinned.py → cuda/test_pinned.py} +0 -0
  165. /warp/tests/{test_bvh.py → geometry/test_bvh.py} +0 -0
  166. /warp/tests/{test_hash_grid.py → geometry/test_hash_grid.py} +0 -0
  167. /warp/tests/{test_marching_cubes.py → geometry/test_marching_cubes.py} +0 -0
  168. /warp/tests/{test_mesh.py → geometry/test_mesh.py} +0 -0
  169. /warp/tests/{test_mesh_query_aabb.py → geometry/test_mesh_query_aabb.py} +0 -0
  170. /warp/tests/{test_volume_write.py → geometry/test_volume_write.py} +0 -0
  171. /warp/tests/{test_jax.py → interop/test_jax.py} +0 -0
  172. /warp/tests/{test_paddle.py → interop/test_paddle.py} +0 -0
  173. /warp/tests/{test_torch.py → interop/test_torch.py} +0 -0
  174. /warp/tests/{flaky_test_sim_grad.py → sim/flaky_test_sim_grad.py} +0 -0
  175. /warp/tests/{test_coloring.py → sim/test_coloring.py} +0 -0
  176. /warp/tests/{test_sim_grad_bounce_linear.py → sim/test_sim_grad_bounce_linear.py} +0 -0
  177. /warp/tests/{test_tile_shared_memory.py → tile/test_tile_shared_memory.py} +0 -0
  178. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info/licenses}/LICENSE.md +0 -0
  179. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/top_level.txt +0 -0
warp/tests/test_examples.py CHANGED
@@ -25,15 +25,14 @@ Generally the test_options[_cpu,_cuda] dictionaries should be used to prevent
25
25
  graphical windows from being open by the example {"headless": True} and to
26
26
  override example defaults so the example can run in less than ten seconds.
27
27
 
28
- Use {"usd_required": True} and {"torch_required": True} to skip running the test
29
- if usd-core or torch are not found in the Python environment.
30
-
31
- Use "cutlass_required": True} to skip the test if Warp needs to be built with
32
- CUTLASS.
28
+ To skip tests if the optional dependencies are not found, use the following keys:
29
+ - {"usd_required": True} (requires usd-core)
30
+ - {"torch_required": True} (requires torch)
31
+ - {"pillow_required": True} (requires pillow)
33
32
 
34
33
  Use the "num_frames" and "train_iters" keys to control the number of steps.
35
34
 
36
- Use "test_timeout" to override the default test timeout threshold of 300 seconds.
35
+ Use "test_timeout" to override the default test timeout threshold of 600 seconds.
37
36
  """
38
37
 
39
38
  import os
@@ -52,7 +51,7 @@ from warp.tests.unittest_utils import (
52
51
  )
53
52
  from warp.utils import check_p2p
54
53
 
55
- wp.init() # For wp.context.runtime.core.is_cutlass_enabled()
54
+ wp.init() # For wp.context.runtime.core.is_debug_enabled()
56
55
 
57
56
 
58
57
  def _build_command_line_options(test_options: Dict[str, Any]) -> list:
@@ -119,9 +118,13 @@ def add_example_test(
119
118
  if usd_required and not USD_AVAILABLE:
120
119
  test.skipTest("Requires usd-core")
121
120
 
122
- cutlass_required = options.pop("cutlass_required", False)
123
- if cutlass_required and not wp.context.runtime.core.is_cutlass_enabled():
124
- test.skipTest("Warp was not built with CUTLASS support")
121
+ # Mark the test as skipped if pillow is not installed but required
122
+ pillow_required = options.pop("pillow_required", False)
123
+ if pillow_required:
124
+ try:
125
+ import PIL # noqa: F401
126
+ except ImportError:
127
+ test.skipTest("Requires pillow")
125
128
 
126
129
  # Find the current Warp cache
127
130
  warp_cache_path = wp.config.kernel_cache_dir
@@ -169,7 +172,7 @@ def add_example_test(
169
172
  command.extend(_build_command_line_options(options))
170
173
 
171
174
  # Set the test timeout in seconds
172
- test_timeout = options.pop("test_timeout", 300)
175
+ test_timeout = options.pop("test_timeout", 600)
173
176
 
174
177
  # with wp.ScopedTimer(f"{name}_{sanitize_identifier(device)}"):
175
178
  # Run the script as a subprocess
@@ -242,19 +245,23 @@ add_example_test(
242
245
  devices=test_devices,
243
246
  test_options={"height": 512, "width": 1024, "headless": True},
244
247
  )
248
+ add_example_test(
249
+ TestCoreExamples,
250
+ name="core.example_sample_mesh",
251
+ devices=test_devices,
252
+ test_options_cpu={"num_frames": 1},
253
+ )
245
254
  add_example_test(
246
255
  TestCoreExamples,
247
256
  name="core.example_sph",
248
257
  devices=test_devices,
249
258
  test_options_cpu={"num_frames": 1},
250
- test_options_cuda={"test_timeout": 600},
251
259
  )
252
260
  add_example_test(
253
261
  TestCoreExamples,
254
262
  name="core.example_torch",
255
263
  devices=test_devices,
256
264
  test_options={"headless": True, "num_frames": 1000, "torch_required": True},
257
- test_options_cpu={"test_timeout": 600},
258
265
  )
259
266
  add_example_test(TestCoreExamples, name="core.example_wave", devices=test_devices)
260
267
 
@@ -268,7 +275,6 @@ add_example_test(
268
275
  name="optim.example_bounce",
269
276
  devices=test_devices,
270
277
  test_options_cpu={"train_iters": 3},
271
- test_options_cuda={"test_timeout": 600},
272
278
  )
273
279
  add_example_test(
274
280
  TestOptimExamples,
@@ -281,7 +287,6 @@ add_example_test(
281
287
  TestOptimExamples,
282
288
  name="optim.example_cloth_throw",
283
289
  devices=test_devices,
284
- test_options={"test_timeout": 600},
285
290
  test_options_cpu={"train_iters": 3},
286
291
  )
287
292
  add_example_test(
@@ -291,6 +296,12 @@ add_example_test(
291
296
  test_options={"usd_required": True, "headless": True},
292
297
  test_options_cpu={"train_iters": 2},
293
298
  )
299
+ add_example_test(
300
+ TestOptimExamples,
301
+ name="optim.example_fluid_checkpoint",
302
+ devices=cuda_test_devices,
303
+ test_options={"headless": True, "train_iters": 5, "num_frames": 300, "pillow_required": True},
304
+ )
294
305
  add_example_test(TestOptimExamples, name="optim.example_inverse_kinematics", devices=test_devices)
295
306
  add_example_test(
296
307
  TestOptimExamples,
@@ -305,19 +316,6 @@ add_example_test(
305
316
  devices=test_devices,
306
317
  test_options={"headless": True, "train_iters": 50},
307
318
  )
308
- # NOTE: This example uses CUTLASS and will run orders of magnitude slower when Warp is built in debug mode
309
- add_example_test(
310
- TestOptimExamples,
311
- name="optim.example_walker",
312
- devices=test_devices,
313
- test_options={"usd_required": True},
314
- test_options_cuda={
315
- "train_iters": 1 if warp.context.runtime.core.is_debug_enabled() else 3,
316
- "num_frames": 1 if warp.context.runtime.core.is_debug_enabled() else 60,
317
- "cutlass_required": True,
318
- },
319
- test_options_cpu={"train_iters": 1, "num_frames": 30},
320
- )
321
319
  add_example_test(
322
320
  TestOptimExamples,
323
321
  name="optim.example_softbody_properties",
@@ -333,15 +331,13 @@ class TestSimExamples(unittest.TestCase):
333
331
  pass
334
332
 
335
333
 
336
- add_example_test(
337
- TestSimExamples, name="sim.example_cartpole", devices=test_devices, test_options_cuda={"test_timeout": 600}
338
- )
334
+ add_example_test(TestSimExamples, name="sim.example_cartpole", devices=test_devices)
339
335
  add_example_test(
340
336
  TestSimExamples,
341
337
  name="sim.example_cloth",
342
338
  devices=test_devices,
343
339
  test_options={"usd_required": True},
344
- test_options_cpu={"num_frames": 10, "test_timeout": 600},
340
+ test_options_cpu={"num_frames": 10},
345
341
  )
346
342
  add_example_test(
347
343
  TestSimExamples, name="sim.example_granular", devices=test_devices, test_options_cpu={"num_frames": 10}
@@ -421,28 +417,24 @@ add_example_test(
421
417
  name="fem.example_convection_diffusion",
422
418
  devices=test_devices,
423
419
  test_options={"resolution": 20, "headless": True},
424
- test_options_cpu={"test_timeout": 600},
425
420
  )
426
421
  add_example_test(
427
422
  TestFemExamples,
428
423
  name="fem.example_burgers",
429
424
  devices=test_devices,
430
425
  test_options={"resolution": 20, "num_frames": 25, "degree": 1, "headless": True},
431
- test_options_cpu={"test_timeout": 600},
432
426
  )
433
427
  add_example_test(
434
428
  TestFemExamples,
435
429
  name="fem.example_convection_diffusion_dg",
436
430
  devices=test_devices,
437
431
  test_options={"resolution": 20, "num_frames": 25, "headless": True},
438
- test_options_cpu={"test_timeout": 600},
439
432
  )
440
433
  add_example_test(
441
434
  TestFemExamples,
442
435
  name="fem.example_mixed_elasticity",
443
436
  devices=test_devices,
444
437
  test_options={"nonconforming_stresses": True, "mesh": "quad", "headless": True},
445
- test_options_cpu={"test_timeout": 600},
446
438
  )
447
439
  add_example_test(
448
440
  TestFemExamples, name="fem.example_stokes_transfer", devices=test_devices, test_options={"headless": True}
warp/tests/test_fem.py CHANGED
@@ -33,6 +33,7 @@ from warp.fem.utils import (
33
33
  grid_to_tets,
34
34
  grid_to_tris,
35
35
  )
36
+ from warp.sparse import bsr_zeros
36
37
  from warp.tests.unittest_utils import *
37
38
 
38
39
  vec6f = wp.vec(length=6, dtype=float)
@@ -147,11 +148,12 @@ def test_interpolate_gradient(test, device):
147
148
  scalar_space = fem.make_polynomial_space(geo, degree=2)
148
149
 
149
150
  # Point-based vector space
150
- # So we can test gradient with respect to inteprolation point position
151
+ # So we can test gradient with respect to interpolation point position
151
152
  point_coords = wp.array([[[0.5, 0.5, 0.0]]], dtype=fem.Coords, requires_grad=True)
152
- interpolation_nodes = fem.PointBasisSpace(
153
- fem.ExplicitQuadrature(domain=fem.Cells(geo), points=point_coords, weights=wp.array([[1.0]], dtype=float))
153
+ point_quadrature = fem.ExplicitQuadrature(
154
+ domain=fem.Cells(geo), points=point_coords, weights=wp.array([[1.0]], dtype=float)
154
155
  )
156
+ interpolation_nodes = fem.PointBasisSpace(point_quadrature)
155
157
  vector_space = fem.make_collocated_function_space(interpolation_nodes, dtype=wp.vec2)
156
158
 
157
159
  # Initialize scalar field with known function
@@ -213,6 +215,23 @@ def test_interpolate_gradient(test, device):
213
215
  )
214
216
  assert_np_equal(point_coords.grad.numpy(), np.array([[[2.0, 0.0, 0.0]]]))
215
217
 
218
+ # Compare against jacobian
219
+ scalar_trial = fem.make_trial(scalar_space)
220
+ jacobian = bsr_zeros(
221
+ rows_of_blocks=point_quadrature.total_point_count(),
222
+ cols_of_blocks=scalar_space.node_count(),
223
+ block_type=wp.mat(shape=(2, 1), dtype=float),
224
+ )
225
+ fem.interpolate(
226
+ grad_field,
227
+ dest=jacobian,
228
+ quadrature=point_quadrature,
229
+ fields={"p": scalar_trial},
230
+ kernel_options={"enable_backward": False},
231
+ )
232
+ assert jacobian.nnz_sync() == 4 # one non-zero per edge center
233
+ assert_np_equal((jacobian @ scalar_field.dof_values.grad).numpy(), [[0.0, 0.5]])
234
+
216
235
 
217
236
  @integrand
218
237
  def vector_divergence_form(s: Sample, u: Field, q: Field):
@@ -1868,7 +1887,7 @@ def test_qr_eigenvalues():
1868
1887
  wp.expect_near(wp.ddot(Err4, Err4), 0.0, tol)
1869
1888
 
1870
1889
  # test robustness to low requested tolerance
1871
- Rank6 = mat66f(
1890
+ Rank6 = wp.matrix_from_cols(
1872
1891
  vec6f(0.00171076, 0.0, 0.0, 0.0, 0.0, 0.0),
1873
1892
  vec6f(0.0, 0.00169935, 6.14367e-06, -3.52589e-05, 3.02397e-05, -1.53458e-11),
1874
1893
  vec6f(0.0, 6.14368e-06, 0.00172217, 2.03568e-05, 1.74589e-05, -2.92627e-05),
warp/tests/test_linear_solvers.py CHANGED
@@ -21,8 +21,6 @@ import warp as wp
21
21
  from warp.optim.linear import bicgstab, cg, cr, gmres, preconditioner
22
22
  from warp.tests.unittest_utils import *
23
23
 
24
- wp.init() # For runtime.core.is_cutlass_enabled()
25
-
26
24
 
27
25
  def _check_linear_solve(test, A, b, func, *args, **kwargs):
28
26
  # test from zero
@@ -185,15 +183,6 @@ class TestLinearSolvers(unittest.TestCase):
185
183
 
186
184
  devices = get_test_devices()
187
185
 
188
- if not wp.context.runtime.core.is_cutlass_enabled():
189
- devices = [d for d in devices if not d.is_cuda]
190
- print("Skipping CUDA linear solver tests because CUTLASS is not supported in this build")
191
-
192
- if wp.context.runtime.core.is_debug_enabled():
193
- # cutlass-based matmul is *very* slow in debug mode -- skip
194
- devices = [d for d in devices if not d.is_cuda]
195
- print("Skipping CUDA linear solver tests in debug mode")
196
-
197
186
  add_function_test(TestLinearSolvers, "test_cg", test_cg, devices=devices)
198
187
  add_function_test(TestLinearSolvers, "test_cr", test_cr, devices=devices)
199
188
  add_function_test(TestLinearSolvers, "test_bicgstab", test_bicgstab, devices=devices)
warp/tests/test_mat.py CHANGED
@@ -127,30 +127,6 @@ def test_tpl_constructor_error_incompatible_sizes(test, device):
127
127
  wp.launch(kernel, dim=1, inputs=[], device=device)
128
128
 
129
129
 
130
- def test_tpl_constructor_error_invalid_vector_count(test, device):
131
- @wp.kernel
132
- def kernel():
133
- wp.mat33(wp.vec3(1.0, 2.0, 3.0), wp.vec3(1.0, 2.0, 3.0))
134
-
135
- with test.assertRaisesRegex(
136
- RuntimeError,
137
- r"incompatible number of column vectors given \(2\) when constructing a matrix of shape \(3, 3\)$",
138
- ):
139
- wp.launch(kernel, dim=1, inputs=[], device=device)
140
-
141
-
142
- def test_tpl_constructor_error_invalid_vector_shape(test, device):
143
- @wp.kernel
144
- def kernel():
145
- wp.mat22(wp.vec3(1.0, 2.0, 3.0), wp.vec3(4.0, 5.0, 6.0))
146
-
147
- with test.assertRaisesRegex(
148
- RuntimeError,
149
- r"incompatible column vector lengths given when constructing a matrix of shape \(2, 2\)$",
150
- ):
151
- wp.launch(kernel, dim=1, inputs=[], device=device)
152
-
153
-
154
130
  def test_tpl_constructor_error_invalid_arg_count(test, device):
155
131
  @wp.kernel
156
132
  def kernel():
@@ -234,7 +210,7 @@ def test_quat_constructor(test, device, dtype, register_kernels=False):
234
210
  c0 = s[0][0] * R[0]
235
211
  c1 = s[0][1] * R[1]
236
212
  c2 = s[0][2] * R[2]
237
- m_alt = mat44(
213
+ m_alt = wp.matrix_from_cols(
238
214
  vec4(c0[0], c0[1], c0[2], wptype(0.0)),
239
215
  vec4(c1[0], c1[1], c1[2], wptype(0.0)),
240
216
  vec4(c2[0], c2[1], c2[2], wptype(0.0)),
@@ -1066,6 +1042,124 @@ def test_svd(test, device, dtype, register_kernels=False):
1066
1042
  assert_np_equal((plusval - minusval) / (2 * dx), m3grads[ii, jj], tol=fdtol)
1067
1043
 
1068
1044
 
1045
+ def test_svd_2D(test, device, dtype, register_kernels=False):
1046
+ rng = np.random.default_rng(123)
1047
+
1048
+ tol = {
1049
+ np.float16: 1.0e-3,
1050
+ np.float32: 1.0e-6,
1051
+ np.float64: 1.0e-12,
1052
+ }.get(dtype, 0)
1053
+
1054
+ wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
1055
+ vec2 = wp.types.vector(length=2, dtype=wptype)
1056
+ mat22 = wp.types.matrix(shape=(2, 2), dtype=wptype)
1057
+
1058
+ def check_mat_svd2(
1059
+ m2: wp.array(dtype=mat22),
1060
+ Uout: wp.array(dtype=mat22),
1061
+ sigmaout: wp.array(dtype=vec2),
1062
+ Vout: wp.array(dtype=mat22),
1063
+ outcomponents: wp.array(dtype=wptype),
1064
+ ):
1065
+ U = mat22()
1066
+ sigma = vec2()
1067
+ V = mat22()
1068
+
1069
+ wp.svd2(m2[0], U, sigma, V) # Assuming there's a 2D SVD kernel
1070
+
1071
+ Uout[0] = U
1072
+ sigmaout[0] = sigma
1073
+ Vout[0] = V
1074
+
1075
+ # multiply outputs by 2 so we've got something to backpropagate:
1076
+ idx = 0
1077
+ for i in range(2):
1078
+ for j in range(2):
1079
+ outcomponents[idx] = wptype(2) * U[i, j]
1080
+ idx = idx + 1
1081
+
1082
+ for i in range(2):
1083
+ outcomponents[idx] = wptype(2) * sigma[i]
1084
+ idx = idx + 1
1085
+
1086
+ for i in range(2):
1087
+ for j in range(2):
1088
+ outcomponents[idx] = wptype(2) * V[i, j]
1089
+ idx = idx + 1
1090
+
1091
+ kernel = getkernel(check_mat_svd2, suffix=dtype.__name__)
1092
+
1093
+ output_select_kernel = get_select_kernel(wptype)
1094
+
1095
+ if register_kernels:
1096
+ return
1097
+
1098
+ m2 = wp.array(randvals(rng, [1, 2, 2], dtype) + np.eye(2), dtype=mat22, requires_grad=True, device=device)
1099
+
1100
+ outcomponents = wp.zeros(2 * 2 * 2 + 2, dtype=wptype, requires_grad=True, device=device)
1101
+ Uout = wp.zeros(1, dtype=mat22, requires_grad=True, device=device)
1102
+ sigmaout = wp.zeros(1, dtype=vec2, requires_grad=True, device=device)
1103
+ Vout = wp.zeros(1, dtype=mat22, requires_grad=True, device=device)
1104
+
1105
+ wp.launch(kernel, dim=1, inputs=[m2], outputs=[Uout, sigmaout, Vout, outcomponents], device=device)
1106
+
1107
+ Uout_np = Uout.numpy()[0].astype(np.float64)
1108
+ sigmaout_np = np.diag(sigmaout.numpy()[0].astype(np.float64))
1109
+ Vout_np = Vout.numpy()[0].astype(np.float64)
1110
+
1111
+ assert_np_equal(
1112
+ np.matmul(Uout_np, np.matmul(sigmaout_np, Vout_np.T)), m2.numpy()[0].astype(np.float64), tol=30 * tol
1113
+ )
1114
+
1115
+ if dtype == np.float16:
1116
+ # Skip gradient check for float16 due to rounding errors
1117
+ return
1118
+
1119
+ # Check gradients:
1120
+ out = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
1121
+ idx = 0
1122
+ for idx in range(2 * 2 + 2 + 2 * 2):
1123
+ tape = wp.Tape()
1124
+ with tape:
1125
+ wp.launch(kernel, dim=1, inputs=[m2], outputs=[Uout, sigmaout, Vout, outcomponents], device=device)
1126
+ wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
1127
+ tape.backward(out)
1128
+ m2grads = 1.0 * tape.gradients[m2].numpy()[0]
1129
+
1130
+ tape.zero()
1131
+
1132
+ dx = 0.0001
1133
+ fdtol = 5.0e-4 if dtype == np.float64 else 2.0e-2
1134
+ for ii in range(2):
1135
+ for jj in range(2):
1136
+ m2test = 1.0 * m2.numpy()
1137
+ m2test[0, ii, jj] += dx
1138
+ wp.launch(
1139
+ kernel,
1140
+ dim=1,
1141
+ inputs=[wp.array(m2test, dtype=mat22, device=device)],
1142
+ outputs=[Uout, sigmaout, Vout, outcomponents],
1143
+ device=device,
1144
+ )
1145
+ wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
1146
+ plusval = out.numpy()[0]
1147
+
1148
+ m2test = 1.0 * m2.numpy()
1149
+ m2test[0, ii, jj] -= dx
1150
+ wp.launch(
1151
+ kernel,
1152
+ dim=1,
1153
+ inputs=[wp.array(m2test, dtype=mat22, device=device)],
1154
+ outputs=[Uout, sigmaout, Vout, outcomponents],
1155
+ device=device,
1156
+ )
1157
+ wp.launch(output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device)
1158
+ minusval = out.numpy()[0]
1159
+
1160
+ assert_np_equal((plusval - minusval) / (2 * dx), m2grads[ii, jj], tol=fdtol)
1161
+
1162
+
1069
1163
  def test_qr(test, device, dtype, register_kernels=False):
1070
1164
  rng = np.random.default_rng(123)
1071
1165
 
@@ -1513,13 +1607,12 @@ def test_transform_vector(test, device, dtype, register_kernels=False):
1513
1607
  tape.zero()
1514
1608
 
1515
1609
 
1516
- def test_mat_array_type_indexing(test, device, dtype, register_kernels=False):
1610
+ def test_matrix_assign_inplace(test, device, dtype, register_kernels=False):
1517
1611
  np_type = np.dtype(dtype)
1518
1612
  wp_type = wp.types.np_dtype_to_warp_type[np_type]
1519
1613
 
1520
1614
  vec2 = wp.types.vector(length=2, dtype=wp_type)
1521
1615
  mat22 = wp.types.matrix(shape=(2, 2), dtype=wp_type)
1522
- mat33 = wp.types.matrix(shape=(3, 3), dtype=wp_type)
1523
1616
 
1524
1617
  def mattest_read_write_store(x: wp.array(dtype=wp_type), a: wp.array(dtype=mat22)):
1525
1618
  tid = wp.tid()
@@ -1536,17 +1629,8 @@ def test_mat_array_type_indexing(test, device, dtype, register_kernels=False):
1536
1629
  a[1, 1] = wp_type(3.0)
1537
1630
  x[i, j] = a
1538
1631
 
1539
- def mattest_in_register_overwrite(x: wp.array2d(dtype=mat22), y: wp.array(dtype=vec2)):
1540
- i, j = wp.tid()
1541
-
1542
- a = mat22(wp_type(0.0))
1543
- a[0] = y[i]
1544
- a[0, 1] = wp_type(3.0)
1545
- x[i, j] = a
1546
-
1547
1632
  kernel_read_write_store = getkernel(mattest_read_write_store, suffix=dtype.__name__)
1548
1633
  kernel_in_register = getkernel(mattest_in_register, suffix=dtype.__name__)
1549
- kernel_in_register_overwrite = getkernel(mattest_in_register_overwrite, suffix=dtype.__name__)
1550
1634
 
1551
1635
  if register_kernels:
1552
1636
  return
@@ -1576,19 +1660,6 @@ def test_mat_array_type_indexing(test, device, dtype, register_kernels=False):
1576
1660
  assert_np_equal(x.numpy(), np.array([[[[1.0, 1.0], [0.0, 3.0]]]], dtype=np_type))
1577
1661
  assert_np_equal(y.grad.numpy(), np.array([[1.0, 1.0]], dtype=np_type))
1578
1662
 
1579
- tape.reset()
1580
-
1581
- x = wp.zeros((1, 1), dtype=mat22, device=device, requires_grad=True)
1582
- y = wp.ones(1, dtype=vec2, device=device, requires_grad=True)
1583
-
1584
- with tape:
1585
- wp.launch(kernel_in_register_overwrite, dim=(1, 1), inputs=[x, y], device=device)
1586
-
1587
- tape.backward(grads={x: wp.ones_like(x, requires_grad=False)})
1588
-
1589
- assert_np_equal(x.numpy(), np.array([[[[1.0, 3.0], [0.0, 0.0]]]], dtype=np_type))
1590
- assert_np_equal(y.grad.numpy(), np.array([[1.0, 0.0]], dtype=np_type))
1591
-
1592
1663
 
1593
1664
  # Test matrix constructors using explicit type (float16)
1594
1665
  # note that these tests are specifically not using generics / closure
@@ -1623,10 +1694,61 @@ def test_matrix_constructor_value_func():
1623
1694
  c = mat32d()
1624
1695
  d = mat32d(c, shape=(3, 2))
1625
1696
  e = mat32d(wp.float64(1.0), wp.float64(2.0), wp.float64(1.0), wp.float64(2.0), wp.float64(1.0), wp.float64(2.0))
1626
- f = mat32d(
1627
- wp.vec3d(wp.float64(1.0), wp.float64(2.0), wp.float64(3.0)),
1628
- wp.vec3d(wp.float64(1.0), wp.float64(2.0), wp.float64(3.0)),
1697
+
1698
+
1699
+ @wp.kernel
1700
+ def test_matrix_from_vecs():
1701
+ m1 = wp.matrix_from_cols(
1702
+ wp.vec3(1.0, 2.0, 3.0),
1703
+ wp.vec3(4.0, 5.0, 6.0),
1704
+ wp.vec3(7.0, 8.0, 9.0),
1705
+ )
1706
+ wp.expect_eq(m1[0, 0], 1.0)
1707
+ wp.expect_eq(m1[0, 1], 4.0)
1708
+ wp.expect_eq(m1[0, 2], 7.0)
1709
+ wp.expect_eq(m1[1, 0], 2.0)
1710
+ wp.expect_eq(m1[1, 1], 5.0)
1711
+ wp.expect_eq(m1[1, 2], 8.0)
1712
+ wp.expect_eq(m1[2, 0], 3.0)
1713
+ wp.expect_eq(m1[2, 1], 6.0)
1714
+ wp.expect_eq(m1[2, 2], 9.0)
1715
+
1716
+ m2 = wp.matrix_from_rows(
1717
+ wp.vec3(1.0, 2.0, 3.0),
1718
+ wp.vec3(4.0, 5.0, 6.0),
1719
+ wp.vec3(7.0, 8.0, 9.0),
1720
+ )
1721
+ wp.expect_eq(m2[0, 0], 1.0)
1722
+ wp.expect_eq(m2[0, 1], 2.0)
1723
+ wp.expect_eq(m2[0, 2], 3.0)
1724
+ wp.expect_eq(m2[1, 0], 4.0)
1725
+ wp.expect_eq(m2[1, 1], 5.0)
1726
+ wp.expect_eq(m2[1, 2], 6.0)
1727
+ wp.expect_eq(m2[2, 0], 7.0)
1728
+ wp.expect_eq(m2[2, 1], 8.0)
1729
+ wp.expect_eq(m2[2, 2], 9.0)
1730
+
1731
+ m3 = wp.matrix_from_cols(
1732
+ wp.vec3(1.0, 2.0, 3.0),
1733
+ wp.vec3(4.0, 5.0, 6.0),
1629
1734
  )
1735
+ wp.expect_eq(m3[0, 0], 1.0)
1736
+ wp.expect_eq(m3[0, 1], 4.0)
1737
+ wp.expect_eq(m3[1, 0], 2.0)
1738
+ wp.expect_eq(m3[1, 1], 5.0)
1739
+ wp.expect_eq(m3[2, 0], 3.0)
1740
+ wp.expect_eq(m3[2, 1], 6.0)
1741
+
1742
+ m4 = wp.matrix_from_rows(
1743
+ wp.vec3(1.0, 2.0, 3.0),
1744
+ wp.vec3(4.0, 5.0, 6.0),
1745
+ )
1746
+ wp.expect_eq(m4[0, 0], 1.0)
1747
+ wp.expect_eq(m4[0, 1], 2.0)
1748
+ wp.expect_eq(m4[0, 2], 3.0)
1749
+ wp.expect_eq(m4[1, 0], 4.0)
1750
+ wp.expect_eq(m4[1, 1], 5.0)
1751
+ wp.expect_eq(m4[1, 2], 6.0)
1630
1752
 
1631
1753
 
1632
1754
  # Same as above but with a default (float/int) type
@@ -1743,15 +1865,20 @@ def test_matrix_len(test, device):
1743
1865
 
1744
1866
  @wp.kernel
1745
1867
  def matrix_augassign_kernel(
1746
- a: wp.array(dtype=wp.mat22), b: wp.array(dtype=wp.mat22), c: wp.array(dtype=wp.mat22), d: wp.array(dtype=wp.mat22)
1868
+ a: wp.array(dtype=wp.mat22),
1869
+ b: wp.array(dtype=wp.mat22),
1870
+ x: wp.array(dtype=wp.vec2),
1871
+ c: wp.array(dtype=wp.mat22),
1872
+ d: wp.array(dtype=wp.mat22),
1873
+ y: wp.array(dtype=wp.vec2),
1747
1874
  ):
1748
1875
  i = wp.tid()
1749
1876
 
1750
1877
  m1 = wp.mat22()
1751
1878
  m2 = b[i]
1879
+ v2 = x[i]
1752
1880
 
1753
- m1[0, 0] += m2[0, 0]
1754
- m1[0, 1] += m2[0, 1]
1881
+ m1[0] += v2
1755
1882
  m1[1, 0] += m2[1, 0]
1756
1883
  m1[1, 1] += m2[1, 1]
1757
1884
 
@@ -1759,9 +1886,9 @@ def matrix_augassign_kernel(
1759
1886
 
1760
1887
  m3 = wp.mat22()
1761
1888
  m4 = d[i]
1889
+ v4 = y[i]
1762
1890
 
1763
- m3[0, 0] -= m4[0, 0]
1764
- m3[0, 1] -= m4[0, 1]
1891
+ m3[0] -= v4
1765
1892
  m3[1, 0] -= m4[1, 0]
1766
1893
  m3[1, 1] -= m4[1, 1]
1767
1894
 
@@ -1769,27 +1896,61 @@ def matrix_augassign_kernel(
1769
1896
 
1770
1897
 
1771
1898
  def test_matrix_augassign(test, device):
1772
- N = 3
1899
+ N = 1
1773
1900
 
1774
- a = wp.zeros(N, dtype=wp.mat22, requires_grad=True)
1775
- b = wp.ones(N, dtype=wp.mat22, requires_grad=True)
1901
+ a = wp.zeros(N, dtype=wp.mat22, requires_grad=True, device=device)
1902
+ b = wp.ones(N, dtype=wp.mat22, requires_grad=True, device=device)
1903
+ x = wp.ones(N, dtype=wp.vec2, requires_grad=True, device=device)
1776
1904
 
1777
- c = wp.zeros(N, dtype=wp.mat22, requires_grad=True)
1778
- d = wp.ones(N, dtype=wp.mat22, requires_grad=True)
1905
+ c = wp.zeros(N, dtype=wp.mat22, requires_grad=True, device=device)
1906
+ d = wp.ones(N, dtype=wp.mat22, requires_grad=True, device=device)
1907
+ y = wp.ones(N, dtype=wp.vec2, requires_grad=True, device=device)
1779
1908
 
1780
1909
  tape = wp.Tape()
1781
1910
  with tape:
1782
- wp.launch(matrix_augassign_kernel, N, inputs=[a, b, c, d])
1911
+ wp.launch(matrix_augassign_kernel, N, inputs=[a, b, x, c, d, y], device=device)
1783
1912
 
1784
1913
  tape.backward(grads={a: wp.ones_like(a), c: wp.ones_like(c)})
1785
1914
 
1786
1915
  assert_np_equal(a.numpy(), wp.ones_like(a).numpy())
1787
1916
  assert_np_equal(a.grad.numpy(), wp.ones_like(a).numpy())
1788
- assert_np_equal(b.grad.numpy(), wp.ones_like(a).numpy())
1917
+ assert_np_equal(b.grad.numpy(), np.array([[[0, 0], [1, 1]]], dtype=float))
1918
+ assert_np_equal(x.grad.numpy(), np.array([[1, 1]], dtype=float))
1789
1919
 
1790
1920
  assert_np_equal(c.numpy(), -wp.ones_like(c).numpy())
1791
1921
  assert_np_equal(c.grad.numpy(), wp.ones_like(c).numpy())
1792
- assert_np_equal(d.grad.numpy(), -wp.ones_like(d).numpy())
1922
+ assert_np_equal(d.grad.numpy(), np.array([[[0, 0], [-1, -1]]], dtype=float))
1923
+ assert_np_equal(y.grad.numpy(), np.array([[-1, -1]], dtype=float))
1924
+
1925
+
1926
+ def test_matrix_assign_copy(test, device):
1927
+ saved_enable_vector_component_overwrites_setting = wp.config.enable_vector_component_overwrites
1928
+ try:
1929
+ wp.config.enable_vector_component_overwrites = True
1930
+
1931
+ @wp.kernel
1932
+ def mat_in_register_overwrite(x: wp.array2d(dtype=wp.mat22), y: wp.array(dtype=wp.vec2)):
1933
+ i, j = wp.tid()
1934
+
1935
+ a = wp.mat22()
1936
+ a[0] = y[i]
1937
+ a[0, 1] = 3.0
1938
+ x[i, j] = a
1939
+
1940
+ x = wp.zeros((1, 1), dtype=wp.mat22, device=device, requires_grad=True)
1941
+ y = wp.ones(1, dtype=wp.vec2, device=device, requires_grad=True)
1942
+
1943
+ tape = wp.Tape()
1944
+ with tape:
1945
+ wp.launch(mat_in_register_overwrite, dim=(1, 1), inputs=[x, y], device=device)
1946
+
1947
+ tape.backward(grads={x: wp.ones_like(x, requires_grad=False)})
1948
+
1949
+ assert_np_equal(x.numpy(), np.array([[[[1.0, 3.0], [0.0, 0.0]]]], dtype=float))
1950
+ assert_np_equal(y.grad.numpy(), np.array([[1.0, 0.0]], dtype=float))
1951
+
1952
+ finally:
1953
+ wp.config.enable_vector_component_overwrites = saved_enable_vector_component_overwrites_setting
1793
1954
 
1794
1955
 
1795
1956
  devices = get_test_devices()
@@ -1814,6 +1975,7 @@ add_kernel_test(TestMat, test_constructors_explicit_precision, dim=1, devices=de
1814
1975
  add_kernel_test(TestMat, test_constructors_default_precision, dim=1, devices=devices)
1815
1976
  add_kernel_test(TestMat, test_constructors_constant_shape, dim=1, devices=devices)
1816
1977
  add_kernel_test(TestMat, test_matrix_constructor_value_func, dim=1, devices=devices)
1978
+ add_kernel_test(TestMat, test_matrix_from_vecs, dim=1, devices=devices)
1817
1979
 
1818
1980
  mat103 = wp.types.matrix(shape=(10, 3), dtype=float)
1819
1981
  add_kernel_test(
@@ -1878,18 +2040,6 @@ add_function_test(
1878
2040
  test_tpl_constructor_error_incompatible_sizes,
1879
2041
  devices=devices,
1880
2042
  )
1881
- add_function_test(
1882
- TestMat,
1883
- "test_tpl_constructor_error_invalid_vector_count",
1884
- test_tpl_constructor_error_invalid_vector_count,
1885
- devices=devices,
1886
- )
1887
- add_function_test(
1888
- TestMat,
1889
- "test_tpl_constructor_error_invalid_vector_shape",
1890
- test_tpl_constructor_error_invalid_vector_shape,
1891
- devices=devices,
1892
- )
1893
2043
  add_function_test(
1894
2044
  TestMat,
1895
2045
  "test_tpl_constructor_error_invalid_arg_count",
@@ -1908,6 +2058,9 @@ for dtype in np_float_types:
1908
2058
  TestMat, f"test_inverse_{dtype.__name__}", test_inverse, devices=devices, dtype=dtype
1909
2059
  )
1910
2060
  add_function_test_register_kernel(TestMat, f"test_svd_{dtype.__name__}", test_svd, devices=devices, dtype=dtype)
2061
+ add_function_test_register_kernel(
2062
+ TestMat, f"test_svd_2D{dtype.__name__}", test_svd_2D, devices=devices, dtype=dtype
2063
+ )
1911
2064
  add_function_test_register_kernel(TestMat, f"test_qr_{dtype.__name__}", test_qr, devices=devices, dtype=dtype)
1912
2065
  add_function_test_register_kernel(TestMat, f"test_eig_{dtype.__name__}", test_eig, devices=devices, dtype=dtype)
1913
2066
  add_function_test_register_kernel(
@@ -1922,13 +2075,14 @@ for dtype in np_float_types:
1922
2075
  add_function_test_register_kernel(TestMat, f"test_skew_{dtype.__name__}", test_skew, devices=devices, dtype=dtype)
1923
2076
  add_function_test_register_kernel(
1924
2077
  TestMat,
1925
- f"test_mat_array_type_indexing_{dtype.__name__}",
1926
- test_mat_array_type_indexing,
2078
+ f"test_matrix_assign_inplace_{dtype.__name__}",
2079
+ test_matrix_assign_inplace,
1927
2080
  devices=devices,
1928
2081
  dtype=dtype,
1929
2082
  )
1930
2083
  add_function_test(TestMat, "test_matrix_len", test_matrix_len, devices=devices)
1931
2084
  add_function_test(TestMat, "test_matrix_augassign", test_matrix_augassign, devices=devices)
2085
+ add_function_test(TestMat, "test_matrix_assign_copy", test_matrix_assign_copy, devices=devices)
1932
2086
 
1933
2087
  if __name__ == "__main__":
1934
2088
  wp.clear_kernel_cache()