warp-lang 1.5.1__py3-none-manylinux2014_aarch64.whl → 1.6.1__py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (131)
  1. warp/__init__.py +5 -0
  2. warp/autograd.py +414 -191
  3. warp/bin/warp-clang.so +0 -0
  4. warp/bin/warp.so +0 -0
  5. warp/build.py +40 -12
  6. warp/build_dll.py +13 -6
  7. warp/builtins.py +1077 -481
  8. warp/codegen.py +250 -122
  9. warp/config.py +65 -21
  10. warp/context.py +500 -149
  11. warp/examples/assets/square_cloth.usd +0 -0
  12. warp/examples/benchmarks/benchmark_gemm.py +27 -18
  13. warp/examples/benchmarks/benchmark_interop_paddle.py +3 -3
  14. warp/examples/benchmarks/benchmark_interop_torch.py +3 -3
  15. warp/examples/core/example_marching_cubes.py +1 -1
  16. warp/examples/core/example_mesh.py +1 -1
  17. warp/examples/core/example_torch.py +18 -34
  18. warp/examples/core/example_wave.py +1 -1
  19. warp/examples/fem/example_apic_fluid.py +1 -0
  20. warp/examples/fem/example_mixed_elasticity.py +1 -1
  21. warp/examples/optim/example_bounce.py +1 -1
  22. warp/examples/optim/example_cloth_throw.py +1 -1
  23. warp/examples/optim/example_diffray.py +4 -15
  24. warp/examples/optim/example_drone.py +1 -1
  25. warp/examples/optim/example_softbody_properties.py +392 -0
  26. warp/examples/optim/example_trajectory.py +1 -3
  27. warp/examples/optim/example_walker.py +5 -0
  28. warp/examples/sim/example_cartpole.py +0 -2
  29. warp/examples/sim/example_cloth_self_contact.py +314 -0
  30. warp/examples/sim/example_granular_collision_sdf.py +4 -5
  31. warp/examples/sim/example_jacobian_ik.py +0 -2
  32. warp/examples/sim/example_quadruped.py +5 -2
  33. warp/examples/tile/example_tile_cholesky.py +79 -0
  34. warp/examples/tile/example_tile_convolution.py +2 -2
  35. warp/examples/tile/example_tile_fft.py +2 -2
  36. warp/examples/tile/example_tile_filtering.py +3 -3
  37. warp/examples/tile/example_tile_matmul.py +4 -4
  38. warp/examples/tile/example_tile_mlp.py +12 -12
  39. warp/examples/tile/example_tile_nbody.py +191 -0
  40. warp/examples/tile/example_tile_walker.py +319 -0
  41. warp/math.py +147 -0
  42. warp/native/array.h +12 -0
  43. warp/native/builtin.h +0 -1
  44. warp/native/bvh.cpp +149 -70
  45. warp/native/bvh.cu +287 -68
  46. warp/native/bvh.h +195 -85
  47. warp/native/clang/clang.cpp +6 -2
  48. warp/native/crt.h +1 -0
  49. warp/native/cuda_util.cpp +35 -0
  50. warp/native/cuda_util.h +5 -0
  51. warp/native/exports.h +40 -40
  52. warp/native/intersect.h +17 -0
  53. warp/native/mat.h +57 -3
  54. warp/native/mathdx.cpp +19 -0
  55. warp/native/mesh.cpp +25 -8
  56. warp/native/mesh.cu +153 -101
  57. warp/native/mesh.h +482 -403
  58. warp/native/quat.h +40 -0
  59. warp/native/solid_angle.h +7 -0
  60. warp/native/sort.cpp +85 -0
  61. warp/native/sort.cu +34 -0
  62. warp/native/sort.h +3 -1
  63. warp/native/spatial.h +11 -0
  64. warp/native/tile.h +1189 -664
  65. warp/native/tile_reduce.h +8 -6
  66. warp/native/vec.h +41 -0
  67. warp/native/warp.cpp +8 -1
  68. warp/native/warp.cu +263 -40
  69. warp/native/warp.h +19 -5
  70. warp/optim/linear.py +22 -4
  71. warp/render/render_opengl.py +132 -59
  72. warp/render/render_usd.py +10 -2
  73. warp/sim/__init__.py +6 -1
  74. warp/sim/collide.py +289 -32
  75. warp/sim/import_urdf.py +20 -5
  76. warp/sim/integrator_euler.py +25 -7
  77. warp/sim/integrator_featherstone.py +147 -35
  78. warp/sim/integrator_vbd.py +842 -40
  79. warp/sim/model.py +173 -112
  80. warp/sim/render.py +2 -2
  81. warp/stubs.py +249 -116
  82. warp/tape.py +28 -30
  83. warp/tests/aux_test_module_unload.py +15 -0
  84. warp/tests/{test_sim_grad.py → flaky_test_sim_grad.py} +104 -63
  85. warp/tests/test_array.py +100 -0
  86. warp/tests/test_assert.py +242 -0
  87. warp/tests/test_codegen.py +14 -61
  88. warp/tests/test_collision.py +8 -8
  89. warp/tests/test_examples.py +16 -1
  90. warp/tests/test_grad_debug.py +87 -2
  91. warp/tests/test_hash_grid.py +1 -1
  92. warp/tests/test_ipc.py +116 -0
  93. warp/tests/test_launch.py +77 -26
  94. warp/tests/test_mat.py +213 -168
  95. warp/tests/test_math.py +47 -1
  96. warp/tests/test_matmul.py +11 -7
  97. warp/tests/test_matmul_lite.py +4 -4
  98. warp/tests/test_mesh.py +84 -60
  99. warp/tests/test_mesh_query_aabb.py +165 -0
  100. warp/tests/test_mesh_query_point.py +328 -286
  101. warp/tests/test_mesh_query_ray.py +134 -121
  102. warp/tests/test_mlp.py +2 -2
  103. warp/tests/test_operators.py +43 -0
  104. warp/tests/test_overwrite.py +6 -5
  105. warp/tests/test_quat.py +77 -0
  106. warp/tests/test_reload.py +29 -0
  107. warp/tests/test_sim_grad_bounce_linear.py +204 -0
  108. warp/tests/test_static.py +16 -0
  109. warp/tests/test_tape.py +25 -0
  110. warp/tests/test_tile.py +134 -191
  111. warp/tests/test_tile_load.py +399 -0
  112. warp/tests/test_tile_mathdx.py +61 -8
  113. warp/tests/test_tile_mlp.py +17 -17
  114. warp/tests/test_tile_reduce.py +24 -18
  115. warp/tests/test_tile_shared_memory.py +66 -17
  116. warp/tests/test_tile_view.py +165 -0
  117. warp/tests/test_torch.py +35 -0
  118. warp/tests/test_utils.py +36 -24
  119. warp/tests/test_vec.py +110 -0
  120. warp/tests/unittest_suites.py +29 -4
  121. warp/tests/unittest_utils.py +30 -11
  122. warp/thirdparty/unittest_parallel.py +5 -2
  123. warp/types.py +419 -111
  124. warp/utils.py +9 -5
  125. {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/METADATA +86 -45
  126. {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/RECORD +129 -118
  127. {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/WHEEL +1 -1
  128. warp/examples/benchmarks/benchmark_tile.py +0 -179
  129. warp/native/tile_gemm.h +0 -341
  130. {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/LICENSE.md +0 -0
  131. {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/top_level.txt +0 -0
warp/tests/test_mat.py CHANGED
@@ -6,20 +6,14 @@
6
6
  # license agreement from NVIDIA CORPORATION is strictly prohibited.
7
7
 
8
8
  import unittest
9
+ from typing import Any
9
10
 
10
11
  import numpy as np
11
12
 
12
13
  import warp as wp
13
14
  from warp.tests.unittest_utils import *
14
15
 
15
- np_signed_int_types = [
16
- np.int8,
17
- np.int16,
18
- np.int32,
19
- np.int64,
20
- np.byte,
21
- ]
22
-
16
+ np_signed_int_types = [np.int8, np.int16, np.int32, np.int64, np.byte]
23
17
  np_float_types = [np.float16, np.float32, np.float64]
24
18
 
25
19
 
@@ -42,11 +36,7 @@ def getkernel(func, suffix=""):
42
36
 
43
37
 
44
38
  def get_select_kernel(dtype):
45
- def output_select_kernel_fn(
46
- input: wp.array(dtype=dtype),
47
- index: int,
48
- out: wp.array(dtype=dtype),
49
- ):
39
+ def output_select_kernel_fn(input: wp.array(dtype=dtype), index: int, out: wp.array(dtype=dtype)):
50
40
  out[0] = input[index]
51
41
 
52
42
  return getkernel(output_select_kernel_fn, suffix=dtype.__name__)
@@ -61,33 +51,19 @@ def test_anon_constructor_error_shape_arg_missing(test, device):
61
51
  RuntimeError,
62
52
  r"the `shape` argument must be specified when initializing a matrix by value$",
63
53
  ):
64
- wp.launch(
65
- kernel,
66
- dim=1,
67
- inputs=[],
68
- device=device,
69
- )
54
+ wp.launch(kernel, dim=1, inputs=[], device=device)
70
55
 
71
56
 
72
57
  def test_anon_constructor_error_shape_mismatch(test, device):
73
58
  @wp.kernel
74
59
  def kernel():
75
- wp.matrix(
76
- wp.matrix(shape=(1, 2), dtype=float),
77
- shape=(3, 4),
78
- dtype=float,
79
- )
60
+ wp.matrix(wp.matrix(shape=(1, 2), dtype=float), shape=(3, 4), dtype=float)
80
61
 
81
62
  with test.assertRaisesRegex(
82
63
  RuntimeError,
83
64
  r"incompatible matrix of shape \(3, 4\) given when copy constructing a matrix of shape \(1, 2\)$",
84
65
  ):
85
- wp.launch(
86
- kernel,
87
- dim=1,
88
- inputs=[],
89
- device=device,
90
- )
66
+ wp.launch(kernel, dim=1, inputs=[], device=device)
91
67
 
92
68
 
93
69
  def test_anon_constructor_error_type_mismatch(test, device):
@@ -99,12 +75,7 @@ def test_anon_constructor_error_type_mismatch(test, device):
99
75
  RuntimeError,
100
76
  r"the value used to fill this matrix is expected to be of the type `float16`$",
101
77
  ):
102
- wp.launch(
103
- kernel,
104
- dim=1,
105
- inputs=[],
106
- device=device,
107
- )
78
+ wp.launch(kernel, dim=1, inputs=[], device=device)
108
79
 
109
80
 
110
81
  def test_anon_constructor_error_invalid_arg_count(test, device):
@@ -116,12 +87,7 @@ def test_anon_constructor_error_invalid_arg_count(test, device):
116
87
  RuntimeError,
117
88
  r"incompatible number of values given \(3\) when constructing a matrix of shape \(2, 2\)$",
118
89
  ):
119
- wp.launch(
120
- kernel,
121
- dim=1,
122
- inputs=[],
123
- device=device,
124
- )
90
+ wp.launch(kernel, dim=1, inputs=[], device=device)
125
91
 
126
92
 
127
93
  def test_anon_xform_constructor_error_type_mismatch(test, device):
@@ -150,12 +116,7 @@ def test_tpl_constructor_error_incompatible_sizes(test, device):
150
116
  RuntimeError,
151
117
  r"incompatible matrix of shape \(3, 3\) given when copy constructing a matrix of shape \(2, 2\)$",
152
118
  ):
153
- wp.launch(
154
- kernel,
155
- dim=1,
156
- inputs=[],
157
- device=device,
158
- )
119
+ wp.launch(kernel, dim=1, inputs=[], device=device)
159
120
 
160
121
 
161
122
  def test_tpl_constructor_error_invalid_vector_count(test, device):
@@ -167,12 +128,7 @@ def test_tpl_constructor_error_invalid_vector_count(test, device):
167
128
  RuntimeError,
168
129
  r"incompatible number of column vectors given \(2\) when constructing a matrix of shape \(3, 3\)$",
169
130
  ):
170
- wp.launch(
171
- kernel,
172
- dim=1,
173
- inputs=[],
174
- device=device,
175
- )
131
+ wp.launch(kernel, dim=1, inputs=[], device=device)
176
132
 
177
133
 
178
134
  def test_tpl_constructor_error_invalid_vector_shape(test, device):
@@ -184,12 +140,7 @@ def test_tpl_constructor_error_invalid_vector_shape(test, device):
184
140
  RuntimeError,
185
141
  r"incompatible column vector lengths given when constructing a matrix of shape \(2, 2\)$",
186
142
  ):
187
- wp.launch(
188
- kernel,
189
- dim=1,
190
- inputs=[],
191
- device=device,
192
- )
143
+ wp.launch(kernel, dim=1, inputs=[], device=device)
193
144
 
194
145
 
195
146
  def test_tpl_constructor_error_invalid_arg_count(test, device):
@@ -201,12 +152,7 @@ def test_tpl_constructor_error_invalid_arg_count(test, device):
201
152
  RuntimeError,
202
153
  r"incompatible number of values given \(3\) when constructing a matrix of shape \(2, 2\)$",
203
154
  ):
204
- wp.launch(
205
- kernel,
206
- dim=1,
207
- inputs=[],
208
- device=device,
209
- )
155
+ wp.launch(kernel, dim=1, inputs=[], device=device)
210
156
 
211
157
 
212
158
  def test_py_arithmetic_ops(test, device, dtype):
@@ -438,6 +384,77 @@ def test_negation(test, device, dtype, register_kernels=False):
438
384
  idx = idx + 1
439
385
 
440
386
 
387
+ def test_matmul(test, device, dtype, register_kernels=False):
388
+ rng = np.random.default_rng(123)
389
+
390
+ tol = {
391
+ np.float16: 5.0e-3,
392
+ np.float32: 1.0e-6,
393
+ np.float64: 1.0e-12,
394
+ }.get(dtype, 0)
395
+
396
+ wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
397
+ mat22 = wp.types.matrix(shape=(2, 2), dtype=wptype)
398
+ mat33 = wp.types.matrix(shape=(3, 3), dtype=wptype)
399
+ mat23 = wp.types.matrix(shape=(2, 3), dtype=wptype)
400
+ mat32 = wp.types.matrix(shape=(3, 2), dtype=wptype)
401
+ mat44 = wp.types.matrix(shape=(4, 4), dtype=wptype)
402
+
403
+ output_select_kernel = get_select_kernel(wptype)
404
+
405
+ def check_mat_mul(
406
+ i23: wp.array(dtype=mat23),
407
+ i32: wp.array(dtype=mat32),
408
+ i44: wp.array(dtype=mat44),
409
+ o22: wp.array(dtype=mat22),
410
+ o33: wp.array(dtype=mat33),
411
+ o44: wp.array(dtype=mat44),
412
+ ):
413
+ i = wp.tid()
414
+ o22[i] = i23[i] @ i32[i]
415
+ o33[i] = i32[i] @ i23[i]
416
+ o44[i] = i44[i] @ i44[i]
417
+
418
+ kernel = getkernel(check_mat_mul, suffix=dtype.__name__)
419
+
420
+ if register_kernels:
421
+ return
422
+
423
+ test_adj = dtype in np_float_types
424
+
425
+ i23 = wp.array(randvals(rng, [1, 2, 3], dtype), dtype=mat23, requires_grad=test_adj, device=device)
426
+ i32 = wp.array(randvals(rng, [1, 3, 2], dtype), dtype=mat32, requires_grad=test_adj, device=device)
427
+ i44 = wp.array(randvals(rng, [1, 4, 4], dtype), dtype=mat44, requires_grad=test_adj, device=device)
428
+ o22 = wp.array(randvals(rng, [1, 2, 2], dtype), dtype=mat22, requires_grad=test_adj, device=device)
429
+ o33 = wp.array(randvals(rng, [1, 3, 3], dtype), dtype=mat33, requires_grad=test_adj, device=device)
430
+ o44 = wp.array(randvals(rng, [1, 4, 4], dtype), dtype=mat44, requires_grad=test_adj, device=device)
431
+
432
+ tape = wp.Tape()
433
+ with tape:
434
+ wp.launch(
435
+ kernel,
436
+ dim=1,
437
+ inputs=[i23, i32, i44],
438
+ outputs=[o22, o33, o44],
439
+ device=device,
440
+ )
441
+
442
+ assert_np_equal(o22.numpy(), i23.numpy() @ i32.numpy(), tol=tol)
443
+ assert_np_equal(o33.numpy(), i32.numpy() @ i23.numpy(), tol=tol)
444
+ assert_np_equal(o44.numpy(), i44.numpy() @ i44.numpy(), tol=tol)
445
+
446
+ if test_adj:
447
+ o22.grad.assign([np.eye(2)])
448
+ o33.grad.assign([np.eye(3)])
449
+ o44.grad.assign([np.eye(4)])
450
+
451
+ tape.backward()
452
+
453
+ assert_np_equal(i23.grad.numpy(), 2.0 * i32.numpy().T, tol=tol)
454
+ assert_np_equal(i32.grad.numpy(), 2.0 * i23.numpy().T, tol=tol)
455
+ assert_np_equal(i44.grad.numpy(), 2.0 * i44.numpy().T, tol=tol)
456
+
457
+
441
458
  def test_subtraction(test, device, dtype, register_kernels=False):
442
459
  rng = np.random.default_rng(123)
443
460
 
@@ -541,16 +558,7 @@ def test_subtraction(test, device, dtype, register_kernels=False):
541
558
  wp.launch(
542
559
  kernel,
543
560
  dim=1,
544
- inputs=[
545
- s2,
546
- s3,
547
- s4,
548
- s5,
549
- v2,
550
- v3,
551
- v4,
552
- v5,
553
- ],
561
+ inputs=[s2, s3, s4, s5, v2, v3, v4, v5],
554
562
  outputs=[outcomponents],
555
563
  device=device,
556
564
  )
@@ -558,11 +566,11 @@ def test_subtraction(test, device, dtype, register_kernels=False):
558
566
  output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device
559
567
  )
560
568
  tape.backward(loss=out)
561
- expectedresult = np.zeros((dim, dim), dtype=dtype)
562
- expectedresult[i, j] = 2
563
- assert_np_equal(tape.gradients[in2].numpy()[0], expectedresult, tol=10 * tol)
564
- expectedresult[i, j] = -2
565
- assert_np_equal(tape.gradients[in1].numpy()[0], expectedresult, tol=10 * tol)
569
+ expected_result = np.zeros((dim, dim), dtype=dtype)
570
+ expected_result[i, j] = 2
571
+ assert_np_equal(tape.gradients[in2].numpy()[0], expected_result, tol=10 * tol)
572
+ expected_result[i, j] = -2
573
+ assert_np_equal(tape.gradients[in1].numpy()[0], expected_result, tol=10 * tol)
566
574
  tape.zero()
567
575
 
568
576
  idx = idx + 1
@@ -608,21 +616,7 @@ def test_determinant(test, device, dtype, register_kernels=False):
608
616
 
609
617
  tape = wp.Tape()
610
618
  with tape:
611
- wp.launch(
612
- kernel,
613
- dim=1,
614
- inputs=[
615
- v2,
616
- v3,
617
- v4,
618
- ],
619
- outputs=[
620
- det2,
621
- det3,
622
- det4,
623
- ],
624
- device=device,
625
- )
619
+ wp.launch(kernel, dim=1, inputs=[v2, v3, v4], outputs=[det2, det3, det4], device=device)
626
620
 
627
621
  if dtype in np_float_types:
628
622
  assert_np_equal(det2.numpy()[0], 2 * np.linalg.det(v2.numpy()[0].astype(np.float64)), tol=100 * tol)
@@ -658,16 +652,8 @@ def test_determinant(test, device, dtype, register_kernels=False):
658
652
  wp.launch(
659
653
  kernel,
660
654
  dim=1,
661
- inputs=[
662
- wp.array(v2test, dtype=v2.dtype, requires_grad=True, device=device),
663
- v3,
664
- v4,
665
- ],
666
- outputs=[
667
- det2,
668
- det3,
669
- det4,
670
- ],
655
+ inputs=[wp.array(v2test, dtype=v2.dtype, requires_grad=True, device=device), v3, v4],
656
+ outputs=[det2, det3, det4],
671
657
  device=device,
672
658
  )
673
659
  dplus = det2.numpy()[0]
@@ -675,16 +661,8 @@ def test_determinant(test, device, dtype, register_kernels=False):
675
661
  wp.launch(
676
662
  kernel,
677
663
  dim=1,
678
- inputs=[
679
- wp.array(v2test, dtype=v2.dtype, requires_grad=True, device=device),
680
- v3,
681
- v4,
682
- ],
683
- outputs=[
684
- det2,
685
- det3,
686
- det4,
687
- ],
664
+ inputs=[wp.array(v2test, dtype=v2.dtype, requires_grad=True, device=device), v3, v4],
665
+ outputs=[det2, det3, det4],
688
666
  device=device,
689
667
  )
690
668
  dminus = det2.numpy()[0]
@@ -697,16 +675,8 @@ def test_determinant(test, device, dtype, register_kernels=False):
697
675
  wp.launch(
698
676
  kernel,
699
677
  dim=1,
700
- inputs=[
701
- v2,
702
- wp.array(v3test, dtype=v3.dtype, requires_grad=True, device=device),
703
- v4,
704
- ],
705
- outputs=[
706
- det2,
707
- det3,
708
- det4,
709
- ],
678
+ inputs=[v2, wp.array(v3test, dtype=v3.dtype, requires_grad=True, device=device), v4],
679
+ outputs=[det2, det3, det4],
710
680
  device=device,
711
681
  )
712
682
  dplus = det3.numpy()[0]
@@ -714,16 +684,8 @@ def test_determinant(test, device, dtype, register_kernels=False):
714
684
  wp.launch(
715
685
  kernel,
716
686
  dim=1,
717
- inputs=[
718
- v2,
719
- wp.array(v3test, dtype=v3.dtype, requires_grad=True, device=device),
720
- v4,
721
- ],
722
- outputs=[
723
- det2,
724
- det3,
725
- det4,
726
- ],
687
+ inputs=[v2, wp.array(v3test, dtype=v3.dtype, requires_grad=True, device=device), v4],
688
+ outputs=[det2, det3, det4],
727
689
  device=device,
728
690
  )
729
691
  dminus = det3.numpy()[0]
@@ -736,16 +698,8 @@ def test_determinant(test, device, dtype, register_kernels=False):
736
698
  wp.launch(
737
699
  kernel,
738
700
  dim=1,
739
- inputs=[
740
- v2,
741
- v3,
742
- wp.array(v4test, dtype=v4.dtype, requires_grad=True, device=device),
743
- ],
744
- outputs=[
745
- det2,
746
- det3,
747
- det4,
748
- ],
701
+ inputs=[v2, v3, wp.array(v4test, dtype=v4.dtype, requires_grad=True, device=device)],
702
+ outputs=[det2, det3, det4],
749
703
  device=device,
750
704
  )
751
705
  dplus = det4.numpy()[0]
@@ -753,16 +707,8 @@ def test_determinant(test, device, dtype, register_kernels=False):
753
707
  wp.launch(
754
708
  kernel,
755
709
  dim=1,
756
- inputs=[
757
- v2,
758
- v3,
759
- wp.array(v4test, dtype=v4.dtype, requires_grad=True, device=device),
760
- ],
761
- outputs=[
762
- det2,
763
- det3,
764
- det4,
765
- ],
710
+ inputs=[v2, v3, wp.array(v4test, dtype=v4.dtype, requires_grad=True, device=device)],
711
+ outputs=[det2, det3, det4],
766
712
  device=device,
767
713
  )
768
714
  dminus = det4.numpy()[0]
@@ -999,7 +945,7 @@ def test_svd(test, device, dtype, register_kernels=False):
999
945
  tol = {
1000
946
  np.float16: 1.0e-3,
1001
947
  np.float32: 1.0e-6,
1002
- np.float64: 1.0e-6,
948
+ np.float64: 1.0e-12,
1003
949
  }.get(dtype, 0)
1004
950
 
1005
951
  wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
@@ -1722,8 +1668,9 @@ def test_matrix_mutation(expected: wp.types.matrix(shape=(10, 3), dtype=float)):
1722
1668
  wp.expect_eq(m, expected)
1723
1669
 
1724
1670
 
1725
- CONSTANT_SHAPE_ROWS = wp.constant(10)
1726
- CONSTANT_SHAPE_COLS = wp.constant(10)
1671
+ # NOTE: Compile tile is highly sensitive to shape so we use small values now
1672
+ CONSTANT_SHAPE_ROWS = wp.constant(2)
1673
+ CONSTANT_SHAPE_COLS = wp.constant(2)
1727
1674
 
1728
1675
 
1729
1676
  # tests that we can use global constants in shape keyword argument
@@ -1737,6 +1684,106 @@ def test_constructors_constant_shape():
1737
1684
  m[i, j] = float(i * j)
1738
1685
 
1739
1686
 
1687
+ Mat23 = wp.mat((2, 3), dtype=wp.float16)
1688
+
1689
+
1690
+ @wp.kernel
1691
+ def matrix_len_kernel(
1692
+ m1: wp.mat22, m2: wp.mat((3, 3), float), m3: wp.mat((Any, Any), float), m4: Mat23, out: wp.array(dtype=int)
1693
+ ):
1694
+ length = wp.static(len(m1))
1695
+ wp.expect_eq(len(m1), 2)
1696
+ out[0] = len(m1)
1697
+
1698
+ length = len(m2)
1699
+ wp.expect_eq(wp.static(len(m2)), 3)
1700
+ out[1] = len(m2)
1701
+
1702
+ length = len(m3)
1703
+ wp.expect_eq(len(m3), 4)
1704
+ out[2] = wp.static(len(m3))
1705
+
1706
+ length = wp.static(len(m4))
1707
+ wp.expect_eq(wp.static(len(m4)), 2)
1708
+ out[3] = wp.static(len(m4))
1709
+
1710
+ foo = wp.mat22()
1711
+ length = len(foo)
1712
+ wp.expect_eq(len(foo), 2)
1713
+ out[4] = len(foo)
1714
+
1715
+
1716
+ def test_matrix_len(test, device):
1717
+ m1 = wp.mat22()
1718
+ m2 = wp.mat33()
1719
+ m3 = wp.mat44()
1720
+ m4 = Mat23()
1721
+ out = wp.empty(5, dtype=int, device=device)
1722
+ wp.launch(matrix_len_kernel, dim=(1,), inputs=(m1, m2, m3, m4), outputs=(out,), device=device)
1723
+
1724
+ test.assertEqual(out.numpy()[0], 2)
1725
+ test.assertEqual(out.numpy()[1], 3)
1726
+ test.assertEqual(out.numpy()[2], 4)
1727
+ test.assertEqual(out.numpy()[3], 2)
1728
+ test.assertEqual(out.numpy()[4], 2)
1729
+
1730
+ test.assertEqual(len(m1), 2)
1731
+ test.assertEqual(len(m2), 3)
1732
+ test.assertEqual(len(m3), 4)
1733
+ test.assertEqual(len(m4), 2)
1734
+
1735
+
1736
+ @wp.kernel
1737
+ def matrix_augassign_kernel(
1738
+ a: wp.array(dtype=wp.mat22), b: wp.array(dtype=wp.mat22), c: wp.array(dtype=wp.mat22), d: wp.array(dtype=wp.mat22)
1739
+ ):
1740
+ i = wp.tid()
1741
+
1742
+ m1 = wp.mat22()
1743
+ m2 = b[i]
1744
+
1745
+ m1[0, 0] += m2[0, 0]
1746
+ m1[0, 1] += m2[0, 1]
1747
+ m1[1, 0] += m2[1, 0]
1748
+ m1[1, 1] += m2[1, 1]
1749
+
1750
+ a[i] = m1
1751
+
1752
+ m3 = wp.mat22()
1753
+ m4 = d[i]
1754
+
1755
+ m3[0, 0] -= m4[0, 0]
1756
+ m3[0, 1] -= m4[0, 1]
1757
+ m3[1, 0] -= m4[1, 0]
1758
+ m3[1, 1] -= m4[1, 1]
1759
+
1760
+ c[i] = m3
1761
+
1762
+
1763
+ def test_matrix_augassign(test, device):
1764
+ N = 3
1765
+
1766
+ a = wp.zeros(N, dtype=wp.mat22, requires_grad=True)
1767
+ b = wp.ones(N, dtype=wp.mat22, requires_grad=True)
1768
+
1769
+ c = wp.zeros(N, dtype=wp.mat22, requires_grad=True)
1770
+ d = wp.ones(N, dtype=wp.mat22, requires_grad=True)
1771
+
1772
+ tape = wp.Tape()
1773
+ with tape:
1774
+ wp.launch(matrix_augassign_kernel, N, inputs=[a, b, c, d])
1775
+
1776
+ tape.backward(grads={a: wp.ones_like(a), c: wp.ones_like(c)})
1777
+
1778
+ assert_np_equal(a.numpy(), wp.ones_like(a).numpy())
1779
+ assert_np_equal(a.grad.numpy(), wp.ones_like(a).numpy())
1780
+ assert_np_equal(b.grad.numpy(), wp.ones_like(a).numpy())
1781
+
1782
+ assert_np_equal(c.numpy(), -wp.ones_like(c).numpy())
1783
+ assert_np_equal(c.grad.numpy(), wp.ones_like(c).numpy())
1784
+ assert_np_equal(d.grad.numpy(), -wp.ones_like(d).numpy())
1785
+
1786
+
1740
1787
  devices = get_test_devices()
1741
1788
 
1742
1789
 
@@ -1789,6 +1836,9 @@ for dtype in np_signed_int_types + np_float_types:
1789
1836
  add_function_test_register_kernel(
1790
1837
  TestMat, f"test_subtraction_{dtype.__name__}", test_subtraction, devices=devices, dtype=dtype
1791
1838
  )
1839
+ add_function_test_register_kernel(
1840
+ TestMat, f"test_matmul_{dtype.__name__}", test_matmul, devices=devices, dtype=dtype
1841
+ )
1792
1842
 
1793
1843
  add_function_test(
1794
1844
  TestMat,
@@ -1797,16 +1847,10 @@ add_function_test(
1797
1847
  devices=devices,
1798
1848
  )
1799
1849
  add_function_test(
1800
- TestMat,
1801
- "test_anon_constructor_error_shape_mismatch",
1802
- test_anon_constructor_error_shape_mismatch,
1803
- devices=devices,
1850
+ TestMat, "test_anon_constructor_error_shape_mismatch", test_anon_constructor_error_shape_mismatch, devices=devices
1804
1851
  )
1805
1852
  add_function_test(
1806
- TestMat,
1807
- "test_anon_constructor_error_type_mismatch",
1808
- test_anon_constructor_error_type_mismatch,
1809
- devices=devices,
1853
+ TestMat, "test_anon_constructor_error_type_mismatch", test_anon_constructor_error_type_mismatch, devices=devices
1810
1854
  )
1811
1855
  add_function_test(
1812
1856
  TestMat,
@@ -1875,7 +1919,8 @@ for dtype in np_float_types:
1875
1919
  devices=devices,
1876
1920
  dtype=dtype,
1877
1921
  )
1878
-
1922
+ add_function_test(TestMat, "test_matrix_len", test_matrix_len, devices=devices)
1923
+ add_function_test(TestMat, "test_matrix_augassign", test_matrix_augassign, devices=devices)
1879
1924
 
1880
1925
  if __name__ == "__main__":
1881
1926
  wp.clear_kernel_cache()
warp/tests/test_math.py CHANGED
@@ -6,7 +6,7 @@
6
6
  # license agreement from NVIDIA CORPORATION is strictly prohibited.
7
7
 
8
8
  import unittest
9
- from typing import NamedTuple
9
+ from typing import Any, NamedTuple
10
10
 
11
11
  import numpy as np
12
12
 
@@ -50,6 +50,51 @@ def test_scalar_math(test, device):
50
50
  assert_np_equal(tape.gradients[x].numpy(), np.array([adj_float_results_expected[i]]), tol=1e-6)
51
51
 
52
52
 
53
+ @wp.kernel
54
+ def test_vec_norm_kernel(vs: wp.array(dtype=Any), out: wp.array(dtype=float, ndim=2)):
55
+ tid = wp.tid()
56
+ out[tid, 0] = wp.norm_l1(vs[tid])
57
+ out[tid, 1] = wp.norm_l2(vs[tid])
58
+ out[tid, 2] = wp.norm_huber(vs[tid])
59
+ out[tid, 3] = wp.norm_pseudo_huber(vs[tid])
60
+
61
+
62
+ def test_vec_norm(test, device):
63
+ # ground-truth implementations from SciPy
64
+ def huber(delta, x):
65
+ if x <= delta:
66
+ return 0.5 * x**2
67
+ else:
68
+ return delta * (x - 0.5 * delta)
69
+
70
+ def pseudo_huber(delta, x):
71
+ return delta**2 * (np.sqrt(1 + (x / delta) ** 2) - 1)
72
+
73
+ v0 = wp.vec3(-2.0, -1.0, -3.0)
74
+ v1 = wp.vec3(2.0, 1.0, 3.0)
75
+ v2 = wp.vec3(0.0, 0.0, 0.0)
76
+
77
+ xs = wp.array([v0, v1, v2], dtype=wp.vec3, requires_grad=True, device=device)
78
+ out = wp.empty((len(xs), 4), dtype=wp.float32, requires_grad=True, device=device)
79
+
80
+ wp.launch(test_vec_norm_kernel, dim=len(xs), inputs=[xs], outputs=[out], device=device)
81
+
82
+ for i, x in enumerate([v0, v1, v2]):
83
+ assert_np_equal(
84
+ out.numpy()[i],
85
+ np.array(
86
+ [
87
+ np.linalg.norm(x, ord=1),
88
+ np.linalg.norm(x, ord=2),
89
+ huber(1.0, wp.length(x)),
90
+ # note SciPy defines the Pseudo-Huber loss slightly differently
91
+ pseudo_huber(1.0, wp.length(x)) + 1.0,
92
+ ]
93
+ ),
94
+ tol=1e-6,
95
+ )
96
+
97
+
53
98
  devices = get_test_devices()
54
99
 
55
100
 
@@ -117,6 +162,7 @@ class TestMath(unittest.TestCase):
117
162
 
118
163
 
119
164
  add_function_test(TestMath, "test_scalar_math", test_scalar_math, devices=devices)
165
+ add_function_test(TestMath, "test_vec_norm", test_vec_norm, devices=devices)
120
166
 
121
167
 
122
168
  if __name__ == "__main__":
warp/tests/test_matmul.py CHANGED
@@ -485,13 +485,17 @@ class TestMatmul(unittest.TestCase):
485
485
 
486
486
 
487
487
  # add_function_test(TestMatmul, "test_f16", test_f16, devices=devices)
488
- add_function_test(TestMatmul, "test_f32", test_f32, devices=devices)
489
- add_function_test(TestMatmul, "test_f64", test_f64, devices=devices)
490
- add_function_test(TestMatmul, "test_tape", test_tape, devices=devices)
491
- add_function_test(TestMatmul, "test_operator", test_operator, devices=devices)
492
- add_function_test(TestMatmul, "test_large_batch_count", test_large_batch_count, devices=devices)
493
- add_function_test(TestMatmul, "test_adjoint_accumulation", test_adjoint_accumulation, devices=devices)
494
- add_function_test(TestMatmul, "test_cuda_graph_capture", test_cuda_graph_capture, devices=cuda_devices)
488
+ add_function_test(TestMatmul, "test_f32", test_f32, devices=devices, check_output=False)
489
+ add_function_test(TestMatmul, "test_f64", test_f64, devices=devices, check_output=False)
490
+ add_function_test(TestMatmul, "test_tape", test_tape, devices=devices, check_output=False)
491
+ add_function_test(TestMatmul, "test_operator", test_operator, devices=devices, check_output=False)
492
+ add_function_test(TestMatmul, "test_large_batch_count", test_large_batch_count, devices=devices, check_output=False)
493
+ add_function_test(
494
+ TestMatmul, "test_adjoint_accumulation", test_adjoint_accumulation, devices=devices, check_output=False
495
+ )
496
+ add_function_test(
497
+ TestMatmul, "test_cuda_graph_capture", test_cuda_graph_capture, devices=cuda_devices, check_output=False
498
+ )
495
499
 
496
500
 
497
501
  if __name__ == "__main__":
warp/tests/test_matmul_lite.py CHANGED
@@ -392,10 +392,10 @@ class TestMatmulLite(unittest.TestCase):
392
392
  pass
393
393
 
394
394
 
395
- add_function_test(TestMatmulLite, "test_f32", test_f32, devices=devices)
396
- add_function_test(TestMatmulLite, "test_tape", test_tape, devices=devices)
397
- add_function_test(TestMatmulLite, "test_operator", test_operator, devices=devices)
398
- add_function_test(TestMatmulLite, "test_large_batch_count", test_large_batch_count, devices=devices)
395
+ add_function_test(TestMatmulLite, "test_f32", test_f32, devices=devices, check_output=False)
396
+ add_function_test(TestMatmulLite, "test_tape", test_tape, devices=devices, check_output=False)
397
+ add_function_test(TestMatmulLite, "test_operator", test_operator, devices=devices, check_output=False)
398
+ add_function_test(TestMatmulLite, "test_large_batch_count", test_large_batch_count, devices=devices, check_output=False)
399
399
 
400
400
 
401
401
  if __name__ == "__main__":