warp-lang 1.5.0-py3-none-manylinux2014_aarch64.whl → 1.6.0-py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (132)
  1. warp/__init__.py +5 -0
  2. warp/autograd.py +414 -191
  3. warp/bin/warp-clang.so +0 -0
  4. warp/bin/warp.so +0 -0
  5. warp/build.py +40 -12
  6. warp/build_dll.py +13 -6
  7. warp/builtins.py +1124 -497
  8. warp/codegen.py +261 -136
  9. warp/config.py +1 -1
  10. warp/context.py +357 -119
  11. warp/examples/assets/square_cloth.usd +0 -0
  12. warp/examples/benchmarks/benchmark_gemm.py +27 -18
  13. warp/examples/benchmarks/benchmark_interop_paddle.py +3 -3
  14. warp/examples/benchmarks/benchmark_interop_torch.py +3 -3
  15. warp/examples/core/example_torch.py +18 -34
  16. warp/examples/fem/example_apic_fluid.py +1 -0
  17. warp/examples/fem/example_mixed_elasticity.py +1 -1
  18. warp/examples/optim/example_bounce.py +1 -1
  19. warp/examples/optim/example_cloth_throw.py +1 -1
  20. warp/examples/optim/example_diffray.py +4 -15
  21. warp/examples/optim/example_drone.py +1 -1
  22. warp/examples/optim/example_softbody_properties.py +392 -0
  23. warp/examples/optim/example_trajectory.py +1 -3
  24. warp/examples/optim/example_walker.py +5 -0
  25. warp/examples/sim/example_cartpole.py +0 -2
  26. warp/examples/sim/example_cloth.py +3 -1
  27. warp/examples/sim/example_cloth_self_contact.py +260 -0
  28. warp/examples/sim/example_granular_collision_sdf.py +4 -5
  29. warp/examples/sim/example_jacobian_ik.py +0 -2
  30. warp/examples/sim/example_quadruped.py +5 -2
  31. warp/examples/tile/example_tile_cholesky.py +79 -0
  32. warp/examples/tile/example_tile_convolution.py +2 -2
  33. warp/examples/tile/example_tile_fft.py +2 -2
  34. warp/examples/tile/example_tile_filtering.py +3 -3
  35. warp/examples/tile/example_tile_matmul.py +4 -4
  36. warp/examples/tile/example_tile_mlp.py +12 -12
  37. warp/examples/tile/example_tile_nbody.py +180 -0
  38. warp/examples/tile/example_tile_walker.py +319 -0
  39. warp/fem/geometry/geometry.py +0 -2
  40. warp/math.py +147 -0
  41. warp/native/array.h +12 -0
  42. warp/native/builtin.h +0 -1
  43. warp/native/bvh.cpp +149 -70
  44. warp/native/bvh.cu +287 -68
  45. warp/native/bvh.h +195 -85
  46. warp/native/clang/clang.cpp +5 -1
  47. warp/native/coloring.cpp +5 -1
  48. warp/native/cuda_util.cpp +91 -53
  49. warp/native/cuda_util.h +5 -0
  50. warp/native/exports.h +40 -40
  51. warp/native/intersect.h +17 -0
  52. warp/native/mat.h +41 -0
  53. warp/native/mathdx.cpp +19 -0
  54. warp/native/mesh.cpp +25 -8
  55. warp/native/mesh.cu +153 -101
  56. warp/native/mesh.h +482 -403
  57. warp/native/quat.h +40 -0
  58. warp/native/solid_angle.h +7 -0
  59. warp/native/sort.cpp +85 -0
  60. warp/native/sort.cu +34 -0
  61. warp/native/sort.h +3 -1
  62. warp/native/spatial.h +11 -0
  63. warp/native/tile.h +1187 -669
  64. warp/native/tile_reduce.h +8 -6
  65. warp/native/vec.h +41 -0
  66. warp/native/warp.cpp +8 -1
  67. warp/native/warp.cu +263 -40
  68. warp/native/warp.h +19 -5
  69. warp/optim/linear.py +22 -4
  70. warp/render/render_opengl.py +130 -64
  71. warp/sim/__init__.py +6 -1
  72. warp/sim/collide.py +270 -26
  73. warp/sim/import_urdf.py +8 -8
  74. warp/sim/integrator_euler.py +25 -7
  75. warp/sim/integrator_featherstone.py +154 -35
  76. warp/sim/integrator_vbd.py +842 -40
  77. warp/sim/model.py +134 -72
  78. warp/sparse.py +1 -1
  79. warp/stubs.py +265 -132
  80. warp/tape.py +28 -30
  81. warp/tests/aux_test_module_unload.py +15 -0
  82. warp/tests/{test_sim_grad.py → flaky_test_sim_grad.py} +104 -63
  83. warp/tests/test_array.py +74 -0
  84. warp/tests/test_assert.py +242 -0
  85. warp/tests/test_codegen.py +14 -61
  86. warp/tests/test_collision.py +2 -2
  87. warp/tests/test_coloring.py +12 -2
  88. warp/tests/test_examples.py +12 -1
  89. warp/tests/test_func.py +21 -4
  90. warp/tests/test_grad_debug.py +87 -2
  91. warp/tests/test_hash_grid.py +1 -1
  92. warp/tests/test_ipc.py +116 -0
  93. warp/tests/test_lerp.py +13 -87
  94. warp/tests/test_mat.py +138 -167
  95. warp/tests/test_math.py +47 -1
  96. warp/tests/test_matmul.py +17 -16
  97. warp/tests/test_matmul_lite.py +10 -15
  98. warp/tests/test_mesh.py +84 -60
  99. warp/tests/test_mesh_query_aabb.py +165 -0
  100. warp/tests/test_mesh_query_point.py +328 -286
  101. warp/tests/test_mesh_query_ray.py +134 -121
  102. warp/tests/test_mlp.py +2 -2
  103. warp/tests/test_operators.py +43 -0
  104. warp/tests/test_overwrite.py +47 -2
  105. warp/tests/test_quat.py +77 -0
  106. warp/tests/test_reload.py +29 -0
  107. warp/tests/test_sim_grad_bounce_linear.py +204 -0
  108. warp/tests/test_smoothstep.py +17 -83
  109. warp/tests/test_static.py +19 -3
  110. warp/tests/test_tape.py +25 -0
  111. warp/tests/test_tile.py +178 -191
  112. warp/tests/test_tile_load.py +356 -0
  113. warp/tests/test_tile_mathdx.py +61 -8
  114. warp/tests/test_tile_mlp.py +17 -17
  115. warp/tests/test_tile_reduce.py +24 -18
  116. warp/tests/test_tile_shared_memory.py +66 -17
  117. warp/tests/test_tile_view.py +165 -0
  118. warp/tests/test_torch.py +35 -0
  119. warp/tests/test_utils.py +36 -24
  120. warp/tests/test_vec.py +110 -0
  121. warp/tests/unittest_suites.py +29 -4
  122. warp/tests/unittest_utils.py +30 -13
  123. warp/thirdparty/unittest_parallel.py +2 -2
  124. warp/types.py +411 -101
  125. warp/utils.py +10 -7
  126. {warp_lang-1.5.0.dist-info → warp_lang-1.6.0.dist-info}/METADATA +92 -69
  127. {warp_lang-1.5.0.dist-info → warp_lang-1.6.0.dist-info}/RECORD +130 -119
  128. {warp_lang-1.5.0.dist-info → warp_lang-1.6.0.dist-info}/WHEEL +1 -1
  129. warp/examples/benchmarks/benchmark_tile.py +0 -179
  130. warp/native/tile_gemm.h +0 -341
  131. {warp_lang-1.5.0.dist-info → warp_lang-1.6.0.dist-info}/LICENSE.md +0 -0
  132. {warp_lang-1.5.0.dist-info → warp_lang-1.6.0.dist-info}/top_level.txt +0 -0
warp/tests/test_mat.py CHANGED
@@ -6,20 +6,14 @@
 # license agreement from NVIDIA CORPORATION is strictly prohibited.
 
 import unittest
+from typing import Any
 
 import numpy as np
 
 import warp as wp
 from warp.tests.unittest_utils import *
 
-np_signed_int_types = [
-    np.int8,
-    np.int16,
-    np.int32,
-    np.int64,
-    np.byte,
-]
-
+np_signed_int_types = [np.int8, np.int16, np.int32, np.int64, np.byte]
 np_float_types = [np.float16, np.float32, np.float64]
 
 
@@ -42,11 +36,7 @@ def getkernel(func, suffix=""):
 
 
 def get_select_kernel(dtype):
-    def output_select_kernel_fn(
-        input: wp.array(dtype=dtype),
-        index: int,
-        out: wp.array(dtype=dtype),
-    ):
+    def output_select_kernel_fn(input: wp.array(dtype=dtype), index: int, out: wp.array(dtype=dtype)):
        out[0] = input[index]
 
     return getkernel(output_select_kernel_fn, suffix=dtype.__name__)
@@ -61,33 +51,19 @@ def test_anon_constructor_error_shape_arg_missing(test, device):
         RuntimeError,
         r"the `shape` argument must be specified when initializing a matrix by value$",
     ):
-        wp.launch(
-            kernel,
-            dim=1,
-            inputs=[],
-            device=device,
-        )
+        wp.launch(kernel, dim=1, inputs=[], device=device)
 
 
 def test_anon_constructor_error_shape_mismatch(test, device):
     @wp.kernel
     def kernel():
-        wp.matrix(
-            wp.matrix(shape=(1, 2), dtype=float),
-            shape=(3, 4),
-            dtype=float,
-        )
+        wp.matrix(wp.matrix(shape=(1, 2), dtype=float), shape=(3, 4), dtype=float)
 
     with test.assertRaisesRegex(
         RuntimeError,
         r"incompatible matrix of shape \(3, 4\) given when copy constructing a matrix of shape \(1, 2\)$",
     ):
-        wp.launch(
-            kernel,
-            dim=1,
-            inputs=[],
-            device=device,
-        )
+        wp.launch(kernel, dim=1, inputs=[], device=device)
 
 
 def test_anon_constructor_error_type_mismatch(test, device):
@@ -99,12 +75,7 @@ def test_anon_constructor_error_type_mismatch(test, device):
         RuntimeError,
         r"the value used to fill this matrix is expected to be of the type `float16`$",
     ):
-        wp.launch(
-            kernel,
-            dim=1,
-            inputs=[],
-            device=device,
-        )
+        wp.launch(kernel, dim=1, inputs=[], device=device)
 
 
 def test_anon_constructor_error_invalid_arg_count(test, device):
@@ -116,12 +87,7 @@ def test_anon_constructor_error_invalid_arg_count(test, device):
         RuntimeError,
         r"incompatible number of values given \(3\) when constructing a matrix of shape \(2, 2\)$",
     ):
-        wp.launch(
-            kernel,
-            dim=1,
-            inputs=[],
-            device=device,
-        )
+        wp.launch(kernel, dim=1, inputs=[], device=device)
 
 
 def test_anon_xform_constructor_error_type_mismatch(test, device):
@@ -150,12 +116,7 @@ def test_tpl_constructor_error_incompatible_sizes(test, device):
         RuntimeError,
         r"incompatible matrix of shape \(3, 3\) given when copy constructing a matrix of shape \(2, 2\)$",
     ):
-        wp.launch(
-            kernel,
-            dim=1,
-            inputs=[],
-            device=device,
-        )
+        wp.launch(kernel, dim=1, inputs=[], device=device)
 
 
 def test_tpl_constructor_error_invalid_vector_count(test, device):
@@ -167,12 +128,7 @@ def test_tpl_constructor_error_invalid_vector_count(test, device):
         RuntimeError,
         r"incompatible number of column vectors given \(2\) when constructing a matrix of shape \(3, 3\)$",
     ):
-        wp.launch(
-            kernel,
-            dim=1,
-            inputs=[],
-            device=device,
-        )
+        wp.launch(kernel, dim=1, inputs=[], device=device)
 
 
 def test_tpl_constructor_error_invalid_vector_shape(test, device):
@@ -184,12 +140,7 @@ def test_tpl_constructor_error_invalid_vector_shape(test, device):
         RuntimeError,
         r"incompatible column vector lengths given when constructing a matrix of shape \(2, 2\)$",
     ):
-        wp.launch(
-            kernel,
-            dim=1,
-            inputs=[],
-            device=device,
-        )
+        wp.launch(kernel, dim=1, inputs=[], device=device)
 
 
 def test_tpl_constructor_error_invalid_arg_count(test, device):
@@ -201,12 +152,7 @@ def test_tpl_constructor_error_invalid_arg_count(test, device):
         RuntimeError,
         r"incompatible number of values given \(3\) when constructing a matrix of shape \(2, 2\)$",
     ):
-        wp.launch(
-            kernel,
-            dim=1,
-            inputs=[],
-            device=device,
-        )
+        wp.launch(kernel, dim=1, inputs=[], device=device)
 
 
 def test_py_arithmetic_ops(test, device, dtype):
@@ -541,16 +487,7 @@ def test_subtraction(test, device, dtype, register_kernels=False):
                     wp.launch(
                         kernel,
                         dim=1,
-                        inputs=[
-                            s2,
-                            s3,
-                            s4,
-                            s5,
-                            v2,
-                            v3,
-                            v4,
-                            v5,
-                        ],
+                        inputs=[s2, s3, s4, s5, v2, v3, v4, v5],
                         outputs=[outcomponents],
                         device=device,
                     )
@@ -558,11 +495,11 @@ def test_subtraction(test, device, dtype, register_kernels=False):
                         output_select_kernel, dim=1, inputs=[outcomponents, idx], outputs=[out], device=device
                     )
                 tape.backward(loss=out)
-                expectedresult = np.zeros((dim, dim), dtype=dtype)
-                expectedresult[i, j] = 2
-                assert_np_equal(tape.gradients[in2].numpy()[0], expectedresult, tol=10 * tol)
-                expectedresult[i, j] = -2
-                assert_np_equal(tape.gradients[in1].numpy()[0], expectedresult, tol=10 * tol)
+                expected_result = np.zeros((dim, dim), dtype=dtype)
+                expected_result[i, j] = 2
+                assert_np_equal(tape.gradients[in2].numpy()[0], expected_result, tol=10 * tol)
+                expected_result[i, j] = -2
+                assert_np_equal(tape.gradients[in1].numpy()[0], expected_result, tol=10 * tol)
                 tape.zero()
 
                 idx = idx + 1
@@ -608,21 +545,7 @@ def test_determinant(test, device, dtype, register_kernels=False):
 
     tape = wp.Tape()
     with tape:
-        wp.launch(
-            kernel,
-            dim=1,
-            inputs=[
-                v2,
-                v3,
-                v4,
-            ],
-            outputs=[
-                det2,
-                det3,
-                det4,
-            ],
-            device=device,
-        )
+        wp.launch(kernel, dim=1, inputs=[v2, v3, v4], outputs=[det2, det3, det4], device=device)
 
     if dtype in np_float_types:
         assert_np_equal(det2.numpy()[0], 2 * np.linalg.det(v2.numpy()[0].astype(np.float64)), tol=100 * tol)
@@ -658,16 +581,8 @@ def test_determinant(test, device, dtype, register_kernels=False):
                 wp.launch(
                     kernel,
                     dim=1,
-                    inputs=[
-                        wp.array(v2test, dtype=v2.dtype, requires_grad=True, device=device),
-                        v3,
-                        v4,
-                    ],
-                    outputs=[
-                        det2,
-                        det3,
-                        det4,
-                    ],
+                    inputs=[wp.array(v2test, dtype=v2.dtype, requires_grad=True, device=device), v3, v4],
+                    outputs=[det2, det3, det4],
                     device=device,
                 )
                 dplus = det2.numpy()[0]
@@ -675,16 +590,8 @@ def test_determinant(test, device, dtype, register_kernels=False):
                 wp.launch(
                     kernel,
                     dim=1,
-                    inputs=[
-                        wp.array(v2test, dtype=v2.dtype, requires_grad=True, device=device),
-                        v3,
-                        v4,
-                    ],
-                    outputs=[
-                        det2,
-                        det3,
-                        det4,
-                    ],
+                    inputs=[wp.array(v2test, dtype=v2.dtype, requires_grad=True, device=device), v3, v4],
+                    outputs=[det2, det3, det4],
                     device=device,
                 )
                 dminus = det2.numpy()[0]
@@ -697,16 +604,8 @@ def test_determinant(test, device, dtype, register_kernels=False):
                 wp.launch(
                     kernel,
                     dim=1,
-                    inputs=[
-                        v2,
-                        wp.array(v3test, dtype=v3.dtype, requires_grad=True, device=device),
-                        v4,
-                    ],
-                    outputs=[
-                        det2,
-                        det3,
-                        det4,
-                    ],
+                    inputs=[v2, wp.array(v3test, dtype=v3.dtype, requires_grad=True, device=device), v4],
+                    outputs=[det2, det3, det4],
                     device=device,
                 )
                 dplus = det3.numpy()[0]
@@ -714,16 +613,8 @@ def test_determinant(test, device, dtype, register_kernels=False):
                 wp.launch(
                     kernel,
                     dim=1,
-                    inputs=[
-                        v2,
-                        wp.array(v3test, dtype=v3.dtype, requires_grad=True, device=device),
-                        v4,
-                    ],
-                    outputs=[
-                        det2,
-                        det3,
-                        det4,
-                    ],
+                    inputs=[v2, wp.array(v3test, dtype=v3.dtype, requires_grad=True, device=device), v4],
+                    outputs=[det2, det3, det4],
                     device=device,
                 )
                 dminus = det3.numpy()[0]
@@ -736,16 +627,8 @@ def test_determinant(test, device, dtype, register_kernels=False):
                 wp.launch(
                     kernel,
                     dim=1,
-                    inputs=[
-                        v2,
-                        v3,
-                        wp.array(v4test, dtype=v4.dtype, requires_grad=True, device=device),
-                    ],
-                    outputs=[
-                        det2,
-                        det3,
-                        det4,
-                    ],
+                    inputs=[v2, v3, wp.array(v4test, dtype=v4.dtype, requires_grad=True, device=device)],
+                    outputs=[det2, det3, det4],
                     device=device,
                 )
                 dplus = det4.numpy()[0]
@@ -753,16 +636,8 @@ def test_determinant(test, device, dtype, register_kernels=False):
                 wp.launch(
                     kernel,
                     dim=1,
-                    inputs=[
-                        v2,
-                        v3,
-                        wp.array(v4test, dtype=v4.dtype, requires_grad=True, device=device),
-                    ],
-                    outputs=[
-                        det2,
-                        det3,
-                        det4,
-                    ],
+                    inputs=[v2, v3, wp.array(v4test, dtype=v4.dtype, requires_grad=True, device=device)],
+                    outputs=[det2, det3, det4],
                     device=device,
                 )
                 dminus = det4.numpy()[0]
@@ -1722,8 +1597,9 @@ def test_matrix_mutation(expected: wp.types.matrix(shape=(10, 3), dtype=float)):
     wp.expect_eq(m, expected)
 
 
-CONSTANT_SHAPE_ROWS = wp.constant(10)
-CONSTANT_SHAPE_COLS = wp.constant(10)
+# NOTE: Compile time is highly sensitive to shape so we use small values now
+CONSTANT_SHAPE_ROWS = wp.constant(2)
+CONSTANT_SHAPE_COLS = wp.constant(2)
 
 
 # tests that we can use global constants in shape keyword argument
@@ -1737,6 +1613,106 @@ def test_constructors_constant_shape():
             m[i, j] = float(i * j)
 
 
+Mat23 = wp.mat((2, 3), dtype=wp.float16)
+
+
+@wp.kernel
+def matrix_len_kernel(
+    m1: wp.mat22, m2: wp.mat((3, 3), float), m3: wp.mat((Any, Any), float), m4: Mat23, out: wp.array(dtype=int)
+):
+    length = wp.static(len(m1))
+    wp.expect_eq(len(m1), 2)
+    out[0] = len(m1)
+
+    length = len(m2)
+    wp.expect_eq(wp.static(len(m2)), 3)
+    out[1] = len(m2)
+
+    length = len(m3)
+    wp.expect_eq(len(m3), 4)
+    out[2] = wp.static(len(m3))
+
+    length = wp.static(len(m4))
+    wp.expect_eq(wp.static(len(m4)), 2)
+    out[3] = wp.static(len(m4))
+
+    foo = wp.mat22()
+    length = len(foo)
+    wp.expect_eq(len(foo), 2)
+    out[4] = len(foo)
+
+
+def test_matrix_len(test, device):
+    m1 = wp.mat22()
+    m2 = wp.mat33()
+    m3 = wp.mat44()
+    m4 = Mat23()
+    out = wp.empty(5, dtype=int, device=device)
+    wp.launch(matrix_len_kernel, dim=(1,), inputs=(m1, m2, m3, m4), outputs=(out,), device=device)
+
+    test.assertEqual(out.numpy()[0], 2)
+    test.assertEqual(out.numpy()[1], 3)
+    test.assertEqual(out.numpy()[2], 4)
+    test.assertEqual(out.numpy()[3], 2)
+    test.assertEqual(out.numpy()[4], 2)
+
+    test.assertEqual(len(m1), 2)
+    test.assertEqual(len(m2), 3)
+    test.assertEqual(len(m3), 4)
+    test.assertEqual(len(m4), 2)
+
+
+@wp.kernel
+def matrix_augassign_kernel(
+    a: wp.array(dtype=wp.mat22), b: wp.array(dtype=wp.mat22), c: wp.array(dtype=wp.mat22), d: wp.array(dtype=wp.mat22)
+):
+    i = wp.tid()
+
+    m1 = wp.mat22()
+    m2 = b[i]
+
+    m1[0, 0] += m2[0, 0]
+    m1[0, 1] += m2[0, 1]
+    m1[1, 0] += m2[1, 0]
+    m1[1, 1] += m2[1, 1]
+
+    a[i] = m1
+
+    m3 = wp.mat22()
+    m4 = d[i]
+
+    m3[0, 0] -= m4[0, 0]
+    m3[0, 1] -= m4[0, 1]
+    m3[1, 0] -= m4[1, 0]
+    m3[1, 1] -= m4[1, 1]
+
+    c[i] = m3
+
+
+def test_matrix_augassign(test, device):
+    N = 3
+
+    a = wp.zeros(N, dtype=wp.mat22, requires_grad=True)
+    b = wp.ones(N, dtype=wp.mat22, requires_grad=True)
+
+    c = wp.zeros(N, dtype=wp.mat22, requires_grad=True)
+    d = wp.ones(N, dtype=wp.mat22, requires_grad=True)
+
+    tape = wp.Tape()
+    with tape:
+        wp.launch(matrix_augassign_kernel, N, inputs=[a, b, c, d])
+
+    tape.backward(grads={a: wp.ones_like(a), c: wp.ones_like(c)})
+
+    assert_np_equal(a.numpy(), wp.ones_like(a).numpy())
+    assert_np_equal(a.grad.numpy(), wp.ones_like(a).numpy())
+    assert_np_equal(b.grad.numpy(), wp.ones_like(a).numpy())
+
+    assert_np_equal(c.numpy(), -wp.ones_like(c).numpy())
+    assert_np_equal(c.grad.numpy(), wp.ones_like(c).numpy())
+    assert_np_equal(d.grad.numpy(), -wp.ones_like(d).numpy())
+
+
 devices = get_test_devices()
 
 
@@ -1797,16 +1773,10 @@ add_function_test(
     devices=devices,
 )
 add_function_test(
-    TestMat,
-    "test_anon_constructor_error_shape_mismatch",
-    test_anon_constructor_error_shape_mismatch,
-    devices=devices,
+    TestMat, "test_anon_constructor_error_shape_mismatch", test_anon_constructor_error_shape_mismatch, devices=devices
 )
 add_function_test(
-    TestMat,
-    "test_anon_constructor_error_type_mismatch",
-    test_anon_constructor_error_type_mismatch,
-    devices=devices,
+    TestMat, "test_anon_constructor_error_type_mismatch", test_anon_constructor_error_type_mismatch, devices=devices
 )
 add_function_test(
     TestMat,
@@ -1875,7 +1845,8 @@ for dtype in np_float_types:
         devices=devices,
         dtype=dtype,
     )
-
+    add_function_test(TestMat, "test_matrix_len", test_matrix_len, devices=devices)
+    add_function_test(TestMat, "test_matrix_augassign", test_matrix_augassign, devices=devices)
 
 if __name__ == "__main__":
     wp.clear_kernel_cache()
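
Note: beyond reflowing calls onto single lines, the test_mat.py changes above exercise two behaviors new in 1.6.0: len() on matrix types (also usable inside kernels, where wp.static() folds it to a compile-time constant) and differentiable augmented assignment on matrix components. A minimal sketch of the same features from user code, assuming only what the tests above demonstrate; the kernel and variable names here are illustrative, not part of the package:

    import warp as wp

    @wp.kernel
    def bump_diagonal(mats: wp.array(dtype=wp.mat33), out: wp.array(dtype=wp.mat33)):
        tid = wp.tid()
        m = mats[tid]
        # len() of a matrix is its row count; wp.static() evaluates it during code generation
        for i in range(wp.static(len(m))):
            m[i, i] += 1.0  # component-wise augmented assignment, tracked by the tape in 1.6.0
        out[tid] = m

    mats = wp.ones(4, dtype=wp.mat33, requires_grad=True)
    out = wp.zeros_like(mats)

    tape = wp.Tape()
    with tape:
        wp.launch(bump_diagonal, dim=4, inputs=[mats], outputs=[out])
    tape.backward(grads={out: wp.ones_like(out)})

    print(out.numpy()[0])        # ones everywhere except 2.0 on the diagonal
    print(mats.grad.numpy()[0])  # all ones: each output component depends on one input component
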
warp/tests/test_math.py CHANGED
@@ -6,7 +6,7 @@
 # license agreement from NVIDIA CORPORATION is strictly prohibited.
 
 import unittest
-from typing import NamedTuple
+from typing import Any, NamedTuple
 
 import numpy as np
 
@@ -50,6 +50,51 @@ def test_scalar_math(test, device):
         assert_np_equal(tape.gradients[x].numpy(), np.array([adj_float_results_expected[i]]), tol=1e-6)
 
 
+@wp.kernel
+def test_vec_norm_kernel(vs: wp.array(dtype=Any), out: wp.array(dtype=float, ndim=2)):
+    tid = wp.tid()
+    out[tid, 0] = wp.norm_l1(vs[tid])
+    out[tid, 1] = wp.norm_l2(vs[tid])
+    out[tid, 2] = wp.norm_huber(vs[tid])
+    out[tid, 3] = wp.norm_pseudo_huber(vs[tid])
+
+
+def test_vec_norm(test, device):
+    # ground-truth implementations from SciPy
+    def huber(delta, x):
+        if x <= delta:
+            return 0.5 * x**2
+        else:
+            return delta * (x - 0.5 * delta)
+
+    def pseudo_huber(delta, x):
+        return delta**2 * (np.sqrt(1 + (x / delta) ** 2) - 1)
+
+    v0 = wp.vec3(-2.0, -1.0, -3.0)
+    v1 = wp.vec3(2.0, 1.0, 3.0)
+    v2 = wp.vec3(0.0, 0.0, 0.0)
+
+    xs = wp.array([v0, v1, v2], dtype=wp.vec3, requires_grad=True, device=device)
+    out = wp.empty((len(xs), 4), dtype=wp.float32, requires_grad=True, device=device)
+
+    wp.launch(test_vec_norm_kernel, dim=len(xs), inputs=[xs], outputs=[out], device=device)
+
+    for i, x in enumerate([v0, v1, v2]):
+        assert_np_equal(
+            out.numpy()[i],
+            np.array(
+                [
+                    np.linalg.norm(x, ord=1),
+                    np.linalg.norm(x, ord=2),
+                    huber(1.0, wp.length(x)),
+                    # note SciPy defines the Pseudo-Huber loss slightly differently
+                    pseudo_huber(1.0, wp.length(x)) + 1.0,
+                ]
+            ),
+            tol=1e-6,
+        )
+
+
 devices = get_test_devices()
 
 
@@ -117,6 +162,7 @@ class TestMath(unittest.TestCase):
 
 
 add_function_test(TestMath, "test_scalar_math", test_scalar_math, devices=devices)
+add_function_test(TestMath, "test_vec_norm", test_vec_norm, devices=devices)
 
 
 if __name__ == "__main__":
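
Note: test_vec_norm above covers the vector-norm helpers added in this release (see warp/math.py, +147 lines): wp.norm_l1, wp.norm_l2, wp.norm_huber, and wp.norm_pseudo_huber. A minimal sketch of calling them outside the test harness; judging by the reference functions in the test, the Huber variants use delta = 1.0 by default, and the pseudo-Huber value sits +1 above SciPy's definition:

    import warp as wp

    @wp.kernel
    def norms_kernel(vs: wp.array(dtype=wp.vec3), out: wp.array(dtype=float, ndim=2)):
        tid = wp.tid()
        v = vs[tid]
        out[tid, 0] = wp.norm_l1(v)  # |x| + |y| + |z|
        out[tid, 1] = wp.norm_l2(v)  # Euclidean length, same value as wp.length(v)
        out[tid, 2] = wp.norm_huber(v)  # quadratic near zero, linear in the tails
        out[tid, 3] = wp.norm_pseudo_huber(v)  # smooth everywhere, approximates the Huber norm

    vs = wp.array([wp.vec3(3.0, 4.0, 0.0)], dtype=wp.vec3)
    out = wp.zeros((1, 4), dtype=float)
    wp.launch(norms_kernel, dim=1, inputs=[vs], outputs=[out])
    print(out.numpy())  # approx. [[7.0, 5.0, 4.5, 5.1]]
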
warp/tests/test_matmul.py CHANGED
@@ -5,6 +5,7 @@
 # distribution of this software and related documentation without an express
 # license agreement from NVIDIA CORPORATION is strictly prohibited.
 
+import itertools
 import unittest
 from typing import Any
 
@@ -105,19 +106,15 @@ class gemm_test_bed_runner:
         assert_np_equal(C.grad.numpy(), adj_C_np)
 
     def run(self):
-        Ms = [64, 128, 256]
-        Ns = [64, 128, 256]
-        Ks = [64, 128, 256]
+        Ms = [16, 32, 64]
+        Ns = [16, 32, 64]
+        Ks = [16, 32, 64]
         batch_counts = [1, 4]
         betas = [0.0, 1.0]
         alpha = 1.0
 
-        for batch_count in batch_counts:
-            for m in Ms:
-                for n in Ns:
-                    for k in Ks:
-                        for beta in betas:
-                            self.run_and_verify(m, n, k, batch_count, alpha, beta)
+        for batch_count, m, n, k, beta in itertools.product(batch_counts, Ms, Ns, Ks, betas):
+            self.run_and_verify(m, n, k, batch_count, alpha, beta)
 
 
 class gemm_test_bed_runner_transpose:
@@ -488,13 +485,17 @@ class TestMatmul(unittest.TestCase):
 
 
 # add_function_test(TestMatmul, "test_f16", test_f16, devices=devices)
-add_function_test(TestMatmul, "test_f32", test_f32, devices=devices)
-add_function_test(TestMatmul, "test_f64", test_f64, devices=devices)
-add_function_test(TestMatmul, "test_tape", test_tape, devices=devices)
-add_function_test(TestMatmul, "test_operator", test_operator, devices=devices)
-add_function_test(TestMatmul, "test_large_batch_count", test_large_batch_count, devices=devices)
-add_function_test(TestMatmul, "test_adjoint_accumulation", test_adjoint_accumulation, devices=devices)
-add_function_test(TestMatmul, "test_cuda_graph_capture", test_cuda_graph_capture, devices=cuda_devices)
+add_function_test(TestMatmul, "test_f32", test_f32, devices=devices, check_output=False)
+add_function_test(TestMatmul, "test_f64", test_f64, devices=devices, check_output=False)
+add_function_test(TestMatmul, "test_tape", test_tape, devices=devices, check_output=False)
+add_function_test(TestMatmul, "test_operator", test_operator, devices=devices, check_output=False)
+add_function_test(TestMatmul, "test_large_batch_count", test_large_batch_count, devices=devices, check_output=False)
+add_function_test(
+    TestMatmul, "test_adjoint_accumulation", test_adjoint_accumulation, devices=devices, check_output=False
+)
+add_function_test(
+    TestMatmul, "test_cuda_graph_capture", test_cuda_graph_capture, devices=cuda_devices, check_output=False
+)
 
 
 if __name__ == "__main__":
warp/tests/test_matmul_lite.py CHANGED
@@ -102,19 +102,14 @@ class gemm_test_bed_runner:
         assert_np_equal(C.grad.numpy(), adj_C_np)
 
     def run(self):
-        Ms = [8]
-        Ns = [16]
-        Ks = [32]
-        batch_counts = [1]
-        betas = [1.0]
+        m = 8
+        n = 16
+        k = 32
+        batch_count = 1
+        beta = 1.0
         alpha = 1.0
 
-        for batch_count in batch_counts:
-            for m in Ms:
-                for n in Ns:
-                    for k in Ks:
-                        for beta in betas:
-                            self.run_and_verify(m, n, k, batch_count, alpha, beta)
+        self.run_and_verify(m, n, k, batch_count, alpha, beta)
 
 
 class gemm_test_bed_runner_transpose:
@@ -397,10 +392,10 @@ class TestMatmulLite(unittest.TestCase):
     pass
 
 
-add_function_test(TestMatmulLite, "test_f32", test_f32, devices=devices)
-add_function_test(TestMatmulLite, "test_tape", test_tape, devices=devices)
-add_function_test(TestMatmulLite, "test_operator", test_operator, devices=devices)
-add_function_test(TestMatmulLite, "test_large_batch_count", test_large_batch_count, devices=devices)
+add_function_test(TestMatmulLite, "test_f32", test_f32, devices=devices, check_output=False)
+add_function_test(TestMatmulLite, "test_tape", test_tape, devices=devices, check_output=False)
+add_function_test(TestMatmulLite, "test_operator", test_operator, devices=devices, check_output=False)
+add_function_test(TestMatmulLite, "test_large_batch_count", test_large_batch_count, devices=devices, check_output=False)
 
 
 if __name__ == "__main__":
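
Note: the runner change in both matmul test files is a pure refactor: itertools.product visits the same (batch_count, m, n, k, beta) combinations as the five nested loops, in the same order, with the rightmost sequence varying fastest. A self-contained sketch of the equivalence:

    import itertools

    batch_counts, Ms, betas = [1, 4], [16, 32, 64], [0.0, 1.0]

    nested = []
    for batch_count in batch_counts:
        for m in Ms:
            for beta in betas:
                nested.append((batch_count, m, beta))

    flat = list(itertools.product(batch_counts, Ms, betas))
    assert flat == nested  # same combinations, same order, one loop instead of five

The added check_output=False flags are forwarded by add_function_test to Warp's unittest utilities; presumably they let these tests emit output without failing the harness's captured-output check, while the smaller GEMM sizes (16-64 instead of 64-256) simply cut runtime.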