warp_lang-1.5.1-py3-none-manylinux2014_aarch64.whl → warp_lang-1.6.0-py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of warp-lang might be problematic.

Files changed (123)
  1. warp/__init__.py +5 -0
  2. warp/autograd.py +414 -191
  3. warp/bin/warp-clang.so +0 -0
  4. warp/bin/warp.so +0 -0
  5. warp/build.py +40 -12
  6. warp/build_dll.py +13 -6
  7. warp/builtins.py +1076 -480
  8. warp/codegen.py +240 -119
  9. warp/config.py +1 -1
  10. warp/context.py +298 -84
  11. warp/examples/assets/square_cloth.usd +0 -0
  12. warp/examples/benchmarks/benchmark_gemm.py +27 -18
  13. warp/examples/benchmarks/benchmark_interop_paddle.py +3 -3
  14. warp/examples/benchmarks/benchmark_interop_torch.py +3 -3
  15. warp/examples/core/example_torch.py +18 -34
  16. warp/examples/fem/example_apic_fluid.py +1 -0
  17. warp/examples/fem/example_mixed_elasticity.py +1 -1
  18. warp/examples/optim/example_bounce.py +1 -1
  19. warp/examples/optim/example_cloth_throw.py +1 -1
  20. warp/examples/optim/example_diffray.py +4 -15
  21. warp/examples/optim/example_drone.py +1 -1
  22. warp/examples/optim/example_softbody_properties.py +392 -0
  23. warp/examples/optim/example_trajectory.py +1 -3
  24. warp/examples/optim/example_walker.py +5 -0
  25. warp/examples/sim/example_cartpole.py +0 -2
  26. warp/examples/sim/example_cloth_self_contact.py +260 -0
  27. warp/examples/sim/example_granular_collision_sdf.py +4 -5
  28. warp/examples/sim/example_jacobian_ik.py +0 -2
  29. warp/examples/sim/example_quadruped.py +5 -2
  30. warp/examples/tile/example_tile_cholesky.py +79 -0
  31. warp/examples/tile/example_tile_convolution.py +2 -2
  32. warp/examples/tile/example_tile_fft.py +2 -2
  33. warp/examples/tile/example_tile_filtering.py +3 -3
  34. warp/examples/tile/example_tile_matmul.py +4 -4
  35. warp/examples/tile/example_tile_mlp.py +12 -12
  36. warp/examples/tile/example_tile_nbody.py +180 -0
  37. warp/examples/tile/example_tile_walker.py +319 -0
  38. warp/math.py +147 -0
  39. warp/native/array.h +12 -0
  40. warp/native/builtin.h +0 -1
  41. warp/native/bvh.cpp +149 -70
  42. warp/native/bvh.cu +287 -68
  43. warp/native/bvh.h +195 -85
  44. warp/native/clang/clang.cpp +5 -1
  45. warp/native/cuda_util.cpp +35 -0
  46. warp/native/cuda_util.h +5 -0
  47. warp/native/exports.h +40 -40
  48. warp/native/intersect.h +17 -0
  49. warp/native/mat.h +41 -0
  50. warp/native/mathdx.cpp +19 -0
  51. warp/native/mesh.cpp +25 -8
  52. warp/native/mesh.cu +153 -101
  53. warp/native/mesh.h +482 -403
  54. warp/native/quat.h +40 -0
  55. warp/native/solid_angle.h +7 -0
  56. warp/native/sort.cpp +85 -0
  57. warp/native/sort.cu +34 -0
  58. warp/native/sort.h +3 -1
  59. warp/native/spatial.h +11 -0
  60. warp/native/tile.h +1185 -664
  61. warp/native/tile_reduce.h +8 -6
  62. warp/native/vec.h +41 -0
  63. warp/native/warp.cpp +8 -1
  64. warp/native/warp.cu +263 -40
  65. warp/native/warp.h +19 -5
  66. warp/optim/linear.py +22 -4
  67. warp/render/render_opengl.py +124 -59
  68. warp/sim/__init__.py +6 -1
  69. warp/sim/collide.py +270 -26
  70. warp/sim/integrator_euler.py +25 -7
  71. warp/sim/integrator_featherstone.py +154 -35
  72. warp/sim/integrator_vbd.py +842 -40
  73. warp/sim/model.py +111 -53
  74. warp/stubs.py +248 -115
  75. warp/tape.py +28 -30
  76. warp/tests/aux_test_module_unload.py +15 -0
  77. warp/tests/{test_sim_grad.py → flaky_test_sim_grad.py} +104 -63
  78. warp/tests/test_array.py +74 -0
  79. warp/tests/test_assert.py +242 -0
  80. warp/tests/test_codegen.py +14 -61
  81. warp/tests/test_collision.py +2 -2
  82. warp/tests/test_examples.py +9 -0
  83. warp/tests/test_grad_debug.py +87 -2
  84. warp/tests/test_hash_grid.py +1 -1
  85. warp/tests/test_ipc.py +116 -0
  86. warp/tests/test_mat.py +138 -167
  87. warp/tests/test_math.py +47 -1
  88. warp/tests/test_matmul.py +11 -7
  89. warp/tests/test_matmul_lite.py +4 -4
  90. warp/tests/test_mesh.py +84 -60
  91. warp/tests/test_mesh_query_aabb.py +165 -0
  92. warp/tests/test_mesh_query_point.py +328 -286
  93. warp/tests/test_mesh_query_ray.py +134 -121
  94. warp/tests/test_mlp.py +2 -2
  95. warp/tests/test_operators.py +43 -0
  96. warp/tests/test_overwrite.py +2 -2
  97. warp/tests/test_quat.py +77 -0
  98. warp/tests/test_reload.py +29 -0
  99. warp/tests/test_sim_grad_bounce_linear.py +204 -0
  100. warp/tests/test_static.py +16 -0
  101. warp/tests/test_tape.py +25 -0
  102. warp/tests/test_tile.py +134 -191
  103. warp/tests/test_tile_load.py +356 -0
  104. warp/tests/test_tile_mathdx.py +61 -8
  105. warp/tests/test_tile_mlp.py +17 -17
  106. warp/tests/test_tile_reduce.py +24 -18
  107. warp/tests/test_tile_shared_memory.py +66 -17
  108. warp/tests/test_tile_view.py +165 -0
  109. warp/tests/test_torch.py +35 -0
  110. warp/tests/test_utils.py +36 -24
  111. warp/tests/test_vec.py +110 -0
  112. warp/tests/unittest_suites.py +29 -4
  113. warp/tests/unittest_utils.py +30 -11
  114. warp/thirdparty/unittest_parallel.py +2 -2
  115. warp/types.py +409 -99
  116. warp/utils.py +9 -5
  117. {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/METADATA +68 -44
  118. {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/RECORD +121 -110
  119. {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/WHEEL +1 -1
  120. warp/examples/benchmarks/benchmark_tile.py +0 -179
  121. warp/native/tile_gemm.h +0 -341
  122. {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/LICENSE.md +0 -0
  123. {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/top_level.txt +0 -0
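Most of the tile-related churn in the hunks below follows a single API change in 1.6.0: tile dimensions are passed as a shape= tuple instead of positional sizes, and full-tile stores drop the explicit offset arguments. A minimal sketch of the new calling convention, patterned on the test changes below (the kernel and array names here are illustrative, and a CUDA device is assumed):

import warp as wp

DIM_M, DIM_N = 16, 16


@wp.kernel
def fill_tile(out: wp.array2d(dtype=float)):
    # 1.6.0 style: shape is a keyword tuple rather than positional M, N arguments
    a = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared")
    b = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared") * 2.0
    # 1.6.0 style: a full-tile store no longer takes explicit (i, j) offsets
    wp.tile_store(out, a + b)


out = wp.empty((DIM_M, DIM_N), dtype=float)
wp.launch_tiled(fill_tile, dim=[1], inputs=[out], block_dim=64)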
warp/tests/test_tile_shared_memory.py CHANGED
@@ -22,11 +22,11 @@ def test_tile_shared_mem_size(test, device):

      @wp.kernel
      def compute(out: wp.array2d(dtype=float)):
-         a = wp.tile_ones(DIM_M, DIM_N, dtype=float, storage="shared")
-         b = wp.tile_ones(DIM_M, DIM_N, dtype=float, storage="shared") * 2.0
+         a = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared")
+         b = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared") * 2.0

          c = a + b
-         wp.tile_store(out, 0, 0, c)
+         wp.tile_store(out, c)

      out = wp.empty((DIM_M, DIM_N), dtype=float, device=device)

@@ -58,11 +58,11 @@ def test_tile_shared_mem_large(test, device):
      # we disable backward kernel gen since 128k is not supported on most architectures
      @wp.kernel(enable_backward=False)
      def compute(out: wp.array2d(dtype=float)):
-         a = wp.tile_ones(DIM_M, DIM_N, dtype=float, storage="shared")
-         b = wp.tile_ones(DIM_M, DIM_N, dtype=float, storage="shared") * 2.0
+         a = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared")
+         b = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared") * 2.0

          c = a + b
-         wp.tile_store(out, 0, 0, c)
+         wp.tile_store(out, c)

      out = wp.empty((DIM_M, DIM_N), dtype=float, device=device)

@@ -73,7 +73,7 @@ def test_tile_shared_mem_large(test, device):

      # check required shared memory
      expected_forward_bytes = DIM_M * DIM_N * 4 * 2
-     expected_backward_bytes = expected_forward_bytes * 2
+     expected_backward_bytes = 0

      assert expected_forward_bytes == 2**16

@@ -94,11 +94,11 @@ def test_tile_shared_mem_graph(test, device):

      @wp.kernel
      def compute(out: wp.array2d(dtype=float)):
-         a = wp.tile_ones(DIM_M, DIM_N, dtype=float, storage="shared")
-         b = wp.tile_ones(DIM_M, DIM_N, dtype=float, storage="shared") * 2.0
+         a = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared")
+         b = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared") * 2.0

          c = a + b
-         wp.tile_store(out, 0, 0, c)
+         wp.tile_store(out, c)

      out = wp.empty((DIM_M, DIM_N), dtype=float, device=device)

@@ -127,22 +127,25 @@ def test_tile_shared_mem_graph(test, device):

  # checks that stack allocations work for user functions
  def test_tile_shared_mem_func(test, device):
-     DIM_M = 32
-     DIM_N = 32
+     DIM_M = 64
+     DIM_N = 64
+
+     SMALL_DIM_M = 64 // 4
+     SMALL_DIM_N = 64 // 4

      BLOCK_DIM = 256

      @wp.func
      def add_tile_small():
-         a = wp.tile_ones(16, 16, dtype=float, storage="shared")
-         b = wp.tile_ones(16, 16, dtype=float, storage="shared") * 2.0
+         a = wp.tile_ones(shape=(SMALL_DIM_M, SMALL_DIM_N), dtype=float, storage="shared")
+         b = wp.tile_ones(shape=(SMALL_DIM_M, SMALL_DIM_N), dtype=float, storage="shared") * 2.0

          return a + b

      @wp.func
      def add_tile_big():
-         a = wp.tile_ones(64, 64, dtype=float, storage="shared")
-         b = wp.tile_ones(64, 64, dtype=float, storage="shared") * 2.0
+         a = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared")
+         b = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared") * 2.0

          return a + b

@@ -151,7 +154,7 @@ def test_tile_shared_mem_func(test, device):
          s = add_tile_small()
          b = add_tile_big()

-         wp.tile_store(out, 0, 0, b)
+         wp.tile_store(out, b)

      out = wp.empty((DIM_M, DIM_N), dtype=float, device=device)

@@ -168,6 +171,51 @@ def test_tile_shared_mem_func(test, device):
      assert hooks.backward_smem_bytes == expected_required_shared * 2


+ def round_up(a, b):
+     return b * ((a + b - 1) // b)
+
+
+ # checks that using non-16B aligned sizes works
+ def test_tile_shared_non_aligned(test, device):
+     # Tile size = 4 (float) * 1 * 3 = 12B % 16 != 0
+     DIM_M = 1
+     DIM_N = 3
+
+     BLOCK_DIM = 256
+
+     @wp.func
+     def foo():
+         a = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared") * 2.0
+         b = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared") * 3.0
+         return a + b
+
+     @wp.kernel
+     def compute(out: wp.array2d(dtype=float)):
+         # This tests the logic in the stack allocator, which should increment and
+         # decrement the stack pointer each time foo() is called
+         # Failing to do so correctly will make b out of bounds and corrupt the results
+         for _ in range(4096):
+             foo()
+         b = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared")
+         wp.tile_store(out, b)
+
+     out = wp.empty((DIM_M, DIM_N), dtype=float, device=device)
+
+     wp.launch_tiled(compute, dim=[1], inputs=[out], block_dim=BLOCK_DIM, device=device)
+
+     assert_np_equal(out.numpy(), np.ones((DIM_M, DIM_N), dtype=float))
+
+     # check shared memory for kernel on the device
+     module_exec = compute.module.load(device, BLOCK_DIM)
+     hooks = module_exec.get_kernel_hooks(compute)
+
+     # ensure that total required dynamic shared is the larger of the two tiles
+     expected_required_shared = 3 * round_up(DIM_M * DIM_N * 4, 16)
+
+     assert hooks.forward_smem_bytes == expected_required_shared
+     assert hooks.backward_smem_bytes == expected_required_shared * 2
+
+
  devices = get_cuda_test_devices()


@@ -183,6 +231,7 @@ add_function_test(
  )
  add_function_test(TestTileSharedMemory, "test_tile_shared_mem_graph", test_tile_shared_mem_graph, devices=devices)
  add_function_test(TestTileSharedMemory, "test_tile_shared_mem_func", test_tile_shared_mem_func, devices=devices)
+ add_function_test(TestTileSharedMemory, "test_tile_shared_non_aligned", test_tile_shared_non_aligned, devices=devices)


  if __name__ == "__main__":
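For the new non-aligned test above, the expected shared-memory figures work out as follows (a small worked check of the test's arithmetic, not code from the package):

def round_up(a, b):
    return b * ((a + b - 1) // b)


DIM_M, DIM_N = 1, 3
tile_bytes = DIM_M * DIM_N * 4      # a 1x3 float32 tile is 12 B, which is not 16 B aligned
padded = round_up(tile_bytes, 16)   # each shared tile is padded to 16 B
forward = 3 * padded                # the test expects three padded tiles of dynamic shared memory: 48 B
backward = forward * 2              # the backward kernel doubles that: 96 B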
warp/tests/test_tile_view.py ADDED
@@ -0,0 +1,165 @@
+ # Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
+ # and proprietary rights in and to this software, related documentation
+ # and any modifications thereto. Any use, reproduction, disclosure or
+ # distribution of this software and related documentation without an express
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+ import unittest
+
+ import numpy as np
+
+ import warp as wp
+ from warp.tests.unittest_utils import *
+
+ TILE_DIM = 64
+ TILE_M = 16
+ TILE_N = 32
+ TILE_O = 8
+
+
+ @wp.kernel
+ def test_tile_view_kernel(src: wp.array2d(dtype=float), dst: wp.array2d(dtype=float)):
+     # load whole source into local memory
+     a = wp.tile_load(src, shape=(TILE_M, TILE_N))
+
+     # copy the source array row by row
+     for i in range(TILE_M):
+         # create a view on original array and store
+         row = a[i]
+         wp.tile_store(dst[i], row)
+
+
+ def test_tile_view(test, device):
+     rng = np.random.default_rng(42)
+
+     a = wp.array(rng.random((TILE_M, TILE_N), dtype=np.float32), requires_grad=True, device=device)
+     b = wp.array(np.zeros((TILE_M, TILE_N), dtype=np.float32), requires_grad=True, device=device)
+
+     with wp.Tape() as tape:
+         wp.launch_tiled(test_tile_view_kernel, dim=[1], inputs=[a, b], block_dim=32, device=device)
+
+     assert_np_equal(b.numpy(), a.numpy())
+     b.grad = wp.ones_like(b, device=device)
+     tape.backward()
+
+     assert_np_equal(a.grad.numpy(), np.ones_like(a.numpy()))
+
+
+ @wp.kernel
+ def test_tile_assign_1d_kernel(src: wp.array2d(dtype=float), dst: wp.array2d(dtype=float)):
+     # load whole source into local memory
+     a = wp.tile_load(src, shape=(TILE_M, TILE_N))
+     b = wp.tile_zeros(dtype=float, shape=(TILE_M, TILE_N))
+
+     # copy the source array row by row
+     for i in range(int(TILE_M)):
+         # create views onto source and dest rows
+         row_src = a[i]
+         row_dst = b[i]
+
+         # copy onto dest row
+         wp.tile_assign(row_dst, row_src)
+
+     wp.tile_store(dst, b)
+
+
+ def test_tile_assign_1d(test, device):
+     rng = np.random.default_rng(42)
+
+     a = wp.array(rng.random((TILE_M, TILE_N), dtype=np.float32), requires_grad=True, device=device)
+     b = wp.array(np.zeros((TILE_M, TILE_N), dtype=np.float32), requires_grad=True, device=device)
+
+     with wp.Tape() as tape:
+         wp.launch_tiled(test_tile_assign_1d_kernel, dim=[1], inputs=[a, b], block_dim=32, device=device)
+
+     assert_np_equal(b.numpy(), a.numpy())
+     b.grad = wp.ones_like(b, device=device)
+     tape.backward()
+
+     assert_np_equal(a.grad.numpy(), np.ones_like(a.numpy()))
+
+
+ @wp.kernel
+ def test_tile_assign_2d_kernel(src: wp.array3d(dtype=float), dst: wp.array3d(dtype=float)):
+     # load whole source into local memory
+     a = wp.tile_load(src, shape=(TILE_M, TILE_N, TILE_O))
+     b = wp.tile_zeros(dtype=float, shape=(TILE_M, TILE_N, TILE_O))
+
+     # copy the source array slice by slice
+     for i in range(TILE_M):
+         # create views onto source and dest slice
+         row_src = a[i]
+         row_dst = b[i]
+
+         # copy onto dest slice
+         wp.tile_assign(row_dst, row_src)
+
+     wp.tile_store(dst, b)
+
+
+ def test_tile_assign_2d(test, device):
+     rng = np.random.default_rng(42)
+
+     a = wp.array(rng.random((TILE_M, TILE_N, TILE_O), dtype=np.float32), requires_grad=True, device=device)
+     b = wp.array(np.zeros((TILE_M, TILE_N, TILE_O), dtype=np.float32), requires_grad=True, device=device)
+
+     with wp.Tape() as tape:
+         wp.launch_tiled(test_tile_assign_2d_kernel, dim=[1], inputs=[a, b], block_dim=32, device=device)
+
+     assert_np_equal(b.numpy(), a.numpy())
+     b.grad = wp.ones_like(b, device=device)
+     tape.backward()
+
+     assert_np_equal(a.grad.numpy(), np.ones_like(a.numpy()))
+
+
+ @wp.kernel
+ def test_tile_view_offset_kernel(src: wp.array2d(dtype=float), dst: wp.array2d(dtype=float)):
+     # load whole source into local memory
+     a = wp.tile_load(src, shape=(TILE_M, TILE_N))
+     b = wp.tile_zeros(shape=(TILE_M, TILE_N), dtype=float)
+
+     # copy the source array slice by slice
+     for i in range(TILE_M // 4):
+         # create views onto source and dest slice 4 rows at a time
+         v = wp.tile_view(a, offset=(i * 4, 0), shape=(4, TILE_N))
+
+         # copy onto dest slice
+         wp.tile_assign(b, v, offset=(i * 4, 0))
+
+     wp.tile_store(dst, b)
+
+
+ def test_tile_view_offset(test, device):
+     rng = np.random.default_rng(42)
+
+     a = wp.array(rng.random((TILE_M, TILE_N), dtype=np.float32), requires_grad=True, device=device)
+     b = wp.array(np.zeros((TILE_M, TILE_N), dtype=np.float32), requires_grad=True, device=device)
+
+     with wp.Tape() as tape:
+         wp.launch_tiled(test_tile_view_offset_kernel, dim=[1], inputs=[a, b], block_dim=32, device=device)
+
+     assert_np_equal(b.numpy(), a.numpy())
+     b.grad = wp.ones_like(b, device=device)
+     tape.backward()
+
+     assert_np_equal(a.grad.numpy(), np.ones_like(a.numpy()))
+
+
+ devices = get_cuda_test_devices()
+
+
+ class TestTileView(unittest.TestCase):
+     pass
+
+
+ add_function_test(TestTileView, "test_tile_view", test_tile_view, devices=devices)
+ add_function_test(TestTileView, "test_tile_view_offset", test_tile_view_offset, devices=devices)
+ add_function_test(TestTileView, "test_tile_assign_1d", test_tile_assign_1d, devices=devices)
+ add_function_test(TestTileView, "test_tile_assign_2d", test_tile_assign_2d, devices=devices)
+
+
+ if __name__ == "__main__":
+     wp.clear_kernel_cache()
+     unittest.main(verbosity=2, failfast=True)
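The new test_tile_view.py above exercises two related idioms: indexing a tile (a[i]) to obtain a lower-dimensional view, and wp.tile_view/wp.tile_assign for windows at an offset. A condensed sketch of the offset form, under the same assumptions as the tests (CUDA device, illustrative names):

import numpy as np
import warp as wp

TILE_M, TILE_N = 16, 32


@wp.kernel
def copy_in_windows(src: wp.array2d(dtype=float), dst: wp.array2d(dtype=float)):
    a = wp.tile_load(src, shape=(TILE_M, TILE_N))
    b = wp.tile_zeros(shape=(TILE_M, TILE_N), dtype=float)

    # walk the source tile four rows at a time and assign each window into the destination tile
    for i in range(TILE_M // 4):
        v = wp.tile_view(a, offset=(i * 4, 0), shape=(4, TILE_N))
        wp.tile_assign(b, v, offset=(i * 4, 0))

    wp.tile_store(dst, b)


src = wp.array(np.random.rand(TILE_M, TILE_N).astype(np.float32))
dst = wp.zeros((TILE_M, TILE_N), dtype=float)
wp.launch_tiled(copy_in_windows, dim=[1], inputs=[src, dst], block_dim=32)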
warp/tests/test_torch.py CHANGED
@@ -403,6 +403,38 @@ def test_cuda_array_interface(test, device):
      assert a1.strides == a2.strides


+ @wp.kernel
+ def vec_sum_kernel(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3), z: wp.array(dtype=wp.vec3)):
+     tid = wp.tid()
+     z[tid] = x[tid] + y[tid]
+
+
+ # ensure torch arrays passed to Warp kernels are unchanged by Tape.backward()
+ def test_tensor_in_warp_kernel(test, device):
+     torch_device = wp.device_to_torch(device)
+
+     x = torch.ones((10, 3), dtype=torch.float32, device=torch_device)
+     y = torch.ones((10, 3), dtype=torch.float32, device=torch_device)
+     wp_y = wp.from_torch(y, dtype=wp.vec3, requires_grad=True)
+     z = torch.zeros((10, 3), dtype=torch.float32, device=torch_device)
+     wp_z = wp.from_torch(z, dtype=wp.vec3, requires_grad=True)
+
+     tape = wp.Tape()
+
+     with tape:
+         wp.launch(vec_sum_kernel, dim=10, inputs=[x, wp_y], outputs=[wp_z], device=device)
+
+     assert_np_equal(x.cpu().numpy(), np.ones((10, 3), dtype=float))
+
+     tape.backward(grads={wp_z: wp.ones_like(wp_z)})
+
+     # x is unchanged by Tape.backward()
+     assert_np_equal(x.cpu().numpy(), np.ones((10, 3), dtype=float))
+
+     # we can still compute the gradient of y because Warp created an array for it
+     assert_np_equal(y.grad.cpu().numpy(), np.ones((10, 3), dtype=float))
+
+
  def test_to_torch(test, device):
      import torch

@@ -913,6 +945,9 @@ try:
      add_function_test(TestTorch, "test_torch_zerocopy", test_torch_zerocopy, devices=torch_compatible_devices)
      add_function_test(TestTorch, "test_torch_autograd", test_torch_autograd, devices=torch_compatible_devices)
      add_function_test(TestTorch, "test_direct", test_direct, devices=torch_compatible_devices)
+     add_function_test(
+         TestTorch, "test_tensor_in_warp_kernel", test_tensor_in_warp_kernel, devices=torch_compatible_devices
+     )

      if torch_compatible_cuda_devices:
          add_function_test(
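The new interop test above shows the intended division of labor: tensors that should receive gradients are wrapped with wp.from_torch(..., requires_grad=True), while tensors passed to the kernel directly are left untouched by Tape.backward(). A minimal sketch of that pattern outside the test harness (the kernel and shapes are illustrative; torch and a device supported by both libraries are assumed):

import torch
import warp as wp


@wp.kernel
def vec_sum(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3), z: wp.array(dtype=wp.vec3)):
    tid = wp.tid()
    z[tid] = x[tid] + y[tid]


device = wp.get_device()
torch_device = wp.device_to_torch(device)

x = torch.ones((10, 3), dtype=torch.float32, device=torch_device)   # passed directly, no gradient tracking
y = torch.ones((10, 3), dtype=torch.float32, device=torch_device)
wp_y = wp.from_torch(y, dtype=wp.vec3, requires_grad=True)          # wrapped, receives a gradient
z = torch.zeros((10, 3), dtype=torch.float32, device=torch_device)
wp_z = wp.from_torch(z, dtype=wp.vec3, requires_grad=True)

with wp.Tape() as tape:
    wp.launch(vec_sum, dim=10, inputs=[x, wp_y], outputs=[wp_z], device=device)

tape.backward(grads={wp_z: wp.ones_like(wp_z)})
print(y.grad)   # ones; x itself is left unchanged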
warp/tests/test_utils.py CHANGED
@@ -79,37 +79,49 @@ def test_array_scan_error_unsupported_dtype(test, device):


  def test_radix_sort_pairs(test, device):
-     keys = wp.array((7, 2, 8, 4, 1, 6, 5, 3, 0, 0, 0, 0, 0, 0, 0, 0), dtype=int, device=device)
-     values = wp.array((1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0), dtype=int, device=device)
-     wp.utils.radix_sort_pairs(keys, values, 8)
-     assert_np_equal(keys.numpy()[:8], np.array((1, 2, 3, 4, 5, 6, 7, 8)))
-     assert_np_equal(values.numpy()[:8], np.array((5, 2, 8, 4, 7, 6, 1, 3)))
+     keyTypes = [int, wp.float32]
+
+     for keyType in keyTypes:
+         keys = wp.array((7, 2, 8, 4, 1, 6, 5, 3, 0, 0, 0, 0, 0, 0, 0, 0), dtype=keyType, device=device)
+         values = wp.array((1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0), dtype=int, device=device)
+         wp.utils.radix_sort_pairs(keys, values, 8)
+         assert_np_equal(keys.numpy()[:8], np.array((1, 2, 3, 4, 5, 6, 7, 8)))
+         assert_np_equal(values.numpy()[:8], np.array((5, 2, 8, 4, 7, 6, 1, 3)))


  def test_radix_sort_pairs_empty(test, device):
-     keys = wp.array((), dtype=int, device=device)
-     values = wp.array((), dtype=int, device=device)
-     wp.utils.radix_sort_pairs(keys, values, 0)
+     keyTypes = [int, wp.float32]
+
+     for keyType in keyTypes:
+         keys = wp.array((), dtype=keyType, device=device)
+         values = wp.array((), dtype=int, device=device)
+         wp.utils.radix_sort_pairs(keys, values, 0)


  def test_radix_sort_pairs_error_insufficient_storage(test, device):
-     keys = wp.array((1, 2, 3), dtype=int, device=device)
-     values = wp.array((1, 2, 3), dtype=int, device=device)
-     with test.assertRaisesRegex(
-         RuntimeError,
-         r"Array storage must be large enough to contain 2\*count elements$",
-     ):
-         wp.utils.radix_sort_pairs(keys, values, 3)
+     keyTypes = [int, wp.float32]
+
+     for keyType in keyTypes:
+         keys = wp.array((1, 2, 3), dtype=keyType, device=device)
+         values = wp.array((1, 2, 3), dtype=int, device=device)
+         with test.assertRaisesRegex(
+             RuntimeError,
+             r"Array storage must be large enough to contain 2\*count elements$",
+         ):
+             wp.utils.radix_sort_pairs(keys, values, 3)


  def test_radix_sort_pairs_error_unsupported_dtype(test, device):
-     keys = wp.array((1.0, 2.0, 3.0), dtype=float, device=device)
-     values = wp.array((1.0, 2.0, 3.0), dtype=float, device=device)
-     with test.assertRaisesRegex(
-         RuntimeError,
-         r"Unsupported data type$",
-     ):
-         wp.utils.radix_sort_pairs(keys, values, 1)
+     keyTypes = [int, wp.float32]
+
+     for keyType in keyTypes:
+         keys = wp.array((1.0, 2.0, 3.0), dtype=keyType, device=device)
+         values = wp.array((1.0, 2.0, 3.0), dtype=float, device=device)
+         with test.assertRaisesRegex(
+             RuntimeError,
+             r"Unsupported data type$",
+         ):
+             wp.utils.radix_sort_pairs(keys, values, 1)


  def test_array_sum(test, device):
@@ -268,7 +280,7 @@ class TestUtils(unittest.TestCase):
              wp.utils.warn("hello, world!")
              wp.utils.warn("hello, world!")

-         expected = "Warp UserWarning: hello, world!\n" "Warp UserWarning: hello, world!\n"
+         expected = "Warp UserWarning: hello, world!\nWarp UserWarning: hello, world!\n"

          self.assertEqual(f.getvalue(), expected)

@@ -308,7 +320,7 @@ class TestUtils(unittest.TestCase):
              wp.utils.warn("foo", category=DeprecationWarning)
              wp.utils.warn("bar", category=DeprecationWarning)

-         expected = "Warp DeprecationWarning: foo\n" "Warp DeprecationWarning: bar\n"
+         expected = "Warp DeprecationWarning: foo\nWarp DeprecationWarning: bar\n"

          self.assertEqual(f.getvalue(), expected)

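The sort tests above now cover float32 keys in addition to int. A small host-side sketch of wp.utils.radix_sort_pairs with float keys (array contents are illustrative; as in the tests, the key and value arrays must provide storage for 2*count elements):

import warp as wp
import warp.utils

count = 4
# storage holds 2*count elements; only the first `count` entries are meaningful input
keys = wp.array((0.7, 0.2, 0.8, 0.4, 0.0, 0.0, 0.0, 0.0), dtype=wp.float32)
values = wp.array((1, 2, 3, 4, 0, 0, 0, 0), dtype=int)

wp.utils.radix_sort_pairs(keys, values, count)

print(keys.numpy()[:count])    # [0.2 0.4 0.7 0.8]
print(values.numpy()[:count])  # [2 4 1 3]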
warp/tests/test_vec.py CHANGED
@@ -6,6 +6,7 @@
  # license agreement from NVIDIA CORPORATION is strictly prohibited.

  import unittest
+ from typing import Any

  import numpy as np

@@ -1240,6 +1241,103 @@ def test_constructors_constant_length():
          v[i] = float(i)


+ Vec123 = wp.vec(123, dtype=wp.float16)
+
+
+ @wp.kernel
+ def vector_len_kernel(
+     v1: wp.vec2,
+     v2: wp.vec(3, float),
+     v3: wp.vec(Any, float),
+     v4: Vec123,
+     out: wp.array(dtype=int),
+ ):
+     length = wp.static(len(v1))
+     wp.expect_eq(len(v1), 2)
+     out[0] = len(v1)
+
+     length = len(v2)
+     wp.expect_eq(wp.static(len(v2)), 3)
+     out[1] = len(v2)
+
+     length = len(v3)
+     wp.expect_eq(len(v3), 4)
+     out[2] = wp.static(len(v3))
+
+     length = wp.static(len(v4))
+     wp.expect_eq(wp.static(len(v4)), 123)
+     out[3] = wp.static(len(v4))
+
+     foo = wp.vec2()
+     length = len(foo)
+     wp.expect_eq(len(foo), 2)
+     out[4] = len(foo)
+
+
+ def test_vector_len(test, device):
+     v1 = wp.vec2()
+     v2 = wp.vec3()
+     v3 = wp.vec4()
+     v4 = Vec123()
+     out = wp.empty(5, dtype=int, device=device)
+     wp.launch(vector_len_kernel, dim=(1,), inputs=(v1, v2, v3, v4), outputs=(out,), device=device)
+
+     test.assertEqual(out.numpy()[0], 2)
+     test.assertEqual(out.numpy()[1], 3)
+     test.assertEqual(out.numpy()[2], 4)
+     test.assertEqual(out.numpy()[3], 123)
+     test.assertEqual(out.numpy()[4], 2)
+
+
+ @wp.kernel
+ def vector_augassign_kernel(
+     a: wp.array(dtype=wp.vec3), b: wp.array(dtype=wp.vec3), c: wp.array(dtype=wp.vec3), d: wp.array(dtype=wp.vec3)
+ ):
+     i = wp.tid()
+
+     v1 = wp.vec3()
+     v2 = b[i]
+
+     v1[0] += v2[0]
+     v1[1] += v2[1]
+     v1[2] += v2[2]
+
+     a[i] = v1
+
+     v3 = wp.vec3()
+     v4 = d[i]
+
+     v3[0] -= v4[0]
+     v3[1] -= v4[1]
+     v3[2] -= v4[2]
+
+     c[i] = v3
+
+
+ def test_vector_augassign(test, device):
+     N = 3
+
+     a = wp.zeros(N, dtype=wp.vec3, requires_grad=True)
+     b = wp.ones(N, dtype=wp.vec3, requires_grad=True)
+
+     c = wp.zeros(N, dtype=wp.vec3, requires_grad=True)
+     d = wp.ones(N, dtype=wp.vec3, requires_grad=True)
+
+     tape = wp.Tape()
+     with tape:
+         wp.launch(vector_augassign_kernel, N, inputs=[a, b, c, d])
+
+     tape.backward(grads={a: wp.ones_like(a), c: wp.ones_like(c)})
+
+     assert_np_equal(a.numpy(), wp.ones_like(a).numpy())
+     assert_np_equal(a.grad.numpy(), wp.ones_like(a).numpy())
+     assert_np_equal(b.grad.numpy(), wp.ones_like(a).numpy())
+
+     assert_np_equal(c.numpy(), -wp.ones_like(c).numpy())
+     assert_np_equal(c.grad.numpy(), wp.ones_like(c).numpy())
+     assert_np_equal(d.grad.numpy(), -wp.ones_like(d).numpy())
+
+
  devices = get_test_devices()


@@ -1350,6 +1448,18 @@ add_function_test(
      test_tpl_constructor_error_numeric_args_mismatch,
      devices=devices,
  )
+ add_function_test(
+     TestVec,
+     "test_vector_len",
+     test_vector_len,
+     devices=devices,
+ )
+ add_function_test(
+     TestVec,
+     "test_vector_augassign",
+     test_vector_augassign,
+     devices=devices,
+ )


  if __name__ == "__main__":
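A condensed sketch of the two vector features the new tests above exercise: len() on vector values inside a kernel, optionally folded at code-gen time with wp.static(), and augmented assignment on individual components (names here are illustrative):

import warp as wp


@wp.kernel
def vec_features(v: wp.vec4, out: wp.array(dtype=wp.vec3), n: wp.array(dtype=int)):
    tid = wp.tid()

    n[tid] = wp.static(len(v))   # folded to the constant 4; plain len(v) also works at runtime

    acc = wp.vec3()
    acc[0] += 1.0                # augmented assignment on vector components
    acc[1] -= 2.0
    out[tid] = acc


out = wp.empty(1, dtype=wp.vec3)
n = wp.empty(1, dtype=int)
wp.launch(vec_features, dim=1, inputs=[wp.vec4()], outputs=[out, n])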