warp-lang 1.5.1-py3-none-manylinux2014_aarch64.whl → 1.6.0-py3-none-manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of warp-lang has been flagged for review by the registry scanner.
- warp/__init__.py +5 -0
- warp/autograd.py +414 -191
- warp/bin/warp-clang.so +0 -0
- warp/bin/warp.so +0 -0
- warp/build.py +40 -12
- warp/build_dll.py +13 -6
- warp/builtins.py +1076 -480
- warp/codegen.py +240 -119
- warp/config.py +1 -1
- warp/context.py +298 -84
- warp/examples/assets/square_cloth.usd +0 -0
- warp/examples/benchmarks/benchmark_gemm.py +27 -18
- warp/examples/benchmarks/benchmark_interop_paddle.py +3 -3
- warp/examples/benchmarks/benchmark_interop_torch.py +3 -3
- warp/examples/core/example_torch.py +18 -34
- warp/examples/fem/example_apic_fluid.py +1 -0
- warp/examples/fem/example_mixed_elasticity.py +1 -1
- warp/examples/optim/example_bounce.py +1 -1
- warp/examples/optim/example_cloth_throw.py +1 -1
- warp/examples/optim/example_diffray.py +4 -15
- warp/examples/optim/example_drone.py +1 -1
- warp/examples/optim/example_softbody_properties.py +392 -0
- warp/examples/optim/example_trajectory.py +1 -3
- warp/examples/optim/example_walker.py +5 -0
- warp/examples/sim/example_cartpole.py +0 -2
- warp/examples/sim/example_cloth_self_contact.py +260 -0
- warp/examples/sim/example_granular_collision_sdf.py +4 -5
- warp/examples/sim/example_jacobian_ik.py +0 -2
- warp/examples/sim/example_quadruped.py +5 -2
- warp/examples/tile/example_tile_cholesky.py +79 -0
- warp/examples/tile/example_tile_convolution.py +2 -2
- warp/examples/tile/example_tile_fft.py +2 -2
- warp/examples/tile/example_tile_filtering.py +3 -3
- warp/examples/tile/example_tile_matmul.py +4 -4
- warp/examples/tile/example_tile_mlp.py +12 -12
- warp/examples/tile/example_tile_nbody.py +180 -0
- warp/examples/tile/example_tile_walker.py +319 -0
- warp/math.py +147 -0
- warp/native/array.h +12 -0
- warp/native/builtin.h +0 -1
- warp/native/bvh.cpp +149 -70
- warp/native/bvh.cu +287 -68
- warp/native/bvh.h +195 -85
- warp/native/clang/clang.cpp +5 -1
- warp/native/cuda_util.cpp +35 -0
- warp/native/cuda_util.h +5 -0
- warp/native/exports.h +40 -40
- warp/native/intersect.h +17 -0
- warp/native/mat.h +41 -0
- warp/native/mathdx.cpp +19 -0
- warp/native/mesh.cpp +25 -8
- warp/native/mesh.cu +153 -101
- warp/native/mesh.h +482 -403
- warp/native/quat.h +40 -0
- warp/native/solid_angle.h +7 -0
- warp/native/sort.cpp +85 -0
- warp/native/sort.cu +34 -0
- warp/native/sort.h +3 -1
- warp/native/spatial.h +11 -0
- warp/native/tile.h +1185 -664
- warp/native/tile_reduce.h +8 -6
- warp/native/vec.h +41 -0
- warp/native/warp.cpp +8 -1
- warp/native/warp.cu +263 -40
- warp/native/warp.h +19 -5
- warp/optim/linear.py +22 -4
- warp/render/render_opengl.py +124 -59
- warp/sim/__init__.py +6 -1
- warp/sim/collide.py +270 -26
- warp/sim/integrator_euler.py +25 -7
- warp/sim/integrator_featherstone.py +154 -35
- warp/sim/integrator_vbd.py +842 -40
- warp/sim/model.py +111 -53
- warp/stubs.py +248 -115
- warp/tape.py +28 -30
- warp/tests/aux_test_module_unload.py +15 -0
- warp/tests/{test_sim_grad.py → flaky_test_sim_grad.py} +104 -63
- warp/tests/test_array.py +74 -0
- warp/tests/test_assert.py +242 -0
- warp/tests/test_codegen.py +14 -61
- warp/tests/test_collision.py +2 -2
- warp/tests/test_examples.py +9 -0
- warp/tests/test_grad_debug.py +87 -2
- warp/tests/test_hash_grid.py +1 -1
- warp/tests/test_ipc.py +116 -0
- warp/tests/test_mat.py +138 -167
- warp/tests/test_math.py +47 -1
- warp/tests/test_matmul.py +11 -7
- warp/tests/test_matmul_lite.py +4 -4
- warp/tests/test_mesh.py +84 -60
- warp/tests/test_mesh_query_aabb.py +165 -0
- warp/tests/test_mesh_query_point.py +328 -286
- warp/tests/test_mesh_query_ray.py +134 -121
- warp/tests/test_mlp.py +2 -2
- warp/tests/test_operators.py +43 -0
- warp/tests/test_overwrite.py +2 -2
- warp/tests/test_quat.py +77 -0
- warp/tests/test_reload.py +29 -0
- warp/tests/test_sim_grad_bounce_linear.py +204 -0
- warp/tests/test_static.py +16 -0
- warp/tests/test_tape.py +25 -0
- warp/tests/test_tile.py +134 -191
- warp/tests/test_tile_load.py +356 -0
- warp/tests/test_tile_mathdx.py +61 -8
- warp/tests/test_tile_mlp.py +17 -17
- warp/tests/test_tile_reduce.py +24 -18
- warp/tests/test_tile_shared_memory.py +66 -17
- warp/tests/test_tile_view.py +165 -0
- warp/tests/test_torch.py +35 -0
- warp/tests/test_utils.py +36 -24
- warp/tests/test_vec.py +110 -0
- warp/tests/unittest_suites.py +29 -4
- warp/tests/unittest_utils.py +30 -11
- warp/thirdparty/unittest_parallel.py +2 -2
- warp/types.py +409 -99
- warp/utils.py +9 -5
- {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/METADATA +68 -44
- {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/RECORD +121 -110
- {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/WHEEL +1 -1
- warp/examples/benchmarks/benchmark_tile.py +0 -179
- warp/native/tile_gemm.h +0 -341
- {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/LICENSE.md +0 -0
- {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/top_level.txt +0 -0
warp/tests/test_tile_shared_memory.py
CHANGED

@@ -22,11 +22,11 @@ def test_tile_shared_mem_size(test, device):
 
     @wp.kernel
     def compute(out: wp.array2d(dtype=float)):
-        a = wp.tile_ones(DIM_M, DIM_N, dtype=float, storage="shared")
-        b = wp.tile_ones(DIM_M, DIM_N, dtype=float, storage="shared") * 2.0
+        a = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared")
+        b = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared") * 2.0
 
         c = a + b
-        wp.tile_store(out,
+        wp.tile_store(out, c)
 
     out = wp.empty((DIM_M, DIM_N), dtype=float, device=device)
 
@@ -58,11 +58,11 @@ def test_tile_shared_mem_large(test, device):
     # we disable backward kernel gen since 128k is not supported on most architectures
     @wp.kernel(enable_backward=False)
     def compute(out: wp.array2d(dtype=float)):
-        a = wp.tile_ones(DIM_M, DIM_N, dtype=float, storage="shared")
-        b = wp.tile_ones(DIM_M, DIM_N, dtype=float, storage="shared") * 2.0
+        a = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared")
+        b = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared") * 2.0
 
         c = a + b
-        wp.tile_store(out,
+        wp.tile_store(out, c)
 
     out = wp.empty((DIM_M, DIM_N), dtype=float, device=device)
 
@@ -73,7 +73,7 @@ def test_tile_shared_mem_large(test, device):
 
     # check required shared memory
     expected_forward_bytes = DIM_M * DIM_N * 4 * 2
-    expected_backward_bytes =
+    expected_backward_bytes = 0
 
     assert expected_forward_bytes == 2**16
 
@@ -94,11 +94,11 @@ def test_tile_shared_mem_graph(test, device):
 
     @wp.kernel
     def compute(out: wp.array2d(dtype=float)):
-        a = wp.tile_ones(DIM_M, DIM_N, dtype=float, storage="shared")
-        b = wp.tile_ones(DIM_M, DIM_N, dtype=float, storage="shared") * 2.0
+        a = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared")
+        b = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared") * 2.0
 
         c = a + b
-        wp.tile_store(out,
+        wp.tile_store(out, c)
 
     out = wp.empty((DIM_M, DIM_N), dtype=float, device=device)
 
@@ -127,22 +127,25 @@ def test_tile_shared_mem_graph(test, device):
 
 # checks that stack allocations work for user functions
 def test_tile_shared_mem_func(test, device):
-    DIM_M =
-    DIM_N =
+    DIM_M = 64
+    DIM_N = 64
+
+    SMALL_DIM_M = 64 // 4
+    SMALL_DIM_N = 64 // 4
 
     BLOCK_DIM = 256
 
     @wp.func
     def add_tile_small():
-        a = wp.tile_ones(
-        b = wp.tile_ones(
+        a = wp.tile_ones(shape=(SMALL_DIM_M, SMALL_DIM_N), dtype=float, storage="shared")
+        b = wp.tile_ones(shape=(SMALL_DIM_M, SMALL_DIM_N), dtype=float, storage="shared") * 2.0
 
         return a + b
 
     @wp.func
     def add_tile_big():
-        a = wp.tile_ones(
-        b = wp.tile_ones(
+        a = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared")
+        b = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared") * 2.0
 
         return a + b
 
@@ -151,7 +154,7 @@ def test_tile_shared_mem_func(test, device):
         s = add_tile_small()
         b = add_tile_big()
 
-        wp.tile_store(out,
+        wp.tile_store(out, b)
 
     out = wp.empty((DIM_M, DIM_N), dtype=float, device=device)
 
@@ -168,6 +171,51 @@ def test_tile_shared_mem_func(test, device):
     assert hooks.backward_smem_bytes == expected_required_shared * 2
 
 
+def round_up(a, b):
+    return b * ((a + b - 1) // b)
+
+
+# checks that using non-16B aligned sizes work
+def test_tile_shared_non_aligned(test, device):
+    # Tile size = 4 (float) * 1 * 3 = 12B % 16 != 0
+    DIM_M = 1
+    DIM_N = 3
+
+    BLOCK_DIM = 256
+
+    @wp.func
+    def foo():
+        a = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared") * 2.0
+        b = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared") * 3.0
+        return a + b
+
+    @wp.kernel
+    def compute(out: wp.array2d(dtype=float)):
+        # This test the logic in the stack allocator, which should increment and
+        # decrement the stack pointer each time foo() is called
+        # Failing to do so correct will make b out of bounds and corrupt the results
+        for _ in range(4096):
+            foo()
+        b = wp.tile_ones(shape=(DIM_M, DIM_N), dtype=float, storage="shared")
+        wp.tile_store(out, b)
+
+    out = wp.empty((DIM_M, DIM_N), dtype=float, device=device)
+
+    wp.launch_tiled(compute, dim=[1], inputs=[out], block_dim=BLOCK_DIM, device=device)
+
+    assert_np_equal(out.numpy(), np.ones((DIM_M, DIM_N), dtype=float))
+
+    # check shared memory for kernel on the device
+    module_exec = compute.module.load(device, BLOCK_DIM)
+    hooks = module_exec.get_kernel_hooks(compute)
+
+    # ensure that total required dynamic shared is the larger of the two tiles
+    expected_required_shared = 3 * round_up(DIM_M * DIM_N * 4, 16)
+
+    assert hooks.forward_smem_bytes == expected_required_shared
+    assert hooks.backward_smem_bytes == expected_required_shared * 2
+
+
 devices = get_cuda_test_devices()
 
 
@@ -183,6 +231,7 @@ add_function_test(
 )
 add_function_test(TestTileSharedMemory, "test_tile_shared_mem_graph", test_tile_shared_mem_graph, devices=devices)
 add_function_test(TestTileSharedMemory, "test_tile_shared_mem_func", test_tile_shared_mem_func, devices=devices)
+add_function_test(TestTileSharedMemory, "test_tile_shared_non_aligned", test_tile_shared_non_aligned, devices=devices)
 
 
 if __name__ == "__main__":
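Note on the API change above: warp 1.6.0 passes tile dimensions through a shape= keyword (wp.tile_ones(shape=(M, N), ...)) and passes the tile directly to wp.tile_store(out, c). Below is a minimal standalone sketch built only from the 1.6.0-style calls that appear in these hunks; the kernel and variable names are illustrative, a CUDA device is assumed, and this is not code from the package.

import numpy as np
import warp as wp

TILE_M, TILE_N = 16, 32
BLOCK_DIM = 64


@wp.kernel
def ones_plus_twos(out: wp.array2d(dtype=float)):
    # 1.6.0-style: dimensions are given via the shape= keyword
    a = wp.tile_ones(shape=(TILE_M, TILE_N), dtype=float, storage="shared")
    b = wp.tile_ones(shape=(TILE_M, TILE_N), dtype=float, storage="shared") * 2.0

    c = a + b
    # the tile is passed directly to tile_store, as in the updated tests above
    wp.tile_store(out, c)


out = wp.empty((TILE_M, TILE_N), dtype=float, device="cuda:0")
wp.launch_tiled(ones_plus_twos, dim=[1], inputs=[out], block_dim=BLOCK_DIM, device="cuda:0")
assert np.allclose(out.numpy(), 3.0)

As in the tests above, wp.launch_tiled runs a single block of BLOCK_DIM threads that cooperatively computes the tile.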
warp/tests/test_tile_view.py
ADDED

@@ -0,0 +1,165 @@
+# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto. Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+import unittest
+
+import numpy as np
+
+import warp as wp
+from warp.tests.unittest_utils import *
+
+TILE_DIM = 64
+TILE_M = 16
+TILE_N = 32
+TILE_O = 8
+
+
+@wp.kernel
+def test_tile_view_kernel(src: wp.array2d(dtype=float), dst: wp.array2d(dtype=float)):
+    # load whole source into local memory
+    a = wp.tile_load(src, shape=(TILE_M, TILE_N))
+
+    # copy the source array row by row
+    for i in range(TILE_M):
+        # create a view on original array and store
+        row = a[i]
+        wp.tile_store(dst[i], row)
+
+
+def test_tile_view(test, device):
+    rng = np.random.default_rng(42)
+
+    a = wp.array(rng.random((TILE_M, TILE_N), dtype=np.float32), requires_grad=True, device=device)
+    b = wp.array(np.zeros((TILE_M, TILE_N), dtype=np.float32), requires_grad=True, device=device)
+
+    with wp.Tape() as tape:
+        wp.launch_tiled(test_tile_view_kernel, dim=[1], inputs=[a, b], block_dim=32, device=device)
+
+    assert_np_equal(b.numpy(), a.numpy())
+    b.grad = wp.ones_like(b, device=device)
+    tape.backward()
+
+    assert_np_equal(a.grad.numpy(), np.ones_like(a.numpy()))
+
+
+@wp.kernel
+def test_tile_assign_1d_kernel(src: wp.array2d(dtype=float), dst: wp.array2d(dtype=float)):
+    # load whole source into local memory
+    a = wp.tile_load(src, shape=(TILE_M, TILE_N))
+    b = wp.tile_zeros(dtype=float, shape=(TILE_M, TILE_N))
+
+    # copy the source array row by row
+    for i in range(int(TILE_M)):
+        # create views onto source and dest rows
+        row_src = a[i]
+        row_dst = b[i]
+
+        # copy onto dest row
+        wp.tile_assign(row_dst, row_src)
+
+    wp.tile_store(dst, b)
+
+
+def test_tile_assign_1d(test, device):
+    rng = np.random.default_rng(42)
+
+    a = wp.array(rng.random((TILE_M, TILE_N), dtype=np.float32), requires_grad=True, device=device)
+    b = wp.array(np.zeros((TILE_M, TILE_N), dtype=np.float32), requires_grad=True, device=device)
+
+    with wp.Tape() as tape:
+        wp.launch_tiled(test_tile_assign_1d_kernel, dim=[1], inputs=[a, b], block_dim=32, device=device)
+
+    assert_np_equal(b.numpy(), a.numpy())
+    b.grad = wp.ones_like(b, device=device)
+    tape.backward()
+
+    assert_np_equal(a.grad.numpy(), np.ones_like(a.numpy()))
+
+
+@wp.kernel
+def test_tile_assign_2d_kernel(src: wp.array3d(dtype=float), dst: wp.array3d(dtype=float)):
+    # load whole source into local memory
+    a = wp.tile_load(src, shape=(TILE_M, TILE_N, TILE_O))
+    b = wp.tile_zeros(dtype=float, shape=(TILE_M, TILE_N, TILE_O))
+
+    # copy the source array slice by slice
+    for i in range(TILE_M):
+        # create views onto source and dest slice
+        row_src = a[i]
+        row_dst = b[i]
+
+        # copy onto dest slice
+        wp.tile_assign(row_dst, row_src)
+
+    wp.tile_store(dst, b)
+
+
+def test_tile_assign_2d(test, device):
+    rng = np.random.default_rng(42)
+
+    a = wp.array(rng.random((TILE_M, TILE_N, TILE_O), dtype=np.float32), requires_grad=True, device=device)
+    b = wp.array(np.zeros((TILE_M, TILE_N, TILE_O), dtype=np.float32), requires_grad=True, device=device)
+
+    with wp.Tape() as tape:
+        wp.launch_tiled(test_tile_assign_2d_kernel, dim=[1], inputs=[a, b], block_dim=32, device=device)
+
+    assert_np_equal(b.numpy(), a.numpy())
+    b.grad = wp.ones_like(b, device=device)
+    tape.backward()
+
+    assert_np_equal(a.grad.numpy(), np.ones_like(a.numpy()))
+
+
+@wp.kernel
+def test_tile_view_offset_kernel(src: wp.array2d(dtype=float), dst: wp.array2d(dtype=float)):
+    # load whole source into local memory
+    a = wp.tile_load(src, shape=(TILE_M, TILE_N))
+    b = wp.tile_zeros(shape=(TILE_M, TILE_N), dtype=float)
+
+    # copy the source array slice by slice
+    for i in range(TILE_M // 4):
+        # create views onto source and dest slice 4 rows at a time
+        v = wp.tile_view(a, offset=(i * 4, 0), shape=(4, TILE_N))
+
+        # copy onto dest slice
+        wp.tile_assign(b, v, offset=(i * 4, 0))
+
+    wp.tile_store(dst, b)
+
+
+def test_tile_view_offset(test, device):
+    rng = np.random.default_rng(42)
+
+    a = wp.array(rng.random((TILE_M, TILE_N), dtype=np.float32), requires_grad=True, device=device)
+    b = wp.array(np.zeros((TILE_M, TILE_N), dtype=np.float32), requires_grad=True, device=device)
+
+    with wp.Tape() as tape:
+        wp.launch_tiled(test_tile_view_offset_kernel, dim=[1], inputs=[a, b], block_dim=32, device=device)
+
+    assert_np_equal(b.numpy(), a.numpy())
+    b.grad = wp.ones_like(b, device=device)
+    tape.backward()
+
+    assert_np_equal(a.grad.numpy(), np.ones_like(a.numpy()))
+
+
+devices = get_cuda_test_devices()
+
+
+class TestTileView(unittest.TestCase):
+    pass
+
+
+add_function_test(TestTileView, "test_tile_view", test_tile_view, devices=devices)
+add_function_test(TestTileView, "test_tile_view_offset", test_tile_view_offset, devices=devices)
+add_function_test(TestTileView, "test_tile_assign_1d", test_tile_assign_1d, devices=devices)
+add_function_test(TestTileView, "test_tile_assign_2d", test_tile_assign_2d, devices=devices)
+
+
+if __name__ == "__main__":
+    wp.clear_kernel_cache()
+    unittest.main(verbosity=2, failfast=True)
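The new test file above exercises tile views (a[i], wp.tile_view) and in-place copies between tiles (wp.tile_assign). Below is a minimal sketch that copies only the upper-left quadrant of a tile, built solely from the calls shown in that file; the kernel name, array names, and the CUDA device string are illustrative assumptions, not part of the package.

import numpy as np
import warp as wp

TILE_M, TILE_N = 16, 32
HALF_M, HALF_N = TILE_M // 2, TILE_N // 2


@wp.kernel
def copy_quadrant(src: wp.array2d(dtype=float), dst: wp.array2d(dtype=float)):
    a = wp.tile_load(src, shape=(TILE_M, TILE_N))
    b = wp.tile_zeros(shape=(TILE_M, TILE_N), dtype=float)

    # view of the upper-left (HALF_M, HALF_N) block of the source tile
    v = wp.tile_view(a, offset=(0, 0), shape=(HALF_M, HALF_N))

    # write it into the matching block of the destination tile
    wp.tile_assign(b, v, offset=(0, 0))

    wp.tile_store(dst, b)


src = wp.array(np.arange(TILE_M * TILE_N, dtype=np.float32).reshape(TILE_M, TILE_N), device="cuda:0")
dst = wp.zeros((TILE_M, TILE_N), dtype=float, device="cuda:0")
wp.launch_tiled(copy_quadrant, dim=[1], inputs=[src, dst], block_dim=32, device="cuda:0")
assert np.allclose(dst.numpy()[:HALF_M, :HALF_N], src.numpy()[:HALF_M, :HALF_N])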
warp/tests/test_torch.py
CHANGED

@@ -403,6 +403,38 @@ def test_cuda_array_interface(test, device):
     assert a1.strides == a2.strides
 
 
+@wp.kernel
+def vec_sum_kernel(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3), z: wp.array(dtype=wp.vec3)):
+    tid = wp.tid()
+    z[tid] = x[tid] + y[tid]
+
+
+# ensure torch arrays passed to Warp kernels are unchanged by Tape.backward()
+def test_tensor_in_warp_kernel(test, device):
+    torch_device = wp.device_to_torch(device)
+
+    x = torch.ones((10, 3), dtype=torch.float32, device=torch_device)
+    y = torch.ones((10, 3), dtype=torch.float32, device=torch_device)
+    wp_y = wp.from_torch(y, dtype=wp.vec3, requires_grad=True)
+    z = torch.zeros((10, 3), dtype=torch.float32, device=torch_device)
+    wp_z = wp.from_torch(z, dtype=wp.vec3, requires_grad=True)
+
+    tape = wp.Tape()
+
+    with tape:
+        wp.launch(vec_sum_kernel, dim=10, inputs=[x, wp_y], outputs=[wp_z], device=device)
+
+    assert_np_equal(x.cpu().numpy(), np.ones((10, 3), dtype=float))
+
+    tape.backward(grads={wp_z: wp.ones_like(wp_z)})
+
+    # x is unchanged by Tape.backward()
+    assert_np_equal(x.cpu().numpy(), np.ones((10, 3), dtype=float))
+
+    # we can still compute the gradient of y because Warp created an array for it
+    assert_np_equal(y.grad.cpu().numpy(), np.ones((10, 3), dtype=float))
+
+
 def test_to_torch(test, device):
     import torch
 
@@ -913,6 +945,9 @@ try:
     add_function_test(TestTorch, "test_torch_zerocopy", test_torch_zerocopy, devices=torch_compatible_devices)
     add_function_test(TestTorch, "test_torch_autograd", test_torch_autograd, devices=torch_compatible_devices)
     add_function_test(TestTorch, "test_direct", test_direct, devices=torch_compatible_devices)
+    add_function_test(
+        TestTorch, "test_tensor_in_warp_kernel", test_tensor_in_warp_kernel, devices=torch_compatible_devices
+    )
 
     if torch_compatible_cuda_devices:
         add_function_test(
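The new test above demonstrates the interop pattern: torch tensors wrapped with wp.from_torch(..., requires_grad=True) receive gradients from Warp's Tape, while tensors passed directly into a kernel are left untouched by Tape.backward(). Below is a minimal sketch of that pattern; the kernel name, shapes, and the CUDA device are illustrative assumptions, not code from the package.

import numpy as np
import torch
import warp as wp


@wp.kernel
def scale_kernel(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3)):
    tid = wp.tid()
    y[tid] = 2.0 * x[tid]


device = wp.get_device("cuda:0")
torch_device = wp.device_to_torch(device)

x = torch.ones((8, 3), dtype=torch.float32, device=torch_device)
y = torch.zeros((8, 3), dtype=torch.float32, device=torch_device)

# wrapping with requires_grad=True lets the tape write gradients back to torch
wp_x = wp.from_torch(x, dtype=wp.vec3, requires_grad=True)
wp_y = wp.from_torch(y, dtype=wp.vec3, requires_grad=True)

tape = wp.Tape()
with tape:
    wp.launch(scale_kernel, dim=8, inputs=[wp_x], outputs=[wp_y], device=device)

tape.backward(grads={wp_y: wp.ones_like(wp_y)})

# y = 2 * x, so the gradient on the wrapped input is 2 everywhere
assert np.allclose(x.grad.cpu().numpy(), 2.0)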
warp/tests/test_utils.py
CHANGED

@@ -79,37 +79,49 @@ def test_array_scan_error_unsupported_dtype(test, device):
 
 
 def test_radix_sort_pairs(test, device):
-
-
-
-
-
+    keyTypes = [int, wp.float32]
+
+    for keyType in keyTypes:
+        keys = wp.array((7, 2, 8, 4, 1, 6, 5, 3, 0, 0, 0, 0, 0, 0, 0, 0), dtype=keyType, device=device)
+        values = wp.array((1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0), dtype=int, device=device)
+        wp.utils.radix_sort_pairs(keys, values, 8)
+        assert_np_equal(keys.numpy()[:8], np.array((1, 2, 3, 4, 5, 6, 7, 8)))
+        assert_np_equal(values.numpy()[:8], np.array((5, 2, 8, 4, 7, 6, 1, 3)))
 
 
 def test_radix_sort_pairs_empty(test, device):
-
-
-
+    keyTypes = [int, wp.float32]
+
+    for keyType in keyTypes:
+        keys = wp.array((), dtype=keyType, device=device)
+        values = wp.array((), dtype=int, device=device)
+        wp.utils.radix_sort_pairs(keys, values, 0)
 
 
 def test_radix_sort_pairs_error_insufficient_storage(test, device):
-
-
-
-
-
-
-
+    keyTypes = [int, wp.float32]
+
+    for keyType in keyTypes:
+        keys = wp.array((1, 2, 3), dtype=keyType, device=device)
+        values = wp.array((1, 2, 3), dtype=int, device=device)
+        with test.assertRaisesRegex(
+            RuntimeError,
+            r"Array storage must be large enough to contain 2\*count elements$",
+        ):
+            wp.utils.radix_sort_pairs(keys, values, 3)
 
 
 def test_radix_sort_pairs_error_unsupported_dtype(test, device):
-
-
-
-
-
-
-
+    keyTypes = [int, wp.float32]
+
+    for keyType in keyTypes:
+        keys = wp.array((1.0, 2.0, 3.0), dtype=keyType, device=device)
+        values = wp.array((1.0, 2.0, 3.0), dtype=float, device=device)
+        with test.assertRaisesRegex(
+            RuntimeError,
+            r"Unsupported data type$",
+        ):
+            wp.utils.radix_sort_pairs(keys, values, 1)
 
 
 def test_array_sum(test, device):
@@ -268,7 +280,7 @@ class TestUtils(unittest.TestCase):
             wp.utils.warn("hello, world!")
             wp.utils.warn("hello, world!")
 
-            expected = "Warp UserWarning: hello, world!\
+            expected = "Warp UserWarning: hello, world!\nWarp UserWarning: hello, world!\n"
 
         self.assertEqual(f.getvalue(), expected)
 
@@ -308,7 +320,7 @@ class TestUtils(unittest.TestCase):
             wp.utils.warn("foo", category=DeprecationWarning)
             wp.utils.warn("bar", category=DeprecationWarning)
 
-            expected = "Warp DeprecationWarning: foo\
+            expected = "Warp DeprecationWarning: foo\nWarp DeprecationWarning: bar\n"
 
         self.assertEqual(f.getvalue(), expected)
 
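The reworked tests above show the wp.utils.radix_sort_pairs contract: both int and wp.float32 keys are accepted, the sort operates on the first count pairs, and each array must provide storage for at least 2 * count elements. A minimal usage sketch along those lines (the specific values are chosen for illustration and the device is left at its default):

import numpy as np
import warp as wp

count = 4
# arrays are twice as long as count to satisfy the scratch-storage requirement
keys = wp.array((3.0, 1.0, 4.0, 2.0, 0.0, 0.0, 0.0, 0.0), dtype=wp.float32)
values = wp.array((0, 1, 2, 3, 0, 0, 0, 0), dtype=int)

wp.utils.radix_sort_pairs(keys, values, count)

# keys are sorted and values are permuted alongside them
assert np.allclose(keys.numpy()[:count], (1.0, 2.0, 3.0, 4.0))
assert np.array_equal(values.numpy()[:count], (1, 3, 0, 2))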
warp/tests/test_vec.py
CHANGED

@@ -6,6 +6,7 @@
 # license agreement from NVIDIA CORPORATION is strictly prohibited.
 
 import unittest
+from typing import Any
 
 import numpy as np
 
@@ -1240,6 +1241,103 @@ def test_constructors_constant_length():
         v[i] = float(i)
 
 
+Vec123 = wp.vec(123, dtype=wp.float16)
+
+
+@wp.kernel
+def vector_len_kernel(
+    v1: wp.vec2,
+    v2: wp.vec(3, float),
+    v3: wp.vec(Any, float),
+    v4: Vec123,
+    out: wp.array(dtype=int),
+):
+    length = wp.static(len(v1))
+    wp.expect_eq(len(v1), 2)
+    out[0] = len(v1)
+
+    length = len(v2)
+    wp.expect_eq(wp.static(len(v2)), 3)
+    out[1] = len(v2)
+
+    length = len(v3)
+    wp.expect_eq(len(v3), 4)
+    out[2] = wp.static(len(v3))
+
+    length = wp.static(len(v4))
+    wp.expect_eq(wp.static(len(v4)), 123)
+    out[3] = wp.static(len(v4))
+
+    foo = wp.vec2()
+    length = len(foo)
+    wp.expect_eq(len(foo), 2)
+    out[4] = len(foo)
+
+
+def test_vector_len(test, device):
+    v1 = wp.vec2()
+    v2 = wp.vec3()
+    v3 = wp.vec4()
+    v4 = Vec123()
+    out = wp.empty(5, dtype=int, device=device)
+    wp.launch(vector_len_kernel, dim=(1,), inputs=(v1, v2, v3, v4), outputs=(out,), device=device)
+
+    test.assertEqual(out.numpy()[0], 2)
+    test.assertEqual(out.numpy()[1], 3)
+    test.assertEqual(out.numpy()[2], 4)
+    test.assertEqual(out.numpy()[3], 123)
+    test.assertEqual(out.numpy()[4], 2)
+
+
+@wp.kernel
+def vector_augassign_kernel(
+    a: wp.array(dtype=wp.vec3), b: wp.array(dtype=wp.vec3), c: wp.array(dtype=wp.vec3), d: wp.array(dtype=wp.vec3)
+):
+    i = wp.tid()
+
+    v1 = wp.vec3()
+    v2 = b[i]
+
+    v1[0] += v2[0]
+    v1[1] += v2[1]
+    v1[2] += v2[2]
+
+    a[i] = v1
+
+    v3 = wp.vec3()
+    v4 = d[i]
+
+    v3[0] -= v4[0]
+    v3[1] -= v4[1]
+    v3[2] -= v4[2]
+
+    c[i] = v3
+
+
+def test_vector_augassign(test, device):
+    N = 3
+
+    a = wp.zeros(N, dtype=wp.vec3, requires_grad=True)
+    b = wp.ones(N, dtype=wp.vec3, requires_grad=True)
+
+    c = wp.zeros(N, dtype=wp.vec3, requires_grad=True)
+    d = wp.ones(N, dtype=wp.vec3, requires_grad=True)
+
+    tape = wp.Tape()
+    with tape:
+        wp.launch(vector_augassign_kernel, N, inputs=[a, b, c, d])
+
+    tape.backward(grads={a: wp.ones_like(a), c: wp.ones_like(c)})
+
+    assert_np_equal(a.numpy(), wp.ones_like(a).numpy())
+    assert_np_equal(a.grad.numpy(), wp.ones_like(a).numpy())
+    assert_np_equal(b.grad.numpy(), wp.ones_like(a).numpy())
+
+    assert_np_equal(c.numpy(), -wp.ones_like(c).numpy())
+    assert_np_equal(c.grad.numpy(), wp.ones_like(c).numpy())
+    assert_np_equal(d.grad.numpy(), -wp.ones_like(d).numpy())
+
+
 devices = get_test_devices()
 
 
@@ -1350,6 +1448,18 @@ add_function_test(
     test_tpl_constructor_error_numeric_args_mismatch,
     devices=devices,
 )
+add_function_test(
+    TestVec,
+    "test_vector_len",
+    test_vector_len,
+    devices=devices,
+)
+add_function_test(
+    TestVec,
+    "test_vector_augassign",
+    test_vector_augassign,
+    devices=devices,
+)
 
 
 if __name__ == "__main__":
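The vector tests added above rely on len() being usable on vector values inside kernels, including custom-length types created with wp.vec(length, dtype), and on wp.static() folding the length at compile time. A minimal sketch of that usage follows; the type alias, kernel name, and device string are illustrative assumptions rather than package code.

import warp as wp

vec5h = wp.vec(5, dtype=wp.float16)  # custom-length vector type


@wp.kernel
def len_kernel(v: vec5h, out: wp.array(dtype=int)):
    # len() works on vector values inside kernels; wp.static folds it at compile time
    out[0] = len(v)
    out[1] = wp.static(len(v))


out = wp.empty(2, dtype=int, device="cuda:0")
wp.launch(len_kernel, dim=1, inputs=[vec5h()], outputs=[out], device="cuda:0")
print(out.numpy())  # expected: [5 5]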