warp-lang 1.8.0__py3-none-manylinux_2_34_aarch64.whl → 1.8.1__py3-none-manylinux_2_34_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/bin/warp-clang.so +0 -0
- warp/bin/warp.so +0 -0
- warp/build_dll.py +5 -0
- warp/codegen.py +15 -3
- warp/config.py +1 -1
- warp/context.py +122 -24
- warp/examples/interop/example_jax_callable.py +34 -4
- warp/examples/interop/example_jax_kernel.py +27 -1
- warp/fem/field/virtual.py +2 -0
- warp/fem/integrate.py +78 -47
- warp/jax_experimental/ffi.py +201 -53
- warp/native/array.h +4 -4
- warp/native/builtin.h +8 -4
- warp/native/coloring.cpp +5 -1
- warp/native/cuda_util.cpp +1 -1
- warp/native/intersect.h +2 -2
- warp/native/mat.h +3 -3
- warp/native/mesh.h +1 -1
- warp/native/quat.h +6 -2
- warp/native/rand.h +7 -7
- warp/native/sparse.cu +1 -1
- warp/native/svd.h +23 -8
- warp/native/tile.h +20 -1
- warp/native/tile_radix_sort.h +5 -1
- warp/native/tile_reduce.h +16 -25
- warp/native/tuple.h +2 -2
- warp/native/vec.h +4 -4
- warp/native/warp.cpp +1 -1
- warp/native/warp.cu +15 -2
- warp/native/warp.h +1 -1
- warp/render/render_opengl.py +52 -51
- warp/render/render_usd.py +0 -1
- warp/sim/collide.py +1 -2
- warp/sim/integrator_vbd.py +10 -2
- warp/sparse.py +1 -1
- warp/tape.py +2 -0
- warp/tests/sim/test_cloth.py +89 -6
- warp/tests/sim/test_coloring.py +76 -1
- warp/tests/test_assert.py +53 -0
- warp/tests/test_atomic_cas.py +127 -114
- warp/tests/test_mat.py +22 -0
- warp/tests/test_quat.py +22 -0
- warp/tests/test_sparse.py +32 -0
- warp/tests/test_static.py +48 -0
- warp/tests/test_tape.py +38 -0
- warp/tests/test_vec.py +38 -408
- warp/tests/test_vec_constructors.py +325 -0
- warp/tests/tile/test_tile.py +31 -143
- warp/tests/tile/test_tile_mathdx.py +2 -2
- warp/tests/tile/test_tile_matmul.py +179 -0
- warp/tests/tile/test_tile_reduce.py +100 -11
- warp/tests/tile/test_tile_shared_memory.py +12 -12
- warp/tests/tile/test_tile_sort.py +59 -55
- warp/tests/unittest_suites.py +10 -0
- {warp_lang-1.8.0.dist-info → warp_lang-1.8.1.dist-info}/METADATA +4 -4
- {warp_lang-1.8.0.dist-info → warp_lang-1.8.1.dist-info}/RECORD +59 -57
- {warp_lang-1.8.0.dist-info → warp_lang-1.8.1.dist-info}/WHEEL +0 -0
- {warp_lang-1.8.0.dist-info → warp_lang-1.8.1.dist-info}/licenses/LICENSE.md +0 -0
- {warp_lang-1.8.0.dist-info → warp_lang-1.8.1.dist-info}/top_level.txt +0 -0
warp/tests/test_atomic_cas.py
CHANGED
|
@@ -19,54 +19,63 @@ import numpy as np
|
|
|
19
19
|
import warp as wp
|
|
20
20
|
from warp.tests.unittest_utils import *
|
|
21
21
|
|
|
22
|
+
kernel_cache = {}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def getkernel(func, suffix=""):
|
|
26
|
+
key = func.__name__ + "_" + suffix
|
|
27
|
+
if key not in kernel_cache:
|
|
28
|
+
kernel_cache[key] = wp.Kernel(func=func, key=key)
|
|
29
|
+
return kernel_cache[key]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_atomic_cas(test, device, dtype, register_kernels=False):
|
|
33
|
+
warp_type = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
|
|
34
|
+
n = 100
|
|
35
|
+
counter = wp.array([0], dtype=warp_type, device=device)
|
|
36
|
+
lock = wp.array([0], dtype=warp_type, device=device)
|
|
22
37
|
|
|
23
|
-
def create_spinlock_test(dtype):
|
|
24
38
|
@wp.func
|
|
25
|
-
def
|
|
39
|
+
def spinlock_acquire_1d(lock: wp.array(dtype=warp_type)):
|
|
26
40
|
# Try to acquire the lock by setting it to 1 if it's 0
|
|
27
|
-
while wp.atomic_cas(lock, 0,
|
|
41
|
+
while wp.atomic_cas(lock, 0, warp_type(0), warp_type(1)) == 1:
|
|
28
42
|
pass
|
|
29
43
|
|
|
30
44
|
@wp.func
|
|
31
|
-
def
|
|
45
|
+
def spinlock_release_1d(lock: wp.array(dtype=warp_type)):
|
|
32
46
|
# Release the lock by setting it back to 0
|
|
33
|
-
wp.atomic_exch(lock, 0,
|
|
47
|
+
wp.atomic_exch(lock, 0, warp_type(0))
|
|
34
48
|
|
|
35
49
|
@wp.func
|
|
36
|
-
def
|
|
37
|
-
value = wp.atomic_exch(ptr, index,
|
|
50
|
+
def volatile_read_1d(ptr: wp.array(dtype=warp_type), index: int):
|
|
51
|
+
value = wp.atomic_exch(ptr, index, warp_type(0))
|
|
38
52
|
wp.atomic_exch(ptr, index, value)
|
|
39
53
|
return value
|
|
40
54
|
|
|
41
|
-
|
|
42
|
-
def test_spinlock_counter(counter: wp.array(dtype=dtype), lock: wp.array(dtype=dtype)):
|
|
55
|
+
def test_spinlock_counter_1d(counter: wp.array(dtype=warp_type), lock: wp.array(dtype=warp_type)):
|
|
43
56
|
# Try to acquire the lock
|
|
44
|
-
|
|
57
|
+
spinlock_acquire_1d(lock)
|
|
45
58
|
|
|
46
59
|
# Critical section - increment counter
|
|
47
60
|
# counter[0] = counter[0] + 1 # This gives wrong results - counter should be marked as volatile
|
|
48
61
|
|
|
49
62
|
# Work around since warp arrays cannot be marked as volatile
|
|
50
|
-
value =
|
|
51
|
-
counter[0] = value +
|
|
63
|
+
value = volatile_read_1d(counter, 0)
|
|
64
|
+
counter[0] = value + warp_type(1)
|
|
52
65
|
|
|
53
66
|
# Release the lock
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
return test_spinlock_counter
|
|
67
|
+
spinlock_release_1d(lock)
|
|
57
68
|
|
|
69
|
+
kernel = getkernel(test_spinlock_counter_1d, suffix=dtype.__name__)
|
|
58
70
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
counter = wp.array([0], dtype=warp_type, device=device)
|
|
62
|
-
lock = wp.array([0], dtype=warp_type, device=device)
|
|
71
|
+
if register_kernels:
|
|
72
|
+
return
|
|
63
73
|
|
|
64
|
-
|
|
65
|
-
wp.launch(test_spinlock_counter, dim=n, inputs=[counter, lock], device=device)
|
|
74
|
+
wp.launch(kernel, dim=n, inputs=[counter, lock], device=device)
|
|
66
75
|
|
|
67
76
|
# Verify counter reached n
|
|
68
77
|
counter_np = counter.numpy()
|
|
69
|
-
expected = np.array([n], dtype=
|
|
78
|
+
expected = np.array([n], dtype=dtype)
|
|
70
79
|
|
|
71
80
|
if not np.array_equal(counter_np, expected):
|
|
72
81
|
print(f"Counter mismatch: expected {expected}, got {counter_np}")
|
|
@@ -74,53 +83,53 @@ def test_atomic_cas(test, device, warp_type, numpy_type):
|
|
|
74
83
|
assert_np_equal(counter_np, expected)
|
|
75
84
|
|
|
76
85
|
|
|
77
|
-
def
|
|
86
|
+
def test_atomic_cas_2d(test, device, dtype, register_kernels=False):
|
|
87
|
+
warp_type = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
|
|
88
|
+
n = 100
|
|
89
|
+
counter = wp.array([0], dtype=warp_type, device=device)
|
|
90
|
+
lock = wp.zeros(shape=(1, 1), dtype=warp_type, device=device)
|
|
91
|
+
|
|
78
92
|
@wp.func
|
|
79
|
-
def
|
|
93
|
+
def spinlock_acquire_2d(lock: wp.array2d(dtype=warp_type)):
|
|
80
94
|
# Try to acquire the lock by setting it to 1 if it's 0
|
|
81
|
-
while wp.atomic_cas(lock, 0, 0,
|
|
95
|
+
while wp.atomic_cas(lock, 0, 0, warp_type(0), warp_type(1)) == 1:
|
|
82
96
|
pass
|
|
83
97
|
|
|
84
98
|
@wp.func
|
|
85
|
-
def
|
|
99
|
+
def spinlock_release_2d(lock: wp.array2d(dtype=warp_type)):
|
|
86
100
|
# Release the lock by setting it back to 0
|
|
87
|
-
wp.atomic_exch(lock, 0, 0,
|
|
101
|
+
wp.atomic_exch(lock, 0, 0, warp_type(0))
|
|
88
102
|
|
|
89
103
|
@wp.func
|
|
90
|
-
def
|
|
91
|
-
value = wp.atomic_exch(ptr, index,
|
|
104
|
+
def volatile_read_2d(ptr: wp.array(dtype=warp_type), index: int):
|
|
105
|
+
value = wp.atomic_exch(ptr, index, warp_type(0))
|
|
92
106
|
wp.atomic_exch(ptr, index, value)
|
|
93
107
|
return value
|
|
94
108
|
|
|
95
|
-
|
|
96
|
-
def test_spinlock_counter(counter: wp.array(dtype=dtype), lock: wp.array(dtype=dtype, ndim=2)):
|
|
109
|
+
def test_spinlock_counter_2d(counter: wp.array(dtype=warp_type), lock: wp.array2d(dtype=warp_type)):
|
|
97
110
|
# Try to acquire the lock
|
|
98
|
-
|
|
111
|
+
spinlock_acquire_2d(lock)
|
|
99
112
|
|
|
100
113
|
# Critical section - increment counter
|
|
101
114
|
# counter[0] = counter[0] + 1 # This gives wrong results - counter should be marked as volatile
|
|
102
115
|
|
|
103
116
|
# Work around since warp arrays cannot be marked as volatile
|
|
104
|
-
value =
|
|
105
|
-
counter[0] = value +
|
|
117
|
+
value = volatile_read_2d(counter, 0)
|
|
118
|
+
counter[0] = value + warp_type(1)
|
|
106
119
|
|
|
107
120
|
# Release the lock
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
return test_spinlock_counter
|
|
121
|
+
spinlock_release_2d(lock)
|
|
111
122
|
|
|
123
|
+
kernel = getkernel(test_spinlock_counter_2d, suffix=dtype.__name__)
|
|
112
124
|
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
counter = wp.array([0], dtype=warp_type, device=device)
|
|
116
|
-
lock = wp.zeros(shape=(1, 1), dtype=warp_type, device=device)
|
|
125
|
+
if register_kernels:
|
|
126
|
+
return
|
|
117
127
|
|
|
118
|
-
|
|
119
|
-
wp.launch(test_spinlock_counter, dim=n, inputs=[counter, lock], device=device)
|
|
128
|
+
wp.launch(kernel, dim=n, inputs=[counter, lock], device=device)
|
|
120
129
|
|
|
121
130
|
# Verify counter reached n
|
|
122
131
|
counter_np = counter.numpy()
|
|
123
|
-
expected = np.array([n], dtype=
|
|
132
|
+
expected = np.array([n], dtype=dtype)
|
|
124
133
|
|
|
125
134
|
if not np.array_equal(counter_np, expected):
|
|
126
135
|
print(f"Counter mismatch: expected {expected}, got {counter_np}")
|
|
@@ -128,53 +137,53 @@ def test_atomic_cas_2d(test, device, warp_type, numpy_type):
|
|
|
128
137
|
assert_np_equal(counter_np, expected)
|
|
129
138
|
|
|
130
139
|
|
|
131
|
-
def
|
|
140
|
+
def test_atomic_cas_3d(test, device, dtype, register_kernels=False):
|
|
141
|
+
warp_type = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
|
|
142
|
+
n = 100
|
|
143
|
+
counter = wp.array([0], dtype=warp_type, device=device)
|
|
144
|
+
lock = wp.zeros(shape=(1, 1, 1), dtype=warp_type, device=device)
|
|
145
|
+
|
|
132
146
|
@wp.func
|
|
133
|
-
def
|
|
147
|
+
def spinlock_acquire_3d(lock: wp.array3d(dtype=warp_type)):
|
|
134
148
|
# Try to acquire the lock by setting it to 1 if it's 0
|
|
135
|
-
while wp.atomic_cas(lock, 0, 0, 0,
|
|
149
|
+
while wp.atomic_cas(lock, 0, 0, 0, warp_type(0), warp_type(1)) == 1:
|
|
136
150
|
pass
|
|
137
151
|
|
|
138
152
|
@wp.func
|
|
139
|
-
def
|
|
153
|
+
def spinlock_release_3d(lock: wp.array3d(dtype=warp_type)):
|
|
140
154
|
# Release the lock by setting it back to 0
|
|
141
|
-
wp.atomic_exch(lock, 0, 0, 0,
|
|
155
|
+
wp.atomic_exch(lock, 0, 0, 0, warp_type(0))
|
|
142
156
|
|
|
143
157
|
@wp.func
|
|
144
|
-
def
|
|
145
|
-
value = wp.atomic_exch(ptr, index,
|
|
158
|
+
def volatile_read_3d(ptr: wp.array(dtype=warp_type), index: int):
|
|
159
|
+
value = wp.atomic_exch(ptr, index, warp_type(0))
|
|
146
160
|
wp.atomic_exch(ptr, index, value)
|
|
147
161
|
return value
|
|
148
162
|
|
|
149
|
-
|
|
150
|
-
def test_spinlock_counter(counter: wp.array(dtype=dtype), lock: wp.array(dtype=dtype, ndim=3)):
|
|
163
|
+
def test_spinlock_counter_3d(counter: wp.array(dtype=warp_type), lock: wp.array3d(dtype=warp_type)):
|
|
151
164
|
# Try to acquire the lock
|
|
152
|
-
|
|
165
|
+
spinlock_acquire_3d(lock)
|
|
153
166
|
|
|
154
167
|
# Critical section - increment counter
|
|
155
168
|
# counter[0] = counter[0] + 1 # This gives wrong results - counter should be marked as volatile
|
|
156
169
|
|
|
157
170
|
# Work around since warp arrays cannot be marked as volatile
|
|
158
|
-
value =
|
|
159
|
-
counter[0] = value +
|
|
171
|
+
value = volatile_read_3d(counter, 0)
|
|
172
|
+
counter[0] = value + warp_type(1)
|
|
160
173
|
|
|
161
174
|
# Release the lock
|
|
162
|
-
|
|
175
|
+
spinlock_release_3d(lock)
|
|
163
176
|
|
|
164
|
-
|
|
177
|
+
kernel = getkernel(test_spinlock_counter_3d, suffix=dtype.__name__)
|
|
165
178
|
|
|
179
|
+
if register_kernels:
|
|
180
|
+
return
|
|
166
181
|
|
|
167
|
-
|
|
168
|
-
n = 100
|
|
169
|
-
counter = wp.array([0], dtype=warp_type, device=device)
|
|
170
|
-
lock = wp.zeros(shape=(1, 1, 1), dtype=warp_type, device=device)
|
|
171
|
-
|
|
172
|
-
test_spinlock_counter = create_spinlock_test_3d(warp_type)
|
|
173
|
-
wp.launch(test_spinlock_counter, dim=n, inputs=[counter, lock], device=device)
|
|
182
|
+
wp.launch(kernel, dim=n, inputs=[counter, lock], device=device)
|
|
174
183
|
|
|
175
184
|
# Verify counter reached n
|
|
176
185
|
counter_np = counter.numpy()
|
|
177
|
-
expected = np.array([n], dtype=
|
|
186
|
+
expected = np.array([n], dtype=dtype)
|
|
178
187
|
|
|
179
188
|
if not np.array_equal(counter_np, expected):
|
|
180
189
|
print(f"Counter mismatch: expected {expected}, got {counter_np}")
|
|
@@ -218,17 +227,53 @@ def create_spinlock_test_4d(dtype):
|
|
|
218
227
|
return test_spinlock_counter
|
|
219
228
|
|
|
220
229
|
|
|
221
|
-
def test_atomic_cas_4d(test, device,
|
|
230
|
+
def test_atomic_cas_4d(test, device, dtype, register_kernels=False):
|
|
231
|
+
warp_type = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
|
|
222
232
|
n = 100
|
|
223
233
|
counter = wp.array([0], dtype=warp_type, device=device)
|
|
224
234
|
lock = wp.zeros(shape=(1, 1, 1, 1), dtype=warp_type, device=device)
|
|
225
235
|
|
|
226
|
-
|
|
227
|
-
wp.
|
|
236
|
+
@wp.func
|
|
237
|
+
def spinlock_acquire_4d(lock: wp.array4d(dtype=warp_type)):
|
|
238
|
+
# Try to acquire the lock by setting it to 1 if it's 0
|
|
239
|
+
while wp.atomic_cas(lock, 0, 0, 0, 0, warp_type(0), warp_type(1)) == 1:
|
|
240
|
+
pass
|
|
241
|
+
|
|
242
|
+
@wp.func
|
|
243
|
+
def spinlock_release_4d(lock: wp.array4d(dtype=warp_type)):
|
|
244
|
+
# Release the lock by setting it back to 0
|
|
245
|
+
wp.atomic_exch(lock, 0, 0, 0, 0, warp_type(0))
|
|
246
|
+
|
|
247
|
+
@wp.func
|
|
248
|
+
def volatile_read_4d(ptr: wp.array(dtype=warp_type), index: int):
|
|
249
|
+
value = wp.atomic_exch(ptr, index, warp_type(0))
|
|
250
|
+
wp.atomic_exch(ptr, index, value)
|
|
251
|
+
return value
|
|
252
|
+
|
|
253
|
+
def test_spinlock_counter_4d(counter: wp.array(dtype=warp_type), lock: wp.array4d(dtype=warp_type)):
|
|
254
|
+
# Try to acquire the lock
|
|
255
|
+
spinlock_acquire_4d(lock)
|
|
256
|
+
|
|
257
|
+
# Critical section - increment counter
|
|
258
|
+
# counter[0] = counter[0] + 1 # This gives wrong results - counter should be marked as volatile
|
|
259
|
+
|
|
260
|
+
# Work around since warp arrays cannot be marked as volatile
|
|
261
|
+
value = volatile_read_4d(counter, 0)
|
|
262
|
+
counter[0] = value + warp_type(1)
|
|
263
|
+
|
|
264
|
+
# Release the lock
|
|
265
|
+
spinlock_release_4d(lock)
|
|
266
|
+
|
|
267
|
+
kernel = getkernel(test_spinlock_counter_4d, suffix=dtype.__name__)
|
|
268
|
+
|
|
269
|
+
if register_kernels:
|
|
270
|
+
return
|
|
271
|
+
|
|
272
|
+
wp.launch(kernel, dim=n, inputs=[counter, lock], device=device)
|
|
228
273
|
|
|
229
274
|
# Verify counter reached n
|
|
230
275
|
counter_np = counter.numpy()
|
|
231
|
-
expected = np.array([n], dtype=
|
|
276
|
+
expected = np.array([n], dtype=dtype)
|
|
232
277
|
|
|
233
278
|
if not np.array_equal(counter_np, expected):
|
|
234
279
|
print(f"Counter mismatch: expected {expected}, got {counter_np}")
|
|
@@ -244,54 +289,22 @@ class TestAtomicCAS(unittest.TestCase):
|
|
|
244
289
|
|
|
245
290
|
|
|
246
291
|
# Test all supported types
|
|
247
|
-
|
|
248
|
-
(wp.int32, np.int32),
|
|
249
|
-
(wp.uint32, np.uint32),
|
|
250
|
-
(wp.int64, np.int64),
|
|
251
|
-
(wp.uint64, np.uint64),
|
|
252
|
-
(wp.float32, np.float32),
|
|
253
|
-
(wp.float64, np.float64),
|
|
254
|
-
]
|
|
255
|
-
|
|
256
|
-
for warp_type, numpy_type in test_types:
|
|
257
|
-
type_name = warp_type.__name__
|
|
258
|
-
add_function_test(
|
|
259
|
-
TestAtomicCAS,
|
|
260
|
-
f"test_cas_{type_name}",
|
|
261
|
-
test_atomic_cas,
|
|
262
|
-
devices=devices,
|
|
263
|
-
warp_type=warp_type,
|
|
264
|
-
numpy_type=numpy_type,
|
|
265
|
-
)
|
|
292
|
+
np_test_types = (np.int32, np.uint32, np.int64, np.uint64, np.float32, np.float64)
|
|
266
293
|
|
|
294
|
+
for dtype in np_test_types:
|
|
295
|
+
type_name = dtype.__name__
|
|
296
|
+
add_function_test_register_kernel(
|
|
297
|
+
TestAtomicCAS, f"test_cas_{type_name}", test_atomic_cas, devices=devices, dtype=dtype
|
|
298
|
+
)
|
|
267
299
|
# Add 2D test for each type
|
|
268
|
-
|
|
269
|
-
TestAtomicCAS,
|
|
270
|
-
f"test_cas_2d_{type_name}",
|
|
271
|
-
test_atomic_cas_2d,
|
|
272
|
-
devices=devices,
|
|
273
|
-
warp_type=warp_type,
|
|
274
|
-
numpy_type=numpy_type,
|
|
300
|
+
add_function_test_register_kernel(
|
|
301
|
+
TestAtomicCAS, f"test_cas_2d_{type_name}", test_atomic_cas_2d, devices=devices, dtype=dtype
|
|
275
302
|
)
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
add_function_test(
|
|
279
|
-
TestAtomicCAS,
|
|
280
|
-
f"test_cas_3d_{type_name}",
|
|
281
|
-
test_atomic_cas_3d,
|
|
282
|
-
devices=devices,
|
|
283
|
-
warp_type=warp_type,
|
|
284
|
-
numpy_type=numpy_type,
|
|
303
|
+
add_function_test_register_kernel(
|
|
304
|
+
TestAtomicCAS, f"test_cas_3d_{type_name}", test_atomic_cas_3d, devices=devices, dtype=dtype
|
|
285
305
|
)
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
add_function_test(
|
|
289
|
-
TestAtomicCAS,
|
|
290
|
-
f"test_cas_4d_{type_name}",
|
|
291
|
-
test_atomic_cas_4d,
|
|
292
|
-
devices=devices,
|
|
293
|
-
warp_type=warp_type,
|
|
294
|
-
numpy_type=numpy_type,
|
|
306
|
+
add_function_test_register_kernel(
|
|
307
|
+
TestAtomicCAS, f"test_cas_4d_{type_name}", test_atomic_cas_4d, devices=devices, dtype=dtype
|
|
295
308
|
)
|
|
296
309
|
|
|
297
310
|
if __name__ == "__main__":
|
warp/tests/test_mat.py
CHANGED
|
@@ -2225,6 +2225,27 @@ def test_mat_array_sub_inplace(test, device):
|
|
|
2225
2225
|
assert_np_equal(x.grad.numpy(), np.array([[[-1.0, -1.0], [-1.0, -1.0]]], dtype=float))
|
|
2226
2226
|
|
|
2227
2227
|
|
|
2228
|
+
@wp.kernel
|
|
2229
|
+
def scalar_mat_div(x: wp.array(dtype=wp.mat22), y: wp.array(dtype=wp.mat22)):
|
|
2230
|
+
i = wp.tid()
|
|
2231
|
+
y[i] = 1.0 / x[i]
|
|
2232
|
+
|
|
2233
|
+
|
|
2234
|
+
def test_scalar_mat_div(test, device):
|
|
2235
|
+
x = wp.array((wp.mat22(1.0, 2.0, 4.0, 8.0),), dtype=wp.mat22, requires_grad=True, device=device)
|
|
2236
|
+
y = wp.ones(1, dtype=wp.mat22, requires_grad=True, device=device)
|
|
2237
|
+
|
|
2238
|
+
tape = wp.Tape()
|
|
2239
|
+
with tape:
|
|
2240
|
+
wp.launch(scalar_mat_div, 1, inputs=(x,), outputs=(y,), device=device)
|
|
2241
|
+
|
|
2242
|
+
y.grad = wp.ones_like(y)
|
|
2243
|
+
tape.backward()
|
|
2244
|
+
|
|
2245
|
+
assert_np_equal(y.numpy(), np.array((((1.0, 0.5), (0.25, 0.125)),), dtype=float))
|
|
2246
|
+
assert_np_equal(x.grad.numpy(), np.array((((-1.0, -0.25), (-0.0625, -0.015625)),), dtype=float))
|
|
2247
|
+
|
|
2248
|
+
|
|
2228
2249
|
devices = get_test_devices()
|
|
2229
2250
|
|
|
2230
2251
|
|
|
@@ -2356,6 +2377,7 @@ add_function_test(TestMat, "test_mat_add_inplace", test_mat_add_inplace, devices
|
|
|
2356
2377
|
add_function_test(TestMat, "test_mat_sub_inplace", test_mat_sub_inplace, devices=devices)
|
|
2357
2378
|
add_function_test(TestMat, "test_mat_array_add_inplace", test_mat_array_add_inplace, devices=devices)
|
|
2358
2379
|
add_function_test(TestMat, "test_mat_array_sub_inplace", test_mat_array_sub_inplace, devices=devices)
|
|
2380
|
+
add_function_test(TestMat, "test_scalar_mat_div", test_scalar_mat_div, devices=devices)
|
|
2359
2381
|
|
|
2360
2382
|
|
|
2361
2383
|
if __name__ == "__main__":
|
warp/tests/test_quat.py
CHANGED
|
@@ -2372,6 +2372,27 @@ def test_quat_array_sub_inplace(test, device):
|
|
|
2372
2372
|
assert_np_equal(x.grad.numpy(), np.array([[-1.0, -1.0, -1.0, -1.0]], dtype=float))
|
|
2373
2373
|
|
|
2374
2374
|
|
|
2375
|
+
@wp.kernel
|
|
2376
|
+
def scalar_quat_div(x: wp.array(dtype=wp.quat), y: wp.array(dtype=wp.quat)):
|
|
2377
|
+
i = wp.tid()
|
|
2378
|
+
y[i] = 1.0 / x[i]
|
|
2379
|
+
|
|
2380
|
+
|
|
2381
|
+
def test_scalar_quat_div(test, device):
|
|
2382
|
+
x = wp.array((wp.quat(1.0, 2.0, 4.0, 8.0),), dtype=wp.quat, requires_grad=True, device=device)
|
|
2383
|
+
y = wp.ones(1, dtype=wp.quat, requires_grad=True, device=device)
|
|
2384
|
+
|
|
2385
|
+
tape = wp.Tape()
|
|
2386
|
+
with tape:
|
|
2387
|
+
wp.launch(scalar_quat_div, 1, inputs=(x,), outputs=(y,), device=device)
|
|
2388
|
+
|
|
2389
|
+
y.grad = wp.ones_like(y)
|
|
2390
|
+
tape.backward()
|
|
2391
|
+
|
|
2392
|
+
assert_np_equal(y.numpy(), np.array(((1.0, 0.5, 0.25, 0.125),), dtype=float))
|
|
2393
|
+
assert_np_equal(x.grad.numpy(), np.array(((-1.0, -0.25, -0.0625, -0.015625),), dtype=float))
|
|
2394
|
+
|
|
2395
|
+
|
|
2375
2396
|
devices = get_test_devices()
|
|
2376
2397
|
|
|
2377
2398
|
|
|
@@ -2483,6 +2504,7 @@ add_function_test(TestQuat, "test_quat_add_inplace", test_quat_add_inplace, devi
|
|
|
2483
2504
|
add_function_test(TestQuat, "test_quat_sub_inplace", test_quat_sub_inplace, devices=devices)
|
|
2484
2505
|
add_function_test(TestQuat, "test_quat_array_add_inplace", test_quat_array_add_inplace, devices=devices)
|
|
2485
2506
|
add_function_test(TestQuat, "test_quat_array_sub_inplace", test_quat_array_sub_inplace, devices=devices)
|
|
2507
|
+
add_function_test(TestQuat, "test_scalar_quat_div", test_scalar_quat_div, devices=devices)
|
|
2486
2508
|
|
|
2487
2509
|
|
|
2488
2510
|
if __name__ == "__main__":
|
warp/tests/test_sparse.py
CHANGED
|
@@ -140,6 +140,32 @@ def test_bsr_from_triplets(test, device):
|
|
|
140
140
|
bsr_set_from_triplets(bsr, rows, cols, vals)
|
|
141
141
|
|
|
142
142
|
|
|
143
|
+
def test_bsr_from_triplets_prune_numerical_zeros(test, device):
|
|
144
|
+
rows = wp.array([1, 0, 2, 3], dtype=int)
|
|
145
|
+
cols = wp.array([0, 1, 2, 3], dtype=int)
|
|
146
|
+
vals = wp.zeros(len(rows), dtype=float)
|
|
147
|
+
|
|
148
|
+
A = bsr_from_triplets(
|
|
149
|
+
rows_of_blocks=12, # Number of rows of blocks
|
|
150
|
+
cols_of_blocks=12, # Number of columns of blocks
|
|
151
|
+
rows=rows, # Row indices
|
|
152
|
+
columns=cols, # Column indices
|
|
153
|
+
values=vals, # Block values
|
|
154
|
+
prune_numerical_zeros=False,
|
|
155
|
+
)
|
|
156
|
+
assert A.nnz_sync() == 4
|
|
157
|
+
|
|
158
|
+
A = bsr_from_triplets(
|
|
159
|
+
rows_of_blocks=12, # Number of rows of blocks
|
|
160
|
+
cols_of_blocks=12, # Number of columns of blocks
|
|
161
|
+
rows=rows, # Row indices
|
|
162
|
+
columns=cols, # Column indices
|
|
163
|
+
values=vals, # Block values
|
|
164
|
+
prune_numerical_zeros=True,
|
|
165
|
+
)
|
|
166
|
+
assert A.nnz_sync() == 0
|
|
167
|
+
|
|
168
|
+
|
|
143
169
|
def test_bsr_from_triplets_gradient(test, device):
|
|
144
170
|
rng = np.random.default_rng(123)
|
|
145
171
|
|
|
@@ -604,6 +630,12 @@ class TestSparse(unittest.TestCase):
|
|
|
604
630
|
|
|
605
631
|
add_function_test(TestSparse, "test_csr_from_triplets", test_csr_from_triplets, devices=devices)
|
|
606
632
|
add_function_test(TestSparse, "test_bsr_from_triplets", test_bsr_from_triplets, devices=devices)
|
|
633
|
+
add_function_test(
|
|
634
|
+
TestSparse,
|
|
635
|
+
"test_bsr_from_triplets_prune_numerical_zeros",
|
|
636
|
+
test_bsr_from_triplets_prune_numerical_zeros,
|
|
637
|
+
devices=devices,
|
|
638
|
+
)
|
|
607
639
|
add_function_test(TestSparse, "test_bsr_get_diag", test_bsr_get_set_diag, devices=devices)
|
|
608
640
|
add_function_test(TestSparse, "test_bsr_split_merge", test_bsr_split_merge, devices=devices)
|
|
609
641
|
add_function_test(TestSparse, "test_bsr_assign_masked", test_bsr_assign_masked, devices=devices)
|
warp/tests/test_static.py
CHANGED
|
@@ -559,6 +559,54 @@ def test_static_len_query(test, _):
|
|
|
559
559
|
wp.launch(static_len_query_kernel, 1, inputs=(v1,))
|
|
560
560
|
|
|
561
561
|
|
|
562
|
+
@wp.func
|
|
563
|
+
def func_1() -> int:
|
|
564
|
+
return 1
|
|
565
|
+
|
|
566
|
+
|
|
567
|
+
@wp.func
|
|
568
|
+
def func_2() -> int:
|
|
569
|
+
return 2
|
|
570
|
+
|
|
571
|
+
|
|
572
|
+
funcs = [func_1, func_2]
|
|
573
|
+
|
|
574
|
+
|
|
575
|
+
def unresolved_builder(funcids):
|
|
576
|
+
_funcs = [funcs[id] for id in funcids]
|
|
577
|
+
|
|
578
|
+
@wp.kernel
|
|
579
|
+
def eval(input: wp.array(dtype=int), output: wp.array(dtype=int)):
|
|
580
|
+
for i in range(wp.static(len(_funcs))):
|
|
581
|
+
output[0] = wp.static(_funcs[i])()
|
|
582
|
+
|
|
583
|
+
return eval
|
|
584
|
+
|
|
585
|
+
|
|
586
|
+
def test_unresolved_static_expression(test, device):
|
|
587
|
+
# The module hash will need to be updated from the static expressions
|
|
588
|
+
# resolved at code generation time, since some of them cannot be evaluated
|
|
589
|
+
# at declaration time.
|
|
590
|
+
with wp.ScopedDevice(device):
|
|
591
|
+
output1 = wp.array((1,), dtype=int)
|
|
592
|
+
wp.launch(
|
|
593
|
+
unresolved_builder([0]),
|
|
594
|
+
dim=(1,),
|
|
595
|
+
inputs=[wp.array(np.array([0]), dtype=int)],
|
|
596
|
+
outputs=[output1],
|
|
597
|
+
)
|
|
598
|
+
test.assertEqual(output1.numpy()[0], 1)
|
|
599
|
+
|
|
600
|
+
output2 = wp.array((1,), dtype=int)
|
|
601
|
+
wp.launch(
|
|
602
|
+
unresolved_builder([1]),
|
|
603
|
+
dim=(1,),
|
|
604
|
+
inputs=[wp.array(np.array([1]), dtype=int)],
|
|
605
|
+
outputs=[output2],
|
|
606
|
+
)
|
|
607
|
+
test.assertEqual(output2.numpy()[0], 2)
|
|
608
|
+
|
|
609
|
+
|
|
562
610
|
devices = get_test_devices()
|
|
563
611
|
|
|
564
612
|
|
warp/tests/test_tape.py
CHANGED
|
@@ -157,6 +157,43 @@ def test_tape_zero_multiple_outputs(test, device):
|
|
|
157
157
|
assert_np_equal(x.grad.numpy(), np.ones(3, dtype=float))
|
|
158
158
|
|
|
159
159
|
|
|
160
|
+
@wp.struct
|
|
161
|
+
class NestedStruct:
|
|
162
|
+
arr: wp.array(dtype=float)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
@wp.struct
|
|
166
|
+
class WrapperStruct:
|
|
167
|
+
nested: NestedStruct
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
@wp.kernel
|
|
171
|
+
def nested_loss_kernel(wrapper: WrapperStruct, loss: wp.array(dtype=float)):
|
|
172
|
+
i = wp.tid()
|
|
173
|
+
wp.atomic_add(loss, 0, wrapper.nested.arr[i])
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def test_tape_nested_struct(test, device):
|
|
177
|
+
wrapper = WrapperStruct()
|
|
178
|
+
wrapper.nested = NestedStruct()
|
|
179
|
+
wrapper.nested.arr = wp.ones(shape=(1,), dtype=float, requires_grad=True, device=device)
|
|
180
|
+
|
|
181
|
+
loss = wp.zeros(shape=(1,), dtype=float, requires_grad=True, device=device)
|
|
182
|
+
|
|
183
|
+
tape = wp.Tape()
|
|
184
|
+
with tape:
|
|
185
|
+
wp.launch(nested_loss_kernel, dim=1, inputs=(wrapper, loss), device=device)
|
|
186
|
+
|
|
187
|
+
assert_np_equal(loss.numpy(), [1.0])
|
|
188
|
+
|
|
189
|
+
tape.backward(loss)
|
|
190
|
+
assert_np_equal(wrapper.nested.arr.grad.numpy(), [1.0])
|
|
191
|
+
|
|
192
|
+
tape.zero()
|
|
193
|
+
|
|
194
|
+
assert_np_equal(wrapper.nested.arr.grad.numpy(), [0.0])
|
|
195
|
+
|
|
196
|
+
|
|
160
197
|
def test_tape_visualize(test, device):
|
|
161
198
|
dim = 8
|
|
162
199
|
tape = wp.Tape()
|
|
@@ -196,6 +233,7 @@ add_function_test(TestTape, "test_tape_mul_constant", test_tape_mul_constant, de
|
|
|
196
233
|
add_function_test(TestTape, "test_tape_mul_variable", test_tape_mul_variable, devices=devices)
|
|
197
234
|
add_function_test(TestTape, "test_tape_dot_product", test_tape_dot_product, devices=devices)
|
|
198
235
|
add_function_test(TestTape, "test_tape_zero_multiple_outputs", test_tape_zero_multiple_outputs, devices=devices)
|
|
236
|
+
add_function_test(TestTape, "test_tape_nested_struct", test_tape_nested_struct, devices=devices)
|
|
199
237
|
add_function_test(TestTape, "test_tape_visualize", test_tape_visualize, devices=devices)
|
|
200
238
|
|
|
201
239
|
|