warp-lang 1.8.1-py3-none-macosx_10_13_universal2.whl → 1.9.1-py3-none-macosx_10_13_universal2.whl
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release: this version of warp-lang might be problematic.
- warp/__init__.py +282 -103
- warp/__init__.pyi +1904 -114
- warp/bin/libwarp-clang.dylib +0 -0
- warp/bin/libwarp.dylib +0 -0
- warp/build.py +93 -30
- warp/build_dll.py +331 -101
- warp/builtins.py +1244 -160
- warp/codegen.py +317 -206
- warp/config.py +1 -1
- warp/context.py +1465 -789
- warp/examples/core/example_marching_cubes.py +1 -0
- warp/examples/core/example_render_opengl.py +100 -3
- warp/examples/fem/example_apic_fluid.py +98 -52
- warp/examples/fem/example_convection_diffusion_dg.py +25 -4
- warp/examples/fem/example_diffusion_mgpu.py +8 -3
- warp/examples/fem/utils.py +68 -22
- warp/examples/interop/example_jax_kernel.py +2 -1
- warp/fabric.py +1 -1
- warp/fem/cache.py +27 -19
- warp/fem/domain.py +2 -2
- warp/fem/field/nodal_field.py +2 -2
- warp/fem/field/virtual.py +264 -166
- warp/fem/geometry/geometry.py +5 -5
- warp/fem/integrate.py +129 -51
- warp/fem/space/restriction.py +4 -0
- warp/fem/space/shape/tet_shape_function.py +3 -10
- warp/jax_experimental/custom_call.py +25 -2
- warp/jax_experimental/ffi.py +22 -1
- warp/jax_experimental/xla_ffi.py +16 -7
- warp/marching_cubes.py +708 -0
- warp/native/array.h +99 -4
- warp/native/builtin.h +86 -9
- warp/native/bvh.cpp +64 -28
- warp/native/bvh.cu +58 -58
- warp/native/bvh.h +2 -2
- warp/native/clang/clang.cpp +7 -7
- warp/native/coloring.cpp +8 -2
- warp/native/crt.cpp +2 -2
- warp/native/crt.h +3 -5
- warp/native/cuda_util.cpp +41 -10
- warp/native/cuda_util.h +10 -4
- warp/native/exports.h +1842 -1908
- warp/native/fabric.h +2 -1
- warp/native/hashgrid.cpp +37 -37
- warp/native/hashgrid.cu +2 -2
- warp/native/initializer_array.h +1 -1
- warp/native/intersect.h +2 -2
- warp/native/mat.h +1910 -116
- warp/native/mathdx.cpp +43 -43
- warp/native/mesh.cpp +24 -24
- warp/native/mesh.cu +26 -26
- warp/native/mesh.h +4 -2
- warp/native/nanovdb/GridHandle.h +179 -12
- warp/native/nanovdb/HostBuffer.h +8 -7
- warp/native/nanovdb/NanoVDB.h +517 -895
- warp/native/nanovdb/NodeManager.h +323 -0
- warp/native/nanovdb/PNanoVDB.h +2 -2
- warp/native/quat.h +331 -14
- warp/native/range.h +7 -1
- warp/native/reduce.cpp +10 -10
- warp/native/reduce.cu +13 -14
- warp/native/runlength_encode.cpp +2 -2
- warp/native/runlength_encode.cu +5 -5
- warp/native/scan.cpp +3 -3
- warp/native/scan.cu +4 -4
- warp/native/sort.cpp +10 -10
- warp/native/sort.cu +40 -31
- warp/native/sort.h +2 -0
- warp/native/sparse.cpp +8 -8
- warp/native/sparse.cu +13 -13
- warp/native/spatial.h +366 -17
- warp/native/temp_buffer.h +2 -2
- warp/native/tile.h +471 -82
- warp/native/vec.h +328 -14
- warp/native/volume.cpp +54 -54
- warp/native/volume.cu +1 -1
- warp/native/volume.h +2 -1
- warp/native/volume_builder.cu +30 -37
- warp/native/warp.cpp +150 -149
- warp/native/warp.cu +377 -216
- warp/native/warp.h +227 -226
- warp/optim/linear.py +736 -271
- warp/render/imgui_manager.py +289 -0
- warp/render/render_opengl.py +99 -18
- warp/render/render_usd.py +1 -0
- warp/sim/graph_coloring.py +2 -2
- warp/sparse.py +558 -175
- warp/tests/aux_test_module_aot.py +7 -0
- warp/tests/cuda/test_async.py +3 -3
- warp/tests/cuda/test_conditional_captures.py +101 -0
- warp/tests/geometry/test_hash_grid.py +38 -0
- warp/tests/geometry/test_marching_cubes.py +233 -12
- warp/tests/interop/test_jax.py +608 -28
- warp/tests/sim/test_coloring.py +6 -6
- warp/tests/test_array.py +58 -5
- warp/tests/test_codegen.py +4 -3
- warp/tests/test_context.py +8 -15
- warp/tests/test_enum.py +136 -0
- warp/tests/test_examples.py +2 -2
- warp/tests/test_fem.py +49 -6
- warp/tests/test_fixedarray.py +229 -0
- warp/tests/test_func.py +18 -15
- warp/tests/test_future_annotations.py +7 -5
- warp/tests/test_linear_solvers.py +30 -0
- warp/tests/test_map.py +15 -1
- warp/tests/test_mat.py +1518 -378
- warp/tests/test_mat_assign_copy.py +178 -0
- warp/tests/test_mat_constructors.py +574 -0
- warp/tests/test_module_aot.py +287 -0
- warp/tests/test_print.py +69 -0
- warp/tests/test_quat.py +140 -34
- warp/tests/test_quat_assign_copy.py +145 -0
- warp/tests/test_reload.py +2 -1
- warp/tests/test_sparse.py +71 -0
- warp/tests/test_spatial.py +140 -34
- warp/tests/test_spatial_assign_copy.py +160 -0
- warp/tests/test_struct.py +43 -3
- warp/tests/test_tuple.py +96 -0
- warp/tests/test_types.py +61 -20
- warp/tests/test_vec.py +179 -34
- warp/tests/test_vec_assign_copy.py +143 -0
- warp/tests/tile/test_tile.py +245 -18
- warp/tests/tile/test_tile_cholesky.py +605 -0
- warp/tests/tile/test_tile_load.py +169 -0
- warp/tests/tile/test_tile_mathdx.py +2 -558
- warp/tests/tile/test_tile_matmul.py +1 -1
- warp/tests/tile/test_tile_mlp.py +1 -1
- warp/tests/tile/test_tile_shared_memory.py +5 -5
- warp/tests/unittest_suites.py +6 -0
- warp/tests/walkthrough_debug.py +1 -1
- warp/thirdparty/unittest_parallel.py +108 -9
- warp/types.py +571 -267
- warp/utils.py +68 -86
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/METADATA +29 -69
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/RECORD +138 -128
- warp/native/marching.cpp +0 -19
- warp/native/marching.cu +0 -514
- warp/native/marching.h +0 -19
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/WHEEL +0 -0
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/licenses/LICENSE.md +0 -0
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/top_level.txt +0 -0
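The per-file summary above can be approximated locally. The sketch below is illustrative only and is not part of warp or the registry's tooling; the wheel filenames are assumptions, and it presumes both wheels have already been downloaded (for example with `pip download warp-lang==1.8.1` and `==1.9.1`). It counts added and removed lines per member using only the Python standard library.

import difflib
import zipfile

# Hypothetical local filenames; adjust to wherever the wheels were downloaded.
OLD = "warp_lang-1.8.1-py3-none-macosx_10_13_universal2.whl"
NEW = "warp_lang-1.9.1-py3-none-macosx_10_13_universal2.whl"


def wheel_texts(path):
    """Return {member name: list of text lines, or None for binary members}."""
    out = {}
    with zipfile.ZipFile(path) as zf:
        for name in zf.namelist():
            data = zf.read(name)
            try:
                out[name] = data.decode("utf-8").splitlines(keepends=True)
            except UnicodeDecodeError:
                out[name] = None  # e.g. libwarp.dylib, reported above as "+0 -0"
    return out


old, new = wheel_texts(OLD), wheel_texts(NEW)
for name in sorted(set(old) | set(new)):
    a = old.get(name, [])
    b = new.get(name, [])
    if a is None or b is None:
        continue  # skip binary members
    added = removed = 0
    for line in difflib.unified_diff(a, b, lineterm=""):
        if line.startswith("+") and not line.startswith("+++"):
            added += 1
        elif line.startswith("-") and not line.startswith("---"):
            removed += 1
    if added or removed:
        print(f"{name} +{added} -{removed}")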
warp/tests/tile/test_tile.py
CHANGED
@@ -109,12 +109,29 @@ def test_tile_copy_2d(test, device):
 
 
 @wp.func
-def unary_func(x:
+def unary_func(x: wp.float32):
     return wp.sin(x)
 
 
+@wp.func
+def unary_func(x: wp.float64):
+    return wp.sin(x)
+
+
+@wp.kernel
+def tile_unary_map_user_func(input: wp.array2d(dtype=Any), output: wp.array2d(dtype=Any)):
+    # tile index
+    i, j = wp.tid()
+
+    a = wp.tile_load(input, shape=(TILE_M, TILE_N), offset=(i * TILE_M, j * TILE_N))
+
+    sa = wp.tile_map(unary_func, a)
+
+    wp.tile_store(output, sa, offset=(i * TILE_M, j * TILE_N))
+
+
 @wp.kernel
-def
+def tile_unary_map_builtin_func(input: wp.array2d(dtype=Any), output: wp.array2d(dtype=Any)):
     # tile index
     i, j = wp.tid()
 
@@ -131,17 +148,76 @@ def test_tile_unary_map(test, device):
     M = TILE_M * 7
     N = TILE_N * 5
 
-
-
+    def run(kernel, dtype):
+        A = rng.random((M, N), dtype=dtype)
+        B = np.sin(A)
+
+        A_grad = np.cos(A)
+
+        A_wp = wp.array(A, requires_grad=True, device=device)
+        B_wp = wp.zeros_like(A_wp, requires_grad=True, device=device)
+
+        with wp.Tape() as tape:
+            wp.launch_tiled(
+                kernel,
+                dim=[int(M / TILE_M), int(N / TILE_N)],
+                inputs=[A_wp, B_wp],
+                block_dim=TILE_DIM,
+                device=device,
+            )
+
+        tol = 1.0e-6 if dtype == np.float64 else 1.0e-4
+
+        # verify forward pass
+        assert_np_equal(B_wp.numpy(), B, tol=tol)
+
+        # verify backward pass
+        B_wp.grad = wp.ones_like(B_wp, device=device)
+        tape.backward()
+
+        assert_np_equal(A_wp.grad.numpy(), A_grad, tol=tol)
+
+    dtypes = [np.float32, np.float64]
+
+    for dtype in dtypes:
+        run(tile_unary_map_user_func, dtype)
+        run(tile_unary_map_builtin_func, dtype)
 
-
+
+@wp.func
+def unary_func_mixed_types(x: int) -> float:
+    return wp.sin(float(x))
+
+
+@wp.kernel
+def tile_unary_map_mixed_types(input: wp.array2d(dtype=int), output: wp.array2d(dtype=float)):
+    # tile index
+    i, j = wp.tid()
+
+    a = wp.tile_load(input, shape=(TILE_M, TILE_N), offset=(i * TILE_M, j * TILE_N))
+
+    sa = wp.tile_map(unary_func_mixed_types, a)
+
+    wp.tile_store(output, sa, offset=(i * TILE_M, j * TILE_N))
+
+
+def test_tile_unary_map_mixed_types(test, device):
+    rng = np.random.default_rng(42)
+
+    M = TILE_M * 7
+    N = TILE_N * 5
+
+    A = rng.integers(0, 100, size=(M, N), dtype=np.int32)
+    B = np.sin(A.astype(np.float32))
+
+    A_grad = np.cos(A.astype(np.float32))
 
     A_wp = wp.array(A, requires_grad=True, device=device)
-    B_wp = wp.
+    B_wp = wp.zeros((M, N), dtype=float, requires_grad=True, device=device)
 
     with wp.Tape() as tape:
         wp.launch_tiled(
-
+            tile_unary_map_mixed_types,
             dim=[int(M / TILE_M), int(N / TILE_N)],
             inputs=[A_wp, B_wp],
             block_dim=TILE_DIM,
@@ -155,17 +231,23 @@ def test_tile_unary_map(test, device):
     B_wp.grad = wp.ones_like(B_wp, device=device)
     tape.backward()
 
-
+    # The a gradients are now stored as ints and can't capture the correct values
+    # assert_np_equal(A_wp.grad.numpy(), A_grad, tol=1.0e-6)
 
 
 @wp.func
-def binary_func(x:
-    return
+def binary_func(x: wp.float32, y: wp.float32):
+    return x + y
+
+
+@wp.func
+def binary_func(x: wp.float64, y: wp.float64):
+    return x + y
 
 
 @wp.kernel
-def
-    input_a: wp.array2d(dtype=
+def tile_binary_map_user_func(
+    input_a: wp.array2d(dtype=Any), input_b: wp.array2d(dtype=Any), output: wp.array2d(dtype=Any)
 ):
     # tile index
     i, j = wp.tid()
@@ -178,26 +260,107 @@ def tile_binary_map(
     wp.tile_store(output, sa, offset=(i * TILE_M, j * TILE_N))
 
 
+@wp.kernel
+def tile_binary_map_builtin_func(
+    input_a: wp.array2d(dtype=Any), input_b: wp.array2d(dtype=Any), output: wp.array2d(dtype=Any)
+):
+    # tile index
+    i, j = wp.tid()
+
+    a = wp.tile_load(input_a, shape=(TILE_M, TILE_N), offset=(i * TILE_M, j * TILE_N))
+    b = wp.tile_load(input_b, shape=(TILE_M, TILE_N), offset=(i * TILE_M, j * TILE_N))
+
+    sa = wp.tile_map(wp.add, a, b)
+
+    wp.tile_store(output, sa, offset=(i * TILE_M, j * TILE_N))
+
+
 def test_tile_binary_map(test, device):
     rng = np.random.default_rng(42)
 
     M = TILE_M * 7
     N = TILE_N * 5
 
-
+    def run(kernel, dtype):
+        A = rng.random((M, N), dtype=dtype)
+        B = rng.random((M, N), dtype=dtype)
+        C = A + B
+
+        A_grad = np.ones_like(A)
+        B_grad = np.ones_like(B)
+
+        A_wp = wp.array(A, requires_grad=True, device=device)
+        B_wp = wp.array(B, requires_grad=True, device=device)
+        C_wp = wp.zeros_like(A_wp, requires_grad=True, device=device)
+
+        with wp.Tape() as tape:
+            wp.launch_tiled(
+                kernel,
+                dim=[int(M / TILE_M), int(N / TILE_N)],
+                inputs=[A_wp, B_wp, C_wp],
+                block_dim=TILE_DIM,
+                device=device,
+            )
+
+        tol = 1.0e-6 if dtype == np.float64 else 1.0e-4
+
+        # verify forward pass
+        assert_np_equal(C_wp.numpy(), C, tol=tol)
+
+        # verify backward pass
+        C_wp.grad = wp.ones_like(C_wp, device=device)
+        tape.backward()
+
+        assert_np_equal(A_wp.grad.numpy(), A_grad, tol=tol)
+        assert_np_equal(B_wp.grad.numpy(), B_grad, tol=tol)
+
+    dtypes = [np.float32, np.float64]
+
+    for dtype in dtypes:
+        run(tile_binary_map_builtin_func, dtype)
+        run(tile_binary_map_user_func, dtype)
+
+
+@wp.func
+def binary_func_mixed_types(x: int, y: float) -> float:
+    return wp.sin(float(x)) + y
+
+
+@wp.kernel
+def tile_binary_map_mixed_types(
+    input_a: wp.array2d(dtype=int), input_b: wp.array2d(dtype=float), output: wp.array2d(dtype=float)
+):
+    # tile index
+    i, j = wp.tid()
+
+    a = wp.tile_load(input_a, shape=(TILE_M, TILE_N), offset=(i * TILE_M, j * TILE_N))
+    b = wp.tile_load(input_b, shape=(TILE_M, TILE_N), offset=(i * TILE_M, j * TILE_N))
+
+    sa = wp.tile_map(binary_func_mixed_types, a, b)
+
+    wp.tile_store(output, sa, offset=(i * TILE_M, j * TILE_N))
+
+
+def test_tile_binary_map_mixed_types(test, device):
+    rng = np.random.default_rng(42)
+
+    M = TILE_M * 7
+    N = TILE_N * 5
+
+    A = rng.integers(0, 100, size=(M, N), dtype=np.int32)
     B = rng.random((M, N), dtype=np.float32)
-    C = np.sin(A) + B
+    C = np.sin(A.astype(np.float32)) + B
 
-    A_grad = np.cos(A)
+    A_grad = np.cos(A.astype(np.float32))
     B_grad = np.ones_like(B)
 
     A_wp = wp.array(A, requires_grad=True, device=device)
     B_wp = wp.array(B, requires_grad=True, device=device)
-    C_wp = wp.zeros_like(
+    C_wp = wp.zeros_like(B_wp, requires_grad=True, device=device)
 
     with wp.Tape() as tape:
         wp.launch_tiled(
-
+            tile_binary_map_mixed_types,
            dim=[int(M / TILE_M), int(N / TILE_N)],
            inputs=[A_wp, B_wp, C_wp],
            block_dim=TILE_DIM,
@@ -211,7 +374,8 @@ def test_tile_binary_map(test, device):
     C_wp.grad = wp.ones_like(C_wp, device=device)
     tape.backward()
 
-
+    # The a gradiens are now stored as ints and can't capture the correct values
+    # assert_np_equal(A_wp.grad.numpy(), A_grad, tol=1.0e-6)
     assert_np_equal(B_wp.grad.numpy(), B_grad)
 
 
@@ -673,6 +837,66 @@ def test_tile_assign(test, device):
     assert_np_equal(x.grad.numpy(), np.full(TILE_M, 1.0, dtype=np.float32))
 
 
+@wp.kernel
+def test_tile_where_kernel(select: int, x: wp.array(dtype=float), y: wp.array(dtype=float), z: wp.array(dtype=float)):
+    x_reg = wp.tile_load(x, shape=(TILE_M,), storage="register")
+    y_reg = wp.tile_load(y, shape=(TILE_M,), storage="register")
+
+    x_shared = wp.tile_load(x, shape=(TILE_M,), storage="shared")
+    y_shared = wp.tile_load(y, shape=(TILE_M,), storage="shared")
+
+    if select == 0:
+        s = x_reg
+    elif select == 1:
+        s = y_reg
+    elif select == 2:
+        s = x_shared
+    else:
+        s = y_shared
+
+    wp.tile_store(z, s)
+
+
+def test_tile_where(test, device):
+    x = wp.full((TILE_M,), 1.0, dtype=float, device=device, requires_grad=True)
+    y = wp.full((TILE_M,), 2.0, dtype=float, device=device, requires_grad=True)
+    z = wp.zeros((TILE_M), dtype=float, device=device, requires_grad=True)
+
+    z_expected = [
+        np.full(TILE_M, 1.0, dtype=np.float32),
+        np.full(TILE_M, 2.0, dtype=np.float32),
+        np.full(TILE_M, 1.0, dtype=np.float32),
+        np.full(TILE_M, 2.0, dtype=np.float32),
+    ]
+    x_grad_expected = [
+        np.full(TILE_M, 1.0, dtype=np.float32),
+        np.full(TILE_M, 0.0, dtype=np.float32),
+        np.full(TILE_M, 1.0, dtype=np.float32),
+        np.full(TILE_M, 0.0, dtype=np.float32),
+    ]
+    y_grad_expected = [
+        np.full(TILE_M, 0.0, dtype=np.float32),
+        np.full(TILE_M, 1.0, dtype=np.float32),
+        np.full(TILE_M, 0.0, dtype=np.float32),
+        np.full(TILE_M, 1.0, dtype=np.float32),
+    ]
+
+    for i in range(4):
+        tape = wp.Tape()
+        with tape:
+            wp.launch_tiled(test_tile_where_kernel, dim=[1], inputs=[i, x, y], outputs=[z], block_dim=32, device=device)
+
+        z.grad = wp.ones_like(z)
+
+        tape.backward()
+
+        assert_np_equal(z.numpy(), z_expected[i])
+        assert_np_equal(x.grad.numpy(), x_grad_expected[i])
+        assert_np_equal(y.grad.numpy(), y_grad_expected[i])
+
+        tape.zero()
+
+
 @wp.kernel
 def test_tile_transpose_kernel(input: wp.array2d(dtype=float), output: wp.array2d(dtype=float)):
     x = wp.tile_load(input, shape=(TILE_M, TILE_N))
@@ -1085,7 +1309,9 @@ class TestTile(unittest.TestCase):
 add_function_test(TestTile, "test_tile_copy_1d", test_tile_copy_1d, devices=devices)
 add_function_test(TestTile, "test_tile_copy_2d", test_tile_copy_2d, devices=devices)
 add_function_test(TestTile, "test_tile_unary_map", test_tile_unary_map, devices=devices)
+add_function_test(TestTile, "test_tile_unary_map_mixed_types", test_tile_unary_map_mixed_types, devices=devices)
 add_function_test(TestTile, "test_tile_binary_map", test_tile_binary_map, devices=devices)
+add_function_test(TestTile, "test_tile_binary_map_mixed_types", test_tile_binary_map_mixed_types, devices=devices)
 add_function_test(TestTile, "test_tile_transpose", test_tile_transpose, devices=devices)
 add_function_test(TestTile, "test_tile_operators", test_tile_operators, devices=devices)
 add_function_test(TestTile, "test_tile_tile", test_tile_tile, devices=get_cuda_test_devices())
@@ -1095,6 +1321,7 @@ add_function_test(TestTile, "test_tile_sum_launch", test_tile_sum_launch, device
 add_function_test(TestTile, "test_tile_extract", test_tile_extract, devices=devices)
 add_function_test(TestTile, "test_tile_extract_repeated", test_tile_extract_repeated, devices=devices)
 add_function_test(TestTile, "test_tile_assign", test_tile_assign, devices=devices)
+add_function_test(TestTile, "test_tile_where", test_tile_where, devices=devices)
 add_function_test(TestTile, "test_tile_broadcast_add_1d", test_tile_broadcast_add_1d, devices=devices)
 add_function_test(TestTile, "test_tile_broadcast_add_2d", test_tile_broadcast_add_2d, devices=devices)
 add_function_test(TestTile, "test_tile_broadcast_add_3d", test_tile_broadcast_add_3d, devices=devices)