warp-lang 1.4.1__py3-none-macosx_10_13_universal2.whl → 1.5.0__py3-none-macosx_10_13_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +4 -0
- warp/autograd.py +43 -8
- warp/bin/libwarp-clang.dylib +0 -0
- warp/bin/libwarp.dylib +0 -0
- warp/build.py +21 -2
- warp/build_dll.py +23 -6
- warp/builtins.py +1920 -111
- warp/codegen.py +186 -62
- warp/config.py +2 -2
- warp/context.py +322 -73
- warp/examples/assets/pixel.jpg +0 -0
- warp/examples/benchmarks/benchmark_cloth_paddle.py +86 -0
- warp/examples/benchmarks/benchmark_gemm.py +121 -0
- warp/examples/benchmarks/benchmark_interop_paddle.py +158 -0
- warp/examples/benchmarks/benchmark_tile.py +179 -0
- warp/examples/core/example_dem.py +2 -1
- warp/examples/core/example_mesh_intersect.py +3 -3
- warp/examples/fem/example_adaptive_grid.py +37 -10
- warp/examples/fem/example_apic_fluid.py +3 -2
- warp/examples/fem/example_convection_diffusion_dg.py +4 -5
- warp/examples/fem/example_deformed_geometry.py +1 -1
- warp/examples/fem/example_diffusion_3d.py +47 -4
- warp/examples/fem/example_distortion_energy.py +220 -0
- warp/examples/fem/example_magnetostatics.py +127 -85
- warp/examples/fem/example_nonconforming_contact.py +5 -5
- warp/examples/fem/example_stokes.py +3 -1
- warp/examples/fem/example_streamlines.py +12 -19
- warp/examples/fem/utils.py +38 -15
- warp/examples/optim/example_walker.py +2 -2
- warp/examples/sim/example_cloth.py +2 -25
- warp/examples/sim/example_jacobian_ik.py +6 -2
- warp/examples/sim/example_quadruped.py +2 -1
- warp/examples/tile/example_tile_convolution.py +58 -0
- warp/examples/tile/example_tile_fft.py +47 -0
- warp/examples/tile/example_tile_filtering.py +105 -0
- warp/examples/tile/example_tile_matmul.py +79 -0
- warp/examples/tile/example_tile_mlp.py +375 -0
- warp/fem/__init__.py +8 -0
- warp/fem/cache.py +16 -12
- warp/fem/dirichlet.py +1 -1
- warp/fem/domain.py +44 -1
- warp/fem/field/__init__.py +1 -2
- warp/fem/field/field.py +31 -19
- warp/fem/field/nodal_field.py +101 -49
- warp/fem/field/virtual.py +794 -0
- warp/fem/geometry/__init__.py +2 -2
- warp/fem/geometry/deformed_geometry.py +3 -105
- warp/fem/geometry/element.py +13 -0
- warp/fem/geometry/geometry.py +165 -5
- warp/fem/geometry/grid_2d.py +3 -6
- warp/fem/geometry/grid_3d.py +31 -28
- warp/fem/geometry/hexmesh.py +3 -46
- warp/fem/geometry/nanogrid.py +3 -2
- warp/fem/geometry/{quadmesh_2d.py → quadmesh.py} +280 -159
- warp/fem/geometry/tetmesh.py +2 -43
- warp/fem/geometry/{trimesh_2d.py → trimesh.py} +354 -186
- warp/fem/integrate.py +683 -261
- warp/fem/linalg.py +404 -0
- warp/fem/operator.py +101 -18
- warp/fem/polynomial.py +5 -5
- warp/fem/quadrature/quadrature.py +45 -21
- warp/fem/space/__init__.py +45 -11
- warp/fem/space/basis_function_space.py +451 -0
- warp/fem/space/basis_space.py +58 -11
- warp/fem/space/function_space.py +146 -5
- warp/fem/space/grid_2d_function_space.py +80 -66
- warp/fem/space/grid_3d_function_space.py +113 -68
- warp/fem/space/hexmesh_function_space.py +96 -108
- warp/fem/space/nanogrid_function_space.py +62 -110
- warp/fem/space/quadmesh_function_space.py +208 -0
- warp/fem/space/shape/__init__.py +45 -7
- warp/fem/space/shape/cube_shape_function.py +328 -54
- warp/fem/space/shape/shape_function.py +10 -1
- warp/fem/space/shape/square_shape_function.py +328 -60
- warp/fem/space/shape/tet_shape_function.py +269 -19
- warp/fem/space/shape/triangle_shape_function.py +238 -19
- warp/fem/space/tetmesh_function_space.py +69 -37
- warp/fem/space/topology.py +38 -0
- warp/fem/space/trimesh_function_space.py +179 -0
- warp/fem/utils.py +6 -331
- warp/jax_experimental.py +3 -1
- warp/native/array.h +55 -40
- warp/native/builtin.h +124 -43
- warp/native/bvh.h +4 -0
- warp/native/coloring.cpp +600 -0
- warp/native/cuda_util.cpp +14 -0
- warp/native/cuda_util.h +2 -1
- warp/native/fabric.h +8 -0
- warp/native/hashgrid.h +4 -0
- warp/native/marching.cu +8 -0
- warp/native/mat.h +14 -3
- warp/native/mathdx.cpp +59 -0
- warp/native/mesh.h +4 -0
- warp/native/range.h +13 -1
- warp/native/reduce.cpp +9 -1
- warp/native/reduce.cu +7 -0
- warp/native/runlength_encode.cpp +9 -1
- warp/native/runlength_encode.cu +7 -1
- warp/native/scan.cpp +8 -0
- warp/native/scan.cu +8 -0
- warp/native/scan.h +8 -1
- warp/native/sparse.cpp +8 -0
- warp/native/sparse.cu +8 -0
- warp/native/temp_buffer.h +7 -0
- warp/native/tile.h +1857 -0
- warp/native/tile_gemm.h +341 -0
- warp/native/tile_reduce.h +210 -0
- warp/native/volume_builder.cu +8 -0
- warp/native/volume_builder.h +8 -0
- warp/native/warp.cpp +10 -2
- warp/native/warp.cu +369 -15
- warp/native/warp.h +12 -2
- warp/optim/adam.py +39 -4
- warp/paddle.py +29 -12
- warp/render/render_opengl.py +137 -65
- warp/sim/graph_coloring.py +292 -0
- warp/sim/integrator_euler.py +4 -2
- warp/sim/integrator_featherstone.py +115 -44
- warp/sim/integrator_vbd.py +6 -0
- warp/sim/model.py +90 -17
- warp/stubs.py +651 -85
- warp/tape.py +12 -7
- warp/tests/assets/pixel.npy +0 -0
- warp/tests/aux_test_instancing_gc.py +18 -0
- warp/tests/test_array.py +207 -48
- warp/tests/test_closest_point_edge_edge.py +8 -8
- warp/tests/test_codegen.py +120 -1
- warp/tests/test_codegen_instancing.py +30 -0
- warp/tests/test_collision.py +110 -0
- warp/tests/test_coloring.py +241 -0
- warp/tests/test_context.py +34 -0
- warp/tests/test_examples.py +18 -4
- warp/tests/test_fabricarray.py +33 -0
- warp/tests/test_fem.py +453 -113
- warp/tests/test_func.py +48 -1
- warp/tests/test_generics.py +52 -0
- warp/tests/test_iter.py +68 -0
- warp/tests/test_mat_scalar_ops.py +1 -1
- warp/tests/test_mesh_query_point.py +5 -4
- warp/tests/test_module_hashing.py +23 -0
- warp/tests/test_paddle.py +27 -87
- warp/tests/test_print.py +191 -1
- warp/tests/test_spatial.py +1 -1
- warp/tests/test_tile.py +700 -0
- warp/tests/test_tile_mathdx.py +144 -0
- warp/tests/test_tile_mlp.py +383 -0
- warp/tests/test_tile_reduce.py +374 -0
- warp/tests/test_tile_shared_memory.py +190 -0
- warp/tests/test_vbd.py +12 -20
- warp/tests/test_volume.py +43 -0
- warp/tests/unittest_suites.py +23 -2
- warp/tests/unittest_utils.py +4 -0
- warp/types.py +339 -73
- warp/utils.py +22 -1
- {warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/METADATA +33 -7
- {warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/RECORD +159 -132
- {warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/WHEEL +1 -1
- warp/fem/field/test.py +0 -180
- warp/fem/field/trial.py +0 -183
- warp/fem/space/collocated_function_space.py +0 -102
- warp/fem/space/quadmesh_2d_function_space.py +0 -261
- warp/fem/space/trimesh_2d_function_space.py +0 -153
- {warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/LICENSE.md +0 -0
- {warp_lang-1.4.1.dist-info → warp_lang-1.5.0.dist-info}/top_level.txt +0 -0
warp/tape.py
CHANGED
|
@@ -15,7 +15,7 @@ class Tape:
|
|
|
15
15
|
"""
|
|
16
16
|
Record kernel launches within a Tape scope to enable automatic differentiation.
|
|
17
17
|
Gradients can be computed after the operations have been recorded on the tape via
|
|
18
|
-
|
|
18
|
+
:meth:`Tape.backward()`.
|
|
19
19
|
|
|
20
20
|
Example
|
|
21
21
|
-------
|
|
@@ -131,6 +131,7 @@ class Tape:
|
|
|
131
131
|
inputs = launch[3]
|
|
132
132
|
outputs = launch[4]
|
|
133
133
|
device = launch[5]
|
|
134
|
+
block_dim = launch[6]
|
|
134
135
|
|
|
135
136
|
adj_inputs = []
|
|
136
137
|
adj_outputs = []
|
|
@@ -153,13 +154,14 @@ class Tape:
|
|
|
153
154
|
device=device,
|
|
154
155
|
adjoint=True,
|
|
155
156
|
max_blocks=max_blocks,
|
|
157
|
+
block_dim=block_dim,
|
|
156
158
|
)
|
|
157
159
|
|
|
158
160
|
# record a kernel launch on the tape
|
|
159
|
-
def record_launch(self, kernel, dim, max_blocks, inputs, outputs, device, metadata=None):
|
|
161
|
+
def record_launch(self, kernel, dim, max_blocks, inputs, outputs, device, block_dim=0, metadata=None):
|
|
160
162
|
if metadata is None:
|
|
161
163
|
metadata = {}
|
|
162
|
-
self.launches.append([kernel, dim, max_blocks, inputs, outputs, device, metadata])
|
|
164
|
+
self.launches.append([kernel, dim, max_blocks, inputs, outputs, device, block_dim, metadata])
|
|
163
165
|
|
|
164
166
|
def record_func(self, backward, arrays):
|
|
165
167
|
"""
|
|
@@ -614,7 +616,9 @@ class ArrayStatsVisitor(TapeVisitor):
|
|
|
614
616
|
self.array_grad_stats.insert(0, grad_stats)
|
|
615
617
|
|
|
616
618
|
|
|
617
|
-
Launch = namedtuple("Launch", ["id", "kernel", "dim", "max_blocks", "inputs", "outputs", "device", "metadata"])
|
|
619
|
+
Launch = namedtuple(
|
|
620
|
+
"Launch", ["id", "kernel", "dim", "max_blocks", "inputs", "outputs", "device", "block_dim", "metadata"]
|
|
621
|
+
)
|
|
618
622
|
RepeatedSequence = namedtuple("RepeatedSequence", ["start", "end", "repetitions"])
|
|
619
623
|
|
|
620
624
|
|
|
@@ -645,8 +649,8 @@ def visit_tape(
|
|
|
645
649
|
def get_launch_id(launch):
|
|
646
650
|
kernel = launch[0]
|
|
647
651
|
suffix = ""
|
|
648
|
-
if len(launch) > 6:
|
|
649
|
-
metadata = launch[6]
|
|
652
|
+
if len(launch) > 7:
|
|
653
|
+
metadata = launch[7]
|
|
650
654
|
# calling function helps to identify unique launches
|
|
651
655
|
if "caller" in metadata:
|
|
652
656
|
caller = metadata["caller"]
|
|
@@ -680,7 +684,8 @@ def visit_tape(
|
|
|
680
684
|
inputs=launch[3],
|
|
681
685
|
outputs=launch[4],
|
|
682
686
|
device=launch[5],
|
|
683
|
-
|
|
687
|
+
block_dim=launch[6],
|
|
688
|
+
metadata=launch[7] if len(launch) > 7 else {},
|
|
684
689
|
)
|
|
685
690
|
for launch in kernel_launches
|
|
686
691
|
]
|
|
Binary file
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# Copyright (c) 2024 NVIDIA CORPORATION. All rights reserved.
|
|
2
|
+
# NVIDIA CORPORATION and its licensors retain all intellectual property
|
|
3
|
+
# and proprietary rights in and to this software, related documentation
|
|
4
|
+
# and any modifications thereto. Any use, reproduction, disclosure or
|
|
5
|
+
# distribution of this software and related documentation without an express
|
|
6
|
+
# license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|
7
|
+
|
|
8
|
+
"""Helper module used in test_codegen_instancing.py"""
|
|
9
|
+
|
|
10
|
+
import warp as wp
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def create_kernel_closure(value: int):
|
|
14
|
+
@wp.kernel
|
|
15
|
+
def k(a: wp.array(dtype=int)):
|
|
16
|
+
a[0] = value
|
|
17
|
+
|
|
18
|
+
return k
|
warp/tests/test_array.py
CHANGED
|
@@ -6,6 +6,7 @@
|
|
|
6
6
|
# license agreement from NVIDIA CORPORATION is strictly prohibited.
|
|
7
7
|
|
|
8
8
|
import unittest
|
|
9
|
+
from typing import Any
|
|
9
10
|
|
|
10
11
|
import numpy as np
|
|
11
12
|
|
|
@@ -2361,64 +2362,85 @@ def test_array_from_cai(test, device):
|
|
|
2361
2362
|
assert_np_equal(arr_warp.numpy(), np.array([[2, 1, 1], [1, 0, 0], [1, 0, 0]]))
|
|
2362
2363
|
|
|
2363
2364
|
|
|
2364
|
-
|
|
2365
|
-
|
|
2366
|
-
|
|
2367
|
-
|
|
2368
|
-
x[i] += y[i]
|
|
2365
|
+
@wp.kernel
|
|
2366
|
+
def inplace_add_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
|
|
2367
|
+
i = wp.tid()
|
|
2368
|
+
x[i] += y[i]
|
|
2369
2369
|
|
|
2370
|
-
@wp.kernel
|
|
2371
|
-
def inplace_add_2d(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float)):
|
|
2372
|
-
i, j = wp.tid()
|
|
2373
|
-
x[i, j] += y[i, j]
|
|
2374
2370
|
|
|
2375
|
-
|
|
2376
|
-
|
|
2377
|
-
|
|
2378
|
-
|
|
2371
|
+
@wp.kernel
|
|
2372
|
+
def inplace_add_2d(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float)):
|
|
2373
|
+
i, j = wp.tid()
|
|
2374
|
+
x[i, j] += y[i, j]
|
|
2379
2375
|
|
|
2380
|
-
@wp.kernel
|
|
2381
|
-
def inplace_add_4d(x: wp.array4d(dtype=float), y: wp.array4d(dtype=float)):
|
|
2382
|
-
i, j, k, l = wp.tid()
|
|
2383
|
-
x[i, j, k, l] += y[i, j, k, l]
|
|
2384
2376
|
|
|
2385
|
-
|
|
2386
|
-
|
|
2387
|
-
|
|
2388
|
-
|
|
2377
|
+
@wp.kernel
|
|
2378
|
+
def inplace_add_3d(x: wp.array3d(dtype=float), y: wp.array3d(dtype=float)):
|
|
2379
|
+
i, j, k = wp.tid()
|
|
2380
|
+
x[i, j, k] += y[i, j, k]
|
|
2389
2381
|
|
|
2390
|
-
@wp.kernel
|
|
2391
|
-
def inplace_sub_2d(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float)):
|
|
2392
|
-
i, j = wp.tid()
|
|
2393
|
-
x[i, j] -= y[i, j]
|
|
2394
2382
|
|
|
2395
|
-
|
|
2396
|
-
|
|
2397
|
-
|
|
2398
|
-
|
|
2383
|
+
@wp.kernel
|
|
2384
|
+
def inplace_add_4d(x: wp.array4d(dtype=float), y: wp.array4d(dtype=float)):
|
|
2385
|
+
i, j, k, l = wp.tid()
|
|
2386
|
+
x[i, j, k, l] += y[i, j, k, l]
|
|
2399
2387
|
|
|
2400
|
-
@wp.kernel
|
|
2401
|
-
def inplace_sub_4d(x: wp.array4d(dtype=float), y: wp.array4d(dtype=float)):
|
|
2402
|
-
i, j, k, l = wp.tid()
|
|
2403
|
-
x[i, j, k, l] -= y[i, j, k, l]
|
|
2404
2388
|
|
|
2405
|
-
|
|
2406
|
-
|
|
2407
|
-
|
|
2408
|
-
|
|
2389
|
+
@wp.kernel
|
|
2390
|
+
def inplace_sub_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
|
|
2391
|
+
i = wp.tid()
|
|
2392
|
+
x[i] -= y[i]
|
|
2409
2393
|
|
|
2410
|
-
@wp.kernel
|
|
2411
|
-
def inplace_add_mats(x: wp.array(dtype=wp.mat33), y: wp.array(dtype=wp.mat33)):
|
|
2412
|
-
i = wp.tid()
|
|
2413
|
-
x[i] += y[i]
|
|
2414
2394
|
|
|
2415
|
-
|
|
2416
|
-
|
|
2417
|
-
|
|
2418
|
-
|
|
2419
|
-
|
|
2420
|
-
|
|
2395
|
+
@wp.kernel
|
|
2396
|
+
def inplace_sub_2d(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float)):
|
|
2397
|
+
i, j = wp.tid()
|
|
2398
|
+
x[i, j] -= y[i, j]
|
|
2399
|
+
|
|
2400
|
+
|
|
2401
|
+
@wp.kernel
|
|
2402
|
+
def inplace_sub_3d(x: wp.array3d(dtype=float), y: wp.array3d(dtype=float)):
|
|
2403
|
+
i, j, k = wp.tid()
|
|
2404
|
+
x[i, j, k] -= y[i, j, k]
|
|
2405
|
+
|
|
2406
|
+
|
|
2407
|
+
@wp.kernel
|
|
2408
|
+
def inplace_sub_4d(x: wp.array4d(dtype=float), y: wp.array4d(dtype=float)):
|
|
2409
|
+
i, j, k, l = wp.tid()
|
|
2410
|
+
x[i, j, k, l] -= y[i, j, k, l]
|
|
2411
|
+
|
|
2421
2412
|
|
|
2413
|
+
@wp.kernel
|
|
2414
|
+
def inplace_add_vecs(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=wp.vec3)):
|
|
2415
|
+
i = wp.tid()
|
|
2416
|
+
x[i] += y[i]
|
|
2417
|
+
|
|
2418
|
+
|
|
2419
|
+
@wp.kernel
|
|
2420
|
+
def inplace_add_mats(x: wp.array(dtype=wp.mat33), y: wp.array(dtype=wp.mat33)):
|
|
2421
|
+
i = wp.tid()
|
|
2422
|
+
x[i] += y[i]
|
|
2423
|
+
|
|
2424
|
+
|
|
2425
|
+
@wp.kernel
|
|
2426
|
+
def inplace_add_rhs(x: wp.array(dtype=float), y: wp.array(dtype=float), z: wp.array(dtype=float)):
|
|
2427
|
+
i = wp.tid()
|
|
2428
|
+
a = y[i]
|
|
2429
|
+
a += x[i]
|
|
2430
|
+
wp.atomic_add(z, 0, a)
|
|
2431
|
+
|
|
2432
|
+
|
|
2433
|
+
vec9 = wp.vec(length=9, dtype=float)
|
|
2434
|
+
|
|
2435
|
+
|
|
2436
|
+
@wp.kernel
|
|
2437
|
+
def inplace_add_custom_vec(x: wp.array(dtype=vec9), y: wp.array(dtype=vec9)):
|
|
2438
|
+
i = wp.tid()
|
|
2439
|
+
x[i] += y[i]
|
|
2440
|
+
x[i] += y[i]
|
|
2441
|
+
|
|
2442
|
+
|
|
2443
|
+
def test_array_inplace_diff_ops(test, device):
|
|
2422
2444
|
N = 3
|
|
2423
2445
|
x1 = wp.ones(N, dtype=float, requires_grad=True, device=device)
|
|
2424
2446
|
x2 = wp.ones((N, N), dtype=float, requires_grad=True, device=device)
|
|
@@ -2526,6 +2548,60 @@ def test_array_inplace_ops(test, device):
|
|
|
2526
2548
|
|
|
2527
2549
|
assert_np_equal(x.grad.numpy(), np.ones(1, dtype=float))
|
|
2528
2550
|
assert_np_equal(y.grad.numpy(), np.ones(1, dtype=float))
|
|
2551
|
+
tape.reset()
|
|
2552
|
+
|
|
2553
|
+
x = wp.zeros(1, dtype=vec9, requires_grad=True, device=device)
|
|
2554
|
+
y = wp.ones(1, dtype=vec9, requires_grad=True, device=device)
|
|
2555
|
+
|
|
2556
|
+
with tape:
|
|
2557
|
+
wp.launch(inplace_add_custom_vec, 1, inputs=[x, y], device=device)
|
|
2558
|
+
|
|
2559
|
+
tape.backward(grads={x: wp.ones_like(x)})
|
|
2560
|
+
|
|
2561
|
+
assert_np_equal(x.numpy(), np.full((1, 9), 2.0, dtype=float))
|
|
2562
|
+
assert_np_equal(y.grad.numpy(), np.full((1, 9), 2.0, dtype=float))
|
|
2563
|
+
|
|
2564
|
+
|
|
2565
|
+
@wp.kernel
|
|
2566
|
+
def inplace_mul_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
|
|
2567
|
+
i = wp.tid()
|
|
2568
|
+
x[i] *= y[i]
|
|
2569
|
+
|
|
2570
|
+
|
|
2571
|
+
@wp.kernel
|
|
2572
|
+
def inplace_div_1d(x: wp.array(dtype=float), y: wp.array(dtype=float)):
|
|
2573
|
+
i = wp.tid()
|
|
2574
|
+
x[i] /= y[i]
|
|
2575
|
+
|
|
2576
|
+
|
|
2577
|
+
@wp.kernel
|
|
2578
|
+
def inplace_add_non_atomic_types(x: wp.array(dtype=Any), y: wp.array(dtype=Any)):
|
|
2579
|
+
i = wp.tid()
|
|
2580
|
+
x[i] += y[i]
|
|
2581
|
+
|
|
2582
|
+
|
|
2583
|
+
uint16vec3 = wp.vec(length=3, dtype=wp.uint16)
|
|
2584
|
+
|
|
2585
|
+
|
|
2586
|
+
def test_array_inplace_non_diff_ops(test, device):
|
|
2587
|
+
N = 3
|
|
2588
|
+
x1 = wp.full(N, value=10.0, dtype=float, device=device)
|
|
2589
|
+
y1 = wp.full(N, value=5.0, dtype=float, device=device)
|
|
2590
|
+
|
|
2591
|
+
wp.launch(inplace_mul_1d, N, inputs=[x1, y1], device=device)
|
|
2592
|
+
assert_np_equal(x1.numpy(), np.full(N, fill_value=50.0, dtype=float))
|
|
2593
|
+
|
|
2594
|
+
x1.fill_(10.0)
|
|
2595
|
+
y1.fill_(5.0)
|
|
2596
|
+
wp.launch(inplace_div_1d, N, inputs=[x1, y1], device=device)
|
|
2597
|
+
assert_np_equal(x1.numpy(), np.full(N, fill_value=2.0, dtype=float))
|
|
2598
|
+
|
|
2599
|
+
for dtype in wp.types.non_atomic_types + (wp.vec2b, wp.vec2ub, wp.vec2s, wp.vec2us, uint16vec3):
|
|
2600
|
+
x = wp.full(N, value=0, dtype=dtype, device=device)
|
|
2601
|
+
y = wp.full(N, value=1, dtype=dtype, device=device)
|
|
2602
|
+
|
|
2603
|
+
wp.launch(inplace_add_non_atomic_types, N, inputs=[x, y], device=device)
|
|
2604
|
+
assert_np_equal(x.numpy(), y.numpy())
|
|
2529
2605
|
|
|
2530
2606
|
|
|
2531
2607
|
@wp.kernel
|
|
@@ -2609,6 +2685,87 @@ def test_numpy_array_interface(test, device):
|
|
|
2609
2685
|
assert a1.strides == a2.strides
|
|
2610
2686
|
|
|
2611
2687
|
|
|
2688
|
+
@wp.kernel
|
|
2689
|
+
def kernel_indexing_types(
|
|
2690
|
+
arr_1d: wp.array(dtype=wp.int32, ndim=1),
|
|
2691
|
+
arr_2d: wp.array(dtype=wp.int32, ndim=2),
|
|
2692
|
+
arr_3d: wp.array(dtype=wp.int32, ndim=3),
|
|
2693
|
+
arr_4d: wp.array(dtype=wp.int32, ndim=4),
|
|
2694
|
+
):
|
|
2695
|
+
x = arr_1d[wp.uint8(0)]
|
|
2696
|
+
y = arr_1d[wp.int16(1)]
|
|
2697
|
+
z = arr_1d[wp.uint32(2)]
|
|
2698
|
+
w = arr_1d[wp.int64(3)]
|
|
2699
|
+
|
|
2700
|
+
x = arr_2d[wp.uint8(0), wp.uint8(0)]
|
|
2701
|
+
y = arr_2d[wp.int16(1), wp.int16(1)]
|
|
2702
|
+
z = arr_2d[wp.uint32(2), wp.uint32(2)]
|
|
2703
|
+
w = arr_2d[wp.int64(3), wp.int64(3)]
|
|
2704
|
+
|
|
2705
|
+
x = arr_3d[wp.uint8(0), wp.uint8(0), wp.uint8(0)]
|
|
2706
|
+
y = arr_3d[wp.int16(1), wp.int16(1), wp.int16(1)]
|
|
2707
|
+
z = arr_3d[wp.uint32(2), wp.uint32(2), wp.uint32(2)]
|
|
2708
|
+
w = arr_3d[wp.int64(3), wp.int64(3), wp.int64(3)]
|
|
2709
|
+
|
|
2710
|
+
x = arr_4d[wp.uint8(0), wp.uint8(0), wp.uint8(0), wp.uint8(0)]
|
|
2711
|
+
y = arr_4d[wp.int16(1), wp.int16(1), wp.int16(1), wp.int16(1)]
|
|
2712
|
+
z = arr_4d[wp.uint32(2), wp.uint32(2), wp.uint32(2), wp.uint32(2)]
|
|
2713
|
+
w = arr_4d[wp.int64(3), wp.int64(3), wp.int64(3), wp.int64(3)]
|
|
2714
|
+
|
|
2715
|
+
arr_1d[wp.uint8(0)] = 123
|
|
2716
|
+
arr_1d[wp.int16(1)] = 123
|
|
2717
|
+
arr_1d[wp.uint32(2)] = 123
|
|
2718
|
+
arr_1d[wp.int64(3)] = 123
|
|
2719
|
+
|
|
2720
|
+
arr_2d[wp.uint8(0), wp.uint8(0)] = 123
|
|
2721
|
+
arr_2d[wp.int16(1), wp.int16(1)] = 123
|
|
2722
|
+
arr_2d[wp.uint32(2), wp.uint32(2)] = 123
|
|
2723
|
+
arr_2d[wp.int64(3), wp.int64(3)] = 123
|
|
2724
|
+
|
|
2725
|
+
arr_3d[wp.uint8(0), wp.uint8(0), wp.uint8(0)] = 123
|
|
2726
|
+
arr_3d[wp.int16(1), wp.int16(1), wp.int16(1)] = 123
|
|
2727
|
+
arr_3d[wp.uint32(2), wp.uint32(2), wp.uint32(2)] = 123
|
|
2728
|
+
arr_3d[wp.int64(3), wp.int64(3), wp.int64(3)] = 123
|
|
2729
|
+
|
|
2730
|
+
arr_4d[wp.uint8(0), wp.uint8(0), wp.uint8(0), wp.uint8(0)] = 123
|
|
2731
|
+
arr_4d[wp.int16(1), wp.int16(1), wp.int16(1), wp.int16(1)] = 123
|
|
2732
|
+
arr_4d[wp.uint32(2), wp.uint32(2), wp.uint32(2), wp.uint32(2)] = 123
|
|
2733
|
+
arr_4d[wp.int64(3), wp.int64(3), wp.int64(3), wp.int64(3)] = 123
|
|
2734
|
+
|
|
2735
|
+
wp.atomic_add(arr_1d, wp.uint8(0), 123)
|
|
2736
|
+
wp.atomic_sub(arr_1d, wp.int16(1), 123)
|
|
2737
|
+
wp.atomic_min(arr_1d, wp.uint32(2), 123)
|
|
2738
|
+
wp.atomic_max(arr_1d, wp.int64(3), 123)
|
|
2739
|
+
|
|
2740
|
+
wp.atomic_add(arr_2d, wp.uint8(0), wp.uint8(0), 123)
|
|
2741
|
+
wp.atomic_sub(arr_2d, wp.int16(1), wp.int16(1), 123)
|
|
2742
|
+
wp.atomic_min(arr_2d, wp.uint32(2), wp.uint32(2), 123)
|
|
2743
|
+
wp.atomic_max(arr_2d, wp.int64(3), wp.int64(3), 123)
|
|
2744
|
+
|
|
2745
|
+
wp.atomic_add(arr_3d, wp.uint8(0), wp.uint8(0), wp.uint8(0), 123)
|
|
2746
|
+
wp.atomic_sub(arr_3d, wp.int16(1), wp.int16(1), wp.int16(1), 123)
|
|
2747
|
+
wp.atomic_min(arr_3d, wp.uint32(2), wp.uint32(2), wp.uint32(2), 123)
|
|
2748
|
+
wp.atomic_max(arr_3d, wp.int64(3), wp.int64(3), wp.int64(3), 123)
|
|
2749
|
+
|
|
2750
|
+
wp.atomic_add(arr_4d, wp.uint8(0), wp.uint8(0), wp.uint8(0), wp.uint8(0), 123)
|
|
2751
|
+
wp.atomic_sub(arr_4d, wp.int16(1), wp.int16(1), wp.int16(1), wp.int16(1), 123)
|
|
2752
|
+
wp.atomic_min(arr_4d, wp.uint32(2), wp.uint32(2), wp.uint32(2), wp.uint32(2), 123)
|
|
2753
|
+
wp.atomic_max(arr_4d, wp.int64(3), wp.int64(3), wp.int64(3), wp.int64(3), 123)
|
|
2754
|
+
|
|
2755
|
+
|
|
2756
|
+
def test_indexing_types(test, device):
|
|
2757
|
+
arr_1d = wp.zeros(shape=(4,), dtype=wp.int32, device=device)
|
|
2758
|
+
arr_2d = wp.zeros(shape=(4, 4), dtype=wp.int32, device=device)
|
|
2759
|
+
arr_3d = wp.zeros(shape=(4, 4, 4), dtype=wp.int32, device=device)
|
|
2760
|
+
arr_4d = wp.zeros(shape=(4, 4, 4, 4), dtype=wp.int32, device=device)
|
|
2761
|
+
wp.launch(
|
|
2762
|
+
kernel=kernel_indexing_types,
|
|
2763
|
+
dim=1,
|
|
2764
|
+
inputs=(arr_1d, arr_2d, arr_3d, arr_4d),
|
|
2765
|
+
device=device,
|
|
2766
|
+
)
|
|
2767
|
+
|
|
2768
|
+
|
|
2612
2769
|
devices = get_test_devices()
|
|
2613
2770
|
|
|
2614
2771
|
|
|
@@ -2669,12 +2826,14 @@ add_function_test(TestArray, "test_array_from_numpy", test_array_from_numpy, dev
|
|
|
2669
2826
|
add_function_test(TestArray, "test_array_aliasing_from_numpy", test_array_aliasing_from_numpy, devices=["cpu"])
|
|
2670
2827
|
add_function_test(TestArray, "test_numpy_array_interface", test_numpy_array_interface, devices=["cpu"])
|
|
2671
2828
|
|
|
2672
|
-
add_function_test(TestArray, "test_array_inplace_ops", test_array_inplace_ops, devices=devices)
|
|
2829
|
+
add_function_test(TestArray, "test_array_inplace_diff_ops", test_array_inplace_diff_ops, devices=devices)
|
|
2830
|
+
add_function_test(TestArray, "test_array_inplace_non_diff_ops", test_array_inplace_non_diff_ops, devices=devices)
|
|
2673
2831
|
add_function_test(TestArray, "test_direct_from_numpy", test_direct_from_numpy, devices=["cpu"])
|
|
2674
2832
|
add_function_test(TestArray, "test_kernel_array_from_ptr", test_kernel_array_from_ptr, devices=devices)
|
|
2675
2833
|
|
|
2676
2834
|
add_function_test(TestArray, "test_array_from_int32_domain", test_array_from_int32_domain, devices=devices)
|
|
2677
2835
|
add_function_test(TestArray, "test_array_from_int64_domain", test_array_from_int64_domain, devices=devices)
|
|
2836
|
+
add_function_test(TestArray, "test_indexing_types", test_indexing_types, devices=devices)
|
|
2678
2837
|
|
|
2679
2838
|
try:
|
|
2680
2839
|
import torch
|
|
@@ -220,12 +220,12 @@ def check_edge_closest_point_sufficient_necessary_kernel(
|
|
|
220
220
|
|
|
221
221
|
def check_edge_closest_point_random(test, device):
|
|
222
222
|
num_tests = 100000
|
|
223
|
-
np.random.
|
|
224
|
-
p1 = wp.array(
|
|
225
|
-
q1 = wp.array(
|
|
223
|
+
rng = np.random.default_rng(123)
|
|
224
|
+
p1 = wp.array(rng.standard_normal(size=(num_tests, 3)), dtype=wp.vec3, device=device)
|
|
225
|
+
q1 = wp.array(rng.standard_normal(size=(num_tests, 3)), dtype=wp.vec3, device=device)
|
|
226
226
|
|
|
227
|
-
p2 = wp.array(
|
|
228
|
-
q2 = wp.array(
|
|
227
|
+
p2 = wp.array(rng.standard_normal(size=(num_tests, 3)), dtype=wp.vec3, device=device)
|
|
228
|
+
q2 = wp.array(rng.standard_normal(size=(num_tests, 3)), dtype=wp.vec3, device=device)
|
|
229
229
|
|
|
230
230
|
wp.launch(
|
|
231
231
|
kernel=check_edge_closest_point_sufficient_necessary_kernel,
|
|
@@ -235,10 +235,10 @@ def check_edge_closest_point_random(test, device):
|
|
|
235
235
|
)
|
|
236
236
|
|
|
237
237
|
# parallel edges
|
|
238
|
-
p1 =
|
|
239
|
-
q1 =
|
|
238
|
+
p1 = rng.standard_normal(size=(num_tests, 3))
|
|
239
|
+
q1 = rng.standard_normal(size=(num_tests, 3))
|
|
240
240
|
|
|
241
|
-
shifts =
|
|
241
|
+
shifts = rng.standard_normal(size=(num_tests, 3))
|
|
242
242
|
|
|
243
243
|
p2 = p1 + shifts
|
|
244
244
|
q2 = q1 + shifts
|
warp/tests/test_codegen.py
CHANGED
|
@@ -7,11 +7,27 @@
|
|
|
7
7
|
|
|
8
8
|
import sys
|
|
9
9
|
import unittest
|
|
10
|
+
from typing import Tuple
|
|
10
11
|
|
|
11
12
|
import warp as wp
|
|
12
13
|
from warp.tests.unittest_utils import *
|
|
13
14
|
|
|
14
15
|
|
|
16
|
+
@wp.kernel
|
|
17
|
+
def test_expect():
|
|
18
|
+
a = 1.0
|
|
19
|
+
a += 2.0
|
|
20
|
+
|
|
21
|
+
wp.expect_eq(123, 123)
|
|
22
|
+
wp.expect_neq(123, 234)
|
|
23
|
+
|
|
24
|
+
wp.expect_eq(wp.vec2(1.0, 2.0), wp.vec2(1.0, 2.0))
|
|
25
|
+
wp.expect_neq(wp.vec2(1.0, 2.0), wp.vec2(2.0, 3.0))
|
|
26
|
+
|
|
27
|
+
wp.expect_eq(wp.mat22(1.0, 2.0, 3.0, 4.0), wp.mat22(1.0, 2.0, 3.0, 4.0))
|
|
28
|
+
wp.expect_neq(wp.mat22(1.0, 2.0, 3.0, 4.0), wp.mat22(2.0, 3.0, 4.0, 5.0))
|
|
29
|
+
|
|
30
|
+
|
|
15
31
|
@wp.kernel
|
|
16
32
|
def test_rename():
|
|
17
33
|
a = 0
|
|
@@ -534,6 +550,103 @@ def test_error_mutating_constant_in_dynamic_loop(test, device):
|
|
|
534
550
|
)
|
|
535
551
|
assert_np_equal(output.numpy(), np.ones([num_threads, const_a + const_b + dyn_a + dyn_b + dyn_c + 1]))
|
|
536
552
|
|
|
553
|
+
@wp.kernel
|
|
554
|
+
def static_then_dynamic_loop_kernel(mats: wp.array(dtype=wp.mat33d)):
|
|
555
|
+
tid = wp.tid()
|
|
556
|
+
mat = wp.mat33d()
|
|
557
|
+
for i in range(3):
|
|
558
|
+
for j in range(3):
|
|
559
|
+
mat[i, j] = wp.float64(0.0)
|
|
560
|
+
|
|
561
|
+
dim = 2
|
|
562
|
+
for i in range(dim + 1):
|
|
563
|
+
for j in range(dim + 1):
|
|
564
|
+
mat[i, j] = wp.float64(1.0)
|
|
565
|
+
|
|
566
|
+
mats[tid] = mat
|
|
567
|
+
|
|
568
|
+
mats = wp.empty(1, dtype=wp.mat33d, device=device)
|
|
569
|
+
wp.launch(static_then_dynamic_loop_kernel, dim=1, inputs=[mats], device=device)
|
|
570
|
+
assert_np_equal(mats.numpy(), np.ones((1, 3, 3)))
|
|
571
|
+
|
|
572
|
+
@wp.kernel
|
|
573
|
+
def dynamic_then_static_loop_kernel(mats: wp.array(dtype=wp.mat33d)):
|
|
574
|
+
tid = wp.tid()
|
|
575
|
+
mat = wp.mat33d()
|
|
576
|
+
|
|
577
|
+
dim = 2
|
|
578
|
+
for i in range(dim + 1):
|
|
579
|
+
for j in range(dim + 1):
|
|
580
|
+
mat[i, j] = wp.float64(1.0)
|
|
581
|
+
|
|
582
|
+
for i in range(3):
|
|
583
|
+
for j in range(3):
|
|
584
|
+
mat[i, j] = wp.float64(0.0)
|
|
585
|
+
|
|
586
|
+
mats[tid] = mat
|
|
587
|
+
|
|
588
|
+
mats = wp.empty(1, dtype=wp.mat33d, device=device)
|
|
589
|
+
wp.launch(dynamic_then_static_loop_kernel, dim=1, inputs=[mats], device=device)
|
|
590
|
+
assert_np_equal(mats.numpy(), np.zeros((1, 3, 3)))
|
|
591
|
+
|
|
592
|
+
|
|
593
|
+
def test_error_return_annotation_mismatch(test, device):
|
|
594
|
+
@wp.func
|
|
595
|
+
def foo_1(x: wp.int32) -> wp.int16:
|
|
596
|
+
return wp.int8(x)
|
|
597
|
+
|
|
598
|
+
def kernel_1_fn():
|
|
599
|
+
x = foo_1(123)
|
|
600
|
+
|
|
601
|
+
@wp.func
|
|
602
|
+
def foo_2(x: int) -> int:
|
|
603
|
+
return (x + x, x * x)
|
|
604
|
+
|
|
605
|
+
def kernel_2_fn():
|
|
606
|
+
x = foo_2(123)
|
|
607
|
+
|
|
608
|
+
@wp.func
|
|
609
|
+
def foo_3(x: int) -> Tuple[int, int]:
|
|
610
|
+
return (x, 1.23)
|
|
611
|
+
|
|
612
|
+
def kernel_3_fn():
|
|
613
|
+
x, y = foo_3(123)
|
|
614
|
+
|
|
615
|
+
@wp.func
|
|
616
|
+
def foo_4(x: int) -> Tuple[int, int, int]:
|
|
617
|
+
return (x + x, x * x)
|
|
618
|
+
|
|
619
|
+
def kernel_4_fn():
|
|
620
|
+
x, y, z = foo_4(123)
|
|
621
|
+
|
|
622
|
+
kernel = wp.Kernel(func=kernel_1_fn)
|
|
623
|
+
with test.assertRaisesRegex(
|
|
624
|
+
wp.codegen.WarpCodegenError,
|
|
625
|
+
r"The function `foo_1` has its return type annotated as `int16` but the code returns a value of type `int8`.",
|
|
626
|
+
):
|
|
627
|
+
wp.launch(kernel, dim=1, device=device)
|
|
628
|
+
|
|
629
|
+
kernel = wp.Kernel(func=kernel_2_fn)
|
|
630
|
+
with test.assertRaisesRegex(
|
|
631
|
+
wp.codegen.WarpCodegenError,
|
|
632
|
+
r"The function `foo_2` has its return type annotated as `int` but the code returns 2 values.",
|
|
633
|
+
):
|
|
634
|
+
wp.launch(kernel, dim=1, device=device)
|
|
635
|
+
|
|
636
|
+
kernel = wp.Kernel(func=kernel_3_fn)
|
|
637
|
+
with test.assertRaisesRegex(
|
|
638
|
+
wp.codegen.WarpCodegenError,
|
|
639
|
+
r"The function `foo_3` has its return type annotated as `Tuple\[int, int\]` but the code returns a tuple with types `\(int32, float32\)`.",
|
|
640
|
+
):
|
|
641
|
+
wp.launch(kernel, dim=1, device=device)
|
|
642
|
+
|
|
643
|
+
kernel = wp.Kernel(func=kernel_4_fn)
|
|
644
|
+
with test.assertRaisesRegex(
|
|
645
|
+
wp.codegen.WarpCodegenError,
|
|
646
|
+
r"The function `foo_4` has its return type annotated as a tuple of 3 elements but the code returns 2 values.",
|
|
647
|
+
):
|
|
648
|
+
wp.launch(kernel, dim=1, device=device)
|
|
649
|
+
|
|
537
650
|
|
|
538
651
|
@wp.kernel
|
|
539
652
|
def test_call_syntax():
|
|
@@ -583,6 +696,7 @@ class TestCodeGen(unittest.TestCase):
|
|
|
583
696
|
|
|
584
697
|
devices = get_test_devices()
|
|
585
698
|
|
|
699
|
+
add_kernel_test(TestCodeGen, name="test_expect", kernel=test_expect, dim=1, devices=devices)
|
|
586
700
|
add_kernel_test(TestCodeGen, name="test_inplace", kernel=test_inplace, dim=1, devices=devices)
|
|
587
701
|
add_kernel_test(TestCodeGen, name="test_rename", kernel=test_rename, dim=1, devices=devices)
|
|
588
702
|
add_kernel_test(TestCodeGen, name="test_constant", kernel=test_constant, inputs=[1.0], dim=1, devices=devices)
|
|
@@ -719,7 +833,12 @@ add_function_test(
|
|
|
719
833
|
name="test_error_mutating_constant_in_dynamic_loop",
|
|
720
834
|
devices=devices,
|
|
721
835
|
)
|
|
722
|
-
|
|
836
|
+
add_function_test(
|
|
837
|
+
TestCodeGen,
|
|
838
|
+
func=test_error_return_annotation_mismatch,
|
|
839
|
+
name="test_error_return_annotation_mismatch",
|
|
840
|
+
devices=devices,
|
|
841
|
+
)
|
|
723
842
|
add_kernel_test(TestCodeGen, name="test_call_syntax", kernel=test_call_syntax, dim=1, devices=devices)
|
|
724
843
|
add_kernel_test(TestCodeGen, name="test_shadow_builtin", kernel=test_shadow_builtin, dim=1, devices=devices)
|
|
725
844
|
add_kernel_test(TestCodeGen, name="test_while_condition_eval", kernel=test_while_condition_eval, dim=1, devices=devices)
|
|
@@ -1287,6 +1287,35 @@ def test_module_mark_modified(test, device):
|
|
|
1287
1287
|
# =======================================================================
|
|
1288
1288
|
|
|
1289
1289
|
|
|
1290
|
+
def test_garbage_collection(test, device):
|
|
1291
|
+
"""Test that dynamically generated kernels without user references are not retained in the module."""
|
|
1292
|
+
|
|
1293
|
+
# use a helper module with a known kernel count
|
|
1294
|
+
import warp.tests.aux_test_instancing_gc as gc_test_module
|
|
1295
|
+
|
|
1296
|
+
with wp.ScopedDevice(device):
|
|
1297
|
+
a = wp.zeros(1, dtype=int)
|
|
1298
|
+
|
|
1299
|
+
for i in range(10):
|
|
1300
|
+
# create a unique kernel on each iteration
|
|
1301
|
+
k = gc_test_module.create_kernel_closure(i)
|
|
1302
|
+
|
|
1303
|
+
# import gc
|
|
1304
|
+
# gc.collect()
|
|
1305
|
+
|
|
1306
|
+
# since we don't keep references to the previous kernels,
|
|
1307
|
+
# they should be garbage-collected and not appear in the module
|
|
1308
|
+
k.module.load(device=device)
|
|
1309
|
+
test.assertEqual(len(k.module.live_kernels), 1)
|
|
1310
|
+
|
|
1311
|
+
# test the kernel
|
|
1312
|
+
wp.launch(k, dim=1, inputs=[a])
|
|
1313
|
+
test.assertEqual(a.numpy()[0], i)
|
|
1314
|
+
|
|
1315
|
+
|
|
1316
|
+
# =======================================================================
|
|
1317
|
+
|
|
1318
|
+
|
|
1290
1319
|
class TestCodeGenInstancing(unittest.TestCase):
|
|
1291
1320
|
pass
|
|
1292
1321
|
|
|
@@ -1450,6 +1479,7 @@ add_function_test(TestCodeGenInstancing, func=test_create_kernel_loop, name="tes
|
|
|
1450
1479
|
add_function_test(
|
|
1451
1480
|
TestCodeGenInstancing, func=test_module_mark_modified, name="test_module_mark_modified", devices=devices
|
|
1452
1481
|
)
|
|
1482
|
+
add_function_test(TestCodeGenInstancing, func=test_garbage_collection, name="test_garbage_collection", devices=devices)
|
|
1453
1483
|
|
|
1454
1484
|
|
|
1455
1485
|
if __name__ == "__main__":
|