warp-lang 1.0.0b2-py3-none-win_amd64.whl → 1.0.0b6-py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of warp-lang has been flagged as potentially problematic.
- docs/conf.py +17 -5
- examples/env/env_ant.py +1 -1
- examples/env/env_cartpole.py +1 -1
- examples/env/env_humanoid.py +1 -1
- examples/env/env_usd.py +4 -1
- examples/env/environment.py +8 -9
- examples/example_dem.py +34 -33
- examples/example_diffray.py +364 -337
- examples/example_fluid.py +32 -23
- examples/example_jacobian_ik.py +97 -93
- examples/example_marching_cubes.py +6 -16
- examples/example_mesh.py +6 -16
- examples/example_mesh_intersect.py +16 -14
- examples/example_nvdb.py +14 -16
- examples/example_raycast.py +14 -13
- examples/example_raymarch.py +16 -23
- examples/example_render_opengl.py +19 -10
- examples/example_sim_cartpole.py +82 -78
- examples/example_sim_cloth.py +45 -48
- examples/example_sim_fk_grad.py +51 -44
- examples/example_sim_fk_grad_torch.py +47 -40
- examples/example_sim_grad_bounce.py +108 -133
- examples/example_sim_grad_cloth.py +99 -113
- examples/example_sim_granular.py +5 -6
- examples/{example_sim_sdf_shape.py → example_sim_granular_collision_sdf.py} +37 -26
- examples/example_sim_neo_hookean.py +51 -55
- examples/example_sim_particle_chain.py +4 -4
- examples/example_sim_quadruped.py +126 -81
- examples/example_sim_rigid_chain.py +54 -61
- examples/example_sim_rigid_contact.py +66 -70
- examples/example_sim_rigid_fem.py +3 -3
- examples/example_sim_rigid_force.py +1 -1
- examples/example_sim_rigid_gyroscopic.py +3 -4
- examples/example_sim_rigid_kinematics.py +28 -39
- examples/example_sim_trajopt.py +112 -110
- examples/example_sph.py +9 -8
- examples/example_wave.py +7 -7
- examples/fem/bsr_utils.py +30 -17
- examples/fem/example_apic_fluid.py +85 -69
- examples/fem/example_convection_diffusion.py +97 -93
- examples/fem/example_convection_diffusion_dg.py +142 -149
- examples/fem/example_convection_diffusion_dg0.py +141 -136
- examples/fem/example_deformed_geometry.py +146 -0
- examples/fem/example_diffusion.py +115 -84
- examples/fem/example_diffusion_3d.py +116 -86
- examples/fem/example_diffusion_mgpu.py +102 -79
- examples/fem/example_mixed_elasticity.py +139 -100
- examples/fem/example_navier_stokes.py +175 -162
- examples/fem/example_stokes.py +143 -111
- examples/fem/example_stokes_transfer.py +186 -157
- examples/fem/mesh_utils.py +59 -97
- examples/fem/plot_utils.py +138 -17
- tools/ci/publishing/build_nodes_info.py +54 -0
- warp/__init__.py +4 -3
- warp/__init__.pyi +1 -0
- warp/bin/warp-clang.dll +0 -0
- warp/bin/warp.dll +0 -0
- warp/build.py +5 -3
- warp/build_dll.py +29 -9
- warp/builtins.py +836 -492
- warp/codegen.py +864 -553
- warp/config.py +3 -1
- warp/context.py +389 -172
- warp/fem/__init__.py +24 -6
- warp/fem/cache.py +318 -25
- warp/fem/dirichlet.py +7 -3
- warp/fem/domain.py +14 -0
- warp/fem/field/__init__.py +30 -38
- warp/fem/field/field.py +149 -0
- warp/fem/field/nodal_field.py +244 -138
- warp/fem/field/restriction.py +8 -6
- warp/fem/field/test.py +127 -59
- warp/fem/field/trial.py +117 -60
- warp/fem/geometry/__init__.py +5 -1
- warp/fem/geometry/deformed_geometry.py +271 -0
- warp/fem/geometry/element.py +24 -1
- warp/fem/geometry/geometry.py +86 -14
- warp/fem/geometry/grid_2d.py +112 -54
- warp/fem/geometry/grid_3d.py +134 -65
- warp/fem/geometry/hexmesh.py +953 -0
- warp/fem/geometry/partition.py +85 -33
- warp/fem/geometry/quadmesh_2d.py +532 -0
- warp/fem/geometry/tetmesh.py +451 -115
- warp/fem/geometry/trimesh_2d.py +197 -92
- warp/fem/integrate.py +534 -268
- warp/fem/operator.py +58 -31
- warp/fem/polynomial.py +11 -0
- warp/fem/quadrature/__init__.py +1 -1
- warp/fem/quadrature/pic_quadrature.py +150 -58
- warp/fem/quadrature/quadrature.py +209 -57
- warp/fem/space/__init__.py +230 -53
- warp/fem/space/basis_space.py +489 -0
- warp/fem/space/collocated_function_space.py +105 -0
- warp/fem/space/dof_mapper.py +49 -2
- warp/fem/space/function_space.py +90 -39
- warp/fem/space/grid_2d_function_space.py +149 -496
- warp/fem/space/grid_3d_function_space.py +173 -538
- warp/fem/space/hexmesh_function_space.py +352 -0
- warp/fem/space/partition.py +129 -76
- warp/fem/space/quadmesh_2d_function_space.py +369 -0
- warp/fem/space/restriction.py +46 -34
- warp/fem/space/shape/__init__.py +15 -0
- warp/fem/space/shape/cube_shape_function.py +738 -0
- warp/fem/space/shape/shape_function.py +103 -0
- warp/fem/space/shape/square_shape_function.py +611 -0
- warp/fem/space/shape/tet_shape_function.py +567 -0
- warp/fem/space/shape/triangle_shape_function.py +429 -0
- warp/fem/space/tetmesh_function_space.py +132 -1039
- warp/fem/space/topology.py +295 -0
- warp/fem/space/trimesh_2d_function_space.py +104 -742
- warp/fem/types.py +13 -11
- warp/fem/utils.py +335 -60
- warp/native/array.h +120 -34
- warp/native/builtin.h +101 -72
- warp/native/bvh.cpp +73 -325
- warp/native/bvh.cu +406 -23
- warp/native/bvh.h +22 -40
- warp/native/clang/clang.cpp +1 -0
- warp/native/crt.h +2 -0
- warp/native/cuda_util.cpp +8 -3
- warp/native/cuda_util.h +1 -0
- warp/native/exports.h +1522 -1243
- warp/native/intersect.h +19 -4
- warp/native/intersect_adj.h +8 -8
- warp/native/mat.h +76 -17
- warp/native/mesh.cpp +33 -108
- warp/native/mesh.cu +114 -18
- warp/native/mesh.h +395 -40
- warp/native/noise.h +272 -329
- warp/native/quat.h +51 -8
- warp/native/rand.h +44 -34
- warp/native/reduce.cpp +1 -1
- warp/native/sparse.cpp +4 -4
- warp/native/sparse.cu +163 -155
- warp/native/spatial.h +2 -2
- warp/native/temp_buffer.h +18 -14
- warp/native/vec.h +103 -21
- warp/native/warp.cpp +2 -1
- warp/native/warp.cu +28 -3
- warp/native/warp.h +4 -3
- warp/render/render_opengl.py +261 -109
- warp/sim/__init__.py +1 -2
- warp/sim/articulation.py +385 -185
- warp/sim/import_mjcf.py +59 -48
- warp/sim/import_urdf.py +15 -15
- warp/sim/import_usd.py +174 -102
- warp/sim/inertia.py +17 -18
- warp/sim/integrator_xpbd.py +4 -3
- warp/sim/model.py +330 -250
- warp/sim/render.py +1 -1
- warp/sparse.py +625 -152
- warp/stubs.py +341 -309
- warp/tape.py +9 -6
- warp/tests/__main__.py +3 -6
- warp/tests/assets/curlnoise_golden.npy +0 -0
- warp/tests/assets/pnoise_golden.npy +0 -0
- warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
- warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
- warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
- warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
- warp/tests/aux_test_unresolved_func.py +14 -0
- warp/tests/aux_test_unresolved_symbol.py +14 -0
- warp/tests/disabled_kinematics.py +239 -0
- warp/tests/run_coverage_serial.py +31 -0
- warp/tests/test_adam.py +103 -106
- warp/tests/test_arithmetic.py +94 -74
- warp/tests/test_array.py +82 -101
- warp/tests/test_array_reduce.py +57 -23
- warp/tests/test_atomic.py +64 -28
- warp/tests/test_bool.py +22 -12
- warp/tests/test_builtins_resolution.py +1292 -0
- warp/tests/test_bvh.py +18 -18
- warp/tests/test_closest_point_edge_edge.py +54 -57
- warp/tests/test_codegen.py +165 -134
- warp/tests/test_compile_consts.py +28 -20
- warp/tests/test_conditional.py +108 -24
- warp/tests/test_copy.py +10 -12
- warp/tests/test_ctypes.py +112 -88
- warp/tests/test_dense.py +21 -14
- warp/tests/test_devices.py +98 -0
- warp/tests/test_dlpack.py +75 -75
- warp/tests/test_examples.py +237 -0
- warp/tests/test_fabricarray.py +22 -24
- warp/tests/test_fast_math.py +15 -11
- warp/tests/test_fem.py +1034 -124
- warp/tests/test_fp16.py +23 -16
- warp/tests/test_func.py +187 -86
- warp/tests/test_generics.py +194 -49
- warp/tests/test_grad.py +123 -181
- warp/tests/test_grad_customs.py +176 -0
- warp/tests/test_hash_grid.py +35 -34
- warp/tests/test_import.py +10 -23
- warp/tests/test_indexedarray.py +24 -25
- warp/tests/test_intersect.py +18 -9
- warp/tests/test_large.py +141 -0
- warp/tests/test_launch.py +14 -41
- warp/tests/test_lerp.py +64 -65
- warp/tests/test_lvalue.py +493 -0
- warp/tests/test_marching_cubes.py +12 -13
- warp/tests/test_mat.py +517 -2898
- warp/tests/test_mat_lite.py +115 -0
- warp/tests/test_mat_scalar_ops.py +2889 -0
- warp/tests/test_math.py +103 -9
- warp/tests/test_matmul.py +304 -69
- warp/tests/test_matmul_lite.py +410 -0
- warp/tests/test_mesh.py +60 -22
- warp/tests/test_mesh_query_aabb.py +21 -25
- warp/tests/test_mesh_query_point.py +111 -22
- warp/tests/test_mesh_query_ray.py +12 -24
- warp/tests/test_mlp.py +30 -22
- warp/tests/test_model.py +92 -89
- warp/tests/test_modules_lite.py +39 -0
- warp/tests/test_multigpu.py +88 -114
- warp/tests/test_noise.py +12 -11
- warp/tests/test_operators.py +16 -20
- warp/tests/test_options.py +11 -11
- warp/tests/test_pinned.py +17 -18
- warp/tests/test_print.py +32 -11
- warp/tests/test_quat.py +275 -129
- warp/tests/test_rand.py +18 -16
- warp/tests/test_reload.py +38 -34
- warp/tests/test_rounding.py +50 -43
- warp/tests/test_runlength_encode.py +168 -20
- warp/tests/test_smoothstep.py +9 -11
- warp/tests/test_snippet.py +143 -0
- warp/tests/test_sparse.py +261 -63
- warp/tests/test_spatial.py +276 -243
- warp/tests/test_streams.py +110 -85
- warp/tests/test_struct.py +268 -63
- warp/tests/test_tape.py +39 -21
- warp/tests/test_torch.py +90 -86
- warp/tests/test_transient_module.py +10 -12
- warp/tests/test_types.py +363 -0
- warp/tests/test_utils.py +451 -0
- warp/tests/test_vec.py +354 -2050
- warp/tests/test_vec_lite.py +73 -0
- warp/tests/test_vec_scalar_ops.py +2099 -0
- warp/tests/test_volume.py +418 -376
- warp/tests/test_volume_write.py +124 -134
- warp/tests/unittest_serial.py +35 -0
- warp/tests/unittest_suites.py +291 -0
- warp/tests/unittest_utils.py +342 -0
- warp/tests/{test_misc.py → unused_test_misc.py} +13 -5
- warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
- warp/thirdparty/appdirs.py +36 -45
- warp/thirdparty/unittest_parallel.py +589 -0
- warp/types.py +622 -211
- warp/utils.py +54 -393
- warp_lang-1.0.0b6.dist-info/METADATA +238 -0
- warp_lang-1.0.0b6.dist-info/RECORD +409 -0
- {warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/WHEEL +1 -1
- examples/example_cache_management.py +0 -40
- examples/example_multigpu.py +0 -54
- examples/example_struct.py +0 -65
- examples/fem/example_stokes_transfer_3d.py +0 -210
- warp/bin/warp-clang.so +0 -0
- warp/bin/warp.so +0 -0
- warp/fem/field/discrete_field.py +0 -80
- warp/fem/space/nodal_function_space.py +0 -233
- warp/tests/test_all.py +0 -223
- warp/tests/test_array_scan.py +0 -60
- warp/tests/test_base.py +0 -208
- warp/tests/test_unresolved_func.py +0 -7
- warp/tests/test_unresolved_symbol.py +0 -7
- warp_lang-1.0.0b2.dist-info/METADATA +0 -26
- warp_lang-1.0.0b2.dist-info/RECORD +0 -380
- /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
- /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
- /warp/tests/{test_square.py → aux_test_square.py} +0 -0
- {warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/LICENSE.md +0 -0
- {warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/top_level.txt +0 -0
warp/tests/test_grad.py
CHANGED

@@ -5,9 +5,13 @@
 # distribution of this software and related documentation without an express
 # license agreement from NVIDIA CORPORATION is strictly prohibited.
 
+import unittest
+from typing import Any
+
 import numpy as np
+
 import warp as wp
-from warp.tests.test_base import *
+from warp.tests.unittest_utils import *
 
 wp.init()
 
@@ -63,26 +67,26 @@ def test_for_loop_grad(test, device):
 
 
 def test_for_loop_graph_grad(test, device):
+    wp.load_module(device=device)
+
     n = 32
     val = np.ones(n, dtype=np.float32)
 
     x = wp.array(val, device=device, requires_grad=True)
     sum = wp.zeros(1, dtype=wp.float32, device=device, requires_grad=True)
 
-    wp.capture_begin()
+    wp.capture_begin(device, force_module_load=False)
+    try:
+        tape = wp.Tape()
+        with tape:
+            wp.launch(for_loop_grad, dim=1, inputs=[n, x, sum], device=device)
 
-
-    tape = wp.Tape()
-
-    with tape:
-        wp.launch(for_loop_grad, dim=1, inputs=[n, x, sum], device=device)
-
-    tape.backward(loss=sum)
-
-    graph = wp.capture_end()
+        tape.backward(loss=sum)
+    finally:
+        graph = wp.capture_end(device)
 
     wp.capture_launch(graph)
-    wp.synchronize()
+    wp.synchronize_device(device)
 
     # ensure forward pass outputs persist
     assert_np_equal(sum.numpy(), 2.0 * np.sum(x.numpy()))
@@ -90,7 +94,7 @@ def test_for_loop_graph_grad(test, device):
     assert_np_equal(x.grad.numpy(), 2.0 * val)
 
     wp.capture_launch(graph)
-    wp.synchronize()
+    wp.synchronize_device(device)
 
 
 @wp.kernel
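The hunks above move CUDA graph capture from the old implicit wp.capture_begin() / wp.capture_end() calls to explicit-device calls wrapped in try/finally. Below is a minimal sketch of the updated pattern, assuming the wp.capture_begin(device, force_module_load=False), wp.capture_end(device), and wp.synchronize_device(device) signatures shown in the diff; the `double` kernel is a hypothetical stand-in, not part of the test suite.

import warp as wp

wp.init()

@wp.kernel
def double(x: wp.array(dtype=float)):
    tid = wp.tid()
    x[tid] = 2.0 * x[tid]

device = "cuda:0"
wp.load_module(device=device)  # compile modules up front so capture does not trigger a build

x = wp.zeros(32, dtype=wp.float32, device=device)

wp.capture_begin(device, force_module_load=False)
try:
    wp.launch(double, dim=32, inputs=[x], device=device)
finally:
    # always end the capture, even if a launch raises,
    # so the stream is not left stuck in capture mode
    graph = wp.capture_end(device)

wp.capture_launch(graph)
wp.synchronize_device(device)

The try/finally is the point of the change: an exception raised between begin and end previously leaked an open capture.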
@@ -272,8 +276,7 @@ def gradcheck(func, func_name, inputs, device, eps=1e-4, tol=1e-2):
     numerical gradient computed using finite differences.
     """
 
-    module = wp.get_module(func.__module__)
-    kernel = wp.Kernel(func=func, key=func_name, module=module)
+    kernel = wp.Kernel(func=func, key=func_name)
 
     def f(xs):
         # call the kernel without taping for finite differences
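As the gradcheck hunk shows, kernels built at runtime from plain Python functions no longer need an explicit module argument: wp.Kernel(func=..., key=...) now resolves the module itself. A minimal sketch under that assumption follows; `scale` is a hypothetical function, not part of the package.

import warp as wp

wp.init()

def scale(xs: wp.array(dtype=float), s: float):
    tid = wp.tid()
    xs[tid] = xs[tid] * s

# the owning module is now inferred from the function itself
kernel = wp.Kernel(func=scale, key="scale")

xs = wp.array([1.0, 2.0, 3.0], dtype=float)
wp.launch(kernel, dim=len(xs), inputs=[xs, 2.0])
print(xs.numpy())  # expected: [2. 4. 6.]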
@@ -316,7 +319,7 @@
 
 
 def test_vector_math_grad(test, device):
-    np.random.seed(123)
+    rng = np.random.default_rng(123)
 
     # test unary operations
     for dim, vec_type in [(2, wp.vec2), (3, wp.vec3), (4, wp.vec4), (4, wp.quat)]:
@@ -332,14 +335,14 @@ def test_vector_math_grad(test, device):
 
     # run the tests with 5 different random inputs
     for _ in range(5):
-        x = wp.array(
+        x = wp.array(rng.random(size=(1, dim), dtype=np.float32), dtype=vec_type, device=device)
         gradcheck(check_length, f"check_length_{vec_type.__name__}", [x], device)
         gradcheck(check_length_sq, f"check_length_sq_{vec_type.__name__}", [x], device)
         gradcheck(check_normalize, f"check_normalize_{vec_type.__name__}", [x], device)
 
 
 def test_matrix_math_grad(test, device):
-    np.random.seed(123)
+    rng = np.random.default_rng(123)
 
     # test unary operations
     for dim, mat_type in [(2, wp.mat22), (3, wp.mat33), (4, wp.mat44)]:
@@ -352,13 +355,13 @@ def test_matrix_math_grad(test, device):
 
     # run the tests with 5 different random inputs
     for _ in range(5):
-        x = wp.array(
+        x = wp.array(rng.random(size=(1, dim, dim), dtype=np.float32), ndim=1, dtype=mat_type, device=device)
         gradcheck(check_determinant, f"check_length_{mat_type.__name__}", [x], device)
         gradcheck(check_trace, f"check_length_sq_{mat_type.__name__}", [x], device)
 
 
 def test_3d_math_grad(test, device):
-    np.random.seed(123)
+    rng = np.random.default_rng(123)
 
     # test binary operations
     def check_cross(vs: wp.array(dtype=wp.vec3), out: wp.array(dtype=float)):
@@ -408,7 +411,9 @@ def test_3d_math_grad(test, device):
 
     # run the tests with 5 different random inputs
     for _ in range(5):
-        x = wp.array(
+        x = wp.array(
+            rng.standard_normal(size=(2, 3), dtype=np.float32), dtype=wp.vec3, device=device, requires_grad=True
+        )
         gradcheck(check_cross, "check_cross_3d", [x], device)
         gradcheck(check_dot, "check_dot_3d", [x], device)
         gradcheck(check_mat33, "check_mat33_3d", [x], device, eps=2e-2)
@@ -419,7 +424,7 @@ def test_3d_math_grad(test, device):
 
 
 def test_multi_valued_function_grad(test, device):
-    np.random.seed(123)
+    rng = np.random.default_rng(123)
 
     @wp.func
     def multi_valued(x: float, y: float, z: float):
@@ -434,7 +439,9 @@ def test_multi_valued_function_grad(test, device):
 
     # run the tests with 5 different random inputs
     for _ in range(5):
-        x = wp.array(
+        x = wp.array(
+            rng.standard_normal(size=(2, 3), dtype=np.float32), dtype=wp.vec3, device=device, requires_grad=True
+        )
        gradcheck(check_multi_valued, "check_multi_valued_3d", [x], device)
 
 
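Across these hunks the tests drop NumPy's legacy global seeding in favor of an isolated Generator, which keeps each test's random stream independent of any other code that touches np.random. A short illustration of the two APIs side by side (the drawn values themselves differ between them):

import numpy as np

# old: seeds the shared global RNG; any other np.random call perturbs the stream
np.random.seed(123)
a = np.random.rand(1, 3).astype(np.float32)

# new: a self-contained Generator instance, seeded per test
rng = np.random.default_rng(123)
b = rng.random(size=(1, 3), dtype=np.float32)          # uniform in [0, 1)
c = rng.standard_normal(size=(2, 3), dtype=np.float32)  # normal draws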
@@ -467,19 +474,17 @@ def test_mesh_grad(test, device):
         c = mesh.points[k]
         return wp.length(wp.cross(b - a, c - a)) * 0.5
 
+    @wp.kernel
     def compute_area(mesh_id: wp.uint64, out: wp.array(dtype=wp.float32)):
         wp.atomic_add(out, 0, compute_triangle_area(mesh_id, wp.tid()))
 
-    module = wp.get_module(compute_area.__module__)
-    kernel = wp.Kernel(func=compute_area, key="compute_area", module=module)
-
     num_tris = int(len(indices) / 3)
 
     # compute analytical gradient
     tape = wp.Tape()
     output = wp.zeros(1, dtype=wp.float32, device=device, requires_grad=True)
     with tape:
-        wp.launch(kernel, dim=num_tris, inputs=[mesh.id], outputs=[output], device=device)
+        wp.launch(compute_area, dim=num_tris, inputs=[mesh.id], outputs=[output], device=device)
 
     tape.backward(loss=output)
 
@@ -496,13 +501,13 @@ def test_mesh_grad(test, device):
            pos = wp.array(pos_np, dtype=wp.vec3, device=device)
            mesh = wp.Mesh(points=pos, indices=indices)
            output.zero_()
-            wp.launch(kernel, dim=num_tris, inputs=[mesh.id], outputs=[output], device=device)
+            wp.launch(compute_area, dim=num_tris, inputs=[mesh.id], outputs=[output], device=device)
            f1 = output.numpy()[0]
            pos_np[i, j] -= 2 * eps
            pos = wp.array(pos_np, dtype=wp.vec3, device=device)
            mesh = wp.Mesh(points=pos, indices=indices)
            output.zero_()
-            wp.launch(kernel, dim=num_tris, inputs=[mesh.id], outputs=[output], device=device)
+            wp.launch(compute_area, dim=num_tris, inputs=[mesh.id], outputs=[output], device=device)
            f2 = output.numpy()[0]
            pos_np[i, j] += eps
            fd_grad[i, j] = (f1 - f2) / (2 * eps)
@@ -510,189 +515,126 @@ def test_mesh_grad(test, device):
     assert np.allclose(ad_grad, fd_grad, atol=1e-3)
 
 
-# atomic add function that memorizes which thread incremented the counter
-# so that the correct counter value per thread can be used in the replay
-# phase of the backward pass
 @wp.func
-def reversible_increment(
-    counter: wp.array(dtype=int),
-    counter_index: int,
-    value: int,
-    thread_values: wp.array(dtype=int),
-    tid: int
-):
-    next_index = wp.atomic_add(counter, counter_index, value)
-    thread_values[tid] = next_index
-    return next_index
-
-
-@wp.func_replay(reversible_increment)
-def replay_reversible_increment(
-    counter: wp.array(dtype=int),
-    counter_index: int,
-    value: int,
-    thread_values: wp.array(dtype=int),
-    tid: int
-):
-    return thread_values[tid]
+def name_clash(a: float, b: float) -> float:
+    return a + b
 
 
-def test_custom_replay_grad(test, device):
-    num_threads = 128
-    counter = wp.zeros(1, dtype=wp.int32, device=device)
-    thread_ids = wp.zeros(num_threads, dtype=wp.int32, device=device)
-    inputs = wp.array(np.arange(num_threads, dtype=np.float32), device=device, requires_grad=True)
-    outputs = wp.zeros_like(inputs)
+@wp.func_grad(name_clash)
+def adj_name_clash(a: float, b: float, adj_ret: float):
+    # names `adj_a` and `adj_b` must not clash with function args of generated function
+    adj_a = 0.0
+    adj_b = 0.0
+    if a < 0.0:
+        adj_a = adj_ret
+    if b > 0.0:
+        adj_b = adj_ret
 
-    @wp.kernel
-    def run_atomic_add(
-        input: wp.array(dtype=float),
-        counter: wp.array(dtype=int),
-        thread_values: wp.array(dtype=int),
-        output: wp.array(dtype=float)
-    ):
-        tid = wp.tid()
-        idx = reversible_increment(counter, 0, 1, thread_values, tid)
-        output[idx] = input[idx] ** 2.0
+    wp.adjoint[a] += adj_a
+    wp.adjoint[b] += adj_b
 
-    tape = wp.Tape()
-    with tape:
-        wp.launch(run_atomic_add, dim=num_threads, inputs=[inputs, counter, thread_ids], outputs=[outputs], device=device)
 
-    tape.backward(grads={outputs: wp.array(np.ones(num_threads, dtype=np.float32), device=device)})
-    assert_np_equal(inputs.grad.numpy(), 2.0 * inputs.numpy(), tol=1e-4)
+@wp.kernel
+def name_clash_kernel(
+    input_a: wp.array(dtype=float),
+    input_b: wp.array(dtype=float),
+    output: wp.array(dtype=float),
+):
+    tid = wp.tid()
+    output[tid] = name_clash(input_a[tid], input_b[tid])
 
 
-@wp.func
-def overload_fn(x: float, y: float):
-    return x * 3.0 + y / 3.0, y**2.5
+def test_name_clash(test, device):
+    # tests that no name clashes occur when variable names such as `adj_a` are used in custom gradient code
+    with wp.ScopedDevice(device):
+        input_a = wp.array([1.0, -2.0, 3.0], dtype=wp.float32, requires_grad=True)
+        input_b = wp.array([4.0, 5.0, -6.0], dtype=wp.float32, requires_grad=True)
+        output = wp.zeros(3, dtype=wp.float32, requires_grad=True)
+
+        tape = wp.Tape()
+        with tape:
+            wp.launch(name_clash_kernel, dim=len(input_a), inputs=[input_a, input_b], outputs=[output])
 
+        tape.backward(grads={output: wp.array(np.ones(len(input_a), dtype=np.float32))})
 
-@wp.func_grad(overload_fn)
-def overload_fn_grad(x: float, y: float, adj_ret0: float, adj_ret1: float):
-    wp.adjoint[x] += x * adj_ret0 * 42.0 + y * adj_ret1 * 10.0
-    wp.adjoint[y] += y * adj_ret1 * 3.0
+        assert_np_equal(input_a.grad.numpy(), np.array([0.0, 1.0, 0.0]))
+        assert_np_equal(input_b.grad.numpy(), np.array([1.0, 1.0, 0.0]))
+
+
+@wp.struct
+class NestedStruct:
+    v: wp.vec2
 
 
 @wp.struct
-class MyStruct:
-    scalar: float
-    vec: wp.vec3
+class ParentStruct:
+    a: float
+    n: NestedStruct
 
 
 @wp.func
-def overload_fn(x: MyStruct):
-    return x.vec[0] * x.vec[1] * x.vec[2] * 4.0, wp.length(x.vec), x.scalar**0.5
+def noop(a: Any):
+    pass
 
 
-@wp.func_grad(overload_fn)
-def overload_fn_grad(x: MyStruct, adj_ret0: float, adj_ret1: float, adj_ret2: float):
-    wp.adjoint[x.scalar] += x.scalar * adj_ret0 * 10.0
-    wp.adjoint[x.vec][0] += adj_ret0 * x.vec[1] * x.vec[2] * 20.0
-    wp.adjoint[x.vec][1] += adj_ret1 * x.vec[0] * x.vec[2] * 30.0
-    wp.adjoint[x.vec][2] += adj_ret2 * x.vec[0] * x.vec[1] * 40.0
+@wp.func
+def sum2(v: wp.vec2):
+    return v[0] + v[1]
 
 
 @wp.kernel
-def run_overload_float_fn(
-    xs: wp.array(dtype=float),
-    ys: wp.array(dtype=float),
-    output0: wp.array(dtype=float),
-    output1: wp.array(dtype=float)
-):
-    i = wp.tid()
-    out0, out1 = overload_fn(xs[i], ys[i])
-    output0[i] = out0
-    output1[i] = out1
+def test_struct_attribute_gradient_kernel(src: wp.array(dtype=float), res: wp.array(dtype=float)):
+    tid = wp.tid()
 
+    p = ParentStruct(src[tid], NestedStruct(wp.vec2(2.0 * src[tid])))
+
+    # test that we are not losing gradients when accessing attributes
+    noop(p.a)
+    noop(p.n)
+    noop(p.n.v)
+
+    res[tid] = p.a + sum2(p.n.v)
+
+
+def test_struct_attribute_gradient(test_case, device):
+    src = wp.array([1], dtype=float, requires_grad=True)
+    res = wp.empty_like(src)
 
-@wp.kernel
-def run_overload_struct_fn(xs: wp.array(dtype=MyStruct), output: wp.array(dtype=float)):
-    i = wp.tid()
-    out0, out1, out2 = overload_fn(xs[i])
-    output[i] = out0 + out1 + out2
-
-
-def test_custom_overload_grad(test, device):
-    dim = 3
-    xs_float = wp.array(np.arange(1.0, dim + 1.0), dtype=wp.float32, requires_grad=True)
-    ys_float = wp.array(np.arange(10.0, dim + 10.0), dtype=wp.float32, requires_grad=True)
-    out0_float = wp.zeros(dim)
-    out1_float = wp.zeros(dim)
-    tape = wp.Tape()
-    with tape:
-        wp.launch(
-            run_overload_float_fn,
-            dim=dim,
-            inputs=[xs_float, ys_float],
-            outputs=[out0_float, out1_float])
-    tape.backward(grads={
-        out0_float: wp.array(np.ones(dim), dtype=wp.float32),
-        out1_float: wp.array(np.ones(dim), dtype=wp.float32)})
-    assert_np_equal(xs_float.grad.numpy(), xs_float.numpy() * 42.0 + ys_float.numpy() * 10.0)
-    assert_np_equal(ys_float.grad.numpy(), ys_float.numpy() * 3.0)
-
-    x0 = MyStruct()
-    x0.vec = wp.vec3(1., 2., 3.)
-    x0.scalar = 4.
-    x1 = MyStruct()
-    x1.vec = wp.vec3(5., 6., 7.)
-    x1.scalar = -1.0
-    x2 = MyStruct()
-    x2.vec = wp.vec3(8., 9., 10.)
-    x2.scalar = 19.0
-    xs_struct = wp.array([x0, x1, x2], dtype=MyStruct, requires_grad=True)
-    out_struct = wp.zeros(dim)
     tape = wp.Tape()
     with tape:
-        wp.launch(
-            run_overload_struct_fn,
-            dim=dim,
-            inputs=[xs_struct],
-            outputs=[out_struct])
-    tape.backward(grads={out_struct: wp.array(np.ones(dim), dtype=wp.float32)})
-    xs_struct_np = xs_struct.numpy()
-    struct_grads = xs_struct.grad.numpy()
-    # fmt: off
-    assert_np_equal(
-        np.array([g[0] for g in struct_grads]),
-        np.array([g[0] * 10.0 for g in xs_struct_np]))
-    assert_np_equal(
-        np.array([g[1][0] for g in struct_grads]),
-        np.array([g[1][1] * g[1][2] * 20.0 for g in xs_struct_np]))
-    assert_np_equal(
-        np.array([g[1][1] for g in struct_grads]),
-        np.array([g[1][0] * g[1][2] * 30.0 for g in xs_struct_np]))
-    assert_np_equal(
-        np.array([g[1][2] for g in struct_grads]),
-        np.array([g[1][0] * g[1][1] * 40.0 for g in xs_struct_np]))
-    # fmt: on
+        wp.launch(test_struct_attribute_gradient_kernel, dim=1, inputs=[src, res])
+
+    res.grad.fill_(1.0)
+    tape.backward()
+
+    test_case.assertEqual(src.grad.numpy()[0], 5.0)
 
 
+devices = get_test_devices()
 
-def register(parent):
-    devices = get_test_devices()
 
-    class TestGrad(parent):
-        pass
+class TestGrad(unittest.TestCase):
+    pass
 
-    # add_function_test(TestGrad, "test_while_loop_grad", test_while_loop_grad, devices=devices)
-    add_function_test(TestGrad, "test_for_loop_nested_for_grad", test_for_loop_nested_for_grad, devices=devices)
-    add_function_test(TestGrad, "test_scalar_grad", test_scalar_grad, devices=devices)
-    add_function_test(TestGrad, "test_for_loop_grad", test_for_loop_grad, devices=devices)
-    add_function_test(TestGrad, "test_for_loop_graph_grad", test_for_loop_graph_grad, devices=wp.get_cuda_devices())
-    add_function_test(TestGrad, "test_for_loop_nested_if_grad", test_for_loop_nested_if_grad, devices=devices)
-    add_function_test(TestGrad, "test_preserve_outputs_grad", test_preserve_outputs_grad, devices=devices)
-    add_function_test(TestGrad, "test_vector_math_grad", test_vector_math_grad, devices=devices)
-    add_function_test(TestGrad, "test_matrix_math_grad", test_matrix_math_grad, devices=devices)
-    add_function_test(TestGrad, "test_3d_math_grad", test_3d_math_grad, devices=devices)
-    add_function_test(TestGrad, "test_multi_valued_function_grad", test_multi_valued_function_grad, devices=devices)
-    add_function_test(TestGrad, "test_mesh_grad", test_mesh_grad, devices=devices)
-    add_function_test(TestGrad, "test_custom_replay_grad", test_custom_replay_grad, devices=devices)
-    add_function_test(TestGrad, "test_custom_overload_grad", test_custom_overload_grad, devices=devices)
 
-    return TestGrad
+# add_function_test(TestGrad, "test_while_loop_grad", test_while_loop_grad, devices=devices)
+add_function_test(TestGrad, "test_for_loop_nested_for_grad", test_for_loop_nested_for_grad, devices=devices)
+add_function_test(TestGrad, "test_scalar_grad", test_scalar_grad, devices=devices)
+add_function_test(TestGrad, "test_for_loop_grad", test_for_loop_grad, devices=devices)
+add_function_test(
+    TestGrad, "test_for_loop_graph_grad", test_for_loop_graph_grad, devices=get_unique_cuda_test_devices()
+)
+add_function_test(TestGrad, "test_for_loop_nested_if_grad", test_for_loop_nested_if_grad, devices=devices)
+add_function_test(TestGrad, "test_preserve_outputs_grad", test_preserve_outputs_grad, devices=devices)
+add_function_test(TestGrad, "test_vector_math_grad", test_vector_math_grad, devices=devices)
+add_function_test(TestGrad, "test_matrix_math_grad", test_matrix_math_grad, devices=devices)
+add_function_test(TestGrad, "test_3d_math_grad", test_3d_math_grad, devices=devices)
+add_function_test(TestGrad, "test_multi_valued_function_grad", test_multi_valued_function_grad, devices=devices)
+add_function_test(TestGrad, "test_mesh_grad", test_mesh_grad, devices=devices)
+add_function_test(TestGrad, "test_name_clash", test_name_clash, devices=devices)
+add_function_test(TestGrad, "test_struct_attribute_gradient", test_struct_attribute_gradient, devices=devices)
 
 
 if __name__ == "__main__":
-    c = register(unittest.TestCase)
+    wp.build.clear_kernel_cache()
     unittest.main(verbosity=2, failfast=False)
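The test_name_clash addition above exercises Warp's custom-gradient hook: @wp.func_grad(fn) registers a hand-written adjoint for fn, and wp.adjoint[arg] accumulates the gradient flowing into each argument. A condensed sketch of the same mechanism follows, using a hypothetical `square` function rather than anything from the test suite:

import warp as wp

wp.init()

@wp.func
def square(x: float) -> float:
    return x * x

@wp.func_grad(square)
def adj_square(x: float, adj_ret: float):
    # d(x*x)/dx = 2x, scaled by the incoming adjoint
    wp.adjoint[x] += 2.0 * x * adj_ret

@wp.kernel
def apply_square(xs: wp.array(dtype=float), out: wp.array(dtype=float)):
    tid = wp.tid()
    out[tid] = square(xs[tid])

xs = wp.array([1.0, 2.0, 3.0], dtype=float, requires_grad=True)
out = wp.zeros_like(xs)

tape = wp.Tape()
with tape:
    wp.launch(apply_square, dim=len(xs), inputs=[xs], outputs=[out])

tape.backward(grads={out: wp.array([1.0, 1.0, 1.0], dtype=float)})
print(xs.grad.numpy())  # expected: [2. 4. 6.]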
warp/tests/test_grad_customs.py
ADDED

@@ -0,0 +1,176 @@
+# Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto. Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+import unittest
+
+import numpy as np
+
+import warp as wp
+from warp.tests.unittest_utils import *
+
+wp.init()
+
+
+# atomic add function that memorizes which thread incremented the counter
+# so that the correct counter value per thread can be used in the replay
+# phase of the backward pass
+@wp.func
+def reversible_increment(
+    counter: wp.array(dtype=int), counter_index: int, value: int, thread_values: wp.array(dtype=int), tid: int
+):
+    next_index = wp.atomic_add(counter, counter_index, value)
+    thread_values[tid] = next_index
+    return next_index
+
+
+@wp.func_replay(reversible_increment)
+def replay_reversible_increment(
+    counter: wp.array(dtype=int), counter_index: int, value: int, thread_values: wp.array(dtype=int), tid: int
+):
+    return thread_values[tid]
+
+
+def test_custom_replay_grad(test, device):
+    num_threads = 128
+    counter = wp.zeros(1, dtype=wp.int32, device=device)
+    thread_ids = wp.zeros(num_threads, dtype=wp.int32, device=device)
+    inputs = wp.array(np.arange(num_threads, dtype=np.float32), device=device, requires_grad=True)
+    outputs = wp.zeros_like(inputs)
+
+    @wp.kernel
+    def run_atomic_add(
+        input: wp.array(dtype=float),
+        counter: wp.array(dtype=int),
+        thread_values: wp.array(dtype=int),
+        output: wp.array(dtype=float),
+    ):
+        tid = wp.tid()
+        idx = reversible_increment(counter, 0, 1, thread_values, tid)
+        output[idx] = input[idx] ** 2.0
+
+    tape = wp.Tape()
+    with tape:
+        wp.launch(
+            run_atomic_add, dim=num_threads, inputs=[inputs, counter, thread_ids], outputs=[outputs], device=device
+        )
+
+    tape.backward(grads={outputs: wp.array(np.ones(num_threads, dtype=np.float32), device=device)})
+    assert_np_equal(inputs.grad.numpy(), 2.0 * inputs.numpy(), tol=1e-4)
+
+
+@wp.func
+def overload_fn(x: float, y: float):
+    return x * 3.0 + y / 3.0, y**2.5
+
+
+@wp.func_grad(overload_fn)
+def overload_fn_grad(x: float, y: float, adj_ret0: float, adj_ret1: float):
+    wp.adjoint[x] += x * adj_ret0 * 42.0 + y * adj_ret1 * 10.0
+    wp.adjoint[y] += y * adj_ret1 * 3.0
+
+
+@wp.struct
+class MyStruct:
+    scalar: float
+    vec: wp.vec3
+
+
+@wp.func
+def overload_fn(x: MyStruct):
+    return x.vec[0] * x.vec[1] * x.vec[2] * 4.0, wp.length(x.vec), x.scalar**0.5
+
+
+@wp.func_grad(overload_fn)
+def overload_fn_grad(x: MyStruct, adj_ret0: float, adj_ret1: float, adj_ret2: float):
+    wp.adjoint[x.scalar] += x.scalar * adj_ret0 * 10.0
+    wp.adjoint[x.vec][0] += adj_ret0 * x.vec[1] * x.vec[2] * 20.0
+    wp.adjoint[x.vec][1] += adj_ret1 * x.vec[0] * x.vec[2] * 30.0
+    wp.adjoint[x.vec][2] += adj_ret2 * x.vec[0] * x.vec[1] * 40.0
+
+
+@wp.kernel
+def run_overload_float_fn(
+    xs: wp.array(dtype=float), ys: wp.array(dtype=float), output0: wp.array(dtype=float), output1: wp.array(dtype=float)
+):
+    i = wp.tid()
+    out0, out1 = overload_fn(xs[i], ys[i])
+    output0[i] = out0
+    output1[i] = out1
+
+
+@wp.kernel
+def run_overload_struct_fn(xs: wp.array(dtype=MyStruct), output: wp.array(dtype=float)):
+    i = wp.tid()
+    out0, out1, out2 = overload_fn(xs[i])
+    output[i] = out0 + out1 + out2
+
+
+def test_custom_overload_grad(test, device):
+    dim = 3
+    xs_float = wp.array(np.arange(1.0, dim + 1.0), dtype=wp.float32, requires_grad=True)
+    ys_float = wp.array(np.arange(10.0, dim + 10.0), dtype=wp.float32, requires_grad=True)
+    out0_float = wp.zeros(dim)
+    out1_float = wp.zeros(dim)
+    tape = wp.Tape()
+    with tape:
+        wp.launch(run_overload_float_fn, dim=dim, inputs=[xs_float, ys_float], outputs=[out0_float, out1_float])
+    tape.backward(
+        grads={
+            out0_float: wp.array(np.ones(dim), dtype=wp.float32),
+            out1_float: wp.array(np.ones(dim), dtype=wp.float32),
+        }
+    )
+    assert_np_equal(xs_float.grad.numpy(), xs_float.numpy() * 42.0 + ys_float.numpy() * 10.0)
+    assert_np_equal(ys_float.grad.numpy(), ys_float.numpy() * 3.0)
+
+    x0 = MyStruct()
+    x0.vec = wp.vec3(1.0, 2.0, 3.0)
+    x0.scalar = 4.0
+    x1 = MyStruct()
+    x1.vec = wp.vec3(5.0, 6.0, 7.0)
+    x1.scalar = -1.0
+    x2 = MyStruct()
+    x2.vec = wp.vec3(8.0, 9.0, 10.0)
+    x2.scalar = 19.0
+    xs_struct = wp.array([x0, x1, x2], dtype=MyStruct, requires_grad=True)
+    out_struct = wp.zeros(dim)
+    tape = wp.Tape()
+    with tape:
+        wp.launch(run_overload_struct_fn, dim=dim, inputs=[xs_struct], outputs=[out_struct])
+    tape.backward(grads={out_struct: wp.array(np.ones(dim), dtype=wp.float32)})
+    xs_struct_np = xs_struct.numpy()
+    struct_grads = xs_struct.grad.numpy()
+    # fmt: off
+    assert_np_equal(
+        np.array([g[0] for g in struct_grads]),
+        np.array([g[0] * 10.0 for g in xs_struct_np]))
+    assert_np_equal(
+        np.array([g[1][0] for g in struct_grads]),
+        np.array([g[1][1] * g[1][2] * 20.0 for g in xs_struct_np]))
+    assert_np_equal(
+        np.array([g[1][1] for g in struct_grads]),
+        np.array([g[1][0] * g[1][2] * 30.0 for g in xs_struct_np]))
+    assert_np_equal(
+        np.array([g[1][2] for g in struct_grads]),
+        np.array([g[1][0] * g[1][1] * 40.0 for g in xs_struct_np]))
+    # fmt: on
+
+
+devices = get_test_devices()
+
+
+class TestGradCustoms(unittest.TestCase):
+    pass
+
+
+add_function_test(TestGradCustoms, "test_custom_replay_grad", test_custom_replay_grad, devices=devices)
+add_function_test(TestGradCustoms, "test_custom_overload_grad", test_custom_overload_grad, devices=devices)
+
+
+if __name__ == "__main__":
+    wp.build.clear_kernel_cache()
+    unittest.main(verbosity=2, failfast=False)
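The comment at the top of the new file describes the idea behind @wp.func_replay: the tape re-runs forward kernels during the backward pass, so side-effecting operations such as atomic increments must not execute a second time. A stripped-down sketch of the forward/replay pairing, assuming the decorator semantics shown above (`bump` and `replay_bump` are hypothetical names, not part of the package):

import warp as wp

wp.init()

@wp.func
def bump(counter: wp.array(dtype=int), slots: wp.array(dtype=int), tid: int):
    # forward pass: claim a unique slot via an atomic add and remember it
    idx = wp.atomic_add(counter, 0, 1)
    slots[tid] = idx
    return idx

@wp.func_replay(bump)
def replay_bump(counter: wp.array(dtype=int), slots: wp.array(dtype=int), tid: int):
    # backward pass: return the slot recorded in the forward pass instead of
    # re-running the atomic add, keeping the replay deterministic
    return slots[tid]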