PyPI - warp-lang - Versions diffs - 0.9.0__py3-none-win_amd64.whl → 0.11.0__py3-none-win_amd64.whl - Mend

warp-lang 0.9.0__py3-none-win_amd64.whl → 0.11.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (315)

warp/__init__.py +15 -7
warp/__init__.pyi +1 -0
warp/bin/warp-clang.dll +0 -0
warp/bin/warp.dll +0 -0
warp/build.py +22 -443
warp/build_dll.py +384 -0
warp/builtins.py +998 -488
warp/codegen.py +1307 -739
warp/config.py +5 -3
warp/constants.py +6 -0
warp/context.py +1291 -548
warp/dlpack.py +31 -31
warp/fabric.py +326 -0
warp/fem/__init__.py +27 -0
warp/fem/cache.py +389 -0
warp/fem/dirichlet.py +181 -0
warp/fem/domain.py +263 -0
warp/fem/field/__init__.py +101 -0
warp/fem/field/field.py +149 -0
warp/fem/field/nodal_field.py +299 -0
warp/fem/field/restriction.py +21 -0
warp/fem/field/test.py +181 -0
warp/fem/field/trial.py +183 -0
warp/fem/geometry/__init__.py +19 -0
warp/fem/geometry/closest_point.py +70 -0
warp/fem/geometry/deformed_geometry.py +271 -0
warp/fem/geometry/element.py +744 -0
warp/fem/geometry/geometry.py +186 -0
warp/fem/geometry/grid_2d.py +373 -0
warp/fem/geometry/grid_3d.py +435 -0
warp/fem/geometry/hexmesh.py +953 -0
warp/fem/geometry/partition.py +376 -0
warp/fem/geometry/quadmesh_2d.py +532 -0
warp/fem/geometry/tetmesh.py +840 -0
warp/fem/geometry/trimesh_2d.py +577 -0
warp/fem/integrate.py +1616 -0
warp/fem/operator.py +191 -0
warp/fem/polynomial.py +213 -0
warp/fem/quadrature/__init__.py +2 -0
warp/fem/quadrature/pic_quadrature.py +245 -0
warp/fem/quadrature/quadrature.py +294 -0
warp/fem/space/__init__.py +292 -0
warp/fem/space/basis_space.py +489 -0
warp/fem/space/collocated_function_space.py +105 -0
warp/fem/space/dof_mapper.py +236 -0
warp/fem/space/function_space.py +145 -0
warp/fem/space/grid_2d_function_space.py +267 -0
warp/fem/space/grid_3d_function_space.py +306 -0
warp/fem/space/hexmesh_function_space.py +352 -0
warp/fem/space/partition.py +350 -0
warp/fem/space/quadmesh_2d_function_space.py +369 -0
warp/fem/space/restriction.py +160 -0
warp/fem/space/shape/__init__.py +15 -0
warp/fem/space/shape/cube_shape_function.py +738 -0
warp/fem/space/shape/shape_function.py +103 -0
warp/fem/space/shape/square_shape_function.py +611 -0
warp/fem/space/shape/tet_shape_function.py +567 -0
warp/fem/space/shape/triangle_shape_function.py +429 -0
warp/fem/space/tetmesh_function_space.py +292 -0
warp/fem/space/topology.py +295 -0
warp/fem/space/trimesh_2d_function_space.py +221 -0
warp/fem/types.py +77 -0
warp/fem/utils.py +495 -0
warp/native/array.h +164 -55
warp/native/builtin.h +150 -174
warp/native/bvh.cpp +75 -328
warp/native/bvh.cu +406 -23
warp/native/bvh.h +37 -45
warp/native/clang/clang.cpp +136 -24
warp/native/crt.cpp +1 -76
warp/native/crt.h +111 -104
warp/native/cuda_crt.h +1049 -0
warp/native/cuda_util.cpp +15 -3
warp/native/cuda_util.h +3 -1
warp/native/cutlass/tools/library/scripts/conv2d_operation.py +463 -0
warp/native/cutlass/tools/library/scripts/conv3d_operation.py +321 -0
warp/native/cutlass/tools/library/scripts/gemm_operation.py +988 -0
warp/native/cutlass/tools/library/scripts/generator.py +4625 -0
warp/native/cutlass/tools/library/scripts/library.py +799 -0
warp/native/cutlass/tools/library/scripts/manifest.py +402 -0
warp/native/cutlass/tools/library/scripts/pycutlass/docs/source/conf.py +96 -0
warp/native/cutlass/tools/library/scripts/pycutlass/profile/conv/conv2d_f16_sm80.py +106 -0
warp/native/cutlass/tools/library/scripts/pycutlass/profile/gemm/gemm_f32_sm80.py +91 -0
warp/native/cutlass/tools/library/scripts/pycutlass/setup.py +80 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/__init__.py +48 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/arguments.py +118 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/c_types.py +241 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/compiler.py +432 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/conv2d_operation.py +631 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/epilogue.py +1026 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/frontend.py +104 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/gemm_operation.py +1276 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/library.py +744 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/memory_manager.py +74 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/operation.py +110 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/parser.py +619 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/reduction_operation.py +398 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/tensor_ref.py +70 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/__init__.py +4 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/conv2d_testbed.py +646 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_grouped_testbed.py +235 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_testbed.py +557 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/profiler.py +70 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/type_hint.py +39 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/__init__.py +1 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/device.py +76 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/reference_model.py +255 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/__init__.py +0 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +201 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +177 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +98 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +95 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +163 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +187 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +309 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +54 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +253 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +97 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +242 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/run_all_tests.py +10 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/frontend/test_frontend.py +146 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/__init__.py +0 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_bf16_sm80.py +96 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f16_sm80.py +447 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f32_sm80.py +146 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f64_sm80.py +102 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_grouped_sm80.py +203 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_s8_sm80.py +229 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/run_all_tests.py +9 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/unit/test_sm80.py +453 -0
warp/native/cutlass/tools/library/scripts/rank_2k_operation.py +398 -0
warp/native/cutlass/tools/library/scripts/rank_k_operation.py +387 -0
warp/native/cutlass/tools/library/scripts/rt.py +796 -0
warp/native/cutlass/tools/library/scripts/symm_operation.py +400 -0
warp/native/cutlass/tools/library/scripts/trmm_operation.py +407 -0
warp/native/cutlass_gemm.cu +5 -3
warp/native/exports.h +1240 -949
warp/native/fabric.h +228 -0
warp/native/hashgrid.cpp +4 -4
warp/native/hashgrid.h +22 -2
warp/native/initializer_array.h +2 -2
warp/native/intersect.h +22 -7
warp/native/intersect_adj.h +8 -8
warp/native/intersect_tri.h +13 -16
warp/native/marching.cu +157 -161
warp/native/mat.h +119 -19
warp/native/matnn.h +2 -2
warp/native/mesh.cpp +108 -83
warp/native/mesh.cu +243 -6
warp/native/mesh.h +1547 -458
warp/native/nanovdb/NanoVDB.h +1 -1
warp/native/noise.h +272 -329
warp/native/quat.h +51 -8
warp/native/rand.h +45 -35
warp/native/range.h +6 -2
warp/native/reduce.cpp +157 -0
warp/native/reduce.cu +348 -0
warp/native/runlength_encode.cpp +62 -0
warp/native/runlength_encode.cu +46 -0
warp/native/scan.cu +11 -13
warp/native/scan.h +1 -0
warp/native/solid_angle.h +442 -0
warp/native/sort.cpp +13 -0
warp/native/sort.cu +9 -1
warp/native/sparse.cpp +338 -0
warp/native/sparse.cu +545 -0
warp/native/spatial.h +2 -2
warp/native/temp_buffer.h +30 -0
warp/native/vec.h +126 -24
warp/native/volume.h +120 -0
warp/native/warp.cpp +658 -53
warp/native/warp.cu +660 -68
warp/native/warp.h +112 -12
warp/optim/__init__.py +1 -0
warp/optim/linear.py +922 -0
warp/optim/sgd.py +92 -0
warp/render/render_opengl.py +392 -152
warp/render/render_usd.py +11 -11
warp/sim/__init__.py +2 -2
warp/sim/articulation.py +385 -185
warp/sim/collide.py +21 -8
warp/sim/import_mjcf.py +297 -106
warp/sim/import_urdf.py +389 -210
warp/sim/import_usd.py +198 -97
warp/sim/inertia.py +17 -18
warp/sim/integrator_euler.py +14 -8
warp/sim/integrator_xpbd.py +161 -19
warp/sim/model.py +795 -291
warp/sim/optimizer.py +2 -6
warp/sim/render.py +65 -3
warp/sim/utils.py +3 -0
warp/sparse.py +1227 -0
warp/stubs.py +665 -223
warp/tape.py +66 -15
warp/tests/__main__.py +3 -6
warp/tests/assets/curlnoise_golden.npy +0 -0
warp/tests/assets/pnoise_golden.npy +0 -0
warp/tests/assets/torus.usda +105 -105
warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
warp/tests/aux_test_unresolved_func.py +14 -0
warp/tests/aux_test_unresolved_symbol.py +14 -0
warp/tests/disabled_kinematics.py +239 -0
warp/tests/run_coverage_serial.py +31 -0
warp/tests/test_adam.py +103 -106
warp/tests/test_arithmetic.py +128 -74
warp/tests/test_array.py +1497 -211
warp/tests/test_array_reduce.py +150 -0
warp/tests/test_atomic.py +64 -28
warp/tests/test_bool.py +99 -0
warp/tests/test_builtins_resolution.py +1292 -0
warp/tests/test_bvh.py +75 -43
warp/tests/test_closest_point_edge_edge.py +54 -57
warp/tests/test_codegen.py +233 -128
warp/tests/test_compile_consts.py +28 -20
warp/tests/test_conditional.py +108 -24
warp/tests/test_copy.py +10 -12
warp/tests/test_ctypes.py +112 -88
warp/tests/test_dense.py +21 -14
warp/tests/test_devices.py +98 -0
warp/tests/test_dlpack.py +136 -108
warp/tests/test_examples.py +277 -0
warp/tests/test_fabricarray.py +955 -0
warp/tests/test_fast_math.py +15 -11
warp/tests/test_fem.py +1271 -0
warp/tests/test_fp16.py +53 -19
warp/tests/test_func.py +187 -74
warp/tests/test_generics.py +194 -49
warp/tests/test_grad.py +180 -116
warp/tests/test_grad_customs.py +176 -0
warp/tests/test_hash_grid.py +52 -37
warp/tests/test_import.py +10 -23
warp/tests/test_indexedarray.py +577 -24
warp/tests/test_intersect.py +18 -9
warp/tests/test_large.py +141 -0
warp/tests/test_launch.py +251 -15
warp/tests/test_lerp.py +64 -65
warp/tests/test_linear_solvers.py +154 -0
warp/tests/test_lvalue.py +493 -0
warp/tests/test_marching_cubes.py +12 -13
warp/tests/test_mat.py +508 -2778
warp/tests/test_mat_lite.py +115 -0
warp/tests/test_mat_scalar_ops.py +2889 -0
warp/tests/test_math.py +103 -9
warp/tests/test_matmul.py +305 -69
warp/tests/test_matmul_lite.py +410 -0
warp/tests/test_mesh.py +71 -14
warp/tests/test_mesh_query_aabb.py +41 -25
warp/tests/test_mesh_query_point.py +325 -34
warp/tests/test_mesh_query_ray.py +39 -22
warp/tests/test_mlp.py +30 -22
warp/tests/test_model.py +92 -89
warp/tests/test_modules_lite.py +39 -0
warp/tests/test_multigpu.py +88 -114
warp/tests/test_noise.py +12 -11
warp/tests/test_operators.py +16 -20
warp/tests/test_options.py +11 -11
warp/tests/test_pinned.py +17 -18
warp/tests/test_print.py +32 -11
warp/tests/test_quat.py +275 -129
warp/tests/test_rand.py +18 -16
warp/tests/test_reload.py +38 -34
warp/tests/test_rounding.py +50 -43
warp/tests/test_runlength_encode.py +190 -0
warp/tests/test_smoothstep.py +9 -11
warp/tests/test_snippet.py +143 -0
warp/tests/test_sparse.py +460 -0
warp/tests/test_spatial.py +276 -243
warp/tests/test_streams.py +110 -85
warp/tests/test_struct.py +331 -85
warp/tests/test_tape.py +39 -21
warp/tests/test_torch.py +118 -89
warp/tests/test_transient_module.py +12 -13
warp/tests/test_types.py +614 -0
warp/tests/test_utils.py +494 -0
warp/tests/test_vec.py +354 -1987
warp/tests/test_vec_lite.py +73 -0
warp/tests/test_vec_scalar_ops.py +2099 -0
warp/tests/test_volume.py +457 -293
warp/tests/test_volume_write.py +124 -134
warp/tests/unittest_serial.py +35 -0
warp/tests/unittest_suites.py +341 -0
warp/tests/unittest_utils.py +568 -0
warp/tests/unused_test_misc.py +71 -0
warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
warp/thirdparty/appdirs.py +36 -45
warp/thirdparty/unittest_parallel.py +549 -0
warp/torch.py +72 -30
warp/types.py +1744 -713
warp/utils.py +360 -350
warp_lang-0.11.0.dist-info/LICENSE.md +36 -0
warp_lang-0.11.0.dist-info/METADATA +238 -0
warp_lang-0.11.0.dist-info/RECORD +332 -0
{warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/WHEEL +1 -1
warp/bin/warp-clang.exp +0 -0
warp/bin/warp-clang.lib +0 -0
warp/bin/warp.exp +0 -0
warp/bin/warp.lib +0 -0
warp/tests/test_all.py +0 -215
warp/tests/test_array_scan.py +0 -60
warp/tests/test_base.py +0 -208
warp/tests/test_unresolved_func.py +0 -7
warp/tests/test_unresolved_symbol.py +0 -7
warp_lang-0.9.0.dist-info/METADATA +0 -20
warp_lang-0.9.0.dist-info/RECORD +0 -177
/warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
/warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
/warp/tests/{test_square.py → aux_test_square.py} +0 -0
{warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/top_level.txt +0 -0

warp/tests/test_grad.py CHANGED Viewed

@@ -5,9 +5,13 @@
 # distribution of this software and related documentation without an express
 # license agreement from NVIDIA CORPORATION is strictly prohibited.
+import unittest
+from typing import Any
 import numpy as np
 import warp as wp
-from warp.tests.test_base import *
+from warp.tests.unittest_utils import *
 wp.init()
@@ -63,26 +67,26 @@ def test_for_loop_grad(test, device):
 def test_for_loop_graph_grad(test, device):
+    wp.load_module(device=device)
     n = 32
     val = np.ones(n, dtype=np.float32)
     x = wp.array(val, device=device, requires_grad=True)
     sum = wp.zeros(1, dtype=wp.float32, device=device, requires_grad=True)
-    wp.force_load()
-    wp.capture_begin()
-    tape = wp.Tape()
-    with tape:
-        wp.launch(for_loop_grad, dim=1, inputs=[n, x, sum], device=device)
-    tape.backward(loss=sum)
+    wp.capture_begin(device, force_module_load=False)
+    try:
+        tape = wp.Tape()
+        with tape:
+            wp.launch(for_loop_grad, dim=1, inputs=[n, x, sum], device=device)
-    graph = wp.capture_end()
+        tape.backward(loss=sum)
+    finally:
+        graph = wp.capture_end(device)
     wp.capture_launch(graph)
-    wp.synchronize()
+    wp.synchronize_device(device)
     # ensure forward pass outputs persist
     assert_np_equal(sum.numpy(), 2.0 * np.sum(x.numpy()))
@@ -90,7 +94,7 @@ def test_for_loop_graph_grad(test, device):
     assert_np_equal(x.grad.numpy(), 2.0 * val)
     wp.capture_launch(graph)
-    wp.synchronize()
+    wp.synchronize_device(device)
 @wp.kernel
@@ -115,75 +119,20 @@ def for_loop_nested_if_grad(n: int, x: wp.array(dtype=float), s: wp.array(dtype=
 def test_for_loop_nested_if_grad(test, device):
     n = 32
     val = np.ones(n, dtype=np.float32)
+    # fmt: off
     expected_val = [
-        2.0,
-        2.0,
-        2.0,
-        2.0,
-        2.0,
-        2.0,
-        2.0,
-        2.0,
-        4.0,
-        4.0,
-        4.0,
-        4.0,
-        4.0,
-        4.0,
-        4.0,
-        4.0,
-        6.0,
-        6.0,
-        6.0,
-        6.0,
-        6.0,
-        6.0,
-        6.0,
-        6.0,
-        8.0,
-        8.0,
-        8.0,
-        8.0,
-        8.0,
-        8.0,
-        8.0,
-        8.0,
+        2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+        4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0,
+        6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0,
+        8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0,
     ]
     expected_grad = [
-        2.0,
-        2.0,
-        2.0,
-        2.0,
-        2.0,
-        2.0,
-        2.0,
-        2.0,
-        4.0,
-        4.0,
-        4.0,
-        4.0,
-        4.0,
-        4.0,
-        4.0,
-        4.0,
-        6.0,
-        6.0,
-        6.0,
-        6.0,
-        6.0,
-        6.0,
-        6.0,
-        6.0,
-        8.0,
-        8.0,
-        8.0,
-        8.0,
-        8.0,
-        8.0,
-        8.0,
-        8.0,
+        2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
+        4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0,
+        6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0,
+        8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0,
     ]
+    # fmt: on
     x = wp.array(val, device=device, requires_grad=True)
     sum = wp.zeros(1, dtype=wp.float32, device=device, requires_grad=True)
@@ -327,8 +276,7 @@ def gradcheck(func, func_name, inputs, device, eps=1e-4, tol=1e-2):
     numerical gradient computed using finite differences.
     """
-    module = wp.get_module(func.__module__)
-    kernel = wp.Kernel(func=func, key=func_name, module=module)
+    kernel = wp.Kernel(func=func, key=func_name)
     def f(xs):
         # call the kernel without taping for finite differences
@@ -371,7 +319,7 @@ def gradcheck(func, func_name, inputs, device, eps=1e-4, tol=1e-2):
 def test_vector_math_grad(test, device):
-    np.random.seed(123)
+    rng = np.random.default_rng(123)
     # test unary operations
     for dim, vec_type in [(2, wp.vec2), (3, wp.vec3), (4, wp.vec4), (4, wp.quat)]:
@@ -387,14 +335,14 @@ def test_vector_math_grad(test, device):
         # run the tests with 5 different random inputs
         for _ in range(5):
-            x = wp.array(np.random.randn(1, dim).astype(np.float32), dtype=vec_type, device=device)
+            x = wp.array(rng.random(size=(1, dim), dtype=np.float32), dtype=vec_type, device=device)
             gradcheck(check_length, f"check_length_{vec_type.__name__}", [x], device)
             gradcheck(check_length_sq, f"check_length_sq_{vec_type.__name__}", [x], device)
             gradcheck(check_normalize, f"check_normalize_{vec_type.__name__}", [x], device)
 def test_matrix_math_grad(test, device):
-    np.random.seed(123)
+    rng = np.random.default_rng(123)
     # test unary operations
     for dim, mat_type in [(2, wp.mat22), (3, wp.mat33), (4, wp.mat44)]:
@@ -407,13 +355,13 @@ def test_matrix_math_grad(test, device):
         # run the tests with 5 different random inputs
         for _ in range(5):
-            x = wp.array(np.random.randn(1, dim, dim).astype(np.float32), ndim=1, dtype=mat_type, device=device)
+            x = wp.array(rng.random(size=(1, dim, dim), dtype=np.float32), ndim=1, dtype=mat_type, device=device)
             gradcheck(check_determinant, f"check_length_{mat_type.__name__}", [x], device)
             gradcheck(check_trace, f"check_length_sq_{mat_type.__name__}", [x], device)
 def test_3d_math_grad(test, device):
-    np.random.seed(123)
+    rng = np.random.default_rng(123)
     # test binary operations
     def check_cross(vs: wp.array(dtype=wp.vec3), out: wp.array(dtype=float)):
@@ -463,7 +411,9 @@ def test_3d_math_grad(test, device):
     # run the tests with 5 different random inputs
     for _ in range(5):
-        x = wp.array(np.random.randn(2, 3).astype(np.float32), dtype=wp.vec3, device=device, requires_grad=True)
+        x = wp.array(
+            rng.standard_normal(size=(2, 3), dtype=np.float32), dtype=wp.vec3, device=device, requires_grad=True
+        )
         gradcheck(check_cross, "check_cross_3d", [x], device)
         gradcheck(check_dot, "check_dot_3d", [x], device)
         gradcheck(check_mat33, "check_mat33_3d", [x], device, eps=2e-2)
@@ -473,6 +423,28 @@ def test_3d_math_grad(test, device):
         gradcheck(check_rot_quat_inv, "check_rot_quat_inv_3d", [x], device)
+def test_multi_valued_function_grad(test, device):
+    rng = np.random.default_rng(123)
+    @wp.func
+    def multi_valued(x: float, y: float, z: float):
+        return wp.sin(x), wp.cos(y) * z, wp.sqrt(z) / wp.abs(x)
+    # test multi-valued functions
+    def check_multi_valued(vs: wp.array(dtype=wp.vec3), out: wp.array(dtype=float)):
+        tid = wp.tid()
+        v = vs[tid]
+        a, b, c = multi_valued(v[0], v[1], v[2])
+        out[tid] = a + b + c
+    # run the tests with 5 different random inputs
+    for _ in range(5):
+        x = wp.array(
+            rng.standard_normal(size=(2, 3), dtype=np.float32), dtype=wp.vec3, device=device, requires_grad=True
+        )
+        gradcheck(check_multi_valued, "check_multi_valued_3d", [x], device)
 def test_mesh_grad(test, device):
     pos = wp.array(
         [
@@ -486,12 +458,7 @@ def test_mesh_grad(test, device):
         requires_grad=True,
     )
     indices = wp.array(
-        [
-            0, 1, 2,
-            0, 2, 3,
-            0, 3, 1,
-            1, 3, 2
-        ],
+        [0, 1, 2, 0, 2, 3, 0, 3, 1, 1, 3, 2],
         dtype=wp.int32,
         device=device,
     )
@@ -501,25 +468,23 @@ def test_mesh_grad(test, device):
     @wp.func
     def compute_triangle_area(mesh_id: wp.uint64, tri_id: int):
         mesh = wp.mesh_get(mesh_id)
-        i, j, k = mesh.indices[tri_id*3+0], mesh.indices[tri_id*3+1], mesh.indices[tri_id*3+2]
+        i, j, k = mesh.indices[tri_id * 3 + 0], mesh.indices[tri_id * 3 + 1], mesh.indices[tri_id * 3 + 2]
         a = mesh.points[i]
         b = mesh.points[j]
         c = mesh.points[k]
         return wp.length(wp.cross(b - a, c - a)) * 0.5
+    @wp.kernel
     def compute_area(mesh_id: wp.uint64, out: wp.array(dtype=wp.float32)):
         wp.atomic_add(out, 0, compute_triangle_area(mesh_id, wp.tid()))
-    module = wp.get_module(compute_area.__module__)
-    kernel = wp.Kernel(func=compute_area, key="compute_area", module=module)
     num_tris = int(len(indices) / 3)
     # compute analytical gradient
     tape = wp.Tape()
     output = wp.zeros(1, dtype=wp.float32, device=device, requires_grad=True)
     with tape:
-        wp.launch(kernel, dim=num_tris, inputs=[mesh.id], outputs=[output], device=device)
+        wp.launch(compute_area, dim=num_tris, inputs=[mesh.id], outputs=[output], device=device)
     tape.backward(loss=output)
@@ -536,13 +501,13 @@ def test_mesh_grad(test, device):
             pos = wp.array(pos_np, dtype=wp.vec3, device=device)
             mesh = wp.Mesh(points=pos, indices=indices)
             output.zero_()
-            wp.launch(kernel, dim=num_tris, inputs=[mesh.id], outputs=[output], device=device)
+            wp.launch(compute_area, dim=num_tris, inputs=[mesh.id], outputs=[output], device=device)
             f1 = output.numpy()[0]
             pos_np[i, j] -= 2 * eps
             pos = wp.array(pos_np, dtype=wp.vec3, device=device)
             mesh = wp.Mesh(points=pos, indices=indices)
             output.zero_()
-            wp.launch(kernel, dim=num_tris, inputs=[mesh.id], outputs=[output], device=device)
+            wp.launch(compute_area, dim=num_tris, inputs=[mesh.id], outputs=[output], device=device)
             f2 = output.numpy()[0]
             pos_np[i, j] += eps
             fd_grad[i, j] = (f1 - f2) / (2 * eps)
@@ -550,27 +515,126 @@ def test_mesh_grad(test, device):
     assert np.allclose(ad_grad, fd_grad, atol=1e-3)
-def register(parent):
-    devices = get_test_devices()
+@wp.func
+def name_clash(a: float, b: float) -> float:
+    return a + b
+@wp.func_grad(name_clash)
+def adj_name_clash(a: float, b: float, adj_ret: float):
+    # names `adj_a` and `adj_b` must not clash with function args of generated function
+    adj_a = 0.0
+    adj_b = 0.0
+    if a < 0.0:
+        adj_a = adj_ret
+    if b > 0.0:
+        adj_b = adj_ret
+    wp.adjoint[a] += adj_a
+    wp.adjoint[b] += adj_b
+@wp.kernel
+def name_clash_kernel(
+    input_a: wp.array(dtype=float),
+    input_b: wp.array(dtype=float),
+    output: wp.array(dtype=float),
+):
+    tid = wp.tid()
+    output[tid] = name_clash(input_a[tid], input_b[tid])
+def test_name_clash(test, device):
+    # tests that no name clashes occur when variable names such as `adj_a` are used in custom gradient code
+    with wp.ScopedDevice(device):
+        input_a = wp.array([1.0, -2.0, 3.0], dtype=wp.float32, requires_grad=True)
+        input_b = wp.array([4.0, 5.0, -6.0], dtype=wp.float32, requires_grad=True)
+        output = wp.zeros(3, dtype=wp.float32, requires_grad=True)
+        tape = wp.Tape()
+        with tape:
+            wp.launch(name_clash_kernel, dim=len(input_a), inputs=[input_a, input_b], outputs=[output])
+        tape.backward(grads={output: wp.array(np.ones(len(input_a), dtype=np.float32))})
+        assert_np_equal(input_a.grad.numpy(), np.array([0.0, 1.0, 0.0]))
+        assert_np_equal(input_b.grad.numpy(), np.array([1.0, 1.0, 0.0]))
+@wp.struct
+class NestedStruct:
+    v: wp.vec2
+@wp.struct
+class ParentStruct:
+    a: float
+    n: NestedStruct
+@wp.func
+def noop(a: Any):
+    pass
+@wp.func
+def sum2(v: wp.vec2):
+    return v[0] + v[1]
+@wp.kernel
+def test_struct_attribute_gradient_kernel(src: wp.array(dtype=float), res: wp.array(dtype=float)):
+    tid = wp.tid()
+    p = ParentStruct(src[tid], NestedStruct(wp.vec2(2.0 * src[tid])))
+    # test that we are not losing gradients when accessing attributes
+    noop(p.a)
+    noop(p.n)
+    noop(p.n.v)
+    res[tid] = p.a + sum2(p.n.v)
+def test_struct_attribute_gradient(test_case, device):
+    src = wp.array([1], dtype=float, requires_grad=True)
+    res = wp.empty_like(src)
+    tape = wp.Tape()
+    with tape:
+        wp.launch(test_struct_attribute_gradient_kernel, dim=1, inputs=[src, res])
+    res.grad.fill_(1.0)
+    tape.backward()
+    test_case.assertEqual(src.grad.numpy()[0], 5.0)
+devices = get_test_devices()
-    class TestGrad(parent):
-        pass
+class TestGrad(unittest.TestCase):
+    pass
-    # add_function_test(TestGrad, "test_while_loop_grad", test_while_loop_grad, devices=devices)
-    add_function_test(TestGrad, "test_for_loop_nested_for_grad", test_for_loop_nested_for_grad, devices=devices)
-    add_function_test(TestGrad, "test_scalar_grad", test_scalar_grad, devices=devices)
-    add_function_test(TestGrad, "test_for_loop_grad", test_for_loop_grad, devices=devices)
-    add_function_test(TestGrad, "test_for_loop_graph_grad", test_for_loop_graph_grad, devices=wp.get_cuda_devices())
-    add_function_test(TestGrad, "test_for_loop_nested_if_grad", test_for_loop_nested_if_grad, devices=devices)
-    add_function_test(TestGrad, "test_preserve_outputs_grad", test_preserve_outputs_grad, devices=devices)
-    add_function_test(TestGrad, "test_vector_math_grad", test_vector_math_grad, devices=devices)
-    add_function_test(TestGrad, "test_matrix_math_grad", test_matrix_math_grad, devices=devices)
-    add_function_test(TestGrad, "test_3d_math_grad", test_3d_math_grad, devices=devices)
-    add_function_test(TestGrad, "test_mesh_grad", test_mesh_grad, devices=devices)
-    return TestGrad
+# add_function_test(TestGrad, "test_while_loop_grad", test_while_loop_grad, devices=devices)
+add_function_test(TestGrad, "test_for_loop_nested_for_grad", test_for_loop_nested_for_grad, devices=devices)
+add_function_test(TestGrad, "test_scalar_grad", test_scalar_grad, devices=devices)
+add_function_test(TestGrad, "test_for_loop_grad", test_for_loop_grad, devices=devices)
+add_function_test(
+    TestGrad, "test_for_loop_graph_grad", test_for_loop_graph_grad, devices=get_unique_cuda_test_devices()
+)
+add_function_test(TestGrad, "test_for_loop_nested_if_grad", test_for_loop_nested_if_grad, devices=devices)
+add_function_test(TestGrad, "test_preserve_outputs_grad", test_preserve_outputs_grad, devices=devices)
+add_function_test(TestGrad, "test_vector_math_grad", test_vector_math_grad, devices=devices)
+add_function_test(TestGrad, "test_matrix_math_grad", test_matrix_math_grad, devices=devices)
+add_function_test(TestGrad, "test_3d_math_grad", test_3d_math_grad, devices=devices)
+add_function_test(TestGrad, "test_multi_valued_function_grad", test_multi_valued_function_grad, devices=devices)
+add_function_test(TestGrad, "test_mesh_grad", test_mesh_grad, devices=devices)
+add_function_test(TestGrad, "test_name_clash", test_name_clash, devices=devices)
+add_function_test(TestGrad, "test_struct_attribute_gradient", test_struct_attribute_gradient, devices=devices)
 if __name__ == "__main__":
-    c = register(unittest.TestCase)
+    wp.build.clear_kernel_cache()
     unittest.main(verbosity=2, failfast=False)

warp/tests/test_grad_customs.py ADDED Viewed

@@ -0,0 +1,176 @@
+# Copyright (c) 2022 NVIDIA CORPORATION.  All rights reserved.
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.  Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+import unittest
+import numpy as np
+import warp as wp
+from warp.tests.unittest_utils import *
+wp.init()
+# Atomic add that records, per thread, the counter value it obtained:
+# the value returned by wp.atomic_add(counter, counter_index, value) is stored
+# in thread_values[tid] and also returned to the caller. Recording it allows
+# the correct counter value per thread to be used in the replay phase of the
+# backward pass (see the function registered with wp.func_replay).
+@wp.func
+def reversible_increment(
+    counter: wp.array(dtype=int), counter_index: int, value: int, thread_values: wp.array(dtype=int), tid: int
+):
+    next_index = wp.atomic_add(counter, counter_index, value)
+    thread_values[tid] = next_index
+    return next_index
+# Replay counterpart of reversible_increment, registered via wp.func_replay.
+# It takes the same arguments but performs no atomic operation: it only returns
+# the value stored in thread_values[tid], i.e. the index that
+# reversible_increment recorded for this thread.
+@wp.func_replay(reversible_increment)
+def replay_reversible_increment(
+    counter: wp.array(dtype=int), counter_index: int, value: int, thread_values: wp.array(dtype=int), tid: int
+):
+    return thread_values[tid]
+def test_custom_replay_grad(test, device):
+    # Checks that gradients are correct when a kernel obtains its array index
+    # from reversible_increment (an atomic counter with a custom replay
+    # function): each thread squares one input element, so the expected
+    # input gradient is 2 * input for a unit output gradient.
+    @wp.kernel
+    def run_atomic_add(
+        input: wp.array(dtype=float),
+        counter: wp.array(dtype=int),
+        thread_values: wp.array(dtype=int),
+        output: wp.array(dtype=float),
+    ):
+        tid = wp.tid()
+        # the slot handled by this thread comes from the shared atomic counter
+        idx = reversible_increment(counter, 0, 1, thread_values, tid)
+        output[idx] = input[idx] ** 2.0
+    thread_count = 128
+    # differentiable inputs 0..thread_count-1 and a matching output array
+    values = wp.array(np.arange(thread_count, dtype=np.float32), device=device, requires_grad=True)
+    squares = wp.zeros_like(values)
+    # single shared counter plus one recorded index per thread
+    shared_counter = wp.zeros(1, dtype=wp.int32, device=device)
+    recorded_indices = wp.zeros(thread_count, dtype=wp.int32, device=device)
+    tape = wp.Tape()
+    with tape:
+        wp.launch(
+            run_atomic_add,
+            dim=thread_count,
+            inputs=[values, shared_counter, recorded_indices],
+            outputs=[squares],
+            device=device,
+        )
+    # seed the backward pass with a gradient of one for every output element
+    seed_grad = wp.array(np.ones(thread_count, dtype=np.float32), device=device)
+    tape.backward(grads={squares: seed_grad})
+    expected_grad = 2.0 * values.numpy()
+    assert_np_equal(values.grad.numpy(), expected_grad, tol=1e-4)
+# Float overload of overload_fn: returns the pair (x * 3.0 + y / 3.0, y**2.5).
+@wp.func
+def overload_fn(x: float, y: float):
+    return x * 3.0 + y / 3.0, y**2.5
+# Custom gradient registered for the float overload of overload_fn via
+# wp.func_grad. It receives the forward arguments (x, y) followed by one
+# adjoint per return value (adj_ret0, adj_ret1) and accumulates into
+# wp.adjoint[x] and wp.adjoint[y].
+# The expressions are not the analytic derivatives of overload_fn;
+# test_custom_overload_grad compares the resulting gradients against these
+# exact formulas.
+@wp.func_grad(overload_fn)
+def overload_fn_grad(x: float, y: float, adj_ret0: float, adj_ret1: float):
+    wp.adjoint[x] += x * adj_ret0 * 42.0 + y * adj_ret1 * 10.0
+    wp.adjoint[y] += y * adj_ret1 * 3.0
+# Warp struct with one float field and one wp.vec3 field; it is the argument
+# type of the struct overload of overload_fn and the element type of the
+# array passed to run_overload_struct_fn.
+@wp.struct
+class MyStruct:
+    scalar: float
+    vec: wp.vec3
+# Struct overload of overload_fn (same name as the float version, different
+# argument type). Returns three values: 4 times the product of the vec
+# components, the length of vec, and the square root of scalar.
+@wp.func
+def overload_fn(x: MyStruct):
+    return x.vec[0] * x.vec[1] * x.vec[2] * 4.0, wp.length(x.vec), x.scalar**0.5
+# Custom gradient registered for the struct overload of overload_fn via
+# wp.func_grad. It receives the struct argument followed by one adjoint per
+# return value and accumulates into the adjoints of the struct's fields.
+# Note which adjoint feeds which field: scalar and vec[0] use adj_ret0,
+# vec[1] uses adj_ret1 and vec[2] uses adj_ret2. The coefficients (10, 20,
+# 30, 40) are the ones test_custom_overload_grad checks for.
+@wp.func_grad(overload_fn)
+def overload_fn_grad(x: MyStruct, adj_ret0: float, adj_ret1: float, adj_ret2: float):
+    wp.adjoint[x.scalar] += x.scalar * adj_ret0 * 10.0
+    wp.adjoint[x.vec][0] += adj_ret0 * x.vec[1] * x.vec[2] * 20.0
+    wp.adjoint[x.vec][1] += adj_ret1 * x.vec[0] * x.vec[2] * 30.0
+    wp.adjoint[x.vec][2] += adj_ret2 * x.vec[0] * x.vec[1] * 40.0
+# Kernel: thread i calls the float overload of overload_fn on (xs[i], ys[i])
+# and writes the two returned values to output0[i] and output1[i].
+@wp.kernel
+def run_overload_float_fn(
+    xs: wp.array(dtype=float), ys: wp.array(dtype=float), output0: wp.array(dtype=float), output1: wp.array(dtype=float)
+):
+    i = wp.tid()
+    out0, out1 = overload_fn(xs[i], ys[i])
+    output0[i] = out0
+    output1[i] = out1
+# Kernel: thread i calls the struct overload of overload_fn on xs[i] and
+# writes the sum of the three returned values to output[i].
+@wp.kernel
+def run_overload_struct_fn(xs: wp.array(dtype=MyStruct), output: wp.array(dtype=float)):
+    i = wp.tid()
+    out0, out1, out2 = overload_fn(xs[i])
+    output[i] = out0 + out1 + out2
+def test_custom_overload_grad(test, device):
+    # Checks that the custom gradients registered with wp.func_grad are used
+    # for both overloads of overload_fn (float arguments and MyStruct argument).
+    #
+    # Every array and launch is placed explicitly on `device`. Previously the
+    # parameter was ignored, so each per-device registration of this test ran
+    # on the default device instead of the device it was registered for.
+    dim = 3
+    # --- float overload ---
+    xs_float = wp.array(np.arange(1.0, dim + 1.0), dtype=wp.float32, requires_grad=True, device=device)
+    ys_float = wp.array(np.arange(10.0, dim + 10.0), dtype=wp.float32, requires_grad=True, device=device)
+    out0_float = wp.zeros(dim, device=device)
+    out1_float = wp.zeros(dim, device=device)
+    tape = wp.Tape()
+    with tape:
+        wp.launch(
+            run_overload_float_fn,
+            dim=dim,
+            inputs=[xs_float, ys_float],
+            outputs=[out0_float, out1_float],
+            device=device,
+        )
+    # seed both outputs with unit gradients
+    tape.backward(
+        grads={
+            out0_float: wp.array(np.ones(dim), dtype=wp.float32, device=device),
+            out1_float: wp.array(np.ones(dim), dtype=wp.float32, device=device),
+        }
+    )
+    # expected values follow the formulas in the float overload_fn_grad with
+    # adj_ret0 == adj_ret1 == 1
+    assert_np_equal(xs_float.grad.numpy(), xs_float.numpy() * 42.0 + ys_float.numpy() * 10.0)
+    assert_np_equal(ys_float.grad.numpy(), ys_float.numpy() * 3.0)
+    # --- struct overload ---
+    x0 = MyStruct()
+    x0.vec = wp.vec3(1.0, 2.0, 3.0)
+    x0.scalar = 4.0
+    x1 = MyStruct()
+    x1.vec = wp.vec3(5.0, 6.0, 7.0)
+    x1.scalar = -1.0
+    x2 = MyStruct()
+    x2.vec = wp.vec3(8.0, 9.0, 10.0)
+    x2.scalar = 19.0
+    xs_struct = wp.array([x0, x1, x2], dtype=MyStruct, requires_grad=True, device=device)
+    out_struct = wp.zeros(dim, device=device)
+    tape = wp.Tape()
+    with tape:
+        wp.launch(run_overload_struct_fn, dim=dim, inputs=[xs_struct], outputs=[out_struct], device=device)
+    tape.backward(grads={out_struct: wp.array(np.ones(dim), dtype=wp.float32, device=device)})
+    xs_struct_np = xs_struct.numpy()
+    struct_grads = xs_struct.grad.numpy()
+    # In each record, index 0 is the `scalar` field and index 1 the `vec` field
+    # (MyStruct declaration order); expected values follow the struct
+    # overload_fn_grad with all output adjoints equal to 1.
+    # fmt: off
+    assert_np_equal(
+        np.array([g[0] for g in struct_grads]),
+        np.array([g[0] * 10.0 for g in xs_struct_np]))
+    assert_np_equal(
+        np.array([g[1][0] for g in struct_grads]),
+        np.array([g[1][1] * g[1][2] * 20.0 for g in xs_struct_np]))
+    assert_np_equal(
+        np.array([g[1][1] for g in struct_grads]),
+        np.array([g[1][0] * g[1][2] * 30.0 for g in xs_struct_np]))
+    assert_np_equal(
+        np.array([g[1][2] for g in struct_grads]),
+        np.array([g[1][0] * g[1][1] * 40.0 for g in xs_struct_np]))
+    # fmt: on
+# Devices the tests are registered for, as returned by get_test_devices().
+devices = get_test_devices()
+# Empty TestCase; the test methods are attached below by add_function_test.
+class TestGradCustoms(unittest.TestCase):
+    pass
+add_function_test(TestGradCustoms, "test_custom_replay_grad", test_custom_replay_grad, devices=devices)
+add_function_test(TestGradCustoms, "test_custom_overload_grad", test_custom_overload_grad, devices=devices)
+if __name__ == "__main__":
+    # Script entry point: clear Warp's kernel cache, then run this module's
+    # tests with verbose output and without stopping at the first failure.
+    wp.build.clear_kernel_cache()
+    unittest.main(verbosity=2, failfast=False)