warp-lang 0.10.1-py3-none-win_amd64.whl → 0.11.0-py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (300)
  1. warp/__init__.py +10 -4
  2. warp/__init__.pyi +1 -0
  3. warp/bin/warp-clang.dll +0 -0
  4. warp/bin/warp.dll +0 -0
  5. warp/build.py +5 -3
  6. warp/build_dll.py +29 -9
  7. warp/builtins.py +868 -507
  8. warp/codegen.py +1074 -638
  9. warp/config.py +3 -3
  10. warp/constants.py +6 -0
  11. warp/context.py +715 -222
  12. warp/fabric.py +326 -0
  13. warp/fem/__init__.py +27 -0
  14. warp/fem/cache.py +389 -0
  15. warp/fem/dirichlet.py +181 -0
  16. warp/fem/domain.py +263 -0
  17. warp/fem/field/__init__.py +101 -0
  18. warp/fem/field/field.py +149 -0
  19. warp/fem/field/nodal_field.py +299 -0
  20. warp/fem/field/restriction.py +21 -0
  21. warp/fem/field/test.py +181 -0
  22. warp/fem/field/trial.py +183 -0
  23. warp/fem/geometry/__init__.py +19 -0
  24. warp/fem/geometry/closest_point.py +70 -0
  25. warp/fem/geometry/deformed_geometry.py +271 -0
  26. warp/fem/geometry/element.py +744 -0
  27. warp/fem/geometry/geometry.py +186 -0
  28. warp/fem/geometry/grid_2d.py +373 -0
  29. warp/fem/geometry/grid_3d.py +435 -0
  30. warp/fem/geometry/hexmesh.py +953 -0
  31. warp/fem/geometry/partition.py +376 -0
  32. warp/fem/geometry/quadmesh_2d.py +532 -0
  33. warp/fem/geometry/tetmesh.py +840 -0
  34. warp/fem/geometry/trimesh_2d.py +577 -0
  35. warp/fem/integrate.py +1616 -0
  36. warp/fem/operator.py +191 -0
  37. warp/fem/polynomial.py +213 -0
  38. warp/fem/quadrature/__init__.py +2 -0
  39. warp/fem/quadrature/pic_quadrature.py +245 -0
  40. warp/fem/quadrature/quadrature.py +294 -0
  41. warp/fem/space/__init__.py +292 -0
  42. warp/fem/space/basis_space.py +489 -0
  43. warp/fem/space/collocated_function_space.py +105 -0
  44. warp/fem/space/dof_mapper.py +236 -0
  45. warp/fem/space/function_space.py +145 -0
  46. warp/fem/space/grid_2d_function_space.py +267 -0
  47. warp/fem/space/grid_3d_function_space.py +306 -0
  48. warp/fem/space/hexmesh_function_space.py +352 -0
  49. warp/fem/space/partition.py +350 -0
  50. warp/fem/space/quadmesh_2d_function_space.py +369 -0
  51. warp/fem/space/restriction.py +160 -0
  52. warp/fem/space/shape/__init__.py +15 -0
  53. warp/fem/space/shape/cube_shape_function.py +738 -0
  54. warp/fem/space/shape/shape_function.py +103 -0
  55. warp/fem/space/shape/square_shape_function.py +611 -0
  56. warp/fem/space/shape/tet_shape_function.py +567 -0
  57. warp/fem/space/shape/triangle_shape_function.py +429 -0
  58. warp/fem/space/tetmesh_function_space.py +292 -0
  59. warp/fem/space/topology.py +295 -0
  60. warp/fem/space/trimesh_2d_function_space.py +221 -0
  61. warp/fem/types.py +77 -0
  62. warp/fem/utils.py +495 -0
  63. warp/native/array.h +147 -44
  64. warp/native/builtin.h +122 -149
  65. warp/native/bvh.cpp +73 -325
  66. warp/native/bvh.cu +406 -23
  67. warp/native/bvh.h +34 -43
  68. warp/native/clang/clang.cpp +13 -8
  69. warp/native/crt.h +2 -0
  70. warp/native/cuda_crt.h +5 -0
  71. warp/native/cuda_util.cpp +15 -3
  72. warp/native/cuda_util.h +3 -1
  73. warp/native/cutlass/tools/library/scripts/conv2d_operation.py +463 -0
  74. warp/native/cutlass/tools/library/scripts/conv3d_operation.py +321 -0
  75. warp/native/cutlass/tools/library/scripts/gemm_operation.py +988 -0
  76. warp/native/cutlass/tools/library/scripts/generator.py +4625 -0
  77. warp/native/cutlass/tools/library/scripts/library.py +799 -0
  78. warp/native/cutlass/tools/library/scripts/manifest.py +402 -0
  79. warp/native/cutlass/tools/library/scripts/pycutlass/docs/source/conf.py +96 -0
  80. warp/native/cutlass/tools/library/scripts/pycutlass/profile/conv/conv2d_f16_sm80.py +106 -0
  81. warp/native/cutlass/tools/library/scripts/pycutlass/profile/gemm/gemm_f32_sm80.py +91 -0
  82. warp/native/cutlass/tools/library/scripts/pycutlass/setup.py +80 -0
  83. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/__init__.py +48 -0
  84. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/arguments.py +118 -0
  85. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/c_types.py +241 -0
  86. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/compiler.py +432 -0
  87. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/conv2d_operation.py +631 -0
  88. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/epilogue.py +1026 -0
  89. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/frontend.py +104 -0
  90. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/gemm_operation.py +1276 -0
  91. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/library.py +744 -0
  92. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/memory_manager.py +74 -0
  93. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/operation.py +110 -0
  94. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/parser.py +619 -0
  95. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/reduction_operation.py +398 -0
  96. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/tensor_ref.py +70 -0
  97. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/__init__.py +4 -0
  98. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/conv2d_testbed.py +646 -0
  99. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_grouped_testbed.py +235 -0
  100. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_testbed.py +557 -0
  101. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/profiler.py +70 -0
  102. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/type_hint.py +39 -0
  103. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/__init__.py +1 -0
  104. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/device.py +76 -0
  105. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/reference_model.py +255 -0
  106. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/__init__.py +0 -0
  107. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +201 -0
  108. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +177 -0
  109. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +98 -0
  110. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +95 -0
  111. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +163 -0
  112. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +187 -0
  113. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +309 -0
  114. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +54 -0
  115. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  116. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  117. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +253 -0
  118. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +97 -0
  119. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +242 -0
  120. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  121. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  122. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/run_all_tests.py +10 -0
  123. warp/native/cutlass/tools/library/scripts/pycutlass/test/frontend/test_frontend.py +146 -0
  124. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/__init__.py +0 -0
  125. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_bf16_sm80.py +96 -0
  126. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f16_sm80.py +447 -0
  127. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f32_sm80.py +146 -0
  128. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f64_sm80.py +102 -0
  129. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_grouped_sm80.py +203 -0
  130. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_s8_sm80.py +229 -0
  131. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/run_all_tests.py +9 -0
  132. warp/native/cutlass/tools/library/scripts/pycutlass/test/unit/test_sm80.py +453 -0
  133. warp/native/cutlass/tools/library/scripts/rank_2k_operation.py +398 -0
  134. warp/native/cutlass/tools/library/scripts/rank_k_operation.py +387 -0
  135. warp/native/cutlass/tools/library/scripts/rt.py +796 -0
  136. warp/native/cutlass/tools/library/scripts/symm_operation.py +400 -0
  137. warp/native/cutlass/tools/library/scripts/trmm_operation.py +407 -0
  138. warp/native/cutlass_gemm.cu +5 -3
  139. warp/native/exports.h +1240 -952
  140. warp/native/fabric.h +228 -0
  141. warp/native/hashgrid.cpp +4 -4
  142. warp/native/hashgrid.h +22 -2
  143. warp/native/intersect.h +22 -7
  144. warp/native/intersect_adj.h +8 -8
  145. warp/native/intersect_tri.h +1 -1
  146. warp/native/marching.cu +157 -161
  147. warp/native/mat.h +80 -19
  148. warp/native/matnn.h +2 -2
  149. warp/native/mesh.cpp +33 -108
  150. warp/native/mesh.cu +114 -23
  151. warp/native/mesh.h +446 -46
  152. warp/native/noise.h +272 -329
  153. warp/native/quat.h +51 -8
  154. warp/native/rand.h +45 -35
  155. warp/native/range.h +6 -2
  156. warp/native/reduce.cpp +1 -1
  157. warp/native/reduce.cu +10 -12
  158. warp/native/runlength_encode.cu +6 -10
  159. warp/native/scan.cu +8 -11
  160. warp/native/sparse.cpp +4 -4
  161. warp/native/sparse.cu +164 -154
  162. warp/native/spatial.h +2 -2
  163. warp/native/temp_buffer.h +14 -30
  164. warp/native/vec.h +107 -23
  165. warp/native/volume.h +120 -0
  166. warp/native/warp.cpp +560 -30
  167. warp/native/warp.cu +431 -44
  168. warp/native/warp.h +13 -4
  169. warp/optim/__init__.py +1 -0
  170. warp/optim/linear.py +922 -0
  171. warp/optim/sgd.py +92 -0
  172. warp/render/render_opengl.py +335 -119
  173. warp/render/render_usd.py +11 -11
  174. warp/sim/__init__.py +2 -2
  175. warp/sim/articulation.py +385 -185
  176. warp/sim/collide.py +8 -0
  177. warp/sim/import_mjcf.py +297 -106
  178. warp/sim/import_urdf.py +389 -210
  179. warp/sim/import_usd.py +198 -97
  180. warp/sim/inertia.py +17 -18
  181. warp/sim/integrator_euler.py +14 -8
  182. warp/sim/integrator_xpbd.py +158 -16
  183. warp/sim/model.py +795 -291
  184. warp/sim/render.py +3 -3
  185. warp/sim/utils.py +3 -0
  186. warp/sparse.py +640 -150
  187. warp/stubs.py +606 -267
  188. warp/tape.py +61 -10
  189. warp/tests/__main__.py +3 -6
  190. warp/tests/assets/curlnoise_golden.npy +0 -0
  191. warp/tests/assets/pnoise_golden.npy +0 -0
  192. warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
  193. warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
  194. warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
  195. warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
  196. warp/tests/aux_test_unresolved_func.py +14 -0
  197. warp/tests/aux_test_unresolved_symbol.py +14 -0
  198. warp/tests/disabled_kinematics.py +239 -0
  199. warp/tests/run_coverage_serial.py +31 -0
  200. warp/tests/test_adam.py +103 -106
  201. warp/tests/test_arithmetic.py +128 -74
  202. warp/tests/test_array.py +212 -97
  203. warp/tests/test_array_reduce.py +57 -23
  204. warp/tests/test_atomic.py +64 -28
  205. warp/tests/test_bool.py +99 -0
  206. warp/tests/test_builtins_resolution.py +1292 -0
  207. warp/tests/test_bvh.py +42 -18
  208. warp/tests/test_closest_point_edge_edge.py +54 -57
  209. warp/tests/test_codegen.py +208 -130
  210. warp/tests/test_compile_consts.py +28 -20
  211. warp/tests/test_conditional.py +108 -24
  212. warp/tests/test_copy.py +10 -12
  213. warp/tests/test_ctypes.py +112 -88
  214. warp/tests/test_dense.py +21 -14
  215. warp/tests/test_devices.py +98 -0
  216. warp/tests/test_dlpack.py +75 -75
  217. warp/tests/test_examples.py +277 -0
  218. warp/tests/test_fabricarray.py +955 -0
  219. warp/tests/test_fast_math.py +15 -11
  220. warp/tests/test_fem.py +1271 -0
  221. warp/tests/test_fp16.py +53 -19
  222. warp/tests/test_func.py +187 -86
  223. warp/tests/test_generics.py +194 -49
  224. warp/tests/test_grad.py +178 -109
  225. warp/tests/test_grad_customs.py +176 -0
  226. warp/tests/test_hash_grid.py +52 -37
  227. warp/tests/test_import.py +10 -23
  228. warp/tests/test_indexedarray.py +32 -31
  229. warp/tests/test_intersect.py +18 -9
  230. warp/tests/test_large.py +141 -0
  231. warp/tests/test_launch.py +14 -41
  232. warp/tests/test_lerp.py +64 -65
  233. warp/tests/test_linear_solvers.py +154 -0
  234. warp/tests/test_lvalue.py +493 -0
  235. warp/tests/test_marching_cubes.py +12 -13
  236. warp/tests/test_mat.py +517 -2898
  237. warp/tests/test_mat_lite.py +115 -0
  238. warp/tests/test_mat_scalar_ops.py +2889 -0
  239. warp/tests/test_math.py +103 -9
  240. warp/tests/test_matmul.py +305 -69
  241. warp/tests/test_matmul_lite.py +410 -0
  242. warp/tests/test_mesh.py +71 -14
  243. warp/tests/test_mesh_query_aabb.py +41 -25
  244. warp/tests/test_mesh_query_point.py +140 -22
  245. warp/tests/test_mesh_query_ray.py +39 -22
  246. warp/tests/test_mlp.py +30 -22
  247. warp/tests/test_model.py +92 -89
  248. warp/tests/test_modules_lite.py +39 -0
  249. warp/tests/test_multigpu.py +88 -114
  250. warp/tests/test_noise.py +12 -11
  251. warp/tests/test_operators.py +16 -20
  252. warp/tests/test_options.py +11 -11
  253. warp/tests/test_pinned.py +17 -18
  254. warp/tests/test_print.py +32 -11
  255. warp/tests/test_quat.py +275 -129
  256. warp/tests/test_rand.py +18 -16
  257. warp/tests/test_reload.py +38 -34
  258. warp/tests/test_rounding.py +50 -43
  259. warp/tests/test_runlength_encode.py +168 -20
  260. warp/tests/test_smoothstep.py +9 -11
  261. warp/tests/test_snippet.py +143 -0
  262. warp/tests/test_sparse.py +261 -63
  263. warp/tests/test_spatial.py +276 -243
  264. warp/tests/test_streams.py +110 -85
  265. warp/tests/test_struct.py +268 -63
  266. warp/tests/test_tape.py +39 -21
  267. warp/tests/test_torch.py +118 -89
  268. warp/tests/test_transient_module.py +12 -13
  269. warp/tests/test_types.py +614 -0
  270. warp/tests/test_utils.py +494 -0
  271. warp/tests/test_vec.py +354 -2050
  272. warp/tests/test_vec_lite.py +73 -0
  273. warp/tests/test_vec_scalar_ops.py +2099 -0
  274. warp/tests/test_volume.py +457 -293
  275. warp/tests/test_volume_write.py +124 -134
  276. warp/tests/unittest_serial.py +35 -0
  277. warp/tests/unittest_suites.py +341 -0
  278. warp/tests/unittest_utils.py +568 -0
  279. warp/tests/unused_test_misc.py +71 -0
  280. warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
  281. warp/thirdparty/appdirs.py +36 -45
  282. warp/thirdparty/unittest_parallel.py +549 -0
  283. warp/torch.py +9 -6
  284. warp/types.py +1089 -366
  285. warp/utils.py +93 -387
  286. warp_lang-0.11.0.dist-info/METADATA +238 -0
  287. warp_lang-0.11.0.dist-info/RECORD +332 -0
  288. {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/WHEEL +1 -1
  289. warp/tests/test_all.py +0 -219
  290. warp/tests/test_array_scan.py +0 -60
  291. warp/tests/test_base.py +0 -208
  292. warp/tests/test_unresolved_func.py +0 -7
  293. warp/tests/test_unresolved_symbol.py +0 -7
  294. warp_lang-0.10.1.dist-info/METADATA +0 -21
  295. warp_lang-0.10.1.dist-info/RECORD +0 -188
  296. /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
  297. /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
  298. /warp/tests/{test_square.py → aux_test_square.py} +0 -0
  299. {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/LICENSE.md +0 -0
  300. {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/top_level.txt +0 -0
warp/tests/test_grad.py CHANGED
@@ -5,9 +5,13 @@
5
5
  # distribution of this software and related documentation without an express
6
6
  # license agreement from NVIDIA CORPORATION is strictly prohibited.
7
7
 
8
+ import unittest
9
+ from typing import Any
10
+
8
11
  import numpy as np
12
+
9
13
  import warp as wp
10
- from warp.tests.test_base import *
14
+ from warp.tests.unittest_utils import *
11
15
 
12
16
  wp.init()
13
17
 
@@ -63,26 +67,26 @@ def test_for_loop_grad(test, device):
63
67
 
64
68
 
65
69
  def test_for_loop_graph_grad(test, device):
70
+ wp.load_module(device=device)
71
+
66
72
  n = 32
67
73
  val = np.ones(n, dtype=np.float32)
68
74
 
69
75
  x = wp.array(val, device=device, requires_grad=True)
70
76
  sum = wp.zeros(1, dtype=wp.float32, device=device, requires_grad=True)
71
77
 
72
- wp.force_load()
73
-
74
- wp.capture_begin()
78
+ wp.capture_begin(device, force_module_load=False)
79
+ try:
80
+ tape = wp.Tape()
81
+ with tape:
82
+ wp.launch(for_loop_grad, dim=1, inputs=[n, x, sum], device=device)
75
83
 
76
- tape = wp.Tape()
77
- with tape:
78
- wp.launch(for_loop_grad, dim=1, inputs=[n, x, sum], device=device)
79
-
80
- tape.backward(loss=sum)
81
-
82
- graph = wp.capture_end()
84
+ tape.backward(loss=sum)
85
+ finally:
86
+ graph = wp.capture_end(device)
83
87
 
84
88
  wp.capture_launch(graph)
85
- wp.synchronize()
89
+ wp.synchronize_device(device)
86
90
 
87
91
  # ensure forward pass outputs persist
88
92
  assert_np_equal(sum.numpy(), 2.0 * np.sum(x.numpy()))
@@ -90,7 +94,7 @@ def test_for_loop_graph_grad(test, device):
90
94
  assert_np_equal(x.grad.numpy(), 2.0 * val)
91
95
 
92
96
  wp.capture_launch(graph)
93
- wp.synchronize()
97
+ wp.synchronize_device(device)
94
98
 
95
99
 
96
100
  @wp.kernel
@@ -115,75 +119,20 @@ def for_loop_nested_if_grad(n: int, x: wp.array(dtype=float), s: wp.array(dtype=
115
119
  def test_for_loop_nested_if_grad(test, device):
116
120
  n = 32
117
121
  val = np.ones(n, dtype=np.float32)
118
-
122
+ # fmt: off
119
123
  expected_val = [
120
- 2.0,
121
- 2.0,
122
- 2.0,
123
- 2.0,
124
- 2.0,
125
- 2.0,
126
- 2.0,
127
- 2.0,
128
- 4.0,
129
- 4.0,
130
- 4.0,
131
- 4.0,
132
- 4.0,
133
- 4.0,
134
- 4.0,
135
- 4.0,
136
- 6.0,
137
- 6.0,
138
- 6.0,
139
- 6.0,
140
- 6.0,
141
- 6.0,
142
- 6.0,
143
- 6.0,
144
- 8.0,
145
- 8.0,
146
- 8.0,
147
- 8.0,
148
- 8.0,
149
- 8.0,
150
- 8.0,
151
- 8.0,
124
+ 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
125
+ 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0,
126
+ 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0,
127
+ 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0,
152
128
  ]
153
129
  expected_grad = [
154
- 2.0,
155
- 2.0,
156
- 2.0,
157
- 2.0,
158
- 2.0,
159
- 2.0,
160
- 2.0,
161
- 2.0,
162
- 4.0,
163
- 4.0,
164
- 4.0,
165
- 4.0,
166
- 4.0,
167
- 4.0,
168
- 4.0,
169
- 4.0,
170
- 6.0,
171
- 6.0,
172
- 6.0,
173
- 6.0,
174
- 6.0,
175
- 6.0,
176
- 6.0,
177
- 6.0,
178
- 8.0,
179
- 8.0,
180
- 8.0,
181
- 8.0,
182
- 8.0,
183
- 8.0,
184
- 8.0,
185
- 8.0,
130
+ 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0,
131
+ 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0,
132
+ 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0,
133
+ 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0,
186
134
  ]
135
+ # fmt: on
187
136
 
188
137
  x = wp.array(val, device=device, requires_grad=True)
189
138
  sum = wp.zeros(1, dtype=wp.float32, device=device, requires_grad=True)
@@ -327,8 +276,7 @@ def gradcheck(func, func_name, inputs, device, eps=1e-4, tol=1e-2):
327
276
  numerical gradient computed using finite differences.
328
277
  """
329
278
 
330
- module = wp.get_module(func.__module__)
331
- kernel = wp.Kernel(func=func, key=func_name, module=module)
279
+ kernel = wp.Kernel(func=func, key=func_name)
332
280
 
333
281
  def f(xs):
334
282
  # call the kernel without taping for finite differences
@@ -371,7 +319,7 @@ def gradcheck(func, func_name, inputs, device, eps=1e-4, tol=1e-2):
371
319
 
372
320
 
373
321
  def test_vector_math_grad(test, device):
374
- np.random.seed(123)
322
+ rng = np.random.default_rng(123)
375
323
 
376
324
  # test unary operations
377
325
  for dim, vec_type in [(2, wp.vec2), (3, wp.vec3), (4, wp.vec4), (4, wp.quat)]:
@@ -387,14 +335,14 @@ def test_vector_math_grad(test, device):
387
335
 
388
336
  # run the tests with 5 different random inputs
389
337
  for _ in range(5):
390
- x = wp.array(np.random.randn(1, dim).astype(np.float32), dtype=vec_type, device=device)
338
+ x = wp.array(rng.random(size=(1, dim), dtype=np.float32), dtype=vec_type, device=device)
391
339
  gradcheck(check_length, f"check_length_{vec_type.__name__}", [x], device)
392
340
  gradcheck(check_length_sq, f"check_length_sq_{vec_type.__name__}", [x], device)
393
341
  gradcheck(check_normalize, f"check_normalize_{vec_type.__name__}", [x], device)
394
342
 
395
343
 
396
344
  def test_matrix_math_grad(test, device):
397
- np.random.seed(123)
345
+ rng = np.random.default_rng(123)
398
346
 
399
347
  # test unary operations
400
348
  for dim, mat_type in [(2, wp.mat22), (3, wp.mat33), (4, wp.mat44)]:
@@ -407,13 +355,13 @@ def test_matrix_math_grad(test, device):
407
355
 
408
356
  # run the tests with 5 different random inputs
409
357
  for _ in range(5):
410
- x = wp.array(np.random.randn(1, dim, dim).astype(np.float32), ndim=1, dtype=mat_type, device=device)
358
+ x = wp.array(rng.random(size=(1, dim, dim), dtype=np.float32), ndim=1, dtype=mat_type, device=device)
411
359
  gradcheck(check_determinant, f"check_length_{mat_type.__name__}", [x], device)
412
360
  gradcheck(check_trace, f"check_length_sq_{mat_type.__name__}", [x], device)
413
361
 
414
362
 
415
363
  def test_3d_math_grad(test, device):
416
- np.random.seed(123)
364
+ rng = np.random.default_rng(123)
417
365
 
418
366
  # test binary operations
419
367
  def check_cross(vs: wp.array(dtype=wp.vec3), out: wp.array(dtype=float)):
@@ -463,7 +411,9 @@ def test_3d_math_grad(test, device):
463
411
 
464
412
  # run the tests with 5 different random inputs
465
413
  for _ in range(5):
466
- x = wp.array(np.random.randn(2, 3).astype(np.float32), dtype=wp.vec3, device=device, requires_grad=True)
414
+ x = wp.array(
415
+ rng.standard_normal(size=(2, 3), dtype=np.float32), dtype=wp.vec3, device=device, requires_grad=True
416
+ )
467
417
  gradcheck(check_cross, "check_cross_3d", [x], device)
468
418
  gradcheck(check_dot, "check_dot_3d", [x], device)
469
419
  gradcheck(check_mat33, "check_mat33_3d", [x], device, eps=2e-2)
@@ -473,6 +423,28 @@ def test_3d_math_grad(test, device):
473
423
  gradcheck(check_rot_quat_inv, "check_rot_quat_inv_3d", [x], device)
474
424
 
475
425
 
426
+ def test_multi_valued_function_grad(test, device):
427
+ rng = np.random.default_rng(123)
428
+
429
+ @wp.func
430
+ def multi_valued(x: float, y: float, z: float):
431
+ return wp.sin(x), wp.cos(y) * z, wp.sqrt(z) / wp.abs(x)
432
+
433
+ # test multi-valued functions
434
+ def check_multi_valued(vs: wp.array(dtype=wp.vec3), out: wp.array(dtype=float)):
435
+ tid = wp.tid()
436
+ v = vs[tid]
437
+ a, b, c = multi_valued(v[0], v[1], v[2])
438
+ out[tid] = a + b + c
439
+
440
+ # run the tests with 5 different random inputs
441
+ for _ in range(5):
442
+ x = wp.array(
443
+ rng.standard_normal(size=(2, 3), dtype=np.float32), dtype=wp.vec3, device=device, requires_grad=True
444
+ )
445
+ gradcheck(check_multi_valued, "check_multi_valued_3d", [x], device)
446
+
447
+
476
448
  def test_mesh_grad(test, device):
477
449
  pos = wp.array(
478
450
  [
@@ -502,19 +474,17 @@ def test_mesh_grad(test, device):
502
474
  c = mesh.points[k]
503
475
  return wp.length(wp.cross(b - a, c - a)) * 0.5
504
476
 
477
+ @wp.kernel
505
478
  def compute_area(mesh_id: wp.uint64, out: wp.array(dtype=wp.float32)):
506
479
  wp.atomic_add(out, 0, compute_triangle_area(mesh_id, wp.tid()))
507
480
 
508
- module = wp.get_module(compute_area.__module__)
509
- kernel = wp.Kernel(func=compute_area, key="compute_area", module=module)
510
-
511
481
  num_tris = int(len(indices) / 3)
512
482
 
513
483
  # compute analytical gradient
514
484
  tape = wp.Tape()
515
485
  output = wp.zeros(1, dtype=wp.float32, device=device, requires_grad=True)
516
486
  with tape:
517
- wp.launch(kernel, dim=num_tris, inputs=[mesh.id], outputs=[output], device=device)
487
+ wp.launch(compute_area, dim=num_tris, inputs=[mesh.id], outputs=[output], device=device)
518
488
 
519
489
  tape.backward(loss=output)
520
490
 
@@ -531,13 +501,13 @@ def test_mesh_grad(test, device):
531
501
  pos = wp.array(pos_np, dtype=wp.vec3, device=device)
532
502
  mesh = wp.Mesh(points=pos, indices=indices)
533
503
  output.zero_()
534
- wp.launch(kernel, dim=num_tris, inputs=[mesh.id], outputs=[output], device=device)
504
+ wp.launch(compute_area, dim=num_tris, inputs=[mesh.id], outputs=[output], device=device)
535
505
  f1 = output.numpy()[0]
536
506
  pos_np[i, j] -= 2 * eps
537
507
  pos = wp.array(pos_np, dtype=wp.vec3, device=device)
538
508
  mesh = wp.Mesh(points=pos, indices=indices)
539
509
  output.zero_()
540
- wp.launch(kernel, dim=num_tris, inputs=[mesh.id], outputs=[output], device=device)
510
+ wp.launch(compute_area, dim=num_tris, inputs=[mesh.id], outputs=[output], device=device)
541
511
  f2 = output.numpy()[0]
542
512
  pos_np[i, j] += eps
543
513
  fd_grad[i, j] = (f1 - f2) / (2 * eps)
@@ -545,27 +515,126 @@ def test_mesh_grad(test, device):
545
515
  assert np.allclose(ad_grad, fd_grad, atol=1e-3)
546
516
 
547
517
 
548
- def register(parent):
549
- devices = get_test_devices()
518
+ @wp.func
519
+ def name_clash(a: float, b: float) -> float:
520
+ return a + b
521
+
522
+
523
+ @wp.func_grad(name_clash)
524
+ def adj_name_clash(a: float, b: float, adj_ret: float):
525
+ # names `adj_a` and `adj_b` must not clash with function args of generated function
526
+ adj_a = 0.0
527
+ adj_b = 0.0
528
+ if a < 0.0:
529
+ adj_a = adj_ret
530
+ if b > 0.0:
531
+ adj_b = adj_ret
532
+
533
+ wp.adjoint[a] += adj_a
534
+ wp.adjoint[b] += adj_b
535
+
536
+
537
+ @wp.kernel
538
+ def name_clash_kernel(
539
+ input_a: wp.array(dtype=float),
540
+ input_b: wp.array(dtype=float),
541
+ output: wp.array(dtype=float),
542
+ ):
543
+ tid = wp.tid()
544
+ output[tid] = name_clash(input_a[tid], input_b[tid])
545
+
546
+
547
+ def test_name_clash(test, device):
548
+ # tests that no name clashes occur when variable names such as `adj_a` are used in custom gradient code
549
+ with wp.ScopedDevice(device):
550
+ input_a = wp.array([1.0, -2.0, 3.0], dtype=wp.float32, requires_grad=True)
551
+ input_b = wp.array([4.0, 5.0, -6.0], dtype=wp.float32, requires_grad=True)
552
+ output = wp.zeros(3, dtype=wp.float32, requires_grad=True)
553
+
554
+ tape = wp.Tape()
555
+ with tape:
556
+ wp.launch(name_clash_kernel, dim=len(input_a), inputs=[input_a, input_b], outputs=[output])
557
+
558
+ tape.backward(grads={output: wp.array(np.ones(len(input_a), dtype=np.float32))})
559
+
560
+ assert_np_equal(input_a.grad.numpy(), np.array([0.0, 1.0, 0.0]))
561
+ assert_np_equal(input_b.grad.numpy(), np.array([1.0, 1.0, 0.0]))
562
+
563
+
564
+ @wp.struct
565
+ class NestedStruct:
566
+ v: wp.vec2
567
+
568
+
569
+ @wp.struct
570
+ class ParentStruct:
571
+ a: float
572
+ n: NestedStruct
573
+
574
+
575
+ @wp.func
576
+ def noop(a: Any):
577
+ pass
578
+
579
+
580
+ @wp.func
581
+ def sum2(v: wp.vec2):
582
+ return v[0] + v[1]
583
+
584
+
585
+ @wp.kernel
586
+ def test_struct_attribute_gradient_kernel(src: wp.array(dtype=float), res: wp.array(dtype=float)):
587
+ tid = wp.tid()
588
+
589
+ p = ParentStruct(src[tid], NestedStruct(wp.vec2(2.0 * src[tid])))
590
+
591
+ # test that we are not losing gradients when accessing attributes
592
+ noop(p.a)
593
+ noop(p.n)
594
+ noop(p.n.v)
595
+
596
+ res[tid] = p.a + sum2(p.n.v)
597
+
598
+
599
+ def test_struct_attribute_gradient(test_case, device):
600
+ src = wp.array([1], dtype=float, requires_grad=True)
601
+ res = wp.empty_like(src)
602
+
603
+ tape = wp.Tape()
604
+ with tape:
605
+ wp.launch(test_struct_attribute_gradient_kernel, dim=1, inputs=[src, res])
606
+
607
+ res.grad.fill_(1.0)
608
+ tape.backward()
609
+
610
+ test_case.assertEqual(src.grad.numpy()[0], 5.0)
611
+
612
+
613
+ devices = get_test_devices()
614
+
550
615
 
551
- class TestGrad(parent):
552
- pass
616
+ class TestGrad(unittest.TestCase):
617
+ pass
553
618
 
554
- # add_function_test(TestGrad, "test_while_loop_grad", test_while_loop_grad, devices=devices)
555
- add_function_test(TestGrad, "test_for_loop_nested_for_grad", test_for_loop_nested_for_grad, devices=devices)
556
- add_function_test(TestGrad, "test_scalar_grad", test_scalar_grad, devices=devices)
557
- add_function_test(TestGrad, "test_for_loop_grad", test_for_loop_grad, devices=devices)
558
- add_function_test(TestGrad, "test_for_loop_graph_grad", test_for_loop_graph_grad, devices=wp.get_cuda_devices())
559
- add_function_test(TestGrad, "test_for_loop_nested_if_grad", test_for_loop_nested_if_grad, devices=devices)
560
- add_function_test(TestGrad, "test_preserve_outputs_grad", test_preserve_outputs_grad, devices=devices)
561
- add_function_test(TestGrad, "test_vector_math_grad", test_vector_math_grad, devices=devices)
562
- add_function_test(TestGrad, "test_matrix_math_grad", test_matrix_math_grad, devices=devices)
563
- add_function_test(TestGrad, "test_3d_math_grad", test_3d_math_grad, devices=devices)
564
- add_function_test(TestGrad, "test_mesh_grad", test_mesh_grad, devices=devices)
565
619
 
566
- return TestGrad
620
+ # add_function_test(TestGrad, "test_while_loop_grad", test_while_loop_grad, devices=devices)
621
+ add_function_test(TestGrad, "test_for_loop_nested_for_grad", test_for_loop_nested_for_grad, devices=devices)
622
+ add_function_test(TestGrad, "test_scalar_grad", test_scalar_grad, devices=devices)
623
+ add_function_test(TestGrad, "test_for_loop_grad", test_for_loop_grad, devices=devices)
624
+ add_function_test(
625
+ TestGrad, "test_for_loop_graph_grad", test_for_loop_graph_grad, devices=get_unique_cuda_test_devices()
626
+ )
627
+ add_function_test(TestGrad, "test_for_loop_nested_if_grad", test_for_loop_nested_if_grad, devices=devices)
628
+ add_function_test(TestGrad, "test_preserve_outputs_grad", test_preserve_outputs_grad, devices=devices)
629
+ add_function_test(TestGrad, "test_vector_math_grad", test_vector_math_grad, devices=devices)
630
+ add_function_test(TestGrad, "test_matrix_math_grad", test_matrix_math_grad, devices=devices)
631
+ add_function_test(TestGrad, "test_3d_math_grad", test_3d_math_grad, devices=devices)
632
+ add_function_test(TestGrad, "test_multi_valued_function_grad", test_multi_valued_function_grad, devices=devices)
633
+ add_function_test(TestGrad, "test_mesh_grad", test_mesh_grad, devices=devices)
634
+ add_function_test(TestGrad, "test_name_clash", test_name_clash, devices=devices)
635
+ add_function_test(TestGrad, "test_struct_attribute_gradient", test_struct_attribute_gradient, devices=devices)
567
636
 
568
637
 
569
638
  if __name__ == "__main__":
570
- c = register(unittest.TestCase)
639
+ wp.build.clear_kernel_cache()
571
640
  unittest.main(verbosity=2, failfast=False)
@@ -0,0 +1,176 @@
1
+ # Copyright (c) 2022 NVIDIA CORPORATION. All rights reserved.
2
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
3
+ # and proprietary rights in and to this software, related documentation
4
+ # and any modifications thereto. Any use, reproduction, disclosure or
5
+ # distribution of this software and related documentation without an express
6
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
7
+
8
+ import unittest
9
+
10
+ import numpy as np
11
+
12
+ import warp as wp
13
+ from warp.tests.unittest_utils import *
14
+
15
+ wp.init()
16
+
17
+
18
# Atomic counter increment that records, per thread, the counter value it
# observed.  The recorded value lets the replay phase of the backward pass
# reproduce the exact index each thread used in the forward pass.
@wp.func
def reversible_increment(
    counter: wp.array(dtype=int), counter_index: int, value: int, thread_values: wp.array(dtype=int), tid: int
):
    # wp.atomic_add returns the value held *before* the addition.
    prev_count = wp.atomic_add(counter, counter_index, value)
    # Remember which slot this thread claimed so replay can reuse it.
    thread_values[tid] = prev_count
    return prev_count
28
+
29
+
30
# Replay version of reversible_increment: instead of performing the atomic
# add again, return the counter value recorded for this thread during the
# forward pass, so the backward pass sees identical indices.
@wp.func_replay(reversible_increment)
def replay_reversible_increment(
    counter: wp.array(dtype=int), counter_index: int, value: int, thread_values: wp.array(dtype=int), tid: int
):
    # counter, counter_index and value are unused here; the signature must
    # match reversible_increment for the replay registration to apply.
    return thread_values[tid]
35
+
36
+
37
def test_custom_replay_grad(test, device):
    # Check that a custom replay function yields correct gradients for a
    # kernel whose output indices come from an atomic counter.
    n = 128
    inputs = wp.array(np.arange(n, dtype=np.float32), device=device, requires_grad=True)
    outputs = wp.zeros_like(inputs)
    counter = wp.zeros(1, dtype=wp.int32, device=device)
    thread_ids = wp.zeros(n, dtype=wp.int32, device=device)

    @wp.kernel
    def run_atomic_add(
        input: wp.array(dtype=float),
        counter: wp.array(dtype=int),
        thread_values: wp.array(dtype=int),
        output: wp.array(dtype=float),
    ):
        tid = wp.tid()
        # Each thread claims a unique output slot via the reversible counter.
        slot = reversible_increment(counter, 0, 1, thread_values, tid)
        output[slot] = input[slot] ** 2.0

    tape = wp.Tape()
    with tape:
        wp.launch(
            run_atomic_add, dim=n, inputs=[inputs, counter, thread_ids], outputs=[outputs], device=device
        )

    # Seed the output adjoints with ones and check d(x^2)/dx == 2x.
    seed = wp.array(np.ones(n, dtype=np.float32), device=device)
    tape.backward(grads={outputs: seed})
    assert_np_equal(inputs.grad.numpy(), 2.0 * inputs.numpy(), tol=1e-4)
63
+
64
+
65
@wp.func
def overload_fn(x: float, y: float):
    # Two arbitrary outputs, used to exercise custom gradients for
    # multi-valued functions.
    first = x * 3.0 + y / 3.0
    second = y**2.5
    return first, second
68
+
69
+
70
# Custom gradient for the (float, float) overload of overload_fn.  The
# formulas are deliberately arbitrary (not the true derivatives) so the test
# can verify that this custom adjoint — not the autodiff one — is invoked.
@wp.func_grad(overload_fn)
def overload_fn_grad(x: float, y: float, adj_ret0: float, adj_ret1: float):
    wp.adjoint[x] += x * adj_ret0 * 42.0 + y * adj_ret1 * 10.0
    wp.adjoint[y] += y * adj_ret1 * 3.0
74
+
75
+
76
# Simple Warp struct (a scalar plus a 3-vector) used to exercise custom
# gradients for struct-valued function arguments.
@wp.struct
class MyStruct:
    # scalar field; the struct overload of overload_fn raises it to 0.5
    scalar: float
    # 3-component vector consumed by the struct overload of overload_fn
    vec: wp.vec3
80
+
81
+
82
@wp.func
def overload_fn(x: MyStruct):
    # Struct overload: scaled product of the vector components, the vector
    # length, and the square root of the scalar field.
    v = x.vec
    prod = v[0] * v[1] * v[2] * 4.0
    return prod, wp.length(v), x.scalar**0.5
85
+
86
+
87
# Custom gradient for the MyStruct overload.  As with the float overload,
# the formulas are intentionally arbitrary; the test asserts exactly these
# expressions (with unit output adjoints) to prove this adjoint was used.
@wp.func_grad(overload_fn)
def overload_fn_grad(x: MyStruct, adj_ret0: float, adj_ret1: float, adj_ret2: float):
    wp.adjoint[x.scalar] += x.scalar * adj_ret0 * 10.0
    wp.adjoint[x.vec][0] += adj_ret0 * x.vec[1] * x.vec[2] * 20.0
    wp.adjoint[x.vec][1] += adj_ret1 * x.vec[0] * x.vec[2] * 30.0
    wp.adjoint[x.vec][2] += adj_ret2 * x.vec[0] * x.vec[1] * 40.0
93
+
94
+
95
@wp.kernel
def run_overload_float_fn(
    xs: wp.array(dtype=float), ys: wp.array(dtype=float), output0: wp.array(dtype=float), output1: wp.array(dtype=float)
):
    # Apply the (float, float) overload element-wise across the input arrays.
    tid = wp.tid()
    a, b = overload_fn(xs[tid], ys[tid])
    output0[tid] = a
    output1[tid] = b
103
+
104
+
105
@wp.kernel
def run_overload_struct_fn(xs: wp.array(dtype=MyStruct), output: wp.array(dtype=float)):
    # Sum the three outputs of the struct overload into one scalar so a
    # single output adjoint of 1.0 reaches every return value.
    tid = wp.tid()
    a, b, c = overload_fn(xs[tid])
    output[tid] = a + b + c
110
+
111
+
112
def test_custom_overload_grad(test, device):
    # Verify that user-supplied gradient overloads are applied for both the
    # (float, float) overload and the MyStruct overload of overload_fn.
    dim = 3

    # --- (float, float) overload ------------------------------------------
    xs_float = wp.array(np.arange(1.0, dim + 1.0), dtype=wp.float32, requires_grad=True)
    ys_float = wp.array(np.arange(10.0, dim + 10.0), dtype=wp.float32, requires_grad=True)
    out0_float = wp.zeros(dim)
    out1_float = wp.zeros(dim)
    tape = wp.Tape()
    with tape:
        wp.launch(run_overload_float_fn, dim=dim, inputs=[xs_float, ys_float], outputs=[out0_float, out1_float])
    ones = np.ones(dim)
    tape.backward(
        grads={
            out0_float: wp.array(ones, dtype=wp.float32),
            out1_float: wp.array(ones, dtype=wp.float32),
        }
    )
    # Expected values follow the (arbitrary) formulas in overload_fn_grad.
    assert_np_equal(xs_float.grad.numpy(), xs_float.numpy() * 42.0 + ys_float.numpy() * 10.0)
    assert_np_equal(ys_float.grad.numpy(), ys_float.numpy() * 3.0)

    # --- struct overload ---------------------------------------------------
    struct_data = (
        ((1.0, 2.0, 3.0), 4.0),
        ((5.0, 6.0, 7.0), -1.0),
        ((8.0, 9.0, 10.0), 19.0),
    )
    structs = []
    for vec, scalar in struct_data:
        s = MyStruct()
        s.vec = wp.vec3(vec[0], vec[1], vec[2])
        s.scalar = scalar
        structs.append(s)
    xs_struct = wp.array(structs, dtype=MyStruct, requires_grad=True)
    out_struct = wp.zeros(dim)
    tape = wp.Tape()
    with tape:
        wp.launch(run_overload_struct_fn, dim=dim, inputs=[xs_struct], outputs=[out_struct])
    tape.backward(grads={out_struct: wp.array(np.ones(dim), dtype=wp.float32)})
    xs_struct_np = xs_struct.numpy()
    struct_grads = xs_struct.grad.numpy()
    # Expected values mirror the custom struct adjoint formulas
    # (g[0] is the scalar field, g[1] the vec3 field).
    # fmt: off
    assert_np_equal(
        np.array([g[0] for g in struct_grads]),
        np.array([g[0] * 10.0 for g in xs_struct_np]))
    assert_np_equal(
        np.array([g[1][0] for g in struct_grads]),
        np.array([g[1][1] * g[1][2] * 20.0 for g in xs_struct_np]))
    assert_np_equal(
        np.array([g[1][1] for g in struct_grads]),
        np.array([g[1][0] * g[1][2] * 30.0 for g in xs_struct_np]))
    assert_np_equal(
        np.array([g[1][2] for g in struct_grads]),
        np.array([g[1][0] * g[1][1] * 40.0 for g in xs_struct_np]))
    # fmt: on
161
+
162
+
163
devices = get_test_devices()


class TestGradCustoms(unittest.TestCase):
    # Test methods are attached dynamically below via add_function_test.
    pass


add_function_test(TestGradCustoms, "test_custom_replay_grad", test_custom_replay_grad, devices=devices)
add_function_test(TestGradCustoms, "test_custom_overload_grad", test_custom_overload_grad, devices=devices)
172
+
173
+
174
if __name__ == "__main__":
    # Start from a clean kernel cache so stale compiled modules cannot mask
    # codegen/gradient regressions in this run.
    wp.build.clear_kernel_cache()
    unittest.main(verbosity=2, failfast=False)