PyPI - warp-lang - Versions diffs - 0.9.0__py3-none-win_amd64.whl → 0.11.0__py3-none-win_amd64.whl - Mend

warp-lang 0.9.0__py3-none-win_amd64.whl → 0.11.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (315)

warp/__init__.py +15 -7
warp/__init__.pyi +1 -0
warp/bin/warp-clang.dll +0 -0
warp/bin/warp.dll +0 -0
warp/build.py +22 -443
warp/build_dll.py +384 -0
warp/builtins.py +998 -488
warp/codegen.py +1307 -739
warp/config.py +5 -3
warp/constants.py +6 -0
warp/context.py +1291 -548
warp/dlpack.py +31 -31
warp/fabric.py +326 -0
warp/fem/__init__.py +27 -0
warp/fem/cache.py +389 -0
warp/fem/dirichlet.py +181 -0
warp/fem/domain.py +263 -0
warp/fem/field/__init__.py +101 -0
warp/fem/field/field.py +149 -0
warp/fem/field/nodal_field.py +299 -0
warp/fem/field/restriction.py +21 -0
warp/fem/field/test.py +181 -0
warp/fem/field/trial.py +183 -0
warp/fem/geometry/__init__.py +19 -0
warp/fem/geometry/closest_point.py +70 -0
warp/fem/geometry/deformed_geometry.py +271 -0
warp/fem/geometry/element.py +744 -0
warp/fem/geometry/geometry.py +186 -0
warp/fem/geometry/grid_2d.py +373 -0
warp/fem/geometry/grid_3d.py +435 -0
warp/fem/geometry/hexmesh.py +953 -0
warp/fem/geometry/partition.py +376 -0
warp/fem/geometry/quadmesh_2d.py +532 -0
warp/fem/geometry/tetmesh.py +840 -0
warp/fem/geometry/trimesh_2d.py +577 -0
warp/fem/integrate.py +1616 -0
warp/fem/operator.py +191 -0
warp/fem/polynomial.py +213 -0
warp/fem/quadrature/__init__.py +2 -0
warp/fem/quadrature/pic_quadrature.py +245 -0
warp/fem/quadrature/quadrature.py +294 -0
warp/fem/space/__init__.py +292 -0
warp/fem/space/basis_space.py +489 -0
warp/fem/space/collocated_function_space.py +105 -0
warp/fem/space/dof_mapper.py +236 -0
warp/fem/space/function_space.py +145 -0
warp/fem/space/grid_2d_function_space.py +267 -0
warp/fem/space/grid_3d_function_space.py +306 -0
warp/fem/space/hexmesh_function_space.py +352 -0
warp/fem/space/partition.py +350 -0
warp/fem/space/quadmesh_2d_function_space.py +369 -0
warp/fem/space/restriction.py +160 -0
warp/fem/space/shape/__init__.py +15 -0
warp/fem/space/shape/cube_shape_function.py +738 -0
warp/fem/space/shape/shape_function.py +103 -0
warp/fem/space/shape/square_shape_function.py +611 -0
warp/fem/space/shape/tet_shape_function.py +567 -0
warp/fem/space/shape/triangle_shape_function.py +429 -0
warp/fem/space/tetmesh_function_space.py +292 -0
warp/fem/space/topology.py +295 -0
warp/fem/space/trimesh_2d_function_space.py +221 -0
warp/fem/types.py +77 -0
warp/fem/utils.py +495 -0
warp/native/array.h +164 -55
warp/native/builtin.h +150 -174
warp/native/bvh.cpp +75 -328
warp/native/bvh.cu +406 -23
warp/native/bvh.h +37 -45
warp/native/clang/clang.cpp +136 -24
warp/native/crt.cpp +1 -76
warp/native/crt.h +111 -104
warp/native/cuda_crt.h +1049 -0
warp/native/cuda_util.cpp +15 -3
warp/native/cuda_util.h +3 -1
warp/native/cutlass/tools/library/scripts/conv2d_operation.py +463 -0
warp/native/cutlass/tools/library/scripts/conv3d_operation.py +321 -0
warp/native/cutlass/tools/library/scripts/gemm_operation.py +988 -0
warp/native/cutlass/tools/library/scripts/generator.py +4625 -0
warp/native/cutlass/tools/library/scripts/library.py +799 -0
warp/native/cutlass/tools/library/scripts/manifest.py +402 -0
warp/native/cutlass/tools/library/scripts/pycutlass/docs/source/conf.py +96 -0
warp/native/cutlass/tools/library/scripts/pycutlass/profile/conv/conv2d_f16_sm80.py +106 -0
warp/native/cutlass/tools/library/scripts/pycutlass/profile/gemm/gemm_f32_sm80.py +91 -0
warp/native/cutlass/tools/library/scripts/pycutlass/setup.py +80 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/__init__.py +48 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/arguments.py +118 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/c_types.py +241 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/compiler.py +432 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/conv2d_operation.py +631 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/epilogue.py +1026 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/frontend.py +104 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/gemm_operation.py +1276 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/library.py +744 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/memory_manager.py +74 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/operation.py +110 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/parser.py +619 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/reduction_operation.py +398 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/tensor_ref.py +70 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/__init__.py +4 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/conv2d_testbed.py +646 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_grouped_testbed.py +235 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_testbed.py +557 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/profiler.py +70 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/type_hint.py +39 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/__init__.py +1 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/device.py +76 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/reference_model.py +255 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/__init__.py +0 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +201 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +177 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +98 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +95 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +163 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +187 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +309 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +54 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +253 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +97 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +242 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/run_all_tests.py +10 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/frontend/test_frontend.py +146 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/__init__.py +0 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_bf16_sm80.py +96 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f16_sm80.py +447 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f32_sm80.py +146 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f64_sm80.py +102 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_grouped_sm80.py +203 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_s8_sm80.py +229 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/run_all_tests.py +9 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/unit/test_sm80.py +453 -0
warp/native/cutlass/tools/library/scripts/rank_2k_operation.py +398 -0
warp/native/cutlass/tools/library/scripts/rank_k_operation.py +387 -0
warp/native/cutlass/tools/library/scripts/rt.py +796 -0
warp/native/cutlass/tools/library/scripts/symm_operation.py +400 -0
warp/native/cutlass/tools/library/scripts/trmm_operation.py +407 -0
warp/native/cutlass_gemm.cu +5 -3
warp/native/exports.h +1240 -949
warp/native/fabric.h +228 -0
warp/native/hashgrid.cpp +4 -4
warp/native/hashgrid.h +22 -2
warp/native/initializer_array.h +2 -2
warp/native/intersect.h +22 -7
warp/native/intersect_adj.h +8 -8
warp/native/intersect_tri.h +13 -16
warp/native/marching.cu +157 -161
warp/native/mat.h +119 -19
warp/native/matnn.h +2 -2
warp/native/mesh.cpp +108 -83
warp/native/mesh.cu +243 -6
warp/native/mesh.h +1547 -458
warp/native/nanovdb/NanoVDB.h +1 -1
warp/native/noise.h +272 -329
warp/native/quat.h +51 -8
warp/native/rand.h +45 -35
warp/native/range.h +6 -2
warp/native/reduce.cpp +157 -0
warp/native/reduce.cu +348 -0
warp/native/runlength_encode.cpp +62 -0
warp/native/runlength_encode.cu +46 -0
warp/native/scan.cu +11 -13
warp/native/scan.h +1 -0
warp/native/solid_angle.h +442 -0
warp/native/sort.cpp +13 -0
warp/native/sort.cu +9 -1
warp/native/sparse.cpp +338 -0
warp/native/sparse.cu +545 -0
warp/native/spatial.h +2 -2
warp/native/temp_buffer.h +30 -0
warp/native/vec.h +126 -24
warp/native/volume.h +120 -0
warp/native/warp.cpp +658 -53
warp/native/warp.cu +660 -68
warp/native/warp.h +112 -12
warp/optim/__init__.py +1 -0
warp/optim/linear.py +922 -0
warp/optim/sgd.py +92 -0
warp/render/render_opengl.py +392 -152
warp/render/render_usd.py +11 -11
warp/sim/__init__.py +2 -2
warp/sim/articulation.py +385 -185
warp/sim/collide.py +21 -8
warp/sim/import_mjcf.py +297 -106
warp/sim/import_urdf.py +389 -210
warp/sim/import_usd.py +198 -97
warp/sim/inertia.py +17 -18
warp/sim/integrator_euler.py +14 -8
warp/sim/integrator_xpbd.py +161 -19
warp/sim/model.py +795 -291
warp/sim/optimizer.py +2 -6
warp/sim/render.py +65 -3
warp/sim/utils.py +3 -0
warp/sparse.py +1227 -0
warp/stubs.py +665 -223
warp/tape.py +66 -15
warp/tests/__main__.py +3 -6
warp/tests/assets/curlnoise_golden.npy +0 -0
warp/tests/assets/pnoise_golden.npy +0 -0
warp/tests/assets/torus.usda +105 -105
warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
warp/tests/aux_test_unresolved_func.py +14 -0
warp/tests/aux_test_unresolved_symbol.py +14 -0
warp/tests/disabled_kinematics.py +239 -0
warp/tests/run_coverage_serial.py +31 -0
warp/tests/test_adam.py +103 -106
warp/tests/test_arithmetic.py +128 -74
warp/tests/test_array.py +1497 -211
warp/tests/test_array_reduce.py +150 -0
warp/tests/test_atomic.py +64 -28
warp/tests/test_bool.py +99 -0
warp/tests/test_builtins_resolution.py +1292 -0
warp/tests/test_bvh.py +75 -43
warp/tests/test_closest_point_edge_edge.py +54 -57
warp/tests/test_codegen.py +233 -128
warp/tests/test_compile_consts.py +28 -20
warp/tests/test_conditional.py +108 -24
warp/tests/test_copy.py +10 -12
warp/tests/test_ctypes.py +112 -88
warp/tests/test_dense.py +21 -14
warp/tests/test_devices.py +98 -0
warp/tests/test_dlpack.py +136 -108
warp/tests/test_examples.py +277 -0
warp/tests/test_fabricarray.py +955 -0
warp/tests/test_fast_math.py +15 -11
warp/tests/test_fem.py +1271 -0
warp/tests/test_fp16.py +53 -19
warp/tests/test_func.py +187 -74
warp/tests/test_generics.py +194 -49
warp/tests/test_grad.py +180 -116
warp/tests/test_grad_customs.py +176 -0
warp/tests/test_hash_grid.py +52 -37
warp/tests/test_import.py +10 -23
warp/tests/test_indexedarray.py +577 -24
warp/tests/test_intersect.py +18 -9
warp/tests/test_large.py +141 -0
warp/tests/test_launch.py +251 -15
warp/tests/test_lerp.py +64 -65
warp/tests/test_linear_solvers.py +154 -0
warp/tests/test_lvalue.py +493 -0
warp/tests/test_marching_cubes.py +12 -13
warp/tests/test_mat.py +508 -2778
warp/tests/test_mat_lite.py +115 -0
warp/tests/test_mat_scalar_ops.py +2889 -0
warp/tests/test_math.py +103 -9
warp/tests/test_matmul.py +305 -69
warp/tests/test_matmul_lite.py +410 -0
warp/tests/test_mesh.py +71 -14
warp/tests/test_mesh_query_aabb.py +41 -25
warp/tests/test_mesh_query_point.py +325 -34
warp/tests/test_mesh_query_ray.py +39 -22
warp/tests/test_mlp.py +30 -22
warp/tests/test_model.py +92 -89
warp/tests/test_modules_lite.py +39 -0
warp/tests/test_multigpu.py +88 -114
warp/tests/test_noise.py +12 -11
warp/tests/test_operators.py +16 -20
warp/tests/test_options.py +11 -11
warp/tests/test_pinned.py +17 -18
warp/tests/test_print.py +32 -11
warp/tests/test_quat.py +275 -129
warp/tests/test_rand.py +18 -16
warp/tests/test_reload.py +38 -34
warp/tests/test_rounding.py +50 -43
warp/tests/test_runlength_encode.py +190 -0
warp/tests/test_smoothstep.py +9 -11
warp/tests/test_snippet.py +143 -0
warp/tests/test_sparse.py +460 -0
warp/tests/test_spatial.py +276 -243
warp/tests/test_streams.py +110 -85
warp/tests/test_struct.py +331 -85
warp/tests/test_tape.py +39 -21
warp/tests/test_torch.py +118 -89
warp/tests/test_transient_module.py +12 -13
warp/tests/test_types.py +614 -0
warp/tests/test_utils.py +494 -0
warp/tests/test_vec.py +354 -1987
warp/tests/test_vec_lite.py +73 -0
warp/tests/test_vec_scalar_ops.py +2099 -0
warp/tests/test_volume.py +457 -293
warp/tests/test_volume_write.py +124 -134
warp/tests/unittest_serial.py +35 -0
warp/tests/unittest_suites.py +341 -0
warp/tests/unittest_utils.py +568 -0
warp/tests/unused_test_misc.py +71 -0
warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
warp/thirdparty/appdirs.py +36 -45
warp/thirdparty/unittest_parallel.py +549 -0
warp/torch.py +72 -30
warp/types.py +1744 -713
warp/utils.py +360 -350
warp_lang-0.11.0.dist-info/LICENSE.md +36 -0
warp_lang-0.11.0.dist-info/METADATA +238 -0
warp_lang-0.11.0.dist-info/RECORD +332 -0
{warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/WHEEL +1 -1
warp/bin/warp-clang.exp +0 -0
warp/bin/warp-clang.lib +0 -0
warp/bin/warp.exp +0 -0
warp/bin/warp.lib +0 -0
warp/tests/test_all.py +0 -215
warp/tests/test_array_scan.py +0 -60
warp/tests/test_base.py +0 -208
warp/tests/test_unresolved_func.py +0 -7
warp/tests/test_unresolved_symbol.py +0 -7
warp_lang-0.9.0.dist-info/METADATA +0 -20
warp_lang-0.9.0.dist-info/RECORD +0 -177
/warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
/warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
/warp/tests/{test_square.py → aux_test_square.py} +0 -0
{warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/top_level.txt +0 -0

warp/tests/test_large.py ADDED

@@ -0,0 +1,141 @@
+# Copyright (c) 2023 NVIDIA CORPORATION.  All rights reserved.
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.  Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+import math
+import unittest
+import warp as wp
+from warp.tests.unittest_utils import *
+wp.init()
+@wp.kernel
+def conditional_sum(result: wp.array(dtype=wp.uint64)):
+    i, j, k = wp.tid()
+    if i == 0:
+        wp.atomic_add(result, 0, wp.uint64(1))
+def test_large_launch_large_kernel(test, device):
+    """Test tid() on kernel launch of 2**33 threads.
+    The function conditional sum will add 1 to result for every thread that has an i index of 0.
+    Due to the size of the grid, this test is not run on CPUs
+    """
+    test_result = wp.zeros(shape=(1,), dtype=wp.uint64, device=device)
+    large_dim_length = 2**16
+    half_result = large_dim_length * large_dim_length
+    wp.launch(kernel=conditional_sum, dim=[2, large_dim_length, large_dim_length], inputs=[test_result], device=device)
+    test.assertEqual(test_result.numpy()[0], half_result)
+@wp.kernel
+def count_elements(result: wp.array(dtype=wp.uint64)):
+    wp.atomic_add(result, 0, wp.uint64(1))
+def test_large_launch_max_blocks(test, device):
+    # Loop over 1000x1x1 elements using a grid of 256 threads
+    test_result = wp.zeros(shape=(1,), dtype=wp.uint64, device=device)
+    wp.launch(count_elements, (1000,), inputs=[test_result], max_blocks=1, device=device)
+    test.assertEqual(test_result.numpy()[0], 1000)
+    # Loop over 2x10x10 elements using a grid of 256 threads, using the tid() index to count half the elements
+    test_result.zero_()
+    wp.launch(
+        conditional_sum,
+        (
+            2,
+            50,
+            10,
+        ),
+        inputs=[test_result],
+        max_blocks=1,
+        device=device,
+    )
+    test.assertEqual(test_result.numpy()[0], 500)
+def test_large_launch_very_large_kernel(test, device):
+    """Due to the size of the grid, this test is not run on CPUs"""
+    # Dim is chosen to be larger than the maximum CUDA one-dimensional grid size (total threads)
+    dim = (2**31 - 1) * 256 + 1
+    test_result = wp.zeros(shape=(1,), dtype=wp.uint64, device=device)
+    wp.launch(count_elements, (dim,), inputs=[test_result], device=device)
+    test.assertEqual(test_result.numpy()[0], dim)
+def test_large_arrays_slow(test, device):
+    # The goal of this test is to use arrays just large enough to know
+    # if there's a flaw in handling arrays with more than 2**31-1 elements
+    # Unfortunately, it takes a long time to run so it won't be run automatically
+    # without changes to support how frequently a test may be run
+    total_elements = 2**31 + 8
+    # 1-D to 4-D arrays: test zero_, fill_, then zero_ for scalar data types:
+    for total_dims in range(1, 5):
+        dim_x = math.ceil(total_elements ** (1 / total_dims))
+        shape_tuple = tuple([dim_x] * total_dims)
+        for nptype, wptype in wp.types.np_dtype_to_warp_type.items():
+            a1 = wp.zeros(shape_tuple, dtype=wptype, device=device)
+            assert_np_equal(a1.numpy(), np.zeros_like(a1.numpy()))
+            a1.fill_(127)
+            assert_np_equal(a1.numpy(), 127 * np.ones_like(a1.numpy()))
+            a1.zero_()
+            assert_np_equal(a1.numpy(), np.zeros_like(a1.numpy()))
+def test_large_arrays_fast(test, device):
+    # A truncated version of test_large_arrays_slow meant to catch basic errors
+    total_elements = 2**31 + 8
+    nptype = np.dtype(np.int8)
+    wptype = wp.types.np_dtype_to_warp_type[nptype]
+    a1 = wp.zeros((total_elements,), dtype=wptype, device=device)
+    assert_np_equal(a1.numpy(), np.zeros_like(a1.numpy()))
+    a1.fill_(127)
+    assert_np_equal(a1.numpy(), 127 * np.ones_like(a1.numpy()))
+    a1.zero_()
+    assert_np_equal(a1.numpy(), np.zeros_like(a1.numpy()))
+devices = get_test_devices()
+class TestLarge(unittest.TestCase):
+    pass
+add_function_test(
+    TestLarge, "test_large_launch_large_kernel", test_large_launch_large_kernel, devices=get_unique_cuda_test_devices()
+)
+add_function_test(TestLarge, "test_large_launch_max_blocks", test_large_launch_max_blocks, devices=devices)
+add_function_test(
+    TestLarge,
+    "test_large_launch_very_large_kernel",
+    test_large_launch_very_large_kernel,
+    devices=get_unique_cuda_test_devices(),
+)
+add_function_test(TestLarge, "test_large_arrays_fast", test_large_arrays_fast, devices=devices)
+if __name__ == "__main__":
+    wp.build.clear_kernel_cache()
+    unittest.main(verbosity=2)

warp/tests/test_launch.py CHANGED

@@ -5,14 +5,12 @@
 # distribution of this software and related documentation without an express
 # license agreement from NVIDIA CORPORATION is strictly prohibited.
-# include parent path
+import unittest
 import numpy as np
-import math
 import warp as wp
-from warp.tests.test_base import *
-import unittest
+from warp.tests.unittest_utils import *
 wp.init()
@@ -74,20 +72,258 @@ def test4d(test, device):
     wp.launch(kernel4d, dim=a.shape, inputs=[wp.array(a, dtype=int, device=device)], device=device)
-def register(parent):
-    devices = get_test_devices()
+@wp.struct
+class Params:
+    a: wp.array(dtype=int)
+    i: int
+    f: float
+@wp.kernel
+def kernel_cmd(params: Params, i: int, f: float, v: wp.vec3, m: wp.mat33, out: wp.array(dtype=int)):
+    tid = wp.tid()
+    wp.expect_eq(params.i, i)
+    wp.expect_eq(params.f, f)
+    wp.expect_eq(i, int(f))
+    wp.expect_eq(v[0], f)
+    wp.expect_eq(v[1], f)
+    wp.expect_eq(v[2], f)
+    wp.expect_eq(m[0, 0], f)
+    wp.expect_eq(m[1, 1], f)
+    wp.expect_eq(m[2, 2], f)
+    out[tid] = tid + i
+def test_launch_cmd(test, device):
+    n = 1
+    ref = np.arange(0, n)
+    out = wp.zeros(n, dtype=int, device=device)
+    params = Params()
+    params.i = 1
+    params.f = 1.0
+    v = wp.vec3(params.f, params.f, params.f)
+    m = wp.mat33(params.f, 0.0, 0.0, 0.0, params.f, 0.0, 0.0, 0.0, params.f)
+    # standard launch
+    wp.launch(kernel_cmd, dim=n, inputs=[params, params.i, params.f, v, m, out], device=device)
+    assert_np_equal(out.numpy(), ref + params.i)
+    # cmd launch
+    out.zero_()
+    cmd = wp.launch(kernel_cmd, dim=n, inputs=[params, params.i, params.f, v, m, out], device=device, record_cmd=True)
+    cmd.launch()
+    assert_np_equal(out.numpy(), ref + params.i)
+def test_launch_cmd_set_param(test, device):
+    n = 1
+    ref = np.arange(0, n)
+    params = Params()
+    v = wp.vec3()
+    m = wp.mat33()
+    cmd = wp.launch(kernel_cmd, dim=n, inputs=[params, 0, 0.0, v, m, None], device=device, record_cmd=True)
+    # cmd param modification
+    out = wp.zeros(n, dtype=int, device=device)
+    params.i = 13
+    params.f = 13.0
+    v = wp.vec3(params.f, params.f, params.f)
+    m = wp.mat33(params.f, 0.0, 0.0, 0.0, params.f, 0.0, 0.0, 0.0, params.f)
+    cmd.set_param_at_index(0, params)
+    cmd.set_param_at_index(1, params.i)
+    cmd.set_param_at_index(2, params.f)
+    cmd.set_param_at_index(3, v)
+    cmd.set_param_at_index(4, m)
+    cmd.set_param_by_name("out", out)
+    cmd.launch()
+    assert_np_equal(out.numpy(), ref + params.i)
+    # test changing params after launch directly
+    # because we now cache the ctypes object inside the wp.struct
+    # instance  the command buffer will be automatically updated
+    params.i = 14
+    params.f = 14.0
+    v = wp.vec3(params.f, params.f, params.f)
+    m = wp.mat33(params.f, 0.0, 0.0, 0.0, params.f, 0.0, 0.0, 0.0, params.f)
+    # this is the line we explicitly leave out to
+    # ensure that param changes are reflected in the launch
+    # launch.set_param_at_index(0, params)
+    cmd.set_param_at_index(1, params.i)
+    cmd.set_param_at_index(2, params.f)
+    cmd.set_param_at_index(3, v)
+    cmd.set_param_at_index(4, m)
+    cmd.set_param_by_name("out", out)
+    cmd.launch()
+    assert_np_equal(out.numpy(), ref + params.i)
+def test_launch_cmd_set_ctype(test, device):
+    n = 1
+    ref = np.arange(0, n)
+    params = Params()
+    v = wp.vec3()
+    m = wp.mat33()
+    cmd = wp.launch(kernel_cmd, dim=n, inputs=[params, 0, 0.0, v, m, None], device=device, record_cmd=True)
+    # cmd param modification
+    out = wp.zeros(n, dtype=int, device=device)
+    # cmd param modification
+    out.zero_()
+    params.i = 13
+    params.f = 13.0
+    v = wp.vec3(params.f, params.f, params.f)
+    m = wp.mat33(params.f, 0.0, 0.0, 0.0, params.f, 0.0, 0.0, 0.0, params.f)
+    cmd.set_param_at_index_from_ctype(0, params.__ctype__())
+    cmd.set_param_at_index_from_ctype(1, params.i)
+    cmd.set_param_at_index_from_ctype(2, params.f)
+    cmd.set_param_at_index_from_ctype(3, v)
+    cmd.set_param_at_index_from_ctype(4, m)
+    cmd.set_param_by_name_from_ctype("out", out.__ctype__())
+    cmd.launch()
+    assert_np_equal(out.numpy(), ref + params.i)
+@wp.kernel
+def arange(out: wp.array(dtype=int)):
+    tid = wp.tid()
+    out[tid] = tid
+def test_launch_cmd_set_dim(test, device):
+    n = 10
+    ref = np.arange(0, n, dtype=int)
+    out = wp.zeros(n, dtype=int, device=device)
+    cmd = wp.launch(arange, dim=n, inputs=[out], device=device, record_cmd=True)
+    cmd.set_dim(5)
+    cmd.launch()
+    # check first half the array is filled while rest is still zero
+    assert_np_equal(out.numpy()[0:5], ref[0:5])
+    assert_np_equal(out.numpy()[5:], np.zeros(5))
+    out.zero_()
+    cmd.set_dim(10)
+    cmd.launch()
+    # check the whole array was filled
+    assert_np_equal(out.numpy(), ref)
+def test_launch_cmd_empty(test, device):
+    n = 10
+    ref = np.arange(0, n, dtype=int)
+    out = wp.zeros(n, dtype=int, device=device)
+    cmd = wp.Launch(arange, device)
+    cmd.set_dim(5)
+    cmd.set_param_by_name("out", out)
+    cmd.launch()
+    # check first half the array is filled while rest is still zero
+    assert_np_equal(out.numpy()[0:5], ref[0:5])
+    assert_np_equal(out.numpy()[5:], np.zeros(5))
+    out.zero_()
+    cmd.set_dim(10)
+    cmd.launch()
+    # check the whole array was filled
+    assert_np_equal(out.numpy(), ref)
+@wp.kernel
+def kernel_mul(
+    values: wp.array(dtype=int),
+    coeff: int,
+    out: wp.array(dtype=int),
+):
+    tid = wp.tid()
+    out[tid] = values[tid] * coeff
+def test_launch_tuple_args(test, device):
+    values = wp.array(np.arange(0, 4), dtype=int, device=device)
+    coeff = 3
+    out = wp.empty_like(values)
+    wp.launch(
+        kernel_mul,
+        dim=len(values),
+        inputs=(
+            values,
+            coeff,
+        ),
+        outputs=(out,),
+        device=device,
+    )
+    assert_np_equal(out.numpy(), np.array((0, 3, 6, 9)))
+devices = get_test_devices()
+class TestLaunch(unittest.TestCase):
+    pass
-    class TestLaunch(parent):
-        pass
-    add_function_test(TestLaunch, "test_1d_launch", test1d, devices=devices)
-    add_function_test(TestLaunch, "test_2d_launch", test2d, devices=devices)
-    add_function_test(TestLaunch, "test_3d_launch", test3d, devices=devices)
-    add_function_test(TestLaunch, "test_4d_launch", test4d, devices=devices)
+add_function_test(TestLaunch, "test_launch_1d", test1d, devices=devices)
+add_function_test(TestLaunch, "test_launch_2d", test2d, devices=devices)
+add_function_test(TestLaunch, "test_launch_3d", test3d, devices=devices)
+add_function_test(TestLaunch, "test_launch_4d", test4d, devices=devices)
-    return TestLaunch
+add_function_test(TestLaunch, "test_launch_cmd", test_launch_cmd, devices=devices)
+add_function_test(TestLaunch, "test_launch_cmd_set_param", test_launch_cmd_set_param, devices=devices)
+add_function_test(TestLaunch, "test_launch_cmd_set_ctype", test_launch_cmd_set_ctype, devices=devices)
+add_function_test(TestLaunch, "test_launch_cmd_set_dim", test_launch_cmd_set_dim, devices=devices)
+add_function_test(TestLaunch, "test_launch_cmd_empty", test_launch_cmd_empty, devices=devices)
 if __name__ == "__main__":
-    c = register(unittest.TestCase)
+    wp.build.clear_kernel_cache()
     unittest.main(verbosity=2)

warp/tests/test_lerp.py CHANGED

@@ -5,14 +5,16 @@
 # distribution of this software and related documentation without an express
 # license agreement from NVIDIA CORPORATION is strictly prohibited.
+import unittest
 from dataclasses import dataclass
 from typing import Any
-import unittest
 import numpy as np
 import warp as wp
-from warp.tests.test_base import *
+from warp.tests.unittest_utils import *
+wp.init()
 @dataclass
@@ -162,8 +164,6 @@ TEST_DATA = {
     ),
 }
-wp.init()
 def test_lerp(test, device):
     def make_kernel_fn(data_type):
@@ -179,84 +179,83 @@ def test_lerp(test, device):
     for data_type in TEST_DATA:
         kernel_fn = make_kernel_fn(data_type)
-        module = wp.get_module(kernel_fn.__module__)
         kernel = wp.Kernel(
             func=kernel_fn,
             key=f"test_lerp_{data_type.__name__}_kernel",
-            module=module,
         )
-        for test_data in TEST_DATA[data_type]:
-            a = wp.array(
-                [test_data.a],
-                dtype=data_type,
-                device=device,
-                requires_grad=True,
-            )
-            b = wp.array(
-                [test_data.b],
-                dtype=data_type,
-                device=device,
-                requires_grad=True,
-            )
-            t = wp.array(
-                [test_data.t],
-                dtype=float,
-                device=device,
-                requires_grad=True,
-            )
-            out = wp.array(
-                [0] * wp.types.type_length(data_type),
-                dtype=data_type,
-                device=device,
-                requires_grad=True,
-            )
-            tape = wp.Tape()
-            with tape:
-                wp.launch(
-                    kernel,
-                    dim=1,
-                    inputs=[a, b, t, out],
+        with test.subTest(data_type=data_type):
+            for test_data in TEST_DATA[data_type]:
+                a = wp.array(
+                    [test_data.a],
+                    dtype=data_type,
                     device=device,
+                    requires_grad=True,
                 )
-            assert_np_equal(
-                out.numpy(),
-                np.array([test_data.expected]),
-                tol=1e-6,
-            )
-            if test_data.check_backwards():
-                tape.backward(out)
-                assert_np_equal(
-                    tape.gradients[a].numpy(),
-                    np.array([test_data.expected_adj_a]),
-                    tol=1e-6,
+                b = wp.array(
+                    [test_data.b],
+                    dtype=data_type,
+                    device=device,
+                    requires_grad=True,
                 )
-                assert_np_equal(
-                    tape.gradients[b].numpy(),
-                    np.array([test_data.expected_adj_b]),
-                    tol=1e-6,
+                t = wp.array(
+                    [test_data.t],
+                    dtype=float,
+                    device=device,
+                    requires_grad=True,
+                )
+                out = wp.array(
+                    [0] * wp.types.type_length(data_type),
+                    dtype=data_type,
+                    device=device,
+                    requires_grad=True,
                 )
+                tape = wp.Tape()
+                with tape:
+                    wp.launch(
+                        kernel,
+                        dim=1,
+                        inputs=[a, b, t, out],
+                        device=device,
+                    )
                 assert_np_equal(
-                    tape.gradients[t].numpy(),
-                    np.array([test_data.expected_adj_t]),
+                    out.numpy(),
+                    np.array([test_data.expected]),
                     tol=1e-6,
                 )
+                if test_data.check_backwards():
+                    tape.backward(out)
+                    assert_np_equal(
+                        tape.gradients[a].numpy(),
+                        np.array([test_data.expected_adj_a]),
+                        tol=1e-6,
+                    )
+                    assert_np_equal(
+                        tape.gradients[b].numpy(),
+                        np.array([test_data.expected_adj_b]),
+                        tol=1e-6,
+                    )
+                    assert_np_equal(
+                        tape.gradients[t].numpy(),
+                        np.array([test_data.expected_adj_t]),
+                        tol=1e-6,
+                    )
+devices = get_test_devices()
-def register(parent):
-    devices = get_test_devices()
+class TestLerp(unittest.TestCase):
+    pass
-    class TestLerp(parent):
-        pass
-    add_function_test(TestLerp, "test_lerp", test_lerp, devices=devices)
-    return TestLerp
+add_function_test(TestLerp, "test_lerp", test_lerp, devices=devices)
 if __name__ == "__main__":
-    _ = register(unittest.TestCase)
+    wp.build.clear_kernel_cache()
     unittest.main(verbosity=2)