PyPI - warp-lang - Versions diffs - 1.5.0__py3-none-manylinux2014_aarch64.whl → 1.6.0__py3-none-manylinux2014_aarch64.whl - Mend

warp-lang 1.5.0__py3-none-manylinux2014_aarch64.whl → 1.6.0__py3-none-manylinux2014_aarch64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (132)

warp/__init__.py +5 -0
warp/autograd.py +414 -191
warp/bin/warp-clang.so +0 -0
warp/bin/warp.so +0 -0
warp/build.py +40 -12
warp/build_dll.py +13 -6
warp/builtins.py +1124 -497
warp/codegen.py +261 -136
warp/config.py +1 -1
warp/context.py +357 -119
warp/examples/assets/square_cloth.usd +0 -0
warp/examples/benchmarks/benchmark_gemm.py +27 -18
warp/examples/benchmarks/benchmark_interop_paddle.py +3 -3
warp/examples/benchmarks/benchmark_interop_torch.py +3 -3
warp/examples/core/example_torch.py +18 -34
warp/examples/fem/example_apic_fluid.py +1 -0
warp/examples/fem/example_mixed_elasticity.py +1 -1
warp/examples/optim/example_bounce.py +1 -1
warp/examples/optim/example_cloth_throw.py +1 -1
warp/examples/optim/example_diffray.py +4 -15
warp/examples/optim/example_drone.py +1 -1
warp/examples/optim/example_softbody_properties.py +392 -0
warp/examples/optim/example_trajectory.py +1 -3
warp/examples/optim/example_walker.py +5 -0
warp/examples/sim/example_cartpole.py +0 -2
warp/examples/sim/example_cloth.py +3 -1
warp/examples/sim/example_cloth_self_contact.py +260 -0
warp/examples/sim/example_granular_collision_sdf.py +4 -5
warp/examples/sim/example_jacobian_ik.py +0 -2
warp/examples/sim/example_quadruped.py +5 -2
warp/examples/tile/example_tile_cholesky.py +79 -0
warp/examples/tile/example_tile_convolution.py +2 -2
warp/examples/tile/example_tile_fft.py +2 -2
warp/examples/tile/example_tile_filtering.py +3 -3
warp/examples/tile/example_tile_matmul.py +4 -4
warp/examples/tile/example_tile_mlp.py +12 -12
warp/examples/tile/example_tile_nbody.py +180 -0
warp/examples/tile/example_tile_walker.py +319 -0
warp/fem/geometry/geometry.py +0 -2
warp/math.py +147 -0
warp/native/array.h +12 -0
warp/native/builtin.h +0 -1
warp/native/bvh.cpp +149 -70
warp/native/bvh.cu +287 -68
warp/native/bvh.h +195 -85
warp/native/clang/clang.cpp +5 -1
warp/native/coloring.cpp +5 -1
warp/native/cuda_util.cpp +91 -53
warp/native/cuda_util.h +5 -0
warp/native/exports.h +40 -40
warp/native/intersect.h +17 -0
warp/native/mat.h +41 -0
warp/native/mathdx.cpp +19 -0
warp/native/mesh.cpp +25 -8
warp/native/mesh.cu +153 -101
warp/native/mesh.h +482 -403
warp/native/quat.h +40 -0
warp/native/solid_angle.h +7 -0
warp/native/sort.cpp +85 -0
warp/native/sort.cu +34 -0
warp/native/sort.h +3 -1
warp/native/spatial.h +11 -0
warp/native/tile.h +1187 -669
warp/native/tile_reduce.h +8 -6
warp/native/vec.h +41 -0
warp/native/warp.cpp +8 -1
warp/native/warp.cu +263 -40
warp/native/warp.h +19 -5
warp/optim/linear.py +22 -4
warp/render/render_opengl.py +130 -64
warp/sim/__init__.py +6 -1
warp/sim/collide.py +270 -26
warp/sim/import_urdf.py +8 -8
warp/sim/integrator_euler.py +25 -7
warp/sim/integrator_featherstone.py +154 -35
warp/sim/integrator_vbd.py +842 -40
warp/sim/model.py +134 -72
warp/sparse.py +1 -1
warp/stubs.py +265 -132
warp/tape.py +28 -30
warp/tests/aux_test_module_unload.py +15 -0
warp/tests/{test_sim_grad.py → flaky_test_sim_grad.py} +104 -63
warp/tests/test_array.py +74 -0
warp/tests/test_assert.py +242 -0
warp/tests/test_codegen.py +14 -61
warp/tests/test_collision.py +2 -2
warp/tests/test_coloring.py +12 -2
warp/tests/test_examples.py +12 -1
warp/tests/test_func.py +21 -4
warp/tests/test_grad_debug.py +87 -2
warp/tests/test_hash_grid.py +1 -1
warp/tests/test_ipc.py +116 -0
warp/tests/test_lerp.py +13 -87
warp/tests/test_mat.py +138 -167
warp/tests/test_math.py +47 -1
warp/tests/test_matmul.py +17 -16
warp/tests/test_matmul_lite.py +10 -15
warp/tests/test_mesh.py +84 -60
warp/tests/test_mesh_query_aabb.py +165 -0
warp/tests/test_mesh_query_point.py +328 -286
warp/tests/test_mesh_query_ray.py +134 -121
warp/tests/test_mlp.py +2 -2
warp/tests/test_operators.py +43 -0
warp/tests/test_overwrite.py +47 -2
warp/tests/test_quat.py +77 -0
warp/tests/test_reload.py +29 -0
warp/tests/test_sim_grad_bounce_linear.py +204 -0
warp/tests/test_smoothstep.py +17 -83
warp/tests/test_static.py +19 -3
warp/tests/test_tape.py +25 -0
warp/tests/test_tile.py +178 -191
warp/tests/test_tile_load.py +356 -0
warp/tests/test_tile_mathdx.py +61 -8
warp/tests/test_tile_mlp.py +17 -17
warp/tests/test_tile_reduce.py +24 -18
warp/tests/test_tile_shared_memory.py +66 -17
warp/tests/test_tile_view.py +165 -0
warp/tests/test_torch.py +35 -0
warp/tests/test_utils.py +36 -24
warp/tests/test_vec.py +110 -0
warp/tests/unittest_suites.py +29 -4
warp/tests/unittest_utils.py +30 -13
warp/thirdparty/unittest_parallel.py +2 -2
warp/types.py +411 -101
warp/utils.py +10 -7
{warp_lang-1.5.0.dist-info → warp_lang-1.6.0.dist-info}/METADATA +92 -69
{warp_lang-1.5.0.dist-info → warp_lang-1.6.0.dist-info}/RECORD +130 -119
{warp_lang-1.5.0.dist-info → warp_lang-1.6.0.dist-info}/WHEEL +1 -1
warp/examples/benchmarks/benchmark_tile.py +0 -179
warp/native/tile_gemm.h +0 -341
{warp_lang-1.5.0.dist-info → warp_lang-1.6.0.dist-info}/LICENSE.md +0 -0
{warp_lang-1.5.0.dist-info → warp_lang-1.6.0.dist-info}/top_level.txt +0 -0

warp/bin/warp-clang.so CHANGED Viewed

Binary file

warp/bin/warp.so CHANGED Viewed

Binary file

warp/build.py CHANGED Viewed

@@ -11,27 +11,50 @@ import os
 import warp.config
 from warp.thirdparty import appdirs
+# From nvJitLink.h
+nvJitLink_input_type = {"cubin": 1, "ptx": 2, "ltoir": 3, "fatbin": 4, "object": 5, "library": 6}
 # builds cuda source to PTX or CUBIN using NVRTC (output type determined by output_path extension)
-def build_cuda(cu_path, arch, output_path, config="release", verify_fp=False, fast_math=False, ltoirs=None):
+def build_cuda(
+    cu_path,
+    arch,
+    output_path,
+    config="release",
+    verify_fp=False,
+    fast_math=False,
+    fuse_fp=True,
+    lineinfo=False,
+    ltoirs=None,
+    fatbins=None,
+) -> None:
     with open(cu_path, "rb") as src_file:
         src = src_file.read()
-        cu_path = cu_path.encode("utf-8")
+        cu_path_bytes = cu_path.encode("utf-8")
+        program_name_bytes = os.path.basename(cu_path).encode("utf-8")
         inc_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "native").encode("utf-8")
         output_path = output_path.encode("utf-8")
         if warp.config.llvm_cuda:
-            warp.context.runtime.llvm.compile_cuda(src, cu_path, inc_path, output_path, False)
+            warp.context.runtime.llvm.compile_cuda(src, cu_path_bytes, inc_path, output_path, False)
         else:
             if ltoirs is None:
                 ltoirs = []
-            num_ltoirs = len(ltoirs)
-            arr_lroirs = (ctypes.c_char_p * num_ltoirs)(*ltoirs)
-            arr_lroir_sizes = (ctypes.c_size_t * num_ltoirs)(*[len(l) for l in ltoirs])
+            if fatbins is None:
+                fatbins = []
+            link_data = list(ltoirs) + list(fatbins)
+            num_link = len(link_data)
+            arr_link = (ctypes.c_char_p * num_link)(*link_data)
+            arr_link_sizes = (ctypes.c_size_t * num_link)(*[len(l) for l in link_data])
+            link_input_types = [nvJitLink_input_type["ltoir"]] * len(ltoirs) + [nvJitLink_input_type["fatbin"]] * len(
+                fatbins
+            )
+            arr_link_input_types = (ctypes.c_int * num_link)(*link_input_types)
             err = warp.context.runtime.core.cuda_compile_program(
                 src,
+                program_name_bytes,
                 arch,
                 inc_path,
                 0,
@@ -40,10 +63,13 @@ def build_cuda(cu_path, arch, output_path, config="release", verify_fp=False, fa
                 warp.config.verbose,
                 verify_fp,
                 fast_math,
+                fuse_fp,
+                lineinfo,
                 output_path,
-                num_ltoirs,
-                arr_lroirs,
-                arr_lroir_sizes,
+                num_link,
+                arr_link,
+                arr_link_sizes,
+                arr_link_input_types,
             )
             if err != 0:
                 raise Exception(f"CUDA kernel build failed with error code {err}")
@@ -57,14 +83,16 @@ def load_cuda(input_path, device):
     return warp.context.runtime.core.cuda_load_module(device.context, input_path.encode("utf-8"))
-def build_cpu(obj_path, cpp_path, mode="release", verify_fp=False, fast_math=False):
+def build_cpu(obj_path, cpp_path, mode="release", verify_fp=False, fast_math=False, fuse_fp=True):
     with open(cpp_path, "rb") as cpp:
         src = cpp.read()
         cpp_path = cpp_path.encode("utf-8")
         inc_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "native").encode("utf-8")
         obj_path = obj_path.encode("utf-8")
-        err = warp.context.runtime.llvm.compile_cpp(src, cpp_path, inc_path, obj_path, mode == "debug", verify_fp)
+        err = warp.context.runtime.llvm.compile_cpp(
+            src, cpp_path, inc_path, obj_path, mode == "debug", verify_fp, fuse_fp
+        )
         if err != 0:
             raise Exception(f"CPU kernel build failed with error code {err}")

warp/build_dll.py CHANGED Viewed

@@ -202,18 +202,25 @@ def build_dll_for_arch(args, dll_path, cpp_paths, cu_path, libs, arch, mode=None
                     "-gencode=arch=compute_87,code=sm_87",  # Orin
                 ]
-            # support for Ada and Hopper is available with CUDA Toolkit 11.8+
-            if ctk_version >= (11, 8):
+            if ctk_version >= (12, 8):
+                # Support for Blackwell is available with CUDA Toolkit 12.8+
                 gencode_opts += [
                     "-gencode=arch=compute_89,code=sm_89",  # Ada
                     "-gencode=arch=compute_90,code=sm_90",  # Hopper
-                    # PTX for future hardware
-                    "-gencode=arch=compute_90,code=compute_90",
+                    "-gencode=arch=compute_100,code=sm_100",  # Blackwell
+                    "-gencode=arch=compute_120,code=sm_120",  # Blackwell
+                    "-gencode=arch=compute_120,code=compute_120",  # PTX for future hardware
+                ]
+            elif ctk_version >= (11, 8):
+                # Support for Ada and Hopper is available with CUDA Toolkit 11.8+
+                gencode_opts += [
+                    "-gencode=arch=compute_89,code=sm_89",  # Ada
+                    "-gencode=arch=compute_90,code=sm_90",  # Hopper
+                    "-gencode=arch=compute_90,code=compute_90",  # PTX for future hardware
                 ]
             else:
                 gencode_opts += [
-                    # PTX for future hardware
-                    "-gencode=arch=compute_86,code=compute_86",
+                    "-gencode=arch=compute_86,code=compute_86",  # PTX for future hardware
                 ]
         nvcc_opts = gencode_opts + [