warp-lang 1.8.0-py3-none-manylinux_2_34_aarch64.whl → 1.9.0-py3-none-manylinux_2_34_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of warp-lang might be problematic.
- warp/__init__.py +282 -103
- warp/__init__.pyi +482 -110
- warp/bin/warp-clang.so +0 -0
- warp/bin/warp.so +0 -0
- warp/build.py +93 -30
- warp/build_dll.py +48 -63
- warp/builtins.py +955 -137
- warp/codegen.py +327 -209
- warp/config.py +1 -1
- warp/context.py +1363 -800
- warp/examples/core/example_marching_cubes.py +1 -0
- warp/examples/core/example_render_opengl.py +100 -3
- warp/examples/fem/example_apic_fluid.py +98 -52
- warp/examples/fem/example_convection_diffusion_dg.py +25 -4
- warp/examples/fem/example_diffusion_mgpu.py +8 -3
- warp/examples/fem/utils.py +68 -22
- warp/examples/interop/example_jax_callable.py +34 -4
- warp/examples/interop/example_jax_kernel.py +27 -1
- warp/fabric.py +1 -1
- warp/fem/cache.py +27 -19
- warp/fem/domain.py +2 -2
- warp/fem/field/nodal_field.py +2 -2
- warp/fem/field/virtual.py +266 -166
- warp/fem/geometry/geometry.py +5 -5
- warp/fem/integrate.py +200 -91
- warp/fem/space/restriction.py +4 -0
- warp/fem/space/shape/tet_shape_function.py +3 -10
- warp/jax_experimental/custom_call.py +1 -1
- warp/jax_experimental/ffi.py +203 -54
- warp/marching_cubes.py +708 -0
- warp/native/array.h +103 -8
- warp/native/builtin.h +90 -9
- warp/native/bvh.cpp +64 -28
- warp/native/bvh.cu +58 -58
- warp/native/bvh.h +2 -2
- warp/native/clang/clang.cpp +7 -7
- warp/native/coloring.cpp +13 -3
- warp/native/crt.cpp +2 -2
- warp/native/crt.h +3 -5
- warp/native/cuda_util.cpp +42 -11
- warp/native/cuda_util.h +10 -4
- warp/native/exports.h +1842 -1908
- warp/native/fabric.h +2 -1
- warp/native/hashgrid.cpp +37 -37
- warp/native/hashgrid.cu +2 -2
- warp/native/initializer_array.h +1 -1
- warp/native/intersect.h +4 -4
- warp/native/mat.h +1913 -119
- warp/native/mathdx.cpp +43 -43
- warp/native/mesh.cpp +24 -24
- warp/native/mesh.cu +26 -26
- warp/native/mesh.h +5 -3
- warp/native/nanovdb/GridHandle.h +179 -12
- warp/native/nanovdb/HostBuffer.h +8 -7
- warp/native/nanovdb/NanoVDB.h +517 -895
- warp/native/nanovdb/NodeManager.h +323 -0
- warp/native/nanovdb/PNanoVDB.h +2 -2
- warp/native/quat.h +337 -16
- warp/native/rand.h +7 -7
- warp/native/range.h +7 -1
- warp/native/reduce.cpp +10 -10
- warp/native/reduce.cu +13 -14
- warp/native/runlength_encode.cpp +2 -2
- warp/native/runlength_encode.cu +5 -5
- warp/native/scan.cpp +3 -3
- warp/native/scan.cu +4 -4
- warp/native/sort.cpp +10 -10
- warp/native/sort.cu +22 -22
- warp/native/sparse.cpp +8 -8
- warp/native/sparse.cu +14 -14
- warp/native/spatial.h +366 -17
- warp/native/svd.h +23 -8
- warp/native/temp_buffer.h +2 -2
- warp/native/tile.h +303 -70
- warp/native/tile_radix_sort.h +5 -1
- warp/native/tile_reduce.h +16 -25
- warp/native/tuple.h +2 -2
- warp/native/vec.h +385 -18
- warp/native/volume.cpp +54 -54
- warp/native/volume.cu +1 -1
- warp/native/volume.h +2 -1
- warp/native/volume_builder.cu +30 -37
- warp/native/warp.cpp +150 -149
- warp/native/warp.cu +337 -193
- warp/native/warp.h +227 -226
- warp/optim/linear.py +736 -271
- warp/render/imgui_manager.py +289 -0
- warp/render/render_opengl.py +137 -57
- warp/render/render_usd.py +0 -1
- warp/sim/collide.py +1 -2
- warp/sim/graph_coloring.py +2 -2
- warp/sim/integrator_vbd.py +10 -2
- warp/sparse.py +559 -176
- warp/tape.py +2 -0
- warp/tests/aux_test_module_aot.py +7 -0
- warp/tests/cuda/test_async.py +3 -3
- warp/tests/cuda/test_conditional_captures.py +101 -0
- warp/tests/geometry/test_marching_cubes.py +233 -12
- warp/tests/sim/test_cloth.py +89 -6
- warp/tests/sim/test_coloring.py +82 -7
- warp/tests/test_array.py +56 -5
- warp/tests/test_assert.py +53 -0
- warp/tests/test_atomic_cas.py +127 -114
- warp/tests/test_codegen.py +3 -2
- warp/tests/test_context.py +8 -15
- warp/tests/test_enum.py +136 -0
- warp/tests/test_examples.py +2 -2
- warp/tests/test_fem.py +45 -2
- warp/tests/test_fixedarray.py +229 -0
- warp/tests/test_func.py +18 -15
- warp/tests/test_future_annotations.py +7 -5
- warp/tests/test_linear_solvers.py +30 -0
- warp/tests/test_map.py +1 -1
- warp/tests/test_mat.py +1540 -378
- warp/tests/test_mat_assign_copy.py +178 -0
- warp/tests/test_mat_constructors.py +574 -0
- warp/tests/test_module_aot.py +287 -0
- warp/tests/test_print.py +69 -0
- warp/tests/test_quat.py +162 -34
- warp/tests/test_quat_assign_copy.py +145 -0
- warp/tests/test_reload.py +2 -1
- warp/tests/test_sparse.py +103 -0
- warp/tests/test_spatial.py +140 -34
- warp/tests/test_spatial_assign_copy.py +160 -0
- warp/tests/test_static.py +48 -0
- warp/tests/test_struct.py +43 -3
- warp/tests/test_tape.py +38 -0
- warp/tests/test_types.py +0 -20
- warp/tests/test_vec.py +216 -441
- warp/tests/test_vec_assign_copy.py +143 -0
- warp/tests/test_vec_constructors.py +325 -0
- warp/tests/tile/test_tile.py +206 -152
- warp/tests/tile/test_tile_cholesky.py +605 -0
- warp/tests/tile/test_tile_load.py +169 -0
- warp/tests/tile/test_tile_mathdx.py +2 -558
- warp/tests/tile/test_tile_matmul.py +179 -0
- warp/tests/tile/test_tile_mlp.py +1 -1
- warp/tests/tile/test_tile_reduce.py +100 -11
- warp/tests/tile/test_tile_shared_memory.py +16 -16
- warp/tests/tile/test_tile_sort.py +59 -55
- warp/tests/unittest_suites.py +16 -0
- warp/tests/walkthrough_debug.py +1 -1
- warp/thirdparty/unittest_parallel.py +108 -9
- warp/types.py +554 -264
- warp/utils.py +68 -86
- {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/METADATA +28 -65
- {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/RECORD +150 -138
- warp/native/marching.cpp +0 -19
- warp/native/marching.cu +0 -514
- warp/native/marching.h +0 -19
- {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/WHEEL +0 -0
- {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/licenses/LICENSE.md +0 -0
- {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/top_level.txt +0 -0
warp/tests/test_module_aot.py
ADDED
@@ -0,0 +1,287 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import importlib
+import importlib.util
+import os
+import shutil
+import unittest
+from pathlib import Path
+
+import numpy as np
+
+import warp as wp
+import warp.tests.aux_test_module_aot
+from warp.tests.unittest_utils import *
+
+ADD_KERNEL_START = """import warp as wp
+
+
+@wp.kernel
+def add_kernel(a: wp.array(dtype=wp.int32), b: wp.array(dtype=wp.int32), res: wp.array(dtype=wp.int32)):
+    pass
+"""
+
+ADD_KERNEL_FINAL = """import warp as wp
+
+
+@wp.kernel
+def add_kernel(a: wp.array(dtype=wp.int32), b: wp.array(dtype=wp.int32), res: wp.array(dtype=wp.int32)):
+    i = wp.tid()
+    res[i] = a[i] + b[i]
+"""
+
+
+def reload_module(module):
+    # Clearing the .pyc file associated with a module is a necessary workaround
+    # for `importlib.reload` to work as expected when run from within Kit.
+    cache_file = importlib.util.cache_from_source(module.__file__)
+    if os.path.exists(cache_file):
+        os.remove(cache_file)
+    importlib.reload(module)
+
+
+TEST_CACHE_DIR = Path(os.path.abspath(os.path.join(os.path.dirname(__file__), "test_module_aot_cache")))
+
+
+def test_disable_hashing(test, device):
+    """Test that module hashing can be disabled.
+
+    A module is run, modified, and run again. The second run should not trigger
+    a recompilation since the hash will not be used to detect changes.
+    """
+
+    try:
+        shutil.rmtree(TEST_CACHE_DIR, ignore_errors=True)
+        TEST_CACHE_DIR.mkdir(parents=True, exist_ok=True)
+        wp.set_module_options(
+            {"block_dim": 1 if device.is_cpu else 256},
+            warp.tests.aux_test_module_aot,
+        )
+
+        a = wp.ones(10, dtype=wp.int32, device=device)
+        b = wp.ones(10, dtype=wp.int32, device=device)
+        res = wp.zeros((10,), dtype=wp.int32, device=device)
+
+        # Write out the module and import it
+        with open(os.path.abspath(os.path.join(os.path.dirname(__file__), "aux_test_module_aot.py")), "w") as f:
+            f.writelines(ADD_KERNEL_START)
+        reload_module(warp.tests.aux_test_module_aot)
+
+        # First launch, cold compile, expect res to be unchanged since kernel is empty
+        wp.compile_aot_module(warp.tests.aux_test_module_aot, device, module_dir=TEST_CACHE_DIR, strip_hash=True)
+        wp.load_aot_module(warp.tests.aux_test_module_aot, device, module_dir=TEST_CACHE_DIR, strip_hash=True)
+
+        wp.launch(
+            warp.tests.aux_test_module_aot.add_kernel,
+            dim=a.shape,
+            inputs=[a, b],
+            outputs=[res],
+            device=device,
+        )
+
+        assert_np_equal(res.numpy(), np.zeros((10,), dtype=np.int32))
+
+        res.zero_()
+
+        # Write out the modified module and import it
+        with open(os.path.abspath(os.path.join(os.path.dirname(__file__), "aux_test_module_aot.py")), "w") as f:
+            f.writelines(ADD_KERNEL_FINAL)
+        reload_module(warp.tests.aux_test_module_aot)
+
+        # This time, the hash checks will be skipped so the previously compiled module will be loaded
+        wp.load_aot_module(warp.tests.aux_test_module_aot, device, module_dir=TEST_CACHE_DIR, strip_hash=True)
+
+        # Kernel is executed with the ADD_KERNEL_START code, not the ADD_KERNEL_FINAL code
+        wp.launch(
+            warp.tests.aux_test_module_aot.add_kernel,
+            dim=a.shape,
+            inputs=[a, b],
+            outputs=[res],
+            device=device,
+        )
+
+        assert_np_equal(res.numpy(), np.zeros((10,), dtype=np.int32))
+    finally:
+        # Clear the cache directory
+        shutil.rmtree(TEST_CACHE_DIR, ignore_errors=True)
+        # Revert the module default options and auxiliary file to the original states
+        wp.set_module_options({"cuda_output": None, "strip_hash": False}, warp.tests.aux_test_module_aot)
+
+        with open(os.path.abspath(os.path.join(os.path.dirname(__file__), "aux_test_module_aot.py")), "w") as f:
+            f.writelines(ADD_KERNEL_FINAL)
+
+
+def test_enable_hashing(test, device):
+    """Ensure that the logic of test_disable_hashing is sound.
+
+    This test sets "strip_hash" to False, so normal module hashing rules
+    should be in effect.
+    """
+
+    try:
+        shutil.rmtree(TEST_CACHE_DIR, ignore_errors=True)
+        TEST_CACHE_DIR.mkdir(parents=True, exist_ok=True)
+        wp.set_module_options(
+            {"block_dim": 1 if device.is_cpu else 256},
+            warp.tests.aux_test_module_aot,
+        )
+
+        a = wp.ones(10, dtype=wp.int32, device=device)
+        b = wp.ones(10, dtype=wp.int32, device=device)
+        res = wp.zeros((10,), dtype=wp.int32, device=device)
+
+        # Write out the module and import it
+        with open(os.path.abspath(os.path.join(os.path.dirname(__file__), "aux_test_module_aot.py")), "w") as f:
+            f.writelines(ADD_KERNEL_START)
+        reload_module(warp.tests.aux_test_module_aot)
+
+        # First launch, cold compile, expect no-op result
+        wp.compile_aot_module(warp.tests.aux_test_module_aot, device, module_dir=TEST_CACHE_DIR, strip_hash=False)
+        wp.load_aot_module(warp.tests.aux_test_module_aot, device, module_dir=TEST_CACHE_DIR, strip_hash=False)
+        wp.launch(
+            warp.tests.aux_test_module_aot.add_kernel,
+            dim=a.shape,
+            inputs=[a, b],
+            outputs=[res],
+            device=device,
+        )
+
+        assert_np_equal(res.numpy(), np.zeros((10,), dtype=np.int32))
+
+        # Write out the modified module (results in a different hash) and import it
+        with open(os.path.abspath(os.path.join(os.path.dirname(__file__), "aux_test_module_aot.py")), "w") as f:
+            f.writelines(ADD_KERNEL_FINAL)
+        reload_module(warp.tests.aux_test_module_aot)
+
+        # Trying to load the module should fail since a compiled module with the expected hash does not exist
+        with test.assertRaises(FileNotFoundError):
+            wp.load_aot_module("warp.tests.aux_test_module_aot", device, module_dir=TEST_CACHE_DIR, strip_hash=False)
+    finally:
+        # Clear the cache directory
+        shutil.rmtree(TEST_CACHE_DIR, ignore_errors=True)
+        # Revert the module default options and auxiliary file to the original states
+        wp.set_module_options({"cuda_output": None, "strip_hash": False}, warp.tests.aux_test_module_aot)
+
+        with open(os.path.abspath(os.path.join(os.path.dirname(__file__), "aux_test_module_aot.py")), "w") as f:
+            f.writelines(ADD_KERNEL_FINAL)
+
+
+def test_module_load_resolution(test, device):
+    """Test various ways of resolving a module when loading and compiling."""
+
+    wp.set_module_options(
+        {"block_dim": 1 if device.is_cpu else 256},
+        warp.tests.aux_test_module_aot,
+    )
+
+    a = wp.ones(10, dtype=wp.int32, device=device)
+    b = wp.ones(10, dtype=wp.int32, device=device)
+    res = wp.zeros((10,), dtype=wp.int32, device=device)
+
+    reload_module(warp.tests.aux_test_module_aot)
+    wp.compile_aot_module(warp.tests.aux_test_module_aot, device)
+    wp.load_aot_module(warp.tests.aux_test_module_aot, device)
+
+    wp.launch(
+        warp.tests.aux_test_module_aot.add_kernel,
+        dim=a.shape,
+        inputs=[a, b],
+        outputs=[res],
+        device=device,
+    )
+    assert_np_equal(res.numpy(), np.full((10,), 2, dtype=np.int32))
+
+    reload_module(warp.tests.aux_test_module_aot)
+    res.zero_()
+    wp.compile_aot_module("warp.tests.aux_test_module_aot", device)
+    wp.load_aot_module("warp.tests.aux_test_module_aot", device)
+
+    wp.launch(
+        warp.tests.aux_test_module_aot.add_kernel,
+        dim=a.shape,
+        inputs=[a, b],
+        outputs=[res],
+        device=device,
+    )
+    assert_np_equal(res.numpy(), np.full((10,), 2, dtype=np.int32))
+
+
+class TestModuleAOT(unittest.TestCase):
+    def test_module_compile_specified_arch_ptx(self):
+        """Test that a module can be compiled for a specific architecture or architectures (PTX)."""
+
+        if wp.get_cuda_device_count() == 0:
+            self.skipTest("No CUDA devices found")
+
+        if len(wp.context.runtime.nvrtc_supported_archs) < 2:
+            self.skipTest("NVRTC must support at least two architectures to run this test")
+
+        try:
+            shutil.rmtree(TEST_CACHE_DIR, ignore_errors=True)
+            TEST_CACHE_DIR.mkdir(parents=True, exist_ok=True)
+
+            archs = list(wp.context.runtime.nvrtc_supported_archs)[:2]
+
+            wp.compile_aot_module(warp.tests.aux_test_module_aot, arch=archs, module_dir=TEST_CACHE_DIR, use_ptx=True)
+
+            # Make sure the expected files exist
+            module_identifier = wp.get_module("warp.tests.aux_test_module_aot").get_module_identifier()
+            for arch in archs:
+                expected_filename = f"{module_identifier}.sm{arch}.ptx"
+                expected_path = TEST_CACHE_DIR / expected_filename
+                self.assertTrue(expected_path.exists(), f"Expected compiled PTX file not found: {expected_path}")
+
+        finally:
+            # Clear the cache directory
+            shutil.rmtree(TEST_CACHE_DIR, ignore_errors=True)
+
+    def test_module_compile_specified_arch_cubin(self):
+        """Test that a module can be compiled for a specific architecture or architectures (CUBIN)."""
+
+        if wp.get_cuda_device_count() == 0:
+            self.skipTest("No CUDA devices found")
+
+        if len(wp.context.runtime.nvrtc_supported_archs) < 2:
+            self.skipTest("NVRTC must support at least two architectures to run this test")
+
+        try:
+            shutil.rmtree(TEST_CACHE_DIR, ignore_errors=True)
+            TEST_CACHE_DIR.mkdir(parents=True, exist_ok=True)
+
+            archs = list(wp.context.runtime.nvrtc_supported_archs)[:2]
+
+            wp.compile_aot_module(warp.tests.aux_test_module_aot, arch=archs, module_dir=TEST_CACHE_DIR, use_ptx=False)
+
+            # Make sure the expected files exist
+            module_identifier = wp.get_module("warp.tests.aux_test_module_aot").get_module_identifier()
+            for arch in archs:
+                expected_filename = f"{module_identifier}.sm{arch}.cubin"
+                expected_path = TEST_CACHE_DIR / expected_filename
+                self.assertTrue(expected_path.exists(), f"Expected compiled CUBIN file not found: {expected_path}")

+        finally:
+            # Clear the cache directory
+            shutil.rmtree(TEST_CACHE_DIR, ignore_errors=True)
+
+
+devices = get_test_devices()
+add_function_test(TestModuleAOT, "test_disable_hashing", test_disable_hashing, devices=devices)
+add_function_test(TestModuleAOT, "test_enable_hashing", test_enable_hashing, devices=devices)
+add_function_test(TestModuleAOT, "test_module_load_resolution", test_module_load_resolution, devices=devices)
+
+if __name__ == "__main__":
+    wp.clear_kernel_cache()
+    unittest.main(verbosity=2)
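For context, a minimal sketch of the ahead-of-time workflow this new test file exercises, based only on the calls visible in the diff above (wp.compile_aot_module, wp.load_aot_module, wp.launch). The my_kernels module name is a hypothetical placeholder, and the option semantics are assumptions inferred from the tests, not the documented 1.9.0 API:

import warp as wp

import my_kernels  # hypothetical user module containing @wp.kernel definitions

device = wp.get_device()

# Compile the module's kernels ahead of time into a chosen directory
# (the tests above also exercise strip_hash=, arch=, and use_ptx= variants).
wp.compile_aot_module(my_kernels, device, module_dir="aot_cache")

# Later, or on a machine without a JIT toolchain, load the precompiled
# binaries instead of compiling from source.
wp.load_aot_module(my_kernels, device, module_dir="aot_cache")

a = wp.ones(10, dtype=wp.int32, device=device)
b = wp.ones(10, dtype=wp.int32, device=device)
res = wp.zeros(10, dtype=wp.int32, device=device)

wp.launch(my_kernels.add_kernel, dim=a.shape, inputs=[a, b], outputs=[res], device=device)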
warp/tests/test_print.py
CHANGED
@@ -43,6 +43,52 @@ def test_print_kernel():
     # fmt: on


+@wp.func
+def test_print_numeric_func(value: int):
+    b = wp.bool(value)
+    print(b)
+    assert repr(b) == "bool(True)"
+
+    # signed ints
+    i8 = wp.int8(value)
+    print(i8)
+    assert repr(i8) == "int8(-1)"
+    i16 = wp.int16(value)
+    print(i16)
+    assert repr(i16) == "int16(-1)"
+    i32 = wp.int32(value)
+    print(i32)
+    assert repr(i32) == "int32(-1)"
+    i64 = wp.int64(value)
+    print(i64)
+    assert repr(i64) == "int64(-1)"
+
+    # unsigned ints
+    ui8 = wp.uint8(value)
+    print(ui8)
+    assert repr(ui8) == "uint8(255)"
+    ui16 = wp.uint16(value)
+    print(ui16)
+    assert repr(ui16) == "uint16(65535)"
+    ui32 = wp.uint32(value)
+    print(ui32)
+    assert repr(ui32) == "uint32(4294967295)"
+    ui64 = wp.uint64(value)
+    print(ui64)
+    assert repr(ui64) == "uint64(18446744073709551615)"
+
+    # floats
+    f16 = wp.float16(value)
+    print(f16)
+    assert repr(f16) == "float16(-1)"
+    f32 = wp.float32(value)
+    print(f32)
+    assert repr(f32) == "float32(-1)"
+    f64 = wp.float64(value)
+    print(f64)
+    assert repr(f64) == "float64(-1)"
+
+
 @wp.kernel
 def test_print_numeric_kernel(value: int):
     # signed ints

@@ -140,6 +186,29 @@ def test_print_numeric(test, device):
         rf"-1{os.linesep}",
     )

+    capture = StdOutCapture()
+    capture.begin()
+    test_print_numeric_func(-1)
+    s = capture.end()
+
+    # We skip the win32 comparison for now since the capture sometimes is an empty string
+    if sys.platform != "win32":
+        test.assertRegex(
+            s,
+            rf"True{os.linesep}"
+            rf"-1{os.linesep}"
+            rf"-1{os.linesep}"
+            rf"-1{os.linesep}"
+            rf"-1{os.linesep}"
+            rf"255{os.linesep}"
+            rf"65535{os.linesep}"
+            rf"4294967295{os.linesep}"
+            rf"18446744073709551615{os.linesep}"
+            rf"-1{os.linesep}"
+            rf"-1{os.linesep}"
+            rf"-1{os.linesep}",
+        )
+

 def test_print_boolean(test, device):
     wp.load_module(device=device)
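The unsigned constants asserted above follow from two's-complement wraparound: constructing a sized integer from -1 yields the type's maximum unsigned value, 2**bits - 1. A quick NumPy sketch checking the same constants (NumPy is used here only for illustration; it is not what the test runs):

import numpy as np

# -1 wraps to 2**bits - 1 for each unsigned width asserted in the test.
for np_type, expected in (
    (np.uint8, 255),
    (np.uint16, 65535),
    (np.uint32, 4294967295),
    (np.uint64, 18446744073709551615),
):
    assert int(np.array(-1).astype(np_type)) == expected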
warp/tests/test_quat.py
CHANGED
@@ -2014,6 +2014,22 @@ def test_py_arithmetic_ops(test, device, dtype):
     test.assertSequenceEqual(wptype(24) / v, make_quat(12, 6, 4, 3))


+@wp.kernel
+def quat_grad(q: wp.quat):
+    wp.expect_eq(q.w, 1.0)
+
+
+# Test passing of a quaternion in the backward pass
+def test_quat_backward(test, device):
+    q = wp.quat_identity()
+
+    tape = wp.Tape()
+    with tape:
+        wp.launch(quat_grad, dim=1, inputs=[q], device=device)
+
+    tape.backward()
+
+
 @wp.kernel
 def quat_len_kernel(
     q: wp.quat,

@@ -2118,39 +2134,6 @@ def test_quat_assign(test, device):
     run(quat_assign_attribute)


-def test_quat_assign_copy(test, device):
-    saved_enable_vector_component_overwrites_setting = wp.config.enable_vector_component_overwrites
-    try:
-        wp.config.enable_vector_component_overwrites = True
-
-        @wp.kernel
-        def quat_assign_overwrite(x: wp.array(dtype=wp.quat), y: wp.array(dtype=wp.quat)):
-            tid = wp.tid()
-
-            a = wp.quat()
-            b = x[tid]
-            a = b
-            a[1] = 3.0
-
-            y[tid] = a
-
-        x = wp.ones(1, dtype=wp.quat, device=device, requires_grad=True)
-        y = wp.zeros(1, dtype=wp.quat, device=device, requires_grad=True)
-
-        tape = wp.Tape()
-        with tape:
-            wp.launch(quat_assign_overwrite, dim=1, inputs=[x, y], device=device)
-
-        y.grad = wp.ones_like(y, requires_grad=False)
-        tape.backward()
-
-        assert_np_equal(y.numpy(), np.array([[1.0, 3.0, 1.0, 1.0]], dtype=float))
-        assert_np_equal(x.grad.numpy(), np.array([[1.0, 0.0, 1.0, 1.0]], dtype=float))
-
-    finally:
-        wp.config.enable_vector_component_overwrites = saved_enable_vector_component_overwrites_setting
-
-
 @wp.kernel
 def quat_array_extract_subscript(x: wp.array2d(dtype=wp.quat), y: wp.array2d(dtype=float)):
     i, j = wp.tid()

@@ -2372,6 +2355,147 @@ def test_quat_array_sub_inplace(test, device):
     assert_np_equal(x.grad.numpy(), np.array([[-1.0, -1.0, -1.0, -1.0]], dtype=float))


+@wp.kernel
+def scalar_quat_div(x: wp.array(dtype=wp.quat), y: wp.array(dtype=wp.quat)):
+    i = wp.tid()
+    y[i] = 1.0 / x[i]
+
+
+def test_scalar_quat_div(test, device):
+    x = wp.array((wp.quat(1.0, 2.0, 4.0, 8.0),), dtype=wp.quat, requires_grad=True, device=device)
+    y = wp.ones(1, dtype=wp.quat, requires_grad=True, device=device)
+
+    tape = wp.Tape()
+    with tape:
+        wp.launch(scalar_quat_div, 1, inputs=(x,), outputs=(y,), device=device)
+
+    y.grad = wp.ones_like(y)
+    tape.backward()
+
+    assert_np_equal(y.numpy(), np.array(((1.0, 0.5, 0.25, 0.125),), dtype=float))
+    assert_np_equal(x.grad.numpy(), np.array(((-1.0, -0.25, -0.0625, -0.015625),), dtype=float))
+
+
+def test_quat_indexing_assign(test, device):
+    @wp.func
+    def fn():
+        q = wp.quat(1.0, 2.0, 3.0, 4.0)
+
+        q[0] = 123.0
+        q[1] *= 2.0
+
+        wp.expect_eq(q[0], 123.0)
+        wp.expect_eq(q[1], 4.0)
+        wp.expect_eq(q[2], 3.0)
+        wp.expect_eq(q[3], 4.0)
+
+        q[-1] = 123.0
+        q[-2] *= 2.0
+
+        wp.expect_eq(q[-1], 123.0)
+        wp.expect_eq(q[-2], 6.0)
+        wp.expect_eq(q[-3], 4.0)
+        wp.expect_eq(q[-4], 123.0)
+
+    @wp.kernel(module="unique")
+    def kernel():
+        fn()
+
+    wp.launch(kernel, 1, device=device)
+    wp.synchronize()
+    fn()
+
+
+def test_quat_slicing_assign(test, device):
+    vec0 = wp.vec(0, float)
+    vec1 = wp.vec(1, float)
+    vec2 = wp.vec(2, float)
+    vec3 = wp.vec(3, float)
+    vec4 = wp.vec(4, float)
+
+    @wp.func
+    def fn():
+        q = wp.quat(1.0, 2.0, 3.0, 4.0)
+
+        wp.expect_eq(q[:] == vec4(1.0, 2.0, 3.0, 4.0), True)
+        wp.expect_eq(q[-123:123] == vec4(1.0, 2.0, 3.0, 4.0), True)
+        wp.expect_eq(q[123:] == vec0(), True)
+        wp.expect_eq(q[:-123] == vec0(), True)
+        wp.expect_eq(q[::123] == vec1(1.0), True)
+
+        wp.expect_eq(q[1:] == vec3(2.0, 3.0, 4.0), True)
+        wp.expect_eq(q[-2:] == vec2(3.0, 4.0), True)
+        wp.expect_eq(q[:2] == vec2(1.0, 2.0), True)
+        wp.expect_eq(q[:-1] == vec3(1.0, 2.0, 3.0), True)
+        wp.expect_eq(q[::2] == vec2(1.0, 3.0), True)
+        wp.expect_eq(q[1::2] == vec2(2.0, 4.0), True)
+        wp.expect_eq(q[::-1] == vec4(4.0, 3.0, 2.0, 1.0), True)
+        wp.expect_eq(q[::-2] == vec2(4.0, 2.0), True)
+        wp.expect_eq(q[1::-2] == vec1(2.0), True)
+
+        q[1:] = vec3(5.0, 6.0, 7.0)
+        wp.expect_eq(q == wp.quat(1.0, 5.0, 6.0, 7.0), True)
+
+        q[-2:] = vec2(8.0, 9.0)
+        wp.expect_eq(q == wp.quat(1.0, 5.0, 8.0, 9.0), True)
+
+        q[:2] = vec2(10.0, 11.0)
+        wp.expect_eq(q == wp.quat(10.0, 11.0, 8.0, 9.0), True)
+
+        q[:-1] = vec3(12.0, 13.0, 14.0)
+        wp.expect_eq(q == wp.quat(12.0, 13.0, 14.0, 9.0), True)
+
+        q[::2] = vec2(15.0, 16.0)
+        wp.expect_eq(q == wp.quat(15.0, 13.0, 16.0, 9.0), True)
+
+        q[1::2] = vec2(17.0, 18.0)
+        wp.expect_eq(q == wp.quat(15.0, 17.0, 16.0, 18.0), True)
+
+        q[1::-2] = vec1(19.0)
+        wp.expect_eq(q == wp.quat(15.0, 19.0, 16.0, 18.0), True)
+
+        q[1:] += vec3(20.0, 21.0, 22.0)
+        wp.expect_eq(q == wp.quat(15.0, 39.0, 37.0, 40.0), True)
+
+        q[:-1] -= vec3(23.0, 24.0, 25.0)
+        wp.expect_eq(q == wp.quat(-8.0, 15.0, 12.0, 40.0), True)
+
+    @wp.kernel(module="unique")
+    def kernel():
+        fn()
+
+    wp.launch(kernel, 1, device=device)
+    wp.synchronize()
+    fn()
+
+
+def test_quat_slicing_assign_backward(test, device):
+    @wp.kernel(module="unique")
+    def kernel(arr_x: wp.array(dtype=wp.vec2), arr_y: wp.array(dtype=wp.quat)):
+        i = wp.tid()
+
+        y = arr_y[i]
+
+        y[:2] = arr_x[i]
+        y[1:-1] += arr_x[i][:2]
+        y[3:1:-1] -= arr_x[i][0:]
+
+        arr_y[i] = y
+
+    x = wp.ones(1, dtype=wp.vec2, requires_grad=True, device=device)
+    y = wp.zeros(1, dtype=wp.quat, requires_grad=True, device=device)
+
+    tape = wp.Tape()
+    with tape:
+        wp.launch(kernel, 1, inputs=(x,), outputs=(y,), device=device)
+
+    y.grad = wp.ones_like(y)
+    tape.backward()
+
+    assert_np_equal(y.numpy(), np.array(((1.0, 2.0, 0.0, -1.0),), dtype=float))
+    assert_np_equal(x.grad.numpy(), np.array(((1.0, 1.0),), dtype=float))
+
+
 devices = get_test_devices()


@@ -2473,16 +2597,20 @@ for dtype in np_float_types:
         TestQuat, f"test_py_arithmetic_ops_{dtype.__name__}", test_py_arithmetic_ops, devices=None, dtype=dtype
     )

+add_function_test(TestQuat, "test_quat_backward", test_quat_backward, devices=devices)
 add_function_test(TestQuat, "test_quat_len", test_quat_len, devices=devices)
 add_function_test(TestQuat, "test_quat_extract", test_quat_extract, devices=devices)
 add_function_test(TestQuat, "test_quat_assign", test_quat_assign, devices=devices)
-add_function_test(TestQuat, "test_quat_assign_copy", test_quat_assign_copy, devices=devices)
 add_function_test(TestQuat, "test_quat_array_extract", test_quat_array_extract, devices=devices)
 add_function_test(TestQuat, "test_quat_array_assign", test_quat_array_assign, devices=devices)
 add_function_test(TestQuat, "test_quat_add_inplace", test_quat_add_inplace, devices=devices)
 add_function_test(TestQuat, "test_quat_sub_inplace", test_quat_sub_inplace, devices=devices)
 add_function_test(TestQuat, "test_quat_array_add_inplace", test_quat_array_add_inplace, devices=devices)
 add_function_test(TestQuat, "test_quat_array_sub_inplace", test_quat_array_sub_inplace, devices=devices)
+add_function_test(TestQuat, "test_scalar_quat_div", test_scalar_quat_div, devices=devices)
+add_function_test(TestQuat, "test_quat_indexing_assign", test_quat_indexing_assign, devices=devices)
+add_function_test(TestQuat, "test_quat_slicing_assign", test_quat_slicing_assign, devices=devices)
+add_function_test(TestQuat, "test_quat_slicing_assign_backward", test_quat_slicing_assign_backward, devices=devices)


 if __name__ == "__main__":
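As a sanity check on the gradients asserted in test_scalar_quat_div above: the kernel computes y = 1/x per component, whose derivative is dy/dx = -1/x**2, so inputs (1, 2, 4, 8) give values (1, 0.5, 0.25, 0.125) and gradients (-1, -0.25, -0.0625, -0.015625). A NumPy sketch of that arithmetic, separate from the Warp tape machinery the test actually uses:

import numpy as np

x = np.array([1.0, 2.0, 4.0, 8.0])

y = 1.0 / x          # forward pass:  (1.0, 0.5, 0.25, 0.125)
grad = -1.0 / x**2   # d(1/x)/dx:     (-1.0, -0.25, -0.0625, -0.015625)

assert np.allclose(y, [1.0, 0.5, 0.25, 0.125])
assert np.allclose(grad, [-1.0, -0.25, -0.0625, -0.015625])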