PyPI - warp-lang - Versions diffs - 1.3.2__py3-none-manylinux2014_aarch64.whl → 1.4.0__py3-none-manylinux2014_aarch64.whl - Mend

warp-lang 1.3.2__py3-none-manylinux2014_aarch64.whl → 1.4.0__py3-none-manylinux2014_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (107)

warp/__init__.py +6 -0
warp/autograd.py +59 -6
warp/bin/warp.so +0 -0
warp/build_dll.py +8 -10
warp/builtins.py +126 -4
warp/codegen.py +435 -53
warp/config.py +1 -1
warp/context.py +678 -403
warp/dlpack.py +2 -0
warp/examples/benchmarks/benchmark_cloth.py +10 -0
warp/examples/core/example_render_opengl.py +12 -10
warp/examples/fem/example_adaptive_grid.py +251 -0
warp/examples/fem/example_apic_fluid.py +1 -1
warp/examples/fem/example_diffusion_3d.py +2 -2
warp/examples/fem/example_magnetostatics.py +1 -1
warp/examples/fem/example_streamlines.py +1 -0
warp/examples/fem/utils.py +23 -4
warp/examples/sim/example_cloth.py +50 -6
warp/fem/__init__.py +2 -0
warp/fem/adaptivity.py +493 -0
warp/fem/field/field.py +2 -1
warp/fem/field/nodal_field.py +18 -26
warp/fem/field/test.py +4 -4
warp/fem/field/trial.py +4 -4
warp/fem/geometry/__init__.py +1 -0
warp/fem/geometry/adaptive_nanogrid.py +843 -0
warp/fem/geometry/nanogrid.py +55 -28
warp/fem/space/__init__.py +1 -1
warp/fem/space/nanogrid_function_space.py +69 -35
warp/fem/utils.py +113 -107
warp/jax_experimental.py +28 -15
warp/native/array.h +0 -1
warp/native/builtin.h +103 -6
warp/native/bvh.cu +2 -0
warp/native/cuda_util.cpp +14 -0
warp/native/cuda_util.h +2 -0
warp/native/error.cpp +4 -2
warp/native/exports.h +99 -17
warp/native/mat.h +97 -0
warp/native/mesh.cpp +36 -0
warp/native/mesh.cu +51 -0
warp/native/mesh.h +1 -0
warp/native/quat.h +43 -0
warp/native/spatial.h +6 -0
warp/native/vec.h +74 -0
warp/native/warp.cpp +2 -1
warp/native/warp.cu +10 -3
warp/native/warp.h +8 -1
warp/paddle.py +382 -0
warp/sim/__init__.py +1 -0
warp/sim/collide.py +519 -0
warp/sim/integrator_euler.py +18 -5
warp/sim/integrator_featherstone.py +5 -5
warp/sim/integrator_vbd.py +1026 -0
warp/sim/model.py +49 -23
warp/stubs.py +459 -0
warp/tape.py +2 -0
warp/tests/aux_test_dependent.py +1 -0
warp/tests/aux_test_name_clash1.py +32 -0
warp/tests/aux_test_name_clash2.py +32 -0
warp/tests/aux_test_square.py +1 -0
warp/tests/test_array.py +222 -0
warp/tests/test_async.py +3 -3
warp/tests/test_atomic.py +6 -0
warp/tests/test_closest_point_edge_edge.py +93 -1
warp/tests/test_codegen.py +62 -15
warp/tests/test_codegen_instancing.py +1457 -0
warp/tests/test_collision.py +486 -0
warp/tests/test_compile_consts.py +3 -28
warp/tests/test_dlpack.py +170 -0
warp/tests/test_examples.py +22 -8
warp/tests/test_fast_math.py +10 -4
warp/tests/test_fem.py +64 -0
warp/tests/test_func.py +46 -0
warp/tests/test_implicit_init.py +49 -0
warp/tests/test_jax.py +58 -0
warp/tests/test_mat.py +84 -0
warp/tests/test_mesh_query_point.py +188 -0
warp/tests/test_module_hashing.py +40 -0
warp/tests/test_multigpu.py +3 -3
warp/tests/test_overwrite.py +8 -0
warp/tests/test_paddle.py +852 -0
warp/tests/test_print.py +89 -0
warp/tests/test_quat.py +111 -0
warp/tests/test_reload.py +31 -1
warp/tests/test_scalar_ops.py +2 -0
warp/tests/test_static.py +412 -0
warp/tests/test_streams.py +64 -3
warp/tests/test_struct.py +4 -4
warp/tests/test_torch.py +24 -0
warp/tests/test_triangle_closest_point.py +137 -0
warp/tests/test_types.py +1 -1
warp/tests/test_vbd.py +386 -0
warp/tests/test_vec.py +143 -0
warp/tests/test_vec_scalar_ops.py +139 -0
warp/tests/test_volume.py +30 -0
warp/tests/unittest_suites.py +12 -0
warp/tests/unittest_utils.py +9 -5
warp/thirdparty/dlpack.py +3 -1
warp/types.py +157 -34
warp/utils.py +37 -14
{warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/METADATA +10 -8
{warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/RECORD +106 -94
warp/tests/test_point_triangle_closest_point.py +0 -143
{warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/LICENSE.md +0 -0
{warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/WHEEL +0 -0
{warp_lang-1.3.2.dist-info → warp_lang-1.4.0.dist-info}/top_level.txt +0 -0

warp/tests/test_vec_scalar_ops.py CHANGED Viewed

@@ -192,12 +192,14 @@ def test_py_arithmetic_ops(test, device, dtype):
     test.assertSequenceEqual(-v, make_vec(-1, 2, -3))
     test.assertSequenceEqual(v + vec_cls(5, 5, 5), make_vec(6, 3, 8))
     test.assertSequenceEqual(v - vec_cls(5, 5, 5), make_vec(-4, -7, -2))
+    test.assertSequenceEqual(v % vec_cls(2, 2, 2), make_vec(1, 0, 1))
     v = vec_cls(2, 4, 6)
     test.assertSequenceEqual(v * wptype(2), make_vec(4, 8, 12))
     test.assertSequenceEqual(wptype(2) * v, make_vec(4, 8, 12))
     test.assertSequenceEqual(v / wptype(2), make_vec(1, 2, 3))
     test.assertSequenceEqual(wptype(24) / v, make_vec(12, 6, 4))
+    test.assertSequenceEqual(v % vec_cls(3, 3, 3), make_vec(2, 1, 0))
 def test_constructors(test, device, dtype, register_kernels=False):
@@ -1797,6 +1799,140 @@ def test_dotproduct(test, device, dtype, register_kernels=False):
         tape.zero()
+def test_modulo(test, device, dtype, register_kernels=False):
+    rng = np.random.default_rng(123)
+    tol = {
+        np.float16: 1.0e-2,
+        np.float32: 1.0e-6,
+        np.float64: 1.0e-8,
+    }.get(dtype, 0)
+    wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
+    vec2 = wp.types.vector(length=2, dtype=wptype)
+    vec3 = wp.types.vector(length=3, dtype=wptype)
+    vec4 = wp.types.vector(length=4, dtype=wptype)
+    vec5 = wp.types.vector(length=5, dtype=wptype)
+    def check_mod(
+        s2: wp.array(dtype=vec2),
+        s3: wp.array(dtype=vec3),
+        s4: wp.array(dtype=vec4),
+        s5: wp.array(dtype=vec5),
+        v2: wp.array(dtype=vec2),
+        v3: wp.array(dtype=vec3),
+        v4: wp.array(dtype=vec4),
+        v5: wp.array(dtype=vec5),
+        v20: wp.array(dtype=wptype),
+        v21: wp.array(dtype=wptype),
+        v30: wp.array(dtype=wptype),
+        v31: wp.array(dtype=wptype),
+        v32: wp.array(dtype=wptype),
+        v40: wp.array(dtype=wptype),
+        v41: wp.array(dtype=wptype),
+        v42: wp.array(dtype=wptype),
+        v43: wp.array(dtype=wptype),
+        v50: wp.array(dtype=wptype),
+        v51: wp.array(dtype=wptype),
+        v52: wp.array(dtype=wptype),
+        v53: wp.array(dtype=wptype),
+        v54: wp.array(dtype=wptype),
+    ):
+        v20[0] = (wptype(2) * wp.mod(v2[0], s2[0]))[0]
+        v21[0] = (wptype(2) * wp.mod(v2[0], s2[0]))[1]
+        v30[0] = (wptype(2) * wp.mod(v3[0], s3[0]))[0]
+        v31[0] = (wptype(2) * wp.mod(v3[0], s3[0]))[1]
+        v32[0] = (wptype(2) * wp.mod(v3[0], s3[0]))[2]
+        v40[0] = (wptype(2) * wp.mod(v4[0], s4[0]))[0]
+        v41[0] = (wptype(2) * wp.mod(v4[0], s4[0]))[1]
+        v42[0] = (wptype(2) * wp.mod(v4[0], s4[0]))[2]
+        v43[0] = (wptype(2) * wp.mod(v4[0], s4[0]))[3]
+        v50[0] = (wptype(2) * wp.mod(v5[0], s5[0]))[0]
+        v51[0] = (wptype(2) * wp.mod(v5[0], s5[0]))[1]
+        v52[0] = (wptype(2) * wp.mod(v5[0], s5[0]))[2]
+        v53[0] = (wptype(2) * wp.mod(v5[0], s5[0]))[3]
+        v54[0] = (wptype(2) * wp.mod(v5[0], s5[0]))[4]
+    kernel = getkernel(check_mod, suffix=dtype.__name__)
+    if register_kernels:
+        return
+    s2 = wp.array(randvals(rng, (1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
+    s3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
+    s4 = wp.array(randvals(rng, (1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
+    s5 = wp.array(randvals(rng, (1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
+    v2 = wp.array(randvals(rng, (1, 2), dtype), dtype=vec2, requires_grad=True, device=device)
+    v3 = wp.array(randvals(rng, (1, 3), dtype), dtype=vec3, requires_grad=True, device=device)
+    v4 = wp.array(randvals(rng, (1, 4), dtype), dtype=vec4, requires_grad=True, device=device)
+    v5 = wp.array(randvals(rng, (1, 5), dtype), dtype=vec5, requires_grad=True, device=device)
+    v20 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
+    v21 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
+    v30 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
+    v31 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
+    v32 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
+    v40 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
+    v41 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
+    v42 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
+    v43 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
+    v50 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
+    v51 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
+    v52 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
+    v53 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
+    v54 = wp.zeros(1, dtype=wptype, requires_grad=True, device=device)
+    tape = wp.Tape()
+    with tape:
+        wp.launch(
+            kernel,
+            dim=1,
+            inputs=[
+                s2,
+                s3,
+                s4,
+                s5,
+                v2,
+                v3,
+                v4,
+                v5,
+            ],
+            outputs=[
+                v20,
+                v21,
+                v30,
+                v31,
+                v32,
+                v40,
+                v41,
+                v42,
+                v43,
+                v50,
+                v51,
+                v52,
+                v53,
+                v54,
+            ],
+            device=device,
+        )
+    assert_np_equal(v20.numpy()[0], 2.0 * np.fmod(v2.numpy(), s2.numpy())[0, 0], tol=10 * tol)
+    assert_np_equal(v21.numpy()[0], 2.0 * np.fmod(v2.numpy(), s2.numpy())[0, 1], tol=10 * tol)
+    assert_np_equal(v30.numpy()[0], 2.0 * np.fmod(v3.numpy(), s3.numpy())[0, 0], tol=10 * tol)
+    assert_np_equal(v31.numpy()[0], 2.0 * np.fmod(v3.numpy(), s3.numpy())[0, 1], tol=10 * tol)
+    assert_np_equal(v32.numpy()[0], 2.0 * np.fmod(v3.numpy(), s3.numpy())[0, 2], tol=10 * tol)
+    assert_np_equal(v40.numpy()[0], 2.0 * np.fmod(v4.numpy(), s4.numpy())[0, 0], tol=10 * tol)
+    assert_np_equal(v41.numpy()[0], 2.0 * np.fmod(v4.numpy(), s4.numpy())[0, 1], tol=10 * tol)
+    assert_np_equal(v42.numpy()[0], 2.0 * np.fmod(v4.numpy(), s4.numpy())[0, 2], tol=10 * tol)
+    assert_np_equal(v43.numpy()[0], 2.0 * np.fmod(v4.numpy(), s4.numpy())[0, 3], tol=10 * tol)
+    assert_np_equal(v50.numpy()[0], 2.0 * np.fmod(v5.numpy(), s5.numpy())[0, 0], tol=10 * tol)
+    assert_np_equal(v51.numpy()[0], 2.0 * np.fmod(v5.numpy(), s5.numpy())[0, 1], tol=10 * tol)
+    assert_np_equal(v52.numpy()[0], 2.0 * np.fmod(v5.numpy(), s5.numpy())[0, 2], tol=10 * tol)
+    assert_np_equal(v53.numpy()[0], 2.0 * np.fmod(v5.numpy(), s5.numpy())[0, 3], tol=10 * tol)
+    assert_np_equal(v54.numpy()[0], 2.0 * np.fmod(v5.numpy(), s5.numpy())[0, 4], tol=10 * tol)
 def test_equivalent_types(test, device, dtype, register_kernels=False):
     wptype = wp.types.np_dtype_to_warp_type[np.dtype(dtype)]
@@ -2150,6 +2286,9 @@ for dtype in np_scalar_types:
     add_function_test_register_kernel(
         TestVecScalarOps, f"test_addition_{dtype.__name__}", test_addition, devices=devices, dtype=dtype
     )
+    add_function_test_register_kernel(
+        TestVecScalarOps, f"test_modulo_{dtype.__name__}", test_modulo, devices=devices, dtype=dtype
+    )
     add_function_test_register_kernel(
         TestVecScalarOps, f"test_dotproduct_{dtype.__name__}", test_dotproduct, devices=devices, dtype=dtype
     )

warp/tests/test_volume.py CHANGED Viewed

@@ -843,6 +843,33 @@ def test_volume_from_numpy(test, device):
     test.assertIsNone(sphere_vdb_array.deleter)
+def test_volume_from_numpy_3d(test, device):
+    # Volume.allocate_from_tiles() is only available with CUDA
+    mins = np.array([-3.0, -3.0, -3.0])
+    voxel_size = 0.2
+    maxs = np.array([3.0, 3.0, 3.0])
+    nums = np.ceil((maxs - mins) / (voxel_size)).astype(dtype=int)
+    centers = np.array([[-1.0, -1.0, -1.0], [0.0, 0.0, 0.0], [1.0, 1.0, 1.0]])
+    rad = 2.5
+    sphere_sdf_np = np.zeros(tuple(nums) + (3,))
+    for x in range(nums[0]):
+        for y in range(nums[1]):
+            for z in range(nums[2]):
+                for k in range(3):
+                    pos = mins + voxel_size * np.array([x, y, z])
+                    dis = np.linalg.norm(pos - centers[k])
+                    sphere_sdf_np[x, y, z, k] = dis - rad
+    sphere_vdb = wp.Volume.load_from_numpy(
+        sphere_sdf_np, mins, voxel_size, (rad + 3.0 * voxel_size,) * 3, device=device
+    )
+    test.assertNotEqual(sphere_vdb.id, 0)
+    sphere_vdb_array = sphere_vdb.array()
+    test.assertEqual(sphere_vdb_array.dtype, wp.uint8)
+    test.assertIsNone(sphere_vdb_array.deleter)
 def test_volume_aniso_transform(test, device):
     # XY-rotation + z scale
     transform = [
@@ -894,6 +921,9 @@ add_function_test(TestVolume, "test_volume_introspection", test_volume_introspec
 add_function_test(
     TestVolume, "test_volume_from_numpy", test_volume_from_numpy, devices=get_selected_cuda_test_devices()
 )
+add_function_test(
+    TestVolume, "test_volume_from_numpy_3d", test_volume_from_numpy_3d, devices=get_selected_cuda_test_devices()
+)
 add_function_test(
     TestVolume, "test_volume_aniso_transform", test_volume_aniso_transform, devices=get_selected_cuda_test_devices()
 )

warp/tests/unittest_suites.py CHANGED Viewed

@@ -98,6 +98,7 @@ def default_suite(test_loader: unittest.TestLoader = unittest.defaultTestLoader)
     from warp.tests.test_bvh import TestBvh
     from warp.tests.test_closest_point_edge_edge import TestClosestPointEdgeEdgeMethods
     from warp.tests.test_codegen import TestCodeGen
+    from warp.tests.test_codegen_instancing import TestCodeGenInstancing
     from warp.tests.test_compile_consts import TestConstants
     from warp.tests.test_conditional import TestConditional
     from warp.tests.test_copy import TestCopy
@@ -117,6 +118,7 @@ def default_suite(test_loader: unittest.TestLoader = unittest.defaultTestLoader)
     from warp.tests.test_fem import TestFem, TestFemShapeFunctions
     from warp.tests.test_fp16 import TestFp16
     from warp.tests.test_func import TestFunc
+    from warp.tests.test_future_annotations import TestFutureAnnotations
     from warp.tests.test_generics import TestGenerics
     from warp.tests.test_grad import TestGrad
     from warp.tests.test_grad_customs import TestGradCustoms
@@ -160,6 +162,7 @@ def default_suite(test_loader: unittest.TestLoader = unittest.defaultTestLoader)
     from warp.tests.test_reload import TestReload
     from warp.tests.test_rounding import TestRounding
     from warp.tests.test_runlength_encode import TestRunlengthEncode
+    from warp.tests.test_scalar_ops import TestScalarOps
     from warp.tests.test_sim_grad import TestSimGradients
     from warp.tests.test_sim_kinematics import TestSimKinematics
     from warp.tests.test_smoothstep import TestSmoothstep
@@ -172,8 +175,10 @@ def default_suite(test_loader: unittest.TestLoader = unittest.defaultTestLoader)
     from warp.tests.test_tape import TestTape
     from warp.tests.test_torch import TestTorch
     from warp.tests.test_transient_module import TestTransientModule
+    from warp.tests.test_triangle_closest_point import TestTriangleClosestPoint
     from warp.tests.test_types import TestTypes
     from warp.tests.test_utils import TestUtils
+    from warp.tests.test_vbd import TestVBD
     from warp.tests.test_vec import TestVec
     from warp.tests.test_vec_lite import TestVecLite
     from warp.tests.test_vec_scalar_ops import TestVecScalarOps
@@ -193,6 +198,7 @@ def default_suite(test_loader: unittest.TestLoader = unittest.defaultTestLoader)
         TestBvh,
         TestClosestPointEdgeEdgeMethods,
         TestCodeGen,
+        TestCodeGenInstancing,
         TestConstants,
         TestConditional,
         TestCopy,
@@ -211,6 +217,7 @@ def default_suite(test_loader: unittest.TestLoader = unittest.defaultTestLoader)
         TestFemShapeFunctions,
         TestFp16,
         TestFunc,
+        TestFutureAnnotations,
         TestGenerics,
         TestGrad,
         TestGradCustoms,
@@ -254,6 +261,7 @@ def default_suite(test_loader: unittest.TestLoader = unittest.defaultTestLoader)
         TestReload,
         TestRounding,
         TestRunlengthEncode,
+        TestScalarOps,
         TestSimGradients,
         TestSimKinematics,
         TestSmoothstep,
@@ -266,8 +274,10 @@ def default_suite(test_loader: unittest.TestLoader = unittest.defaultTestLoader)
         TestTape,
         TestTorch,
         TestTransientModule,
+        TestTriangleClosestPoint,
         TestTypes,
         TestUtils,
+        TestVBD,
         TestVec,
         TestVecLite,
         TestVecScalarOps,
@@ -288,6 +298,7 @@ def kit_suite(test_loader: unittest.TestLoader = unittest.defaultTestLoader):
     from warp.tests.test_array_reduce import TestArrayReduce
     from warp.tests.test_bvh import TestBvh
     from warp.tests.test_codegen import TestCodeGen
+    from warp.tests.test_codegen_instancing import TestCodeGenInstancing
     from warp.tests.test_compile_consts import TestConstants
     from warp.tests.test_conditional import TestConditional
     from warp.tests.test_ctypes import TestCTypes
@@ -332,6 +343,7 @@ def kit_suite(test_loader: unittest.TestLoader = unittest.defaultTestLoader):
         TestArrayReduce,
         TestBvh,
         TestCodeGen,
+        TestCodeGenInstancing,
         TestConstants,
         TestConditional,
         TestCTypes,

warp/tests/unittest_utils.py CHANGED Viewed

@@ -198,11 +198,15 @@ class CheckOutput:
         if s != "":
             print(s.rstrip())
-        # fail if test produces unexpected output (e.g.: from wp.expect_eq() builtins)
-        # we allow strings starting of the form "Module xxx load on device xxx"
-        # for lazy loaded modules
-        if s != "" and not s.startswith("Module"):
-            self.test.fail(f"Unexpected output:\n'{s.rstrip()}'")
+            # fail if test produces unexpected output (e.g.: from wp.expect_eq() builtins)
+            # we allow strings starting of the form "Module xxx load on device xxx"
+            # for lazy loaded modules
+            filtered_s = "\n".join(
+                [line for line in s.splitlines() if not (line.startswith("Module") and "load on device" in line)]
+            )
+            if filtered_s.strip():
+                self.test.fail(f"Unexpected output:\n'{s.rstrip()}'")
 def assert_array_equal(result: wp.array, expect: wp.array):

warp/thirdparty/dlpack.py CHANGED Viewed

@@ -58,6 +58,7 @@ class DLDataTypeCode(ctypes.c_uint8):
     kDLOpaquePointer = 3
     kDLBfloat = 4
     kDLComplex = 5
+    kDLBool = 6
     def __str__(self):
         return {
@@ -66,6 +67,7 @@ class DLDataTypeCode(ctypes.c_uint8):
             self.kDLFloat: "float",
             self.kDLBfloat: "bfloat",
             self.kDLComplex: "complex",
+            self.kDLBool: "bool",
             self.kDLOpaquePointer: "void_p",
         }[self.value]
@@ -85,7 +87,7 @@ class DLDataType(ctypes.Structure):
         ("lanes", ctypes.c_uint16),
     ]
     TYPE_MAP = {
-        "bool": (DLDataTypeCode.kDLUInt, 1, 1),
+        "bool": (DLDataTypeCode.kDLBool, 8, 1),
         "int8": (DLDataTypeCode.kDLInt, 8, 1),
         "int16": (DLDataTypeCode.kDLInt, 16, 1),
         "int32": (DLDataTypeCode.kDLInt, 32, 1),

warp/types.py CHANGED Viewed

@@ -66,8 +66,8 @@ def constant(x):
         x: Compile-time constant value, can be any of the built-in math types.
     """
-    if not isinstance(x, (builtins.bool, int, float, tuple(scalar_and_bool_types), ctypes.Array)):
-        raise RuntimeError(f"Invalid constant type: {type(x)}")
+    if not is_value(x):
+        raise TypeError(f"Invalid constant type: {type(x)}")
     return x
@@ -237,6 +237,12 @@ def vector(length, dtype):
         def __rtruediv__(self, x):
             return warp.div(x, self)
+        def __mod__(self, x):
+            return warp.mod(self, x)
+        def __rmod__(self, x):
+            return warp.mod(x, self)
         def __pos__(self):
             return warp.pos(self)
@@ -519,6 +525,12 @@ class scalar_base:
     def __rtruediv__(self, x):
         return warp.div(x, self)
+    def __mod__(self, x):
+        return warp.mod(self, x)
+    def __rmod__(self, x):
+        return warp.mod(x, self)
     def __pos__(self):
         return warp.pos(self)
@@ -979,6 +991,43 @@ vector_types = (
     spatial_matrixd,
 )
+atomic_vector_types = (
+    vec2i,
+    vec2ui,
+    vec2l,
+    vec2ul,
+    vec2h,
+    vec2f,
+    vec2d,
+    vec3i,
+    vec3ui,
+    vec3l,
+    vec3ul,
+    vec3h,
+    vec3f,
+    vec3d,
+    vec4i,
+    vec4ui,
+    vec4l,
+    vec4ul,
+    vec4h,
+    vec4f,
+    vec4d,
+    mat22h,
+    mat22f,
+    mat22d,
+    mat33h,
+    mat33f,
+    mat33d,
+    mat44h,
+    mat44f,
+    mat44d,
+    quath,
+    quatf,
+    quatd,
+)
+atomic_types = float_types + (int32, uint32, int64, uint64) + atomic_vector_types
 np_dtype_to_warp_type = {
     # Numpy scalar types
     np.bool_: bool,
@@ -1253,7 +1302,7 @@ def type_to_warp(dtype):
 def type_typestr(dtype):
     if dtype == bool:
-        return "?"
+        return "|b1"
     elif dtype == float16:
         return "<f2"
     elif dtype == float32:
@@ -1261,9 +1310,9 @@ def type_typestr(dtype):
     elif dtype == float64:
         return "<f8"
     elif dtype == int8:
-        return "b"
+        return "|i1"
     elif dtype == uint8:
-        return "B"
+        return "|u1"
     elif dtype == int16:
         return "<i2"
     elif dtype == uint16:
@@ -1335,7 +1384,7 @@ value_types = (int, float, builtins.bool) + scalar_types
 # returns true for all value types (int, float, bool, scalars, vectors, matrices)
 def type_is_value(x):
-    return x in value_types or issubclass(x, ctypes.Array)
+    return x in value_types or hasattr(x, "_wp_scalar_type_")
 # equivalent of the above but for values
@@ -1442,6 +1491,10 @@ def types_equal(a, b, match_generic=False):
     if is_array(a) and type(a) is type(b):
         return True
+    # match NewStructInstance and Struct dtype
+    if getattr(a, "cls", "a") is getattr(b, "cls", "b"):
+        return True
     return scalars_equal(a, b, match_generic)
@@ -1486,7 +1539,7 @@ def array_ctype_from_interface(interface: dict, dtype=None, owner=None):
         strides = strides_from_shape(shape, element_dtype)
     if dtype is None:
-        # accept verbatum
+        # accept verbatim
         pass
     elif hasattr(dtype, "_shape_"):
         # vector/matrix types, ensure element dtype matches
@@ -1601,6 +1654,9 @@ class array(Array):
         self._array_interface = None
         self.is_transposed = False
+        # reference to other array
+        self._ref = None
         # canonicalize dtype
         if dtype == int:
             dtype = int32
@@ -1652,9 +1708,6 @@ class array(Array):
                 if requires_grad:
                     self._alloc_grad()
-        # reference to other array
-        self._ref = None
     def _init_from_data(self, data, dtype, shape, device, copy, pinned):
         if not hasattr(data, "__len__"):
             raise RuntimeError(f"Data must be a sequence or array, got scalar {data}")
@@ -2005,24 +2058,27 @@ class array(Array):
         if self.device is None:
             raise RuntimeError("Array has no device assigned")
-        if self.device.is_cuda and stream != -1:
-            if not isinstance(stream, int):
-                raise TypeError("DLPack stream must be an integer or None")
-            # assume that the array is being used on its device's current stream
-            array_stream = self.device.stream
-            # the external stream should wait for outstanding operations to complete
-            if stream in (None, 0, 1):
-                external_stream = 0
-            else:
-                external_stream = stream
-            # Performance note: avoid wrapping the external stream in a temporary Stream object
-            if external_stream != array_stream.cuda_stream:
-                warp.context.runtime.core.cuda_stream_wait_stream(
-                    external_stream, array_stream.cuda_stream, array_stream.cached_event.cuda_event
-                )
+        # check if synchronization is needed
+        if stream != -1:
+            if self.device.is_cuda:
+                # validate stream argument
+                if stream is None:
+                    stream = 1  # legacy default stream
+                elif not isinstance(stream, int) or stream < -1:
+                    raise TypeError("DLPack stream must None or an integer >= -1")
+                # assume that the array is being used on its device's current stream
+                array_stream = self.device.stream
+                # Performance note: avoid wrapping the external stream in a temporary Stream object
+                if stream != array_stream.cuda_stream:
+                    warp.context.runtime.core.cuda_stream_wait_stream(
+                        stream, array_stream.cuda_stream, array_stream.cached_event.cuda_event
+                    )
+            elif self.device.is_cpu:
+                # on CPU, stream must be None or -1
+                if stream is not None:
+                    raise TypeError("DLPack stream must be None or -1 for CPU device")
         return warp.dlpack.to_dlpack(self)
@@ -2991,7 +3047,7 @@ class Mesh:
         Args:
             points (:class:`warp.array`): Array of vertex positions of type :class:`warp.vec3`
-            indices (:class:`warp.array`): Array of triangle indices of type :class:`warp.int32`, should be a 1d array with shape (num_tris, 3)
+            indices (:class:`warp.array`): Array of triangle indices of type :class:`warp.int32`, should be a 1d array with shape (num_tris * 3)
             velocities (:class:`warp.array`): Array of vertex velocities of type :class:`warp.vec3` (optional)
             support_winding_number (bool): If true the mesh will build additional datastructures to support `wp.mesh_query_point_sign_winding_number()` queries
         """
@@ -3012,8 +3068,8 @@ class Mesh:
             raise RuntimeError("Mesh indices should be a flattened 1d array of indices")
         self.device = points.device
-        self.points = points
-        self.velocities = velocities
+        self._points = points
+        self._velocities = velocities
         self.indices = indices
         self.runtime = warp.context.runtime
@@ -3058,6 +3114,72 @@ class Mesh:
             self.runtime.core.mesh_refit_device(self.id)
             self.runtime.verify_cuda_device(self.device)
+    @property
+    def points(self):
+        """The array of mesh's vertex positions of type :class:`warp.vec3`.
+        The `Mesh.points` property has a custom setter method. Users can modify the vertex positions in-place,
+        but the `refit()` method must be called manually after such modifications. Alternatively, assigning a new array
+        to this property is also supported. The new array must have the same shape as the original, and once assigned,
+        the `Mesh` class will automatically perform a refit operation based on the new vertex positions.
+        """
+        return self._points
+    @points.setter
+    def points(self, points_new):
+        if points_new.device != self._points.device:
+            raise RuntimeError(
+                "The new points and the original points must live on the same device, currently "
+                "the new points lives on {} while the old points lives on {}.".format(
+                    points_new.device, self._points.device
+                )
+            )
+        if points_new.ndim != 1 or points_new.shape[0] != self._points.shape[0]:
+            raise RuntimeError(
+                "the new points and the original points must have the same shape, currently new points shape is: {},"
+                " while the old points' shape is: {}".format(points_new.shape, self._points.shape)
+            )
+        self._points = points_new
+        if self.device.is_cpu:
+            self.runtime.core.mesh_set_points_host(self.id, points_new.__ctype__())
+        else:
+            self.runtime.core.mesh_set_points_device(self.id, points_new.__ctype__())
+            self.runtime.verify_cuda_device(self.device)
+    @property
+    def velocities(self):
+        """The array of mesh's velocities of type :class:`warp.vec3`.
+        This is a property with a custom setter method. Users can modify the velocities in-place,
+        or assigning a new array to this property. No refitting is needed for changing velocities.
+        """
+        return self._velocities
+    @velocities.setter
+    def velocities(self, velocities_new):
+        if velocities_new.device != self._velocities.device:
+            raise RuntimeError(
+                "The new points and the original points must live on the same device, currently "
+                "the new points lives on {} while the old points lives on {}.".format(
+                    velocities_new.device, self._velocities.device
+                )
+            )
+        if velocities_new.ndim != 1 or velocities_new.shape[0] != self._velocities.shape[0]:
+            raise RuntimeError(
+                "the new points and the original points must have the same shape, currently new points shape is: {},"
+                " while the old points' shape is: {}".format(velocities_new.shape, self._velocities.shape)
+            )
+        self._velocities = velocities_new
+        if self.device.is_cpu:
+            self.runtime.core.mesh_set_velocities_host(self.id, velocities_new.__ctype__())
+        else:
+            self.runtime.core.mesh_set_velocities_device(self.id, velocities_new.__ctype__())
+            self.runtime.verify_cuda_device(self.device)
 class Volume:
     #: Enum value to specify nearest-neighbor interpolation during sampling
@@ -3529,8 +3651,9 @@ class Volume:
         )
         if hasattr(bg_value, "__len__"):
             # vec3, assuming the numpy array is 4D
-            padded_array = np.array((target_shape[0], target_shape[1], target_shape[2], 3), dtype=np.single)
-            padded_array[:, :, :, :] = np.array(bg_value)
+            padded_array = np.full(
+                shape=(target_shape[0], target_shape[1], target_shape[2], 3), fill_value=bg_value, dtype=np.single
+            )
             padded_array[0 : ndarray.shape[0], 0 : ndarray.shape[1], 0 : ndarray.shape[2], :] = ndarray
         else:
             padded_amount = (
@@ -5024,7 +5147,7 @@ def get_type_code(arg_type):
     elif isinstance(arg_type, indexedfabricarray):
         return f"ifa{arg_type.ndim}{get_type_code(arg_type.dtype)}"
     elif isinstance(arg_type, warp.codegen.Struct):
-        return warp.codegen.make_full_qualified_name(arg_type.cls)
+        return arg_type.native_name
     elif arg_type == Scalar:
         # generic scalar type
         return "s?"