PyPI - warp-lang - Versions diffs - 1.7.2rc1__py3-none-win_amd64.whl → 1.8.0__py3-none-win_amd64.whl - Mend

warp-lang 1.7.2rc1__py3-none-win_amd64.whl → 1.8.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (181) hide show

warp/__init__.py +3 -1
warp/__init__.pyi +3489 -1
warp/autograd.py +45 -122
warp/bin/warp-clang.dll +0 -0
warp/bin/warp.dll +0 -0
warp/build.py +241 -252
warp/build_dll.py +125 -26
warp/builtins.py +1907 -384
warp/codegen.py +257 -101
warp/config.py +12 -1
warp/constants.py +1 -1
warp/context.py +657 -223
warp/dlpack.py +1 -1
warp/examples/benchmarks/benchmark_cloth.py +2 -2
warp/examples/benchmarks/benchmark_tile_sort.py +155 -0
warp/examples/core/example_sample_mesh.py +1 -1
warp/examples/core/example_spin_lock.py +93 -0
warp/examples/core/example_work_queue.py +118 -0
warp/examples/fem/example_adaptive_grid.py +5 -5
warp/examples/fem/example_apic_fluid.py +1 -1
warp/examples/fem/example_burgers.py +1 -1
warp/examples/fem/example_convection_diffusion.py +9 -6
warp/examples/fem/example_darcy_ls_optimization.py +489 -0
warp/examples/fem/example_deformed_geometry.py +1 -1
warp/examples/fem/example_diffusion.py +2 -2
warp/examples/fem/example_diffusion_3d.py +1 -1
warp/examples/fem/example_distortion_energy.py +1 -1
warp/examples/fem/example_elastic_shape_optimization.py +387 -0
warp/examples/fem/example_magnetostatics.py +5 -3
warp/examples/fem/example_mixed_elasticity.py +5 -3
warp/examples/fem/example_navier_stokes.py +11 -9
warp/examples/fem/example_nonconforming_contact.py +5 -3
warp/examples/fem/example_streamlines.py +8 -3
warp/examples/fem/utils.py +9 -8
warp/examples/interop/example_jax_ffi_callback.py +2 -2
warp/examples/optim/example_drone.py +1 -1
warp/examples/sim/example_cloth.py +1 -1
warp/examples/sim/example_cloth_self_contact.py +48 -54
warp/examples/tile/example_tile_block_cholesky.py +502 -0
warp/examples/tile/example_tile_cholesky.py +2 -1
warp/examples/tile/example_tile_convolution.py +1 -1
warp/examples/tile/example_tile_filtering.py +1 -1
warp/examples/tile/example_tile_matmul.py +1 -1
warp/examples/tile/example_tile_mlp.py +2 -0
warp/fabric.py +7 -7
warp/fem/__init__.py +5 -0
warp/fem/adaptivity.py +1 -1
warp/fem/cache.py +152 -63
warp/fem/dirichlet.py +2 -2
warp/fem/domain.py +136 -6
warp/fem/field/field.py +141 -99
warp/fem/field/nodal_field.py +85 -39
warp/fem/field/virtual.py +97 -52
warp/fem/geometry/adaptive_nanogrid.py +91 -86
warp/fem/geometry/closest_point.py +13 -0
warp/fem/geometry/deformed_geometry.py +102 -40
warp/fem/geometry/element.py +56 -2
warp/fem/geometry/geometry.py +323 -22
warp/fem/geometry/grid_2d.py +157 -62
warp/fem/geometry/grid_3d.py +116 -20
warp/fem/geometry/hexmesh.py +86 -20
warp/fem/geometry/nanogrid.py +166 -86
warp/fem/geometry/partition.py +59 -25
warp/fem/geometry/quadmesh.py +86 -135
warp/fem/geometry/tetmesh.py +47 -119
warp/fem/geometry/trimesh.py +77 -270
warp/fem/integrate.py +107 -52
warp/fem/linalg.py +25 -58
warp/fem/operator.py +124 -27
warp/fem/quadrature/pic_quadrature.py +36 -14
warp/fem/quadrature/quadrature.py +40 -16
warp/fem/space/__init__.py +1 -1
warp/fem/space/basis_function_space.py +66 -46
warp/fem/space/basis_space.py +17 -4
warp/fem/space/dof_mapper.py +1 -1
warp/fem/space/function_space.py +2 -2
warp/fem/space/grid_2d_function_space.py +4 -1
warp/fem/space/hexmesh_function_space.py +4 -2
warp/fem/space/nanogrid_function_space.py +3 -1
warp/fem/space/partition.py +11 -2
warp/fem/space/quadmesh_function_space.py +4 -1
warp/fem/space/restriction.py +5 -2
warp/fem/space/shape/__init__.py +10 -8
warp/fem/space/tetmesh_function_space.py +4 -1
warp/fem/space/topology.py +52 -21
warp/fem/space/trimesh_function_space.py +4 -1
warp/fem/utils.py +53 -8
warp/jax.py +1 -2
warp/jax_experimental/ffi.py +12 -17
warp/jax_experimental/xla_ffi.py +37 -24
warp/math.py +171 -1
warp/native/array.h +99 -0
warp/native/builtin.h +174 -31
warp/native/coloring.cpp +1 -1
warp/native/exports.h +118 -63
warp/native/intersect.h +3 -3
warp/native/mat.h +5 -10
warp/native/mathdx.cpp +11 -5
warp/native/matnn.h +1 -123
warp/native/quat.h +28 -4
warp/native/sparse.cpp +121 -258
warp/native/sparse.cu +181 -274
warp/native/spatial.h +305 -17
warp/native/tile.h +583 -72
warp/native/tile_radix_sort.h +1108 -0
warp/native/tile_reduce.h +237 -2
warp/native/tile_scan.h +240 -0
warp/native/tuple.h +189 -0
warp/native/vec.h +6 -16
warp/native/warp.cpp +36 -4
warp/native/warp.cu +574 -51
warp/native/warp.h +47 -74
warp/optim/linear.py +5 -1
warp/paddle.py +7 -8
warp/py.typed +0 -0
warp/render/render_opengl.py +58 -29
warp/render/render_usd.py +124 -61
warp/sim/__init__.py +9 -0
warp/sim/collide.py +252 -78
warp/sim/graph_coloring.py +8 -1
warp/sim/import_mjcf.py +4 -3
warp/sim/import_usd.py +11 -7
warp/sim/integrator.py +5 -2
warp/sim/integrator_euler.py +1 -1
warp/sim/integrator_featherstone.py +1 -1
warp/sim/integrator_vbd.py +751 -320
warp/sim/integrator_xpbd.py +1 -1
warp/sim/model.py +265 -260
warp/sim/utils.py +10 -7
warp/sparse.py +303 -166
warp/tape.py +52 -51
warp/tests/cuda/test_conditional_captures.py +1046 -0
warp/tests/cuda/test_streams.py +1 -1
warp/tests/geometry/test_volume.py +2 -2
warp/tests/interop/test_dlpack.py +9 -9
warp/tests/interop/test_jax.py +0 -1
warp/tests/run_coverage_serial.py +1 -1
warp/tests/sim/disabled_kinematics.py +2 -2
warp/tests/sim/{test_vbd.py → test_cloth.py} +296 -113
warp/tests/sim/test_collision.py +159 -51
warp/tests/sim/test_coloring.py +15 -1
warp/tests/test_array.py +254 -2
warp/tests/test_array_reduce.py +2 -2
warp/tests/test_atomic_cas.py +299 -0
warp/tests/test_codegen.py +142 -19
warp/tests/test_conditional.py +47 -1
warp/tests/test_ctypes.py +0 -20
warp/tests/test_devices.py +8 -0
warp/tests/test_fabricarray.py +4 -2
warp/tests/test_fem.py +58 -25
warp/tests/test_func.py +42 -1
warp/tests/test_grad.py +1 -1
warp/tests/test_lerp.py +1 -3
warp/tests/test_map.py +481 -0
warp/tests/test_mat.py +1 -24
warp/tests/test_quat.py +6 -15
warp/tests/test_rounding.py +10 -38
warp/tests/test_runlength_encode.py +7 -7
warp/tests/test_smoothstep.py +1 -1
warp/tests/test_sparse.py +51 -2
warp/tests/test_spatial.py +507 -1
warp/tests/test_struct.py +2 -2
warp/tests/test_tuple.py +265 -0
warp/tests/test_types.py +2 -2
warp/tests/test_utils.py +24 -18
warp/tests/tile/test_tile.py +420 -1
warp/tests/tile/test_tile_mathdx.py +518 -14
warp/tests/tile/test_tile_reduce.py +213 -0
warp/tests/tile/test_tile_shared_memory.py +130 -1
warp/tests/tile/test_tile_sort.py +117 -0
warp/tests/unittest_suites.py +4 -6
warp/types.py +462 -308
warp/utils.py +647 -86
{warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.0.dist-info}/METADATA +20 -6
{warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.0.dist-info}/RECORD +178 -166
warp/stubs.py +0 -3381
warp/tests/sim/test_xpbd.py +0 -399
warp/tests/test_mlp.py +0 -282
{warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.0.dist-info}/WHEEL +0 -0
{warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.0.dist-info}/licenses/LICENSE.md +0 -0
{warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.0.dist-info}/top_level.txt +0 -0

warp/dlpack.py CHANGED Viewed

@@ -65,7 +65,7 @@ class _DLPackTensorHolder:
     """Class responsible for deleting DLManagedTensor memory after ownership is transferred from a capsule."""
     def __new__(cls, *args, **kwargs):
-        instance = super(_DLPackTensorHolder, cls).__new__(cls)
+        instance = super().__new__(cls)
         instance.mem_ptr = None
         return instance

warp/examples/benchmarks/benchmark_cloth.py CHANGED Viewed

@@ -243,7 +243,7 @@ def run_benchmark(mode, dim, timers, render=False):
             # run one warm-up iteration to accurately measure initialization time (some engines do lazy init)
             positions = integrator.simulate(sim_dt, sim_substeps)
-    label = "Dim ({}^2)".format(dim)
+    label = f"Dim ({dim}^2)"
     # run simulation
     for _i in range(sim_frames):
@@ -275,7 +275,7 @@ run_benchmark(mode, 128, timers, render=False)
 # write results
 for k, v in timers.items():
-    print("{:16} min: {:8.2f} max: {:8.2f} avg: {:8.2f}".format(k, np.min(v), np.max(v), np.mean(v)))
+    print(f"{k:16} min: {np.min(v):8.2f} max: {np.max(v):8.2f} avg: {np.mean(v):8.2f}")
 report = open(os.path.join("benchmark.csv"), "a")
 writer = csv.writer(report, delimiter=",")

warp/examples/benchmarks/benchmark_tile_sort.py ADDED Viewed

@@ -0,0 +1,155 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import time
+import numpy as np
+import warp as wp
+BLOCK_DIM = 128
+def create_test_kernel(KEY_TYPE, MAX_SORT_LENGTH):  # Build a tile-sort kernel specialized for one key dtype and one fixed row length
+    @wp.kernel
+    def tile_sort_kernel(
+        input_keys: wp.array(dtype=KEY_TYPE, ndim=2),
+        input_values: wp.array(dtype=wp.int32, ndim=2),
+        output_keys: wp.array(dtype=KEY_TYPE, ndim=2),
+        output_values: wp.array(dtype=wp.int32, ndim=2),
+    ):
+        batch_id, i = wp.tid()  # only batch_id is used below; i is never read
+        # Load row batch_id of the keys and values into tiles, requesting shared storage
+        keys = wp.tile_load(input_keys[batch_id], shape=MAX_SORT_LENGTH, storage="shared")
+        values = wp.tile_load(input_values[batch_id], shape=MAX_SORT_LENGTH, storage="shared")
+        # Sort the key tile in place; the value tile is passed along so it is reordered with the keys
+        wp.tile_sort(keys, values)
+        # Write the sorted tiles back to row batch_id of the output arrays
+        wp.tile_store(output_keys[batch_id], keys)
+        wp.tile_store(output_values[batch_id], values)
+    return tile_sort_kernel
+if __name__ == "__main__":  # Benchmark driver: times wp.tile_sort against wp.utils.segmented_sort_pairs
+    wp.config.quiet = True
+    wp.init()
+    wp.clear_kernel_cache()
+    wp.set_module_options({"fast_math": True, "enable_backward": False})
+    iterations = 100  # timed launches per configuration
+    rng = np.random.default_rng(42)
+    shared_benchmark_data = {}
+    cub_segmented_sort_benchmark_data = {}
+    array_length = list(range(16, 257, 16))  # row lengths 16, 32, ..., 256
+    print(
+        f"{'Type':<12s} {'Batch Size':<12s} {'Length':<12s} {'Tile Sort (ms)':<16s} {'Cub Segmented Sort (ms)':<24s} {'CubTime/TileTime':<16s}"
+    )
+    print("-" * 100)
+    for dtype in [int, float]:
+        for batch_size_exponent in range(5, 11):
+            batch_size = 2**batch_size_exponent  # batch sizes 32, 64, ..., 1024
+            for length in array_length:
+                if dtype == int:
+                    np_keys = rng.choice(1000000000, size=(batch_size, length), replace=False)
+                else:  # dtype == float
+                    np_keys = rng.choice(1000000, size=(batch_size, length), replace=False).astype(np.float32)
+                np_values = np.tile(np.arange(length), (batch_size, 1))
+                # Reference result: sort every row with NumPy and permute its values the same way
+                np_sorted_keys = np.zeros_like(np_keys)
+                np_sorted_values = np.zeros_like(np_values)
+                for b in range(batch_size):
+                    sorted_indices = np.argsort(np_keys[b])
+                    np_sorted_keys[b] = np_keys[b][sorted_indices]
+                    np_sorted_values[b] = np_values[b][sorted_indices]
+                # Upload the random keys and the per-row iota values to the CUDA device
+                input_keys = wp.array(np_keys, dtype=dtype, ndim=2, device="cuda")
+                input_values = wp.array(np_values, dtype=int, ndim=2, device="cuda")
+                output_keys = wp.zeros_like(input_keys, device="cuda")
+                output_values = wp.zeros_like(input_values, device="cuda")
+                kernel = create_test_kernel(dtype, length)  # a new kernel is created for every (dtype, length) pair
+                cmd = wp.launch_tiled(
+                    kernel,
+                    dim=batch_size,
+                    inputs=[input_keys, input_values, output_keys, output_values],
+                    block_dim=BLOCK_DIM,
+                    record_cmd=True,
+                )
+                # Warmup launches, issued before the timer starts
+                for _ in range(5):
+                    cmd.launch()
+                with wp.ScopedTimer("benchmark", cuda_filter=wp.TIMING_KERNEL, print=False, synchronize=True) as timer:
+                    for _ in range(iterations):
+                        cmd.launch()
+                    wp.synchronize()
+                if dtype == int:
+                    keys_match = np.array_equal(output_keys.numpy(), np_sorted_keys)
+                else:  # dtype == float
+                    keys_match = np.allclose(output_keys.numpy(), np_sorted_keys, atol=1e-6)  # Use tolerance for floats
+                values_match = np.array_equal(output_values.numpy(), np_sorted_values)
+                # Validate the tile-sort output against the NumPy reference
+                assert keys_match, f"Key sorting mismatch for dtype={dtype}!"
+                assert values_match, f"Value sorting mismatch for dtype={dtype}!"
+                timing_results = [result.elapsed for result in timer.timing_results]
+                mean_timing = np.mean(timing_results)
+                shared_benchmark_data[length] = mean_timing
+                # Allocate 2x the rows that are filled below (NOTE(review): presumably scratch space needed by segmented_sort_pairs - confirm)
+                input_keys = wp.zeros(shape=(batch_size * 2, length), dtype=dtype, device="cuda")
+                input_values = wp.zeros(shape=(batch_size * 2, length), dtype=int, device="cuda")
+                # Copy the unsorted host data into the device arrays, then flatten them to 1D
+                input_keys.assign(np_keys)
+                input_values.assign(np_values)
+                input_keys = input_keys.reshape(-1)
+                input_values = input_values.reshape(-1)
+                segments = wp.array(np.arange(0, batch_size + 1) * length, dtype=int, device="cuda")  # start offset of every row plus the final end offset
+                # Compare with cub segmented radix sort
+                # Warmup (NOTE(review): the sort appears to be in place, so later calls would see already-sorted keys - confirm)
+                for _ in range(5):
+                    wp.utils.segmented_sort_pairs(input_keys, input_values, batch_size * length, segments)
+                t1 = time.time_ns()  # host wall-clock timing, unlike the ScopedTimer measurement used for the tile sort
+                for _ in range(iterations):
+                    wp.utils.segmented_sort_pairs(input_keys, input_values, batch_size * length, segments)
+                wp.synchronize()
+                t2 = time.time_ns()
+                cub_segmented_sort_benchmark_data[length] = (t2 - t1) / (1_000_000 * iterations)  # nanoseconds -> milliseconds per call
+                # Print one table row for this (dtype, batch_size, length) configuration
+                print(
+                    f"{dtype!s:<12s} {batch_size:<12d} {length:<12d} {shared_benchmark_data[length]:<16.4g} {cub_segmented_sort_benchmark_data[length]:<24.4g} {cub_segmented_sort_benchmark_data[length] / shared_benchmark_data[length]:<16.4g}"
+                )

warp/examples/core/example_sample_mesh.py CHANGED Viewed

@@ -176,7 +176,7 @@ def sample_mesh(
     sample = wp.randf(rng)
     tri = wp.lower_bound(cdf, sample)
-    # Sample the location in that triangle using random barycentric cordinates.
+    # Sample the location in that triangle using random barycentric coordinates.
     ru = wp.randf(rng)
     rv = wp.randf(rng)
     tri_u = 1.0 - wp.sqrt(ru)

warp/examples/core/example_spin_lock.py ADDED Viewed

@@ -0,0 +1,93 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###########################################################################
+# Example Spin Lock
+#
+# Shows how to use a spin lock to synchronize access to a shared resource.
+#
+###########################################################################
+import warp as wp
+from warp.tests.unittest_utils import *
+@wp.func
+def spinlock_acquire(lock: wp.array(dtype=wp.int32)):
+    # Spin until lock[0] is swapped from 0 to 1; a result of 1 is treated as "lock already held", so the loop retries
+    while wp.atomic_cas(lock, 0, 0, 1) == 1:
+        pass
+@wp.func
+def spinlock_release(lock: wp.array(dtype=wp.int32)):
+    # Release the lock by atomically writing 0 (unlocked) back to lock[0]
+    wp.atomic_exch(lock, 0, 0)
+@wp.func
+def volatile_read(ptr: wp.array(dtype=wp.int32), index: int):  # Read ptr[index] through atomic exchanges instead of a plain load
+    value = wp.atomic_exch(ptr, index, 0)  # swap in 0 and keep what was stored; NOTE(review): ptr[index] is 0 until the next line runs
+    wp.atomic_exch(ptr, index, value)  # put the stored value back
+    return value
+@wp.kernel
+def test_spinlock_counter(
+    counter: wp.array(dtype=wp.int32), atomic_counter: wp.array(dtype=wp.int32), lock: wp.array(dtype=wp.int32)
+):
+    # Spin until this thread holds the lock
+    spinlock_acquire(lock)
+    # Critical section - increment counter
+    # counter[0] = counter[0] + 1 # This gives wrong results - counter should be marked as volatile
+    # Workaround since warp arrays cannot be marked as volatile: read the value through atomics
+    value = volatile_read(counter, 0)
+    counter[0] = value + 1
+    # Release the lock
+    spinlock_release(lock)
+    # Increment atomic counter for comparison (done outside the critical section)
+    wp.atomic_add(atomic_counter, 0, 1)
+def test_spinlock(device):
+    # Create a lock array initialized to 0 (unlocked)
+    lock = wp.array([0], dtype=wp.int32, device=device)
+    # Create counter arrays initialized to 0
+    counter = wp.array([0], dtype=wp.int32, device=device)
+    atomic_counter = wp.array([0], dtype=wp.int32, device=device)
+    # Number of threads to test with
+    n = 1024
+    # Launch the test kernel
+    wp.launch(test_spinlock_counter, dim=n, inputs=[counter, atomic_counter, lock], device=device)
+    # Verify results
+    assert atomic_counter.numpy()[0] == n, f"Atomic counter should be {n}, got {atomic_counter.numpy()[0]}"
+    assert counter.numpy()[0] == n, f"Counter should be {n}, got {counter.numpy()[0]}"
+    assert lock.numpy()[0] == 0, "Lock was not properly released"
+    print(f"Final counter value: {counter.numpy()[0]}")
+    print(f"Final atomic counter value: {atomic_counter.numpy()[0]}")
+if __name__ == "__main__":  # Script entry point for the spin-lock example
+    wp.clear_kernel_cache()
+    test_spinlock(device="cuda")  # the example is hard-wired to the "cuda" device

warp/examples/core/example_work_queue.py ADDED Viewed

@@ -0,0 +1,118 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+###########################################################################
+# Example Work Queue
+#
+# Shows how to build a work queue on atomic compare-and-swap, with kernel threads that both take items from the queue and add new ones.
+#
+###########################################################################
+import warp as wp
+from warp.tests.unittest_utils import *
+@wp.func
+def volatile_read(ptr: wp.array(dtype=wp.int32), index: int):  # Read ptr[index] through an atomic operation instead of a plain load
+    value = wp.atomic_add(ptr, index, 0)  # adding 0 leaves the stored value unchanged; the call's result is used as the current value
+    return value
+@wp.struct
+class WorkQueue:  # Queue state passed to the kernel; slots are addressed as counter % capacity
+    buffer: wp.array(dtype=wp.int32)  # item storage, indexed modulo capacity
+    capacity: int  # wrap-around modulus and maximum value of tail - head
+    head: wp.array(dtype=wp.int32)  # element 0: running count of dequeued items (next read position)
+    tail: wp.array(dtype=wp.int32)  # element 0: running count of enqueued items (next write position)
+@wp.func
+def enqueue(queue: WorkQueue, item: int) -> bool:  # Append item; returns False if the queue is full, True once the item is stored
+    while True:
+        # Read tail and head with one atomic read each (the pair is not a single atomic snapshot)
+        current_tail = volatile_read(queue.tail, 0)
+        current_head = volatile_read(queue.head, 0)
+        # Check if queue is full
+        if (current_tail - current_head) >= queue.capacity:
+            return False
+        # Try to claim slot current_tail by advancing tail with a compare-and-swap
+        index = current_tail % queue.capacity
+        if wp.atomic_cas(queue.tail, 0, current_tail, current_tail + 1) == current_tail:
+            queue.buffer[index] = item  # NOTE(review): written after tail is advanced, so a concurrent dequeue might read the slot first - confirm
+            return True
+        # Retry if another thread changed tail
+@wp.func
+def dequeue(queue: WorkQueue) -> tuple[bool, int]:  # Pop the oldest item; returns (True, item), or (False, 0) if the queue is empty
+    while True:
+        # Read head and tail with one atomic read each (the pair is not a single atomic snapshot)
+        current_head = volatile_read(queue.head, 0)
+        current_tail = volatile_read(queue.tail, 0)
+        # Check if queue is empty
+        if current_head >= current_tail:
+            return False, 0
+        # Read the item at the current head; it is only returned if the compare-and-swap below succeeds
+        index = current_head % queue.capacity
+        item = queue.buffer[index]
+        # Try to claim the item by advancing head with a compare-and-swap
+        if wp.atomic_cas(queue.head, 0, current_head, current_head + 1) == current_head:
+            return True, item
+        # Retry if another thread changed head
+@wp.kernel
+def process_queue(queue: WorkQueue):  # Each thread keeps dequeuing and processing items until a dequeue fails
+    counter = int(0)  # per-thread count of processed items; never read in this kernel
+    while True:
+        success, item = dequeue(queue)
+        if not success:
+            break
+        wp.printf("Processed item: %d\n", item)
+        if item < 1000000:  # items below 1000000 are re-enqueued once, offset by 1000000 so they fail this test next time
+            if not enqueue(queue, item + 1000000):
+                wp.printf("Failed to enqueue item: %d\n", item + 1000000)
+        counter = counter + 1
+def test_work_queue(device):
+    # Create a work queue with capacity 1024
+    capacity = 8192
+    head = wp.array([0], dtype=wp.int32, device=device)
+    tail = wp.array([4096], dtype=wp.int32, device=device)
+    buffer = wp.array(np.arange(4096, dtype=np.int32), dtype=wp.int32, device=device)
+    queue = WorkQueue()
+    queue.capacity = capacity
+    queue.head = head
+    queue.tail = tail
+    queue.buffer = buffer
+    # Launch processing kernel
+    wp.launch(process_queue, dim=1024, inputs=[queue], device=device)
+    wp.synchronize()
+if __name__ == "__main__":  # Script entry point for the work-queue example
+    wp.clear_kernel_cache()
+    test_work_queue(device="cuda")  # the example is hard-wired to the "cuda" device

warp/examples/fem/example_adaptive_grid.py CHANGED Viewed

@@ -70,7 +70,7 @@ def mass_form(
 @fem.integrand
 def side_divergence_form(s: fem.Sample, domain: fem.Domain, u: fem.Field, psi: fem.Field):
     # normal velocity jump (non-zero at resolution boundaries)
-    return -wp.dot(fem.jump(u, s), fem.normal(domain, s)) * psi(s)
+    return -wp.dot(fem.jump(u, s), fem.normal(domain, s)) * fem.average(psi, s)
 @wp.func
@@ -173,7 +173,7 @@ class Example:
         bd_test = fem.make_test(u_space, domain=boundary)
         bd_trial = fem.make_trial(u_space, domain=boundary)
         dirichlet_projector = fem.integrate(
-            noslip_projector_form, fields={"u": bd_test, "v": bd_trial}, nodal=True, output_dtype=float
+            noslip_projector_form, fields={"u": bd_test, "v": bd_trial}, assembly="nodal", output_dtype=float
         )
         fem.normalize_dirichlet_projector(dirichlet_projector)
@@ -187,7 +187,7 @@ class Example:
             rho_trial = fem.make_trial(rho_space)
         inv_mass_matrix = fem.integrate(
-            mass_form, fields={"u": rho_trial, "v": rho_test}, nodal=True, output_dtype=float
+            mass_form, fields={"u": rho_trial, "v": rho_test}, assembly="nodal", output_dtype=float
         )
         fem_example_utils.invert_diagonal_bsr_matrix(inv_mass_matrix)
@@ -269,8 +269,8 @@ if __name__ == "__main__":
                 stage = Usd.Stage.Open(os.path.join(warp.examples.get_asset_directory(), "rocks.usd"))
                 mesh = UsdGeom.Mesh(stage.GetPrimAtPath("/root/rocks"))
-                points = np.array((mesh.GetPointsAttr().Get()))
-                counts = np.array((mesh.GetFaceVertexCountsAttr().Get()))
+                points = np.array(mesh.GetPointsAttr().Get())
+                counts = np.array(mesh.GetFaceVertexCountsAttr().Get())
                 indices = np.array(mesh.GetFaceVertexIndicesAttr().Get())
                 ref_geom = (points, counts, indices)
             except Exception:

warp/examples/fem/example_apic_fluid.py CHANGED Viewed

@@ -290,7 +290,7 @@ class Example:
                 vel_projector = fem.integrate(
                     velocity_boundary_projector_form,
                     fields={"u": velocity_trial, "v": velocity_test},
-                    nodal=True,
+                    assembly="nodal",
                     output_dtype=float,
                 )
                 fem.normalize_dirichlet_projector(vel_projector)

warp/examples/fem/example_burgers.py CHANGED Viewed

@@ -146,7 +146,7 @@ class Example:
         # For simplicity, use nodal integration so that inertia matrix is diagonal
         trial = fem.make_trial(space=vector_space, domain=domain)
         matrix_inertia = fem.integrate(
-            vel_mass_form, fields={"u": trial, "v": self._test}, output_dtype=wp.float32, nodal=True
+            vel_mass_form, fields={"u": trial, "v": self._test}, output_dtype=wp.float32, assembly="nodal"
         )
         self._inv_mass_matrix = wp.sparse.bsr_copy(matrix_inertia)
         fem_example_utils.invert_diagonal_bsr_matrix(self._inv_mass_matrix)

warp/examples/fem/example_convection_diffusion.py CHANGED Viewed

@@ -82,7 +82,7 @@ def diffusion_and_inertia_form(s: fem.Sample, phi: fem.Field, psi: fem.Field, dt
 class Example:
-    def __init__(self, quiet=False, degree=2, resolution=50, tri_mesh=False, viscosity=0.001, ang_vel=1.0):
+    def __init__(self, quiet=False, degree=2, resolution=50, mesh: str = "grid", viscosity=0.001, ang_vel=1.0):
         self._quiet = quiet
         self._ang_vel = ang_vel
@@ -91,11 +91,14 @@ class Example:
         self.sim_dt = 1.0 / (ang_vel * res)
         self.current_frame = 0
-        if tri_mesh:
-            positions, tri_vidx = fem_example_utils.gen_trimesh(res=wp.vec2i(res))
+        if mesh == "tri":
+            positions, tri_vidx = fem_example_utils.gen_trimesh(res=wp.vec2i(resolution))
             geo = fem.Trimesh2D(tri_vertex_indices=tri_vidx, positions=positions, build_bvh=True)
+        elif mesh == "quad":
+            positions, quad_vidx = fem_example_utils.gen_quadmesh(res=wp.vec2i(resolution))
+            geo = fem.Quadmesh2D(quad_vertex_indices=quad_vidx, positions=positions, build_bvh=True)
         else:
-            geo = fem.Grid2D(res=wp.vec2i(res))
+            geo = fem.Grid2D(res=wp.vec2i(resolution))
         domain = fem.Cells(geometry=geo)
         scalar_space = fem.make_polynomial_space(geo, degree=degree)
@@ -149,7 +152,7 @@ if __name__ == "__main__":
     parser.add_argument("--num_frames", type=int, default=250, help="Total number of frames.")
     parser.add_argument("--viscosity", type=float, default=0.001, help="Fluid viscosity parameter.")
     parser.add_argument("--ang_vel", type=float, default=1.0, help="Angular velocity.")
-    parser.add_argument("--tri_mesh", action="store_true", help="Use a triangular mesh.")
+    parser.add_argument("--mesh", choices=("grid", "tri", "quad"), default="grid", help="Mesh type.")
     parser.add_argument(
         "--headless",
         action="store_true",
@@ -164,7 +167,7 @@ if __name__ == "__main__":
             quiet=args.quiet,
             degree=args.degree,
             resolution=args.resolution,
-            tri_mesh=args.tri_mesh,
+            mesh=args.mesh,
             viscosity=args.viscosity,
             ang_vel=args.ang_vel,
         )