PyPI - warp-lang - Versions diffs - 1.8.1__py3-none-manylinux_2_34_aarch64.whl → 1.9.0__py3-none-manylinux_2_34_aarch64.whl - Mend

warp-lang 1.8.1__py3-none-manylinux_2_34_aarch64.whl → 1.9.0__py3-none-manylinux_2_34_aarch64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (134)

warp/__init__.py +282 -103
warp/__init__.pyi +482 -110
warp/bin/warp-clang.so +0 -0
warp/bin/warp.so +0 -0
warp/build.py +93 -30
warp/build_dll.py +47 -67
warp/builtins.py +955 -137
warp/codegen.py +312 -206
warp/config.py +1 -1
warp/context.py +1249 -784
warp/examples/core/example_marching_cubes.py +1 -0
warp/examples/core/example_render_opengl.py +100 -3
warp/examples/fem/example_apic_fluid.py +98 -52
warp/examples/fem/example_convection_diffusion_dg.py +25 -4
warp/examples/fem/example_diffusion_mgpu.py +8 -3
warp/examples/fem/utils.py +68 -22
warp/fabric.py +1 -1
warp/fem/cache.py +27 -19
warp/fem/domain.py +2 -2
warp/fem/field/nodal_field.py +2 -2
warp/fem/field/virtual.py +264 -166
warp/fem/geometry/geometry.py +5 -5
warp/fem/integrate.py +129 -51
warp/fem/space/restriction.py +4 -0
warp/fem/space/shape/tet_shape_function.py +3 -10
warp/jax_experimental/custom_call.py +1 -1
warp/jax_experimental/ffi.py +2 -1
warp/marching_cubes.py +708 -0
warp/native/array.h +99 -4
warp/native/builtin.h +82 -5
warp/native/bvh.cpp +64 -28
warp/native/bvh.cu +58 -58
warp/native/bvh.h +2 -2
warp/native/clang/clang.cpp +7 -7
warp/native/coloring.cpp +8 -2
warp/native/crt.cpp +2 -2
warp/native/crt.h +3 -5
warp/native/cuda_util.cpp +41 -10
warp/native/cuda_util.h +10 -4
warp/native/exports.h +1842 -1908
warp/native/fabric.h +2 -1
warp/native/hashgrid.cpp +37 -37
warp/native/hashgrid.cu +2 -2
warp/native/initializer_array.h +1 -1
warp/native/intersect.h +2 -2
warp/native/mat.h +1910 -116
warp/native/mathdx.cpp +43 -43
warp/native/mesh.cpp +24 -24
warp/native/mesh.cu +26 -26
warp/native/mesh.h +4 -2
warp/native/nanovdb/GridHandle.h +179 -12
warp/native/nanovdb/HostBuffer.h +8 -7
warp/native/nanovdb/NanoVDB.h +517 -895
warp/native/nanovdb/NodeManager.h +323 -0
warp/native/nanovdb/PNanoVDB.h +2 -2
warp/native/quat.h +331 -14
warp/native/range.h +7 -1
warp/native/reduce.cpp +10 -10
warp/native/reduce.cu +13 -14
warp/native/runlength_encode.cpp +2 -2
warp/native/runlength_encode.cu +5 -5
warp/native/scan.cpp +3 -3
warp/native/scan.cu +4 -4
warp/native/sort.cpp +10 -10
warp/native/sort.cu +22 -22
warp/native/sparse.cpp +8 -8
warp/native/sparse.cu +13 -13
warp/native/spatial.h +366 -17
warp/native/temp_buffer.h +2 -2
warp/native/tile.h +283 -69
warp/native/vec.h +381 -14
warp/native/volume.cpp +54 -54
warp/native/volume.cu +1 -1
warp/native/volume.h +2 -1
warp/native/volume_builder.cu +30 -37
warp/native/warp.cpp +150 -149
warp/native/warp.cu +323 -192
warp/native/warp.h +227 -226
warp/optim/linear.py +736 -271
warp/render/imgui_manager.py +289 -0
warp/render/render_opengl.py +85 -6
warp/sim/graph_coloring.py +2 -2
warp/sparse.py +558 -175
warp/tests/aux_test_module_aot.py +7 -0
warp/tests/cuda/test_async.py +3 -3
warp/tests/cuda/test_conditional_captures.py +101 -0
warp/tests/geometry/test_marching_cubes.py +233 -12
warp/tests/sim/test_coloring.py +6 -6
warp/tests/test_array.py +56 -5
warp/tests/test_codegen.py +3 -2
warp/tests/test_context.py +8 -15
warp/tests/test_enum.py +136 -0
warp/tests/test_examples.py +2 -2
warp/tests/test_fem.py +45 -2
warp/tests/test_fixedarray.py +229 -0
warp/tests/test_func.py +18 -15
warp/tests/test_future_annotations.py +7 -5
warp/tests/test_linear_solvers.py +30 -0
warp/tests/test_map.py +1 -1
warp/tests/test_mat.py +1518 -378
warp/tests/test_mat_assign_copy.py +178 -0
warp/tests/test_mat_constructors.py +574 -0
warp/tests/test_module_aot.py +287 -0
warp/tests/test_print.py +69 -0
warp/tests/test_quat.py +140 -34
warp/tests/test_quat_assign_copy.py +145 -0
warp/tests/test_reload.py +2 -1
warp/tests/test_sparse.py +71 -0
warp/tests/test_spatial.py +140 -34
warp/tests/test_spatial_assign_copy.py +160 -0
warp/tests/test_struct.py +43 -3
warp/tests/test_types.py +0 -20
warp/tests/test_vec.py +179 -34
warp/tests/test_vec_assign_copy.py +143 -0
warp/tests/tile/test_tile.py +184 -18
warp/tests/tile/test_tile_cholesky.py +605 -0
warp/tests/tile/test_tile_load.py +169 -0
warp/tests/tile/test_tile_mathdx.py +2 -558
warp/tests/tile/test_tile_matmul.py +1 -1
warp/tests/tile/test_tile_mlp.py +1 -1
warp/tests/tile/test_tile_shared_memory.py +5 -5
warp/tests/unittest_suites.py +6 -0
warp/tests/walkthrough_debug.py +1 -1
warp/thirdparty/unittest_parallel.py +108 -9
warp/types.py +554 -264
warp/utils.py +68 -86
{warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/METADATA +28 -65
{warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/RECORD +131 -121
warp/native/marching.cpp +0 -19
warp/native/marching.cu +0 -514
warp/native/marching.h +0 -19
{warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/WHEEL +0 -0
{warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/licenses/LICENSE.md +0 -0
{warp_lang-1.8.1.dist-info → warp_lang-1.9.0.dist-info}/top_level.txt +0 -0

warp/examples/core/example_marching_cubes.py CHANGED Viewed

@@ -22,6 +22,7 @@
 # Note: requires a CUDA-capable device
 ###########################################################################
 import warp as wp
 import warp.render

warp/examples/core/example_render_opengl.py CHANGED Viewed

@@ -18,6 +18,7 @@
 #
 # Demonstrates how to set up tiled rendering and retrieves the pixels from
 # OpenGLRenderer as a Warp array while keeping all memory on the GPU.
+# It also shows how to add an ImGui UI to the renderer.
 #
 ###########################################################################
@@ -25,14 +26,95 @@ import numpy as np
 import warp as wp
 import warp.render
+from warp.render.imgui_manager import ImGuiManager
+class ExampleImGuiManager(ImGuiManager):
+    """An example ImGui manager that displays a few float values."""
+    def __init__(self, renderer, window_pos=(10, 10), window_size=(300, 400)):
+        super().__init__(renderer)
+        if not self.is_available:
+            return
+        # UI properties
+        self.window_pos = window_pos
+        self.window_size = window_size
+        # Values to display in the UI
+        self.some_float = 123.456
+        self.editable_float1 = 10.0
+        self.editable_float2 = 20.0
+        self.editable_float3 = 30.0
+        self.editable_vec2 = wp.vec2(0.5, 1.2)
+        self.editable_vec3 = wp.vec3(2.1, 3.4, 4.7)
+        self.editable_vec4 = wp.vec4(1.5, 3.2, 4.8, 6.1)
+        self.warp_array_float = wp.array([0.7, 1.4, 2.8], dtype=float)
+        self.warp_array_vec2 = wp.array([wp.vec2(1.1, 2.3), wp.vec2(3.4, 4.2), wp.vec2(5.6, 6.9)], dtype=wp.vec2)
+        self.warp_array_vec3 = wp.array(
+            [wp.vec3(0.5, 1.7, 2.9), wp.vec3(3.2, 4.8, 5.1), wp.vec3(6.4, 7.6, 8.3)], dtype=wp.vec3
+        )
+        self.warp_array_vec4 = wp.array([wp.vec4(1.2, 2.4, 3.6, 4.8), wp.vec4(5.1, 6.3, 7.5, 8.7)], dtype=wp.vec4)
+    def draw_ui(self):
+        # set window position and size once
+        self.imgui.set_next_window_size(self.window_size[0], self.window_size[1], self.imgui.ONCE)
+        self.imgui.set_next_window_position(self.window_pos[0], self.window_pos[1], self.imgui.ONCE)
+        self.imgui.begin("Warp Float Values")
+        self.imgui.text(f"A read-only float: {self.some_float}")
+        self.imgui.separator()
+        self.imgui.text("Editable floats:")
+        changed1, self.editable_float1 = self.imgui.slider_float("Slider", self.editable_float1, 0.0, 100.0)
+        changed2, self.editable_float2 = self.imgui.drag_float("Drag", self.editable_float2, 0.1, 0.0, 100.0)
+        changed3, self.editable_float3 = self.imgui.input_float("Input", self.editable_float3)
+        changed, self.editable_vec2 = self.drag_vec2("Vec2", self.editable_vec2)
+        changed, self.editable_vec3 = self.drag_vec3("Vec3", self.editable_vec3)
+        changed, self.editable_vec4 = self.drag_vec4("Vec4", self.editable_vec4)
+        changed, self.warp_array_float = self.drag_float_list("Float", self.warp_array_float)
+        changed, self.warp_array_vec2 = self.drag_vec2_list("Vec2", self.warp_array_vec2)
+        changed, self.warp_array_vec3 = self.drag_vec3_list("Vec3", self.warp_array_vec3)
+        changed, self.warp_array_vec4 = self.drag_vec4_list("Vec4", self.warp_array_vec4)
+        self.imgui.separator()
+        self.imgui.text("File Dialog Examples:")
+        if self.imgui.button("Open File"):
+            file_path = self.open_load_file_dialog(
+                title="Select a File", filetypes=[("Text Files", "*.txt"), ("All Files", "*.*")]
+            )
+            if file_path:
+                print(f"Selected file to open: {file_path}")
+        if self.imgui.button("Save File"):
+            file_path = self.open_save_file_dialog(
+                title="Save As", defaultextension=".txt", filetypes=[("Text Files", "*.txt"), ("All Files", "*.*")]
+            )
+            if file_path:
+                print(f"Selected file to save: {file_path}")
+        self.imgui.end()
 class Example:
-    def __init__(self, num_tiles=4, custom_tile_arrangement=False):
+    def __init__(self, num_tiles=4, custom_tile_arrangement=False, use_imgui=True):
         if num_tiles < 1:
             raise ValueError("num_tiles must be greater than or equal to 1.")
         self.renderer = wp.render.OpenGLRenderer(vsync=False)
+        self.use_imgui = use_imgui
+        if self.use_imgui:
+            self.imgui_manager = ExampleImGuiManager(self.renderer)
+            if self.imgui_manager.is_available:
+                self.renderer.render_2d_callbacks.append(self.imgui_manager.render_frame)
+            else:
+                self.use_imgui = False
         instance_ids = []
         if custom_tile_arrangement:
@@ -81,6 +163,11 @@ class Example:
         )
         self.renderer.end_frame()
+    def clear(self):
+        if self.use_imgui:
+            self.imgui_manager.shutdown()
+        self.renderer.clear()
 if __name__ == "__main__":
     import argparse
@@ -103,11 +190,21 @@ if __name__ == "__main__":
         help="Whether to split tiles into subplots when --show_plot is True.",
     )
     parser.add_argument("--custom_tile_arrangement", action="store_true", help="Apply custom tile arrangement.")
+    parser.add_argument(
+        "--use_imgui",
+        type=lambda x: bool(distutils.util.strtobool(x.strip())),
+        default=True,
+        help="Enable or disable the ImGui window.",
+    )
     args = parser.parse_known_args()[0]
     with wp.ScopedDevice(args.device):
-        example = Example(num_tiles=args.num_tiles, custom_tile_arrangement=args.custom_tile_arrangement)
+        example = Example(
+            num_tiles=args.num_tiles,
+            custom_tile_arrangement=args.custom_tile_arrangement,
+            use_imgui=args.use_imgui,
+        )
         channels = 1 if args.render_mode == "depth" else 3
@@ -190,4 +287,4 @@ if __name__ == "__main__":
                 fig.canvas.draw()
                 fig.canvas.flush_events()
-        example.renderer.clear()
+        example.clear()

warp/examples/fem/example_apic_fluid.py CHANGED Viewed

@@ -20,6 +20,7 @@
 # grid and the PicQuadrature class.
 ###########################################################################
+from dataclasses import dataclass
 from typing import Any
 import numpy as np
@@ -27,9 +28,8 @@ import numpy as np
 import warp as wp
 import warp.examples.fem.utils as fem_example_utils
 import warp.fem as fem
-import warp.sim.render
+import warp.render
 from warp.fem import Domain, Field, Sample, at_node, div, grad, integrand
-from warp.sim import Model, State
 from warp.sparse import BsrMatrix, bsr_mm, bsr_mv, bsr_transposed
@@ -186,76 +186,83 @@ def solve_incompressibility(
 class Example:
-    def __init__(self, quiet=False, stage_path="example_apic_fluid.usd", voxel_size=1.0):
+    @dataclass
+    class State:
+        particle_q: wp.array(dtype=wp.vec3)
+        particle_qd: wp.array(dtype=wp.vec3)
+        particle_qd_grad: wp.array(dtype=wp.mat33)
+    def __init__(self, quiet=False, stage_path="example_apic_fluid.usd", voxel_size=1.0, opengl=False):
+        self.gravity = wp.vec3(0.0, -10.0, 0.0)
         fps = 60
+        self.sim_substeps = 1
         self.frame_dt = 1.0 / fps
         self.current_frame = 0
-        self.sim_substeps = 1
         self.sim_dt = self.frame_dt / self.sim_substeps
         self.voxel_size = voxel_size
         self._quiet = quiet
         # particle emission
-        particle_grid_lo = wp.vec3(-5)
-        particle_grid_hi = wp.vec3(5)
-        grid_cell_size = voxel_size
-        grid_cell_volume = np.prod(grid_cell_size)
         PARTICLES_PER_CELL_DIM = 2
-        self.radius = float(np.max(grid_cell_size) / (2 * PARTICLES_PER_CELL_DIM))
+        self.radius = float(np.max(voxel_size) / (2 * PARTICLES_PER_CELL_DIM))
+        particle_grid_lo = np.full(3, -5)
+        particle_grid_hi = np.full(3, 5)
         particle_grid_res = (
             np.array((particle_grid_hi - particle_grid_lo) / voxel_size, dtype=int) * PARTICLES_PER_CELL_DIM
         )
-        particle_grid_offset = wp.vec3(self.radius, self.radius, self.radius)
-        # Initialize warp.sim model, spawn particles
-        builder = wp.sim.ModelBuilder()
-        builder.add_particle_grid(
-            dim_x=particle_grid_res[0],
-            dim_y=particle_grid_res[1],
-            dim_z=particle_grid_res[2],
-            cell_x=self.radius * 2.0,
-            cell_y=self.radius * 2.0,
-            cell_z=self.radius * 2.0,
-            pos=particle_grid_lo + particle_grid_offset,
-            rot=wp.quat_identity(),
-            vel=wp.vec3(0.0, 0.0, 0.0),
-            mass=grid_cell_volume / PARTICLES_PER_CELL_DIM**3,
-            jitter=self.radius * 1.0,
+        self.particle_volumes, particle_q = self._spawn_particles(
+            particle_grid_res, particle_grid_lo, particle_grid_hi, packing_fraction=1.0
+        )
+        particle_qd = wp.zeros_like(particle_q)
+        particle_count = particle_q.shape[0]
+        if not self._quiet:
+            print("Particle count:", particle_count)
+        # Allocate states
+        self.state_0 = self.State(
+            wp.clone(particle_q),
+            wp.clone(particle_qd),
+            particle_qd_grad=wp.zeros(shape=(particle_count), dtype=wp.mat33),
+        )
+        self.state_1 = self.State(
+            wp.clone(particle_q),
+            wp.clone(particle_qd),
+            particle_qd_grad=wp.zeros(shape=(particle_count), dtype=wp.mat33),
         )
-        self.model: Model = builder.finalize()
-        self.model.ground = False
         # Storage for temporary variables
         self.temporary_store = fem.TemporaryStore()
-        if not self._quiet:
-            print("Particle count:", self.model.particle_count)
+        # initialize renderers
+        self.opengl_renderer = None
+        self.usd_renderer = None
-        self.state_0: State = self.model.state()
-        self.state_0.particle_qd_grad = wp.zeros(shape=(self.model.particle_count), dtype=wp.mat33)
-        self.state_1: State = self.model.state()
-        self.state_1.particle_qd_grad = wp.zeros(shape=(self.model.particle_count), dtype=wp.mat33)
+        try:
+            if opengl:
+                self.opengl_renderer = warp.render.OpenGLRenderer(
+                    screen_width=1024,
+                    screen_height=1024,
+                )
+        except Exception as err:
+            wp.utils.warn(f"Could not initialize OpenGL renderer: {err}.")
         try:
             if stage_path:
-                self.renderer = warp.sim.render.SimRenderer(self.model, stage_path, scaling=20.0)
-            else:
-                self.renderer = None
+                self.usd_renderer = warp.render.UsdRenderer(stage_path)
         except Exception as err:
-            print(f"Could not initialize SimRenderer for stage '{stage_path}': {err}.")
+            print(f"Could not initialize Usd renderer '{stage_path}': {err}.")
     def step(self):
         fem.set_default_temporary_store(self.temporary_store)
         self.current_frame = self.current_frame + 1
-        with wp.ScopedTimer(f"simulate frame {self.current_frame}", active=True):
+        with wp.ScopedTimer(f"simulate frame {self.current_frame}", synchronize=True):
             for _s in range(self.sim_substeps):
                 # Allocate the voxels and create the warp.fem geometry
                 volume = wp.Volume.allocate_by_voxels(
@@ -297,7 +304,7 @@ class Example:
                 # Bin particles to grid cells
                 pic = fem.PicQuadrature(
-                    domain=domain, positions=self.state_0.particle_q, measures=self.model.particle_mass
+                    domain=domain, positions=self.state_0.particle_q, measures=self.particle_volumes
                 )
                 # Compute inverse particle volume for each grid node
@@ -318,7 +325,7 @@ class Example:
                         "velocities": self.state_0.particle_qd,
                         "velocity_gradients": self.state_0.particle_qd_grad,
                         "dt": self.sim_dt,
-                        "gravity": self.model.gravity,
+                        "gravity": self.gravity,
                     },
                     output_dtype=wp.vec3,
                 )
@@ -377,16 +384,54 @@ class Example:
         fem.set_default_temporary_store(None)
+    @staticmethod
+    def _spawn_particles(res, bounds_lo, bounds_hi, packing_fraction):
+        Nx = res[0]
+        Ny = res[1]
+        Nz = res[2]
+        px = np.linspace(bounds_lo[0], bounds_hi[0], Nx + 1)
+        py = np.linspace(bounds_lo[1], bounds_hi[1], Ny + 1)
+        pz = np.linspace(bounds_lo[2], bounds_hi[2], Nz + 1)
+        points = np.stack(np.meshgrid(px, py, pz)).reshape(3, -1).T
+        cell_size = (bounds_hi - bounds_lo) / res
+        cell_volume = np.prod(cell_size)
+        radius = np.max(cell_size) * 0.5
+        volume = np.prod(cell_volume) * packing_fraction
+        rng = np.random.default_rng(42)
+        points += 2.0 * radius * (rng.random(points.shape) - 0.5)
+        volumes = wp.full(points.shape[0], volume, dtype=float)
+        points = wp.array(np.ascontiguousarray(points), dtype=wp.vec3)
+        return volumes, points
     def render(self, is_live=False):
-        if self.renderer is None:
+        if self.usd_renderer is None and self.opengl_renderer is None:
             return
-        with wp.ScopedTimer("render", active=True):
+        with wp.ScopedTimer("render", synchronize=True):
             time = self.current_frame * self.frame_dt
-            self.renderer.begin_frame(time)
-            self.renderer.render(self.state_0)
-            self.renderer.end_frame()
+            if self.usd_renderer is not None:
+                self.usd_renderer.begin_frame(time)
+                self.usd_renderer.render_points(
+                    "particles",
+                    self.state_0.particle_q.numpy(),
+                    radius=self.radius,
+                )
+                self.usd_renderer.end_frame()
+            if self.opengl_renderer is not None:
+                self.opengl_renderer.begin_frame(time)
+                self.opengl_renderer.render_points(
+                    "particles",
+                    self.state_0.particle_q,
+                    radius=self.radius,
+                )
+                self.opengl_renderer.end_frame()
 if __name__ == "__main__":
@@ -404,6 +449,7 @@ if __name__ == "__main__":
     )
     parser.add_argument("--num_frames", type=int, default=250, help="Total number of frames.")
     parser.add_argument("--quiet", action="store_true")
+    parser.add_argument("--opengl", action="store_true")
     parser.add_argument(
         "--voxel_size",
         type=float,
@@ -413,11 +459,11 @@ if __name__ == "__main__":
     args = parser.parse_known_args()[0]
     with wp.ScopedDevice(args.device):
-        example = Example(quiet=args.quiet, stage_path=args.stage_path, voxel_size=args.voxel_size)
+        example = Example(quiet=args.quiet, stage_path=args.stage_path, voxel_size=args.voxel_size, opengl=args.opengl)
         for _ in range(args.num_frames):
             example.step()
             example.render()
-        if example.renderer:
-            example.renderer.save()
+        if example.usd_renderer is not None:
+            example.usd_renderer.save()

warp/examples/fem/example_convection_diffusion_dg.py CHANGED Viewed

@@ -116,11 +116,11 @@ class Example:
             values={"ang_vel": ang_vel},
         )
-        side_test = fem.make_test(space=scalar_space, domain=sides)
+        self._side_test = fem.make_test(space=scalar_space, domain=sides)
         side_trial = fem.make_trial(space=scalar_space, domain=sides)
         fem.integrate(
             upwind_transport_form,
-            fields={"phi": side_trial, "psi": side_test},
+            fields={"phi": side_trial, "psi": self._side_test},
             values={"ang_vel": ang_vel},
             output=matrix_transport,
             add=True,
@@ -132,7 +132,7 @@ class Example:
         )
         matrix_diffusion += fem.integrate(
             sip_diffusion_form,
-            fields={"phi": side_trial, "psi": side_test},
+            fields={"phi": side_trial, "psi": self._side_test},
         )
         self._matrix = matrix_inertia + matrix_transport + viscosity * matrix_diffusion
@@ -140,8 +140,12 @@ class Example:
         self._phi_field = scalar_space.make_field()
         fem.interpolate(initial_condition, dest=self._phi_field)
+        self._phi_curvature_field = scalar_space.make_field()
+        self._compute_phi_curvature()
         self.renderer = fem_example_utils.Plot()
         self.renderer.add_field("phi", self._phi_field)
+        self.renderer.add_field("phi_curvature", self._phi_curvature_field)
     def step(self):
         self.current_frame += 1
@@ -154,12 +158,29 @@ class Example:
         phi = wp.zeros_like(rhs)
         fem_example_utils.bsr_cg(self._matrix, b=rhs, x=phi, method="bicgstab", quiet=self._quiet)
         wp.utils.array_cast(in_array=phi, out_array=self._phi_field.dof_values)
+        # for visualization purposes only
+        self._compute_phi_curvature()
+    def _compute_phi_curvature(self):
+        fem.integrate(
+            diffusion_form,
+            fields={"u": self._phi_field, "v": self._test},
+            output=self._phi_curvature_field.dof_values,
+        )
+        fem.integrate(
+            sip_diffusion_form,
+            fields={"phi": self._phi_field.trace(), "psi": self._side_test},
+            output=self._phi_curvature_field.dof_values,
+            add=True,
+        )
     def render(self):
         self.renderer.begin_frame(time=self.current_frame * self.sim_dt)
         self.renderer.add_field("phi", self._phi_field)
+        self.renderer.add_field("phi_curvature", self._phi_curvature_field)
         self.renderer.end_frame()

warp/examples/fem/example_diffusion_mgpu.py CHANGED Viewed

@@ -77,8 +77,7 @@ class DistributedSystem:
         stream = wp.get_stream()
         for mat_i, x_i, y_i, idx in zip(*self.rank_data):
-            # WAR copy with indexed array requiring matching shape
-            tmp_i = wp.array(ptr=tmp.ptr, device=tmp.device, capacity=tmp.capacity, dtype=tmp.dtype, shape=idx.shape)
+            tmp_i = tmp[: idx.size]
             # Compress rhs on rank 0
             x_idx = wp.indexedarray(x, idx)
@@ -161,7 +160,13 @@ class Example:
         with wp.ScopedDevice(main_device):
             fem_example_utils.bsr_cg(
-                A, x=global_res, b=glob_rhs, use_diag_precond=False, quiet=self._quiet, mv_routine=A.mv_routine
+                A,
+                x=global_res,
+                b=glob_rhs,
+                use_diag_precond=False,
+                quiet=self._quiet,
+                mv_routine=A.mv_routine,
+                mv_routine_uses_multiple_cuda_contexts=True,
             )
         array_cast(in_array=global_res, out_array=self._scalar_field.dof_values)

warp/examples/fem/utils.py CHANGED Viewed

@@ -14,12 +14,14 @@
 # limitations under the License.
+import gc
 from typing import Any, Dict, Optional, Tuple
 import numpy as np
 import warp as wp
 import warp.fem as fem
+from warp.context import assert_conditional_graph_support
 from warp.optim.linear import LinearOperator, aslinearoperator, preconditioner
 from warp.sparse import BsrMatrix, bsr_get_diag, bsr_mv, bsr_transposed
@@ -230,6 +232,7 @@ def bsr_cg(
     quiet=False,
     method: str = "cg",
     M: BsrMatrix = None,
+    mv_routine_uses_multiple_cuda_contexts: bool = False,
 ) -> Tuple[float, int]:
     """Solves the linear system A x = b using an iterative solver, optionally with diagonal preconditioning
@@ -244,6 +247,8 @@ def bsr_cg(
         mv_routine: Matrix-vector multiplication routine to use for multiplications with ``A``
         quiet: if True, do not print iteration residuals
         method: Iterative solver method to use, defaults to Conjugate Gradient
+        mv_routine_uses_multiple_cuda_contexts: Whether the matrix-vector multiplication routine uses multiple CUDA contexts,
+          which prevents the use of conditional CUDA graphs.
     Returns:
         Tuple (residual norm, iteration count)
@@ -260,10 +265,53 @@ def bsr_cg(
     func = _get_linear_solver_func(method_name=method)
-    def print_callback(i, err, tol):
-        print(f"{func.__name__}: at iteration {i} error = \t {err}  \t tol: {tol}")
+    callback = None
-    callback = None if quiet else print_callback
+    use_cuda_graph = A.device.is_cuda and not wp.config.verify_cuda
+    capturable = use_cuda_graph and not mv_routine_uses_multiple_cuda_contexts
+    if capturable:
+        try:
+            assert_conditional_graph_support()
+        except RuntimeError:
+            capturable = False
+    if not quiet:
+        if capturable:
+            @wp.func_native(snippet=f'printf("%s: ", "{func.__name__}");')
+            def print_method_name():
+                pass
+            @fem.cache.dynamic_kernel(suffix=f"{check_every}{func.__name__}")
+            def device_cg_callback(
+                cur_iter: wp.array(dtype=int),
+                err_sq: wp.array(dtype=Any),
+                atol_sq: wp.array(dtype=Any),
+            ):
+                if cur_iter[0] % check_every == 0:
+                    print_method_name()
+                    wp.printf(
+                        "at iteration %d error = \t %f  \t tol: %f\n",
+                        cur_iter[0],
+                        wp.sqrt(err_sq[0]),
+                        wp.sqrt(atol_sq[0]),
+                    )
+            if check_every > 0:
+                callback = device_cg_callback
+        else:
+            def print_callback(i, err, tol):
+                print(f"{func.__name__}: at iteration {i} error = \t {err}  \t tol: {tol}")
+            callback = print_callback
+    if use_cuda_graph:
+        # Temporarily disable garbage collection
+        # Garbage collection of externally-allocated objects during graph capture may lead to
+        # invalid operations or memory access errors.
+        gc.disable()
     end_iter, err, atol = func(
         A=A,
@@ -271,12 +319,20 @@ def bsr_cg(
         x=x,
         maxiter=max_iters,
         tol=tol,
-        check_every=check_every,
+        check_every=0 if capturable else check_every,
         M=M,
         callback=callback,
-        use_cuda_graph=not wp.config.verify_cuda,
+        use_cuda_graph=use_cuda_graph,
     )
+    if use_cuda_graph:
+        gc.enable()
+    if isinstance(end_iter, wp.array):
+        end_iter = end_iter.numpy()[0]
+        err = np.sqrt(err.numpy()[0])
+        atol = np.sqrt(atol.numpy()[0])
     if not quiet:
         res_str = "OK" if err <= atol else "TRUNCATED"
         print(f"{func.__name__}: terminated after {end_iter} iterations with error = \t {err} ({res_str})")
@@ -437,28 +493,18 @@ def bsr_solve_saddle(
     wp.copy(src=b_u, dest=saddle_system.u_slice(b))
     wp.copy(src=b_p, dest=saddle_system.p_slice(b))
-    func = _get_linear_solver_func(method_name=method)
-    def print_callback(i, err, tol):
-        print(f"{func.__name__}: at iteration {i} error = \t {err}  \t tol: {tol}")
-    callback = None if quiet else print_callback
-    end_iter, err, atol = func(
-        A=saddle_system,
-        b=b,
-        x=x,
-        maxiter=max_iters,
+    err, end_iter = bsr_cg(
+        saddle_system,
+        x,
+        b,
+        max_iters=max_iters,
         tol=tol,
         check_every=check_every,
+        quiet=quiet,
+        method=method,
         M=saddle_system.preconditioner,
-        callback=callback,
     )
-    if not quiet:
-        res_str = "OK" if err <= atol else "TRUNCATED"
-        print(f"{func.__name__}: terminated after {end_iter} iterations with absolute error = \t {err} ({res_str})")
     wp.copy(dest=x_u, src=saddle_system.u_slice(x))
     wp.copy(dest=x_p, src=saddle_system.p_slice(x))

warp/fabric.py CHANGED Viewed

@@ -211,7 +211,7 @@ class fabricarray(noncontiguous_array_base[T]):
                     allocator = self.device.get_allocator()
                     buckets_ptr = allocator.alloc(buckets_size)
                     cuda_stream = self.device.stream.cuda_stream
-                    runtime.core.memcpy_h2d(
+                    runtime.core.wp_memcpy_h2d(
                         self.device.context, buckets_ptr, ctypes.addressof(buckets), buckets_size, cuda_stream
                     )
                     self.deleter = allocator.deleter