PyPI - warp-lang - Versions diffs - 0.11.0__py3-none-manylinux2014_x86_64.whl → 1.0.0__py3-none-manylinux2014_x86_64.whl - Mend

warp-lang 0.11.0__py3-none-manylinux2014_x86_64.whl → 1.0.0__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (170) hide show

warp/__init__.py +8 -0
warp/bin/warp-clang.so +0 -0
warp/bin/warp.so +0 -0
warp/build.py +7 -6
warp/build_dll.py +70 -79
warp/builtins.py +10 -6
warp/codegen.py +51 -19
warp/config.py +7 -8
warp/constants.py +3 -0
warp/context.py +948 -245
warp/dlpack.py +198 -113
warp/examples/assets/bunny.usd +0 -0
warp/examples/assets/cartpole.urdf +110 -0
warp/examples/assets/crazyflie.usd +0 -0
warp/examples/assets/cube.usda +42 -0
warp/examples/assets/nv_ant.xml +92 -0
warp/examples/assets/nv_humanoid.xml +183 -0
warp/examples/assets/quadruped.urdf +268 -0
warp/examples/assets/rocks.nvdb +0 -0
warp/examples/assets/rocks.usd +0 -0
warp/examples/assets/sphere.usda +56 -0
warp/examples/assets/torus.usda +105 -0
warp/examples/benchmarks/benchmark_api.py +383 -0
warp/examples/benchmarks/benchmark_cloth.py +279 -0
warp/examples/benchmarks/benchmark_cloth_cupy.py +88 -0
warp/examples/benchmarks/benchmark_cloth_jax.py +100 -0
warp/examples/benchmarks/benchmark_cloth_numba.py +142 -0
warp/examples/benchmarks/benchmark_cloth_numpy.py +77 -0
warp/examples/benchmarks/benchmark_cloth_pytorch.py +86 -0
warp/examples/benchmarks/benchmark_cloth_taichi.py +112 -0
warp/examples/benchmarks/benchmark_cloth_warp.py +146 -0
warp/examples/benchmarks/benchmark_launches.py +295 -0
warp/examples/core/example_dem.py +221 -0
warp/examples/core/example_fluid.py +267 -0
warp/examples/core/example_graph_capture.py +129 -0
warp/examples/core/example_marching_cubes.py +177 -0
warp/examples/core/example_mesh.py +154 -0
warp/examples/core/example_mesh_intersect.py +193 -0
warp/examples/core/example_nvdb.py +169 -0
warp/examples/core/example_raycast.py +89 -0
warp/examples/core/example_raymarch.py +178 -0
warp/examples/core/example_render_opengl.py +141 -0
warp/examples/core/example_sph.py +389 -0
warp/examples/core/example_torch.py +181 -0
warp/examples/core/example_wave.py +249 -0
warp/examples/fem/bsr_utils.py +380 -0
warp/examples/fem/example_apic_fluid.py +391 -0
warp/examples/fem/example_convection_diffusion.py +168 -0
warp/examples/fem/example_convection_diffusion_dg.py +209 -0
warp/examples/fem/example_convection_diffusion_dg0.py +194 -0
warp/examples/fem/example_deformed_geometry.py +159 -0
warp/examples/fem/example_diffusion.py +173 -0
warp/examples/fem/example_diffusion_3d.py +152 -0
warp/examples/fem/example_diffusion_mgpu.py +214 -0
warp/examples/fem/example_mixed_elasticity.py +222 -0
warp/examples/fem/example_navier_stokes.py +243 -0
warp/examples/fem/example_stokes.py +192 -0
warp/examples/fem/example_stokes_transfer.py +249 -0
warp/examples/fem/mesh_utils.py +109 -0
warp/examples/fem/plot_utils.py +287 -0
warp/examples/optim/example_bounce.py +248 -0
warp/examples/optim/example_cloth_throw.py +210 -0
warp/examples/optim/example_diffray.py +535 -0
warp/examples/optim/example_drone.py +850 -0
warp/examples/optim/example_inverse_kinematics.py +169 -0
warp/examples/optim/example_inverse_kinematics_torch.py +170 -0
warp/examples/optim/example_spring_cage.py +234 -0
warp/examples/optim/example_trajectory.py +201 -0
warp/examples/sim/example_cartpole.py +128 -0
warp/examples/sim/example_cloth.py +184 -0
warp/examples/sim/example_granular.py +113 -0
warp/examples/sim/example_granular_collision_sdf.py +185 -0
warp/examples/sim/example_jacobian_ik.py +213 -0
warp/examples/sim/example_particle_chain.py +106 -0
warp/examples/sim/example_quadruped.py +179 -0
warp/examples/sim/example_rigid_chain.py +191 -0
warp/examples/sim/example_rigid_contact.py +176 -0
warp/examples/sim/example_rigid_force.py +126 -0
warp/examples/sim/example_rigid_gyroscopic.py +97 -0
warp/examples/sim/example_rigid_soft_contact.py +124 -0
warp/examples/sim/example_soft_body.py +178 -0
warp/fabric.py +29 -20
warp/fem/cache.py +0 -1
warp/fem/dirichlet.py +0 -2
warp/fem/integrate.py +0 -1
warp/jax.py +45 -0
warp/jax_experimental.py +339 -0
warp/native/builtin.h +12 -0
warp/native/bvh.cu +18 -18
warp/native/clang/clang.cpp +8 -3
warp/native/cuda_util.cpp +94 -5
warp/native/cuda_util.h +35 -6
warp/native/cutlass_gemm.cpp +1 -1
warp/native/cutlass_gemm.cu +4 -1
warp/native/error.cpp +66 -0
warp/native/error.h +27 -0
warp/native/mesh.cu +2 -2
warp/native/reduce.cu +4 -4
warp/native/runlength_encode.cu +2 -2
warp/native/scan.cu +2 -2
warp/native/sparse.cu +0 -1
warp/native/temp_buffer.h +2 -2
warp/native/warp.cpp +95 -60
warp/native/warp.cu +1053 -218
warp/native/warp.h +49 -32
warp/optim/linear.py +33 -16
warp/render/render_opengl.py +202 -101
warp/render/render_usd.py +82 -40
warp/sim/__init__.py +13 -4
warp/sim/articulation.py +4 -5
warp/sim/collide.py +320 -175
warp/sim/import_mjcf.py +25 -30
warp/sim/import_urdf.py +94 -63
warp/sim/import_usd.py +51 -36
warp/sim/inertia.py +3 -2
warp/sim/integrator.py +233 -0
warp/sim/integrator_euler.py +447 -469
warp/sim/integrator_featherstone.py +1991 -0
warp/sim/integrator_xpbd.py +1420 -640
warp/sim/model.py +765 -487
warp/sim/particles.py +2 -1
warp/sim/render.py +35 -13
warp/sim/utils.py +222 -11
warp/stubs.py +8 -0
warp/tape.py +16 -1
warp/tests/aux_test_grad_customs.py +23 -0
warp/tests/test_array.py +190 -1
warp/tests/test_async.py +656 -0
warp/tests/test_bool.py +50 -0
warp/tests/test_dlpack.py +164 -11
warp/tests/test_examples.py +166 -74
warp/tests/test_fem.py +8 -1
warp/tests/test_generics.py +15 -5
warp/tests/test_grad.py +1 -1
warp/tests/test_grad_customs.py +172 -12
warp/tests/test_jax.py +254 -0
warp/tests/test_large.py +29 -6
warp/tests/test_launch.py +25 -0
warp/tests/test_linear_solvers.py +20 -3
warp/tests/test_matmul.py +61 -16
warp/tests/test_matmul_lite.py +13 -13
warp/tests/test_mempool.py +186 -0
warp/tests/test_multigpu.py +3 -0
warp/tests/test_options.py +16 -2
warp/tests/test_peer.py +137 -0
warp/tests/test_print.py +3 -1
warp/tests/test_quat.py +23 -0
warp/tests/test_sim_kinematics.py +97 -0
warp/tests/test_snippet.py +126 -3
warp/tests/test_streams.py +108 -79
warp/tests/test_torch.py +16 -8
warp/tests/test_utils.py +32 -27
warp/tests/test_verify_fp.py +65 -0
warp/tests/test_volume.py +1 -1
warp/tests/unittest_serial.py +2 -0
warp/tests/unittest_suites.py +12 -0
warp/tests/unittest_utils.py +14 -7
warp/thirdparty/unittest_parallel.py +15 -3
warp/torch.py +10 -8
warp/types.py +363 -246
warp/utils.py +143 -19
warp_lang-1.0.0.dist-info/LICENSE.md +126 -0
warp_lang-1.0.0.dist-info/METADATA +394 -0
{warp_lang-0.11.0.dist-info → warp_lang-1.0.0.dist-info}/RECORD +167 -86
warp/sim/optimizer.py +0 -138
warp_lang-0.11.0.dist-info/LICENSE.md +0 -36
warp_lang-0.11.0.dist-info/METADATA +0 -238
/warp/tests/{walkthough_debug.py → walkthrough_debug.py} +0 -0
{warp_lang-0.11.0.dist-info → warp_lang-1.0.0.dist-info}/WHEEL +0 -0
{warp_lang-0.11.0.dist-info → warp_lang-1.0.0.dist-info}/top_level.txt +0 -0

warp/examples/benchmarks/benchmark_cloth_warp.py ADDED Viewed

@@ -0,0 +1,146 @@
+# Copyright (c) 2022 NVIDIA CORPORATION.  All rights reserved.
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.  Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+import warp as wp
+wp.init()
+wp.build.clear_kernel_cache()
+# Warp kernel, one thread per spring: adds the spring-damper force of spring
+# `tid` to the force accumulator `f` of its two end particles.
+#   x, v                 particle positions / velocities (only read here)
+#   spring_indices       flat index array; spring t uses entries 2*t and 2*t + 1
+#   spring_rest_lengths  rest length per spring
+#   spring_stiffness     stiffness coefficient (ke) per spring
+#   spring_damping       damping coefficient (kd) per spring
+#   f                    per-particle force accumulator, updated atomically
+@wp.kernel
+def eval_springs(
+    x: wp.array(dtype=wp.vec3),
+    v: wp.array(dtype=wp.vec3),
+    spring_indices: wp.array(dtype=int),
+    spring_rest_lengths: wp.array(dtype=float),
+    spring_stiffness: wp.array(dtype=float),
+    spring_damping: wp.array(dtype=float),
+    f: wp.array(dtype=wp.vec3),
+):
+    tid = wp.tid()
+    # indices of the two particles joined by this spring
+    i = spring_indices[tid * 2 + 0]
+    j = spring_indices[tid * 2 + 1]
+    ke = spring_stiffness[tid]
+    kd = spring_damping[tid]
+    rest = spring_rest_lengths[tid]
+    xi = x[i]
+    xj = x[j]
+    vi = v[i]
+    vj = v[j]
+    # relative position and velocity of particle i with respect to particle j
+    xij = xi - xj
+    vij = vi - vj
+    # current spring length
+    l = wp.length(xij)
+    # NOTE(review): there is no guard for l == 0 (coincident end points); the
+    # reciprocal below would then be a division by zero.
+    l_inv = 1.0 / l
+    # normalized spring direction
+    dir = xij * l_inv
+    # extension relative to the rest length
+    c = l - rest
+    # rate of change of the length: relative velocity projected on the direction
+    dcdt = wp.dot(dir, vij)
+    # elastic term (ke * c) plus damping term (kd * dcdt), along the spring direction
+    fs = dir * (ke * c + kd * dcdt)
+    # equal and opposite contributions; atomics presumably because several
+    # springs can reference the same particle
+    wp.atomic_sub(f, i, fs)
+    wp.atomic_add(f, j, fs)
+# Warp kernel, one thread per particle: advances position and velocity by one
+# step of length `dt` and then resets the particle's force accumulator.
+#   x, v  particle positions / velocities, updated in place
+#   f     accumulated force per particle, zeroed on exit
+#   w     inverse mass per particle
+#   dt    time step
+@wp.kernel
+def integrate_particles(
+    x: wp.array(dtype=wp.vec3),
+    v: wp.array(dtype=wp.vec3),
+    f: wp.array(dtype=wp.vec3),
+    w: wp.array(dtype=float),
+    dt: float,
+):
+    tid = wp.tid()
+    x0 = x[tid]
+    v0 = v[tid]
+    f0 = f[tid]
+    inv_mass = w[tid]
+    # gravity defaults to wp.vec3() and is only switched on for dynamic particles
+    g = wp.vec3()
+    # treat particles with inv_mass == 0 as kinematic
+    if inv_mass > 0.0:
+        # gravity of 9.81 along -y (written as 0.0 - 9.81)
+        g = wp.vec3(0.0, 0.0 - 9.81, 0.0)
+    # simple semi-implicit Euler. v1 = v0 + a dt, x1 = x0 + v1 dt
+    # (with inv_mass == 0 the acceleration term vanishes, so v1 == v0, but the
+    # position is still advanced by v1 * dt)
+    v1 = v0 + (f0 * inv_mass + g) * dt
+    x1 = x0 + v1 * dt
+    x[tid] = x1
+    v[tid] = v1
+    # clear forces
+    f[tid] = wp.vec3()
+class WpIntegrator:
+    def __init__(self, cloth, device):
+        self.device = wp.get_device(device)
+        with wp.ScopedDevice(self.device):
+            self.positions = wp.from_numpy(cloth.positions, dtype=wp.vec3)
+            self.positions_host = wp.from_numpy(cloth.positions, dtype=wp.vec3, device="cpu")
+            self.invmass = wp.from_numpy(cloth.inv_masses, dtype=float)
+            self.velocities = wp.zeros(cloth.num_particles, dtype=wp.vec3)
+            self.forces = wp.zeros(cloth.num_particles, dtype=wp.vec3)
+            self.spring_indices = wp.from_numpy(cloth.spring_indices, dtype=int)
+            self.spring_lengths = wp.from_numpy(cloth.spring_lengths, dtype=float)
+            self.spring_stiffness = wp.from_numpy(cloth.spring_stiffness, dtype=float)
+            self.spring_damping = wp.from_numpy(cloth.spring_damping, dtype=float)
+        self.cloth = cloth
+    def simulate(self, dt, substeps):
+        sim_dt = dt / substeps
+        for s in range(substeps):
+            wp.launch(
+                kernel=eval_springs,
+                dim=self.cloth.num_springs,
+                inputs=[
+                    self.positions,
+                    self.velocities,
+                    self.spring_indices,
+                    self.spring_lengths,
+                    self.spring_stiffness,
+                    self.spring_damping,
+                    self.forces,
+                ],
+                outputs=[],
+                device=self.device,
+            )
+            # integrate
+            wp.launch(
+                kernel=integrate_particles,
+                dim=self.cloth.num_particles,
+                inputs=[self.positions, self.velocities, self.forces, self.invmass, sim_dt],
+                outputs=[],
+                device=self.device,
+            )
+        # copy data back to host
+        if self.device.is_cuda:
+            wp.copy(self.positions_host, self.positions)
+            wp.synchronize()
+            return self.positions_host.numpy()
+        else:
+            return self.positions.numpy()

warp/examples/benchmarks/benchmark_launches.py ADDED Viewed

@@ -0,0 +1,295 @@
+# Copyright (c) 2023 NVIDIA CORPORATION.  All rights reserved.
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.  Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+###########################################################################
+# Benchmarks for kernel launches with different types of args
+###########################################################################
+import warp as wp
+# Warp struct with no fields; the argument type of the `ks0` kernel.
+@wp.struct
+class S0:
+    pass
+# Warp struct of three scalar floats; bundles the same (x, y, z) values that
+# the `kf` kernel receives as separate arguments.
+@wp.struct
+class Sf:
+    x: float
+    y: float
+    z: float
+# Warp struct of three vec3 fields; bundles the same (u, v, w) values that the
+# `kv` kernel receives as separate arguments.
+@wp.struct
+class Sv:
+    u: wp.vec3
+    v: wp.vec3
+    w: wp.vec3
+# Warp struct of three mat33 fields; bundles the same (M, N, O) values that the
+# `km` kernel receives as separate arguments.
+@wp.struct
+class Sm:
+    M: wp.mat33
+    N: wp.mat33
+    O: wp.mat33
+# Warp struct of three float arrays; bundles the same (a, b, c) arrays that the
+# `ka` kernel receives as separate arguments.
+@wp.struct
+class Sa:
+    a: wp.array(dtype=float)
+    b: wp.array(dtype=float)
+    c: wp.array(dtype=float)
+# Warp struct mixing all argument kinds used in this benchmark (three float
+# arrays, three floats, three vec3); mirrors the argument list of `kz`.
+@wp.struct
+class Sz:
+    a: wp.array(dtype=float)
+    b: wp.array(dtype=float)
+    c: wp.array(dtype=float)
+    x: float
+    y: float
+    z: float
+    u: wp.vec3
+    v: wp.vec3
+    w: wp.vec3
+# Kernel with no arguments. The body only reads the thread index, so launching
+# it presumably measures the bare launch cost — this file benchmarks launches.
+@wp.kernel
+def k0():
+    tid = wp.tid()
+# Kernel taking three scalar floats directly; the arguments are not used in
+# the body, which only reads the thread index.
+@wp.kernel
+def kf(x: float, y: float, z: float):
+    tid = wp.tid()
+# Kernel taking three vec3 values directly; the arguments are not used in the
+# body, which only reads the thread index.
+@wp.kernel
+def kv(u: wp.vec3, v: wp.vec3, w: wp.vec3):
+    tid = wp.tid()
+# Kernel taking three mat33 values directly; the arguments are not used in the
+# body, which only reads the thread index.
+@wp.kernel
+def km(M: wp.mat33, N: wp.mat33, O: wp.mat33):
+    tid = wp.tid()
+# Kernel taking three float arrays directly; the arrays are neither read nor
+# written in the body, which only reads the thread index.
+@wp.kernel
+def ka(a: wp.array(dtype=float), b: wp.array(dtype=float), c: wp.array(dtype=float)):
+    tid = wp.tid()
+# Kernel taking the full mix of argument kinds directly (three float arrays,
+# three floats, three vec3); none of them is used in the body, which only
+# reads the thread index.
+@wp.kernel
+def kz(
+    a: wp.array(dtype=float),
+    b: wp.array(dtype=float),
+    c: wp.array(dtype=float),
+    x: float,
+    y: float,
+    z: float,
+    u: wp.vec3,
+    v: wp.vec3,
+    w: wp.vec3,
+):
+    tid = wp.tid()
+# Kernel taking the empty struct S0; the struct is not used in the body, which
+# only reads the thread index.
+@wp.kernel
+def ks0(s: S0):
+    tid = wp.tid()
+# Kernel taking the float struct Sf (struct form of `kf`'s arguments); the
+# struct is not used in the body, which only reads the thread index.
+@wp.kernel
+def ksf(s: Sf):
+    tid = wp.tid()
+# Kernel taking the vec3 struct Sv (struct form of `kv`'s arguments); the
+# struct is not used in the body, which only reads the thread index.
+@wp.kernel
+def ksv(s: Sv):
+    tid = wp.tid()
+# Kernel taking the mat33 struct Sm (struct form of `km`'s arguments); the
+# struct is not used in the body, which only reads the thread index.
+@wp.kernel
+def ksm(s: Sm):
+    tid = wp.tid()
+# Kernel taking the array struct Sa (struct form of `ka`'s arguments); the
+# struct is not used in the body, which only reads the thread index.
+@wp.kernel
+def ksa(s: Sa):
+    tid = wp.tid()
+# Kernel taking the mixed struct Sz (struct form of `kz`'s arguments); the
+# struct is not used in the body, which only reads the thread index.
+@wp.kernel
+def ksz(s: Sz):
+    tid = wp.tid()
+# Benchmark driver: for every available device, time `num_launches` launches
+# of each kernel above, once with directly passed arguments and once with the
+# same values packed into a struct, then print the two timings side by side.
+wp.init()
+# Going by the call name this empties Warp's kernel cache — presumably so the
+# kernels are rebuilt for this run; confirm.
+wp.build.clear_kernel_cache()
+devices = wp.get_devices()
+# number of launches timed per kernel
+num_launches = 100000
+for device in devices:
+    with wp.ScopedDevice(device):
+        print(f"\n=================== Device '{device}' ===================")
+        # presumably loads/compiles the modules up front so that build time is
+        # not part of the timed loops below — confirm against wp.force_load
+        wp.force_load(device)
+        # argument values shared by the direct-argument and struct variants
+        n = 1
+        a = wp.zeros(n, dtype=float)
+        b = wp.zeros(n, dtype=float)
+        c = wp.zeros(n, dtype=float)
+        x = 17.0
+        y = 42.0
+        z = 99.0
+        u = wp.vec3(1, 2, 3)
+        v = wp.vec3(10, 20, 30)
+        w = wp.vec3(100, 200, 300)
+        M = wp.mat33()
+        N = wp.mat33()
+        O = wp.mat33()
+        # struct instances filled with the same values as the direct arguments
+        s0 = S0()
+        sf = Sf()
+        sf.x = x
+        sf.y = y
+        sf.z = z
+        sv = Sv()
+        sv.u = u
+        sv.v = v
+        sv.w = w
+        sm = Sm()
+        sm.M = M
+        sm.N = N
+        sm.O = O
+        sa = Sa()
+        sa.a = a
+        sa.b = b
+        sa.c = c
+        sz = Sz()
+        sz.a = a
+        sz.b = b
+        sz.c = c
+        sz.x = x
+        sz.y = y
+        sz.z = z
+        sz.u = u
+        sz.v = v
+        sz.w = w
+        # one timer per kernel, created up front and entered later; the
+        # "k*" timer names are reused as row labels in the result table
+        tk0 = wp.ScopedTimer("k0")
+        tkf = wp.ScopedTimer("kf")
+        tkv = wp.ScopedTimer("kv")
+        tkm = wp.ScopedTimer("km")
+        tka = wp.ScopedTimer("ka")
+        tkz = wp.ScopedTimer("kz")
+        ts0 = wp.ScopedTimer("s0")
+        tsf = wp.ScopedTimer("sf")
+        tsv = wp.ScopedTimer("sv")
+        tsm = wp.ScopedTimer("sm")
+        tsa = wp.ScopedTimer("sa")
+        tsz = wp.ScopedTimer("sz")
+        # direct arguments
+        # Each timed loop is preceded by a device synchronization and nothing
+        # inside a timed region synchronizes, so on an asynchronous device the
+        # timers presumably capture launch submission cost rather than kernel
+        # execution time — confirm against wp.ScopedTimer's defaults.
+        wp.synchronize_device()
+        with tk0:
+            for _ in range(num_launches):
+                wp.launch(k0, dim=1, inputs=[])
+        wp.synchronize_device()
+        with tkf:
+            for _ in range(num_launches):
+                wp.launch(kf, dim=1, inputs=[x, y, z])
+        wp.synchronize_device()
+        with tkv:
+            for _ in range(num_launches):
+                wp.launch(kv, dim=1, inputs=[u, v, w])
+        wp.synchronize_device()
+        with tkm:
+            for _ in range(num_launches):
+                wp.launch(km, dim=1, inputs=[M, N, O])
+        wp.synchronize_device()
+        with tka:
+            for _ in range(num_launches):
+                wp.launch(ka, dim=1, inputs=[a, b, c])
+        wp.synchronize_device()
+        with tkz:
+            for _ in range(num_launches):
+                wp.launch(kz, dim=1, inputs=[a, b, c, x, y, z, u, v, w])
+        # structs
+        wp.synchronize_device()
+        with ts0:
+            for _ in range(num_launches):
+                wp.launch(ks0, dim=1, inputs=[s0])
+        wp.synchronize_device()
+        with tsf:
+            for _ in range(num_launches):
+                wp.launch(ksf, dim=1, inputs=[sf])
+        wp.synchronize_device()
+        with tsv:
+            for _ in range(num_launches):
+                wp.launch(ksv, dim=1, inputs=[sv])
+        wp.synchronize_device()
+        with tsm:
+            for _ in range(num_launches):
+                wp.launch(ksm, dim=1, inputs=[sm])
+        wp.synchronize_device()
+        with tsa:
+            for _ in range(num_launches):
+                wp.launch(ksa, dim=1, inputs=[sa])
+        wp.synchronize_device()
+        with tsz:
+            for _ in range(num_launches):
+                wp.launch(ksz, dim=1, inputs=[sz])
+        wp.synchronize_device()
+        # pair each direct-argument timer with its struct counterpart
+        timers = [
+            [tk0, ts0],
+            [tkf, tsf],
+            [tkv, tsv],
+            [tkm, tsm],
+            [tka, tsa],
+            [tkz, tsz],
+        ]
+        # result table: one row per argument kind, labelled with the
+        # direct-argument timer name; `elapsed` is printed without decimals
+        # (presumably milliseconds — confirm against wp.ScopedTimer)
+        print("--------------------------------")
+        print("| args |    direct |    struct |")
+        print("--------------------------------")
+        for tk, ts in timers:
+            print(f"|  {tk.name}  |{tk.elapsed:10.0f} |{ts.elapsed:10.0f} |")
+        print("--------------------------------")

warp/examples/core/example_dem.py ADDED Viewed

@@ -0,0 +1,221 @@
+# Copyright (c) 2022 NVIDIA CORPORATION.  All rights reserved.
+# NVIDIA CORPORATION and its licensors retain all intellectual property
+# and proprietary rights in and to this software, related documentation
+# and any modifications thereto.  Any use, reproduction, disclosure or
+# distribution of this software and related documentation without an express
+# license agreement from NVIDIA CORPORATION is strictly prohibited.
+###########################################################################
+# Example DEM
+#
+# Shows how to implement a DEM particle simulation with cohesion between
+# particles. Neighbors are found using the wp.HashGrid class, and
+# wp.hash_grid_query(), wp.hash_grid_query_next() kernel methods.
+#
+###########################################################################
+import os
+import numpy as np
+import warp as wp
+import warp.render
+wp.init()
+# Warp device function: contact force for one contact, made of a normal
+# (penalty + damping) part and a friction part.
+#   n     contact normal
+#   v     (relative) velocity at the contact
+#   c     signed contact distance; multiplied directly by k_n below
+#   k_n   normal stiffness
+#   k_d   normal damping coefficient
+#   k_f   friction coefficient applied to the tangential speed
+#   k_mu  coefficient of the Coulomb limit on the friction magnitude
+# Returns the force vector -n * fn - vt * ft.
+@wp.func
+def contact_force(n: wp.vec3, v: wp.vec3, c: float, k_n: float, k_d: float, k_f: float, k_mu: float):
+    # normal component of the velocity
+    vn = wp.dot(n, v)
+    # penalty term proportional to the contact distance
+    jn = c * k_n
+    # damping term; min(vn, 0.0) makes it act only while vn is negative
+    # NOTE(review): this uses the builtin-style `min` while the friction term
+    # below uses `wp.min` — inconsistent spelling, same intent
+    jd = min(vn, 0.0) * k_d
+    # contact force
+    fn = jn + jd
+    # friction force
+    # tangential velocity and its magnitude
+    vt = v - n * vn
+    vs = wp.length(vt)
+    # normalize the tangential direction; skipped when vs == 0 to avoid dividing by zero
+    if vs > 0.0:
+        vt = vt / vs
+    # Coulomb condition
+    ft = wp.min(vs * k_f, k_mu * wp.abs(fn))
+    # total force
+    return -n * fn - vt * ft
+# Warp kernel, one thread per particle: computes the total contact force on a
+# particle (ground plus neighboring particles) and stores it in particle_f.
+#   grid        id of the hash grid used for the neighbor query
+#   particle_x  particle positions
+#   particle_v  particle velocities
+#   particle_f  output force per particle (overwritten, not accumulated)
+#   radius      particle radius
+#   k_contact, k_damp, k_friction, k_mu  contact parameters passed on to contact_force
+@wp.kernel
+def apply_forces(
+    grid: wp.uint64,
+    particle_x: wp.array(dtype=wp.vec3),
+    particle_v: wp.array(dtype=wp.vec3),
+    particle_f: wp.array(dtype=wp.vec3),
+    radius: float,
+    k_contact: float,
+    k_damp: float,
+    k_friction: float,
+    k_mu: float,
+):
+    tid = wp.tid()
+    # order threads by cell
+    i = wp.hash_grid_point_id(grid, tid)
+    x = particle_x[i]
+    v = particle_v[i]
+    f = wp.vec3()
+    # ground contact
+    # the ground is the plane through the origin with normal +y, so c is the
+    # height of the particle center above it
+    n = wp.vec3(0.0, 1.0, 0.0)
+    c = wp.dot(n, x)
+    # distance thresholds below which ground / particle contact forces apply
+    cohesion_ground = 0.02
+    cohesion_particle = 0.0075
+    if c < cohesion_ground:
+        # NOTE(review): ground contact uses hard-coded friction values
+        # (100.0, 0.5) instead of the k_friction / k_mu arguments
+        f = f + contact_force(n, v, c, k_contact, k_damp, 100.0, 0.5)
+    # particle contact
+    # query all grid points within 5 radii of this particle
+    neighbors = wp.hash_grid_query(grid, x, radius * 5.0)
+    for index in neighbors:
+        # skip the particle itself
+        if index != i:
+            # compute distance to point
+            n = x - particle_x[index]
+            d = wp.length(n)
+            # gap between the two sphere surfaces (negative when overlapping)
+            err = d - radius * 2.0
+            # contact also applies for small positive gaps up to the cohesion
+            # distance; there the penalty term in contact_force changes sign
+            if err <= cohesion_particle:
+                # NOTE(review): no guard for d == 0 (two particles at the same
+                # position); the division below would then divide by zero
+                n = n / d
+                vrel = v - particle_v[index]
+                f = f + contact_force(n, vrel, err, k_contact, k_damp, k_friction, k_mu)
+    particle_f[i] = f
+# Warp kernel, one thread per particle: advances velocity and position by one
+# time step; the velocity is updated first and the new velocity moves the position.
+#   x, v      particle positions / velocities, updated in place
+#   f         force per particle (read only here)
+#   gravity   gravitational acceleration vector
+#   dt        time step
+#   inv_mass  inverse mass, the same for every particle
+@wp.kernel
+def integrate(
+    x: wp.array(dtype=wp.vec3),
+    v: wp.array(dtype=wp.vec3),
+    f: wp.array(dtype=wp.vec3),
+    gravity: wp.vec3,
+    dt: float,
+    inv_mass: float,
+):
+    tid = wp.tid()
+    # v_new = v + (f / m + g) * dt
+    v_new = v[tid] + f[tid] * inv_mass * dt + gravity * dt
+    # x_new = x + v_new * dt
+    x_new = x[tid] + v_new * dt
+    v[tid] = v_new
+    x[tid] = x_new
+class Example:
+    def __init__(self, stage):
+        self.frame_dt = 1.0 / 60
+        self.frame_count = 400
+        self.sim_substeps = 64
+        self.sim_dt = self.frame_dt / self.sim_substeps
+        self.sim_steps = self.frame_count * self.sim_substeps
+        self.sim_time = 0.0
+        self.point_radius = 0.1
+        self.k_contact = 8000.0
+        self.k_damp = 2.0
+        self.k_friction = 1.0
+        self.k_mu = 100000.0  # for cohesive materials
+        self.inv_mass = 64.0
+        self.grid = wp.HashGrid(128, 128, 128)
+        self.grid_cell_size = self.point_radius * 5.0
+        self.points = self.particle_grid(32, 128, 32, (0.0, 0.3, 0.0), self.point_radius, 0.1)
+        self.x = wp.array(self.points, dtype=wp.vec3)
+        self.v = wp.array(np.ones([len(self.x), 3]) * np.array([0.0, 0.0, 10.0]), dtype=wp.vec3)
+        self.f = wp.zeros_like(self.v)
+        self.renderer = None
+        if stage is not None:
+            self.renderer = wp.render.UsdRenderer(stage)
+            self.renderer.render_ground()
+        self.use_graph = wp.get_device().is_cuda
+        if self.use_graph:
+            with wp.ScopedCapture() as capture:
+                self.simulate()
+            self.graph = capture.graph
+    def simulate(self):
+        for _ in range(self.sim_substeps):
+            wp.launch(
+                kernel=apply_forces,
+                dim=len(self.x),
+                inputs=[
+                    self.grid.id,
+                    self.x,
+                    self.v,
+                    self.f,
+                    self.point_radius,
+                    self.k_contact,
+                    self.k_damp,
+                    self.k_friction,
+                    self.k_mu,
+                ],
+            )
+            wp.launch(
+                kernel=integrate,
+                dim=len(self.x),
+                inputs=[self.x, self.v, self.f, (0.0, -9.8, 0.0), self.sim_dt, self.inv_mass],
+            )
+    def step(self):
+        with wp.ScopedTimer("step", active=True):
+            with wp.ScopedTimer("grid build", active=False):
+                self.grid.build(self.x, self.grid_cell_size)
+            if self.use_graph:
+                wp.capture_launch(self.graph)
+            else:
+                self.simulate()
+            self.sim_time += self.frame_dt
+    def render(self):
+        if self.renderer is None:
+            return
+        with wp.ScopedTimer("render", active=True):
+            self.renderer.begin_frame(self.sim_time)
+            self.renderer.render_points(points=self.x.numpy(), radius=self.point_radius, name="points", colors=((0.8, 0.3, 0.2),) * len(self.x))
+            self.renderer.end_frame()
+    # creates a grid of particles
+    def particle_grid(self, dim_x, dim_y, dim_z, lower, radius, jitter):
+        points = np.meshgrid(np.linspace(0, dim_x, dim_x), np.linspace(0, dim_y, dim_y), np.linspace(0, dim_z, dim_z))
+        points_t = np.array((points[0], points[1], points[2])).T * radius * 2.0 + np.array(lower)
+        points_t = points_t + np.random.rand(*points_t.shape) * radius * jitter
+        return points_t.reshape((-1, 3))
+if __name__ == "__main__":
+    stage_path = os.path.join(os.path.dirname(__file__), "example_dem.usd")
+    example = Example(stage_path)
+    for i in range(example.frame_count):
+        example.step()
+        example.render()
+    if example.renderer:
+        example.renderer.save()