warp-lang 1.6.2__py3-none-macosx_10_13_universal2.whl → 1.7.0__py3-none-macosx_10_13_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (179)
  1. warp/__init__.py +7 -1
  2. warp/bin/libwarp-clang.dylib +0 -0
  3. warp/bin/libwarp.dylib +0 -0
  4. warp/build.py +410 -0
  5. warp/build_dll.py +6 -14
  6. warp/builtins.py +452 -362
  7. warp/codegen.py +179 -119
  8. warp/config.py +42 -6
  9. warp/context.py +490 -271
  10. warp/dlpack.py +8 -6
  11. warp/examples/assets/nonuniform.usd +0 -0
  12. warp/examples/assets/nvidia_logo.png +0 -0
  13. warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
  14. warp/examples/core/example_sample_mesh.py +300 -0
  15. warp/examples/fem/example_apic_fluid.py +1 -1
  16. warp/examples/fem/example_burgers.py +2 -2
  17. warp/examples/fem/example_deformed_geometry.py +1 -1
  18. warp/examples/fem/example_distortion_energy.py +1 -1
  19. warp/examples/fem/example_magnetostatics.py +6 -6
  20. warp/examples/fem/utils.py +9 -3
  21. warp/examples/interop/example_jax_callable.py +116 -0
  22. warp/examples/interop/example_jax_ffi_callback.py +132 -0
  23. warp/examples/interop/example_jax_kernel.py +205 -0
  24. warp/examples/optim/example_fluid_checkpoint.py +497 -0
  25. warp/examples/tile/example_tile_matmul.py +2 -4
  26. warp/fem/__init__.py +11 -1
  27. warp/fem/adaptivity.py +4 -4
  28. warp/fem/field/nodal_field.py +22 -68
  29. warp/fem/field/virtual.py +62 -23
  30. warp/fem/geometry/adaptive_nanogrid.py +9 -10
  31. warp/fem/geometry/closest_point.py +1 -1
  32. warp/fem/geometry/deformed_geometry.py +5 -2
  33. warp/fem/geometry/geometry.py +5 -0
  34. warp/fem/geometry/grid_2d.py +12 -12
  35. warp/fem/geometry/grid_3d.py +12 -15
  36. warp/fem/geometry/hexmesh.py +5 -7
  37. warp/fem/geometry/nanogrid.py +9 -11
  38. warp/fem/geometry/quadmesh.py +13 -13
  39. warp/fem/geometry/tetmesh.py +3 -4
  40. warp/fem/geometry/trimesh.py +3 -8
  41. warp/fem/integrate.py +262 -93
  42. warp/fem/linalg.py +5 -5
  43. warp/fem/quadrature/pic_quadrature.py +37 -22
  44. warp/fem/quadrature/quadrature.py +194 -25
  45. warp/fem/space/__init__.py +1 -1
  46. warp/fem/space/basis_function_space.py +4 -2
  47. warp/fem/space/basis_space.py +25 -18
  48. warp/fem/space/hexmesh_function_space.py +2 -2
  49. warp/fem/space/partition.py +6 -2
  50. warp/fem/space/quadmesh_function_space.py +8 -8
  51. warp/fem/space/shape/cube_shape_function.py +23 -23
  52. warp/fem/space/shape/square_shape_function.py +12 -12
  53. warp/fem/space/shape/triangle_shape_function.py +1 -1
  54. warp/fem/space/tetmesh_function_space.py +3 -3
  55. warp/fem/space/trimesh_function_space.py +2 -2
  56. warp/fem/utils.py +12 -6
  57. warp/jax.py +14 -1
  58. warp/jax_experimental/__init__.py +16 -0
  59. warp/{jax_experimental.py → jax_experimental/custom_call.py} +14 -27
  60. warp/jax_experimental/ffi.py +698 -0
  61. warp/jax_experimental/xla_ffi.py +602 -0
  62. warp/math.py +89 -0
  63. warp/native/array.h +13 -0
  64. warp/native/builtin.h +29 -3
  65. warp/native/bvh.cpp +3 -1
  66. warp/native/bvh.cu +42 -14
  67. warp/native/bvh.h +2 -1
  68. warp/native/clang/clang.cpp +30 -3
  69. warp/native/cuda_util.cpp +14 -0
  70. warp/native/cuda_util.h +2 -0
  71. warp/native/exports.h +68 -63
  72. warp/native/intersect.h +26 -26
  73. warp/native/intersect_adj.h +33 -33
  74. warp/native/marching.cu +1 -1
  75. warp/native/mat.h +513 -9
  76. warp/native/mesh.h +10 -10
  77. warp/native/quat.h +99 -11
  78. warp/native/rand.h +6 -0
  79. warp/native/sort.cpp +122 -59
  80. warp/native/sort.cu +152 -15
  81. warp/native/sort.h +8 -1
  82. warp/native/sparse.cpp +43 -22
  83. warp/native/sparse.cu +52 -17
  84. warp/native/svd.h +116 -0
  85. warp/native/tile.h +301 -105
  86. warp/native/tile_reduce.h +46 -3
  87. warp/native/vec.h +68 -7
  88. warp/native/volume.cpp +85 -113
  89. warp/native/volume_builder.cu +25 -10
  90. warp/native/volume_builder.h +6 -0
  91. warp/native/warp.cpp +5 -6
  92. warp/native/warp.cu +99 -10
  93. warp/native/warp.h +19 -10
  94. warp/optim/linear.py +10 -10
  95. warp/sim/articulation.py +4 -4
  96. warp/sim/collide.py +21 -10
  97. warp/sim/import_mjcf.py +449 -155
  98. warp/sim/import_urdf.py +32 -12
  99. warp/sim/integrator_euler.py +5 -5
  100. warp/sim/integrator_featherstone.py +3 -10
  101. warp/sim/integrator_vbd.py +207 -2
  102. warp/sim/integrator_xpbd.py +5 -5
  103. warp/sim/model.py +42 -13
  104. warp/sim/utils.py +2 -2
  105. warp/sparse.py +642 -555
  106. warp/stubs.py +216 -19
  107. warp/tests/__main__.py +0 -15
  108. warp/tests/cuda/__init__.py +0 -0
  109. warp/tests/{test_mempool.py → cuda/test_mempool.py} +39 -0
  110. warp/tests/{test_streams.py → cuda/test_streams.py} +71 -0
  111. warp/tests/geometry/__init__.py +0 -0
  112. warp/tests/{test_mesh_query_point.py → geometry/test_mesh_query_point.py} +66 -63
  113. warp/tests/{test_mesh_query_ray.py → geometry/test_mesh_query_ray.py} +1 -1
  114. warp/tests/{test_volume.py → geometry/test_volume.py} +41 -6
  115. warp/tests/interop/__init__.py +0 -0
  116. warp/tests/{test_dlpack.py → interop/test_dlpack.py} +28 -5
  117. warp/tests/sim/__init__.py +0 -0
  118. warp/tests/{disabled_kinematics.py → sim/disabled_kinematics.py} +9 -10
  119. warp/tests/{test_collision.py → sim/test_collision.py} +2 -2
  120. warp/tests/{test_model.py → sim/test_model.py} +40 -0
  121. warp/tests/{test_sim_kinematics.py → sim/test_sim_kinematics.py} +2 -1
  122. warp/tests/sim/test_vbd.py +597 -0
  123. warp/tests/test_bool.py +1 -1
  124. warp/tests/test_examples.py +28 -36
  125. warp/tests/test_fem.py +23 -4
  126. warp/tests/test_linear_solvers.py +0 -11
  127. warp/tests/test_mat.py +233 -79
  128. warp/tests/test_mat_scalar_ops.py +4 -4
  129. warp/tests/test_overwrite.py +0 -60
  130. warp/tests/test_quat.py +67 -46
  131. warp/tests/test_rand.py +44 -37
  132. warp/tests/test_sparse.py +47 -6
  133. warp/tests/test_spatial.py +75 -0
  134. warp/tests/test_static.py +1 -1
  135. warp/tests/test_utils.py +84 -4
  136. warp/tests/test_vec.py +46 -34
  137. warp/tests/tile/__init__.py +0 -0
  138. warp/tests/{test_tile.py → tile/test_tile.py} +136 -51
  139. warp/tests/{test_tile_load.py → tile/test_tile_load.py} +1 -1
  140. warp/tests/{test_tile_mathdx.py → tile/test_tile_mathdx.py} +9 -6
  141. warp/tests/{test_tile_mlp.py → tile/test_tile_mlp.py} +25 -14
  142. warp/tests/{test_tile_reduce.py → tile/test_tile_reduce.py} +60 -1
  143. warp/tests/{test_tile_view.py → tile/test_tile_view.py} +1 -1
  144. warp/tests/unittest_serial.py +1 -0
  145. warp/tests/unittest_suites.py +45 -59
  146. warp/tests/unittest_utils.py +2 -1
  147. warp/thirdparty/unittest_parallel.py +3 -1
  148. warp/types.py +110 -658
  149. warp/utils.py +137 -72
  150. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/METADATA +29 -7
  151. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/RECORD +172 -162
  152. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/WHEEL +1 -1
  153. warp/examples/optim/example_walker.py +0 -317
  154. warp/native/cutlass_gemm.cpp +0 -43
  155. warp/native/cutlass_gemm.cu +0 -382
  156. warp/tests/test_matmul.py +0 -511
  157. warp/tests/test_matmul_lite.py +0 -411
  158. warp/tests/test_vbd.py +0 -386
  159. warp/tests/unused_test_misc.py +0 -77
  160. /warp/tests/{test_async.py → cuda/test_async.py} +0 -0
  161. /warp/tests/{test_ipc.py → cuda/test_ipc.py} +0 -0
  162. /warp/tests/{test_multigpu.py → cuda/test_multigpu.py} +0 -0
  163. /warp/tests/{test_peer.py → cuda/test_peer.py} +0 -0
  164. /warp/tests/{test_pinned.py → cuda/test_pinned.py} +0 -0
  165. /warp/tests/{test_bvh.py → geometry/test_bvh.py} +0 -0
  166. /warp/tests/{test_hash_grid.py → geometry/test_hash_grid.py} +0 -0
  167. /warp/tests/{test_marching_cubes.py → geometry/test_marching_cubes.py} +0 -0
  168. /warp/tests/{test_mesh.py → geometry/test_mesh.py} +0 -0
  169. /warp/tests/{test_mesh_query_aabb.py → geometry/test_mesh_query_aabb.py} +0 -0
  170. /warp/tests/{test_volume_write.py → geometry/test_volume_write.py} +0 -0
  171. /warp/tests/{test_jax.py → interop/test_jax.py} +0 -0
  172. /warp/tests/{test_paddle.py → interop/test_paddle.py} +0 -0
  173. /warp/tests/{test_torch.py → interop/test_torch.py} +0 -0
  174. /warp/tests/{flaky_test_sim_grad.py → sim/flaky_test_sim_grad.py} +0 -0
  175. /warp/tests/{test_coloring.py → sim/test_coloring.py} +0 -0
  176. /warp/tests/{test_sim_grad_bounce_linear.py → sim/test_sim_grad_bounce_linear.py} +0 -0
  177. /warp/tests/{test_tile_shared_memory.py → tile/test_tile_shared_memory.py} +0 -0
  178. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info/licenses}/LICENSE.md +0 -0
  179. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/top_level.txt +0 -0
warp/utils.py CHANGED
@@ -13,13 +13,15 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.
 
+ from __future__ import annotations
+
  import cProfile
  import ctypes
  import os
  import sys
  import time
  import warnings
- from typing import Any, Optional
+ from typing import Any, Callable, Dict, List, Optional, Union
 
  import numpy as np
 
@@ -141,6 +143,8 @@ def radix_sort_pairs(keys, values, count: int):
              runtime.core.radix_sort_pairs_int_host(keys.ptr, values.ptr, count)
          elif keys.dtype == wp.float32 and values.dtype == wp.int32:
              runtime.core.radix_sort_pairs_float_host(keys.ptr, values.ptr, count)
+         elif keys.dtype == wp.int64 and values.dtype == wp.int32:
+             runtime.core.radix_sort_pairs_int64_host(keys.ptr, values.ptr, count)
          else:
              raise RuntimeError("Unsupported data type")
      elif keys.device.is_cuda:
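The new `int64` key path mirrors the existing `int32`/`float32` branches. Below is a minimal usage sketch, not taken from the package: it assumes the function is reached as `wp.utils.radix_sort_pairs` and that, as for the other dtypes, each array provides `2 * count` elements so the trailing half can serve as scratch storage.

```python
import numpy as np
import warp as wp

wp.init()

count = 4
# Allocate 2*count elements; the trailing half is scratch space for the sort.
keys = wp.array(np.array([30, 10, 40, 20, 0, 0, 0, 0], dtype=np.int64), dtype=wp.int64)
vals = wp.array(np.array([0, 1, 2, 3, 0, 0, 0, 0], dtype=np.int32), dtype=wp.int32)

wp.utils.radix_sort_pairs(keys, vals, count)

print(keys.numpy()[:count])  # keys in ascending order: 10 20 30 40
print(vals.numpy()[:count])  # values carried along with their keys: 1 3 0 2
```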
@@ -148,6 +152,90 @@ def radix_sort_pairs(keys, values, count: int):
              runtime.core.radix_sort_pairs_int_device(keys.ptr, values.ptr, count)
          elif keys.dtype == wp.float32 and values.dtype == wp.int32:
              runtime.core.radix_sort_pairs_float_device(keys.ptr, values.ptr, count)
+         elif keys.dtype == wp.int64 and values.dtype == wp.int32:
+             runtime.core.radix_sort_pairs_int64_device(keys.ptr, values.ptr, count)
+         else:
+             raise RuntimeError("Unsupported data type")
+
+
+ def segmented_sort_pairs(
+     keys,
+     values,
+     count: int,
+     segment_start_indices: wp.array(dtype=wp.int32),
+     segment_end_indices: wp.array(dtype=wp.int32) = None,
+ ):
+     """Sort key-value pairs within segments.
+
+     This function performs a segmented sort of key-value pairs, where the sorting is done independently within each segment.
+     The segments are defined by their start and optionally end indices.
+
+     Args:
+         keys: Array of keys to sort. Must be of type int32 or float32.
+         values: Array of values to sort along with keys. Must be of type int32.
+         count: Number of elements to sort.
+         segment_start_indices: Array containing start index of each segment. Must be of type int32.
+             If segment_end_indices is None, this array must have length at least num_segments + 1,
+             and segment_end_indices will be inferred as segment_start_indices[1:].
+             If segment_end_indices is provided, this array must have length at least num_segments.
+         segment_end_indices: Optional array containing end index of each segment. Must be of type int32 if provided.
+             If None, segment_end_indices will be inferred from segment_start_indices[1:].
+             If provided, must have length at least num_segments.
+
+     Raises:
+         RuntimeError: If array storage devices don't match, if storage size is insufficient,
+             if segment_start_indices is not of type int32, or if data types are unsupported.
+     """
+     if keys.device != values.device:
+         raise RuntimeError("Array storage devices do not match")
+
+     if count == 0:
+         return
+
+     if keys.size < 2 * count or values.size < 2 * count:
+         raise RuntimeError("Array storage must be large enough to contain 2*count elements")
+
+     from warp.context import runtime
+
+     if segment_start_indices.dtype != wp.int32:
+         raise RuntimeError("segment_start_indices array must be of type int32")
+
+     # Handle case where segment_end_indices is not provided
+     if segment_end_indices is None:
+         num_segments = max(0, segment_start_indices.size - 1)
+
+         segment_end_indices = segment_start_indices[1:]
+         segment_end_indices_ptr = segment_end_indices.ptr
+         segment_start_indices_ptr = segment_start_indices.ptr
+     else:
+         if segment_end_indices.dtype != wp.int32:
+             raise RuntimeError("segment_end_indices array must be of type int32")
+
+         num_segments = segment_start_indices.size
+
+         segment_end_indices_ptr = segment_end_indices.ptr
+         segment_start_indices_ptr = segment_start_indices.ptr
+
+     if keys.device.is_cpu:
+         if keys.dtype == wp.int32 and values.dtype == wp.int32:
+             runtime.core.segmented_sort_pairs_int_host(
+                 keys.ptr, values.ptr, count, segment_start_indices_ptr, segment_end_indices_ptr, num_segments
+             )
+         elif keys.dtype == wp.float32 and values.dtype == wp.int32:
+             runtime.core.segmented_sort_pairs_float_host(
+                 keys.ptr, values.ptr, count, segment_start_indices_ptr, segment_end_indices_ptr, num_segments
+             )
+         else:
+             raise RuntimeError("Unsupported data type")
+     elif keys.device.is_cuda:
+         if keys.dtype == wp.int32 and values.dtype == wp.int32:
+             runtime.core.segmented_sort_pairs_int_device(
+                 keys.ptr, values.ptr, count, segment_start_indices_ptr, segment_end_indices_ptr, num_segments
+             )
+         elif keys.dtype == wp.float32 and values.dtype == wp.int32:
+             runtime.core.segmented_sort_pairs_float_device(
+                 keys.ptr, values.ptr, count, segment_start_indices_ptr, segment_end_indices_ptr, num_segments
+             )
      else:
          raise RuntimeError("Unsupported data type")
 
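A hedged usage sketch for the new `segmented_sort_pairs` (illustrative, not from the package; it assumes the function is reachable as `wp.utils.segmented_sort_pairs`). Two segments of four keys each are sorted independently, and with no explicit `segment_end_indices` the segment ends are inferred from `segment_start_indices[1:]`.

```python
import numpy as np
import warp as wp

wp.init()

count = 8
keys_np = np.array([4, 2, 3, 1, 9, 7, 8, 6], dtype=np.float32)
vals_np = np.arange(count, dtype=np.int32)

# Storage must hold 2*count elements; the trailing half is scratch space.
keys = wp.array(np.concatenate([keys_np, np.zeros(count, dtype=np.float32)]), dtype=wp.float32)
vals = wp.array(np.concatenate([vals_np, np.zeros(count, dtype=np.int32)]), dtype=wp.int32)

# Segments [0, 4) and [4, 8); the trailing 8 supplies the inferred end indices.
segment_starts = wp.array(np.array([0, 4, 8], dtype=np.int32), dtype=wp.int32)

wp.utils.segmented_sort_pairs(keys, vals, count, segment_starts)

print(keys.numpy()[:count])  # 1 2 3 4 | 6 7 8 9 -- each segment sorted on its own
print(vals.numpy()[:count])  # 3 1 2 0 | 7 5 6 4 -- values permuted alongside their keys
```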
@@ -673,37 +761,38 @@ class ScopedTimer:
 
      def __init__(
          self,
-         name,
-         active=True,
-         print=True,
-         detailed=False,
-         dict=None,
-         use_nvtx=False,
-         color="rapids",
-         synchronize=False,
-         cuda_filter=0,
-         report_func=None,
-         skip_tape=False,
+         name: str,
+         active: bool = True,
+         print: bool = True,
+         detailed: bool = False,
+         dict: Optional[Dict[str, List[float]]] = None,
+         use_nvtx: bool = False,
+         color: Union[int, str] = "rapids",
+         synchronize: bool = False,
+         cuda_filter: int = 0,
+         report_func: Optional[Callable[[List[TimingResult], str], None]] = None,
+         skip_tape: bool = False,
      ):
          """Context manager object for a timer
 
          Parameters:
-             name (str): Name of timer
-             active (bool): Enables this timer
-             print (bool): At context manager exit, print elapsed time to sys.stdout
-             detailed (bool): Collects additional profiling data using cProfile and calls ``print_stats()`` at context exit
-             dict (dict): A dictionary of lists to which the elapsed time will be appended using ``name`` as a key
-             use_nvtx (bool): If true, timing functionality is replaced by an NVTX range
-             color (int or str): ARGB value (e.g. 0x00FFFF) or color name (e.g. 'cyan') associated with the NVTX range
-             synchronize (bool): Synchronize the CPU thread with any outstanding CUDA work to return accurate GPU timings
-             cuda_filter (int): Filter flags for CUDA activity timing, e.g. ``warp.TIMING_KERNEL`` or ``warp.TIMING_ALL``
-             report_func (Callable): A callback function to print the activity report (``wp.timing_print()`` is used by default)
-             skip_tape (bool): If true, the timer will not be recorded in the tape
+             name: Name of timer
+             active: Enables this timer
+             print: At context manager exit, print elapsed time to ``sys.stdout``
+             detailed: Collects additional profiling data using cProfile and calls ``print_stats()`` at context exit
+             dict: A dictionary of lists to which the elapsed time will be appended using ``name`` as a key
+             use_nvtx: If true, timing functionality is replaced by an NVTX range
+             color: ARGB value (e.g. 0x00FFFF) or color name (e.g. 'cyan') associated with the NVTX range
+             synchronize: Synchronize the CPU thread with any outstanding CUDA work to return accurate GPU timings
+             cuda_filter: Filter flags for CUDA activity timing, e.g. ``warp.TIMING_KERNEL`` or ``warp.TIMING_ALL``
+             report_func: A callback function to print the activity report.
+                 If ``None``, :func:`wp.timing_print() <timing_print>` will be used.
+             skip_tape: If true, the timer will not be recorded in the tape
 
          Attributes:
              extra_msg (str): Can be set to a string that will be added to the printout at context exit.
              elapsed (float): The duration of the ``with`` block used with this object
-             timing_results (list[TimingResult]): The list of activity timing results, if collection was requested using ``cuda_filter``
+             timing_results (List[TimingResult]): The list of activity timing results, if collection was requested using ``cuda_filter``
          """
          self.name = name
          self.active = active and self.enabled
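For reference, a small sketch of how the newly annotated parameters are typically combined (illustrative only; the `cuda_filter` collection assumes a CUDA-capable device is the default device).

```python
import warp as wp

wp.init()

timings = {}

# Append the elapsed time to timings["alloc"] and, on CUDA devices, collect
# activity records (kernels, memory ops, ...) selected by cuda_filter.
with wp.ScopedTimer("alloc", dict=timings, synchronize=True, cuda_filter=wp.TIMING_ALL) as timer:
    a = wp.zeros(1_000_000, dtype=wp.float32)

print(timings["alloc"])      # list of elapsed times in milliseconds, keyed by the timer name
print(timer.timing_results)  # TimingResult objects gathered through cuda_filter
```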
@@ -799,7 +888,7 @@ class ScopedTimer:
 
  # Allow temporarily enabling/disabling mempool allocators
  class ScopedMempool:
-     def __init__(self, device, enable: bool):
+     def __init__(self, device: Devicelike, enable: bool):
          self.device = wp.get_device(device)
          self.enable = enable
 
@@ -813,7 +902,7 @@ class ScopedMempool:
 
  # Allow temporarily enabling/disabling mempool access
  class ScopedMempoolAccess:
-     def __init__(self, target_device, peer_device, enable: bool):
+     def __init__(self, target_device: Devicelike, peer_device: Devicelike, enable: bool):
          self.target_device = target_device
          self.peer_device = peer_device
          self.enable = enable
@@ -828,7 +917,7 @@ class ScopedMempoolAccess:
 
  # Allow temporarily enabling/disabling peer access
  class ScopedPeerAccess:
-     def __init__(self, target_device, peer_device, enable: bool):
+     def __init__(self, target_device: Devicelike, peer_device: Devicelike, enable: bool):
          self.target_device = target_device
          self.peer_device = peer_device
          self.enable = enable
@@ -842,7 +931,7 @@ class ScopedPeerAccess:
 
 
  class ScopedCapture:
-     def __init__(self, device=None, stream=None, force_module_load=None, external=False):
+     def __init__(self, device: Devicelike = None, stream=None, force_module_load=None, external=False):
          self.device = device
          self.stream = stream
          self.force_module_load = force_module_load
@@ -868,27 +957,6 @@ class ScopedCapture:
                  self.active = False
 
 
- # helper kernels for adj_matmul
- @wp.kernel
- def add_kernel_2d(x: wp.array2d(dtype=Any), acc: wp.array2d(dtype=Any), beta: Any):
-     i, j = wp.tid()
-
-     x[i, j] = x[i, j] + beta * acc[i, j]
-
-
- @wp.kernel
- def add_kernel_3d(x: wp.array3d(dtype=Any), acc: wp.array3d(dtype=Any), beta: Any):
-     i, j, k = wp.tid()
-
-     x[i, j, k] = x[i, j, k] + beta * acc[i, j, k]
-
-
- # explicit instantiations of generic kernels for adj_matmul
- for T in [wp.float16, wp.float32, wp.float64]:
-     wp.overload(add_kernel_2d, [wp.array2d(dtype=T), wp.array2d(dtype=T), T])
-     wp.overload(add_kernel_3d, [wp.array3d(dtype=T), wp.array3d(dtype=T), T])
-
-
  def check_p2p():
      """Check if the machine is configured properly for peer-to-peer transfers.
 
@@ -927,31 +995,28 @@ class timing_result_t(ctypes.Structure):
 
 
  class TimingResult:
-     """Timing result for a single activity.
+     """Timing result for a single activity."""
 
-     Parameters:
-         raw_result (warp.utils.timing_result_t): The result structure obtained from C++ (internal use only)
+     def __init__(self, device, name, filter, elapsed):
+         self.device: warp.context.Device = device
+         """The device where the activity was recorded."""
 
-     Attributes:
-         device (warp.Device): The device where the activity was recorded.
-         name (str): The activity name.
-         filter (int): The type of activity (e.g., ``warp.TIMING_KERNEL``).
-         elapsed (float): The elapsed time in milliseconds.
-     """
+         self.name: str = name
+         """The activity name."""
 
-     def __init__(self, device, name, filter, elapsed):
-         self.device = device
-         self.name = name
-         self.filter = filter
-         self.elapsed = elapsed
+         self.filter: int = filter
+         """The type of activity (e.g., ``warp.TIMING_KERNEL``)."""
+
+         self.elapsed: float = elapsed
+         """The elapsed time in milliseconds."""
 
 
- def timing_begin(cuda_filter=TIMING_ALL, synchronize=True):
+ def timing_begin(cuda_filter: int = TIMING_ALL, synchronize: bool = True) -> None:
      """Begin detailed activity timing.
 
      Parameters:
-         cuda_filter (int): Filter flags for CUDA activity timing, e.g. ``warp.TIMING_KERNEL`` or ``warp.TIMING_ALL``
-         synchronize (bool): Whether to synchronize all CUDA devices before timing starts
+         cuda_filter: Filter flags for CUDA activity timing, e.g. ``warp.TIMING_KERNEL`` or ``warp.TIMING_ALL``
+         synchronize: Whether to synchronize all CUDA devices before timing starts
      """
 
      if synchronize:
@@ -960,14 +1025,14 @@ def timing_begin(cuda_filter=TIMING_ALL, synchronize=True):
      warp.context.runtime.core.cuda_timing_begin(cuda_filter)
 
 
- def timing_end(synchronize=True):
+ def timing_end(synchronize: bool = True) -> List[TimingResult]:
      """End detailed activity timing.
 
      Parameters:
-         synchronize (bool): Whether to synchronize all CUDA devices before timing ends
+         synchronize: Whether to synchronize all CUDA devices before timing ends
 
      Returns:
-         list[TimingResult]: A list of ``TimingResult`` objects for all recorded activities.
+         A list of :class:`TimingResult` objects for all recorded activities.
      """
 
      if synchronize:
@@ -1006,12 +1071,12 @@ def timing_end(synchronize=True):
      return results
 
 
- def timing_print(results, indent=""):
+ def timing_print(results: List[TimingResult], indent: str = "") -> None:
      """Print timing results.
 
      Parameters:
-         results (list[TimingResult]): List of ``TimingResult`` objects.
-         indent (str): Optional indentation for the output.
+         results: List of :class:`TimingResult` objects to print.
+         indent: Optional indentation to prepend to all output lines.
      """
 
      if not results:
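Taken together, `timing_begin`, `timing_end`, and `timing_print` form Warp's standalone activity-timing API. A hedged end-to-end sketch (it assumes a CUDA device is available as the default device; the `scale` kernel is made up for illustration):

```python
import warp as wp

wp.init()


@wp.kernel
def scale(a: wp.array(dtype=float), s: float):
    # Hypothetical kernel used only to generate some GPU activity.
    i = wp.tid()
    a[i] = a[i] * s


a = wp.ones(1 << 20, dtype=wp.float32)

wp.timing_begin(cuda_filter=wp.TIMING_ALL)  # start recording CUDA activity
wp.launch(scale, dim=a.shape[0], inputs=[a, 2.0])
results = wp.timing_end()                   # synchronizes, then returns TimingResult objects

wp.timing_print(results, indent="  ")
for r in results:
    print(r.device, r.name, r.filter, r.elapsed)
```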
{warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
- Metadata-Version: 2.2
+ Metadata-Version: 2.4
  Name: warp-lang
- Version: 1.6.2
+ Version: 1.7.0
  Summary: A Python framework for high-performance simulation and graphics programming
  Author-email: NVIDIA Corporation <warp-python@nvidia.com>
  License: Apache-2.0
@@ -32,6 +32,7 @@ Requires-Dist: usd-core; extra == "extras"
  Requires-Dist: matplotlib; extra == "extras"
  Requires-Dist: pillow; extra == "extras"
  Requires-Dist: pyglet; extra == "extras"
+ Dynamic: license-file
 
  [![PyPI version](https://badge.fury.io/py/warp-lang.svg)](https://badge.fury.io/py/warp-lang)
  [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
@@ -81,9 +82,9 @@ the `pip install` command, e.g.
 
  | Platform | Install Command |
  | --------------- | ----------------------------------------------------------------------------------------------------------------------------- |
- | Linux aarch64 | `pip install https://github.com/NVIDIA/warp/releases/download/v1.6.2/warp_lang-1.6.2+cu11-py3-none-manylinux2014_aarch64.whl` |
- | Linux x86-64 | `pip install https://github.com/NVIDIA/warp/releases/download/v1.6.2/warp_lang-1.6.2+cu11-py3-none-manylinux2014_x86_64.whl` |
- | Windows x86-64 | `pip install https://github.com/NVIDIA/warp/releases/download/v1.6.2/warp_lang-1.6.2+cu11-py3-none-win_amd64.whl` |
+ | Linux aarch64 | `pip install https://github.com/NVIDIA/warp/releases/download/v1.7.0/warp_lang-1.7.0+cu11-py3-none-manylinux2014_aarch64.whl` |
+ | Linux x86-64 | `pip install https://github.com/NVIDIA/warp/releases/download/v1.7.0/warp_lang-1.7.0+cu11-py3-none-manylinux2014_x86_64.whl` |
+ | Windows x86-64 | `pip install https://github.com/NVIDIA/warp/releases/download/v1.7.0/warp_lang-1.7.0+cu11-py3-none-win_amd64.whl` |
 
  The `--force-reinstall` option may need to be used to overwrite a previous installation.
 
@@ -99,6 +100,15 @@ pip install -U --pre warp-lang --extra-index-url=https://pypi.nvidia.com/
 
  Note that the nightly builds are built with the CUDA 12 runtime and are not published for macOS.
 
+ If you plan to install nightly builds regularly, you can simplify future installations by adding NVIDIA's package
+ repository as an extra index via the `PIP_EXTRA_INDEX_URL` environment variable. For example:
+
+ ```text
+ export PIP_EXTRA_INDEX_URL="https://pypi.nvidia.com"
+ ```
+
+ This ensures the index is automatically used for `pip` commands, avoiding the need to specify it explicitly.
+
  ### CUDA Requirements
 
  * Warp packages built with CUDA Toolkit 11.x require NVIDIA driver 470 or newer.
@@ -250,16 +260,16 @@ python -m warp.tests
  <td align="center">raymarch</td>
  </tr>
  <tr>
+ <td><a href="https://github.com/NVIDIA/warp/tree/main/warp/examples/core/example_sample_mesh.py"><img src="https://media.githubusercontent.com/media/NVIDIA/warp/refs/heads/main/docs/img/examples/core_sample_mesh.png"></a></td>
  <td><a href="https://github.com/NVIDIA/warp/tree/main/warp/examples/core/example_sph.py"><img src="https://media.githubusercontent.com/media/NVIDIA/warp/refs/heads/main/docs/img/examples/core_sph.png"></a></td>
  <td><a href="https://github.com/NVIDIA/warp/tree/main/warp/examples/core/example_torch.py"><img src="https://media.githubusercontent.com/media/NVIDIA/warp/refs/heads/main/docs/img/examples/core_torch.png"></a></td>
  <td><a href="https://github.com/NVIDIA/warp/tree/main/warp/examples/core/example_wave.py"><img src="https://media.githubusercontent.com/media/NVIDIA/warp/refs/heads/main/docs/img/examples/core_wave.png"></a></td>
- <td></td>
  </tr>
  <tr>
+ <td align="center">sample mesh</td>
  <td align="center">sph</td>
  <td align="center">torch</td>
  <td align="center">wave</td>
- <td align="center"></td>
  </tr>
  </tbody>
  </table>
@@ -323,6 +333,18 @@ python -m warp.tests
  <td align="center">trajectory</td>
  <td align="center">soft body properties</td>
  </tr>
+ <tr>
+ <td><a href="https://github.com/NVIDIA/warp/tree/main/warp/examples/optim/example_fluid_checkpoint.py"><img src="https://media.githubusercontent.com/media/NVIDIA/warp/refs/heads/main/docs/img/examples/optim_fluid_checkpoint.png"></a></td>
+ <td></td>
+ <td></td>
+ <td></td>
+ </tr>
+ <tr>
+ <td align="center">fluid checkpoint</td>
+ <td align="center"></td>
+ <td align="center"></td>
+ <td align="center"></td>
+ </tr>
  </tbody>
  </table>