PyPI - warp-lang - Versions diffs - 1.7.0__py3-none-macosx_10_13_universal2.whl → 1.7.2rc1__py3-none-macosx_10_13_universal2.whl - Mend

warp-lang 1.7.0__py3-none-macosx_10_13_universal2.whl → 1.7.2rc1__py3-none-macosx_10_13_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (60)

warp/autograd.py +12 -2
warp/bin/libwarp-clang.dylib +0 -0
warp/bin/libwarp.dylib +0 -0
warp/build.py +1 -1
warp/builtins.py +103 -66
warp/codegen.py +48 -27
warp/config.py +1 -1
warp/context.py +112 -49
warp/examples/benchmarks/benchmark_cloth.py +1 -1
warp/examples/distributed/example_jacobi_mpi.py +507 -0
warp/fem/cache.py +1 -1
warp/fem/field/field.py +11 -1
warp/fem/field/nodal_field.py +36 -22
warp/fem/geometry/adaptive_nanogrid.py +7 -3
warp/fem/geometry/trimesh.py +4 -12
warp/jax_experimental/custom_call.py +14 -2
warp/jax_experimental/ffi.py +100 -67
warp/native/builtin.h +91 -65
warp/native/svd.h +59 -49
warp/native/tile.h +55 -26
warp/native/volume.cpp +2 -2
warp/native/volume_builder.cu +33 -22
warp/native/warp.cu +1 -1
warp/render/render_opengl.py +41 -34
warp/render/render_usd.py +96 -6
warp/sim/collide.py +11 -9
warp/sim/inertia.py +189 -156
warp/sim/integrator_euler.py +3 -0
warp/sim/integrator_xpbd.py +3 -0
warp/sim/model.py +56 -31
warp/sim/render.py +4 -0
warp/sparse.py +1 -1
warp/stubs.py +73 -25
warp/tests/assets/torus.usda +1 -1
warp/tests/cuda/test_streams.py +1 -1
warp/tests/sim/test_collision.py +237 -206
warp/tests/sim/test_inertia.py +161 -0
warp/tests/sim/test_model.py +5 -3
warp/tests/sim/{flaky_test_sim_grad.py → test_sim_grad.py} +1 -4
warp/tests/sim/test_xpbd.py +399 -0
warp/tests/test_array.py +8 -7
warp/tests/test_atomic.py +181 -2
warp/tests/test_builtins_resolution.py +38 -38
warp/tests/test_codegen.py +24 -3
warp/tests/test_examples.py +16 -6
warp/tests/test_fem.py +93 -14
warp/tests/test_func.py +1 -1
warp/tests/test_mat.py +416 -119
warp/tests/test_quat.py +321 -137
warp/tests/test_struct.py +116 -0
warp/tests/test_vec.py +320 -174
warp/tests/tile/test_tile.py +27 -0
warp/tests/tile/test_tile_load.py +124 -0
warp/tests/unittest_suites.py +2 -5
warp/types.py +107 -9
{warp_lang-1.7.0.dist-info → warp_lang-1.7.2rc1.dist-info}/METADATA +41 -19
{warp_lang-1.7.0.dist-info → warp_lang-1.7.2rc1.dist-info}/RECORD +60 -57
{warp_lang-1.7.0.dist-info → warp_lang-1.7.2rc1.dist-info}/WHEEL +1 -1
{warp_lang-1.7.0.dist-info → warp_lang-1.7.2rc1.dist-info}/licenses/LICENSE.md +0 -26
{warp_lang-1.7.0.dist-info → warp_lang-1.7.2rc1.dist-info}/top_level.txt +0 -0

warp/native/tile.h CHANGED Viewed

@@ -219,8 +219,8 @@ struct tile_coord_t
 {
     int indices[N];
-    CUDA_CALLABLE inline int operator[](int i) const { assert(0 <= 1 && i < N); return indices[i]; }
-    CUDA_CALLABLE inline int& operator[](int i) { assert(0 <= 1 && i < N); return indices[i]; }
+    CUDA_CALLABLE inline int operator[](int i) const { assert(0 <= i && i < N); return indices[i]; }
+    CUDA_CALLABLE inline int& operator[](int i) { assert(0 <= i && i < N); return indices[i]; }
     CUDA_CALLABLE inline tile_coord_t<N> operator + (const tile_coord_t<N>& c) const
     {
@@ -1133,17 +1133,18 @@ struct tile_shared_t
             constexpr int lastdim = Layout::Shape::N-1;
             constexpr bool contiguous_src = Layout::Stride::dim(lastdim) == 1;
             const bool contiguous_dest = dest.data.strides[lastdim] == sizeof(T);
-            const int elements = (dest.data.shape[lastdim] - dest.offset[lastdim]);
+            const int elements = min(Layout::Shape::dim(1), (dest.data.shape[lastdim] - dest.offset[lastdim]));
             const bool aligned_size = (elements*sizeof(T))%sizeof(float4) == 0;
+            const bool aligned_stride = (dest.data.strides[0]/sizeof(T))%Layout::Stride::dim(0) == 0;
             float4* dest128 = (float4*)&dest.data.data[dest.index_from_coord(tile_coord(0,0))];
             const bool aligned_dst = (uint64_t)(dest128)%sizeof(float4) == 0;
-            if (contiguous_dest && contiguous_src && aligned_size && aligned_dst)
-            {
-                constexpr int M = Layout::Shape::dim(0);
-                constexpr int N = (Layout::Shape::dim(1)*sizeof(T))/sizeof(float4);
+            constexpr int M = Layout::Shape::dim(0);
+            constexpr int N = (Layout::Shape::dim(1)*sizeof(T))/sizeof(float4);
+            if (contiguous_dest && contiguous_src && aligned_size && aligned_dst && aligned_stride && N)
+            {
                 // alias of shared tile with 128bit type
                 using SrcLayout = tile_layout_strided_t<tile_shape_t<M, N>>;
                 tile_shared_t<float4, SrcLayout> src128((float4*)data.ptr);
@@ -1222,17 +1223,18 @@ struct tile_shared_t
             constexpr int lastdim = Layout::Shape::N-1;
             constexpr bool contiguous_dest = Layout::Stride::dim(lastdim) == 1;
             const bool contiguous_src = src.data.strides[lastdim] == sizeof(T);
-            const int elements = (src.data.shape[lastdim] - src.offset[lastdim]);
+            const int elements = min(Layout::Shape::dim(1), (src.data.shape[lastdim] - src.offset[lastdim]));
             const bool aligned_size = (elements*sizeof(T))%sizeof(float4) == 0;
+            const bool aligned_stride = (src.data.strides[0]/sizeof(T))%Layout::Stride::dim(0) == 0;
             float4* src128 = (float4*)&src.data.data[src.index_from_coord(tile_coord(0,0))];
             const bool aligned_src = (uint64_t)(src128)%sizeof(float4) == 0;
-            if (contiguous_dest && contiguous_src && aligned_size && aligned_src)
-            {
-                constexpr int M = Layout::Shape::dim(0);
-                constexpr int N = (Layout::Shape::dim(1)*sizeof(T))/sizeof(float4);
+            constexpr int M = Layout::Shape::dim(0);
+            constexpr int N = (Layout::Shape::dim(1)*sizeof(T))/sizeof(float4);
+            if (contiguous_dest && contiguous_src && aligned_size && aligned_src && aligned_stride && N)
+            {
                 // alias of shared tile with 128bit type
                 using DestLayout = tile_layout_strided_t<tile_shape_t<M, N>>;
                 tile_shared_t<float4, DestLayout> dest128((float4*)data.ptr);
@@ -1282,13 +1284,13 @@ struct tile_shared_t
     template <typename Global>
     inline CUDA_CALLABLE auto atomic_add(Global& dest)
     {
-        copy_to_register().atomic_add(dest);
+        return copy_to_register().atomic_add(dest);
     }
     template <typename Global>
     inline CUDA_CALLABLE auto atomic_add_grad(Global& dest)
     {
-        grad_to_register().atomic_add_grad(dest);
+        return grad_to_register().atomic_add_grad(dest);
     }
     // overload for integral types
@@ -1682,15 +1684,27 @@ template <typename T, typename Tile>
 inline CUDA_CALLABLE void tile_store(array_t<T>& dest, int x, int y, int z, int w, Tile& src) { src.copy_to_global(tile_global_t<T, typename Tile::Layout::Shape>(dest, tile_coord(x, y, z, w))); }
+// compiler struggles with these if they are one line
 template <typename T, typename Tile>
-inline CUDA_CALLABLE auto tile_atomic_add(array_t<T>& dest, int x, Tile& src) { return src.atomic_add(tile_global_t<T, typename Tile::Layout::Shape>(dest, tile_coord(x))); }
+inline CUDA_CALLABLE auto tile_atomic_add(array_t<T>& dest, int x, Tile& src) {
+    tile_global_t<T, typename Tile::Layout::Shape> global(dest, tile_coord(x));
+    return src.atomic_add(global);
+}
 template <typename T, typename Tile>
-inline CUDA_CALLABLE auto tile_atomic_add(array_t<T>& dest, int x, int y, Tile& src) { return src.atomic_add(tile_global_t<T, typename Tile::Layout::Shape>(dest, tile_coord(x, y)));}
+inline CUDA_CALLABLE auto tile_atomic_add(array_t<T>& dest, int x, int y, Tile& src) {
+    tile_global_t<T, typename Tile::Layout::Shape> global(dest, tile_coord(x, y));
+    return src.atomic_add(global);
+}
 template <typename T, typename Tile>
-inline CUDA_CALLABLE auto tile_atomic_add(array_t<T>& dest, int x, int y, int z, Tile& src) { return src.atomic_add(tile_global_t<T, typename Tile::Layout::Shape>(dest, tile_coord(x, y, z)));}
+inline CUDA_CALLABLE auto tile_atomic_add(array_t<T>& dest, int x, int y, int z, Tile& src) {
+    tile_global_t<T, typename Tile::Layout::Shape> global(dest, tile_coord(x, y, z));
+    return src.atomic_add(global);
+}
 template <typename T, typename Tile>
-inline CUDA_CALLABLE auto tile_atomic_add(array_t<T>& dest, int x, int y, int z, int w, Tile& src) { return src.atomic_add(tile_global_t<T, typename Tile::Layout::Shape>(dest, tile_coord(x, y, z, w)));}
+inline CUDA_CALLABLE auto tile_atomic_add(array_t<T>& dest, int x, int y, int z, int w, Tile& src) {
+    tile_global_t<T, typename Tile::Layout::Shape> global(dest, tile_coord(x, y, z, w));
+    return src.atomic_add(global);
+}
 //-------------------------------------
@@ -2468,21 +2482,18 @@ inline CUDA_CALLABLE void assign(TileA& dest, int i, const Scalar& src)
     dest.data(tile_coord(i)) = src;
     WP_TILE_SYNC();
 }
 template <typename TileA, typename Scalar>
 inline CUDA_CALLABLE void assign(TileA& dest, int i, int j, const Scalar& src)
 {
     dest.data(tile_coord(i, j)) = src;
     WP_TILE_SYNC();
 }
 template <typename TileA, typename Scalar>
 inline CUDA_CALLABLE void assign(TileA& dest, int i, int j, int k, const Scalar& src)
 {
     dest.data(tile_coord(i, j, k)) = src;
     WP_TILE_SYNC();
 }
 template <typename TileA, typename Scalar>
 inline CUDA_CALLABLE void assign(TileA& dest, int i, int j, int k, int l, const Scalar& src)
 {
@@ -2490,8 +2501,26 @@ inline CUDA_CALLABLE void assign(TileA& dest, int i, int j, int k, int l, const
     WP_TILE_SYNC();
 }
+template <typename TileA, typename AdjTileA, typename Scalar>
+inline CUDA_CALLABLE void adj_assign(TileA& dest, int i, const Scalar& src, AdjTileA& adj_dest, int adj_i, Scalar& adj_src)
+{
+    adj_src += dest.grad(tile_coord(i));
+}
+template <typename TileA, typename AdjTileA, typename Scalar>
+inline CUDA_CALLABLE void adj_assign(TileA& dest, int i, int j, const Scalar& src, AdjTileA& adj_dest, int adj_i, int adj_j, Scalar& adj_src)
+{
+    adj_src += dest.grad(tile_coord(i, j));
+}
+template <typename TileA, typename AdjTileA, typename Scalar>
+inline CUDA_CALLABLE void adj_assign(TileA& dest, int i, int j, int k, const Scalar& src, AdjTileA& adj_dest, int adj_i, int adj_j, int adj_k, Scalar& adj_src)
+{
+    adj_src += dest.grad(tile_coord(i, j, k));
+}
+template <typename TileA, typename AdjTileA, typename Scalar>
+inline CUDA_CALLABLE void adj_assign(TileA& dest, int i, int j, int k, int l, const Scalar& src, AdjTileA& adj_dest, int adj_i, int adj_j, int adj_k, int adj_l, Scalar& adj_src)
+{
+    adj_src += dest.grad(tile_coord(i, j, k, l));
+}
 template <typename TileA, typename TileB, typename Coord>
 inline CUDA_CALLABLE void tile_assign(TileA& dest, TileB& src, const Coord& offset)

warp/native/volume.cpp CHANGED Viewed

@@ -87,7 +87,7 @@ void volume_set_map(nanovdb::Map& map, const float transform[9], const float tra
 // NB: buf must be a host pointer
 uint64_t volume_create_host(void* buf, uint64_t size, bool copy, bool owner)
 {
-    if (size > 0 && size < sizeof(pnanovdb_grid_t) + sizeof(pnanovdb_tree_t))
+    if (buf == nullptr || (size > 0 && size < sizeof(pnanovdb_grid_t) + sizeof(pnanovdb_tree_t)))
         return 0; // This cannot be a valid NanoVDB grid with data
     if (!copy && volume_exists(buf))
@@ -138,7 +138,7 @@ uint64_t volume_create_host(void* buf, uint64_t size, bool copy, bool owner)
 // NB: buf must be a pointer on the same device
 uint64_t volume_create_device(void* context, void* buf, uint64_t size, bool copy, bool owner)
 {
-    if (size > 0 && size < sizeof(pnanovdb_grid_t) + sizeof(pnanovdb_tree_t))
+    if (buf == nullptr || (size > 0 && size < sizeof(pnanovdb_grid_t) + sizeof(pnanovdb_tree_t)))
         return 0; // This cannot be a valid NanoVDB grid with data
     if (!copy && volume_exists(buf))

warp/native/volume_builder.cu CHANGED Viewed

@@ -43,6 +43,7 @@ struct Allocator
     {
         // in PointsToGrid stream argument always coincide with current stream, ignore
         *d_ptr = alloc_device(WP_CURRENT_CONTEXT, bytes);
+        cudaCheckError();
         return cudaSuccess;
     }
@@ -160,6 +161,7 @@ class DeviceBuffer
         {
             mGpuData = alloc_device(WP_CURRENT_CONTEXT, size);
         }
+        cudaCheckError();
         mSize = size;
         mManaged = true;
     }
@@ -432,35 +434,44 @@ void build_grid_from_points(nanovdb::Grid<nanovdb::NanoTree<BuildT>> *&out_grid,
     out_grid = nullptr;
     out_grid_size = 0;
-    cudaStream_t stream = static_cast<cudaStream_t>(cuda_stream_get_current());
-    nanovdb::tools::cuda::PointsToGrid<BuildT, Allocator> p2g(params.map, stream);
+    try
+    {
-    // p2g.setVerbose(2);
-    p2g.setGridName(params.name);
-    p2g.setChecksum(nanovdb::CheckMode::Disable);
+        cudaStream_t stream = static_cast<cudaStream_t>(cuda_stream_get_current());
+        nanovdb::tools::cuda::PointsToGrid<BuildT, Allocator> p2g(params.map, stream);
-    // Only compute bbox for OnIndex grids. Otherwise bbox will be computed after activating all leaf voxels
-    p2g.includeBBox(nanovdb::BuildTraits<BuildT>::is_onindex);
+        // p2g.setVerbose(2);
+        p2g.setGridName(params.name);
+        p2g.setChecksum(nanovdb::CheckMode::Disable);
-    nanovdb::GridHandle<DeviceBuffer> grid_handle;
+        // Only compute bbox for OnIndex grids. Otherwise bbox will be computed after activating all leaf voxels
+        p2g.includeBBox(nanovdb::BuildTraits<BuildT>::is_onindex);
-    if (points_in_world_space)
-    {
-        grid_handle = p2g.getHandle(WorldSpacePointsPtr{static_cast<const nanovdb::Vec3f *>(points), params.map}, num_points,
-                                    DeviceBuffer());
-    }
-    else
-    {
-        grid_handle = p2g.getHandle(static_cast<const nanovdb::Coord *>(points), num_points, DeviceBuffer());
-    }
+        nanovdb::GridHandle<DeviceBuffer> grid_handle;
-    out_grid = grid_handle.deviceGrid<BuildT>();
-    out_grid_size = grid_handle.gridSize();
+        if (points_in_world_space)
+        {
+            grid_handle = p2g.getHandle(WorldSpacePointsPtr{static_cast<const nanovdb::Vec3f*>(points), params.map},
+                                        num_points, DeviceBuffer());
+        }
+        else
+        {
+            grid_handle = p2g.getHandle(static_cast<const nanovdb::Coord*>(points), num_points, DeviceBuffer());
+        }
+        out_grid = grid_handle.deviceGrid<BuildT>();
+        out_grid_size = grid_handle.gridSize();
-    finalize_grid(*out_grid, params);
+        finalize_grid(*out_grid, params);
-    // So that buffer is not destroyed when handles goes out of scope
-    grid_handle.buffer().detachDeviceData();
+        // So that buffer is not destroyed when handles goes out of scope
+        grid_handle.buffer().detachDeviceData();
+    }
+    catch (const std::runtime_error& exc)
+    {
+        out_grid = nullptr;
+        out_grid_size = 0;
+    }
 }

warp/native/warp.cu CHANGED Viewed

@@ -3027,7 +3027,7 @@ size_t cuda_compile_program(const char* cuda_src, const char* program_name, int
                     fprintf(stderr, "Warp error: num_ltoirs > 0 but ltoir_input_types, ltoirs or ltoir_sizes are NULL\n");
                     return size_t(-1);
                 }
-                nvJitLinkHandle handle;
+                nvJitLinkHandle handle = nullptr;
                 std::vector<const char *> lopts = {"-dlto", arch_opt_lto};
                 if (use_ptx) {
                     lopts.push_back("-ptx");

warp/render/render_opengl.py CHANGED Viewed

@@ -13,11 +13,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from __future__ import annotations
 import ctypes
 import sys
 import time
 from collections import defaultdict
-from typing import List, Optional, Tuple, Union
+from typing import List, Union
 import numpy as np
@@ -1500,16 +1502,16 @@ class OpenGLRenderer:
     def setup_tiled_rendering(
         self,
-        instances: List[List[int]],
+        instances: list[list[int]],
         rescale_window: bool = False,
-        tile_width: Optional[int] = None,
-        tile_height: Optional[int] = None,
-        tile_ncols: Optional[int] = None,
-        tile_nrows: Optional[int] = None,
-        tile_positions: Optional[List[Tuple[int]]] = None,
-        tile_sizes: Optional[List[Tuple[int]]] = None,
-        projection_matrices: Optional[List[Mat44]] = None,
-        view_matrices: Optional[List[Mat44]] = None,
+        tile_width: int | None = None,
+        tile_height: int | None = None,
+        tile_ncols: int | None = None,
+        tile_nrows: int | None = None,
+        tile_positions: list[tuple[int]] | None = None,
+        tile_sizes: list[tuple[int]] | None = None,
+        projection_matrices: list[Mat44] | None = None,
+        view_matrices: list[Mat44] | None = None,
     ):
         """
         Set up tiled rendering where the render buffer is split into multiple tiles that can visualize
@@ -1602,11 +1604,11 @@ class OpenGLRenderer:
     def update_tile(
         self,
         tile_id,
-        instances: Optional[List[int]] = None,
-        projection_matrix: Optional[Mat44] = None,
-        view_matrix: Optional[Mat44] = None,
-        tile_size: Optional[Tuple[int]] = None,
-        tile_position: Optional[Tuple[int]] = None,
+        instances: list[int] | None = None,
+        projection_matrix: Mat44 | None = None,
+        view_matrix: Mat44 | None = None,
+        tile_size: tuple[int] | None = None,
+        tile_position: tuple[int] | None = None,
     ):
         """
         Update the shape instances, projection matrix, view matrix, tile size, or tile position
@@ -1806,7 +1808,7 @@ class OpenGLRenderer:
         return np.array((scaling, 0, 0, 0, 0, scaling, 0, 0, 0, 0, scaling, 0, 0, 0, 0, 1), dtype=np.float32)
-    def update_model_matrix(self, model_matrix: Optional[Mat44] = None):
+    def update_model_matrix(self, model_matrix: Mat44 | None = None):
         gl = OpenGLRenderer.gl
         self._switch_context()
@@ -1988,6 +1990,10 @@ class OpenGLRenderer:
             gl.glBlendFunc(gl.GL_SRC_ALPHA, gl.GL_ONE_MINUS_SRC_ALPHA)
             gl.glEnable(gl.GL_BLEND)
+            # disable depth test to fix text rendering
+            # https://github.com/pyglet/pyglet/issues/1302
+            gl.glDisable(gl.GL_DEPTH_TEST)
             text = f"""Sim Time: {self.time:.1f}
 Update FPS: {self._fps_update:.1f}
 Render FPS: {self._fps_render:.1f}
@@ -2001,6 +2007,8 @@ Instances: {len(self._instances)}"""
             self._info_label.y = self.screen_height - 5
             self._info_label.draw()
+            gl.glEnable(gl.GL_DEPTH_TEST)
         for cb in self.render_2d_callbacks:
             cb()
@@ -2339,6 +2347,14 @@ Instances: {len(self._instances)}"""
         colors1 = np.array(colors1, dtype=np.float32)
         colors2 = np.array(colors2, dtype=np.float32)
+        # create color buffers
+        if self._instance_color1_buffer is None:
+            self._instance_color1_buffer = gl.GLuint()
+            gl.glGenBuffers(1, self._instance_color1_buffer)
+        if self._instance_color2_buffer is None:
+            self._instance_color2_buffer = gl.GLuint()
+            gl.glGenBuffers(1, self._instance_color2_buffer)
         gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self._instance_color1_buffer)
         gl.glBufferData(gl.GL_ARRAY_BUFFER, colors1.nbytes, colors1.ctypes.data, gl.GL_STATIC_DRAW)
@@ -2362,14 +2378,10 @@ Instances: {len(self._instances)}"""
         )
         gl.glUseProgram(self._shape_shader.id)
-        if self._instance_transform_gl_buffer is not None:
-            gl.glDeleteBuffers(1, self._instance_transform_gl_buffer)
-            gl.glDeleteBuffers(1, self._instance_color1_buffer)
-            gl.glDeleteBuffers(1, self._instance_color2_buffer)
-        # create instance buffer and bind it as an instanced array
-        self._instance_transform_gl_buffer = gl.GLuint()
-        gl.glGenBuffers(1, self._instance_transform_gl_buffer)
+        if self._instance_transform_gl_buffer is None:
+            # create instance buffer and bind it as an instanced array
+            self._instance_transform_gl_buffer = gl.GLuint()
+            gl.glGenBuffers(1, self._instance_transform_gl_buffer)
         gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self._instance_transform_gl_buffer)
         transforms = np.tile(np.diag(np.ones(4, dtype=np.float32)), (len(self._instances), 1, 1))
@@ -2380,12 +2392,6 @@ Instances: {len(self._instances)}"""
             int(self._instance_transform_gl_buffer.value), self._device
         )
-        # create color buffers
-        self._instance_color1_buffer = gl.GLuint()
-        gl.glGenBuffers(1, self._instance_color1_buffer)
-        self._instance_color2_buffer = gl.GLuint()
-        gl.glGenBuffers(1, self._instance_color2_buffer)
         self.update_instance_colors()
         # set up instance attribute pointers
@@ -2440,7 +2446,7 @@ Instances: {len(self._instances)}"""
         gl.glBindVertexArray(0)
     def update_shape_instance(self, name, pos=None, rot=None, color1=None, color2=None, visible=None):
-        """Update the instance transform of the shape
+        """Update the instance properties of the shape
         Args:
             name: The name of the shape
@@ -2783,8 +2789,9 @@ Instances: {len(self._instances)}"""
                 q = (0.0, 0.0, 0.0, 1.0)
             else:
                 c = np.cross(normal, (0.0, 1.0, 0.0))
-                angle = np.arcsin(np.linalg.norm(c))
-                axis = np.abs(c) / np.linalg.norm(c)
+                angle = wp.float32(np.arcsin(np.linalg.norm(c)))
+                axis = wp.vec3(np.abs(c))
+                axis = wp.normalize(axis)
                 q = wp.quat_from_axis_angle(axis, angle)
         return self.render_plane(
             "ground",
@@ -3092,7 +3099,7 @@ Instances: {len(self._instances)}"""
         parent_body: str = None,
         is_template: bool = False,
         up_axis: int = 1,
-        color: Tuple[float, float, float] = None,
+        color: tuple[float, float, float] = None,
     ):
         """Add a arrow for visualization

warp/render/render_usd.py CHANGED Viewed

@@ -13,10 +13,19 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from __future__ import annotations
 import numpy as np
 import warp as wp
+UP_AXIS_TOKEN = ("X", "Y", "Z")
+UP_AXIS_VEC = (
+    np.array((1.0, 0.0, 0.0), dtype=float),
+    np.array((0.0, 1.0, 0.0), dtype=float),
+    np.array((0.0, 0.0, 1.0), dtype=float),
+)
 def _usd_add_xform(prim):
     from pxr import UsdGeom
@@ -29,7 +38,13 @@ def _usd_add_xform(prim):
     prim.AddScaleOp()
-def _usd_set_xform(xform, pos: tuple, rot: tuple, scale: tuple, time):
+def _usd_set_xform(
+    xform,
+    pos: tuple | None = None,
+    rot: tuple | None = None,
+    scale: tuple | None = None,
+    time: float = 0.0,
+):
     from pxr import Gf, UsdGeom
     xform = UsdGeom.Xform(xform)
@@ -108,7 +123,7 @@ class UsdRenderer:
         self.stage.SetDefaultPrim(self.root.GetPrim())
         self.stage.SetStartTimeCode(0.0)
         self.stage.SetEndTimeCode(0.0)
-        self.stage.SetTimeCodesPerSecond(self.fps)
+        self.stage.SetFramesPerSecond(self.fps)
         if up_axis == "X":
             UsdGeom.SetStageUpAxis(self.stage, UsdGeom.Tokens.x)
@@ -622,7 +637,82 @@ class UsdRenderer:
         return prim_path
-    def render_line_list(self, name, vertices, indices, color, radius):
+    def render_arrow(
+        self,
+        name: str,
+        pos: tuple,
+        rot: tuple,
+        base_radius: float,
+        base_height: float,
+        cap_radius: float = None,
+        cap_height: float = None,
+        parent_body: str = None,
+        is_template: bool = False,
+        up_axis: int = 1,
+        color: tuple[float, float, float] = None,
+        visible: bool = True,
+    ):
+        from pxr import Gf, Sdf, UsdGeom
+        if is_template:
+            prim_path = self._resolve_path(name, parent_body, is_template)
+            blueprint = UsdGeom.Scope.Define(self.stage, prim_path)
+            blueprint_prim = blueprint.GetPrim()
+            blueprint_prim.SetInstanceable(True)
+            blueprint_prim.SetSpecifier(Sdf.SpecifierClass)
+            arrow_path = prim_path.AppendChild("arrow")
+        else:
+            arrow_path = self._resolve_path(name, parent_body)
+            prim_path = arrow_path
+        arrow = UsdGeom.Xform.Get(self.stage, arrow_path)
+        if not arrow:
+            arrow = UsdGeom.Xform.Define(self.stage, arrow_path)
+            _usd_add_xform(arrow)
+        base_path = arrow_path.AppendChild("base")
+        base = UsdGeom.Xform.Get(self.stage, base_path)
+        if not base:
+            base = UsdGeom.Cylinder.Define(self.stage, base_path)
+            _usd_add_xform(base)
+        base.GetRadiusAttr().Set(float(base_radius))
+        base.GetHeightAttr().Set(float(base_height))
+        base.GetAxisAttr().Set(UP_AXIS_TOKEN[up_axis])
+        _usd_set_xform(base, UP_AXIS_VEC[up_axis] * base_height * 0.5)
+        cap_path = arrow_path.AppendChild("cap")
+        cap = UsdGeom.Xform.Get(self.stage, cap_path)
+        if not cap:
+            cap = UsdGeom.Cone.Define(self.stage, arrow_path.AppendChild("cap"))
+            _usd_add_xform(cap)
+        cap.GetRadiusAttr().Set(float(cap_radius))
+        cap.GetHeightAttr().Set(float(cap_height))
+        cap.GetAxisAttr().Set(UP_AXIS_TOKEN[up_axis])
+        _usd_set_xform(cap, UP_AXIS_VEC[up_axis] * (base_height + cap_height * 0.5))
+        if color is not None:
+            base.GetDisplayColorAttr().Set([Gf.Vec3f(color)], self.time)
+            cap.GetDisplayColorAttr().Set([Gf.Vec3f(color)], self.time)
+        self._shape_constructors[name] = UsdGeom.Xform
+        if not is_template:
+            _usd_set_xform(arrow, pos, rot, (1.0, 1.0, 1.0), self.time)
+        arrow.GetVisibilityAttr().Set("inherited" if visible else "invisible", self.time)
+        return prim_path
+    def render_line_list(
+        self,
+        name: str,
+        vertices,
+        indices,
+        color: tuple = None,
+        radius: float = 0.01,
+        visible: bool = True,
+    ):
         """Debug helper to add a line list as a set of capsules
         Args:
@@ -717,8 +807,8 @@ class UsdRenderer:
         instancer_capsule = UsdGeom.Capsule.Get(self.stage, instancer.GetPath().AppendChild("capsule"))
         instancer_capsule.GetDisplayColorAttr().Set([Gf.Vec3f(color)], self.time)
-    def render_points(self, name: str, points, radius, colors=None):
-        from pxr import Gf, UsdGeom
+    def render_points(self, name: str, points, radius, colors=None, as_spheres: bool = True, visible: bool = True):
+        from pxr import Gf, UsdGeom, Vt
         instancer_path = self.root.GetPath().AppendChild(name)
         instancer = UsdGeom.PointInstancer.Get(self.stage, instancer_path)
@@ -737,7 +827,7 @@ class UsdRenderer:
                     instancer_sphere.GetDisplayColorAttr().Set([Gf.Vec3f(colors)], self.time)
                 instancer.CreatePrototypesRel().SetTargets([instancer_sphere.GetPath()])
-                instancer.CreateProtoIndicesAttr().Set([0] * len(points))
+                instancer.CreateProtoIndicesAttr().Set(Vt.IntArray((0,) * len(points)))
                 # set identity rotations
                 quats = [Gf.Quath(1.0, 0.0, 0.0, 0.0)] * len(points)

warp/sim/collide.py CHANGED Viewed

@@ -614,9 +614,9 @@ def volume_grad(volume: wp.uint64, p: wp.vec3):
 @wp.func
 def counter_increment(counter: wp.array(dtype=int), counter_index: int, tids: wp.array(dtype=int), tid: int):
     # increment counter, remember which thread received which counter value
-    next_count = wp.atomic_add(counter, counter_index, 1)
-    tids[tid] = next_count
-    return next_count
+    count = wp.atomic_add(counter, counter_index, 1)
+    tids[tid] = count
+    return count
 @wp.func_replay(counter_increment)
@@ -629,10 +629,10 @@ def limited_counter_increment(
     counter: wp.array(dtype=int), counter_index: int, tids: wp.array(dtype=int), tid: int, index_limit: int
 ):
     # increment counter but only if it is smaller than index_limit, remember which thread received which counter value
-    next_count = wp.atomic_add(counter, counter_index, 1)
-    if next_count < index_limit or index_limit < 0:
-        tids[tid] = next_count
-        return next_count
+    count = wp.atomic_add(counter, counter_index, 1)
+    if count < index_limit or index_limit < 0:
+        tids[tid] = count
+        return count
     tids[tid] = -1
     return -1
@@ -1547,6 +1547,8 @@ def handle_contact_pairs(
                 # reached contact point limit
                 return
         index = counter_increment(contact_count, 0, contact_tids, tid)
+        if index == -1:
+            return
         contact_shape0[index] = shape_a
         contact_shape1[index] = shape_b
         # transform from world into body frame (so the contact point includes the shape transform)
@@ -1690,7 +1692,7 @@ def collide(
                 model.rigid_contact_normal = wp.empty_like(model.rigid_contact_normal)
                 model.rigid_contact_thickness = wp.empty_like(model.rigid_contact_thickness)
                 model.rigid_contact_count = wp.zeros_like(model.rigid_contact_count)
-                model.rigid_contact_tids = wp.zeros_like(model.rigid_contact_tids)
+                model.rigid_contact_tids = wp.full_like(model.rigid_contact_tids, -1)
                 model.rigid_contact_shape0 = wp.empty_like(model.rigid_contact_shape0)
                 model.rigid_contact_shape1 = wp.empty_like(model.rigid_contact_shape1)
@@ -1698,7 +1700,7 @@ def collide(
                     model.rigid_contact_pairwise_counter = wp.zeros_like(model.rigid_contact_pairwise_counter)
             else:
                 model.rigid_contact_count.zero_()
-                model.rigid_contact_tids.zero_()
+                model.rigid_contact_tids.fill_(-1)
                 if model.rigid_contact_pairwise_counter is not None:
                     model.rigid_contact_pairwise_counter.zero_()