warp-lang 1.7.0__py3-none-win_amd64.whl → 1.7.2__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (60) hide show
  1. warp/autograd.py +12 -2
  2. warp/bin/warp-clang.dll +0 -0
  3. warp/bin/warp.dll +0 -0
  4. warp/build.py +1 -1
  5. warp/builtins.py +103 -66
  6. warp/codegen.py +48 -27
  7. warp/config.py +1 -1
  8. warp/context.py +112 -49
  9. warp/examples/benchmarks/benchmark_cloth.py +1 -1
  10. warp/examples/distributed/example_jacobi_mpi.py +507 -0
  11. warp/fem/cache.py +1 -1
  12. warp/fem/field/field.py +11 -1
  13. warp/fem/field/nodal_field.py +36 -22
  14. warp/fem/geometry/adaptive_nanogrid.py +7 -3
  15. warp/fem/geometry/trimesh.py +4 -12
  16. warp/jax_experimental/custom_call.py +14 -2
  17. warp/jax_experimental/ffi.py +100 -67
  18. warp/native/builtin.h +91 -65
  19. warp/native/svd.h +59 -49
  20. warp/native/tile.h +55 -26
  21. warp/native/volume.cpp +2 -2
  22. warp/native/volume_builder.cu +33 -22
  23. warp/native/warp.cu +1 -1
  24. warp/render/render_opengl.py +41 -34
  25. warp/render/render_usd.py +96 -6
  26. warp/sim/collide.py +11 -9
  27. warp/sim/inertia.py +189 -156
  28. warp/sim/integrator_euler.py +3 -0
  29. warp/sim/integrator_xpbd.py +3 -0
  30. warp/sim/model.py +56 -31
  31. warp/sim/render.py +4 -0
  32. warp/sparse.py +1 -1
  33. warp/stubs.py +73 -25
  34. warp/tests/assets/torus.usda +1 -1
  35. warp/tests/cuda/test_streams.py +1 -1
  36. warp/tests/sim/test_collision.py +237 -206
  37. warp/tests/sim/test_inertia.py +161 -0
  38. warp/tests/sim/test_model.py +5 -3
  39. warp/tests/sim/{flaky_test_sim_grad.py → test_sim_grad.py} +1 -4
  40. warp/tests/sim/test_xpbd.py +399 -0
  41. warp/tests/test_array.py +8 -7
  42. warp/tests/test_atomic.py +181 -2
  43. warp/tests/test_builtins_resolution.py +38 -38
  44. warp/tests/test_codegen.py +24 -3
  45. warp/tests/test_examples.py +16 -6
  46. warp/tests/test_fem.py +93 -14
  47. warp/tests/test_func.py +1 -1
  48. warp/tests/test_mat.py +416 -119
  49. warp/tests/test_quat.py +321 -137
  50. warp/tests/test_struct.py +116 -0
  51. warp/tests/test_vec.py +320 -174
  52. warp/tests/tile/test_tile.py +27 -0
  53. warp/tests/tile/test_tile_load.py +124 -0
  54. warp/tests/unittest_suites.py +2 -5
  55. warp/types.py +107 -9
  56. {warp_lang-1.7.0.dist-info → warp_lang-1.7.2.dist-info}/METADATA +41 -19
  57. {warp_lang-1.7.0.dist-info → warp_lang-1.7.2.dist-info}/RECORD +60 -57
  58. {warp_lang-1.7.0.dist-info → warp_lang-1.7.2.dist-info}/WHEEL +1 -1
  59. {warp_lang-1.7.0.dist-info → warp_lang-1.7.2.dist-info}/licenses/LICENSE.md +0 -26
  60. {warp_lang-1.7.0.dist-info → warp_lang-1.7.2.dist-info}/top_level.txt +0 -0
warp/native/tile.h CHANGED
@@ -219,8 +219,8 @@ struct tile_coord_t
219
219
  {
220
220
  int indices[N];
221
221
 
222
- CUDA_CALLABLE inline int operator[](int i) const { assert(0 <= 1 && i < N); return indices[i]; }
223
- CUDA_CALLABLE inline int& operator[](int i) { assert(0 <= 1 && i < N); return indices[i]; }
222
+ CUDA_CALLABLE inline int operator[](int i) const { assert(0 <= i && i < N); return indices[i]; }
223
+ CUDA_CALLABLE inline int& operator[](int i) { assert(0 <= i && i < N); return indices[i]; }
224
224
 
225
225
  CUDA_CALLABLE inline tile_coord_t<N> operator + (const tile_coord_t<N>& c) const
226
226
  {
@@ -1133,17 +1133,18 @@ struct tile_shared_t
1133
1133
  constexpr int lastdim = Layout::Shape::N-1;
1134
1134
  constexpr bool contiguous_src = Layout::Stride::dim(lastdim) == 1;
1135
1135
  const bool contiguous_dest = dest.data.strides[lastdim] == sizeof(T);
1136
- const int elements = (dest.data.shape[lastdim] - dest.offset[lastdim]);
1136
+ const int elements = min(Layout::Shape::dim(1), (dest.data.shape[lastdim] - dest.offset[lastdim]));
1137
1137
  const bool aligned_size = (elements*sizeof(T))%sizeof(float4) == 0;
1138
-
1138
+ const bool aligned_stride = (dest.data.strides[0]/sizeof(T))%Layout::Stride::dim(0) == 0;
1139
+
1139
1140
  float4* dest128 = (float4*)&dest.data.data[dest.index_from_coord(tile_coord(0,0))];
1140
1141
  const bool aligned_dst = (uint64_t)(dest128)%sizeof(float4) == 0;
1141
1142
 
1142
- if (contiguous_dest && contiguous_src && aligned_size && aligned_dst)
1143
- {
1144
- constexpr int M = Layout::Shape::dim(0);
1145
- constexpr int N = (Layout::Shape::dim(1)*sizeof(T))/sizeof(float4);
1143
+ constexpr int M = Layout::Shape::dim(0);
1144
+ constexpr int N = (Layout::Shape::dim(1)*sizeof(T))/sizeof(float4);
1146
1145
 
1146
+ if (contiguous_dest && contiguous_src && aligned_size && aligned_dst && aligned_stride && N)
1147
+ {
1147
1148
  // alias of shared tile with 128bit type
1148
1149
  using SrcLayout = tile_layout_strided_t<tile_shape_t<M, N>>;
1149
1150
  tile_shared_t<float4, SrcLayout> src128((float4*)data.ptr);
@@ -1222,17 +1223,18 @@ struct tile_shared_t
1222
1223
  constexpr int lastdim = Layout::Shape::N-1;
1223
1224
  constexpr bool contiguous_dest = Layout::Stride::dim(lastdim) == 1;
1224
1225
  const bool contiguous_src = src.data.strides[lastdim] == sizeof(T);
1225
- const int elements = (src.data.shape[lastdim] - src.offset[lastdim]);
1226
+ const int elements = min(Layout::Shape::dim(1), (src.data.shape[lastdim] - src.offset[lastdim]));
1226
1227
  const bool aligned_size = (elements*sizeof(T))%sizeof(float4) == 0;
1227
-
1228
+ const bool aligned_stride = (src.data.strides[0]/sizeof(T))%Layout::Stride::dim(0) == 0;
1229
+
1228
1230
  float4* src128 = (float4*)&src.data.data[src.index_from_coord(tile_coord(0,0))];
1229
1231
  const bool aligned_src = (uint64_t)(src128)%sizeof(float4) == 0;
1230
1232
 
1231
- if (contiguous_dest && contiguous_src && aligned_size && aligned_src)
1232
- {
1233
- constexpr int M = Layout::Shape::dim(0);
1234
- constexpr int N = (Layout::Shape::dim(1)*sizeof(T))/sizeof(float4);
1233
+ constexpr int M = Layout::Shape::dim(0);
1234
+ constexpr int N = (Layout::Shape::dim(1)*sizeof(T))/sizeof(float4);
1235
1235
 
1236
+ if (contiguous_dest && contiguous_src && aligned_size && aligned_src && aligned_stride && N)
1237
+ {
1236
1238
  // alias of shared tile with 128bit type
1237
1239
  using DestLayout = tile_layout_strided_t<tile_shape_t<M, N>>;
1238
1240
  tile_shared_t<float4, DestLayout> dest128((float4*)data.ptr);
@@ -1282,13 +1284,13 @@ struct tile_shared_t
1282
1284
  template <typename Global>
1283
1285
  inline CUDA_CALLABLE auto atomic_add(Global& dest)
1284
1286
  {
1285
- copy_to_register().atomic_add(dest);
1287
+ return copy_to_register().atomic_add(dest);
1286
1288
  }
1287
1289
 
1288
1290
  template <typename Global>
1289
1291
  inline CUDA_CALLABLE auto atomic_add_grad(Global& dest)
1290
1292
  {
1291
- grad_to_register().atomic_add_grad(dest);
1293
+ return grad_to_register().atomic_add_grad(dest);
1292
1294
  }
1293
1295
 
1294
1296
  // overload for integral types
@@ -1682,15 +1684,27 @@ template <typename T, typename Tile>
1682
1684
  inline CUDA_CALLABLE void tile_store(array_t<T>& dest, int x, int y, int z, int w, Tile& src) { src.copy_to_global(tile_global_t<T, typename Tile::Layout::Shape>(dest, tile_coord(x, y, z, w))); }
1683
1685
 
1684
1686
 
1685
-
1687
+ // compiler struggles with these if they are one line
1686
1688
  template <typename T, typename Tile>
1687
- inline CUDA_CALLABLE auto tile_atomic_add(array_t<T>& dest, int x, Tile& src) { return src.atomic_add(tile_global_t<T, typename Tile::Layout::Shape>(dest, tile_coord(x))); }
1689
+ inline CUDA_CALLABLE auto tile_atomic_add(array_t<T>& dest, int x, Tile& src) {
1690
+ tile_global_t<T, typename Tile::Layout::Shape> global(dest, tile_coord(x));
1691
+ return src.atomic_add(global);
1692
+ }
1688
1693
  template <typename T, typename Tile>
1689
- inline CUDA_CALLABLE auto tile_atomic_add(array_t<T>& dest, int x, int y, Tile& src) { return src.atomic_add(tile_global_t<T, typename Tile::Layout::Shape>(dest, tile_coord(x, y)));}
1694
+ inline CUDA_CALLABLE auto tile_atomic_add(array_t<T>& dest, int x, int y, Tile& src) {
1695
+ tile_global_t<T, typename Tile::Layout::Shape> global(dest, tile_coord(x, y));
1696
+ return src.atomic_add(global);
1697
+ }
1690
1698
  template <typename T, typename Tile>
1691
- inline CUDA_CALLABLE auto tile_atomic_add(array_t<T>& dest, int x, int y, int z, Tile& src) { return src.atomic_add(tile_global_t<T, typename Tile::Layout::Shape>(dest, tile_coord(x, y, z)));}
1699
+ inline CUDA_CALLABLE auto tile_atomic_add(array_t<T>& dest, int x, int y, int z, Tile& src) {
1700
+ tile_global_t<T, typename Tile::Layout::Shape> global(dest, tile_coord(x, y, z));
1701
+ return src.atomic_add(global);
1702
+ }
1692
1703
  template <typename T, typename Tile>
1693
- inline CUDA_CALLABLE auto tile_atomic_add(array_t<T>& dest, int x, int y, int z, int w, Tile& src) { return src.atomic_add(tile_global_t<T, typename Tile::Layout::Shape>(dest, tile_coord(x, y, z, w)));}
1704
+ inline CUDA_CALLABLE auto tile_atomic_add(array_t<T>& dest, int x, int y, int z, int w, Tile& src) {
1705
+ tile_global_t<T, typename Tile::Layout::Shape> global(dest, tile_coord(x, y, z, w));
1706
+ return src.atomic_add(global);
1707
+ }
1694
1708
 
1695
1709
 
1696
1710
  //-------------------------------------
@@ -2468,21 +2482,18 @@ inline CUDA_CALLABLE void assign(TileA& dest, int i, const Scalar& src)
2468
2482
  dest.data(tile_coord(i)) = src;
2469
2483
  WP_TILE_SYNC();
2470
2484
  }
2471
-
2472
2485
  template <typename TileA, typename Scalar>
2473
2486
  inline CUDA_CALLABLE void assign(TileA& dest, int i, int j, const Scalar& src)
2474
2487
  {
2475
2488
  dest.data(tile_coord(i, j)) = src;
2476
2489
  WP_TILE_SYNC();
2477
2490
  }
2478
-
2479
2491
  template <typename TileA, typename Scalar>
2480
2492
  inline CUDA_CALLABLE void assign(TileA& dest, int i, int j, int k, const Scalar& src)
2481
2493
  {
2482
2494
  dest.data(tile_coord(i, j, k)) = src;
2483
2495
  WP_TILE_SYNC();
2484
2496
  }
2485
-
2486
2497
  template <typename TileA, typename Scalar>
2487
2498
  inline CUDA_CALLABLE void assign(TileA& dest, int i, int j, int k, int l, const Scalar& src)
2488
2499
  {
@@ -2490,8 +2501,26 @@ inline CUDA_CALLABLE void assign(TileA& dest, int i, int j, int k, int l, const
2490
2501
  WP_TILE_SYNC();
2491
2502
  }
2492
2503
 
2493
-
2494
-
2504
+ template <typename TileA, typename AdjTileA, typename Scalar>
2505
+ inline CUDA_CALLABLE void adj_assign(TileA& dest, int i, const Scalar& src, AdjTileA& adj_dest, int adj_i, Scalar& adj_src)
2506
+ {
2507
+ adj_src += dest.grad(tile_coord(i));
2508
+ }
2509
+ template <typename TileA, typename AdjTileA, typename Scalar>
2510
+ inline CUDA_CALLABLE void adj_assign(TileA& dest, int i, int j, const Scalar& src, AdjTileA& adj_dest, int adj_i, int adj_j, Scalar& adj_src)
2511
+ {
2512
+ adj_src += dest.grad(tile_coord(i, j));
2513
+ }
2514
+ template <typename TileA, typename AdjTileA, typename Scalar>
2515
+ inline CUDA_CALLABLE void adj_assign(TileA& dest, int i, int j, int k, const Scalar& src, AdjTileA& adj_dest, int adj_i, int adj_j, int adj_k, Scalar& adj_src)
2516
+ {
2517
+ adj_src += dest.grad(tile_coord(i, j, k));
2518
+ }
2519
+ template <typename TileA, typename AdjTileA, typename Scalar>
2520
+ inline CUDA_CALLABLE void adj_assign(TileA& dest, int i, int j, int k, int l, const Scalar& src, AdjTileA& adj_dest, int adj_i, int adj_j, int adj_k, int adj_l, Scalar& adj_src)
2521
+ {
2522
+ adj_src += dest.grad(tile_coord(i, j, k, l));
2523
+ }
2495
2524
 
2496
2525
  template <typename TileA, typename TileB, typename Coord>
2497
2526
  inline CUDA_CALLABLE void tile_assign(TileA& dest, TileB& src, const Coord& offset)
warp/native/volume.cpp CHANGED
@@ -87,7 +87,7 @@ void volume_set_map(nanovdb::Map& map, const float transform[9], const float tra
87
87
  // NB: buf must be a host pointer
88
88
  uint64_t volume_create_host(void* buf, uint64_t size, bool copy, bool owner)
89
89
  {
90
- if (size > 0 && size < sizeof(pnanovdb_grid_t) + sizeof(pnanovdb_tree_t))
90
+ if (buf == nullptr || (size > 0 && size < sizeof(pnanovdb_grid_t) + sizeof(pnanovdb_tree_t)))
91
91
  return 0; // This cannot be a valid NanoVDB grid with data
92
92
 
93
93
  if (!copy && volume_exists(buf))
@@ -138,7 +138,7 @@ uint64_t volume_create_host(void* buf, uint64_t size, bool copy, bool owner)
138
138
  // NB: buf must be a pointer on the same device
139
139
  uint64_t volume_create_device(void* context, void* buf, uint64_t size, bool copy, bool owner)
140
140
  {
141
- if (size > 0 && size < sizeof(pnanovdb_grid_t) + sizeof(pnanovdb_tree_t))
141
+ if (buf == nullptr || (size > 0 && size < sizeof(pnanovdb_grid_t) + sizeof(pnanovdb_tree_t)))
142
142
  return 0; // This cannot be a valid NanoVDB grid with data
143
143
 
144
144
  if (!copy && volume_exists(buf))
warp/native/volume_builder.cu CHANGED
@@ -43,6 +43,7 @@ struct Allocator
43
43
  {
44
44
  // in PointsToGrid stream argument always coincide with current stream, ignore
45
45
  *d_ptr = alloc_device(WP_CURRENT_CONTEXT, bytes);
46
+ cudaCheckError();
46
47
  return cudaSuccess;
47
48
  }
48
49
 
@@ -160,6 +161,7 @@ class DeviceBuffer
160
161
  {
161
162
  mGpuData = alloc_device(WP_CURRENT_CONTEXT, size);
162
163
  }
164
+ cudaCheckError();
163
165
  mSize = size;
164
166
  mManaged = true;
165
167
  }
@@ -432,35 +434,44 @@ void build_grid_from_points(nanovdb::Grid<nanovdb::NanoTree<BuildT>> *&out_grid,
432
434
  out_grid = nullptr;
433
435
  out_grid_size = 0;
434
436
 
435
- cudaStream_t stream = static_cast<cudaStream_t>(cuda_stream_get_current());
436
- nanovdb::tools::cuda::PointsToGrid<BuildT, Allocator> p2g(params.map, stream);
437
+ try
438
+ {
437
439
 
438
- // p2g.setVerbose(2);
439
- p2g.setGridName(params.name);
440
- p2g.setChecksum(nanovdb::CheckMode::Disable);
440
+ cudaStream_t stream = static_cast<cudaStream_t>(cuda_stream_get_current());
441
+ nanovdb::tools::cuda::PointsToGrid<BuildT, Allocator> p2g(params.map, stream);
441
442
 
442
- // Only compute bbox for OnIndex grids. Otherwise bbox will be computed after activating all leaf voxels
443
- p2g.includeBBox(nanovdb::BuildTraits<BuildT>::is_onindex);
443
+ // p2g.setVerbose(2);
444
+ p2g.setGridName(params.name);
445
+ p2g.setChecksum(nanovdb::CheckMode::Disable);
444
446
 
445
- nanovdb::GridHandle<DeviceBuffer> grid_handle;
447
+ // Only compute bbox for OnIndex grids. Otherwise bbox will be computed after activating all leaf voxels
448
+ p2g.includeBBox(nanovdb::BuildTraits<BuildT>::is_onindex);
446
449
 
447
- if (points_in_world_space)
448
- {
449
- grid_handle = p2g.getHandle(WorldSpacePointsPtr{static_cast<const nanovdb::Vec3f *>(points), params.map}, num_points,
450
- DeviceBuffer());
451
- }
452
- else
453
- {
454
- grid_handle = p2g.getHandle(static_cast<const nanovdb::Coord *>(points), num_points, DeviceBuffer());
455
- }
450
+ nanovdb::GridHandle<DeviceBuffer> grid_handle;
456
451
 
457
- out_grid = grid_handle.deviceGrid<BuildT>();
458
- out_grid_size = grid_handle.gridSize();
452
+ if (points_in_world_space)
453
+ {
454
+ grid_handle = p2g.getHandle(WorldSpacePointsPtr{static_cast<const nanovdb::Vec3f*>(points), params.map},
455
+ num_points, DeviceBuffer());
456
+ }
457
+ else
458
+ {
459
+ grid_handle = p2g.getHandle(static_cast<const nanovdb::Coord*>(points), num_points, DeviceBuffer());
460
+ }
461
+
462
+ out_grid = grid_handle.deviceGrid<BuildT>();
463
+ out_grid_size = grid_handle.gridSize();
459
464
 
460
- finalize_grid(*out_grid, params);
465
+ finalize_grid(*out_grid, params);
461
466
 
462
- // So that buffer is not destroyed when handles goes out of scope
463
- grid_handle.buffer().detachDeviceData();
467
+ // So that buffer is not destroyed when handles goes out of scope
468
+ grid_handle.buffer().detachDeviceData();
469
+ }
470
+ catch (const std::runtime_error& exc)
471
+ {
472
+ out_grid = nullptr;
473
+ out_grid_size = 0;
474
+ }
464
475
  }
465
476
 
466
477
 
warp/native/warp.cu CHANGED
@@ -3027,7 +3027,7 @@ size_t cuda_compile_program(const char* cuda_src, const char* program_name, int
3027
3027
  fprintf(stderr, "Warp error: num_ltoirs > 0 but ltoir_input_types, ltoirs or ltoir_sizes are NULL\n");
3028
3028
  return size_t(-1);
3029
3029
  }
3030
- nvJitLinkHandle handle;
3030
+ nvJitLinkHandle handle = nullptr;
3031
3031
  std::vector<const char *> lopts = {"-dlto", arch_opt_lto};
3032
3032
  if (use_ptx) {
3033
3033
  lopts.push_back("-ptx");
warp/render/render_opengl.py CHANGED
@@ -13,11 +13,13 @@
13
13
  # See the License for the specific language governing permissions and
14
14
  # limitations under the License.
15
15
 
16
+ from __future__ import annotations
17
+
16
18
  import ctypes
17
19
  import sys
18
20
  import time
19
21
  from collections import defaultdict
20
- from typing import List, Optional, Tuple, Union
22
+ from typing import List, Union
21
23
 
22
24
  import numpy as np
23
25
 
@@ -1500,16 +1502,16 @@ class OpenGLRenderer:
1500
1502
 
1501
1503
  def setup_tiled_rendering(
1502
1504
  self,
1503
- instances: List[List[int]],
1505
+ instances: list[list[int]],
1504
1506
  rescale_window: bool = False,
1505
- tile_width: Optional[int] = None,
1506
- tile_height: Optional[int] = None,
1507
- tile_ncols: Optional[int] = None,
1508
- tile_nrows: Optional[int] = None,
1509
- tile_positions: Optional[List[Tuple[int]]] = None,
1510
- tile_sizes: Optional[List[Tuple[int]]] = None,
1511
- projection_matrices: Optional[List[Mat44]] = None,
1512
- view_matrices: Optional[List[Mat44]] = None,
1507
+ tile_width: int | None = None,
1508
+ tile_height: int | None = None,
1509
+ tile_ncols: int | None = None,
1510
+ tile_nrows: int | None = None,
1511
+ tile_positions: list[tuple[int]] | None = None,
1512
+ tile_sizes: list[tuple[int]] | None = None,
1513
+ projection_matrices: list[Mat44] | None = None,
1514
+ view_matrices: list[Mat44] | None = None,
1513
1515
  ):
1514
1516
  """
1515
1517
  Set up tiled rendering where the render buffer is split into multiple tiles that can visualize
@@ -1602,11 +1604,11 @@ class OpenGLRenderer:
1602
1604
  def update_tile(
1603
1605
  self,
1604
1606
  tile_id,
1605
- instances: Optional[List[int]] = None,
1606
- projection_matrix: Optional[Mat44] = None,
1607
- view_matrix: Optional[Mat44] = None,
1608
- tile_size: Optional[Tuple[int]] = None,
1609
- tile_position: Optional[Tuple[int]] = None,
1607
+ instances: list[int] | None = None,
1608
+ projection_matrix: Mat44 | None = None,
1609
+ view_matrix: Mat44 | None = None,
1610
+ tile_size: tuple[int] | None = None,
1611
+ tile_position: tuple[int] | None = None,
1610
1612
  ):
1611
1613
  """
1612
1614
  Update the shape instances, projection matrix, view matrix, tile size, or tile position
@@ -1806,7 +1808,7 @@ class OpenGLRenderer:
1806
1808
 
1807
1809
  return np.array((scaling, 0, 0, 0, 0, scaling, 0, 0, 0, 0, scaling, 0, 0, 0, 0, 1), dtype=np.float32)
1808
1810
 
1809
- def update_model_matrix(self, model_matrix: Optional[Mat44] = None):
1811
+ def update_model_matrix(self, model_matrix: Mat44 | None = None):
1810
1812
  gl = OpenGLRenderer.gl
1811
1813
 
1812
1814
  self._switch_context()
@@ -1988,6 +1990,10 @@ class OpenGLRenderer:
1988
1990
  gl.glBlendFunc(gl.GL_SRC_ALPHA, gl.GL_ONE_MINUS_SRC_ALPHA)
1989
1991
  gl.glEnable(gl.GL_BLEND)
1990
1992
 
1993
+ # disable depth test to fix text rendering
1994
+ # https://github.com/pyglet/pyglet/issues/1302
1995
+ gl.glDisable(gl.GL_DEPTH_TEST)
1996
+
1991
1997
  text = f"""Sim Time: {self.time:.1f}
1992
1998
  Update FPS: {self._fps_update:.1f}
1993
1999
  Render FPS: {self._fps_render:.1f}
@@ -2001,6 +2007,8 @@ Instances: {len(self._instances)}"""
2001
2007
  self._info_label.y = self.screen_height - 5
2002
2008
  self._info_label.draw()
2003
2009
 
2010
+ gl.glEnable(gl.GL_DEPTH_TEST)
2011
+
2004
2012
  for cb in self.render_2d_callbacks:
2005
2013
  cb()
2006
2014
 
@@ -2339,6 +2347,14 @@ Instances: {len(self._instances)}"""
2339
2347
  colors1 = np.array(colors1, dtype=np.float32)
2340
2348
  colors2 = np.array(colors2, dtype=np.float32)
2341
2349
 
2350
+ # create color buffers
2351
+ if self._instance_color1_buffer is None:
2352
+ self._instance_color1_buffer = gl.GLuint()
2353
+ gl.glGenBuffers(1, self._instance_color1_buffer)
2354
+ if self._instance_color2_buffer is None:
2355
+ self._instance_color2_buffer = gl.GLuint()
2356
+ gl.glGenBuffers(1, self._instance_color2_buffer)
2357
+
2342
2358
  gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self._instance_color1_buffer)
2343
2359
  gl.glBufferData(gl.GL_ARRAY_BUFFER, colors1.nbytes, colors1.ctypes.data, gl.GL_STATIC_DRAW)
2344
2360
 
@@ -2362,14 +2378,10 @@ Instances: {len(self._instances)}"""
2362
2378
  )
2363
2379
 
2364
2380
  gl.glUseProgram(self._shape_shader.id)
2365
- if self._instance_transform_gl_buffer is not None:
2366
- gl.glDeleteBuffers(1, self._instance_transform_gl_buffer)
2367
- gl.glDeleteBuffers(1, self._instance_color1_buffer)
2368
- gl.glDeleteBuffers(1, self._instance_color2_buffer)
2369
-
2370
- # create instance buffer and bind it as an instanced array
2371
- self._instance_transform_gl_buffer = gl.GLuint()
2372
- gl.glGenBuffers(1, self._instance_transform_gl_buffer)
2381
+ if self._instance_transform_gl_buffer is None:
2382
+ # create instance buffer and bind it as an instanced array
2383
+ self._instance_transform_gl_buffer = gl.GLuint()
2384
+ gl.glGenBuffers(1, self._instance_transform_gl_buffer)
2373
2385
  gl.glBindBuffer(gl.GL_ARRAY_BUFFER, self._instance_transform_gl_buffer)
2374
2386
 
2375
2387
  transforms = np.tile(np.diag(np.ones(4, dtype=np.float32)), (len(self._instances), 1, 1))
@@ -2380,12 +2392,6 @@ Instances: {len(self._instances)}"""
2380
2392
  int(self._instance_transform_gl_buffer.value), self._device
2381
2393
  )
2382
2394
 
2383
- # create color buffers
2384
- self._instance_color1_buffer = gl.GLuint()
2385
- gl.glGenBuffers(1, self._instance_color1_buffer)
2386
- self._instance_color2_buffer = gl.GLuint()
2387
- gl.glGenBuffers(1, self._instance_color2_buffer)
2388
-
2389
2395
  self.update_instance_colors()
2390
2396
 
2391
2397
  # set up instance attribute pointers
@@ -2440,7 +2446,7 @@ Instances: {len(self._instances)}"""
2440
2446
  gl.glBindVertexArray(0)
2441
2447
 
2442
2448
  def update_shape_instance(self, name, pos=None, rot=None, color1=None, color2=None, visible=None):
2443
- """Update the instance transform of the shape
2449
+ """Update the instance properties of the shape
2444
2450
 
2445
2451
  Args:
2446
2452
  name: The name of the shape
@@ -2783,8 +2789,9 @@ Instances: {len(self._instances)}"""
2783
2789
  q = (0.0, 0.0, 0.0, 1.0)
2784
2790
  else:
2785
2791
  c = np.cross(normal, (0.0, 1.0, 0.0))
2786
- angle = np.arcsin(np.linalg.norm(c))
2787
- axis = np.abs(c) / np.linalg.norm(c)
2792
+ angle = wp.float32(np.arcsin(np.linalg.norm(c)))
2793
+ axis = wp.vec3(np.abs(c))
2794
+ axis = wp.normalize(axis)
2788
2795
  q = wp.quat_from_axis_angle(axis, angle)
2789
2796
  return self.render_plane(
2790
2797
  "ground",
@@ -3092,7 +3099,7 @@ Instances: {len(self._instances)}"""
3092
3099
  parent_body: str = None,
3093
3100
  is_template: bool = False,
3094
3101
  up_axis: int = 1,
3095
- color: Tuple[float, float, float] = None,
3102
+ color: tuple[float, float, float] = None,
3096
3103
  ):
3097
3104
  """Add a arrow for visualization
3098
3105
 
warp/render/render_usd.py CHANGED
@@ -13,10 +13,19 @@
13
13
  # See the License for the specific language governing permissions and
14
14
  # limitations under the License.
15
15
 
16
+ from __future__ import annotations
17
+
16
18
  import numpy as np
17
19
 
18
20
  import warp as wp
19
21
 
22
+ UP_AXIS_TOKEN = ("X", "Y", "Z")
23
+ UP_AXIS_VEC = (
24
+ np.array((1.0, 0.0, 0.0), dtype=float),
25
+ np.array((0.0, 1.0, 0.0), dtype=float),
26
+ np.array((0.0, 0.0, 1.0), dtype=float),
27
+ )
28
+
20
29
 
21
30
  def _usd_add_xform(prim):
22
31
  from pxr import UsdGeom
@@ -29,7 +38,13 @@ def _usd_add_xform(prim):
29
38
  prim.AddScaleOp()
30
39
 
31
40
 
32
- def _usd_set_xform(xform, pos: tuple, rot: tuple, scale: tuple, time):
41
+ def _usd_set_xform(
42
+ xform,
43
+ pos: tuple | None = None,
44
+ rot: tuple | None = None,
45
+ scale: tuple | None = None,
46
+ time: float = 0.0,
47
+ ):
33
48
  from pxr import Gf, UsdGeom
34
49
 
35
50
  xform = UsdGeom.Xform(xform)
@@ -108,7 +123,7 @@ class UsdRenderer:
108
123
  self.stage.SetDefaultPrim(self.root.GetPrim())
109
124
  self.stage.SetStartTimeCode(0.0)
110
125
  self.stage.SetEndTimeCode(0.0)
111
- self.stage.SetTimeCodesPerSecond(self.fps)
126
+ self.stage.SetFramesPerSecond(self.fps)
112
127
 
113
128
  if up_axis == "X":
114
129
  UsdGeom.SetStageUpAxis(self.stage, UsdGeom.Tokens.x)
@@ -622,7 +637,82 @@ class UsdRenderer:
622
637
 
623
638
  return prim_path
624
639
 
625
- def render_line_list(self, name, vertices, indices, color, radius):
640
+ def render_arrow(
641
+ self,
642
+ name: str,
643
+ pos: tuple,
644
+ rot: tuple,
645
+ base_radius: float,
646
+ base_height: float,
647
+ cap_radius: float = None,
648
+ cap_height: float = None,
649
+ parent_body: str = None,
650
+ is_template: bool = False,
651
+ up_axis: int = 1,
652
+ color: tuple[float, float, float] = None,
653
+ visible: bool = True,
654
+ ):
655
+ from pxr import Gf, Sdf, UsdGeom
656
+
657
+ if is_template:
658
+ prim_path = self._resolve_path(name, parent_body, is_template)
659
+ blueprint = UsdGeom.Scope.Define(self.stage, prim_path)
660
+ blueprint_prim = blueprint.GetPrim()
661
+ blueprint_prim.SetInstanceable(True)
662
+ blueprint_prim.SetSpecifier(Sdf.SpecifierClass)
663
+ arrow_path = prim_path.AppendChild("arrow")
664
+ else:
665
+ arrow_path = self._resolve_path(name, parent_body)
666
+ prim_path = arrow_path
667
+
668
+ arrow = UsdGeom.Xform.Get(self.stage, arrow_path)
669
+ if not arrow:
670
+ arrow = UsdGeom.Xform.Define(self.stage, arrow_path)
671
+ _usd_add_xform(arrow)
672
+
673
+ base_path = arrow_path.AppendChild("base")
674
+ base = UsdGeom.Xform.Get(self.stage, base_path)
675
+ if not base:
676
+ base = UsdGeom.Cylinder.Define(self.stage, base_path)
677
+ _usd_add_xform(base)
678
+
679
+ base.GetRadiusAttr().Set(float(base_radius))
680
+ base.GetHeightAttr().Set(float(base_height))
681
+ base.GetAxisAttr().Set(UP_AXIS_TOKEN[up_axis])
682
+ _usd_set_xform(base, UP_AXIS_VEC[up_axis] * base_height * 0.5)
683
+
684
+ cap_path = arrow_path.AppendChild("cap")
685
+ cap = UsdGeom.Xform.Get(self.stage, cap_path)
686
+ if not cap:
687
+ cap = UsdGeom.Cone.Define(self.stage, arrow_path.AppendChild("cap"))
688
+ _usd_add_xform(cap)
689
+
690
+ cap.GetRadiusAttr().Set(float(cap_radius))
691
+ cap.GetHeightAttr().Set(float(cap_height))
692
+ cap.GetAxisAttr().Set(UP_AXIS_TOKEN[up_axis])
693
+ _usd_set_xform(cap, UP_AXIS_VEC[up_axis] * (base_height + cap_height * 0.5))
694
+
695
+ if color is not None:
696
+ base.GetDisplayColorAttr().Set([Gf.Vec3f(color)], self.time)
697
+ cap.GetDisplayColorAttr().Set([Gf.Vec3f(color)], self.time)
698
+
699
+ self._shape_constructors[name] = UsdGeom.Xform
700
+
701
+ if not is_template:
702
+ _usd_set_xform(arrow, pos, rot, (1.0, 1.0, 1.0), self.time)
703
+
704
+ arrow.GetVisibilityAttr().Set("inherited" if visible else "invisible", self.time)
705
+ return prim_path
706
+
707
+ def render_line_list(
708
+ self,
709
+ name: str,
710
+ vertices,
711
+ indices,
712
+ color: tuple = None,
713
+ radius: float = 0.01,
714
+ visible: bool = True,
715
+ ):
626
716
  """Debug helper to add a line list as a set of capsules
627
717
 
628
718
  Args:
@@ -717,8 +807,8 @@ class UsdRenderer:
717
807
  instancer_capsule = UsdGeom.Capsule.Get(self.stage, instancer.GetPath().AppendChild("capsule"))
718
808
  instancer_capsule.GetDisplayColorAttr().Set([Gf.Vec3f(color)], self.time)
719
809
 
720
- def render_points(self, name: str, points, radius, colors=None):
721
- from pxr import Gf, UsdGeom
810
+ def render_points(self, name: str, points, radius, colors=None, as_spheres: bool = True, visible: bool = True):
811
+ from pxr import Gf, UsdGeom, Vt
722
812
 
723
813
  instancer_path = self.root.GetPath().AppendChild(name)
724
814
  instancer = UsdGeom.PointInstancer.Get(self.stage, instancer_path)
@@ -737,7 +827,7 @@ class UsdRenderer:
737
827
  instancer_sphere.GetDisplayColorAttr().Set([Gf.Vec3f(colors)], self.time)
738
828
 
739
829
  instancer.CreatePrototypesRel().SetTargets([instancer_sphere.GetPath()])
740
- instancer.CreateProtoIndicesAttr().Set([0] * len(points))
830
+ instancer.CreateProtoIndicesAttr().Set(Vt.IntArray((0,) * len(points)))
741
831
 
742
832
  # set identity rotations
743
833
  quats = [Gf.Quath(1.0, 0.0, 0.0, 0.0)] * len(points)
warp/sim/collide.py CHANGED
@@ -614,9 +614,9 @@ def volume_grad(volume: wp.uint64, p: wp.vec3):
614
614
  @wp.func
615
615
  def counter_increment(counter: wp.array(dtype=int), counter_index: int, tids: wp.array(dtype=int), tid: int):
616
616
  # increment counter, remember which thread received which counter value
617
- next_count = wp.atomic_add(counter, counter_index, 1)
618
- tids[tid] = next_count
619
- return next_count
617
+ count = wp.atomic_add(counter, counter_index, 1)
618
+ tids[tid] = count
619
+ return count
620
620
 
621
621
 
622
622
  @wp.func_replay(counter_increment)
@@ -629,10 +629,10 @@ def limited_counter_increment(
629
629
  counter: wp.array(dtype=int), counter_index: int, tids: wp.array(dtype=int), tid: int, index_limit: int
630
630
  ):
631
631
  # increment counter but only if it is smaller than index_limit, remember which thread received which counter value
632
- next_count = wp.atomic_add(counter, counter_index, 1)
633
- if next_count < index_limit or index_limit < 0:
634
- tids[tid] = next_count
635
- return next_count
632
+ count = wp.atomic_add(counter, counter_index, 1)
633
+ if count < index_limit or index_limit < 0:
634
+ tids[tid] = count
635
+ return count
636
636
  tids[tid] = -1
637
637
  return -1
638
638
 
@@ -1547,6 +1547,8 @@ def handle_contact_pairs(
1547
1547
  # reached contact point limit
1548
1548
  return
1549
1549
  index = counter_increment(contact_count, 0, contact_tids, tid)
1550
+ if index == -1:
1551
+ return
1550
1552
  contact_shape0[index] = shape_a
1551
1553
  contact_shape1[index] = shape_b
1552
1554
  # transform from world into body frame (so the contact point includes the shape transform)
@@ -1690,7 +1692,7 @@ def collide(
1690
1692
  model.rigid_contact_normal = wp.empty_like(model.rigid_contact_normal)
1691
1693
  model.rigid_contact_thickness = wp.empty_like(model.rigid_contact_thickness)
1692
1694
  model.rigid_contact_count = wp.zeros_like(model.rigid_contact_count)
1693
- model.rigid_contact_tids = wp.zeros_like(model.rigid_contact_tids)
1695
+ model.rigid_contact_tids = wp.full_like(model.rigid_contact_tids, -1)
1694
1696
  model.rigid_contact_shape0 = wp.empty_like(model.rigid_contact_shape0)
1695
1697
  model.rigid_contact_shape1 = wp.empty_like(model.rigid_contact_shape1)
1696
1698
 
@@ -1698,7 +1700,7 @@ def collide(
1698
1700
  model.rigid_contact_pairwise_counter = wp.zeros_like(model.rigid_contact_pairwise_counter)
1699
1701
  else:
1700
1702
  model.rigid_contact_count.zero_()
1701
- model.rigid_contact_tids.zero_()
1703
+ model.rigid_contact_tids.fill_(-1)
1702
1704
 
1703
1705
  if model.rigid_contact_pairwise_counter is not None:
1704
1706
  model.rigid_contact_pairwise_counter.zero_()