warp-lang 1.8.0__py3-none-win_amd64.whl → 1.9.0__py3-none-win_amd64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (153) hide show
  1. warp/__init__.py +282 -103
  2. warp/__init__.pyi +482 -110
  3. warp/bin/warp-clang.dll +0 -0
  4. warp/bin/warp.dll +0 -0
  5. warp/build.py +93 -30
  6. warp/build_dll.py +48 -63
  7. warp/builtins.py +955 -137
  8. warp/codegen.py +327 -209
  9. warp/config.py +1 -1
  10. warp/context.py +1363 -800
  11. warp/examples/core/example_marching_cubes.py +1 -0
  12. warp/examples/core/example_render_opengl.py +100 -3
  13. warp/examples/fem/example_apic_fluid.py +98 -52
  14. warp/examples/fem/example_convection_diffusion_dg.py +25 -4
  15. warp/examples/fem/example_diffusion_mgpu.py +8 -3
  16. warp/examples/fem/utils.py +68 -22
  17. warp/examples/interop/example_jax_callable.py +34 -4
  18. warp/examples/interop/example_jax_kernel.py +27 -1
  19. warp/fabric.py +1 -1
  20. warp/fem/cache.py +27 -19
  21. warp/fem/domain.py +2 -2
  22. warp/fem/field/nodal_field.py +2 -2
  23. warp/fem/field/virtual.py +266 -166
  24. warp/fem/geometry/geometry.py +5 -5
  25. warp/fem/integrate.py +200 -91
  26. warp/fem/space/restriction.py +4 -0
  27. warp/fem/space/shape/tet_shape_function.py +3 -10
  28. warp/jax_experimental/custom_call.py +1 -1
  29. warp/jax_experimental/ffi.py +203 -54
  30. warp/marching_cubes.py +708 -0
  31. warp/native/array.h +103 -8
  32. warp/native/builtin.h +90 -9
  33. warp/native/bvh.cpp +64 -28
  34. warp/native/bvh.cu +58 -58
  35. warp/native/bvh.h +2 -2
  36. warp/native/clang/clang.cpp +7 -7
  37. warp/native/coloring.cpp +13 -3
  38. warp/native/crt.cpp +2 -2
  39. warp/native/crt.h +3 -5
  40. warp/native/cuda_util.cpp +42 -11
  41. warp/native/cuda_util.h +10 -4
  42. warp/native/exports.h +1842 -1908
  43. warp/native/fabric.h +2 -1
  44. warp/native/hashgrid.cpp +37 -37
  45. warp/native/hashgrid.cu +2 -2
  46. warp/native/initializer_array.h +1 -1
  47. warp/native/intersect.h +4 -4
  48. warp/native/mat.h +1913 -119
  49. warp/native/mathdx.cpp +43 -43
  50. warp/native/mesh.cpp +24 -24
  51. warp/native/mesh.cu +26 -26
  52. warp/native/mesh.h +5 -3
  53. warp/native/nanovdb/GridHandle.h +179 -12
  54. warp/native/nanovdb/HostBuffer.h +8 -7
  55. warp/native/nanovdb/NanoVDB.h +517 -895
  56. warp/native/nanovdb/NodeManager.h +323 -0
  57. warp/native/nanovdb/PNanoVDB.h +2 -2
  58. warp/native/quat.h +337 -16
  59. warp/native/rand.h +7 -7
  60. warp/native/range.h +7 -1
  61. warp/native/reduce.cpp +10 -10
  62. warp/native/reduce.cu +13 -14
  63. warp/native/runlength_encode.cpp +2 -2
  64. warp/native/runlength_encode.cu +5 -5
  65. warp/native/scan.cpp +3 -3
  66. warp/native/scan.cu +4 -4
  67. warp/native/sort.cpp +10 -10
  68. warp/native/sort.cu +22 -22
  69. warp/native/sparse.cpp +8 -8
  70. warp/native/sparse.cu +14 -14
  71. warp/native/spatial.h +366 -17
  72. warp/native/svd.h +23 -8
  73. warp/native/temp_buffer.h +2 -2
  74. warp/native/tile.h +303 -70
  75. warp/native/tile_radix_sort.h +5 -1
  76. warp/native/tile_reduce.h +16 -25
  77. warp/native/tuple.h +2 -2
  78. warp/native/vec.h +385 -18
  79. warp/native/volume.cpp +54 -54
  80. warp/native/volume.cu +1 -1
  81. warp/native/volume.h +2 -1
  82. warp/native/volume_builder.cu +30 -37
  83. warp/native/warp.cpp +150 -149
  84. warp/native/warp.cu +337 -193
  85. warp/native/warp.h +227 -226
  86. warp/optim/linear.py +736 -271
  87. warp/render/imgui_manager.py +289 -0
  88. warp/render/render_opengl.py +137 -57
  89. warp/render/render_usd.py +0 -1
  90. warp/sim/collide.py +1 -2
  91. warp/sim/graph_coloring.py +2 -2
  92. warp/sim/integrator_vbd.py +10 -2
  93. warp/sparse.py +559 -176
  94. warp/tape.py +2 -0
  95. warp/tests/aux_test_module_aot.py +7 -0
  96. warp/tests/cuda/test_async.py +3 -3
  97. warp/tests/cuda/test_conditional_captures.py +101 -0
  98. warp/tests/geometry/test_marching_cubes.py +233 -12
  99. warp/tests/sim/test_cloth.py +89 -6
  100. warp/tests/sim/test_coloring.py +82 -7
  101. warp/tests/test_array.py +56 -5
  102. warp/tests/test_assert.py +53 -0
  103. warp/tests/test_atomic_cas.py +127 -114
  104. warp/tests/test_codegen.py +3 -2
  105. warp/tests/test_context.py +8 -15
  106. warp/tests/test_enum.py +136 -0
  107. warp/tests/test_examples.py +2 -2
  108. warp/tests/test_fem.py +45 -2
  109. warp/tests/test_fixedarray.py +229 -0
  110. warp/tests/test_func.py +18 -15
  111. warp/tests/test_future_annotations.py +7 -5
  112. warp/tests/test_linear_solvers.py +30 -0
  113. warp/tests/test_map.py +1 -1
  114. warp/tests/test_mat.py +1540 -378
  115. warp/tests/test_mat_assign_copy.py +178 -0
  116. warp/tests/test_mat_constructors.py +574 -0
  117. warp/tests/test_module_aot.py +287 -0
  118. warp/tests/test_print.py +69 -0
  119. warp/tests/test_quat.py +162 -34
  120. warp/tests/test_quat_assign_copy.py +145 -0
  121. warp/tests/test_reload.py +2 -1
  122. warp/tests/test_sparse.py +103 -0
  123. warp/tests/test_spatial.py +140 -34
  124. warp/tests/test_spatial_assign_copy.py +160 -0
  125. warp/tests/test_static.py +48 -0
  126. warp/tests/test_struct.py +43 -3
  127. warp/tests/test_tape.py +38 -0
  128. warp/tests/test_types.py +0 -20
  129. warp/tests/test_vec.py +216 -441
  130. warp/tests/test_vec_assign_copy.py +143 -0
  131. warp/tests/test_vec_constructors.py +325 -0
  132. warp/tests/tile/test_tile.py +206 -152
  133. warp/tests/tile/test_tile_cholesky.py +605 -0
  134. warp/tests/tile/test_tile_load.py +169 -0
  135. warp/tests/tile/test_tile_mathdx.py +2 -558
  136. warp/tests/tile/test_tile_matmul.py +179 -0
  137. warp/tests/tile/test_tile_mlp.py +1 -1
  138. warp/tests/tile/test_tile_reduce.py +100 -11
  139. warp/tests/tile/test_tile_shared_memory.py +16 -16
  140. warp/tests/tile/test_tile_sort.py +59 -55
  141. warp/tests/unittest_suites.py +16 -0
  142. warp/tests/walkthrough_debug.py +1 -1
  143. warp/thirdparty/unittest_parallel.py +108 -9
  144. warp/types.py +554 -264
  145. warp/utils.py +68 -86
  146. {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/METADATA +28 -65
  147. {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/RECORD +150 -138
  148. warp/native/marching.cpp +0 -19
  149. warp/native/marching.cu +0 -514
  150. warp/native/marching.h +0 -19
  151. {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/WHEEL +0 -0
  152. {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/licenses/LICENSE.md +0 -0
  153. {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/top_level.txt +0 -0
warp/native/tile_reduce.h CHANGED
@@ -83,19 +83,7 @@ inline CUDA_CALLABLE wp::vec_t<Length, T> warp_shuffle_down(wp::vec_t<Length, T>
83
83
  wp::vec_t<Length, T> result;
84
84
 
85
85
  for (unsigned i=0; i < Length; ++i)
86
- result.data[i] = __shfl_down_sync(mask, val.data[i], offset, WP_TILE_WARP_SIZE);
87
-
88
- return result;
89
- }
90
-
91
- // Quaternion overload
92
- template <typename T>
93
- inline CUDA_CALLABLE wp::quat_t<T> warp_shuffle_down(wp::quat_t<T> val, int offset, int mask)
94
- {
95
- wp::quat_t<T> result;
96
-
97
- for (unsigned i=0; i < 4; ++i)
98
- result.data[i] = __shfl_down_sync(mask, val.data[i], offset, WP_TILE_WARP_SIZE);
86
+ result[i] = __shfl_down_sync(mask, val[i], offset, WP_TILE_WARP_SIZE);
99
87
 
100
88
  return result;
101
89
  }
@@ -218,6 +206,7 @@ auto tile_reduce_impl(Op f, Tile& t)
218
206
 
219
207
  // ensure that only threads with at least one valid item participate in the reduction
220
208
  unsigned int mask = __ballot_sync(__activemask(), Layout::valid(Layout::linear_from_register(0)));
209
+ bool warp_is_active = mask != 0;
221
210
 
222
211
  // warp reduction
223
212
  T warp_sum = warp_reduce(thread_sum, f, mask);
@@ -233,7 +222,7 @@ auto tile_reduce_impl(Op f, Tile& t)
233
222
  // ensure active_warps is initialized
234
223
  WP_TILE_SYNC();
235
224
 
236
- if (lane_index == 0)
225
+ if (lane_index == 0 && warp_is_active)
237
226
  {
238
227
  partials[warp_index] = warp_sum;
239
228
  atomicAdd(&active_warps, 1);
@@ -291,6 +280,7 @@ auto tile_arg_reduce_impl(Op f, OpTrack track, Tile& t)
291
280
 
292
281
  // ensure that only threads with at least one valid item participate in the reduction
293
282
  unsigned int mask = __ballot_sync(__activemask(), Layout::valid(Layout::linear_from_register(0)));
283
+ bool warp_is_active = mask != 0;
294
284
 
295
285
  // warp reduction
296
286
  ValueAndIndex<T> warp_sum = warp_reduce_tracked(thread_sum, champion_index, f, track, mask);
@@ -307,7 +297,7 @@ auto tile_arg_reduce_impl(Op f, OpTrack track, Tile& t)
307
297
  // ensure active_warps is initialized
308
298
  WP_TILE_SYNC();
309
299
 
310
- if (lane_index == 0)
300
+ if (lane_index == 0 && warp_is_active)
311
301
  {
312
302
  partials[warp_index] = warp_sum.value;
313
303
  partials_idx[warp_index] = warp_sum.index;
@@ -422,25 +412,26 @@ void adj_tile_sum(Tile& t, Tile& adj_t, AdjTile& adj_ret)
422
412
  {
423
413
  using T = typename Tile::Type;
424
414
 
425
- #if !defined(__CUDA_ARCH__)
426
-
427
- for (int i=0; i < Tile::Layout::Size; ++i)
428
- {
429
- adj_t(i) += adj_ret.data[0];
415
+ auto adj_reg = adj_ret.grad_to_register();
430
416
 
431
- }
417
+ #if !defined(__CUDA_ARCH__)
418
+ T scratch = adj_reg.data[0];
432
419
  #else
433
420
  // broadcast incoming adjoint to block
434
421
  WP_TILE_SHARED T scratch;
435
422
  if (WP_TILE_THREAD_IDX == 0)
436
- scratch = adj_ret.data[0];
423
+ scratch = adj_reg.data[0];
437
424
 
438
425
  WP_TILE_SYNC();
426
+ #endif
439
427
 
440
- // broadcast scalar across input dimensions (note zero strides)
441
- auto adj_ret_reg = tile_shared_t<T, tile_layout_strided_t<typename Tile::Layout::Shape, tile_stride_t<0, 0>>, false>(&scratch, nullptr).copy_to_register();
428
+ auto adj_ret_reg = tile_register_like<Tile>();
429
+ using Layout = typename decltype(adj_ret_reg)::Layout;
430
+ for (int i=0; i < Layout::NumRegs; ++i)
431
+ {
432
+ adj_ret_reg.data[i] += scratch;
433
+ }
442
434
  adj_t.grad_add(adj_ret_reg);
443
- #endif
444
435
  }
445
436
 
446
437
  template <typename Tile>
warp/native/tuple.h CHANGED
@@ -182,8 +182,8 @@ adj_add(
182
182
  const tuple_t<Head, Tail...>& adj_ret
183
183
  )
184
184
  {
185
- adj_add(a.head, b.head, adj_ret.head);
186
- adj_add(a.tail, b.tail, adj_ret.tail);
185
+ adj_add(a.head, b.head, adj_a.head, adj_b.head, adj_ret.head);
186
+ adj_add(a.tail, b.tail, adj_a.tail, adj_b.tail, adj_ret.tail);
187
187
  }
188
188
 
189
189
  } // namespace wp