warp-lang 1.8.0__py3-none-macosx_10_13_universal2.whl → 1.9.0__py3-none-macosx_10_13_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +282 -103
- warp/__init__.pyi +482 -110
- warp/bin/libwarp-clang.dylib +0 -0
- warp/bin/libwarp.dylib +0 -0
- warp/build.py +93 -30
- warp/build_dll.py +48 -63
- warp/builtins.py +955 -137
- warp/codegen.py +327 -209
- warp/config.py +1 -1
- warp/context.py +1363 -800
- warp/examples/core/example_marching_cubes.py +1 -0
- warp/examples/core/example_render_opengl.py +100 -3
- warp/examples/fem/example_apic_fluid.py +98 -52
- warp/examples/fem/example_convection_diffusion_dg.py +25 -4
- warp/examples/fem/example_diffusion_mgpu.py +8 -3
- warp/examples/fem/utils.py +68 -22
- warp/examples/interop/example_jax_callable.py +34 -4
- warp/examples/interop/example_jax_kernel.py +27 -1
- warp/fabric.py +1 -1
- warp/fem/cache.py +27 -19
- warp/fem/domain.py +2 -2
- warp/fem/field/nodal_field.py +2 -2
- warp/fem/field/virtual.py +266 -166
- warp/fem/geometry/geometry.py +5 -5
- warp/fem/integrate.py +200 -91
- warp/fem/space/restriction.py +4 -0
- warp/fem/space/shape/tet_shape_function.py +3 -10
- warp/jax_experimental/custom_call.py +1 -1
- warp/jax_experimental/ffi.py +203 -54
- warp/marching_cubes.py +708 -0
- warp/native/array.h +103 -8
- warp/native/builtin.h +90 -9
- warp/native/bvh.cpp +64 -28
- warp/native/bvh.cu +58 -58
- warp/native/bvh.h +2 -2
- warp/native/clang/clang.cpp +7 -7
- warp/native/coloring.cpp +13 -3
- warp/native/crt.cpp +2 -2
- warp/native/crt.h +3 -5
- warp/native/cuda_util.cpp +42 -11
- warp/native/cuda_util.h +10 -4
- warp/native/exports.h +1842 -1908
- warp/native/fabric.h +2 -1
- warp/native/hashgrid.cpp +37 -37
- warp/native/hashgrid.cu +2 -2
- warp/native/initializer_array.h +1 -1
- warp/native/intersect.h +4 -4
- warp/native/mat.h +1913 -119
- warp/native/mathdx.cpp +43 -43
- warp/native/mesh.cpp +24 -24
- warp/native/mesh.cu +26 -26
- warp/native/mesh.h +5 -3
- warp/native/nanovdb/GridHandle.h +179 -12
- warp/native/nanovdb/HostBuffer.h +8 -7
- warp/native/nanovdb/NanoVDB.h +517 -895
- warp/native/nanovdb/NodeManager.h +323 -0
- warp/native/nanovdb/PNanoVDB.h +2 -2
- warp/native/quat.h +337 -16
- warp/native/rand.h +7 -7
- warp/native/range.h +7 -1
- warp/native/reduce.cpp +10 -10
- warp/native/reduce.cu +13 -14
- warp/native/runlength_encode.cpp +2 -2
- warp/native/runlength_encode.cu +5 -5
- warp/native/scan.cpp +3 -3
- warp/native/scan.cu +4 -4
- warp/native/sort.cpp +10 -10
- warp/native/sort.cu +22 -22
- warp/native/sparse.cpp +8 -8
- warp/native/sparse.cu +14 -14
- warp/native/spatial.h +366 -17
- warp/native/svd.h +23 -8
- warp/native/temp_buffer.h +2 -2
- warp/native/tile.h +303 -70
- warp/native/tile_radix_sort.h +5 -1
- warp/native/tile_reduce.h +16 -25
- warp/native/tuple.h +2 -2
- warp/native/vec.h +385 -18
- warp/native/volume.cpp +54 -54
- warp/native/volume.cu +1 -1
- warp/native/volume.h +2 -1
- warp/native/volume_builder.cu +30 -37
- warp/native/warp.cpp +150 -149
- warp/native/warp.cu +337 -193
- warp/native/warp.h +227 -226
- warp/optim/linear.py +736 -271
- warp/render/imgui_manager.py +289 -0
- warp/render/render_opengl.py +137 -57
- warp/render/render_usd.py +0 -1
- warp/sim/collide.py +1 -2
- warp/sim/graph_coloring.py +2 -2
- warp/sim/integrator_vbd.py +10 -2
- warp/sparse.py +559 -176
- warp/tape.py +2 -0
- warp/tests/aux_test_module_aot.py +7 -0
- warp/tests/cuda/test_async.py +3 -3
- warp/tests/cuda/test_conditional_captures.py +101 -0
- warp/tests/geometry/test_marching_cubes.py +233 -12
- warp/tests/sim/test_cloth.py +89 -6
- warp/tests/sim/test_coloring.py +82 -7
- warp/tests/test_array.py +56 -5
- warp/tests/test_assert.py +53 -0
- warp/tests/test_atomic_cas.py +127 -114
- warp/tests/test_codegen.py +3 -2
- warp/tests/test_context.py +8 -15
- warp/tests/test_enum.py +136 -0
- warp/tests/test_examples.py +2 -2
- warp/tests/test_fem.py +45 -2
- warp/tests/test_fixedarray.py +229 -0
- warp/tests/test_func.py +18 -15
- warp/tests/test_future_annotations.py +7 -5
- warp/tests/test_linear_solvers.py +30 -0
- warp/tests/test_map.py +1 -1
- warp/tests/test_mat.py +1540 -378
- warp/tests/test_mat_assign_copy.py +178 -0
- warp/tests/test_mat_constructors.py +574 -0
- warp/tests/test_module_aot.py +287 -0
- warp/tests/test_print.py +69 -0
- warp/tests/test_quat.py +162 -34
- warp/tests/test_quat_assign_copy.py +145 -0
- warp/tests/test_reload.py +2 -1
- warp/tests/test_sparse.py +103 -0
- warp/tests/test_spatial.py +140 -34
- warp/tests/test_spatial_assign_copy.py +160 -0
- warp/tests/test_static.py +48 -0
- warp/tests/test_struct.py +43 -3
- warp/tests/test_tape.py +38 -0
- warp/tests/test_types.py +0 -20
- warp/tests/test_vec.py +216 -441
- warp/tests/test_vec_assign_copy.py +143 -0
- warp/tests/test_vec_constructors.py +325 -0
- warp/tests/tile/test_tile.py +206 -152
- warp/tests/tile/test_tile_cholesky.py +605 -0
- warp/tests/tile/test_tile_load.py +169 -0
- warp/tests/tile/test_tile_mathdx.py +2 -558
- warp/tests/tile/test_tile_matmul.py +179 -0
- warp/tests/tile/test_tile_mlp.py +1 -1
- warp/tests/tile/test_tile_reduce.py +100 -11
- warp/tests/tile/test_tile_shared_memory.py +16 -16
- warp/tests/tile/test_tile_sort.py +59 -55
- warp/tests/unittest_suites.py +16 -0
- warp/tests/walkthrough_debug.py +1 -1
- warp/thirdparty/unittest_parallel.py +108 -9
- warp/types.py +554 -264
- warp/utils.py +68 -86
- {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/METADATA +28 -65
- {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/RECORD +150 -138
- warp/native/marching.cpp +0 -19
- warp/native/marching.cu +0 -514
- warp/native/marching.h +0 -19
- {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/WHEEL +0 -0
- {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/licenses/LICENSE.md +0 -0
- {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/top_level.txt +0 -0
warp/native/tile_reduce.h
CHANGED
|
@@ -83,19 +83,7 @@ inline CUDA_CALLABLE wp::vec_t<Length, T> warp_shuffle_down(wp::vec_t<Length, T>
|
|
|
83
83
|
wp::vec_t<Length, T> result;
|
|
84
84
|
|
|
85
85
|
for (unsigned i=0; i < Length; ++i)
|
|
86
|
-
result
|
|
87
|
-
|
|
88
|
-
return result;
|
|
89
|
-
}
|
|
90
|
-
|
|
91
|
-
// Quaternion overload
|
|
92
|
-
template <typename T>
|
|
93
|
-
inline CUDA_CALLABLE wp::quat_t<T> warp_shuffle_down(wp::quat_t<T> val, int offset, int mask)
|
|
94
|
-
{
|
|
95
|
-
wp::quat_t<T> result;
|
|
96
|
-
|
|
97
|
-
for (unsigned i=0; i < 4; ++i)
|
|
98
|
-
result.data[i] = __shfl_down_sync(mask, val.data[i], offset, WP_TILE_WARP_SIZE);
|
|
86
|
+
result[i] = __shfl_down_sync(mask, val[i], offset, WP_TILE_WARP_SIZE);
|
|
99
87
|
|
|
100
88
|
return result;
|
|
101
89
|
}
|
|
@@ -218,6 +206,7 @@ auto tile_reduce_impl(Op f, Tile& t)
|
|
|
218
206
|
|
|
219
207
|
// ensure that only threads with at least one valid item participate in the reduction
|
|
220
208
|
unsigned int mask = __ballot_sync(__activemask(), Layout::valid(Layout::linear_from_register(0)));
|
|
209
|
+
bool warp_is_active = mask != 0;
|
|
221
210
|
|
|
222
211
|
// warp reduction
|
|
223
212
|
T warp_sum = warp_reduce(thread_sum, f, mask);
|
|
@@ -233,7 +222,7 @@ auto tile_reduce_impl(Op f, Tile& t)
|
|
|
233
222
|
// ensure active_warps is initialized
|
|
234
223
|
WP_TILE_SYNC();
|
|
235
224
|
|
|
236
|
-
if (lane_index == 0)
|
|
225
|
+
if (lane_index == 0 && warp_is_active)
|
|
237
226
|
{
|
|
238
227
|
partials[warp_index] = warp_sum;
|
|
239
228
|
atomicAdd(&active_warps, 1);
|
|
@@ -291,6 +280,7 @@ auto tile_arg_reduce_impl(Op f, OpTrack track, Tile& t)
|
|
|
291
280
|
|
|
292
281
|
// ensure that only threads with at least one valid item participate in the reduction
|
|
293
282
|
unsigned int mask = __ballot_sync(__activemask(), Layout::valid(Layout::linear_from_register(0)));
|
|
283
|
+
bool warp_is_active = mask != 0;
|
|
294
284
|
|
|
295
285
|
// warp reduction
|
|
296
286
|
ValueAndIndex<T> warp_sum = warp_reduce_tracked(thread_sum, champion_index, f, track, mask);
|
|
@@ -307,7 +297,7 @@ auto tile_arg_reduce_impl(Op f, OpTrack track, Tile& t)
|
|
|
307
297
|
// ensure active_warps is initialized
|
|
308
298
|
WP_TILE_SYNC();
|
|
309
299
|
|
|
310
|
-
if (lane_index == 0)
|
|
300
|
+
if (lane_index == 0 && warp_is_active)
|
|
311
301
|
{
|
|
312
302
|
partials[warp_index] = warp_sum.value;
|
|
313
303
|
partials_idx[warp_index] = warp_sum.index;
|
|
@@ -422,25 +412,26 @@ void adj_tile_sum(Tile& t, Tile& adj_t, AdjTile& adj_ret)
|
|
|
422
412
|
{
|
|
423
413
|
using T = typename Tile::Type;
|
|
424
414
|
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
for (int i=0; i < Tile::Layout::Size; ++i)
|
|
428
|
-
{
|
|
429
|
-
adj_t(i) += adj_ret.data[0];
|
|
415
|
+
auto adj_reg = adj_ret.grad_to_register();
|
|
430
416
|
|
|
431
|
-
|
|
417
|
+
#if !defined(__CUDA_ARCH__)
|
|
418
|
+
T scratch = adj_reg.data[0];
|
|
432
419
|
#else
|
|
433
420
|
// broadcast incoming adjoint to block
|
|
434
421
|
WP_TILE_SHARED T scratch;
|
|
435
422
|
if (WP_TILE_THREAD_IDX == 0)
|
|
436
|
-
scratch =
|
|
423
|
+
scratch = adj_reg.data[0];
|
|
437
424
|
|
|
438
425
|
WP_TILE_SYNC();
|
|
426
|
+
#endif
|
|
439
427
|
|
|
440
|
-
|
|
441
|
-
|
|
428
|
+
auto adj_ret_reg = tile_register_like<Tile>();
|
|
429
|
+
using Layout = typename decltype(adj_ret_reg)::Layout;
|
|
430
|
+
for (int i=0; i < Layout::NumRegs; ++i)
|
|
431
|
+
{
|
|
432
|
+
adj_ret_reg.data[i] += scratch;
|
|
433
|
+
}
|
|
442
434
|
adj_t.grad_add(adj_ret_reg);
|
|
443
|
-
#endif
|
|
444
435
|
}
|
|
445
436
|
|
|
446
437
|
template <typename Tile>
|
warp/native/tuple.h
CHANGED
|
@@ -182,8 +182,8 @@ adj_add(
|
|
|
182
182
|
const tuple_t<Head, Tail...>& adj_ret
|
|
183
183
|
)
|
|
184
184
|
{
|
|
185
|
-
adj_add(a.head, b.head, adj_ret.head);
|
|
186
|
-
adj_add(a.tail, b.tail, adj_ret.tail);
|
|
185
|
+
adj_add(a.head, b.head, adj_a.head, adj_b.head, adj_ret.head);
|
|
186
|
+
adj_add(a.tail, b.tail, adj_a.tail, adj_b.tail, adj_ret.tail);
|
|
187
187
|
}
|
|
188
188
|
|
|
189
189
|
} // namespace wp
|