warp-lang 1.10.0__py3-none-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +334 -0
- warp/__init__.pyi +5856 -0
- warp/_src/__init__.py +14 -0
- warp/_src/autograd.py +1077 -0
- warp/_src/build.py +620 -0
- warp/_src/build_dll.py +642 -0
- warp/_src/builtins.py +10555 -0
- warp/_src/codegen.py +4361 -0
- warp/_src/config.py +178 -0
- warp/_src/constants.py +59 -0
- warp/_src/context.py +8352 -0
- warp/_src/dlpack.py +464 -0
- warp/_src/fabric.py +362 -0
- warp/_src/fem/__init__.py +14 -0
- warp/_src/fem/adaptivity.py +510 -0
- warp/_src/fem/cache.py +689 -0
- warp/_src/fem/dirichlet.py +190 -0
- warp/_src/fem/domain.py +553 -0
- warp/_src/fem/field/__init__.py +131 -0
- warp/_src/fem/field/field.py +703 -0
- warp/_src/fem/field/nodal_field.py +403 -0
- warp/_src/fem/field/restriction.py +39 -0
- warp/_src/fem/field/virtual.py +1021 -0
- warp/_src/fem/geometry/__init__.py +32 -0
- warp/_src/fem/geometry/adaptive_nanogrid.py +782 -0
- warp/_src/fem/geometry/closest_point.py +99 -0
- warp/_src/fem/geometry/deformed_geometry.py +277 -0
- warp/_src/fem/geometry/element.py +854 -0
- warp/_src/fem/geometry/geometry.py +693 -0
- warp/_src/fem/geometry/grid_2d.py +478 -0
- warp/_src/fem/geometry/grid_3d.py +539 -0
- warp/_src/fem/geometry/hexmesh.py +956 -0
- warp/_src/fem/geometry/nanogrid.py +660 -0
- warp/_src/fem/geometry/partition.py +483 -0
- warp/_src/fem/geometry/quadmesh.py +597 -0
- warp/_src/fem/geometry/tetmesh.py +762 -0
- warp/_src/fem/geometry/trimesh.py +588 -0
- warp/_src/fem/integrate.py +2507 -0
- warp/_src/fem/linalg.py +385 -0
- warp/_src/fem/operator.py +398 -0
- warp/_src/fem/polynomial.py +231 -0
- warp/_src/fem/quadrature/__init__.py +17 -0
- warp/_src/fem/quadrature/pic_quadrature.py +318 -0
- warp/_src/fem/quadrature/quadrature.py +665 -0
- warp/_src/fem/space/__init__.py +248 -0
- warp/_src/fem/space/basis_function_space.py +499 -0
- warp/_src/fem/space/basis_space.py +681 -0
- warp/_src/fem/space/dof_mapper.py +253 -0
- warp/_src/fem/space/function_space.py +312 -0
- warp/_src/fem/space/grid_2d_function_space.py +179 -0
- warp/_src/fem/space/grid_3d_function_space.py +229 -0
- warp/_src/fem/space/hexmesh_function_space.py +255 -0
- warp/_src/fem/space/nanogrid_function_space.py +199 -0
- warp/_src/fem/space/partition.py +435 -0
- warp/_src/fem/space/quadmesh_function_space.py +222 -0
- warp/_src/fem/space/restriction.py +221 -0
- warp/_src/fem/space/shape/__init__.py +152 -0
- warp/_src/fem/space/shape/cube_shape_function.py +1107 -0
- warp/_src/fem/space/shape/shape_function.py +134 -0
- warp/_src/fem/space/shape/square_shape_function.py +928 -0
- warp/_src/fem/space/shape/tet_shape_function.py +829 -0
- warp/_src/fem/space/shape/triangle_shape_function.py +674 -0
- warp/_src/fem/space/tetmesh_function_space.py +270 -0
- warp/_src/fem/space/topology.py +461 -0
- warp/_src/fem/space/trimesh_function_space.py +193 -0
- warp/_src/fem/types.py +114 -0
- warp/_src/fem/utils.py +488 -0
- warp/_src/jax.py +188 -0
- warp/_src/jax_experimental/__init__.py +14 -0
- warp/_src/jax_experimental/custom_call.py +389 -0
- warp/_src/jax_experimental/ffi.py +1286 -0
- warp/_src/jax_experimental/xla_ffi.py +658 -0
- warp/_src/marching_cubes.py +710 -0
- warp/_src/math.py +416 -0
- warp/_src/optim/__init__.py +14 -0
- warp/_src/optim/adam.py +165 -0
- warp/_src/optim/linear.py +1608 -0
- warp/_src/optim/sgd.py +114 -0
- warp/_src/paddle.py +408 -0
- warp/_src/render/__init__.py +14 -0
- warp/_src/render/imgui_manager.py +291 -0
- warp/_src/render/render_opengl.py +3638 -0
- warp/_src/render/render_usd.py +939 -0
- warp/_src/render/utils.py +162 -0
- warp/_src/sparse.py +2718 -0
- warp/_src/tape.py +1208 -0
- warp/_src/thirdparty/__init__.py +0 -0
- warp/_src/thirdparty/appdirs.py +598 -0
- warp/_src/thirdparty/dlpack.py +145 -0
- warp/_src/thirdparty/unittest_parallel.py +676 -0
- warp/_src/torch.py +393 -0
- warp/_src/types.py +5888 -0
- warp/_src/utils.py +1695 -0
- warp/autograd.py +33 -0
- warp/bin/libwarp-clang.dylib +0 -0
- warp/bin/libwarp.dylib +0 -0
- warp/build.py +29 -0
- warp/build_dll.py +24 -0
- warp/codegen.py +24 -0
- warp/constants.py +24 -0
- warp/context.py +33 -0
- warp/dlpack.py +24 -0
- warp/examples/__init__.py +24 -0
- warp/examples/assets/bear.usd +0 -0
- warp/examples/assets/bunny.usd +0 -0
- warp/examples/assets/cube.usd +0 -0
- warp/examples/assets/nonuniform.usd +0 -0
- warp/examples/assets/nvidia_logo.png +0 -0
- warp/examples/assets/pixel.jpg +0 -0
- warp/examples/assets/rocks.nvdb +0 -0
- warp/examples/assets/rocks.usd +0 -0
- warp/examples/assets/sphere.usd +0 -0
- warp/examples/assets/square_cloth.usd +0 -0
- warp/examples/benchmarks/benchmark_api.py +389 -0
- warp/examples/benchmarks/benchmark_cloth.py +296 -0
- warp/examples/benchmarks/benchmark_cloth_cupy.py +96 -0
- warp/examples/benchmarks/benchmark_cloth_jax.py +105 -0
- warp/examples/benchmarks/benchmark_cloth_numba.py +161 -0
- warp/examples/benchmarks/benchmark_cloth_numpy.py +85 -0
- warp/examples/benchmarks/benchmark_cloth_paddle.py +94 -0
- warp/examples/benchmarks/benchmark_cloth_pytorch.py +94 -0
- warp/examples/benchmarks/benchmark_cloth_taichi.py +120 -0
- warp/examples/benchmarks/benchmark_cloth_warp.py +153 -0
- warp/examples/benchmarks/benchmark_gemm.py +164 -0
- warp/examples/benchmarks/benchmark_interop_paddle.py +166 -0
- warp/examples/benchmarks/benchmark_interop_torch.py +166 -0
- warp/examples/benchmarks/benchmark_launches.py +301 -0
- warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
- warp/examples/benchmarks/benchmark_tile_sort.py +155 -0
- warp/examples/browse.py +37 -0
- warp/examples/core/example_cupy.py +86 -0
- warp/examples/core/example_dem.py +241 -0
- warp/examples/core/example_fluid.py +299 -0
- warp/examples/core/example_graph_capture.py +150 -0
- warp/examples/core/example_marching_cubes.py +195 -0
- warp/examples/core/example_mesh.py +180 -0
- warp/examples/core/example_mesh_intersect.py +211 -0
- warp/examples/core/example_nvdb.py +182 -0
- warp/examples/core/example_raycast.py +111 -0
- warp/examples/core/example_raymarch.py +205 -0
- warp/examples/core/example_render_opengl.py +290 -0
- warp/examples/core/example_sample_mesh.py +300 -0
- warp/examples/core/example_sph.py +411 -0
- warp/examples/core/example_spin_lock.py +93 -0
- warp/examples/core/example_torch.py +211 -0
- warp/examples/core/example_wave.py +269 -0
- warp/examples/core/example_work_queue.py +118 -0
- warp/examples/distributed/example_jacobi_mpi.py +506 -0
- warp/examples/fem/example_adaptive_grid.py +286 -0
- warp/examples/fem/example_apic_fluid.py +469 -0
- warp/examples/fem/example_burgers.py +261 -0
- warp/examples/fem/example_convection_diffusion.py +181 -0
- warp/examples/fem/example_convection_diffusion_dg.py +225 -0
- warp/examples/fem/example_darcy_ls_optimization.py +489 -0
- warp/examples/fem/example_deformed_geometry.py +172 -0
- warp/examples/fem/example_diffusion.py +196 -0
- warp/examples/fem/example_diffusion_3d.py +225 -0
- warp/examples/fem/example_diffusion_mgpu.py +225 -0
- warp/examples/fem/example_distortion_energy.py +228 -0
- warp/examples/fem/example_elastic_shape_optimization.py +387 -0
- warp/examples/fem/example_magnetostatics.py +242 -0
- warp/examples/fem/example_mixed_elasticity.py +293 -0
- warp/examples/fem/example_navier_stokes.py +263 -0
- warp/examples/fem/example_nonconforming_contact.py +300 -0
- warp/examples/fem/example_stokes.py +213 -0
- warp/examples/fem/example_stokes_transfer.py +262 -0
- warp/examples/fem/example_streamlines.py +357 -0
- warp/examples/fem/utils.py +1047 -0
- warp/examples/interop/example_jax_callable.py +146 -0
- warp/examples/interop/example_jax_ffi_callback.py +132 -0
- warp/examples/interop/example_jax_kernel.py +232 -0
- warp/examples/optim/example_diffray.py +561 -0
- warp/examples/optim/example_fluid_checkpoint.py +497 -0
- warp/examples/tile/example_tile_block_cholesky.py +502 -0
- warp/examples/tile/example_tile_cholesky.py +88 -0
- warp/examples/tile/example_tile_convolution.py +66 -0
- warp/examples/tile/example_tile_fft.py +55 -0
- warp/examples/tile/example_tile_filtering.py +113 -0
- warp/examples/tile/example_tile_matmul.py +85 -0
- warp/examples/tile/example_tile_mcgp.py +191 -0
- warp/examples/tile/example_tile_mlp.py +385 -0
- warp/examples/tile/example_tile_nbody.py +199 -0
- warp/fabric.py +24 -0
- warp/fem/__init__.py +173 -0
- warp/fem/adaptivity.py +26 -0
- warp/fem/cache.py +30 -0
- warp/fem/dirichlet.py +24 -0
- warp/fem/field/__init__.py +24 -0
- warp/fem/field/field.py +26 -0
- warp/fem/geometry/__init__.py +21 -0
- warp/fem/geometry/closest_point.py +31 -0
- warp/fem/linalg.py +38 -0
- warp/fem/operator.py +32 -0
- warp/fem/polynomial.py +29 -0
- warp/fem/space/__init__.py +22 -0
- warp/fem/space/basis_space.py +24 -0
- warp/fem/space/shape/__init__.py +68 -0
- warp/fem/space/topology.py +24 -0
- warp/fem/types.py +24 -0
- warp/fem/utils.py +32 -0
- warp/jax.py +29 -0
- warp/jax_experimental/__init__.py +29 -0
- warp/jax_experimental/custom_call.py +29 -0
- warp/jax_experimental/ffi.py +39 -0
- warp/jax_experimental/xla_ffi.py +24 -0
- warp/marching_cubes.py +24 -0
- warp/math.py +37 -0
- warp/native/array.h +1687 -0
- warp/native/builtin.h +2327 -0
- warp/native/bvh.cpp +562 -0
- warp/native/bvh.cu +826 -0
- warp/native/bvh.h +555 -0
- warp/native/clang/clang.cpp +541 -0
- warp/native/coloring.cpp +622 -0
- warp/native/crt.cpp +51 -0
- warp/native/crt.h +568 -0
- warp/native/cuda_crt.h +1058 -0
- warp/native/cuda_util.cpp +677 -0
- warp/native/cuda_util.h +313 -0
- warp/native/error.cpp +77 -0
- warp/native/error.h +36 -0
- warp/native/exports.h +2023 -0
- warp/native/fabric.h +246 -0
- warp/native/hashgrid.cpp +311 -0
- warp/native/hashgrid.cu +89 -0
- warp/native/hashgrid.h +240 -0
- warp/native/initializer_array.h +41 -0
- warp/native/intersect.h +1253 -0
- warp/native/intersect_adj.h +375 -0
- warp/native/intersect_tri.h +348 -0
- warp/native/mat.h +5189 -0
- warp/native/mathdx.cpp +93 -0
- warp/native/matnn.h +221 -0
- warp/native/mesh.cpp +266 -0
- warp/native/mesh.cu +406 -0
- warp/native/mesh.h +2097 -0
- warp/native/nanovdb/GridHandle.h +533 -0
- warp/native/nanovdb/HostBuffer.h +591 -0
- warp/native/nanovdb/NanoVDB.h +6246 -0
- warp/native/nanovdb/NodeManager.h +323 -0
- warp/native/nanovdb/PNanoVDB.h +3390 -0
- warp/native/noise.h +859 -0
- warp/native/quat.h +1664 -0
- warp/native/rand.h +342 -0
- warp/native/range.h +145 -0
- warp/native/reduce.cpp +174 -0
- warp/native/reduce.cu +363 -0
- warp/native/runlength_encode.cpp +79 -0
- warp/native/runlength_encode.cu +61 -0
- warp/native/scan.cpp +47 -0
- warp/native/scan.cu +55 -0
- warp/native/scan.h +23 -0
- warp/native/solid_angle.h +466 -0
- warp/native/sort.cpp +251 -0
- warp/native/sort.cu +286 -0
- warp/native/sort.h +35 -0
- warp/native/sparse.cpp +241 -0
- warp/native/sparse.cu +435 -0
- warp/native/spatial.h +1306 -0
- warp/native/svd.h +727 -0
- warp/native/temp_buffer.h +46 -0
- warp/native/tile.h +4124 -0
- warp/native/tile_radix_sort.h +1112 -0
- warp/native/tile_reduce.h +838 -0
- warp/native/tile_scan.h +240 -0
- warp/native/tuple.h +189 -0
- warp/native/vec.h +2199 -0
- warp/native/version.h +23 -0
- warp/native/volume.cpp +501 -0
- warp/native/volume.cu +68 -0
- warp/native/volume.h +970 -0
- warp/native/volume_builder.cu +483 -0
- warp/native/volume_builder.h +52 -0
- warp/native/volume_impl.h +70 -0
- warp/native/warp.cpp +1143 -0
- warp/native/warp.cu +4604 -0
- warp/native/warp.h +358 -0
- warp/optim/__init__.py +20 -0
- warp/optim/adam.py +24 -0
- warp/optim/linear.py +35 -0
- warp/optim/sgd.py +24 -0
- warp/paddle.py +24 -0
- warp/py.typed +0 -0
- warp/render/__init__.py +22 -0
- warp/render/imgui_manager.py +29 -0
- warp/render/render_opengl.py +24 -0
- warp/render/render_usd.py +24 -0
- warp/render/utils.py +24 -0
- warp/sparse.py +51 -0
- warp/tape.py +24 -0
- warp/tests/__init__.py +1 -0
- warp/tests/__main__.py +4 -0
- warp/tests/assets/curlnoise_golden.npy +0 -0
- warp/tests/assets/mlp_golden.npy +0 -0
- warp/tests/assets/pixel.npy +0 -0
- warp/tests/assets/pnoise_golden.npy +0 -0
- warp/tests/assets/spiky.usd +0 -0
- warp/tests/assets/test_grid.nvdb +0 -0
- warp/tests/assets/test_index_grid.nvdb +0 -0
- warp/tests/assets/test_int32_grid.nvdb +0 -0
- warp/tests/assets/test_vec_grid.nvdb +0 -0
- warp/tests/assets/torus.nvdb +0 -0
- warp/tests/assets/torus.usda +105 -0
- warp/tests/aux_test_class_kernel.py +34 -0
- warp/tests/aux_test_compile_consts_dummy.py +18 -0
- warp/tests/aux_test_conditional_unequal_types_kernels.py +29 -0
- warp/tests/aux_test_dependent.py +29 -0
- warp/tests/aux_test_grad_customs.py +29 -0
- warp/tests/aux_test_instancing_gc.py +26 -0
- warp/tests/aux_test_module_aot.py +7 -0
- warp/tests/aux_test_module_unload.py +23 -0
- warp/tests/aux_test_name_clash1.py +40 -0
- warp/tests/aux_test_name_clash2.py +40 -0
- warp/tests/aux_test_reference.py +9 -0
- warp/tests/aux_test_reference_reference.py +8 -0
- warp/tests/aux_test_square.py +16 -0
- warp/tests/aux_test_unresolved_func.py +22 -0
- warp/tests/aux_test_unresolved_symbol.py +22 -0
- warp/tests/cuda/__init__.py +0 -0
- warp/tests/cuda/test_async.py +676 -0
- warp/tests/cuda/test_conditional_captures.py +1147 -0
- warp/tests/cuda/test_ipc.py +124 -0
- warp/tests/cuda/test_mempool.py +233 -0
- warp/tests/cuda/test_multigpu.py +169 -0
- warp/tests/cuda/test_peer.py +139 -0
- warp/tests/cuda/test_pinned.py +84 -0
- warp/tests/cuda/test_streams.py +691 -0
- warp/tests/geometry/__init__.py +0 -0
- warp/tests/geometry/test_bvh.py +335 -0
- warp/tests/geometry/test_hash_grid.py +259 -0
- warp/tests/geometry/test_marching_cubes.py +294 -0
- warp/tests/geometry/test_mesh.py +318 -0
- warp/tests/geometry/test_mesh_query_aabb.py +392 -0
- warp/tests/geometry/test_mesh_query_point.py +935 -0
- warp/tests/geometry/test_mesh_query_ray.py +323 -0
- warp/tests/geometry/test_volume.py +1103 -0
- warp/tests/geometry/test_volume_write.py +346 -0
- warp/tests/interop/__init__.py +0 -0
- warp/tests/interop/test_dlpack.py +730 -0
- warp/tests/interop/test_jax.py +1673 -0
- warp/tests/interop/test_paddle.py +800 -0
- warp/tests/interop/test_torch.py +1001 -0
- warp/tests/run_coverage_serial.py +39 -0
- warp/tests/test_adam.py +162 -0
- warp/tests/test_arithmetic.py +1096 -0
- warp/tests/test_array.py +3756 -0
- warp/tests/test_array_reduce.py +156 -0
- warp/tests/test_assert.py +303 -0
- warp/tests/test_atomic.py +336 -0
- warp/tests/test_atomic_bitwise.py +209 -0
- warp/tests/test_atomic_cas.py +312 -0
- warp/tests/test_bool.py +220 -0
- warp/tests/test_builtins_resolution.py +732 -0
- warp/tests/test_closest_point_edge_edge.py +327 -0
- warp/tests/test_codegen.py +974 -0
- warp/tests/test_codegen_instancing.py +1495 -0
- warp/tests/test_compile_consts.py +215 -0
- warp/tests/test_conditional.py +298 -0
- warp/tests/test_context.py +35 -0
- warp/tests/test_copy.py +319 -0
- warp/tests/test_ctypes.py +618 -0
- warp/tests/test_dense.py +73 -0
- warp/tests/test_devices.py +127 -0
- warp/tests/test_enum.py +136 -0
- warp/tests/test_examples.py +424 -0
- warp/tests/test_fabricarray.py +998 -0
- warp/tests/test_fast_math.py +72 -0
- warp/tests/test_fem.py +2204 -0
- warp/tests/test_fixedarray.py +229 -0
- warp/tests/test_fp16.py +136 -0
- warp/tests/test_func.py +501 -0
- warp/tests/test_future_annotations.py +100 -0
- warp/tests/test_generics.py +656 -0
- warp/tests/test_grad.py +893 -0
- warp/tests/test_grad_customs.py +339 -0
- warp/tests/test_grad_debug.py +341 -0
- warp/tests/test_implicit_init.py +411 -0
- warp/tests/test_import.py +45 -0
- warp/tests/test_indexedarray.py +1140 -0
- warp/tests/test_intersect.py +103 -0
- warp/tests/test_iter.py +76 -0
- warp/tests/test_large.py +177 -0
- warp/tests/test_launch.py +411 -0
- warp/tests/test_lerp.py +151 -0
- warp/tests/test_linear_solvers.py +223 -0
- warp/tests/test_lvalue.py +427 -0
- warp/tests/test_map.py +526 -0
- warp/tests/test_mat.py +3515 -0
- warp/tests/test_mat_assign_copy.py +178 -0
- warp/tests/test_mat_constructors.py +573 -0
- warp/tests/test_mat_lite.py +122 -0
- warp/tests/test_mat_scalar_ops.py +2913 -0
- warp/tests/test_math.py +212 -0
- warp/tests/test_module_aot.py +287 -0
- warp/tests/test_module_hashing.py +258 -0
- warp/tests/test_modules_lite.py +70 -0
- warp/tests/test_noise.py +252 -0
- warp/tests/test_operators.py +299 -0
- warp/tests/test_options.py +129 -0
- warp/tests/test_overwrite.py +551 -0
- warp/tests/test_print.py +408 -0
- warp/tests/test_quat.py +2653 -0
- warp/tests/test_quat_assign_copy.py +145 -0
- warp/tests/test_rand.py +339 -0
- warp/tests/test_reload.py +303 -0
- warp/tests/test_rounding.py +157 -0
- warp/tests/test_runlength_encode.py +196 -0
- warp/tests/test_scalar_ops.py +133 -0
- warp/tests/test_smoothstep.py +108 -0
- warp/tests/test_snippet.py +318 -0
- warp/tests/test_sparse.py +845 -0
- warp/tests/test_spatial.py +2859 -0
- warp/tests/test_spatial_assign_copy.py +160 -0
- warp/tests/test_special_values.py +361 -0
- warp/tests/test_static.py +640 -0
- warp/tests/test_struct.py +901 -0
- warp/tests/test_tape.py +242 -0
- warp/tests/test_transient_module.py +93 -0
- warp/tests/test_triangle_closest_point.py +192 -0
- warp/tests/test_tuple.py +361 -0
- warp/tests/test_types.py +615 -0
- warp/tests/test_utils.py +594 -0
- warp/tests/test_vec.py +1408 -0
- warp/tests/test_vec_assign_copy.py +143 -0
- warp/tests/test_vec_constructors.py +325 -0
- warp/tests/test_vec_lite.py +80 -0
- warp/tests/test_vec_scalar_ops.py +2327 -0
- warp/tests/test_verify_fp.py +100 -0
- warp/tests/test_version.py +75 -0
- warp/tests/tile/__init__.py +0 -0
- warp/tests/tile/test_tile.py +1519 -0
- warp/tests/tile/test_tile_atomic_bitwise.py +403 -0
- warp/tests/tile/test_tile_cholesky.py +608 -0
- warp/tests/tile/test_tile_load.py +724 -0
- warp/tests/tile/test_tile_mathdx.py +156 -0
- warp/tests/tile/test_tile_matmul.py +179 -0
- warp/tests/tile/test_tile_mlp.py +400 -0
- warp/tests/tile/test_tile_reduce.py +950 -0
- warp/tests/tile/test_tile_shared_memory.py +376 -0
- warp/tests/tile/test_tile_sort.py +121 -0
- warp/tests/tile/test_tile_view.py +173 -0
- warp/tests/unittest_serial.py +47 -0
- warp/tests/unittest_suites.py +430 -0
- warp/tests/unittest_utils.py +469 -0
- warp/tests/walkthrough_debug.py +95 -0
- warp/torch.py +24 -0
- warp/types.py +51 -0
- warp/utils.py +31 -0
- warp_lang-1.10.0.dist-info/METADATA +459 -0
- warp_lang-1.10.0.dist-info/RECORD +468 -0
- warp_lang-1.10.0.dist-info/WHEEL +5 -0
- warp_lang-1.10.0.dist-info/licenses/LICENSE.md +176 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/Gaia-LICENSE.txt +6 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/appdirs-LICENSE.txt +22 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/asset_pixel_jpg-LICENSE.txt +3 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/cuda-LICENSE.txt +1582 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/dlpack-LICENSE.txt +201 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/fp16-LICENSE.txt +28 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/libmathdx-LICENSE.txt +220 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/llvm-LICENSE.txt +279 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/moller-LICENSE.txt +16 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/nanovdb-LICENSE.txt +2 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/nvrtc-LICENSE.txt +1592 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/svd-LICENSE.txt +23 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/unittest_parallel-LICENSE.txt +21 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/usd-LICENSE.txt +213 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/windingnumber-LICENSE.txt +21 -0
- warp_lang-1.10.0.dist-info/top_level.txt +1 -0
warp/native/tile_scan.h
ADDED
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
*
|
|
5
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
* you may not use this file except in compliance with the License.
|
|
7
|
+
* You may obtain a copy of the License at
|
|
8
|
+
*
|
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
*
|
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
* See the License for the specific language governing permissions and
|
|
15
|
+
* limitations under the License.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
#pragma once
|
|
19
|
+
|
|
20
|
+
#include "tile.h"
|
|
21
|
+
|
|
22
|
+
#if defined(__clang__)
|
|
23
|
+
// disable warnings related to C++17 extensions on CPU JIT builds
|
|
24
|
+
#pragma clang diagnostic push
|
|
25
|
+
#pragma clang diagnostic ignored "-Wc++17-extensions"
|
|
26
|
+
#endif
|
|
27
|
+
|
|
28
|
+
namespace wp
|
|
29
|
+
{
|
|
30
|
+
|
|
31
|
+
#if defined(__CUDA_ARCH__)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
template<typename T>
|
|
35
|
+
inline CUDA_CALLABLE T scan_warp_inclusive(int lane, T value)
|
|
36
|
+
{
|
|
37
|
+
//Computes an inclusive cumulative sum
|
|
38
|
+
#pragma unroll
|
|
39
|
+
for (int i = 1; i <= 32; i *= 2)
|
|
40
|
+
{
|
|
41
|
+
auto n = __shfl_up_sync(0xffffffffu, value, i, 32);
|
|
42
|
+
|
|
43
|
+
if (lane >= i)
|
|
44
|
+
value = value + n;
|
|
45
|
+
}
|
|
46
|
+
return value;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
template<typename T>
|
|
51
|
+
inline CUDA_CALLABLE T thread_block_scan_inclusive(int lane, int warp_index, int num_warps, T value)
|
|
52
|
+
{
|
|
53
|
+
__shared__ T sums[1024 / WP_TILE_WARP_SIZE]; // 1024 is the maximum number of threads per block
|
|
54
|
+
|
|
55
|
+
value = scan_warp_inclusive(lane, value);
|
|
56
|
+
|
|
57
|
+
if (lane == 31)
|
|
58
|
+
{
|
|
59
|
+
sums[warp_index] = value;
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
WP_TILE_SYNC();
|
|
63
|
+
|
|
64
|
+
if (warp_index == 0)
|
|
65
|
+
{
|
|
66
|
+
T v = lane < num_warps ? sums[lane] : T(0);
|
|
67
|
+
v = scan_warp_inclusive(lane, v);
|
|
68
|
+
if (lane < num_warps)
|
|
69
|
+
sums[lane] = v;
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
WP_TILE_SYNC();
|
|
73
|
+
|
|
74
|
+
if (warp_index > 0)
|
|
75
|
+
{
|
|
76
|
+
value += sums[warp_index - 1];
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
return value;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
template<typename T, bool exclusive>
|
|
83
|
+
inline CUDA_CALLABLE void thread_block_scan(T* values, int num_elements)
|
|
84
|
+
{
|
|
85
|
+
const int num_threads_in_block = blockDim.x;
|
|
86
|
+
const int num_iterations = (num_elements + num_threads_in_block - 1) / num_threads_in_block;
|
|
87
|
+
|
|
88
|
+
__shared__ T offset;
|
|
89
|
+
if (threadIdx.x == 0)
|
|
90
|
+
offset = T(0);
|
|
91
|
+
|
|
92
|
+
WP_TILE_SYNC();
|
|
93
|
+
|
|
94
|
+
const int lane = WP_TILE_THREAD_IDX % WP_TILE_WARP_SIZE;
|
|
95
|
+
const int warp_index = WP_TILE_THREAD_IDX / WP_TILE_WARP_SIZE;
|
|
96
|
+
const int num_warps = num_threads_in_block / WP_TILE_WARP_SIZE;
|
|
97
|
+
|
|
98
|
+
for (int i = 0; i < num_iterations; ++i)
|
|
99
|
+
{
|
|
100
|
+
int element_index = WP_TILE_THREAD_IDX + i * num_threads_in_block;
|
|
101
|
+
T orig_value = element_index < num_elements ? values[element_index] : T(0);
|
|
102
|
+
T value = thread_block_scan_inclusive(lane, warp_index, num_warps, orig_value);
|
|
103
|
+
if (element_index < num_elements)
|
|
104
|
+
{
|
|
105
|
+
T new_value = value + offset;
|
|
106
|
+
if constexpr (exclusive)
|
|
107
|
+
new_value -= orig_value;
|
|
108
|
+
values[element_index] = new_value;
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
WP_TILE_SYNC();
|
|
112
|
+
|
|
113
|
+
if (threadIdx.x == num_threads_in_block - 1)
|
|
114
|
+
offset += value;
|
|
115
|
+
|
|
116
|
+
WP_TILE_SYNC();
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
template<typename Tile>
|
|
121
|
+
inline CUDA_CALLABLE auto tile_scan_inclusive_impl(Tile& t)
|
|
122
|
+
{
|
|
123
|
+
using T = typename Tile::Type;
|
|
124
|
+
constexpr int num_elements_to_scan = Tile::Layout::Shape::size();
|
|
125
|
+
|
|
126
|
+
// create a temporary shared tile to hold the input values
|
|
127
|
+
__shared__ T smem[num_elements_to_scan];
|
|
128
|
+
tile_shared_t<T, tile_layout_strided_t<typename Tile::Layout::Shape>, false> scratch(smem, nullptr);
|
|
129
|
+
|
|
130
|
+
// copy input values to scratch space
|
|
131
|
+
scratch.assign(t);
|
|
132
|
+
|
|
133
|
+
T* values = &scratch.data(0);
|
|
134
|
+
thread_block_scan<T, false>(values, num_elements_to_scan);
|
|
135
|
+
|
|
136
|
+
auto result = scratch.copy_to_register();
|
|
137
|
+
|
|
138
|
+
WP_TILE_SYNC();
|
|
139
|
+
|
|
140
|
+
return result;
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
template<typename Tile>
|
|
144
|
+
inline CUDA_CALLABLE auto tile_scan_exclusive_impl(Tile& t)
|
|
145
|
+
{
|
|
146
|
+
using T = typename Tile::Type;
|
|
147
|
+
constexpr int num_elements_to_scan = Tile::Layout::Shape::size();
|
|
148
|
+
|
|
149
|
+
// create a temporary shared tile to hold the input values
|
|
150
|
+
__shared__ T smem[num_elements_to_scan];
|
|
151
|
+
tile_shared_t<T, tile_layout_strided_t<typename Tile::Layout::Shape>, false> scratch(smem, nullptr);
|
|
152
|
+
|
|
153
|
+
// copy input values to scratch space
|
|
154
|
+
scratch.assign(t);
|
|
155
|
+
|
|
156
|
+
T* values = &scratch.data(0);
|
|
157
|
+
thread_block_scan<T, true>(values, num_elements_to_scan);
|
|
158
|
+
|
|
159
|
+
auto result = scratch.copy_to_register();
|
|
160
|
+
|
|
161
|
+
WP_TILE_SYNC();
|
|
162
|
+
|
|
163
|
+
return result;
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
#else
|
|
167
|
+
|
|
168
|
+
template<typename Tile>
|
|
169
|
+
inline auto tile_scan_inclusive_impl(Tile& t)
|
|
170
|
+
{
|
|
171
|
+
using T = typename Tile::Type;
|
|
172
|
+
constexpr int num_elements_to_scan = Tile::Layout::Shape::size();
|
|
173
|
+
|
|
174
|
+
auto input = t.copy_to_register();
|
|
175
|
+
auto output = tile_register_like<Tile>();
|
|
176
|
+
|
|
177
|
+
using Layout = typename decltype(input)::Layout;
|
|
178
|
+
|
|
179
|
+
T sum = T(0);
|
|
180
|
+
for (int i = 0; i < num_elements_to_scan; ++i)
|
|
181
|
+
{
|
|
182
|
+
sum += input.data[i];
|
|
183
|
+
output.data[i] = sum;
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
return output;
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
template<typename Tile>
|
|
190
|
+
inline auto tile_scan_exclusive_impl(Tile& t)
|
|
191
|
+
{
|
|
192
|
+
using T = typename Tile::Type;
|
|
193
|
+
constexpr int num_elements_to_scan = Tile::Layout::Shape::size();
|
|
194
|
+
|
|
195
|
+
auto input = t.copy_to_register();
|
|
196
|
+
auto output = tile_register_like<Tile>();
|
|
197
|
+
|
|
198
|
+
using Layout = typename decltype(input)::Layout;
|
|
199
|
+
|
|
200
|
+
T sum = T(0);
|
|
201
|
+
for (int i = 0; i < num_elements_to_scan; ++i)
|
|
202
|
+
{
|
|
203
|
+
output.data[i] = sum;
|
|
204
|
+
sum += input.data[i];
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
return output;
|
|
208
|
+
}
|
|
209
|
+
|
|
210
|
+
#endif // !defined(__CUDA_ARCH__)
|
|
211
|
+
|
|
212
|
+
// Inclusive cumulative sum over the elements of a tile.
// Forwards to tile_scan_inclusive_impl, which is selected at compile time
// by the __CUDA_ARCH__ guard, and returns its result.
template <typename Tile>
auto tile_scan_inclusive(Tile& t)
{
    auto scanned = tile_scan_inclusive_impl(t);
    return scanned;
}
|
|
217
|
+
|
|
218
|
+
// Adjoint counterpart of tile_scan_inclusive.
// todo: not implemented - currently a no-op that leaves adj_t untouched.
template <typename Tile, typename AdjTile>
void adj_tile_scan_inclusive(Tile& t, Tile& adj_t, AdjTile& adj_ret)
{
    (void)t;
    (void)adj_t;
    (void)adj_ret;
}
|
|
223
|
+
|
|
224
|
+
// Exclusive cumulative sum over the elements of a tile.
// Forwards to tile_scan_exclusive_impl, which is selected at compile time
// by the __CUDA_ARCH__ guard, and returns its result.
template <typename Tile>
auto tile_scan_exclusive(Tile& t)
{
    auto scanned = tile_scan_exclusive_impl(t);
    return scanned;
}
|
|
229
|
+
|
|
230
|
+
// Adjoint counterpart of tile_scan_exclusive.
// todo: not implemented - currently a no-op that leaves adj_t untouched.
template <typename Tile, typename AdjTile>
void adj_tile_scan_exclusive(Tile& t, Tile& adj_t, AdjTile& adj_ret)
{
    (void)t;
    (void)adj_t;
    (void)adj_ret;
}
|
|
235
|
+
|
|
236
|
+
} // namespace wp
|
|
237
|
+
|
|
238
|
+
#if defined(__clang__)
|
|
239
|
+
#pragma clang diagnostic pop
|
|
240
|
+
#endif
|
warp/native/tuple.h
ADDED
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
/*
|
|
2
|
+
* SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
3
|
+
* SPDX-License-Identifier: Apache-2.0
|
|
4
|
+
*
|
|
5
|
+
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
6
|
+
* you may not use this file except in compliance with the License.
|
|
7
|
+
* You may obtain a copy of the License at
|
|
8
|
+
*
|
|
9
|
+
* http://www.apache.org/licenses/LICENSE-2.0
|
|
10
|
+
*
|
|
11
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
12
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
13
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
14
|
+
* See the License for the specific language governing permissions and
|
|
15
|
+
* limitations under the License.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
#pragma once
|
|
19
|
+
|
|
20
|
+
namespace wp
|
|
21
|
+
{
|
|
22
|
+
|
|
23
|
+
// Variadic tuple type; only the specializations are defined.
template <typename... Types>
struct tuple_t;

// Base case: empty tuple.
template <>
struct tuple_t<>
{
    // Number of elements held; always zero for the empty tuple.
    static constexpr int size()
    {
        return 0;
    }

    // There are no elements to visit, so the callable is never invoked.
    template <typename Callable>
    void apply(Callable&&) const
    {
    }
};
|
|
36
|
+
|
|
37
|
+
template <typename Head, typename... Tail>
|
|
38
|
+
struct tuple_t<Head, Tail...>
|
|
39
|
+
{
|
|
40
|
+
Head head;
|
|
41
|
+
tuple_t<Tail...> tail;
|
|
42
|
+
|
|
43
|
+
CUDA_CALLABLE inline tuple_t() {}
|
|
44
|
+
CUDA_CALLABLE inline tuple_t(Head h, Tail... t) : head(h), tail(t...) {}
|
|
45
|
+
|
|
46
|
+
static constexpr int size() { return 1 + tuple_t<Tail...>::size(); }
|
|
47
|
+
|
|
48
|
+
// Applies a callable to each element.
|
|
49
|
+
template <typename Callable>
|
|
50
|
+
void apply(Callable&& func) const
|
|
51
|
+
{
|
|
52
|
+
func(head); // Apply the callable to the current element.
|
|
53
|
+
tail.apply(func); // Recursively process the rest of the tuple.
|
|
54
|
+
}
|
|
55
|
+
};
|
|
56
|
+
|
|
57
|
+
// Tuple constructor.
|
|
58
|
+
template <typename... Args>
|
|
59
|
+
CUDA_CALLABLE inline tuple_t<Args...>
|
|
60
|
+
tuple(
|
|
61
|
+
Args... args
|
|
62
|
+
)
|
|
63
|
+
{
|
|
64
|
+
return tuple_t<Args...>(args...);
|
|
65
|
+
}
|
|
66
|
+
|
|
67
|
+
// Helper to extract a value from the tuple.
|
|
68
|
+
// Can be replaced with simpler member function version when our CPU compiler
|
|
69
|
+
// backend supports constexpr if statements.
|
|
70
|
+
template <int N, typename Head, typename... Tail>
|
|
71
|
+
struct tuple_get
|
|
72
|
+
{
|
|
73
|
+
static CUDA_CALLABLE inline const auto&
|
|
74
|
+
value(
|
|
75
|
+
const tuple_t<Head, Tail...>& t
|
|
76
|
+
)
|
|
77
|
+
{
|
|
78
|
+
return tuple_get<N - 1, Tail...>::value(t.tail);
|
|
79
|
+
}
|
|
80
|
+
};
|
|
81
|
+
|
|
82
|
+
// Specialization for the base case N == 0. Simply return the head of the tuple.
|
|
83
|
+
template <typename Head, typename... Tail>
|
|
84
|
+
struct tuple_get<0, Head, Tail...>
|
|
85
|
+
{
|
|
86
|
+
static CUDA_CALLABLE inline const auto&
|
|
87
|
+
value(
|
|
88
|
+
const tuple_t<Head, Tail...>& t
|
|
89
|
+
)
|
|
90
|
+
{
|
|
91
|
+
return t.head;
|
|
92
|
+
}
|
|
93
|
+
};
|
|
94
|
+
|
|
95
|
+
template <int Index, typename... Args>
|
|
96
|
+
CUDA_CALLABLE inline auto
|
|
97
|
+
extract(
|
|
98
|
+
const tuple_t<Args...>& t
|
|
99
|
+
)
|
|
100
|
+
{
|
|
101
|
+
return tuple_get<Index, Args...>::value(t);
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
template <typename... Args>
|
|
105
|
+
CUDA_CALLABLE inline int
|
|
106
|
+
len(
|
|
107
|
+
const tuple_t<Args...>& t
|
|
108
|
+
)
|
|
109
|
+
{
|
|
110
|
+
return t.size();
|
|
111
|
+
}
|
|
112
|
+
|
|
113
|
+
template <typename... Args>
|
|
114
|
+
CUDA_CALLABLE inline void
|
|
115
|
+
adj_len(
|
|
116
|
+
const tuple_t<Args...>& t,
|
|
117
|
+
tuple_t<Args...>& adj_t,
|
|
118
|
+
int adj_ret
|
|
119
|
+
)
|
|
120
|
+
{
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
template <typename... Args>
|
|
124
|
+
CUDA_CALLABLE inline void
|
|
125
|
+
print(
|
|
126
|
+
const tuple_t<Args...>& t
|
|
127
|
+
)
|
|
128
|
+
{
|
|
129
|
+
t.apply([&](auto a) { print(a); });
|
|
130
|
+
}
|
|
131
|
+
|
|
132
|
+
template <typename... Args>
|
|
133
|
+
CUDA_CALLABLE inline void
|
|
134
|
+
adj_print(
|
|
135
|
+
const tuple_t<Args...>& t,
|
|
136
|
+
tuple_t<Args...>& adj_t
|
|
137
|
+
)
|
|
138
|
+
{
|
|
139
|
+
adj_t.apply([&](auto a) { print(a); });
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
// Base case of element-wise tuple addition: the sum of two empty tuples is
// an empty tuple.
CUDA_CALLABLE inline tuple_t<> add(const tuple_t<>& a, const tuple_t<>& b)
{
    (void)a;
    (void)b;
    return {};
}
|
|
150
|
+
|
|
151
|
+
template <typename Head, typename... Tail>
|
|
152
|
+
CUDA_CALLABLE inline tuple_t<Head, Tail...>
|
|
153
|
+
add(
|
|
154
|
+
const tuple_t<Head, Tail...>& a,
|
|
155
|
+
const tuple_t<Head, Tail...>& b
|
|
156
|
+
)
|
|
157
|
+
{
|
|
158
|
+
tuple_t<Head, Tail...> out;
|
|
159
|
+
out.head = add(a.head, b.head);
|
|
160
|
+
out.tail = add(a.tail, b.tail);
|
|
161
|
+
return out;
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
// Base case of the tuple-addition adjoint: empty tuples carry no
// gradients, so there is nothing to do.
CUDA_CALLABLE inline void adj_add(
    const tuple_t<>& a,
    const tuple_t<>& b,
    tuple_t<>& adj_a,
    tuple_t<>& adj_b,
    const tuple_t<>& adj_ret)
{
    (void)a;
    (void)b;
    (void)adj_a;
    (void)adj_b;
    (void)adj_ret;
}
|
|
174
|
+
|
|
175
|
+
template <typename Head, typename... Tail>
|
|
176
|
+
CUDA_CALLABLE inline void
|
|
177
|
+
adj_add(
|
|
178
|
+
const tuple_t<Head, Tail...>& a,
|
|
179
|
+
const tuple_t<Head, Tail...>& b,
|
|
180
|
+
tuple_t<Head, Tail...>& adj_a,
|
|
181
|
+
tuple_t<Head, Tail...>& adj_b,
|
|
182
|
+
const tuple_t<Head, Tail...>& adj_ret
|
|
183
|
+
)
|
|
184
|
+
{
|
|
185
|
+
adj_add(a.head, b.head, adj_a.head, adj_b.head, adj_ret.head);
|
|
186
|
+
adj_add(a.tail, b.tail, adj_a.tail, adj_b.tail, adj_ret.tail);
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
} // namespace wp
|