PyPI - warp-lang - Versions diffs - 1.10.0__py3-none-macosx_11_0_arm64.whl - Mend

warp-lang 1.10.0__py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (468) hide show

warp/__init__.py +334 -0
warp/__init__.pyi +5856 -0
warp/_src/__init__.py +14 -0
warp/_src/autograd.py +1077 -0
warp/_src/build.py +620 -0
warp/_src/build_dll.py +642 -0
warp/_src/builtins.py +10555 -0
warp/_src/codegen.py +4361 -0
warp/_src/config.py +178 -0
warp/_src/constants.py +59 -0
warp/_src/context.py +8352 -0
warp/_src/dlpack.py +464 -0
warp/_src/fabric.py +362 -0
warp/_src/fem/__init__.py +14 -0
warp/_src/fem/adaptivity.py +510 -0
warp/_src/fem/cache.py +689 -0
warp/_src/fem/dirichlet.py +190 -0
warp/_src/fem/domain.py +553 -0
warp/_src/fem/field/__init__.py +131 -0
warp/_src/fem/field/field.py +703 -0
warp/_src/fem/field/nodal_field.py +403 -0
warp/_src/fem/field/restriction.py +39 -0
warp/_src/fem/field/virtual.py +1021 -0
warp/_src/fem/geometry/__init__.py +32 -0
warp/_src/fem/geometry/adaptive_nanogrid.py +782 -0
warp/_src/fem/geometry/closest_point.py +99 -0
warp/_src/fem/geometry/deformed_geometry.py +277 -0
warp/_src/fem/geometry/element.py +854 -0
warp/_src/fem/geometry/geometry.py +693 -0
warp/_src/fem/geometry/grid_2d.py +478 -0
warp/_src/fem/geometry/grid_3d.py +539 -0
warp/_src/fem/geometry/hexmesh.py +956 -0
warp/_src/fem/geometry/nanogrid.py +660 -0
warp/_src/fem/geometry/partition.py +483 -0
warp/_src/fem/geometry/quadmesh.py +597 -0
warp/_src/fem/geometry/tetmesh.py +762 -0
warp/_src/fem/geometry/trimesh.py +588 -0
warp/_src/fem/integrate.py +2507 -0
warp/_src/fem/linalg.py +385 -0
warp/_src/fem/operator.py +398 -0
warp/_src/fem/polynomial.py +231 -0
warp/_src/fem/quadrature/__init__.py +17 -0
warp/_src/fem/quadrature/pic_quadrature.py +318 -0
warp/_src/fem/quadrature/quadrature.py +665 -0
warp/_src/fem/space/__init__.py +248 -0
warp/_src/fem/space/basis_function_space.py +499 -0
warp/_src/fem/space/basis_space.py +681 -0
warp/_src/fem/space/dof_mapper.py +253 -0
warp/_src/fem/space/function_space.py +312 -0
warp/_src/fem/space/grid_2d_function_space.py +179 -0
warp/_src/fem/space/grid_3d_function_space.py +229 -0
warp/_src/fem/space/hexmesh_function_space.py +255 -0
warp/_src/fem/space/nanogrid_function_space.py +199 -0
warp/_src/fem/space/partition.py +435 -0
warp/_src/fem/space/quadmesh_function_space.py +222 -0
warp/_src/fem/space/restriction.py +221 -0
warp/_src/fem/space/shape/__init__.py +152 -0
warp/_src/fem/space/shape/cube_shape_function.py +1107 -0
warp/_src/fem/space/shape/shape_function.py +134 -0
warp/_src/fem/space/shape/square_shape_function.py +928 -0
warp/_src/fem/space/shape/tet_shape_function.py +829 -0
warp/_src/fem/space/shape/triangle_shape_function.py +674 -0
warp/_src/fem/space/tetmesh_function_space.py +270 -0
warp/_src/fem/space/topology.py +461 -0
warp/_src/fem/space/trimesh_function_space.py +193 -0
warp/_src/fem/types.py +114 -0
warp/_src/fem/utils.py +488 -0
warp/_src/jax.py +188 -0
warp/_src/jax_experimental/__init__.py +14 -0
warp/_src/jax_experimental/custom_call.py +389 -0
warp/_src/jax_experimental/ffi.py +1286 -0
warp/_src/jax_experimental/xla_ffi.py +658 -0
warp/_src/marching_cubes.py +710 -0
warp/_src/math.py +416 -0
warp/_src/optim/__init__.py +14 -0
warp/_src/optim/adam.py +165 -0
warp/_src/optim/linear.py +1608 -0
warp/_src/optim/sgd.py +114 -0
warp/_src/paddle.py +408 -0
warp/_src/render/__init__.py +14 -0
warp/_src/render/imgui_manager.py +291 -0
warp/_src/render/render_opengl.py +3638 -0
warp/_src/render/render_usd.py +939 -0
warp/_src/render/utils.py +162 -0
warp/_src/sparse.py +2718 -0
warp/_src/tape.py +1208 -0
warp/_src/thirdparty/__init__.py +0 -0
warp/_src/thirdparty/appdirs.py +598 -0
warp/_src/thirdparty/dlpack.py +145 -0
warp/_src/thirdparty/unittest_parallel.py +676 -0
warp/_src/torch.py +393 -0
warp/_src/types.py +5888 -0
warp/_src/utils.py +1695 -0
warp/autograd.py +33 -0
warp/bin/libwarp-clang.dylib +0 -0
warp/bin/libwarp.dylib +0 -0
warp/build.py +29 -0
warp/build_dll.py +24 -0
warp/codegen.py +24 -0
warp/constants.py +24 -0
warp/context.py +33 -0
warp/dlpack.py +24 -0
warp/examples/__init__.py +24 -0
warp/examples/assets/bear.usd +0 -0
warp/examples/assets/bunny.usd +0 -0
warp/examples/assets/cube.usd +0 -0
warp/examples/assets/nonuniform.usd +0 -0
warp/examples/assets/nvidia_logo.png +0 -0
warp/examples/assets/pixel.jpg +0 -0
warp/examples/assets/rocks.nvdb +0 -0
warp/examples/assets/rocks.usd +0 -0
warp/examples/assets/sphere.usd +0 -0
warp/examples/assets/square_cloth.usd +0 -0
warp/examples/benchmarks/benchmark_api.py +389 -0
warp/examples/benchmarks/benchmark_cloth.py +296 -0
warp/examples/benchmarks/benchmark_cloth_cupy.py +96 -0
warp/examples/benchmarks/benchmark_cloth_jax.py +105 -0
warp/examples/benchmarks/benchmark_cloth_numba.py +161 -0
warp/examples/benchmarks/benchmark_cloth_numpy.py +85 -0
warp/examples/benchmarks/benchmark_cloth_paddle.py +94 -0
warp/examples/benchmarks/benchmark_cloth_pytorch.py +94 -0
warp/examples/benchmarks/benchmark_cloth_taichi.py +120 -0
warp/examples/benchmarks/benchmark_cloth_warp.py +153 -0
warp/examples/benchmarks/benchmark_gemm.py +164 -0
warp/examples/benchmarks/benchmark_interop_paddle.py +166 -0
warp/examples/benchmarks/benchmark_interop_torch.py +166 -0
warp/examples/benchmarks/benchmark_launches.py +301 -0
warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
warp/examples/benchmarks/benchmark_tile_sort.py +155 -0
warp/examples/browse.py +37 -0
warp/examples/core/example_cupy.py +86 -0
warp/examples/core/example_dem.py +241 -0
warp/examples/core/example_fluid.py +299 -0
warp/examples/core/example_graph_capture.py +150 -0
warp/examples/core/example_marching_cubes.py +195 -0
warp/examples/core/example_mesh.py +180 -0
warp/examples/core/example_mesh_intersect.py +211 -0
warp/examples/core/example_nvdb.py +182 -0
warp/examples/core/example_raycast.py +111 -0
warp/examples/core/example_raymarch.py +205 -0
warp/examples/core/example_render_opengl.py +290 -0
warp/examples/core/example_sample_mesh.py +300 -0
warp/examples/core/example_sph.py +411 -0
warp/examples/core/example_spin_lock.py +93 -0
warp/examples/core/example_torch.py +211 -0
warp/examples/core/example_wave.py +269 -0
warp/examples/core/example_work_queue.py +118 -0
warp/examples/distributed/example_jacobi_mpi.py +506 -0
warp/examples/fem/example_adaptive_grid.py +286 -0
warp/examples/fem/example_apic_fluid.py +469 -0
warp/examples/fem/example_burgers.py +261 -0
warp/examples/fem/example_convection_diffusion.py +181 -0
warp/examples/fem/example_convection_diffusion_dg.py +225 -0
warp/examples/fem/example_darcy_ls_optimization.py +489 -0
warp/examples/fem/example_deformed_geometry.py +172 -0
warp/examples/fem/example_diffusion.py +196 -0
warp/examples/fem/example_diffusion_3d.py +225 -0
warp/examples/fem/example_diffusion_mgpu.py +225 -0
warp/examples/fem/example_distortion_energy.py +228 -0
warp/examples/fem/example_elastic_shape_optimization.py +387 -0
warp/examples/fem/example_magnetostatics.py +242 -0
warp/examples/fem/example_mixed_elasticity.py +293 -0
warp/examples/fem/example_navier_stokes.py +263 -0
warp/examples/fem/example_nonconforming_contact.py +300 -0
warp/examples/fem/example_stokes.py +213 -0
warp/examples/fem/example_stokes_transfer.py +262 -0
warp/examples/fem/example_streamlines.py +357 -0
warp/examples/fem/utils.py +1047 -0
warp/examples/interop/example_jax_callable.py +146 -0
warp/examples/interop/example_jax_ffi_callback.py +132 -0
warp/examples/interop/example_jax_kernel.py +232 -0
warp/examples/optim/example_diffray.py +561 -0
warp/examples/optim/example_fluid_checkpoint.py +497 -0
warp/examples/tile/example_tile_block_cholesky.py +502 -0
warp/examples/tile/example_tile_cholesky.py +88 -0
warp/examples/tile/example_tile_convolution.py +66 -0
warp/examples/tile/example_tile_fft.py +55 -0
warp/examples/tile/example_tile_filtering.py +113 -0
warp/examples/tile/example_tile_matmul.py +85 -0
warp/examples/tile/example_tile_mcgp.py +191 -0
warp/examples/tile/example_tile_mlp.py +385 -0
warp/examples/tile/example_tile_nbody.py +199 -0
warp/fabric.py +24 -0
warp/fem/__init__.py +173 -0
warp/fem/adaptivity.py +26 -0
warp/fem/cache.py +30 -0
warp/fem/dirichlet.py +24 -0
warp/fem/field/__init__.py +24 -0
warp/fem/field/field.py +26 -0
warp/fem/geometry/__init__.py +21 -0
warp/fem/geometry/closest_point.py +31 -0
warp/fem/linalg.py +38 -0
warp/fem/operator.py +32 -0
warp/fem/polynomial.py +29 -0
warp/fem/space/__init__.py +22 -0
warp/fem/space/basis_space.py +24 -0
warp/fem/space/shape/__init__.py +68 -0
warp/fem/space/topology.py +24 -0
warp/fem/types.py +24 -0
warp/fem/utils.py +32 -0
warp/jax.py +29 -0
warp/jax_experimental/__init__.py +29 -0
warp/jax_experimental/custom_call.py +29 -0
warp/jax_experimental/ffi.py +39 -0
warp/jax_experimental/xla_ffi.py +24 -0
warp/marching_cubes.py +24 -0
warp/math.py +37 -0
warp/native/array.h +1687 -0
warp/native/builtin.h +2327 -0
warp/native/bvh.cpp +562 -0
warp/native/bvh.cu +826 -0
warp/native/bvh.h +555 -0
warp/native/clang/clang.cpp +541 -0
warp/native/coloring.cpp +622 -0
warp/native/crt.cpp +51 -0
warp/native/crt.h +568 -0
warp/native/cuda_crt.h +1058 -0
warp/native/cuda_util.cpp +677 -0
warp/native/cuda_util.h +313 -0
warp/native/error.cpp +77 -0
warp/native/error.h +36 -0
warp/native/exports.h +2023 -0
warp/native/fabric.h +246 -0
warp/native/hashgrid.cpp +311 -0
warp/native/hashgrid.cu +89 -0
warp/native/hashgrid.h +240 -0
warp/native/initializer_array.h +41 -0
warp/native/intersect.h +1253 -0
warp/native/intersect_adj.h +375 -0
warp/native/intersect_tri.h +348 -0
warp/native/mat.h +5189 -0
warp/native/mathdx.cpp +93 -0
warp/native/matnn.h +221 -0
warp/native/mesh.cpp +266 -0
warp/native/mesh.cu +406 -0
warp/native/mesh.h +2097 -0
warp/native/nanovdb/GridHandle.h +533 -0
warp/native/nanovdb/HostBuffer.h +591 -0
warp/native/nanovdb/NanoVDB.h +6246 -0
warp/native/nanovdb/NodeManager.h +323 -0
warp/native/nanovdb/PNanoVDB.h +3390 -0
warp/native/noise.h +859 -0
warp/native/quat.h +1664 -0
warp/native/rand.h +342 -0
warp/native/range.h +145 -0
warp/native/reduce.cpp +174 -0
warp/native/reduce.cu +363 -0
warp/native/runlength_encode.cpp +79 -0
warp/native/runlength_encode.cu +61 -0
warp/native/scan.cpp +47 -0
warp/native/scan.cu +55 -0
warp/native/scan.h +23 -0
warp/native/solid_angle.h +466 -0
warp/native/sort.cpp +251 -0
warp/native/sort.cu +286 -0
warp/native/sort.h +35 -0
warp/native/sparse.cpp +241 -0
warp/native/sparse.cu +435 -0
warp/native/spatial.h +1306 -0
warp/native/svd.h +727 -0
warp/native/temp_buffer.h +46 -0
warp/native/tile.h +4124 -0
warp/native/tile_radix_sort.h +1112 -0
warp/native/tile_reduce.h +838 -0
warp/native/tile_scan.h +240 -0
warp/native/tuple.h +189 -0
warp/native/vec.h +2199 -0
warp/native/version.h +23 -0
warp/native/volume.cpp +501 -0
warp/native/volume.cu +68 -0
warp/native/volume.h +970 -0
warp/native/volume_builder.cu +483 -0
warp/native/volume_builder.h +52 -0
warp/native/volume_impl.h +70 -0
warp/native/warp.cpp +1143 -0
warp/native/warp.cu +4604 -0
warp/native/warp.h +358 -0
warp/optim/__init__.py +20 -0
warp/optim/adam.py +24 -0
warp/optim/linear.py +35 -0
warp/optim/sgd.py +24 -0
warp/paddle.py +24 -0
warp/py.typed +0 -0
warp/render/__init__.py +22 -0
warp/render/imgui_manager.py +29 -0
warp/render/render_opengl.py +24 -0
warp/render/render_usd.py +24 -0
warp/render/utils.py +24 -0
warp/sparse.py +51 -0
warp/tape.py +24 -0
warp/tests/__init__.py +1 -0
warp/tests/__main__.py +4 -0
warp/tests/assets/curlnoise_golden.npy +0 -0
warp/tests/assets/mlp_golden.npy +0 -0
warp/tests/assets/pixel.npy +0 -0
warp/tests/assets/pnoise_golden.npy +0 -0
warp/tests/assets/spiky.usd +0 -0
warp/tests/assets/test_grid.nvdb +0 -0
warp/tests/assets/test_index_grid.nvdb +0 -0
warp/tests/assets/test_int32_grid.nvdb +0 -0
warp/tests/assets/test_vec_grid.nvdb +0 -0
warp/tests/assets/torus.nvdb +0 -0
warp/tests/assets/torus.usda +105 -0
warp/tests/aux_test_class_kernel.py +34 -0
warp/tests/aux_test_compile_consts_dummy.py +18 -0
warp/tests/aux_test_conditional_unequal_types_kernels.py +29 -0
warp/tests/aux_test_dependent.py +29 -0
warp/tests/aux_test_grad_customs.py +29 -0
warp/tests/aux_test_instancing_gc.py +26 -0
warp/tests/aux_test_module_aot.py +7 -0
warp/tests/aux_test_module_unload.py +23 -0
warp/tests/aux_test_name_clash1.py +40 -0
warp/tests/aux_test_name_clash2.py +40 -0
warp/tests/aux_test_reference.py +9 -0
warp/tests/aux_test_reference_reference.py +8 -0
warp/tests/aux_test_square.py +16 -0
warp/tests/aux_test_unresolved_func.py +22 -0
warp/tests/aux_test_unresolved_symbol.py +22 -0
warp/tests/cuda/__init__.py +0 -0
warp/tests/cuda/test_async.py +676 -0
warp/tests/cuda/test_conditional_captures.py +1147 -0
warp/tests/cuda/test_ipc.py +124 -0
warp/tests/cuda/test_mempool.py +233 -0
warp/tests/cuda/test_multigpu.py +169 -0
warp/tests/cuda/test_peer.py +139 -0
warp/tests/cuda/test_pinned.py +84 -0
warp/tests/cuda/test_streams.py +691 -0
warp/tests/geometry/__init__.py +0 -0
warp/tests/geometry/test_bvh.py +335 -0
warp/tests/geometry/test_hash_grid.py +259 -0
warp/tests/geometry/test_marching_cubes.py +294 -0
warp/tests/geometry/test_mesh.py +318 -0
warp/tests/geometry/test_mesh_query_aabb.py +392 -0
warp/tests/geometry/test_mesh_query_point.py +935 -0
warp/tests/geometry/test_mesh_query_ray.py +323 -0
warp/tests/geometry/test_volume.py +1103 -0
warp/tests/geometry/test_volume_write.py +346 -0
warp/tests/interop/__init__.py +0 -0
warp/tests/interop/test_dlpack.py +730 -0
warp/tests/interop/test_jax.py +1673 -0
warp/tests/interop/test_paddle.py +800 -0
warp/tests/interop/test_torch.py +1001 -0
warp/tests/run_coverage_serial.py +39 -0
warp/tests/test_adam.py +162 -0
warp/tests/test_arithmetic.py +1096 -0
warp/tests/test_array.py +3756 -0
warp/tests/test_array_reduce.py +156 -0
warp/tests/test_assert.py +303 -0
warp/tests/test_atomic.py +336 -0
warp/tests/test_atomic_bitwise.py +209 -0
warp/tests/test_atomic_cas.py +312 -0
warp/tests/test_bool.py +220 -0
warp/tests/test_builtins_resolution.py +732 -0
warp/tests/test_closest_point_edge_edge.py +327 -0
warp/tests/test_codegen.py +974 -0
warp/tests/test_codegen_instancing.py +1495 -0
warp/tests/test_compile_consts.py +215 -0
warp/tests/test_conditional.py +298 -0
warp/tests/test_context.py +35 -0
warp/tests/test_copy.py +319 -0
warp/tests/test_ctypes.py +618 -0
warp/tests/test_dense.py +73 -0
warp/tests/test_devices.py +127 -0
warp/tests/test_enum.py +136 -0
warp/tests/test_examples.py +424 -0
warp/tests/test_fabricarray.py +998 -0
warp/tests/test_fast_math.py +72 -0
warp/tests/test_fem.py +2204 -0
warp/tests/test_fixedarray.py +229 -0
warp/tests/test_fp16.py +136 -0
warp/tests/test_func.py +501 -0
warp/tests/test_future_annotations.py +100 -0
warp/tests/test_generics.py +656 -0
warp/tests/test_grad.py +893 -0
warp/tests/test_grad_customs.py +339 -0
warp/tests/test_grad_debug.py +341 -0
warp/tests/test_implicit_init.py +411 -0
warp/tests/test_import.py +45 -0
warp/tests/test_indexedarray.py +1140 -0
warp/tests/test_intersect.py +103 -0
warp/tests/test_iter.py +76 -0
warp/tests/test_large.py +177 -0
warp/tests/test_launch.py +411 -0
warp/tests/test_lerp.py +151 -0
warp/tests/test_linear_solvers.py +223 -0
warp/tests/test_lvalue.py +427 -0
warp/tests/test_map.py +526 -0
warp/tests/test_mat.py +3515 -0
warp/tests/test_mat_assign_copy.py +178 -0
warp/tests/test_mat_constructors.py +573 -0
warp/tests/test_mat_lite.py +122 -0
warp/tests/test_mat_scalar_ops.py +2913 -0
warp/tests/test_math.py +212 -0
warp/tests/test_module_aot.py +287 -0
warp/tests/test_module_hashing.py +258 -0
warp/tests/test_modules_lite.py +70 -0
warp/tests/test_noise.py +252 -0
warp/tests/test_operators.py +299 -0
warp/tests/test_options.py +129 -0
warp/tests/test_overwrite.py +551 -0
warp/tests/test_print.py +408 -0
warp/tests/test_quat.py +2653 -0
warp/tests/test_quat_assign_copy.py +145 -0
warp/tests/test_rand.py +339 -0
warp/tests/test_reload.py +303 -0
warp/tests/test_rounding.py +157 -0
warp/tests/test_runlength_encode.py +196 -0
warp/tests/test_scalar_ops.py +133 -0
warp/tests/test_smoothstep.py +108 -0
warp/tests/test_snippet.py +318 -0
warp/tests/test_sparse.py +845 -0
warp/tests/test_spatial.py +2859 -0
warp/tests/test_spatial_assign_copy.py +160 -0
warp/tests/test_special_values.py +361 -0
warp/tests/test_static.py +640 -0
warp/tests/test_struct.py +901 -0
warp/tests/test_tape.py +242 -0
warp/tests/test_transient_module.py +93 -0
warp/tests/test_triangle_closest_point.py +192 -0
warp/tests/test_tuple.py +361 -0
warp/tests/test_types.py +615 -0
warp/tests/test_utils.py +594 -0
warp/tests/test_vec.py +1408 -0
warp/tests/test_vec_assign_copy.py +143 -0
warp/tests/test_vec_constructors.py +325 -0
warp/tests/test_vec_lite.py +80 -0
warp/tests/test_vec_scalar_ops.py +2327 -0
warp/tests/test_verify_fp.py +100 -0
warp/tests/test_version.py +75 -0
warp/tests/tile/__init__.py +0 -0
warp/tests/tile/test_tile.py +1519 -0
warp/tests/tile/test_tile_atomic_bitwise.py +403 -0
warp/tests/tile/test_tile_cholesky.py +608 -0
warp/tests/tile/test_tile_load.py +724 -0
warp/tests/tile/test_tile_mathdx.py +156 -0
warp/tests/tile/test_tile_matmul.py +179 -0
warp/tests/tile/test_tile_mlp.py +400 -0
warp/tests/tile/test_tile_reduce.py +950 -0
warp/tests/tile/test_tile_shared_memory.py +376 -0
warp/tests/tile/test_tile_sort.py +121 -0
warp/tests/tile/test_tile_view.py +173 -0
warp/tests/unittest_serial.py +47 -0
warp/tests/unittest_suites.py +430 -0
warp/tests/unittest_utils.py +469 -0
warp/tests/walkthrough_debug.py +95 -0
warp/torch.py +24 -0
warp/types.py +51 -0
warp/utils.py +31 -0
warp_lang-1.10.0.dist-info/METADATA +459 -0
warp_lang-1.10.0.dist-info/RECORD +468 -0
warp_lang-1.10.0.dist-info/WHEEL +5 -0
warp_lang-1.10.0.dist-info/licenses/LICENSE.md +176 -0
warp_lang-1.10.0.dist-info/licenses/licenses/Gaia-LICENSE.txt +6 -0
warp_lang-1.10.0.dist-info/licenses/licenses/appdirs-LICENSE.txt +22 -0
warp_lang-1.10.0.dist-info/licenses/licenses/asset_pixel_jpg-LICENSE.txt +3 -0
warp_lang-1.10.0.dist-info/licenses/licenses/cuda-LICENSE.txt +1582 -0
warp_lang-1.10.0.dist-info/licenses/licenses/dlpack-LICENSE.txt +201 -0
warp_lang-1.10.0.dist-info/licenses/licenses/fp16-LICENSE.txt +28 -0
warp_lang-1.10.0.dist-info/licenses/licenses/libmathdx-LICENSE.txt +220 -0
warp_lang-1.10.0.dist-info/licenses/licenses/llvm-LICENSE.txt +279 -0
warp_lang-1.10.0.dist-info/licenses/licenses/moller-LICENSE.txt +16 -0
warp_lang-1.10.0.dist-info/licenses/licenses/nanovdb-LICENSE.txt +2 -0
warp_lang-1.10.0.dist-info/licenses/licenses/nvrtc-LICENSE.txt +1592 -0
warp_lang-1.10.0.dist-info/licenses/licenses/svd-LICENSE.txt +23 -0
warp_lang-1.10.0.dist-info/licenses/licenses/unittest_parallel-LICENSE.txt +21 -0
warp_lang-1.10.0.dist-info/licenses/licenses/usd-LICENSE.txt +213 -0
warp_lang-1.10.0.dist-info/licenses/licenses/windingnumber-LICENSE.txt +21 -0
warp_lang-1.10.0.dist-info/top_level.txt +1 -0

warp/native/reduce.cu ADDED Viewed

@@ -0,0 +1,363 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "cuda_util.h"
+#include "warp.h"
+#include "temp_buffer.h"
+#define THRUST_IGNORE_CUB_VERSION_CHECK
+#include <cub/device/device_reduce.cuh>
+namespace
+{
+template <typename T>
+__global__ void cwise_mult_kernel(int len, int stride_a, int stride_b, const T *a, const T *b, T *out)
+{
+    int i = blockIdx.x * blockDim.x + threadIdx.x;
+    if (i >= len)
+        return;
+    out[i] = a[i * stride_a] * b[i * stride_b];
+}
+/// Custom iterator for allowing strided access with CUB
+template <typename T> struct cub_strided_iterator
+{
+    typedef cub_strided_iterator<T> self_type;
+    typedef std::ptrdiff_t difference_type;
+    typedef T value_type;
+    typedef T *pointer;
+    typedef T &reference;
+    typedef std::random_access_iterator_tag iterator_category; ///< The iterator category
+    T *ptr = nullptr;
+    int stride = 1;
+    CUDA_CALLABLE self_type operator++(int)
+    {
+        return ++(self_type(*this));
+    }
+    CUDA_CALLABLE self_type &operator++()
+    {
+        ptr += stride;
+        return *this;
+    }
+    __host__ __device__ __forceinline__ reference operator*() const
+    {
+        return *ptr;
+    }
+    CUDA_CALLABLE self_type operator+(difference_type n) const
+    {
+        return self_type(*this) += n;
+    }
+    CUDA_CALLABLE self_type &operator+=(difference_type n)
+    {
+        ptr += n * stride;
+        return *this;
+    }
+    CUDA_CALLABLE self_type operator-(difference_type n) const
+    {
+        return self_type(*this) -= n;
+    }
+    CUDA_CALLABLE self_type &operator-=(difference_type n)
+    {
+        ptr -= n * stride;
+        return *this;
+    }
+    CUDA_CALLABLE difference_type operator-(const self_type &other) const
+    {
+        return (ptr - other.ptr) / stride;
+    }
+    CUDA_CALLABLE reference operator[](difference_type n) const
+    {
+        return *(ptr + n * stride);
+    }
+    CUDA_CALLABLE pointer operator->() const
+    {
+        return ptr;
+    }
+    CUDA_CALLABLE bool operator==(const self_type &rhs) const
+    {
+        return (ptr == rhs.ptr);
+    }
+    CUDA_CALLABLE bool operator!=(const self_type &rhs) const
+    {
+        return (ptr != rhs.ptr);
+    }
+};
+template <typename T> void array_sum_device(const T *ptr_a, T *ptr_out, int count, int byte_stride, int type_length)
+{
+    assert((byte_stride % sizeof(T)) == 0);
+    const int stride = byte_stride / sizeof(T);
+    ContextGuard guard(wp_cuda_context_get_current());
+    cudaStream_t stream = static_cast<cudaStream_t>(wp_cuda_stream_get_current());
+    cub_strided_iterator<const T> ptr_strided{ptr_a, stride};
+    size_t buff_size = 0;
+    check_cuda(cub::DeviceReduce::Sum(nullptr, buff_size, ptr_strided, ptr_out, count, stream));
+    void* temp_buffer = wp_alloc_device(WP_CURRENT_CONTEXT, buff_size);
+    for (int k = 0; k < type_length; ++k)
+    {
+        cub_strided_iterator<const T> ptr_strided{ptr_a + k, stride};
+        check_cuda(cub::DeviceReduce::Sum(temp_buffer, buff_size, ptr_strided, ptr_out + k, count, stream));
+    }
+    wp_free_device(WP_CURRENT_CONTEXT, temp_buffer);
+}
+template <typename T>
+void array_sum_device_dispatch(const T *ptr_a, T *ptr_out, int count, int byte_stride, int type_length)
+{
+    using vec2 = wp::vec_t<2, T>;
+    using vec3 = wp::vec_t<3, T>;
+    using vec4 = wp::vec_t<4, T>;
+    // specialized calls for common vector types
+    if ((type_length % 4) == 0 && (byte_stride % sizeof(vec4)) == 0)
+    {
+        return array_sum_device(reinterpret_cast<const vec4 *>(ptr_a), reinterpret_cast<vec4 *>(ptr_out), count,
+                                byte_stride, type_length / 4);
+    }
+    if ((type_length % 3) == 0 && (byte_stride % sizeof(vec3)) == 0)
+    {
+        return array_sum_device(reinterpret_cast<const vec3 *>(ptr_a), reinterpret_cast<vec3 *>(ptr_out), count,
+                                byte_stride, type_length / 3);
+    }
+    if ((type_length % 2) == 0 && (byte_stride % sizeof(vec2)) == 0)
+    {
+        return array_sum_device(reinterpret_cast<const vec2 *>(ptr_a), reinterpret_cast<vec2 *>(ptr_out), count,
+                                byte_stride, type_length / 2);
+    }
+    return array_sum_device(ptr_a, ptr_out, count, byte_stride, type_length);
+}
+template <typename T> CUDA_CALLABLE T element_inner_product(const T &a, const T &b)
+{
+    return a * b;
+}
+template <unsigned Length, typename T>
+CUDA_CALLABLE T element_inner_product(const wp::vec_t<Length, T> &a, const wp::vec_t<Length, T> &b)
+{
+    return wp::dot(a, b);
+}
+/// Custom iterator for allowing strided access with CUB
+template <typename ElemT, typename ScalarT> struct cub_inner_product_iterator
+{
+    typedef cub_inner_product_iterator<ElemT, ScalarT> self_type;
+    typedef std::ptrdiff_t difference_type;
+    typedef ScalarT value_type;
+    typedef ScalarT *pointer;
+    typedef ScalarT reference;
+    typedef std::random_access_iterator_tag iterator_category; ///< The iterator category
+    const ElemT *ptr_a = nullptr;
+    const ElemT *ptr_b = nullptr;
+    int stride_a = 1;
+    int stride_b = 1;
+    int type_length = 1;
+    CUDA_CALLABLE self_type operator++(int)
+    {
+        return ++(self_type(*this));
+    }
+    CUDA_CALLABLE self_type &operator++()
+    {
+        ptr_a += stride_a;
+        ptr_b += stride_b;
+        return *this;
+    }
+    __host__ __device__ __forceinline__ reference operator*() const
+    {
+        return compute_value(0);
+    }
+    CUDA_CALLABLE self_type operator+(difference_type n) const
+    {
+        return self_type(*this) += n;
+    }
+    CUDA_CALLABLE self_type &operator+=(difference_type n)
+    {
+        ptr_a += n * stride_a;
+        ptr_b += n * stride_b;
+        return *this;
+    }
+    CUDA_CALLABLE self_type operator-(difference_type n) const
+    {
+        return self_type(*this) -= n;
+    }
+    CUDA_CALLABLE self_type &operator-=(difference_type n)
+    {
+        ptr_a -= n * stride_a;
+        ptr_b -= n * stride_b;
+        return *this;
+    }
+    CUDA_CALLABLE difference_type operator-(const self_type &other) const
+    {
+        return (ptr_a - other.ptr_a) / stride_a;
+    }
+    CUDA_CALLABLE reference operator[](difference_type n) const
+    {
+        return compute_value(n);
+    }
+    CUDA_CALLABLE bool operator==(const self_type &rhs) const
+    {
+        return (ptr_a == rhs.ptr_a);
+    }
+    CUDA_CALLABLE bool operator!=(const self_type &rhs) const
+    {
+        return (ptr_a != rhs.ptr_a);
+    }
+  private:
+    CUDA_CALLABLE ScalarT compute_value(difference_type n) const
+    {
+        ScalarT val(0);
+        const ElemT *a = ptr_a + n * stride_a;
+        const ElemT *b = ptr_b + n * stride_b;
+        for (int k = 0; k < type_length; ++k)
+        {
+            val += element_inner_product(a[k], b[k]);
+        }
+        return val;
+    }
+};
+template <typename ElemT, typename ScalarT>
+void array_inner_device(const ElemT *ptr_a, const ElemT *ptr_b, ScalarT *ptr_out, int count, int byte_stride_a,
+                        int byte_stride_b, int type_length)
+{
+    assert((byte_stride_a % sizeof(ElemT)) == 0);
+    assert((byte_stride_b % sizeof(ElemT)) == 0);
+    const int stride_a = byte_stride_a / sizeof(ElemT);
+    const int stride_b = byte_stride_b / sizeof(ElemT);
+    ContextGuard guard(wp_cuda_context_get_current());
+    cudaStream_t stream = static_cast<cudaStream_t>(wp_cuda_stream_get_current());
+    cub_inner_product_iterator<ElemT, ScalarT> inner_iterator{ptr_a, ptr_b, stride_a, stride_b, type_length};
+    size_t buff_size = 0;
+    check_cuda(cub::DeviceReduce::Sum(nullptr, buff_size, inner_iterator, ptr_out, count, stream));
+    void* temp_buffer = wp_alloc_device(WP_CURRENT_CONTEXT, buff_size);
+    check_cuda(cub::DeviceReduce::Sum(temp_buffer, buff_size, inner_iterator, ptr_out, count, stream));
+    wp_free_device(WP_CURRENT_CONTEXT, temp_buffer);
+}
+template <typename T>
+void array_inner_device_dispatch(const T *ptr_a, const T *ptr_b, T *ptr_out, int count, int byte_stride_a,
+                                 int byte_stride_b, int type_length)
+{
+    using vec2 = wp::vec_t<2, T>;
+    using vec3 = wp::vec_t<3, T>;
+    using vec4 = wp::vec_t<4, T>;
+    // specialized calls for common vector types
+    if ((type_length % 4) == 0 && (byte_stride_a % sizeof(vec4)) == 0 && (byte_stride_b % sizeof(vec4)) == 0)
+    {
+        return array_inner_device(reinterpret_cast<const vec4 *>(ptr_a), reinterpret_cast<const vec4 *>(ptr_b), ptr_out,
+                                  count, byte_stride_a, byte_stride_b, type_length / 4);
+    }
+    if ((type_length % 3) == 0 && (byte_stride_a % sizeof(vec3)) == 0 && (byte_stride_b % sizeof(vec3)) == 0)
+    {
+        return array_inner_device(reinterpret_cast<const vec3 *>(ptr_a), reinterpret_cast<const vec3 *>(ptr_b), ptr_out,
+                                  count, byte_stride_a, byte_stride_b, type_length / 3);
+    }
+    if ((type_length % 2) == 0 && (byte_stride_a % sizeof(vec2)) == 0 && (byte_stride_b % sizeof(vec2)) == 0)
+    {
+        return array_inner_device(reinterpret_cast<const vec2 *>(ptr_a), reinterpret_cast<const vec2 *>(ptr_b), ptr_out,
+                                  count, byte_stride_a, byte_stride_b, type_length / 2);
+    }
+    return array_inner_device(ptr_a, ptr_b, ptr_out, count, byte_stride_a, byte_stride_b, type_length);
+}
+} // anonymous namespace
+void wp_array_inner_float_device(uint64_t a, uint64_t b, uint64_t out, int count, int byte_stride_a, int byte_stride_b,
+                              int type_len)
+{
+    void *context = wp_cuda_context_get_current();
+    const float *ptr_a = (const float *)(a);
+    const float *ptr_b = (const float *)(b);
+    float *ptr_out = (float *)(out);
+    array_inner_device_dispatch(ptr_a, ptr_b, ptr_out, count, byte_stride_a, byte_stride_b, type_len);
+}
+void wp_array_inner_double_device(uint64_t a, uint64_t b, uint64_t out, int count, int byte_stride_a, int byte_stride_b,
+                               int type_len)
+{
+    const double *ptr_a = (const double *)(a);
+    const double *ptr_b = (const double *)(b);
+    double *ptr_out = (double *)(out);
+    array_inner_device_dispatch(ptr_a, ptr_b, ptr_out, count, byte_stride_a, byte_stride_b, type_len);
+}
+void wp_array_sum_float_device(uint64_t a, uint64_t out, int count, int byte_stride, int type_length)
+{
+    const float *ptr_a = (const float *)(a);
+    float *ptr_out = (float *)(out);
+    array_sum_device_dispatch(ptr_a, ptr_out, count, byte_stride, type_length);
+}
+void wp_array_sum_double_device(uint64_t a, uint64_t out, int count, int byte_stride, int type_length)
+{
+    const double *ptr_a = (const double *)(a);
+    double *ptr_out = (double *)(out);
+    array_sum_device_dispatch(ptr_a, ptr_out, count, byte_stride, type_length);
+}

warp/native/runlength_encode.cpp ADDED Viewed

@@ -0,0 +1,79 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "warp.h"
+#include <cstdint>
+template <typename T>
+void runlength_encode_host(int n,
+                           const T *values,
+                           T *run_values,
+                           int *run_lengths,
+                           int *run_count)
+{
+    if (n == 0)
+    {
+        *run_count = 0;
+        return;
+    }
+    const T *end = values + n;
+    *run_count = 1;
+    *run_lengths = 1;
+    *run_values = *values;
+    while (++values != end)
+    {
+        if (*values == *run_values)
+        {
+            ++*run_lengths;
+        }
+        else
+        {
+            ++*run_count;
+            *(++run_lengths) = 1;
+            *(++run_values) = *values;
+        }
+    }
+}
+void wp_runlength_encode_int_host(
+    uint64_t values,
+    uint64_t run_values,
+    uint64_t run_lengths,
+    uint64_t run_count,
+    int n)
+{
+    runlength_encode_host<int>(n,
+                               reinterpret_cast<const int *>(values),
+                               reinterpret_cast<int *>(run_values),
+                               reinterpret_cast<int *>(run_lengths),
+                               reinterpret_cast<int *>(run_count));
+}
+#if !WP_ENABLE_CUDA
+void wp_runlength_encode_int_device(
+    uint64_t values,
+    uint64_t run_values,
+    uint64_t run_lengths,
+    uint64_t run_count,
+    int n)
+{
+}
+#endif

warp/native/runlength_encode.cu ADDED Viewed

@@ -0,0 +1,61 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "warp.h"
+#include "cuda_util.h"
+#define THRUST_IGNORE_CUB_VERSION_CHECK
+#include <cub/device/device_run_length_encode.cuh>
+template <typename T>
+void runlength_encode_device(int n,
+                             const T *values,
+                             T *run_values,
+                             int *run_lengths,
+                             int *run_count)
+{
+    ContextGuard guard(wp_cuda_context_get_current());
+    cudaStream_t stream = static_cast<cudaStream_t>(wp_cuda_stream_get_current());
+    size_t buff_size = 0;
+    check_cuda(cub::DeviceRunLengthEncode::Encode(
+        nullptr, buff_size, values, run_values, run_lengths, run_count,
+        n, stream));
+    void* temp_buffer = wp_alloc_device(WP_CURRENT_CONTEXT, buff_size);
+    check_cuda(cub::DeviceRunLengthEncode::Encode(
+        temp_buffer, buff_size, values, run_values, run_lengths, run_count,
+        n, stream));
+    wp_free_device(WP_CURRENT_CONTEXT, temp_buffer);
+}
+void wp_runlength_encode_int_device(
+    uint64_t values,
+    uint64_t run_values,
+    uint64_t run_lengths,
+    uint64_t run_count,
+    int n)
+{
+    return runlength_encode_device<int>(
+        n,
+        reinterpret_cast<const int *>(values),
+        reinterpret_cast<int *>(run_values),
+        reinterpret_cast<int *>(run_lengths),
+        reinterpret_cast<int *>(run_count));
+}

warp/native/scan.cpp ADDED Viewed

@@ -0,0 +1,47 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "scan.h"
+#include <numeric>
+template<typename T>
+void scan_host(const T* values_in, T* values_out, int n, bool inclusive)
+{
+    static void* scan_temp_memory = NULL;
+    static size_t scan_temp_max_size = 0;
+    // compute temporary memory required
+    if (!inclusive && n > scan_temp_max_size)
+    {
+	    wp_free_host(scan_temp_memory);
+        scan_temp_memory = wp_alloc_host(sizeof(T) * n);
+        scan_temp_max_size = n;
+    }
+    T* result = inclusive ? values_out : static_cast<T*>(scan_temp_memory);
+    // scan
+    std::partial_sum(values_in, values_in + n, result);
+    if (!inclusive) {
+        values_out[0] = (T)0;
+        wp_memcpy_h2h(values_out + 1, result, sizeof(T) * (n - 1));
+    }
+}
+template void scan_host(const int*, int*, int, bool);
+template void scan_host(const float*, float*, int, bool);

warp/native/scan.cu ADDED Viewed

@@ -0,0 +1,55 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "warp.h"
+#include "scan.h"
+#include "cuda_util.h"
+#define THRUST_IGNORE_CUB_VERSION_CHECK
+#include <cub/device/device_scan.cuh>
+template<typename T>
+void scan_device(const T* values_in, T* values_out, int n, bool inclusive)
+{
+    ContextGuard guard(wp_cuda_context_get_current());
+    cudaStream_t stream = static_cast<cudaStream_t>(wp_cuda_stream_get_current());
+    // compute temporary memory required
+	size_t scan_temp_size;
+    if (inclusive) {
+        check_cuda(cub::DeviceScan::InclusiveSum(NULL, scan_temp_size, values_in, values_out, n));
+    } else {
+        check_cuda(cub::DeviceScan::ExclusiveSum(NULL, scan_temp_size, values_in, values_out, n));
+    }
+    void* temp_buffer = wp_alloc_device(WP_CURRENT_CONTEXT, scan_temp_size);
+    // scan
+    if (inclusive) {
+        check_cuda(cub::DeviceScan::InclusiveSum(temp_buffer, scan_temp_size, values_in, values_out, n, stream));
+    } else {
+        check_cuda(cub::DeviceScan::ExclusiveSum(temp_buffer, scan_temp_size, values_in, values_out, n, stream));
+    }
+    wp_free_device(WP_CURRENT_CONTEXT, temp_buffer);
+}
+template void scan_device(const int*, int*, int, bool);
+template void scan_device(const float*, float*, int, bool);

warp/native/scan.h ADDED Viewed

@@ -0,0 +1,23 @@
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+template<typename T>
+void scan_host(const T* values_in, T* values_out, int n, bool inclusive = true);
+template<typename T>
+void scan_device(const T* values_in, T* values_out, int n, bool inclusive = true);