warp-lang 1.10.0__py3-none-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +334 -0
- warp/__init__.pyi +5856 -0
- warp/_src/__init__.py +14 -0
- warp/_src/autograd.py +1077 -0
- warp/_src/build.py +620 -0
- warp/_src/build_dll.py +642 -0
- warp/_src/builtins.py +10555 -0
- warp/_src/codegen.py +4361 -0
- warp/_src/config.py +178 -0
- warp/_src/constants.py +59 -0
- warp/_src/context.py +8352 -0
- warp/_src/dlpack.py +464 -0
- warp/_src/fabric.py +362 -0
- warp/_src/fem/__init__.py +14 -0
- warp/_src/fem/adaptivity.py +510 -0
- warp/_src/fem/cache.py +689 -0
- warp/_src/fem/dirichlet.py +190 -0
- warp/_src/fem/domain.py +553 -0
- warp/_src/fem/field/__init__.py +131 -0
- warp/_src/fem/field/field.py +703 -0
- warp/_src/fem/field/nodal_field.py +403 -0
- warp/_src/fem/field/restriction.py +39 -0
- warp/_src/fem/field/virtual.py +1021 -0
- warp/_src/fem/geometry/__init__.py +32 -0
- warp/_src/fem/geometry/adaptive_nanogrid.py +782 -0
- warp/_src/fem/geometry/closest_point.py +99 -0
- warp/_src/fem/geometry/deformed_geometry.py +277 -0
- warp/_src/fem/geometry/element.py +854 -0
- warp/_src/fem/geometry/geometry.py +693 -0
- warp/_src/fem/geometry/grid_2d.py +478 -0
- warp/_src/fem/geometry/grid_3d.py +539 -0
- warp/_src/fem/geometry/hexmesh.py +956 -0
- warp/_src/fem/geometry/nanogrid.py +660 -0
- warp/_src/fem/geometry/partition.py +483 -0
- warp/_src/fem/geometry/quadmesh.py +597 -0
- warp/_src/fem/geometry/tetmesh.py +762 -0
- warp/_src/fem/geometry/trimesh.py +588 -0
- warp/_src/fem/integrate.py +2507 -0
- warp/_src/fem/linalg.py +385 -0
- warp/_src/fem/operator.py +398 -0
- warp/_src/fem/polynomial.py +231 -0
- warp/_src/fem/quadrature/__init__.py +17 -0
- warp/_src/fem/quadrature/pic_quadrature.py +318 -0
- warp/_src/fem/quadrature/quadrature.py +665 -0
- warp/_src/fem/space/__init__.py +248 -0
- warp/_src/fem/space/basis_function_space.py +499 -0
- warp/_src/fem/space/basis_space.py +681 -0
- warp/_src/fem/space/dof_mapper.py +253 -0
- warp/_src/fem/space/function_space.py +312 -0
- warp/_src/fem/space/grid_2d_function_space.py +179 -0
- warp/_src/fem/space/grid_3d_function_space.py +229 -0
- warp/_src/fem/space/hexmesh_function_space.py +255 -0
- warp/_src/fem/space/nanogrid_function_space.py +199 -0
- warp/_src/fem/space/partition.py +435 -0
- warp/_src/fem/space/quadmesh_function_space.py +222 -0
- warp/_src/fem/space/restriction.py +221 -0
- warp/_src/fem/space/shape/__init__.py +152 -0
- warp/_src/fem/space/shape/cube_shape_function.py +1107 -0
- warp/_src/fem/space/shape/shape_function.py +134 -0
- warp/_src/fem/space/shape/square_shape_function.py +928 -0
- warp/_src/fem/space/shape/tet_shape_function.py +829 -0
- warp/_src/fem/space/shape/triangle_shape_function.py +674 -0
- warp/_src/fem/space/tetmesh_function_space.py +270 -0
- warp/_src/fem/space/topology.py +461 -0
- warp/_src/fem/space/trimesh_function_space.py +193 -0
- warp/_src/fem/types.py +114 -0
- warp/_src/fem/utils.py +488 -0
- warp/_src/jax.py +188 -0
- warp/_src/jax_experimental/__init__.py +14 -0
- warp/_src/jax_experimental/custom_call.py +389 -0
- warp/_src/jax_experimental/ffi.py +1286 -0
- warp/_src/jax_experimental/xla_ffi.py +658 -0
- warp/_src/marching_cubes.py +710 -0
- warp/_src/math.py +416 -0
- warp/_src/optim/__init__.py +14 -0
- warp/_src/optim/adam.py +165 -0
- warp/_src/optim/linear.py +1608 -0
- warp/_src/optim/sgd.py +114 -0
- warp/_src/paddle.py +408 -0
- warp/_src/render/__init__.py +14 -0
- warp/_src/render/imgui_manager.py +291 -0
- warp/_src/render/render_opengl.py +3638 -0
- warp/_src/render/render_usd.py +939 -0
- warp/_src/render/utils.py +162 -0
- warp/_src/sparse.py +2718 -0
- warp/_src/tape.py +1208 -0
- warp/_src/thirdparty/__init__.py +0 -0
- warp/_src/thirdparty/appdirs.py +598 -0
- warp/_src/thirdparty/dlpack.py +145 -0
- warp/_src/thirdparty/unittest_parallel.py +676 -0
- warp/_src/torch.py +393 -0
- warp/_src/types.py +5888 -0
- warp/_src/utils.py +1695 -0
- warp/autograd.py +33 -0
- warp/bin/libwarp-clang.dylib +0 -0
- warp/bin/libwarp.dylib +0 -0
- warp/build.py +29 -0
- warp/build_dll.py +24 -0
- warp/codegen.py +24 -0
- warp/constants.py +24 -0
- warp/context.py +33 -0
- warp/dlpack.py +24 -0
- warp/examples/__init__.py +24 -0
- warp/examples/assets/bear.usd +0 -0
- warp/examples/assets/bunny.usd +0 -0
- warp/examples/assets/cube.usd +0 -0
- warp/examples/assets/nonuniform.usd +0 -0
- warp/examples/assets/nvidia_logo.png +0 -0
- warp/examples/assets/pixel.jpg +0 -0
- warp/examples/assets/rocks.nvdb +0 -0
- warp/examples/assets/rocks.usd +0 -0
- warp/examples/assets/sphere.usd +0 -0
- warp/examples/assets/square_cloth.usd +0 -0
- warp/examples/benchmarks/benchmark_api.py +389 -0
- warp/examples/benchmarks/benchmark_cloth.py +296 -0
- warp/examples/benchmarks/benchmark_cloth_cupy.py +96 -0
- warp/examples/benchmarks/benchmark_cloth_jax.py +105 -0
- warp/examples/benchmarks/benchmark_cloth_numba.py +161 -0
- warp/examples/benchmarks/benchmark_cloth_numpy.py +85 -0
- warp/examples/benchmarks/benchmark_cloth_paddle.py +94 -0
- warp/examples/benchmarks/benchmark_cloth_pytorch.py +94 -0
- warp/examples/benchmarks/benchmark_cloth_taichi.py +120 -0
- warp/examples/benchmarks/benchmark_cloth_warp.py +153 -0
- warp/examples/benchmarks/benchmark_gemm.py +164 -0
- warp/examples/benchmarks/benchmark_interop_paddle.py +166 -0
- warp/examples/benchmarks/benchmark_interop_torch.py +166 -0
- warp/examples/benchmarks/benchmark_launches.py +301 -0
- warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
- warp/examples/benchmarks/benchmark_tile_sort.py +155 -0
- warp/examples/browse.py +37 -0
- warp/examples/core/example_cupy.py +86 -0
- warp/examples/core/example_dem.py +241 -0
- warp/examples/core/example_fluid.py +299 -0
- warp/examples/core/example_graph_capture.py +150 -0
- warp/examples/core/example_marching_cubes.py +195 -0
- warp/examples/core/example_mesh.py +180 -0
- warp/examples/core/example_mesh_intersect.py +211 -0
- warp/examples/core/example_nvdb.py +182 -0
- warp/examples/core/example_raycast.py +111 -0
- warp/examples/core/example_raymarch.py +205 -0
- warp/examples/core/example_render_opengl.py +290 -0
- warp/examples/core/example_sample_mesh.py +300 -0
- warp/examples/core/example_sph.py +411 -0
- warp/examples/core/example_spin_lock.py +93 -0
- warp/examples/core/example_torch.py +211 -0
- warp/examples/core/example_wave.py +269 -0
- warp/examples/core/example_work_queue.py +118 -0
- warp/examples/distributed/example_jacobi_mpi.py +506 -0
- warp/examples/fem/example_adaptive_grid.py +286 -0
- warp/examples/fem/example_apic_fluid.py +469 -0
- warp/examples/fem/example_burgers.py +261 -0
- warp/examples/fem/example_convection_diffusion.py +181 -0
- warp/examples/fem/example_convection_diffusion_dg.py +225 -0
- warp/examples/fem/example_darcy_ls_optimization.py +489 -0
- warp/examples/fem/example_deformed_geometry.py +172 -0
- warp/examples/fem/example_diffusion.py +196 -0
- warp/examples/fem/example_diffusion_3d.py +225 -0
- warp/examples/fem/example_diffusion_mgpu.py +225 -0
- warp/examples/fem/example_distortion_energy.py +228 -0
- warp/examples/fem/example_elastic_shape_optimization.py +387 -0
- warp/examples/fem/example_magnetostatics.py +242 -0
- warp/examples/fem/example_mixed_elasticity.py +293 -0
- warp/examples/fem/example_navier_stokes.py +263 -0
- warp/examples/fem/example_nonconforming_contact.py +300 -0
- warp/examples/fem/example_stokes.py +213 -0
- warp/examples/fem/example_stokes_transfer.py +262 -0
- warp/examples/fem/example_streamlines.py +357 -0
- warp/examples/fem/utils.py +1047 -0
- warp/examples/interop/example_jax_callable.py +146 -0
- warp/examples/interop/example_jax_ffi_callback.py +132 -0
- warp/examples/interop/example_jax_kernel.py +232 -0
- warp/examples/optim/example_diffray.py +561 -0
- warp/examples/optim/example_fluid_checkpoint.py +497 -0
- warp/examples/tile/example_tile_block_cholesky.py +502 -0
- warp/examples/tile/example_tile_cholesky.py +88 -0
- warp/examples/tile/example_tile_convolution.py +66 -0
- warp/examples/tile/example_tile_fft.py +55 -0
- warp/examples/tile/example_tile_filtering.py +113 -0
- warp/examples/tile/example_tile_matmul.py +85 -0
- warp/examples/tile/example_tile_mcgp.py +191 -0
- warp/examples/tile/example_tile_mlp.py +385 -0
- warp/examples/tile/example_tile_nbody.py +199 -0
- warp/fabric.py +24 -0
- warp/fem/__init__.py +173 -0
- warp/fem/adaptivity.py +26 -0
- warp/fem/cache.py +30 -0
- warp/fem/dirichlet.py +24 -0
- warp/fem/field/__init__.py +24 -0
- warp/fem/field/field.py +26 -0
- warp/fem/geometry/__init__.py +21 -0
- warp/fem/geometry/closest_point.py +31 -0
- warp/fem/linalg.py +38 -0
- warp/fem/operator.py +32 -0
- warp/fem/polynomial.py +29 -0
- warp/fem/space/__init__.py +22 -0
- warp/fem/space/basis_space.py +24 -0
- warp/fem/space/shape/__init__.py +68 -0
- warp/fem/space/topology.py +24 -0
- warp/fem/types.py +24 -0
- warp/fem/utils.py +32 -0
- warp/jax.py +29 -0
- warp/jax_experimental/__init__.py +29 -0
- warp/jax_experimental/custom_call.py +29 -0
- warp/jax_experimental/ffi.py +39 -0
- warp/jax_experimental/xla_ffi.py +24 -0
- warp/marching_cubes.py +24 -0
- warp/math.py +37 -0
- warp/native/array.h +1687 -0
- warp/native/builtin.h +2327 -0
- warp/native/bvh.cpp +562 -0
- warp/native/bvh.cu +826 -0
- warp/native/bvh.h +555 -0
- warp/native/clang/clang.cpp +541 -0
- warp/native/coloring.cpp +622 -0
- warp/native/crt.cpp +51 -0
- warp/native/crt.h +568 -0
- warp/native/cuda_crt.h +1058 -0
- warp/native/cuda_util.cpp +677 -0
- warp/native/cuda_util.h +313 -0
- warp/native/error.cpp +77 -0
- warp/native/error.h +36 -0
- warp/native/exports.h +2023 -0
- warp/native/fabric.h +246 -0
- warp/native/hashgrid.cpp +311 -0
- warp/native/hashgrid.cu +89 -0
- warp/native/hashgrid.h +240 -0
- warp/native/initializer_array.h +41 -0
- warp/native/intersect.h +1253 -0
- warp/native/intersect_adj.h +375 -0
- warp/native/intersect_tri.h +348 -0
- warp/native/mat.h +5189 -0
- warp/native/mathdx.cpp +93 -0
- warp/native/matnn.h +221 -0
- warp/native/mesh.cpp +266 -0
- warp/native/mesh.cu +406 -0
- warp/native/mesh.h +2097 -0
- warp/native/nanovdb/GridHandle.h +533 -0
- warp/native/nanovdb/HostBuffer.h +591 -0
- warp/native/nanovdb/NanoVDB.h +6246 -0
- warp/native/nanovdb/NodeManager.h +323 -0
- warp/native/nanovdb/PNanoVDB.h +3390 -0
- warp/native/noise.h +859 -0
- warp/native/quat.h +1664 -0
- warp/native/rand.h +342 -0
- warp/native/range.h +145 -0
- warp/native/reduce.cpp +174 -0
- warp/native/reduce.cu +363 -0
- warp/native/runlength_encode.cpp +79 -0
- warp/native/runlength_encode.cu +61 -0
- warp/native/scan.cpp +47 -0
- warp/native/scan.cu +55 -0
- warp/native/scan.h +23 -0
- warp/native/solid_angle.h +466 -0
- warp/native/sort.cpp +251 -0
- warp/native/sort.cu +286 -0
- warp/native/sort.h +35 -0
- warp/native/sparse.cpp +241 -0
- warp/native/sparse.cu +435 -0
- warp/native/spatial.h +1306 -0
- warp/native/svd.h +727 -0
- warp/native/temp_buffer.h +46 -0
- warp/native/tile.h +4124 -0
- warp/native/tile_radix_sort.h +1112 -0
- warp/native/tile_reduce.h +838 -0
- warp/native/tile_scan.h +240 -0
- warp/native/tuple.h +189 -0
- warp/native/vec.h +2199 -0
- warp/native/version.h +23 -0
- warp/native/volume.cpp +501 -0
- warp/native/volume.cu +68 -0
- warp/native/volume.h +970 -0
- warp/native/volume_builder.cu +483 -0
- warp/native/volume_builder.h +52 -0
- warp/native/volume_impl.h +70 -0
- warp/native/warp.cpp +1143 -0
- warp/native/warp.cu +4604 -0
- warp/native/warp.h +358 -0
- warp/optim/__init__.py +20 -0
- warp/optim/adam.py +24 -0
- warp/optim/linear.py +35 -0
- warp/optim/sgd.py +24 -0
- warp/paddle.py +24 -0
- warp/py.typed +0 -0
- warp/render/__init__.py +22 -0
- warp/render/imgui_manager.py +29 -0
- warp/render/render_opengl.py +24 -0
- warp/render/render_usd.py +24 -0
- warp/render/utils.py +24 -0
- warp/sparse.py +51 -0
- warp/tape.py +24 -0
- warp/tests/__init__.py +1 -0
- warp/tests/__main__.py +4 -0
- warp/tests/assets/curlnoise_golden.npy +0 -0
- warp/tests/assets/mlp_golden.npy +0 -0
- warp/tests/assets/pixel.npy +0 -0
- warp/tests/assets/pnoise_golden.npy +0 -0
- warp/tests/assets/spiky.usd +0 -0
- warp/tests/assets/test_grid.nvdb +0 -0
- warp/tests/assets/test_index_grid.nvdb +0 -0
- warp/tests/assets/test_int32_grid.nvdb +0 -0
- warp/tests/assets/test_vec_grid.nvdb +0 -0
- warp/tests/assets/torus.nvdb +0 -0
- warp/tests/assets/torus.usda +105 -0
- warp/tests/aux_test_class_kernel.py +34 -0
- warp/tests/aux_test_compile_consts_dummy.py +18 -0
- warp/tests/aux_test_conditional_unequal_types_kernels.py +29 -0
- warp/tests/aux_test_dependent.py +29 -0
- warp/tests/aux_test_grad_customs.py +29 -0
- warp/tests/aux_test_instancing_gc.py +26 -0
- warp/tests/aux_test_module_aot.py +7 -0
- warp/tests/aux_test_module_unload.py +23 -0
- warp/tests/aux_test_name_clash1.py +40 -0
- warp/tests/aux_test_name_clash2.py +40 -0
- warp/tests/aux_test_reference.py +9 -0
- warp/tests/aux_test_reference_reference.py +8 -0
- warp/tests/aux_test_square.py +16 -0
- warp/tests/aux_test_unresolved_func.py +22 -0
- warp/tests/aux_test_unresolved_symbol.py +22 -0
- warp/tests/cuda/__init__.py +0 -0
- warp/tests/cuda/test_async.py +676 -0
- warp/tests/cuda/test_conditional_captures.py +1147 -0
- warp/tests/cuda/test_ipc.py +124 -0
- warp/tests/cuda/test_mempool.py +233 -0
- warp/tests/cuda/test_multigpu.py +169 -0
- warp/tests/cuda/test_peer.py +139 -0
- warp/tests/cuda/test_pinned.py +84 -0
- warp/tests/cuda/test_streams.py +691 -0
- warp/tests/geometry/__init__.py +0 -0
- warp/tests/geometry/test_bvh.py +335 -0
- warp/tests/geometry/test_hash_grid.py +259 -0
- warp/tests/geometry/test_marching_cubes.py +294 -0
- warp/tests/geometry/test_mesh.py +318 -0
- warp/tests/geometry/test_mesh_query_aabb.py +392 -0
- warp/tests/geometry/test_mesh_query_point.py +935 -0
- warp/tests/geometry/test_mesh_query_ray.py +323 -0
- warp/tests/geometry/test_volume.py +1103 -0
- warp/tests/geometry/test_volume_write.py +346 -0
- warp/tests/interop/__init__.py +0 -0
- warp/tests/interop/test_dlpack.py +730 -0
- warp/tests/interop/test_jax.py +1673 -0
- warp/tests/interop/test_paddle.py +800 -0
- warp/tests/interop/test_torch.py +1001 -0
- warp/tests/run_coverage_serial.py +39 -0
- warp/tests/test_adam.py +162 -0
- warp/tests/test_arithmetic.py +1096 -0
- warp/tests/test_array.py +3756 -0
- warp/tests/test_array_reduce.py +156 -0
- warp/tests/test_assert.py +303 -0
- warp/tests/test_atomic.py +336 -0
- warp/tests/test_atomic_bitwise.py +209 -0
- warp/tests/test_atomic_cas.py +312 -0
- warp/tests/test_bool.py +220 -0
- warp/tests/test_builtins_resolution.py +732 -0
- warp/tests/test_closest_point_edge_edge.py +327 -0
- warp/tests/test_codegen.py +974 -0
- warp/tests/test_codegen_instancing.py +1495 -0
- warp/tests/test_compile_consts.py +215 -0
- warp/tests/test_conditional.py +298 -0
- warp/tests/test_context.py +35 -0
- warp/tests/test_copy.py +319 -0
- warp/tests/test_ctypes.py +618 -0
- warp/tests/test_dense.py +73 -0
- warp/tests/test_devices.py +127 -0
- warp/tests/test_enum.py +136 -0
- warp/tests/test_examples.py +424 -0
- warp/tests/test_fabricarray.py +998 -0
- warp/tests/test_fast_math.py +72 -0
- warp/tests/test_fem.py +2204 -0
- warp/tests/test_fixedarray.py +229 -0
- warp/tests/test_fp16.py +136 -0
- warp/tests/test_func.py +501 -0
- warp/tests/test_future_annotations.py +100 -0
- warp/tests/test_generics.py +656 -0
- warp/tests/test_grad.py +893 -0
- warp/tests/test_grad_customs.py +339 -0
- warp/tests/test_grad_debug.py +341 -0
- warp/tests/test_implicit_init.py +411 -0
- warp/tests/test_import.py +45 -0
- warp/tests/test_indexedarray.py +1140 -0
- warp/tests/test_intersect.py +103 -0
- warp/tests/test_iter.py +76 -0
- warp/tests/test_large.py +177 -0
- warp/tests/test_launch.py +411 -0
- warp/tests/test_lerp.py +151 -0
- warp/tests/test_linear_solvers.py +223 -0
- warp/tests/test_lvalue.py +427 -0
- warp/tests/test_map.py +526 -0
- warp/tests/test_mat.py +3515 -0
- warp/tests/test_mat_assign_copy.py +178 -0
- warp/tests/test_mat_constructors.py +573 -0
- warp/tests/test_mat_lite.py +122 -0
- warp/tests/test_mat_scalar_ops.py +2913 -0
- warp/tests/test_math.py +212 -0
- warp/tests/test_module_aot.py +287 -0
- warp/tests/test_module_hashing.py +258 -0
- warp/tests/test_modules_lite.py +70 -0
- warp/tests/test_noise.py +252 -0
- warp/tests/test_operators.py +299 -0
- warp/tests/test_options.py +129 -0
- warp/tests/test_overwrite.py +551 -0
- warp/tests/test_print.py +408 -0
- warp/tests/test_quat.py +2653 -0
- warp/tests/test_quat_assign_copy.py +145 -0
- warp/tests/test_rand.py +339 -0
- warp/tests/test_reload.py +303 -0
- warp/tests/test_rounding.py +157 -0
- warp/tests/test_runlength_encode.py +196 -0
- warp/tests/test_scalar_ops.py +133 -0
- warp/tests/test_smoothstep.py +108 -0
- warp/tests/test_snippet.py +318 -0
- warp/tests/test_sparse.py +845 -0
- warp/tests/test_spatial.py +2859 -0
- warp/tests/test_spatial_assign_copy.py +160 -0
- warp/tests/test_special_values.py +361 -0
- warp/tests/test_static.py +640 -0
- warp/tests/test_struct.py +901 -0
- warp/tests/test_tape.py +242 -0
- warp/tests/test_transient_module.py +93 -0
- warp/tests/test_triangle_closest_point.py +192 -0
- warp/tests/test_tuple.py +361 -0
- warp/tests/test_types.py +615 -0
- warp/tests/test_utils.py +594 -0
- warp/tests/test_vec.py +1408 -0
- warp/tests/test_vec_assign_copy.py +143 -0
- warp/tests/test_vec_constructors.py +325 -0
- warp/tests/test_vec_lite.py +80 -0
- warp/tests/test_vec_scalar_ops.py +2327 -0
- warp/tests/test_verify_fp.py +100 -0
- warp/tests/test_version.py +75 -0
- warp/tests/tile/__init__.py +0 -0
- warp/tests/tile/test_tile.py +1519 -0
- warp/tests/tile/test_tile_atomic_bitwise.py +403 -0
- warp/tests/tile/test_tile_cholesky.py +608 -0
- warp/tests/tile/test_tile_load.py +724 -0
- warp/tests/tile/test_tile_mathdx.py +156 -0
- warp/tests/tile/test_tile_matmul.py +179 -0
- warp/tests/tile/test_tile_mlp.py +400 -0
- warp/tests/tile/test_tile_reduce.py +950 -0
- warp/tests/tile/test_tile_shared_memory.py +376 -0
- warp/tests/tile/test_tile_sort.py +121 -0
- warp/tests/tile/test_tile_view.py +173 -0
- warp/tests/unittest_serial.py +47 -0
- warp/tests/unittest_suites.py +430 -0
- warp/tests/unittest_utils.py +469 -0
- warp/tests/walkthrough_debug.py +95 -0
- warp/torch.py +24 -0
- warp/types.py +51 -0
- warp/utils.py +31 -0
- warp_lang-1.10.0.dist-info/METADATA +459 -0
- warp_lang-1.10.0.dist-info/RECORD +468 -0
- warp_lang-1.10.0.dist-info/WHEEL +5 -0
- warp_lang-1.10.0.dist-info/licenses/LICENSE.md +176 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/Gaia-LICENSE.txt +6 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/appdirs-LICENSE.txt +22 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/asset_pixel_jpg-LICENSE.txt +3 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/cuda-LICENSE.txt +1582 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/dlpack-LICENSE.txt +201 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/fp16-LICENSE.txt +28 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/libmathdx-LICENSE.txt +220 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/llvm-LICENSE.txt +279 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/moller-LICENSE.txt +16 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/nanovdb-LICENSE.txt +2 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/nvrtc-LICENSE.txt +1592 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/svd-LICENSE.txt +23 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/unittest_parallel-LICENSE.txt +21 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/usd-LICENSE.txt +213 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/windingnumber-LICENSE.txt +21 -0
- warp_lang-1.10.0.dist-info/top_level.txt +1 -0
warp/autograd.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
# isort: skip_file
|
|
17
|
+
|
|
18
|
+
from warp._src.autograd import gradcheck as gradcheck
|
|
19
|
+
from warp._src.autograd import gradcheck_tape as gradcheck_tape
|
|
20
|
+
from warp._src.autograd import jacobian as jacobian
|
|
21
|
+
from warp._src.autograd import jacobian_fd as jacobian_fd
|
|
22
|
+
from warp._src.autograd import jacobian_plot as jacobian_plot
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# TODO: Remove after cleaning up the public API.
|
|
26
|
+
|
|
27
|
+
from warp._src import autograd as _autograd
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def __getattr__(name):
|
|
31
|
+
from warp._src.utils import get_deprecated_api
|
|
32
|
+
|
|
33
|
+
return get_deprecated_api(_autograd, "wp", name)
|
|
Binary file
|
warp/bin/libwarp.dylib
ADDED
|
Binary file
|
warp/build.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
# isort: skip_file
|
|
17
|
+
|
|
18
|
+
from warp._src.build import clear_kernel_cache as clear_kernel_cache
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# TODO: Remove after cleaning up the public API.
|
|
22
|
+
|
|
23
|
+
from warp._src import build as _build
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def __getattr__(name):
|
|
27
|
+
from warp._src.utils import get_deprecated_api
|
|
28
|
+
|
|
29
|
+
return get_deprecated_api(_build, "wp", name)
|
warp/build_dll.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
# TODO: Remove after cleaning up the public API.
|
|
17
|
+
|
|
18
|
+
from warp._src import build_dll as _build_dll
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def __getattr__(name):
|
|
22
|
+
from warp._src.utils import get_deprecated_api
|
|
23
|
+
|
|
24
|
+
return get_deprecated_api(_build_dll, "wp", name)
|
warp/codegen.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
# TODO: Remove after cleaning up the public API.
|
|
17
|
+
|
|
18
|
+
from warp._src import codegen as _codegen
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def __getattr__(name):
|
|
22
|
+
from warp._src.utils import get_deprecated_api
|
|
23
|
+
|
|
24
|
+
return get_deprecated_api(_codegen, "wp", name)
|
warp/constants.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
# TODO: Remove after cleaning up the public API.
|
|
17
|
+
|
|
18
|
+
from warp._src import constants as _constants
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def __getattr__(name):
|
|
22
|
+
from warp._src.utils import get_deprecated_api
|
|
23
|
+
|
|
24
|
+
return get_deprecated_api(_constants, "wp", name)
|
warp/context.py
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
# isort: skip_file
|
|
17
|
+
|
|
18
|
+
from warp._src.context import Device as Device
|
|
19
|
+
from warp._src.context import Module as Module
|
|
20
|
+
from warp._src.context import assert_conditional_graph_support as assert_conditional_graph_support
|
|
21
|
+
from warp._src.context import get_module as get_module
|
|
22
|
+
from warp._src.context import type_str as type_str
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
# TODO: Remove after cleaning up the public API.
|
|
26
|
+
|
|
27
|
+
from warp._src import context as _context
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def __getattr__(name):
|
|
31
|
+
from warp._src.utils import get_deprecated_api
|
|
32
|
+
|
|
33
|
+
return get_deprecated_api(_context, "wp", name)
|
warp/dlpack.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
# TODO: Remove after cleaning up the public API.
|
|
17
|
+
|
|
18
|
+
from warp._src import dlpack as _dlpack
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def __getattr__(name):
|
|
22
|
+
from warp._src.utils import get_deprecated_api
|
|
23
|
+
|
|
24
|
+
return get_deprecated_api(_dlpack, "wp", name)
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import os
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def get_source_directory():
|
|
20
|
+
return os.path.realpath(os.path.dirname(__file__))
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def get_asset_directory():
|
|
24
|
+
return os.path.join(get_source_directory(), "assets")
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,389 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import gc
|
|
17
|
+
import statistics as stats
|
|
18
|
+
|
|
19
|
+
import warp as wp
|
|
20
|
+
|
|
21
|
+
ENABLE_MEMPOOLS = False
|
|
22
|
+
ENABLE_PEER_ACCESS = False
|
|
23
|
+
ENABLE_MEMPOOL_ACCESS = False
|
|
24
|
+
ENABLE_MEMPOOL_RELEASE_THRESHOLD = False
|
|
25
|
+
|
|
26
|
+
MEMPOOL_RELEASE_THRESHOLD = 1024 * 1024 * 1024
|
|
27
|
+
|
|
28
|
+
DO_SYNC = False
|
|
29
|
+
VERBOSE = False
|
|
30
|
+
USE_NVTX = False
|
|
31
|
+
|
|
32
|
+
num_elems = 10000
|
|
33
|
+
num_runs = 10000
|
|
34
|
+
trim_runs = 2500
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Minimal kernel used by the launch and graph benchmarks: each thread adds 1.0
# to the element of `a` selected by its thread index.
@wp.kernel
def inc_kernel(a: wp.array(dtype=float)):
    tid = wp.tid()
    a[tid] = a[tid] + 1.0
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# Configure every CUDA device according to the ENABLE_* flags. Each setting is
# attempted on its own; a failure is printed and the script carries on.
for target_device in wp.get_cuda_devices():
    try:
        wp.set_mempool_enabled(target_device, ENABLE_MEMPOOLS)
        if ENABLE_MEMPOOL_RELEASE_THRESHOLD:
            wp.set_mempool_release_threshold(target_device, MEMPOOL_RELEASE_THRESHOLD)
    except Exception as e:
        print(f"Error: {e}")

    # Pairwise settings are applied for every (target, peer) combination,
    # including the pair where both are the same device.
    for peer_device in wp.get_cuda_devices():
        try:
            wp.set_peer_access_enabled(target_device, peer_device, ENABLE_PEER_ACCESS)
        except Exception as e:
            print(f"Error: {e}")

        try:
            wp.set_mempool_access_enabled(target_device, peer_device, ENABLE_MEMPOOL_ACCESS)
        except Exception as e:
            print(f"Error: {e}")
|
|
62
|
+
|
|
63
|
+
# Number of CUDA devices; the peer-to-peer resources and benchmarks are only
# used when it is greater than one.
cuda_device_count = wp.get_cuda_device_count()

# Every single-device benchmark runs on this device.
cuda0 = wp.get_device("cuda:0")

# preallocate the arrays reused by the copy, launch and graph benchmarks
arr_host = wp.zeros(num_elems, dtype=float, device="cpu", pinned=False)
arr_host_pinned = wp.zeros(num_elems, dtype=float, device="cpu", pinned=True)
arr_cuda0 = wp.zeros(num_elems, dtype=float, device=cuda0)
arr_cuda0_src = wp.zeros(num_elems, dtype=float, device=cuda0)
arr_cuda0_dst = wp.zeros(num_elems, dtype=float, device=cuda0)

# mgpu support: second device and its array for the peer-to-peer copies
if cuda_device_count > 1:
    cuda1 = wp.get_device("cuda:1")
    arr_cuda1 = wp.zeros(num_elems, dtype=float, device=cuda1)

# Explicit stream on cuda0 used by the "*_stream" benchmark variants.
stream0 = wp.Stream(cuda0)

# preload module before any timing starts
# NOTE(review): presumably this keeps module loading out of the first measurements -- confirm.
wp.force_load(cuda0)
if cuda_device_count > 1:
    wp.force_load(cuda1)

# capture a graph containing a single inc_kernel launch on arr_cuda0;
# it is replayed by test_graph() and test_graph_stream()
with wp.ScopedDevice(cuda0):
    wp.capture_begin()
    wp.launch(inc_kernel, dim=arr_cuda0.size, inputs=[arr_cuda0])
    graph0 = wp.capture_end()


# Holds the arrays created by test_alloc()/test_zeros(), one slot per run,
# until test_free() or the cleanup loop resets the slot to None.
g_allocs = [None] * num_runs
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def test_alloc(num_elems, device, idx):
    """Time one ``wp.empty`` allocation of ``num_elems`` floats on ``device``.

    The new array is stored in ``g_allocs[idx]``, which keeps a reference to it
    after the function returns.

    Returns:
        The ``elapsed`` value reported by the ``wp.ScopedTimer``.
    """
    # Synchronize before the timer starts.
    wp.synchronize()

    with wp.ScopedTimer("alloc", print=VERBOSE, use_nvtx=USE_NVTX) as alloc_timer:
        g_allocs[idx] = wp.empty(num_elems, dtype=float, device=device)
        if DO_SYNC:
            wp.synchronize_device(device)

    return alloc_timer.elapsed
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def test_free(device, idx):
    """Time dropping the reference stored in ``g_allocs[idx]``.

    The slot is reset to ``None``; ``device`` is only used for the optional
    synchronization.

    Returns:
        The ``elapsed`` value reported by the ``wp.ScopedTimer``.
    """
    # Synchronize before the timer starts.
    wp.synchronize()

    with wp.ScopedTimer("free", print=VERBOSE, use_nvtx=USE_NVTX) as free_timer:
        g_allocs[idx] = None
        if DO_SYNC:
            wp.synchronize_device(device)

    return free_timer.elapsed
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def test_zeros(num_elems, device, idx):
    """Time one ``wp.zeros`` allocation of ``num_elems`` floats on ``device``.

    The new array is stored in ``g_allocs[idx]``.

    Returns:
        The ``elapsed`` value reported by the ``wp.ScopedTimer``.
    """
    # Synchronize before the timer starts.
    wp.synchronize()

    with wp.ScopedTimer("zeros", print=VERBOSE, use_nvtx=USE_NVTX) as zeros_timer:
        g_allocs[idx] = wp.zeros(num_elems, dtype=float, device=device)
        if DO_SYNC:
            wp.synchronize_device(device)

    return zeros_timer.elapsed
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def test_h2d(num_elems, device):
    """Time a ``wp.copy`` from the pageable host array into ``arr_cuda0``.

    ``num_elems`` is not used here: the copy always involves the preallocated
    module-level arrays. ``device`` is only used for the optional
    synchronization.

    Returns:
        The ``elapsed`` value reported by the ``wp.ScopedTimer``.
    """
    # Synchronize before the timer starts.
    wp.synchronize()

    with wp.ScopedTimer("h2d", print=VERBOSE, use_nvtx=USE_NVTX) as copy_timer:
        wp.copy(arr_cuda0, arr_host)
        if DO_SYNC:
            wp.synchronize_device(device)

    return copy_timer.elapsed
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def test_d2h(num_elems, device):
    """Time a ``wp.copy`` from ``arr_cuda0`` into the pageable host array.

    ``num_elems`` is not used here: the copy always involves the preallocated
    module-level arrays. ``device`` is only used for the optional
    synchronization.

    Returns:
        The ``elapsed`` value reported by the ``wp.ScopedTimer``.
    """
    # Synchronize before the timer starts.
    wp.synchronize()

    with wp.ScopedTimer("d2h", print=VERBOSE, use_nvtx=USE_NVTX) as copy_timer:
        wp.copy(arr_host, arr_cuda0)
        if DO_SYNC:
            wp.synchronize_device(device)

    return copy_timer.elapsed
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def test_h2d_pinned(num_elems, device):
    """Time a ``wp.copy`` from the pinned host array into ``arr_cuda0``.

    ``num_elems`` is not used here: the copy always involves the preallocated
    module-level arrays. ``device`` is only used for the optional
    synchronization.

    Returns:
        The ``elapsed`` value reported by the ``wp.ScopedTimer``.
    """
    # Synchronize before the timer starts.
    wp.synchronize()

    with wp.ScopedTimer("h2d pinned", print=VERBOSE, use_nvtx=USE_NVTX) as copy_timer:
        wp.copy(arr_cuda0, arr_host_pinned)
        if DO_SYNC:
            wp.synchronize_device(device)

    return copy_timer.elapsed
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def test_d2h_pinned(num_elems, device):
    """Time a ``wp.copy`` from ``arr_cuda0`` into the pinned host array.

    ``num_elems`` is not used here: the copy always involves the preallocated
    module-level arrays. ``device`` is only used for the optional
    synchronization.

    Returns:
        The ``elapsed`` value reported by the ``wp.ScopedTimer``.
    """
    # Synchronize before the timer starts.
    wp.synchronize()

    with wp.ScopedTimer("d2h pinned", print=VERBOSE, use_nvtx=USE_NVTX) as copy_timer:
        wp.copy(arr_host_pinned, arr_cuda0)
        if DO_SYNC:
            wp.synchronize_device(device)

    return copy_timer.elapsed
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def test_d2d(num_elems, device):
    """Time a ``wp.copy`` between two arrays that both live on ``cuda0``.

    ``num_elems`` is not used here: the copy always goes from
    ``arr_cuda0_src`` to ``arr_cuda0_dst``. ``device`` is only used for the
    optional synchronization.

    Returns:
        The ``elapsed`` value reported by the ``wp.ScopedTimer``.
    """
    # Synchronize before the timer starts.
    wp.synchronize()

    with wp.ScopedTimer("d2d", print=VERBOSE, use_nvtx=USE_NVTX) as copy_timer:
        wp.copy(arr_cuda0_dst, arr_cuda0_src)
        if DO_SYNC:
            wp.synchronize_device(device)

    return copy_timer.elapsed
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def test_p2p(num_elems, src_device, dst_device):
    """Time a ``wp.copy`` from ``arr_cuda1`` into ``arr_cuda0``.

    ``num_elems`` is not used here. ``src_device`` and ``dst_device`` do not
    select the arrays; they are only synchronized (in that order) when
    ``DO_SYNC`` is set. Requires ``arr_cuda1``, which only exists when more
    than one CUDA device is present.

    Returns:
        The ``elapsed`` value reported by the ``wp.ScopedTimer``.
    """
    # Synchronize before the timer starts.
    wp.synchronize()

    with wp.ScopedTimer("p2p", print=VERBOSE, use_nvtx=USE_NVTX) as copy_timer:
        wp.copy(arr_cuda0, arr_cuda1)
        if DO_SYNC:
            for endpoint in (src_device, dst_device):
                wp.synchronize_device(endpoint)

    return copy_timer.elapsed
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def test_p2p_stream(num_elems, src_device, dst_device):
    """Time a ``wp.copy`` from ``arr_cuda1`` into ``arr_cuda0`` issued on ``stream0``.

    ``num_elems`` is not used here. ``src_device`` and ``dst_device`` do not
    select the arrays; they are only synchronized (in that order) when
    ``DO_SYNC`` is set. Requires ``arr_cuda1``, which only exists when more
    than one CUDA device is present.

    Returns:
        The ``elapsed`` value reported by the ``wp.ScopedTimer``.
    """
    # Synchronize before the timer starts.
    wp.synchronize()

    with wp.ScopedTimer("p2p stream", print=VERBOSE, use_nvtx=USE_NVTX) as copy_timer:
        wp.copy(arr_cuda0, arr_cuda1, stream=stream0)
        if DO_SYNC:
            for endpoint in (src_device, dst_device):
                wp.synchronize_device(endpoint)

    return copy_timer.elapsed
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def test_launch(num_elems, device):
    """Time one ``wp.launch`` of ``inc_kernel`` over ``arr_cuda0`` on ``device``.

    ``num_elems`` is not used here; the launch dimension is the size of
    ``arr_cuda0``.

    Returns:
        The ``elapsed`` value reported by the ``wp.ScopedTimer``.
    """
    # Synchronize before the timer starts.
    wp.synchronize()

    with wp.ScopedTimer("launch", print=VERBOSE, use_nvtx=USE_NVTX) as launch_timer:
        wp.launch(inc_kernel, dim=arr_cuda0.size, inputs=[arr_cuda0], device=device)
        if DO_SYNC:
            wp.synchronize_device(device)

    return launch_timer.elapsed
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def test_launch_stream(num_elems, device):
    """Time one ``wp.launch`` of ``inc_kernel`` over ``arr_cuda0`` on ``stream0``.

    ``num_elems`` is not used here; the launch dimension is the size of
    ``arr_cuda0``. ``device`` is only used for the optional synchronization.

    Returns:
        The ``elapsed`` value reported by the ``wp.ScopedTimer``.
    """
    # Synchronize before the timer starts.
    wp.synchronize()

    with wp.ScopedTimer("launch stream", print=VERBOSE, use_nvtx=USE_NVTX) as launch_timer:
        wp.launch(inc_kernel, dim=arr_cuda0.size, inputs=[arr_cuda0], stream=stream0)
        if DO_SYNC:
            wp.synchronize_device(device)

    return launch_timer.elapsed
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def test_graph(num_elems, device):
    """Time one ``wp.capture_launch`` of the pre-captured ``graph0``.

    ``num_elems`` is not used here. ``device`` is only used for the optional
    synchronization.

    Returns:
        The ``elapsed`` value reported by the ``wp.ScopedTimer``.
    """
    # Synchronize before the timer starts.
    wp.synchronize()

    with wp.ScopedTimer("graph", print=VERBOSE, use_nvtx=USE_NVTX) as graph_timer:
        wp.capture_launch(graph0)
        if DO_SYNC:
            wp.synchronize_device(device)

    return graph_timer.elapsed
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def test_graph_stream(num_elems, device):
    """Time one ``wp.capture_launch`` of ``graph0`` issued on ``stream0``.

    ``num_elems`` is not used here. ``device`` is only used for the optional
    synchronization.

    Returns:
        The ``elapsed`` value reported by the ``wp.ScopedTimer``.
    """
    # Synchronize before the timer starts.
    wp.synchronize()

    # Labelled "graph stream" (not "graph") so that verbose/NVTX output can be
    # told apart from test_graph(), matching the other "* stream" benchmarks.
    with wp.ScopedTimer("graph stream", print=VERBOSE, use_nvtx=USE_NVTX) as timer:
        wp.capture_launch(graph0, stream=stream0)

        if DO_SYNC:
            wp.synchronize_device(device)

    return timer.elapsed
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
# One result slot per run for every benchmark; the loops that follow overwrite
# the zeros with the measured timings.
alloc_times = [0] * num_runs
free_times = [0] * num_runs
zeros_times = [0] * num_runs
d2h_times = [0] * num_runs
h2d_times = [0] * num_runs
d2h_pinned_times = [0] * num_runs
h2d_pinned_times = [0] * num_runs
d2d_times = [0] * num_runs
# The two p2p buffers keep their initial zeros unless more than one CUDA
# device is present.
p2p_times = [0] * num_runs
p2p_stream_times = [0] * num_runs
launch_times = [0] * num_runs
launch_stream_times = [0] * num_runs
graph_times = [0] * num_runs
graph_stream_times = [0] * num_runs

# Select cuda0 as the current device before running the benchmarks.
wp.set_device(cuda0)
|
|
289
|
+
|
|
290
|
+
# Python's garbage collector is switched off around every measured call and
# switched back on afterwards.

# alloc: every array stays referenced from g_allocs so the next loop can time the frees
for i in range(num_runs):
    gc.disable()
    alloc_times[i] = test_alloc(num_elems, cuda0, i)
    gc.enable()

# free: release the arrays created by the alloc loop
for i in range(num_runs):
    gc.disable()
    free_times[i] = test_free(cuda0, i)
    gc.enable()

# zeros
for i in range(num_runs):
    gc.disable()
    zeros_times[i] = test_zeros(num_elems, cuda0, i)
    gc.enable()

# free zeros (untimed cleanup of the arrays created by the zeros loop)
for i in range(num_runs):
    g_allocs[i] = None

# h2d, d2h pageable copy
for i in range(num_runs):
    gc.disable()
    h2d_times[i] = test_h2d(num_elems, cuda0)
    d2h_times[i] = test_d2h(num_elems, cuda0)
    gc.enable()

# h2d, d2h pinned copy
for i in range(num_runs):
    gc.disable()
    h2d_pinned_times[i] = test_h2d_pinned(num_elems, cuda0)
    d2h_pinned_times[i] = test_d2h_pinned(num_elems, cuda0)
    gc.enable()

# d2d copy
for i in range(num_runs):
    gc.disable()
    d2d_times[i] = test_d2d(num_elems, cuda0)
    gc.enable()

# p2p copy (needs a second CUDA device; cuda1 is passed as source, cuda0 as destination)
if cuda_device_count > 1:
    for i in range(num_runs):
        gc.disable()
        p2p_times[i] = test_p2p(num_elems, cuda1, cuda0)
        p2p_stream_times[i] = test_p2p_stream(num_elems, cuda1, cuda0)
        gc.enable()

# launch
for i in range(num_runs):
    gc.disable()
    launch_times[i] = test_launch(num_elems, cuda0)
    launch_stream_times[i] = test_launch_stream(num_elems, cuda0)
    gc.enable()

# graph
for i in range(num_runs):
    gc.disable()
    graph_times[i] = test_graph(num_elems, cuda0)
    graph_stream_times[i] = test_graph_stream(num_elems, cuda0)
    gc.enable()
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
def print_stat(name, data, trim=trim_runs):
    """Print the trimmed mean of ``data``, multiplied by 1,000,000, next to ``name``.

    Args:
        name: Row label, left-aligned in a 15-character column.
        data: Sequence of timing samples.
        trim: Number of samples discarded from each end of the sorted data
            before averaging. Values ``<= 0`` disable trimming.

    Raises:
        ValueError: If trimming would leave no samples to average.
    """
    # Explicit check instead of ``assert`` so that it is not stripped when
    # Python runs with -O.
    if len(data) - 2 * trim <= 0:
        raise ValueError(f"cannot trim {trim} samples from each end of {len(data)} samples")

    samples = sorted(data)[trim:-trim] if trim > 0 else data
    print(f"{name:15s} {1000000 * stats.mean(samples):.0f}")
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
# Summary table: one row per benchmark, in the order the benchmarks were run.
print("=========================")
for row_label, row_samples in (
    ("Alloc", alloc_times),
    ("Free", free_times),
    ("Zeros", zeros_times),
    ("H2D", h2d_times),
    ("D2H", d2h_times),
    ("H2D pinned", h2d_pinned_times),
    ("D2H pinned", d2h_pinned_times),
    ("D2D", d2d_times),
    ("P2P", p2p_times),
    ("P2P stream", p2p_stream_times),
    ("Launch", launch_times),
    ("Launch stream", launch_stream_times),
    ("Graph", graph_times),
    ("Graph stream", graph_stream_times),
):
    print_stat(row_label, row_samples)
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
# ========= profiling ==========
|
|
380
|
+
|
|
381
|
+
# from pyinstrument import Profiler
|
|
382
|
+
# profiler = Profiler()
|
|
383
|
+
# profiler.start()
|
|
384
|
+
# for i in range(10):
|
|
385
|
+
# # test_alloc(num_elems, cuda0)
|
|
386
|
+
# # test_h2d(num_elems, cuda0)
|
|
387
|
+
# test_p2p(num_elems, cuda0, cuda1)
|
|
388
|
+
# profiler.stop()
|
|
389
|
+
# print(profiler.output_text(show_all=True))
|