PyPI - warp-lang - Versions diffs - 1.10.0__py3-none-macosx_11_0_arm64.whl - Mend

warp-lang 1.10.0__py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (468) hide show

warp/__init__.py +334 -0
warp/__init__.pyi +5856 -0
warp/_src/__init__.py +14 -0
warp/_src/autograd.py +1077 -0
warp/_src/build.py +620 -0
warp/_src/build_dll.py +642 -0
warp/_src/builtins.py +10555 -0
warp/_src/codegen.py +4361 -0
warp/_src/config.py +178 -0
warp/_src/constants.py +59 -0
warp/_src/context.py +8352 -0
warp/_src/dlpack.py +464 -0
warp/_src/fabric.py +362 -0
warp/_src/fem/__init__.py +14 -0
warp/_src/fem/adaptivity.py +510 -0
warp/_src/fem/cache.py +689 -0
warp/_src/fem/dirichlet.py +190 -0
warp/_src/fem/domain.py +553 -0
warp/_src/fem/field/__init__.py +131 -0
warp/_src/fem/field/field.py +703 -0
warp/_src/fem/field/nodal_field.py +403 -0
warp/_src/fem/field/restriction.py +39 -0
warp/_src/fem/field/virtual.py +1021 -0
warp/_src/fem/geometry/__init__.py +32 -0
warp/_src/fem/geometry/adaptive_nanogrid.py +782 -0
warp/_src/fem/geometry/closest_point.py +99 -0
warp/_src/fem/geometry/deformed_geometry.py +277 -0
warp/_src/fem/geometry/element.py +854 -0
warp/_src/fem/geometry/geometry.py +693 -0
warp/_src/fem/geometry/grid_2d.py +478 -0
warp/_src/fem/geometry/grid_3d.py +539 -0
warp/_src/fem/geometry/hexmesh.py +956 -0
warp/_src/fem/geometry/nanogrid.py +660 -0
warp/_src/fem/geometry/partition.py +483 -0
warp/_src/fem/geometry/quadmesh.py +597 -0
warp/_src/fem/geometry/tetmesh.py +762 -0
warp/_src/fem/geometry/trimesh.py +588 -0
warp/_src/fem/integrate.py +2507 -0
warp/_src/fem/linalg.py +385 -0
warp/_src/fem/operator.py +398 -0
warp/_src/fem/polynomial.py +231 -0
warp/_src/fem/quadrature/__init__.py +17 -0
warp/_src/fem/quadrature/pic_quadrature.py +318 -0
warp/_src/fem/quadrature/quadrature.py +665 -0
warp/_src/fem/space/__init__.py +248 -0
warp/_src/fem/space/basis_function_space.py +499 -0
warp/_src/fem/space/basis_space.py +681 -0
warp/_src/fem/space/dof_mapper.py +253 -0
warp/_src/fem/space/function_space.py +312 -0
warp/_src/fem/space/grid_2d_function_space.py +179 -0
warp/_src/fem/space/grid_3d_function_space.py +229 -0
warp/_src/fem/space/hexmesh_function_space.py +255 -0
warp/_src/fem/space/nanogrid_function_space.py +199 -0
warp/_src/fem/space/partition.py +435 -0
warp/_src/fem/space/quadmesh_function_space.py +222 -0
warp/_src/fem/space/restriction.py +221 -0
warp/_src/fem/space/shape/__init__.py +152 -0
warp/_src/fem/space/shape/cube_shape_function.py +1107 -0
warp/_src/fem/space/shape/shape_function.py +134 -0
warp/_src/fem/space/shape/square_shape_function.py +928 -0
warp/_src/fem/space/shape/tet_shape_function.py +829 -0
warp/_src/fem/space/shape/triangle_shape_function.py +674 -0
warp/_src/fem/space/tetmesh_function_space.py +270 -0
warp/_src/fem/space/topology.py +461 -0
warp/_src/fem/space/trimesh_function_space.py +193 -0
warp/_src/fem/types.py +114 -0
warp/_src/fem/utils.py +488 -0
warp/_src/jax.py +188 -0
warp/_src/jax_experimental/__init__.py +14 -0
warp/_src/jax_experimental/custom_call.py +389 -0
warp/_src/jax_experimental/ffi.py +1286 -0
warp/_src/jax_experimental/xla_ffi.py +658 -0
warp/_src/marching_cubes.py +710 -0
warp/_src/math.py +416 -0
warp/_src/optim/__init__.py +14 -0
warp/_src/optim/adam.py +165 -0
warp/_src/optim/linear.py +1608 -0
warp/_src/optim/sgd.py +114 -0
warp/_src/paddle.py +408 -0
warp/_src/render/__init__.py +14 -0
warp/_src/render/imgui_manager.py +291 -0
warp/_src/render/render_opengl.py +3638 -0
warp/_src/render/render_usd.py +939 -0
warp/_src/render/utils.py +162 -0
warp/_src/sparse.py +2718 -0
warp/_src/tape.py +1208 -0
warp/_src/thirdparty/__init__.py +0 -0
warp/_src/thirdparty/appdirs.py +598 -0
warp/_src/thirdparty/dlpack.py +145 -0
warp/_src/thirdparty/unittest_parallel.py +676 -0
warp/_src/torch.py +393 -0
warp/_src/types.py +5888 -0
warp/_src/utils.py +1695 -0
warp/autograd.py +33 -0
warp/bin/libwarp-clang.dylib +0 -0
warp/bin/libwarp.dylib +0 -0
warp/build.py +29 -0
warp/build_dll.py +24 -0
warp/codegen.py +24 -0
warp/constants.py +24 -0
warp/context.py +33 -0
warp/dlpack.py +24 -0
warp/examples/__init__.py +24 -0
warp/examples/assets/bear.usd +0 -0
warp/examples/assets/bunny.usd +0 -0
warp/examples/assets/cube.usd +0 -0
warp/examples/assets/nonuniform.usd +0 -0
warp/examples/assets/nvidia_logo.png +0 -0
warp/examples/assets/pixel.jpg +0 -0
warp/examples/assets/rocks.nvdb +0 -0
warp/examples/assets/rocks.usd +0 -0
warp/examples/assets/sphere.usd +0 -0
warp/examples/assets/square_cloth.usd +0 -0
warp/examples/benchmarks/benchmark_api.py +389 -0
warp/examples/benchmarks/benchmark_cloth.py +296 -0
warp/examples/benchmarks/benchmark_cloth_cupy.py +96 -0
warp/examples/benchmarks/benchmark_cloth_jax.py +105 -0
warp/examples/benchmarks/benchmark_cloth_numba.py +161 -0
warp/examples/benchmarks/benchmark_cloth_numpy.py +85 -0
warp/examples/benchmarks/benchmark_cloth_paddle.py +94 -0
warp/examples/benchmarks/benchmark_cloth_pytorch.py +94 -0
warp/examples/benchmarks/benchmark_cloth_taichi.py +120 -0
warp/examples/benchmarks/benchmark_cloth_warp.py +153 -0
warp/examples/benchmarks/benchmark_gemm.py +164 -0
warp/examples/benchmarks/benchmark_interop_paddle.py +166 -0
warp/examples/benchmarks/benchmark_interop_torch.py +166 -0
warp/examples/benchmarks/benchmark_launches.py +301 -0
warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
warp/examples/benchmarks/benchmark_tile_sort.py +155 -0
warp/examples/browse.py +37 -0
warp/examples/core/example_cupy.py +86 -0
warp/examples/core/example_dem.py +241 -0
warp/examples/core/example_fluid.py +299 -0
warp/examples/core/example_graph_capture.py +150 -0
warp/examples/core/example_marching_cubes.py +195 -0
warp/examples/core/example_mesh.py +180 -0
warp/examples/core/example_mesh_intersect.py +211 -0
warp/examples/core/example_nvdb.py +182 -0
warp/examples/core/example_raycast.py +111 -0
warp/examples/core/example_raymarch.py +205 -0
warp/examples/core/example_render_opengl.py +290 -0
warp/examples/core/example_sample_mesh.py +300 -0
warp/examples/core/example_sph.py +411 -0
warp/examples/core/example_spin_lock.py +93 -0
warp/examples/core/example_torch.py +211 -0
warp/examples/core/example_wave.py +269 -0
warp/examples/core/example_work_queue.py +118 -0
warp/examples/distributed/example_jacobi_mpi.py +506 -0
warp/examples/fem/example_adaptive_grid.py +286 -0
warp/examples/fem/example_apic_fluid.py +469 -0
warp/examples/fem/example_burgers.py +261 -0
warp/examples/fem/example_convection_diffusion.py +181 -0
warp/examples/fem/example_convection_diffusion_dg.py +225 -0
warp/examples/fem/example_darcy_ls_optimization.py +489 -0
warp/examples/fem/example_deformed_geometry.py +172 -0
warp/examples/fem/example_diffusion.py +196 -0
warp/examples/fem/example_diffusion_3d.py +225 -0
warp/examples/fem/example_diffusion_mgpu.py +225 -0
warp/examples/fem/example_distortion_energy.py +228 -0
warp/examples/fem/example_elastic_shape_optimization.py +387 -0
warp/examples/fem/example_magnetostatics.py +242 -0
warp/examples/fem/example_mixed_elasticity.py +293 -0
warp/examples/fem/example_navier_stokes.py +263 -0
warp/examples/fem/example_nonconforming_contact.py +300 -0
warp/examples/fem/example_stokes.py +213 -0
warp/examples/fem/example_stokes_transfer.py +262 -0
warp/examples/fem/example_streamlines.py +357 -0
warp/examples/fem/utils.py +1047 -0
warp/examples/interop/example_jax_callable.py +146 -0
warp/examples/interop/example_jax_ffi_callback.py +132 -0
warp/examples/interop/example_jax_kernel.py +232 -0
warp/examples/optim/example_diffray.py +561 -0
warp/examples/optim/example_fluid_checkpoint.py +497 -0
warp/examples/tile/example_tile_block_cholesky.py +502 -0
warp/examples/tile/example_tile_cholesky.py +88 -0
warp/examples/tile/example_tile_convolution.py +66 -0
warp/examples/tile/example_tile_fft.py +55 -0
warp/examples/tile/example_tile_filtering.py +113 -0
warp/examples/tile/example_tile_matmul.py +85 -0
warp/examples/tile/example_tile_mcgp.py +191 -0
warp/examples/tile/example_tile_mlp.py +385 -0
warp/examples/tile/example_tile_nbody.py +199 -0
warp/fabric.py +24 -0
warp/fem/__init__.py +173 -0
warp/fem/adaptivity.py +26 -0
warp/fem/cache.py +30 -0
warp/fem/dirichlet.py +24 -0
warp/fem/field/__init__.py +24 -0
warp/fem/field/field.py +26 -0
warp/fem/geometry/__init__.py +21 -0
warp/fem/geometry/closest_point.py +31 -0
warp/fem/linalg.py +38 -0
warp/fem/operator.py +32 -0
warp/fem/polynomial.py +29 -0
warp/fem/space/__init__.py +22 -0
warp/fem/space/basis_space.py +24 -0
warp/fem/space/shape/__init__.py +68 -0
warp/fem/space/topology.py +24 -0
warp/fem/types.py +24 -0
warp/fem/utils.py +32 -0
warp/jax.py +29 -0
warp/jax_experimental/__init__.py +29 -0
warp/jax_experimental/custom_call.py +29 -0
warp/jax_experimental/ffi.py +39 -0
warp/jax_experimental/xla_ffi.py +24 -0
warp/marching_cubes.py +24 -0
warp/math.py +37 -0
warp/native/array.h +1687 -0
warp/native/builtin.h +2327 -0
warp/native/bvh.cpp +562 -0
warp/native/bvh.cu +826 -0
warp/native/bvh.h +555 -0
warp/native/clang/clang.cpp +541 -0
warp/native/coloring.cpp +622 -0
warp/native/crt.cpp +51 -0
warp/native/crt.h +568 -0
warp/native/cuda_crt.h +1058 -0
warp/native/cuda_util.cpp +677 -0
warp/native/cuda_util.h +313 -0
warp/native/error.cpp +77 -0
warp/native/error.h +36 -0
warp/native/exports.h +2023 -0
warp/native/fabric.h +246 -0
warp/native/hashgrid.cpp +311 -0
warp/native/hashgrid.cu +89 -0
warp/native/hashgrid.h +240 -0
warp/native/initializer_array.h +41 -0
warp/native/intersect.h +1253 -0
warp/native/intersect_adj.h +375 -0
warp/native/intersect_tri.h +348 -0
warp/native/mat.h +5189 -0
warp/native/mathdx.cpp +93 -0
warp/native/matnn.h +221 -0
warp/native/mesh.cpp +266 -0
warp/native/mesh.cu +406 -0
warp/native/mesh.h +2097 -0
warp/native/nanovdb/GridHandle.h +533 -0
warp/native/nanovdb/HostBuffer.h +591 -0
warp/native/nanovdb/NanoVDB.h +6246 -0
warp/native/nanovdb/NodeManager.h +323 -0
warp/native/nanovdb/PNanoVDB.h +3390 -0
warp/native/noise.h +859 -0
warp/native/quat.h +1664 -0
warp/native/rand.h +342 -0
warp/native/range.h +145 -0
warp/native/reduce.cpp +174 -0
warp/native/reduce.cu +363 -0
warp/native/runlength_encode.cpp +79 -0
warp/native/runlength_encode.cu +61 -0
warp/native/scan.cpp +47 -0
warp/native/scan.cu +55 -0
warp/native/scan.h +23 -0
warp/native/solid_angle.h +466 -0
warp/native/sort.cpp +251 -0
warp/native/sort.cu +286 -0
warp/native/sort.h +35 -0
warp/native/sparse.cpp +241 -0
warp/native/sparse.cu +435 -0
warp/native/spatial.h +1306 -0
warp/native/svd.h +727 -0
warp/native/temp_buffer.h +46 -0
warp/native/tile.h +4124 -0
warp/native/tile_radix_sort.h +1112 -0
warp/native/tile_reduce.h +838 -0
warp/native/tile_scan.h +240 -0
warp/native/tuple.h +189 -0
warp/native/vec.h +2199 -0
warp/native/version.h +23 -0
warp/native/volume.cpp +501 -0
warp/native/volume.cu +68 -0
warp/native/volume.h +970 -0
warp/native/volume_builder.cu +483 -0
warp/native/volume_builder.h +52 -0
warp/native/volume_impl.h +70 -0
warp/native/warp.cpp +1143 -0
warp/native/warp.cu +4604 -0
warp/native/warp.h +358 -0
warp/optim/__init__.py +20 -0
warp/optim/adam.py +24 -0
warp/optim/linear.py +35 -0
warp/optim/sgd.py +24 -0
warp/paddle.py +24 -0
warp/py.typed +0 -0
warp/render/__init__.py +22 -0
warp/render/imgui_manager.py +29 -0
warp/render/render_opengl.py +24 -0
warp/render/render_usd.py +24 -0
warp/render/utils.py +24 -0
warp/sparse.py +51 -0
warp/tape.py +24 -0
warp/tests/__init__.py +1 -0
warp/tests/__main__.py +4 -0
warp/tests/assets/curlnoise_golden.npy +0 -0
warp/tests/assets/mlp_golden.npy +0 -0
warp/tests/assets/pixel.npy +0 -0
warp/tests/assets/pnoise_golden.npy +0 -0
warp/tests/assets/spiky.usd +0 -0
warp/tests/assets/test_grid.nvdb +0 -0
warp/tests/assets/test_index_grid.nvdb +0 -0
warp/tests/assets/test_int32_grid.nvdb +0 -0
warp/tests/assets/test_vec_grid.nvdb +0 -0
warp/tests/assets/torus.nvdb +0 -0
warp/tests/assets/torus.usda +105 -0
warp/tests/aux_test_class_kernel.py +34 -0
warp/tests/aux_test_compile_consts_dummy.py +18 -0
warp/tests/aux_test_conditional_unequal_types_kernels.py +29 -0
warp/tests/aux_test_dependent.py +29 -0
warp/tests/aux_test_grad_customs.py +29 -0
warp/tests/aux_test_instancing_gc.py +26 -0
warp/tests/aux_test_module_aot.py +7 -0
warp/tests/aux_test_module_unload.py +23 -0
warp/tests/aux_test_name_clash1.py +40 -0
warp/tests/aux_test_name_clash2.py +40 -0
warp/tests/aux_test_reference.py +9 -0
warp/tests/aux_test_reference_reference.py +8 -0
warp/tests/aux_test_square.py +16 -0
warp/tests/aux_test_unresolved_func.py +22 -0
warp/tests/aux_test_unresolved_symbol.py +22 -0
warp/tests/cuda/__init__.py +0 -0
warp/tests/cuda/test_async.py +676 -0
warp/tests/cuda/test_conditional_captures.py +1147 -0
warp/tests/cuda/test_ipc.py +124 -0
warp/tests/cuda/test_mempool.py +233 -0
warp/tests/cuda/test_multigpu.py +169 -0
warp/tests/cuda/test_peer.py +139 -0
warp/tests/cuda/test_pinned.py +84 -0
warp/tests/cuda/test_streams.py +691 -0
warp/tests/geometry/__init__.py +0 -0
warp/tests/geometry/test_bvh.py +335 -0
warp/tests/geometry/test_hash_grid.py +259 -0
warp/tests/geometry/test_marching_cubes.py +294 -0
warp/tests/geometry/test_mesh.py +318 -0
warp/tests/geometry/test_mesh_query_aabb.py +392 -0
warp/tests/geometry/test_mesh_query_point.py +935 -0
warp/tests/geometry/test_mesh_query_ray.py +323 -0
warp/tests/geometry/test_volume.py +1103 -0
warp/tests/geometry/test_volume_write.py +346 -0
warp/tests/interop/__init__.py +0 -0
warp/tests/interop/test_dlpack.py +730 -0
warp/tests/interop/test_jax.py +1673 -0
warp/tests/interop/test_paddle.py +800 -0
warp/tests/interop/test_torch.py +1001 -0
warp/tests/run_coverage_serial.py +39 -0
warp/tests/test_adam.py +162 -0
warp/tests/test_arithmetic.py +1096 -0
warp/tests/test_array.py +3756 -0
warp/tests/test_array_reduce.py +156 -0
warp/tests/test_assert.py +303 -0
warp/tests/test_atomic.py +336 -0
warp/tests/test_atomic_bitwise.py +209 -0
warp/tests/test_atomic_cas.py +312 -0
warp/tests/test_bool.py +220 -0
warp/tests/test_builtins_resolution.py +732 -0
warp/tests/test_closest_point_edge_edge.py +327 -0
warp/tests/test_codegen.py +974 -0
warp/tests/test_codegen_instancing.py +1495 -0
warp/tests/test_compile_consts.py +215 -0
warp/tests/test_conditional.py +298 -0
warp/tests/test_context.py +35 -0
warp/tests/test_copy.py +319 -0
warp/tests/test_ctypes.py +618 -0
warp/tests/test_dense.py +73 -0
warp/tests/test_devices.py +127 -0
warp/tests/test_enum.py +136 -0
warp/tests/test_examples.py +424 -0
warp/tests/test_fabricarray.py +998 -0
warp/tests/test_fast_math.py +72 -0
warp/tests/test_fem.py +2204 -0
warp/tests/test_fixedarray.py +229 -0
warp/tests/test_fp16.py +136 -0
warp/tests/test_func.py +501 -0
warp/tests/test_future_annotations.py +100 -0
warp/tests/test_generics.py +656 -0
warp/tests/test_grad.py +893 -0
warp/tests/test_grad_customs.py +339 -0
warp/tests/test_grad_debug.py +341 -0
warp/tests/test_implicit_init.py +411 -0
warp/tests/test_import.py +45 -0
warp/tests/test_indexedarray.py +1140 -0
warp/tests/test_intersect.py +103 -0
warp/tests/test_iter.py +76 -0
warp/tests/test_large.py +177 -0
warp/tests/test_launch.py +411 -0
warp/tests/test_lerp.py +151 -0
warp/tests/test_linear_solvers.py +223 -0
warp/tests/test_lvalue.py +427 -0
warp/tests/test_map.py +526 -0
warp/tests/test_mat.py +3515 -0
warp/tests/test_mat_assign_copy.py +178 -0
warp/tests/test_mat_constructors.py +573 -0
warp/tests/test_mat_lite.py +122 -0
warp/tests/test_mat_scalar_ops.py +2913 -0
warp/tests/test_math.py +212 -0
warp/tests/test_module_aot.py +287 -0
warp/tests/test_module_hashing.py +258 -0
warp/tests/test_modules_lite.py +70 -0
warp/tests/test_noise.py +252 -0
warp/tests/test_operators.py +299 -0
warp/tests/test_options.py +129 -0
warp/tests/test_overwrite.py +551 -0
warp/tests/test_print.py +408 -0
warp/tests/test_quat.py +2653 -0
warp/tests/test_quat_assign_copy.py +145 -0
warp/tests/test_rand.py +339 -0
warp/tests/test_reload.py +303 -0
warp/tests/test_rounding.py +157 -0
warp/tests/test_runlength_encode.py +196 -0
warp/tests/test_scalar_ops.py +133 -0
warp/tests/test_smoothstep.py +108 -0
warp/tests/test_snippet.py +318 -0
warp/tests/test_sparse.py +845 -0
warp/tests/test_spatial.py +2859 -0
warp/tests/test_spatial_assign_copy.py +160 -0
warp/tests/test_special_values.py +361 -0
warp/tests/test_static.py +640 -0
warp/tests/test_struct.py +901 -0
warp/tests/test_tape.py +242 -0
warp/tests/test_transient_module.py +93 -0
warp/tests/test_triangle_closest_point.py +192 -0
warp/tests/test_tuple.py +361 -0
warp/tests/test_types.py +615 -0
warp/tests/test_utils.py +594 -0
warp/tests/test_vec.py +1408 -0
warp/tests/test_vec_assign_copy.py +143 -0
warp/tests/test_vec_constructors.py +325 -0
warp/tests/test_vec_lite.py +80 -0
warp/tests/test_vec_scalar_ops.py +2327 -0
warp/tests/test_verify_fp.py +100 -0
warp/tests/test_version.py +75 -0
warp/tests/tile/__init__.py +0 -0
warp/tests/tile/test_tile.py +1519 -0
warp/tests/tile/test_tile_atomic_bitwise.py +403 -0
warp/tests/tile/test_tile_cholesky.py +608 -0
warp/tests/tile/test_tile_load.py +724 -0
warp/tests/tile/test_tile_mathdx.py +156 -0
warp/tests/tile/test_tile_matmul.py +179 -0
warp/tests/tile/test_tile_mlp.py +400 -0
warp/tests/tile/test_tile_reduce.py +950 -0
warp/tests/tile/test_tile_shared_memory.py +376 -0
warp/tests/tile/test_tile_sort.py +121 -0
warp/tests/tile/test_tile_view.py +173 -0
warp/tests/unittest_serial.py +47 -0
warp/tests/unittest_suites.py +430 -0
warp/tests/unittest_utils.py +469 -0
warp/tests/walkthrough_debug.py +95 -0
warp/torch.py +24 -0
warp/types.py +51 -0
warp/utils.py +31 -0
warp_lang-1.10.0.dist-info/METADATA +459 -0
warp_lang-1.10.0.dist-info/RECORD +468 -0
warp_lang-1.10.0.dist-info/WHEEL +5 -0
warp_lang-1.10.0.dist-info/licenses/LICENSE.md +176 -0
warp_lang-1.10.0.dist-info/licenses/licenses/Gaia-LICENSE.txt +6 -0
warp_lang-1.10.0.dist-info/licenses/licenses/appdirs-LICENSE.txt +22 -0
warp_lang-1.10.0.dist-info/licenses/licenses/asset_pixel_jpg-LICENSE.txt +3 -0
warp_lang-1.10.0.dist-info/licenses/licenses/cuda-LICENSE.txt +1582 -0
warp_lang-1.10.0.dist-info/licenses/licenses/dlpack-LICENSE.txt +201 -0
warp_lang-1.10.0.dist-info/licenses/licenses/fp16-LICENSE.txt +28 -0
warp_lang-1.10.0.dist-info/licenses/licenses/libmathdx-LICENSE.txt +220 -0
warp_lang-1.10.0.dist-info/licenses/licenses/llvm-LICENSE.txt +279 -0
warp_lang-1.10.0.dist-info/licenses/licenses/moller-LICENSE.txt +16 -0
warp_lang-1.10.0.dist-info/licenses/licenses/nanovdb-LICENSE.txt +2 -0
warp_lang-1.10.0.dist-info/licenses/licenses/nvrtc-LICENSE.txt +1592 -0
warp_lang-1.10.0.dist-info/licenses/licenses/svd-LICENSE.txt +23 -0
warp_lang-1.10.0.dist-info/licenses/licenses/unittest_parallel-LICENSE.txt +21 -0
warp_lang-1.10.0.dist-info/licenses/licenses/usd-LICENSE.txt +213 -0
warp_lang-1.10.0.dist-info/licenses/licenses/windingnumber-LICENSE.txt +21 -0
warp_lang-1.10.0.dist-info/top_level.txt +1 -0

warp/examples/benchmarks/benchmark_cloth_numpy.py ADDED Viewed

@@ -0,0 +1,85 @@
+# SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+def eval_springs(x, v, indices, rest, ke, kd, f):
+    i = indices[:, 0]
+    j = indices[:, 1]
+    xi = x[i]
+    xj = x[j]
+    vi = v[i]
+    vj = v[j]
+    xij = xi - xj
+    vij = vi - vj
+    l = np.linalg.norm(xij, axis=1)
+    l_inv = 1.0 / l
+    # normalized spring direction
+    dir = (xij.T * l_inv).T
+    c = l - rest
+    dcdt = np.sum(dir * vij, axis=1)
+    # damping based on relative velocity.
+    fs = dir.T * (ke * c + kd * dcdt)
+    np.add.at(f, i, -fs.T)
+    np.add.at(f, j, fs.T)
+def integrate_particles(x, v, f, w, dt):
+    g = np.array((0.0, 0.0 - 9.8, 0.0))
+    s = w > 0.0
+    a_ext = g * s[:, None]
+    # simple semi-implicit Euler. v1 = v0 + a dt, x1 = x0 + v1 dt
+    v += ((f.T * w).T + a_ext) * dt
+    x += v * dt
+    # clear forces
+    f *= 0.0
+class NpIntegrator:
+    def __init__(self, cloth):
+        self.cloth = cloth
+        self.forces = np.zeros((self.cloth.num_particles, 3), dtype=np.float32)
+    def simulate(self, dt, substeps):
+        sim_dt = dt / substeps
+        for _s in range(substeps):
+            eval_springs(
+                self.cloth.positions,
+                self.cloth.velocities,
+                self.cloth.spring_indices.reshape((self.cloth.num_springs, 2)),
+                self.cloth.spring_lengths,
+                self.cloth.spring_stiffness,
+                self.cloth.spring_damping,
+                self.forces,
+            )
+            # integrate
+            integrate_particles(self.cloth.positions, self.cloth.velocities, self.forces, self.cloth.inv_masses, sim_dt)
+        return self.cloth.positions

warp/examples/benchmarks/benchmark_cloth_paddle.py ADDED Viewed

@@ -0,0 +1,94 @@
+# SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import paddle
+def eval_springs(x, v, indices, rest, ke, kd, f):
+    i = indices[:, 0]
+    j = indices[:, 1]
+    xi = x[i]
+    xj = x[j]
+    vi = v[i]
+    vj = v[j]
+    xij = xi - xj
+    vij = vi - vj
+    l = paddle.linalg.norm(xij, axis=1)
+    l_inv = 1.0 / l
+    # normalized spring direction
+    dir = (xij.T * l_inv).T
+    c = l - rest
+    dcdt = paddle.sum(dir * vij, axis=1)
+    # damping based on relative velocity.
+    fs = dir.T * (ke * c + kd * dcdt)
+    f.index_add_(axis=0, index=i, value=-fs.T)
+    f.index_add_(axis=0, index=j, value=fs.T)
+def integrate_particles(x, v, f, g, w, dt):
+    s = w > 0.0
+    a_ext = g * s[:, None].astype(g.dtype)
+    # simple semi-implicit Euler. v1 = v0 + a dt, x1 = x0 + v1 dt
+    v += ((f.T * w).T + a_ext) * dt
+    x += v * dt
+    # clear forces
+    f *= 0.0
+class TrIntegrator:
+    def __init__(self, cloth, device):
+        self.cloth = cloth
+        self.positions = paddle.to_tensor(self.cloth.positions, place=device)
+        self.velocities = paddle.to_tensor(self.cloth.velocities, place=device)
+        self.inv_mass = paddle.to_tensor(self.cloth.inv_masses, place=device)
+        self.spring_indices = paddle.to_tensor(self.cloth.spring_indices, dtype=paddle.int64, place=device)
+        self.spring_lengths = paddle.to_tensor(self.cloth.spring_lengths, place=device)
+        self.spring_stiffness = paddle.to_tensor(self.cloth.spring_stiffness, place=device)
+        self.spring_damping = paddle.to_tensor(self.cloth.spring_damping, place=device)
+        self.forces = paddle.zeros((self.cloth.num_particles, 3), dtype=paddle.float32).to(device=device)
+        self.gravity = paddle.to_tensor((0.0, 0.0 - 9.8, 0.0), dtype=paddle.float32, place=device)
+    def simulate(self, dt, substeps):
+        sim_dt = dt / substeps
+        for _s in range(substeps):
+            eval_springs(
+                self.positions,
+                self.velocities,
+                self.spring_indices.reshape((self.cloth.num_springs, 2)),
+                self.spring_lengths,
+                self.spring_stiffness,
+                self.spring_damping,
+                self.forces,
+            )
+            # integrate
+            integrate_particles(self.positions, self.velocities, self.forces, self.gravity, self.inv_mass, sim_dt)
+        return self.positions.cpu().numpy()

warp/examples/benchmarks/benchmark_cloth_pytorch.py ADDED Viewed

@@ -0,0 +1,94 @@
+# SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import torch
+def eval_springs(x, v, indices, rest, ke, kd, f):
+    i = indices[:, 0]
+    j = indices[:, 1]
+    xi = x[i]
+    xj = x[j]
+    vi = v[i]
+    vj = v[j]
+    xij = xi - xj
+    vij = vi - vj
+    l = torch.linalg.norm(xij, axis=1)
+    l_inv = 1.0 / l
+    # normalized spring direction
+    dir = (xij.T * l_inv).T
+    c = l - rest
+    dcdt = torch.sum(dir * vij, axis=1)
+    # damping based on relative velocity.
+    fs = dir.T * (ke * c + kd * dcdt)
+    f.index_add_(dim=0, index=i, source=-fs.T)
+    f.index_add_(dim=0, index=j, source=fs.T)
+def integrate_particles(x, v, f, g, w, dt):
+    s = w > 0.0
+    a_ext = g * s[:, None]
+    # simple semi-implicit Euler. v1 = v0 + a dt, x1 = x0 + v1 dt
+    v += ((f.T * w).T + a_ext) * dt
+    x += v * dt
+    # clear forces
+    f *= 0.0
+class TrIntegrator:
+    def __init__(self, cloth, device):
+        self.cloth = cloth
+        self.positions = torch.tensor(self.cloth.positions, device=device)
+        self.velocities = torch.tensor(self.cloth.velocities, device=device)
+        self.inv_mass = torch.tensor(self.cloth.inv_masses, device=device)
+        self.spring_indices = torch.tensor(self.cloth.spring_indices, device=device, dtype=torch.long)
+        self.spring_lengths = torch.tensor(self.cloth.spring_lengths, device=device)
+        self.spring_stiffness = torch.tensor(self.cloth.spring_stiffness, device=device)
+        self.spring_damping = torch.tensor(self.cloth.spring_damping, device=device)
+        self.forces = torch.zeros((self.cloth.num_particles, 3), dtype=torch.float32, device=device)
+        self.gravity = torch.tensor((0.0, 0.0 - 9.8, 0.0), dtype=torch.float32, device=device)
+    def simulate(self, dt, substeps):
+        sim_dt = dt / substeps
+        for _s in range(substeps):
+            eval_springs(
+                self.positions,
+                self.velocities,
+                self.spring_indices.reshape((self.cloth.num_springs, 2)),
+                self.spring_lengths,
+                self.spring_stiffness,
+                self.spring_damping,
+                self.forces,
+            )
+            # integrate
+            integrate_particles(self.positions, self.velocities, self.forces, self.gravity, self.inv_mass, sim_dt)
+        return self.positions.cpu().numpy()

warp/examples/benchmarks/benchmark_cloth_taichi.py ADDED Viewed

@@ -0,0 +1,120 @@
+# SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import numpy as np
+import taichi as ti
+@ti.func
+def step(x):
+    ret = 0.0
+    if x < 0:
+        ret = 1
+    return ret
+@ti.data_oriented
+class TiIntegrator:
+    @ti.kernel
+    def eval_springs(self):
+        for tid in range(self.cloth.num_springs):
+            i = self.spring_indices[2 * tid]
+            j = self.spring_indices[2 * tid + 1]
+            ke = self.spring_stiffness[tid]
+            kd = self.spring_damping[tid]
+            rest = self.spring_lengths[tid]
+            xi = self.positions[i]
+            xj = self.positions[j]
+            vi = self.velocities[i]
+            vj = self.velocities[j]
+            xij = xi - xj
+            vij = vi - vj
+            l = xij.norm()
+            dir = xij.normalized()
+            c = l - rest
+            dcdt = dir.dot(vij)
+            fs = dir * (ke * c + kd * dcdt)
+            self.forces[i] -= fs
+            self.forces[j] += fs
+    @ti.kernel
+    def integrate_particles(self, dt: ti.f32):
+        for tid in range(self.cloth.num_particles):
+            x0 = self.positions[tid]
+            v0 = self.velocities[tid]
+            f0 = self.forces[tid]
+            w = self.inv_mass[tid]
+            g = ti.Vector([0.0, 0.0, 0.0])
+            if w > 0.0:
+                g = ti.Vector([0.0, -9.81, 0.0])
+            v1 = v0 + (f0 * w + g) * dt
+            x1 = x0 + v1 * dt
+            self.positions[tid] = x1
+            self.velocities[tid] = v1
+            self.forces[tid] = ti.Vector([0.0, 0.0, 0.0])
+    def __init__(self, cloth, device):
+        if device == "cpu":
+            ti.init(arch=ti.cpu)
+        elif device == "cuda":
+            ti.init(arch=ti.gpu)
+        else:
+            raise RuntimeError("Unsupported Taichi device")
+        self.cloth = cloth
+        self.positions = ti.Vector.field(3, dtype=ti.f32, shape=self.cloth.num_particles)
+        self.velocities = ti.Vector.field(3, dtype=ti.f32, shape=self.cloth.num_particles)
+        self.inv_mass = ti.field(ti.f32, shape=self.cloth.num_particles)
+        self.spring_indices = ti.field(ti.i32, shape=self.cloth.num_springs * 2)
+        self.spring_lengths = ti.field(ti.f32, shape=self.cloth.num_springs)
+        self.spring_stiffness = ti.field(ti.f32, shape=self.cloth.num_springs)
+        self.spring_damping = ti.field(ti.f32, shape=self.cloth.num_springs)
+        self.forces = ti.Vector.field(3, dtype=ti.f32, shape=self.cloth.num_particles)
+        # upload data
+        self.positions.from_numpy(cloth.positions)
+        self.velocities.from_numpy(cloth.velocities)
+        self.inv_mass.from_numpy(cloth.inv_masses)
+        self.forces.from_numpy(np.zeros_like(self.cloth.velocities))
+        self.spring_indices.from_numpy(cloth.spring_indices)
+        self.spring_lengths.from_numpy(cloth.spring_lengths)
+        self.spring_stiffness.from_numpy(cloth.spring_stiffness)
+        self.spring_damping.from_numpy(cloth.spring_damping)
+    def simulate(self, dt, substeps):
+        sim_dt = dt / substeps
+        for _s in range(substeps):
+            self.eval_springs()
+            self.integrate_particles(sim_dt)
+        return self.positions.to_numpy()

warp/examples/benchmarks/benchmark_cloth_warp.py ADDED Viewed

@@ -0,0 +1,153 @@
+# SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import warp as wp
+wp.clear_kernel_cache()  # module-level side effect: clears Warp's kernel cache whenever this file is imported
+@wp.kernel
+def eval_springs(
+    x: wp.array(dtype=wp.vec3),
+    v: wp.array(dtype=wp.vec3),
+    spring_indices: wp.array(dtype=int),
+    spring_rest_lengths: wp.array(dtype=float),
+    spring_stiffness: wp.array(dtype=float),
+    spring_damping: wp.array(dtype=float),
+    f: wp.array(dtype=wp.vec3),
+):
+    tid = wp.tid()
+    i = spring_indices[tid * 2 + 0]
+    j = spring_indices[tid * 2 + 1]
+    ke = spring_stiffness[tid]
+    kd = spring_damping[tid]
+    rest = spring_rest_lengths[tid]
+    xi = x[i]
+    xj = x[j]
+    vi = v[i]
+    vj = v[j]
+    xij = xi - xj
+    vij = vi - vj
+    l = wp.length(xij)
+    l_inv = 1.0 / l
+    # normalized spring direction
+    dir = xij * l_inv
+    c = l - rest
+    dcdt = wp.dot(dir, vij)
+    # damping based on relative velocity.
+    fs = dir * (ke * c + kd * dcdt)
+    wp.atomic_sub(f, i, fs)
+    wp.atomic_add(f, j, fs)
+@wp.kernel
+def integrate_particles(
+    x: wp.array(dtype=wp.vec3),
+    v: wp.array(dtype=wp.vec3),
+    f: wp.array(dtype=wp.vec3),
+    w: wp.array(dtype=float),
+    dt: float,
+):
+    tid = wp.tid()
+    x0 = x[tid]
+    v0 = v[tid]
+    f0 = f[tid]
+    inv_mass = w[tid]
+    g = wp.vec3()
+    # treat particles with inv_mass == 0 as kinematic
+    if inv_mass > 0.0:
+        g = wp.vec3(0.0, 0.0 - 9.81, 0.0)
+    # simple semi-implicit Euler. v1 = v0 + a dt, x1 = x0 + v1 dt
+    v1 = v0 + (f0 * inv_mass + g) * dt
+    x1 = x0 + v1 * dt
+    x[tid] = x1
+    v[tid] = v1
+    # clear forces
+    f[tid] = wp.vec3()
+class WpIntegrator:
+    def __init__(self, cloth, device):
+        self.device = wp.get_device(device)
+        with wp.ScopedDevice(self.device):
+            self.positions = wp.from_numpy(cloth.positions, dtype=wp.vec3)
+            self.positions_host = wp.from_numpy(cloth.positions, dtype=wp.vec3, device="cpu")
+            self.invmass = wp.from_numpy(cloth.inv_masses, dtype=float)
+            self.velocities = wp.zeros(cloth.num_particles, dtype=wp.vec3)
+            self.forces = wp.zeros(cloth.num_particles, dtype=wp.vec3)
+            self.spring_indices = wp.from_numpy(cloth.spring_indices, dtype=int)
+            self.spring_lengths = wp.from_numpy(cloth.spring_lengths, dtype=float)
+            self.spring_stiffness = wp.from_numpy(cloth.spring_stiffness, dtype=float)
+            self.spring_damping = wp.from_numpy(cloth.spring_damping, dtype=float)
+        self.cloth = cloth
+    def simulate(self, dt, substeps):
+        sim_dt = dt / substeps
+        for _s in range(substeps):
+            wp.launch(
+                kernel=eval_springs,
+                dim=self.cloth.num_springs,
+                inputs=[
+                    self.positions,
+                    self.velocities,
+                    self.spring_indices,
+                    self.spring_lengths,
+                    self.spring_stiffness,
+                    self.spring_damping,
+                    self.forces,
+                ],
+                outputs=[],
+                device=self.device,
+            )
+            # integrate
+            wp.launch(
+                kernel=integrate_particles,
+                dim=self.cloth.num_particles,
+                inputs=[self.positions, self.velocities, self.forces, self.invmass, sim_dt],
+                outputs=[],
+                device=self.device,
+            )
+        # copy data back to host
+        if self.device.is_cuda:
+            wp.copy(self.positions_host, self.positions)
+            wp.synchronize()
+            return self.positions_host.numpy()
+        else:
+            return self.positions.numpy()

warp/examples/benchmarks/benchmark_gemm.py ADDED Viewed

@@ -0,0 +1,164 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Compare GEMM performance between Torch and Warp (Tiled).
+This script can be used to identify optimal tile parameters for a fixed-size
+matrix multiplication.
+"""
+from itertools import product
+from statistics import mean, stdev
+from typing import List
+import numpy as np
+import torch
+import warp as wp
+# returns a kernel to compute a GEMM given m,n,k tile sizes
+def create_gemm_kernel(m, n, k):
+    TILE_M = m
+    TILE_N = n
+    TILE_K = k
+    @wp.kernel
+    def gemm(A: wp.array2d(dtype=float), B: wp.array2d(dtype=float), output: wp.array2d(dtype=float)):
+        i, j = wp.tid()
+        sum = wp.tile_zeros(shape=(TILE_M, TILE_N), dtype=wp.float32)
+        count = A.shape[1] // TILE_K
+        for k in range(count):
+            a = wp.tile_load(A, shape=(TILE_M, TILE_K), offset=(i * TILE_M, k * TILE_K))
+            b = wp.tile_load(B, shape=(TILE_K, TILE_N), offset=(k * TILE_K, j * TILE_N))
+            wp.tile_matmul(a, b, sum)
+        wp.tile_store(output, sum, offset=(i * TILE_M, j * TILE_N))
+    return gemm
+def benchmark_torch(A: torch.Tensor, B: torch.Tensor, warm_up: int, iterations: int):
+    # warm-up
+    for _ in range(warm_up):
+        torch.matmul(A, B)
+    torch.cuda.synchronize()
+    start_event = torch.cuda.Event(enable_timing=True)
+    end_event = torch.cuda.Event(enable_timing=True)
+    timing_results = []
+    for _i in range(iterations):
+        start_event.record()
+        torch.matmul(A, B)
+        end_event.record()
+        torch.cuda.synchronize()
+        timing_results.append(start_event.elapsed_time(end_event))
+    return mean(timing_results), stdev(timing_results)
+def benchmark_warp(A: wp.array, B: wp.array, config: List[int], warm_up: int, iterations: int):
+    TILE_M = config[0]
+    TILE_N = config[1]
+    TILE_K = config[2]
+    BLOCK_DIM = config[3]
+    mlp = create_gemm_kernel(TILE_M, TILE_N, TILE_K)
+    M = A.shape[0]
+    N = B.shape[1]
+    output = wp.zeros((M, N), dtype=float)
+    # create launch command
+    cmd = wp.launch_tiled(
+        kernel=mlp,
+        dim=[M // TILE_M, N // TILE_N],
+        inputs=[A, B, output],
+        block_dim=BLOCK_DIM,
+        record_cmd=True,
+    )
+    # warm-up
+    for _ in range(warm_up):
+        cmd.launch()
+    # check output
+    if warm_up > 0:
+        try:
+            np.testing.assert_allclose(output.numpy(), A.numpy() @ B.numpy(), atol=1e-3, rtol=1e-3)
+        except AssertionError as e:
+            print(f"Failed with {TILE_M=}, {TILE_N=}, {TILE_K=}, {BLOCK_DIM=}")
+            raise e
+    # benchmark
+    with wp.ScopedTimer("warp", print=False, synchronize=True, cuda_filter=wp.TIMING_KERNEL) as timer:
+        for _ in range(iterations):
+            cmd.launch()
+    timing_results = [result.elapsed for result in timer.timing_results]
+    return mean(timing_results), stdev(timing_results)
+if __name__ == "__main__":
+    torch.backends.cuda.matmul.allow_tf32 = False  # Disable TF32 for matrix multiplications
+    torch.backends.cudnn.allow_tf32 = False  # Disable TF32 for cuDNN operations
+    wp.init()
+    wp.clear_kernel_cache()
+    wp.set_module_options({"fast_math": True, "enable_backward": False})
+    tile_m = [8, 16, 32, 64]
+    tile_n = [8, 16, 32, 64]
+    tile_k = [8, 16, 64]
+    block = [32, 64, 128]
+    M = 1024
+    N = 1024
+    K = 1024
+    print(f"{M=}, {N=}, {K=}")
+    A = torch.randn(M, K).cuda()
+    B = torch.randn(K, N).cuda()
+    iterations = 100
+    warm_up = 5
+    time_torch_mean, time_torch_std = benchmark_torch(A, B, warm_up, iterations)
+    print(f"Torch: {time_torch_mean:.6g}±{time_torch_std:.2g} ms")
+    configs = list(product(tile_m, tile_n, tile_k, block))
+    wp.config.quiet = True
+    # header
+    print(
+        f"{'TILE_M':<8s} {'TILE_N':<8s} {'TILE_K':<8s} {'BLOCK':<8s} {'Time (ms)':<10s} {'Std dev (ms)':<14s} {'Warp/Torch':<12s}"
+    )
+    print("-" * 79)
+    for c in configs:
+        time_mean, time_std = benchmark_warp(wp.from_torch(A), wp.from_torch(B), c, warm_up, iterations)
+        print(
+            f"{c[0]:<8d} {c[1]:<8d} {c[2]:<8d} {c[3]:<8d} {time_mean:<10.6g} {time_std:<#14.2g} {time_mean / time_torch_mean:<12.6g}"
+        )