warp-lang 1.7.0__py3-none-manylinux_2_28_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +139 -0
- warp/__init__.pyi +1 -0
- warp/autograd.py +1142 -0
- warp/bin/warp-clang.so +0 -0
- warp/bin/warp.so +0 -0
- warp/build.py +557 -0
- warp/build_dll.py +405 -0
- warp/builtins.py +6855 -0
- warp/codegen.py +3969 -0
- warp/config.py +158 -0
- warp/constants.py +57 -0
- warp/context.py +6812 -0
- warp/dlpack.py +462 -0
- warp/examples/__init__.py +24 -0
- warp/examples/assets/bear.usd +0 -0
- warp/examples/assets/bunny.usd +0 -0
- warp/examples/assets/cartpole.urdf +110 -0
- warp/examples/assets/crazyflie.usd +0 -0
- warp/examples/assets/cube.usd +0 -0
- warp/examples/assets/nonuniform.usd +0 -0
- warp/examples/assets/nv_ant.xml +92 -0
- warp/examples/assets/nv_humanoid.xml +183 -0
- warp/examples/assets/nvidia_logo.png +0 -0
- warp/examples/assets/pixel.jpg +0 -0
- warp/examples/assets/quadruped.urdf +268 -0
- warp/examples/assets/rocks.nvdb +0 -0
- warp/examples/assets/rocks.usd +0 -0
- warp/examples/assets/sphere.usd +0 -0
- warp/examples/assets/square_cloth.usd +0 -0
- warp/examples/benchmarks/benchmark_api.py +389 -0
- warp/examples/benchmarks/benchmark_cloth.py +296 -0
- warp/examples/benchmarks/benchmark_cloth_cupy.py +96 -0
- warp/examples/benchmarks/benchmark_cloth_jax.py +105 -0
- warp/examples/benchmarks/benchmark_cloth_numba.py +161 -0
- warp/examples/benchmarks/benchmark_cloth_numpy.py +85 -0
- warp/examples/benchmarks/benchmark_cloth_paddle.py +94 -0
- warp/examples/benchmarks/benchmark_cloth_pytorch.py +94 -0
- warp/examples/benchmarks/benchmark_cloth_taichi.py +120 -0
- warp/examples/benchmarks/benchmark_cloth_warp.py +153 -0
- warp/examples/benchmarks/benchmark_gemm.py +164 -0
- warp/examples/benchmarks/benchmark_interop_paddle.py +166 -0
- warp/examples/benchmarks/benchmark_interop_torch.py +166 -0
- warp/examples/benchmarks/benchmark_launches.py +301 -0
- warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
- warp/examples/browse.py +37 -0
- warp/examples/core/example_cupy.py +86 -0
- warp/examples/core/example_dem.py +241 -0
- warp/examples/core/example_fluid.py +299 -0
- warp/examples/core/example_graph_capture.py +150 -0
- warp/examples/core/example_marching_cubes.py +194 -0
- warp/examples/core/example_mesh.py +180 -0
- warp/examples/core/example_mesh_intersect.py +211 -0
- warp/examples/core/example_nvdb.py +182 -0
- warp/examples/core/example_raycast.py +111 -0
- warp/examples/core/example_raymarch.py +205 -0
- warp/examples/core/example_render_opengl.py +193 -0
- warp/examples/core/example_sample_mesh.py +300 -0
- warp/examples/core/example_sph.py +411 -0
- warp/examples/core/example_torch.py +211 -0
- warp/examples/core/example_wave.py +269 -0
- warp/examples/fem/example_adaptive_grid.py +286 -0
- warp/examples/fem/example_apic_fluid.py +423 -0
- warp/examples/fem/example_burgers.py +261 -0
- warp/examples/fem/example_convection_diffusion.py +178 -0
- warp/examples/fem/example_convection_diffusion_dg.py +204 -0
- warp/examples/fem/example_deformed_geometry.py +172 -0
- warp/examples/fem/example_diffusion.py +196 -0
- warp/examples/fem/example_diffusion_3d.py +225 -0
- warp/examples/fem/example_diffusion_mgpu.py +220 -0
- warp/examples/fem/example_distortion_energy.py +228 -0
- warp/examples/fem/example_magnetostatics.py +240 -0
- warp/examples/fem/example_mixed_elasticity.py +291 -0
- warp/examples/fem/example_navier_stokes.py +261 -0
- warp/examples/fem/example_nonconforming_contact.py +298 -0
- warp/examples/fem/example_stokes.py +213 -0
- warp/examples/fem/example_stokes_transfer.py +262 -0
- warp/examples/fem/example_streamlines.py +352 -0
- warp/examples/fem/utils.py +1000 -0
- warp/examples/interop/example_jax_callable.py +116 -0
- warp/examples/interop/example_jax_ffi_callback.py +132 -0
- warp/examples/interop/example_jax_kernel.py +205 -0
- warp/examples/optim/example_bounce.py +266 -0
- warp/examples/optim/example_cloth_throw.py +228 -0
- warp/examples/optim/example_diffray.py +561 -0
- warp/examples/optim/example_drone.py +870 -0
- warp/examples/optim/example_fluid_checkpoint.py +497 -0
- warp/examples/optim/example_inverse_kinematics.py +182 -0
- warp/examples/optim/example_inverse_kinematics_torch.py +191 -0
- warp/examples/optim/example_softbody_properties.py +400 -0
- warp/examples/optim/example_spring_cage.py +245 -0
- warp/examples/optim/example_trajectory.py +227 -0
- warp/examples/sim/example_cartpole.py +143 -0
- warp/examples/sim/example_cloth.py +225 -0
- warp/examples/sim/example_cloth_self_contact.py +322 -0
- warp/examples/sim/example_granular.py +130 -0
- warp/examples/sim/example_granular_collision_sdf.py +202 -0
- warp/examples/sim/example_jacobian_ik.py +244 -0
- warp/examples/sim/example_particle_chain.py +124 -0
- warp/examples/sim/example_quadruped.py +203 -0
- warp/examples/sim/example_rigid_chain.py +203 -0
- warp/examples/sim/example_rigid_contact.py +195 -0
- warp/examples/sim/example_rigid_force.py +133 -0
- warp/examples/sim/example_rigid_gyroscopic.py +115 -0
- warp/examples/sim/example_rigid_soft_contact.py +140 -0
- warp/examples/sim/example_soft_body.py +196 -0
- warp/examples/tile/example_tile_cholesky.py +87 -0
- warp/examples/tile/example_tile_convolution.py +66 -0
- warp/examples/tile/example_tile_fft.py +55 -0
- warp/examples/tile/example_tile_filtering.py +113 -0
- warp/examples/tile/example_tile_matmul.py +85 -0
- warp/examples/tile/example_tile_mlp.py +383 -0
- warp/examples/tile/example_tile_nbody.py +199 -0
- warp/examples/tile/example_tile_walker.py +327 -0
- warp/fabric.py +355 -0
- warp/fem/__init__.py +106 -0
- warp/fem/adaptivity.py +508 -0
- warp/fem/cache.py +572 -0
- warp/fem/dirichlet.py +202 -0
- warp/fem/domain.py +411 -0
- warp/fem/field/__init__.py +125 -0
- warp/fem/field/field.py +619 -0
- warp/fem/field/nodal_field.py +326 -0
- warp/fem/field/restriction.py +37 -0
- warp/fem/field/virtual.py +848 -0
- warp/fem/geometry/__init__.py +32 -0
- warp/fem/geometry/adaptive_nanogrid.py +857 -0
- warp/fem/geometry/closest_point.py +84 -0
- warp/fem/geometry/deformed_geometry.py +221 -0
- warp/fem/geometry/element.py +776 -0
- warp/fem/geometry/geometry.py +362 -0
- warp/fem/geometry/grid_2d.py +392 -0
- warp/fem/geometry/grid_3d.py +452 -0
- warp/fem/geometry/hexmesh.py +911 -0
- warp/fem/geometry/nanogrid.py +571 -0
- warp/fem/geometry/partition.py +389 -0
- warp/fem/geometry/quadmesh.py +663 -0
- warp/fem/geometry/tetmesh.py +855 -0
- warp/fem/geometry/trimesh.py +806 -0
- warp/fem/integrate.py +2335 -0
- warp/fem/linalg.py +419 -0
- warp/fem/operator.py +293 -0
- warp/fem/polynomial.py +229 -0
- warp/fem/quadrature/__init__.py +17 -0
- warp/fem/quadrature/pic_quadrature.py +299 -0
- warp/fem/quadrature/quadrature.py +591 -0
- warp/fem/space/__init__.py +228 -0
- warp/fem/space/basis_function_space.py +468 -0
- warp/fem/space/basis_space.py +667 -0
- warp/fem/space/dof_mapper.py +251 -0
- warp/fem/space/function_space.py +309 -0
- warp/fem/space/grid_2d_function_space.py +177 -0
- warp/fem/space/grid_3d_function_space.py +227 -0
- warp/fem/space/hexmesh_function_space.py +257 -0
- warp/fem/space/nanogrid_function_space.py +201 -0
- warp/fem/space/partition.py +367 -0
- warp/fem/space/quadmesh_function_space.py +223 -0
- warp/fem/space/restriction.py +179 -0
- warp/fem/space/shape/__init__.py +143 -0
- warp/fem/space/shape/cube_shape_function.py +1105 -0
- warp/fem/space/shape/shape_function.py +133 -0
- warp/fem/space/shape/square_shape_function.py +926 -0
- warp/fem/space/shape/tet_shape_function.py +834 -0
- warp/fem/space/shape/triangle_shape_function.py +672 -0
- warp/fem/space/tetmesh_function_space.py +271 -0
- warp/fem/space/topology.py +424 -0
- warp/fem/space/trimesh_function_space.py +194 -0
- warp/fem/types.py +99 -0
- warp/fem/utils.py +420 -0
- warp/jax.py +187 -0
- warp/jax_experimental/__init__.py +16 -0
- warp/jax_experimental/custom_call.py +351 -0
- warp/jax_experimental/ffi.py +698 -0
- warp/jax_experimental/xla_ffi.py +602 -0
- warp/math.py +244 -0
- warp/native/array.h +1145 -0
- warp/native/builtin.h +1800 -0
- warp/native/bvh.cpp +492 -0
- warp/native/bvh.cu +791 -0
- warp/native/bvh.h +554 -0
- warp/native/clang/clang.cpp +536 -0
- warp/native/coloring.cpp +613 -0
- warp/native/crt.cpp +51 -0
- warp/native/crt.h +362 -0
- warp/native/cuda_crt.h +1058 -0
- warp/native/cuda_util.cpp +646 -0
- warp/native/cuda_util.h +307 -0
- warp/native/error.cpp +77 -0
- warp/native/error.h +36 -0
- warp/native/exports.h +1878 -0
- warp/native/fabric.h +245 -0
- warp/native/hashgrid.cpp +311 -0
- warp/native/hashgrid.cu +87 -0
- warp/native/hashgrid.h +240 -0
- warp/native/initializer_array.h +41 -0
- warp/native/intersect.h +1230 -0
- warp/native/intersect_adj.h +375 -0
- warp/native/intersect_tri.h +339 -0
- warp/native/marching.cpp +19 -0
- warp/native/marching.cu +514 -0
- warp/native/marching.h +19 -0
- warp/native/mat.h +2220 -0
- warp/native/mathdx.cpp +87 -0
- warp/native/matnn.h +343 -0
- warp/native/mesh.cpp +266 -0
- warp/native/mesh.cu +404 -0
- warp/native/mesh.h +1980 -0
- warp/native/nanovdb/GridHandle.h +366 -0
- warp/native/nanovdb/HostBuffer.h +590 -0
- warp/native/nanovdb/NanoVDB.h +6624 -0
- warp/native/nanovdb/PNanoVDB.h +3390 -0
- warp/native/noise.h +859 -0
- warp/native/quat.h +1371 -0
- warp/native/rand.h +342 -0
- warp/native/range.h +139 -0
- warp/native/reduce.cpp +174 -0
- warp/native/reduce.cu +364 -0
- warp/native/runlength_encode.cpp +79 -0
- warp/native/runlength_encode.cu +61 -0
- warp/native/scan.cpp +47 -0
- warp/native/scan.cu +53 -0
- warp/native/scan.h +23 -0
- warp/native/solid_angle.h +466 -0
- warp/native/sort.cpp +251 -0
- warp/native/sort.cu +277 -0
- warp/native/sort.h +33 -0
- warp/native/sparse.cpp +378 -0
- warp/native/sparse.cu +524 -0
- warp/native/spatial.h +657 -0
- warp/native/svd.h +702 -0
- warp/native/temp_buffer.h +46 -0
- warp/native/tile.h +2584 -0
- warp/native/tile_reduce.h +264 -0
- warp/native/vec.h +1426 -0
- warp/native/volume.cpp +501 -0
- warp/native/volume.cu +67 -0
- warp/native/volume.h +969 -0
- warp/native/volume_builder.cu +477 -0
- warp/native/volume_builder.h +52 -0
- warp/native/volume_impl.h +70 -0
- warp/native/warp.cpp +1082 -0
- warp/native/warp.cu +3636 -0
- warp/native/warp.h +381 -0
- warp/optim/__init__.py +17 -0
- warp/optim/adam.py +163 -0
- warp/optim/linear.py +1137 -0
- warp/optim/sgd.py +112 -0
- warp/paddle.py +407 -0
- warp/render/__init__.py +18 -0
- warp/render/render_opengl.py +3518 -0
- warp/render/render_usd.py +784 -0
- warp/render/utils.py +160 -0
- warp/sim/__init__.py +65 -0
- warp/sim/articulation.py +793 -0
- warp/sim/collide.py +2395 -0
- warp/sim/graph_coloring.py +300 -0
- warp/sim/import_mjcf.py +790 -0
- warp/sim/import_snu.py +227 -0
- warp/sim/import_urdf.py +579 -0
- warp/sim/import_usd.py +894 -0
- warp/sim/inertia.py +324 -0
- warp/sim/integrator.py +242 -0
- warp/sim/integrator_euler.py +1997 -0
- warp/sim/integrator_featherstone.py +2101 -0
- warp/sim/integrator_vbd.py +2048 -0
- warp/sim/integrator_xpbd.py +3292 -0
- warp/sim/model.py +4791 -0
- warp/sim/particles.py +121 -0
- warp/sim/render.py +427 -0
- warp/sim/utils.py +428 -0
- warp/sparse.py +2057 -0
- warp/stubs.py +3333 -0
- warp/tape.py +1203 -0
- warp/tests/__init__.py +1 -0
- warp/tests/__main__.py +4 -0
- warp/tests/assets/curlnoise_golden.npy +0 -0
- warp/tests/assets/mlp_golden.npy +0 -0
- warp/tests/assets/pixel.npy +0 -0
- warp/tests/assets/pnoise_golden.npy +0 -0
- warp/tests/assets/spiky.usd +0 -0
- warp/tests/assets/test_grid.nvdb +0 -0
- warp/tests/assets/test_index_grid.nvdb +0 -0
- warp/tests/assets/test_int32_grid.nvdb +0 -0
- warp/tests/assets/test_vec_grid.nvdb +0 -0
- warp/tests/assets/torus.nvdb +0 -0
- warp/tests/assets/torus.usda +105 -0
- warp/tests/aux_test_class_kernel.py +34 -0
- warp/tests/aux_test_compile_consts_dummy.py +18 -0
- warp/tests/aux_test_conditional_unequal_types_kernels.py +29 -0
- warp/tests/aux_test_dependent.py +29 -0
- warp/tests/aux_test_grad_customs.py +29 -0
- warp/tests/aux_test_instancing_gc.py +26 -0
- warp/tests/aux_test_module_unload.py +23 -0
- warp/tests/aux_test_name_clash1.py +40 -0
- warp/tests/aux_test_name_clash2.py +40 -0
- warp/tests/aux_test_reference.py +9 -0
- warp/tests/aux_test_reference_reference.py +8 -0
- warp/tests/aux_test_square.py +16 -0
- warp/tests/aux_test_unresolved_func.py +22 -0
- warp/tests/aux_test_unresolved_symbol.py +22 -0
- warp/tests/cuda/__init__.py +0 -0
- warp/tests/cuda/test_async.py +676 -0
- warp/tests/cuda/test_ipc.py +124 -0
- warp/tests/cuda/test_mempool.py +233 -0
- warp/tests/cuda/test_multigpu.py +169 -0
- warp/tests/cuda/test_peer.py +139 -0
- warp/tests/cuda/test_pinned.py +84 -0
- warp/tests/cuda/test_streams.py +634 -0
- warp/tests/geometry/__init__.py +0 -0
- warp/tests/geometry/test_bvh.py +200 -0
- warp/tests/geometry/test_hash_grid.py +221 -0
- warp/tests/geometry/test_marching_cubes.py +74 -0
- warp/tests/geometry/test_mesh.py +316 -0
- warp/tests/geometry/test_mesh_query_aabb.py +399 -0
- warp/tests/geometry/test_mesh_query_point.py +932 -0
- warp/tests/geometry/test_mesh_query_ray.py +311 -0
- warp/tests/geometry/test_volume.py +1103 -0
- warp/tests/geometry/test_volume_write.py +346 -0
- warp/tests/interop/__init__.py +0 -0
- warp/tests/interop/test_dlpack.py +729 -0
- warp/tests/interop/test_jax.py +371 -0
- warp/tests/interop/test_paddle.py +800 -0
- warp/tests/interop/test_torch.py +1001 -0
- warp/tests/run_coverage_serial.py +39 -0
- warp/tests/sim/__init__.py +0 -0
- warp/tests/sim/disabled_kinematics.py +244 -0
- warp/tests/sim/flaky_test_sim_grad.py +290 -0
- warp/tests/sim/test_collision.py +604 -0
- warp/tests/sim/test_coloring.py +258 -0
- warp/tests/sim/test_model.py +224 -0
- warp/tests/sim/test_sim_grad_bounce_linear.py +212 -0
- warp/tests/sim/test_sim_kinematics.py +98 -0
- warp/tests/sim/test_vbd.py +597 -0
- warp/tests/test_adam.py +163 -0
- warp/tests/test_arithmetic.py +1096 -0
- warp/tests/test_array.py +2972 -0
- warp/tests/test_array_reduce.py +156 -0
- warp/tests/test_assert.py +250 -0
- warp/tests/test_atomic.py +153 -0
- warp/tests/test_bool.py +220 -0
- warp/tests/test_builtins_resolution.py +1298 -0
- warp/tests/test_closest_point_edge_edge.py +327 -0
- warp/tests/test_codegen.py +810 -0
- warp/tests/test_codegen_instancing.py +1495 -0
- warp/tests/test_compile_consts.py +215 -0
- warp/tests/test_conditional.py +252 -0
- warp/tests/test_context.py +42 -0
- warp/tests/test_copy.py +238 -0
- warp/tests/test_ctypes.py +638 -0
- warp/tests/test_dense.py +73 -0
- warp/tests/test_devices.py +97 -0
- warp/tests/test_examples.py +482 -0
- warp/tests/test_fabricarray.py +996 -0
- warp/tests/test_fast_math.py +74 -0
- warp/tests/test_fem.py +2003 -0
- warp/tests/test_fp16.py +136 -0
- warp/tests/test_func.py +454 -0
- warp/tests/test_future_annotations.py +98 -0
- warp/tests/test_generics.py +656 -0
- warp/tests/test_grad.py +893 -0
- warp/tests/test_grad_customs.py +339 -0
- warp/tests/test_grad_debug.py +341 -0
- warp/tests/test_implicit_init.py +411 -0
- warp/tests/test_import.py +45 -0
- warp/tests/test_indexedarray.py +1140 -0
- warp/tests/test_intersect.py +73 -0
- warp/tests/test_iter.py +76 -0
- warp/tests/test_large.py +177 -0
- warp/tests/test_launch.py +411 -0
- warp/tests/test_lerp.py +151 -0
- warp/tests/test_linear_solvers.py +193 -0
- warp/tests/test_lvalue.py +427 -0
- warp/tests/test_mat.py +2089 -0
- warp/tests/test_mat_lite.py +122 -0
- warp/tests/test_mat_scalar_ops.py +2913 -0
- warp/tests/test_math.py +178 -0
- warp/tests/test_mlp.py +282 -0
- warp/tests/test_module_hashing.py +258 -0
- warp/tests/test_modules_lite.py +44 -0
- warp/tests/test_noise.py +252 -0
- warp/tests/test_operators.py +299 -0
- warp/tests/test_options.py +129 -0
- warp/tests/test_overwrite.py +551 -0
- warp/tests/test_print.py +339 -0
- warp/tests/test_quat.py +2315 -0
- warp/tests/test_rand.py +339 -0
- warp/tests/test_reload.py +302 -0
- warp/tests/test_rounding.py +185 -0
- warp/tests/test_runlength_encode.py +196 -0
- warp/tests/test_scalar_ops.py +105 -0
- warp/tests/test_smoothstep.py +108 -0
- warp/tests/test_snippet.py +318 -0
- warp/tests/test_sparse.py +582 -0
- warp/tests/test_spatial.py +2229 -0
- warp/tests/test_special_values.py +361 -0
- warp/tests/test_static.py +592 -0
- warp/tests/test_struct.py +734 -0
- warp/tests/test_tape.py +204 -0
- warp/tests/test_transient_module.py +93 -0
- warp/tests/test_triangle_closest_point.py +145 -0
- warp/tests/test_types.py +562 -0
- warp/tests/test_utils.py +588 -0
- warp/tests/test_vec.py +1487 -0
- warp/tests/test_vec_lite.py +80 -0
- warp/tests/test_vec_scalar_ops.py +2327 -0
- warp/tests/test_verify_fp.py +100 -0
- warp/tests/tile/__init__.py +0 -0
- warp/tests/tile/test_tile.py +780 -0
- warp/tests/tile/test_tile_load.py +407 -0
- warp/tests/tile/test_tile_mathdx.py +208 -0
- warp/tests/tile/test_tile_mlp.py +402 -0
- warp/tests/tile/test_tile_reduce.py +447 -0
- warp/tests/tile/test_tile_shared_memory.py +247 -0
- warp/tests/tile/test_tile_view.py +173 -0
- warp/tests/unittest_serial.py +47 -0
- warp/tests/unittest_suites.py +427 -0
- warp/tests/unittest_utils.py +468 -0
- warp/tests/walkthrough_debug.py +93 -0
- warp/thirdparty/__init__.py +0 -0
- warp/thirdparty/appdirs.py +598 -0
- warp/thirdparty/dlpack.py +145 -0
- warp/thirdparty/unittest_parallel.py +570 -0
- warp/torch.py +391 -0
- warp/types.py +5230 -0
- warp/utils.py +1137 -0
- warp_lang-1.7.0.dist-info/METADATA +516 -0
- warp_lang-1.7.0.dist-info/RECORD +429 -0
- warp_lang-1.7.0.dist-info/WHEEL +5 -0
- warp_lang-1.7.0.dist-info/licenses/LICENSE.md +202 -0
- warp_lang-1.7.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,389 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
import gc
|
|
17
|
+
import statistics as stats
|
|
18
|
+
|
|
19
|
+
import warp as wp
|
|
20
|
+
|
|
21
|
+
# --- per-device feature switches (applied to every CUDA device below) ---
ENABLE_MEMPOOLS = False
ENABLE_PEER_ACCESS = False
ENABLE_MEMPOOL_ACCESS = False
# when True, MEMPOOL_RELEASE_THRESHOLD is pushed to each device's mempool
ENABLE_MEMPOOL_RELEASE_THRESHOLD = False

# 1024 ** 3; presumably a byte count (1 GiB) -- confirm against the Warp API
MEMPOOL_RELEASE_THRESHOLD = 1024**3

# --- measurement switches ---
DO_SYNC = False  # synchronize the device as part of each benchmarked operation
VERBOSE = False  # forwarded to ScopedTimer(print=...)
USE_NVTX = False  # forwarded to ScopedTimer(use_nvtx=...)

# --- workload size ---
num_elems = 10_000  # elements per benchmark array
num_runs = 10_000  # samples collected per benchmark
trim_runs = 2_500  # default number of samples dropped from each end when reporting
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
# Minimal kernel used by the launch and graph benchmarks: each thread adds 1.0
# to the array element at its own thread index.
@wp.kernel
def inc_kernel(a: wp.array(dtype=float)):
    i = wp.tid()
    a[i] = a[i] + 1.0
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
# configure devices
#
# Every setting is applied on a best-effort basis: a failure is printed and the
# script carries on with the remaining devices / device pairs.
for target_device in wp.get_cuda_devices():
    # per-device memory pool configuration
    try:
        wp.set_mempool_enabled(target_device, ENABLE_MEMPOOLS)
        if ENABLE_MEMPOOL_RELEASE_THRESHOLD:
            wp.set_mempool_release_threshold(target_device, MEMPOOL_RELEASE_THRESHOLD)
    except Exception as e:
        print(f"Error: {e}")

    # pairwise access configuration against every CUDA device
    for peer_device in wp.get_cuda_devices():
        try:
            wp.set_peer_access_enabled(target_device, peer_device, ENABLE_PEER_ACCESS)
        except Exception as e:
            print(f"Error: {e}")

        try:
            wp.set_mempool_access_enabled(target_device, peer_device, ENABLE_MEMPOOL_ACCESS)
        except Exception as e:
            print(f"Error: {e}")
|
|
62
|
+
|
|
63
|
+
cuda_device_count = wp.get_cuda_device_count()

# primary device; all single-GPU benchmarks run here
cuda0 = wp.get_device("cuda:0")

# preallocate some arrays
# host buffers: one pageable, one pinned, so both copy paths can be timed
arr_host = wp.zeros(num_elems, dtype=float, device="cpu", pinned=False)
arr_host_pinned = wp.zeros(num_elems, dtype=float, device="cpu", pinned=True)
# device buffers: a general-purpose one plus a dedicated src/dst pair for d2d
arr_cuda0 = wp.zeros(num_elems, dtype=float, device=cuda0)
arr_cuda0_src = wp.zeros(num_elems, dtype=float, device=cuda0)
arr_cuda0_dst = wp.zeros(num_elems, dtype=float, device=cuda0)

# mgpu support
# cuda1 / arr_cuda1 only exist when a second CUDA device is present
if cuda_device_count > 1:
    cuda1 = wp.get_device("cuda:1")
    arr_cuda1 = wp.zeros(num_elems, dtype=float, device=cuda1)

# explicit stream on cuda0 used by the "*_stream" benchmarks
stream0 = wp.Stream(cuda0)

# preload module
wp.force_load(cuda0)
if cuda_device_count > 1:
    wp.force_load(cuda1)

# capture graph
# graph0 records a single inc_kernel launch over arr_cuda0
with wp.ScopedDevice(cuda0):
    wp.capture_begin()
    wp.launch(inc_kernel, dim=arr_cuda0.size, inputs=[arr_cuda0])
    graph0 = wp.capture_end()


# slots that keep the arrays created by test_alloc / test_zeros referenced
g_allocs = [None] * num_runs
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def test_alloc(num_elems, device, idx):
    """Time a single ``wp.empty`` allocation of ``num_elems`` floats on ``device``.

    The new array is stored in ``g_allocs[idx]`` so it stays referenced after
    the call. Returns the ``elapsed`` value of the scoped timer.
    """
    # finish any outstanding work before the measurement starts
    wp.synchronize()

    with wp.ScopedTimer("alloc", print=VERBOSE, use_nvtx=USE_NVTX) as scope:
        g_allocs[idx] = wp.empty(num_elems, dtype=float, device=device)

        # optional device sync inside the timed region
        if DO_SYNC:
            wp.synchronize_device(device)

    return scope.elapsed
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def test_free(device, idx):
    """Time clearing slot ``idx`` of ``g_allocs``.

    Setting the slot to ``None`` drops the reference stored there (presumably
    releasing the array if nothing else refers to it). Returns the ``elapsed``
    value of the scoped timer.
    """
    # finish any outstanding work before the measurement starts
    wp.synchronize()

    with wp.ScopedTimer("free", print=VERBOSE, use_nvtx=USE_NVTX) as scope:
        g_allocs[idx] = None

        # optional device sync inside the timed region
        if DO_SYNC:
            wp.synchronize_device(device)

    return scope.elapsed
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def test_zeros(num_elems, device, idx):
    """Time a single ``wp.zeros`` allocation of ``num_elems`` floats on ``device``.

    The new array is stored in ``g_allocs[idx]``. Returns the ``elapsed`` value
    of the scoped timer.
    """
    # finish any outstanding work before the measurement starts
    wp.synchronize()

    with wp.ScopedTimer("zeros", print=VERBOSE, use_nvtx=USE_NVTX) as scope:
        g_allocs[idx] = wp.zeros(num_elems, dtype=float, device=device)

        # optional device sync inside the timed region
        if DO_SYNC:
            wp.synchronize_device(device)

    return scope.elapsed
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def test_h2d(num_elems, device):
    """Time one copy from the pageable host buffer into ``arr_cuda0``.

    ``num_elems`` is not read here; the preallocated module-level buffers fix
    the transfer size. ``device`` is only used for the optional sync. Returns
    the ``elapsed`` value of the scoped timer.
    """
    # finish any outstanding work before the measurement starts
    wp.synchronize()

    with wp.ScopedTimer("h2d", print=VERBOSE, use_nvtx=USE_NVTX) as scope:
        wp.copy(arr_cuda0, arr_host)

        # optional device sync inside the timed region
        if DO_SYNC:
            wp.synchronize_device(device)

    return scope.elapsed
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def test_d2h(num_elems, device):
    """Time one copy from ``arr_cuda0`` into the pageable host buffer.

    ``num_elems`` is not read here; the preallocated module-level buffers fix
    the transfer size. ``device`` is only used for the optional sync. Returns
    the ``elapsed`` value of the scoped timer.
    """
    # finish any outstanding work before the measurement starts
    wp.synchronize()

    with wp.ScopedTimer("d2h", print=VERBOSE, use_nvtx=USE_NVTX) as scope:
        wp.copy(arr_host, arr_cuda0)

        # optional device sync inside the timed region
        if DO_SYNC:
            wp.synchronize_device(device)

    return scope.elapsed
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def test_h2d_pinned(num_elems, device):
    """Time one copy from the pinned host buffer into ``arr_cuda0``.

    ``num_elems`` is not read here; the preallocated module-level buffers fix
    the transfer size. ``device`` is only used for the optional sync. Returns
    the ``elapsed`` value of the scoped timer.
    """
    # finish any outstanding work before the measurement starts
    wp.synchronize()

    with wp.ScopedTimer("h2d pinned", print=VERBOSE, use_nvtx=USE_NVTX) as scope:
        wp.copy(arr_cuda0, arr_host_pinned)

        # optional device sync inside the timed region
        if DO_SYNC:
            wp.synchronize_device(device)

    return scope.elapsed
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def test_d2h_pinned(num_elems, device):
    """Time one copy from ``arr_cuda0`` into the pinned host buffer.

    ``num_elems`` is not read here; the preallocated module-level buffers fix
    the transfer size. ``device`` is only used for the optional sync. Returns
    the ``elapsed`` value of the scoped timer.
    """
    # finish any outstanding work before the measurement starts
    wp.synchronize()

    with wp.ScopedTimer("d2h pinned", print=VERBOSE, use_nvtx=USE_NVTX) as scope:
        wp.copy(arr_host_pinned, arr_cuda0)

        # optional device sync inside the timed region
        if DO_SYNC:
            wp.synchronize_device(device)

    return scope.elapsed
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def test_d2d(num_elems, device):
    """Time one copy from ``arr_cuda0_src`` to ``arr_cuda0_dst`` (both on cuda0).

    ``num_elems`` is not read here; the preallocated module-level buffers fix
    the transfer size. ``device`` is only used for the optional sync. Returns
    the ``elapsed`` value of the scoped timer.
    """
    # finish any outstanding work before the measurement starts
    wp.synchronize()

    with wp.ScopedTimer("d2d", print=VERBOSE, use_nvtx=USE_NVTX) as scope:
        wp.copy(arr_cuda0_dst, arr_cuda0_src)

        # optional device sync inside the timed region
        if DO_SYNC:
            wp.synchronize_device(device)

    return scope.elapsed
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def test_p2p(num_elems, src_device, dst_device):
    """Time one copy from ``arr_cuda1`` into ``arr_cuda0``.

    The buffers are fixed module-level arrays; ``src_device`` and
    ``dst_device`` only select which devices are synchronized when ``DO_SYNC``
    is set, and ``num_elems`` is not read. Requires ``arr_cuda1`` to exist,
    i.e. a second CUDA device. Returns the ``elapsed`` value of the scoped
    timer.
    """
    # finish any outstanding work before the measurement starts
    wp.synchronize()

    with wp.ScopedTimer("p2p", print=VERBOSE, use_nvtx=USE_NVTX) as scope:
        wp.copy(arr_cuda0, arr_cuda1)

        # optional sync of both endpoints inside the timed region
        if DO_SYNC:
            wp.synchronize_device(src_device)
            wp.synchronize_device(dst_device)

    return scope.elapsed
|
|
203
|
+
|
|
204
|
+
|
|
205
|
+
def test_p2p_stream(num_elems, src_device, dst_device):
    """Time one copy from ``arr_cuda1`` into ``arr_cuda0`` issued on ``stream0``.

    The buffers are fixed module-level arrays; ``src_device`` and
    ``dst_device`` only select which devices are synchronized when ``DO_SYNC``
    is set, and ``num_elems`` is not read. Requires ``arr_cuda1`` to exist,
    i.e. a second CUDA device. Returns the ``elapsed`` value of the scoped
    timer.
    """
    # finish any outstanding work before the measurement starts
    wp.synchronize()

    with wp.ScopedTimer("p2p stream", print=VERBOSE, use_nvtx=USE_NVTX) as scope:
        wp.copy(arr_cuda0, arr_cuda1, stream=stream0)

        # optional sync of both endpoints inside the timed region
        if DO_SYNC:
            wp.synchronize_device(src_device)
            wp.synchronize_device(dst_device)

    return scope.elapsed
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def test_launch(num_elems, device):
    """Time one ``inc_kernel`` launch over ``arr_cuda0`` on ``device``.

    ``num_elems`` is not read here; the launch dimension comes from the size of
    the preallocated array. Returns the ``elapsed`` value of the scoped timer.
    """
    # finish any outstanding work before the measurement starts
    wp.synchronize()

    with wp.ScopedTimer("launch", print=VERBOSE, use_nvtx=USE_NVTX) as scope:
        wp.launch(inc_kernel, dim=arr_cuda0.size, inputs=[arr_cuda0], device=device)

        # optional device sync inside the timed region
        if DO_SYNC:
            wp.synchronize_device(device)

    return scope.elapsed
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
def test_launch_stream(num_elems, device):
    """Time one ``inc_kernel`` launch over ``arr_cuda0`` issued on ``stream0``.

    ``num_elems`` is not read here; the launch dimension comes from the size of
    the preallocated array. ``device`` is only used for the optional sync.
    Returns the ``elapsed`` value of the scoped timer.
    """
    # finish any outstanding work before the measurement starts
    wp.synchronize()

    with wp.ScopedTimer("launch stream", print=VERBOSE, use_nvtx=USE_NVTX) as scope:
        wp.launch(inc_kernel, dim=arr_cuda0.size, inputs=[arr_cuda0], stream=stream0)

        # optional device sync inside the timed region
        if DO_SYNC:
            wp.synchronize_device(device)

    return scope.elapsed
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def test_graph(num_elems, device):
    """Time one launch of the pre-captured ``graph0``.

    ``num_elems`` is not read here. ``device`` is only used for the optional
    sync. Returns the ``elapsed`` value of the scoped timer.
    """
    # finish any outstanding work before the measurement starts
    wp.synchronize()

    with wp.ScopedTimer("graph", print=VERBOSE, use_nvtx=USE_NVTX) as scope:
        wp.capture_launch(graph0)

        # optional device sync inside the timed region
        if DO_SYNC:
            wp.synchronize_device(device)

    return scope.elapsed
|
|
259
|
+
|
|
260
|
+
|
|
261
|
+
def test_graph_stream(num_elems, device):
    """Time one launch of the pre-captured ``graph0`` issued on ``stream0``.

    ``num_elems`` is not read here. ``device`` is only used for the optional
    sync. Returns the ``elapsed`` value of the scoped timer.
    """
    # finish any outstanding work before the measurement starts
    wp.synchronize()

    # Label is "graph stream" (not "graph") so verbose / NVTX output can be
    # told apart from test_graph, matching the "<op> stream" naming used by the
    # other stream benchmarks in this file.
    with wp.ScopedTimer("graph stream", print=VERBOSE, use_nvtx=USE_NVTX) as timer:
        wp.capture_launch(graph0, stream=stream0)

        # optional device sync inside the timed region
        if DO_SYNC:
            wp.synchronize_device(device)

    return timer.elapsed
|
|
271
|
+
|
|
272
|
+
|
|
273
|
+
# One independent list of num_runs samples per benchmark. Lists that are never
# filled (e.g. the p2p ones on a single-GPU machine) keep their zeros.

# allocation
alloc_times = [0] * num_runs
free_times = [0] * num_runs
zeros_times = [0] * num_runs

# copies
d2h_times = [0] * num_runs
h2d_times = [0] * num_runs
d2h_pinned_times = [0] * num_runs
h2d_pinned_times = [0] * num_runs
d2d_times = [0] * num_runs
p2p_times = [0] * num_runs
p2p_stream_times = [0] * num_runs

# kernel and graph launches
launch_times = [0] * num_runs
launch_stream_times = [0] * num_runs
graph_times = [0] * num_runs
graph_stream_times = [0] * num_runs

# make cuda0 the current device for the benchmark loops that follow
wp.set_device(cuda0)
|
|
289
|
+
|
|
290
|
+
# Each measurement runs with the Python garbage collector switched off and
# switches it back on right afterwards.

# alloc
for run in range(num_runs):
    gc.disable()
    alloc_times[run] = test_alloc(num_elems, cuda0, run)
    gc.enable()

# free
for run in range(num_runs):
    gc.disable()
    free_times[run] = test_free(cuda0, run)
    gc.enable()

# zeros
for run in range(num_runs):
    gc.disable()
    zeros_times[run] = test_zeros(num_elems, cuda0, run)
    gc.enable()

# free zeros (untimed: just clear the slots filled by the zeros benchmark)
for run in range(num_runs):
    g_allocs[run] = None

# h2d, d2h pageable copy
for run in range(num_runs):
    gc.disable()
    h2d_times[run] = test_h2d(num_elems, cuda0)
    d2h_times[run] = test_d2h(num_elems, cuda0)
    gc.enable()

# h2d, d2h pinned copy
for run in range(num_runs):
    gc.disable()
    h2d_pinned_times[run] = test_h2d_pinned(num_elems, cuda0)
    d2h_pinned_times[run] = test_d2h_pinned(num_elems, cuda0)
    gc.enable()

# d2d copy
for run in range(num_runs):
    gc.disable()
    d2d_times[run] = test_d2d(num_elems, cuda0)
    gc.enable()

# p2p copy (needs a second CUDA device)
if cuda_device_count > 1:
    for run in range(num_runs):
        gc.disable()
        p2p_times[run] = test_p2p(num_elems, cuda1, cuda0)
        p2p_stream_times[run] = test_p2p_stream(num_elems, cuda1, cuda0)
        gc.enable()

# launch
for run in range(num_runs):
    gc.disable()
    launch_times[run] = test_launch(num_elems, cuda0)
    launch_stream_times[run] = test_launch_stream(num_elems, cuda0)
    gc.enable()

# graph
for run in range(num_runs):
    gc.disable()
    graph_times[run] = test_graph(num_elems, cuda0)
    graph_stream_times[run] = test_graph_stream(num_elems, cuda0)
    gc.enable()
|
|
353
|
+
|
|
354
|
+
|
|
355
|
+
def print_stat(name, data, trim=trim_runs):
    """Print the trimmed mean of ``data``, scaled by 1e6, next to ``name``.

    Args:
        name: Label for the row; left-aligned in a 15-character column.
        data: Sequence of numeric samples.
        trim: Number of samples discarded from each end of the sorted data
            before averaging. With ``trim <= 0`` the data is averaged as-is.

    Raises:
        ValueError: If no sample would be left after trimming.
    """
    # Explicit check instead of ``assert``: assertions are stripped under
    # ``python -O``, which would otherwise surface as a StatisticsError from
    # ``stats.mean`` on an empty slice.
    if len(data) - 2 * trim <= 0:
        raise ValueError(f"cannot trim {trim} samples from each end of {len(data)} samples")

    if trim > 0:
        data = sorted(data)[trim:-trim]

    # NOTE(review): the resulting unit depends on what ``timer.elapsed``
    # reports in the callers -- confirm before labelling the column.
    print(f"{name:15s} {1000000 * stats.mean(data):.0f}")
|
|
360
|
+
|
|
361
|
+
|
|
362
|
+
# Summary table: one print_stat row per benchmark, in a fixed order.
print("=========================")
for label, samples in (
    ("Alloc", alloc_times),
    ("Free", free_times),
    ("Zeros", zeros_times),
    ("H2D", h2d_times),
    ("D2H", d2h_times),
    ("H2D pinned", h2d_pinned_times),
    ("D2H pinned", d2h_pinned_times),
    ("D2D", d2d_times),
    ("P2P", p2p_times),
    ("P2P stream", p2p_stream_times),
    ("Launch", launch_times),
    ("Launch stream", launch_stream_times),
    ("Graph", graph_times),
    ("Graph stream", graph_stream_times),
):
    print_stat(label, samples)
|
|
377
|
+
|
|
378
|
+
|
|
379
|
+
# ========= profiling ==========
|
|
380
|
+
|
|
381
|
+
# from pyinstrument import Profiler
|
|
382
|
+
# profiler = Profiler()
|
|
383
|
+
# profiler.start()
|
|
384
|
+
# for i in range(10):
|
|
385
|
+
# # test_alloc(num_elems, cuda0)
|
|
386
|
+
# # test_h2d(num_elems, cuda0)
|
|
387
|
+
# test_p2p(num_elems, cuda0, cuda1)
|
|
388
|
+
# profiler.stop()
|
|
389
|
+
# print(profiler.output_text(show_all=True))
|
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
# add this script's own directory to the import path (the sys.path entry below joins with ".", not the parent)
|
|
17
|
+
import csv
|
|
18
|
+
import os
|
|
19
|
+
import sys
|
|
20
|
+
|
|
21
|
+
import numpy as np
|
|
22
|
+
|
|
23
|
+
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".")))
|
|
24
|
+
|
|
25
|
+
from pxr import Usd, UsdGeom
|
|
26
|
+
|
|
27
|
+
import warp as wp
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class Cloth:
    """Mass-spring cloth built on a regular ``dx`` x ``dy`` grid of particles.

    Particle ``(x, y)`` is stored at flat index ``y * dx + x`` and placed at
    ``lower + radius * (x, 0, y)``, i.e. the grid spans the first and third
    coordinate axes with a spacing of ``radius``.

    After construction the instance exposes:

    * ``positions``, ``velocities``: ``float32`` arrays with one 3-vector per particle
    * ``inv_masses``: ``float32`` array, ``1 / mass`` per particle (``0`` for fixed corners)
    * ``spring_indices``: flat ``int32`` array holding two particle indices per spring
    * ``spring_lengths``, ``spring_stiffness``, ``spring_damping``: ``float32``, one entry per spring
    * ``triangles``: flat Python list holding three particle indices per triangle
    * ``num_particles``, ``num_springs``, ``num_tris``: element counts

    Args:
        lower: 3-component origin of the grid.
        dx: Number of particles along the first grid axis.
        dy: Number of particles along the second grid axis.
        radius: Spacing between neighbouring particles.
        stretch_stiffness: Stiffness of springs between direct neighbours.
        bend_stiffness: Stiffness of springs that skip one particle along a
            row or column; no such springs are created unless this is > 0.
        shear_stiffness: Stiffness of the diagonal springs; no such springs
            are created unless this is > 0.
        mass: Mass of each particle (must be non-zero, it is inverted).
        fix_corners: If true, the two particles at the ends of row ``y == 0``
            are given zero inverse mass.
    """

    def __init__(
        self, lower, dx, dy, radius, stretch_stiffness, bend_stiffness, shear_stiffness, mass, fix_corners=True
    ):
        self.triangles = []

        self.positions = []
        self.velocities = []
        self.inv_masses = []

        self.spring_indices = []
        self.spring_lengths = []
        self.spring_stiffness = []
        self.spring_damping = []

        def grid(x, y, stride):
            # flat particle index of grid cell (x, y)
            return y * stride + x

        def create_spring(i, j, stiffness, damp=10.0):
            # rest length is the initial distance between the two particles;
            # positions are still a list of ndarrays at this point
            length = np.linalg.norm(self.positions[i] - self.positions[j])

            self.spring_indices.append(i)
            self.spring_indices.append(j)
            self.spring_lengths.append(length)
            self.spring_stiffness.append(stiffness)
            self.spring_damping.append(damp)

        # loop-invariant origin of the grid
        origin = np.array(lower)

        # particles and triangles, in row-major order
        for y in range(dy):
            for x in range(dx):
                p = origin + radius * np.array((float(x), 0.0, float(y)))

                self.positions.append(p)
                self.velocities.append(np.zeros(3))

                if x > 0 and y > 0:
                    # two triangles covering the quad whose last corner is (x, y)
                    self.triangles.append(grid(x - 1, y - 1, dx))
                    self.triangles.append(grid(x, y - 1, dx))
                    self.triangles.append(grid(x, y, dx))

                    self.triangles.append(grid(x - 1, y - 1, dx))
                    self.triangles.append(grid(x, y, dx))
                    self.triangles.append(grid(x - 1, y, dx))

                if fix_corners and y == 0 and (x == 0 or x == dx - 1):
                    w = 0.0
                else:
                    w = 1.0 / mass

                self.inv_masses.append(w)

        # row pass: stretch and bend springs along x, plus both diagonal (shear) springs
        for y in range(dy):
            for x in range(dx):
                index0 = y * dx + x

                if x > 0:
                    index1 = y * dx + x - 1
                    create_spring(index0, index1, stretch_stiffness)

                if x > 1 and bend_stiffness > 0.0:
                    index2 = y * dx + x - 2
                    create_spring(index0, index2, bend_stiffness)

                if y > 0 and x < dx - 1 and shear_stiffness > 0.0:
                    indexDiag = (y - 1) * dx + x + 1
                    create_spring(index0, indexDiag, shear_stiffness)

                if y > 0 and x > 0 and shear_stiffness > 0.0:
                    indexDiag = (y - 1) * dx + x - 1
                    create_spring(index0, indexDiag, shear_stiffness)

        # column pass: stretch and bend springs along y
        for x in range(dx):
            for y in range(dy):
                index0 = y * dx + x

                if y > 0:
                    index1 = (y - 1) * dx + x
                    create_spring(index0, index1, stretch_stiffness)

                if y > 1 and bend_stiffness > 0.0:
                    index2 = (y - 2) * dx + x
                    create_spring(index0, index2, bend_stiffness)

        # harden to np arrays
        self.positions = np.array(self.positions, dtype=np.float32)
        self.velocities = np.array(self.velocities, dtype=np.float32)
        self.inv_masses = np.array(self.inv_masses, dtype=np.float32)
        self.spring_lengths = np.array(self.spring_lengths, dtype=np.float32)
        self.spring_indices = np.array(self.spring_indices, dtype=np.int32)
        self.spring_stiffness = np.array(self.spring_stiffness, dtype=np.float32)
        self.spring_damping = np.array(self.spring_damping, dtype=np.float32)

        self.num_particles = len(self.positions)
        self.num_springs = len(self.spring_lengths)
        # integer division keeps the count exact instead of round-tripping through float
        self.num_tris = len(self.triangles) // 3
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _create_integrator(mode, cloth):
    """Import the backend module selected by ``mode`` and build its integrator for ``cloth``.

    The backend module is imported here, lazily, so that only the selected
    framework has to be installed and so that the import cost is incurred
    inside whatever timing scope the caller has open.

    Raises:
        RuntimeError: If ``mode`` is not one of the known backend names.
    """
    if mode in ("warp_cpu", "warp_gpu"):
        import benchmark_cloth_warp

        return benchmark_cloth_warp.WpIntegrator(cloth, "cpu" if mode == "warp_cpu" else "cuda")

    if mode in ("taichi_cpu", "taichi_gpu"):
        import benchmark_cloth_taichi

        return benchmark_cloth_taichi.TiIntegrator(cloth, "cpu" if mode == "taichi_cpu" else "cuda")

    if mode == "numpy":
        import benchmark_cloth_numpy

        return benchmark_cloth_numpy.NpIntegrator(cloth)

    if mode == "cupy":
        import benchmark_cloth_cupy

        return benchmark_cloth_cupy.CpIntegrator(cloth)

    if mode == "numba":
        import benchmark_cloth_numba

        return benchmark_cloth_numba.NbIntegrator(cloth)

    if mode in ("torch_cpu", "torch_gpu"):
        import benchmark_cloth_pytorch

        return benchmark_cloth_pytorch.TrIntegrator(cloth, "cpu" if mode == "torch_cpu" else "cuda")

    if mode in ("jax_cpu", "jax_gpu"):
        # the platform must be chosen through the environment before the JAX backend is imported
        os.environ["JAX_PLATFORM_NAME"] = "cpu" if mode == "jax_cpu" else "gpu"

        import benchmark_cloth_jax

        return benchmark_cloth_jax.JxIntegrator(cloth)

    if mode in ("paddle_cpu", "paddle_gpu"):
        import benchmark_cloth_paddle

        return benchmark_cloth_paddle.TrIntegrator(cloth, "cpu" if mode == "paddle_cpu" else "gpu")

    raise RuntimeError("Unknown simulation backend: {!r}".format(mode))


def run_benchmark(mode, dim, timers, render=False):
    """Simulate a ``dim`` x ``dim`` cloth with one backend and record timings.

    One second of simulation is run at 60 frames per second with 16 substeps
    passed to every ``simulate`` call. Backend construction plus one warm-up
    step is timed under the key ``"Initialization"``; every subsequent frame
    is timed under ``"Dim (<dim>^2)"``.

    Args:
        mode: Backend name, e.g. ``"warp_gpu"``, ``"numpy"`` or ``"torch_cpu"``.
        dim: Number of cloth particles along each side of the grid.
        timers: Dictionary handed to ``wp.ScopedTimer`` to collect the timings.
        render: If true, write the initial mesh and the positions of every
            frame to ``benchmark.usd``.

    Raises:
        RuntimeError: If ``mode`` is not a known backend name.
    """
    # params
    sim_width = dim
    sim_height = dim

    sim_fps = 60.0
    sim_substeps = 16
    sim_duration = 1.0
    sim_frames = int(sim_duration * sim_fps)
    sim_dt = 1.0 / sim_fps

    # spring stiffness constants
    k_stretch = 1000.0
    k_shear = 1000.0
    k_bend = 1000.0

    cloth = Cloth(
        lower=(0.0, 0.0, 0.0),
        dx=sim_width,
        dy=sim_height,
        radius=0.1,
        stretch_stiffness=k_stretch,
        bend_stiffness=k_bend,
        shear_stiffness=k_shear,
        mass=0.1,
        fix_corners=True,
    )

    if render:
        # set up grid for visualization
        stage = Usd.Stage.CreateNew("benchmark.usd")
        stage.SetStartTimeCode(0.0)
        stage.SetEndTimeCode(sim_duration * sim_fps)
        stage.SetTimeCodesPerSecond(sim_fps)

        grid = UsdGeom.Mesh.Define(stage, "/root")
        grid.GetPointsAttr().Set(cloth.positions, 0.0)
        grid.GetFaceVertexIndicesAttr().Set(cloth.triangles, 0.0)
        grid.GetFaceVertexCountsAttr().Set([3] * cloth.num_tris, 0.0)

    with wp.ScopedTimer("Initialization", dict=timers):
        integrator = _create_integrator(mode, cloth)

        # run one warm-up iteration to accurately measure initialization time (some engines do lazy init)
        integrator.simulate(sim_dt, sim_substeps)

    label = "Dim ({}^2)".format(dim)

    # run simulation
    for frame in range(sim_frames):
        # simulate
        with wp.ScopedTimer(label, dict=timers):
            positions = integrator.simulate(sim_dt, sim_substeps)

        if render:
            # the time code is taken from the frame index so samples land on exact
            # frames; accumulating sim_dt and scaling by sim_fps drifts off them
            grid.GetPointsAttr().Set(positions, float(frame))

    if render:
        stage.Save()
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
# record profiling information
timers = {}

# the backend is selected by the first command-line argument
mode = sys.argv[1] if len(sys.argv) > 1 else "warp_gpu"

# grid sizes to benchmark; the timer keys below follow the label format used by run_benchmark
sim_dims = (32, 64, 128)
dim_labels = ["Dim ({}^2)".format(d) for d in sim_dims]

for d in sim_dims:
    run_benchmark(mode, d, timers, render=False)

# write results

for k, v in timers.items():
    print("{:16} min: {:8.2f} max: {:8.2f} avg: {:8.2f}".format(k, np.min(v), np.max(v), np.mean(v)))

# newline="" is what the csv module requires of files it writes to, and the
# with-block closes the report even if one of the timer lookups below fails
with open("benchmark.csv", "a", newline="") as report:
    writer = csv.writer(report, delimiter=",")

    # a zero offset right after opening in append mode means the file is empty: write the header once
    if report.tell() == 0:
        writer.writerow(["Name", "Init"] + dim_labels)

    writer.writerow([mode, np.max(timers["Initialization"])] + [np.mean(timers[label]) for label in dim_labels])
|