warp-lang 1.10.0__py3-none-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +334 -0
- warp/__init__.pyi +5856 -0
- warp/_src/__init__.py +14 -0
- warp/_src/autograd.py +1077 -0
- warp/_src/build.py +620 -0
- warp/_src/build_dll.py +642 -0
- warp/_src/builtins.py +10555 -0
- warp/_src/codegen.py +4361 -0
- warp/_src/config.py +178 -0
- warp/_src/constants.py +59 -0
- warp/_src/context.py +8352 -0
- warp/_src/dlpack.py +464 -0
- warp/_src/fabric.py +362 -0
- warp/_src/fem/__init__.py +14 -0
- warp/_src/fem/adaptivity.py +510 -0
- warp/_src/fem/cache.py +689 -0
- warp/_src/fem/dirichlet.py +190 -0
- warp/_src/fem/domain.py +553 -0
- warp/_src/fem/field/__init__.py +131 -0
- warp/_src/fem/field/field.py +703 -0
- warp/_src/fem/field/nodal_field.py +403 -0
- warp/_src/fem/field/restriction.py +39 -0
- warp/_src/fem/field/virtual.py +1021 -0
- warp/_src/fem/geometry/__init__.py +32 -0
- warp/_src/fem/geometry/adaptive_nanogrid.py +782 -0
- warp/_src/fem/geometry/closest_point.py +99 -0
- warp/_src/fem/geometry/deformed_geometry.py +277 -0
- warp/_src/fem/geometry/element.py +854 -0
- warp/_src/fem/geometry/geometry.py +693 -0
- warp/_src/fem/geometry/grid_2d.py +478 -0
- warp/_src/fem/geometry/grid_3d.py +539 -0
- warp/_src/fem/geometry/hexmesh.py +956 -0
- warp/_src/fem/geometry/nanogrid.py +660 -0
- warp/_src/fem/geometry/partition.py +483 -0
- warp/_src/fem/geometry/quadmesh.py +597 -0
- warp/_src/fem/geometry/tetmesh.py +762 -0
- warp/_src/fem/geometry/trimesh.py +588 -0
- warp/_src/fem/integrate.py +2507 -0
- warp/_src/fem/linalg.py +385 -0
- warp/_src/fem/operator.py +398 -0
- warp/_src/fem/polynomial.py +231 -0
- warp/_src/fem/quadrature/__init__.py +17 -0
- warp/_src/fem/quadrature/pic_quadrature.py +318 -0
- warp/_src/fem/quadrature/quadrature.py +665 -0
- warp/_src/fem/space/__init__.py +248 -0
- warp/_src/fem/space/basis_function_space.py +499 -0
- warp/_src/fem/space/basis_space.py +681 -0
- warp/_src/fem/space/dof_mapper.py +253 -0
- warp/_src/fem/space/function_space.py +312 -0
- warp/_src/fem/space/grid_2d_function_space.py +179 -0
- warp/_src/fem/space/grid_3d_function_space.py +229 -0
- warp/_src/fem/space/hexmesh_function_space.py +255 -0
- warp/_src/fem/space/nanogrid_function_space.py +199 -0
- warp/_src/fem/space/partition.py +435 -0
- warp/_src/fem/space/quadmesh_function_space.py +222 -0
- warp/_src/fem/space/restriction.py +221 -0
- warp/_src/fem/space/shape/__init__.py +152 -0
- warp/_src/fem/space/shape/cube_shape_function.py +1107 -0
- warp/_src/fem/space/shape/shape_function.py +134 -0
- warp/_src/fem/space/shape/square_shape_function.py +928 -0
- warp/_src/fem/space/shape/tet_shape_function.py +829 -0
- warp/_src/fem/space/shape/triangle_shape_function.py +674 -0
- warp/_src/fem/space/tetmesh_function_space.py +270 -0
- warp/_src/fem/space/topology.py +461 -0
- warp/_src/fem/space/trimesh_function_space.py +193 -0
- warp/_src/fem/types.py +114 -0
- warp/_src/fem/utils.py +488 -0
- warp/_src/jax.py +188 -0
- warp/_src/jax_experimental/__init__.py +14 -0
- warp/_src/jax_experimental/custom_call.py +389 -0
- warp/_src/jax_experimental/ffi.py +1286 -0
- warp/_src/jax_experimental/xla_ffi.py +658 -0
- warp/_src/marching_cubes.py +710 -0
- warp/_src/math.py +416 -0
- warp/_src/optim/__init__.py +14 -0
- warp/_src/optim/adam.py +165 -0
- warp/_src/optim/linear.py +1608 -0
- warp/_src/optim/sgd.py +114 -0
- warp/_src/paddle.py +408 -0
- warp/_src/render/__init__.py +14 -0
- warp/_src/render/imgui_manager.py +291 -0
- warp/_src/render/render_opengl.py +3638 -0
- warp/_src/render/render_usd.py +939 -0
- warp/_src/render/utils.py +162 -0
- warp/_src/sparse.py +2718 -0
- warp/_src/tape.py +1208 -0
- warp/_src/thirdparty/__init__.py +0 -0
- warp/_src/thirdparty/appdirs.py +598 -0
- warp/_src/thirdparty/dlpack.py +145 -0
- warp/_src/thirdparty/unittest_parallel.py +676 -0
- warp/_src/torch.py +393 -0
- warp/_src/types.py +5888 -0
- warp/_src/utils.py +1695 -0
- warp/autograd.py +33 -0
- warp/bin/libwarp-clang.dylib +0 -0
- warp/bin/libwarp.dylib +0 -0
- warp/build.py +29 -0
- warp/build_dll.py +24 -0
- warp/codegen.py +24 -0
- warp/constants.py +24 -0
- warp/context.py +33 -0
- warp/dlpack.py +24 -0
- warp/examples/__init__.py +24 -0
- warp/examples/assets/bear.usd +0 -0
- warp/examples/assets/bunny.usd +0 -0
- warp/examples/assets/cube.usd +0 -0
- warp/examples/assets/nonuniform.usd +0 -0
- warp/examples/assets/nvidia_logo.png +0 -0
- warp/examples/assets/pixel.jpg +0 -0
- warp/examples/assets/rocks.nvdb +0 -0
- warp/examples/assets/rocks.usd +0 -0
- warp/examples/assets/sphere.usd +0 -0
- warp/examples/assets/square_cloth.usd +0 -0
- warp/examples/benchmarks/benchmark_api.py +389 -0
- warp/examples/benchmarks/benchmark_cloth.py +296 -0
- warp/examples/benchmarks/benchmark_cloth_cupy.py +96 -0
- warp/examples/benchmarks/benchmark_cloth_jax.py +105 -0
- warp/examples/benchmarks/benchmark_cloth_numba.py +161 -0
- warp/examples/benchmarks/benchmark_cloth_numpy.py +85 -0
- warp/examples/benchmarks/benchmark_cloth_paddle.py +94 -0
- warp/examples/benchmarks/benchmark_cloth_pytorch.py +94 -0
- warp/examples/benchmarks/benchmark_cloth_taichi.py +120 -0
- warp/examples/benchmarks/benchmark_cloth_warp.py +153 -0
- warp/examples/benchmarks/benchmark_gemm.py +164 -0
- warp/examples/benchmarks/benchmark_interop_paddle.py +166 -0
- warp/examples/benchmarks/benchmark_interop_torch.py +166 -0
- warp/examples/benchmarks/benchmark_launches.py +301 -0
- warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
- warp/examples/benchmarks/benchmark_tile_sort.py +155 -0
- warp/examples/browse.py +37 -0
- warp/examples/core/example_cupy.py +86 -0
- warp/examples/core/example_dem.py +241 -0
- warp/examples/core/example_fluid.py +299 -0
- warp/examples/core/example_graph_capture.py +150 -0
- warp/examples/core/example_marching_cubes.py +195 -0
- warp/examples/core/example_mesh.py +180 -0
- warp/examples/core/example_mesh_intersect.py +211 -0
- warp/examples/core/example_nvdb.py +182 -0
- warp/examples/core/example_raycast.py +111 -0
- warp/examples/core/example_raymarch.py +205 -0
- warp/examples/core/example_render_opengl.py +290 -0
- warp/examples/core/example_sample_mesh.py +300 -0
- warp/examples/core/example_sph.py +411 -0
- warp/examples/core/example_spin_lock.py +93 -0
- warp/examples/core/example_torch.py +211 -0
- warp/examples/core/example_wave.py +269 -0
- warp/examples/core/example_work_queue.py +118 -0
- warp/examples/distributed/example_jacobi_mpi.py +506 -0
- warp/examples/fem/example_adaptive_grid.py +286 -0
- warp/examples/fem/example_apic_fluid.py +469 -0
- warp/examples/fem/example_burgers.py +261 -0
- warp/examples/fem/example_convection_diffusion.py +181 -0
- warp/examples/fem/example_convection_diffusion_dg.py +225 -0
- warp/examples/fem/example_darcy_ls_optimization.py +489 -0
- warp/examples/fem/example_deformed_geometry.py +172 -0
- warp/examples/fem/example_diffusion.py +196 -0
- warp/examples/fem/example_diffusion_3d.py +225 -0
- warp/examples/fem/example_diffusion_mgpu.py +225 -0
- warp/examples/fem/example_distortion_energy.py +228 -0
- warp/examples/fem/example_elastic_shape_optimization.py +387 -0
- warp/examples/fem/example_magnetostatics.py +242 -0
- warp/examples/fem/example_mixed_elasticity.py +293 -0
- warp/examples/fem/example_navier_stokes.py +263 -0
- warp/examples/fem/example_nonconforming_contact.py +300 -0
- warp/examples/fem/example_stokes.py +213 -0
- warp/examples/fem/example_stokes_transfer.py +262 -0
- warp/examples/fem/example_streamlines.py +357 -0
- warp/examples/fem/utils.py +1047 -0
- warp/examples/interop/example_jax_callable.py +146 -0
- warp/examples/interop/example_jax_ffi_callback.py +132 -0
- warp/examples/interop/example_jax_kernel.py +232 -0
- warp/examples/optim/example_diffray.py +561 -0
- warp/examples/optim/example_fluid_checkpoint.py +497 -0
- warp/examples/tile/example_tile_block_cholesky.py +502 -0
- warp/examples/tile/example_tile_cholesky.py +88 -0
- warp/examples/tile/example_tile_convolution.py +66 -0
- warp/examples/tile/example_tile_fft.py +55 -0
- warp/examples/tile/example_tile_filtering.py +113 -0
- warp/examples/tile/example_tile_matmul.py +85 -0
- warp/examples/tile/example_tile_mcgp.py +191 -0
- warp/examples/tile/example_tile_mlp.py +385 -0
- warp/examples/tile/example_tile_nbody.py +199 -0
- warp/fabric.py +24 -0
- warp/fem/__init__.py +173 -0
- warp/fem/adaptivity.py +26 -0
- warp/fem/cache.py +30 -0
- warp/fem/dirichlet.py +24 -0
- warp/fem/field/__init__.py +24 -0
- warp/fem/field/field.py +26 -0
- warp/fem/geometry/__init__.py +21 -0
- warp/fem/geometry/closest_point.py +31 -0
- warp/fem/linalg.py +38 -0
- warp/fem/operator.py +32 -0
- warp/fem/polynomial.py +29 -0
- warp/fem/space/__init__.py +22 -0
- warp/fem/space/basis_space.py +24 -0
- warp/fem/space/shape/__init__.py +68 -0
- warp/fem/space/topology.py +24 -0
- warp/fem/types.py +24 -0
- warp/fem/utils.py +32 -0
- warp/jax.py +29 -0
- warp/jax_experimental/__init__.py +29 -0
- warp/jax_experimental/custom_call.py +29 -0
- warp/jax_experimental/ffi.py +39 -0
- warp/jax_experimental/xla_ffi.py +24 -0
- warp/marching_cubes.py +24 -0
- warp/math.py +37 -0
- warp/native/array.h +1687 -0
- warp/native/builtin.h +2327 -0
- warp/native/bvh.cpp +562 -0
- warp/native/bvh.cu +826 -0
- warp/native/bvh.h +555 -0
- warp/native/clang/clang.cpp +541 -0
- warp/native/coloring.cpp +622 -0
- warp/native/crt.cpp +51 -0
- warp/native/crt.h +568 -0
- warp/native/cuda_crt.h +1058 -0
- warp/native/cuda_util.cpp +677 -0
- warp/native/cuda_util.h +313 -0
- warp/native/error.cpp +77 -0
- warp/native/error.h +36 -0
- warp/native/exports.h +2023 -0
- warp/native/fabric.h +246 -0
- warp/native/hashgrid.cpp +311 -0
- warp/native/hashgrid.cu +89 -0
- warp/native/hashgrid.h +240 -0
- warp/native/initializer_array.h +41 -0
- warp/native/intersect.h +1253 -0
- warp/native/intersect_adj.h +375 -0
- warp/native/intersect_tri.h +348 -0
- warp/native/mat.h +5189 -0
- warp/native/mathdx.cpp +93 -0
- warp/native/matnn.h +221 -0
- warp/native/mesh.cpp +266 -0
- warp/native/mesh.cu +406 -0
- warp/native/mesh.h +2097 -0
- warp/native/nanovdb/GridHandle.h +533 -0
- warp/native/nanovdb/HostBuffer.h +591 -0
- warp/native/nanovdb/NanoVDB.h +6246 -0
- warp/native/nanovdb/NodeManager.h +323 -0
- warp/native/nanovdb/PNanoVDB.h +3390 -0
- warp/native/noise.h +859 -0
- warp/native/quat.h +1664 -0
- warp/native/rand.h +342 -0
- warp/native/range.h +145 -0
- warp/native/reduce.cpp +174 -0
- warp/native/reduce.cu +363 -0
- warp/native/runlength_encode.cpp +79 -0
- warp/native/runlength_encode.cu +61 -0
- warp/native/scan.cpp +47 -0
- warp/native/scan.cu +55 -0
- warp/native/scan.h +23 -0
- warp/native/solid_angle.h +466 -0
- warp/native/sort.cpp +251 -0
- warp/native/sort.cu +286 -0
- warp/native/sort.h +35 -0
- warp/native/sparse.cpp +241 -0
- warp/native/sparse.cu +435 -0
- warp/native/spatial.h +1306 -0
- warp/native/svd.h +727 -0
- warp/native/temp_buffer.h +46 -0
- warp/native/tile.h +4124 -0
- warp/native/tile_radix_sort.h +1112 -0
- warp/native/tile_reduce.h +838 -0
- warp/native/tile_scan.h +240 -0
- warp/native/tuple.h +189 -0
- warp/native/vec.h +2199 -0
- warp/native/version.h +23 -0
- warp/native/volume.cpp +501 -0
- warp/native/volume.cu +68 -0
- warp/native/volume.h +970 -0
- warp/native/volume_builder.cu +483 -0
- warp/native/volume_builder.h +52 -0
- warp/native/volume_impl.h +70 -0
- warp/native/warp.cpp +1143 -0
- warp/native/warp.cu +4604 -0
- warp/native/warp.h +358 -0
- warp/optim/__init__.py +20 -0
- warp/optim/adam.py +24 -0
- warp/optim/linear.py +35 -0
- warp/optim/sgd.py +24 -0
- warp/paddle.py +24 -0
- warp/py.typed +0 -0
- warp/render/__init__.py +22 -0
- warp/render/imgui_manager.py +29 -0
- warp/render/render_opengl.py +24 -0
- warp/render/render_usd.py +24 -0
- warp/render/utils.py +24 -0
- warp/sparse.py +51 -0
- warp/tape.py +24 -0
- warp/tests/__init__.py +1 -0
- warp/tests/__main__.py +4 -0
- warp/tests/assets/curlnoise_golden.npy +0 -0
- warp/tests/assets/mlp_golden.npy +0 -0
- warp/tests/assets/pixel.npy +0 -0
- warp/tests/assets/pnoise_golden.npy +0 -0
- warp/tests/assets/spiky.usd +0 -0
- warp/tests/assets/test_grid.nvdb +0 -0
- warp/tests/assets/test_index_grid.nvdb +0 -0
- warp/tests/assets/test_int32_grid.nvdb +0 -0
- warp/tests/assets/test_vec_grid.nvdb +0 -0
- warp/tests/assets/torus.nvdb +0 -0
- warp/tests/assets/torus.usda +105 -0
- warp/tests/aux_test_class_kernel.py +34 -0
- warp/tests/aux_test_compile_consts_dummy.py +18 -0
- warp/tests/aux_test_conditional_unequal_types_kernels.py +29 -0
- warp/tests/aux_test_dependent.py +29 -0
- warp/tests/aux_test_grad_customs.py +29 -0
- warp/tests/aux_test_instancing_gc.py +26 -0
- warp/tests/aux_test_module_aot.py +7 -0
- warp/tests/aux_test_module_unload.py +23 -0
- warp/tests/aux_test_name_clash1.py +40 -0
- warp/tests/aux_test_name_clash2.py +40 -0
- warp/tests/aux_test_reference.py +9 -0
- warp/tests/aux_test_reference_reference.py +8 -0
- warp/tests/aux_test_square.py +16 -0
- warp/tests/aux_test_unresolved_func.py +22 -0
- warp/tests/aux_test_unresolved_symbol.py +22 -0
- warp/tests/cuda/__init__.py +0 -0
- warp/tests/cuda/test_async.py +676 -0
- warp/tests/cuda/test_conditional_captures.py +1147 -0
- warp/tests/cuda/test_ipc.py +124 -0
- warp/tests/cuda/test_mempool.py +233 -0
- warp/tests/cuda/test_multigpu.py +169 -0
- warp/tests/cuda/test_peer.py +139 -0
- warp/tests/cuda/test_pinned.py +84 -0
- warp/tests/cuda/test_streams.py +691 -0
- warp/tests/geometry/__init__.py +0 -0
- warp/tests/geometry/test_bvh.py +335 -0
- warp/tests/geometry/test_hash_grid.py +259 -0
- warp/tests/geometry/test_marching_cubes.py +294 -0
- warp/tests/geometry/test_mesh.py +318 -0
- warp/tests/geometry/test_mesh_query_aabb.py +392 -0
- warp/tests/geometry/test_mesh_query_point.py +935 -0
- warp/tests/geometry/test_mesh_query_ray.py +323 -0
- warp/tests/geometry/test_volume.py +1103 -0
- warp/tests/geometry/test_volume_write.py +346 -0
- warp/tests/interop/__init__.py +0 -0
- warp/tests/interop/test_dlpack.py +730 -0
- warp/tests/interop/test_jax.py +1673 -0
- warp/tests/interop/test_paddle.py +800 -0
- warp/tests/interop/test_torch.py +1001 -0
- warp/tests/run_coverage_serial.py +39 -0
- warp/tests/test_adam.py +162 -0
- warp/tests/test_arithmetic.py +1096 -0
- warp/tests/test_array.py +3756 -0
- warp/tests/test_array_reduce.py +156 -0
- warp/tests/test_assert.py +303 -0
- warp/tests/test_atomic.py +336 -0
- warp/tests/test_atomic_bitwise.py +209 -0
- warp/tests/test_atomic_cas.py +312 -0
- warp/tests/test_bool.py +220 -0
- warp/tests/test_builtins_resolution.py +732 -0
- warp/tests/test_closest_point_edge_edge.py +327 -0
- warp/tests/test_codegen.py +974 -0
- warp/tests/test_codegen_instancing.py +1495 -0
- warp/tests/test_compile_consts.py +215 -0
- warp/tests/test_conditional.py +298 -0
- warp/tests/test_context.py +35 -0
- warp/tests/test_copy.py +319 -0
- warp/tests/test_ctypes.py +618 -0
- warp/tests/test_dense.py +73 -0
- warp/tests/test_devices.py +127 -0
- warp/tests/test_enum.py +136 -0
- warp/tests/test_examples.py +424 -0
- warp/tests/test_fabricarray.py +998 -0
- warp/tests/test_fast_math.py +72 -0
- warp/tests/test_fem.py +2204 -0
- warp/tests/test_fixedarray.py +229 -0
- warp/tests/test_fp16.py +136 -0
- warp/tests/test_func.py +501 -0
- warp/tests/test_future_annotations.py +100 -0
- warp/tests/test_generics.py +656 -0
- warp/tests/test_grad.py +893 -0
- warp/tests/test_grad_customs.py +339 -0
- warp/tests/test_grad_debug.py +341 -0
- warp/tests/test_implicit_init.py +411 -0
- warp/tests/test_import.py +45 -0
- warp/tests/test_indexedarray.py +1140 -0
- warp/tests/test_intersect.py +103 -0
- warp/tests/test_iter.py +76 -0
- warp/tests/test_large.py +177 -0
- warp/tests/test_launch.py +411 -0
- warp/tests/test_lerp.py +151 -0
- warp/tests/test_linear_solvers.py +223 -0
- warp/tests/test_lvalue.py +427 -0
- warp/tests/test_map.py +526 -0
- warp/tests/test_mat.py +3515 -0
- warp/tests/test_mat_assign_copy.py +178 -0
- warp/tests/test_mat_constructors.py +573 -0
- warp/tests/test_mat_lite.py +122 -0
- warp/tests/test_mat_scalar_ops.py +2913 -0
- warp/tests/test_math.py +212 -0
- warp/tests/test_module_aot.py +287 -0
- warp/tests/test_module_hashing.py +258 -0
- warp/tests/test_modules_lite.py +70 -0
- warp/tests/test_noise.py +252 -0
- warp/tests/test_operators.py +299 -0
- warp/tests/test_options.py +129 -0
- warp/tests/test_overwrite.py +551 -0
- warp/tests/test_print.py +408 -0
- warp/tests/test_quat.py +2653 -0
- warp/tests/test_quat_assign_copy.py +145 -0
- warp/tests/test_rand.py +339 -0
- warp/tests/test_reload.py +303 -0
- warp/tests/test_rounding.py +157 -0
- warp/tests/test_runlength_encode.py +196 -0
- warp/tests/test_scalar_ops.py +133 -0
- warp/tests/test_smoothstep.py +108 -0
- warp/tests/test_snippet.py +318 -0
- warp/tests/test_sparse.py +845 -0
- warp/tests/test_spatial.py +2859 -0
- warp/tests/test_spatial_assign_copy.py +160 -0
- warp/tests/test_special_values.py +361 -0
- warp/tests/test_static.py +640 -0
- warp/tests/test_struct.py +901 -0
- warp/tests/test_tape.py +242 -0
- warp/tests/test_transient_module.py +93 -0
- warp/tests/test_triangle_closest_point.py +192 -0
- warp/tests/test_tuple.py +361 -0
- warp/tests/test_types.py +615 -0
- warp/tests/test_utils.py +594 -0
- warp/tests/test_vec.py +1408 -0
- warp/tests/test_vec_assign_copy.py +143 -0
- warp/tests/test_vec_constructors.py +325 -0
- warp/tests/test_vec_lite.py +80 -0
- warp/tests/test_vec_scalar_ops.py +2327 -0
- warp/tests/test_verify_fp.py +100 -0
- warp/tests/test_version.py +75 -0
- warp/tests/tile/__init__.py +0 -0
- warp/tests/tile/test_tile.py +1519 -0
- warp/tests/tile/test_tile_atomic_bitwise.py +403 -0
- warp/tests/tile/test_tile_cholesky.py +608 -0
- warp/tests/tile/test_tile_load.py +724 -0
- warp/tests/tile/test_tile_mathdx.py +156 -0
- warp/tests/tile/test_tile_matmul.py +179 -0
- warp/tests/tile/test_tile_mlp.py +400 -0
- warp/tests/tile/test_tile_reduce.py +950 -0
- warp/tests/tile/test_tile_shared_memory.py +376 -0
- warp/tests/tile/test_tile_sort.py +121 -0
- warp/tests/tile/test_tile_view.py +173 -0
- warp/tests/unittest_serial.py +47 -0
- warp/tests/unittest_suites.py +430 -0
- warp/tests/unittest_utils.py +469 -0
- warp/tests/walkthrough_debug.py +95 -0
- warp/torch.py +24 -0
- warp/types.py +51 -0
- warp/utils.py +31 -0
- warp_lang-1.10.0.dist-info/METADATA +459 -0
- warp_lang-1.10.0.dist-info/RECORD +468 -0
- warp_lang-1.10.0.dist-info/WHEEL +5 -0
- warp_lang-1.10.0.dist-info/licenses/LICENSE.md +176 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/Gaia-LICENSE.txt +6 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/appdirs-LICENSE.txt +22 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/asset_pixel_jpg-LICENSE.txt +3 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/cuda-LICENSE.txt +1582 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/dlpack-LICENSE.txt +201 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/fp16-LICENSE.txt +28 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/libmathdx-LICENSE.txt +220 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/llvm-LICENSE.txt +279 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/moller-LICENSE.txt +16 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/nanovdb-LICENSE.txt +2 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/nvrtc-LICENSE.txt +1592 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/svd-LICENSE.txt +23 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/unittest_parallel-LICENSE.txt +21 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/usd-LICENSE.txt +213 -0
- warp_lang-1.10.0.dist-info/licenses/licenses/windingnumber-LICENSE.txt +21 -0
- warp_lang-1.10.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,385 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
###########################################################################
|
|
17
|
+
# Example Image Multilayer Perceptron (MLP)
|
|
18
|
+
#
|
|
19
|
+
# Shows how to train a coordinate-based MLP on an image to predict the RGB
|
|
20
|
+
# color at a given input position. By default, a positional encoding is
|
|
21
|
+
# applied to the input coordinates to improve the ability of the MLP to
|
|
22
|
+
# represent higher-frequency content. This can be disabled by passing the
|
|
23
|
+
# '--no_encoding' option.
|
|
24
|
+
#
|
|
25
|
+
# References:
|
|
26
|
+
# Ben Mildenhall et al. 2021. NeRF: representing scenes
|
|
27
|
+
# as neural radiance fields for view synthesis. Commun. ACM 65, 1
|
|
28
|
+
# (January 2022), 99–106. https://doi.org/10.1145/3503250
|
|
29
|
+
#
|
|
30
|
+
###########################################################################
|
|
31
|
+
|
|
32
|
+
# ruff: noqa: RUF003
|
|
33
|
+
|
|
34
|
+
import math
|
|
35
|
+
import os
|
|
36
|
+
|
|
37
|
+
import numpy as np
|
|
38
|
+
from PIL import Image
|
|
39
|
+
|
|
40
|
+
import warp as wp
|
|
41
|
+
import warp.examples
|
|
42
|
+
import warp.optim
|
|
43
|
+
|
|
44
|
+
# Module-level NumPy generator with a fixed seed (45); create_layer,
# create_array and the index shuffle in Example.__init__ all draw from it.
rng = np.random.default_rng(45)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def create_layer(dim_in, dim_hid, dtype=float):
    """Build the randomly initialised parameters of one dense layer.

    The weight matrix has shape ``(dim_hid, dim_in)`` and the bias column has
    shape ``(dim_hid, 1)``. Both are drawn uniformly from the interval bounded
    by ``-1/sqrt(dim_in)`` and ``1/sqrt(dim_in)`` using the module-level
    ``rng``.

    Args:
        dim_in: Number of inputs to the layer.
        dim_hid: Number of outputs of the layer.
        dtype: Element type of the returned Warp arrays.

    Returns:
        A ``(weights, bias)`` tuple of Warp arrays, both created with
        ``requires_grad=True``.
    """
    bound = 1.0 / np.sqrt(dim_in)

    # Weights are sampled before the bias so the sequence of numbers consumed
    # from ``rng`` stays in the same order.
    weight_init = rng.uniform(-bound, bound, (dim_hid, dim_in))
    bias_init = rng.uniform(-bound, bound, (dim_hid, 1))

    return (
        wp.array(weight_init, dtype=dtype, requires_grad=True),
        wp.array(bias_init, dtype=dtype, requires_grad=True),
    )
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def create_array(dim_in, dim_hid, dtype=float):
    """Return a randomly initialised ``(dim_hid, dim_in)`` Warp array.

    Values are drawn uniformly from the interval bounded by
    ``-1/sqrt(dim_in)`` and ``1/sqrt(dim_in)`` using the module-level ``rng``.

    Args:
        dim_in: Number of columns; also sets the sampling bound.
        dim_hid: Number of rows.
        dtype: Element type of the returned Warp array.

    Returns:
        A Warp array created with ``requires_grad=True``.
    """
    limit = 1.0 / np.sqrt(dim_in)
    samples = rng.uniform(-limit, limit, (dim_hid, dim_in))
    return wp.array(samples, dtype=dtype, requires_grad=True)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# Number of frequencies used by the positional encoding.
NUM_FREQ = wp.constant(8)

# Every frequency contributes four features: sin and cos of both x and y.
DIM_IN = wp.constant(4 * NUM_FREQ)
DIM_HID = 32
DIM_OUT = 3

# Threads per block.
NUM_THREADS = 32

IMG_WIDTH = 512
IMG_HEIGHT = 512

# At most 1024 pixels per batch, and never more than an eighth of the image.
BATCH_SIZE = min(1024, (IMG_WIDTH * IMG_HEIGHT) // 8)

# Element type of the weight and bias matrices.
dtype = wp.float16
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@wp.func
def relu(x: dtype):
    """Rectified linear unit: return the larger of ``x`` and zero."""
    zero = dtype(0.0)
    return wp.max(x, zero)
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
@wp.kernel
def compute(
    indices: wp.array(dtype=int),
    weights_0: wp.array2d(dtype=dtype),
    bias_0: wp.array2d(dtype=dtype),
    weights_1: wp.array2d(dtype=dtype),
    bias_1: wp.array2d(dtype=dtype),
    weights_2: wp.array2d(dtype=dtype),
    bias_2: wp.array2d(dtype=dtype),
    weights_3: wp.array2d(dtype=dtype),
    bias_3: wp.array2d(dtype=dtype),
    reference: wp.array2d(dtype=float),
    loss: wp.array1d(dtype=float),
    out: wp.array2d(dtype=float),
):
    """Evaluate the MLP for one pixel per thread and optionally record loss/output.

    Args:
        indices: Linear pixel indices; thread ``tid`` processes ``indices[tid]``.
        weights_0, bias_0: Input layer, loaded as ``(DIM_HID, DIM_IN)`` and
            ``(DIM_HID, 1)`` tiles.
        weights_1, bias_1, weights_2, bias_2: Hidden layers, loaded as
            ``(DIM_HID, DIM_HID)`` and ``(DIM_HID, 1)`` tiles.
        weights_3, bias_3: Output layer, loaded as ``(DIM_OUT, DIM_HID)`` and
            ``(DIM_OUT, 1)`` tiles.
        reference: Target colours, read as ``reference[channel, pixel]``.
        loss: If passed, ``loss[0]`` atomically receives this thread's squared
            error divided by ``3 * BATCH_SIZE``.
        out: If passed, ``out[channel, pixel]`` receives the predicted value.
    """
    # linear index of the pixel handled by this thread
    pixel = indices[wp.tid()]

    # NOTE(review): both operands are ints; presumably `/` is integer division
    # inside a Warp kernel - confirm against the Warp language reference.
    row = pixel / IMG_WIDTH
    col = pixel % IMG_WIDTH

    # map the pixel position to roughly [-1, 1]
    # NOTE(review): row is scaled by IMG_WIDTH and col by IMG_HEIGHT; the two
    # constants are equal here - confirm the pairing before using a
    # non-square image.
    x = (float(row) / float(IMG_WIDTH) - 0.5) * 2.0
    y = (float(col) / float(IMG_HEIGHT) - 0.5) * 2.0

    features = wp.vector(dtype=dtype, length=DIM_IN)

    # positional encoding: sin/cos of x and y at NUM_FREQ doubling frequencies
    for freq in range(NUM_FREQ):
        scale = wp.pow(2.0, float(freq)) * wp.pi

        # x-coord
        features[freq * 4 + 0] = dtype(wp.sin(x * scale))
        features[freq * 4 + 1] = dtype(wp.cos(x * scale))
        # y-coord
        features[freq * 4 + 2] = dtype(wp.sin(y * scale))
        features[freq * 4 + 3] = dtype(wp.cos(y * scale))

    # gather the per-thread feature vectors into one tile of shape
    # [dim(features), NUM_THREADS]
    feature_tile = wp.tile(features)

    # input layer
    w0 = wp.tile_load(weights_0, shape=(DIM_HID, DIM_IN))
    b0 = wp.tile_load(bias_0, shape=(DIM_HID, 1))
    pre_0 = wp.tile_matmul(w0, feature_tile) + wp.tile_broadcast(b0, shape=(DIM_HID, NUM_THREADS))
    hidden_0 = wp.tile_map(relu, pre_0)

    # first hidden layer
    w1 = wp.tile_load(weights_1, shape=(DIM_HID, DIM_HID))
    b1 = wp.tile_load(bias_1, shape=(DIM_HID, 1))
    pre_1 = wp.tile_matmul(w1, hidden_0) + wp.tile_broadcast(b1, shape=(DIM_HID, NUM_THREADS))
    hidden_1 = wp.tile_map(relu, pre_1)

    # second hidden layer
    w2 = wp.tile_load(weights_2, shape=(DIM_HID, DIM_HID))
    b2 = wp.tile_load(bias_2, shape=(DIM_HID, 1))
    pre_2 = wp.tile_matmul(w2, hidden_1) + wp.tile_broadcast(b2, shape=(DIM_HID, NUM_THREADS))
    hidden_2 = wp.tile_map(relu, pre_2)

    # output layer (also passed through relu)
    w3 = wp.tile_load(weights_3, shape=(DIM_OUT, DIM_HID))
    b3 = wp.tile_load(bias_3, shape=(DIM_OUT, 1))
    pre_3 = wp.tile_matmul(w3, hidden_2) + wp.tile_broadcast(b3, shape=(DIM_OUT, NUM_THREADS))
    rgb_tile = wp.tile_map(relu, pre_3)

    # back from the tile to this thread's own prediction
    rgb = wp.untile(rgb_tile)

    # per-channel difference between prediction and target
    residual = wp.vec3(
        float(rgb[0]) - reference[0, pixel],
        float(rgb[1]) - reference[1, pixel],
        float(rgb[2]) - reference[2, pixel],
    )

    # accumulate this pixel's share of the MSE loss
    if loss:
        wp.atomic_add(loss, 0, wp.length_sq(residual) / float(3 * BATCH_SIZE))

    # store the predicted colour
    if out:
        for channel in range(DIM_OUT):
            out[channel, pixel] = float(rgb[channel])
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
class Example:
|
|
169
|
+
def __init__(self, train_iters):
    """Create the network parameters, reference image and batch schedule.

    Args:
        train_iters: Requested number of training iterations. It is stored as
            ``self.max_iters`` and converted into ``self.max_epochs`` (at
            least one) by dividing by the number of batches per epoch.
    """
    # (inputs, outputs) of each layer, in network order; layers are created in
    # this order so the draws from ``rng`` keep their sequence
    layer_dims = (
        (DIM_IN, DIM_HID),
        (DIM_HID, DIM_HID),
        (DIM_HID, DIM_HID),
        (DIM_HID, DIM_OUT),
    )
    for layer, (n_in, n_out) in enumerate(layer_dims):
        layer_weights, layer_bias = create_layer(n_in, n_out, dtype=dtype)
        setattr(self, f"weights_{layer}", layer_weights)
        setattr(self, f"bias_{layer}", layer_bias)

    # reference image: resized, converted to RGB and scaled by 1/255
    asset_dir = wp.examples.get_asset_directory()
    with Image.open(os.path.join(asset_dir, "pixel.jpg")) as im:
        resized = im.resize((IMG_WIDTH, IMG_HEIGHT)).convert("RGB")
        pixels = np.asarray(resized) / 255.0

    # stored channel-first: one row per colour channel, one column per pixel
    channels_first = pixels.reshape(IMG_WIDTH * IMG_HEIGHT, 3).T
    self.reference = wp.array(channels_first, dtype=float)

    # every pixel index exactly once, in shuffled order
    shuffled = np.arange(0, IMG_WIDTH * IMG_HEIGHT, dtype=np.int32)
    rng.shuffle(shuffled)
    self.indices = wp.array(shuffled)

    self.num_batches = (IMG_WIDTH * IMG_HEIGHT) // BATCH_SIZE
    self.max_iters = train_iters
    self.max_epochs = max(1, int(self.max_iters / self.num_batches))
|
|
189
|
+
|
|
190
|
+
def train_warp(self):
    """Train the MLP with Warp and save the reconstructed image.

    One epoch is recorded once as a graph and then replayed
    ``self.max_epochs`` times. An epoch runs, for every batch of
    ``BATCH_SIZE`` shuffled pixel indices, a forward pass, a backward pass
    and an Adam step. Afterwards the network is evaluated on every pixel and
    the result is handed to ``self.save_image`` under the name
    ``example_tile_mlp.jpg``.
    """
    # Listed in the order the ``compute`` kernel declares its weight and bias
    # arguments, so the list can be spliced straight into the launch inputs.
    params = [
        self.weights_0,
        self.bias_0,
        self.weights_1,
        self.bias_1,
        self.weights_2,
        self.bias_2,
        self.weights_3,
        self.bias_3,
    ]

    optimizer_grads = [p.grad.flatten() for p in params]
    optimizer_inputs = [p.flatten() for p in params]
    optimizer = warp.optim.Adam(optimizer_inputs, lr=0.01)

    loss = wp.zeros(1, dtype=float, requires_grad=True)
    output = create_array(IMG_WIDTH * IMG_HEIGHT, DIM_OUT)

    # Capture the graph for a whole epoch. ScopedCapture ends the capture
    # when the block exits, including when recording raises, so a failure
    # here does not leave a capture open.
    with wp.ScopedCapture() as capture:
        for b in range(0, IMG_WIDTH * IMG_HEIGHT, BATCH_SIZE):
            loss.zero_()

            with wp.Tape() as tape:
                wp.launch(
                    compute,
                    dim=[BATCH_SIZE],
                    # out=None: training only needs the loss
                    inputs=[self.indices[b : b + BATCH_SIZE], *params, self.reference, loss, None],
                    block_dim=NUM_THREADS,
                )

            tape.backward(loss)
            optimizer.step(optimizer_grads)
            tape.zero()

    graph = capture.graph

    with wp.ScopedTimer("Training"):
        for epoch in range(self.max_epochs):
            with wp.ScopedTimer("Epoch"):
                wp.capture_launch(graph)
                # loss is zeroed before every batch, so this is the loss of
                # the last batch of the epoch
                print(f"Epoch: {epoch} Loss: {loss.numpy()}")

    # Evaluate the full image. ``loss`` is still passed and accumulated by the
    # kernel, but it is not read again after this launch.
    wp.launch(
        compute,
        dim=[IMG_WIDTH * IMG_HEIGHT],
        inputs=[self.indices, *params, self.reference, loss, output],
        block_dim=NUM_THREADS,
    )

    self.save_image("example_tile_mlp.jpg", output.numpy())
|
|
270
|
+
|
|
271
|
+
def train_torch(self):
    """Train the MLP with PyTorch as a reference for the Warp training path.

    The Warp weight and bias arrays are wrapped as torch parameters, one
    epoch (a sweep over ``self.indices`` in ``BATCH_SIZE`` chunks) is captured
    into a CUDA graph, and that graph is replayed ``self.max_epochs`` times.
    Afterwards the network is evaluated on every pixel and the result is
    written to ``example_tile_mlp_torch.jpg`` via ``save_image``.

    PyTorch is imported lazily so that the Warp-only path does not require it.
    A CUDA device is required (tensors are allocated with ``device="cuda"``).
    """
    import torch as tc

    weights_0 = tc.nn.Parameter(wp.to_torch(self.weights_0))
    weights_1 = tc.nn.Parameter(wp.to_torch(self.weights_1))
    weights_2 = tc.nn.Parameter(wp.to_torch(self.weights_2))
    weights_3 = tc.nn.Parameter(wp.to_torch(self.weights_3))

    bias_0 = tc.nn.Parameter(wp.to_torch(self.bias_0))
    bias_1 = tc.nn.Parameter(wp.to_torch(self.bias_1))
    bias_2 = tc.nn.Parameter(wp.to_torch(self.bias_2))
    bias_3 = tc.nn.Parameter(wp.to_torch(self.bias_3))

    indices = wp.to_torch(self.indices)
    reference = wp.to_torch(self.reference)

    # capturable=True is needed so that optimizer.step() can be recorded
    # inside the CUDA graph below.
    optimizer = tc.optim.Adam(
        [weights_0, bias_0, weights_1, bias_1, weights_2, bias_2, weights_3, bias_3],
        capturable=True,
        lr=0.0001,
        betas=(0.9, 0.95),
        eps=1.0e-6,
    )

    # generate frequency space encoding of pixels
    # based on their linear index in the image
    def encode(linear):
        row = (linear // IMG_WIDTH).float()
        col = (linear % IMG_WIDTH).float()

        # NOTE(review): row is normalized by IMG_WIDTH and col by IMG_HEIGHT,
        # which is only symmetric for square images. Left unchanged;
        # presumably it mirrors the Warp kernel - confirm.
        x = (row / float(IMG_WIDTH) - 0.5) * 2.0
        y = (col / float(IMG_HEIGHT) - 0.5) * 2.0

        encoding = tc.zeros((NUM_FREQ * 4, len(linear)), dtype=tc.float16, device="cuda")

        for s in range(NUM_FREQ):
            scale = math.pow(2.0, float(s)) * math.pi

            # Directly write the computed values into the encoding tensor
            encoding[s * 4 + 0, :] = tc.sin(scale * x)
            encoding[s * 4 + 1, :] = tc.cos(scale * x)
            encoding[s * 4 + 2, :] = tc.sin(scale * y)
            encoding[s * 4 + 3, :] = tc.cos(scale * y)

        return encoding

    def forward(features):
        """Apply the four ReLU layers to a (features, batch) matrix."""
        z = tc.relu(weights_0 @ features + bias_0)
        z = tc.relu(weights_1 @ z + bias_1)
        z = tc.relu(weights_2 @ z + bias_2)
        return tc.relu(weights_3 @ z + bias_3)

    def train_step(features, target):
        """Run one forward/backward/Adam update and return the MSE loss."""
        step_loss = tc.mean((forward(features) - target) ** 2)
        optimizer.zero_grad()
        step_loss.backward()
        optimizer.step()
        return step_loss

    stream = tc.cuda.Stream()
    graph = tc.cuda.CUDAGraph()

    # warm-up
    # The side stream must first wait for work already queued on the current
    # stream, and the current stream must wait for the warm-up to finish
    # before capture starts (see the PyTorch CUDA graphs recipe).
    stream.wait_stream(tc.cuda.current_stream())
    with tc.cuda.stream(stream):
        f = tc.rand((NUM_FREQ * 4, BATCH_SIZE), dtype=tc.float16, device="cuda")
        ref = tc.rand((3, BATCH_SIZE), dtype=tc.float16, device="cuda")
        loss = train_step(f, ref)
    tc.cuda.current_stream().wait_stream(stream)

    # Capture one full epoch; `loss` ends up referring to the loss tensor of
    # the last captured batch, which is refreshed on every replay.
    with tc.cuda.graph(graph):
        for b in range(0, IMG_WIDTH * IMG_HEIGHT, BATCH_SIZE):
            linear = indices[b : b + BATCH_SIZE]

            loss = train_step(encode(linear), reference[:, linear])

    with wp.ScopedTimer("Training (Torch)"):
        for _i in range(self.max_epochs):
            with wp.ScopedTimer("Epoch"):
                graph.replay()

            print(loss)

    # Evaluate the full image. No gradients are needed here, and the indices
    # are created on the GPU so the encoding is computed where it is stored.
    with tc.no_grad():
        f = encode(tc.arange(0, IMG_WIDTH * IMG_HEIGHT, device="cuda"))
        z = forward(f)

    self.save_image("example_tile_mlp_torch.jpg", z.detach().cpu().numpy())
|
|
365
|
+
|
|
366
|
+
def save_image(self, name, output):
    """Convert a network output to an 8-bit RGB image and save it.

    Args:
        name: Destination file name passed to ``PIL.Image.save``.
        output: NumPy array that is transposed and reshaped to
            ``(IMG_WIDTH, IMG_HEIGHT, 3)``; presumably laid out as
            ``(3, IMG_WIDTH * IMG_HEIGHT)`` with values nominally in [0, 1].
    """
    # NOTE(review): the reshape uses (IMG_WIDTH, IMG_HEIGHT); this only equals
    # the conventional (height, width) layout for square images - confirm.
    predicted_image = output.T.reshape(IMG_WIDTH, IMG_HEIGHT, 3)

    # Clamp before quantizing: values above 1.0 would otherwise exceed 255
    # and overflow/wrap when cast to uint8.
    predicted_image = (np.clip(predicted_image, 0.0, 1.0) * 255).astype(np.uint8)

    predicted_image_pil = Image.fromarray(predicted_image)
    predicted_image_pil.save(name)
|
|
372
|
+
|
|
373
|
+
|
|
374
|
+
if __name__ == "__main__":
    import argparse

    # Command-line entry point: train the MLP with the Warp implementation.
    cli = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    cli.add_argument(
        "--train_iters",
        type=int,
        default=20000,
        help="Total number of training iterations.",
    )

    # parse_known_args() is used so unrecognized arguments are ignored
    # instead of aborting the script.
    args, _unknown = cli.parse_known_args()

    with wp.ScopedDevice("cuda:0"):
        example = Example(args.train_iters)
        example.train_warp()
        # example.train_torch()
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
###########################################################################
|
|
17
|
+
# Example N-Body
|
|
18
|
+
#
|
|
19
|
+
# Shows how to simulate an N-Body gravitational problem using an all-pairs
|
|
20
|
+
# approach with Warp tile primitives.
|
|
21
|
+
#
|
|
22
|
+
# References:
|
|
23
|
+
# L. Nyland, M. Harris, and J. Prins. "Fast N-Body Simulation with
|
|
24
|
+
# CUDA" in GPU Gems 3. H. Nguyen, Addison-Wesley Professional, 2007.
|
|
25
|
+
# https://developer.nvidia.com/gpugems/gpugems3/part-v-physics-simulation/chapter-31-fast-n-body-simulation-cuda
|
|
26
|
+
#
|
|
27
|
+
###########################################################################
|
|
28
|
+
|
|
29
|
+
import argparse
|
|
30
|
+
|
|
31
|
+
import numpy as np
|
|
32
|
+
|
|
33
|
+
import warp as wp
|
|
34
|
+
|
|
35
|
+
# Initialize Warp before any constants or kernels are declared.
wp.init()

# Simulation parameters shared by the Warp function and kernel below.
PARTICLE_MASS = wp.constant(1.0)  # Mass factor applied to every pairwise acceleration
TILE_SIZE = wp.constant(64)  # Bodies per tile; also used as the launch block size
SOFTENING_SQ = wp.constant(0.1**2)  # Softening factor for numerical stability
DT = wp.constant(0.016)  # Timestep used for the velocity and position updates
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@wp.func
def body_body_interaction(p0: wp.vec3, pi: wp.vec3):
    """Acceleration felt at `p0` caused by the particle located at `pi`.

    The squared separation is padded with `SOFTENING_SQ`, so the result stays
    finite (and is zero) when both positions coincide.
    """
    # Vector pointing from the affected particle towards the source particle
    delta = pi - p0

    softened_sq = wp.length_sq(delta) + SOFTENING_SQ

    inv_r = 1.0 / wp.sqrt(softened_sq)
    inv_r3 = inv_r * inv_r * inv_r

    return PARTICLE_MASS * inv_r3 * delta
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@wp.kernel
def integrate_bodies_tiled(
    old_position: wp.array(dtype=wp.vec3),
    velocity: wp.array(dtype=wp.vec3),
    new_position: wp.array(dtype=wp.vec3),
    num_bodies: int,
):
    """Advance one body per thread by a single timestep of all-pairs gravity.

    Each thread sums `body_body_interaction` over the positions in
    `old_position`, visited one `TILE_SIZE`-long tile at a time, then updates
    `velocity` in place and writes the moved position to `new_position`.

    Only `num_bodies / TILE_SIZE` tiles are visited, so `num_bodies` is
    presumably expected to be a multiple of `TILE_SIZE` (the command-line help
    asks for a multiple of 64).
    """
    body = wp.tid()

    own_pos = old_position[body]

    total_acc = wp.vec3(0.0, 0.0, 0.0)

    for tile_index in range(num_bodies / TILE_SIZE):
        # Fetch the next TILE_SIZE positions, then accumulate their pull on this body
        tile = wp.tile_load(old_position, shape=TILE_SIZE, offset=tile_index * TILE_SIZE)
        for lane in range(TILE_SIZE):
            other_pos = tile[lane]
            total_acc += body_body_interaction(own_pos, other_pos)

    # Velocity is updated in place first ...
    velocity[body] = velocity[body] + total_acc * DT

    # ... and the freshly updated velocity moves the position into the second array
    new_position[body] = old_position[body] + DT * velocity[body]
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class Example:
    """All-pairs N-body gravity simulation driven by `integrate_bodies_tiled`.

    Bodies start in a spherical shell with velocities perpendicular to both
    their position and the z axis. Positions are double-buffered in
    `pos_array_0` / `pos_array_1`; after every step `pos_array_0` holds the
    latest state.

    Args:
        headless: When true, no matplotlib figure is created; `fig` and
            `scatter_plot` are `None` and `render()` does nothing.
        num_bodies: Number of simulated bodies.
    """

    def __init__(self, headless=False, num_bodies=16384):
        self.num_bodies = num_bodies

        rng = np.random.default_rng(42)

        # Sample the surface of a sphere
        phi = np.arccos(1.0 - 2.0 * rng.uniform(low=0.0, high=1.0, size=self.num_bodies))
        theta = rng.uniform(low=0.0, high=2.0 * np.pi, size=self.num_bodies)
        x = np.cos(theta) * np.sin(phi)
        y = np.sin(theta) * np.sin(phi)
        z = np.cos(phi)
        init_pos_np = np.stack((x, y, z), axis=1)

        scale = (num_bodies / 1024) ** (1 / 2)  # Scale factor to maintain a constant density
        inner = 0.9625 * scale
        outer = 1.54 * scale
        # Push every unit-sphere sample out to a random radius in [inner, outer)
        radii = inner + (outer - inner) * rng.uniform(size=(self.num_bodies, 1))
        init_pos_np = init_pos_np * radii

        # Initial velocities are perpendicular to both the position and the z axis
        axis = np.array([0.0, 0.0, 1.0])
        v_scale = scale * 3.08
        init_vel_np = v_scale * np.cross(init_pos_np, axis)

        self.graph_scale = np.max(radii) * 5.0
        self.pos_array_0 = wp.array(init_pos_np, dtype=wp.vec3)
        self.pos_array_1 = wp.empty_like(self.pos_array_0)
        self.vel_array = wp.array(init_vel_np, dtype=wp.vec3)

        # Always define `fig` so headless instances expose the attribute too;
        # create_plot() overwrites it when a figure is actually created.
        self.fig = None
        if headless:
            self.scatter_plot = None
        else:
            self.scatter_plot = self.create_plot()

    def create_plot(self):
        """Create the 3D matplotlib figure and return its scatter artist.

        The figure is stored on `self.fig`; matplotlib is imported lazily so
        headless runs do not need it.
        """
        import matplotlib.pyplot as plt

        # Create a figure and a 3D axis for the plot
        self.fig = plt.figure()
        ax = self.fig.add_subplot(111, projection="3d")

        # Scatter plot of initial positions
        point_size = 0.05 * self.graph_scale
        init_pos_np = self.pos_array_0.numpy()
        scatter_plot = ax.scatter(
            init_pos_np[:, 0], init_pos_np[:, 1], init_pos_np[:, 2], s=point_size, c="#76b900", alpha=0.5
        )

        # Set axis limits
        ax.set_xlim(-self.graph_scale, self.graph_scale)
        ax.set_ylim(-self.graph_scale, self.graph_scale)
        ax.set_zlim(-self.graph_scale, self.graph_scale)

        return scatter_plot

    def step(self):
        """Advance the simulation by one timestep and swap the position buffers."""
        wp.launch(
            integrate_bodies_tiled,
            dim=self.num_bodies,
            inputs=[self.pos_array_0, self.vel_array, self.pos_array_1, self.num_bodies],
            block_dim=TILE_SIZE,
        )

        # Swap arrays
        (self.pos_array_0, self.pos_array_1) = (self.pos_array_1, self.pos_array_0)

    def render(self):
        """Copy the current positions into the scatter plot (no-op when headless)."""
        if self.scatter_plot is None:
            # Headless instances have no plot to update; previously this
            # raised AttributeError on None.
            return

        positions_cpu = self.pos_array_0.numpy()

        # Update scatter plot positions
        self.scatter_plot._offsets3d = (
            positions_cpu[:, 0],
            positions_cpu[:, 1],
            positions_cpu[:, 2],
        )

    # Function to update the scatter plot
    def step_and_render(self, frame):
        """Animation callback: step once, then redraw. `frame` is unused."""
        self.step()
        self.render()
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
if __name__ == "__main__":
    # Command-line entry point for the N-body example.
    cli = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    cli.add_argument("--device", type=str, default=None, help="Override the default Warp device.")
    cli.add_argument("--num_frames", type=int, default=1000, help="Total number of frames.")
    cli.add_argument("-N", help="Number of bodies. Should be a multiple of 64.", type=int, default=16384)
    cli.add_argument(
        "--headless",
        action="store_true",
        help="Run in headless mode, suppressing the opening of any graphical windows.",
    )

    # Unrecognized arguments are ignored rather than treated as errors.
    args, _unknown = cli.parse_known_args()

    if args.device == "cpu":
        print("This example only runs on CUDA devices.")
        exit()

    with wp.ScopedDevice(args.device):
        example = Example(headless=args.headless, num_bodies=args.N)

        if args.headless:
            # No window: just advance the simulation for the requested frames.
            for _ in range(args.num_frames):
                example.step()

        else:
            import matplotlib.pyplot as plt
            from matplotlib.animation import FuncAnimation

            # Create the animation (the reference is kept alive until the window closes)
            ani = FuncAnimation(example.fig, example.step_and_render, frames=args.num_frames, interval=50, repeat=False)

            # Display the animation
            plt.show()
|
warp/fabric.py
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
#
|
|
4
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
# you may not use this file except in compliance with the License.
|
|
6
|
+
# You may obtain a copy of the License at
|
|
7
|
+
#
|
|
8
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
#
|
|
10
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
# See the License for the specific language governing permissions and
|
|
14
|
+
# limitations under the License.
|
|
15
|
+
|
|
16
|
+
# TODO: Remove after cleaning up the public API.
|
|
17
|
+
|
|
18
|
+
from warp._src import fabric as _fabric
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def __getattr__(name):
    """Resolve attribute lookups on this module through `get_deprecated_api`.

    Python calls this module-level hook for names that are not defined in the
    module itself. The lookup is delegated to `get_deprecated_api` with the
    internal `warp._src.fabric` module and the `"wp"` prefix; presumably that
    helper returns the matching internal object and reports the deprecated
    access path - confirm against `warp._src.utils`.
    """
    # Imported lazily, at lookup time, rather than when this module is imported.
    from warp._src.utils import get_deprecated_api as _resolve_deprecated

    resolved = _resolve_deprecated(_fabric, "wp", name)
    return resolved
|