warp-lang 1.10.0__py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of warp-lang might be problematic; see the registry listing for details.

Files changed (468)
  1. warp/__init__.py +334 -0
  2. warp/__init__.pyi +5856 -0
  3. warp/_src/__init__.py +14 -0
  4. warp/_src/autograd.py +1077 -0
  5. warp/_src/build.py +620 -0
  6. warp/_src/build_dll.py +642 -0
  7. warp/_src/builtins.py +10555 -0
  8. warp/_src/codegen.py +4361 -0
  9. warp/_src/config.py +178 -0
  10. warp/_src/constants.py +59 -0
  11. warp/_src/context.py +8352 -0
  12. warp/_src/dlpack.py +464 -0
  13. warp/_src/fabric.py +362 -0
  14. warp/_src/fem/__init__.py +14 -0
  15. warp/_src/fem/adaptivity.py +510 -0
  16. warp/_src/fem/cache.py +689 -0
  17. warp/_src/fem/dirichlet.py +190 -0
  18. warp/_src/fem/domain.py +553 -0
  19. warp/_src/fem/field/__init__.py +131 -0
  20. warp/_src/fem/field/field.py +703 -0
  21. warp/_src/fem/field/nodal_field.py +403 -0
  22. warp/_src/fem/field/restriction.py +39 -0
  23. warp/_src/fem/field/virtual.py +1021 -0
  24. warp/_src/fem/geometry/__init__.py +32 -0
  25. warp/_src/fem/geometry/adaptive_nanogrid.py +782 -0
  26. warp/_src/fem/geometry/closest_point.py +99 -0
  27. warp/_src/fem/geometry/deformed_geometry.py +277 -0
  28. warp/_src/fem/geometry/element.py +854 -0
  29. warp/_src/fem/geometry/geometry.py +693 -0
  30. warp/_src/fem/geometry/grid_2d.py +478 -0
  31. warp/_src/fem/geometry/grid_3d.py +539 -0
  32. warp/_src/fem/geometry/hexmesh.py +956 -0
  33. warp/_src/fem/geometry/nanogrid.py +660 -0
  34. warp/_src/fem/geometry/partition.py +483 -0
  35. warp/_src/fem/geometry/quadmesh.py +597 -0
  36. warp/_src/fem/geometry/tetmesh.py +762 -0
  37. warp/_src/fem/geometry/trimesh.py +588 -0
  38. warp/_src/fem/integrate.py +2507 -0
  39. warp/_src/fem/linalg.py +385 -0
  40. warp/_src/fem/operator.py +398 -0
  41. warp/_src/fem/polynomial.py +231 -0
  42. warp/_src/fem/quadrature/__init__.py +17 -0
  43. warp/_src/fem/quadrature/pic_quadrature.py +318 -0
  44. warp/_src/fem/quadrature/quadrature.py +665 -0
  45. warp/_src/fem/space/__init__.py +248 -0
  46. warp/_src/fem/space/basis_function_space.py +499 -0
  47. warp/_src/fem/space/basis_space.py +681 -0
  48. warp/_src/fem/space/dof_mapper.py +253 -0
  49. warp/_src/fem/space/function_space.py +312 -0
  50. warp/_src/fem/space/grid_2d_function_space.py +179 -0
  51. warp/_src/fem/space/grid_3d_function_space.py +229 -0
  52. warp/_src/fem/space/hexmesh_function_space.py +255 -0
  53. warp/_src/fem/space/nanogrid_function_space.py +199 -0
  54. warp/_src/fem/space/partition.py +435 -0
  55. warp/_src/fem/space/quadmesh_function_space.py +222 -0
  56. warp/_src/fem/space/restriction.py +221 -0
  57. warp/_src/fem/space/shape/__init__.py +152 -0
  58. warp/_src/fem/space/shape/cube_shape_function.py +1107 -0
  59. warp/_src/fem/space/shape/shape_function.py +134 -0
  60. warp/_src/fem/space/shape/square_shape_function.py +928 -0
  61. warp/_src/fem/space/shape/tet_shape_function.py +829 -0
  62. warp/_src/fem/space/shape/triangle_shape_function.py +674 -0
  63. warp/_src/fem/space/tetmesh_function_space.py +270 -0
  64. warp/_src/fem/space/topology.py +461 -0
  65. warp/_src/fem/space/trimesh_function_space.py +193 -0
  66. warp/_src/fem/types.py +114 -0
  67. warp/_src/fem/utils.py +488 -0
  68. warp/_src/jax.py +188 -0
  69. warp/_src/jax_experimental/__init__.py +14 -0
  70. warp/_src/jax_experimental/custom_call.py +389 -0
  71. warp/_src/jax_experimental/ffi.py +1286 -0
  72. warp/_src/jax_experimental/xla_ffi.py +658 -0
  73. warp/_src/marching_cubes.py +710 -0
  74. warp/_src/math.py +416 -0
  75. warp/_src/optim/__init__.py +14 -0
  76. warp/_src/optim/adam.py +165 -0
  77. warp/_src/optim/linear.py +1608 -0
  78. warp/_src/optim/sgd.py +114 -0
  79. warp/_src/paddle.py +408 -0
  80. warp/_src/render/__init__.py +14 -0
  81. warp/_src/render/imgui_manager.py +291 -0
  82. warp/_src/render/render_opengl.py +3638 -0
  83. warp/_src/render/render_usd.py +939 -0
  84. warp/_src/render/utils.py +162 -0
  85. warp/_src/sparse.py +2718 -0
  86. warp/_src/tape.py +1208 -0
  87. warp/_src/thirdparty/__init__.py +0 -0
  88. warp/_src/thirdparty/appdirs.py +598 -0
  89. warp/_src/thirdparty/dlpack.py +145 -0
  90. warp/_src/thirdparty/unittest_parallel.py +676 -0
  91. warp/_src/torch.py +393 -0
  92. warp/_src/types.py +5888 -0
  93. warp/_src/utils.py +1695 -0
  94. warp/autograd.py +33 -0
  95. warp/bin/libwarp-clang.dylib +0 -0
  96. warp/bin/libwarp.dylib +0 -0
  97. warp/build.py +29 -0
  98. warp/build_dll.py +24 -0
  99. warp/codegen.py +24 -0
  100. warp/constants.py +24 -0
  101. warp/context.py +33 -0
  102. warp/dlpack.py +24 -0
  103. warp/examples/__init__.py +24 -0
  104. warp/examples/assets/bear.usd +0 -0
  105. warp/examples/assets/bunny.usd +0 -0
  106. warp/examples/assets/cube.usd +0 -0
  107. warp/examples/assets/nonuniform.usd +0 -0
  108. warp/examples/assets/nvidia_logo.png +0 -0
  109. warp/examples/assets/pixel.jpg +0 -0
  110. warp/examples/assets/rocks.nvdb +0 -0
  111. warp/examples/assets/rocks.usd +0 -0
  112. warp/examples/assets/sphere.usd +0 -0
  113. warp/examples/assets/square_cloth.usd +0 -0
  114. warp/examples/benchmarks/benchmark_api.py +389 -0
  115. warp/examples/benchmarks/benchmark_cloth.py +296 -0
  116. warp/examples/benchmarks/benchmark_cloth_cupy.py +96 -0
  117. warp/examples/benchmarks/benchmark_cloth_jax.py +105 -0
  118. warp/examples/benchmarks/benchmark_cloth_numba.py +161 -0
  119. warp/examples/benchmarks/benchmark_cloth_numpy.py +85 -0
  120. warp/examples/benchmarks/benchmark_cloth_paddle.py +94 -0
  121. warp/examples/benchmarks/benchmark_cloth_pytorch.py +94 -0
  122. warp/examples/benchmarks/benchmark_cloth_taichi.py +120 -0
  123. warp/examples/benchmarks/benchmark_cloth_warp.py +153 -0
  124. warp/examples/benchmarks/benchmark_gemm.py +164 -0
  125. warp/examples/benchmarks/benchmark_interop_paddle.py +166 -0
  126. warp/examples/benchmarks/benchmark_interop_torch.py +166 -0
  127. warp/examples/benchmarks/benchmark_launches.py +301 -0
  128. warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
  129. warp/examples/benchmarks/benchmark_tile_sort.py +155 -0
  130. warp/examples/browse.py +37 -0
  131. warp/examples/core/example_cupy.py +86 -0
  132. warp/examples/core/example_dem.py +241 -0
  133. warp/examples/core/example_fluid.py +299 -0
  134. warp/examples/core/example_graph_capture.py +150 -0
  135. warp/examples/core/example_marching_cubes.py +195 -0
  136. warp/examples/core/example_mesh.py +180 -0
  137. warp/examples/core/example_mesh_intersect.py +211 -0
  138. warp/examples/core/example_nvdb.py +182 -0
  139. warp/examples/core/example_raycast.py +111 -0
  140. warp/examples/core/example_raymarch.py +205 -0
  141. warp/examples/core/example_render_opengl.py +290 -0
  142. warp/examples/core/example_sample_mesh.py +300 -0
  143. warp/examples/core/example_sph.py +411 -0
  144. warp/examples/core/example_spin_lock.py +93 -0
  145. warp/examples/core/example_torch.py +211 -0
  146. warp/examples/core/example_wave.py +269 -0
  147. warp/examples/core/example_work_queue.py +118 -0
  148. warp/examples/distributed/example_jacobi_mpi.py +506 -0
  149. warp/examples/fem/example_adaptive_grid.py +286 -0
  150. warp/examples/fem/example_apic_fluid.py +469 -0
  151. warp/examples/fem/example_burgers.py +261 -0
  152. warp/examples/fem/example_convection_diffusion.py +181 -0
  153. warp/examples/fem/example_convection_diffusion_dg.py +225 -0
  154. warp/examples/fem/example_darcy_ls_optimization.py +489 -0
  155. warp/examples/fem/example_deformed_geometry.py +172 -0
  156. warp/examples/fem/example_diffusion.py +196 -0
  157. warp/examples/fem/example_diffusion_3d.py +225 -0
  158. warp/examples/fem/example_diffusion_mgpu.py +225 -0
  159. warp/examples/fem/example_distortion_energy.py +228 -0
  160. warp/examples/fem/example_elastic_shape_optimization.py +387 -0
  161. warp/examples/fem/example_magnetostatics.py +242 -0
  162. warp/examples/fem/example_mixed_elasticity.py +293 -0
  163. warp/examples/fem/example_navier_stokes.py +263 -0
  164. warp/examples/fem/example_nonconforming_contact.py +300 -0
  165. warp/examples/fem/example_stokes.py +213 -0
  166. warp/examples/fem/example_stokes_transfer.py +262 -0
  167. warp/examples/fem/example_streamlines.py +357 -0
  168. warp/examples/fem/utils.py +1047 -0
  169. warp/examples/interop/example_jax_callable.py +146 -0
  170. warp/examples/interop/example_jax_ffi_callback.py +132 -0
  171. warp/examples/interop/example_jax_kernel.py +232 -0
  172. warp/examples/optim/example_diffray.py +561 -0
  173. warp/examples/optim/example_fluid_checkpoint.py +497 -0
  174. warp/examples/tile/example_tile_block_cholesky.py +502 -0
  175. warp/examples/tile/example_tile_cholesky.py +88 -0
  176. warp/examples/tile/example_tile_convolution.py +66 -0
  177. warp/examples/tile/example_tile_fft.py +55 -0
  178. warp/examples/tile/example_tile_filtering.py +113 -0
  179. warp/examples/tile/example_tile_matmul.py +85 -0
  180. warp/examples/tile/example_tile_mcgp.py +191 -0
  181. warp/examples/tile/example_tile_mlp.py +385 -0
  182. warp/examples/tile/example_tile_nbody.py +199 -0
  183. warp/fabric.py +24 -0
  184. warp/fem/__init__.py +173 -0
  185. warp/fem/adaptivity.py +26 -0
  186. warp/fem/cache.py +30 -0
  187. warp/fem/dirichlet.py +24 -0
  188. warp/fem/field/__init__.py +24 -0
  189. warp/fem/field/field.py +26 -0
  190. warp/fem/geometry/__init__.py +21 -0
  191. warp/fem/geometry/closest_point.py +31 -0
  192. warp/fem/linalg.py +38 -0
  193. warp/fem/operator.py +32 -0
  194. warp/fem/polynomial.py +29 -0
  195. warp/fem/space/__init__.py +22 -0
  196. warp/fem/space/basis_space.py +24 -0
  197. warp/fem/space/shape/__init__.py +68 -0
  198. warp/fem/space/topology.py +24 -0
  199. warp/fem/types.py +24 -0
  200. warp/fem/utils.py +32 -0
  201. warp/jax.py +29 -0
  202. warp/jax_experimental/__init__.py +29 -0
  203. warp/jax_experimental/custom_call.py +29 -0
  204. warp/jax_experimental/ffi.py +39 -0
  205. warp/jax_experimental/xla_ffi.py +24 -0
  206. warp/marching_cubes.py +24 -0
  207. warp/math.py +37 -0
  208. warp/native/array.h +1687 -0
  209. warp/native/builtin.h +2327 -0
  210. warp/native/bvh.cpp +562 -0
  211. warp/native/bvh.cu +826 -0
  212. warp/native/bvh.h +555 -0
  213. warp/native/clang/clang.cpp +541 -0
  214. warp/native/coloring.cpp +622 -0
  215. warp/native/crt.cpp +51 -0
  216. warp/native/crt.h +568 -0
  217. warp/native/cuda_crt.h +1058 -0
  218. warp/native/cuda_util.cpp +677 -0
  219. warp/native/cuda_util.h +313 -0
  220. warp/native/error.cpp +77 -0
  221. warp/native/error.h +36 -0
  222. warp/native/exports.h +2023 -0
  223. warp/native/fabric.h +246 -0
  224. warp/native/hashgrid.cpp +311 -0
  225. warp/native/hashgrid.cu +89 -0
  226. warp/native/hashgrid.h +240 -0
  227. warp/native/initializer_array.h +41 -0
  228. warp/native/intersect.h +1253 -0
  229. warp/native/intersect_adj.h +375 -0
  230. warp/native/intersect_tri.h +348 -0
  231. warp/native/mat.h +5189 -0
  232. warp/native/mathdx.cpp +93 -0
  233. warp/native/matnn.h +221 -0
  234. warp/native/mesh.cpp +266 -0
  235. warp/native/mesh.cu +406 -0
  236. warp/native/mesh.h +2097 -0
  237. warp/native/nanovdb/GridHandle.h +533 -0
  238. warp/native/nanovdb/HostBuffer.h +591 -0
  239. warp/native/nanovdb/NanoVDB.h +6246 -0
  240. warp/native/nanovdb/NodeManager.h +323 -0
  241. warp/native/nanovdb/PNanoVDB.h +3390 -0
  242. warp/native/noise.h +859 -0
  243. warp/native/quat.h +1664 -0
  244. warp/native/rand.h +342 -0
  245. warp/native/range.h +145 -0
  246. warp/native/reduce.cpp +174 -0
  247. warp/native/reduce.cu +363 -0
  248. warp/native/runlength_encode.cpp +79 -0
  249. warp/native/runlength_encode.cu +61 -0
  250. warp/native/scan.cpp +47 -0
  251. warp/native/scan.cu +55 -0
  252. warp/native/scan.h +23 -0
  253. warp/native/solid_angle.h +466 -0
  254. warp/native/sort.cpp +251 -0
  255. warp/native/sort.cu +286 -0
  256. warp/native/sort.h +35 -0
  257. warp/native/sparse.cpp +241 -0
  258. warp/native/sparse.cu +435 -0
  259. warp/native/spatial.h +1306 -0
  260. warp/native/svd.h +727 -0
  261. warp/native/temp_buffer.h +46 -0
  262. warp/native/tile.h +4124 -0
  263. warp/native/tile_radix_sort.h +1112 -0
  264. warp/native/tile_reduce.h +838 -0
  265. warp/native/tile_scan.h +240 -0
  266. warp/native/tuple.h +189 -0
  267. warp/native/vec.h +2199 -0
  268. warp/native/version.h +23 -0
  269. warp/native/volume.cpp +501 -0
  270. warp/native/volume.cu +68 -0
  271. warp/native/volume.h +970 -0
  272. warp/native/volume_builder.cu +483 -0
  273. warp/native/volume_builder.h +52 -0
  274. warp/native/volume_impl.h +70 -0
  275. warp/native/warp.cpp +1143 -0
  276. warp/native/warp.cu +4604 -0
  277. warp/native/warp.h +358 -0
  278. warp/optim/__init__.py +20 -0
  279. warp/optim/adam.py +24 -0
  280. warp/optim/linear.py +35 -0
  281. warp/optim/sgd.py +24 -0
  282. warp/paddle.py +24 -0
  283. warp/py.typed +0 -0
  284. warp/render/__init__.py +22 -0
  285. warp/render/imgui_manager.py +29 -0
  286. warp/render/render_opengl.py +24 -0
  287. warp/render/render_usd.py +24 -0
  288. warp/render/utils.py +24 -0
  289. warp/sparse.py +51 -0
  290. warp/tape.py +24 -0
  291. warp/tests/__init__.py +1 -0
  292. warp/tests/__main__.py +4 -0
  293. warp/tests/assets/curlnoise_golden.npy +0 -0
  294. warp/tests/assets/mlp_golden.npy +0 -0
  295. warp/tests/assets/pixel.npy +0 -0
  296. warp/tests/assets/pnoise_golden.npy +0 -0
  297. warp/tests/assets/spiky.usd +0 -0
  298. warp/tests/assets/test_grid.nvdb +0 -0
  299. warp/tests/assets/test_index_grid.nvdb +0 -0
  300. warp/tests/assets/test_int32_grid.nvdb +0 -0
  301. warp/tests/assets/test_vec_grid.nvdb +0 -0
  302. warp/tests/assets/torus.nvdb +0 -0
  303. warp/tests/assets/torus.usda +105 -0
  304. warp/tests/aux_test_class_kernel.py +34 -0
  305. warp/tests/aux_test_compile_consts_dummy.py +18 -0
  306. warp/tests/aux_test_conditional_unequal_types_kernels.py +29 -0
  307. warp/tests/aux_test_dependent.py +29 -0
  308. warp/tests/aux_test_grad_customs.py +29 -0
  309. warp/tests/aux_test_instancing_gc.py +26 -0
  310. warp/tests/aux_test_module_aot.py +7 -0
  311. warp/tests/aux_test_module_unload.py +23 -0
  312. warp/tests/aux_test_name_clash1.py +40 -0
  313. warp/tests/aux_test_name_clash2.py +40 -0
  314. warp/tests/aux_test_reference.py +9 -0
  315. warp/tests/aux_test_reference_reference.py +8 -0
  316. warp/tests/aux_test_square.py +16 -0
  317. warp/tests/aux_test_unresolved_func.py +22 -0
  318. warp/tests/aux_test_unresolved_symbol.py +22 -0
  319. warp/tests/cuda/__init__.py +0 -0
  320. warp/tests/cuda/test_async.py +676 -0
  321. warp/tests/cuda/test_conditional_captures.py +1147 -0
  322. warp/tests/cuda/test_ipc.py +124 -0
  323. warp/tests/cuda/test_mempool.py +233 -0
  324. warp/tests/cuda/test_multigpu.py +169 -0
  325. warp/tests/cuda/test_peer.py +139 -0
  326. warp/tests/cuda/test_pinned.py +84 -0
  327. warp/tests/cuda/test_streams.py +691 -0
  328. warp/tests/geometry/__init__.py +0 -0
  329. warp/tests/geometry/test_bvh.py +335 -0
  330. warp/tests/geometry/test_hash_grid.py +259 -0
  331. warp/tests/geometry/test_marching_cubes.py +294 -0
  332. warp/tests/geometry/test_mesh.py +318 -0
  333. warp/tests/geometry/test_mesh_query_aabb.py +392 -0
  334. warp/tests/geometry/test_mesh_query_point.py +935 -0
  335. warp/tests/geometry/test_mesh_query_ray.py +323 -0
  336. warp/tests/geometry/test_volume.py +1103 -0
  337. warp/tests/geometry/test_volume_write.py +346 -0
  338. warp/tests/interop/__init__.py +0 -0
  339. warp/tests/interop/test_dlpack.py +730 -0
  340. warp/tests/interop/test_jax.py +1673 -0
  341. warp/tests/interop/test_paddle.py +800 -0
  342. warp/tests/interop/test_torch.py +1001 -0
  343. warp/tests/run_coverage_serial.py +39 -0
  344. warp/tests/test_adam.py +162 -0
  345. warp/tests/test_arithmetic.py +1096 -0
  346. warp/tests/test_array.py +3756 -0
  347. warp/tests/test_array_reduce.py +156 -0
  348. warp/tests/test_assert.py +303 -0
  349. warp/tests/test_atomic.py +336 -0
  350. warp/tests/test_atomic_bitwise.py +209 -0
  351. warp/tests/test_atomic_cas.py +312 -0
  352. warp/tests/test_bool.py +220 -0
  353. warp/tests/test_builtins_resolution.py +732 -0
  354. warp/tests/test_closest_point_edge_edge.py +327 -0
  355. warp/tests/test_codegen.py +974 -0
  356. warp/tests/test_codegen_instancing.py +1495 -0
  357. warp/tests/test_compile_consts.py +215 -0
  358. warp/tests/test_conditional.py +298 -0
  359. warp/tests/test_context.py +35 -0
  360. warp/tests/test_copy.py +319 -0
  361. warp/tests/test_ctypes.py +618 -0
  362. warp/tests/test_dense.py +73 -0
  363. warp/tests/test_devices.py +127 -0
  364. warp/tests/test_enum.py +136 -0
  365. warp/tests/test_examples.py +424 -0
  366. warp/tests/test_fabricarray.py +998 -0
  367. warp/tests/test_fast_math.py +72 -0
  368. warp/tests/test_fem.py +2204 -0
  369. warp/tests/test_fixedarray.py +229 -0
  370. warp/tests/test_fp16.py +136 -0
  371. warp/tests/test_func.py +501 -0
  372. warp/tests/test_future_annotations.py +100 -0
  373. warp/tests/test_generics.py +656 -0
  374. warp/tests/test_grad.py +893 -0
  375. warp/tests/test_grad_customs.py +339 -0
  376. warp/tests/test_grad_debug.py +341 -0
  377. warp/tests/test_implicit_init.py +411 -0
  378. warp/tests/test_import.py +45 -0
  379. warp/tests/test_indexedarray.py +1140 -0
  380. warp/tests/test_intersect.py +103 -0
  381. warp/tests/test_iter.py +76 -0
  382. warp/tests/test_large.py +177 -0
  383. warp/tests/test_launch.py +411 -0
  384. warp/tests/test_lerp.py +151 -0
  385. warp/tests/test_linear_solvers.py +223 -0
  386. warp/tests/test_lvalue.py +427 -0
  387. warp/tests/test_map.py +526 -0
  388. warp/tests/test_mat.py +3515 -0
  389. warp/tests/test_mat_assign_copy.py +178 -0
  390. warp/tests/test_mat_constructors.py +573 -0
  391. warp/tests/test_mat_lite.py +122 -0
  392. warp/tests/test_mat_scalar_ops.py +2913 -0
  393. warp/tests/test_math.py +212 -0
  394. warp/tests/test_module_aot.py +287 -0
  395. warp/tests/test_module_hashing.py +258 -0
  396. warp/tests/test_modules_lite.py +70 -0
  397. warp/tests/test_noise.py +252 -0
  398. warp/tests/test_operators.py +299 -0
  399. warp/tests/test_options.py +129 -0
  400. warp/tests/test_overwrite.py +551 -0
  401. warp/tests/test_print.py +408 -0
  402. warp/tests/test_quat.py +2653 -0
  403. warp/tests/test_quat_assign_copy.py +145 -0
  404. warp/tests/test_rand.py +339 -0
  405. warp/tests/test_reload.py +303 -0
  406. warp/tests/test_rounding.py +157 -0
  407. warp/tests/test_runlength_encode.py +196 -0
  408. warp/tests/test_scalar_ops.py +133 -0
  409. warp/tests/test_smoothstep.py +108 -0
  410. warp/tests/test_snippet.py +318 -0
  411. warp/tests/test_sparse.py +845 -0
  412. warp/tests/test_spatial.py +2859 -0
  413. warp/tests/test_spatial_assign_copy.py +160 -0
  414. warp/tests/test_special_values.py +361 -0
  415. warp/tests/test_static.py +640 -0
  416. warp/tests/test_struct.py +901 -0
  417. warp/tests/test_tape.py +242 -0
  418. warp/tests/test_transient_module.py +93 -0
  419. warp/tests/test_triangle_closest_point.py +192 -0
  420. warp/tests/test_tuple.py +361 -0
  421. warp/tests/test_types.py +615 -0
  422. warp/tests/test_utils.py +594 -0
  423. warp/tests/test_vec.py +1408 -0
  424. warp/tests/test_vec_assign_copy.py +143 -0
  425. warp/tests/test_vec_constructors.py +325 -0
  426. warp/tests/test_vec_lite.py +80 -0
  427. warp/tests/test_vec_scalar_ops.py +2327 -0
  428. warp/tests/test_verify_fp.py +100 -0
  429. warp/tests/test_version.py +75 -0
  430. warp/tests/tile/__init__.py +0 -0
  431. warp/tests/tile/test_tile.py +1519 -0
  432. warp/tests/tile/test_tile_atomic_bitwise.py +403 -0
  433. warp/tests/tile/test_tile_cholesky.py +608 -0
  434. warp/tests/tile/test_tile_load.py +724 -0
  435. warp/tests/tile/test_tile_mathdx.py +156 -0
  436. warp/tests/tile/test_tile_matmul.py +179 -0
  437. warp/tests/tile/test_tile_mlp.py +400 -0
  438. warp/tests/tile/test_tile_reduce.py +950 -0
  439. warp/tests/tile/test_tile_shared_memory.py +376 -0
  440. warp/tests/tile/test_tile_sort.py +121 -0
  441. warp/tests/tile/test_tile_view.py +173 -0
  442. warp/tests/unittest_serial.py +47 -0
  443. warp/tests/unittest_suites.py +430 -0
  444. warp/tests/unittest_utils.py +469 -0
  445. warp/tests/walkthrough_debug.py +95 -0
  446. warp/torch.py +24 -0
  447. warp/types.py +51 -0
  448. warp/utils.py +31 -0
  449. warp_lang-1.10.0.dist-info/METADATA +459 -0
  450. warp_lang-1.10.0.dist-info/RECORD +468 -0
  451. warp_lang-1.10.0.dist-info/WHEEL +5 -0
  452. warp_lang-1.10.0.dist-info/licenses/LICENSE.md +176 -0
  453. warp_lang-1.10.0.dist-info/licenses/licenses/Gaia-LICENSE.txt +6 -0
  454. warp_lang-1.10.0.dist-info/licenses/licenses/appdirs-LICENSE.txt +22 -0
  455. warp_lang-1.10.0.dist-info/licenses/licenses/asset_pixel_jpg-LICENSE.txt +3 -0
  456. warp_lang-1.10.0.dist-info/licenses/licenses/cuda-LICENSE.txt +1582 -0
  457. warp_lang-1.10.0.dist-info/licenses/licenses/dlpack-LICENSE.txt +201 -0
  458. warp_lang-1.10.0.dist-info/licenses/licenses/fp16-LICENSE.txt +28 -0
  459. warp_lang-1.10.0.dist-info/licenses/licenses/libmathdx-LICENSE.txt +220 -0
  460. warp_lang-1.10.0.dist-info/licenses/licenses/llvm-LICENSE.txt +279 -0
  461. warp_lang-1.10.0.dist-info/licenses/licenses/moller-LICENSE.txt +16 -0
  462. warp_lang-1.10.0.dist-info/licenses/licenses/nanovdb-LICENSE.txt +2 -0
  463. warp_lang-1.10.0.dist-info/licenses/licenses/nvrtc-LICENSE.txt +1592 -0
  464. warp_lang-1.10.0.dist-info/licenses/licenses/svd-LICENSE.txt +23 -0
  465. warp_lang-1.10.0.dist-info/licenses/licenses/unittest_parallel-LICENSE.txt +21 -0
  466. warp_lang-1.10.0.dist-info/licenses/licenses/usd-LICENSE.txt +213 -0
  467. warp_lang-1.10.0.dist-info/licenses/licenses/windingnumber-LICENSE.txt +21 -0
  468. warp_lang-1.10.0.dist-info/top_level.txt +1 -0
warp/native/spatial.h ADDED
@@ -0,0 +1,1306 @@
+ /*
+  * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+  * SPDX-License-Identifier: Apache-2.0
+  *
+  * Licensed under the Apache License, Version 2.0 (the "License");
+  * you may not use this file except in compliance with the License.
+  * You may obtain a copy of the License at
+  *
+  *     http://www.apache.org/licenses/LICENSE-2.0
+  *
+  * Unless required by applicable law or agreed to in writing, software
+  * distributed under the License is distributed on an "AS IS" BASIS,
+  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  * See the License for the specific language governing permissions and
+  * limitations under the License.
+  */
+
+ #pragma once
+
+ namespace wp
+ {
+
+ //---------------------------------------------------------------------------------
+ // Represents a twist in se(3)
+ template <typename Type>
+ using spatial_vector_t = vec_t<6,Type>;
+
+ template<typename Type>
+ CUDA_CALLABLE inline Type spatial_dot(const spatial_vector_t<Type>& a, const spatial_vector_t<Type>& b)
+ {
+     return dot(a, b);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline vec_t<3,Type> &w_vec( spatial_vector_t<Type>& a )
+ {
+     return *reinterpret_cast<vec_t<3,Type>*>(&a);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline vec_t<3,Type> &v_vec( spatial_vector_t<Type>& a )
+ {
+     return *(vec_t<3,Type>*)(&a.c[3]);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline const vec_t<3,Type> &w_vec( const spatial_vector_t<Type>& a )
+ {
+     spatial_vector_t<Type> &non_const_vec = *reinterpret_cast<spatial_vector_t<Type>*>(const_cast<Type*>(&a.c[0]));
+     return w_vec(non_const_vec);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline const vec_t<3,Type> &v_vec( const spatial_vector_t<Type>& a )
+ {
+     spatial_vector_t<Type> &non_const_vec = *reinterpret_cast<spatial_vector_t<Type>*>(const_cast<Type*>(&a.c[0]));
+     return v_vec(non_const_vec);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline spatial_vector_t<Type> spatial_cross(const spatial_vector_t<Type>& a, const spatial_vector_t<Type>& b)
+ {
+     vec_t<3,Type> w = cross(w_vec(a), w_vec(b));
+     vec_t<3,Type> v = cross(v_vec(a), w_vec(b)) + cross(w_vec(a), v_vec(b));
+
+     return spatial_vector_t<Type>({w[0], w[1], w[2], v[0], v[1], v[2]});
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline spatial_vector_t<Type> spatial_cross_dual(const spatial_vector_t<Type>& a, const spatial_vector_t<Type>& b)
+ {
+     vec_t<3,Type> w = cross(w_vec(a), w_vec(b)) + cross(v_vec(a), v_vec(b));
+     vec_t<3,Type> v = cross(w_vec(a), v_vec(b));
+
+     return spatial_vector_t<Type>({w[0], w[1], w[2], v[0], v[1], v[2]});
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline vec_t<3,Type> spatial_top(const spatial_vector_t<Type>& a)
+ {
+     return w_vec(a);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline vec_t<3,Type> spatial_bottom(const spatial_vector_t<Type>& a)
+ {
+     return v_vec(a);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_spatial_dot(const spatial_vector_t<Type>& a, const spatial_vector_t<Type>& b, spatial_vector_t<Type>& adj_a, spatial_vector_t<Type>& adj_b, const Type& adj_ret)
+ {
+     adj_dot(a, b, adj_a, adj_b, adj_ret);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_spatial_cross(const spatial_vector_t<Type>& a, const spatial_vector_t<Type>& b, spatial_vector_t<Type>& adj_a, spatial_vector_t<Type>& adj_b, const spatial_vector_t<Type>& adj_ret)
+ {
+     adj_cross(w_vec(a), w_vec(b), w_vec(adj_a), w_vec(adj_b), w_vec(adj_ret));
+
+     adj_cross(v_vec(a), w_vec(b), v_vec(adj_a), w_vec(adj_b), v_vec(adj_ret));
+     adj_cross(w_vec(a), v_vec(b), w_vec(adj_a), v_vec(adj_b), v_vec(adj_ret));
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_spatial_cross_dual(const spatial_vector_t<Type>& a, const spatial_vector_t<Type>& b, spatial_vector_t<Type>& adj_a, spatial_vector_t<Type>& adj_b, const spatial_vector_t<Type>& adj_ret)
+ {
+     adj_cross(w_vec(a), w_vec(b), w_vec(adj_a), w_vec(adj_b), w_vec(adj_ret));
+     adj_cross(v_vec(a), v_vec(b), v_vec(adj_a), v_vec(adj_b), w_vec(adj_ret));
+
+     adj_cross(w_vec(a), v_vec(b), w_vec(adj_a), v_vec(adj_b), v_vec(adj_ret));
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_spatial_top(const spatial_vector_t<Type>& a, spatial_vector_t<Type>& adj_a, const vec_t<3,Type>& adj_ret)
+ {
+     w_vec(adj_a) += adj_ret;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_spatial_bottom(const spatial_vector_t<Type>& a, spatial_vector_t<Type>& adj_a, const vec_t<3,Type>& adj_ret)
+ {
+     v_vec(adj_a) += adj_ret;
+ }
+
+
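Reviewer note on the twist helpers above: `spatial_cross` is the motion-space cross product of two twists and `spatial_cross_dual` its force-space counterpart, while `w_vec`/`v_vec` expose in-place 3-vector views of the angular and linear halves. A minimal sketch of the intended call pattern, illustrative only and not part of the shipped header (it assumes the surrounding `wp` types from builtin.h):

    // Illustrative usage sketch (not part of spatial.h).
    // Twists are stored as (w, v): angular part first, then linear.
    wp::spatial_vector a({0.0f, 0.0f, 1.0f, 0.1f, 0.0f, 0.0f});
    wp::spatial_vector b({1.0f, 0.0f, 0.0f, 0.0f, 0.2f, 0.0f});

    wp::spatial_vector m = wp::spatial_cross(a, b);      // motion x motion product
    wp::spatial_vector f = wp::spatial_cross_dual(a, b); // dual (force-space) product

    float d = wp::spatial_dot(a, b);               // plain 6-D dot product
    wp::vec_t<3, float> w = wp::spatial_top(a);    // angular half, == w_vec(a)
    wp::vec_t<3, float> v = wp::spatial_bottom(a); // linear half,  == v_vec(a)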
+ //---------------------------------------------------------------------------------
+ // Represents a rigid body transformation
+
+ template<typename Type>
+ struct transform_t
+ {
+     vec_t<3,Type> p;
+     quat_t<Type> q;
+
+     CUDA_CALLABLE inline transform_t(vec_t<3,Type> p=vec_t<3,Type>(), quat_t<Type> q=quat_t<Type>()) : p(p), q(q) {}
+     CUDA_CALLABLE inline transform_t(Type) {} // helps uniform initialization
+
+     template<typename OtherType>
+     inline explicit CUDA_CALLABLE transform_t(const transform_t<OtherType>& other)
+     {
+         p = other.p;
+         q = other.q;
+     }
+
+     CUDA_CALLABLE inline transform_t(const initializer_array<7, Type> &l)
+     {
+         p = vec_t<3,Type>(l[0], l[1], l[2]);
+         q = quat_t<Type>(l[3], l[4], l[5], l[6]);
+     }
+
+     CUDA_CALLABLE inline Type operator[](int index) const
+     {
+         assert(index < 7);
+
+         return p.c[index];
+     }
+
+     CUDA_CALLABLE inline Type& operator[](int index)
+     {
+         assert(index < 7);
+
+         return p.c[index];
+     }
+ };
+
+ template<typename Type=float32>
+ CUDA_CALLABLE inline transform_t<Type> transform_identity()
+ {
+     return transform_t<Type>(vec_t<3,Type>(), quat_identity<Type>());
+ }
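Note that `operator[]` relies on `p` and `q` being laid out contiguously: `p.c[index]` is read for all seven indices, so 0-2 address the translation and 3-6 deliberately run past `p` into the quaternion, flattening a transform to `[p.x, p.y, p.z, q.x, q.y, q.z, q.w]`. A quick illustrative sketch of what that means in practice:

    // Illustrative usage sketch (not part of spatial.h).
    wp::transform t = wp::transform_identity();  // p = (0,0,0), q = (0,0,0,1)
    t[2] = 1.0f;        // same storage as t.p[2]
    float qw = t[6];    // same storage as t.q.w; 1.0f for the identity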
+
+ template<typename Type>
+ inline CUDA_CALLABLE transform_t<Type> operator - (const transform_t<Type>& x)
+ {
+     transform_t<Type> ret;
+
+     ret.p = -x.p;
+     ret.q = -x.q;
+
+     return ret;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> pos(const transform_t<Type>& x)
+ {
+     return x;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> neg(const transform_t<Type>& x)
+ {
+     return -x;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_neg(const transform_t<Type>& x, transform_t<Type>& adj_x, const transform_t<Type>& adj_ret)
+ {
+     adj_x -= adj_ret;
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE bool operator==(const transform_t<Type>& a, const transform_t<Type>& b)
+ {
+     return a.p == b.p && a.q == b.q;
+ }
+
+
+ template<typename Type>
+ inline bool CUDA_CALLABLE isfinite(const transform_t<Type>& t)
+ {
+     return isfinite(t.p) && isfinite(t.q);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline vec_t<3,Type> transform_get_translation(const transform_t<Type>& t)
+ {
+     return t.p;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline quat_t<Type> transform_get_rotation(const transform_t<Type>& t)
+ {
+     return t.q;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_transform_get_translation(const transform_t<Type>& t, transform_t<Type>& adj_t, const vec_t<3,Type>& adj_ret)
+ {
+     adj_t.p += adj_ret;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_transform_get_rotation(const transform_t<Type>& t, transform_t<Type>& adj_t, const quat_t<Type>& adj_ret)
+ {
+     adj_t.q += adj_ret;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void transform_set_translation(transform_t<Type>& t, const vec_t<3, Type>& p)
+ {
+     t.p = p;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void transform_set_rotation(transform_t<Type>& t, const quat_t<Type>& q)
+ {
+     t.q = q;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> transform_set_translation_copy(transform_t<Type>& t, const vec_t<3, Type>& p)
+ {
+     transform_t<Type> ret(t);
+     ret.p = p;
+     return ret;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> transform_set_rotation_copy(transform_t<Type>& t, const quat_t<Type>& q)
+ {
+     transform_t<Type> ret(t);
+     ret.q = q;
+     return ret;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_transform_set_translation(transform_t<Type>& t, const vec_t<3, Type>& p, const transform_t<Type>& adj_t, vec_t<3, Type>& adj_p)
+ {
+     adj_p += adj_t.p;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_transform_set_rotation(transform_t<Type>& t, const quat_t<Type>& q, const transform_t<Type>& adj_t, quat_t<Type>& adj_q)
+ {
+     adj_q += adj_t.q;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_transform_set_translation_copy(transform_t<Type>& t, const vec_t<3, Type>& p, transform_t<Type>& adj_t, vec_t<3, Type>& adj_p, const transform_t<Type>& adj_ret)
+ {
+     adj_p += adj_ret.p;
+     adj_t.q += adj_ret.q;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_transform_set_rotation_copy(transform_t<Type>& t, const quat_t<Type>& q, transform_t<Type>& adj_t, quat_t<Type>& adj_q, const transform_t<Type>& adj_ret)
+ {
+     adj_q += adj_ret.q;
+     adj_t.p += adj_ret.p;
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE void transform_add_inplace(transform_t<Type>& t, const vec_t<3, Type>& p)
+ {
+     t.p += p;
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE void transform_sub_inplace(transform_t<Type>& t, const vec_t<3, Type>& p)
+ {
+     t.p -= p;
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE void adj_transform_add_inplace(transform_t<Type>& t, const vec_t<3, Type>& p, transform_t<Type>& adj_t, vec_t<3, Type>& adj_p)
+ {
+     adj_p += adj_t.p;
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE void adj_transform_sub_inplace(transform_t<Type>& t, const vec_t<3, Type>& p, transform_t<Type>& adj_t, vec_t<3, Type>& adj_p)
+ {
+     adj_p -= adj_t.p;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> transform_multiply(const transform_t<Type>& a, const transform_t<Type>& b)
+ {
+     return { quat_rotate(a.q, b.p) + a.p, mul(a.q, b.q) };
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_transform_multiply(const transform_t<Type>& a, const transform_t<Type>& b, transform_t<Type>& adj_a, transform_t<Type>& adj_b, const transform_t<Type>& adj_ret)
+ {
+     // translational part
+     adj_quat_rotate(a.q, b.p, adj_a.q, adj_b.p, adj_ret.p);
+     adj_a.p += adj_ret.p;
+
+     // rotational part
+     adj_mul(a.q, b.q, adj_a.q, adj_b.q, adj_ret.q);
+ }
+
+
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> transform_inverse(const transform_t<Type>& t)
+ {
+     quat_t<Type> q_inv = quat_inverse(t.q);
+     return transform_t<Type>(-quat_rotate(q_inv, t.p), q_inv);
+ }
+
+
+ template<typename Type>
+ CUDA_CALLABLE inline vec_t<3,Type> transform_vector(const transform_t<Type>& t, const vec_t<3,Type>& x)
+ {
+     return quat_rotate(t.q, x);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline vec_t<3,Type> transform_point(const transform_t<Type>& t, const vec_t<3,Type>& x)
+ {
+     return t.p + quat_rotate(t.q, x);
+ }
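`transform_multiply` composes rigid transforms as `(a.q b.p + a.p, a.q b.q)`, `transform_inverse` is an exact inverse provided `q` has unit length, and `transform_point`/`transform_vector` differ only in whether the translation is applied. A round-trip sketch (illustrative; `quat_from_axis_angle` and the `vec3` alias come from the neighbouring quat.h/vec.h in this wheel):

    // Illustrative usage sketch (not part of spatial.h).
    wp::transform body(wp::vec3(0.0f, 1.0f, 0.0f),
                       wp::quat_from_axis_angle(wp::vec3(0.0f, 0.0f, 1.0f), 0.5f));

    wp::vec3 p_local(1.0f, 0.0f, 0.0f);
    wp::vec3 p_world = wp::transform_point(body, p_local);   // rotate, then translate
    wp::vec3 d_world = wp::transform_vector(body, p_local);  // rotate only

    // transform_inverse undoes transform_point exactly (unit quaternion assumed):
    wp::vec3 p_back = wp::transform_point(wp::transform_inverse(body), p_world);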
+
+ // not totally sure why you'd want to do this seeing as adding/subtracting two rotation
+ // quats doesn't seem to do anything meaningful
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> add(const transform_t<Type>& a, const transform_t<Type>& b)
+ {
+     return { a.p + b.p, a.q + b.q };
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> sub(const transform_t<Type>& a, const transform_t<Type>& b)
+ {
+     return { a.p - b.p, a.q - b.q };
+ }
+
+ // also not sure why you'd want to do this seeing as the quat would end up unnormalized
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> mul(const transform_t<Type>& a, Type s)
+ {
+     return { a.p*s, a.q*s };
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> mul(Type s, const transform_t<Type>& a)
+ {
+     return mul(a, s);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> mul(const transform_t<Type>& a, const transform_t<Type>& b)
+ {
+     return transform_multiply(a, b);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> operator*(const transform_t<Type>& a, Type s)
+ {
+     return mul(a, s);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> operator*(Type s, const transform_t<Type>& a)
+ {
+     return mul(a, s);
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE Type tensordot(const transform_t<Type>& a, const transform_t<Type>& b)
+ {
+     // corresponds to `np.tensordot()` with all axes being contracted
+     return tensordot(a.p, b.p) + tensordot(a.q, b.q);
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE Type extract(const transform_t<Type>& t, int idx)
+ {
+ #ifndef NDEBUG
+     if (idx < -7 || idx >= 7)
+     {
+         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+         assert(0);
+     }
+ #endif
+
+     if (idx < 0)
+     {
+         idx += 7;
+     }
+
+     return t[idx];
+ }
+
+ template<unsigned SliceLength, typename Type>
+ inline CUDA_CALLABLE vec_t<SliceLength, Type> extract(const transform_t<Type> & t, slice_t slice)
+ {
+     vec_t<SliceLength, Type> ret;
+
+     assert(slice.start >= 0 && slice.start <= 7);
+     assert(slice.stop >= -1 && slice.stop <= 7);
+     assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+     assert(slice_get_length(slice) == SliceLength);
+
+     bool is_reversed = slice.step < 0;
+
+     int ii = 0;
+     for (
+         int i = slice.start;
+         is_reversed ? (i > slice.stop) : (i < slice.stop);
+         i += slice.step
+     )
+     {
+         ret[ii] = t[i];
+         ++ii;
+     }
+
+     assert(ii == SliceLength);
+     return ret;
+ }
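The `extract` overloads mirror Python indexing on the 7-element transform: a negative integer index wraps once, and `slice_t` carries `start`/`stop`/`step` with the element count `SliceLength` fixed at compile time. A sketch of the intended equivalences (illustrative; constructing `slice_t` as an aggregate of `{start, stop, step}` is an assumption based on the fields used above):

    // Illustrative usage sketch (not part of spatial.h).
    // Assumes slice_t is an aggregate of {start, stop, step}.
    wp::transform t = wp::transform_identity();
    wp::slice_t s{0, 3, 1};                 // the Python-side t[0:3]
    wp::vec3 p = wp::extract<3>(t, s);      // same data as transform_get_translation(t)
    float pz = wp::extract(t, -5);          // negative index wraps: reads t[2]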
+
+ template<typename Type>
+ inline CUDA_CALLABLE Type* index(transform_t<Type>& t, int idx)
+ {
+ #ifndef NDEBUG
+     if (idx < -7 || idx >= 7)
+     {
+         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+         assert(0);
+     }
+ #endif
+
+     if (idx < 0)
+     {
+         idx += 7;
+     }
+
+     return &t[idx];
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE Type* indexref(transform_t<Type>* t, int idx)
+ {
+ #ifndef NDEBUG
+     if (idx < -7 || idx >= 7)
+     {
+         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+         assert(0);
+     }
+ #endif
+
+     if (idx < 0)
+     {
+         idx += 7;
+     }
+
+     return &((*t)[idx]);
+ }
+
+ template<typename Type>
+ inline void CUDA_CALLABLE adj_extract(const transform_t<Type>& t, int idx, transform_t<Type>& adj_t, int& adj_idx, Type adj_ret)
+ {
+     adj_t[idx] += adj_ret;
+ }
+
+ template<unsigned SliceLength, typename Type>
+ inline CUDA_CALLABLE void adj_extract(
+     const transform_t<Type>& t, slice_t slice,
+     transform_t<Type>& adj_t, slice_t& adj_slice,
+     const vec_t<SliceLength, Type>& adj_ret
+ )
+ {
+     assert(slice.start >= 0 && slice.start <= 7);
+     assert(slice.stop >= -1 && slice.stop <= 7);
+     assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+     assert(slice_get_length(slice) == SliceLength);
+
+     bool is_reversed = slice.step < 0;
+
+     int ii = 0;
+     for (
+         int i = slice.start;
+         is_reversed ? (i > slice.stop) : (i < slice.stop);
+         i += slice.step
+     )
+     {
+         adj_t[i] += adj_ret[ii];
+         ++ii;
+     }
+
+     assert(ii == SliceLength);
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE void adj_index(transform_t<Type>& t, int idx,
+     transform_t<Type>& adj_t, int adj_idx, const Type& adj_value)
+ {
+     // nop
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE void adj_indexref(transform_t<Type>* t, int idx,
+     transform_t<Type>& adj_t, int adj_idx, const Type& adj_value)
+ {
+     // nop
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE void add_inplace(transform_t<Type>& t, int idx, Type value)
+ {
+ #ifndef NDEBUG
+     if (idx < -7 || idx >= 7)
+     {
+         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+         assert(0);
+     }
+ #endif
+
+     if (idx < 0)
+     {
+         idx += 7;
+     }
+
+     t[idx] += value;
+ }
+
+
+ template<unsigned SliceLength, typename Type>
+ inline CUDA_CALLABLE void add_inplace(transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a)
+ {
+     assert(slice.start >= 0 && slice.start <= 7);
+     assert(slice.stop >= -1 && slice.stop <= 7);
+     assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+     assert(slice_get_length(slice) == SliceLength);
+
+     bool is_reversed = slice.step < 0;
+
+     int ii = 0;
+     for (
+         int i = slice.start;
+         is_reversed ? (i > slice.stop) : (i < slice.stop);
+         i += slice.step
+     )
+     {
+         t[i] += a[ii];
+         ++ii;
+     }
+
+     assert(ii == SliceLength);
+ }
+
+
+ template<typename Type>
+ inline CUDA_CALLABLE void adj_add_inplace(transform_t<Type>& t, int idx, Type value,
+     transform_t<Type>& adj_t, int adj_idx, Type& adj_value)
+ {
+ #ifndef NDEBUG
+     if (idx < -7 || idx >= 7)
+     {
+         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+         assert(0);
+     }
+ #endif
+
+     if (idx < 0)
+     {
+         idx += 7;
+     }
+
+     adj_value += adj_t[idx];
+ }
+
+
+ template<unsigned SliceLength, typename Type>
+ inline CUDA_CALLABLE void adj_add_inplace(
+     const transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a,
+     transform_t<Type>& adj_t, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
+ )
+ {
+     assert(slice.start >= 0 && slice.start <= 7);
+     assert(slice.stop >= -1 && slice.stop <= 7);
+     assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+     assert(slice_get_length(slice) == SliceLength);
+
+     bool is_reversed = slice.step < 0;
+
+     int ii = 0;
+     for (
+         int i = slice.start;
+         is_reversed ? (i > slice.stop) : (i < slice.stop);
+         i += slice.step
+     )
+     {
+         adj_a[ii] += adj_t[i];
+         ++ii;
+     }
+
+     assert(ii == SliceLength);
+ }
+
+
+ template<typename Type>
+ inline CUDA_CALLABLE void sub_inplace(transform_t<Type>& t, int idx, Type value)
+ {
+ #ifndef NDEBUG
+     if (idx < -7 || idx >= 7)
+     {
+         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+         assert(0);
+     }
+ #endif
+
+     if (idx < 0)
+     {
+         idx += 7;
+     }
+
+     t[idx] -= value;
+ }
+
+
+ template<unsigned SliceLength, typename Type>
+ inline CUDA_CALLABLE void sub_inplace(transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a)
+ {
+     assert(slice.start >= 0 && slice.start <= 7);
+     assert(slice.stop >= -1 && slice.stop <= 7);
+     assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+     assert(slice_get_length(slice) == SliceLength);
+
+     bool is_reversed = slice.step < 0;
+
+     int ii = 0;
+     for (
+         int i = slice.start;
+         is_reversed ? (i > slice.stop) : (i < slice.stop);
+         i += slice.step
+     )
+     {
+         t[i] -= a[ii];
+         ++ii;
+     }
+
+     assert(ii == SliceLength);
+ }
+
+
+ template<typename Type>
+ inline CUDA_CALLABLE void adj_sub_inplace(transform_t<Type>& t, int idx, Type value,
+     transform_t<Type>& adj_t, int adj_idx, Type& adj_value)
+ {
+ #ifndef NDEBUG
+     if (idx < -7 || idx >= 7)
+     {
+         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+         assert(0);
+     }
+ #endif
+
+     if (idx < 0)
+     {
+         idx += 7;
+     }
+
+     adj_value -= adj_t[idx];
+ }
+
+
+ template<unsigned SliceLength, typename Type>
+ inline CUDA_CALLABLE void adj_sub_inplace(
+     const transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a,
+     transform_t<Type>& adj_t, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
+ )
+ {
+     assert(slice.start >= 0 && slice.start <= 7);
+     assert(slice.stop >= -1 && slice.stop <= 7);
+     assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+     assert(slice_get_length(slice) == SliceLength);
+
+     bool is_reversed = slice.step < 0;
+
+     int ii = 0;
+     for (
+         int i = slice.start;
+         is_reversed ? (i > slice.stop) : (i < slice.stop);
+         i += slice.step
+     )
+     {
+         adj_a[ii] -= adj_t[i];
+         ++ii;
+     }
+
+     assert(ii == SliceLength);
+ }
+
+
+ template<typename Type>
+ inline CUDA_CALLABLE void assign_inplace(transform_t<Type>& t, int idx, Type value)
+ {
+ #ifndef NDEBUG
+     if (idx < -7 || idx >= 7)
+     {
+         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+         assert(0);
+     }
+ #endif
+
+     if (idx < 0)
+     {
+         idx += 7;
+     }
+
+     t[idx] = value;
+ }
+
+ template<unsigned SliceLength, typename Type>
+ inline CUDA_CALLABLE void assign_inplace(transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a)
+ {
+     assert(slice.start >= 0 && slice.start <= 7);
+     assert(slice.stop >= -1 && slice.stop <= 7);
+     assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+     assert(slice_get_length(slice) == SliceLength);
+
+     bool is_reversed = slice.step < 0;
+
+     int ii = 0;
+     for (
+         int i = slice.start;
+         is_reversed ? (i > slice.stop) : (i < slice.stop);
+         i += slice.step
+     )
+     {
+         t[i] = a[ii];
+         ++ii;
+     }
+
+     assert(ii == SliceLength);
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE void adj_assign_inplace(transform_t<Type>& t, int idx, Type value, transform_t<Type>& adj_t, int& adj_idx, Type& adj_value)
+ {
+ #ifndef NDEBUG
+     if (idx < -7 || idx >= 7)
+     {
+         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+         assert(0);
+     }
+ #endif
+
+     if (idx < 0)
+     {
+         idx += 7;
+     }
+
+     adj_value += adj_t[idx];
+ }
+
+ template<unsigned SliceLength, typename Type>
+ inline CUDA_CALLABLE void adj_assign_inplace(
+     const transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a,
+     transform_t<Type>& adj_t, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
+ )
+ {
+     assert(slice.start >= 0 && slice.start <= 7);
+     assert(slice.stop >= -1 && slice.stop <= 7);
+     assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+     assert(slice_get_length(slice) == SliceLength);
+
+     bool is_reversed = slice.step < 0;
+
+     int ii = 0;
+     for (
+         int i = slice.start;
+         is_reversed ? (i > slice.stop) : (i < slice.stop);
+         i += slice.step
+     )
+     {
+         adj_a[ii] += adj_t[i];
+         ++ii;
+     }
+
+     assert(ii == SliceLength);
+ }
+
+
+ template<typename Type>
+ inline CUDA_CALLABLE transform_t<Type> assign_copy(transform_t<Type>& t, int idx, Type value)
+ {
+ #ifndef NDEBUG
+     if (idx < -7 || idx >= 7)
+     {
+         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+         assert(0);
+     }
+ #endif
+
+     if (idx < 0)
+     {
+         idx += 7;
+     }
+
+     transform_t<Type> ret(t);
+     ret[idx] = value;
+     return ret;
+ }
+
+ template<unsigned SliceLength, typename Type>
+ inline CUDA_CALLABLE transform_t<Type> assign_copy(transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a)
+ {
+     transform_t<Type> ret(t);
+     assign_inplace<SliceLength>(ret, slice, a);
+     return ret;
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE void adj_assign_copy(transform_t<Type>& t, int idx, Type value, transform_t<Type>& adj_t, int& adj_idx, Type& adj_value, const transform_t<Type>& adj_ret)
+ {
+ #ifndef NDEBUG
+     if (idx < -7 || idx >= 7)
+     {
+         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+         assert(0);
+     }
+ #endif
+
+     if (idx < 0)
+     {
+         idx += 7;
+     }
+
+     adj_value += adj_ret[idx];
+     for(unsigned i=0; i < 7; ++i)
+     {
+         if (i != idx)
+             adj_t[i] += adj_ret[i];
+     }
+ }
+
+ template<unsigned SliceLength, typename Type>
+ inline CUDA_CALLABLE void adj_assign_copy(
+     transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a,
+     transform_t<Type>& adj_t, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a,
+     const transform_t<Type>& adj_ret
+ )
+ {
+     assert(slice.start >= 0 && slice.start <= 7);
+     assert(slice.stop >= -1 && slice.stop <= 7);
+     assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+     assert(slice_get_length(slice) == SliceLength);
+
+     bool is_reversed = slice.step < 0;
+
+     int ii = 0;
+     for (int i = 0; i < 7; ++i)
+     {
+         bool in_slice = is_reversed
+             ? (i <= slice.start && i > slice.stop && (slice.start - i) % (-slice.step) == 0)
+             : (i >= slice.start && i < slice.stop && (i - slice.start) % slice.step == 0);
+
+         if (!in_slice)
+         {
+             adj_t[i] += adj_ret[i];
+         }
+         else
+         {
+             adj_a[ii] += adj_ret[i];
+             ++ii;
+         }
+     }
+
+     assert(ii == SliceLength);
+ }
+
+
+ // adjoint methods
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_add(const transform_t<Type>& a, const transform_t<Type>& b, transform_t<Type>& adj_a, transform_t<Type>& adj_b, const transform_t<Type>& adj_ret)
+ {
+     adj_add(a.p, b.p, adj_a.p, adj_b.p, adj_ret.p);
+     adj_add(a.q, b.q, adj_a.q, adj_b.q, adj_ret.q);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_add(
+     const transform_t<Type>& a, Type b,
+     transform_t<Type>& adj_a, Type& adj_b,
+     const transform_t<Type>& adj_ret
+ )
+ {
+     adj_a += adj_ret;
+
+     adj_b += adj_ret.p[0];
+     adj_b += adj_ret.p[1];
+     adj_b += adj_ret.p[2];
+
+     adj_b += adj_ret.q[0];
+     adj_b += adj_ret.q[1];
+     adj_b += adj_ret.q[2];
+     adj_b += adj_ret.q[3];
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_sub(const transform_t<Type>& a, const transform_t<Type>& b, transform_t<Type>& adj_a, transform_t<Type>& adj_b, const transform_t<Type>& adj_ret)
+ {
+     adj_sub(a.p, b.p, adj_a.p, adj_b.p, adj_ret.p);
+     adj_sub(a.q, b.q, adj_a.q, adj_b.q, adj_ret.q);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_sub(
+     const transform_t<Type>& a, Type b,
+     transform_t<Type>& adj_a, Type& adj_b,
+     const transform_t<Type>& adj_ret
+ )
+ {
+     adj_a -= adj_ret;
+
+     adj_b -= adj_ret.p[0];
+     adj_b -= adj_ret.p[1];
+     adj_b -= adj_ret.p[2];
+
+     adj_b -= adj_ret.q[0];
+     adj_b -= adj_ret.q[1];
+     adj_b -= adj_ret.q[2];
+     adj_b -= adj_ret.q[3];
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_mul(const transform_t<Type>& a, Type s, transform_t<Type>& adj_a, Type& adj_s, const transform_t<Type>& adj_ret)
+ {
+     adj_mul(a.p, s, adj_a.p, adj_s, adj_ret.p);
+     adj_mul(a.q, s, adj_a.q, adj_s, adj_ret.q);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_mul(Type s, const transform_t<Type>& a, Type& adj_s, transform_t<Type>& adj_a, const transform_t<Type>& adj_ret)
+ {
+     adj_mul(a, s, adj_a, adj_s, adj_ret);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_mul(const transform_t<Type>& a, const transform_t<Type>& b, transform_t<Type>& adj_a, transform_t<Type>& adj_b, const transform_t<Type>& adj_ret)
+ {
+     adj_transform_multiply(a, b, adj_a, adj_b, adj_ret);
+ }
+
+
+ template<typename Type>
+ inline CUDA_CALLABLE transform_t<Type> atomic_add(transform_t<Type>* addr, const transform_t<Type>& value)
+ {
+     vec_t<3,Type> p = atomic_add(&addr->p, value.p);
+     quat_t<Type> q = atomic_add(&addr->q, value.q);
+
+     return transform_t<Type>(p, q);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_transform_t(const vec_t<3,Type>& p, const quat_t<Type>& q, vec_t<3,Type>& adj_p, quat_t<Type>& adj_q, const transform_t<Type>& adj_ret)
+ {
+     adj_p += adj_ret.p;
+     adj_q += adj_ret.q;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_transform_t(const initializer_array<7, Type> &l, const initializer_array<7, Type*>& adj_l, const transform_t<Type>& adj_ret)
+ {
+     *adj_l[0] += adj_ret.p[0];
+     *adj_l[1] += adj_ret.p[1];
+     *adj_l[2] += adj_ret.p[2];
+     *adj_l[3] += adj_ret.q[0];
+     *adj_l[4] += adj_ret.q[1];
+     *adj_l[5] += adj_ret.q[2];
+     *adj_l[6] += adj_ret.q[3];
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_transform_inverse(const transform_t<Type>& t, transform_t<Type>& adj_t, const transform_t<Type>& adj_ret)
+ {
+
+     // forward
+     quat_t<Type> q_inv = quat_inverse(t.q);
+     vec_t<3,Type> p = quat_rotate(q_inv, t.p);
+     vec_t<3,Type> np = -p;
+     // transform<Type> t = transform<Type>(np, q_inv)
+
+     // backward
+     quat_t<Type> adj_q_inv(0.0f);
+     quat_t<Type> adj_q(0.0f);
+     vec_t<3,Type> adj_p(0.0f);
+     vec_t<3,Type> adj_np(0.0f);
+
+     adj_transform_t(np, q_inv, adj_np, adj_q_inv, adj_ret);
+     adj_p = -adj_np;
+     adj_quat_rotate(q_inv, t.p, adj_q_inv, adj_t.p, adj_p);
+     adj_quat_inverse(t.q, adj_t.q, adj_q_inv);
+
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_transform_vector(const transform_t<Type>& t, const vec_t<3,Type>& x, transform_t<Type>& adj_t, vec_t<3,Type>& adj_x, const vec_t<3,Type>& adj_ret)
+ {
+     adj_quat_rotate(t.q, x, adj_t.q, adj_x, adj_ret);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_transform_point(const transform_t<Type>& t, const vec_t<3,Type>& x, transform_t<Type>& adj_t, vec_t<3,Type>& adj_x, const vec_t<3,Type>& adj_ret)
+ {
+     adj_quat_rotate(t.q, x, adj_t.q, adj_x, adj_ret);
+     adj_t.p += adj_ret;
+ }
+
+
+ template<typename Type>
+ CUDA_CALLABLE void print(transform_t<Type> t);
+
+ template<typename Type>
+ CUDA_CALLABLE inline transform_t<Type> lerp(const transform_t<Type>& a, const transform_t<Type>& b, Type t)
+ {
+     return a*(Type(1)-t) + b*t;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_lerp(const transform_t<Type>& a, const transform_t<Type>& b, Type t, transform_t<Type>& adj_a, transform_t<Type>& adj_b, Type& adj_t, const transform_t<Type>& adj_ret)
+ {
+     adj_a += adj_ret*(Type(1)-t);
+     adj_b += adj_ret*t;
+     adj_t += tensordot(b, adj_ret) - tensordot(a, adj_ret);
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline int len(const transform_t<Type>& t)
+ {
+     return 7;
+ }
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_len(const transform_t<Type>& t, transform_t<Type>& adj_t, const int& adj_ret)
+ {
+ }
+
+ template<typename Type>
+ using spatial_matrix_t = mat_t<6,6,Type>;
+
+ template<typename Type>
+ inline CUDA_CALLABLE spatial_matrix_t<Type> spatial_adjoint(const mat_t<3,3,Type>& R, const mat_t<3,3,Type>& S)
+ {
+     spatial_matrix_t<Type> adT;
+
+     // T = [R 0]
+     //     [S R]
+
+     // diagonal blocks
+     for (int i=0; i < 3; ++i)
+     {
+         for (int j=0; j < 3; ++j)
+         {
+             adT.data[i][j] = R.data[i][j];
+             adT.data[i+3][j+3] = R.data[i][j];
+         }
+     }
+
+     // lower off diagonal
+     for (int i=0; i < 3; ++i)
+     {
+         for (int j=0; j < 3; ++j)
+         {
+             adT.data[i+3][j] = S.data[i][j];
+         }
+     }
+
+     return adT;
+ }
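`spatial_adjoint` packs two 3x3 blocks into the 6x6 layout `[[R, 0], [S, R]]` shown in the comment; the choice of `S` is left to the caller. A sketch of one common construction, the motion-space adjoint of a frame change `(R, p)` with `S = skew(p) R` (illustrative; that choice of `S`, plus `quat_to_matrix` and the `mat33` alias from the neighbouring headers, are assumptions):

    // Illustrative usage sketch (not part of spatial.h).
    wp::quat q = wp::quat_from_axis_angle(wp::vec3(0.0f, 0.0f, 1.0f), 0.5f);
    wp::vec3 p(0.0f, 1.0f, 0.0f);

    wp::mat33 R = wp::quat_to_matrix(q);
    wp::mat33 skew_p( 0.0f, -p[2],  p[1],    // skew(p), so skew(p)*x == cross(p, x)
                      p[2],  0.0f, -p[0],
                     -p[1],  p[0],  0.0f);
    wp::spatial_matrix X = wp::spatial_adjoint(R, wp::mul(skew_p, R));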
+
+ template<typename Type>
+ inline CUDA_CALLABLE void adj_spatial_adjoint(const mat_t<3,3,Type>& R, const mat_t<3,3,Type>& S, mat_t<3,3,Type>& adj_R, mat_t<3,3,Type>& adj_S, const spatial_matrix_t<Type>& adj_ret)
+ {
+     // diagonal blocks
+     for (int i=0; i < 3; ++i)
+     {
+         for (int j=0; j < 3; ++j)
+         {
+             adj_R.data[i][j] += adj_ret.data[i][j];
+             adj_R.data[i][j] += adj_ret.data[i+3][j+3];
+         }
+     }
+
+     // lower off diagonal
+     for (int i=0; i < 3; ++i)
+     {
+         for (int j=0; j < 3; ++j)
+         {
+             adj_S.data[i][j] += adj_ret.data[i+3][j];
+         }
+     }
+ }
+
+
+ CUDA_CALLABLE inline int row_index(int stride, int i, int j)
+ {
+     return i*stride + j;
+ }
+
+ // builds the spatial Jacobian J, a (joint_count*6) x (dof_count) matrix
+ template<typename Type>
+ CUDA_CALLABLE inline void spatial_jacobian(
+     const spatial_vector_t<Type>* S,
+     const int* joint_parents,
+     const int* joint_qd_start,
+     int joint_start, // offset of the first joint for the articulation
+     int joint_count,
+     int J_start,
+     Type* J)
+ {
+     const int articulation_dof_start = joint_qd_start[joint_start];
+     const int articulation_dof_end = joint_qd_start[joint_start + joint_count];
+     const int articulation_dof_count = articulation_dof_end-articulation_dof_start;
+
+     // shift output pointers
+     const int S_start = articulation_dof_start;
+
+     S += S_start;
+     J += J_start;
+
+     for (int i=0; i < joint_count; ++i)
+     {
+         const int row_start = i * 6;
+
+         int j = joint_start + i;
+         while (j != -1)
+         {
+             const int joint_dof_start = joint_qd_start[j];
+             const int joint_dof_end = joint_qd_start[j+1];
+             const int joint_dof_count = joint_dof_end-joint_dof_start;
+
+             // fill out each row of the Jacobian walking up the tree
+             //for (int col=dof_start; col < dof_end; ++col)
+             for (int dof=0; dof < joint_dof_count; ++dof)
+             {
+                 const int col = (joint_dof_start-articulation_dof_start) + dof;
+
+                 J[row_index(articulation_dof_count, row_start+0, col)] = S[col].w[0];
+                 J[row_index(articulation_dof_count, row_start+1, col)] = S[col].w[1];
+                 J[row_index(articulation_dof_count, row_start+2, col)] = S[col].w[2];
+                 J[row_index(articulation_dof_count, row_start+3, col)] = S[col].v[0];
+                 J[row_index(articulation_dof_count, row_start+4, col)] = S[col].v[1];
+                 J[row_index(articulation_dof_count, row_start+5, col)] = S[col].v[2];
+             }
+
+             j = joint_parents[j];
+         }
+     }
+ }
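`spatial_jacobian` fills a dense row-major block with six rows per joint and `articulation_dof_count` columns: for each joint it walks `joint_parents` toward the root and scatters every ancestor dof's motion subspace `S[col]` into that joint's six rows. A host-side driver sketch for a two-joint chain (illustrative; the buffer sizes are the point, and the motion subspaces are left unfilled):

    // Illustrative driver sketch (not part of spatial.h).
    // Two-joint serial chain: joint 0 is the root (parent -1), joint 1 hangs off it.
    const int joint_parents[]  = { -1, 0 };
    const int joint_qd_start[] = { 0, 1, 2 };   // one dof per joint, 2 dofs total
    wp::spatial_vector S[2];                    // per-dof motion subspaces

    // J is (joint_count*6) x dof_count = 12 x 2, row-major, written at offset 0.
    float J[12 * 2] = {};
    wp::spatial_jacobian(S, joint_parents, joint_qd_start,
                         /*joint_start=*/0, /*joint_count=*/2, /*J_start=*/0, J);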
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_spatial_jacobian(
+     const spatial_vector_t<Type>* S,
+     const int* joint_parents,
+     const int* joint_qd_start,
+     const int joint_start,
+     const int joint_count,
+     const int J_start,
+     const Type* J,
+     // adjs
+     spatial_vector_t<Type>* adj_S,
+     int* adj_joint_parents,
+     int* adj_joint_qd_start,
+     int& adj_joint_start,
+     int& adj_joint_count,
+     int& adj_J_start,
+     const Type* adj_J)
+ {
+     const int articulation_dof_start = joint_qd_start[joint_start];
+     const int articulation_dof_end = joint_qd_start[joint_start + joint_count];
+     const int articulation_dof_count = articulation_dof_end-articulation_dof_start;
+
+     // shift output pointers
+     const int S_start = articulation_dof_start;
+
+     S += S_start;
+     J += J_start;
+
+     adj_S += S_start;
+     adj_J += J_start;
+
+     for (int i=0; i < joint_count; ++i)
+     {
+         const int row_start = i * 6;
+
+         int j = joint_start + i;
+         while (j != -1)
+         {
+             const int joint_dof_start = joint_qd_start[j];
+             const int joint_dof_end = joint_qd_start[j+1];
+             const int joint_dof_count = joint_dof_end-joint_dof_start;
+
+             // fill out each row of the Jacobian walking up the tree
+             //for (int col=dof_start; col < dof_end; ++col)
+             for (int dof=0; dof < joint_dof_count; ++dof)
+             {
+                 const int col = (joint_dof_start-articulation_dof_start) + dof;
+
+                 adj_S[col].w[0] += adj_J[row_index(articulation_dof_count, row_start+0, col)];
+                 adj_S[col].w[1] += adj_J[row_index(articulation_dof_count, row_start+1, col)];
+                 adj_S[col].w[2] += adj_J[row_index(articulation_dof_count, row_start+2, col)];
+                 adj_S[col].v[0] += adj_J[row_index(articulation_dof_count, row_start+3, col)];
+                 adj_S[col].v[1] += adj_J[row_index(articulation_dof_count, row_start+4, col)];
+                 adj_S[col].v[2] += adj_J[row_index(articulation_dof_count, row_start+5, col)];
+             }
+
+             j = joint_parents[j];
+         }
+     }
+ }
+
+
+ template<typename Type>
+ CUDA_CALLABLE inline void spatial_mass(const spatial_matrix_t<Type>* I_s, int joint_start, int joint_count, int M_start, Type* M)
+ {
+     const int stride = joint_count*6;
+
+     for (int l=0; l < joint_count; ++l)
+     {
+         for (int i=0; i < 6; ++i)
+         {
+             for (int j=0; j < 6; ++j)
+             {
+                 M[M_start + row_index(stride, l*6 + i, l*6 + j)] = I_s[joint_start + l].data[i][j];
+             }
+         }
+     }
+ }
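`spatial_mass` is the companion scatter for the joint-space mass matrix: it copies each joint's 6x6 spatial inertia onto the diagonal blocks of a dense row-major `M` with stride `joint_count*6`. Continuing the two-joint driver above (illustrative):

    // Illustrative sketch (not part of spatial.h), continuing the driver above.
    wp::spatial_matrix I_s[2];     // per-joint spatial inertias
    float M[12 * 12] = {};         // (2*6) x (2*6), row-major
    wp::spatial_mass(I_s, /*joint_start=*/0, /*joint_count=*/2, /*M_start=*/0, M);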
+
+ template<typename Type>
+ CUDA_CALLABLE inline void adj_spatial_mass(
+     const spatial_matrix_t<Type>* I_s,
+     const int joint_start,
+     const int joint_count,
+     const int M_start,
+     const Type* M,
+     spatial_matrix_t<Type>* adj_I_s,
+     int& adj_joint_start,
+     int& adj_joint_count,
+     int& adj_M_start,
+     const Type* adj_M)
+ {
+     const int stride = joint_count*6;
+
+     for (int l=0; l < joint_count; ++l)
+     {
+         for (int i=0; i < 6; ++i)
+         {
+             for (int j=0; j < 6; ++j)
+             {
+                 adj_I_s[joint_start + l].data[i][j] += adj_M[M_start + row_index(stride, l*6 + i, l*6 + j)];
+             }
+         }
+     }
+ }
+
+ using transform = transform_t<float>;
+ using transformh = transform_t<half>;
+ using transformf = transform_t<float>;
+ using transformd = transform_t<double>;
+
+ using spatial_vector = spatial_vector_t<float>;
+ using spatial_vectorh = spatial_vector_t<half>;
+ using spatial_vectorf = spatial_vector_t<float>;
+ using spatial_vectord = spatial_vector_t<double>;
+
+ using spatial_matrix = spatial_matrix_t<float>;
+ using spatial_matrixh = spatial_matrix_t<half>;
+ using spatial_matrixf = spatial_matrix_t<float>;
+ using spatial_matrixd = spatial_matrix_t<double>;
+
+ } // namespace wp