warp-lang 1.6.1-py3-none-win_amd64.whl → 1.7.0-py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic.
- warp/__init__.py +21 -7
- warp/autograd.py +14 -6
- warp/bin/warp-clang.dll +0 -0
- warp/bin/warp.dll +0 -0
- warp/build.py +424 -6
- warp/build_dll.py +20 -20
- warp/builtins.py +467 -368
- warp/codegen.py +193 -125
- warp/config.py +56 -12
- warp/constants.py +14 -6
- warp/context.py +524 -277
- warp/dlpack.py +22 -12
- warp/examples/__init__.py +14 -6
- warp/examples/assets/nonuniform.usd +0 -0
- warp/examples/assets/nvidia_logo.png +0 -0
- warp/examples/benchmarks/benchmark_api.py +14 -6
- warp/examples/benchmarks/benchmark_cloth.py +14 -6
- warp/examples/benchmarks/benchmark_cloth_cupy.py +14 -6
- warp/examples/benchmarks/benchmark_cloth_jax.py +14 -6
- warp/examples/benchmarks/benchmark_cloth_numba.py +15 -0
- warp/examples/benchmarks/benchmark_cloth_numpy.py +14 -6
- warp/examples/benchmarks/benchmark_cloth_paddle.py +14 -6
- warp/examples/benchmarks/benchmark_cloth_pytorch.py +14 -6
- warp/examples/benchmarks/benchmark_cloth_taichi.py +14 -6
- warp/examples/benchmarks/benchmark_cloth_warp.py +14 -6
- warp/examples/benchmarks/benchmark_gemm.py +82 -48
- warp/examples/benchmarks/benchmark_interop_paddle.py +14 -6
- warp/examples/benchmarks/benchmark_interop_torch.py +14 -6
- warp/examples/benchmarks/benchmark_launches.py +14 -6
- warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
- warp/examples/browse.py +14 -6
- warp/examples/core/example_cupy.py +14 -6
- warp/examples/core/example_dem.py +14 -6
- warp/examples/core/example_fluid.py +14 -6
- warp/examples/core/example_graph_capture.py +14 -6
- warp/examples/core/example_marching_cubes.py +14 -6
- warp/examples/core/example_mesh.py +14 -6
- warp/examples/core/example_mesh_intersect.py +14 -6
- warp/examples/core/example_nvdb.py +14 -6
- warp/examples/core/example_raycast.py +14 -6
- warp/examples/core/example_raymarch.py +14 -6
- warp/examples/core/example_render_opengl.py +14 -6
- warp/examples/core/example_sample_mesh.py +300 -0
- warp/examples/core/example_sph.py +14 -6
- warp/examples/core/example_torch.py +14 -6
- warp/examples/core/example_wave.py +14 -6
- warp/examples/fem/example_adaptive_grid.py +14 -6
- warp/examples/fem/example_apic_fluid.py +15 -7
- warp/examples/fem/example_burgers.py +16 -8
- warp/examples/fem/example_convection_diffusion.py +14 -6
- warp/examples/fem/example_convection_diffusion_dg.py +14 -6
- warp/examples/fem/example_deformed_geometry.py +15 -7
- warp/examples/fem/example_diffusion.py +14 -6
- warp/examples/fem/example_diffusion_3d.py +14 -6
- warp/examples/fem/example_diffusion_mgpu.py +14 -6
- warp/examples/fem/example_distortion_energy.py +15 -7
- warp/examples/fem/example_magnetostatics.py +20 -12
- warp/examples/fem/example_mixed_elasticity.py +14 -6
- warp/examples/fem/example_navier_stokes.py +14 -6
- warp/examples/fem/example_nonconforming_contact.py +14 -6
- warp/examples/fem/example_stokes.py +14 -6
- warp/examples/fem/example_stokes_transfer.py +14 -6
- warp/examples/fem/example_streamlines.py +14 -6
- warp/examples/fem/utils.py +24 -3
- warp/examples/interop/example_jax_callable.py +116 -0
- warp/examples/interop/example_jax_ffi_callback.py +132 -0
- warp/examples/interop/example_jax_kernel.py +205 -0
- warp/examples/optim/example_bounce.py +14 -6
- warp/examples/optim/example_cloth_throw.py +14 -6
- warp/examples/optim/example_diffray.py +14 -6
- warp/examples/optim/example_drone.py +14 -6
- warp/examples/optim/example_fluid_checkpoint.py +497 -0
- warp/examples/optim/example_inverse_kinematics.py +14 -6
- warp/examples/optim/example_inverse_kinematics_torch.py +14 -6
- warp/examples/optim/example_softbody_properties.py +14 -6
- warp/examples/optim/example_spring_cage.py +14 -6
- warp/examples/optim/example_trajectory.py +14 -6
- warp/examples/sim/example_cartpole.py +14 -6
- warp/examples/sim/example_cloth.py +14 -6
- warp/examples/sim/example_cloth_self_contact.py +14 -6
- warp/examples/sim/example_granular.py +14 -6
- warp/examples/sim/example_granular_collision_sdf.py +14 -6
- warp/examples/sim/example_jacobian_ik.py +14 -6
- warp/examples/sim/example_particle_chain.py +14 -6
- warp/examples/sim/example_quadruped.py +14 -6
- warp/examples/sim/example_rigid_chain.py +14 -6
- warp/examples/sim/example_rigid_contact.py +14 -6
- warp/examples/sim/example_rigid_force.py +14 -6
- warp/examples/sim/example_rigid_gyroscopic.py +14 -6
- warp/examples/sim/example_rigid_soft_contact.py +14 -6
- warp/examples/sim/example_soft_body.py +14 -6
- warp/examples/tile/example_tile_cholesky.py +14 -6
- warp/examples/tile/example_tile_convolution.py +14 -6
- warp/examples/tile/example_tile_fft.py +14 -6
- warp/examples/tile/example_tile_filtering.py +14 -6
- warp/examples/tile/example_tile_matmul.py +16 -10
- warp/examples/tile/example_tile_mlp.py +14 -6
- warp/examples/tile/example_tile_nbody.py +14 -6
- warp/examples/tile/example_tile_walker.py +14 -6
- warp/fabric.py +15 -0
- warp/fem/__init__.py +26 -1
- warp/fem/adaptivity.py +19 -4
- warp/fem/cache.py +15 -0
- warp/fem/dirichlet.py +15 -0
- warp/fem/domain.py +15 -0
- warp/fem/field/__init__.py +15 -0
- warp/fem/field/field.py +15 -0
- warp/fem/field/nodal_field.py +37 -68
- warp/fem/field/restriction.py +15 -0
- warp/fem/field/virtual.py +77 -23
- warp/fem/geometry/__init__.py +15 -0
- warp/fem/geometry/adaptive_nanogrid.py +24 -10
- warp/fem/geometry/closest_point.py +16 -1
- warp/fem/geometry/deformed_geometry.py +20 -2
- warp/fem/geometry/element.py +15 -0
- warp/fem/geometry/geometry.py +20 -0
- warp/fem/geometry/grid_2d.py +27 -12
- warp/fem/geometry/grid_3d.py +27 -15
- warp/fem/geometry/hexmesh.py +20 -7
- warp/fem/geometry/nanogrid.py +24 -11
- warp/fem/geometry/partition.py +15 -0
- warp/fem/geometry/quadmesh.py +28 -13
- warp/fem/geometry/tetmesh.py +18 -4
- warp/fem/geometry/trimesh.py +18 -8
- warp/fem/integrate.py +277 -93
- warp/fem/linalg.py +20 -5
- warp/fem/operator.py +15 -0
- warp/fem/polynomial.py +15 -0
- warp/fem/quadrature/__init__.py +15 -0
- warp/fem/quadrature/pic_quadrature.py +52 -22
- warp/fem/quadrature/quadrature.py +209 -25
- warp/fem/space/__init__.py +16 -1
- warp/fem/space/basis_function_space.py +19 -2
- warp/fem/space/basis_space.py +40 -18
- warp/fem/space/dof_mapper.py +15 -0
- warp/fem/space/function_space.py +15 -0
- warp/fem/space/grid_2d_function_space.py +15 -0
- warp/fem/space/grid_3d_function_space.py +15 -0
- warp/fem/space/hexmesh_function_space.py +17 -2
- warp/fem/space/nanogrid_function_space.py +15 -0
- warp/fem/space/partition.py +21 -2
- warp/fem/space/quadmesh_function_space.py +23 -8
- warp/fem/space/restriction.py +15 -0
- warp/fem/space/shape/__init__.py +15 -0
- warp/fem/space/shape/cube_shape_function.py +38 -23
- warp/fem/space/shape/shape_function.py +15 -0
- warp/fem/space/shape/square_shape_function.py +27 -12
- warp/fem/space/shape/tet_shape_function.py +15 -0
- warp/fem/space/shape/triangle_shape_function.py +16 -1
- warp/fem/space/tetmesh_function_space.py +18 -3
- warp/fem/space/topology.py +15 -0
- warp/fem/space/trimesh_function_space.py +17 -2
- warp/fem/types.py +15 -0
- warp/fem/utils.py +27 -6
- warp/jax.py +28 -7
- warp/jax_experimental/__init__.py +16 -0
- warp/{jax_experimental.py → jax_experimental/custom_call.py} +28 -33
- warp/jax_experimental/ffi.py +698 -0
- warp/jax_experimental/xla_ffi.py +602 -0
- warp/math.py +103 -6
- warp/native/array.h +28 -6
- warp/native/builtin.h +44 -9
- warp/native/bvh.cpp +18 -7
- warp/native/bvh.cu +57 -20
- warp/native/bvh.h +17 -7
- warp/native/clang/clang.cpp +45 -9
- warp/native/coloring.cpp +15 -6
- warp/native/crt.cpp +15 -6
- warp/native/crt.h +15 -6
- warp/native/cuda_crt.h +15 -6
- warp/native/cuda_util.cpp +29 -6
- warp/native/cuda_util.h +17 -6
- warp/native/error.cpp +15 -6
- warp/native/error.h +15 -6
- warp/native/exports.h +85 -63
- warp/native/fabric.h +15 -6
- warp/native/hashgrid.cpp +15 -6
- warp/native/hashgrid.cu +15 -6
- warp/native/hashgrid.h +15 -6
- warp/native/initializer_array.h +15 -6
- warp/native/intersect.h +41 -32
- warp/native/intersect_adj.h +48 -39
- warp/native/intersect_tri.h +17 -0
- warp/native/marching.cpp +16 -0
- warp/native/marching.cu +16 -7
- warp/native/marching.h +17 -0
- warp/native/mat.h +528 -15
- warp/native/mathdx.cpp +15 -6
- warp/native/matnn.h +15 -6
- warp/native/mesh.cpp +15 -6
- warp/native/mesh.cu +15 -6
- warp/native/mesh.h +25 -16
- warp/native/noise.h +15 -6
- warp/native/quat.h +114 -17
- warp/native/rand.h +21 -6
- warp/native/range.h +15 -6
- warp/native/reduce.cpp +15 -6
- warp/native/reduce.cu +15 -6
- warp/native/runlength_encode.cpp +15 -6
- warp/native/runlength_encode.cu +15 -6
- warp/native/scan.cpp +15 -6
- warp/native/scan.cu +15 -6
- warp/native/scan.h +15 -6
- warp/native/solid_angle.h +17 -0
- warp/native/sort.cpp +137 -65
- warp/native/sort.cu +167 -21
- warp/native/sort.h +23 -7
- warp/native/sparse.cpp +58 -28
- warp/native/sparse.cu +67 -23
- warp/native/spatial.h +15 -6
- warp/native/svd.h +131 -6
- warp/native/temp_buffer.h +15 -6
- warp/native/tile.h +316 -111
- warp/native/tile_reduce.h +61 -9
- warp/native/vec.h +83 -13
- warp/native/volume.cpp +100 -119
- warp/native/volume.cu +15 -6
- warp/native/volume.h +15 -6
- warp/native/volume_builder.cu +40 -16
- warp/native/volume_builder.h +21 -6
- warp/native/volume_impl.h +15 -6
- warp/native/warp.cpp +20 -12
- warp/native/warp.cu +114 -16
- warp/native/warp.h +34 -16
- warp/optim/__init__.py +14 -6
- warp/optim/adam.py +14 -6
- warp/optim/linear.py +25 -10
- warp/optim/sgd.py +14 -6
- warp/paddle.py +14 -6
- warp/render/__init__.py +14 -6
- warp/render/render_opengl.py +14 -6
- warp/render/render_usd.py +14 -6
- warp/render/utils.py +14 -6
- warp/sim/__init__.py +14 -7
- warp/sim/articulation.py +18 -10
- warp/sim/collide.py +35 -16
- warp/sim/graph_coloring.py +14 -6
- warp/sim/import_mjcf.py +463 -162
- warp/sim/import_snu.py +14 -7
- warp/sim/import_urdf.py +46 -18
- warp/sim/import_usd.py +14 -7
- warp/sim/inertia.py +14 -6
- warp/sim/integrator.py +14 -6
- warp/sim/integrator_euler.py +19 -11
- warp/sim/integrator_featherstone.py +17 -16
- warp/sim/integrator_vbd.py +222 -8
- warp/sim/integrator_xpbd.py +19 -11
- warp/sim/model.py +56 -19
- warp/sim/particles.py +14 -6
- warp/sim/render.py +14 -6
- warp/sim/utils.py +17 -2
- warp/sparse.py +657 -555
- warp/stubs.py +231 -19
- warp/tape.py +14 -6
- warp/tests/aux_test_class_kernel.py +14 -6
- warp/tests/aux_test_compile_consts_dummy.py +14 -6
- warp/tests/aux_test_conditional_unequal_types_kernels.py +14 -6
- warp/tests/aux_test_dependent.py +14 -6
- warp/tests/aux_test_grad_customs.py +14 -6
- warp/tests/aux_test_instancing_gc.py +14 -6
- warp/tests/aux_test_module_unload.py +14 -6
- warp/tests/aux_test_name_clash1.py +14 -6
- warp/tests/aux_test_name_clash2.py +14 -6
- warp/tests/aux_test_unresolved_func.py +14 -6
- warp/tests/aux_test_unresolved_symbol.py +14 -6
- warp/tests/cuda/__init__.py +0 -0
- warp/tests/{test_async.py → cuda/test_async.py} +14 -6
- warp/tests/{test_ipc.py → cuda/test_ipc.py} +14 -6
- warp/tests/{test_mempool.py → cuda/test_mempool.py} +53 -6
- warp/tests/{test_multigpu.py → cuda/test_multigpu.py} +14 -6
- warp/tests/{test_peer.py → cuda/test_peer.py} +14 -6
- warp/tests/{test_pinned.py → cuda/test_pinned.py} +14 -6
- warp/tests/{test_streams.py → cuda/test_streams.py} +85 -6
- warp/tests/geometry/__init__.py +0 -0
- warp/tests/{test_bvh.py → geometry/test_bvh.py} +14 -6
- warp/tests/{test_hash_grid.py → geometry/test_hash_grid.py} +14 -6
- warp/tests/{test_marching_cubes.py → geometry/test_marching_cubes.py} +14 -6
- warp/tests/{test_mesh.py → geometry/test_mesh.py} +14 -6
- warp/tests/{test_mesh_query_aabb.py → geometry/test_mesh_query_aabb.py} +14 -6
- warp/tests/{test_mesh_query_point.py → geometry/test_mesh_query_point.py} +80 -69
- warp/tests/{test_mesh_query_ray.py → geometry/test_mesh_query_ray.py} +15 -7
- warp/tests/{test_volume.py → geometry/test_volume.py} +55 -12
- warp/tests/{test_volume_write.py → geometry/test_volume_write.py} +14 -6
- warp/tests/interop/__init__.py +0 -0
- warp/tests/{test_dlpack.py → interop/test_dlpack.py} +42 -11
- warp/tests/{test_jax.py → interop/test_jax.py} +14 -6
- warp/tests/{test_paddle.py → interop/test_paddle.py} +14 -6
- warp/tests/{test_torch.py → interop/test_torch.py} +14 -6
- warp/tests/run_coverage_serial.py +14 -6
- warp/tests/sim/__init__.py +0 -0
- warp/tests/{disabled_kinematics.py → sim/disabled_kinematics.py} +23 -16
- warp/tests/{flaky_test_sim_grad.py → sim/flaky_test_sim_grad.py} +14 -6
- warp/tests/{test_collision.py → sim/test_collision.py} +16 -8
- warp/tests/{test_coloring.py → sim/test_coloring.py} +14 -7
- warp/tests/{test_model.py → sim/test_model.py} +55 -7
- warp/tests/{test_sim_grad_bounce_linear.py → sim/test_sim_grad_bounce_linear.py} +14 -6
- warp/tests/{test_sim_kinematics.py → sim/test_sim_kinematics.py} +16 -7
- warp/tests/sim/test_vbd.py +597 -0
- warp/tests/test_adam.py +14 -6
- warp/tests/test_arithmetic.py +14 -6
- warp/tests/test_array.py +14 -6
- warp/tests/test_array_reduce.py +14 -6
- warp/tests/test_assert.py +14 -6
- warp/tests/test_atomic.py +14 -6
- warp/tests/test_bool.py +15 -7
- warp/tests/test_builtins_resolution.py +14 -6
- warp/tests/test_closest_point_edge_edge.py +14 -6
- warp/tests/test_codegen.py +14 -6
- warp/tests/test_codegen_instancing.py +14 -6
- warp/tests/test_compile_consts.py +14 -6
- warp/tests/test_conditional.py +14 -6
- warp/tests/test_context.py +14 -6
- warp/tests/test_copy.py +14 -6
- warp/tests/test_ctypes.py +14 -6
- warp/tests/test_dense.py +14 -6
- warp/tests/test_devices.py +14 -6
- warp/tests/test_examples.py +42 -42
- warp/tests/test_fabricarray.py +14 -6
- warp/tests/test_fast_math.py +14 -6
- warp/tests/test_fem.py +37 -10
- warp/tests/test_fp16.py +14 -6
- warp/tests/test_func.py +14 -6
- warp/tests/test_future_annotations.py +14 -6
- warp/tests/test_generics.py +14 -6
- warp/tests/test_grad.py +14 -6
- warp/tests/test_grad_customs.py +14 -6
- warp/tests/test_grad_debug.py +14 -6
- warp/tests/test_implicit_init.py +14 -6
- warp/tests/test_import.py +14 -6
- warp/tests/test_indexedarray.py +14 -6
- warp/tests/test_intersect.py +14 -6
- warp/tests/test_iter.py +14 -6
- warp/tests/test_large.py +14 -6
- warp/tests/test_launch.py +14 -6
- warp/tests/test_lerp.py +14 -6
- warp/tests/test_linear_solvers.py +15 -11
- warp/tests/test_lvalue.py +14 -6
- warp/tests/test_mat.py +247 -85
- warp/tests/test_mat_lite.py +14 -6
- warp/tests/test_mat_scalar_ops.py +18 -10
- warp/tests/test_math.py +14 -6
- warp/tests/test_mlp.py +14 -6
- warp/tests/test_module_hashing.py +14 -6
- warp/tests/test_modules_lite.py +14 -6
- warp/tests/test_noise.py +14 -6
- warp/tests/test_operators.py +14 -6
- warp/tests/test_options.py +14 -6
- warp/tests/test_overwrite.py +15 -60
- warp/tests/test_print.py +14 -6
- warp/tests/test_quat.py +81 -52
- warp/tests/test_rand.py +58 -43
- warp/tests/test_reload.py +14 -6
- warp/tests/test_rounding.py +14 -6
- warp/tests/test_runlength_encode.py +14 -6
- warp/tests/test_scalar_ops.py +14 -6
- warp/tests/test_smoothstep.py +14 -6
- warp/tests/test_snippet.py +15 -0
- warp/tests/test_sparse.py +61 -12
- warp/tests/test_spatial.py +89 -6
- warp/tests/test_special_values.py +14 -6
- warp/tests/test_static.py +15 -7
- warp/tests/test_struct.py +14 -6
- warp/tests/test_tape.py +14 -6
- warp/tests/test_transient_module.py +14 -6
- warp/tests/test_triangle_closest_point.py +14 -6
- warp/tests/test_types.py +14 -6
- warp/tests/test_utils.py +98 -10
- warp/tests/test_vec.py +60 -40
- warp/tests/test_vec_lite.py +14 -6
- warp/tests/test_vec_scalar_ops.py +14 -6
- warp/tests/test_verify_fp.py +14 -6
- warp/tests/tile/__init__.py +0 -0
- warp/tests/{test_tile.py → tile/test_tile.py} +150 -57
- warp/tests/{test_tile_load.py → tile/test_tile_load.py} +15 -7
- warp/tests/{test_tile_mathdx.py → tile/test_tile_mathdx.py} +23 -12
- warp/tests/{test_tile_mlp.py → tile/test_tile_mlp.py} +39 -20
- warp/tests/{test_tile_reduce.py → tile/test_tile_reduce.py} +74 -7
- warp/tests/{test_tile_shared_memory.py → tile/test_tile_shared_memory.py} +14 -6
- warp/tests/{test_tile_view.py → tile/test_tile_view.py} +15 -7
- warp/tests/unittest_serial.py +15 -6
- warp/tests/unittest_suites.py +59 -65
- warp/tests/unittest_utils.py +16 -7
- warp/tests/walkthrough_debug.py +14 -6
- warp/thirdparty/unittest_parallel.py +15 -8
- warp/torch.py +14 -6
- warp/types.py +124 -664
- warp/utils.py +151 -78
- {warp_lang-1.6.1.dist-info → warp_lang-1.7.0.dist-info}/METADATA +39 -12
- warp_lang-1.7.0.dist-info/RECORD +429 -0
- {warp_lang-1.6.1.dist-info → warp_lang-1.7.0.dist-info}/WHEEL +1 -1
- warp_lang-1.7.0.dist-info/licenses/LICENSE.md +202 -0
- warp/examples/optim/example_walker.py +0 -309
- warp/native/cutlass_gemm.cpp +0 -34
- warp/native/cutlass_gemm.cu +0 -373
- warp/tests/test_matmul.py +0 -503
- warp/tests/test_matmul_lite.py +0 -403
- warp/tests/test_vbd.py +0 -378
- warp/tests/unused_test_misc.py +0 -69
- warp_lang-1.6.1.dist-info/LICENSE.md +0 -126
- warp_lang-1.6.1.dist-info/RECORD +0 -419
- {warp_lang-1.6.1.dist-info → warp_lang-1.7.0.dist-info}/top_level.txt +0 -0
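Among the new files, warp/jax_experimental/ffi.py, warp/jax_experimental/xla_ffi.py, and the three examples under warp/examples/interop/ introduce an XLA-FFI-based JAX interop path. The sketch below shows the typical usage pattern; it is modeled on the pre-existing warp.jax_experimental.jax_kernel API (kept as custom_call.py in this release) and the new example file names, so the exact entry points and signatures of the new ffi module are an assumption here, not something visible in this diff.

# Hypothetical usage sketch: wrapping a Warp kernel for use inside jax.jit.
# Assumes warp.jax_experimental.ffi exposes a jax_kernel() wrapper analogous
# to the older warp.jax_experimental.jax_kernel.
import jax
import jax.numpy as jnp
import warp as wp
from warp.jax_experimental.ffi import jax_kernel  # new module in 1.7.0

@wp.kernel
def triple(x: wp.array(dtype=float), out: wp.array(dtype=float)):
    tid = wp.tid()
    out[tid] = 3.0 * x[tid]

# Array arguments follow the usual convention: inputs first, then outputs.
jax_triple = jax_kernel(triple)

@jax.jit
def f(x):
    return jax_triple(x)

print(f(jnp.arange(16, dtype=jnp.float32)))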
warp/native/mat.h
CHANGED
@@ -1,9 +1,18 @@
-
- * NVIDIA CORPORATION
- *
- *
- *
- *
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  */
 
 #pragma once
@@ -198,6 +207,159 @@ struct mat_t
     Type data[Rows][Cols];
 };
 
+template<typename Type>
+inline CUDA_CALLABLE mat_t<2, 2, Type> matrix_from_cols(vec_t<2, Type> c0, vec_t<2, Type> c1)
+{
+    mat_t<2, 2, Type> m;
+
+    m.data[0][0] = c0[0];
+    m.data[1][0] = c0[1];
+
+    m.data[0][1] = c1[0];
+    m.data[1][1] = c1[1];
+
+    return m;
+}
+
+template<typename Type>
+inline CUDA_CALLABLE mat_t<3, 3, Type> matrix_from_cols(vec_t<3, Type> c0, vec_t<3, Type> c1, vec_t<3, Type> c2)
+{
+    mat_t<3, 3, Type> m;
+
+    m.data[0][0] = c0[0];
+    m.data[1][0] = c0[1];
+    m.data[2][0] = c0[2];
+
+    m.data[0][1] = c1[0];
+    m.data[1][1] = c1[1];
+    m.data[2][1] = c1[2];
+
+    m.data[0][2] = c2[0];
+    m.data[1][2] = c2[1];
+    m.data[2][2] = c2[2];
+
+    return m;
+}
+
+template<typename Type>
+inline CUDA_CALLABLE mat_t<4, 4, Type> matrix_from_cols(vec_t<4, Type> c0, vec_t<4, Type> c1, vec_t<4, Type> c2, vec_t<4, Type> c3)
+{
+    mat_t<4, 4, Type> m;
+
+    m.data[0][0] = c0[0];
+    m.data[1][0] = c0[1];
+    m.data[2][0] = c0[2];
+    m.data[3][0] = c0[3];
+
+    m.data[0][1] = c1[0];
+    m.data[1][1] = c1[1];
+    m.data[2][1] = c1[2];
+    m.data[3][1] = c1[3];
+
+    m.data[0][2] = c2[0];
+    m.data[1][2] = c2[1];
+    m.data[2][2] = c2[2];
+    m.data[3][2] = c2[3];
+
+    m.data[0][3] = c3[0];
+    m.data[1][3] = c3[1];
+    m.data[2][3] = c3[2];
+    m.data[3][3] = c3[3];
+
+    return m;
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows, Cols, Type> matrix_from_cols(const initializer_array<Cols, vec_t<Rows, Type> >& l)
+{
+    mat_t<Rows, Cols, Type> m;
+    for (unsigned j=0; j < Cols; ++j)
+    {
+        for (unsigned i=0; i < Rows; ++i)
+        {
+            m.data[i][j] = l[j][i];
+        }
+    }
+
+    return m;
+}
+
+template<typename Type>
+inline CUDA_CALLABLE mat_t<2, 2, Type> matrix_from_rows(vec_t<2, Type> r0, vec_t<2, Type> r1)
+{
+    mat_t<2, 2, Type> m;
+
+    m.data[0][0] = r0[0];
+    m.data[0][1] = r0[1];
+
+    m.data[1][0] = r1[0];
+    m.data[1][1] = r1[1];
+
+    return m;
+}
+
+template<typename Type>
+inline CUDA_CALLABLE mat_t<3, 3, Type> matrix_from_rows(vec_t<3, Type> r0, vec_t<3, Type> r1, vec_t<3, Type> r2)
+{
+    mat_t<3, 3, Type> m;
+
+    m.data[0][0] = r0[0];
+    m.data[0][1] = r0[1];
+    m.data[0][2] = r0[2];
+
+    m.data[1][0] = r1[0];
+    m.data[1][1] = r1[1];
+    m.data[1][2] = r1[2];
+
+    m.data[2][0] = r2[0];
+    m.data[2][1] = r2[1];
+    m.data[2][2] = r2[2];
+
+    return m;
+}
+
+template<typename Type>
+inline CUDA_CALLABLE mat_t<4, 4, Type> matrix_from_rows(vec_t<4, Type> r0, vec_t<4, Type> r1, vec_t<4, Type> r2, vec_t<4, Type> r3)
+{
+    mat_t<4, 4, Type> m;
+
+    m.data[0][0] = r0[0];
+    m.data[0][1] = r0[1];
+    m.data[0][2] = r0[2];
+    m.data[0][3] = r0[3];
+
+    m.data[1][0] = r1[0];
+    m.data[1][1] = r1[1];
+    m.data[1][2] = r1[2];
+    m.data[1][3] = r1[3];
+
+    m.data[2][0] = r2[0];
+    m.data[2][1] = r2[1];
+    m.data[2][2] = r2[2];
+    m.data[2][3] = r2[3];
+
+    m.data[3][0] = r3[0];
+    m.data[3][1] = r3[1];
+    m.data[3][2] = r3[2];
+    m.data[3][3] = r3[3];
+
+    return m;
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows, Cols, Type> matrix_from_rows(const initializer_array<Rows, vec_t<Cols, Type> >& l)
+{
+    mat_t<Rows, Cols, Type> m;
+    for (unsigned i=0; i < Rows; ++i)
+    {
+        for (unsigned j=0; j < Cols; ++j)
+        {
+            m.data[i][j] = l[i][j];
+        }
+    }
+
+    return m;
+}
 
 template<unsigned Rows, typename Type>
 inline CUDA_CALLABLE mat_t<Rows, Rows, Type> identity()
@@ -395,37 +557,241 @@ inline CUDA_CALLABLE void adj_index(const mat_t<Rows,Cols,Type>& m, int row, int
 
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void
+inline CUDA_CALLABLE void add_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
 {
+#ifndef NDEBUG
+    if (row < 0 || row >= Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+    if (col < 0 || col >= Cols)
+    {
+        printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
     m.data[row][col] += value;
 }
 
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void
+inline CUDA_CALLABLE void add_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
+{
+#ifndef NDEBUG
+    if (row < 0 || row >= Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    for(unsigned i=0; i < Cols; ++i)
+    {
+        m.data[row][i] += value[i];
+    }
+}
+
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_add_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
     mat_t<Rows,Cols,Type>& adj_m, int adj_row, int adj_col, Type& adj_value)
 {
+#ifndef NDEBUG
+    if (row < 0 || row >= Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+    if (col < 0 || col >= Cols)
+    {
+        printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
     adj_value += adj_m.data[row][col];
 }
 
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void
+inline CUDA_CALLABLE void adj_add_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, int adj_row, vec_t<Cols,Type>& adj_value)
+{
+#ifndef NDEBUG
+    if (row < 0 || row >= Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    for(unsigned i=0; i < Cols; ++i)
+    {
+        adj_value[i] += adj_m.data[row][i];
+    }
+}
+
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void sub_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
 {
+#ifndef NDEBUG
+    if (row < 0 || row >= Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+    if (col < 0 || col >= Cols)
+    {
+        printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
     m.data[row][col] -= value;
 }
 
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void
+inline CUDA_CALLABLE void sub_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
+{
+#ifndef NDEBUG
+    if (row < 0 || row >= Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    for(unsigned i=0; i < Cols; ++i)
+    {
+        m.data[row][i] -= value[i];
+    }
+}
+
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_sub_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
     mat_t<Rows,Cols,Type>& adj_m, int adj_row, int adj_col, Type& adj_value)
 {
+#ifndef NDEBUG
+    if (row < 0 || row >= Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+    if (col < 0 || col >= Cols)
+    {
+        printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
     adj_value -= adj_m.data[row][col];
 }
 
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE
+inline CUDA_CALLABLE void adj_sub_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, int adj_row, vec_t<Cols,Type>& adj_value)
+{
+#ifndef NDEBUG
+    if (row < 0 || row >= Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    for(unsigned i=0; i < Cols; ++i)
+    {
+        adj_value[i] -= adj_m.data[row][i];
+    }
+}
+
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
+{
+#ifndef NDEBUG
+    if (row < 0 || row >= Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+    if (col < 0 || col >= Cols)
+    {
+        printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    m.data[row][col] = value;
+}
+
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
+{
+#ifndef NDEBUG
+    if (row < 0 || row >= Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    for(unsigned i=0; i < Cols; ++i)
+    {
+        m.data[row][i] = value[i];
+    }
+}
+
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_assign_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
+    mat_t<Rows,Cols,Type>& adj_m, int& adj_row, int& adj_col, Type& adj_value)
+{
+#ifndef NDEBUG
+    if (row < 0 || row >= Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+    if (col < 0 || col >= Cols)
+    {
+        printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    adj_value += adj_m.data[row][col];
+}
+
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_assign_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, int& adj_row, vec_t<Cols,Type>& adj_value)
+{
+#ifndef NDEBUG
+    if (row < 0 || row >= Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    for(unsigned i=0; i < Cols; ++i)
+    {
+        adj_value[i] += adj_m.data[row][i];
+    }
+}
+
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign_copy(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
 {
 #ifndef NDEBUG
     if (row < 0 || row >= Rows)
@@ -447,7 +813,7 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign(mat_t<Rows,Cols,Type>& m, int
 
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE mat_t<Rows,Cols,Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign_copy(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
 {
 #ifndef NDEBUG
     if (row < 0 || row >= Rows)
@@ -467,7 +833,7 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign(mat_t<Rows,Cols,Type>& m, int
 
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void
+inline CUDA_CALLABLE void adj_assign_copy(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
     mat_t<Rows,Cols,Type>& adj_m, int& adj_row, int& adj_col, Type& adj_value, const mat_t<Rows,Cols,Type>& adj_ret)
 {
 #ifndef NDEBUG
@@ -496,7 +862,7 @@ inline CUDA_CALLABLE void adj_assign(mat_t<Rows,Cols,Type>& m, int row, int col,
 
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void
+inline CUDA_CALLABLE void adj_assign_copy(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
     mat_t<Rows,Cols,Type>& adj_m, int& adj_row, vec_t<Cols,Type>& adj_value, const mat_t<Rows,Cols,Type>& adj_ret)
 {
 #ifndef NDEBUG
@@ -701,7 +1067,7 @@ inline CUDA_CALLABLE mat_t<Rows,ColsOut,Type> mul(const mat_t<Rows,Cols,Type>& a
     mat_t<Rows,ColsOut,Type> t(0);
     for (unsigned i=0; i < Rows; ++i)
     {
-        for (unsigned j=0; j < ColsOut; ++j)
+        for (unsigned j=0; j < ColsOut; ++j)
         {
             Type sum(0.0);
 
@@ -1564,6 +1930,128 @@ inline CUDA_CALLABLE void adj_mat_t(const vec_t<4,Type> &cmps0, const vec_t<4,Ty
     }
 }
 
+template<typename Type>
+inline CUDA_CALLABLE void adj_matrix_from_cols(
+    const vec_t<2, Type>& c0, const vec_t<2, Type>& c1,
+    vec_t<2, Type>& adj_c0, vec_t<2, Type>& adj_c1,
+    const mat_t<2, 2, Type>& adj_ret
+)
+{
+    for (unsigned i=0; i < 2; ++i)
+    {
+        adj_c0[i] += adj_ret.data[i][0];
+        adj_c1[i] += adj_ret.data[i][1];
+    }
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_matrix_from_cols(
+    const vec_t<3, Type>& c0, const vec_t<3, Type>& c1, const vec_t<3, Type>& c2,
+    vec_t<3, Type>& adj_c0, vec_t<3, Type>& adj_c1, vec_t<3, Type>& adj_c2,
+    const mat_t<3, 3, Type>& adj_ret
+)
+{
+    for (unsigned i=0; i < 3; ++i)
+    {
+        adj_c0[i] += adj_ret.data[i][0];
+        adj_c1[i] += adj_ret.data[i][1];
+        adj_c2[i] += adj_ret.data[i][2];
+    }
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_matrix_from_cols(
+    const vec_t<4, Type>& c0, const vec_t<4, Type>& c1, const vec_t<4, Type>& c2, const vec_t<4, Type>& c3,
+    vec_t<4, Type>& adj_c0, vec_t<4, Type>& adj_c1, vec_t<4, Type>& adj_c2, vec_t<4, Type>& adj_c3,
+    const mat_t<4, 4, Type>& adj_ret
+)
+{
+    for (unsigned i=0; i < 4; ++i)
+    {
+        adj_c0[i] += adj_ret.data[i][0];
+        adj_c1[i] += adj_ret.data[i][1];
+        adj_c2[i] += adj_ret.data[i][2];
+        adj_c3[i] += adj_ret.data[i][3];
+    }
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_matrix_from_cols(
+    const initializer_array<Cols, vec_t<Rows, Type> >& l,
+    const initializer_array<Cols, vec_t<Rows, Type>* >& adj_l,
+    const mat_t<Rows, Cols, Type>& adj_ret
+)
+{
+    for (unsigned j=0; j < Cols; ++j)
+    {
+        for (unsigned i=0; i < Rows; ++i)
+        {
+            (*adj_l[j])[i] += adj_ret.data[i][j];
+        }
+    }
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_matrix_from_rows(
+    const vec_t<2, Type>& r0, const vec_t<2, Type>& r1,
+    vec_t<2, Type>& adj_r0, vec_t<2, Type>& adj_r1,
+    const mat_t<2, 2, Type>& adj_ret
+)
+{
+    for (unsigned j=0; j < 2; ++j)
+    {
+        adj_r0[j] += adj_ret.data[0][j];
+        adj_r1[j] += adj_ret.data[1][j];
+    }
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_matrix_from_rows(
+    const vec_t<3, Type>& c0, const vec_t<3, Type>& c1, const vec_t<3, Type>& c2,
+    vec_t<3, Type>& adj_c0, vec_t<3, Type>& adj_c1, vec_t<3, Type>& adj_c2,
+    const mat_t<3, 3, Type>& adj_ret
+)
+{
+    for (unsigned j=0; j < 3; ++j)
+    {
+        adj_c0[j] += adj_ret.data[0][j];
+        adj_c1[j] += adj_ret.data[1][j];
+        adj_c2[j] += adj_ret.data[2][j];
+    }
+}
+
+template<typename Type>
+inline CUDA_CALLABLE void adj_matrix_from_rows(
+    const vec_t<4, Type>& c0, const vec_t<4, Type>& c1, const vec_t<4, Type>& c2, const vec_t<4, Type>& c3,
+    vec_t<4, Type>& adj_c0, vec_t<4, Type>& adj_c1, vec_t<4, Type>& adj_c2, vec_t<4, Type>& adj_c3,
+    const mat_t<4, 4, Type>& adj_ret
+)
+{
+    for (unsigned j=0; j < 4; ++j)
+    {
+        adj_c0[j] += adj_ret.data[0][j];
+        adj_c1[j] += adj_ret.data[1][j];
+        adj_c2[j] += adj_ret.data[2][j];
+        adj_c3[j] += adj_ret.data[3][j];
+    }
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_matrix_from_rows(
+    const initializer_array<Rows, vec_t<Cols, Type> >& l,
+    const initializer_array<Rows, vec_t<Cols, Type>* >& adj_l,
+    const mat_t<Rows, Cols, Type>& adj_ret
+)
+{
+    for (unsigned i=0; i < Rows; ++i)
+    {
+        for (unsigned j=0; j < Cols; ++j)
+        {
+            (*adj_l[i])[j] += adj_ret.data[i][j];
+        }
+    }
+}
+
 template<unsigned Rows, unsigned Cols, typename Type>
 CUDA_CALLABLE inline mat_t<Rows, Cols, Type> lerp(const mat_t<Rows, Cols, Type>& a, const mat_t<Rows, Cols, Type>& b, Type t)
 {
@@ -1704,4 +2192,29 @@ CUDA_CALLABLE inline void adj_len(const mat_t<Rows,Cols,Type>& x, mat_t<Rows,Col
 {
 }
 
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void expect_near(const mat_t<Rows,Cols,Type>& actual, const mat_t<Rows,Cols,Type>& expected, const Type& tolerance)
+{
+    Type diff(0);
+    for (unsigned i = 0; i < Rows; ++i)
+    {
+        for (unsigned j = 0; j < Cols; ++j)
+        {
+            diff = max(diff, abs(actual.data[i][j] - expected.data[i][j]));
+        }
+    }
+    if (diff > tolerance)
+    {
+        printf("Error, expect_near() failed with tolerance "); print(tolerance);
+        printf("\t Expected: "); print(expected);
+        printf("\t Actual: "); print(actual);
+    }
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_expect_near(const mat_t<Rows,Cols,Type>& actual, const mat_t<Rows,Cols,Type>& expected, Type tolerance, mat_t<Rows,Cols,Type>& adj_actual, mat_t<Rows,Cols,Type>& adj_expected, Type adj_tolerance)
+{
+    // nop
+}
+
 } // namespace wp
warp/native/mathdx.cpp
CHANGED
@@ -1,9 +1,18 @@
-
- * NVIDIA CORPORATION
- *
- *
- *
- *
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  */
 
 #include "builtin.h"
warp/native/matnn.h
CHANGED
@@ -1,9 +1,18 @@
-
- * NVIDIA CORPORATION
- *
- *
- *
- *
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  */
 
 #pragma once
warp/native/mesh.cpp
CHANGED
@@ -1,9 +1,18 @@
-
- * NVIDIA CORPORATION
- *
- *
- *
- *
+/*
+ * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
  */
 
 #include "mesh.h"