warp-lang 1.9.1-py3-none-manylinux_2_34_aarch64.whl → 1.10.0rc2-py3-none-manylinux_2_34_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- warp/__init__.py +301 -287
- warp/__init__.pyi +794 -305
- warp/_src/__init__.py +14 -0
- warp/_src/autograd.py +1075 -0
- warp/_src/build.py +618 -0
- warp/_src/build_dll.py +640 -0
- warp/{builtins.py → _src/builtins.py} +1382 -377
- warp/_src/codegen.py +4359 -0
- warp/{config.py → _src/config.py} +178 -169
- warp/_src/constants.py +57 -0
- warp/_src/context.py +8294 -0
- warp/_src/dlpack.py +462 -0
- warp/_src/fabric.py +355 -0
- warp/_src/fem/__init__.py +14 -0
- warp/_src/fem/adaptivity.py +508 -0
- warp/_src/fem/cache.py +687 -0
- warp/_src/fem/dirichlet.py +188 -0
- warp/{fem → _src/fem}/domain.py +40 -30
- warp/_src/fem/field/__init__.py +131 -0
- warp/_src/fem/field/field.py +701 -0
- warp/{fem → _src/fem}/field/nodal_field.py +30 -15
- warp/{fem → _src/fem}/field/restriction.py +1 -1
- warp/{fem → _src/fem}/field/virtual.py +53 -27
- warp/_src/fem/geometry/__init__.py +32 -0
- warp/{fem → _src/fem}/geometry/adaptive_nanogrid.py +77 -163
- warp/_src/fem/geometry/closest_point.py +97 -0
- warp/{fem → _src/fem}/geometry/deformed_geometry.py +14 -22
- warp/{fem → _src/fem}/geometry/element.py +32 -10
- warp/{fem → _src/fem}/geometry/geometry.py +48 -20
- warp/{fem → _src/fem}/geometry/grid_2d.py +12 -23
- warp/{fem → _src/fem}/geometry/grid_3d.py +12 -23
- warp/{fem → _src/fem}/geometry/hexmesh.py +40 -63
- warp/{fem → _src/fem}/geometry/nanogrid.py +255 -248
- warp/{fem → _src/fem}/geometry/partition.py +121 -63
- warp/{fem → _src/fem}/geometry/quadmesh.py +26 -45
- warp/{fem → _src/fem}/geometry/tetmesh.py +40 -63
- warp/{fem → _src/fem}/geometry/trimesh.py +26 -45
- warp/{fem → _src/fem}/integrate.py +164 -158
- warp/_src/fem/linalg.py +383 -0
- warp/_src/fem/operator.py +396 -0
- warp/_src/fem/polynomial.py +229 -0
- warp/{fem → _src/fem}/quadrature/pic_quadrature.py +15 -20
- warp/{fem → _src/fem}/quadrature/quadrature.py +95 -47
- warp/_src/fem/space/__init__.py +248 -0
- warp/{fem → _src/fem}/space/basis_function_space.py +20 -11
- warp/_src/fem/space/basis_space.py +679 -0
- warp/{fem → _src/fem}/space/dof_mapper.py +3 -3
- warp/{fem → _src/fem}/space/function_space.py +14 -13
- warp/{fem → _src/fem}/space/grid_2d_function_space.py +4 -7
- warp/{fem → _src/fem}/space/grid_3d_function_space.py +4 -4
- warp/{fem → _src/fem}/space/hexmesh_function_space.py +4 -10
- warp/{fem → _src/fem}/space/nanogrid_function_space.py +3 -9
- warp/{fem → _src/fem}/space/partition.py +117 -60
- warp/{fem → _src/fem}/space/quadmesh_function_space.py +4 -10
- warp/{fem → _src/fem}/space/restriction.py +66 -33
- warp/_src/fem/space/shape/__init__.py +152 -0
- warp/{fem → _src/fem}/space/shape/cube_shape_function.py +9 -9
- warp/{fem → _src/fem}/space/shape/shape_function.py +8 -9
- warp/{fem → _src/fem}/space/shape/square_shape_function.py +6 -6
- warp/{fem → _src/fem}/space/shape/tet_shape_function.py +3 -3
- warp/{fem → _src/fem}/space/shape/triangle_shape_function.py +3 -3
- warp/{fem → _src/fem}/space/tetmesh_function_space.py +3 -9
- warp/_src/fem/space/topology.py +459 -0
- warp/{fem → _src/fem}/space/trimesh_function_space.py +3 -9
- warp/_src/fem/types.py +112 -0
- warp/_src/fem/utils.py +486 -0
- warp/_src/jax.py +186 -0
- warp/_src/jax_experimental/__init__.py +14 -0
- warp/_src/jax_experimental/custom_call.py +387 -0
- warp/_src/jax_experimental/ffi.py +1284 -0
- warp/_src/jax_experimental/xla_ffi.py +656 -0
- warp/_src/marching_cubes.py +708 -0
- warp/_src/math.py +414 -0
- warp/_src/optim/__init__.py +14 -0
- warp/_src/optim/adam.py +163 -0
- warp/_src/optim/linear.py +1606 -0
- warp/_src/optim/sgd.py +112 -0
- warp/_src/paddle.py +406 -0
- warp/_src/render/__init__.py +14 -0
- warp/_src/render/imgui_manager.py +289 -0
- warp/_src/render/render_opengl.py +3636 -0
- warp/_src/render/render_usd.py +937 -0
- warp/_src/render/utils.py +160 -0
- warp/_src/sparse.py +2716 -0
- warp/_src/tape.py +1206 -0
- warp/{thirdparty → _src/thirdparty}/unittest_parallel.py +9 -2
- warp/_src/torch.py +391 -0
- warp/_src/types.py +5870 -0
- warp/_src/utils.py +1693 -0
- warp/autograd.py +12 -1054
- warp/bin/warp-clang.so +0 -0
- warp/bin/warp.so +0 -0
- warp/build.py +8 -588
- warp/build_dll.py +6 -721
- warp/codegen.py +6 -4251
- warp/constants.py +6 -39
- warp/context.py +12 -8062
- warp/dlpack.py +6 -444
- warp/examples/distributed/example_jacobi_mpi.py +4 -5
- warp/examples/fem/example_adaptive_grid.py +1 -1
- warp/examples/fem/example_apic_fluid.py +1 -1
- warp/examples/fem/example_burgers.py +8 -8
- warp/examples/fem/example_diffusion.py +1 -1
- warp/examples/fem/example_distortion_energy.py +1 -1
- warp/examples/fem/example_mixed_elasticity.py +2 -2
- warp/examples/fem/example_navier_stokes.py +1 -1
- warp/examples/fem/example_nonconforming_contact.py +7 -7
- warp/examples/fem/example_stokes.py +1 -1
- warp/examples/fem/example_stokes_transfer.py +1 -1
- warp/examples/fem/utils.py +2 -2
- warp/examples/interop/example_jax_callable.py +1 -1
- warp/examples/interop/example_jax_ffi_callback.py +1 -1
- warp/examples/interop/example_jax_kernel.py +1 -1
- warp/examples/tile/example_tile_mcgp.py +191 -0
- warp/fabric.py +6 -337
- warp/fem/__init__.py +159 -97
- warp/fem/adaptivity.py +7 -489
- warp/fem/cache.py +9 -648
- warp/fem/dirichlet.py +6 -184
- warp/fem/field/__init__.py +8 -109
- warp/fem/field/field.py +7 -652
- warp/fem/geometry/__init__.py +7 -18
- warp/fem/geometry/closest_point.py +11 -77
- warp/fem/linalg.py +18 -366
- warp/fem/operator.py +11 -369
- warp/fem/polynomial.py +9 -209
- warp/fem/space/__init__.py +5 -211
- warp/fem/space/basis_space.py +6 -662
- warp/fem/space/shape/__init__.py +41 -118
- warp/fem/space/topology.py +6 -437
- warp/fem/types.py +6 -81
- warp/fem/utils.py +11 -444
- warp/jax.py +8 -165
- warp/jax_experimental/__init__.py +14 -1
- warp/jax_experimental/custom_call.py +8 -365
- warp/jax_experimental/ffi.py +17 -873
- warp/jax_experimental/xla_ffi.py +5 -605
- warp/marching_cubes.py +5 -689
- warp/math.py +16 -393
- warp/native/array.h +385 -37
- warp/native/builtin.h +314 -37
- warp/native/bvh.cpp +43 -9
- warp/native/bvh.cu +62 -27
- warp/native/bvh.h +310 -309
- warp/native/clang/clang.cpp +102 -97
- warp/native/coloring.cpp +0 -1
- warp/native/crt.h +208 -0
- warp/native/exports.h +156 -0
- warp/native/hashgrid.cu +2 -0
- warp/native/intersect.h +24 -1
- warp/native/intersect_tri.h +44 -35
- warp/native/mat.h +1456 -276
- warp/native/mesh.cpp +4 -4
- warp/native/mesh.cu +4 -2
- warp/native/mesh.h +176 -61
- warp/native/quat.h +0 -52
- warp/native/scan.cu +2 -0
- warp/native/sparse.cu +7 -3
- warp/native/spatial.h +12 -0
- warp/native/tile.h +681 -89
- warp/native/tile_radix_sort.h +1 -1
- warp/native/tile_reduce.h +394 -46
- warp/native/tile_scan.h +4 -4
- warp/native/vec.h +469 -0
- warp/native/version.h +23 -0
- warp/native/volume.cpp +1 -1
- warp/native/volume.cu +1 -0
- warp/native/volume.h +1 -1
- warp/native/volume_builder.cu +2 -0
- warp/native/warp.cpp +57 -29
- warp/native/warp.cu +253 -171
- warp/native/warp.h +11 -8
- warp/optim/__init__.py +6 -3
- warp/optim/adam.py +6 -145
- warp/optim/linear.py +14 -1585
- warp/optim/sgd.py +6 -94
- warp/paddle.py +6 -388
- warp/render/__init__.py +8 -4
- warp/render/imgui_manager.py +7 -267
- warp/render/render_opengl.py +6 -3618
- warp/render/render_usd.py +6 -919
- warp/render/utils.py +6 -142
- warp/sparse.py +37 -2563
- warp/tape.py +6 -1188
- warp/tests/__main__.py +1 -1
- warp/tests/cuda/test_async.py +4 -4
- warp/tests/cuda/test_conditional_captures.py +1 -1
- warp/tests/cuda/test_multigpu.py +1 -1
- warp/tests/cuda/test_streams.py +58 -1
- warp/tests/geometry/test_bvh.py +157 -22
- warp/tests/geometry/test_marching_cubes.py +0 -1
- warp/tests/geometry/test_mesh.py +5 -3
- warp/tests/geometry/test_mesh_query_aabb.py +5 -12
- warp/tests/geometry/test_mesh_query_point.py +5 -2
- warp/tests/geometry/test_mesh_query_ray.py +15 -3
- warp/tests/geometry/test_volume_write.py +5 -5
- warp/tests/interop/test_dlpack.py +14 -14
- warp/tests/interop/test_jax.py +772 -49
- warp/tests/interop/test_paddle.py +1 -1
- warp/tests/test_adam.py +0 -1
- warp/tests/test_arithmetic.py +9 -9
- warp/tests/test_array.py +527 -100
- warp/tests/test_array_reduce.py +3 -3
- warp/tests/test_atomic.py +12 -8
- warp/tests/test_atomic_bitwise.py +209 -0
- warp/tests/test_atomic_cas.py +4 -4
- warp/tests/test_bool.py +2 -2
- warp/tests/test_builtins_resolution.py +5 -571
- warp/tests/test_codegen.py +33 -14
- warp/tests/test_conditional.py +1 -1
- warp/tests/test_context.py +6 -6
- warp/tests/test_copy.py +242 -161
- warp/tests/test_ctypes.py +3 -3
- warp/tests/test_devices.py +24 -2
- warp/tests/test_examples.py +16 -84
- warp/tests/test_fabricarray.py +35 -35
- warp/tests/test_fast_math.py +0 -2
- warp/tests/test_fem.py +56 -10
- warp/tests/test_fixedarray.py +3 -3
- warp/tests/test_func.py +8 -5
- warp/tests/test_generics.py +1 -1
- warp/tests/test_indexedarray.py +24 -24
- warp/tests/test_intersect.py +39 -9
- warp/tests/test_large.py +1 -1
- warp/tests/test_lerp.py +3 -1
- warp/tests/test_linear_solvers.py +1 -1
- warp/tests/test_map.py +35 -4
- warp/tests/test_mat.py +52 -62
- warp/tests/test_mat_constructors.py +4 -5
- warp/tests/test_mat_lite.py +1 -1
- warp/tests/test_mat_scalar_ops.py +121 -121
- warp/tests/test_math.py +34 -0
- warp/tests/test_module_aot.py +4 -4
- warp/tests/test_modules_lite.py +28 -2
- warp/tests/test_print.py +11 -11
- warp/tests/test_quat.py +93 -58
- warp/tests/test_runlength_encode.py +1 -1
- warp/tests/test_scalar_ops.py +38 -10
- warp/tests/test_smoothstep.py +1 -1
- warp/tests/test_sparse.py +126 -15
- warp/tests/test_spatial.py +105 -87
- warp/tests/test_special_values.py +6 -6
- warp/tests/test_static.py +7 -7
- warp/tests/test_struct.py +13 -2
- warp/tests/test_triangle_closest_point.py +48 -1
- warp/tests/test_types.py +27 -15
- warp/tests/test_utils.py +52 -52
- warp/tests/test_vec.py +29 -29
- warp/tests/test_vec_constructors.py +5 -5
- warp/tests/test_vec_scalar_ops.py +97 -97
- warp/tests/test_version.py +75 -0
- warp/tests/tile/test_tile.py +178 -0
- warp/tests/tile/test_tile_atomic_bitwise.py +403 -0
- warp/tests/tile/test_tile_cholesky.py +7 -4
- warp/tests/tile/test_tile_load.py +26 -2
- warp/tests/tile/test_tile_mathdx.py +3 -3
- warp/tests/tile/test_tile_matmul.py +1 -1
- warp/tests/tile/test_tile_mlp.py +2 -4
- warp/tests/tile/test_tile_reduce.py +214 -13
- warp/tests/unittest_suites.py +6 -14
- warp/tests/unittest_utils.py +10 -9
- warp/tests/walkthrough_debug.py +3 -1
- warp/torch.py +6 -373
- warp/types.py +29 -5764
- warp/utils.py +10 -1659
- {warp_lang-1.9.1.dist-info → warp_lang-1.10.0rc2.dist-info}/METADATA +46 -99
- warp_lang-1.10.0rc2.dist-info/RECORD +468 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/Gaia-LICENSE.txt +6 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/appdirs-LICENSE.txt +22 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/asset_pixel_jpg-LICENSE.txt +3 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/cuda-LICENSE.txt +1582 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/dlpack-LICENSE.txt +201 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/fp16-LICENSE.txt +28 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/libmathdx-LICENSE.txt +220 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/llvm-LICENSE.txt +279 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/moller-LICENSE.txt +16 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/nanovdb-LICENSE.txt +2 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/nvrtc-LICENSE.txt +1592 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/svd-LICENSE.txt +23 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/unittest_parallel-LICENSE.txt +21 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/usd-LICENSE.txt +213 -0
- warp_lang-1.10.0rc2.dist-info/licenses/licenses/windingnumber-LICENSE.txt +21 -0
- warp/examples/assets/cartpole.urdf +0 -110
- warp/examples/assets/crazyflie.usd +0 -0
- warp/examples/assets/nv_ant.xml +0 -92
- warp/examples/assets/nv_humanoid.xml +0 -183
- warp/examples/assets/quadruped.urdf +0 -268
- warp/examples/optim/example_bounce.py +0 -266
- warp/examples/optim/example_cloth_throw.py +0 -228
- warp/examples/optim/example_drone.py +0 -870
- warp/examples/optim/example_inverse_kinematics.py +0 -182
- warp/examples/optim/example_inverse_kinematics_torch.py +0 -191
- warp/examples/optim/example_softbody_properties.py +0 -400
- warp/examples/optim/example_spring_cage.py +0 -245
- warp/examples/optim/example_trajectory.py +0 -227
- warp/examples/sim/example_cartpole.py +0 -143
- warp/examples/sim/example_cloth.py +0 -225
- warp/examples/sim/example_cloth_self_contact.py +0 -316
- warp/examples/sim/example_granular.py +0 -130
- warp/examples/sim/example_granular_collision_sdf.py +0 -202
- warp/examples/sim/example_jacobian_ik.py +0 -244
- warp/examples/sim/example_particle_chain.py +0 -124
- warp/examples/sim/example_quadruped.py +0 -203
- warp/examples/sim/example_rigid_chain.py +0 -203
- warp/examples/sim/example_rigid_contact.py +0 -195
- warp/examples/sim/example_rigid_force.py +0 -133
- warp/examples/sim/example_rigid_gyroscopic.py +0 -115
- warp/examples/sim/example_rigid_soft_contact.py +0 -140
- warp/examples/sim/example_soft_body.py +0 -196
- warp/examples/tile/example_tile_walker.py +0 -327
- warp/sim/__init__.py +0 -74
- warp/sim/articulation.py +0 -793
- warp/sim/collide.py +0 -2570
- warp/sim/graph_coloring.py +0 -307
- warp/sim/import_mjcf.py +0 -791
- warp/sim/import_snu.py +0 -227
- warp/sim/import_urdf.py +0 -579
- warp/sim/import_usd.py +0 -898
- warp/sim/inertia.py +0 -357
- warp/sim/integrator.py +0 -245
- warp/sim/integrator_euler.py +0 -2000
- warp/sim/integrator_featherstone.py +0 -2101
- warp/sim/integrator_vbd.py +0 -2487
- warp/sim/integrator_xpbd.py +0 -3295
- warp/sim/model.py +0 -4821
- warp/sim/particles.py +0 -121
- warp/sim/render.py +0 -431
- warp/sim/utils.py +0 -431
- warp/tests/sim/disabled_kinematics.py +0 -244
- warp/tests/sim/test_cloth.py +0 -863
- warp/tests/sim/test_collision.py +0 -743
- warp/tests/sim/test_coloring.py +0 -347
- warp/tests/sim/test_inertia.py +0 -161
- warp/tests/sim/test_model.py +0 -226
- warp/tests/sim/test_sim_grad.py +0 -287
- warp/tests/sim/test_sim_grad_bounce_linear.py +0 -212
- warp/tests/sim/test_sim_kinematics.py +0 -98
- warp/thirdparty/__init__.py +0 -0
- warp_lang-1.9.1.dist-info/RECORD +0 -456
- /warp/{fem → _src/fem}/quadrature/__init__.py +0 -0
- /warp/{tests/sim → _src/thirdparty}/__init__.py +0 -0
- /warp/{thirdparty → _src/thirdparty}/appdirs.py +0 -0
- /warp/{thirdparty → _src/thirdparty}/dlpack.py +0 -0
- {warp_lang-1.9.1.dist-info → warp_lang-1.10.0rc2.dist-info}/WHEEL +0 -0
- {warp_lang-1.9.1.dist-info → warp_lang-1.10.0rc2.dist-info}/licenses/LICENSE.md +0 -0
- {warp_lang-1.9.1.dist-info → warp_lang-1.10.0rc2.dist-info}/top_level.txt +0 -0
warp/native/mat.h
CHANGED
@@ -149,10 +149,6 @@ struct mat_t
         data[3][3] = m33;
     }
 
-    // implemented in quat.h
-    inline CUDA_CALLABLE mat_t(const vec_t<3,Type>& pos, const quat_t<Type>& rot, const vec_t<3,Type>& scale);
-
-
     inline CUDA_CALLABLE mat_t(const initializer_array<Rows * Cols, Type> &l)
     {
         for (unsigned i=0; i < Rows; ++i)
@@ -207,6 +203,17 @@ struct mat_t
     Type data[Rows < 1 ? 1 : Rows][Cols < 1 ? 1 : Cols];
 };
 
+// Type trait to detect if a type is a mat_t
+template<typename T>
+struct is_matrix {
+    static constexpr bool value = false;
+};
+
+template<unsigned Rows, unsigned Cols, typename Type>
+struct is_matrix<mat_t<Rows, Cols, Type>> {
+    static constexpr bool value = true;
+};
+
 template<typename Type>
 inline CUDA_CALLABLE mat_t<2, 2, Type> matrix_from_cols(vec_t<2, Type> c0, vec_t<2, Type> c1)
 {
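The `is_matrix` trait added above is the standard C++ detection idiom: a primary template answering false plus a partial specialization for `mat_t` answering true, which lets other templates test matrix-ness at compile time. A minimal, self-contained sketch of how such a trait is typically consumed (the simplified `mat_t` stand-in and the assertions are illustrative, not taken from the diff):

    // compile-time detection of mat_t instantiations via the is_matrix trait
    template<unsigned Rows, unsigned Cols, typename Type>
    struct mat_t { Type data[Rows][Cols]; };   // simplified stand-in

    template<typename T>
    struct is_matrix { static constexpr bool value = false; };

    template<unsigned Rows, unsigned Cols, typename Type>
    struct is_matrix<mat_t<Rows, Cols, Type>> { static constexpr bool value = true; };

    int main()
    {
        static_assert(is_matrix<mat_t<3, 3, float>>::value, "mat_t is a matrix");
        static_assert(!is_matrix<float>::value, "a scalar is not");
        return 0;
    }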
@@ -437,6 +444,42 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> atomic_add(mat_t<Rows,Cols,Type> * ad
     return m;
 }
 
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> atomic_and(mat_t<Rows,Cols,Type> * addr, mat_t<Rows,Cols,Type> value)
+{
+    mat_t<Rows,Cols,Type> m;
+
+    for (unsigned i=0; i < Rows; ++i)
+        for (unsigned j=0; j < Cols; ++j)
+            m.data[i][j] = atomic_and(&addr->data[i][j], value.data[i][j]);
+
+    return m;
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> atomic_or(mat_t<Rows,Cols,Type> * addr, mat_t<Rows,Cols,Type> value)
+{
+    mat_t<Rows,Cols,Type> m;
+
+    for (unsigned i=0; i < Rows; ++i)
+        for (unsigned j=0; j < Cols; ++j)
+            m.data[i][j] = atomic_or(&addr->data[i][j], value.data[i][j]);
+
+    return m;
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> atomic_xor(mat_t<Rows,Cols,Type> * addr, mat_t<Rows,Cols,Type> value)
+{
+    mat_t<Rows,Cols,Type> m;
+
+    for (unsigned i=0; i < Rows; ++i)
+        for (unsigned j=0; j < Cols; ++j)
+            m.data[i][j] = atomic_xor(&addr->data[i][j], value.data[i][j]);
+
+    return m;
+}
+
 template<unsigned Rows, unsigned Cols, typename Type>
 inline CUDA_CALLABLE mat_t<Rows,Cols,Type> atomic_min(mat_t<Rows,Cols,Type> * addr, mat_t<Rows,Cols,Type> value)
 {
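The new `atomic_and`, `atomic_or`, and `atomic_xor` overloads apply the scalar atomic componentwise and return the pre-update matrix, mirroring the `atomic_add`/`atomic_min` overloads around them. A sketch of that old-value-return semantics, with a plain sequential `fetch_and` standing in for the hardware atomic (the `mat22` type and `main` are illustrative only):

    #include <cstdio>

    struct mat22 { int data[2][2]; };

    // stand-in for the scalar atomic_and builtin: AND in place, return the old value
    int fetch_and(int* addr, int value) { int old = *addr; *addr &= value; return old; }

    mat22 matrix_and(mat22* addr, mat22 value)
    {
        mat22 m;  // collects the pre-update matrix, as the builtin does
        for (int i = 0; i < 2; ++i)
            for (int j = 0; j < 2; ++j)
                m.data[i][j] = fetch_and(&addr->data[i][j], value.data[i][j]);
        return m;
    }

    int main()
    {
        mat22 a    = {{{12, 10}, {6, 15}}};
        mat22 mask = {{{10, 6}, {3, 5}}};
        mat22 old = matrix_and(&a, mask);
        printf("a[0][0]=%d old[0][0]=%d\n", a.data[0][0], old.data[0][0]);  // 8 and 12
        return 0;
    }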
@@ -1619,7 +1662,7 @@ inline CUDA_CALLABLE void adj_sub_inplace(
 
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void
+inline CUDA_CALLABLE void bit_and_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
 {
 #ifndef NDEBUG
     if (row < -(int)Rows || row >= (int)Rows)
@@ -1643,12 +1686,12 @@ inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, int row, int
         col += Cols;
     }
 
-    m.data[row][col]
+    m.data[row][col] &= value;
 }
 
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void
+inline CUDA_CALLABLE void bit_and_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
 {
 #ifndef NDEBUG
     if (row < -(int)Rows || row >= (int)Rows)
@@ -1665,13 +1708,13 @@ inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_
 
     for(unsigned i=0; i < Cols; ++i)
     {
-        m.data[row][i]
+        m.data[row][i] &= value[i];
     }
 }
 
 
 template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void
+inline CUDA_CALLABLE void bit_and_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value)
 {
     static_assert(
         RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols,
@@ -1694,7 +1737,7 @@ inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_s
     {
         for (int j = 0; j < Cols; ++j)
         {
-            m.data[i][j]
+            m.data[i][j] &= value.data[ii][j];
         }
 
         ++ii;
@@ -1705,7 +1748,7 @@ inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_s
 
 
 template<unsigned RowSliceLength, unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void
+inline CUDA_CALLABLE void bit_and_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_slice, int col, vec_t<RowSliceLength, Type>& value)
 {
 #ifndef NDEBUG
     if (col < -(int)Cols || col >= (int)Cols)
@@ -1734,7 +1777,7 @@ inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_s
         i += row_slice.step
     )
     {
-        m.data[i][col]
+        m.data[i][col] &= value.c[ii];
         ++ii;
     }
 
@@ -1743,7 +1786,7 @@ inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_s
 
 
 template<unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void
+inline CUDA_CALLABLE void bit_and_inplace(mat_t<Rows,Cols,Type>& m, int row, slice_t col_slice, vec_t<ColSliceLength, Type>& value)
 {
 #ifndef NDEBUG
     if (row < -(int)Rows || row >= (int)Rows)
@@ -1772,7 +1815,7 @@ inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, int row, slic
         i += col_slice.step
     )
     {
-        m.data[row][i]
+        m.data[row][i] &= value.c[ii];
         ++ii;
     }
 
@@ -1781,7 +1824,7 @@ inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_s
 
 
 template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void
+inline CUDA_CALLABLE void bit_and_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_slice, slice_t col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value)
 {
     assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
     assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
@@ -1810,7 +1853,7 @@ inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_s
             j += col_slice.step
         )
        {
-            m.data[i][j]
+            m.data[i][j] &= value.data[ii][jj];
             ++jj;
         }
 
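Throughout the `bit_and_inplace` family above, scalar `row`/`col` arguments follow Python's negative-index convention, normalized by the `if (row < 0) row += Rows;` blocks before any element is touched. A two-assert illustration of the wrapping rule (`normalize` is a hypothetical helper, not a function in the diff):

    #include <cassert>

    int normalize(int idx, int n) { return idx < 0 ? idx + n : idx; }

    int main()
    {
        assert(normalize(-1, 4) == 3);  // m[-1] addresses the last of 4 rows
        assert(normalize(2, 4) == 2);   // non-negative indices pass through
        return 0;
    }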
@@ -1823,8 +1866,50 @@ inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_s
 
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void
-
+inline CUDA_CALLABLE void adj_bit_and_inplace(
+    mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
+    mat_t<Rows,Cols,Type>& adj_m, int adj_row, int adj_col, Type& adj_value
+) {}
+
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_bit_and_inplace(
+    mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, int adj_row, vec_t<Cols,Type>& adj_value
+) {}
+
+
+template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_bit_and_inplace(
+    mat_t<Rows,Cols,Type>& m, slice_t row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& adj_value
+) {}
+
+
+template<unsigned RowSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_bit_and_inplace(
+    mat_t<Rows,Cols,Type>& m, slice_t row_slice, int col, vec_t<RowSliceLength, Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, int& adj_col, vec_t<RowSliceLength, Type>& adj_value
+) {}
+
+
+template<unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_bit_and_inplace(
+    mat_t<Rows,Cols,Type>& m, int row, slice_t col_slice, vec_t<ColSliceLength, Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, int& adj_row, slice_t& adj_col_slice, vec_t<ColSliceLength, Type>& adj_value
+) {}
+
+
+template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_bit_and_inplace(
+    mat_t<Rows,Cols,Type>& m, slice_t row_slice, slice_t col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, slice_t& adj_col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& adj_value
+) {}
+
+
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void bit_or_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
 {
 #ifndef NDEBUG
     if (row < -(int)Rows || row >= (int)Rows)
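Each `bit_*_inplace` builtin is paired with an `adj_bit_*_inplace` counterpart for Warp's reverse-mode code generation, and every adjoint body above is deliberately empty: an integer bitwise update carries no gradient, so there is nothing to propagate. A scalar sketch of the forward/adjoint pairing (names illustrative):

    #include <cstdio>

    // forward builtin: mutate x in place
    void bit_and_inplace(int& x, int v) { x &= v; }

    // reverse-mode counterpart: a no-op, since d(x & v) is not defined for integers
    void adj_bit_and_inplace(int /*x*/, int /*v*/, int& /*adj_x*/, int& /*adj_v*/) {}

    int main()
    {
        int x = 12;
        bit_and_inplace(x, 10);
        printf("x = %d\n", x);  // 8
        return 0;
    }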
@@ -1848,13 +1933,12 @@ inline CUDA_CALLABLE void adj_assign_inplace(mat_t<Rows,Cols,Type>& m, int row,
         col += Cols;
     }
 
-
+    m.data[row][col] |= value;
 }
 
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void
-    mat_t<Rows,Cols,Type>& adj_m, int& adj_row, vec_t<Cols,Type>& adj_value)
+inline CUDA_CALLABLE void bit_or_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
 {
 #ifndef NDEBUG
     if (row < -(int)Rows || row >= (int)Rows)
@@ -1871,16 +1955,13 @@ inline CUDA_CALLABLE void adj_assign_inplace(mat_t<Rows,Cols,Type>& m, int row,
 
     for(unsigned i=0; i < Cols; ++i)
     {
-
+        m.data[row][i] |= value[i];
     }
 }
 
 
 template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void
-    mat_t<Rows,Cols,Type>& m, slice_t row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value,
-    mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& adj_value
-)
+inline CUDA_CALLABLE void bit_or_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value)
 {
     static_assert(
         RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols,
@@ -1903,7 +1984,7 @@ inline CUDA_CALLABLE void adj_assign_inplace(
     {
         for (int j = 0; j < Cols; ++j)
         {
-
+            m.data[i][j] |= value.data[ii][j];
         }
 
         ++ii;
@@ -1914,10 +1995,7 @@ inline CUDA_CALLABLE void adj_assign_inplace(
 
 
 template<unsigned RowSliceLength, unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void
-    mat_t<Rows,Cols,Type>& m, slice_t row_slice, int col, vec_t<RowSliceLength, Type>& value,
-    mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, int& adj_col, vec_t<RowSliceLength, Type>& adj_value
-)
+inline CUDA_CALLABLE void bit_or_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_slice, int col, vec_t<RowSliceLength, Type>& value)
 {
 #ifndef NDEBUG
     if (col < -(int)Cols || col >= (int)Cols)
@@ -1946,7 +2024,7 @@ inline CUDA_CALLABLE void adj_assign_inplace(
         i += row_slice.step
     )
     {
-
+        m.data[i][col] |= value.c[ii];
         ++ii;
     }
 
@@ -1955,10 +2033,7 @@ inline CUDA_CALLABLE void adj_assign_inplace(
 
 
 template<unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void
-    mat_t<Rows,Cols,Type>& m, int row, slice_t col_slice, vec_t<ColSliceLength, Type>& value,
-    mat_t<Rows,Cols,Type>& adj_m, int& adj_row, slice_t& adj_col_slice, vec_t<ColSliceLength, Type>& adj_value
-)
+inline CUDA_CALLABLE void bit_or_inplace(mat_t<Rows,Cols,Type>& m, int row, slice_t col_slice, vec_t<ColSliceLength, Type>& value)
 {
 #ifndef NDEBUG
     if (row < -(int)Rows || row >= (int)Rows)
@@ -1987,7 +2062,7 @@ inline CUDA_CALLABLE void adj_assign_inplace(
         i += col_slice.step
     )
     {
-
+        m.data[row][i] |= value.c[ii];
         ++ii;
     }
 
@@ -1996,10 +2071,7 @@ inline CUDA_CALLABLE void adj_assign_inplace(
 
 
 template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void
-    mat_t<Rows,Cols,Type>& m, slice_t row_slice, slice_t col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value,
-    mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, slice_t& adj_col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& adj_value
-)
+inline CUDA_CALLABLE void bit_or_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_slice, slice_t col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value)
 {
     assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
     assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
@@ -2028,7 +2100,7 @@ inline CUDA_CALLABLE void adj_assign_inplace(
             j += col_slice.step
         )
         {
-
+            m.data[i][j] |= value.data[ii][jj];
             ++jj;
         }
 
@@ -2041,100 +2113,50 @@ inline CUDA_CALLABLE void adj_assign_inplace(
 
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE
-
-
-
-    {
-        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
-        assert(0);
-    }
-    if (col < -(int)Cols || col >= (int)Cols)
-    {
-        printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
-        assert(0);
-    }
-#endif
-
-    if (row < 0)
-    {
-        row += Rows;
-    }
-    if (col < 0)
-    {
-        col += Cols;
-    }
-
-    mat_t<Rows,Cols,Type> ret(m);
-    ret.data[row][col] = value;
-    return ret;
-}
+inline CUDA_CALLABLE void adj_bit_or_inplace(
+    mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
+    mat_t<Rows,Cols,Type>& adj_m, int adj_row, int adj_col, Type& adj_value
+) {}
 
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE
-
-
-
-    {
-        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
-        assert(0);
-    }
-#endif
-
-    if (row < 0)
-    {
-        row += Rows;
-    }
-
-    mat_t<Rows,Cols,Type> ret(m);
-    for(unsigned i=0; i < Cols; ++i)
-    {
-        ret.data[row][i] = value[i];
-    }
-    return ret;
-}
+inline CUDA_CALLABLE void adj_bit_or_inplace(
+    mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, int adj_row, vec_t<Cols,Type>& adj_value
+) {}
 
 
 template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE
-
-    mat_t<Rows,
-
-    return ret;
-}
+inline CUDA_CALLABLE void adj_bit_or_inplace(
+    mat_t<Rows,Cols,Type>& m, slice_t row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& adj_value
+) {}
 
 
 template<unsigned RowSliceLength, unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE
-
-    mat_t<Rows,
-
-    return ret;
-}
+inline CUDA_CALLABLE void adj_bit_or_inplace(
+    mat_t<Rows,Cols,Type>& m, slice_t row_slice, int col, vec_t<RowSliceLength, Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, int& adj_col, vec_t<RowSliceLength, Type>& adj_value
+) {}
 
 
 template<unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE
-
-    mat_t<Rows,
-
-    return ret;
-}
+inline CUDA_CALLABLE void adj_bit_or_inplace(
+    mat_t<Rows,Cols,Type>& m, int row, slice_t col_slice, vec_t<ColSliceLength, Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, int& adj_row, slice_t& adj_col_slice, vec_t<ColSliceLength, Type>& adj_value
+) {}
 
 
 template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE
-
-    mat_t<Rows,
-
-
-}
+inline CUDA_CALLABLE void adj_bit_or_inplace(
+    mat_t<Rows,Cols,Type>& m, slice_t row_slice, slice_t col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, slice_t& adj_col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& adj_value
+) {}
+
 
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void
-    mat_t<Rows,Cols,Type>& adj_m, int& adj_row, int& adj_col, Type& adj_value, const mat_t<Rows,Cols,Type>& adj_ret)
+inline CUDA_CALLABLE void bit_xor_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
 {
 #ifndef NDEBUG
     if (row < -(int)Rows || row >= (int)Rows)
@@ -2158,21 +2180,12 @@ inline CUDA_CALLABLE void adj_assign_copy(mat_t<Rows,Cols,Type>& m, int row, int
         col += Cols;
     }
 
-
-    for(unsigned i=0; i < Rows; ++i)
-    {
-        for(unsigned j=0; j < Cols; ++j)
-        {
-            if(i != row || j != col)
-                adj_m.data[i][j] += adj_ret.data[i][j];
-        }
-    }
+    m.data[row][col] ^= value;
 }
 
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void
-    mat_t<Rows,Cols,Type>& adj_m, int& adj_row, vec_t<Cols,Type>& adj_value, const mat_t<Rows,Cols,Type>& adj_ret)
+inline CUDA_CALLABLE void bit_xor_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
 {
 #ifndef NDEBUG
     if (row < -(int)Rows || row >= (int)Rows)
@@ -2187,25 +2200,15 @@ inline CUDA_CALLABLE void adj_assign_copy(mat_t<Rows,Cols,Type>& m, int row, vec
         row += Rows;
     }
 
-    for(unsigned i=0; i <
+    for(unsigned i=0; i < Cols; ++i)
     {
-
-        {
-            if (i==row)
-                adj_value[j] += adj_ret.data[i][j];
-            else
-                adj_m.data[i][j] += adj_ret.data[i][j];
-        }
+        m.data[row][i] ^= value[i];
     }
 }
 
 
 template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void
-    mat_t<Rows,Cols,Type>& m, slice_t row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value,
-    mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& adj_value,
-    mat_t<Rows,Cols,Type>& adj_ret
-)
+inline CUDA_CALLABLE void bit_xor_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value)
 {
     static_assert(
         RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols,
@@ -2220,28 +2223,18 @@ inline CUDA_CALLABLE void adj_assign_copy(
     bool is_row_reversed = row_slice.step < 0;
 
     int ii = 0;
-    for (
+    for (
+        int i = row_slice.start;
+        is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
+        i += row_slice.step
+    )
     {
-
-            ? (i <= row_slice.start && i > row_slice.stop && (row_slice.start - i) % (-row_slice.step) == 0)
-            : (i >= row_slice.start && i < row_slice.stop && (i - row_slice.start) % row_slice.step == 0);
-
-        if (!in_row_slice)
+        for (int j = 0; j < Cols; ++j)
         {
-
-        {
-            adj_m.data[i][j] += adj_ret.data[i][j];
-        }
+            m.data[i][j] ^= value.data[ii][j];
         }
-        else
-        {
-            for (int j = 0; j < Cols; ++j)
-            {
-                adj_value.data[ii][j] += adj_ret.data[i][j];
-            }
 
-
-        }
+        ++ii;
     }
 
     assert(ii == RowSliceLength);
@@ -2249,11 +2242,7 @@ inline CUDA_CALLABLE void adj_assign_copy(
 
 
 template<unsigned RowSliceLength, unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void
-    mat_t<Rows,Cols,Type>& m, slice_t row_slice, int col, vec_t<RowSliceLength, Type>& value,
-    mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, int& adj_col, vec_t<RowSliceLength, Type>& adj_value,
-    mat_t<Rows,Cols,Type>& adj_ret
-)
+inline CUDA_CALLABLE void bit_xor_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_slice, int col, vec_t<RowSliceLength, Type>& value)
 {
 #ifndef NDEBUG
     if (col < -(int)Cols || col >= (int)Cols)
@@ -2276,14 +2265,808 @@ inline CUDA_CALLABLE void adj_assign_copy(
|
|
|
2276
2265
|
bool is_row_reversed = row_slice.step < 0;
|
|
2277
2266
|
|
|
2278
2267
|
int ii = 0;
|
|
2279
|
-
for (
|
|
2268
|
+
for (
|
|
2269
|
+
int i = row_slice.start;
|
|
2270
|
+
is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
|
|
2271
|
+
i += row_slice.step
|
|
2272
|
+
)
|
|
2280
2273
|
{
|
|
2281
|
-
|
|
2282
|
-
|
|
2283
|
-
|
|
2274
|
+
m.data[i][col] ^= value.c[ii];
|
|
2275
|
+
++ii;
|
|
2276
|
+
}
|
|
2284
2277
|
|
|
2285
|
-
|
|
2286
|
-
|
|
2278
|
+
assert(ii == RowSliceLength);
|
|
2279
|
+
}
|
|
2280
|
+
|
|
2281
|
+
|
|
2282
|
+
template<unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
2283
|
+
inline CUDA_CALLABLE void bit_xor_inplace(mat_t<Rows,Cols,Type>& m, int row, slice_t col_slice, vec_t<ColSliceLength, Type>& value)
|
|
2284
|
+
{
|
|
2285
|
+
#ifndef NDEBUG
|
|
2286
|
+
if (row < -(int)Rows || row >= (int)Rows)
|
|
2287
|
+
{
|
|
2288
|
+
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
2289
|
+
assert(0);
|
|
2290
|
+
}
|
|
2291
|
+
#endif
|
|
2292
|
+
|
|
2293
|
+
assert(col_slice.start >= 0 && col_slice.start <= (int)Cols);
|
|
2294
|
+
assert(col_slice.stop >= -1 && col_slice.stop <= (int)Cols);
|
|
2295
|
+
assert(col_slice.step != 0 && col_slice.step < 0 ? col_slice.start >= col_slice.stop : col_slice.start <= col_slice.stop);
|
|
2296
|
+
assert(slice_get_length(col_slice) == ColSliceLength);
|
|
2297
|
+
|
|
2298
|
+
if (row < 0)
|
|
2299
|
+
{
|
|
2300
|
+
row += Rows;
|
|
2301
|
+
}
|
|
2302
|
+
|
|
2303
|
+
bool is_col_reversed = col_slice.step < 0;
|
|
2304
|
+
|
|
2305
|
+
int ii = 0;
|
|
2306
|
+
for (
|
|
2307
|
+
int i = col_slice.start;
|
|
2308
|
+
is_col_reversed ? (i > col_slice.stop) : (i < col_slice.stop);
|
|
2309
|
+
i += col_slice.step
|
|
2310
|
+
)
|
|
2311
|
+
{
|
|
2312
|
+
m.data[row][i] ^= value.c[ii];
|
|
2313
|
+
++ii;
|
|
2314
|
+
}
|
|
2315
|
+
|
|
2316
|
+
assert(ii == ColSliceLength);
|
|
2317
|
+
}
|
|
2318
|
+
|
|
2319
|
+
|
|
2320
|
+
template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
2321
|
+
inline CUDA_CALLABLE void bit_xor_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_slice, slice_t col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value)
|
|
2322
|
+
{
|
|
2323
|
+
assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
|
|
2324
|
+
assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
|
|
2325
|
+
assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
|
|
2326
|
+
assert(slice_get_length(row_slice) == RowSliceLength);
|
|
2327
|
+
|
|
2328
|
+
assert(col_slice.start >= 0 && col_slice.start <= (int)Cols);
|
|
2329
|
+
assert(col_slice.stop >= -1 && col_slice.stop <= (int)Cols);
|
|
2330
|
+
assert(col_slice.step != 0 && col_slice.step < 0 ? col_slice.start >= col_slice.stop : col_slice.start <= col_slice.stop);
|
|
2331
|
+
assert(slice_get_length(col_slice) == ColSliceLength);
|
|
2332
|
+
|
|
2333
|
+
bool is_row_reversed = row_slice.step < 0;
|
|
2334
|
+
bool is_col_reversed = col_slice.step < 0;
|
|
2335
|
+
|
|
2336
|
+
int ii = 0;
|
|
2337
|
+
for (
|
|
2338
|
+
int i = row_slice.start;
|
|
2339
|
+
is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
|
|
2340
|
+
i += row_slice.step
|
|
2341
|
+
)
|
|
2342
|
+
{
|
|
2343
|
+
int jj = 0;
|
|
2344
|
+
for (
|
|
2345
|
+
int j = col_slice.start;
|
|
2346
|
+
is_col_reversed ? (j > col_slice.stop) : (j < col_slice.stop);
|
|
2347
|
+
j += col_slice.step
|
|
2348
|
+
)
|
|
2349
|
+
{
|
|
2350
|
+
m.data[i][j] ^= value.data[ii][jj];
|
|
2351
|
+
++jj;
|
|
2352
|
+
}
|
|
2353
|
+
|
|
2354
|
+
assert(jj == ColSliceLength);
|
|
2355
|
+
++ii;
|
|
2356
|
+
}
|
|
2357
|
+
|
|
2358
|
+
assert(ii == RowSliceLength);
|
|
2359
|
+
}
|
|
2360
|
+
|
|
2361
|
+
|
|
2362
|
+
template<unsigned Rows, unsigned Cols, typename Type>
|
|
2363
|
+
inline CUDA_CALLABLE void adj_bit_xor_inplace(
|
|
2364
|
+
mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
|
|
2365
|
+
mat_t<Rows,Cols,Type>& adj_m, int adj_row, int adj_col, Type& adj_value
|
|
2366
|
+
) {}
|
|
2367
|
+
|
|
2368
|
+
|
|
2369
|
+
template<unsigned Rows, unsigned Cols, typename Type>
|
|
2370
|
+
inline CUDA_CALLABLE void adj_bit_xor_inplace(
|
|
2371
|
+
mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
|
|
2372
|
+
mat_t<Rows,Cols,Type>& adj_m, int adj_row, vec_t<Cols,Type>& adj_value
|
|
2373
|
+
) {}
|
|
2374
|
+
|
|
2375
|
+
|
|
2376
|
+
template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
2377
|
+
inline CUDA_CALLABLE void adj_bit_xor_inplace(
|
|
2378
|
+
mat_t<Rows,Cols,Type>& m, slice_t row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value,
|
|
2379
|
+
mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& adj_value
|
|
2380
|
+
) {}
|
|
2381
|
+
|
|
2382
|
+
|
|
2383
|
+
template<unsigned RowSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
2384
|
+
inline CUDA_CALLABLE void adj_bit_xor_inplace(
|
|
2385
|
+
mat_t<Rows,Cols,Type>& m, slice_t row_slice, int col, vec_t<RowSliceLength, Type>& value,
|
|
2386
|
+
mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, int& adj_col, vec_t<RowSliceLength, Type>& adj_value
|
|
2387
|
+
) {}
|
|
2388
|
+
|
|
2389
|
+
|
|
2390
|
+
template<unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
2391
|
+
inline CUDA_CALLABLE void adj_bit_xor_inplace(
|
|
2392
|
+
mat_t<Rows,Cols,Type>& m, int row, slice_t col_slice, vec_t<ColSliceLength, Type>& value,
|
|
2393
|
+
mat_t<Rows,Cols,Type>& adj_m, int& adj_row, slice_t& adj_col_slice, vec_t<ColSliceLength, Type>& adj_value
|
|
2394
|
+
) {}
|
|
2395
|
+
|
|
2396
|
+
|
|
2397
|
+
template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
2398
|
+
inline CUDA_CALLABLE void adj_bit_xor_inplace(
|
|
2399
|
+
mat_t<Rows,Cols,Type>& m, slice_t row_slice, slice_t col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value,
|
|
2400
|
+
mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, slice_t& adj_col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& adj_value
|
|
2401
|
+
) {}
|
|
2402
|
+
|
|
2403
|
+
|
|
2404
|
+
template<unsigned Rows, unsigned Cols, typename Type>
|
|
2405
|
+
inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
|
|
2406
|
+
{
|
|
2407
|
+
#ifndef NDEBUG
|
|
2408
|
+
if (row < -(int)Rows || row >= (int)Rows)
|
|
2409
|
+
{
|
|
2410
|
+
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
2411
|
+
assert(0);
|
|
2412
|
+
}
|
|
2413
|
+
if (col < -(int)Cols || col >= (int)Cols)
|
|
2414
|
+
{
|
|
2415
|
+
printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
|
|
2416
|
+
assert(0);
|
|
2417
|
+
}
|
|
2418
|
+
#endif
|
|
2419
|
+
|
|
2420
|
+
if (row < 0)
|
|
2421
|
+
{
|
|
2422
|
+
row += Rows;
|
|
2423
|
+
}
|
|
2424
|
+
if (col < 0)
|
|
2425
|
+
{
|
|
2426
|
+
col += Cols;
|
|
2427
|
+
}
|
|
2428
|
+
|
|
2429
|
+
m.data[row][col] = value;
|
|
2430
|
+
}
|
|
2431
|
+
|
|
2432
|
+
|
|
2433
|
+
template<unsigned Rows, unsigned Cols, typename Type>
|
|
2434
|
+
inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
|
|
2435
|
+
{
|
|
2436
|
+
#ifndef NDEBUG
|
|
2437
|
+
if (row < -(int)Rows || row >= (int)Rows)
|
|
2438
|
+
{
|
|
2439
|
+
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
2440
|
+
assert(0);
|
|
2441
|
+
}
|
|
2442
|
+
#endif
|
|
2443
|
+
|
|
2444
|
+
if (row < 0)
|
|
2445
|
+
{
|
|
2446
|
+
row += Rows;
|
|
2447
|
+
}
|
|
2448
|
+
|
|
2449
|
+
for(unsigned i=0; i < Cols; ++i)
|
|
2450
|
+
{
|
|
2451
|
+
m.data[row][i] = value[i];
|
|
2452
|
+
}
|
|
2453
|
+
}
|
|
2454
|
+
|
|
2455
|
+
|
|
2456
|
+
template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
2457
|
+
inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value)
|
|
2458
|
+
{
|
|
2459
|
+
static_assert(
|
|
2460
|
+
RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols,
|
|
2461
|
+
"Expected RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols"
|
|
2462
|
+
);
|
|
2463
|
+
|
|
2464
|
+
assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
|
|
2465
|
+
assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
|
|
2466
|
+
assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
|
|
2467
|
+
assert(slice_get_length(row_slice) == RowSliceLength);
|
|
2468
|
+
|
|
2469
|
+
bool is_row_reversed = row_slice.step < 0;
|
|
2470
|
+
|
|
2471
|
+
int ii = 0;
|
|
2472
|
+
for (
|
|
2473
|
+
int i = row_slice.start;
|
|
2474
|
+
is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
|
|
2475
|
+
i += row_slice.step
|
|
2476
|
+
)
|
|
2477
|
+
{
|
|
2478
|
+
for (int j = 0; j < Cols; ++j)
|
|
2479
|
+
{
|
|
2480
|
+
m.data[i][j] = value.data[ii][j];
|
|
2481
|
+
}
|
|
2482
|
+
|
|
2483
|
+
++ii;
|
|
2484
|
+
}
|
|
2485
|
+
|
|
2486
|
+
assert(ii == RowSliceLength);
|
|
2487
|
+
}
|
|
2488
|
+
|
|
2489
|
+
|
|
2490
|
+
template<unsigned RowSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
2491
|
+
inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_slice, int col, vec_t<RowSliceLength, Type>& value)
|
|
2492
|
+
{
|
|
2493
|
+
#ifndef NDEBUG
|
|
2494
|
+
if (col < -(int)Cols || col >= (int)Cols)
|
|
2495
|
+
{
|
|
2496
|
+
printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
|
|
2497
|
+
assert(0);
|
|
2498
|
+
}
|
|
2499
|
+
#endif
|
|
2500
|
+
|
|
2501
|
+
assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
|
|
2502
|
+
assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
|
|
2503
|
+
assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
|
|
2504
|
+
assert(slice_get_length(row_slice) == RowSliceLength);
|
|
2505
|
+
|
|
2506
|
+
if (col < 0)
|
|
2507
|
+
{
|
|
2508
|
+
col += Cols;
|
|
2509
|
+
}
|
|
2510
|
+
|
|
2511
|
+
bool is_row_reversed = row_slice.step < 0;
|
|
2512
|
+
|
|
2513
|
+
int ii = 0;
|
|
2514
|
+
for (
|
|
2515
|
+
int i = row_slice.start;
|
|
2516
|
+
is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
|
|
2517
|
+
i += row_slice.step
|
|
2518
|
+
)
|
|
2519
|
+
{
|
|
2520
|
+
m.data[i][col] = value.c[ii];
|
|
2521
|
+
++ii;
|
|
2522
|
+
}
|
|
2523
|
+
|
|
2524
|
+
assert(ii == RowSliceLength);
|
|
2525
|
+
}
|
|
2526
|
+
|
|
2527
|
+
|
|
2528
|
+
template<unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
2529
|
+
inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, int row, slice_t col_slice, vec_t<ColSliceLength, Type>& value)
|
|
2530
|
+
{
|
|
2531
|
+
#ifndef NDEBUG
|
|
2532
|
+
if (row < -(int)Rows || row >= (int)Rows)
|
|
2533
|
+
{
|
|
2534
|
+
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
2535
|
+
assert(0);
|
|
2536
|
+
}
|
|
2537
|
+
#endif
|
|
2538
|
+
|
|
2539
|
+
assert(col_slice.start >= 0 && col_slice.start <= (int)Cols);
|
|
2540
|
+
assert(col_slice.stop >= -1 && col_slice.stop <= (int)Cols);
|
|
2541
|
+
assert(col_slice.step != 0 && col_slice.step < 0 ? col_slice.start >= col_slice.stop : col_slice.start <= col_slice.stop);
|
|
2542
|
+
assert(slice_get_length(col_slice) == ColSliceLength);
|
|
2543
|
+
|
|
2544
|
+
if (row < 0)
|
|
2545
|
+
{
|
|
2546
|
+
row += Rows;
|
|
2547
|
+
}
|
|
2548
|
+
|
|
2549
|
+
bool is_col_reversed = col_slice.step < 0;
|
|
2550
|
+
|
|
2551
|
+
int ii = 0;
|
|
2552
|
+
for (
|
|
2553
|
+
int i = col_slice.start;
|
|
2554
|
+
is_col_reversed ? (i > col_slice.stop) : (i < col_slice.stop);
|
|
2555
|
+
i += col_slice.step
|
|
2556
|
+
)
|
|
2557
|
+
{
|
|
2558
|
+
m.data[row][i] = value.c[ii];
|
|
2559
|
+
++ii;
|
|
2560
|
+
}
|
|
2561
|
+
|
|
2562
|
+
assert(ii == ColSliceLength);
|
|
2563
|
+
}
|
|
2564
|
+
|
|
2565
|
+
|
|
2566
|
+
template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
2567
|
+
inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_slice, slice_t col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value)
|
|
2568
|
+
{
|
|
2569
|
+
assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
|
|
2570
|
+
assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
|
|
2571
|
+
assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
|
|
2572
|
+
assert(slice_get_length(row_slice) == RowSliceLength);
|
|
2573
|
+
|
|
2574
|
+
assert(col_slice.start >= 0 && col_slice.start <= (int)Cols);
|
|
2575
|
+
assert(col_slice.stop >= -1 && col_slice.stop <= (int)Cols);
|
|
2576
|
+
assert(col_slice.step != 0 && col_slice.step < 0 ? col_slice.start >= col_slice.stop : col_slice.start <= col_slice.stop);
|
|
2577
|
+
assert(slice_get_length(col_slice) == ColSliceLength);
|
|
2578
|
+
|
|
2579
|
+
bool is_row_reversed = row_slice.step < 0;
|
|
2580
|
+
bool is_col_reversed = col_slice.step < 0;
|
|
2581
|
+
|
|
2582
|
+
int ii = 0;
|
|
2583
|
+
for (
|
|
2584
|
+
int i = row_slice.start;
|
|
2585
|
+
is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
|
|
2586
|
+
i += row_slice.step
|
|
2587
|
+
)
|
|
2588
|
+
{
|
|
2589
|
+
int jj = 0;
|
|
2590
|
+
for (
|
|
2591
|
+
int j = col_slice.start;
|
|
2592
|
+
is_col_reversed ? (j > col_slice.stop) : (j < col_slice.stop);
|
|
2593
|
+
j += col_slice.step
|
|
2594
|
+
)
|
|
2595
|
+
{
|
|
2596
|
+
m.data[i][j] = value.data[ii][jj];
|
|
2597
|
+
++jj;
|
|
2598
|
+
}
|
|
2599
|
+
|
|
2600
|
+
assert(jj == ColSliceLength);
|
|
2601
|
+
++ii;
|
|
2602
|
+
}
|
|
2603
|
+
|
|
2604
|
+
assert(ii == RowSliceLength);
|
|
2605
|
+
}
|
|
2606
|
+
|
|
2607
|
+
|
|
2608
|
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_assign_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
+                                             mat_t<Rows,Cols,Type>& adj_m, int& adj_row, int& adj_col, Type& adj_value)
+{
+#ifndef NDEBUG
+    if (row < -(int)Rows || row >= (int)Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+    if (col < -(int)Cols || col >= (int)Cols)
+    {
+        printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    if (row < 0)
+    {
+        row += Rows;
+    }
+    if (col < 0)
+    {
+        col += Cols;
+    }
+
+    adj_value += adj_m.data[row][col];
+}
+
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_assign_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
+                                             mat_t<Rows,Cols,Type>& adj_m, int& adj_row, vec_t<Cols,Type>& adj_value)
+{
+#ifndef NDEBUG
+    if (row < -(int)Rows || row >= (int)Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    if (row < 0)
+    {
+        row += Rows;
+    }
+
+    for(unsigned i=0; i < Cols; ++i)
+    {
+        adj_value[i] += adj_m.data[row][i];
+    }
+}
+
+
+template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_assign_inplace(
+    mat_t<Rows,Cols,Type>& m, slice_t row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& adj_value
+)
+{
+    static_assert(
+        RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols,
+        "Expected RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols"
+    );
+
+    assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
+    assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
+    assert(row_slice.step != 0 && (row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop));
+    assert(slice_get_length(row_slice) == RowSliceLength);
+
+    bool is_row_reversed = row_slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = row_slice.start;
+        is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
+        i += row_slice.step
+    )
+    {
+        for (int j = 0; j < Cols; ++j)
+        {
+            adj_value.data[ii][j] += adj_m.data[i][j];
+        }
+
+        ++ii;
+    }
+
+    assert(ii == RowSliceLength);
+}
+
+
+template<unsigned RowSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_assign_inplace(
+    mat_t<Rows,Cols,Type>& m, slice_t row_slice, int col, vec_t<RowSliceLength, Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, int& adj_col, vec_t<RowSliceLength, Type>& adj_value
+)
+{
+#ifndef NDEBUG
+    if (col < -(int)Cols || col >= (int)Cols)
+    {
+        printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
+    assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
+    assert(row_slice.step != 0 && (row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop));
+    assert(slice_get_length(row_slice) == RowSliceLength);
+
+    if (col < 0)
+    {
+        col += Cols;
+    }
+
+    bool is_row_reversed = row_slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = row_slice.start;
+        is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
+        i += row_slice.step
+    )
+    {
+        adj_value.c[ii] += adj_m.data[i][col];
+        ++ii;
+    }
+
+    assert(ii == RowSliceLength);
+}
+
+
+template<unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_assign_inplace(
+    mat_t<Rows,Cols,Type>& m, int row, slice_t col_slice, vec_t<ColSliceLength, Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, int& adj_row, slice_t& adj_col_slice, vec_t<ColSliceLength, Type>& adj_value
+)
+{
+#ifndef NDEBUG
+    if (row < -(int)Rows || row >= (int)Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    assert(col_slice.start >= 0 && col_slice.start <= (int)Cols);
+    assert(col_slice.stop >= -1 && col_slice.stop <= (int)Cols);
+    assert(col_slice.step != 0 && (col_slice.step < 0 ? col_slice.start >= col_slice.stop : col_slice.start <= col_slice.stop));
+    assert(slice_get_length(col_slice) == ColSliceLength);
+
+    if (row < 0)
+    {
+        row += Rows;
+    }
+
+    bool is_col_reversed = col_slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = col_slice.start;
+        is_col_reversed ? (i > col_slice.stop) : (i < col_slice.stop);
+        i += col_slice.step
+    )
+    {
+        adj_value.c[ii] += adj_m.data[row][i];
+        ++ii;
+    }
+
+    assert(ii == ColSliceLength);
+}
+
+
+template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_assign_inplace(
+    mat_t<Rows,Cols,Type>& m, slice_t row_slice, slice_t col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, slice_t& adj_col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& adj_value
+)
+{
+    assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
+    assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
+    assert(row_slice.step != 0 && (row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop));
+    assert(slice_get_length(row_slice) == RowSliceLength);
+
+    assert(col_slice.start >= 0 && col_slice.start <= (int)Cols);
+    assert(col_slice.stop >= -1 && col_slice.stop <= (int)Cols);
+    assert(col_slice.step != 0 && (col_slice.step < 0 ? col_slice.start >= col_slice.stop : col_slice.start <= col_slice.stop));
+    assert(slice_get_length(col_slice) == ColSliceLength);
+
+    bool is_row_reversed = row_slice.step < 0;
+    bool is_col_reversed = col_slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = row_slice.start;
+        is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
+        i += row_slice.step
+    )
+    {
+        int jj = 0;
+        for (
+            int j = col_slice.start;
+            is_col_reversed ? (j > col_slice.stop) : (j < col_slice.stop);
+            j += col_slice.step
+        )
+        {
+            adj_value.data[ii][jj] += adj_m.data[i][j];
+            ++jj;
+        }
+
+        assert(jj == ColSliceLength);
+        ++ii;
+    }
+
+    assert(ii == RowSliceLength);
+}
+
+
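Each assign_inplace overload is paired with an adj_assign_inplace above that runs the data flow in reverse: the forward op overwrites selected entries of m with entries of value, so the adjoint reads the corresponding entries of adj_m into adj_value. A toy host-side illustration of that contract, assuming the scalar assign_inplace overload defined earlier in this header (shapes and values are made up for illustration):

// Illustrative only: the gradient arriving at m[1][0] belongs to `value`,
// because the forward pass wrote `value` into that entry.
void example_adjoint_of_assignment()
{
    mat_t<2,2,float> m;
    mat_t<2,2,float> adj_m(0);
    adj_m.data[1][0] = 3.0f;

    float value = 5.0f;
    float adj_value = 0.0f;
    int row = 1, col = 0, adj_row = 0, adj_col = 0;

    assign_inplace(m, row, col, value);  // forward: m[1][0] = 5
    adj_assign_inplace(m, row, col, value, adj_m, adj_row, adj_col, adj_value);
    // adj_value is now 3.0f
}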
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign_copy(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
+{
+#ifndef NDEBUG
+    if (row < -(int)Rows || row >= (int)Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+    if (col < -(int)Cols || col >= (int)Cols)
+    {
+        printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    if (row < 0)
+    {
+        row += Rows;
+    }
+    if (col < 0)
+    {
+        col += Cols;
+    }
+
+    mat_t<Rows,Cols,Type> ret(m);
+    ret.data[row][col] = value;
+    return ret;
+}
+
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign_copy(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
+{
+#ifndef NDEBUG
+    if (row < -(int)Rows || row >= (int)Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    if (row < 0)
+    {
+        row += Rows;
+    }
+
+    mat_t<Rows,Cols,Type> ret(m);
+    for(unsigned i=0; i < Cols; ++i)
+    {
+        ret.data[row][i] = value[i];
+    }
+    return ret;
+}
+
+
+template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign_copy(mat_t<Rows,Cols,Type>& m, slice_t row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value)
+{
+    mat_t<Rows, Cols, Type> ret(m);
+    assign_inplace(ret, row_slice, value);
+    return ret;
+}
+
+
+template<unsigned RowSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign_copy(mat_t<Rows,Cols,Type>& m, slice_t row_slice, int col, vec_t<RowSliceLength, Type>& value)
+{
+    mat_t<Rows, Cols, Type> ret(m);
+    assign_inplace(ret, row_slice, col, value);
+    return ret;
+}
+
+
+template<unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign_copy(mat_t<Rows,Cols,Type>& m, int row, slice_t col_slice, vec_t<ColSliceLength, Type>& value)
+{
+    mat_t<Rows, Cols, Type> ret(m);
+    assign_inplace(ret, row, col_slice, value);
+    return ret;
+}
+
+
+template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign_copy(mat_t<Rows,Cols,Type>& m, slice_t row_slice, slice_t col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value)
+{
+    mat_t<Rows, Cols, Type> ret(m);
+    assign_inplace(ret, row_slice, col_slice, value);
+    return ret;
+}
+
+
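The assign_copy family mirrors assign_inplace but returns a modified copy, leaving the input untouched; the slice overloads simply copy the matrix and delegate to assign_inplace. A usage sketch (the concrete shape and values are assumptions for illustration, and vec_t's component constructor is assumed from the vector header):

// assign_copy leaves `m` unchanged and returns the edited matrix.
mat_t<3,3,float> m;                                  // assume initialized elsewhere
vec_t<3,float> row_vals(1.0f, 2.0f, 3.0f);
mat_t<3,3,float> m2 = assign_copy(m, 0, row_vals);   // m2 row 0 replaced; m intact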
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_assign_copy(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
+                                          mat_t<Rows,Cols,Type>& adj_m, int& adj_row, int& adj_col, Type& adj_value, const mat_t<Rows,Cols,Type>& adj_ret)
+{
+#ifndef NDEBUG
+    if (row < -(int)Rows || row >= (int)Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+    if (col < -(int)Cols || col >= (int)Cols)
+    {
+        printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    if (row < 0)
+    {
+        row += Rows;
+    }
+    if (col < 0)
+    {
+        col += Cols;
+    }
+
+    adj_value += adj_ret.data[row][col];
+    for(unsigned i=0; i < Rows; ++i)
+    {
+        for(unsigned j=0; j < Cols; ++j)
+        {
+            if(i != row || j != col)
+                adj_m.data[i][j] += adj_ret.data[i][j];
+        }
+    }
+}
+
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_assign_copy(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
+                                          mat_t<Rows,Cols,Type>& adj_m, int& adj_row, vec_t<Cols,Type>& adj_value, const mat_t<Rows,Cols,Type>& adj_ret)
+{
+#ifndef NDEBUG
+    if (row < -(int)Rows || row >= (int)Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    if (row < 0)
+    {
+        row += Rows;
+    }
+
+    for(unsigned i=0; i < Rows; ++i)
+    {
+        for(unsigned j=0; j < Cols; ++j)
+        {
+            if (i==row)
+                adj_value[j] += adj_ret.data[i][j];
+            else
+                adj_m.data[i][j] += adj_ret.data[i][j];
+        }
+    }
+}
+
+
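The adj_assign_copy overloads above split the incoming adj_ret between the two forward inputs: entries that the forward call overwrote route their gradient to adj_value, all other entries pass through to adj_m. A worked micro-example for the row-vector overload (values assumed):

// For a 2x2 m with ret = assign_copy(m, 0, value) and incoming
// adj_ret = [[a, b], [c, d]], the adjoint above produces:
//   adj_value += [a, b]            // row 0 was overwritten by `value`
//   adj_m     += [[0, 0], [c, d]]  // row 1 passed through unchanged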
+template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_assign_copy(
+    mat_t<Rows,Cols,Type>& m, slice_t row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& adj_value,
+    mat_t<Rows,Cols,Type>& adj_ret
+)
+{
+    static_assert(
+        RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols,
+        "Expected RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols"
+    );
+
+    assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
+    assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
+    assert(row_slice.step != 0 && (row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop));
+    assert(slice_get_length(row_slice) == RowSliceLength);
+
+    bool is_row_reversed = row_slice.step < 0;
+
+    int ii = 0;
+    for (int i = 0; i < Rows; ++i)
+    {
+        bool in_row_slice = is_row_reversed
+            ? (i <= row_slice.start && i > row_slice.stop && (row_slice.start - i) % (-row_slice.step) == 0)
+            : (i >= row_slice.start && i < row_slice.stop && (i - row_slice.start) % row_slice.step == 0);
+
+        if (!in_row_slice)
+        {
+            for (int j = 0; j < Cols; ++j)
+            {
+                adj_m.data[i][j] += adj_ret.data[i][j];
+            }
+        }
+        else
+        {
+            for (int j = 0; j < Cols; ++j)
+            {
+                adj_value.data[ii][j] += adj_ret.data[i][j];
+            }
+
+            ++ii;
+        }
+    }
+
+    assert(ii == RowSliceLength);
+}
+
+
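Unlike the forward slice assignment, these slice-aware adjoints cannot iterate over the slice alone: gradient must also flow through the rows the slice did not touch, so they walk every row and test slice membership inline. The membership test, factored into a standalone predicate for readability (a hypothetical helper equivalent to the in_row_slice expression used in the overloads above and below; not part of this diff):

// Index i is in the slice iff it lies on the slice's side of [start, stop)
// and sits a whole number of steps away from start.
inline CUDA_CALLABLE bool slice_contains_sketch(const slice_t& s, int i)
{
    if (s.step < 0)
        return i <= s.start && i > s.stop && (s.start - i) % (-s.step) == 0;
    return i >= s.start && i < s.stop && (i - s.start) % s.step == 0;
}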
+template<unsigned RowSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_assign_copy(
+    mat_t<Rows,Cols,Type>& m, slice_t row_slice, int col, vec_t<RowSliceLength, Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, int& adj_col, vec_t<RowSliceLength, Type>& adj_value,
+    mat_t<Rows,Cols,Type>& adj_ret
+)
+{
+#ifndef NDEBUG
+    if (col < -(int)Cols || col >= (int)Cols)
+    {
+        printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
+    assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
+    assert(row_slice.step != 0 && (row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop));
+    assert(slice_get_length(row_slice) == RowSliceLength);
+
+    if (col < 0)
+    {
+        col += Cols;
+    }
+
+    bool is_row_reversed = row_slice.step < 0;
+
+    int ii = 0;
+    for (int i = 0; i < Rows; ++i)
+    {
+        bool in_row_slice = is_row_reversed
+            ? (i <= row_slice.start && i > row_slice.stop && (row_slice.start - i) % (-row_slice.step) == 0)
+            : (i >= row_slice.start && i < row_slice.stop && (i - row_slice.start) % row_slice.step == 0);
+
+        if (!in_row_slice)
+        {
             for (int j = 0; j < Cols; ++j)
             {
                 adj_m.data[i][j] += adj_ret.data[i][j];
@@ -2427,69 +3210,298 @@ inline CUDA_CALLABLE void adj_assign_copy(
             }
         }
 
-        assert(jj == ColSliceLength);
-        ++ii;
+            assert(jj == ColSliceLength);
+            ++ii;
+        }
+    }
+
+    assert(ii == RowSliceLength);
+}
+
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline bool CUDA_CALLABLE isfinite(const mat_t<Rows,Cols,Type>& m)
+{
+    for (unsigned i=0; i < Rows; ++i)
+        for (unsigned j=0; j < Cols; ++j)
+            if (!isfinite(m.data[i][j]))
+                return false;
+    return true;
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline void CUDA_CALLABLE adj_isfinite(const mat_t<Rows,Cols,Type>& m, mat_t<Rows,Cols,Type>& adj_m, const bool &adj_ret)
+{
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline bool CUDA_CALLABLE isnan(const mat_t<Rows,Cols,Type>& m)
+{
+    for (unsigned i=0; i < Rows; ++i)
+        for (unsigned j=0; j < Cols; ++j)
+            if (isnan(m.data[i][j]))
+                return true;
+    return false;
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline void CUDA_CALLABLE adj_isnan(const mat_t<Rows,Cols,Type>& m, mat_t<Rows,Cols,Type>& adj_m, const bool &adj_ret)
+{
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline bool CUDA_CALLABLE isinf(const mat_t<Rows,Cols,Type>& m)
+{
+    for (unsigned i=0; i < Rows; ++i)
+        for (unsigned j=0; j < Cols; ++j)
+            if (isinf(m.data[i][j]))
+                return true;
+    return false;
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline void CUDA_CALLABLE adj_isinf(const mat_t<Rows,Cols,Type>& m, mat_t<Rows,Cols,Type>& adj_m, const bool &adj_ret)
+{
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> add(const mat_t<Rows,Cols,Type>& a, const mat_t<Rows,Cols,Type>& b)
+{
+    mat_t<Rows,Cols,Type> t;
+    for (unsigned i=0; i < Rows; ++i)
+    {
+        for (unsigned j=0; j < Cols; ++j)
+        {
+            t.data[i][j] = a.data[i][j] + b.data[i][j];
+        }
+    }
+
+    return t;
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> add(Type a, const mat_t<Rows,Cols,Type>& b)
+{
+    mat_t<Rows,Cols,Type> t;
+    for (unsigned i=0; i < Rows; ++i)
+    {
+        for (unsigned j=0; j < Cols; ++j)
+        {
+            t.data[i][j] = a + b.data[i][j];
+        }
+    }
+
+    return t;
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> sub(const mat_t<Rows,Cols,Type>& a, const mat_t<Rows,Cols,Type>& b)
+{
+    mat_t<Rows,Cols,Type> t;
+    for (unsigned i=0; i < Rows; ++i)
+    {
+        for (unsigned j=0; j < Cols; ++j)
+        {
+            t.data[i][j] = a.data[i][j] - b.data[i][j];
+        }
+    }
+
+    return t;
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> sub(Type a, const mat_t<Rows,Cols,Type>& b)
+{
+    mat_t<Rows,Cols,Type> t;
+    for (unsigned i=0; i < Rows; ++i)
+    {
+        for (unsigned j=0; j < Cols; ++j)
+        {
+            t.data[i][j] = a - b.data[i][j];
+        }
+    }
+
+    return t;
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> div(const mat_t<Rows,Cols,Type>& a, Type b)
+{
+    mat_t<Rows,Cols,Type> t;
+    for (unsigned i=0; i < Rows; ++i)
+    {
+        for (unsigned j=0; j < Cols; ++j)
+        {
+            t.data[i][j] = a.data[i][j]/b;
+        }
+    }
+
+    return t;
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> div(Type b, const mat_t<Rows,Cols,Type>& a)
+{
+    mat_t<Rows,Cols,Type> t;
+    for (unsigned i=0; i < Rows; ++i)
+    {
+        for (unsigned j=0; j < Cols; ++j)
+        {
+            t.data[i][j] = b / a.data[i][j];
+        }
+    }
+
+    return t;
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> mul(const mat_t<Rows,Cols,Type>& a, Type b)
+{
+    mat_t<Rows,Cols,Type> t;
+    for (unsigned i=0; i < Rows; ++i)
+    {
+        for (unsigned j=0; j < Cols; ++j)
+        {
+            t.data[i][j] = a.data[i][j]*b;
+        }
+    }
+
+    return t;
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> mul(Type b, const mat_t<Rows,Cols,Type>& a)
+{
+    return mul(a,b);
+}
+
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> operator*(Type b, const mat_t<Rows,Cols,Type>& a)
+{
+    return mul(a,b);
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> operator*( const mat_t<Rows,Cols,Type>& a, Type b)
+{
+    return mul(a,b);
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE vec_t<Rows,Type> mul(const mat_t<Rows,Cols,Type>& a, const vec_t<Cols,Type>& b)
+{
+    vec_t<Rows,Type> r = a.get_col(0)*b[0];
+    for( unsigned i=1; i < Cols; ++i )
+    {
+        r += a.get_col(i)*b[i];
+    }
+    return r;
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE vec_t<Cols,Type> mul(const vec_t<Rows,Type>& b, const mat_t<Rows,Cols,Type>& a)
+{
+    vec_t<Cols,Type> r = a.get_row(0)*b[0];
+    for( unsigned i=1; i < Rows; ++i )
+    {
+        r += a.get_row(i)*b[i];
+    }
+    return r;
+}
+
+template<typename T>
+inline CUDA_CALLABLE T muladd(T a, T b, T c) {
+    return c + a*b;
+}
+template<>
+inline CUDA_CALLABLE float muladd(float a, float b, float c) {
+    return fmaf(a, b, c);
+}
+template<>
+inline CUDA_CALLABLE double muladd(double a, double b, double c) {
+    return fma(a, b, c);
+}
+
+
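muladd is specialized so that float and double go through the C math library's fused multiply-add (fmaf/fma), which evaluates a*b + c with a single rounding; the generic template falls back to two rounded operations. A standalone host-side sketch of the difference (illustrative values):

#include <cmath>
#include <cstdio>

// a*b + c rounds the product before adding; fma rounds once at the end.
int main()
{
    double a = 1.0 + 1e-8, b = 1.0 - 1e-8, c = -1.0;
    printf("two roundings: %.17e\n", a * b + c);         // product rounded first
    printf("fused        : %.17e\n", std::fma(a, b, c)); // single rounding
    return 0;
}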
+template<unsigned Rows, unsigned Cols, unsigned ColsOut, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,ColsOut,Type> mul(const mat_t<Rows,Cols,Type>& a, const mat_t<Cols,ColsOut,Type>& b)
+{
+    mat_t<Rows,ColsOut,Type> t(0);
+    for (unsigned i=0; i < Rows; ++i)
+    {
+        for (unsigned j=0; j < ColsOut; ++j)
+        {
+            Type sum(0.0);
+
+            for (unsigned k=0; k < Cols; ++k)
+            {
+                sum = muladd<Type>(a.data[i][k], b.data[k][j], sum);
+            }
+
+            t.data[i][j] = sum;
         }
     }
-
-
+
+    return t;
 }
 
-
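The general matrix product above multiplies a Rows x Cols matrix by a Cols x ColsOut matrix, accumulating each of the Rows*ColsOut outputs over the shared inner dimension via muladd. Dimension bookkeeping in a short sketch (shapes assumed for illustration):

// (2x3) * (3x4) -> (2x4): the inner dimension Cols = 3 is shared, and each
// of the 2*4 outputs accumulates 3 muladd terms.
mat_t<2,3,float> a;   // assume filled elsewhere
mat_t<3,4,float> b;
mat_t<2,4,float> c = mul(a, b);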
+// bitwise AND
 template<unsigned Rows, unsigned Cols, typename Type>
-inline
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> bit_and(const mat_t<Rows,Cols,Type>& a, const mat_t<Rows,Cols,Type>& b)
 {
+    mat_t<Rows,Cols,Type> t;
     for (unsigned i=0; i < Rows; ++i)
+    {
         for (unsigned j=0; j < Cols; ++j)
-
-
-
-    }
+        {
+            t.data[i][j] = a.data[i][j] & b.data[i][j];
+        }
+    }
 
-
-inline void CUDA_CALLABLE adj_isfinite(const mat_t<Rows,Cols,Type>& m, mat_t<Rows,Cols,Type>& adj_m, const bool &adj_ret)
-{
+    return t;
 }
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> bit_and(const mat_t<Rows,Cols,Type>& a, Type b)
 {
+    mat_t<Rows,Cols,Type> t;
     for (unsigned i=0; i < Rows; ++i)
+    {
         for (unsigned j=0; j < Cols; ++j)
-
-
-
-    }
+        {
+            t.data[i][j] = a.data[i][j] & b;
+        }
+    }
 
-
-inline void CUDA_CALLABLE adj_isnan(const mat_t<Rows,Cols,Type>& m, mat_t<Rows,Cols,Type>& adj_m, const bool &adj_ret)
-{
+    return t;
 }
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> bit_and(Type a, const mat_t<Rows,Cols,Type>& b)
 {
+    mat_t<Rows,Cols,Type> t;
     for (unsigned i=0; i < Rows; ++i)
+    {
         for (unsigned j=0; j < Cols; ++j)
-
-
-
-    }
+        {
+            t.data[i][j] = a & b.data[i][j];
+        }
+    }
 
-
-inline void CUDA_CALLABLE adj_isinf(const mat_t<Rows,Cols,Type>& m, mat_t<Rows,Cols,Type>& adj_m, const bool &adj_ret)
-{
+    return t;
 }
 
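These bitwise overloads (and the OR/XOR/shift families that follow) are only meaningful when Type is an integer type; instantiating them with float would fail to compile, since operator& is not defined for floating-point operands. Usage sketch (values assumed):

// Elementwise AND against a scalar mask, e.g. clearing the low bits of every
// entry of an integer matrix.
mat_t<2,2,uint32> flags;   // assume filled elsewhere
mat_t<2,2,uint32> masked = bit_and(flags, uint32(0xFFFFFFF0u));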
+// bitwise OR
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE mat_t<Rows,Cols,Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> bit_or(const mat_t<Rows,Cols,Type>& a, const mat_t<Rows,Cols,Type>& b)
 {
     mat_t<Rows,Cols,Type> t;
     for (unsigned i=0; i < Rows; ++i)
     {
         for (unsigned j=0; j < Cols; ++j)
         {
-            t.data[i][j] = a.data[i][j]
+            t.data[i][j] = a.data[i][j] | b.data[i][j];
         }
     }
 
@@ -2497,14 +3509,14 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> add(const mat_t<Rows,Cols,Type>& a, c
 }
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE mat_t<Rows,Cols,Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> bit_or(const mat_t<Rows,Cols,Type>& a, Type b)
 {
     mat_t<Rows,Cols,Type> t;
     for (unsigned i=0; i < Rows; ++i)
     {
         for (unsigned j=0; j < Cols; ++j)
         {
-            t.data[i][j] = a
+            t.data[i][j] = a.data[i][j] | b;
         }
     }
 
@@ -2512,29 +3524,30 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> add(Type a, const mat_t<Rows,Cols,Typ
 }
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE mat_t<Rows,Cols,Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> bit_or(Type a, const mat_t<Rows,Cols,Type>& b)
 {
     mat_t<Rows,Cols,Type> t;
     for (unsigned i=0; i < Rows; ++i)
     {
         for (unsigned j=0; j < Cols; ++j)
         {
-            t.data[i][j] = a
+            t.data[i][j] = a | b.data[i][j];
         }
     }
 
     return t;
 }
 
+// bitwise XOR
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE mat_t<Rows,Cols,Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> bit_xor(const mat_t<Rows,Cols,Type>& a, const mat_t<Rows,Cols,Type>& b)
 {
     mat_t<Rows,Cols,Type> t;
     for (unsigned i=0; i < Rows; ++i)
     {
         for (unsigned j=0; j < Cols; ++j)
         {
-            t.data[i][j] = a
+            t.data[i][j] = a.data[i][j] ^ b.data[i][j];
         }
     }
 
@@ -2542,14 +3555,14 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> sub(Type a, const mat_t<Rows,Cols,Typ
 }
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE mat_t<Rows,Cols,Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> bit_xor(const mat_t<Rows,Cols,Type>& a, Type b)
 {
     mat_t<Rows,Cols,Type> t;
     for (unsigned i=0; i < Rows; ++i)
     {
         for (unsigned j=0; j < Cols; ++j)
         {
-            t.data[i][j] = a.data[i][j]
+            t.data[i][j] = a.data[i][j] ^ b;
         }
     }
 
@@ -2557,29 +3570,30 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> div(const mat_t<Rows,Cols,Type>& a, T
 }
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE mat_t<Rows,Cols,Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> bit_xor(Type a, const mat_t<Rows,Cols,Type>& b)
 {
     mat_t<Rows,Cols,Type> t;
     for (unsigned i=0; i < Rows; ++i)
     {
         for (unsigned j=0; j < Cols; ++j)
         {
-            t.data[i][j] =
+            t.data[i][j] = a ^ b.data[i][j];
         }
     }
 
     return t;
 }
 
+// left shift
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE mat_t<Rows,Cols,Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> lshift(const mat_t<Rows,Cols,Type>& a, const mat_t<Rows,Cols,Type>& b)
 {
     mat_t<Rows,Cols,Type> t;
     for (unsigned i=0; i < Rows; ++i)
     {
         for (unsigned j=0; j < Cols; ++j)
         {
-            t.data[i][j] = a.data[i][j]
+            t.data[i][j] = a.data[i][j] << b.data[i][j];
         }
     }
 
@@ -2587,79 +3601,94 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> mul(const mat_t<Rows,Cols,Type>& a, T
 }
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE mat_t<Rows,Cols,Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> lshift(const mat_t<Rows,Cols,Type>& a, Type b)
 {
-
-
+    mat_t<Rows,Cols,Type> t;
+    for (unsigned i=0; i < Rows; ++i)
+    {
+        for (unsigned j=0; j < Cols; ++j)
+        {
+            t.data[i][j] = a.data[i][j] << b;
+        }
+    }
 
+    return t;
+}
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE mat_t<Rows,Cols,Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> lshift(Type a, const mat_t<Rows,Cols,Type>& b)
 {
-
+    mat_t<Rows,Cols,Type> t;
+    for (unsigned i=0; i < Rows; ++i)
+    {
+        for (unsigned j=0; j < Cols; ++j)
+        {
+            t.data[i][j] = a << b.data[i][j];
+        }
+    }
+
+    return t;
 }
 
+// right shift
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE mat_t<Rows,Cols,Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> rshift(const mat_t<Rows,Cols,Type>& a, const mat_t<Rows,Cols,Type>& b)
 {
-
+    mat_t<Rows,Cols,Type> t;
+    for (unsigned i=0; i < Rows; ++i)
+    {
+        for (unsigned j=0; j < Cols; ++j)
+        {
+            t.data[i][j] = a.data[i][j] >> b.data[i][j];
+        }
+    }
+
+    return t;
 }
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> rshift(const mat_t<Rows,Cols,Type>& a, Type b)
 {
-
-    for(
+    mat_t<Rows,Cols,Type> t;
+    for (unsigned i=0; i < Rows; ++i)
     {
-
+        for (unsigned j=0; j < Cols; ++j)
+        {
+            t.data[i][j] = a.data[i][j] >> b;
+        }
     }
-
+
+    return t;
 }
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> rshift(Type a, const mat_t<Rows,Cols,Type>& b)
 {
-
-    for(
+    mat_t<Rows,Cols,Type> t;
+    for (unsigned i=0; i < Rows; ++i)
     {
-
+        for (unsigned j=0; j < Cols; ++j)
+        {
+            t.data[i][j] = a >> b.data[i][j];
+        }
     }
-    return r;
-}
 
-
-inline CUDA_CALLABLE T muladd(T a, T b, T c) {
-    return c + a*b;
-}
-template<>
-inline CUDA_CALLABLE float muladd(float a, float b, float c) {
-    return fmaf(a, b, c);
-}
-template<>
-inline CUDA_CALLABLE double muladd(double a, double b, double c) {
-    return fma(a, b, c);
+    return t;
 }
 
-
-template<unsigned Rows, unsigned Cols,
-inline CUDA_CALLABLE mat_t<Rows,
+// invert
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> invert(const mat_t<Rows,Cols,Type>& m)
 {
-    mat_t<Rows,
+    mat_t<Rows,Cols,Type> t;
     for (unsigned i=0; i < Rows; ++i)
-    {
-        for (unsigned j=0; j <
+    {
+        for (unsigned j=0; j < Cols; ++j)
         {
-
-
-            for (unsigned k=0; k < Cols; ++k)
-            {
-                sum = muladd<Type>(a.data[i][k], b.data[k][j], sum);
-            }
-
-            t.data[i][j] = sum;
+            t.data[i][j] = ~m.data[i][j];
         }
     }
-
+
     return t;
 }
 
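Note the naming: invert just above is the elementwise bitwise NOT (~) for integer matrices, while the inverse functions in the hunks below compute the linear-algebra matrix inverse. The two are unrelated (sketch; shapes assumed):

mat_t<2,2,uint32> bits;                   // integer matrix: invert flips bits per entry
mat_t<2,2,uint32> flipped = invert(bits);

mat_t<2,2,float> m;                       // float matrix: inverse is the algebraic m^-1
mat_t<2,2,float> mi = inverse(m);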
@@ -2719,10 +3748,11 @@ inline CUDA_CALLABLE Type determinant(const mat_t<3,3,Type>& m)
     );
 }
 
+// Adapted from USD - see licenses/usd-LICENSE.txt
+// Copyright 2016 Pixar
 template<typename Type>
 inline CUDA_CALLABLE Type determinant(const mat_t<4,4,Type>& m)
 {
-    // adapted from USD GfMatrix4f::Inverse()
     Type x00, x01, x02, x03;
     Type x10, x11, x12, x13;
     Type x20, x21, x22, x23;
 
@@ -2818,16 +3848,16 @@ inline CUDA_CALLABLE mat_t<2,2,Type> inverse(const mat_t<2,2,Type>& m)
 template<typename Type>
 inline CUDA_CALLABLE mat_t<3,3,Type> inverse(const mat_t<3,3,Type>& m)
 {
-
+    Type det = determinant(m);
 
-
-
-
-
-
-
-
-
+    if (det != Type(0.0f))
+    {
+        mat_t<3,3,Type> b;
+
+        b.data[0][0] = m.data[1][1]*m.data[2][2] - m.data[1][2]*m.data[2][1];
+        b.data[1][0] = m.data[1][2]*m.data[2][0] - m.data[1][0]*m.data[2][2];
+        b.data[2][0] = m.data[1][0]*m.data[2][1] - m.data[1][1]*m.data[2][0];
+
         b.data[0][1] = m.data[0][2]*m.data[2][1] - m.data[0][1]*m.data[2][2];
         b.data[1][1] = m.data[0][0]*m.data[2][2] - m.data[0][2]*m.data[2][0];
         b.data[2][1] = m.data[0][1]*m.data[2][0] - m.data[0][0]*m.data[2][1];
 
@@ -2836,18 +3866,19 @@ inline CUDA_CALLABLE mat_t<3,3,Type> inverse(const mat_t<3,3,Type>& m)
         b.data[1][2] = m.data[0][2]*m.data[1][0] - m.data[0][0]*m.data[1][2];
         b.data[2][2] = m.data[0][0]*m.data[1][1] - m.data[0][1]*m.data[1][0];
 
-
-
-
-
-
-
+        return b*(Type(1.0f)/det);
+    }
+    else
+    {
+        return mat_t<3,3,Type>();
+    }
 }
 
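The 3x3 inverse above is the classical adjugate-over-determinant construction, and it returns a default-constructed matrix instead of dividing when det(m) == 0. A quick sanity sketch one might run against it (inputs and tolerance assumed):

// For a well-conditioned m, m * inverse(m) should reproduce the identity
// to within floating-point tolerance.
mat_t<3,3,float> m;                        // assume an invertible matrix
mat_t<3,3,float> id = mul(m, inverse(m));
// expect id.data[i][j] to be approximately (i == j ? 1.0f : 0.0f)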
+// Adapted from USD - see licenses/usd-LICENSE.txt
+// Copyright 2016 Pixar
 template<typename Type>
 inline CUDA_CALLABLE mat_t<4,4,Type> inverse(const mat_t<4,4,Type>& m)
 {
-    // adapted from USD GfMatrix4f::Inverse()
     Type x00, x01, x02, x03;
     Type x10, x11, x12, x13;
     Type x20, x21, x22, x23;
 
@@ -3310,6 +4341,126 @@ inline CUDA_CALLABLE void adj_sub(
     }
 }
 
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_bit_and(const mat_t<Rows,Cols,Type>& a, const mat_t<Rows,Cols,Type>& b, mat_t<Rows,Cols,Type>& adj_a, mat_t<Rows,Cols,Type>& adj_b, const mat_t<Rows,Cols,Type>& adj_ret)
+{
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_bit_and(
+    const mat_t<Rows,Cols,Type>& a, Type b,
+    mat_t<Rows,Cols,Type>& adj_a, Type& adj_b,
+    const mat_t<Rows,Cols,Type>& adj_ret
+)
+{
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_bit_and(
+    Type a, const mat_t<Rows,Cols,Type>& b,
+    Type& adj_a, mat_t<Rows,Cols,Type>& adj_b,
+    const mat_t<Rows,Cols,Type>& adj_ret
+)
+{
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_bit_or(const mat_t<Rows,Cols,Type>& a, const mat_t<Rows,Cols,Type>& b, mat_t<Rows,Cols,Type>& adj_a, mat_t<Rows,Cols,Type>& adj_b, const mat_t<Rows,Cols,Type>& adj_ret)
+{
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_bit_or(
+    const mat_t<Rows,Cols,Type>& a, Type b,
+    mat_t<Rows,Cols,Type>& adj_a, Type& adj_b,
+    const mat_t<Rows,Cols,Type>& adj_ret
+)
+{
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_bit_or(
+    Type a, const mat_t<Rows,Cols,Type>& b,
+    Type& adj_a, mat_t<Rows,Cols,Type>& adj_b,
+    const mat_t<Rows,Cols,Type>& adj_ret
+)
+{
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_bit_xor(const mat_t<Rows,Cols,Type>& a, const mat_t<Rows,Cols,Type>& b, mat_t<Rows,Cols,Type>& adj_a, mat_t<Rows,Cols,Type>& adj_b, const mat_t<Rows,Cols,Type>& adj_ret)
+{
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_bit_xor(
+    const mat_t<Rows,Cols,Type>& a, Type b,
+    mat_t<Rows,Cols,Type>& adj_a, Type& adj_b,
+    const mat_t<Rows,Cols,Type>& adj_ret
+)
+{
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_bit_xor(
+    Type a, const mat_t<Rows,Cols,Type>& b,
+    Type& adj_a, mat_t<Rows,Cols,Type>& adj_b,
+    const mat_t<Rows,Cols,Type>& adj_ret
+)
+{
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_lshift(const mat_t<Rows,Cols,Type>& a, const mat_t<Rows,Cols,Type>& b, mat_t<Rows,Cols,Type>& adj_a, mat_t<Rows,Cols,Type>& adj_b, const mat_t<Rows,Cols,Type>& adj_ret)
+{
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_lshift(
+    const mat_t<Rows,Cols,Type>& a, Type b,
+    mat_t<Rows,Cols,Type>& adj_a, Type& adj_b,
+    const mat_t<Rows,Cols,Type>& adj_ret
+)
+{
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_lshift(
+    Type a, const mat_t<Rows,Cols,Type>& b,
+    Type& adj_a, mat_t<Rows,Cols,Type>& adj_b,
+    const mat_t<Rows,Cols,Type>& adj_ret
+)
+{
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_rshift(const mat_t<Rows,Cols,Type>& a, const mat_t<Rows,Cols,Type>& b, mat_t<Rows,Cols,Type>& adj_a, mat_t<Rows,Cols,Type>& adj_b, const mat_t<Rows,Cols,Type>& adj_ret)
+{
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_rshift(
+    const mat_t<Rows,Cols,Type>& a, Type b,
+    mat_t<Rows,Cols,Type>& adj_a, Type& adj_b,
+    const mat_t<Rows,Cols,Type>& adj_ret
+)
+{
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_rshift(
+    Type a, const mat_t<Rows,Cols,Type>& b,
+    Type& adj_a, mat_t<Rows,Cols,Type>& adj_b,
+    const mat_t<Rows,Cols,Type>& adj_ret
+)
+{
+}
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_invert(const mat_t<Rows,Cols,Type>& m, mat_t<Rows,Cols,Type>& adj_m, const mat_t<Rows,Cols,Type>& adj_ret)
+{
+}
+
 template<unsigned Rows, unsigned Cols, typename Type>
 inline CUDA_CALLABLE void adj_div(const mat_t<Rows,Cols,Type>& a, Type s, mat_t<Rows,Cols,Type>& adj_a, Type& adj_s, const mat_t<Rows,Cols,Type>& adj_ret)
 {
 
@@ -3429,10 +4580,11 @@ inline CUDA_CALLABLE void adj_determinant(const mat_t<3,3,Ty
     (vec_t<3,Type>&)adj_m.data[2] += cross(m.get_row(0), m.get_row(1))*adj_ret;
 }
 
+// Adapted from USD - see licenses/usd-LICENSE.txt
+// Copyright 2016 Pixar
 template<typename Type>
 inline CUDA_CALLABLE void adj_determinant(const mat_t<4,4,Type>& m, mat_t<4,4,Type>& adj_m, Type adj_ret)
 {
-    // adapted from USD GfMatrix4f::Inverse()
     Type x00, x01, x02, x03;
     Type x10, x11, x12, x13;
     Type x20, x21, x22, x23;
 
@@ -3864,6 +5016,34 @@ template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_add(
 template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_add(mat_t<Rows, Cols, int64>* buf, const mat_t<Rows, Cols, int64> &value) { }
 template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_add(mat_t<Rows, Cols, uint64>* buf, const mat_t<Rows, Cols, uint64> &value) { }
 
+// for bitwise operations we do not accumulate gradients
+template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_and(mat_t<Rows, Cols, int8>* buf, const mat_t<Rows, Cols, int8> &value) { }
+template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_and(mat_t<Rows, Cols, uint8>* buf, const mat_t<Rows, Cols, uint8> &value) { }
+template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_and(mat_t<Rows, Cols, int16>* buf, const mat_t<Rows, Cols, int16> &value) { }
+template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_and(mat_t<Rows, Cols, uint16>* buf, const mat_t<Rows, Cols, uint16> &value) { }
+template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_and(mat_t<Rows, Cols, int32>* buf, const mat_t<Rows, Cols, int32> &value) { }
+template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_and(mat_t<Rows, Cols, uint32>* buf, const mat_t<Rows, Cols, uint32> &value) { }
+template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_and(mat_t<Rows, Cols, int64>* buf, const mat_t<Rows, Cols, int64> &value) { }
+template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_and(mat_t<Rows, Cols, uint64>* buf, const mat_t<Rows, Cols, uint64> &value) { }
+
+template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_or(mat_t<Rows, Cols, int8>* buf, const mat_t<Rows, Cols, int8> &value) { }
+template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_or(mat_t<Rows, Cols, uint8>* buf, const mat_t<Rows, Cols, uint8> &value) { }
+template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_or(mat_t<Rows, Cols, int16>* buf, const mat_t<Rows, Cols, int16> &value) { }
+template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_or(mat_t<Rows, Cols, uint16>* buf, const mat_t<Rows, Cols, uint16> &value) { }
+template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_or(mat_t<Rows, Cols, int32>* buf, const mat_t<Rows, Cols, int32> &value) { }
+template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_or(mat_t<Rows, Cols, uint32>* buf, const mat_t<Rows, Cols, uint32> &value) { }
+template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_or(mat_t<Rows, Cols, int64>* buf, const mat_t<Rows, Cols, int64> &value) { }
+template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_or(mat_t<Rows, Cols, uint64>* buf, const mat_t<Rows, Cols, uint64> &value) { }
+
+template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_xor(mat_t<Rows, Cols, int8>* buf, const mat_t<Rows, Cols, int8> &value) { }
+template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_xor(mat_t<Rows, Cols, uint8>* buf, const mat_t<Rows, Cols, uint8> &value) { }
+template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_xor(mat_t<Rows, Cols, int16>* buf, const mat_t<Rows, Cols, int16> &value) { }
+template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_xor(mat_t<Rows, Cols, uint16>* buf, const mat_t<Rows, Cols, uint16> &value) { }
+template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_xor(mat_t<Rows, Cols, int32>* buf, const mat_t<Rows, Cols, int32> &value) { }
+template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_xor(mat_t<Rows, Cols, uint32>* buf, const mat_t<Rows, Cols, uint32> &value) { }
+template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_xor(mat_t<Rows, Cols, int64>* buf, const mat_t<Rows, Cols, int64> &value) { }
+template<unsigned Rows, unsigned Cols> CUDA_CALLABLE inline void adj_atomic_xor(mat_t<Rows, Cols, uint64>* buf, const mat_t<Rows, Cols, uint64> &value) { }
+
 using mat22h = mat_t<2,2,half>;
 using mat33h = mat_t<3,3,half>;
 using mat44h = mat_t<4,4,half>;