warp-lang 1.8.1-py3-none-manylinux_2_34_aarch64.whl → 1.9.1-py3-none-manylinux_2_34_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of warp-lang has been flagged as possibly problematic by the registry.
- warp/__init__.py +282 -103
- warp/__init__.pyi +1904 -114
- warp/bin/warp-clang.so +0 -0
- warp/bin/warp.so +0 -0
- warp/build.py +93 -30
- warp/build_dll.py +331 -101
- warp/builtins.py +1244 -160
- warp/codegen.py +317 -206
- warp/config.py +1 -1
- warp/context.py +1465 -789
- warp/examples/core/example_marching_cubes.py +1 -0
- warp/examples/core/example_render_opengl.py +100 -3
- warp/examples/fem/example_apic_fluid.py +98 -52
- warp/examples/fem/example_convection_diffusion_dg.py +25 -4
- warp/examples/fem/example_diffusion_mgpu.py +8 -3
- warp/examples/fem/utils.py +68 -22
- warp/examples/interop/example_jax_kernel.py +2 -1
- warp/fabric.py +1 -1
- warp/fem/cache.py +27 -19
- warp/fem/domain.py +2 -2
- warp/fem/field/nodal_field.py +2 -2
- warp/fem/field/virtual.py +264 -166
- warp/fem/geometry/geometry.py +5 -5
- warp/fem/integrate.py +129 -51
- warp/fem/space/restriction.py +4 -0
- warp/fem/space/shape/tet_shape_function.py +3 -10
- warp/jax_experimental/custom_call.py +25 -2
- warp/jax_experimental/ffi.py +22 -1
- warp/jax_experimental/xla_ffi.py +16 -7
- warp/marching_cubes.py +708 -0
- warp/native/array.h +99 -4
- warp/native/builtin.h +86 -9
- warp/native/bvh.cpp +64 -28
- warp/native/bvh.cu +58 -58
- warp/native/bvh.h +2 -2
- warp/native/clang/clang.cpp +7 -7
- warp/native/coloring.cpp +8 -2
- warp/native/crt.cpp +2 -2
- warp/native/crt.h +3 -5
- warp/native/cuda_util.cpp +41 -10
- warp/native/cuda_util.h +10 -4
- warp/native/exports.h +1842 -1908
- warp/native/fabric.h +2 -1
- warp/native/hashgrid.cpp +37 -37
- warp/native/hashgrid.cu +2 -2
- warp/native/initializer_array.h +1 -1
- warp/native/intersect.h +2 -2
- warp/native/mat.h +1910 -116
- warp/native/mathdx.cpp +43 -43
- warp/native/mesh.cpp +24 -24
- warp/native/mesh.cu +26 -26
- warp/native/mesh.h +4 -2
- warp/native/nanovdb/GridHandle.h +179 -12
- warp/native/nanovdb/HostBuffer.h +8 -7
- warp/native/nanovdb/NanoVDB.h +517 -895
- warp/native/nanovdb/NodeManager.h +323 -0
- warp/native/nanovdb/PNanoVDB.h +2 -2
- warp/native/quat.h +331 -14
- warp/native/range.h +7 -1
- warp/native/reduce.cpp +10 -10
- warp/native/reduce.cu +13 -14
- warp/native/runlength_encode.cpp +2 -2
- warp/native/runlength_encode.cu +5 -5
- warp/native/scan.cpp +3 -3
- warp/native/scan.cu +4 -4
- warp/native/sort.cpp +10 -10
- warp/native/sort.cu +40 -31
- warp/native/sort.h +2 -0
- warp/native/sparse.cpp +8 -8
- warp/native/sparse.cu +13 -13
- warp/native/spatial.h +366 -17
- warp/native/temp_buffer.h +2 -2
- warp/native/tile.h +471 -82
- warp/native/vec.h +328 -14
- warp/native/volume.cpp +54 -54
- warp/native/volume.cu +1 -1
- warp/native/volume.h +2 -1
- warp/native/volume_builder.cu +30 -37
- warp/native/warp.cpp +150 -149
- warp/native/warp.cu +377 -216
- warp/native/warp.h +227 -226
- warp/optim/linear.py +736 -271
- warp/render/imgui_manager.py +289 -0
- warp/render/render_opengl.py +99 -18
- warp/render/render_usd.py +1 -0
- warp/sim/graph_coloring.py +2 -2
- warp/sparse.py +558 -175
- warp/tests/aux_test_module_aot.py +7 -0
- warp/tests/cuda/test_async.py +3 -3
- warp/tests/cuda/test_conditional_captures.py +101 -0
- warp/tests/geometry/test_hash_grid.py +38 -0
- warp/tests/geometry/test_marching_cubes.py +233 -12
- warp/tests/interop/test_jax.py +608 -28
- warp/tests/sim/test_coloring.py +6 -6
- warp/tests/test_array.py +58 -5
- warp/tests/test_codegen.py +4 -3
- warp/tests/test_context.py +8 -15
- warp/tests/test_enum.py +136 -0
- warp/tests/test_examples.py +2 -2
- warp/tests/test_fem.py +49 -6
- warp/tests/test_fixedarray.py +229 -0
- warp/tests/test_func.py +18 -15
- warp/tests/test_future_annotations.py +7 -5
- warp/tests/test_linear_solvers.py +30 -0
- warp/tests/test_map.py +15 -1
- warp/tests/test_mat.py +1518 -378
- warp/tests/test_mat_assign_copy.py +178 -0
- warp/tests/test_mat_constructors.py +574 -0
- warp/tests/test_module_aot.py +287 -0
- warp/tests/test_print.py +69 -0
- warp/tests/test_quat.py +140 -34
- warp/tests/test_quat_assign_copy.py +145 -0
- warp/tests/test_reload.py +2 -1
- warp/tests/test_sparse.py +71 -0
- warp/tests/test_spatial.py +140 -34
- warp/tests/test_spatial_assign_copy.py +160 -0
- warp/tests/test_struct.py +43 -3
- warp/tests/test_tuple.py +96 -0
- warp/tests/test_types.py +61 -20
- warp/tests/test_vec.py +179 -34
- warp/tests/test_vec_assign_copy.py +143 -0
- warp/tests/tile/test_tile.py +245 -18
- warp/tests/tile/test_tile_cholesky.py +605 -0
- warp/tests/tile/test_tile_load.py +169 -0
- warp/tests/tile/test_tile_mathdx.py +2 -558
- warp/tests/tile/test_tile_matmul.py +1 -1
- warp/tests/tile/test_tile_mlp.py +1 -1
- warp/tests/tile/test_tile_shared_memory.py +5 -5
- warp/tests/unittest_suites.py +6 -0
- warp/tests/walkthrough_debug.py +1 -1
- warp/thirdparty/unittest_parallel.py +108 -9
- warp/types.py +571 -267
- warp/utils.py +68 -86
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/METADATA +29 -69
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/RECORD +138 -128
- warp/native/marching.cpp +0 -19
- warp/native/marching.cu +0 -514
- warp/native/marching.h +0 -19
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/WHEEL +0 -0
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/licenses/LICENSE.md +0 -0
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/top_level.txt +0 -0
warp/native/mat.h
CHANGED
|
@@ -177,12 +177,12 @@ struct mat_t
|
|
|
177
177
|
|
|
178
178
|
CUDA_CALLABLE vec_t<Cols,Type> get_row(int index) const
|
|
179
179
|
{
|
|
180
|
-
return
|
|
180
|
+
return reinterpret_cast<const vec_t<Cols,Type>&>(data[index]);
|
|
181
181
|
}
|
|
182
182
|
|
|
183
183
|
CUDA_CALLABLE void set_row(int index, const vec_t<Cols,Type>& v)
|
|
184
184
|
{
|
|
185
|
-
|
|
185
|
+
reinterpret_cast<vec_t<Cols,Type>&>(data[index]) = v;
|
|
186
186
|
}
|
|
187
187
|
|
|
188
188
|
CUDA_CALLABLE vec_t<Rows,Type> get_col(int index) const
|
|
@@ -204,7 +204,7 @@ struct mat_t
|
|
|
204
204
|
}
|
|
205
205
|
|
|
206
206
|
// row major storage assumed to be compatible with PyTorch
|
|
207
|
-
Type data[Rows][Cols];
|
|
207
|
+
Type data[Rows < 1 ? 1 : Rows][Cols < 1 ? 1 : Cols];
|
|
208
208
|
};
|
|
209
209
|
|
|
210
210
|
template<typename Type>
|
|
@@ -477,6 +477,20 @@ template<unsigned Rows, unsigned Cols, typename Type>
|
|
|
477
477
|
inline CUDA_CALLABLE vec_t<Cols,Type> extract(const mat_t<Rows,Cols,Type>& m, int row)
|
|
478
478
|
{
|
|
479
479
|
vec_t<Cols,Type> ret;
|
|
480
|
+
|
|
481
|
+
#ifndef NDEBUG
|
|
482
|
+
if (row < -(int)Rows || row >= (int)Rows)
|
|
483
|
+
{
|
|
484
|
+
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
485
|
+
assert(0);
|
|
486
|
+
}
|
|
487
|
+
#endif
|
|
488
|
+
|
|
489
|
+
if (row < 0)
|
|
490
|
+
{
|
|
491
|
+
row += Rows;
|
|
492
|
+
}
|
|
493
|
+
|
|
480
494
|
for(unsigned i=0; i < Cols; ++i)
|
|
481
495
|
{
|
|
482
496
|
ret.c[i] = m.data[row][i];
|
|
@@ -488,31 +502,206 @@ template<unsigned Rows, unsigned Cols, typename Type>
|
|
|
488
502
|
inline CUDA_CALLABLE Type extract(const mat_t<Rows,Cols,Type>& m, int row, int col)
|
|
489
503
|
{
|
|
490
504
|
#ifndef NDEBUG
|
|
491
|
-
if (row <
|
|
505
|
+
if (row < -(int)Rows || row >= (int)Rows)
|
|
492
506
|
{
|
|
493
507
|
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
494
508
|
assert(0);
|
|
495
509
|
}
|
|
496
|
-
if (col <
|
|
510
|
+
if (col < -(int)Cols || col >= (int)Cols)
|
|
497
511
|
{
|
|
498
512
|
printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
|
|
499
513
|
assert(0);
|
|
500
514
|
}
|
|
501
515
|
#endif
|
|
516
|
+
|
|
517
|
+
if (row < 0)
|
|
518
|
+
{
|
|
519
|
+
row += Rows;
|
|
520
|
+
}
|
|
521
|
+
if (col < 0)
|
|
522
|
+
{
|
|
523
|
+
col += Cols;
|
|
524
|
+
}
|
|
525
|
+
|
|
502
526
|
return m.data[row][col];
|
|
503
527
|
}
|
|
504
528
|
|
|
529
|
+
template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
530
|
+
inline CUDA_CALLABLE mat_t<RowSliceLength, ColSliceLength, Type> extract(const mat_t<Rows,Cols,Type>& m, slice_t row_slice)
|
|
531
|
+
{
|
|
532
|
+
static_assert(
|
|
533
|
+
RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols,
|
|
534
|
+
"Expected RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols"
|
|
535
|
+
);
|
|
536
|
+
|
|
537
|
+
mat_t<RowSliceLength, ColSliceLength, Type> ret;
|
|
538
|
+
|
|
539
|
+
assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
|
|
540
|
+
assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
|
|
541
|
+
assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
|
|
542
|
+
assert(slice_get_length(row_slice) == RowSliceLength);
|
|
543
|
+
|
|
544
|
+
bool is_row_reversed = row_slice.step < 0;
|
|
545
|
+
|
|
546
|
+
int ii = 0;
|
|
547
|
+
for (
|
|
548
|
+
int i = row_slice.start;
|
|
549
|
+
is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
|
|
550
|
+
i += row_slice.step
|
|
551
|
+
)
|
|
552
|
+
{
|
|
553
|
+
for (int j = 0; j < Cols; ++j)
|
|
554
|
+
{
|
|
555
|
+
ret.data[ii][j] = m.data[i][j];
|
|
556
|
+
}
|
|
557
|
+
|
|
558
|
+
++ii;
|
|
559
|
+
}
|
|
560
|
+
|
|
561
|
+
assert(ii == RowSliceLength);
|
|
562
|
+
return ret;
|
|
563
|
+
}
|
|
564
|
+
|
|
565
|
+
template<unsigned RowSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
566
|
+
inline CUDA_CALLABLE vec_t<RowSliceLength, Type> extract(const mat_t<Rows,Cols,Type>& m, slice_t row_slice, int col)
|
|
567
|
+
{
|
|
568
|
+
#ifndef NDEBUG
|
|
569
|
+
if (col < -(int)Cols || col >= (int)Cols)
|
|
570
|
+
{
|
|
571
|
+
printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
|
|
572
|
+
assert(0);
|
|
573
|
+
}
|
|
574
|
+
#endif
|
|
575
|
+
|
|
576
|
+
vec_t<RowSliceLength, Type> ret;
|
|
577
|
+
|
|
578
|
+
assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
|
|
579
|
+
assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
|
|
580
|
+
assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
|
|
581
|
+
assert(slice_get_length(row_slice) == RowSliceLength);
|
|
582
|
+
|
|
583
|
+
if (col < 0)
|
|
584
|
+
{
|
|
585
|
+
col += Cols;
|
|
586
|
+
}
|
|
587
|
+
|
|
588
|
+
bool is_row_reversed = row_slice.step < 0;
|
|
589
|
+
|
|
590
|
+
int ii = 0;
|
|
591
|
+
for (
|
|
592
|
+
int i = row_slice.start;
|
|
593
|
+
is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
|
|
594
|
+
i += row_slice.step
|
|
595
|
+
)
|
|
596
|
+
{
|
|
597
|
+
ret.c[ii] = m.data[i][col];
|
|
598
|
+
++ii;
|
|
599
|
+
}
|
|
600
|
+
|
|
601
|
+
assert(ii == RowSliceLength);
|
|
602
|
+
return ret;
|
|
603
|
+
}
|
|
604
|
+
|
|
605
|
+
template<unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
606
|
+
inline CUDA_CALLABLE vec_t<ColSliceLength, Type> extract(const mat_t<Rows,Cols,Type>& m, int row, slice_t col_slice)
|
|
607
|
+
{
|
|
608
|
+
#ifndef NDEBUG
|
|
609
|
+
if (row < -(int)Rows || row >= (int)Rows)
|
|
610
|
+
{
|
|
611
|
+
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
612
|
+
assert(0);
|
|
613
|
+
}
|
|
614
|
+
#endif
|
|
615
|
+
|
|
616
|
+
vec_t<ColSliceLength, Type> ret;
|
|
617
|
+
|
|
618
|
+
assert(col_slice.start >= 0 && col_slice.start <= (int)Cols);
|
|
619
|
+
assert(col_slice.stop >= -1 && col_slice.stop <= (int)Cols);
|
|
620
|
+
assert(col_slice.step != 0 && col_slice.step < 0 ? col_slice.start >= col_slice.stop : col_slice.start <= col_slice.stop);
|
|
621
|
+
assert(slice_get_length(col_slice) == ColSliceLength);
|
|
622
|
+
|
|
623
|
+
if (row < 0)
|
|
624
|
+
{
|
|
625
|
+
row += Rows;
|
|
626
|
+
}
|
|
627
|
+
|
|
628
|
+
bool is_col_reversed = col_slice.step < 0;
|
|
629
|
+
|
|
630
|
+
int ii = 0;
|
|
631
|
+
for (
|
|
632
|
+
int i = col_slice.start;
|
|
633
|
+
is_col_reversed ? (i > col_slice.stop) : (i < col_slice.stop);
|
|
634
|
+
i += col_slice.step
|
|
635
|
+
)
|
|
636
|
+
{
|
|
637
|
+
ret.c[ii] = m.data[row][i];
|
|
638
|
+
++ii;
|
|
639
|
+
}
|
|
640
|
+
|
|
641
|
+
assert(ii == ColSliceLength);
|
|
642
|
+
return ret;
|
|
643
|
+
}
|
|
644
|
+
|
|
645
|
+
template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
646
|
+
inline CUDA_CALLABLE mat_t<RowSliceLength, ColSliceLength, Type> extract(const mat_t<Rows,Cols,Type>& m, slice_t row_slice, slice_t col_slice)
|
|
647
|
+
{
|
|
648
|
+
mat_t<RowSliceLength, ColSliceLength, Type> ret;
|
|
649
|
+
|
|
650
|
+
assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
|
|
651
|
+
assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
|
|
652
|
+
assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
|
|
653
|
+
assert(slice_get_length(row_slice) == RowSliceLength);
|
|
654
|
+
|
|
655
|
+
assert(col_slice.start >= 0 && col_slice.start <= (int)Cols);
|
|
656
|
+
assert(col_slice.stop >= -1 && col_slice.stop <= (int)Cols);
|
|
657
|
+
assert(col_slice.step != 0 && col_slice.step < 0 ? col_slice.start >= col_slice.stop : col_slice.start <= col_slice.stop);
|
|
658
|
+
assert(slice_get_length(col_slice) == ColSliceLength);
|
|
659
|
+
|
|
660
|
+
bool is_row_reversed = row_slice.step < 0;
|
|
661
|
+
bool is_col_reversed = col_slice.step < 0;
|
|
662
|
+
|
|
663
|
+
int ii = 0;
|
|
664
|
+
for (
|
|
665
|
+
int i = row_slice.start;
|
|
666
|
+
is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
|
|
667
|
+
i += row_slice.step
|
|
668
|
+
)
|
|
669
|
+
{
|
|
670
|
+
int jj = 0;
|
|
671
|
+
for (
|
|
672
|
+
int j = col_slice.start;
|
|
673
|
+
is_col_reversed ? (j > col_slice.stop) : (j < col_slice.stop);
|
|
674
|
+
j += col_slice.step
|
|
675
|
+
)
|
|
676
|
+
{
|
|
677
|
+
ret.data[ii][jj] = m.data[i][j];
|
|
678
|
+
++jj;
|
|
679
|
+
}
|
|
680
|
+
|
|
681
|
+
assert(jj == ColSliceLength);
|
|
682
|
+
++ii;
|
|
683
|
+
}
|
|
684
|
+
|
|
685
|
+
assert(ii == RowSliceLength);
|
|
686
|
+
return ret;
|
|
687
|
+
}
|
|
688
|
+
|
|
505
689
|
template<unsigned Rows, unsigned Cols, typename Type>
|
|
506
690
|
inline CUDA_CALLABLE vec_t<Cols, Type>* index(mat_t<Rows,Cols,Type>& m, int row)
|
|
507
691
|
{
|
|
508
692
|
#ifndef NDEBUG
|
|
509
|
-
if (row <
|
|
693
|
+
if (row < -(int)Rows || row >= (int)Rows)
|
|
510
694
|
{
|
|
511
695
|
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
512
696
|
assert(0);
|
|
513
697
|
}
|
|
514
698
|
#endif
|
|
515
699
|
|
|
700
|
+
if (row < 0)
|
|
701
|
+
{
|
|
702
|
+
row += Rows;
|
|
703
|
+
}
|
|
704
|
+
|
|
516
705
|
return reinterpret_cast<vec_t<Cols, Type>*>(&m.data[row]);
|
|
517
706
|
}
|
|
518
707
|
|
|
@@ -520,38 +709,46 @@ template<unsigned Rows, unsigned Cols, typename Type>
|
|
|
520
709
|
inline CUDA_CALLABLE Type* index(mat_t<Rows,Cols,Type>& m, int row, int col)
|
|
521
710
|
{
|
|
522
711
|
#ifndef NDEBUG
|
|
523
|
-
if (row <
|
|
712
|
+
if (row < -(int)Rows || row >= (int)Rows)
|
|
524
713
|
{
|
|
525
714
|
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
526
715
|
assert(0);
|
|
527
716
|
}
|
|
528
|
-
if (col <
|
|
717
|
+
if (col < -(int)Cols || col >= (int)Cols)
|
|
529
718
|
{
|
|
530
719
|
printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
|
|
531
720
|
assert(0);
|
|
532
721
|
}
|
|
533
722
|
#endif
|
|
534
723
|
|
|
724
|
+
if (row < 0)
|
|
725
|
+
{
|
|
726
|
+
row += Rows;
|
|
727
|
+
}
|
|
728
|
+
if (col < 0)
|
|
729
|
+
{
|
|
730
|
+
col += Cols;
|
|
731
|
+
}
|
|
732
|
+
|
|
535
733
|
return &m.data[row][col];
|
|
536
734
|
}
|
|
537
735
|
|
|
538
736
|
template<unsigned Rows, unsigned Cols, typename Type>
|
|
539
737
|
inline CUDA_CALLABLE void adj_index(const mat_t<Rows,Cols,Type>& m, int row,
|
|
540
|
-
|
|
738
|
+
const mat_t<Rows,Cols,Type>& adj_m, int adj_row, const vec_t<Cols, Type>& adj_value)
|
|
541
739
|
{
|
|
542
740
|
// nop
|
|
543
741
|
}
|
|
544
742
|
|
|
545
743
|
template<unsigned Rows, unsigned Cols, typename Type>
|
|
546
744
|
inline CUDA_CALLABLE void adj_index(const mat_t<Rows,Cols,Type>& m, int row, int col,
|
|
547
|
-
|
|
745
|
+
const mat_t<Rows,Cols,Type>& adj_m, int adj_row, int adj_col, Type adj_value)
|
|
548
746
|
{
|
|
549
747
|
// nop
|
|
550
748
|
}
|
|
551
749
|
|
|
552
|
-
|
|
553
750
|
template<unsigned Rows, unsigned Cols, typename Type>
|
|
554
|
-
inline CUDA_CALLABLE
|
|
751
|
+
inline CUDA_CALLABLE Type* indexref(mat_t<Rows,Cols,Type>* m, int row, int col)
|
|
555
752
|
{
|
|
556
753
|
#ifndef NDEBUG
|
|
557
754
|
if (row < 0 || row >= Rows)
|
|
@@ -566,201 +763,1091 @@ inline CUDA_CALLABLE void add_inplace(mat_t<Rows,Cols,Type>& m, int row, int col
|
|
|
566
763
|
}
|
|
567
764
|
#endif
|
|
568
765
|
|
|
569
|
-
m
|
|
766
|
+
return &(m->data)[row][col];
|
|
570
767
|
}
|
|
571
768
|
|
|
572
|
-
|
|
573
769
|
template<unsigned Rows, unsigned Cols, typename Type>
|
|
574
|
-
inline CUDA_CALLABLE void
|
|
770
|
+
inline CUDA_CALLABLE void adj_indexref(mat_t<Rows,Cols,Type>* m, int row, int col,
|
|
771
|
+
mat_t<Rows,Cols,Type>& adj_m, int adj_row, int adj_col, const Type& adj_value)
|
|
575
772
|
{
|
|
576
|
-
|
|
577
|
-
if (row < 0 || row >= Rows)
|
|
578
|
-
{
|
|
579
|
-
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
580
|
-
assert(0);
|
|
581
|
-
}
|
|
582
|
-
#endif
|
|
583
|
-
|
|
584
|
-
for(unsigned i=0; i < Cols; ++i)
|
|
585
|
-
{
|
|
586
|
-
m.data[row][i] += value[i];
|
|
587
|
-
}
|
|
773
|
+
// nop
|
|
588
774
|
}
|
|
589
775
|
|
|
590
776
|
|
|
591
777
|
template<unsigned Rows, unsigned Cols, typename Type>
|
|
592
|
-
inline CUDA_CALLABLE void
|
|
593
|
-
mat_t<Rows,Cols,Type>& adj_m, int adj_row, int adj_col, Type& adj_value)
|
|
778
|
+
inline CUDA_CALLABLE void add_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
|
|
594
779
|
{
|
|
595
780
|
#ifndef NDEBUG
|
|
596
|
-
if (row <
|
|
781
|
+
if (row < -(int)Rows || row >= (int)Rows)
|
|
597
782
|
{
|
|
598
783
|
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
599
784
|
assert(0);
|
|
600
785
|
}
|
|
601
|
-
if (col <
|
|
786
|
+
if (col < -(int)Cols || col >= (int)Cols)
|
|
602
787
|
{
|
|
603
788
|
printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
|
|
604
789
|
assert(0);
|
|
605
790
|
}
|
|
606
791
|
#endif
|
|
607
792
|
|
|
608
|
-
|
|
793
|
+
if (row < 0)
|
|
794
|
+
{
|
|
795
|
+
row += Rows;
|
|
796
|
+
}
|
|
797
|
+
if (col < 0)
|
|
798
|
+
{
|
|
799
|
+
col += Cols;
|
|
800
|
+
}
|
|
801
|
+
|
|
802
|
+
m.data[row][col] += value;
|
|
609
803
|
}
|
|
610
804
|
|
|
611
805
|
|
|
612
806
|
template<unsigned Rows, unsigned Cols, typename Type>
|
|
613
|
-
inline CUDA_CALLABLE void
|
|
614
|
-
mat_t<Rows,Cols,Type>& adj_m, int adj_row, vec_t<Cols,Type>& adj_value)
|
|
807
|
+
inline CUDA_CALLABLE void add_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
|
|
615
808
|
{
|
|
616
809
|
#ifndef NDEBUG
|
|
617
|
-
if (row <
|
|
810
|
+
if (row < -(int)Rows || row >= (int)Rows)
|
|
618
811
|
{
|
|
619
812
|
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
620
813
|
assert(0);
|
|
621
814
|
}
|
|
622
815
|
#endif
|
|
623
816
|
|
|
817
|
+
if (row < 0)
|
|
818
|
+
{
|
|
819
|
+
row += Rows;
|
|
820
|
+
}
|
|
821
|
+
|
|
624
822
|
for(unsigned i=0; i < Cols; ++i)
|
|
625
823
|
{
|
|
626
|
-
|
|
824
|
+
m.data[row][i] += value[i];
|
|
627
825
|
}
|
|
628
826
|
}
|
|
629
827
|
|
|
630
828
|
|
|
631
|
-
template<unsigned Rows, unsigned Cols, typename Type>
|
|
632
|
-
inline CUDA_CALLABLE void
|
|
829
|
+
template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
830
|
+
inline CUDA_CALLABLE void add_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value)
|
|
633
831
|
{
|
|
634
|
-
|
|
635
|
-
|
|
832
|
+
static_assert(
|
|
833
|
+
RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols,
|
|
834
|
+
"Expected RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols"
|
|
835
|
+
);
|
|
836
|
+
|
|
837
|
+
assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
|
|
838
|
+
assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
|
|
839
|
+
assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
|
|
840
|
+
assert(slice_get_length(row_slice) == RowSliceLength);
|
|
841
|
+
|
|
842
|
+
bool is_row_reversed = row_slice.step < 0;
|
|
843
|
+
|
|
844
|
+
int ii = 0;
|
|
845
|
+
for (
|
|
846
|
+
int i = row_slice.start;
|
|
847
|
+
is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
|
|
848
|
+
i += row_slice.step
|
|
849
|
+
)
|
|
636
850
|
{
|
|
637
|
-
|
|
638
|
-
|
|
851
|
+
for (int j = 0; j < Cols; ++j)
|
|
852
|
+
{
|
|
853
|
+
m.data[i][j] += value.data[ii][j];
|
|
854
|
+
}
|
|
855
|
+
|
|
856
|
+
++ii;
|
|
639
857
|
}
|
|
640
|
-
|
|
858
|
+
|
|
859
|
+
assert(ii == RowSliceLength);
|
|
860
|
+
}
|
|
861
|
+
|
|
862
|
+
|
|
863
|
+
template<unsigned RowSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
864
|
+
inline CUDA_CALLABLE void add_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_slice, int col, vec_t<RowSliceLength, Type>& value)
|
|
865
|
+
{
|
|
866
|
+
#ifndef NDEBUG
|
|
867
|
+
if (col < -(int)Cols || col >= (int)Cols)
|
|
641
868
|
{
|
|
642
869
|
printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
|
|
643
870
|
assert(0);
|
|
644
871
|
}
|
|
645
872
|
#endif
|
|
646
873
|
|
|
647
|
-
|
|
874
|
+
assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
|
|
875
|
+
assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
|
|
876
|
+
assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
|
|
877
|
+
assert(slice_get_length(row_slice) == RowSliceLength);
|
|
878
|
+
|
|
879
|
+
if (col < 0)
|
|
880
|
+
{
|
|
881
|
+
col += Cols;
|
|
882
|
+
}
|
|
883
|
+
|
|
884
|
+
bool is_row_reversed = row_slice.step < 0;
|
|
885
|
+
|
|
886
|
+
int ii = 0;
|
|
887
|
+
for (
|
|
888
|
+
int i = row_slice.start;
|
|
889
|
+
is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
|
|
890
|
+
i += row_slice.step
|
|
891
|
+
)
|
|
892
|
+
{
|
|
893
|
+
m.data[i][col] += value.c[ii];
|
|
894
|
+
++ii;
|
|
895
|
+
}
|
|
896
|
+
|
|
897
|
+
assert(ii == RowSliceLength);
|
|
648
898
|
}
|
|
649
899
|
|
|
650
900
|
|
|
651
|
-
template<unsigned Rows, unsigned Cols, typename Type>
|
|
652
|
-
inline CUDA_CALLABLE void
|
|
901
|
+
template<unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
902
|
+
inline CUDA_CALLABLE void add_inplace(mat_t<Rows,Cols,Type>& m, int row, slice_t col_slice, vec_t<ColSliceLength, Type>& value)
|
|
653
903
|
{
|
|
654
904
|
#ifndef NDEBUG
|
|
655
|
-
if (row <
|
|
905
|
+
if (row < -(int)Rows || row >= (int)Rows)
|
|
656
906
|
{
|
|
657
907
|
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
658
908
|
assert(0);
|
|
659
909
|
}
|
|
660
910
|
#endif
|
|
661
911
|
|
|
662
|
-
|
|
912
|
+
assert(col_slice.start >= 0 && col_slice.start <= (int)Cols);
|
|
913
|
+
assert(col_slice.stop >= -1 && col_slice.stop <= (int)Cols);
|
|
914
|
+
assert(col_slice.step != 0 && col_slice.step < 0 ? col_slice.start >= col_slice.stop : col_slice.start <= col_slice.stop);
|
|
915
|
+
assert(slice_get_length(col_slice) == ColSliceLength);
|
|
916
|
+
|
|
917
|
+
if (row < 0)
|
|
663
918
|
{
|
|
664
|
-
|
|
919
|
+
row += Rows;
|
|
920
|
+
}
|
|
921
|
+
|
|
922
|
+
bool is_col_reversed = col_slice.step < 0;
|
|
923
|
+
|
|
924
|
+
int ii = 0;
|
|
925
|
+
for (
|
|
926
|
+
int i = col_slice.start;
|
|
927
|
+
is_col_reversed ? (i > col_slice.stop) : (i < col_slice.stop);
|
|
928
|
+
i += col_slice.step
|
|
929
|
+
)
|
|
930
|
+
{
|
|
931
|
+
m.data[row][i] += value.c[ii];
|
|
932
|
+
++ii;
|
|
933
|
+
}
|
|
934
|
+
|
|
935
|
+
assert(ii == ColSliceLength);
|
|
936
|
+
}
|
|
937
|
+
|
|
938
|
+
|
|
939
|
+
template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
940
|
+
inline CUDA_CALLABLE void add_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_slice, slice_t col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value)
|
|
941
|
+
{
|
|
942
|
+
assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
|
|
943
|
+
assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
|
|
944
|
+
assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
|
|
945
|
+
assert(slice_get_length(row_slice) == RowSliceLength);
|
|
946
|
+
|
|
947
|
+
assert(col_slice.start >= 0 && col_slice.start <= (int)Cols);
|
|
948
|
+
assert(col_slice.stop >= -1 && col_slice.stop <= (int)Cols);
|
|
949
|
+
assert(col_slice.step != 0 && col_slice.step < 0 ? col_slice.start >= col_slice.stop : col_slice.start <= col_slice.stop);
|
|
950
|
+
assert(slice_get_length(col_slice) == ColSliceLength);
|
|
951
|
+
|
|
952
|
+
bool is_row_reversed = row_slice.step < 0;
|
|
953
|
+
bool is_col_reversed = col_slice.step < 0;
|
|
954
|
+
|
|
955
|
+
int ii = 0;
|
|
956
|
+
for (
|
|
957
|
+
int i = row_slice.start;
|
|
958
|
+
is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
|
|
959
|
+
i += row_slice.step
|
|
960
|
+
)
|
|
961
|
+
{
|
|
962
|
+
int jj = 0;
|
|
963
|
+
for (
|
|
964
|
+
int j = col_slice.start;
|
|
965
|
+
is_col_reversed ? (j > col_slice.stop) : (j < col_slice.stop);
|
|
966
|
+
j += col_slice.step
|
|
967
|
+
)
|
|
968
|
+
{
|
|
969
|
+
m.data[i][j] += value.data[ii][jj];
|
|
970
|
+
++jj;
|
|
971
|
+
}
|
|
972
|
+
|
|
973
|
+
assert(jj == ColSliceLength);
|
|
974
|
+
++ii;
|
|
665
975
|
}
|
|
976
|
+
|
|
977
|
+
assert(ii == RowSliceLength);
|
|
666
978
|
}
|
|
667
979
|
|
|
668
980
|
|
|
669
981
|
template<unsigned Rows, unsigned Cols, typename Type>
|
|
670
|
-
inline CUDA_CALLABLE void
|
|
982
|
+
inline CUDA_CALLABLE void adj_add_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
|
|
671
983
|
mat_t<Rows,Cols,Type>& adj_m, int adj_row, int adj_col, Type& adj_value)
|
|
672
984
|
{
|
|
673
985
|
#ifndef NDEBUG
|
|
674
|
-
if (row <
|
|
986
|
+
if (row < -(int)Rows || row >= (int)Rows)
|
|
675
987
|
{
|
|
676
988
|
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
677
989
|
assert(0);
|
|
678
990
|
}
|
|
679
|
-
if (col <
|
|
991
|
+
if (col < -(int)Cols || col >= (int)Cols)
|
|
680
992
|
{
|
|
681
993
|
printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
|
|
682
994
|
assert(0);
|
|
683
995
|
}
|
|
684
996
|
#endif
|
|
685
997
|
|
|
686
|
-
|
|
998
|
+
if (row < 0)
|
|
999
|
+
{
|
|
1000
|
+
row += Rows;
|
|
1001
|
+
}
|
|
1002
|
+
if (col < 0)
|
|
1003
|
+
{
|
|
1004
|
+
col += Cols;
|
|
1005
|
+
}
|
|
1006
|
+
|
|
1007
|
+
adj_value += adj_m.data[row][col];
|
|
687
1008
|
}
|
|
688
1009
|
|
|
689
1010
|
|
|
690
1011
|
template<unsigned Rows, unsigned Cols, typename Type>
|
|
691
|
-
inline CUDA_CALLABLE void
|
|
1012
|
+
inline CUDA_CALLABLE void adj_add_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
|
|
692
1013
|
mat_t<Rows,Cols,Type>& adj_m, int adj_row, vec_t<Cols,Type>& adj_value)
|
|
693
1014
|
{
|
|
694
1015
|
#ifndef NDEBUG
|
|
695
|
-
if (row <
|
|
1016
|
+
if (row < -(int)Rows || row >= (int)Rows)
|
|
696
1017
|
{
|
|
697
1018
|
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
698
1019
|
assert(0);
|
|
699
1020
|
}
|
|
700
1021
|
#endif
|
|
701
1022
|
|
|
1023
|
+
if (row < 0)
|
|
1024
|
+
{
|
|
1025
|
+
row += Rows;
|
|
1026
|
+
}
|
|
1027
|
+
|
|
702
1028
|
for(unsigned i=0; i < Cols; ++i)
|
|
703
1029
|
{
|
|
704
|
-
adj_value[i]
|
|
1030
|
+
adj_value[i] += adj_m.data[row][i];
|
|
705
1031
|
}
|
|
706
1032
|
}
|
|
707
1033
|
|
|
708
1034
|
|
|
709
|
-
template<unsigned Rows, unsigned Cols, typename Type>
|
|
710
|
-
inline CUDA_CALLABLE void
|
|
1035
|
+
template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
1036
|
+
inline CUDA_CALLABLE void adj_add_inplace(
|
|
1037
|
+
mat_t<Rows,Cols,Type>& m, slice_t row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value,
|
|
1038
|
+
mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& adj_value
|
|
1039
|
+
)
|
|
711
1040
|
{
|
|
712
|
-
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
1041
|
+
static_assert(
|
|
1042
|
+
RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols,
|
|
1043
|
+
"Expected RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols"
|
|
1044
|
+
);
|
|
1045
|
+
|
|
1046
|
+
assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
|
|
1047
|
+
assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
|
|
1048
|
+
assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
|
|
1049
|
+
assert(slice_get_length(row_slice) == RowSliceLength);
|
|
1050
|
+
|
|
1051
|
+
bool is_row_reversed = row_slice.step < 0;
|
|
1052
|
+
|
|
1053
|
+
int ii = 0;
|
|
1054
|
+
for (
|
|
1055
|
+
int i = row_slice.start;
|
|
1056
|
+
is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
|
|
1057
|
+
i += row_slice.step
|
|
1058
|
+
)
|
|
719
1059
|
{
|
|
720
|
-
|
|
721
|
-
|
|
1060
|
+
for (int j = 0; j < Cols; ++j)
|
|
1061
|
+
{
|
|
1062
|
+
adj_value.data[ii][j] += adj_m.data[i][j];
|
|
1063
|
+
}
|
|
1064
|
+
|
|
1065
|
+
++ii;
|
|
722
1066
|
}
|
|
723
|
-
#endif
|
|
724
1067
|
|
|
725
|
-
|
|
1068
|
+
assert(ii == RowSliceLength);
|
|
726
1069
|
}
|
|
727
1070
|
|
|
728
1071
|
|
|
729
|
-
template<unsigned Rows, unsigned Cols, typename Type>
|
|
730
|
-
inline CUDA_CALLABLE void
|
|
1072
|
+
template<unsigned RowSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
1073
|
+
inline CUDA_CALLABLE void adj_add_inplace(
|
|
1074
|
+
mat_t<Rows,Cols,Type>& m, slice_t row_slice, int col, vec_t<RowSliceLength, Type>& value,
|
|
1075
|
+
mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, int& adj_col, vec_t<RowSliceLength, Type>& adj_value
|
|
1076
|
+
)
|
|
731
1077
|
{
|
|
732
1078
|
#ifndef NDEBUG
|
|
733
|
-
if (
|
|
1079
|
+
if (col < -(int)Cols || col >= (int)Cols)
|
|
734
1080
|
{
|
|
735
|
-
printf("mat
|
|
1081
|
+
printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
|
|
736
1082
|
assert(0);
|
|
737
1083
|
}
|
|
738
1084
|
#endif
|
|
739
1085
|
|
|
740
|
-
|
|
1086
|
+
assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
|
|
1087
|
+
assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
|
|
1088
|
+
assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
|
|
1089
|
+
assert(slice_get_length(row_slice) == RowSliceLength);
|
|
1090
|
+
|
|
1091
|
+
if (col < 0)
|
|
741
1092
|
{
|
|
742
|
-
|
|
1093
|
+
col += Cols;
|
|
1094
|
+
}
|
|
1095
|
+
|
|
1096
|
+
bool is_row_reversed = row_slice.step < 0;
|
|
1097
|
+
|
|
1098
|
+
int ii = 0;
|
|
1099
|
+
for (
|
|
1100
|
+
int i = row_slice.start;
|
|
1101
|
+
is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
|
|
1102
|
+
i += row_slice.step
|
|
1103
|
+
)
|
|
1104
|
+
{
|
|
1105
|
+
adj_value.c[ii] += adj_m.data[i][col];
|
|
1106
|
+
++ii;
|
|
743
1107
|
}
|
|
1108
|
+
|
|
1109
|
+
assert(ii == RowSliceLength);
|
|
744
1110
|
}
|
|
745
1111
|
|
|
746
1112
|
|
|
747
|
-
template<unsigned Rows, unsigned Cols, typename Type>
|
|
748
|
-
inline CUDA_CALLABLE void
|
|
749
|
-
|
|
1113
|
+
template<unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
1114
|
+
inline CUDA_CALLABLE void adj_add_inplace(
|
|
1115
|
+
mat_t<Rows,Cols,Type>& m, int row, slice_t col_slice, vec_t<ColSliceLength, Type>& value,
|
|
1116
|
+
mat_t<Rows,Cols,Type>& adj_m, int& adj_row, slice_t& adj_col_slice, vec_t<ColSliceLength, Type>& adj_value
|
|
1117
|
+
)
|
|
750
1118
|
{
|
|
751
1119
|
#ifndef NDEBUG
|
|
752
|
-
if (row <
|
|
1120
|
+
if (row < -(int)Rows || row >= (int)Rows)
|
|
753
1121
|
{
|
|
754
1122
|
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
755
1123
|
assert(0);
|
|
756
1124
|
}
|
|
757
|
-
|
|
1125
|
+
#endif
|
|
1126
|
+
|
|
1127
|
+
assert(col_slice.start >= 0 && col_slice.start <= (int)Cols);
|
|
1128
|
+
assert(col_slice.stop >= -1 && col_slice.stop <= (int)Cols);
|
|
1129
|
+
assert(col_slice.step != 0 && col_slice.step < 0 ? col_slice.start >= col_slice.stop : col_slice.start <= col_slice.stop);
|
|
1130
|
+
assert(slice_get_length(col_slice) == ColSliceLength);
|
|
1131
|
+
|
|
1132
|
+
if (row < 0)
|
|
1133
|
+
{
|
|
1134
|
+
row += Rows;
|
|
1135
|
+
}
|
|
1136
|
+
|
|
1137
|
+
bool is_col_reversed = col_slice.step < 0;
|
|
1138
|
+
|
|
1139
|
+
int ii = 0;
|
|
1140
|
+
for (
|
|
1141
|
+
int i = col_slice.start;
|
|
1142
|
+
is_col_reversed ? (i > col_slice.stop) : (i < col_slice.stop);
|
|
1143
|
+
i += col_slice.step
|
|
1144
|
+
)
|
|
1145
|
+
{
|
|
1146
|
+
adj_value.c[ii] += adj_m.data[row][i];
|
|
1147
|
+
++ii;
|
|
1148
|
+
}
|
|
1149
|
+
|
|
1150
|
+
assert(ii == ColSliceLength);
|
|
1151
|
+
}
|
|
1152
|
+
|
|
1153
|
+
|
|
1154
|
+
template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
1155
|
+
inline CUDA_CALLABLE void adj_add_inplace(
|
|
1156
|
+
mat_t<Rows,Cols,Type>& m, slice_t row_slice, slice_t col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value,
|
|
1157
|
+
mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, slice_t& adj_col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& adj_value
|
|
1158
|
+
)
|
|
1159
|
+
{
|
|
1160
|
+
assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
|
|
1161
|
+
assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
|
|
1162
|
+
assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
|
|
1163
|
+
assert(slice_get_length(row_slice) == RowSliceLength);
|
|
1164
|
+
|
|
1165
|
+
assert(col_slice.start >= 0 && col_slice.start <= (int)Cols);
|
|
1166
|
+
assert(col_slice.stop >= -1 && col_slice.stop <= (int)Cols);
|
|
1167
|
+
assert(col_slice.step != 0 && col_slice.step < 0 ? col_slice.start >= col_slice.stop : col_slice.start <= col_slice.stop);
|
|
1168
|
+
assert(slice_get_length(col_slice) == ColSliceLength);
|
|
1169
|
+
|
|
1170
|
+
bool is_row_reversed = row_slice.step < 0;
|
|
1171
|
+
bool is_col_reversed = col_slice.step < 0;
|
|
1172
|
+
|
|
1173
|
+
int ii = 0;
|
|
1174
|
+
for (
|
|
1175
|
+
int i = row_slice.start;
|
|
1176
|
+
is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
|
|
1177
|
+
i += row_slice.step
|
|
1178
|
+
)
|
|
1179
|
+
{
|
|
1180
|
+
int jj = 0;
|
|
1181
|
+
for (
|
|
1182
|
+
int j = col_slice.start;
|
|
1183
|
+
is_col_reversed ? (j > col_slice.stop) : (j < col_slice.stop);
|
|
1184
|
+
j += col_slice.step
|
|
1185
|
+
)
|
|
1186
|
+
{
|
|
1187
|
+
adj_value.data[ii][jj] += adj_m.data[i][j];
|
|
1188
|
+
++jj;
|
|
1189
|
+
}
|
|
1190
|
+
|
|
1191
|
+
assert(jj == ColSliceLength);
|
|
1192
|
+
++ii;
|
|
1193
|
+
}
|
|
1194
|
+
|
|
1195
|
+
assert(ii == RowSliceLength);
|
|
1196
|
+
}
|
|
1197
|
+
|
|
1198
|
+
|
|
1199
|
+
template<unsigned Rows, unsigned Cols, typename Type>
|
|
1200
|
+
inline CUDA_CALLABLE void sub_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
|
|
1201
|
+
{
|
|
1202
|
+
#ifndef NDEBUG
|
|
1203
|
+
if (row < -(int)Rows || row >= (int)Rows)
|
|
1204
|
+
{
|
|
1205
|
+
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
1206
|
+
assert(0);
|
|
1207
|
+
}
|
|
1208
|
+
if (col < -(int)Cols || col >= (int)Cols)
|
|
1209
|
+
{
|
|
1210
|
+
printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
|
|
1211
|
+
assert(0);
|
|
1212
|
+
}
|
|
1213
|
+
#endif
|
|
1214
|
+
|
|
1215
|
+
if (row < 0)
|
|
1216
|
+
{
|
|
1217
|
+
row += Rows;
|
|
1218
|
+
}
|
|
1219
|
+
if (col < 0)
|
|
1220
|
+
{
|
|
1221
|
+
col += Cols;
|
|
1222
|
+
}
|
|
1223
|
+
|
|
1224
|
+
m.data[row][col] -= value;
|
|
1225
|
+
}
|
|
1226
|
+
|
|
1227
|
+
|
|
1228
|
+
template<unsigned Rows, unsigned Cols, typename Type>
|
|
1229
|
+
inline CUDA_CALLABLE void sub_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
|
|
1230
|
+
{
|
|
1231
|
+
#ifndef NDEBUG
|
|
1232
|
+
if (row < -(int)Rows || row >= (int)Rows)
|
|
1233
|
+
{
|
|
1234
|
+
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
1235
|
+
assert(0);
|
|
1236
|
+
}
|
|
1237
|
+
#endif
|
|
1238
|
+
|
|
1239
|
+
if (row < 0)
|
|
1240
|
+
{
|
|
1241
|
+
row += Rows;
|
|
1242
|
+
}
|
|
1243
|
+
|
|
1244
|
+
for(unsigned i=0; i < Cols; ++i)
|
|
1245
|
+
{
|
|
1246
|
+
m.data[row][i] -= value[i];
|
|
1247
|
+
}
|
|
1248
|
+
}
|
|
1249
|
+
|
|
1250
|
+
|
|
1251
|
+
template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
1252
|
+
inline CUDA_CALLABLE void sub_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value)
|
|
1253
|
+
{
|
|
1254
|
+
static_assert(
|
|
1255
|
+
RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols,
|
|
1256
|
+
"Expected RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols"
|
|
1257
|
+
);
|
|
1258
|
+
|
|
1259
|
+
assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
|
|
1260
|
+
assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
|
|
1261
|
+
assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
|
|
1262
|
+
assert(slice_get_length(row_slice) == RowSliceLength);
|
|
1263
|
+
|
|
1264
|
+
bool is_row_reversed = row_slice.step < 0;
|
|
1265
|
+
|
|
1266
|
+
int ii = 0;
|
|
1267
|
+
for (
|
|
1268
|
+
int i = row_slice.start;
|
|
1269
|
+
is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
|
|
1270
|
+
i += row_slice.step
|
|
1271
|
+
)
|
|
1272
|
+
{
|
|
1273
|
+
for (int j = 0; j < Cols; ++j)
|
|
1274
|
+
{
|
|
1275
|
+
m.data[i][j] -= value.data[ii][j];
|
|
1276
|
+
}
|
|
1277
|
+
|
|
1278
|
+
++ii;
|
|
1279
|
+
}
|
|
1280
|
+
|
|
1281
|
+
assert(ii == RowSliceLength);
|
|
1282
|
+
}
|
|
1283
|
+
|
|
1284
|
+
|
|
1285
|
+
template<unsigned RowSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
1286
|
+
inline CUDA_CALLABLE void sub_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_slice, int col, vec_t<RowSliceLength, Type>& value)
|
|
1287
|
+
{
|
|
1288
|
+
#ifndef NDEBUG
|
|
1289
|
+
if (col < -(int)Cols || col >= (int)Cols)
|
|
1290
|
+
{
|
|
1291
|
+
printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
|
|
1292
|
+
assert(0);
|
|
1293
|
+
}
|
|
1294
|
+
#endif
|
|
1295
|
+
|
|
1296
|
+
assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
|
|
1297
|
+
assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
|
|
1298
|
+
assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
|
|
1299
|
+
assert(slice_get_length(row_slice) == RowSliceLength);
|
|
1300
|
+
|
|
1301
|
+
if (col < 0)
|
|
1302
|
+
{
|
|
1303
|
+
col += Cols;
|
|
1304
|
+
}
|
|
1305
|
+
|
|
1306
|
+
bool is_row_reversed = row_slice.step < 0;
|
|
1307
|
+
|
|
1308
|
+
int ii = 0;
|
|
1309
|
+
for (
|
|
1310
|
+
int i = row_slice.start;
|
|
1311
|
+
is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
|
|
1312
|
+
i += row_slice.step
|
|
1313
|
+
)
|
|
1314
|
+
{
|
|
1315
|
+
m.data[i][col] -= value.c[ii];
|
|
1316
|
+
++ii;
|
|
1317
|
+
}
|
|
1318
|
+
|
|
1319
|
+
assert(ii == RowSliceLength);
|
|
1320
|
+
}
|
|
1321
|
+
|
|
1322
|
+
|
|
1323
|
+
template<unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
1324
|
+
inline CUDA_CALLABLE void sub_inplace(mat_t<Rows,Cols,Type>& m, int row, slice_t col_slice, vec_t<ColSliceLength, Type>& value)
|
|
1325
|
+
{
|
|
1326
|
+
#ifndef NDEBUG
|
|
1327
|
+
if (row < -(int)Rows || row >= (int)Rows)
|
|
1328
|
+
{
|
|
1329
|
+
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
1330
|
+
assert(0);
|
|
1331
|
+
}
|
|
1332
|
+
#endif
|
|
1333
|
+
|
|
1334
|
+
assert(col_slice.start >= 0 && col_slice.start <= (int)Cols);
|
|
1335
|
+
assert(col_slice.stop >= -1 && col_slice.stop <= (int)Cols);
|
|
1336
|
+
assert(col_slice.step != 0 && col_slice.step < 0 ? col_slice.start >= col_slice.stop : col_slice.start <= col_slice.stop);
|
|
1337
|
+
assert(slice_get_length(col_slice) == ColSliceLength);
|
|
1338
|
+
|
|
1339
|
+
if (row < 0)
|
|
1340
|
+
{
|
|
1341
|
+
row += Rows;
|
|
1342
|
+
}
|
|
1343
|
+
|
|
1344
|
+
bool is_col_reversed = col_slice.step < 0;
|
|
1345
|
+
|
|
1346
|
+
int ii = 0;
|
|
1347
|
+
for (
|
|
1348
|
+
int i = col_slice.start;
|
|
1349
|
+
is_col_reversed ? (i > col_slice.stop) : (i < col_slice.stop);
|
|
1350
|
+
i += col_slice.step
|
|
1351
|
+
)
|
|
1352
|
+
{
|
|
1353
|
+
m.data[row][i] -= value.c[ii];
|
|
1354
|
+
++ii;
|
|
1355
|
+
}
|
|
1356
|
+
|
|
1357
|
+
assert(ii == ColSliceLength);
|
|
1358
|
+
}
|
|
1359
|
+
|
|
1360
|
+
|
|
1361
|
+
template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
1362
|
+
inline CUDA_CALLABLE void sub_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_slice, slice_t col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value)
|
|
1363
|
+
{
|
|
1364
|
+
assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
|
|
1365
|
+
assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
|
|
1366
|
+
assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
|
|
1367
|
+
assert(slice_get_length(row_slice) == RowSliceLength);
|
|
1368
|
+
|
|
1369
|
+
assert(col_slice.start >= 0 && col_slice.start <= (int)Cols);
|
|
1370
|
+
assert(col_slice.stop >= -1 && col_slice.stop <= (int)Cols);
|
|
1371
|
+
assert(col_slice.step != 0 && col_slice.step < 0 ? col_slice.start >= col_slice.stop : col_slice.start <= col_slice.stop);
|
|
1372
|
+
assert(slice_get_length(col_slice) == ColSliceLength);
|
|
1373
|
+
|
|
1374
|
+
bool is_row_reversed = row_slice.step < 0;
|
|
1375
|
+
bool is_col_reversed = col_slice.step < 0;
|
|
1376
|
+
|
|
1377
|
+
int ii = 0;
|
|
1378
|
+
for (
|
|
1379
|
+
int i = row_slice.start;
|
|
1380
|
+
is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
|
|
1381
|
+
i += row_slice.step
|
|
1382
|
+
)
|
|
1383
|
+
{
|
|
1384
|
+
int jj = 0;
|
|
1385
|
+
for (
|
|
1386
|
+
int j = col_slice.start;
|
|
1387
|
+
is_col_reversed ? (j > col_slice.stop) : (j < col_slice.stop);
|
|
1388
|
+
j += col_slice.step
|
|
1389
|
+
)
|
|
1390
|
+
{
|
|
1391
|
+
m.data[i][j] -= value.data[ii][jj];
|
|
1392
|
+
++jj;
|
|
1393
|
+
}
|
|
1394
|
+
|
|
1395
|
+
assert(jj == ColSliceLength);
|
|
1396
|
+
++ii;
|
|
1397
|
+
}
|
|
1398
|
+
|
|
1399
|
+
assert(ii == RowSliceLength);
|
|
1400
|
+
}
|
|
1401
|
+
|
|
1402
|
+
|
|
1403
|
+
template<unsigned Rows, unsigned Cols, typename Type>
|
|
1404
|
+
inline CUDA_CALLABLE void adj_sub_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
|
|
1405
|
+
mat_t<Rows,Cols,Type>& adj_m, int adj_row, int adj_col, Type& adj_value)
|
|
1406
|
+
{
|
|
1407
|
+
#ifndef NDEBUG
|
|
1408
|
+
if (row < -(int)Rows || row >= (int)Rows)
|
|
1409
|
+
{
|
|
1410
|
+
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
1411
|
+
assert(0);
|
|
1412
|
+
}
|
|
1413
|
+
if (col < -(int)Cols || col >= (int)Cols)
|
|
1414
|
+
{
|
|
1415
|
+
printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
|
|
1416
|
+
assert(0);
|
|
1417
|
+
}
|
|
1418
|
+
#endif
|
|
1419
|
+
|
|
1420
|
+
if (row < 0)
|
|
1421
|
+
{
|
|
1422
|
+
row += Rows;
|
|
1423
|
+
}
|
|
1424
|
+
if (col < 0)
|
|
1425
|
+
{
|
|
1426
|
+
col += Cols;
|
|
1427
|
+
}
|
|
1428
|
+
|
|
1429
|
+
adj_value -= adj_m.data[row][col];
|
|
1430
|
+
}
|
|
1431
|
+
|
|
1432
|
+
|
|
1433
|
+
template<unsigned Rows, unsigned Cols, typename Type>
|
|
1434
|
+
inline CUDA_CALLABLE void adj_sub_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
|
|
1435
|
+
mat_t<Rows,Cols,Type>& adj_m, int adj_row, vec_t<Cols,Type>& adj_value)
|
|
1436
|
+
{
|
|
1437
|
+
#ifndef NDEBUG
|
|
1438
|
+
if (row < -(int)Rows || row >= (int)Rows)
|
|
1439
|
+
{
|
|
1440
|
+
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
1441
|
+
assert(0);
|
|
1442
|
+
}
|
|
1443
|
+
#endif
|
|
1444
|
+
|
|
1445
|
+
if (row < 0)
|
|
1446
|
+
{
|
|
1447
|
+
row += Rows;
|
|
1448
|
+
}
|
|
1449
|
+
|
|
1450
|
+
for(unsigned i=0; i < Cols; ++i)
|
|
1451
|
+
{
|
|
1452
|
+
adj_value[i] -= adj_m.data[row][i];
|
|
1453
|
+
}
|
|
1454
|
+
}
|
|
1455
|
+
|
|
1456
|
+
|
|
1457
|
+
template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
1458
|
+
inline CUDA_CALLABLE void adj_sub_inplace(
|
|
1459
|
+
mat_t<Rows,Cols,Type>& m, slice_t row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value,
|
|
1460
|
+
mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& adj_value
|
|
1461
|
+
)
|
|
1462
|
+
{
|
|
1463
|
+
static_assert(
|
|
1464
|
+
RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols,
|
|
1465
|
+
"Expected RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols"
|
|
1466
|
+
);
|
|
1467
|
+
|
|
1468
|
+
assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
|
|
1469
|
+
assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
|
|
1470
|
+
assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
|
|
1471
|
+
assert(slice_get_length(row_slice) == RowSliceLength);
|
|
1472
|
+
|
|
1473
|
+
bool is_row_reversed = row_slice.step < 0;
|
|
1474
|
+
|
|
1475
|
+
int ii = 0;
|
|
1476
|
+
for (
|
|
1477
|
+
int i = row_slice.start;
|
|
1478
|
+
is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
|
|
1479
|
+
i += row_slice.step
|
|
1480
|
+
)
|
|
1481
|
+
{
|
|
1482
|
+
for (int j = 0; j < Cols; ++j)
|
|
1483
|
+
{
|
|
1484
|
+
adj_value.data[ii][j] -= adj_m.data[i][j];
|
|
1485
|
+
}
|
|
1486
|
+
|
|
1487
|
+
++ii;
|
|
1488
|
+
}
|
|
1489
|
+
|
|
1490
|
+
assert(ii == RowSliceLength);
|
|
1491
|
+
}
|
|
1492
|
+
|
|
1493
|
+
|
|
1494
|
+
template<unsigned RowSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
1495
|
+
inline CUDA_CALLABLE void adj_sub_inplace(
|
|
1496
|
+
mat_t<Rows,Cols,Type>& m, slice_t row_slice, int col, vec_t<RowSliceLength, Type>& value,
|
|
1497
|
+
mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, int& adj_col, vec_t<RowSliceLength, Type>& adj_value
|
|
1498
|
+
)
|
|
1499
|
+
{
|
|
1500
|
+
#ifndef NDEBUG
|
|
1501
|
+
if (col < -(int)Cols || col >= (int)Cols)
|
|
1502
|
+
{
|
|
1503
|
+
printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
|
|
1504
|
+
assert(0);
|
|
1505
|
+
}
|
|
1506
|
+
#endif
|
|
1507
|
+
|
|
1508
|
+
assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
|
|
1509
|
+
assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
|
|
1510
|
+
assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
|
|
1511
|
+
assert(slice_get_length(row_slice) == RowSliceLength);
|
|
1512
|
+
|
|
1513
|
+
if (col < 0)
|
|
1514
|
+
{
|
|
1515
|
+
col += Cols;
|
|
1516
|
+
}
|
|
1517
|
+
|
|
1518
|
+
bool is_row_reversed = row_slice.step < 0;
|
|
1519
|
+
|
|
1520
|
+
int ii = 0;
|
|
1521
|
+
for (
|
|
1522
|
+
int i = row_slice.start;
|
|
1523
|
+
is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
|
|
1524
|
+
i += row_slice.step
|
|
1525
|
+
)
|
|
1526
|
+
{
|
|
1527
|
+
adj_value.c[ii] -= adj_m.data[i][col];
|
|
1528
|
+
++ii;
|
|
1529
|
+
}
|
|
1530
|
+
|
|
1531
|
+
assert(ii == RowSliceLength);
|
|
1532
|
+
}
|
|
1533
|
+
|
|
1534
|
+
|
|
1535
|
+
template<unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
1536
|
+
inline CUDA_CALLABLE void adj_sub_inplace(
|
|
1537
|
+
mat_t<Rows,Cols,Type>& m, int row, slice_t col_slice, vec_t<ColSliceLength, Type>& value,
|
|
1538
|
+
mat_t<Rows,Cols,Type>& adj_m, int& adj_row, slice_t& adj_col_slice, vec_t<ColSliceLength, Type>& adj_value
|
|
1539
|
+
)
|
|
1540
|
+
{
|
|
1541
|
+
#ifndef NDEBUG
|
|
1542
|
+
if (row < -(int)Rows || row >= (int)Rows)
|
|
1543
|
+
{
|
|
1544
|
+
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
1545
|
+
assert(0);
|
|
1546
|
+
}
|
|
1547
|
+
#endif
|
|
1548
|
+
|
|
1549
|
+
assert(col_slice.start >= 0 && col_slice.start <= (int)Cols);
|
|
1550
|
+
assert(col_slice.stop >= -1 && col_slice.stop <= (int)Cols);
|
|
1551
|
+
assert(col_slice.step != 0 && col_slice.step < 0 ? col_slice.start >= col_slice.stop : col_slice.start <= col_slice.stop);
|
|
1552
|
+
assert(slice_get_length(col_slice) == ColSliceLength);
|
|
1553
|
+
|
|
1554
|
+
if (row < 0)
|
|
1555
|
+
{
|
|
1556
|
+
row += Rows;
|
|
1557
|
+
}
|
|
1558
|
+
|
|
1559
|
+
bool is_col_reversed = col_slice.step < 0;
|
|
1560
|
+
|
|
1561
|
+
int ii = 0;
|
|
1562
|
+
for (
|
|
1563
|
+
int i = col_slice.start;
|
|
1564
|
+
is_col_reversed ? (i > col_slice.stop) : (i < col_slice.stop);
|
|
1565
|
+
i += col_slice.step
|
|
1566
|
+
)
|
|
1567
|
+
{
|
|
1568
|
+
adj_value.c[ii] -= adj_m.data[row][i];
|
|
1569
|
+
++ii;
|
|
1570
|
+
}
|
|
1571
|
+
|
|
1572
|
+
assert(ii == ColSliceLength);
|
|
1573
|
+
}
|
|
1574
|
+
|
|
1575
|
+
|
|
1576
|
+
template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
|
|
1577
|
+
inline CUDA_CALLABLE void adj_sub_inplace(
|
|
1578
|
+
mat_t<Rows,Cols,Type>& m, slice_t row_slice, slice_t col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value,
|
|
1579
|
+
mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, slice_t& adj_col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& adj_value
|
|
1580
|
+
)
|
|
1581
|
+
{
|
|
1582
|
+
assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
|
|
1583
|
+
assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
|
|
1584
|
+
assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
|
|
1585
|
+
assert(slice_get_length(row_slice) == RowSliceLength);
|
|
1586
|
+
|
|
1587
|
+
assert(col_slice.start >= 0 && col_slice.start <= (int)Cols);
|
|
1588
|
+
assert(col_slice.stop >= -1 && col_slice.stop <= (int)Cols);
|
|
1589
|
+
assert(col_slice.step != 0 && col_slice.step < 0 ? col_slice.start >= col_slice.stop : col_slice.start <= col_slice.stop);
|
|
1590
|
+
assert(slice_get_length(col_slice) == ColSliceLength);
|
|
1591
|
+
|
|
1592
|
+
bool is_row_reversed = row_slice.step < 0;
|
|
1593
|
+
bool is_col_reversed = col_slice.step < 0;
|
|
1594
|
+
|
|
1595
|
+
int ii = 0;
|
|
1596
|
+
for (
|
|
1597
|
+
int i = row_slice.start;
|
|
1598
|
+
is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
|
|
1599
|
+
i += row_slice.step
|
|
1600
|
+
)
|
|
1601
|
+
{
|
|
1602
|
+
int jj = 0;
|
|
1603
|
+
for (
|
|
1604
|
+
int j = col_slice.start;
|
|
1605
|
+
is_col_reversed ? (j > col_slice.stop) : (j < col_slice.stop);
|
|
1606
|
+
j += col_slice.step
|
|
1607
|
+
)
|
|
1608
|
+
{
|
|
1609
|
+
adj_value.data[ii][jj] -= adj_m.data[i][j];
|
|
1610
|
+
++jj;
|
|
1611
|
+
}
|
|
1612
|
+
|
|
1613
|
+
assert(jj == ColSliceLength);
|
|
1614
|
+
++ii;
|
|
1615
|
+
}
|
|
1616
|
+
|
|
1617
|
+
assert(ii == RowSliceLength);
|
|
1618
|
+
}
|
|
1619
|
+
|
|
1620
|
+
|
|
1621
|
+
template<unsigned Rows, unsigned Cols, typename Type>
|
|
1622
|
+
inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
|
|
1623
|
+
{
|
|
1624
|
+
#ifndef NDEBUG
|
|
1625
|
+
if (row < -(int)Rows || row >= (int)Rows)
|
|
1626
|
+
{
|
|
1627
|
+
printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
|
|
1628
|
+
assert(0);
|
|
1629
|
+
}
|
|
1630
|
+
if (col < -(int)Cols || col >= (int)Cols)
|
|
758
1631
|
{
|
|
759
1632
|
printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
|
|
760
1633
|
assert(0);
|
|
761
1634
|
}
|
|
762
1635
|
#endif
|
|
763
1636
|
|
|
1637
|
+
if (row < 0)
|
|
1638
|
+
{
|
|
1639
|
+
row += Rows;
|
|
1640
|
+
}
|
|
1641
|
+
if (col < 0)
|
|
1642
|
+
{
|
|
1643
|
+
col += Cols;
|
|
1644
|
+
}
|
|
1645
|
+
|
|
1646
|
+
m.data[row][col] = value;
|
|
1647
|
+
}
|
|
1648
|
+
|
|
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
+{
+#ifndef NDEBUG
+    if (row < -(int)Rows || row >= (int)Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    if (row < 0)
+    {
+        row += Rows;
+    }
+
+    for(unsigned i=0; i < Cols; ++i)
+    {
+        m.data[row][i] = value[i];
+    }
+}
+
+
+template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value)
+{
+    static_assert(
+        RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols,
+        "Expected RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols"
+    );
+
+    assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
+    assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
+    assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
+    assert(slice_get_length(row_slice) == RowSliceLength);
+
+    bool is_row_reversed = row_slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = row_slice.start;
+        is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
+        i += row_slice.step
+    )
+    {
+        for (int j = 0; j < Cols; ++j)
+        {
+            m.data[i][j] = value.data[ii][j];
+        }
+
+        ++ii;
+    }
+
+    assert(ii == RowSliceLength);
+}
+
+
+template<unsigned RowSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_slice, int col, vec_t<RowSliceLength, Type>& value)
+{
+#ifndef NDEBUG
+    if (col < -(int)Cols || col >= (int)Cols)
+    {
+        printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
+    assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
+    assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
+    assert(slice_get_length(row_slice) == RowSliceLength);
+
+    if (col < 0)
+    {
+        col += Cols;
+    }
+
+    bool is_row_reversed = row_slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = row_slice.start;
+        is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
+        i += row_slice.step
+    )
+    {
+        m.data[i][col] = value.c[ii];
+        ++ii;
+    }
+
+    assert(ii == RowSliceLength);
+}
+
+
+template<unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, int row, slice_t col_slice, vec_t<ColSliceLength, Type>& value)
+{
+#ifndef NDEBUG
+    if (row < -(int)Rows || row >= (int)Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    assert(col_slice.start >= 0 && col_slice.start <= (int)Cols);
+    assert(col_slice.stop >= -1 && col_slice.stop <= (int)Cols);
+    assert(col_slice.step != 0 && col_slice.step < 0 ? col_slice.start >= col_slice.stop : col_slice.start <= col_slice.stop);
+    assert(slice_get_length(col_slice) == ColSliceLength);
+
+    if (row < 0)
+    {
+        row += Rows;
+    }
+
+    bool is_col_reversed = col_slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = col_slice.start;
+        is_col_reversed ? (i > col_slice.stop) : (i < col_slice.stop);
+        i += col_slice.step
+    )
+    {
+        m.data[row][i] = value.c[ii];
+        ++ii;
+    }
+
+    assert(ii == ColSliceLength);
+}
+
+
+template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void assign_inplace(mat_t<Rows,Cols,Type>& m, slice_t row_slice, slice_t col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value)
+{
+    assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
+    assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
+    assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
+    assert(slice_get_length(row_slice) == RowSliceLength);
+
+    assert(col_slice.start >= 0 && col_slice.start <= (int)Cols);
+    assert(col_slice.stop >= -1 && col_slice.stop <= (int)Cols);
+    assert(col_slice.step != 0 && col_slice.step < 0 ? col_slice.start >= col_slice.stop : col_slice.start <= col_slice.stop);
+    assert(slice_get_length(col_slice) == ColSliceLength);
+
+    bool is_row_reversed = row_slice.step < 0;
+    bool is_col_reversed = col_slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = row_slice.start;
+        is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
+        i += row_slice.step
+    )
+    {
+        int jj = 0;
+        for (
+            int j = col_slice.start;
+            is_col_reversed ? (j > col_slice.stop) : (j < col_slice.stop);
+            j += col_slice.step
+        )
+        {
+            m.data[i][j] = value.data[ii][jj];
+            ++jj;
+        }
+
+        assert(jj == ColSliceLength);
+        ++ii;
+    }
+
+    assert(ii == RowSliceLength);
+}
+
+
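The five assign_inplace overloads above share one traversal pattern: negative scalar indices wrap Python-style, and a slice is walked from start toward stop by step while a counter is checked against the compile-time slice length. Below is a standalone sketch of that pattern in plain C++; Slice and slice_length here are illustrative stand-ins for warp's slice_t and slice_get_length, not the actual declarations.

#include <cassert>
#include <cstdio>

struct Slice { int start, stop, step; };   // Python-style half-open slice

// Mirrors the role of slice_get_length(): how many indices the slice visits.
int slice_length(Slice s)
{
    assert(s.step != 0);
    if (s.step > 0)
        return s.start < s.stop ? (s.stop - s.start + s.step - 1) / s.step : 0;
    return s.start > s.stop ? (s.start - s.stop - s.step - 1) / (-s.step) : 0;
}

int main()
{
    float m[4][3] = {};            // 4x3 "matrix", rows x cols
    float v[2]    = {1.0f, 2.0f};  // values for the sliced rows

    Slice rows{3, -1, -2};         // visits rows 3 and 1, reversed (like m[3::-2])
    assert(slice_length(rows) == 2);

    int col = -1;                  // negative index wraps to the last column
    if (col < 0) col += 3;

    // Same loop shape as assign_inplace(m, row_slice, col, value):
    int ii = 0;
    for (int i = rows.start; rows.step < 0 ? i > rows.stop : i < rows.stop; i += rows.step)
        m[i][col] = v[ii++];

    printf("m[3][2]=%g m[1][2]=%g\n", m[3][2], m[1][2]);  // prints 1 and 2
    return 0;
}

The ceil-division length formula is what makes the trailing assert(ii == RowSliceLength) hold for any valid start/stop/step combination.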
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_assign_inplace(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
+                                             mat_t<Rows,Cols,Type>& adj_m, int& adj_row, int& adj_col, Type& adj_value)
+{
+#ifndef NDEBUG
+    if (row < -(int)Rows || row >= (int)Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+    if (col < -(int)Cols || col >= (int)Cols)
+    {
+        printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    if (row < 0)
+    {
+        row += Rows;
+    }
+    if (col < 0)
+    {
+        col += Cols;
+    }
+
     adj_value += adj_m.data[row][col];
 }
 
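The adjoint of an in-place scalar assignment is a single accumulation: the forward op wrote value into m[row][col], so the backward pass reads back whatever gradient later operations deposited on that entry. A minimal sketch of the rule (illustrative names, not Warp API):

#include <cassert>

// Forward: m[row][col] = value.  Reverse: d(loss)/d(value) picks up the
// gradient that accumulated on the written matrix entry.
int main()
{
    float adj_m = 0.5f;    // gradient w.r.t. m[row][col] from downstream ops
    float adj_value = 0.0f;

    adj_value += adj_m;    // effectively the whole body of adj_assign_inplace

    assert(adj_value == 0.5f);
    return 0;
}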
@@ -770,13 +1857,18 @@ inline CUDA_CALLABLE void adj_assign_inplace(mat_t<Rows,Cols,Type>& m, int row,
                                              mat_t<Rows,Cols,Type>& adj_m, int& adj_row, vec_t<Cols,Type>& adj_value)
 {
 #ifndef NDEBUG
-    if (row <
+    if (row < -(int)Rows || row >= (int)Rows)
     {
         printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (row < 0)
+    {
+        row += Rows;
+    }
+
     for(unsigned i=0; i < Cols; ++i)
     {
         adj_value[i] += adj_m.data[row][i];
@@ -784,99 +1876,563 @@ inline CUDA_CALLABLE void adj_assign_inplace(mat_t<Rows,Cols,Type>& m, int row,
     }
 }
 
 
+template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_assign_inplace(
+    mat_t<Rows,Cols,Type>& m, slice_t row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& adj_value
+)
+{
+    static_assert(
+        RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols,
+        "Expected RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols"
+    );
+
+    assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
+    assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
+    assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
+    assert(slice_get_length(row_slice) == RowSliceLength);
+
+    bool is_row_reversed = row_slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = row_slice.start;
+        is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
+        i += row_slice.step
+    )
+    {
+        for (int j = 0; j < Cols; ++j)
+        {
+            adj_value.data[ii][j] += adj_m.data[i][j];
+        }
+
+        ++ii;
+    }
+
+    assert(ii == RowSliceLength);
+}
+
+
+template<unsigned RowSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_assign_inplace(
+    mat_t<Rows,Cols,Type>& m, slice_t row_slice, int col, vec_t<RowSliceLength, Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, int& adj_col, vec_t<RowSliceLength, Type>& adj_value
+)
+{
+#ifndef NDEBUG
+    if (col < -(int)Cols || col >= (int)Cols)
+    {
+        printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
+    assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
+    assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
+    assert(slice_get_length(row_slice) == RowSliceLength);
+
+    if (col < 0)
+    {
+        col += Cols;
+    }
+
+    bool is_row_reversed = row_slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = row_slice.start;
+        is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
+        i += row_slice.step
+    )
+    {
+        adj_value.c[ii] += adj_m.data[i][col];
+        ++ii;
+    }
+
+    assert(ii == RowSliceLength);
+}
+
+
+template<unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_assign_inplace(
+    mat_t<Rows,Cols,Type>& m, int row, slice_t col_slice, vec_t<ColSliceLength, Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, int& adj_row, slice_t& adj_col_slice, vec_t<ColSliceLength, Type>& adj_value
+)
+{
+#ifndef NDEBUG
+    if (row < -(int)Rows || row >= (int)Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    assert(col_slice.start >= 0 && col_slice.start <= (int)Cols);
+    assert(col_slice.stop >= -1 && col_slice.stop <= (int)Cols);
+    assert(col_slice.step != 0 && col_slice.step < 0 ? col_slice.start >= col_slice.stop : col_slice.start <= col_slice.stop);
+    assert(slice_get_length(col_slice) == ColSliceLength);
+
+    if (row < 0)
+    {
+        row += Rows;
+    }
+
+    bool is_col_reversed = col_slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = col_slice.start;
+        is_col_reversed ? (i > col_slice.stop) : (i < col_slice.stop);
+        i += col_slice.step
+    )
+    {
+        adj_value.c[ii] += adj_m.data[row][i];
+        ++ii;
+    }
+
+    assert(ii == ColSliceLength);
+}
+
+
+template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_assign_inplace(
+    mat_t<Rows,Cols,Type>& m, slice_t row_slice, slice_t col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, slice_t& adj_col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& adj_value
+)
+{
+    assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
+    assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
+    assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
+    assert(slice_get_length(row_slice) == RowSliceLength);
+
+    assert(col_slice.start >= 0 && col_slice.start <= (int)Cols);
+    assert(col_slice.stop >= -1 && col_slice.stop <= (int)Cols);
+    assert(col_slice.step != 0 && col_slice.step < 0 ? col_slice.start >= col_slice.stop : col_slice.start <= col_slice.stop);
+    assert(slice_get_length(col_slice) == ColSliceLength);
+
+    bool is_row_reversed = row_slice.step < 0;
+    bool is_col_reversed = col_slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = row_slice.start;
+        is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
+        i += row_slice.step
+    )
+    {
+        int jj = 0;
+        for (
+            int j = col_slice.start;
+            is_col_reversed ? (j > col_slice.stop) : (j < col_slice.stop);
+            j += col_slice.step
+        )
+        {
+            adj_value.data[ii][jj] += adj_m.data[i][j];
+            ++jj;
+        }
+
+        assert(jj == ColSliceLength);
+        ++ii;
+    }
+
+    assert(ii == RowSliceLength);
+}
+
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign_copy(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
+{
+#ifndef NDEBUG
+    if (row < -(int)Rows || row >= (int)Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+    if (col < -(int)Cols || col >= (int)Cols)
+    {
+        printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    if (row < 0)
+    {
+        row += Rows;
+    }
+    if (col < 0)
+    {
+        col += Cols;
+    }
+
+    mat_t<Rows,Cols,Type> ret(m);
+    ret.data[row][col] = value;
+    return ret;
+}
+
+
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign_copy(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value)
+{
+#ifndef NDEBUG
+    if (row < -(int)Rows || row >= (int)Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    if (row < 0)
+    {
+        row += Rows;
+    }
+
+    mat_t<Rows,Cols,Type> ret(m);
+    for(unsigned i=0; i < Cols; ++i)
+    {
+        ret.data[row][i] = value[i];
+    }
+    return ret;
+}
+
+
+template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign_copy(mat_t<Rows,Cols,Type>& m, slice_t row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value)
+{
+    mat_t<Rows, Cols, Type> ret(m);
+    assign_inplace(ret, row_slice, value);
+    return ret;
+}
+
+
+template<unsigned RowSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign_copy(mat_t<Rows,Cols,Type>& m, slice_t row_slice, int col, vec_t<RowSliceLength, Type>& value)
+{
+    mat_t<Rows, Cols, Type> ret(m);
+    assign_inplace(ret, row_slice, col, value);
+    return ret;
+}
+
+
+template<unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign_copy(mat_t<Rows,Cols,Type>& m, int row, slice_t col_slice, vec_t<ColSliceLength, Type>& value)
+{
+    mat_t<Rows, Cols, Type> ret(m);
+    assign_inplace(ret, row, col_slice, value);
+    return ret;
+}
+
+
+template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign_copy(mat_t<Rows,Cols,Type>& m, slice_t row_slice, slice_t col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value)
+{
+    mat_t<Rows, Cols, Type> ret(m);
+    assign_inplace(ret, row_slice, col_slice, value);
+    return ret;
+}
+
+
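The assign_copy family above mirrors assign_inplace but leaves the source untouched: copy, mutate the copy, return it. A toy sketch of that split, using a hypothetical 2x2 stand-in type rather than warp's mat_t:

#include <cstdio>

// Stand-in for mat_t<2,2,float>; illustrates the copy-vs-inplace pattern.
struct Mat2 { float data[2][2]; };

// The in-place form mutates its argument...
void assign_inplace(Mat2& m, int row, int col, float value)
{
    m.data[row][col] = value;
}

// ...while the copy form follows the same shape as the diff: duplicate,
// then reuse the in-place implementation on the duplicate.
Mat2 assign_copy(Mat2& m, int row, int col, float value)
{
    Mat2 ret(m);
    assign_inplace(ret, row, col, value);
    return ret;
}

int main()
{
    Mat2 m = {{{1, 2}, {3, 4}}};
    Mat2 n = assign_copy(m, 0, 1, 9.0f);
    printf("m[0][1]=%g n[0][1]=%g\n", m.data[0][1], n.data[0][1]);  // 2 9
    return 0;
}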
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE
+inline CUDA_CALLABLE void adj_assign_copy(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
+                                          mat_t<Rows,Cols,Type>& adj_m, int& adj_row, int& adj_col, Type& adj_value, const mat_t<Rows,Cols,Type>& adj_ret)
 {
 #ifndef NDEBUG
-    if (row <
+    if (row < -(int)Rows || row >= (int)Rows)
     {
         printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
         assert(0);
     }
-    if (col <
+    if (col < -(int)Cols || col >= (int)Cols)
     {
         printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
-
-
-
+    if (row < 0)
+    {
+        row += Rows;
+    }
+    if (col < 0)
+    {
+        col += Cols;
+    }
+
+    adj_value += adj_ret.data[row][col];
+    for(unsigned i=0; i < Rows; ++i)
+    {
+        for(unsigned j=0; j < Cols; ++j)
+        {
+            if(i != row || j != col)
+                adj_m.data[i][j] += adj_ret.data[i][j];
+        }
+    }
 }
 
 
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE
+inline CUDA_CALLABLE void adj_assign_copy(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols,Type>& value,
+                                          mat_t<Rows,Cols,Type>& adj_m, int& adj_row, vec_t<Cols,Type>& adj_value, const mat_t<Rows,Cols,Type>& adj_ret)
 {
 #ifndef NDEBUG
-    if (row <
+    if (row < -(int)Rows || row >= (int)Rows)
     {
         printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
-
-    for(unsigned i=0; i < Cols; ++i)
+    if (row < 0)
     {
-
+        row += Rows;
+    }
+
+    for(unsigned i=0; i < Rows; ++i)
+    {
+        for(unsigned j=0; j < Cols; ++j)
+        {
+            if (i==row)
+                adj_value[j] += adj_ret.data[i][j];
+            else
+                adj_m.data[i][j] += adj_ret.data[i][j];
+        }
     }
-    return ret;
 }
 
 
-template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void adj_assign_copy(
-
+template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_assign_copy(
+    mat_t<Rows,Cols,Type>& m, slice_t row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, mat_t<RowSliceLength, ColSliceLength, Type>& adj_value,
+    mat_t<Rows,Cols,Type>& adj_ret
+)
 {
-
-
+    static_assert(
+        RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols,
+        "Expected RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols"
+    );
+
+    assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
+    assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
+    assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
+    assert(slice_get_length(row_slice) == RowSliceLength);
+
+    bool is_row_reversed = row_slice.step < 0;
+
+    int ii = 0;
+    for (int i = 0; i < Rows; ++i)
     {
-
-
+        bool in_row_slice = is_row_reversed
+            ? (i <= row_slice.start && i > row_slice.stop && (row_slice.start - i) % (-row_slice.step) == 0)
+            : (i >= row_slice.start && i < row_slice.stop && (i - row_slice.start) % row_slice.step == 0);
+
+        if (!in_row_slice)
+        {
+            for (int j = 0; j < Cols; ++j)
+            {
+                adj_m.data[i][j] += adj_ret.data[i][j];
+            }
+        }
+        else
+        {
+            for (int j = 0; j < Cols; ++j)
+            {
+                adj_value.data[ii][j] += adj_ret.data[i][j];
+            }
+
+            ++ii;
+        }
     }
-
+
+    assert(ii == RowSliceLength);
+}
+
+
+template<unsigned RowSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_assign_copy(
+    mat_t<Rows,Cols,Type>& m, slice_t row_slice, int col, vec_t<RowSliceLength, Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, int& adj_col, vec_t<RowSliceLength, Type>& adj_value,
+    mat_t<Rows,Cols,Type>& adj_ret
+)
+{
+#ifndef NDEBUG
+    if (col < -(int)Cols || col >= (int)Cols)
     {
         printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
-
-
+    assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
+    assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
+    assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
+    assert(slice_get_length(row_slice) == RowSliceLength);
+
+    if (col < 0)
     {
-
+        col += Cols;
+    }
+
+    bool is_row_reversed = row_slice.step < 0;
+
+    int ii = 0;
+    for (int i = 0; i < Rows; ++i)
+    {
+        bool in_row_slice = is_row_reversed
+            ? (i <= row_slice.start && i > row_slice.stop && (row_slice.start - i) % (-row_slice.step) == 0)
+            : (i >= row_slice.start && i < row_slice.stop && (i - row_slice.start) % row_slice.step == 0);
+
+        if (!in_row_slice)
         {
-
+            for (int j = 0; j < Cols; ++j)
+            {
                 adj_m.data[i][j] += adj_ret.data[i][j];
+            }
+        }
+        else
+        {
+            for (int j = 0; j < Cols; ++j)
+            {
+                if (j != col)
+                {
+                    adj_m.data[i][j] += adj_ret.data[i][j];
+                }
+                else
+                {
+                    adj_value.c[ii] += adj_ret.data[i][j];
+                }
+            }
+
+            ++ii;
         }
     }
+
+    assert(ii == RowSliceLength);
 }
 
 
-template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void adj_assign_copy(
-
+template<unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_assign_copy(
+    mat_t<Rows,Cols,Type>& m, int row, slice_t col_slice, vec_t<ColSliceLength, Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, int& adj_row, slice_t& adj_col_slice, vec_t<ColSliceLength, Type>& adj_value,
+    mat_t<Rows,Cols,Type>& adj_ret
+)
 {
 #ifndef NDEBUG
-    if (row <
+    if (row < -(int)Rows || row >= (int)Rows)
     {
         printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
-
+    if (row < 0)
     {
-
+        row += Rows;
+    }
+
+    assert(col_slice.start >= 0 && col_slice.start <= (int)Cols);
+    assert(col_slice.stop >= -1 && col_slice.stop <= (int)Cols);
+    assert(col_slice.step != 0 && col_slice.step < 0 ? col_slice.start >= col_slice.stop : col_slice.start <= col_slice.stop);
+    assert(slice_get_length(col_slice) == ColSliceLength);
+
+    bool is_col_reversed = col_slice.step < 0;
+
+    int ii = 0;
+    for (int i = 0; i < Rows; ++i)
+    {
+        if (i != row)
         {
-
-
-
+            for (int j = 0; j < Cols; ++j)
+            {
+                adj_m.data[i][j] += adj_ret.data[i][j];
+            }
+        }
+        else
+        {
+            for (int j = 0; j < Cols; ++j)
+            {
+                bool in_col_slice = is_col_reversed
+                    ? (j <= col_slice.start && j > col_slice.stop && (col_slice.start - j) % (-col_slice.step) == 0)
+                    : (j >= col_slice.start && j < col_slice.stop && (j - col_slice.start) % col_slice.step == 0);
+
+                if (!in_col_slice)
+                {
+                    adj_m.data[i][j] += adj_ret.data[i][j];
+                }
+                else
+                {
+                    adj_value.c[ii] += adj_ret.data[i][j];
+                    ++ii;
+                }
+            }
+        }
+    }
+
+    assert(ii == ColSliceLength);
+}
+
+
+template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_assign_copy(
+    mat_t<Rows,Cols,Type>& m, slice_t row_slice, slice_t col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& value,
+    mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, slice_t& adj_col_slice, mat_t<RowSliceLength, ColSliceLength, Type>& adj_value,
+    mat_t<Rows,Cols,Type>& adj_ret
+)
+{
+    assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
+    assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
+    assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
+    assert(slice_get_length(row_slice) == RowSliceLength);
+
+    bool is_row_reversed = row_slice.step < 0;
+    bool is_col_reversed = col_slice.step < 0;
+
+    int ii = 0;
+    for (int i = 0; i < Rows; ++i)
+    {
+        bool in_row_slice = is_row_reversed
+            ? (i <= row_slice.start && i > row_slice.stop && (row_slice.start - i) % (-row_slice.step) == 0)
+            : (i >= row_slice.start && i < row_slice.stop && (i - row_slice.start) % row_slice.step == 0);
+
+        if (!in_row_slice)
+        {
+            for (int j = 0; j < Cols; ++j)
+            {
                 adj_m.data[i][j] += adj_ret.data[i][j];
+            }
+        }
+        else
+        {
+            assert(col_slice.start >= 0 && col_slice.start <= (int)Cols);
+            assert(col_slice.stop >= -1 && col_slice.stop <= (int)Cols);
+            assert(col_slice.step != 0 && col_slice.step < 0 ? col_slice.start >= col_slice.stop : col_slice.start <= col_slice.stop);
+            assert(slice_get_length(col_slice) == ColSliceLength);
+
+            int jj = 0;
+            for (int j = 0; j < Cols; ++j)
+            {
+                bool in_col_slice = is_col_reversed
+                    ? (j <= col_slice.start && j > col_slice.stop && (col_slice.start - j) % (-col_slice.step) == 0)
+                    : (j >= col_slice.start && j < col_slice.stop && (j - col_slice.start) % col_slice.step == 0);
+
+                if (!in_col_slice)
+                {
+                    adj_m.data[i][j] += adj_ret.data[i][j];
+                }
+                else
+                {
+                    adj_value.data[ii][jj] += adj_ret.data[i][j];
+                    ++jj;
+                }
+            }
+
+            assert(jj == ColSliceLength);
+            ++ii;
         }
     }
+
+    assert(ii == RowSliceLength);
 }
 
 
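For the copy variants the adjoint has to split the incoming gradient: entries the forward call overwrote send their gradient to adj_value, all other entries pass theirs through to adj_m, which is exactly the if/else routing in the loops above. A minimal one-row sketch of that split (illustrative names, not Warp API):

#include <cassert>

// Forward: ret = copy of m with slot 0 overwritten by v.  Reverse: the
// gradient on ret splits -- overwritten slots feed adj_v, the rest adj_m.
int main()
{
    float adj_ret[2] = {0.3f, 0.7f};   // incoming gradient on the copy
    float adj_m[2]   = {0.0f, 0.0f};
    float adj_v      = 0.0f;

    adj_v    += adj_ret[0];            // slot 0 came from v
    adj_m[1] += adj_ret[1];            // slot 1 still came from m

    assert(adj_v == 0.3f && adj_m[0] == 0.0f && adj_m[1] == 0.7f);
    return 0;
}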
@@ -940,6 +2496,21 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> add(const mat_t<Rows,Cols,Type>& a, c
     return t;
 }
 
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> add(Type a, const mat_t<Rows,Cols,Type>& b)
+{
+    mat_t<Rows,Cols,Type> t;
+    for (unsigned i=0; i < Rows; ++i)
+    {
+        for (unsigned j=0; j < Cols; ++j)
+        {
+            t.data[i][j] = a + b.data[i][j];
+        }
+    }
+
+    return t;
+}
+
 template<unsigned Rows, unsigned Cols, typename Type>
 inline CUDA_CALLABLE mat_t<Rows,Cols,Type> sub(const mat_t<Rows,Cols,Type>& a, const mat_t<Rows,Cols,Type>& b)
 {
@@ -955,6 +2526,21 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> sub(const mat_t<Rows,Cols,Type>& a, c
     return t;
 }
 
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> sub(Type a, const mat_t<Rows,Cols,Type>& b)
+{
+    mat_t<Rows,Cols,Type> t;
+    for (unsigned i=0; i < Rows; ++i)
+    {
+        for (unsigned j=0; j < Cols; ++j)
+        {
+            t.data[i][j] = a - b.data[i][j];
+        }
+    }
+
+    return t;
+}
+
 template<unsigned Rows, unsigned Cols, typename Type>
 inline CUDA_CALLABLE mat_t<Rows,Cols,Type> div(const mat_t<Rows,Cols,Type>& a, Type b)
 {
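Both new scalar overloads broadcast the scalar across every element of the matrix. A standalone sketch of the same loop shape on plain arrays (toy example, not the warp types):

#include <cstdio>

// Elementwise scalar-matrix broadcasting, matching the shape of the new
// add(Type, mat_t) and sub(Type, mat_t) overloads (2x2 floats for brevity).
int main()
{
    float b[2][2] = {{1, 2}, {3, 4}};
    float add_t[2][2], sub_t[2][2];
    float a = 10.0f;

    for (int i = 0; i < 2; ++i)
        for (int j = 0; j < 2; ++j)
        {
            add_t[i][j] = a + b[i][j];   // 11 12 / 13 14
            sub_t[i][j] = a - b[i][j];   //  9  8 /  7  6
        }

    printf("%g %g\n", add_t[0][0], sub_t[1][1]);  // prints 11 and 6
    return 0;
}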
@@ -1469,20 +3055,194 @@ template<unsigned Rows, unsigned Cols, typename Type>
 inline void CUDA_CALLABLE adj_extract(const mat_t<Rows,Cols,Type>& m, int row, int col, mat_t<Rows,Cols,Type>& adj_m, int& adj_row, int& adj_col, Type adj_ret)
 {
 #ifndef NDEBUG
-    if (row <
+    if (row < -(int)Rows || row >= (int)Rows)
     {
         printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
         assert(0);
     }
-    if (col <
+    if (col < -(int)Cols || col >= (int)Cols)
     {
         printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
         assert(0);
     }
 #endif
+
+    if (row < 0)
+    {
+        row += Rows;
+    }
+    if (col < 0)
+    {
+        col += Cols;
+    }
+
     adj_m.data[row][col] += adj_ret;
 }
 
+template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_extract(
+    const mat_t<Rows,Cols,Type>& m, slice_t row_slice,
+    mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice,
+    const mat_t<RowSliceLength, ColSliceLength, Type>& adj_ret
+)
+{
+    static_assert(
+        RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols,
+        "Expected RowSliceLength == 0 ? ColSliceLength == 0 : ColSliceLength == Cols"
+    );
+
+    assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
+    assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
+    assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
+    assert(slice_get_length(row_slice) == RowSliceLength);
+
+    bool is_row_reversed = row_slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = row_slice.start;
+        is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
+        i += row_slice.step
+    )
+    {
+        for (int j = 0; j < Cols; ++j)
+        {
+            adj_m.data[i][j] += adj_ret.data[ii][j];
+        }
+
+        ++ii;
+    }
+
+    assert(ii == RowSliceLength);
+}
+
+template<unsigned RowSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_extract(
+    const mat_t<Rows,Cols,Type>& m, slice_t row_slice, int col,
+    mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, int& adj_col,
+    const vec_t<RowSliceLength, Type>& adj_ret
+)
+{
+#ifndef NDEBUG
+    if (col < -(int)Cols || col >= (int)Cols)
+    {
+        printf("mat col index %d out of bounds at %s %d\n", col, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
+    assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
+    assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
+    assert(slice_get_length(row_slice) == RowSliceLength);
+
+    if (col < 0)
+    {
+        col += Cols;
+    }
+
+    bool is_row_reversed = row_slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = row_slice.start;
+        is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
+        i += row_slice.step
+    )
+    {
+        adj_m.data[i][col] += adj_ret.c[ii];
+        ++ii;
+    }
+
+    assert(ii == RowSliceLength);
+}
+
+template<unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_extract(
+    const mat_t<Rows,Cols,Type>& m, int row, slice_t col_slice,
+    mat_t<Rows,Cols,Type>& adj_m, int& adj_row, slice_t& adj_col_slice,
+    const vec_t<ColSliceLength, Type>& adj_ret
+)
+{
+#ifndef NDEBUG
+    if (row < -(int)Rows || row >= (int)Rows)
+    {
+        printf("mat row index %d out of bounds at %s %d\n", row, __FILE__, __LINE__);
+        assert(0);
+    }
+#endif
+
+    assert(col_slice.start >= 0 && col_slice.start <= (int)Cols);
+    assert(col_slice.stop >= -1 && col_slice.stop <= (int)Cols);
+    assert(col_slice.step != 0 && col_slice.step < 0 ? col_slice.start >= col_slice.stop : col_slice.start <= col_slice.stop);
+    assert(slice_get_length(col_slice) == ColSliceLength);
+
+    if (row < 0)
+    {
+        row += Rows;
+    }
+
+    bool is_col_reversed = col_slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = col_slice.start;
+        is_col_reversed ? (i > col_slice.stop) : (i < col_slice.stop);
+        i += col_slice.step
+    )
+    {
+        adj_m.data[row][i] += adj_ret.c[ii];
+        ++ii;
+    }
+
+    assert(ii == ColSliceLength);
+}
+
+template<unsigned RowSliceLength, unsigned ColSliceLength, unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_extract(
+    const mat_t<Rows,Cols,Type>& m, slice_t row_slice, slice_t col_slice,
+    mat_t<Rows,Cols,Type>& adj_m, slice_t& adj_row_slice, slice_t& adj_col_slice,
+    const mat_t<RowSliceLength, ColSliceLength, Type>& adj_ret
+)
+{
+    assert(row_slice.start >= 0 && row_slice.start <= (int)Rows);
+    assert(row_slice.stop >= -1 && row_slice.stop <= (int)Rows);
+    assert(row_slice.step != 0 && row_slice.step < 0 ? row_slice.start >= row_slice.stop : row_slice.start <= row_slice.stop);
+    assert(slice_get_length(row_slice) == RowSliceLength);
+
+    assert(col_slice.start >= 0 && col_slice.start <= (int)Cols);
+    assert(col_slice.stop >= -1 && col_slice.stop <= (int)Cols);
+    assert(col_slice.step != 0 && col_slice.step < 0 ? col_slice.start >= col_slice.stop : col_slice.start <= col_slice.stop);
+    assert(slice_get_length(col_slice) == ColSliceLength);
+
+    bool is_row_reversed = row_slice.step < 0;
+    bool is_col_reversed = col_slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = row_slice.start;
+        is_row_reversed ? (i > row_slice.stop) : (i < row_slice.stop);
+        i += row_slice.step
+    )
+    {
+        int jj = 0;
+        for (
+            int j = col_slice.start;
+            is_col_reversed ? (j > col_slice.stop) : (j < col_slice.stop);
+            j += col_slice.step
+        )
+        {
+            adj_m.data[i][j] += adj_ret.data[ii][jj];
+            ++jj;
+        }
+
+        assert(jj == ColSliceLength);
+        ++ii;
+    }
+
+    assert(ii == RowSliceLength);
+}
+
 template<unsigned Rows, unsigned Cols, typename Type>
 inline CUDA_CALLABLE void adj_outer(const vec_t<Rows,Type>& a, const vec_t<Cols,Type>& b, vec_t<Rows,Type>& adj_a, vec_t<Cols,Type>& adj_b, const mat_t<Rows,Cols,Type>& adj_ret)
 {
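The new adj_extract overloads invert the corresponding reads: each element the forward extract copied out of m receives the matching component of adj_ret, with the same wraparound and slice bookkeeping as the forward path. A minimal sketch for the scalar case (illustrative names, not Warp API):

#include <cassert>

// Forward: out = m[row][col] (a read).  Reverse: the gradient on the
// extracted value scatters back onto the entry that was read.
int main()
{
    float adj_m[2][2] = {};
    float adj_ret = 2.5f;      // gradient w.r.t. the extracted element

    int row = -1, col = 0;     // negative row wraps, as in the new checks
    if (row < 0) row += 2;

    adj_m[row][col] += adj_ret;

    assert(adj_m[1][0] == 2.5f);
    return 0;
}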
@@ -1503,6 +3263,23 @@ inline CUDA_CALLABLE void adj_add(const mat_t<Rows,Cols,Type>& a, const mat_t<Ro
     }
 }
 
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_add(
+    Type a, const mat_t<Rows,Cols,Type>& b,
+    Type& adj_a, mat_t<Rows,Cols,Type>& adj_b,
+    const mat_t<Rows,Cols,Type>& adj_ret
+)
+{
+    for (unsigned i=0; i < Rows; ++i)
+    {
+        for (unsigned j=0; j < Cols; ++j)
+        {
+            adj_a += adj_ret.data[i][j];
+            adj_b.data[i][j] += adj_ret.data[i][j];
+        }
+    }
+}
+
 template<unsigned Rows, unsigned Cols, typename Type>
 inline CUDA_CALLABLE void adj_sub(const mat_t<Rows,Cols,Type>& a, const mat_t<Rows,Cols,Type>& b, mat_t<Rows,Cols,Type>& adj_a, mat_t<Rows,Cols,Type>& adj_b, const mat_t<Rows,Cols,Type>& adj_ret)
 {
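adj_add above treats the scalar and matrix operands symmetrically, since both partial derivatives are +1; the scalar adj_sub that follows differs only in sign, because for t = a - b[i][j] the derivative with respect to b is -1, hence its "-=" accumulation. A tiny single-element sketch (illustrative, not Warp API):

#include <cassert>

// d(a+b)/da = d(a+b)/db = +1; d(a-b)/da = +1 but d(a-b)/db = -1.
int main()
{
    float adj_ret = 1.5f;      // gradient on one output element
    float adj_a = 0.0f, adj_b = 0.0f;

    // adj_add(Type, mat) accumulation for a single element:
    adj_a += adj_ret;  adj_b += adj_ret;
    // adj_sub(Type, mat) flips the sign on the matrix operand:
    adj_a += adj_ret;  adj_b -= adj_ret;

    assert(adj_a == 3.0f && adj_b == 0.0f);
    return 0;
}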
@@ -1516,6 +3293,23 @@ inline CUDA_CALLABLE void adj_sub(const mat_t<Rows,Cols,Type>& a, const mat_t<Ro
     }
 }
 
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_sub(
+    Type a, const mat_t<Rows,Cols,Type>& b,
+    Type& adj_a, mat_t<Rows,Cols,Type>& adj_b,
+    const mat_t<Rows,Cols,Type>& adj_ret
+)
+{
+    for (unsigned i=0; i < Rows; ++i)
+    {
+        for (unsigned j=0; j < Cols; ++j)
+        {
+            adj_a += adj_ret.data[i][j];
+            adj_b.data[i][j] -= adj_ret.data[i][j];
+        }
+    }
+}
+
 template<unsigned Rows, unsigned Cols, typename Type>
 inline CUDA_CALLABLE void adj_div(const mat_t<Rows,Cols,Type>& a, Type s, mat_t<Rows,Cols,Type>& adj_a, Type& adj_s, const mat_t<Rows,Cols,Type>& adj_ret)
 {