warp-lang 1.8.1__py3-none-win_amd64.whl → 1.9.1__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +282 -103
- warp/__init__.pyi +1904 -114
- warp/bin/warp-clang.dll +0 -0
- warp/bin/warp.dll +0 -0
- warp/build.py +93 -30
- warp/build_dll.py +331 -101
- warp/builtins.py +1244 -160
- warp/codegen.py +317 -206
- warp/config.py +1 -1
- warp/context.py +1465 -789
- warp/examples/core/example_marching_cubes.py +1 -0
- warp/examples/core/example_render_opengl.py +100 -3
- warp/examples/fem/example_apic_fluid.py +98 -52
- warp/examples/fem/example_convection_diffusion_dg.py +25 -4
- warp/examples/fem/example_diffusion_mgpu.py +8 -3
- warp/examples/fem/utils.py +68 -22
- warp/examples/interop/example_jax_kernel.py +2 -1
- warp/fabric.py +1 -1
- warp/fem/cache.py +27 -19
- warp/fem/domain.py +2 -2
- warp/fem/field/nodal_field.py +2 -2
- warp/fem/field/virtual.py +264 -166
- warp/fem/geometry/geometry.py +5 -5
- warp/fem/integrate.py +129 -51
- warp/fem/space/restriction.py +4 -0
- warp/fem/space/shape/tet_shape_function.py +3 -10
- warp/jax_experimental/custom_call.py +25 -2
- warp/jax_experimental/ffi.py +22 -1
- warp/jax_experimental/xla_ffi.py +16 -7
- warp/marching_cubes.py +708 -0
- warp/native/array.h +99 -4
- warp/native/builtin.h +86 -9
- warp/native/bvh.cpp +64 -28
- warp/native/bvh.cu +58 -58
- warp/native/bvh.h +2 -2
- warp/native/clang/clang.cpp +7 -7
- warp/native/coloring.cpp +8 -2
- warp/native/crt.cpp +2 -2
- warp/native/crt.h +3 -5
- warp/native/cuda_util.cpp +41 -10
- warp/native/cuda_util.h +10 -4
- warp/native/exports.h +1842 -1908
- warp/native/fabric.h +2 -1
- warp/native/hashgrid.cpp +37 -37
- warp/native/hashgrid.cu +2 -2
- warp/native/initializer_array.h +1 -1
- warp/native/intersect.h +2 -2
- warp/native/mat.h +1910 -116
- warp/native/mathdx.cpp +43 -43
- warp/native/mesh.cpp +24 -24
- warp/native/mesh.cu +26 -26
- warp/native/mesh.h +4 -2
- warp/native/nanovdb/GridHandle.h +179 -12
- warp/native/nanovdb/HostBuffer.h +8 -7
- warp/native/nanovdb/NanoVDB.h +517 -895
- warp/native/nanovdb/NodeManager.h +323 -0
- warp/native/nanovdb/PNanoVDB.h +2 -2
- warp/native/quat.h +331 -14
- warp/native/range.h +7 -1
- warp/native/reduce.cpp +10 -10
- warp/native/reduce.cu +13 -14
- warp/native/runlength_encode.cpp +2 -2
- warp/native/runlength_encode.cu +5 -5
- warp/native/scan.cpp +3 -3
- warp/native/scan.cu +4 -4
- warp/native/sort.cpp +10 -10
- warp/native/sort.cu +40 -31
- warp/native/sort.h +2 -0
- warp/native/sparse.cpp +8 -8
- warp/native/sparse.cu +13 -13
- warp/native/spatial.h +366 -17
- warp/native/temp_buffer.h +2 -2
- warp/native/tile.h +471 -82
- warp/native/vec.h +328 -14
- warp/native/volume.cpp +54 -54
- warp/native/volume.cu +1 -1
- warp/native/volume.h +2 -1
- warp/native/volume_builder.cu +30 -37
- warp/native/warp.cpp +150 -149
- warp/native/warp.cu +377 -216
- warp/native/warp.h +227 -226
- warp/optim/linear.py +736 -271
- warp/render/imgui_manager.py +289 -0
- warp/render/render_opengl.py +99 -18
- warp/render/render_usd.py +1 -0
- warp/sim/graph_coloring.py +2 -2
- warp/sparse.py +558 -175
- warp/tests/aux_test_module_aot.py +7 -0
- warp/tests/cuda/test_async.py +3 -3
- warp/tests/cuda/test_conditional_captures.py +101 -0
- warp/tests/geometry/test_hash_grid.py +38 -0
- warp/tests/geometry/test_marching_cubes.py +233 -12
- warp/tests/interop/test_jax.py +608 -28
- warp/tests/sim/test_coloring.py +6 -6
- warp/tests/test_array.py +58 -5
- warp/tests/test_codegen.py +4 -3
- warp/tests/test_context.py +8 -15
- warp/tests/test_enum.py +136 -0
- warp/tests/test_examples.py +2 -2
- warp/tests/test_fem.py +49 -6
- warp/tests/test_fixedarray.py +229 -0
- warp/tests/test_func.py +18 -15
- warp/tests/test_future_annotations.py +7 -5
- warp/tests/test_linear_solvers.py +30 -0
- warp/tests/test_map.py +15 -1
- warp/tests/test_mat.py +1518 -378
- warp/tests/test_mat_assign_copy.py +178 -0
- warp/tests/test_mat_constructors.py +574 -0
- warp/tests/test_module_aot.py +287 -0
- warp/tests/test_print.py +69 -0
- warp/tests/test_quat.py +140 -34
- warp/tests/test_quat_assign_copy.py +145 -0
- warp/tests/test_reload.py +2 -1
- warp/tests/test_sparse.py +71 -0
- warp/tests/test_spatial.py +140 -34
- warp/tests/test_spatial_assign_copy.py +160 -0
- warp/tests/test_struct.py +43 -3
- warp/tests/test_tuple.py +96 -0
- warp/tests/test_types.py +61 -20
- warp/tests/test_vec.py +179 -34
- warp/tests/test_vec_assign_copy.py +143 -0
- warp/tests/tile/test_tile.py +245 -18
- warp/tests/tile/test_tile_cholesky.py +605 -0
- warp/tests/tile/test_tile_load.py +169 -0
- warp/tests/tile/test_tile_mathdx.py +2 -558
- warp/tests/tile/test_tile_matmul.py +1 -1
- warp/tests/tile/test_tile_mlp.py +1 -1
- warp/tests/tile/test_tile_shared_memory.py +5 -5
- warp/tests/unittest_suites.py +6 -0
- warp/tests/walkthrough_debug.py +1 -1
- warp/thirdparty/unittest_parallel.py +108 -9
- warp/types.py +571 -267
- warp/utils.py +68 -86
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/METADATA +29 -69
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/RECORD +138 -128
- warp/native/marching.cpp +0 -19
- warp/native/marching.cu +0 -514
- warp/native/marching.h +0 -19
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/WHEEL +0 -0
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/licenses/LICENSE.md +0 -0
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/top_level.txt +0 -0
warp/native/vec.h
CHANGED
|
@@ -25,7 +25,7 @@ namespace wp
|
|
|
25
25
|
template<unsigned Length, typename Type>
|
|
26
26
|
struct vec_t
|
|
27
27
|
{
|
|
28
|
-
Type c[Length];
|
|
28
|
+
Type c[Length < 1 ? 1 : Length];
|
|
29
29
|
|
|
30
30
|
inline CUDA_CALLABLE vec_t()
|
|
31
31
|
: c()
|
|
@@ -440,27 +440,64 @@ template<unsigned Length, typename Type>
|
|
|
440
440
|
// Returns component `idx` of `a`. A negative `idx` is offset by Length before
// the lookup, so -1 selects the last component.
inline CUDA_CALLABLE Type extract(const vec_t<Length, Type> & a, int idx)
{
#ifndef NDEBUG
    // Debug builds only: report and trap any index outside [-Length, Length).
    const bool in_bounds = (idx >= -(int)Length) && (idx < (int)Length);
    if (!in_bounds)
    {
        printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
        assert(0);
    }
#endif

    // Map a negative index onto its non-negative equivalent.
    const int pos = (idx < 0) ? int(idx + Length) : idx;
    return a[pos];
}
|
|
452
457
|
|
|
458
|
+
template<unsigned SliceLength, unsigned Length, typename Type>
|
|
459
|
+
inline CUDA_CALLABLE vec_t<SliceLength, Type> extract(const vec_t<Length, Type> & a, slice_t slice)
|
|
460
|
+
{
|
|
461
|
+
vec_t<SliceLength, Type> ret;
|
|
462
|
+
|
|
463
|
+
assert(slice.start >= 0 && slice.start <= (int)Length);
|
|
464
|
+
assert(slice.stop >= -1 && slice.stop <= (int)Length);
|
|
465
|
+
assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
|
|
466
|
+
assert(slice_get_length(slice) == SliceLength);
|
|
467
|
+
|
|
468
|
+
bool is_reversed = slice.step < 0;
|
|
469
|
+
|
|
470
|
+
int ii = 0;
|
|
471
|
+
for (
|
|
472
|
+
int i = slice.start;
|
|
473
|
+
is_reversed ? (i > slice.stop) : (i < slice.stop);
|
|
474
|
+
i += slice.step
|
|
475
|
+
)
|
|
476
|
+
{
|
|
477
|
+
ret[ii] = a[i];
|
|
478
|
+
++ii;
|
|
479
|
+
}
|
|
480
|
+
|
|
481
|
+
assert(ii == SliceLength);
|
|
482
|
+
return ret;
|
|
483
|
+
}
|
|
484
|
+
|
|
453
485
|
template<unsigned Length, typename Type>
|
|
454
486
|
inline CUDA_CALLABLE Type* index(vec_t<Length, Type>& v, int idx)
|
|
455
487
|
{
|
|
456
488
|
#ifndef NDEBUG
|
|
457
|
-
if (idx <
|
|
489
|
+
if (idx < -(int)Length || idx >= (int)Length)
|
|
458
490
|
{
|
|
459
491
|
printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
|
|
460
492
|
assert(0);
|
|
461
493
|
}
|
|
462
494
|
#endif
|
|
463
495
|
|
|
496
|
+
if (idx < 0)
|
|
497
|
+
{
|
|
498
|
+
idx += Length;
|
|
499
|
+
}
|
|
500
|
+
|
|
464
501
|
return &v[idx];
|
|
465
502
|
}
|
|
466
503
|
|
|
@@ -468,13 +505,18 @@ template<unsigned Length, typename Type>
|
|
|
468
505
|
// Returns a pointer to component `idx` of the vector that `v` points to.
// A negative `idx` is offset by Length before the lookup.
inline CUDA_CALLABLE Type* indexref(vec_t<Length, Type>* v, int idx)
{
#ifndef NDEBUG
    // Debug builds only: report and trap any index outside [-Length, Length).
    const bool too_low = idx < -(int)Length;
    const bool too_high = idx >= (int)Length;
    if (too_low || too_high)
    {
        printf("vec store %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
        assert(0);
    }
#endif

    // Map a negative index onto its non-negative equivalent.
    const int pos = (idx < 0) ? int(idx + Length) : idx;
    vec_t<Length, Type>& target = *v;
    return &(target[pos]);
}
|
|
480
522
|
|
|
@@ -498,120 +540,325 @@ template<unsigned Length, typename Type>
|
|
|
498
540
|
// Adds `value` to component `idx` of `v` in place. A negative `idx` is offset
// by Length before the update.
inline CUDA_CALLABLE void add_inplace(vec_t<Length, Type>& v, int idx, Type value)
{
#ifndef NDEBUG
    // Debug builds only: report and trap any index outside [-Length, Length).
    const bool in_bounds = (idx >= -(int)Length) && (idx < (int)Length);
    if (!in_bounds)
    {
        printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
        assert(0);
    }
#endif

    // Map a negative index onto its non-negative equivalent.
    const int pos = (idx < 0) ? int(idx + Length) : idx;
    v[pos] += value;
}
|
|
510
557
|
|
|
511
558
|
|
|
559
|
+
template<unsigned SliceLength, unsigned Length, typename Type>
|
|
560
|
+
inline CUDA_CALLABLE void add_inplace(vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a)
|
|
561
|
+
{
|
|
562
|
+
assert(slice.start >= 0 && slice.start <= (int)Length);
|
|
563
|
+
assert(slice.stop >= -1 && slice.stop <= (int)Length);
|
|
564
|
+
assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
|
|
565
|
+
assert(slice_get_length(slice) == SliceLength);
|
|
566
|
+
|
|
567
|
+
bool is_reversed = slice.step < 0;
|
|
568
|
+
|
|
569
|
+
int ii = 0;
|
|
570
|
+
for (
|
|
571
|
+
int i = slice.start;
|
|
572
|
+
is_reversed ? (i > slice.stop) : (i < slice.stop);
|
|
573
|
+
i += slice.step
|
|
574
|
+
)
|
|
575
|
+
{
|
|
576
|
+
v[i] += a[ii];
|
|
577
|
+
++ii;
|
|
578
|
+
}
|
|
579
|
+
|
|
580
|
+
assert(ii == SliceLength);
|
|
581
|
+
}
|
|
582
|
+
|
|
583
|
+
|
|
512
584
|
template<unsigned Length, typename Type>
|
|
513
585
|
inline CUDA_CALLABLE void adj_add_inplace(vec_t<Length, Type>& v, int idx, Type value,
|
|
514
586
|
vec_t<Length, Type>& adj_v, int adj_idx, Type& adj_value)
|
|
515
587
|
{
|
|
516
588
|
#ifndef NDEBUG
|
|
517
|
-
if (idx <
|
|
589
|
+
if (idx < -(int)Length || idx >= (int)Length)
|
|
518
590
|
{
|
|
519
591
|
printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
|
|
520
592
|
assert(0);
|
|
521
593
|
}
|
|
522
594
|
#endif
|
|
523
595
|
|
|
596
|
+
if (idx < 0)
|
|
597
|
+
{
|
|
598
|
+
idx += Length;
|
|
599
|
+
}
|
|
600
|
+
|
|
524
601
|
adj_value += adj_v[idx];
|
|
525
602
|
}
|
|
526
603
|
|
|
527
604
|
|
|
605
|
+
template<unsigned SliceLength, unsigned Length, typename Type>
|
|
606
|
+
inline CUDA_CALLABLE void adj_add_inplace(
|
|
607
|
+
const vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a,
|
|
608
|
+
vec_t<Length, Type>& adj_v, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
|
|
609
|
+
)
|
|
610
|
+
{
|
|
611
|
+
assert(slice.start >= 0 && slice.start <= (int)Length);
|
|
612
|
+
assert(slice.stop >= -1 && slice.stop <= (int)Length);
|
|
613
|
+
assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
|
|
614
|
+
assert(slice_get_length(slice) == SliceLength);
|
|
615
|
+
|
|
616
|
+
bool is_reversed = slice.step < 0;
|
|
617
|
+
|
|
618
|
+
int ii = 0;
|
|
619
|
+
for (
|
|
620
|
+
int i = slice.start;
|
|
621
|
+
is_reversed ? (i > slice.stop) : (i < slice.stop);
|
|
622
|
+
i += slice.step
|
|
623
|
+
)
|
|
624
|
+
{
|
|
625
|
+
adj_a[ii] += adj_v[i];
|
|
626
|
+
++ii;
|
|
627
|
+
}
|
|
628
|
+
|
|
629
|
+
assert(ii == SliceLength);
|
|
630
|
+
}
|
|
631
|
+
|
|
632
|
+
|
|
528
633
|
template<unsigned Length, typename Type>
|
|
529
634
|
inline CUDA_CALLABLE void sub_inplace(vec_t<Length, Type>& v, int idx, Type value)
|
|
530
635
|
{
|
|
531
636
|
#ifndef NDEBUG
|
|
532
|
-
if (idx <
|
|
637
|
+
if (idx < -(int)Length || idx >= (int)Length)
|
|
533
638
|
{
|
|
534
639
|
printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
|
|
535
640
|
assert(0);
|
|
536
641
|
}
|
|
537
642
|
#endif
|
|
538
643
|
|
|
644
|
+
if (idx < 0)
|
|
645
|
+
{
|
|
646
|
+
idx += Length;
|
|
647
|
+
}
|
|
648
|
+
|
|
539
649
|
v[idx] -= value;
|
|
540
650
|
}
|
|
541
651
|
|
|
542
652
|
|
|
653
|
+
template<unsigned SliceLength, unsigned Length, typename Type>
|
|
654
|
+
inline CUDA_CALLABLE void sub_inplace(vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a)
|
|
655
|
+
{
|
|
656
|
+
assert(slice.start >= 0 && slice.start <= (int)Length);
|
|
657
|
+
assert(slice.stop >= -1 && slice.stop <= (int)Length);
|
|
658
|
+
assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
|
|
659
|
+
assert(slice_get_length(slice) == SliceLength);
|
|
660
|
+
|
|
661
|
+
bool is_reversed = slice.step < 0;
|
|
662
|
+
|
|
663
|
+
int ii = 0;
|
|
664
|
+
for (
|
|
665
|
+
int i = slice.start;
|
|
666
|
+
is_reversed ? (i > slice.stop) : (i < slice.stop);
|
|
667
|
+
i += slice.step
|
|
668
|
+
)
|
|
669
|
+
{
|
|
670
|
+
v[i] -= a[ii];
|
|
671
|
+
++ii;
|
|
672
|
+
}
|
|
673
|
+
|
|
674
|
+
assert(ii == SliceLength);
|
|
675
|
+
}
|
|
676
|
+
|
|
677
|
+
|
|
543
678
|
template<unsigned Length, typename Type>
|
|
544
679
|
inline CUDA_CALLABLE void adj_sub_inplace(vec_t<Length, Type>& v, int idx, Type value,
|
|
545
680
|
vec_t<Length, Type>& adj_v, int adj_idx, Type& adj_value)
|
|
546
681
|
{
|
|
547
682
|
#ifndef NDEBUG
|
|
548
|
-
if (idx <
|
|
683
|
+
if (idx < -(int)Length || idx >= (int)Length)
|
|
549
684
|
{
|
|
550
685
|
printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
|
|
551
686
|
assert(0);
|
|
552
687
|
}
|
|
553
688
|
#endif
|
|
554
689
|
|
|
690
|
+
if (idx < 0)
|
|
691
|
+
{
|
|
692
|
+
idx += Length;
|
|
693
|
+
}
|
|
694
|
+
|
|
555
695
|
adj_value -= adj_v[idx];
|
|
556
696
|
}
|
|
557
697
|
|
|
558
698
|
|
|
699
|
+
template<unsigned SliceLength, unsigned Length, typename Type>
|
|
700
|
+
inline CUDA_CALLABLE void adj_sub_inplace(
|
|
701
|
+
const vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a,
|
|
702
|
+
vec_t<Length, Type>& adj_v, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
|
|
703
|
+
)
|
|
704
|
+
{
|
|
705
|
+
assert(slice.start >= 0 && slice.start <= (int)Length);
|
|
706
|
+
assert(slice.stop >= -1 && slice.stop <= (int)Length);
|
|
707
|
+
assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
|
|
708
|
+
assert(slice_get_length(slice) == SliceLength);
|
|
709
|
+
|
|
710
|
+
bool is_reversed = slice.step < 0;
|
|
711
|
+
|
|
712
|
+
int ii = 0;
|
|
713
|
+
for (
|
|
714
|
+
int i = slice.start;
|
|
715
|
+
is_reversed ? (i > slice.stop) : (i < slice.stop);
|
|
716
|
+
i += slice.step
|
|
717
|
+
)
|
|
718
|
+
{
|
|
719
|
+
adj_a[ii] -= adj_v[i];
|
|
720
|
+
++ii;
|
|
721
|
+
}
|
|
722
|
+
|
|
723
|
+
assert(ii == SliceLength);
|
|
724
|
+
}
|
|
725
|
+
|
|
726
|
+
|
|
559
727
|
template<unsigned Length, typename Type>
|
|
560
728
|
inline CUDA_CALLABLE void assign_inplace(vec_t<Length, Type>& v, int idx, Type value)
|
|
561
729
|
{
|
|
562
730
|
#ifndef NDEBUG
|
|
563
|
-
if (idx <
|
|
731
|
+
if (idx < -(int)Length || idx >= (int)Length)
|
|
564
732
|
{
|
|
565
733
|
printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
|
|
566
734
|
assert(0);
|
|
567
735
|
}
|
|
568
736
|
#endif
|
|
569
737
|
|
|
738
|
+
if (idx < 0)
|
|
739
|
+
{
|
|
740
|
+
idx += Length;
|
|
741
|
+
}
|
|
742
|
+
|
|
570
743
|
v[idx] = value;
|
|
571
744
|
}
|
|
572
745
|
|
|
746
|
+
template<unsigned SliceLength, unsigned Length, typename Type>
|
|
747
|
+
inline CUDA_CALLABLE void assign_inplace(vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a)
|
|
748
|
+
{
|
|
749
|
+
assert(slice.start >= 0 && slice.start <= (int)Length);
|
|
750
|
+
assert(slice.stop >= -1 && slice.stop <= (int)Length);
|
|
751
|
+
assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
|
|
752
|
+
assert(slice_get_length(slice) == SliceLength);
|
|
753
|
+
|
|
754
|
+
bool is_reversed = slice.step < 0;
|
|
755
|
+
|
|
756
|
+
int ii = 0;
|
|
757
|
+
for (
|
|
758
|
+
int i = slice.start;
|
|
759
|
+
is_reversed ? (i > slice.stop) : (i < slice.stop);
|
|
760
|
+
i += slice.step
|
|
761
|
+
)
|
|
762
|
+
{
|
|
763
|
+
v[i] = a[ii];
|
|
764
|
+
++ii;
|
|
765
|
+
}
|
|
766
|
+
|
|
767
|
+
assert(ii == SliceLength);
|
|
768
|
+
}
|
|
769
|
+
|
|
573
770
|
template<unsigned Length, typename Type>
|
|
574
771
|
inline CUDA_CALLABLE void adj_assign_inplace(vec_t<Length, Type>& v, int idx, Type value, vec_t<Length, Type>& adj_v, int& adj_idx, Type& adj_value)
|
|
575
772
|
{
|
|
576
773
|
#ifndef NDEBUG
|
|
577
|
-
if (idx <
|
|
774
|
+
if (idx < -(int)Length || idx >= (int)Length)
|
|
578
775
|
{
|
|
579
776
|
printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
|
|
580
777
|
assert(0);
|
|
581
778
|
}
|
|
582
779
|
#endif
|
|
583
780
|
|
|
781
|
+
if (idx < 0)
|
|
782
|
+
{
|
|
783
|
+
idx += Length;
|
|
784
|
+
}
|
|
785
|
+
|
|
584
786
|
adj_value += adj_v[idx];
|
|
585
787
|
}
|
|
586
788
|
|
|
789
|
+
template<unsigned SliceLength, unsigned Length, typename Type>
|
|
790
|
+
inline CUDA_CALLABLE void adj_assign_inplace(
|
|
791
|
+
const vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a,
|
|
792
|
+
vec_t<Length, Type>& adj_v, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
|
|
793
|
+
)
|
|
794
|
+
{
|
|
795
|
+
assert(slice.start >= 0 && slice.start <= (int)Length);
|
|
796
|
+
assert(slice.stop >= -1 && slice.stop <= (int)Length);
|
|
797
|
+
assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
|
|
798
|
+
assert(slice_get_length(slice) == SliceLength);
|
|
799
|
+
|
|
800
|
+
bool is_reversed = slice.step < 0;
|
|
801
|
+
|
|
802
|
+
int ii = 0;
|
|
803
|
+
for (
|
|
804
|
+
int i = slice.start;
|
|
805
|
+
is_reversed ? (i > slice.stop) : (i < slice.stop);
|
|
806
|
+
i += slice.step
|
|
807
|
+
)
|
|
808
|
+
{
|
|
809
|
+
adj_a[ii] += adj_v[i];
|
|
810
|
+
++ii;
|
|
811
|
+
}
|
|
812
|
+
|
|
813
|
+
assert(ii == SliceLength);
|
|
814
|
+
}
|
|
815
|
+
|
|
587
816
|
|
|
588
817
|
template<unsigned Length, typename Type>
|
|
589
818
|
inline CUDA_CALLABLE vec_t<Length, Type> assign_copy(vec_t<Length, Type>& v, int idx, Type value)
|
|
590
819
|
{
|
|
591
820
|
#ifndef NDEBUG
|
|
592
|
-
if (idx <
|
|
821
|
+
if (idx < -(int)Length || idx >= (int)Length)
|
|
593
822
|
{
|
|
594
823
|
printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
|
|
595
824
|
assert(0);
|
|
596
825
|
}
|
|
597
826
|
#endif
|
|
598
827
|
|
|
828
|
+
if (idx < 0)
|
|
829
|
+
{
|
|
830
|
+
idx += Length;
|
|
831
|
+
}
|
|
832
|
+
|
|
599
833
|
vec_t<Length, Type> ret(v);
|
|
600
834
|
ret[idx] = value;
|
|
601
835
|
return ret;
|
|
602
836
|
}
|
|
603
837
|
|
|
838
|
+
template<unsigned SliceLength, unsigned Length, typename Type>
|
|
839
|
+
inline CUDA_CALLABLE vec_t<Length, Type> assign_copy(vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a)
|
|
840
|
+
{
|
|
841
|
+
vec_t<Length, Type> ret(v);
|
|
842
|
+
assign_inplace<SliceLength>(ret, slice, a);
|
|
843
|
+
return ret;
|
|
844
|
+
}
|
|
845
|
+
|
|
604
846
|
template<unsigned Length, typename Type>
|
|
605
847
|
inline CUDA_CALLABLE void adj_assign_copy(vec_t<Length, Type>& v, int idx, Type value, vec_t<Length, Type>& adj_v, int& adj_idx, Type& adj_value, const vec_t<Length, Type>& adj_ret)
|
|
606
848
|
{
|
|
607
849
|
#ifndef NDEBUG
|
|
608
|
-
if (idx <
|
|
850
|
+
if (idx < -(int)Length || idx >= (int)Length)
|
|
609
851
|
{
|
|
610
852
|
printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
|
|
611
853
|
assert(0);
|
|
612
854
|
}
|
|
613
855
|
#endif
|
|
614
856
|
|
|
857
|
+
if (idx < 0)
|
|
858
|
+
{
|
|
859
|
+
idx += Length;
|
|
860
|
+
}
|
|
861
|
+
|
|
615
862
|
adj_value += adj_ret[idx];
|
|
616
863
|
for(unsigned i=0; i < Length; ++i)
|
|
617
864
|
{
|
|
@@ -620,6 +867,40 @@ inline CUDA_CALLABLE void adj_assign_copy(vec_t<Length, Type>& v, int idx, Type
|
|
|
620
867
|
}
|
|
621
868
|
}
|
|
622
869
|
|
|
870
|
+
template<unsigned SliceLength, unsigned Length, typename Type>
|
|
871
|
+
inline CUDA_CALLABLE void adj_assign_copy(
|
|
872
|
+
vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a,
|
|
873
|
+
vec_t<Length, Type>& adj_v, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a,
|
|
874
|
+
const vec_t<Length, Type>& adj_ret)
|
|
875
|
+
{
|
|
876
|
+
assert(slice.start >= 0 && slice.start <= (int)Length);
|
|
877
|
+
assert(slice.stop >= -1 && slice.stop <= (int)Length);
|
|
878
|
+
assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
|
|
879
|
+
assert(slice_get_length(slice) == SliceLength);
|
|
880
|
+
|
|
881
|
+
bool is_reversed = slice.step < 0;
|
|
882
|
+
|
|
883
|
+
int ii = 0;
|
|
884
|
+
for (int i = 0; i < Length; ++i)
|
|
885
|
+
{
|
|
886
|
+
bool in_slice = is_reversed
|
|
887
|
+
? (i <= slice.start && i > slice.stop && (slice.start - i) % (-slice.step) == 0)
|
|
888
|
+
: (i >= slice.start && i < slice.stop && (i - slice.start) % slice.step == 0);
|
|
889
|
+
|
|
890
|
+
if (!in_slice)
|
|
891
|
+
{
|
|
892
|
+
adj_v[i] += adj_ret[i];
|
|
893
|
+
}
|
|
894
|
+
else
|
|
895
|
+
{
|
|
896
|
+
adj_a[ii] += adj_ret[i];
|
|
897
|
+
++ii;
|
|
898
|
+
}
|
|
899
|
+
}
|
|
900
|
+
|
|
901
|
+
assert(ii == SliceLength);
|
|
902
|
+
}
|
|
903
|
+
|
|
623
904
|
template<unsigned Length, typename Type>
|
|
624
905
|
inline CUDA_CALLABLE Type length(vec_t<Length, Type> a)
|
|
625
906
|
{
|
|
@@ -1106,16 +1387,49 @@ template<unsigned Length, typename Type>
|
|
|
1106
1387
|
// Adjoint counterpart of the indexed extract: accumulates `adj_ret` into
// component `idx` of `adj_a`. `a` and `adj_idx` are not used here.
inline CUDA_CALLABLE void adj_extract(const vec_t<Length, Type> & a, int idx, vec_t<Length, Type> & adj_a, int & adj_idx, Type & adj_ret)
{
#ifndef NDEBUG
    // Debug builds only: report and trap any index outside [-Length, Length).
    const bool in_bounds = (idx >= -(int)Length) && (idx < (int)Length);
    if (!in_bounds)
    {
        printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
        assert(0);
    }
#endif

    // Map a negative index onto its non-negative equivalent.
    const int pos = (idx < 0) ? int(idx + Length) : idx;
    adj_a[pos] += adj_ret;
}
|
|
1118
1404
|
|
|
1405
|
+
template<unsigned SliceLength, unsigned Length, typename Type>
|
|
1406
|
+
inline CUDA_CALLABLE void adj_extract(
|
|
1407
|
+
const vec_t<Length, Type>& a, slice_t slice,
|
|
1408
|
+
vec_t<Length, Type>& adj_a, slice_t& adj_slice,
|
|
1409
|
+
const vec_t<SliceLength, Type>& adj_ret
|
|
1410
|
+
)
|
|
1411
|
+
{
|
|
1412
|
+
assert(slice.start >= 0 && slice.start <= (int)Length);
|
|
1413
|
+
assert(slice.stop >= -1 && slice.stop <= (int)Length);
|
|
1414
|
+
assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
|
|
1415
|
+
assert(slice_get_length(slice) == SliceLength);
|
|
1416
|
+
|
|
1417
|
+
bool is_reversed = slice.step < 0;
|
|
1418
|
+
|
|
1419
|
+
int ii = 0;
|
|
1420
|
+
for (
|
|
1421
|
+
int i = slice.start;
|
|
1422
|
+
is_reversed ? (i > slice.stop) : (i < slice.stop);
|
|
1423
|
+
i += slice.step
|
|
1424
|
+
)
|
|
1425
|
+
{
|
|
1426
|
+
adj_a[i] += adj_ret[ii];
|
|
1427
|
+
++ii;
|
|
1428
|
+
}
|
|
1429
|
+
|
|
1430
|
+
assert(ii == SliceLength);
|
|
1431
|
+
}
|
|
1432
|
+
|
|
1119
1433
|
template<unsigned Length, typename Type>
|
|
1120
1434
|
inline CUDA_CALLABLE void adj_length(vec_t<Length, Type> a, Type ret, vec_t<Length, Type>& adj_a, const Type adj_ret)
|
|
1121
1435
|
{
|