warp-lang 1.8.1__py3-none-macosx_10_13_universal2.whl → 1.9.1__py3-none-macosx_10_13_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic; see the package registry's advisory page for more details.

Files changed (141)
  1. warp/__init__.py +282 -103
  2. warp/__init__.pyi +1904 -114
  3. warp/bin/libwarp-clang.dylib +0 -0
  4. warp/bin/libwarp.dylib +0 -0
  5. warp/build.py +93 -30
  6. warp/build_dll.py +331 -101
  7. warp/builtins.py +1244 -160
  8. warp/codegen.py +317 -206
  9. warp/config.py +1 -1
  10. warp/context.py +1465 -789
  11. warp/examples/core/example_marching_cubes.py +1 -0
  12. warp/examples/core/example_render_opengl.py +100 -3
  13. warp/examples/fem/example_apic_fluid.py +98 -52
  14. warp/examples/fem/example_convection_diffusion_dg.py +25 -4
  15. warp/examples/fem/example_diffusion_mgpu.py +8 -3
  16. warp/examples/fem/utils.py +68 -22
  17. warp/examples/interop/example_jax_kernel.py +2 -1
  18. warp/fabric.py +1 -1
  19. warp/fem/cache.py +27 -19
  20. warp/fem/domain.py +2 -2
  21. warp/fem/field/nodal_field.py +2 -2
  22. warp/fem/field/virtual.py +264 -166
  23. warp/fem/geometry/geometry.py +5 -5
  24. warp/fem/integrate.py +129 -51
  25. warp/fem/space/restriction.py +4 -0
  26. warp/fem/space/shape/tet_shape_function.py +3 -10
  27. warp/jax_experimental/custom_call.py +25 -2
  28. warp/jax_experimental/ffi.py +22 -1
  29. warp/jax_experimental/xla_ffi.py +16 -7
  30. warp/marching_cubes.py +708 -0
  31. warp/native/array.h +99 -4
  32. warp/native/builtin.h +86 -9
  33. warp/native/bvh.cpp +64 -28
  34. warp/native/bvh.cu +58 -58
  35. warp/native/bvh.h +2 -2
  36. warp/native/clang/clang.cpp +7 -7
  37. warp/native/coloring.cpp +8 -2
  38. warp/native/crt.cpp +2 -2
  39. warp/native/crt.h +3 -5
  40. warp/native/cuda_util.cpp +41 -10
  41. warp/native/cuda_util.h +10 -4
  42. warp/native/exports.h +1842 -1908
  43. warp/native/fabric.h +2 -1
  44. warp/native/hashgrid.cpp +37 -37
  45. warp/native/hashgrid.cu +2 -2
  46. warp/native/initializer_array.h +1 -1
  47. warp/native/intersect.h +2 -2
  48. warp/native/mat.h +1910 -116
  49. warp/native/mathdx.cpp +43 -43
  50. warp/native/mesh.cpp +24 -24
  51. warp/native/mesh.cu +26 -26
  52. warp/native/mesh.h +4 -2
  53. warp/native/nanovdb/GridHandle.h +179 -12
  54. warp/native/nanovdb/HostBuffer.h +8 -7
  55. warp/native/nanovdb/NanoVDB.h +517 -895
  56. warp/native/nanovdb/NodeManager.h +323 -0
  57. warp/native/nanovdb/PNanoVDB.h +2 -2
  58. warp/native/quat.h +331 -14
  59. warp/native/range.h +7 -1
  60. warp/native/reduce.cpp +10 -10
  61. warp/native/reduce.cu +13 -14
  62. warp/native/runlength_encode.cpp +2 -2
  63. warp/native/runlength_encode.cu +5 -5
  64. warp/native/scan.cpp +3 -3
  65. warp/native/scan.cu +4 -4
  66. warp/native/sort.cpp +10 -10
  67. warp/native/sort.cu +40 -31
  68. warp/native/sort.h +2 -0
  69. warp/native/sparse.cpp +8 -8
  70. warp/native/sparse.cu +13 -13
  71. warp/native/spatial.h +366 -17
  72. warp/native/temp_buffer.h +2 -2
  73. warp/native/tile.h +471 -82
  74. warp/native/vec.h +328 -14
  75. warp/native/volume.cpp +54 -54
  76. warp/native/volume.cu +1 -1
  77. warp/native/volume.h +2 -1
  78. warp/native/volume_builder.cu +30 -37
  79. warp/native/warp.cpp +150 -149
  80. warp/native/warp.cu +377 -216
  81. warp/native/warp.h +227 -226
  82. warp/optim/linear.py +736 -271
  83. warp/render/imgui_manager.py +289 -0
  84. warp/render/render_opengl.py +99 -18
  85. warp/render/render_usd.py +1 -0
  86. warp/sim/graph_coloring.py +2 -2
  87. warp/sparse.py +558 -175
  88. warp/tests/aux_test_module_aot.py +7 -0
  89. warp/tests/cuda/test_async.py +3 -3
  90. warp/tests/cuda/test_conditional_captures.py +101 -0
  91. warp/tests/geometry/test_hash_grid.py +38 -0
  92. warp/tests/geometry/test_marching_cubes.py +233 -12
  93. warp/tests/interop/test_jax.py +608 -28
  94. warp/tests/sim/test_coloring.py +6 -6
  95. warp/tests/test_array.py +58 -5
  96. warp/tests/test_codegen.py +4 -3
  97. warp/tests/test_context.py +8 -15
  98. warp/tests/test_enum.py +136 -0
  99. warp/tests/test_examples.py +2 -2
  100. warp/tests/test_fem.py +49 -6
  101. warp/tests/test_fixedarray.py +229 -0
  102. warp/tests/test_func.py +18 -15
  103. warp/tests/test_future_annotations.py +7 -5
  104. warp/tests/test_linear_solvers.py +30 -0
  105. warp/tests/test_map.py +15 -1
  106. warp/tests/test_mat.py +1518 -378
  107. warp/tests/test_mat_assign_copy.py +178 -0
  108. warp/tests/test_mat_constructors.py +574 -0
  109. warp/tests/test_module_aot.py +287 -0
  110. warp/tests/test_print.py +69 -0
  111. warp/tests/test_quat.py +140 -34
  112. warp/tests/test_quat_assign_copy.py +145 -0
  113. warp/tests/test_reload.py +2 -1
  114. warp/tests/test_sparse.py +71 -0
  115. warp/tests/test_spatial.py +140 -34
  116. warp/tests/test_spatial_assign_copy.py +160 -0
  117. warp/tests/test_struct.py +43 -3
  118. warp/tests/test_tuple.py +96 -0
  119. warp/tests/test_types.py +61 -20
  120. warp/tests/test_vec.py +179 -34
  121. warp/tests/test_vec_assign_copy.py +143 -0
  122. warp/tests/tile/test_tile.py +245 -18
  123. warp/tests/tile/test_tile_cholesky.py +605 -0
  124. warp/tests/tile/test_tile_load.py +169 -0
  125. warp/tests/tile/test_tile_mathdx.py +2 -558
  126. warp/tests/tile/test_tile_matmul.py +1 -1
  127. warp/tests/tile/test_tile_mlp.py +1 -1
  128. warp/tests/tile/test_tile_shared_memory.py +5 -5
  129. warp/tests/unittest_suites.py +6 -0
  130. warp/tests/walkthrough_debug.py +1 -1
  131. warp/thirdparty/unittest_parallel.py +108 -9
  132. warp/types.py +571 -267
  133. warp/utils.py +68 -86
  134. {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/METADATA +29 -69
  135. {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/RECORD +138 -128
  136. warp/native/marching.cpp +0 -19
  137. warp/native/marching.cu +0 -514
  138. warp/native/marching.h +0 -19
  139. {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/WHEEL +0 -0
  140. {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/licenses/LICENSE.md +0 -0
  141. {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/top_level.txt +0 -0
warp/native/vec.h CHANGED
@@ -25,7 +25,7 @@ namespace wp
25
25
  template<unsigned Length, typename Type>
26
26
  struct vec_t
27
27
  {
28
- Type c[Length];
28
+ Type c[Length < 1 ? 1 : Length];
29
29
 
30
30
  inline CUDA_CALLABLE vec_t()
31
31
  : c()
@@ -440,27 +440,64 @@ template<unsigned Length, typename Type>
440
440
  inline CUDA_CALLABLE Type extract(const vec_t<Length, Type> & a, int idx)
441
441
  {
442
442
  #ifndef NDEBUG
443
- if (idx < 0 || idx >= Length)
443
+ if (idx < -(int)Length || idx >= (int)Length)
444
444
  {
445
445
  printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
446
446
  assert(0);
447
447
  }
448
448
  #endif
449
449
 
450
+ if (idx < 0)
451
+ {
452
+ idx += Length;
453
+ }
454
+
450
455
  return a[idx];
451
456
  }
452
457
 
458
+ template<unsigned SliceLength, unsigned Length, typename Type>
459
+ inline CUDA_CALLABLE vec_t<SliceLength, Type> extract(const vec_t<Length, Type> & a, slice_t slice)
460
+ {
461
+ vec_t<SliceLength, Type> ret;
462
+
463
+ assert(slice.start >= 0 && slice.start <= (int)Length);
464
+ assert(slice.stop >= -1 && slice.stop <= (int)Length);
465
+ assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
466
+ assert(slice_get_length(slice) == SliceLength);
467
+
468
+ bool is_reversed = slice.step < 0;
469
+
470
+ int ii = 0;
471
+ for (
472
+ int i = slice.start;
473
+ is_reversed ? (i > slice.stop) : (i < slice.stop);
474
+ i += slice.step
475
+ )
476
+ {
477
+ ret[ii] = a[i];
478
+ ++ii;
479
+ }
480
+
481
+ assert(ii == SliceLength);
482
+ return ret;
483
+ }
484
+
453
485
  template<unsigned Length, typename Type>
454
486
  inline CUDA_CALLABLE Type* index(vec_t<Length, Type>& v, int idx)
455
487
  {
456
488
  #ifndef NDEBUG
457
- if (idx < 0 || idx >= Length)
489
+ if (idx < -(int)Length || idx >= (int)Length)
458
490
  {
459
491
  printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
460
492
  assert(0);
461
493
  }
462
494
  #endif
463
495
 
496
+ if (idx < 0)
497
+ {
498
+ idx += Length;
499
+ }
500
+
464
501
  return &v[idx];
465
502
  }
466
503
 
@@ -468,13 +505,18 @@ template<unsigned Length, typename Type>
468
505
  inline CUDA_CALLABLE Type* indexref(vec_t<Length, Type>* v, int idx)
469
506
  {
470
507
  #ifndef NDEBUG
471
- if (idx < 0 || idx >= Length)
508
+ if (idx < -(int)Length || idx >= (int)Length)
472
509
  {
473
510
  printf("vec store %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
474
511
  assert(0);
475
512
  }
476
513
  #endif
477
514
 
515
+ if (idx < 0)
516
+ {
517
+ idx += Length;
518
+ }
519
+
478
520
  return &((*v)[idx]);
479
521
  }
480
522
 
@@ -498,120 +540,325 @@ template<unsigned Length, typename Type>
498
540
  inline CUDA_CALLABLE void add_inplace(vec_t<Length, Type>& v, int idx, Type value)
499
541
  {
500
542
  #ifndef NDEBUG
501
- if (idx < 0 || idx >= Length)
543
+ if (idx < -(int)Length || idx >= (int)Length)
502
544
  {
503
545
  printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
504
546
  assert(0);
505
547
  }
506
548
  #endif
507
549
 
550
+ if (idx < 0)
551
+ {
552
+ idx += Length;
553
+ }
554
+
508
555
  v[idx] += value;
509
556
  }
510
557
 
511
558
 
559
+ template<unsigned SliceLength, unsigned Length, typename Type>
560
+ inline CUDA_CALLABLE void add_inplace(vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a)
561
+ {
562
+ assert(slice.start >= 0 && slice.start <= (int)Length);
563
+ assert(slice.stop >= -1 && slice.stop <= (int)Length);
564
+ assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
565
+ assert(slice_get_length(slice) == SliceLength);
566
+
567
+ bool is_reversed = slice.step < 0;
568
+
569
+ int ii = 0;
570
+ for (
571
+ int i = slice.start;
572
+ is_reversed ? (i > slice.stop) : (i < slice.stop);
573
+ i += slice.step
574
+ )
575
+ {
576
+ v[i] += a[ii];
577
+ ++ii;
578
+ }
579
+
580
+ assert(ii == SliceLength);
581
+ }
582
+
583
+
512
584
  template<unsigned Length, typename Type>
513
585
  inline CUDA_CALLABLE void adj_add_inplace(vec_t<Length, Type>& v, int idx, Type value,
514
586
  vec_t<Length, Type>& adj_v, int adj_idx, Type& adj_value)
515
587
  {
516
588
  #ifndef NDEBUG
517
- if (idx < 0 || idx >= Length)
589
+ if (idx < -(int)Length || idx >= (int)Length)
518
590
  {
519
591
  printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
520
592
  assert(0);
521
593
  }
522
594
  #endif
523
595
 
596
+ if (idx < 0)
597
+ {
598
+ idx += Length;
599
+ }
600
+
524
601
  adj_value += adj_v[idx];
525
602
  }
526
603
 
527
604
 
605
+ template<unsigned SliceLength, unsigned Length, typename Type>
606
+ inline CUDA_CALLABLE void adj_add_inplace(
607
+ const vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a,
608
+ vec_t<Length, Type>& adj_v, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
609
+ )
610
+ {
611
+ assert(slice.start >= 0 && slice.start <= (int)Length);
612
+ assert(slice.stop >= -1 && slice.stop <= (int)Length);
613
+ assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
614
+ assert(slice_get_length(slice) == SliceLength);
615
+
616
+ bool is_reversed = slice.step < 0;
617
+
618
+ int ii = 0;
619
+ for (
620
+ int i = slice.start;
621
+ is_reversed ? (i > slice.stop) : (i < slice.stop);
622
+ i += slice.step
623
+ )
624
+ {
625
+ adj_a[ii] += adj_v[i];
626
+ ++ii;
627
+ }
628
+
629
+ assert(ii == SliceLength);
630
+ }
631
+
632
+
528
633
  template<unsigned Length, typename Type>
529
634
  inline CUDA_CALLABLE void sub_inplace(vec_t<Length, Type>& v, int idx, Type value)
530
635
  {
531
636
  #ifndef NDEBUG
532
- if (idx < 0 || idx >= Length)
637
+ if (idx < -(int)Length || idx >= (int)Length)
533
638
  {
534
639
  printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
535
640
  assert(0);
536
641
  }
537
642
  #endif
538
643
 
644
+ if (idx < 0)
645
+ {
646
+ idx += Length;
647
+ }
648
+
539
649
  v[idx] -= value;
540
650
  }
541
651
 
542
652
 
653
+ template<unsigned SliceLength, unsigned Length, typename Type>
654
+ inline CUDA_CALLABLE void sub_inplace(vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a)
655
+ {
656
+ assert(slice.start >= 0 && slice.start <= (int)Length);
657
+ assert(slice.stop >= -1 && slice.stop <= (int)Length);
658
+ assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
659
+ assert(slice_get_length(slice) == SliceLength);
660
+
661
+ bool is_reversed = slice.step < 0;
662
+
663
+ int ii = 0;
664
+ for (
665
+ int i = slice.start;
666
+ is_reversed ? (i > slice.stop) : (i < slice.stop);
667
+ i += slice.step
668
+ )
669
+ {
670
+ v[i] -= a[ii];
671
+ ++ii;
672
+ }
673
+
674
+ assert(ii == SliceLength);
675
+ }
676
+
677
+
543
678
  template<unsigned Length, typename Type>
544
679
  inline CUDA_CALLABLE void adj_sub_inplace(vec_t<Length, Type>& v, int idx, Type value,
545
680
  vec_t<Length, Type>& adj_v, int adj_idx, Type& adj_value)
546
681
  {
547
682
  #ifndef NDEBUG
548
- if (idx < 0 || idx >= Length)
683
+ if (idx < -(int)Length || idx >= (int)Length)
549
684
  {
550
685
  printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
551
686
  assert(0);
552
687
  }
553
688
  #endif
554
689
 
690
+ if (idx < 0)
691
+ {
692
+ idx += Length;
693
+ }
694
+
555
695
  adj_value -= adj_v[idx];
556
696
  }
557
697
 
558
698
 
699
+ template<unsigned SliceLength, unsigned Length, typename Type>
700
+ inline CUDA_CALLABLE void adj_sub_inplace(
701
+ const vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a,
702
+ vec_t<Length, Type>& adj_v, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
703
+ )
704
+ {
705
+ assert(slice.start >= 0 && slice.start <= (int)Length);
706
+ assert(slice.stop >= -1 && slice.stop <= (int)Length);
707
+ assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
708
+ assert(slice_get_length(slice) == SliceLength);
709
+
710
+ bool is_reversed = slice.step < 0;
711
+
712
+ int ii = 0;
713
+ for (
714
+ int i = slice.start;
715
+ is_reversed ? (i > slice.stop) : (i < slice.stop);
716
+ i += slice.step
717
+ )
718
+ {
719
+ adj_a[ii] -= adj_v[i];
720
+ ++ii;
721
+ }
722
+
723
+ assert(ii == SliceLength);
724
+ }
725
+
726
+
559
727
  template<unsigned Length, typename Type>
560
728
  inline CUDA_CALLABLE void assign_inplace(vec_t<Length, Type>& v, int idx, Type value)
561
729
  {
562
730
  #ifndef NDEBUG
563
- if (idx < 0 || idx >= Length)
731
+ if (idx < -(int)Length || idx >= (int)Length)
564
732
  {
565
733
  printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
566
734
  assert(0);
567
735
  }
568
736
  #endif
569
737
 
738
+ if (idx < 0)
739
+ {
740
+ idx += Length;
741
+ }
742
+
570
743
  v[idx] = value;
571
744
  }
572
745
 
746
+ template<unsigned SliceLength, unsigned Length, typename Type>
747
+ inline CUDA_CALLABLE void assign_inplace(vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a)
748
+ {
749
+ assert(slice.start >= 0 && slice.start <= (int)Length);
750
+ assert(slice.stop >= -1 && slice.stop <= (int)Length);
751
+ assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
752
+ assert(slice_get_length(slice) == SliceLength);
753
+
754
+ bool is_reversed = slice.step < 0;
755
+
756
+ int ii = 0;
757
+ for (
758
+ int i = slice.start;
759
+ is_reversed ? (i > slice.stop) : (i < slice.stop);
760
+ i += slice.step
761
+ )
762
+ {
763
+ v[i] = a[ii];
764
+ ++ii;
765
+ }
766
+
767
+ assert(ii == SliceLength);
768
+ }
769
+
573
770
  template<unsigned Length, typename Type>
574
771
  inline CUDA_CALLABLE void adj_assign_inplace(vec_t<Length, Type>& v, int idx, Type value, vec_t<Length, Type>& adj_v, int& adj_idx, Type& adj_value)
575
772
  {
576
773
  #ifndef NDEBUG
577
- if (idx < 0 || idx >= Length)
774
+ if (idx < -(int)Length || idx >= (int)Length)
578
775
  {
579
776
  printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
580
777
  assert(0);
581
778
  }
582
779
  #endif
583
780
 
781
+ if (idx < 0)
782
+ {
783
+ idx += Length;
784
+ }
785
+
584
786
  adj_value += adj_v[idx];
585
787
  }
586
788
 
789
+ template<unsigned SliceLength, unsigned Length, typename Type>
790
+ inline CUDA_CALLABLE void adj_assign_inplace(
791
+ const vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a,
792
+ vec_t<Length, Type>& adj_v, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
793
+ )
794
+ {
795
+ assert(slice.start >= 0 && slice.start <= (int)Length);
796
+ assert(slice.stop >= -1 && slice.stop <= (int)Length);
797
+ assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
798
+ assert(slice_get_length(slice) == SliceLength);
799
+
800
+ bool is_reversed = slice.step < 0;
801
+
802
+ int ii = 0;
803
+ for (
804
+ int i = slice.start;
805
+ is_reversed ? (i > slice.stop) : (i < slice.stop);
806
+ i += slice.step
807
+ )
808
+ {
809
+ adj_a[ii] += adj_v[i];
810
+ ++ii;
811
+ }
812
+
813
+ assert(ii == SliceLength);
814
+ }
815
+
587
816
 
588
817
  template<unsigned Length, typename Type>
589
818
  inline CUDA_CALLABLE vec_t<Length, Type> assign_copy(vec_t<Length, Type>& v, int idx, Type value)
590
819
  {
591
820
  #ifndef NDEBUG
592
- if (idx < 0 || idx >= Length)
821
+ if (idx < -(int)Length || idx >= (int)Length)
593
822
  {
594
823
  printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
595
824
  assert(0);
596
825
  }
597
826
  #endif
598
827
 
828
+ if (idx < 0)
829
+ {
830
+ idx += Length;
831
+ }
832
+
599
833
  vec_t<Length, Type> ret(v);
600
834
  ret[idx] = value;
601
835
  return ret;
602
836
  }
603
837
 
838
+ template<unsigned SliceLength, unsigned Length, typename Type>
839
+ inline CUDA_CALLABLE vec_t<Length, Type> assign_copy(vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a)
840
+ {
841
+ vec_t<Length, Type> ret(v);
842
+ assign_inplace<SliceLength>(ret, slice, a);
843
+ return ret;
844
+ }
845
+
604
846
  template<unsigned Length, typename Type>
605
847
  inline CUDA_CALLABLE void adj_assign_copy(vec_t<Length, Type>& v, int idx, Type value, vec_t<Length, Type>& adj_v, int& adj_idx, Type& adj_value, const vec_t<Length, Type>& adj_ret)
606
848
  {
607
849
  #ifndef NDEBUG
608
- if (idx < 0 || idx >= Length)
850
+ if (idx < -(int)Length || idx >= (int)Length)
609
851
  {
610
852
  printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
611
853
  assert(0);
612
854
  }
613
855
  #endif
614
856
 
857
+ if (idx < 0)
858
+ {
859
+ idx += Length;
860
+ }
861
+
615
862
  adj_value += adj_ret[idx];
616
863
  for(unsigned i=0; i < Length; ++i)
617
864
  {
@@ -620,6 +867,40 @@ inline CUDA_CALLABLE void adj_assign_copy(vec_t<Length, Type>& v, int idx, Type
620
867
  }
621
868
  }
622
869
 
870
+ template<unsigned SliceLength, unsigned Length, typename Type>
871
+ inline CUDA_CALLABLE void adj_assign_copy(
872
+ vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a,
873
+ vec_t<Length, Type>& adj_v, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a,
874
+ const vec_t<Length, Type>& adj_ret)
875
+ {
876
+ assert(slice.start >= 0 && slice.start <= (int)Length);
877
+ assert(slice.stop >= -1 && slice.stop <= (int)Length);
878
+ assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
879
+ assert(slice_get_length(slice) == SliceLength);
880
+
881
+ bool is_reversed = slice.step < 0;
882
+
883
+ int ii = 0;
884
+ for (int i = 0; i < Length; ++i)
885
+ {
886
+ bool in_slice = is_reversed
887
+ ? (i <= slice.start && i > slice.stop && (slice.start - i) % (-slice.step) == 0)
888
+ : (i >= slice.start && i < slice.stop && (i - slice.start) % slice.step == 0);
889
+
890
+ if (!in_slice)
891
+ {
892
+ adj_v[i] += adj_ret[i];
893
+ }
894
+ else
895
+ {
896
+ adj_a[ii] += adj_ret[i];
897
+ ++ii;
898
+ }
899
+ }
900
+
901
+ assert(ii == SliceLength);
902
+ }
903
+
623
904
  template<unsigned Length, typename Type>
624
905
  inline CUDA_CALLABLE Type length(vec_t<Length, Type> a)
625
906
  {
@@ -1106,16 +1387,49 @@ template<unsigned Length, typename Type>
1106
1387
  inline CUDA_CALLABLE void adj_extract(const vec_t<Length, Type> & a, int idx, vec_t<Length, Type> & adj_a, int & adj_idx, Type & adj_ret)
1107
1388
  {
1108
1389
  #ifndef NDEBUG
1109
- if (idx < 0 || idx > Length)
1390
+ if (idx < -(int)Length || idx >= (int)Length)
1110
1391
  {
1111
- printf("Tvec2<Scalar> index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
1392
+ printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
1112
1393
  assert(0);
1113
1394
  }
1114
1395
  #endif
1115
1396
 
1397
+ if (idx < 0)
1398
+ {
1399
+ idx += Length;
1400
+ }
1401
+
1116
1402
  adj_a[idx] += adj_ret;
1117
1403
  }
1118
1404
 
1405
+ template<unsigned SliceLength, unsigned Length, typename Type>
1406
+ inline CUDA_CALLABLE void adj_extract(
1407
+ const vec_t<Length, Type>& a, slice_t slice,
1408
+ vec_t<Length, Type>& adj_a, slice_t& adj_slice,
1409
+ const vec_t<SliceLength, Type>& adj_ret
1410
+ )
1411
+ {
1412
+ assert(slice.start >= 0 && slice.start <= (int)Length);
1413
+ assert(slice.stop >= -1 && slice.stop <= (int)Length);
1414
+ assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
1415
+ assert(slice_get_length(slice) == SliceLength);
1416
+
1417
+ bool is_reversed = slice.step < 0;
1418
+
1419
+ int ii = 0;
1420
+ for (
1421
+ int i = slice.start;
1422
+ is_reversed ? (i > slice.stop) : (i < slice.stop);
1423
+ i += slice.step
1424
+ )
1425
+ {
1426
+ adj_a[i] += adj_ret[ii];
1427
+ ++ii;
1428
+ }
1429
+
1430
+ assert(ii == SliceLength);
1431
+ }
1432
+
1119
1433
  template<unsigned Length, typename Type>
1120
1434
  inline CUDA_CALLABLE void adj_length(vec_t<Length, Type> a, Type ret, vec_t<Length, Type>& adj_a, const Type adj_ret)
1121
1435
  {