warp-lang 1.8.1__py3-none-win_amd64.whl → 1.9.1__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of warp-lang might be problematic.

Files changed (141)
  1. warp/__init__.py +282 -103
  2. warp/__init__.pyi +1904 -114
  3. warp/bin/warp-clang.dll +0 -0
  4. warp/bin/warp.dll +0 -0
  5. warp/build.py +93 -30
  6. warp/build_dll.py +331 -101
  7. warp/builtins.py +1244 -160
  8. warp/codegen.py +317 -206
  9. warp/config.py +1 -1
  10. warp/context.py +1465 -789
  11. warp/examples/core/example_marching_cubes.py +1 -0
  12. warp/examples/core/example_render_opengl.py +100 -3
  13. warp/examples/fem/example_apic_fluid.py +98 -52
  14. warp/examples/fem/example_convection_diffusion_dg.py +25 -4
  15. warp/examples/fem/example_diffusion_mgpu.py +8 -3
  16. warp/examples/fem/utils.py +68 -22
  17. warp/examples/interop/example_jax_kernel.py +2 -1
  18. warp/fabric.py +1 -1
  19. warp/fem/cache.py +27 -19
  20. warp/fem/domain.py +2 -2
  21. warp/fem/field/nodal_field.py +2 -2
  22. warp/fem/field/virtual.py +264 -166
  23. warp/fem/geometry/geometry.py +5 -5
  24. warp/fem/integrate.py +129 -51
  25. warp/fem/space/restriction.py +4 -0
  26. warp/fem/space/shape/tet_shape_function.py +3 -10
  27. warp/jax_experimental/custom_call.py +25 -2
  28. warp/jax_experimental/ffi.py +22 -1
  29. warp/jax_experimental/xla_ffi.py +16 -7
  30. warp/marching_cubes.py +708 -0
  31. warp/native/array.h +99 -4
  32. warp/native/builtin.h +86 -9
  33. warp/native/bvh.cpp +64 -28
  34. warp/native/bvh.cu +58 -58
  35. warp/native/bvh.h +2 -2
  36. warp/native/clang/clang.cpp +7 -7
  37. warp/native/coloring.cpp +8 -2
  38. warp/native/crt.cpp +2 -2
  39. warp/native/crt.h +3 -5
  40. warp/native/cuda_util.cpp +41 -10
  41. warp/native/cuda_util.h +10 -4
  42. warp/native/exports.h +1842 -1908
  43. warp/native/fabric.h +2 -1
  44. warp/native/hashgrid.cpp +37 -37
  45. warp/native/hashgrid.cu +2 -2
  46. warp/native/initializer_array.h +1 -1
  47. warp/native/intersect.h +2 -2
  48. warp/native/mat.h +1910 -116
  49. warp/native/mathdx.cpp +43 -43
  50. warp/native/mesh.cpp +24 -24
  51. warp/native/mesh.cu +26 -26
  52. warp/native/mesh.h +4 -2
  53. warp/native/nanovdb/GridHandle.h +179 -12
  54. warp/native/nanovdb/HostBuffer.h +8 -7
  55. warp/native/nanovdb/NanoVDB.h +517 -895
  56. warp/native/nanovdb/NodeManager.h +323 -0
  57. warp/native/nanovdb/PNanoVDB.h +2 -2
  58. warp/native/quat.h +331 -14
  59. warp/native/range.h +7 -1
  60. warp/native/reduce.cpp +10 -10
  61. warp/native/reduce.cu +13 -14
  62. warp/native/runlength_encode.cpp +2 -2
  63. warp/native/runlength_encode.cu +5 -5
  64. warp/native/scan.cpp +3 -3
  65. warp/native/scan.cu +4 -4
  66. warp/native/sort.cpp +10 -10
  67. warp/native/sort.cu +40 -31
  68. warp/native/sort.h +2 -0
  69. warp/native/sparse.cpp +8 -8
  70. warp/native/sparse.cu +13 -13
  71. warp/native/spatial.h +366 -17
  72. warp/native/temp_buffer.h +2 -2
  73. warp/native/tile.h +471 -82
  74. warp/native/vec.h +328 -14
  75. warp/native/volume.cpp +54 -54
  76. warp/native/volume.cu +1 -1
  77. warp/native/volume.h +2 -1
  78. warp/native/volume_builder.cu +30 -37
  79. warp/native/warp.cpp +150 -149
  80. warp/native/warp.cu +377 -216
  81. warp/native/warp.h +227 -226
  82. warp/optim/linear.py +736 -271
  83. warp/render/imgui_manager.py +289 -0
  84. warp/render/render_opengl.py +99 -18
  85. warp/render/render_usd.py +1 -0
  86. warp/sim/graph_coloring.py +2 -2
  87. warp/sparse.py +558 -175
  88. warp/tests/aux_test_module_aot.py +7 -0
  89. warp/tests/cuda/test_async.py +3 -3
  90. warp/tests/cuda/test_conditional_captures.py +101 -0
  91. warp/tests/geometry/test_hash_grid.py +38 -0
  92. warp/tests/geometry/test_marching_cubes.py +233 -12
  93. warp/tests/interop/test_jax.py +608 -28
  94. warp/tests/sim/test_coloring.py +6 -6
  95. warp/tests/test_array.py +58 -5
  96. warp/tests/test_codegen.py +4 -3
  97. warp/tests/test_context.py +8 -15
  98. warp/tests/test_enum.py +136 -0
  99. warp/tests/test_examples.py +2 -2
  100. warp/tests/test_fem.py +49 -6
  101. warp/tests/test_fixedarray.py +229 -0
  102. warp/tests/test_func.py +18 -15
  103. warp/tests/test_future_annotations.py +7 -5
  104. warp/tests/test_linear_solvers.py +30 -0
  105. warp/tests/test_map.py +15 -1
  106. warp/tests/test_mat.py +1518 -378
  107. warp/tests/test_mat_assign_copy.py +178 -0
  108. warp/tests/test_mat_constructors.py +574 -0
  109. warp/tests/test_module_aot.py +287 -0
  110. warp/tests/test_print.py +69 -0
  111. warp/tests/test_quat.py +140 -34
  112. warp/tests/test_quat_assign_copy.py +145 -0
  113. warp/tests/test_reload.py +2 -1
  114. warp/tests/test_sparse.py +71 -0
  115. warp/tests/test_spatial.py +140 -34
  116. warp/tests/test_spatial_assign_copy.py +160 -0
  117. warp/tests/test_struct.py +43 -3
  118. warp/tests/test_tuple.py +96 -0
  119. warp/tests/test_types.py +61 -20
  120. warp/tests/test_vec.py +179 -34
  121. warp/tests/test_vec_assign_copy.py +143 -0
  122. warp/tests/tile/test_tile.py +245 -18
  123. warp/tests/tile/test_tile_cholesky.py +605 -0
  124. warp/tests/tile/test_tile_load.py +169 -0
  125. warp/tests/tile/test_tile_mathdx.py +2 -558
  126. warp/tests/tile/test_tile_matmul.py +1 -1
  127. warp/tests/tile/test_tile_mlp.py +1 -1
  128. warp/tests/tile/test_tile_shared_memory.py +5 -5
  129. warp/tests/unittest_suites.py +6 -0
  130. warp/tests/walkthrough_debug.py +1 -1
  131. warp/thirdparty/unittest_parallel.py +108 -9
  132. warp/types.py +571 -267
  133. warp/utils.py +68 -86
  134. {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/METADATA +29 -69
  135. {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/RECORD +138 -128
  136. warp/native/marching.cpp +0 -19
  137. warp/native/marching.cu +0 -514
  138. warp/native/marching.h +0 -19
  139. {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/WHEEL +0 -0
  140. {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/licenses/LICENSE.md +0 -0
  141. {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/top_level.txt +0 -0
warp/native/spatial.h CHANGED
@@ -34,7 +34,7 @@ CUDA_CALLABLE inline Type spatial_dot(const spatial_vector_t<Type>& a, const spa
 template<typename Type>
 CUDA_CALLABLE inline vec_t<3,Type> &w_vec( spatial_vector_t<Type>& a )
 {
-    return *(vec_t<3,Type>*)(&a);
+    return *reinterpret_cast<vec_t<3,Type>*>(&a);
 }
 
 template<typename Type>
@@ -46,14 +46,14 @@ CUDA_CALLABLE inline vec_t<3,Type> &v_vec( spatial_vector_t<Type>& a )
 template<typename Type>
 CUDA_CALLABLE inline const vec_t<3,Type> &w_vec( const spatial_vector_t<Type>& a )
 {
-    spatial_vector_t<Type> &non_const_vec = *(spatial_vector_t<Type>*)(const_cast<Type*>(&a.c[0]));
+    spatial_vector_t<Type> &non_const_vec = *reinterpret_cast<spatial_vector_t<Type>*>(const_cast<Type*>(&a.c[0]));
     return w_vec(non_const_vec);
 }
 
 template<typename Type>
 CUDA_CALLABLE inline const vec_t<3,Type> &v_vec( const spatial_vector_t<Type>& a )
 {
-    spatial_vector_t<Type> &non_const_vec = *(spatial_vector_t<Type>*)(const_cast<Type*>(&a.c[0]));
+    spatial_vector_t<Type> &non_const_vec = *reinterpret_cast<spatial_vector_t<Type>*>(const_cast<Type*>(&a.c[0]));
     return v_vec(non_const_vec);
 }
 
@@ -408,27 +408,64 @@ template<typename Type>
 inline CUDA_CALLABLE Type extract(const transform_t<Type>& t, int idx)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= 7)
+    if (idx < -7 || idx >= 7)
     {
         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
-
+
+    if (idx < 0)
+    {
+        idx += 7;
+    }
+
     return t[idx];
 }
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE vec_t<SliceLength, Type> extract(const transform_t<Type> & t, slice_t slice)
+{
+    vec_t<SliceLength, Type> ret;
+
+    assert(slice.start >= 0 && slice.start <= 7);
+    assert(slice.stop >= -1 && slice.stop <= 7);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        ret[ii] = t[i];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+    return ret;
+}
+
 template<typename Type>
 inline CUDA_CALLABLE Type* index(transform_t<Type>& t, int idx)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= 7)
+    if (idx < -7 || idx >= 7)
     {
         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 7;
+    }
+
     return &t[idx];
 }
 
@@ -436,13 +473,18 @@ template<typename Type>
 inline CUDA_CALLABLE Type* indexref(transform_t<Type>* t, int idx)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= 7)
+    if (idx < -7 || idx >= 7)
     {
-        printf("transformation store %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
+        printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 7;
+    }
+
     return &((*t)[idx]);
 }
 
@@ -452,6 +494,34 @@ inline void CUDA_CALLABLE adj_extract(const transform_t<Type>& t, int idx, trans
     adj_t[idx] += adj_ret;
 }
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE void adj_extract(
+    const transform_t<Type>& t, slice_t slice,
+    transform_t<Type>& adj_t, slice_t& adj_slice,
+    const vec_t<SliceLength, Type>& adj_ret
+)
+{
+    assert(slice.start >= 0 && slice.start <= 7);
+    assert(slice.stop >= -1 && slice.stop <= 7);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        adj_t[i] += adj_ret[ii];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
 template<typename Type>
 inline CUDA_CALLABLE void adj_index(transform_t<Type>& t, int idx,
     transform_t<Type>& adj_t, int adj_idx, const Type& adj_value)
@@ -470,120 +540,325 @@ template<typename Type>
 inline CUDA_CALLABLE void add_inplace(transform_t<Type>& t, int idx, Type value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= 7)
+    if (idx < -7 || idx >= 7)
     {
         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 7;
+    }
+
     t[idx] += value;
 }
 
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE void add_inplace(transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a)
+{
+    assert(slice.start >= 0 && slice.start <= 7);
+    assert(slice.stop >= -1 && slice.stop <= 7);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        t[i] += a[ii];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
+
 template<typename Type>
 inline CUDA_CALLABLE void adj_add_inplace(transform_t<Type>& t, int idx, Type value,
     transform_t<Type>& adj_t, int adj_idx, Type& adj_value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= 7)
+    if (idx < -7 || idx >= 7)
     {
         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 7;
+    }
+
     adj_value += adj_t[idx];
 }
 
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE void adj_add_inplace(
+    const transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a,
+    transform_t<Type>& adj_t, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
+)
+{
+    assert(slice.start >= 0 && slice.start <= 7);
+    assert(slice.stop >= -1 && slice.stop <= 7);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        adj_a[ii] += adj_t[i];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
+
 template<typename Type>
 inline CUDA_CALLABLE void sub_inplace(transform_t<Type>& t, int idx, Type value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= 7)
+    if (idx < -7 || idx >= 7)
     {
         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 7;
+    }
+
     t[idx] -= value;
 }
 
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE void sub_inplace(transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a)
+{
+    assert(slice.start >= 0 && slice.start <= 7);
+    assert(slice.stop >= -1 && slice.stop <= 7);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        t[i] -= a[ii];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
+
 template<typename Type>
 inline CUDA_CALLABLE void adj_sub_inplace(transform_t<Type>& t, int idx, Type value,
     transform_t<Type>& adj_t, int adj_idx, Type& adj_value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= 7)
+    if (idx < -7 || idx >= 7)
    {
         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 7;
+    }
+
     adj_value -= adj_t[idx];
 }
 
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE void adj_sub_inplace(
+    const transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a,
+    transform_t<Type>& adj_t, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
+)
+{
+    assert(slice.start >= 0 && slice.start <= 7);
+    assert(slice.stop >= -1 && slice.stop <= 7);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        adj_a[ii] -= adj_t[i];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
+
 template<typename Type>
 inline CUDA_CALLABLE void assign_inplace(transform_t<Type>& t, int idx, Type value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= 7)
+    if (idx < -7 || idx >= 7)
     {
         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 7;
+    }
+
     t[idx] = value;
 }
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE void assign_inplace(transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a)
+{
+    assert(slice.start >= 0 && slice.start <= 7);
+    assert(slice.stop >= -1 && slice.stop <= 7);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        t[i] = a[ii];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
 template<typename Type>
 inline CUDA_CALLABLE void adj_assign_inplace(transform_t<Type>& t, int idx, Type value, transform_t<Type>& adj_t, int& adj_idx, Type& adj_value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= 7)
+    if (idx < -7 || idx >= 7)
     {
         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 7;
+    }
+
     adj_value += adj_t[idx];
 }
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE void adj_assign_inplace(
+    const transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a,
+    transform_t<Type>& adj_t, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
+)
+{
+    assert(slice.start >= 0 && slice.start <= 7);
+    assert(slice.stop >= -1 && slice.stop <= 7);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        adj_a[ii] += adj_t[i];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
 
 template<typename Type>
 inline CUDA_CALLABLE transform_t<Type> assign_copy(transform_t<Type>& t, int idx, Type value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= 7)
+    if (idx < -7 || idx >= 7)
     {
         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 7;
+    }
+
     transform_t<Type> ret(t);
     ret[idx] = value;
     return ret;
 }
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE transform_t<Type> assign_copy(transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a)
+{
+    transform_t<Type> ret(t);
+    assign_inplace<SliceLength>(ret, slice, a);
+    return ret;
+}
+
 template<typename Type>
 inline CUDA_CALLABLE void adj_assign_copy(transform_t<Type>& t, int idx, Type value, transform_t<Type>& adj_t, int& adj_idx, Type& adj_value, const transform_t<Type>& adj_ret)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= 7)
+    if (idx < -7 || idx >= 7)
     {
         printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif
 
+    if (idx < 0)
+    {
+        idx += 7;
+    }
+
     adj_value += adj_ret[idx];
     for(unsigned i=0; i < 7; ++i)
     {
@@ -592,6 +867,42 @@ inline CUDA_CALLABLE void adj_assign_copy(transform_t<Type>& t, int idx, Type va
     }
 }
 
+template<unsigned SliceLength, typename Type>
+inline CUDA_CALLABLE void adj_assign_copy(
+    transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a,
+    transform_t<Type>& adj_t, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a,
+    const transform_t<Type>& adj_ret
+)
+{
+    assert(slice.start >= 0 && slice.start <= 7);
+    assert(slice.stop >= -1 && slice.stop <= 7);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (int i = 0; i < 7; ++i)
+    {
+        bool in_slice = is_reversed
+            ? (i <= slice.start && i > slice.stop && (slice.start - i) % (-slice.step) == 0)
+            : (i >= slice.start && i < slice.stop && (i - slice.start) % slice.step == 0);
+
+        if (!in_slice)
+        {
+            adj_t[i] += adj_ret[i];
+        }
+        else
+        {
+            adj_a[ii] += adj_ret[i];
+            ++ii;
+        }
+    }
+
+    assert(ii == SliceLength);
+}
+
+
 // adjoint methods
 template<typename Type>
 CUDA_CALLABLE inline void adj_add(const transform_t<Type>& a, const transform_t<Type>& b, transform_t<Type>& adj_a, transform_t<Type>& adj_b, const transform_t<Type>& adj_ret)
@@ -600,6 +911,25 @@ CUDA_CALLABLE inline void adj_add(const transform_t<Type>& a, const transform_t<
     adj_add(a.q, b.q, adj_a.q, adj_b.q, adj_ret.q);
 }
 
+template<typename Type>
+CUDA_CALLABLE inline void adj_add(
+    const transform_t<Type>& a, Type b,
+    transform_t<Type>& adj_a, Type& adj_b,
+    const transform_t<Type>& adj_ret
+)
+{
+    adj_a += adj_ret;
+
+    adj_b += adj_ret.p[0];
+    adj_b += adj_ret.p[1];
+    adj_b += adj_ret.p[2];
+
+    adj_b += adj_ret.q[0];
+    adj_b += adj_ret.q[1];
+    adj_b += adj_ret.q[2];
+    adj_b += adj_ret.q[3];
+}
+
 template<typename Type>
 CUDA_CALLABLE inline void adj_sub(const transform_t<Type>& a, const transform_t<Type>& b, transform_t<Type>& adj_a, transform_t<Type>& adj_b, const transform_t<Type>& adj_ret)
 {
@@ -607,6 +937,25 @@ CUDA_CALLABLE inline void adj_sub(const transform_t<Type>& a, const transform_t<
     adj_sub(a.q, b.q, adj_a.q, adj_b.q, adj_ret.q);
 }
 
+template<typename Type>
+CUDA_CALLABLE inline void adj_sub(
+    const transform_t<Type>& a, Type b,
+    transform_t<Type>& adj_a, Type& adj_b,
+    const transform_t<Type>& adj_ret
+)
+{
+    adj_a -= adj_ret;
+
+    adj_b -= adj_ret.p[0];
+    adj_b -= adj_ret.p[1];
+    adj_b -= adj_ret.p[2];
+
+    adj_b -= adj_ret.q[0];
+    adj_b -= adj_ret.q[1];
+    adj_b -= adj_ret.q[2];
+    adj_b -= adj_ret.q[3];
+}
+
 template<typename Type>
 CUDA_CALLABLE inline void adj_mul(const transform_t<Type>& a, Type s, transform_t<Type>& adj_a, Type& adj_s, const transform_t<Type>& adj_ret)
 {
@@ -942,4 +1291,4 @@ using spatial_matrixh = spatial_matrix_t<half>;
 using spatial_matrixf = spatial_matrix_t<float>;
 using spatial_matrixd = spatial_matrix_t<double>;
 
-} // namespace wp
+} // namespace wp
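
Note on the spatial.h hunks above: scalar accessors now accept negative indices, normalized by adding 7 (so t[-1] resolves to component 6, the quaternion's w), and the new slice overloads all share a single iteration pattern over the transform's 7 components (p.x, p.y, p.z, q.x, q.y, q.z, q.w) with Python-style start/stop/step semantics. Below is a minimal standalone C++ sketch of that iteration pattern only; slice_stub and slice_stub_length are hypothetical stand-ins for Warp's actual slice_t and slice_get_length, whose definitions are not part of this diff.

// Minimal standalone sketch of the slice traversal used by the transform
// overloads above. slice_stub mimics Python's range semantics: start is the
// first index visited, stop is an exclusive bound, step may be negative.
#include <cassert>
#include <cstdio>

struct slice_stub
{
    int start; // first index visited
    int stop;  // exclusive bound; -1 lets a reversed slice reach index 0
    int step;  // non-zero; negative means reversed traversal
};

int slice_stub_length(const slice_stub& s)
{
    // Number of indices visited, mirroring len(range(start, stop, step)).
    int span = s.step < 0 ? s.start - s.stop : s.stop - s.start;
    int mag = s.step < 0 ? -s.step : s.step;
    return span <= 0 ? 0 : (span + mag - 1) / mag;
}

int main()
{
    // The 7 components of an identity transform: translation then quaternion.
    float t[7] = {1.0f, 2.0f, 3.0f, 0.0f, 0.0f, 0.0f, 1.0f};

    // t[6:2:-2] in Python notation: visits indices 6 and 4.
    slice_stub s{6, 2, -2};
    assert(slice_stub_length(s) == 2);

    bool is_reversed = s.step < 0;
    int ii = 0;
    for (int i = s.start; is_reversed ? (i > s.stop) : (i < s.stop); i += s.step)
    {
        printf("out[%d] = t[%d] = %f\n", ii, i, t[i]);
        ++ii;
    }
    assert(ii == slice_stub_length(s));
    return 0;
}

The reversed case compares against stop with >, which is why the asserts in the diff allow stop to go down to -1: that is the only way a reversed slice can include index 0.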
warp/native/temp_buffer.h CHANGED
@@ -26,13 +26,13 @@ template <typename T = char> struct ScopedTemporary
 {
 
     ScopedTemporary(void *context, size_t size)
-        : m_context(context), m_buffer(static_cast<T*>(alloc_device(m_context, size * sizeof(T))))
+        : m_context(context), m_buffer(static_cast<T*>(wp_alloc_device(m_context, size * sizeof(T))))
     {
     }
 
     ~ScopedTemporary()
     {
-        free_device(m_context, m_buffer);
+        wp_free_device(m_context, m_buffer);
    }
 
     T *buffer() const
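
Note on the temp_buffer.h hunk: the only change is the wp_ prefix added to the native allocator entry points (alloc_device → wp_alloc_device, free_device → wp_free_device); the RAII behavior of ScopedTemporary is unchanged. A hypothetical usage sketch, assuming a valid device context pointer ctx and eliding the actual kernel launch:

// Hypothetical usage of the ScopedTemporary helper patched above.
// #include "temp_buffer.h"   // the header shown in this hunk
void run_with_scratch(void* ctx)
{
    // Constructor calls wp_alloc_device(ctx, 256 * sizeof(float)).
    ScopedTemporary<float> scratch(ctx, 256);
    float* d_ptr = scratch.buffer();
    // ... launch device work that reads/writes d_ptr ...
}   // Destructor calls wp_free_device(ctx, d_ptr) when scratch leaves scope.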