warp-lang 1.8.0__py3-none-macosx_10_13_universal2.whl → 1.9.0__py3-none-macosx_10_13_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- warp/__init__.py +282 -103
- warp/__init__.pyi +482 -110
- warp/bin/libwarp-clang.dylib +0 -0
- warp/bin/libwarp.dylib +0 -0
- warp/build.py +93 -30
- warp/build_dll.py +48 -63
- warp/builtins.py +955 -137
- warp/codegen.py +327 -209
- warp/config.py +1 -1
- warp/context.py +1363 -800
- warp/examples/core/example_marching_cubes.py +1 -0
- warp/examples/core/example_render_opengl.py +100 -3
- warp/examples/fem/example_apic_fluid.py +98 -52
- warp/examples/fem/example_convection_diffusion_dg.py +25 -4
- warp/examples/fem/example_diffusion_mgpu.py +8 -3
- warp/examples/fem/utils.py +68 -22
- warp/examples/interop/example_jax_callable.py +34 -4
- warp/examples/interop/example_jax_kernel.py +27 -1
- warp/fabric.py +1 -1
- warp/fem/cache.py +27 -19
- warp/fem/domain.py +2 -2
- warp/fem/field/nodal_field.py +2 -2
- warp/fem/field/virtual.py +266 -166
- warp/fem/geometry/geometry.py +5 -5
- warp/fem/integrate.py +200 -91
- warp/fem/space/restriction.py +4 -0
- warp/fem/space/shape/tet_shape_function.py +3 -10
- warp/jax_experimental/custom_call.py +1 -1
- warp/jax_experimental/ffi.py +203 -54
- warp/marching_cubes.py +708 -0
- warp/native/array.h +103 -8
- warp/native/builtin.h +90 -9
- warp/native/bvh.cpp +64 -28
- warp/native/bvh.cu +58 -58
- warp/native/bvh.h +2 -2
- warp/native/clang/clang.cpp +7 -7
- warp/native/coloring.cpp +13 -3
- warp/native/crt.cpp +2 -2
- warp/native/crt.h +3 -5
- warp/native/cuda_util.cpp +42 -11
- warp/native/cuda_util.h +10 -4
- warp/native/exports.h +1842 -1908
- warp/native/fabric.h +2 -1
- warp/native/hashgrid.cpp +37 -37
- warp/native/hashgrid.cu +2 -2
- warp/native/initializer_array.h +1 -1
- warp/native/intersect.h +4 -4
- warp/native/mat.h +1913 -119
- warp/native/mathdx.cpp +43 -43
- warp/native/mesh.cpp +24 -24
- warp/native/mesh.cu +26 -26
- warp/native/mesh.h +5 -3
- warp/native/nanovdb/GridHandle.h +179 -12
- warp/native/nanovdb/HostBuffer.h +8 -7
- warp/native/nanovdb/NanoVDB.h +517 -895
- warp/native/nanovdb/NodeManager.h +323 -0
- warp/native/nanovdb/PNanoVDB.h +2 -2
- warp/native/quat.h +337 -16
- warp/native/rand.h +7 -7
- warp/native/range.h +7 -1
- warp/native/reduce.cpp +10 -10
- warp/native/reduce.cu +13 -14
- warp/native/runlength_encode.cpp +2 -2
- warp/native/runlength_encode.cu +5 -5
- warp/native/scan.cpp +3 -3
- warp/native/scan.cu +4 -4
- warp/native/sort.cpp +10 -10
- warp/native/sort.cu +22 -22
- warp/native/sparse.cpp +8 -8
- warp/native/sparse.cu +14 -14
- warp/native/spatial.h +366 -17
- warp/native/svd.h +23 -8
- warp/native/temp_buffer.h +2 -2
- warp/native/tile.h +303 -70
- warp/native/tile_radix_sort.h +5 -1
- warp/native/tile_reduce.h +16 -25
- warp/native/tuple.h +2 -2
- warp/native/vec.h +385 -18
- warp/native/volume.cpp +54 -54
- warp/native/volume.cu +1 -1
- warp/native/volume.h +2 -1
- warp/native/volume_builder.cu +30 -37
- warp/native/warp.cpp +150 -149
- warp/native/warp.cu +337 -193
- warp/native/warp.h +227 -226
- warp/optim/linear.py +736 -271
- warp/render/imgui_manager.py +289 -0
- warp/render/render_opengl.py +137 -57
- warp/render/render_usd.py +0 -1
- warp/sim/collide.py +1 -2
- warp/sim/graph_coloring.py +2 -2
- warp/sim/integrator_vbd.py +10 -2
- warp/sparse.py +559 -176
- warp/tape.py +2 -0
- warp/tests/aux_test_module_aot.py +7 -0
- warp/tests/cuda/test_async.py +3 -3
- warp/tests/cuda/test_conditional_captures.py +101 -0
- warp/tests/geometry/test_marching_cubes.py +233 -12
- warp/tests/sim/test_cloth.py +89 -6
- warp/tests/sim/test_coloring.py +82 -7
- warp/tests/test_array.py +56 -5
- warp/tests/test_assert.py +53 -0
- warp/tests/test_atomic_cas.py +127 -114
- warp/tests/test_codegen.py +3 -2
- warp/tests/test_context.py +8 -15
- warp/tests/test_enum.py +136 -0
- warp/tests/test_examples.py +2 -2
- warp/tests/test_fem.py +45 -2
- warp/tests/test_fixedarray.py +229 -0
- warp/tests/test_func.py +18 -15
- warp/tests/test_future_annotations.py +7 -5
- warp/tests/test_linear_solvers.py +30 -0
- warp/tests/test_map.py +1 -1
- warp/tests/test_mat.py +1540 -378
- warp/tests/test_mat_assign_copy.py +178 -0
- warp/tests/test_mat_constructors.py +574 -0
- warp/tests/test_module_aot.py +287 -0
- warp/tests/test_print.py +69 -0
- warp/tests/test_quat.py +162 -34
- warp/tests/test_quat_assign_copy.py +145 -0
- warp/tests/test_reload.py +2 -1
- warp/tests/test_sparse.py +103 -0
- warp/tests/test_spatial.py +140 -34
- warp/tests/test_spatial_assign_copy.py +160 -0
- warp/tests/test_static.py +48 -0
- warp/tests/test_struct.py +43 -3
- warp/tests/test_tape.py +38 -0
- warp/tests/test_types.py +0 -20
- warp/tests/test_vec.py +216 -441
- warp/tests/test_vec_assign_copy.py +143 -0
- warp/tests/test_vec_constructors.py +325 -0
- warp/tests/tile/test_tile.py +206 -152
- warp/tests/tile/test_tile_cholesky.py +605 -0
- warp/tests/tile/test_tile_load.py +169 -0
- warp/tests/tile/test_tile_mathdx.py +2 -558
- warp/tests/tile/test_tile_matmul.py +179 -0
- warp/tests/tile/test_tile_mlp.py +1 -1
- warp/tests/tile/test_tile_reduce.py +100 -11
- warp/tests/tile/test_tile_shared_memory.py +16 -16
- warp/tests/tile/test_tile_sort.py +59 -55
- warp/tests/unittest_suites.py +16 -0
- warp/tests/walkthrough_debug.py +1 -1
- warp/thirdparty/unittest_parallel.py +108 -9
- warp/types.py +554 -264
- warp/utils.py +68 -86
- {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/METADATA +28 -65
- {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/RECORD +150 -138
- warp/native/marching.cpp +0 -19
- warp/native/marching.cu +0 -514
- warp/native/marching.h +0 -19
- {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/WHEEL +0 -0
- {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/licenses/LICENSE.md +0 -0
- {warp_lang-1.8.0.dist-info → warp_lang-1.9.0.dist-info}/top_level.txt +0 -0
warp/native/vec.h
CHANGED

@@ -25,7 +25,7 @@ namespace wp
 template<unsigned Length, typename Type>
 struct vec_t
 {
-    Type c[Length];
+    Type c[Length < 1 ? 1 : Length];

     inline CUDA_CALLABLE vec_t()
         : c()
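Note on the c[Length < 1 ? 1 : Length] change: zero-length arrays are ill-formed in standard C++, so clamping the storage size to at least one element keeps a degenerate vec_t<0, T> instantiable (presumably relevant now that slicing can produce very short vectors). A minimal standalone sketch of the same pattern, independent of Warp's headers:

#include <cstdio>

// Sketch of the clamped-storage pattern: a zero-length template
// parameter still yields a well-formed one-element backing array.
template<unsigned Length, typename Type>
struct tiny_vec
{
    Type c[Length < 1 ? 1 : Length]; // "Type c[Length]" is ill-formed when Length == 0

    static constexpr unsigned size() { return Length; } // logical element count
};

int main()
{
    tiny_vec<0, float> empty;                 // compiles thanks to the clamp
    tiny_vec<3, float> v{{1.0f, 2.0f, 3.0f}};
    std::printf("%u %u %g\n", empty.size(), v.size(), static_cast<double>(v.c[2]));
    return 0;
}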
@@ -343,6 +343,17 @@ inline CUDA_CALLABLE vec_t<Length, Type> add(vec_t<Length, Type> a, vec_t<Length
     return ret;
 }

+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE vec_t<Length, Type> add(Type a, vec_t<Length, Type> b)
+{
+    vec_t<Length, Type> ret;
+    for( unsigned i=0; i < Length; ++i )
+    {
+        ret[i] = a + b[i];
+    }
+    return ret;
+}
+
 template<typename Type>
 inline CUDA_CALLABLE vec_t<2, Type> add(vec_t<2, Type> a, vec_t<2, Type> b)
 {
@@ -367,6 +378,18 @@ inline CUDA_CALLABLE vec_t<Length, Type> sub(vec_t<Length, Type> a, vec_t<Length
     return ret;
 }

+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE vec_t<Length, Type> sub(Type a, vec_t<Length, Type> b)
+{
+    vec_t<Length, Type> ret;
+    for (unsigned i=0; i < Length; ++i)
+    {
+        ret[i] = Type(a - b[i]);
+    }
+
+    return ret;
+}
+
 template<typename Type>
 inline CUDA_CALLABLE vec_t<2, Type> sub(vec_t<2, Type> a, vec_t<2, Type> b)
 {
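The two hunks above add left-scalar broadcasting overloads of add() and sub(); the vector-vector forms already existed. A host-side sketch of the same semantics on std::array rather than Warp's vec_t:

#include <array>
#include <cstddef>
#include <cstdio>

// Broadcast sketch: apply a scalar on the left of every component,
// mirroring the new add(Type, vec_t) / sub(Type, vec_t) overloads.
template<typename Type, std::size_t Length>
std::array<Type, Length> broadcast_sub(Type a, const std::array<Type, Length>& b)
{
    std::array<Type, Length> ret{};
    for (std::size_t i = 0; i < Length; ++i)
    {
        ret[i] = Type(a - b[i]); // same form as the sub() overload above
    }
    return ret;
}

int main()
{
    const std::array<float, 3> b{1.0f, 2.0f, 3.0f};
    const auto r = broadcast_sub(10.0f, b); // -> (9, 8, 7)
    std::printf("%g %g %g\n", r[0], r[1], r[2]);
    return 0;
}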
@@ -440,27 +463,64 @@ template<unsigned Length, typename Type>
 inline CUDA_CALLABLE Type extract(const vec_t<Length, Type> & a, int idx)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= (int)Length)
+    if (idx < -(int)Length || idx >= (int)Length)
     {
         printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif

+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
     return a[idx];
 }

+template<unsigned SliceLength, unsigned Length, typename Type>
+inline CUDA_CALLABLE vec_t<SliceLength, Type> extract(const vec_t<Length, Type> & a, slice_t slice)
+{
+    vec_t<SliceLength, Type> ret;
+
+    assert(slice.start >= 0 && slice.start <= (int)Length);
+    assert(slice.stop >= -1 && slice.stop <= (int)Length);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        ret[ii] = a[i];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+    return ret;
+}
+
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE Type* index(vec_t<Length, Type>& v, int idx)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= (int)Length)
+    if (idx < -(int)Length || idx >= (int)Length)
     {
         printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif

+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
     return &v[idx];
 }

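Taken together, extract(), index(), and indexref() now accept Python-style negative indices: any idx in [-Length, Length) passes the debug check, and negative values wrap to idx + Length. The new slice overload of extract() walks start, start+step, ... up to (and excluding) stop. A small self-contained sketch of both rules; slice_t and slice_get_length are defined elsewhere in Warp and not shown in this diff, so the slice_t stand-in below is an assumption based on the asserts:

#include <cassert>
#include <cstdio>

// Hypothetical stand-in for Warp's slice_t (not shown in this diff):
// start/stop/step with Python semantics, stop exclusive.
struct slice_t { int start, stop, step; };

// Python-style index wrap, as in the new extract()/index() bounds logic.
int wrap_index(int idx, int length)
{
    assert(idx >= -length && idx < length);
    return idx < 0 ? idx + length : idx;
}

int main()
{
    const float a[5] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f};

    std::printf("a[-1] -> a[%d] = %g\n", wrap_index(-1, 5), a[wrap_index(-1, 5)]);

    // Walk a reversed slice {4, 0, -2} -> elements 4, 2, matching the
    // is_reversed loop in the slice extract() overload.
    slice_t s{4, 0, -2};
    for (int i = s.start; (s.step < 0) ? (i > s.stop) : (i < s.stop); i += s.step)
        std::printf("a[%d] = %g\n", i, a[i]);
    return 0;
}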
@@ -468,13 +528,18 @@ template<unsigned Length, typename Type>
 inline CUDA_CALLABLE Type* indexref(vec_t<Length, Type>* v, int idx)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= (int)Length)
+    if (idx < -(int)Length || idx >= (int)Length)
     {
         printf("vec store %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif

+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
     return &((*v)[idx]);
 }

@@ -498,120 +563,325 @@ template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void add_inplace(vec_t<Length, Type>& v, int idx, Type value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= (int)Length)
+    if (idx < -(int)Length || idx >= (int)Length)
     {
         printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif

+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
     v[idx] += value;
 }


+template<unsigned SliceLength, unsigned Length, typename Type>
+inline CUDA_CALLABLE void add_inplace(vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a)
+{
+    assert(slice.start >= 0 && slice.start <= (int)Length);
+    assert(slice.stop >= -1 && slice.stop <= (int)Length);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        v[i] += a[ii];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
+
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void adj_add_inplace(vec_t<Length, Type>& v, int idx, Type value,
                                           vec_t<Length, Type>& adj_v, int adj_idx, Type& adj_value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= (int)Length)
+    if (idx < -(int)Length || idx >= (int)Length)
     {
         printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif

+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
     adj_value += adj_v[idx];
 }


+template<unsigned SliceLength, unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_add_inplace(
+    const vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a,
+    vec_t<Length, Type>& adj_v, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
+)
+{
+    assert(slice.start >= 0 && slice.start <= (int)Length);
+    assert(slice.stop >= -1 && slice.stop <= (int)Length);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        adj_a[ii] += adj_v[i];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
+
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void sub_inplace(vec_t<Length, Type>& v, int idx, Type value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= (int)Length)
+    if (idx < -(int)Length || idx >= (int)Length)
     {
         printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif

+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
     v[idx] -= value;
 }


+template<unsigned SliceLength, unsigned Length, typename Type>
+inline CUDA_CALLABLE void sub_inplace(vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a)
+{
+    assert(slice.start >= 0 && slice.start <= (int)Length);
+    assert(slice.stop >= -1 && slice.stop <= (int)Length);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        v[i] -= a[ii];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
+
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void adj_sub_inplace(vec_t<Length, Type>& v, int idx, Type value,
                                           vec_t<Length, Type>& adj_v, int adj_idx, Type& adj_value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= (int)Length)
+    if (idx < -(int)Length || idx >= (int)Length)
     {
         printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif

+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
     adj_value -= adj_v[idx];
 }


+template<unsigned SliceLength, unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_sub_inplace(
+    const vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a,
+    vec_t<Length, Type>& adj_v, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
+)
+{
+    assert(slice.start >= 0 && slice.start <= (int)Length);
+    assert(slice.stop >= -1 && slice.stop <= (int)Length);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        adj_a[ii] -= adj_v[i];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
+
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void assign_inplace(vec_t<Length, Type>& v, int idx, Type value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= (int)Length)
+    if (idx < -(int)Length || idx >= (int)Length)
     {
         printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif

+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
     v[idx] = value;
 }

+template<unsigned SliceLength, unsigned Length, typename Type>
+inline CUDA_CALLABLE void assign_inplace(vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a)
+{
+    assert(slice.start >= 0 && slice.start <= (int)Length);
+    assert(slice.stop >= -1 && slice.stop <= (int)Length);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        v[i] = a[ii];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void adj_assign_inplace(vec_t<Length, Type>& v, int idx, Type value, vec_t<Length, Type>& adj_v, int& adj_idx, Type& adj_value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= (int)Length)
+    if (idx < -(int)Length || idx >= (int)Length)
     {
         printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif

+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
     adj_value += adj_v[idx];
 }

+template<unsigned SliceLength, unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_assign_inplace(
+    const vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a,
+    vec_t<Length, Type>& adj_v, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
+)
+{
+    assert(slice.start >= 0 && slice.start <= (int)Length);
+    assert(slice.stop >= -1 && slice.stop <= (int)Length);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        adj_a[ii] += adj_v[i];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+

 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE vec_t<Length, Type> assign_copy(vec_t<Length, Type>& v, int idx, Type value)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= (int)Length)
+    if (idx < -(int)Length || idx >= (int)Length)
     {
         printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif

+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
     vec_t<Length, Type> ret(v);
     ret[idx] = value;
     return ret;
 }

+template<unsigned SliceLength, unsigned Length, typename Type>
+inline CUDA_CALLABLE vec_t<Length, Type> assign_copy(vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a)
+{
+    vec_t<Length, Type> ret(v);
+    assign_inplace<SliceLength>(ret, slice, a);
+    return ret;
+}
+
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void adj_assign_copy(vec_t<Length, Type>& v, int idx, Type value, vec_t<Length, Type>& adj_v, int& adj_idx, Type& adj_value, const vec_t<Length, Type>& adj_ret)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= (int)Length)
+    if (idx < -(int)Length || idx >= (int)Length)
     {
         printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif

+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
     adj_value += adj_ret[idx];
     for(unsigned i=0; i < Length; ++i)
     {
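All of the *_inplace slice overloads above share one iteration pattern: visit start, start+step, ... until stop (exclusive), counting visited elements in ii and asserting the count equals the compile-time SliceLength. A sketch of computing that count under the same assumptions the asserts encode; this is what the slice_get_length helper (not shown in this diff) would have to return:

#include <cassert>
#include <cstdio>

struct slice_t { int start, stop, step; }; // hypothetical stand-in, as before

// Number of iterations of: for (i = start; step < 0 ? i > stop : i < stop; i += step)
int slice_length(slice_t s)
{
    assert(s.step != 0);
    if (s.step > 0)
        return s.start < s.stop ? (s.stop - s.start + s.step - 1) / s.step : 0;
    return s.start > s.stop ? (s.start - s.stop + (-s.step) - 1) / (-s.step) : 0;
}

int main()
{
    std::printf("%d\n", slice_length({0, 5, 2}));   // elements 0, 2, 4 -> 3
    std::printf("%d\n", slice_length({4, -1, -2})); // elements 4, 2, 0 -> 3
    std::printf("%d\n", slice_length({3, 3, 1}));   // empty -> 0
    return 0;
}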
@@ -620,6 +890,40 @@ inline CUDA_CALLABLE void adj_assign_copy(vec_t<Length, Type>& v, int idx, Type
     }
 }

+template<unsigned SliceLength, unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_assign_copy(
+    vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a,
+    vec_t<Length, Type>& adj_v, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a,
+    const vec_t<Length, Type>& adj_ret)
+{
+    assert(slice.start >= 0 && slice.start <= (int)Length);
+    assert(slice.stop >= -1 && slice.stop <= (int)Length);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (int i = 0; i < Length; ++i)
+    {
+        bool in_slice = is_reversed
+            ? (i <= slice.start && i > slice.stop && (slice.start - i) % (-slice.step) == 0)
+            : (i >= slice.start && i < slice.stop && (i - slice.start) % slice.step == 0);
+
+        if (!in_slice)
+        {
+            adj_v[i] += adj_ret[i];
+        }
+        else
+        {
+            adj_a[ii] += adj_ret[i];
+            ++ii;
+        }
+    }
+
+    assert(ii == SliceLength);
+}
+
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE Type length(vec_t<Length, Type> a)
 {
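The slice adj_assign_copy above routes gradients for assign_copy: components the slice overwrote came from a, so their incoming gradient belongs to adj_a; all others pass through to adj_v. Slice membership is the modulus check on the stride. A sketch of that predicate in isolation, again using the hypothetical slice_t stand-in:

#include <cstdio>

struct slice_t { int start, stop, step; }; // hypothetical stand-in, as before

// Membership test used by the slice adj_assign_copy: does plain index i
// land on the slice's stride?
bool in_slice(int i, slice_t s)
{
    if (s.step < 0)
        return i <= s.start && i > s.stop && (s.start - i) % (-s.step) == 0;
    return i >= s.start && i < s.stop && (i - s.start) % s.step == 0;
}

int main()
{
    // For a 5-vector with slice {0, 5, 2}: positions 0, 2, 4 were overwritten
    // by `a`, so their incoming gradient belongs to adj_a; 1, 3 stay with adj_v.
    slice_t s{0, 5, 2};
    for (int i = 0; i < 5; ++i)
        std::printf("i=%d -> %s\n", i, in_slice(i, s) ? "adj_a" : "adj_v");
    return 0;
}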
@@ -969,11 +1273,11 @@ template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void adj_div(Type s, vec_t<Length, Type> a, Type& adj_s, vec_t<Length, Type>& adj_a, const vec_t<Length, Type>& adj_ret)
 {

-
-
-    for( unsigned i=0; i < Length; ++i )
+    for (unsigned i=0; i < Length; ++i)
     {
-
+        Type inv = Type(1) / a[i];
+        adj_a[i] -= s * adj_ret[i] * inv * inv;
+        adj_s += adj_ret[i] * inv;
     }

 #if FP_CHECK
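The rewritten adj_div body is the reverse-mode rule for component-wise scalar-by-vector division r_i = s / a_i; as a math check (bars denote adjoints, matching inv = 1/a[i] in the code):

\[
r_i = \frac{s}{a_i}, \qquad
\frac{\partial r_i}{\partial s} = \frac{1}{a_i}
\;\Longrightarrow\;
\bar{s} \mathrel{{+}{=}} \frac{\bar{r}_i}{a_i}, \qquad
\frac{\partial r_i}{\partial a_i} = -\frac{s}{a_i^{2}}
\;\Longrightarrow\;
\bar{a}_i \mathrel{{-}{=}} \frac{s\,\bar{r}_i}{a_i^{2}}
\]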
@@ -999,6 +1303,21 @@ inline CUDA_CALLABLE void adj_add(vec_t<Length, Type> a, vec_t<Length, Type> b,
     adj_b += adj_ret;
 }

+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_add(
+    Type a, vec_t<Length, Type> b,
+    Type& adj_a, vec_t<Length, Type>& adj_b,
+    const vec_t<Length, Type>& adj_ret
+)
+{
+    for (unsigned i = 0; i < Length; ++i)
+    {
+        adj_a += adj_ret.c[i];
+    }
+
+    adj_b += adj_ret;
+}
+
 template<typename Type>
 inline CUDA_CALLABLE void adj_add(vec_t<2, Type> a, vec_t<2, Type> b, vec_t<2, Type>& adj_a, vec_t<2, Type>& adj_b, const vec_t<2, Type>& adj_ret)
 {
@@ -1026,6 +1345,21 @@ inline CUDA_CALLABLE void adj_sub(vec_t<Length, Type> a, vec_t<Length, Type> b,
     adj_b -= adj_ret;
 }

+template<unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_sub(
+    Type a, vec_t<Length, Type> b,
+    Type& adj_a, vec_t<Length, Type>& adj_b,
+    const vec_t<Length, Type>& adj_ret
+)
+{
+    for (unsigned i = 0; i < Length; ++i)
+    {
+        adj_a += adj_ret.c[i];
+    }
+
+    adj_b -= adj_ret;
+}
+
 template<typename Type>
 inline CUDA_CALLABLE void adj_sub(vec_t<2, Type> a, vec_t<2, Type> b, vec_t<2, Type>& adj_a, vec_t<2, Type>& adj_b, const vec_t<2, Type>& adj_ret)
 {
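In both new adjoints the scalar operand fans out to every component, so its gradient is the sum over the output gradient's components, while the vector operand's gradient passes through (sign-flipped for sub). In symbols:

\[
r_i = a + b_i:\quad \bar{a} = \sum_{i=0}^{L-1} \bar{r}_i, \qquad \bar{b} = \bar{r}
\qquad\qquad
r_i = a - b_i:\quad \bar{a} = \sum_{i=0}^{L-1} \bar{r}_i, \qquad \bar{b} = -\,\bar{r}
\]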
@@ -1106,16 +1440,49 @@ template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void adj_extract(const vec_t<Length, Type> & a, int idx, vec_t<Length, Type> & adj_a, int & adj_idx, Type & adj_ret)
 {
 #ifndef NDEBUG
-    if (idx < 0 || idx >= (int)Length)
+    if (idx < -(int)Length || idx >= (int)Length)
     {
-        printf("
+        printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
         assert(0);
     }
 #endif

+    if (idx < 0)
+    {
+        idx += Length;
+    }
+
     adj_a[idx] += adj_ret;
 }

+template<unsigned SliceLength, unsigned Length, typename Type>
+inline CUDA_CALLABLE void adj_extract(
+    const vec_t<Length, Type>& a, slice_t slice,
+    vec_t<Length, Type>& adj_a, slice_t& adj_slice,
+    const vec_t<SliceLength, Type>& adj_ret
+)
+{
+    assert(slice.start >= 0 && slice.start <= (int)Length);
+    assert(slice.stop >= -1 && slice.stop <= (int)Length);
+    assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
+    assert(slice_get_length(slice) == SliceLength);
+
+    bool is_reversed = slice.step < 0;
+
+    int ii = 0;
+    for (
+        int i = slice.start;
+        is_reversed ? (i > slice.stop) : (i < slice.stop);
+        i += slice.step
+    )
+    {
+        adj_a[i] += adj_ret[ii];
+        ++ii;
+    }
+
+    assert(ii == SliceLength);
+}
+
 template<unsigned Length, typename Type>
 inline CUDA_CALLABLE void adj_length(vec_t<Length, Type> a, Type ret, vec_t<Length, Type>& adj_a, const Type adj_ret)
 {
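The slice adj_extract above is the scatter that reverses the gather in extract(): each output-gradient component accumulates back into the input-gradient slot it was read from. A standalone sketch of that scatter, with the same hypothetical slice_t stand-in as earlier:

#include <cstdio>

struct slice_t { int start, stop, step; }; // hypothetical stand-in, as before

int main()
{
    // Reverse of the slice extract(): scatter the 2-element output gradient
    // back into the 5-element input gradient along slice {1, 5, 2} (-> 1, 3).
    slice_t s{1, 5, 2};
    float adj_ret[2] = {10.0f, 20.0f};
    float adj_a[5] = {};

    int ii = 0;
    for (int i = s.start; (s.step < 0) ? (i > s.stop) : (i < s.stop); i += s.step)
        adj_a[i] += adj_ret[ii++];

    for (float g : adj_a)
        std::printf("%g ", g); // 0 10 0 20 0
    std::printf("\n");
    return 0;
}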