warp-lang 1.8.1__py3-none-manylinux_2_34_aarch64.whl → 1.9.1__py3-none-manylinux_2_34_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +282 -103
- warp/__init__.pyi +1904 -114
- warp/bin/warp-clang.so +0 -0
- warp/bin/warp.so +0 -0
- warp/build.py +93 -30
- warp/build_dll.py +331 -101
- warp/builtins.py +1244 -160
- warp/codegen.py +317 -206
- warp/config.py +1 -1
- warp/context.py +1465 -789
- warp/examples/core/example_marching_cubes.py +1 -0
- warp/examples/core/example_render_opengl.py +100 -3
- warp/examples/fem/example_apic_fluid.py +98 -52
- warp/examples/fem/example_convection_diffusion_dg.py +25 -4
- warp/examples/fem/example_diffusion_mgpu.py +8 -3
- warp/examples/fem/utils.py +68 -22
- warp/examples/interop/example_jax_kernel.py +2 -1
- warp/fabric.py +1 -1
- warp/fem/cache.py +27 -19
- warp/fem/domain.py +2 -2
- warp/fem/field/nodal_field.py +2 -2
- warp/fem/field/virtual.py +264 -166
- warp/fem/geometry/geometry.py +5 -5
- warp/fem/integrate.py +129 -51
- warp/fem/space/restriction.py +4 -0
- warp/fem/space/shape/tet_shape_function.py +3 -10
- warp/jax_experimental/custom_call.py +25 -2
- warp/jax_experimental/ffi.py +22 -1
- warp/jax_experimental/xla_ffi.py +16 -7
- warp/marching_cubes.py +708 -0
- warp/native/array.h +99 -4
- warp/native/builtin.h +86 -9
- warp/native/bvh.cpp +64 -28
- warp/native/bvh.cu +58 -58
- warp/native/bvh.h +2 -2
- warp/native/clang/clang.cpp +7 -7
- warp/native/coloring.cpp +8 -2
- warp/native/crt.cpp +2 -2
- warp/native/crt.h +3 -5
- warp/native/cuda_util.cpp +41 -10
- warp/native/cuda_util.h +10 -4
- warp/native/exports.h +1842 -1908
- warp/native/fabric.h +2 -1
- warp/native/hashgrid.cpp +37 -37
- warp/native/hashgrid.cu +2 -2
- warp/native/initializer_array.h +1 -1
- warp/native/intersect.h +2 -2
- warp/native/mat.h +1910 -116
- warp/native/mathdx.cpp +43 -43
- warp/native/mesh.cpp +24 -24
- warp/native/mesh.cu +26 -26
- warp/native/mesh.h +4 -2
- warp/native/nanovdb/GridHandle.h +179 -12
- warp/native/nanovdb/HostBuffer.h +8 -7
- warp/native/nanovdb/NanoVDB.h +517 -895
- warp/native/nanovdb/NodeManager.h +323 -0
- warp/native/nanovdb/PNanoVDB.h +2 -2
- warp/native/quat.h +331 -14
- warp/native/range.h +7 -1
- warp/native/reduce.cpp +10 -10
- warp/native/reduce.cu +13 -14
- warp/native/runlength_encode.cpp +2 -2
- warp/native/runlength_encode.cu +5 -5
- warp/native/scan.cpp +3 -3
- warp/native/scan.cu +4 -4
- warp/native/sort.cpp +10 -10
- warp/native/sort.cu +40 -31
- warp/native/sort.h +2 -0
- warp/native/sparse.cpp +8 -8
- warp/native/sparse.cu +13 -13
- warp/native/spatial.h +366 -17
- warp/native/temp_buffer.h +2 -2
- warp/native/tile.h +471 -82
- warp/native/vec.h +328 -14
- warp/native/volume.cpp +54 -54
- warp/native/volume.cu +1 -1
- warp/native/volume.h +2 -1
- warp/native/volume_builder.cu +30 -37
- warp/native/warp.cpp +150 -149
- warp/native/warp.cu +377 -216
- warp/native/warp.h +227 -226
- warp/optim/linear.py +736 -271
- warp/render/imgui_manager.py +289 -0
- warp/render/render_opengl.py +99 -18
- warp/render/render_usd.py +1 -0
- warp/sim/graph_coloring.py +2 -2
- warp/sparse.py +558 -175
- warp/tests/aux_test_module_aot.py +7 -0
- warp/tests/cuda/test_async.py +3 -3
- warp/tests/cuda/test_conditional_captures.py +101 -0
- warp/tests/geometry/test_hash_grid.py +38 -0
- warp/tests/geometry/test_marching_cubes.py +233 -12
- warp/tests/interop/test_jax.py +608 -28
- warp/tests/sim/test_coloring.py +6 -6
- warp/tests/test_array.py +58 -5
- warp/tests/test_codegen.py +4 -3
- warp/tests/test_context.py +8 -15
- warp/tests/test_enum.py +136 -0
- warp/tests/test_examples.py +2 -2
- warp/tests/test_fem.py +49 -6
- warp/tests/test_fixedarray.py +229 -0
- warp/tests/test_func.py +18 -15
- warp/tests/test_future_annotations.py +7 -5
- warp/tests/test_linear_solvers.py +30 -0
- warp/tests/test_map.py +15 -1
- warp/tests/test_mat.py +1518 -378
- warp/tests/test_mat_assign_copy.py +178 -0
- warp/tests/test_mat_constructors.py +574 -0
- warp/tests/test_module_aot.py +287 -0
- warp/tests/test_print.py +69 -0
- warp/tests/test_quat.py +140 -34
- warp/tests/test_quat_assign_copy.py +145 -0
- warp/tests/test_reload.py +2 -1
- warp/tests/test_sparse.py +71 -0
- warp/tests/test_spatial.py +140 -34
- warp/tests/test_spatial_assign_copy.py +160 -0
- warp/tests/test_struct.py +43 -3
- warp/tests/test_tuple.py +96 -0
- warp/tests/test_types.py +61 -20
- warp/tests/test_vec.py +179 -34
- warp/tests/test_vec_assign_copy.py +143 -0
- warp/tests/tile/test_tile.py +245 -18
- warp/tests/tile/test_tile_cholesky.py +605 -0
- warp/tests/tile/test_tile_load.py +169 -0
- warp/tests/tile/test_tile_mathdx.py +2 -558
- warp/tests/tile/test_tile_matmul.py +1 -1
- warp/tests/tile/test_tile_mlp.py +1 -1
- warp/tests/tile/test_tile_shared_memory.py +5 -5
- warp/tests/unittest_suites.py +6 -0
- warp/tests/walkthrough_debug.py +1 -1
- warp/thirdparty/unittest_parallel.py +108 -9
- warp/types.py +571 -267
- warp/utils.py +68 -86
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/METADATA +29 -69
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/RECORD +138 -128
- warp/native/marching.cpp +0 -19
- warp/native/marching.cu +0 -514
- warp/native/marching.h +0 -19
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/WHEEL +0 -0
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/licenses/LICENSE.md +0 -0
- {warp_lang-1.8.1.dist-info → warp_lang-1.9.1.dist-info}/top_level.txt +0 -0
warp/native/spatial.h
CHANGED
|
@@ -34,7 +34,7 @@ CUDA_CALLABLE inline Type spatial_dot(const spatial_vector_t<Type>& a, const spa
|
|
|
34
34
|
template<typename Type>
|
|
35
35
|
CUDA_CALLABLE inline vec_t<3,Type> &w_vec( spatial_vector_t<Type>& a )
|
|
36
36
|
{
|
|
37
|
-
return *
|
|
37
|
+
return *reinterpret_cast<vec_t<3,Type>*>(&a);
|
|
38
38
|
}
|
|
39
39
|
|
|
40
40
|
template<typename Type>
|
|
@@ -46,14 +46,14 @@ CUDA_CALLABLE inline vec_t<3,Type> &v_vec( spatial_vector_t<Type>& a )
|
|
|
46
46
|
template<typename Type>
|
|
47
47
|
CUDA_CALLABLE inline const vec_t<3,Type> &w_vec( const spatial_vector_t<Type>& a )
|
|
48
48
|
{
|
|
49
|
-
spatial_vector_t<Type> &non_const_vec = *
|
|
49
|
+
spatial_vector_t<Type> &non_const_vec = *reinterpret_cast<spatial_vector_t<Type>*>(const_cast<Type*>(&a.c[0]));
|
|
50
50
|
return w_vec(non_const_vec);
|
|
51
51
|
}
|
|
52
52
|
|
|
53
53
|
template<typename Type>
|
|
54
54
|
CUDA_CALLABLE inline const vec_t<3,Type> &v_vec( const spatial_vector_t<Type>& a )
|
|
55
55
|
{
|
|
56
|
-
spatial_vector_t<Type> &non_const_vec = *
|
|
56
|
+
spatial_vector_t<Type> &non_const_vec = *reinterpret_cast<spatial_vector_t<Type>*>(const_cast<Type*>(&a.c[0]));
|
|
57
57
|
return v_vec(non_const_vec);
|
|
58
58
|
}
|
|
59
59
|
|
|
@@ -408,27 +408,64 @@ template<typename Type>
|
|
|
408
408
|
inline CUDA_CALLABLE Type extract(const transform_t<Type>& t, int idx)
|
|
409
409
|
{
|
|
410
410
|
#ifndef NDEBUG
|
|
411
|
-
if (idx <
|
|
411
|
+
if (idx < -7 || idx >= 7)
|
|
412
412
|
{
|
|
413
413
|
printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
|
|
414
414
|
assert(0);
|
|
415
415
|
}
|
|
416
416
|
#endif
|
|
417
|
-
|
|
417
|
+
|
|
418
|
+
if (idx < 0)
|
|
419
|
+
{
|
|
420
|
+
idx += 7;
|
|
421
|
+
}
|
|
422
|
+
|
|
418
423
|
return t[idx];
|
|
419
424
|
}
|
|
420
425
|
|
|
426
|
+
template<unsigned SliceLength, typename Type>
|
|
427
|
+
inline CUDA_CALLABLE vec_t<SliceLength, Type> extract(const transform_t<Type> & t, slice_t slice)
|
|
428
|
+
{
|
|
429
|
+
vec_t<SliceLength, Type> ret;
|
|
430
|
+
|
|
431
|
+
assert(slice.start >= 0 && slice.start <= 7);
|
|
432
|
+
assert(slice.stop >= -1 && slice.stop <= 7);
|
|
433
|
+
assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
|
|
434
|
+
assert(slice_get_length(slice) == SliceLength);
|
|
435
|
+
|
|
436
|
+
bool is_reversed = slice.step < 0;
|
|
437
|
+
|
|
438
|
+
int ii = 0;
|
|
439
|
+
for (
|
|
440
|
+
int i = slice.start;
|
|
441
|
+
is_reversed ? (i > slice.stop) : (i < slice.stop);
|
|
442
|
+
i += slice.step
|
|
443
|
+
)
|
|
444
|
+
{
|
|
445
|
+
ret[ii] = t[i];
|
|
446
|
+
++ii;
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
assert(ii == SliceLength);
|
|
450
|
+
return ret;
|
|
451
|
+
}
|
|
452
|
+
|
|
421
453
|
template<typename Type>
|
|
422
454
|
inline CUDA_CALLABLE Type* index(transform_t<Type>& t, int idx)
|
|
423
455
|
{
|
|
424
456
|
#ifndef NDEBUG
|
|
425
|
-
if (idx <
|
|
457
|
+
if (idx < -7 || idx >= 7)
|
|
426
458
|
{
|
|
427
459
|
printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
|
|
428
460
|
assert(0);
|
|
429
461
|
}
|
|
430
462
|
#endif
|
|
431
463
|
|
|
464
|
+
if (idx < 0)
|
|
465
|
+
{
|
|
466
|
+
idx += 7;
|
|
467
|
+
}
|
|
468
|
+
|
|
432
469
|
return &t[idx];
|
|
433
470
|
}
|
|
434
471
|
|
|
@@ -436,13 +473,18 @@ template<typename Type>
|
|
|
436
473
|
inline CUDA_CALLABLE Type* indexref(transform_t<Type>* t, int idx)
|
|
437
474
|
{
|
|
438
475
|
#ifndef NDEBUG
|
|
439
|
-
if (idx <
|
|
476
|
+
if (idx < -7 || idx >= 7)
|
|
440
477
|
{
|
|
441
|
-
printf("transformation
|
|
478
|
+
printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
|
|
442
479
|
assert(0);
|
|
443
480
|
}
|
|
444
481
|
#endif
|
|
445
482
|
|
|
483
|
+
if (idx < 0)
|
|
484
|
+
{
|
|
485
|
+
idx += 7;
|
|
486
|
+
}
|
|
487
|
+
|
|
446
488
|
return &((*t)[idx]);
|
|
447
489
|
}
|
|
448
490
|
|
|
@@ -452,6 +494,34 @@ inline void CUDA_CALLABLE adj_extract(const transform_t<Type>& t, int idx, trans
|
|
|
452
494
|
adj_t[idx] += adj_ret;
|
|
453
495
|
}
|
|
454
496
|
|
|
497
|
+
template<unsigned SliceLength, typename Type>
|
|
498
|
+
inline CUDA_CALLABLE void adj_extract(
|
|
499
|
+
const transform_t<Type>& t, slice_t slice,
|
|
500
|
+
transform_t<Type>& adj_t, slice_t& adj_slice,
|
|
501
|
+
const vec_t<SliceLength, Type>& adj_ret
|
|
502
|
+
)
|
|
503
|
+
{
|
|
504
|
+
assert(slice.start >= 0 && slice.start <= 7);
|
|
505
|
+
assert(slice.stop >= -1 && slice.stop <= 7);
|
|
506
|
+
assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
|
|
507
|
+
assert(slice_get_length(slice) == SliceLength);
|
|
508
|
+
|
|
509
|
+
bool is_reversed = slice.step < 0;
|
|
510
|
+
|
|
511
|
+
int ii = 0;
|
|
512
|
+
for (
|
|
513
|
+
int i = slice.start;
|
|
514
|
+
is_reversed ? (i > slice.stop) : (i < slice.stop);
|
|
515
|
+
i += slice.step
|
|
516
|
+
)
|
|
517
|
+
{
|
|
518
|
+
adj_t[i] += adj_ret[ii];
|
|
519
|
+
++ii;
|
|
520
|
+
}
|
|
521
|
+
|
|
522
|
+
assert(ii == SliceLength);
|
|
523
|
+
}
|
|
524
|
+
|
|
455
525
|
template<typename Type>
|
|
456
526
|
inline CUDA_CALLABLE void adj_index(transform_t<Type>& t, int idx,
|
|
457
527
|
transform_t<Type>& adj_t, int adj_idx, const Type& adj_value)
|
|
@@ -470,120 +540,325 @@ template<typename Type>
|
|
|
470
540
|
inline CUDA_CALLABLE void add_inplace(transform_t<Type>& t, int idx, Type value)
|
|
471
541
|
{
|
|
472
542
|
#ifndef NDEBUG
|
|
473
|
-
if (idx <
|
|
543
|
+
if (idx < -7 || idx >= 7)
|
|
474
544
|
{
|
|
475
545
|
printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
|
|
476
546
|
assert(0);
|
|
477
547
|
}
|
|
478
548
|
#endif
|
|
479
549
|
|
|
550
|
+
if (idx < 0)
|
|
551
|
+
{
|
|
552
|
+
idx += 7;
|
|
553
|
+
}
|
|
554
|
+
|
|
480
555
|
t[idx] += value;
|
|
481
556
|
}
|
|
482
557
|
|
|
483
558
|
|
|
559
|
+
template<unsigned SliceLength, typename Type>
|
|
560
|
+
inline CUDA_CALLABLE void add_inplace(transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a)
|
|
561
|
+
{
|
|
562
|
+
assert(slice.start >= 0 && slice.start <= 7);
|
|
563
|
+
assert(slice.stop >= -1 && slice.stop <= 7);
|
|
564
|
+
assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
|
|
565
|
+
assert(slice_get_length(slice) == SliceLength);
|
|
566
|
+
|
|
567
|
+
bool is_reversed = slice.step < 0;
|
|
568
|
+
|
|
569
|
+
int ii = 0;
|
|
570
|
+
for (
|
|
571
|
+
int i = slice.start;
|
|
572
|
+
is_reversed ? (i > slice.stop) : (i < slice.stop);
|
|
573
|
+
i += slice.step
|
|
574
|
+
)
|
|
575
|
+
{
|
|
576
|
+
t[i] += a[ii];
|
|
577
|
+
++ii;
|
|
578
|
+
}
|
|
579
|
+
|
|
580
|
+
assert(ii == SliceLength);
|
|
581
|
+
}
|
|
582
|
+
|
|
583
|
+
|
|
484
584
|
template<typename Type>
|
|
485
585
|
inline CUDA_CALLABLE void adj_add_inplace(transform_t<Type>& t, int idx, Type value,
|
|
486
586
|
transform_t<Type>& adj_t, int adj_idx, Type& adj_value)
|
|
487
587
|
{
|
|
488
588
|
#ifndef NDEBUG
|
|
489
|
-
if (idx <
|
|
589
|
+
if (idx < -7 || idx >= 7)
|
|
490
590
|
{
|
|
491
591
|
printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
|
|
492
592
|
assert(0);
|
|
493
593
|
}
|
|
494
594
|
#endif
|
|
495
595
|
|
|
596
|
+
if (idx < 0)
|
|
597
|
+
{
|
|
598
|
+
idx += 7;
|
|
599
|
+
}
|
|
600
|
+
|
|
496
601
|
adj_value += adj_t[idx];
|
|
497
602
|
}
|
|
498
603
|
|
|
499
604
|
|
|
605
|
+
template<unsigned SliceLength, typename Type>
|
|
606
|
+
inline CUDA_CALLABLE void adj_add_inplace(
|
|
607
|
+
const transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a,
|
|
608
|
+
transform_t<Type>& adj_t, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
|
|
609
|
+
)
|
|
610
|
+
{
|
|
611
|
+
assert(slice.start >= 0 && slice.start <= 7);
|
|
612
|
+
assert(slice.stop >= -1 && slice.stop <= 7);
|
|
613
|
+
assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
|
|
614
|
+
assert(slice_get_length(slice) == SliceLength);
|
|
615
|
+
|
|
616
|
+
bool is_reversed = slice.step < 0;
|
|
617
|
+
|
|
618
|
+
int ii = 0;
|
|
619
|
+
for (
|
|
620
|
+
int i = slice.start;
|
|
621
|
+
is_reversed ? (i > slice.stop) : (i < slice.stop);
|
|
622
|
+
i += slice.step
|
|
623
|
+
)
|
|
624
|
+
{
|
|
625
|
+
adj_a[ii] += adj_t[i];
|
|
626
|
+
++ii;
|
|
627
|
+
}
|
|
628
|
+
|
|
629
|
+
assert(ii == SliceLength);
|
|
630
|
+
}
|
|
631
|
+
|
|
632
|
+
|
|
500
633
|
template<typename Type>
|
|
501
634
|
inline CUDA_CALLABLE void sub_inplace(transform_t<Type>& t, int idx, Type value)
|
|
502
635
|
{
|
|
503
636
|
#ifndef NDEBUG
|
|
504
|
-
if (idx <
|
|
637
|
+
if (idx < -7 || idx >= 7)
|
|
505
638
|
{
|
|
506
639
|
printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
|
|
507
640
|
assert(0);
|
|
508
641
|
}
|
|
509
642
|
#endif
|
|
510
643
|
|
|
644
|
+
if (idx < 0)
|
|
645
|
+
{
|
|
646
|
+
idx += 7;
|
|
647
|
+
}
|
|
648
|
+
|
|
511
649
|
t[idx] -= value;
|
|
512
650
|
}
|
|
513
651
|
|
|
514
652
|
|
|
653
|
+
template<unsigned SliceLength, typename Type>
|
|
654
|
+
inline CUDA_CALLABLE void sub_inplace(transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a)
|
|
655
|
+
{
|
|
656
|
+
assert(slice.start >= 0 && slice.start <= 7);
|
|
657
|
+
assert(slice.stop >= -1 && slice.stop <= 7);
|
|
658
|
+
assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
|
|
659
|
+
assert(slice_get_length(slice) == SliceLength);
|
|
660
|
+
|
|
661
|
+
bool is_reversed = slice.step < 0;
|
|
662
|
+
|
|
663
|
+
int ii = 0;
|
|
664
|
+
for (
|
|
665
|
+
int i = slice.start;
|
|
666
|
+
is_reversed ? (i > slice.stop) : (i < slice.stop);
|
|
667
|
+
i += slice.step
|
|
668
|
+
)
|
|
669
|
+
{
|
|
670
|
+
t[i] -= a[ii];
|
|
671
|
+
++ii;
|
|
672
|
+
}
|
|
673
|
+
|
|
674
|
+
assert(ii == SliceLength);
|
|
675
|
+
}
|
|
676
|
+
|
|
677
|
+
|
|
515
678
|
template<typename Type>
|
|
516
679
|
inline CUDA_CALLABLE void adj_sub_inplace(transform_t<Type>& t, int idx, Type value,
|
|
517
680
|
transform_t<Type>& adj_t, int adj_idx, Type& adj_value)
|
|
518
681
|
{
|
|
519
682
|
#ifndef NDEBUG
|
|
520
|
-
if (idx <
|
|
683
|
+
if (idx < -7 || idx >= 7)
|
|
521
684
|
{
|
|
522
685
|
printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
|
|
523
686
|
assert(0);
|
|
524
687
|
}
|
|
525
688
|
#endif
|
|
526
689
|
|
|
690
|
+
if (idx < 0)
|
|
691
|
+
{
|
|
692
|
+
idx += 7;
|
|
693
|
+
}
|
|
694
|
+
|
|
527
695
|
adj_value -= adj_t[idx];
|
|
528
696
|
}
|
|
529
697
|
|
|
530
698
|
|
|
699
|
+
template<unsigned SliceLength, typename Type>
|
|
700
|
+
inline CUDA_CALLABLE void adj_sub_inplace(
|
|
701
|
+
const transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a,
|
|
702
|
+
transform_t<Type>& adj_t, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
|
|
703
|
+
)
|
|
704
|
+
{
|
|
705
|
+
assert(slice.start >= 0 && slice.start <= 7);
|
|
706
|
+
assert(slice.stop >= -1 && slice.stop <= 7);
|
|
707
|
+
assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
|
|
708
|
+
assert(slice_get_length(slice) == SliceLength);
|
|
709
|
+
|
|
710
|
+
bool is_reversed = slice.step < 0;
|
|
711
|
+
|
|
712
|
+
int ii = 0;
|
|
713
|
+
for (
|
|
714
|
+
int i = slice.start;
|
|
715
|
+
is_reversed ? (i > slice.stop) : (i < slice.stop);
|
|
716
|
+
i += slice.step
|
|
717
|
+
)
|
|
718
|
+
{
|
|
719
|
+
adj_a[ii] -= adj_t[i];
|
|
720
|
+
++ii;
|
|
721
|
+
}
|
|
722
|
+
|
|
723
|
+
assert(ii == SliceLength);
|
|
724
|
+
}
|
|
725
|
+
|
|
726
|
+
|
|
531
727
|
template<typename Type>
|
|
532
728
|
inline CUDA_CALLABLE void assign_inplace(transform_t<Type>& t, int idx, Type value)
|
|
533
729
|
{
|
|
534
730
|
#ifndef NDEBUG
|
|
535
|
-
if (idx <
|
|
731
|
+
if (idx < -7 || idx >= 7)
|
|
536
732
|
{
|
|
537
733
|
printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
|
|
538
734
|
assert(0);
|
|
539
735
|
}
|
|
540
736
|
#endif
|
|
541
737
|
|
|
738
|
+
if (idx < 0)
|
|
739
|
+
{
|
|
740
|
+
idx += 7;
|
|
741
|
+
}
|
|
742
|
+
|
|
542
743
|
t[idx] = value;
|
|
543
744
|
}
|
|
544
745
|
|
|
746
|
+
template<unsigned SliceLength, typename Type>
|
|
747
|
+
inline CUDA_CALLABLE void assign_inplace(transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a)
|
|
748
|
+
{
|
|
749
|
+
assert(slice.start >= 0 && slice.start <= 7);
|
|
750
|
+
assert(slice.stop >= -1 && slice.stop <= 7);
|
|
751
|
+
assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
|
|
752
|
+
assert(slice_get_length(slice) == SliceLength);
|
|
753
|
+
|
|
754
|
+
bool is_reversed = slice.step < 0;
|
|
755
|
+
|
|
756
|
+
int ii = 0;
|
|
757
|
+
for (
|
|
758
|
+
int i = slice.start;
|
|
759
|
+
is_reversed ? (i > slice.stop) : (i < slice.stop);
|
|
760
|
+
i += slice.step
|
|
761
|
+
)
|
|
762
|
+
{
|
|
763
|
+
t[i] = a[ii];
|
|
764
|
+
++ii;
|
|
765
|
+
}
|
|
766
|
+
|
|
767
|
+
assert(ii == SliceLength);
|
|
768
|
+
}
|
|
769
|
+
|
|
545
770
|
template<typename Type>
|
|
546
771
|
inline CUDA_CALLABLE void adj_assign_inplace(transform_t<Type>& t, int idx, Type value, transform_t<Type>& adj_t, int& adj_idx, Type& adj_value)
|
|
547
772
|
{
|
|
548
773
|
#ifndef NDEBUG
|
|
549
|
-
if (idx <
|
|
774
|
+
if (idx < -7 || idx >= 7)
|
|
550
775
|
{
|
|
551
776
|
printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
|
|
552
777
|
assert(0);
|
|
553
778
|
}
|
|
554
779
|
#endif
|
|
555
780
|
|
|
781
|
+
if (idx < 0)
|
|
782
|
+
{
|
|
783
|
+
idx += 7;
|
|
784
|
+
}
|
|
785
|
+
|
|
556
786
|
adj_value += adj_t[idx];
|
|
557
787
|
}
|
|
558
788
|
|
|
789
|
+
template<unsigned SliceLength, typename Type>
|
|
790
|
+
inline CUDA_CALLABLE void adj_assign_inplace(
|
|
791
|
+
const transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a,
|
|
792
|
+
transform_t<Type>& adj_t, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
|
|
793
|
+
)
|
|
794
|
+
{
|
|
795
|
+
assert(slice.start >= 0 && slice.start <= 7);
|
|
796
|
+
assert(slice.stop >= -1 && slice.stop <= 7);
|
|
797
|
+
assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
|
|
798
|
+
assert(slice_get_length(slice) == SliceLength);
|
|
799
|
+
|
|
800
|
+
bool is_reversed = slice.step < 0;
|
|
801
|
+
|
|
802
|
+
int ii = 0;
|
|
803
|
+
for (
|
|
804
|
+
int i = slice.start;
|
|
805
|
+
is_reversed ? (i > slice.stop) : (i < slice.stop);
|
|
806
|
+
i += slice.step
|
|
807
|
+
)
|
|
808
|
+
{
|
|
809
|
+
adj_a[ii] += adj_t[i];
|
|
810
|
+
++ii;
|
|
811
|
+
}
|
|
812
|
+
|
|
813
|
+
assert(ii == SliceLength);
|
|
814
|
+
}
|
|
815
|
+
|
|
559
816
|
|
|
560
817
|
template<typename Type>
|
|
561
818
|
inline CUDA_CALLABLE transform_t<Type> assign_copy(transform_t<Type>& t, int idx, Type value)
|
|
562
819
|
{
|
|
563
820
|
#ifndef NDEBUG
|
|
564
|
-
if (idx <
|
|
821
|
+
if (idx < -7 || idx >= 7)
|
|
565
822
|
{
|
|
566
823
|
printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
|
|
567
824
|
assert(0);
|
|
568
825
|
}
|
|
569
826
|
#endif
|
|
570
827
|
|
|
828
|
+
if (idx < 0)
|
|
829
|
+
{
|
|
830
|
+
idx += 7;
|
|
831
|
+
}
|
|
832
|
+
|
|
571
833
|
transform_t<Type> ret(t);
|
|
572
834
|
ret[idx] = value;
|
|
573
835
|
return ret;
|
|
574
836
|
}
|
|
575
837
|
|
|
838
|
+
template<unsigned SliceLength, typename Type>
|
|
839
|
+
inline CUDA_CALLABLE transform_t<Type> assign_copy(transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a)
|
|
840
|
+
{
|
|
841
|
+
transform_t<Type> ret(t);
|
|
842
|
+
assign_inplace<SliceLength>(ret, slice, a);
|
|
843
|
+
return ret;
|
|
844
|
+
}
|
|
845
|
+
|
|
576
846
|
template<typename Type>
|
|
577
847
|
inline CUDA_CALLABLE void adj_assign_copy(transform_t<Type>& t, int idx, Type value, transform_t<Type>& adj_t, int& adj_idx, Type& adj_value, const transform_t<Type>& adj_ret)
|
|
578
848
|
{
|
|
579
849
|
#ifndef NDEBUG
|
|
580
|
-
if (idx <
|
|
850
|
+
if (idx < -7 || idx >= 7)
|
|
581
851
|
{
|
|
582
852
|
printf("transformation index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
|
|
583
853
|
assert(0);
|
|
584
854
|
}
|
|
585
855
|
#endif
|
|
586
856
|
|
|
857
|
+
if (idx < 0)
|
|
858
|
+
{
|
|
859
|
+
idx += 7;
|
|
860
|
+
}
|
|
861
|
+
|
|
587
862
|
adj_value += adj_ret[idx];
|
|
588
863
|
for(unsigned i=0; i < 7; ++i)
|
|
589
864
|
{
|
|
@@ -592,6 +867,42 @@ inline CUDA_CALLABLE void adj_assign_copy(transform_t<Type>& t, int idx, Type va
|
|
|
592
867
|
}
|
|
593
868
|
}
|
|
594
869
|
|
|
870
|
+
template<unsigned SliceLength, typename Type>
|
|
871
|
+
inline CUDA_CALLABLE void adj_assign_copy(
|
|
872
|
+
transform_t<Type>& t, slice_t slice, const vec_t<SliceLength, Type> &a,
|
|
873
|
+
transform_t<Type>& adj_t, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a,
|
|
874
|
+
const transform_t<Type>& adj_ret
|
|
875
|
+
)
|
|
876
|
+
{
|
|
877
|
+
assert(slice.start >= 0 && slice.start <= 7);
|
|
878
|
+
assert(slice.stop >= -1 && slice.stop <= 7);
|
|
879
|
+
assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
|
|
880
|
+
assert(slice_get_length(slice) == SliceLength);
|
|
881
|
+
|
|
882
|
+
bool is_reversed = slice.step < 0;
|
|
883
|
+
|
|
884
|
+
int ii = 0;
|
|
885
|
+
for (int i = 0; i < 7; ++i)
|
|
886
|
+
{
|
|
887
|
+
bool in_slice = is_reversed
|
|
888
|
+
? (i <= slice.start && i > slice.stop && (slice.start - i) % (-slice.step) == 0)
|
|
889
|
+
: (i >= slice.start && i < slice.stop && (i - slice.start) % slice.step == 0);
|
|
890
|
+
|
|
891
|
+
if (!in_slice)
|
|
892
|
+
{
|
|
893
|
+
adj_t[i] += adj_ret[i];
|
|
894
|
+
}
|
|
895
|
+
else
|
|
896
|
+
{
|
|
897
|
+
adj_a[ii] += adj_ret[i];
|
|
898
|
+
++ii;
|
|
899
|
+
}
|
|
900
|
+
}
|
|
901
|
+
|
|
902
|
+
assert(ii == SliceLength);
|
|
903
|
+
}
|
|
904
|
+
|
|
905
|
+
|
|
595
906
|
// adjoint methods
|
|
596
907
|
template<typename Type>
|
|
597
908
|
CUDA_CALLABLE inline void adj_add(const transform_t<Type>& a, const transform_t<Type>& b, transform_t<Type>& adj_a, transform_t<Type>& adj_b, const transform_t<Type>& adj_ret)
|
|
@@ -600,6 +911,25 @@ CUDA_CALLABLE inline void adj_add(const transform_t<Type>& a, const transform_t<
|
|
|
600
911
|
adj_add(a.q, b.q, adj_a.q, adj_b.q, adj_ret.q);
|
|
601
912
|
}
|
|
602
913
|
|
|
914
|
+
template<typename Type>
|
|
915
|
+
CUDA_CALLABLE inline void adj_add(
|
|
916
|
+
const transform_t<Type>& a, Type b,
|
|
917
|
+
transform_t<Type>& adj_a, Type& adj_b,
|
|
918
|
+
const transform_t<Type>& adj_ret
|
|
919
|
+
)
|
|
920
|
+
{
|
|
921
|
+
adj_a += adj_ret;
|
|
922
|
+
|
|
923
|
+
adj_b += adj_ret.p[0];
|
|
924
|
+
adj_b += adj_ret.p[1];
|
|
925
|
+
adj_b += adj_ret.p[2];
|
|
926
|
+
|
|
927
|
+
adj_b += adj_ret.q[0];
|
|
928
|
+
adj_b += adj_ret.q[1];
|
|
929
|
+
adj_b += adj_ret.q[2];
|
|
930
|
+
adj_b += adj_ret.q[3];
|
|
931
|
+
}
|
|
932
|
+
|
|
603
933
|
template<typename Type>
|
|
604
934
|
CUDA_CALLABLE inline void adj_sub(const transform_t<Type>& a, const transform_t<Type>& b, transform_t<Type>& adj_a, transform_t<Type>& adj_b, const transform_t<Type>& adj_ret)
|
|
605
935
|
{
|
|
@@ -607,6 +937,25 @@ CUDA_CALLABLE inline void adj_sub(const transform_t<Type>& a, const transform_t<
|
|
|
607
937
|
adj_sub(a.q, b.q, adj_a.q, adj_b.q, adj_ret.q);
|
|
608
938
|
}
|
|
609
939
|
|
|
940
|
+
template<typename Type>
|
|
941
|
+
CUDA_CALLABLE inline void adj_sub(
|
|
942
|
+
const transform_t<Type>& a, Type b,
|
|
943
|
+
transform_t<Type>& adj_a, Type& adj_b,
|
|
944
|
+
const transform_t<Type>& adj_ret
|
|
945
|
+
)
|
|
946
|
+
{
|
|
947
|
+
adj_a -= adj_ret;
|
|
948
|
+
|
|
949
|
+
adj_b -= adj_ret.p[0];
|
|
950
|
+
adj_b -= adj_ret.p[1];
|
|
951
|
+
adj_b -= adj_ret.p[2];
|
|
952
|
+
|
|
953
|
+
adj_b -= adj_ret.q[0];
|
|
954
|
+
adj_b -= adj_ret.q[1];
|
|
955
|
+
adj_b -= adj_ret.q[2];
|
|
956
|
+
adj_b -= adj_ret.q[3];
|
|
957
|
+
}
|
|
958
|
+
|
|
610
959
|
template<typename Type>
|
|
611
960
|
CUDA_CALLABLE inline void adj_mul(const transform_t<Type>& a, Type s, transform_t<Type>& adj_a, Type& adj_s, const transform_t<Type>& adj_ret)
|
|
612
961
|
{
|
|
@@ -942,4 +1291,4 @@ using spatial_matrixh = spatial_matrix_t<half>;
|
|
|
942
1291
|
using spatial_matrixf = spatial_matrix_t<float>;
|
|
943
1292
|
using spatial_matrixd = spatial_matrix_t<double>;
|
|
944
1293
|
|
|
945
|
-
} // namespace wp
|
|
1294
|
+
} // namespace wp
|
warp/native/temp_buffer.h
CHANGED
|
@@ -26,13 +26,13 @@ template <typename T = char> struct ScopedTemporary
|
|
|
26
26
|
{
|
|
27
27
|
|
|
28
28
|
ScopedTemporary(void *context, size_t size)
|
|
29
|
-
: m_context(context), m_buffer(static_cast<T*>(
|
|
29
|
+
: m_context(context), m_buffer(static_cast<T*>(wp_alloc_device(m_context, size * sizeof(T))))
|
|
30
30
|
{
|
|
31
31
|
}
|
|
32
32
|
|
|
33
33
|
~ScopedTemporary()
|
|
34
34
|
{
|
|
35
|
-
|
|
35
|
+
wp_free_device(m_context, m_buffer);
|
|
36
36
|
}
|
|
37
37
|
|
|
38
38
|
T *buffer() const
|