warp-lang 0.9.0__py3-none-win_amd64.whl → 0.11.0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +15 -7
- warp/__init__.pyi +1 -0
- warp/bin/warp-clang.dll +0 -0
- warp/bin/warp.dll +0 -0
- warp/build.py +22 -443
- warp/build_dll.py +384 -0
- warp/builtins.py +998 -488
- warp/codegen.py +1307 -739
- warp/config.py +5 -3
- warp/constants.py +6 -0
- warp/context.py +1291 -548
- warp/dlpack.py +31 -31
- warp/fabric.py +326 -0
- warp/fem/__init__.py +27 -0
- warp/fem/cache.py +389 -0
- warp/fem/dirichlet.py +181 -0
- warp/fem/domain.py +263 -0
- warp/fem/field/__init__.py +101 -0
- warp/fem/field/field.py +149 -0
- warp/fem/field/nodal_field.py +299 -0
- warp/fem/field/restriction.py +21 -0
- warp/fem/field/test.py +181 -0
- warp/fem/field/trial.py +183 -0
- warp/fem/geometry/__init__.py +19 -0
- warp/fem/geometry/closest_point.py +70 -0
- warp/fem/geometry/deformed_geometry.py +271 -0
- warp/fem/geometry/element.py +744 -0
- warp/fem/geometry/geometry.py +186 -0
- warp/fem/geometry/grid_2d.py +373 -0
- warp/fem/geometry/grid_3d.py +435 -0
- warp/fem/geometry/hexmesh.py +953 -0
- warp/fem/geometry/partition.py +376 -0
- warp/fem/geometry/quadmesh_2d.py +532 -0
- warp/fem/geometry/tetmesh.py +840 -0
- warp/fem/geometry/trimesh_2d.py +577 -0
- warp/fem/integrate.py +1616 -0
- warp/fem/operator.py +191 -0
- warp/fem/polynomial.py +213 -0
- warp/fem/quadrature/__init__.py +2 -0
- warp/fem/quadrature/pic_quadrature.py +245 -0
- warp/fem/quadrature/quadrature.py +294 -0
- warp/fem/space/__init__.py +292 -0
- warp/fem/space/basis_space.py +489 -0
- warp/fem/space/collocated_function_space.py +105 -0
- warp/fem/space/dof_mapper.py +236 -0
- warp/fem/space/function_space.py +145 -0
- warp/fem/space/grid_2d_function_space.py +267 -0
- warp/fem/space/grid_3d_function_space.py +306 -0
- warp/fem/space/hexmesh_function_space.py +352 -0
- warp/fem/space/partition.py +350 -0
- warp/fem/space/quadmesh_2d_function_space.py +369 -0
- warp/fem/space/restriction.py +160 -0
- warp/fem/space/shape/__init__.py +15 -0
- warp/fem/space/shape/cube_shape_function.py +738 -0
- warp/fem/space/shape/shape_function.py +103 -0
- warp/fem/space/shape/square_shape_function.py +611 -0
- warp/fem/space/shape/tet_shape_function.py +567 -0
- warp/fem/space/shape/triangle_shape_function.py +429 -0
- warp/fem/space/tetmesh_function_space.py +292 -0
- warp/fem/space/topology.py +295 -0
- warp/fem/space/trimesh_2d_function_space.py +221 -0
- warp/fem/types.py +77 -0
- warp/fem/utils.py +495 -0
- warp/native/array.h +164 -55
- warp/native/builtin.h +150 -174
- warp/native/bvh.cpp +75 -328
- warp/native/bvh.cu +406 -23
- warp/native/bvh.h +37 -45
- warp/native/clang/clang.cpp +136 -24
- warp/native/crt.cpp +1 -76
- warp/native/crt.h +111 -104
- warp/native/cuda_crt.h +1049 -0
- warp/native/cuda_util.cpp +15 -3
- warp/native/cuda_util.h +3 -1
- warp/native/cutlass/tools/library/scripts/conv2d_operation.py +463 -0
- warp/native/cutlass/tools/library/scripts/conv3d_operation.py +321 -0
- warp/native/cutlass/tools/library/scripts/gemm_operation.py +988 -0
- warp/native/cutlass/tools/library/scripts/generator.py +4625 -0
- warp/native/cutlass/tools/library/scripts/library.py +799 -0
- warp/native/cutlass/tools/library/scripts/manifest.py +402 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/docs/source/conf.py +96 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/profile/conv/conv2d_f16_sm80.py +106 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/profile/gemm/gemm_f32_sm80.py +91 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/setup.py +80 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/__init__.py +48 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/arguments.py +118 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/c_types.py +241 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/compiler.py +432 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/conv2d_operation.py +631 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/epilogue.py +1026 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/frontend.py +104 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/gemm_operation.py +1276 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/library.py +744 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/memory_manager.py +74 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/operation.py +110 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/parser.py +619 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/reduction_operation.py +398 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/tensor_ref.py +70 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/__init__.py +4 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/conv2d_testbed.py +646 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_grouped_testbed.py +235 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_testbed.py +557 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/profiler.py +70 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/type_hint.py +39 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/__init__.py +1 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/device.py +76 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/reference_model.py +255 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/__init__.py +0 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +201 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +177 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +98 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +95 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +163 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +187 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +309 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +54 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +253 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +97 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +242 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/run_all_tests.py +10 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/frontend/test_frontend.py +146 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/__init__.py +0 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_bf16_sm80.py +96 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f16_sm80.py +447 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f32_sm80.py +146 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f64_sm80.py +102 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_grouped_sm80.py +203 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_s8_sm80.py +229 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/run_all_tests.py +9 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/unit/test_sm80.py +453 -0
- warp/native/cutlass/tools/library/scripts/rank_2k_operation.py +398 -0
- warp/native/cutlass/tools/library/scripts/rank_k_operation.py +387 -0
- warp/native/cutlass/tools/library/scripts/rt.py +796 -0
- warp/native/cutlass/tools/library/scripts/symm_operation.py +400 -0
- warp/native/cutlass/tools/library/scripts/trmm_operation.py +407 -0
- warp/native/cutlass_gemm.cu +5 -3
- warp/native/exports.h +1240 -949
- warp/native/fabric.h +228 -0
- warp/native/hashgrid.cpp +4 -4
- warp/native/hashgrid.h +22 -2
- warp/native/initializer_array.h +2 -2
- warp/native/intersect.h +22 -7
- warp/native/intersect_adj.h +8 -8
- warp/native/intersect_tri.h +13 -16
- warp/native/marching.cu +157 -161
- warp/native/mat.h +119 -19
- warp/native/matnn.h +2 -2
- warp/native/mesh.cpp +108 -83
- warp/native/mesh.cu +243 -6
- warp/native/mesh.h +1547 -458
- warp/native/nanovdb/NanoVDB.h +1 -1
- warp/native/noise.h +272 -329
- warp/native/quat.h +51 -8
- warp/native/rand.h +45 -35
- warp/native/range.h +6 -2
- warp/native/reduce.cpp +157 -0
- warp/native/reduce.cu +348 -0
- warp/native/runlength_encode.cpp +62 -0
- warp/native/runlength_encode.cu +46 -0
- warp/native/scan.cu +11 -13
- warp/native/scan.h +1 -0
- warp/native/solid_angle.h +442 -0
- warp/native/sort.cpp +13 -0
- warp/native/sort.cu +9 -1
- warp/native/sparse.cpp +338 -0
- warp/native/sparse.cu +545 -0
- warp/native/spatial.h +2 -2
- warp/native/temp_buffer.h +30 -0
- warp/native/vec.h +126 -24
- warp/native/volume.h +120 -0
- warp/native/warp.cpp +658 -53
- warp/native/warp.cu +660 -68
- warp/native/warp.h +112 -12
- warp/optim/__init__.py +1 -0
- warp/optim/linear.py +922 -0
- warp/optim/sgd.py +92 -0
- warp/render/render_opengl.py +392 -152
- warp/render/render_usd.py +11 -11
- warp/sim/__init__.py +2 -2
- warp/sim/articulation.py +385 -185
- warp/sim/collide.py +21 -8
- warp/sim/import_mjcf.py +297 -106
- warp/sim/import_urdf.py +389 -210
- warp/sim/import_usd.py +198 -97
- warp/sim/inertia.py +17 -18
- warp/sim/integrator_euler.py +14 -8
- warp/sim/integrator_xpbd.py +161 -19
- warp/sim/model.py +795 -291
- warp/sim/optimizer.py +2 -6
- warp/sim/render.py +65 -3
- warp/sim/utils.py +3 -0
- warp/sparse.py +1227 -0
- warp/stubs.py +665 -223
- warp/tape.py +66 -15
- warp/tests/__main__.py +3 -6
- warp/tests/assets/curlnoise_golden.npy +0 -0
- warp/tests/assets/pnoise_golden.npy +0 -0
- warp/tests/assets/torus.usda +105 -105
- warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
- warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
- warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
- warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
- warp/tests/aux_test_unresolved_func.py +14 -0
- warp/tests/aux_test_unresolved_symbol.py +14 -0
- warp/tests/disabled_kinematics.py +239 -0
- warp/tests/run_coverage_serial.py +31 -0
- warp/tests/test_adam.py +103 -106
- warp/tests/test_arithmetic.py +128 -74
- warp/tests/test_array.py +1497 -211
- warp/tests/test_array_reduce.py +150 -0
- warp/tests/test_atomic.py +64 -28
- warp/tests/test_bool.py +99 -0
- warp/tests/test_builtins_resolution.py +1292 -0
- warp/tests/test_bvh.py +75 -43
- warp/tests/test_closest_point_edge_edge.py +54 -57
- warp/tests/test_codegen.py +233 -128
- warp/tests/test_compile_consts.py +28 -20
- warp/tests/test_conditional.py +108 -24
- warp/tests/test_copy.py +10 -12
- warp/tests/test_ctypes.py +112 -88
- warp/tests/test_dense.py +21 -14
- warp/tests/test_devices.py +98 -0
- warp/tests/test_dlpack.py +136 -108
- warp/tests/test_examples.py +277 -0
- warp/tests/test_fabricarray.py +955 -0
- warp/tests/test_fast_math.py +15 -11
- warp/tests/test_fem.py +1271 -0
- warp/tests/test_fp16.py +53 -19
- warp/tests/test_func.py +187 -74
- warp/tests/test_generics.py +194 -49
- warp/tests/test_grad.py +180 -116
- warp/tests/test_grad_customs.py +176 -0
- warp/tests/test_hash_grid.py +52 -37
- warp/tests/test_import.py +10 -23
- warp/tests/test_indexedarray.py +577 -24
- warp/tests/test_intersect.py +18 -9
- warp/tests/test_large.py +141 -0
- warp/tests/test_launch.py +251 -15
- warp/tests/test_lerp.py +64 -65
- warp/tests/test_linear_solvers.py +154 -0
- warp/tests/test_lvalue.py +493 -0
- warp/tests/test_marching_cubes.py +12 -13
- warp/tests/test_mat.py +508 -2778
- warp/tests/test_mat_lite.py +115 -0
- warp/tests/test_mat_scalar_ops.py +2889 -0
- warp/tests/test_math.py +103 -9
- warp/tests/test_matmul.py +305 -69
- warp/tests/test_matmul_lite.py +410 -0
- warp/tests/test_mesh.py +71 -14
- warp/tests/test_mesh_query_aabb.py +41 -25
- warp/tests/test_mesh_query_point.py +325 -34
- warp/tests/test_mesh_query_ray.py +39 -22
- warp/tests/test_mlp.py +30 -22
- warp/tests/test_model.py +92 -89
- warp/tests/test_modules_lite.py +39 -0
- warp/tests/test_multigpu.py +88 -114
- warp/tests/test_noise.py +12 -11
- warp/tests/test_operators.py +16 -20
- warp/tests/test_options.py +11 -11
- warp/tests/test_pinned.py +17 -18
- warp/tests/test_print.py +32 -11
- warp/tests/test_quat.py +275 -129
- warp/tests/test_rand.py +18 -16
- warp/tests/test_reload.py +38 -34
- warp/tests/test_rounding.py +50 -43
- warp/tests/test_runlength_encode.py +190 -0
- warp/tests/test_smoothstep.py +9 -11
- warp/tests/test_snippet.py +143 -0
- warp/tests/test_sparse.py +460 -0
- warp/tests/test_spatial.py +276 -243
- warp/tests/test_streams.py +110 -85
- warp/tests/test_struct.py +331 -85
- warp/tests/test_tape.py +39 -21
- warp/tests/test_torch.py +118 -89
- warp/tests/test_transient_module.py +12 -13
- warp/tests/test_types.py +614 -0
- warp/tests/test_utils.py +494 -0
- warp/tests/test_vec.py +354 -1987
- warp/tests/test_vec_lite.py +73 -0
- warp/tests/test_vec_scalar_ops.py +2099 -0
- warp/tests/test_volume.py +457 -293
- warp/tests/test_volume_write.py +124 -134
- warp/tests/unittest_serial.py +35 -0
- warp/tests/unittest_suites.py +341 -0
- warp/tests/unittest_utils.py +568 -0
- warp/tests/unused_test_misc.py +71 -0
- warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
- warp/thirdparty/appdirs.py +36 -45
- warp/thirdparty/unittest_parallel.py +549 -0
- warp/torch.py +72 -30
- warp/types.py +1744 -713
- warp/utils.py +360 -350
- warp_lang-0.11.0.dist-info/LICENSE.md +36 -0
- warp_lang-0.11.0.dist-info/METADATA +238 -0
- warp_lang-0.11.0.dist-info/RECORD +332 -0
- {warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/WHEEL +1 -1
- warp/bin/warp-clang.exp +0 -0
- warp/bin/warp-clang.lib +0 -0
- warp/bin/warp.exp +0 -0
- warp/bin/warp.lib +0 -0
- warp/tests/test_all.py +0 -215
- warp/tests/test_array_scan.py +0 -60
- warp/tests/test_base.py +0 -208
- warp/tests/test_unresolved_func.py +0 -7
- warp/tests/test_unresolved_symbol.py +0 -7
- warp_lang-0.9.0.dist-info/METADATA +0 -20
- warp_lang-0.9.0.dist-info/RECORD +0 -177
- /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
- /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
- /warp/tests/{test_square.py → aux_test_square.py} +0 -0
- {warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/top_level.txt +0 -0
warp/native/vec.h
CHANGED
|
@@ -16,9 +16,11 @@ namespace wp
|
|
|
16
16
|
template<unsigned Length, typename Type>
|
|
17
17
|
struct vec_t
|
|
18
18
|
{
|
|
19
|
-
Type c[Length]
|
|
19
|
+
Type c[Length];
|
|
20
20
|
|
|
21
|
-
inline vec_t()
|
|
21
|
+
inline CUDA_CALLABLE vec_t()
|
|
22
|
+
: c()
|
|
23
|
+
{}
|
|
22
24
|
|
|
23
25
|
inline CUDA_CALLABLE vec_t(Type s)
|
|
24
26
|
{
|
|
@@ -27,6 +29,15 @@ struct vec_t
|
|
|
27
29
|
c[i] = s;
|
|
28
30
|
}
|
|
29
31
|
}
|
|
32
|
+
|
|
33
|
+
template <typename OtherType>
|
|
34
|
+
inline explicit CUDA_CALLABLE vec_t(const vec_t<Length, OtherType>& other)
|
|
35
|
+
{
|
|
36
|
+
for( unsigned i=0; i < Length; ++i )
|
|
37
|
+
{
|
|
38
|
+
c[i] = static_cast<Type>(other[i]);
|
|
39
|
+
}
|
|
40
|
+
}
|
|
30
41
|
|
|
31
42
|
inline CUDA_CALLABLE vec_t(Type x, Type y)
|
|
32
43
|
{
|
|
@@ -275,12 +286,41 @@ inline CUDA_CALLABLE vec_t<2, Type> div(vec_t<2, Type> a, Type s)
|
|
|
275
286
|
return vec_t<2, Type>(a.c[0]/s,a.c[1]/s);
|
|
276
287
|
}
|
|
277
288
|
|
|
289
|
+
template<unsigned Length, typename Type>
|
|
290
|
+
inline CUDA_CALLABLE vec_t<Length, Type> div(Type s, vec_t<Length, Type> a)
|
|
291
|
+
{
|
|
292
|
+
vec_t<Length, Type> ret;
|
|
293
|
+
for (unsigned i=0; i < Length; ++i)
|
|
294
|
+
{
|
|
295
|
+
ret[i] = s / a[i];
|
|
296
|
+
}
|
|
297
|
+
return ret;
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
template<typename Type>
|
|
301
|
+
inline CUDA_CALLABLE vec_t<3, Type> div(Type s, vec_t<3, Type> a)
|
|
302
|
+
{
|
|
303
|
+
return vec_t<3, Type>(s/a.c[0],s/a.c[1],s/a.c[2]);
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
template<typename Type>
|
|
307
|
+
inline CUDA_CALLABLE vec_t<2, Type> div(Type s, vec_t<2, Type> a)
|
|
308
|
+
{
|
|
309
|
+
return vec_t<2, Type>(s/a.c[0],s/a.c[1]);
|
|
310
|
+
}
|
|
311
|
+
|
|
278
312
|
template<unsigned Length, typename Type>
|
|
279
313
|
inline CUDA_CALLABLE vec_t<Length, Type> operator / (vec_t<Length, Type> a, Type s)
|
|
280
314
|
{
|
|
281
315
|
return div(a,s);
|
|
282
316
|
}
|
|
283
317
|
|
|
318
|
+
template<unsigned Length, typename Type>
|
|
319
|
+
inline CUDA_CALLABLE vec_t<Length, Type> operator / (Type s, vec_t<Length, Type> a)
|
|
320
|
+
{
|
|
321
|
+
return div(s, a);
|
|
322
|
+
}
|
|
323
|
+
|
|
284
324
|
// component wise division
|
|
285
325
|
template<unsigned Length, typename Type>
|
|
286
326
|
inline CUDA_CALLABLE vec_t<Length, Type> cw_div(vec_t<Length, Type> a, vec_t<Length, Type> b)
|
|
@@ -374,7 +414,7 @@ inline CUDA_CALLABLE Type tensordot(vec_t<Length, Type> a, vec_t<Length, Type> b
|
|
|
374
414
|
|
|
375
415
|
|
|
376
416
|
template<unsigned Length, typename Type>
|
|
377
|
-
inline CUDA_CALLABLE Type
|
|
417
|
+
inline CUDA_CALLABLE Type extract(const vec_t<Length, Type> & a, int idx)
|
|
378
418
|
{
|
|
379
419
|
#ifndef NDEBUG
|
|
380
420
|
if (idx < 0 || idx >= Length)
|
|
@@ -388,7 +428,21 @@ inline CUDA_CALLABLE Type index(const vec_t<Length, Type> & a, int idx)
|
|
|
388
428
|
}
|
|
389
429
|
|
|
390
430
|
template<unsigned Length, typename Type>
|
|
391
|
-
inline CUDA_CALLABLE
|
|
431
|
+
inline CUDA_CALLABLE Type* index(vec_t<Length, Type>& v, int idx)
|
|
432
|
+
{
|
|
433
|
+
#ifndef NDEBUG
|
|
434
|
+
if (idx < 0 || idx >= Length)
|
|
435
|
+
{
|
|
436
|
+
printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
|
|
437
|
+
assert(0);
|
|
438
|
+
}
|
|
439
|
+
#endif
|
|
440
|
+
|
|
441
|
+
return &v[idx];
|
|
442
|
+
}
|
|
443
|
+
|
|
444
|
+
template<unsigned Length, typename Type>
|
|
445
|
+
inline CUDA_CALLABLE Type* indexref(vec_t<Length, Type>* v, int idx)
|
|
392
446
|
{
|
|
393
447
|
#ifndef NDEBUG
|
|
394
448
|
if (idx < 0 || idx >= Length)
|
|
@@ -398,17 +452,23 @@ inline CUDA_CALLABLE void indexset(vec_t<Length, Type>& v, int idx, Type value)
|
|
|
398
452
|
}
|
|
399
453
|
#endif
|
|
400
454
|
|
|
401
|
-
v[idx]
|
|
455
|
+
return &((*v)[idx]);
|
|
402
456
|
}
|
|
403
457
|
|
|
404
458
|
template<unsigned Length, typename Type>
|
|
405
|
-
inline CUDA_CALLABLE void
|
|
459
|
+
inline CUDA_CALLABLE void adj_index(vec_t<Length, Type>& v, int idx,
|
|
406
460
|
vec_t<Length, Type>& adj_v, int adj_idx, const Type& adj_value)
|
|
407
461
|
{
|
|
408
462
|
// nop
|
|
409
463
|
}
|
|
410
464
|
|
|
411
465
|
|
|
466
|
+
template<unsigned Length, typename Type>
|
|
467
|
+
inline CUDA_CALLABLE void adj_indexref(vec_t<Length, Type>* v, int idx,
|
|
468
|
+
vec_t<Length, Type>& adj_v, int adj_idx, const Type& adj_value)
|
|
469
|
+
{
|
|
470
|
+
// nop
|
|
471
|
+
}
|
|
412
472
|
|
|
413
473
|
|
|
414
474
|
template<unsigned Length, typename Type>
|
|
@@ -572,7 +632,7 @@ inline CUDA_CALLABLE void expect_near(const vec_t<Length, Type>& actual, const v
|
|
|
572
632
|
}
|
|
573
633
|
if (diff > tolerance)
|
|
574
634
|
{
|
|
575
|
-
printf("Error, expect_near() failed with
|
|
635
|
+
printf("Error, expect_near() failed with tolerance "); print(tolerance);
|
|
576
636
|
printf("\t Expected: "); print(expected);
|
|
577
637
|
printf("\t Actual: "); print(actual);
|
|
578
638
|
}
|
|
@@ -630,6 +690,15 @@ inline CUDA_CALLABLE void adj_vec_t(Type s, Type& adj_s, const vec_t<Length, Typ
|
|
|
630
690
|
}
|
|
631
691
|
}
|
|
632
692
|
|
|
693
|
+
// adjoint for the casting constructor
|
|
694
|
+
template<unsigned Length, typename Type, typename OtherType>
|
|
695
|
+
inline CUDA_CALLABLE void adj_vec_t(const vec_t<Length, OtherType>& other, vec_t<Length, OtherType>& adj_other, const vec_t<Length, Type>& adj_ret)
|
|
696
|
+
{
|
|
697
|
+
for( unsigned i=0; i < Length; ++i )
|
|
698
|
+
{
|
|
699
|
+
adj_other[i] += static_cast<OtherType>(adj_ret[i]);
|
|
700
|
+
}
|
|
701
|
+
}
|
|
633
702
|
|
|
634
703
|
template<typename Type>
|
|
635
704
|
CUDA_CALLABLE inline void adj_vec_t(const vec_t<3,Type>& w, const vec_t<3,Type>& v, vec_t<3,Type>& adj_w, vec_t<3,Type>& adj_v, const vec_t<6,Type>& adj_ret)
|
|
@@ -697,9 +766,30 @@ inline CUDA_CALLABLE void adj_div(vec_t<Length, Type> a, Type s, vec_t<Length, T
|
|
|
697
766
|
}
|
|
698
767
|
|
|
699
768
|
template<unsigned Length, typename Type>
|
|
700
|
-
inline CUDA_CALLABLE void
|
|
769
|
+
inline CUDA_CALLABLE void adj_div(Type s, vec_t<Length, Type> a, Type& adj_s, vec_t<Length, Type>& adj_a, const vec_t<Length, Type>& adj_ret)
|
|
770
|
+
{
|
|
771
|
+
|
|
772
|
+
adj_s -= dot(a , adj_ret)/ (s * s); // - a / s^2
|
|
773
|
+
|
|
774
|
+
for( unsigned i=0; i < Length; ++i )
|
|
775
|
+
{
|
|
776
|
+
adj_a[i] += s / adj_ret[i];
|
|
777
|
+
}
|
|
778
|
+
|
|
779
|
+
#if FP_CHECK
|
|
780
|
+
if (!isfinite(a) || !isfinite(s) || !isfinite(adj_a) || !isfinite(adj_s) || !isfinite(adj_ret))
|
|
781
|
+
{
|
|
782
|
+
// \TODO: How shall we implement this error message?
|
|
783
|
+
// printf("adj_div((%f %f %f %f), %f, (%f %f %f %f), %f, (%f %f %f %f)\n", a.x, a.y, a.z, a.w, s, adj_a.x, adj_a.y, adj_a.z, adj_a.w, adj_s, adj_ret.x, adj_ret.y, adj_ret.z, adj_ret.w);
|
|
784
|
+
assert(0);
|
|
785
|
+
}
|
|
786
|
+
#endif
|
|
787
|
+
}
|
|
788
|
+
|
|
789
|
+
template<unsigned Length, typename Type>
|
|
790
|
+
inline CUDA_CALLABLE void adj_cw_div(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& ret, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret) {
|
|
701
791
|
adj_a += cw_div(adj_ret, b);
|
|
702
|
-
adj_b -= cw_mul(adj_ret, cw_div(
|
|
792
|
+
adj_b -= cw_mul(adj_ret, cw_div(ret, b));
|
|
703
793
|
}
|
|
704
794
|
|
|
705
795
|
template<unsigned Length, typename Type>
|
|
@@ -798,7 +888,7 @@ inline CUDA_CALLABLE void adj_dot(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, T
|
|
|
798
888
|
|
|
799
889
|
|
|
800
890
|
template<unsigned Length, typename Type>
|
|
801
|
-
inline CUDA_CALLABLE void
|
|
891
|
+
inline CUDA_CALLABLE void adj_extract(const vec_t<Length, Type> & a, int idx, vec_t<Length, Type> & adj_a, int & adj_idx, Type & adj_ret)
|
|
802
892
|
{
|
|
803
893
|
#ifndef NDEBUG
|
|
804
894
|
if (idx < 0 || idx > Length)
|
|
@@ -812,9 +902,12 @@ inline CUDA_CALLABLE void adj_index(const vec_t<Length, Type> & a, int idx, vec_
|
|
|
812
902
|
}
|
|
813
903
|
|
|
814
904
|
template<unsigned Length, typename Type>
|
|
815
|
-
inline CUDA_CALLABLE void adj_length(vec_t<Length, Type> a, vec_t<Length, Type>& adj_a, const Type adj_ret)
|
|
905
|
+
inline CUDA_CALLABLE void adj_length(vec_t<Length, Type> a, Type ret, vec_t<Length, Type>& adj_a, const Type adj_ret)
|
|
816
906
|
{
|
|
817
|
-
|
|
907
|
+
if (ret > Type(kEps))
|
|
908
|
+
{
|
|
909
|
+
adj_a += div(a, ret) * adj_ret;
|
|
910
|
+
}
|
|
818
911
|
|
|
819
912
|
#if FP_CHECK
|
|
820
913
|
if (!isfinite(adj_a))
|
|
@@ -842,7 +935,7 @@ inline CUDA_CALLABLE void adj_length_sq(vec_t<Length, Type> a, vec_t<Length, Typ
|
|
|
842
935
|
}
|
|
843
936
|
|
|
844
937
|
template<unsigned Length, typename Type>
|
|
845
|
-
inline CUDA_CALLABLE void adj_normalize(vec_t<Length, Type> a, vec_t<Length, Type>& adj_a, const vec_t<Length, Type>& adj_ret)
|
|
938
|
+
inline CUDA_CALLABLE void adj_normalize(vec_t<Length, Type> a, vec_t<Length, Type>& ret, vec_t<Length, Type>& adj_a, const vec_t<Length, Type>& adj_ret)
|
|
846
939
|
{
|
|
847
940
|
Type d = length(a);
|
|
848
941
|
|
|
@@ -850,9 +943,7 @@ inline CUDA_CALLABLE void adj_normalize(vec_t<Length, Type> a, vec_t<Length, Typ
|
|
|
850
943
|
{
|
|
851
944
|
Type invd = Type(1.0f)/d;
|
|
852
945
|
|
|
853
|
-
|
|
854
|
-
|
|
855
|
-
adj_a += (adj_ret*invd - ahat*(dot(ahat, adj_ret))*invd);
|
|
946
|
+
adj_a += (adj_ret*invd - ret*(dot(ret, adj_ret))*invd);
|
|
856
947
|
|
|
857
948
|
#if FP_CHECK
|
|
858
949
|
if (!isfinite(adj_a))
|
|
@@ -913,8 +1004,8 @@ inline CUDA_CALLABLE void adj_max(const vec_t<Length,Type> &v, vec_t<Length,Type
|
|
|
913
1004
|
|
|
914
1005
|
// Do I need to specialize these for different lengths?
|
|
915
1006
|
template<unsigned Length, typename Type>
|
|
916
|
-
inline CUDA_CALLABLE vec_t<Length, Type> atomic_add(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
|
|
917
|
-
|
|
1007
|
+
inline CUDA_CALLABLE vec_t<Length, Type> atomic_add(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
|
|
1008
|
+
{
|
|
918
1009
|
vec_t<Length, Type> ret;
|
|
919
1010
|
for( unsigned i=0; i < Length; ++i )
|
|
920
1011
|
{
|
|
@@ -925,8 +1016,8 @@ inline CUDA_CALLABLE vec_t<Length, Type> atomic_add(vec_t<Length, Type> * addr,
|
|
|
925
1016
|
}
|
|
926
1017
|
|
|
927
1018
|
template<unsigned Length, typename Type>
|
|
928
|
-
inline CUDA_CALLABLE vec_t<Length, Type> atomic_min(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
|
|
929
|
-
|
|
1019
|
+
inline CUDA_CALLABLE vec_t<Length, Type> atomic_min(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
|
|
1020
|
+
{
|
|
930
1021
|
vec_t<Length, Type> ret;
|
|
931
1022
|
for( unsigned i=0; i < Length; ++i )
|
|
932
1023
|
{
|
|
@@ -937,8 +1028,8 @@ inline CUDA_CALLABLE vec_t<Length, Type> atomic_min(vec_t<Length, Type> * addr,
|
|
|
937
1028
|
}
|
|
938
1029
|
|
|
939
1030
|
template<unsigned Length, typename Type>
|
|
940
|
-
inline CUDA_CALLABLE vec_t<Length, Type> atomic_max(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
|
|
941
|
-
|
|
1031
|
+
inline CUDA_CALLABLE vec_t<Length, Type> atomic_max(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
|
|
1032
|
+
{
|
|
942
1033
|
vec_t<Length, Type> ret;
|
|
943
1034
|
for( unsigned i=0; i < Length; ++i )
|
|
944
1035
|
{
|
|
@@ -948,6 +1039,17 @@ inline CUDA_CALLABLE vec_t<Length, Type> atomic_max(vec_t<Length, Type> * addr,
|
|
|
948
1039
|
return ret;
|
|
949
1040
|
}
|
|
950
1041
|
|
|
1042
|
+
template<unsigned Length, typename Type>
|
|
1043
|
+
inline CUDA_CALLABLE void adj_atomic_minmax(
|
|
1044
|
+
vec_t<Length,Type> *addr,
|
|
1045
|
+
vec_t<Length,Type> *adj_addr,
|
|
1046
|
+
const vec_t<Length,Type> &value,
|
|
1047
|
+
vec_t<Length,Type> &adj_value)
|
|
1048
|
+
{
|
|
1049
|
+
for (unsigned i=0; i < Length; ++i)
|
|
1050
|
+
adj_atomic_minmax(&(addr->c[i]), &(adj_addr->c[i]), value[i], adj_value[i]);
|
|
1051
|
+
}
|
|
1052
|
+
|
|
951
1053
|
// ok, the original implementation of this didn't take the absolute values.
|
|
952
1054
|
// I wouldn't consider this expected behavior. It looks like it's only
|
|
953
1055
|
// being used for bounding boxes at the moment, where this doesn't matter,
|
|
@@ -956,11 +1058,11 @@ inline CUDA_CALLABLE vec_t<Length, Type> atomic_max(vec_t<Length, Type> * addr,
|
|
|
956
1058
|
template<unsigned Length, typename Type>
|
|
957
1059
|
CUDA_CALLABLE inline int longest_axis(const vec_t<Length, Type>& v)
|
|
958
1060
|
{
|
|
959
|
-
Type lmax =
|
|
1061
|
+
Type lmax = abs(v[0]);
|
|
960
1062
|
int ret(0);
|
|
961
1063
|
for( unsigned i=1; i < Length; ++i )
|
|
962
1064
|
{
|
|
963
|
-
Type l =
|
|
1065
|
+
Type l = abs(v[i]);
|
|
964
1066
|
if( l > lmax )
|
|
965
1067
|
{
|
|
966
1068
|
ret = i;
|
warp/native/volume.h
CHANGED
|
@@ -232,6 +232,126 @@ CUDA_CALLABLE inline void adj_volume_sample_i(uint64_t id, vec3 uvw, uint64_t& a
|
|
|
232
232
|
// NOP
|
|
233
233
|
}
|
|
234
234
|
|
|
235
|
+
// Sampling the volume at the given index-space coordinates, uvw can be fractional
|
|
236
|
+
CUDA_CALLABLE inline float volume_sample_grad_f(uint64_t id, vec3 uvw, int sampling_mode, vec3& grad)
|
|
237
|
+
{
|
|
238
|
+
const pnanovdb_buf_t buf = volume::id_to_buffer(id);
|
|
239
|
+
const pnanovdb_root_handle_t root = volume::get_root(buf);
|
|
240
|
+
const pnanovdb_vec3_t uvw_pnano{ uvw[0], uvw[1], uvw[2] };
|
|
241
|
+
|
|
242
|
+
if (sampling_mode == volume::CLOSEST)
|
|
243
|
+
{
|
|
244
|
+
const pnanovdb_coord_t ijk = pnanovdb_vec3_round_to_coord(uvw_pnano);
|
|
245
|
+
float val;
|
|
246
|
+
pnano_read(val, buf, root, PNANOVDB_REF(ijk));
|
|
247
|
+
grad = vec3(0.0f, 0.0f, 0.0f);
|
|
248
|
+
return val;
|
|
249
|
+
}
|
|
250
|
+
else if (sampling_mode == volume::LINEAR)
|
|
251
|
+
{
|
|
252
|
+
// NB. linear sampling is not used on int volumes
|
|
253
|
+
constexpr pnanovdb_coord_t OFFSETS[] = {
|
|
254
|
+
{ 0, 0, 0 }, { 0, 0, 1 }, { 0, 1, 0 }, { 0, 1, 1 }, { 1, 0, 0 }, { 1, 0, 1 }, { 1, 1, 0 }, { 1, 1, 1 },
|
|
255
|
+
};
|
|
256
|
+
|
|
257
|
+
const pnanovdb_vec3_t ijk_base{ floorf(uvw_pnano.x), floorf(uvw_pnano.y), floorf(uvw_pnano.z) };
|
|
258
|
+
const pnanovdb_vec3_t ijk_frac{ uvw_pnano.x - ijk_base.x, uvw_pnano.y - ijk_base.y, uvw_pnano.z - ijk_base.z };
|
|
259
|
+
const pnanovdb_coord_t ijk{ (pnanovdb_int32_t)ijk_base.x, (pnanovdb_int32_t)ijk_base.y, (pnanovdb_int32_t)ijk_base.z };
|
|
260
|
+
|
|
261
|
+
pnanovdb_readaccessor_t accessor;
|
|
262
|
+
pnanovdb_readaccessor_init(PNANOVDB_REF(accessor), root);
|
|
263
|
+
float val = 0.0f;
|
|
264
|
+
const float wx[2]{ 1 - ijk_frac.x, ijk_frac.x };
|
|
265
|
+
const float wy[2]{ 1 - ijk_frac.y, ijk_frac.y };
|
|
266
|
+
const float wz[2]{ 1 - ijk_frac.z, ijk_frac.z };
|
|
267
|
+
|
|
268
|
+
const float sign_dx[8] = {-1.0f, -1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
|
|
269
|
+
const float sign_dy[8] = {-1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f};
|
|
270
|
+
const float sign_dz[8] = {-1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f};
|
|
271
|
+
|
|
272
|
+
float dfdx = 0.0f;
|
|
273
|
+
float dfdy = 0.0f;
|
|
274
|
+
float dfdz = 0.0f;
|
|
275
|
+
for (int idx = 0; idx < 8; ++idx)
|
|
276
|
+
{
|
|
277
|
+
const pnanovdb_coord_t& offs = OFFSETS[idx];
|
|
278
|
+
const pnanovdb_coord_t ijk_shifted = pnanovdb_coord_add(ijk, offs);
|
|
279
|
+
float v;
|
|
280
|
+
pnano_read(v, buf, PNANOVDB_REF(accessor), PNANOVDB_REF(ijk_shifted));
|
|
281
|
+
val = add(val, wx[offs.x] * wy[offs.y] * wz[offs.z] * v);
|
|
282
|
+
dfdx = add(dfdx, wy[offs.y] * wz[offs.z] * sign_dx[idx] * v);
|
|
283
|
+
dfdy = add(dfdy, wx[offs.x] * wz[offs.z] * sign_dy[idx] * v);
|
|
284
|
+
dfdz = add(dfdz, wx[offs.x] * wy[offs.y] * sign_dz[idx] * v);
|
|
285
|
+
}
|
|
286
|
+
grad = vec3(dfdx, dfdy, dfdz);
|
|
287
|
+
return val;
|
|
288
|
+
}
|
|
289
|
+
return 0.0f;
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
CUDA_CALLABLE inline void adj_volume_sample_grad_f(
|
|
293
|
+
uint64_t id, vec3 uvw, int sampling_mode, vec3& grad, uint64_t& adj_id, vec3& adj_uvw, int& adj_sampling_mode, vec3& adj_grad, const float& adj_ret)
|
|
294
|
+
{
|
|
295
|
+
if (volume::get_grid_type(volume::id_to_buffer(id)) != PNANOVDB_GRID_TYPE_FLOAT) return;
|
|
296
|
+
|
|
297
|
+
if (sampling_mode != volume::LINEAR) {
|
|
298
|
+
return; // NOP
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
const pnanovdb_buf_t buf = volume::id_to_buffer(id);
|
|
302
|
+
const pnanovdb_root_handle_t root = volume::get_root(buf);
|
|
303
|
+
const pnanovdb_vec3_t uvw_pnano{ uvw[0], uvw[1], uvw[2] };
|
|
304
|
+
|
|
305
|
+
constexpr pnanovdb_coord_t OFFSETS[] = {
|
|
306
|
+
{ 0, 0, 0 }, { 0, 0, 1 }, { 0, 1, 0 }, { 0, 1, 1 }, { 1, 0, 0 }, { 1, 0, 1 }, { 1, 1, 0 }, { 1, 1, 1 },
|
|
307
|
+
};
|
|
308
|
+
|
|
309
|
+
const pnanovdb_vec3_t ijk_base{ floorf(uvw_pnano.x), floorf(uvw_pnano.y), floorf(uvw_pnano.z) };
|
|
310
|
+
const pnanovdb_vec3_t ijk_frac{ uvw_pnano.x - ijk_base.x, uvw_pnano.y - ijk_base.y, uvw_pnano.z - ijk_base.z };
|
|
311
|
+
const pnanovdb_coord_t ijk{ (pnanovdb_int32_t)ijk_base.x, (pnanovdb_int32_t)ijk_base.y, (pnanovdb_int32_t)ijk_base.z };
|
|
312
|
+
|
|
313
|
+
pnanovdb_readaccessor_t accessor;
|
|
314
|
+
pnanovdb_readaccessor_init(PNANOVDB_REF(accessor), root);
|
|
315
|
+
const float wx[2]{ 1 - ijk_frac.x, ijk_frac.x };
|
|
316
|
+
const float wy[2]{ 1 - ijk_frac.y, ijk_frac.y };
|
|
317
|
+
const float wz[2]{ 1 - ijk_frac.z, ijk_frac.z };
|
|
318
|
+
const float sign_dx[8] = {-1.0f, -1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
|
|
319
|
+
const float sign_dy[8] = {-1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f};
|
|
320
|
+
const float sign_dz[8] = {-1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f};
|
|
321
|
+
|
|
322
|
+
float dfdxdy = 0.0f;
|
|
323
|
+
float dfdxdz = 0.0f;
|
|
324
|
+
float dfdydx = 0.0f;
|
|
325
|
+
float dfdydz = 0.0f;
|
|
326
|
+
float dfdzdx = 0.0f;
|
|
327
|
+
float dfdzdy = 0.0f;
|
|
328
|
+
vec3 dphi(0,0,0);
|
|
329
|
+
for (int idx = 0; idx < 8; ++idx)
|
|
330
|
+
{
|
|
331
|
+
const pnanovdb_coord_t& offs = OFFSETS[idx];
|
|
332
|
+
const pnanovdb_coord_t ijk_shifted = pnanovdb_coord_add(ijk, offs);
|
|
333
|
+
float v;
|
|
334
|
+
pnano_read(v, buf, PNANOVDB_REF(accessor), PNANOVDB_REF(ijk_shifted));
|
|
335
|
+
const vec3 signs(offs.x * 2 - 1, offs.y * 2 - 1, offs.z * 2 - 1);
|
|
336
|
+
const vec3 grad_w(signs[0] * wy[offs.y] * wz[offs.z], signs[1] * wx[offs.x] * wz[offs.z], signs[2] * wx[offs.x] * wy[offs.y]);
|
|
337
|
+
dphi = add(dphi, mul(v, grad_w));
|
|
338
|
+
|
|
339
|
+
dfdxdy = add(dfdxdy, signs[1] * wz[offs.z] * sign_dx[idx] * v);
|
|
340
|
+
dfdxdz = add(dfdxdz, wy[offs.y] * signs[2] * sign_dx[idx] * v);
|
|
341
|
+
|
|
342
|
+
dfdydx = add(dfdydx, signs[0] * wz[offs.z] * sign_dy[idx] * v);
|
|
343
|
+
dfdydz = add(dfdydz, wx[offs.x] * signs[2] * sign_dy[idx] * v);
|
|
344
|
+
|
|
345
|
+
dfdzdx = add(dfdzdx, signs[0] * wy[offs.y] * sign_dz[idx] * v);
|
|
346
|
+
dfdzdy = add(dfdzdy, wx[offs.x] * signs[1] * sign_dz[idx] * v);
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
adj_uvw += mul(dphi, adj_ret);
|
|
350
|
+
adj_uvw[0] += adj_grad[1] * dfdydx + adj_grad[2] * dfdzdx;
|
|
351
|
+
adj_uvw[1] += adj_grad[0] * dfdxdy + adj_grad[2] * dfdzdy;
|
|
352
|
+
adj_uvw[2] += adj_grad[0] * dfdxdz + adj_grad[1] * dfdydz;
|
|
353
|
+
}
|
|
354
|
+
|
|
235
355
|
CUDA_CALLABLE inline float volume_lookup_f(uint64_t id, int32_t i, int32_t j, int32_t k)
|
|
236
356
|
{
|
|
237
357
|
if (volume::get_grid_type(volume::id_to_buffer(id)) != PNANOVDB_GRID_TYPE_FLOAT) return 0.f;
|