warp-lang 0.10.1__py3-none-win_amd64.whl → 0.11.0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +10 -4
- warp/__init__.pyi +1 -0
- warp/bin/warp-clang.dll +0 -0
- warp/bin/warp.dll +0 -0
- warp/build.py +5 -3
- warp/build_dll.py +29 -9
- warp/builtins.py +868 -507
- warp/codegen.py +1074 -638
- warp/config.py +3 -3
- warp/constants.py +6 -0
- warp/context.py +715 -222
- warp/fabric.py +326 -0
- warp/fem/__init__.py +27 -0
- warp/fem/cache.py +389 -0
- warp/fem/dirichlet.py +181 -0
- warp/fem/domain.py +263 -0
- warp/fem/field/__init__.py +101 -0
- warp/fem/field/field.py +149 -0
- warp/fem/field/nodal_field.py +299 -0
- warp/fem/field/restriction.py +21 -0
- warp/fem/field/test.py +181 -0
- warp/fem/field/trial.py +183 -0
- warp/fem/geometry/__init__.py +19 -0
- warp/fem/geometry/closest_point.py +70 -0
- warp/fem/geometry/deformed_geometry.py +271 -0
- warp/fem/geometry/element.py +744 -0
- warp/fem/geometry/geometry.py +186 -0
- warp/fem/geometry/grid_2d.py +373 -0
- warp/fem/geometry/grid_3d.py +435 -0
- warp/fem/geometry/hexmesh.py +953 -0
- warp/fem/geometry/partition.py +376 -0
- warp/fem/geometry/quadmesh_2d.py +532 -0
- warp/fem/geometry/tetmesh.py +840 -0
- warp/fem/geometry/trimesh_2d.py +577 -0
- warp/fem/integrate.py +1616 -0
- warp/fem/operator.py +191 -0
- warp/fem/polynomial.py +213 -0
- warp/fem/quadrature/__init__.py +2 -0
- warp/fem/quadrature/pic_quadrature.py +245 -0
- warp/fem/quadrature/quadrature.py +294 -0
- warp/fem/space/__init__.py +292 -0
- warp/fem/space/basis_space.py +489 -0
- warp/fem/space/collocated_function_space.py +105 -0
- warp/fem/space/dof_mapper.py +236 -0
- warp/fem/space/function_space.py +145 -0
- warp/fem/space/grid_2d_function_space.py +267 -0
- warp/fem/space/grid_3d_function_space.py +306 -0
- warp/fem/space/hexmesh_function_space.py +352 -0
- warp/fem/space/partition.py +350 -0
- warp/fem/space/quadmesh_2d_function_space.py +369 -0
- warp/fem/space/restriction.py +160 -0
- warp/fem/space/shape/__init__.py +15 -0
- warp/fem/space/shape/cube_shape_function.py +738 -0
- warp/fem/space/shape/shape_function.py +103 -0
- warp/fem/space/shape/square_shape_function.py +611 -0
- warp/fem/space/shape/tet_shape_function.py +567 -0
- warp/fem/space/shape/triangle_shape_function.py +429 -0
- warp/fem/space/tetmesh_function_space.py +292 -0
- warp/fem/space/topology.py +295 -0
- warp/fem/space/trimesh_2d_function_space.py +221 -0
- warp/fem/types.py +77 -0
- warp/fem/utils.py +495 -0
- warp/native/array.h +147 -44
- warp/native/builtin.h +122 -149
- warp/native/bvh.cpp +73 -325
- warp/native/bvh.cu +406 -23
- warp/native/bvh.h +34 -43
- warp/native/clang/clang.cpp +13 -8
- warp/native/crt.h +2 -0
- warp/native/cuda_crt.h +5 -0
- warp/native/cuda_util.cpp +15 -3
- warp/native/cuda_util.h +3 -1
- warp/native/cutlass/tools/library/scripts/conv2d_operation.py +463 -0
- warp/native/cutlass/tools/library/scripts/conv3d_operation.py +321 -0
- warp/native/cutlass/tools/library/scripts/gemm_operation.py +988 -0
- warp/native/cutlass/tools/library/scripts/generator.py +4625 -0
- warp/native/cutlass/tools/library/scripts/library.py +799 -0
- warp/native/cutlass/tools/library/scripts/manifest.py +402 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/docs/source/conf.py +96 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/profile/conv/conv2d_f16_sm80.py +106 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/profile/gemm/gemm_f32_sm80.py +91 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/setup.py +80 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/__init__.py +48 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/arguments.py +118 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/c_types.py +241 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/compiler.py +432 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/conv2d_operation.py +631 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/epilogue.py +1026 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/frontend.py +104 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/gemm_operation.py +1276 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/library.py +744 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/memory_manager.py +74 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/operation.py +110 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/parser.py +619 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/reduction_operation.py +398 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/tensor_ref.py +70 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/__init__.py +4 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/conv2d_testbed.py +646 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_grouped_testbed.py +235 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_testbed.py +557 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/profiler.py +70 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/type_hint.py +39 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/__init__.py +1 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/device.py +76 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/reference_model.py +255 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/__init__.py +0 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +201 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +177 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +98 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +95 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +163 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +187 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +309 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +54 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +253 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +97 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +242 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/run_all_tests.py +10 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/frontend/test_frontend.py +146 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/__init__.py +0 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_bf16_sm80.py +96 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f16_sm80.py +447 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f32_sm80.py +146 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f64_sm80.py +102 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_grouped_sm80.py +203 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_s8_sm80.py +229 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/run_all_tests.py +9 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/unit/test_sm80.py +453 -0
- warp/native/cutlass/tools/library/scripts/rank_2k_operation.py +398 -0
- warp/native/cutlass/tools/library/scripts/rank_k_operation.py +387 -0
- warp/native/cutlass/tools/library/scripts/rt.py +796 -0
- warp/native/cutlass/tools/library/scripts/symm_operation.py +400 -0
- warp/native/cutlass/tools/library/scripts/trmm_operation.py +407 -0
- warp/native/cutlass_gemm.cu +5 -3
- warp/native/exports.h +1240 -952
- warp/native/fabric.h +228 -0
- warp/native/hashgrid.cpp +4 -4
- warp/native/hashgrid.h +22 -2
- warp/native/intersect.h +22 -7
- warp/native/intersect_adj.h +8 -8
- warp/native/intersect_tri.h +1 -1
- warp/native/marching.cu +157 -161
- warp/native/mat.h +80 -19
- warp/native/matnn.h +2 -2
- warp/native/mesh.cpp +33 -108
- warp/native/mesh.cu +114 -23
- warp/native/mesh.h +446 -46
- warp/native/noise.h +272 -329
- warp/native/quat.h +51 -8
- warp/native/rand.h +45 -35
- warp/native/range.h +6 -2
- warp/native/reduce.cpp +1 -1
- warp/native/reduce.cu +10 -12
- warp/native/runlength_encode.cu +6 -10
- warp/native/scan.cu +8 -11
- warp/native/sparse.cpp +4 -4
- warp/native/sparse.cu +164 -154
- warp/native/spatial.h +2 -2
- warp/native/temp_buffer.h +14 -30
- warp/native/vec.h +107 -23
- warp/native/volume.h +120 -0
- warp/native/warp.cpp +560 -30
- warp/native/warp.cu +431 -44
- warp/native/warp.h +13 -4
- warp/optim/__init__.py +1 -0
- warp/optim/linear.py +922 -0
- warp/optim/sgd.py +92 -0
- warp/render/render_opengl.py +335 -119
- warp/render/render_usd.py +11 -11
- warp/sim/__init__.py +2 -2
- warp/sim/articulation.py +385 -185
- warp/sim/collide.py +8 -0
- warp/sim/import_mjcf.py +297 -106
- warp/sim/import_urdf.py +389 -210
- warp/sim/import_usd.py +198 -97
- warp/sim/inertia.py +17 -18
- warp/sim/integrator_euler.py +14 -8
- warp/sim/integrator_xpbd.py +158 -16
- warp/sim/model.py +795 -291
- warp/sim/render.py +3 -3
- warp/sim/utils.py +3 -0
- warp/sparse.py +640 -150
- warp/stubs.py +606 -267
- warp/tape.py +61 -10
- warp/tests/__main__.py +3 -6
- warp/tests/assets/curlnoise_golden.npy +0 -0
- warp/tests/assets/pnoise_golden.npy +0 -0
- warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
- warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
- warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
- warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
- warp/tests/aux_test_unresolved_func.py +14 -0
- warp/tests/aux_test_unresolved_symbol.py +14 -0
- warp/tests/disabled_kinematics.py +239 -0
- warp/tests/run_coverage_serial.py +31 -0
- warp/tests/test_adam.py +103 -106
- warp/tests/test_arithmetic.py +128 -74
- warp/tests/test_array.py +212 -97
- warp/tests/test_array_reduce.py +57 -23
- warp/tests/test_atomic.py +64 -28
- warp/tests/test_bool.py +99 -0
- warp/tests/test_builtins_resolution.py +1292 -0
- warp/tests/test_bvh.py +42 -18
- warp/tests/test_closest_point_edge_edge.py +54 -57
- warp/tests/test_codegen.py +208 -130
- warp/tests/test_compile_consts.py +28 -20
- warp/tests/test_conditional.py +108 -24
- warp/tests/test_copy.py +10 -12
- warp/tests/test_ctypes.py +112 -88
- warp/tests/test_dense.py +21 -14
- warp/tests/test_devices.py +98 -0
- warp/tests/test_dlpack.py +75 -75
- warp/tests/test_examples.py +277 -0
- warp/tests/test_fabricarray.py +955 -0
- warp/tests/test_fast_math.py +15 -11
- warp/tests/test_fem.py +1271 -0
- warp/tests/test_fp16.py +53 -19
- warp/tests/test_func.py +187 -86
- warp/tests/test_generics.py +194 -49
- warp/tests/test_grad.py +178 -109
- warp/tests/test_grad_customs.py +176 -0
- warp/tests/test_hash_grid.py +52 -37
- warp/tests/test_import.py +10 -23
- warp/tests/test_indexedarray.py +32 -31
- warp/tests/test_intersect.py +18 -9
- warp/tests/test_large.py +141 -0
- warp/tests/test_launch.py +14 -41
- warp/tests/test_lerp.py +64 -65
- warp/tests/test_linear_solvers.py +154 -0
- warp/tests/test_lvalue.py +493 -0
- warp/tests/test_marching_cubes.py +12 -13
- warp/tests/test_mat.py +517 -2898
- warp/tests/test_mat_lite.py +115 -0
- warp/tests/test_mat_scalar_ops.py +2889 -0
- warp/tests/test_math.py +103 -9
- warp/tests/test_matmul.py +305 -69
- warp/tests/test_matmul_lite.py +410 -0
- warp/tests/test_mesh.py +71 -14
- warp/tests/test_mesh_query_aabb.py +41 -25
- warp/tests/test_mesh_query_point.py +140 -22
- warp/tests/test_mesh_query_ray.py +39 -22
- warp/tests/test_mlp.py +30 -22
- warp/tests/test_model.py +92 -89
- warp/tests/test_modules_lite.py +39 -0
- warp/tests/test_multigpu.py +88 -114
- warp/tests/test_noise.py +12 -11
- warp/tests/test_operators.py +16 -20
- warp/tests/test_options.py +11 -11
- warp/tests/test_pinned.py +17 -18
- warp/tests/test_print.py +32 -11
- warp/tests/test_quat.py +275 -129
- warp/tests/test_rand.py +18 -16
- warp/tests/test_reload.py +38 -34
- warp/tests/test_rounding.py +50 -43
- warp/tests/test_runlength_encode.py +168 -20
- warp/tests/test_smoothstep.py +9 -11
- warp/tests/test_snippet.py +143 -0
- warp/tests/test_sparse.py +261 -63
- warp/tests/test_spatial.py +276 -243
- warp/tests/test_streams.py +110 -85
- warp/tests/test_struct.py +268 -63
- warp/tests/test_tape.py +39 -21
- warp/tests/test_torch.py +118 -89
- warp/tests/test_transient_module.py +12 -13
- warp/tests/test_types.py +614 -0
- warp/tests/test_utils.py +494 -0
- warp/tests/test_vec.py +354 -2050
- warp/tests/test_vec_lite.py +73 -0
- warp/tests/test_vec_scalar_ops.py +2099 -0
- warp/tests/test_volume.py +457 -293
- warp/tests/test_volume_write.py +124 -134
- warp/tests/unittest_serial.py +35 -0
- warp/tests/unittest_suites.py +341 -0
- warp/tests/unittest_utils.py +568 -0
- warp/tests/unused_test_misc.py +71 -0
- warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
- warp/thirdparty/appdirs.py +36 -45
- warp/thirdparty/unittest_parallel.py +549 -0
- warp/torch.py +9 -6
- warp/types.py +1089 -366
- warp/utils.py +93 -387
- warp_lang-0.11.0.dist-info/METADATA +238 -0
- warp_lang-0.11.0.dist-info/RECORD +332 -0
- {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/WHEEL +1 -1
- warp/tests/test_all.py +0 -219
- warp/tests/test_array_scan.py +0 -60
- warp/tests/test_base.py +0 -208
- warp/tests/test_unresolved_func.py +0 -7
- warp/tests/test_unresolved_symbol.py +0 -7
- warp_lang-0.10.1.dist-info/METADATA +0 -21
- warp_lang-0.10.1.dist-info/RECORD +0 -188
- /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
- /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
- /warp/tests/{test_square.py → aux_test_square.py} +0 -0
- {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/LICENSE.md +0 -0
- {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/top_level.txt +0 -0
warp/native/vec.h
CHANGED
|
@@ -16,9 +16,11 @@ namespace wp
|
|
|
16
16
|
template<unsigned Length, typename Type>
|
|
17
17
|
struct vec_t
|
|
18
18
|
{
|
|
19
|
-
Type c[Length]
|
|
19
|
+
Type c[Length];
|
|
20
20
|
|
|
21
|
-
inline vec_t()
|
|
21
|
+
inline CUDA_CALLABLE vec_t()
|
|
22
|
+
: c()
|
|
23
|
+
{}
|
|
22
24
|
|
|
23
25
|
inline CUDA_CALLABLE vec_t(Type s)
|
|
24
26
|
{
|
|
@@ -33,7 +35,7 @@ struct vec_t
|
|
|
33
35
|
{
|
|
34
36
|
for( unsigned i=0; i < Length; ++i )
|
|
35
37
|
{
|
|
36
|
-
c[i] = other[i];
|
|
38
|
+
c[i] = static_cast<Type>(other[i]);
|
|
37
39
|
}
|
|
38
40
|
}
|
|
39
41
|
|
|
@@ -284,12 +286,41 @@ inline CUDA_CALLABLE vec_t<2, Type> div(vec_t<2, Type> a, Type s)
|
|
|
284
286
|
return vec_t<2, Type>(a.c[0]/s,a.c[1]/s);
|
|
285
287
|
}
|
|
286
288
|
|
|
289
|
+
template<unsigned Length, typename Type>
|
|
290
|
+
inline CUDA_CALLABLE vec_t<Length, Type> div(Type s, vec_t<Length, Type> a)
|
|
291
|
+
{
|
|
292
|
+
vec_t<Length, Type> ret;
|
|
293
|
+
for (unsigned i=0; i < Length; ++i)
|
|
294
|
+
{
|
|
295
|
+
ret[i] = s / a[i];
|
|
296
|
+
}
|
|
297
|
+
return ret;
|
|
298
|
+
}
|
|
299
|
+
|
|
300
|
+
template<typename Type>
|
|
301
|
+
inline CUDA_CALLABLE vec_t<3, Type> div(Type s, vec_t<3, Type> a)
|
|
302
|
+
{
|
|
303
|
+
return vec_t<3, Type>(s/a.c[0],s/a.c[1],s/a.c[2]);
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
template<typename Type>
|
|
307
|
+
inline CUDA_CALLABLE vec_t<2, Type> div(Type s, vec_t<2, Type> a)
|
|
308
|
+
{
|
|
309
|
+
return vec_t<2, Type>(s/a.c[0],s/a.c[1]);
|
|
310
|
+
}
|
|
311
|
+
|
|
287
312
|
template<unsigned Length, typename Type>
|
|
288
313
|
inline CUDA_CALLABLE vec_t<Length, Type> operator / (vec_t<Length, Type> a, Type s)
|
|
289
314
|
{
|
|
290
315
|
return div(a,s);
|
|
291
316
|
}
|
|
292
317
|
|
|
318
|
+
template<unsigned Length, typename Type>
|
|
319
|
+
inline CUDA_CALLABLE vec_t<Length, Type> operator / (Type s, vec_t<Length, Type> a)
|
|
320
|
+
{
|
|
321
|
+
return div(s, a);
|
|
322
|
+
}
|
|
323
|
+
|
|
293
324
|
// component wise division
|
|
294
325
|
template<unsigned Length, typename Type>
|
|
295
326
|
inline CUDA_CALLABLE vec_t<Length, Type> cw_div(vec_t<Length, Type> a, vec_t<Length, Type> b)
|
|
@@ -383,7 +414,7 @@ inline CUDA_CALLABLE Type tensordot(vec_t<Length, Type> a, vec_t<Length, Type> b
|
|
|
383
414
|
|
|
384
415
|
|
|
385
416
|
template<unsigned Length, typename Type>
|
|
386
|
-
inline CUDA_CALLABLE Type
|
|
417
|
+
inline CUDA_CALLABLE Type extract(const vec_t<Length, Type> & a, int idx)
|
|
387
418
|
{
|
|
388
419
|
#ifndef NDEBUG
|
|
389
420
|
if (idx < 0 || idx >= Length)
|
|
@@ -397,7 +428,21 @@ inline CUDA_CALLABLE Type index(const vec_t<Length, Type> & a, int idx)
|
|
|
397
428
|
}
|
|
398
429
|
|
|
399
430
|
template<unsigned Length, typename Type>
|
|
400
|
-
inline CUDA_CALLABLE
|
|
431
|
+
inline CUDA_CALLABLE Type* index(vec_t<Length, Type>& v, int idx)
|
|
432
|
+
{
|
|
433
|
+
#ifndef NDEBUG
|
|
434
|
+
if (idx < 0 || idx >= Length)
|
|
435
|
+
{
|
|
436
|
+
printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
|
|
437
|
+
assert(0);
|
|
438
|
+
}
|
|
439
|
+
#endif
|
|
440
|
+
|
|
441
|
+
return &v[idx];
|
|
442
|
+
}
|
|
443
|
+
|
|
444
|
+
template<unsigned Length, typename Type>
|
|
445
|
+
inline CUDA_CALLABLE Type* indexref(vec_t<Length, Type>* v, int idx)
|
|
401
446
|
{
|
|
402
447
|
#ifndef NDEBUG
|
|
403
448
|
if (idx < 0 || idx >= Length)
|
|
@@ -407,17 +452,23 @@ inline CUDA_CALLABLE void indexset(vec_t<Length, Type>& v, int idx, Type value)
|
|
|
407
452
|
}
|
|
408
453
|
#endif
|
|
409
454
|
|
|
410
|
-
v[idx]
|
|
455
|
+
return &((*v)[idx]);
|
|
411
456
|
}
|
|
412
457
|
|
|
413
458
|
template<unsigned Length, typename Type>
|
|
414
|
-
inline CUDA_CALLABLE void
|
|
459
|
+
inline CUDA_CALLABLE void adj_index(vec_t<Length, Type>& v, int idx,
|
|
415
460
|
vec_t<Length, Type>& adj_v, int adj_idx, const Type& adj_value)
|
|
416
461
|
{
|
|
417
462
|
// nop
|
|
418
463
|
}
|
|
419
464
|
|
|
420
465
|
|
|
466
|
+
template<unsigned Length, typename Type>
|
|
467
|
+
inline CUDA_CALLABLE void adj_indexref(vec_t<Length, Type>* v, int idx,
|
|
468
|
+
vec_t<Length, Type>& adj_v, int adj_idx, const Type& adj_value)
|
|
469
|
+
{
|
|
470
|
+
// nop
|
|
471
|
+
}
|
|
421
472
|
|
|
422
473
|
|
|
423
474
|
template<unsigned Length, typename Type>
|
|
@@ -645,7 +696,7 @@ inline CUDA_CALLABLE void adj_vec_t(const vec_t<Length, OtherType>& other, vec_t
|
|
|
645
696
|
{
|
|
646
697
|
for( unsigned i=0; i < Length; ++i )
|
|
647
698
|
{
|
|
648
|
-
adj_other[i] += adj_ret[i];
|
|
699
|
+
adj_other[i] += static_cast<OtherType>(adj_ret[i]);
|
|
649
700
|
}
|
|
650
701
|
}
|
|
651
702
|
|
|
@@ -715,9 +766,30 @@ inline CUDA_CALLABLE void adj_div(vec_t<Length, Type> a, Type s, vec_t<Length, T
|
|
|
715
766
|
}
|
|
716
767
|
|
|
717
768
|
template<unsigned Length, typename Type>
|
|
718
|
-
inline CUDA_CALLABLE void
|
|
769
|
+
inline CUDA_CALLABLE void adj_div(Type s, vec_t<Length, Type> a, Type& adj_s, vec_t<Length, Type>& adj_a, const vec_t<Length, Type>& adj_ret)
|
|
770
|
+
{
|
|
771
|
+
|
|
772
|
+
adj_s -= dot(a , adj_ret)/ (s * s); // - a / s^2
|
|
773
|
+
|
|
774
|
+
for( unsigned i=0; i < Length; ++i )
|
|
775
|
+
{
|
|
776
|
+
adj_a[i] += s / adj_ret[i];
|
|
777
|
+
}
|
|
778
|
+
|
|
779
|
+
#if FP_CHECK
|
|
780
|
+
if (!isfinite(a) || !isfinite(s) || !isfinite(adj_a) || !isfinite(adj_s) || !isfinite(adj_ret))
|
|
781
|
+
{
|
|
782
|
+
// \TODO: How shall we implement this error message?
|
|
783
|
+
// printf("adj_div((%f %f %f %f), %f, (%f %f %f %f), %f, (%f %f %f %f)\n", a.x, a.y, a.z, a.w, s, adj_a.x, adj_a.y, adj_a.z, adj_a.w, adj_s, adj_ret.x, adj_ret.y, adj_ret.z, adj_ret.w);
|
|
784
|
+
assert(0);
|
|
785
|
+
}
|
|
786
|
+
#endif
|
|
787
|
+
}
|
|
788
|
+
|
|
789
|
+
template<unsigned Length, typename Type>
|
|
790
|
+
inline CUDA_CALLABLE void adj_cw_div(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& ret, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret) {
|
|
719
791
|
adj_a += cw_div(adj_ret, b);
|
|
720
|
-
adj_b -= cw_mul(adj_ret, cw_div(
|
|
792
|
+
adj_b -= cw_mul(adj_ret, cw_div(ret, b));
|
|
721
793
|
}
|
|
722
794
|
|
|
723
795
|
template<unsigned Length, typename Type>
|
|
@@ -816,7 +888,7 @@ inline CUDA_CALLABLE void adj_dot(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, T
|
|
|
816
888
|
|
|
817
889
|
|
|
818
890
|
template<unsigned Length, typename Type>
|
|
819
|
-
inline CUDA_CALLABLE void
|
|
891
|
+
inline CUDA_CALLABLE void adj_extract(const vec_t<Length, Type> & a, int idx, vec_t<Length, Type> & adj_a, int & adj_idx, Type & adj_ret)
|
|
820
892
|
{
|
|
821
893
|
#ifndef NDEBUG
|
|
822
894
|
if (idx < 0 || idx > Length)
|
|
@@ -830,9 +902,12 @@ inline CUDA_CALLABLE void adj_index(const vec_t<Length, Type> & a, int idx, vec_
|
|
|
830
902
|
}
|
|
831
903
|
|
|
832
904
|
template<unsigned Length, typename Type>
|
|
833
|
-
inline CUDA_CALLABLE void adj_length(vec_t<Length, Type> a, vec_t<Length, Type>& adj_a, const Type adj_ret)
|
|
905
|
+
inline CUDA_CALLABLE void adj_length(vec_t<Length, Type> a, Type ret, vec_t<Length, Type>& adj_a, const Type adj_ret)
|
|
834
906
|
{
|
|
835
|
-
|
|
907
|
+
if (ret > Type(kEps))
|
|
908
|
+
{
|
|
909
|
+
adj_a += div(a, ret) * adj_ret;
|
|
910
|
+
}
|
|
836
911
|
|
|
837
912
|
#if FP_CHECK
|
|
838
913
|
if (!isfinite(adj_a))
|
|
@@ -860,7 +935,7 @@ inline CUDA_CALLABLE void adj_length_sq(vec_t<Length, Type> a, vec_t<Length, Typ
|
|
|
860
935
|
}
|
|
861
936
|
|
|
862
937
|
template<unsigned Length, typename Type>
|
|
863
|
-
inline CUDA_CALLABLE void adj_normalize(vec_t<Length, Type> a, vec_t<Length, Type>& adj_a, const vec_t<Length, Type>& adj_ret)
|
|
938
|
+
inline CUDA_CALLABLE void adj_normalize(vec_t<Length, Type> a, vec_t<Length, Type>& ret, vec_t<Length, Type>& adj_a, const vec_t<Length, Type>& adj_ret)
|
|
864
939
|
{
|
|
865
940
|
Type d = length(a);
|
|
866
941
|
|
|
@@ -868,9 +943,7 @@ inline CUDA_CALLABLE void adj_normalize(vec_t<Length, Type> a, vec_t<Length, Typ
|
|
|
868
943
|
{
|
|
869
944
|
Type invd = Type(1.0f)/d;
|
|
870
945
|
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
adj_a += (adj_ret*invd - ahat*(dot(ahat, adj_ret))*invd);
|
|
946
|
+
adj_a += (adj_ret*invd - ret*(dot(ret, adj_ret))*invd);
|
|
874
947
|
|
|
875
948
|
#if FP_CHECK
|
|
876
949
|
if (!isfinite(adj_a))
|
|
@@ -931,8 +1004,8 @@ inline CUDA_CALLABLE void adj_max(const vec_t<Length,Type> &v, vec_t<Length,Type
|
|
|
931
1004
|
|
|
932
1005
|
// Do I need to specialize these for different lengths?
|
|
933
1006
|
template<unsigned Length, typename Type>
|
|
934
|
-
inline CUDA_CALLABLE vec_t<Length, Type> atomic_add(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
|
|
935
|
-
|
|
1007
|
+
inline CUDA_CALLABLE vec_t<Length, Type> atomic_add(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
|
|
1008
|
+
{
|
|
936
1009
|
vec_t<Length, Type> ret;
|
|
937
1010
|
for( unsigned i=0; i < Length; ++i )
|
|
938
1011
|
{
|
|
@@ -943,8 +1016,8 @@ inline CUDA_CALLABLE vec_t<Length, Type> atomic_add(vec_t<Length, Type> * addr,
|
|
|
943
1016
|
}
|
|
944
1017
|
|
|
945
1018
|
template<unsigned Length, typename Type>
|
|
946
|
-
inline CUDA_CALLABLE vec_t<Length, Type> atomic_min(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
|
|
947
|
-
|
|
1019
|
+
inline CUDA_CALLABLE vec_t<Length, Type> atomic_min(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
|
|
1020
|
+
{
|
|
948
1021
|
vec_t<Length, Type> ret;
|
|
949
1022
|
for( unsigned i=0; i < Length; ++i )
|
|
950
1023
|
{
|
|
@@ -955,8 +1028,8 @@ inline CUDA_CALLABLE vec_t<Length, Type> atomic_min(vec_t<Length, Type> * addr,
|
|
|
955
1028
|
}
|
|
956
1029
|
|
|
957
1030
|
template<unsigned Length, typename Type>
|
|
958
|
-
inline CUDA_CALLABLE vec_t<Length, Type> atomic_max(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
|
|
959
|
-
|
|
1031
|
+
inline CUDA_CALLABLE vec_t<Length, Type> atomic_max(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
|
|
1032
|
+
{
|
|
960
1033
|
vec_t<Length, Type> ret;
|
|
961
1034
|
for( unsigned i=0; i < Length; ++i )
|
|
962
1035
|
{
|
|
@@ -966,6 +1039,17 @@ inline CUDA_CALLABLE vec_t<Length, Type> atomic_max(vec_t<Length, Type> * addr,
|
|
|
966
1039
|
return ret;
|
|
967
1040
|
}
|
|
968
1041
|
|
|
1042
|
+
template<unsigned Length, typename Type>
|
|
1043
|
+
inline CUDA_CALLABLE void adj_atomic_minmax(
|
|
1044
|
+
vec_t<Length,Type> *addr,
|
|
1045
|
+
vec_t<Length,Type> *adj_addr,
|
|
1046
|
+
const vec_t<Length,Type> &value,
|
|
1047
|
+
vec_t<Length,Type> &adj_value)
|
|
1048
|
+
{
|
|
1049
|
+
for (unsigned i=0; i < Length; ++i)
|
|
1050
|
+
adj_atomic_minmax(&(addr->c[i]), &(adj_addr->c[i]), value[i], adj_value[i]);
|
|
1051
|
+
}
|
|
1052
|
+
|
|
969
1053
|
// ok, the original implementation of this didn't take the absolute values.
|
|
970
1054
|
// I wouldn't consider this expected behavior. It looks like it's only
|
|
971
1055
|
// being used for bounding boxes at the moment, where this doesn't matter,
|
warp/native/volume.h
CHANGED
|
@@ -232,6 +232,126 @@ CUDA_CALLABLE inline void adj_volume_sample_i(uint64_t id, vec3 uvw, uint64_t& a
|
|
|
232
232
|
// NOP
|
|
233
233
|
}
|
|
234
234
|
|
|
235
|
+
// Sampling the volume at the given index-space coordinates, uvw can be fractional
|
|
236
|
+
CUDA_CALLABLE inline float volume_sample_grad_f(uint64_t id, vec3 uvw, int sampling_mode, vec3& grad)
|
|
237
|
+
{
|
|
238
|
+
const pnanovdb_buf_t buf = volume::id_to_buffer(id);
|
|
239
|
+
const pnanovdb_root_handle_t root = volume::get_root(buf);
|
|
240
|
+
const pnanovdb_vec3_t uvw_pnano{ uvw[0], uvw[1], uvw[2] };
|
|
241
|
+
|
|
242
|
+
if (sampling_mode == volume::CLOSEST)
|
|
243
|
+
{
|
|
244
|
+
const pnanovdb_coord_t ijk = pnanovdb_vec3_round_to_coord(uvw_pnano);
|
|
245
|
+
float val;
|
|
246
|
+
pnano_read(val, buf, root, PNANOVDB_REF(ijk));
|
|
247
|
+
grad = vec3(0.0f, 0.0f, 0.0f);
|
|
248
|
+
return val;
|
|
249
|
+
}
|
|
250
|
+
else if (sampling_mode == volume::LINEAR)
|
|
251
|
+
{
|
|
252
|
+
// NB. linear sampling is not used on int volumes
|
|
253
|
+
constexpr pnanovdb_coord_t OFFSETS[] = {
|
|
254
|
+
{ 0, 0, 0 }, { 0, 0, 1 }, { 0, 1, 0 }, { 0, 1, 1 }, { 1, 0, 0 }, { 1, 0, 1 }, { 1, 1, 0 }, { 1, 1, 1 },
|
|
255
|
+
};
|
|
256
|
+
|
|
257
|
+
const pnanovdb_vec3_t ijk_base{ floorf(uvw_pnano.x), floorf(uvw_pnano.y), floorf(uvw_pnano.z) };
|
|
258
|
+
const pnanovdb_vec3_t ijk_frac{ uvw_pnano.x - ijk_base.x, uvw_pnano.y - ijk_base.y, uvw_pnano.z - ijk_base.z };
|
|
259
|
+
const pnanovdb_coord_t ijk{ (pnanovdb_int32_t)ijk_base.x, (pnanovdb_int32_t)ijk_base.y, (pnanovdb_int32_t)ijk_base.z };
|
|
260
|
+
|
|
261
|
+
pnanovdb_readaccessor_t accessor;
|
|
262
|
+
pnanovdb_readaccessor_init(PNANOVDB_REF(accessor), root);
|
|
263
|
+
float val = 0.0f;
|
|
264
|
+
const float wx[2]{ 1 - ijk_frac.x, ijk_frac.x };
|
|
265
|
+
const float wy[2]{ 1 - ijk_frac.y, ijk_frac.y };
|
|
266
|
+
const float wz[2]{ 1 - ijk_frac.z, ijk_frac.z };
|
|
267
|
+
|
|
268
|
+
const float sign_dx[8] = {-1.0f, -1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
|
|
269
|
+
const float sign_dy[8] = {-1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f};
|
|
270
|
+
const float sign_dz[8] = {-1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f};
|
|
271
|
+
|
|
272
|
+
float dfdx = 0.0f;
|
|
273
|
+
float dfdy = 0.0f;
|
|
274
|
+
float dfdz = 0.0f;
|
|
275
|
+
for (int idx = 0; idx < 8; ++idx)
|
|
276
|
+
{
|
|
277
|
+
const pnanovdb_coord_t& offs = OFFSETS[idx];
|
|
278
|
+
const pnanovdb_coord_t ijk_shifted = pnanovdb_coord_add(ijk, offs);
|
|
279
|
+
float v;
|
|
280
|
+
pnano_read(v, buf, PNANOVDB_REF(accessor), PNANOVDB_REF(ijk_shifted));
|
|
281
|
+
val = add(val, wx[offs.x] * wy[offs.y] * wz[offs.z] * v);
|
|
282
|
+
dfdx = add(dfdx, wy[offs.y] * wz[offs.z] * sign_dx[idx] * v);
|
|
283
|
+
dfdy = add(dfdy, wx[offs.x] * wz[offs.z] * sign_dy[idx] * v);
|
|
284
|
+
dfdz = add(dfdz, wx[offs.x] * wy[offs.y] * sign_dz[idx] * v);
|
|
285
|
+
}
|
|
286
|
+
grad = vec3(dfdx, dfdy, dfdz);
|
|
287
|
+
return val;
|
|
288
|
+
}
|
|
289
|
+
return 0.0f;
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
CUDA_CALLABLE inline void adj_volume_sample_grad_f(
|
|
293
|
+
uint64_t id, vec3 uvw, int sampling_mode, vec3& grad, uint64_t& adj_id, vec3& adj_uvw, int& adj_sampling_mode, vec3& adj_grad, const float& adj_ret)
|
|
294
|
+
{
|
|
295
|
+
if (volume::get_grid_type(volume::id_to_buffer(id)) != PNANOVDB_GRID_TYPE_FLOAT) return;
|
|
296
|
+
|
|
297
|
+
if (sampling_mode != volume::LINEAR) {
|
|
298
|
+
return; // NOP
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
const pnanovdb_buf_t buf = volume::id_to_buffer(id);
|
|
302
|
+
const pnanovdb_root_handle_t root = volume::get_root(buf);
|
|
303
|
+
const pnanovdb_vec3_t uvw_pnano{ uvw[0], uvw[1], uvw[2] };
|
|
304
|
+
|
|
305
|
+
constexpr pnanovdb_coord_t OFFSETS[] = {
|
|
306
|
+
{ 0, 0, 0 }, { 0, 0, 1 }, { 0, 1, 0 }, { 0, 1, 1 }, { 1, 0, 0 }, { 1, 0, 1 }, { 1, 1, 0 }, { 1, 1, 1 },
|
|
307
|
+
};
|
|
308
|
+
|
|
309
|
+
const pnanovdb_vec3_t ijk_base{ floorf(uvw_pnano.x), floorf(uvw_pnano.y), floorf(uvw_pnano.z) };
|
|
310
|
+
const pnanovdb_vec3_t ijk_frac{ uvw_pnano.x - ijk_base.x, uvw_pnano.y - ijk_base.y, uvw_pnano.z - ijk_base.z };
|
|
311
|
+
const pnanovdb_coord_t ijk{ (pnanovdb_int32_t)ijk_base.x, (pnanovdb_int32_t)ijk_base.y, (pnanovdb_int32_t)ijk_base.z };
|
|
312
|
+
|
|
313
|
+
pnanovdb_readaccessor_t accessor;
|
|
314
|
+
pnanovdb_readaccessor_init(PNANOVDB_REF(accessor), root);
|
|
315
|
+
const float wx[2]{ 1 - ijk_frac.x, ijk_frac.x };
|
|
316
|
+
const float wy[2]{ 1 - ijk_frac.y, ijk_frac.y };
|
|
317
|
+
const float wz[2]{ 1 - ijk_frac.z, ijk_frac.z };
|
|
318
|
+
const float sign_dx[8] = {-1.0f, -1.0f, -1.0f, -1.0f, 1.0f, 1.0f, 1.0f, 1.0f};
|
|
319
|
+
const float sign_dy[8] = {-1.0f, -1.0f, 1.0f, 1.0f, -1.0f, -1.0f, 1.0f, 1.0f};
|
|
320
|
+
const float sign_dz[8] = {-1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f, -1.0f, 1.0f};
|
|
321
|
+
|
|
322
|
+
float dfdxdy = 0.0f;
|
|
323
|
+
float dfdxdz = 0.0f;
|
|
324
|
+
float dfdydx = 0.0f;
|
|
325
|
+
float dfdydz = 0.0f;
|
|
326
|
+
float dfdzdx = 0.0f;
|
|
327
|
+
float dfdzdy = 0.0f;
|
|
328
|
+
vec3 dphi(0,0,0);
|
|
329
|
+
for (int idx = 0; idx < 8; ++idx)
|
|
330
|
+
{
|
|
331
|
+
const pnanovdb_coord_t& offs = OFFSETS[idx];
|
|
332
|
+
const pnanovdb_coord_t ijk_shifted = pnanovdb_coord_add(ijk, offs);
|
|
333
|
+
float v;
|
|
334
|
+
pnano_read(v, buf, PNANOVDB_REF(accessor), PNANOVDB_REF(ijk_shifted));
|
|
335
|
+
const vec3 signs(offs.x * 2 - 1, offs.y * 2 - 1, offs.z * 2 - 1);
|
|
336
|
+
const vec3 grad_w(signs[0] * wy[offs.y] * wz[offs.z], signs[1] * wx[offs.x] * wz[offs.z], signs[2] * wx[offs.x] * wy[offs.y]);
|
|
337
|
+
dphi = add(dphi, mul(v, grad_w));
|
|
338
|
+
|
|
339
|
+
dfdxdy = add(dfdxdy, signs[1] * wz[offs.z] * sign_dx[idx] * v);
|
|
340
|
+
dfdxdz = add(dfdxdz, wy[offs.y] * signs[2] * sign_dx[idx] * v);
|
|
341
|
+
|
|
342
|
+
dfdydx = add(dfdydx, signs[0] * wz[offs.z] * sign_dy[idx] * v);
|
|
343
|
+
dfdydz = add(dfdydz, wx[offs.x] * signs[2] * sign_dy[idx] * v);
|
|
344
|
+
|
|
345
|
+
dfdzdx = add(dfdzdx, signs[0] * wy[offs.y] * sign_dz[idx] * v);
|
|
346
|
+
dfdzdy = add(dfdzdy, wx[offs.x] * signs[1] * sign_dz[idx] * v);
|
|
347
|
+
}
|
|
348
|
+
|
|
349
|
+
adj_uvw += mul(dphi, adj_ret);
|
|
350
|
+
adj_uvw[0] += adj_grad[1] * dfdydx + adj_grad[2] * dfdzdx;
|
|
351
|
+
adj_uvw[1] += adj_grad[0] * dfdxdy + adj_grad[2] * dfdzdy;
|
|
352
|
+
adj_uvw[2] += adj_grad[0] * dfdxdz + adj_grad[1] * dfdydz;
|
|
353
|
+
}
|
|
354
|
+
|
|
235
355
|
CUDA_CALLABLE inline float volume_lookup_f(uint64_t id, int32_t i, int32_t j, int32_t k)
|
|
236
356
|
{
|
|
237
357
|
if (volume::get_grid_type(volume::id_to_buffer(id)) != PNANOVDB_GRID_TYPE_FLOAT) return 0.f;
|