warp-lang 0.9.0__py3-none-win_amd64.whl → 0.11.0__py3-none-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +15 -7
- warp/__init__.pyi +1 -0
- warp/bin/warp-clang.dll +0 -0
- warp/bin/warp.dll +0 -0
- warp/build.py +22 -443
- warp/build_dll.py +384 -0
- warp/builtins.py +998 -488
- warp/codegen.py +1307 -739
- warp/config.py +5 -3
- warp/constants.py +6 -0
- warp/context.py +1291 -548
- warp/dlpack.py +31 -31
- warp/fabric.py +326 -0
- warp/fem/__init__.py +27 -0
- warp/fem/cache.py +389 -0
- warp/fem/dirichlet.py +181 -0
- warp/fem/domain.py +263 -0
- warp/fem/field/__init__.py +101 -0
- warp/fem/field/field.py +149 -0
- warp/fem/field/nodal_field.py +299 -0
- warp/fem/field/restriction.py +21 -0
- warp/fem/field/test.py +181 -0
- warp/fem/field/trial.py +183 -0
- warp/fem/geometry/__init__.py +19 -0
- warp/fem/geometry/closest_point.py +70 -0
- warp/fem/geometry/deformed_geometry.py +271 -0
- warp/fem/geometry/element.py +744 -0
- warp/fem/geometry/geometry.py +186 -0
- warp/fem/geometry/grid_2d.py +373 -0
- warp/fem/geometry/grid_3d.py +435 -0
- warp/fem/geometry/hexmesh.py +953 -0
- warp/fem/geometry/partition.py +376 -0
- warp/fem/geometry/quadmesh_2d.py +532 -0
- warp/fem/geometry/tetmesh.py +840 -0
- warp/fem/geometry/trimesh_2d.py +577 -0
- warp/fem/integrate.py +1616 -0
- warp/fem/operator.py +191 -0
- warp/fem/polynomial.py +213 -0
- warp/fem/quadrature/__init__.py +2 -0
- warp/fem/quadrature/pic_quadrature.py +245 -0
- warp/fem/quadrature/quadrature.py +294 -0
- warp/fem/space/__init__.py +292 -0
- warp/fem/space/basis_space.py +489 -0
- warp/fem/space/collocated_function_space.py +105 -0
- warp/fem/space/dof_mapper.py +236 -0
- warp/fem/space/function_space.py +145 -0
- warp/fem/space/grid_2d_function_space.py +267 -0
- warp/fem/space/grid_3d_function_space.py +306 -0
- warp/fem/space/hexmesh_function_space.py +352 -0
- warp/fem/space/partition.py +350 -0
- warp/fem/space/quadmesh_2d_function_space.py +369 -0
- warp/fem/space/restriction.py +160 -0
- warp/fem/space/shape/__init__.py +15 -0
- warp/fem/space/shape/cube_shape_function.py +738 -0
- warp/fem/space/shape/shape_function.py +103 -0
- warp/fem/space/shape/square_shape_function.py +611 -0
- warp/fem/space/shape/tet_shape_function.py +567 -0
- warp/fem/space/shape/triangle_shape_function.py +429 -0
- warp/fem/space/tetmesh_function_space.py +292 -0
- warp/fem/space/topology.py +295 -0
- warp/fem/space/trimesh_2d_function_space.py +221 -0
- warp/fem/types.py +77 -0
- warp/fem/utils.py +495 -0
- warp/native/array.h +164 -55
- warp/native/builtin.h +150 -174
- warp/native/bvh.cpp +75 -328
- warp/native/bvh.cu +406 -23
- warp/native/bvh.h +37 -45
- warp/native/clang/clang.cpp +136 -24
- warp/native/crt.cpp +1 -76
- warp/native/crt.h +111 -104
- warp/native/cuda_crt.h +1049 -0
- warp/native/cuda_util.cpp +15 -3
- warp/native/cuda_util.h +3 -1
- warp/native/cutlass/tools/library/scripts/conv2d_operation.py +463 -0
- warp/native/cutlass/tools/library/scripts/conv3d_operation.py +321 -0
- warp/native/cutlass/tools/library/scripts/gemm_operation.py +988 -0
- warp/native/cutlass/tools/library/scripts/generator.py +4625 -0
- warp/native/cutlass/tools/library/scripts/library.py +799 -0
- warp/native/cutlass/tools/library/scripts/manifest.py +402 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/docs/source/conf.py +96 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/profile/conv/conv2d_f16_sm80.py +106 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/profile/gemm/gemm_f32_sm80.py +91 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/setup.py +80 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/__init__.py +48 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/arguments.py +118 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/c_types.py +241 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/compiler.py +432 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/conv2d_operation.py +631 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/epilogue.py +1026 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/frontend.py +104 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/gemm_operation.py +1276 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/library.py +744 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/memory_manager.py +74 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/operation.py +110 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/parser.py +619 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/reduction_operation.py +398 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/tensor_ref.py +70 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/__init__.py +4 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/conv2d_testbed.py +646 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_grouped_testbed.py +235 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_testbed.py +557 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/profiler.py +70 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/type_hint.py +39 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/__init__.py +1 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/device.py +76 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/reference_model.py +255 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/__init__.py +0 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +201 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +177 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +98 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +95 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +163 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +187 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +309 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +54 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +253 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +97 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +242 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/run_all_tests.py +10 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/frontend/test_frontend.py +146 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/__init__.py +0 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_bf16_sm80.py +96 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f16_sm80.py +447 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f32_sm80.py +146 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f64_sm80.py +102 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_grouped_sm80.py +203 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_s8_sm80.py +229 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/run_all_tests.py +9 -0
- warp/native/cutlass/tools/library/scripts/pycutlass/test/unit/test_sm80.py +453 -0
- warp/native/cutlass/tools/library/scripts/rank_2k_operation.py +398 -0
- warp/native/cutlass/tools/library/scripts/rank_k_operation.py +387 -0
- warp/native/cutlass/tools/library/scripts/rt.py +796 -0
- warp/native/cutlass/tools/library/scripts/symm_operation.py +400 -0
- warp/native/cutlass/tools/library/scripts/trmm_operation.py +407 -0
- warp/native/cutlass_gemm.cu +5 -3
- warp/native/exports.h +1240 -949
- warp/native/fabric.h +228 -0
- warp/native/hashgrid.cpp +4 -4
- warp/native/hashgrid.h +22 -2
- warp/native/initializer_array.h +2 -2
- warp/native/intersect.h +22 -7
- warp/native/intersect_adj.h +8 -8
- warp/native/intersect_tri.h +13 -16
- warp/native/marching.cu +157 -161
- warp/native/mat.h +119 -19
- warp/native/matnn.h +2 -2
- warp/native/mesh.cpp +108 -83
- warp/native/mesh.cu +243 -6
- warp/native/mesh.h +1547 -458
- warp/native/nanovdb/NanoVDB.h +1 -1
- warp/native/noise.h +272 -329
- warp/native/quat.h +51 -8
- warp/native/rand.h +45 -35
- warp/native/range.h +6 -2
- warp/native/reduce.cpp +157 -0
- warp/native/reduce.cu +348 -0
- warp/native/runlength_encode.cpp +62 -0
- warp/native/runlength_encode.cu +46 -0
- warp/native/scan.cu +11 -13
- warp/native/scan.h +1 -0
- warp/native/solid_angle.h +442 -0
- warp/native/sort.cpp +13 -0
- warp/native/sort.cu +9 -1
- warp/native/sparse.cpp +338 -0
- warp/native/sparse.cu +545 -0
- warp/native/spatial.h +2 -2
- warp/native/temp_buffer.h +30 -0
- warp/native/vec.h +126 -24
- warp/native/volume.h +120 -0
- warp/native/warp.cpp +658 -53
- warp/native/warp.cu +660 -68
- warp/native/warp.h +112 -12
- warp/optim/__init__.py +1 -0
- warp/optim/linear.py +922 -0
- warp/optim/sgd.py +92 -0
- warp/render/render_opengl.py +392 -152
- warp/render/render_usd.py +11 -11
- warp/sim/__init__.py +2 -2
- warp/sim/articulation.py +385 -185
- warp/sim/collide.py +21 -8
- warp/sim/import_mjcf.py +297 -106
- warp/sim/import_urdf.py +389 -210
- warp/sim/import_usd.py +198 -97
- warp/sim/inertia.py +17 -18
- warp/sim/integrator_euler.py +14 -8
- warp/sim/integrator_xpbd.py +161 -19
- warp/sim/model.py +795 -291
- warp/sim/optimizer.py +2 -6
- warp/sim/render.py +65 -3
- warp/sim/utils.py +3 -0
- warp/sparse.py +1227 -0
- warp/stubs.py +665 -223
- warp/tape.py +66 -15
- warp/tests/__main__.py +3 -6
- warp/tests/assets/curlnoise_golden.npy +0 -0
- warp/tests/assets/pnoise_golden.npy +0 -0
- warp/tests/assets/torus.usda +105 -105
- warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
- warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
- warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
- warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
- warp/tests/aux_test_unresolved_func.py +14 -0
- warp/tests/aux_test_unresolved_symbol.py +14 -0
- warp/tests/disabled_kinematics.py +239 -0
- warp/tests/run_coverage_serial.py +31 -0
- warp/tests/test_adam.py +103 -106
- warp/tests/test_arithmetic.py +128 -74
- warp/tests/test_array.py +1497 -211
- warp/tests/test_array_reduce.py +150 -0
- warp/tests/test_atomic.py +64 -28
- warp/tests/test_bool.py +99 -0
- warp/tests/test_builtins_resolution.py +1292 -0
- warp/tests/test_bvh.py +75 -43
- warp/tests/test_closest_point_edge_edge.py +54 -57
- warp/tests/test_codegen.py +233 -128
- warp/tests/test_compile_consts.py +28 -20
- warp/tests/test_conditional.py +108 -24
- warp/tests/test_copy.py +10 -12
- warp/tests/test_ctypes.py +112 -88
- warp/tests/test_dense.py +21 -14
- warp/tests/test_devices.py +98 -0
- warp/tests/test_dlpack.py +136 -108
- warp/tests/test_examples.py +277 -0
- warp/tests/test_fabricarray.py +955 -0
- warp/tests/test_fast_math.py +15 -11
- warp/tests/test_fem.py +1271 -0
- warp/tests/test_fp16.py +53 -19
- warp/tests/test_func.py +187 -74
- warp/tests/test_generics.py +194 -49
- warp/tests/test_grad.py +180 -116
- warp/tests/test_grad_customs.py +176 -0
- warp/tests/test_hash_grid.py +52 -37
- warp/tests/test_import.py +10 -23
- warp/tests/test_indexedarray.py +577 -24
- warp/tests/test_intersect.py +18 -9
- warp/tests/test_large.py +141 -0
- warp/tests/test_launch.py +251 -15
- warp/tests/test_lerp.py +64 -65
- warp/tests/test_linear_solvers.py +154 -0
- warp/tests/test_lvalue.py +493 -0
- warp/tests/test_marching_cubes.py +12 -13
- warp/tests/test_mat.py +508 -2778
- warp/tests/test_mat_lite.py +115 -0
- warp/tests/test_mat_scalar_ops.py +2889 -0
- warp/tests/test_math.py +103 -9
- warp/tests/test_matmul.py +305 -69
- warp/tests/test_matmul_lite.py +410 -0
- warp/tests/test_mesh.py +71 -14
- warp/tests/test_mesh_query_aabb.py +41 -25
- warp/tests/test_mesh_query_point.py +325 -34
- warp/tests/test_mesh_query_ray.py +39 -22
- warp/tests/test_mlp.py +30 -22
- warp/tests/test_model.py +92 -89
- warp/tests/test_modules_lite.py +39 -0
- warp/tests/test_multigpu.py +88 -114
- warp/tests/test_noise.py +12 -11
- warp/tests/test_operators.py +16 -20
- warp/tests/test_options.py +11 -11
- warp/tests/test_pinned.py +17 -18
- warp/tests/test_print.py +32 -11
- warp/tests/test_quat.py +275 -129
- warp/tests/test_rand.py +18 -16
- warp/tests/test_reload.py +38 -34
- warp/tests/test_rounding.py +50 -43
- warp/tests/test_runlength_encode.py +190 -0
- warp/tests/test_smoothstep.py +9 -11
- warp/tests/test_snippet.py +143 -0
- warp/tests/test_sparse.py +460 -0
- warp/tests/test_spatial.py +276 -243
- warp/tests/test_streams.py +110 -85
- warp/tests/test_struct.py +331 -85
- warp/tests/test_tape.py +39 -21
- warp/tests/test_torch.py +118 -89
- warp/tests/test_transient_module.py +12 -13
- warp/tests/test_types.py +614 -0
- warp/tests/test_utils.py +494 -0
- warp/tests/test_vec.py +354 -1987
- warp/tests/test_vec_lite.py +73 -0
- warp/tests/test_vec_scalar_ops.py +2099 -0
- warp/tests/test_volume.py +457 -293
- warp/tests/test_volume_write.py +124 -134
- warp/tests/unittest_serial.py +35 -0
- warp/tests/unittest_suites.py +341 -0
- warp/tests/unittest_utils.py +568 -0
- warp/tests/unused_test_misc.py +71 -0
- warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
- warp/thirdparty/appdirs.py +36 -45
- warp/thirdparty/unittest_parallel.py +549 -0
- warp/torch.py +72 -30
- warp/types.py +1744 -713
- warp/utils.py +360 -350
- warp_lang-0.11.0.dist-info/LICENSE.md +36 -0
- warp_lang-0.11.0.dist-info/METADATA +238 -0
- warp_lang-0.11.0.dist-info/RECORD +332 -0
- {warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/WHEEL +1 -1
- warp/bin/warp-clang.exp +0 -0
- warp/bin/warp-clang.lib +0 -0
- warp/bin/warp.exp +0 -0
- warp/bin/warp.lib +0 -0
- warp/tests/test_all.py +0 -215
- warp/tests/test_array_scan.py +0 -60
- warp/tests/test_base.py +0 -208
- warp/tests/test_unresolved_func.py +0 -7
- warp/tests/test_unresolved_symbol.py +0 -7
- warp_lang-0.9.0.dist-info/METADATA +0 -20
- warp_lang-0.9.0.dist-info/RECORD +0 -177
- /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
- /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
- /warp/tests/{test_square.py → aux_test_square.py} +0 -0
- {warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/top_level.txt +0 -0
warp/native/fabric.h
ADDED
|
@@ -0,0 +1,228 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include "builtin.h"
|
|
4
|
+
|
|
5
|
+
namespace wp
|
|
6
|
+
{
|
|
7
|
+
|
|
8
|
+
// One contiguous chunk of a fabric array.
//
// A bucket covers the half-open range of global element indices
// [index_start, index_end); the lookup in fabricarray_find_bucket() treats
// i >= index_end as "past this bucket" and i < index_start as "before it".
struct fabricbucket_t
{
    // first global index stored in this bucket (inclusive)
    size_t index_start;

    // one past the last global index stored in this bucket (exclusive)
    size_t index_end;

    // bucket storage: read as T* by the flat accessors and as void** (one
    // inner-array pointer per element) by the array-of-arrays accessors
    void* ptr;

    // per-element inner array lengths; the array-of-arrays accessors assert
    // that this is non-null before reading it
    size_t* lengths;
};
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
template <typename T>
|
|
18
|
+
struct fabricarray_t
|
|
19
|
+
{
|
|
20
|
+
CUDA_CALLABLE inline fabricarray_t()
|
|
21
|
+
: nbuckets(0),
|
|
22
|
+
size(0)
|
|
23
|
+
{}
|
|
24
|
+
|
|
25
|
+
CUDA_CALLABLE inline bool empty() const { return !size; }
|
|
26
|
+
|
|
27
|
+
fabricbucket_t* buckets; // array of fabricbucket_t on the correct device
|
|
28
|
+
|
|
29
|
+
size_t nbuckets;
|
|
30
|
+
size_t size;
|
|
31
|
+
};
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
template <typename T>
|
|
35
|
+
struct indexedfabricarray_t
|
|
36
|
+
{
|
|
37
|
+
CUDA_CALLABLE inline indexedfabricarray_t()
|
|
38
|
+
: indices(),
|
|
39
|
+
size(0)
|
|
40
|
+
{}
|
|
41
|
+
|
|
42
|
+
CUDA_CALLABLE inline bool empty() const { return !size; }
|
|
43
|
+
|
|
44
|
+
fabricarray_t<T> fa;
|
|
45
|
+
|
|
46
|
+
// TODO: we use 32-bit indices for consistency with other Warp indexed arrays,
|
|
47
|
+
// but Fabric uses 64-bit indexing.
|
|
48
|
+
int* indices;
|
|
49
|
+
size_t size;
|
|
50
|
+
};
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
#ifndef FABRICARRAY_USE_BINARY_SEARCH
|
|
54
|
+
#define FABRICARRAY_USE_BINARY_SEARCH 1
|
|
55
|
+
#endif
|
|
56
|
+
|
|
57
|
+
template <typename T>
|
|
58
|
+
CUDA_CALLABLE inline const fabricbucket_t* fabricarray_find_bucket(const fabricarray_t<T>& fa, size_t i)
|
|
59
|
+
{
|
|
60
|
+
#if FABRICARRAY_USE_BINARY_SEARCH
|
|
61
|
+
// use binary search to find the right bucket
|
|
62
|
+
const fabricbucket_t* bucket = nullptr;
|
|
63
|
+
size_t lo = 0;
|
|
64
|
+
size_t hi = fa.nbuckets - 1;
|
|
65
|
+
while (hi >= lo)
|
|
66
|
+
{
|
|
67
|
+
size_t mid = (lo + hi) >> 1;
|
|
68
|
+
bucket = fa.buckets + mid;
|
|
69
|
+
if (i >= bucket->index_end)
|
|
70
|
+
lo = mid + 1;
|
|
71
|
+
else if (i < bucket->index_start)
|
|
72
|
+
hi = mid - 1;
|
|
73
|
+
else
|
|
74
|
+
return bucket;
|
|
75
|
+
}
|
|
76
|
+
return nullptr;
|
|
77
|
+
#else
|
|
78
|
+
// use linear search to find the right bucket
|
|
79
|
+
const fabricbucket_t* bucket = fa.buckets;
|
|
80
|
+
const fabricbucket_t* bucket_end = bucket + fa.nbuckets;
|
|
81
|
+
for (; bucket < bucket_end; ++bucket)
|
|
82
|
+
{
|
|
83
|
+
if (i < bucket->index_end)
|
|
84
|
+
return bucket;
|
|
85
|
+
}
|
|
86
|
+
return nullptr;
|
|
87
|
+
#endif
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
// Compute the pointer to a fabricarray element at index i.
|
|
92
|
+
// This function is similar to wp::index(), but the array data type doesn't need to be known at compile time.
|
|
93
|
+
CUDA_CALLABLE inline void* fabricarray_element_ptr(const fabricarray_t<void>& fa, size_t i, size_t elem_size)
|
|
94
|
+
{
|
|
95
|
+
const fabricbucket_t* bucket = fabricarray_find_bucket(fa, i);
|
|
96
|
+
|
|
97
|
+
size_t index_in_bucket = i - bucket->index_start;
|
|
98
|
+
|
|
99
|
+
return (char*)bucket->ptr + index_in_bucket * elem_size;
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
template <typename T>
|
|
104
|
+
CUDA_CALLABLE inline T& index(const fabricarray_t<T>& fa, size_t i)
|
|
105
|
+
{
|
|
106
|
+
const fabricbucket_t* bucket = fabricarray_find_bucket(fa, i);
|
|
107
|
+
assert(bucket && "Fabric array index out of range");
|
|
108
|
+
|
|
109
|
+
size_t index_in_bucket = i - bucket->index_start;
|
|
110
|
+
|
|
111
|
+
T& result = *((T*)bucket->ptr + index_in_bucket);
|
|
112
|
+
|
|
113
|
+
FP_VERIFY_FWD_1(result)
|
|
114
|
+
|
|
115
|
+
return result;
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
// indexing for fabric array of arrays
|
|
120
|
+
template <typename T>
|
|
121
|
+
CUDA_CALLABLE inline T& index(const fabricarray_t<T>& fa, size_t i, size_t j)
|
|
122
|
+
{
|
|
123
|
+
const fabricbucket_t* bucket = fabricarray_find_bucket(fa, i);
|
|
124
|
+
assert(bucket && "Fabric array index out of range");
|
|
125
|
+
|
|
126
|
+
assert(bucket->lengths && "Missing inner array lengths");
|
|
127
|
+
|
|
128
|
+
size_t index_in_bucket = i - bucket->index_start;
|
|
129
|
+
|
|
130
|
+
void* ptr = *((void**)bucket->ptr + index_in_bucket);
|
|
131
|
+
size_t length = *((size_t*)bucket->lengths + index_in_bucket);
|
|
132
|
+
|
|
133
|
+
assert(j < length && "Fabric array inner index out of range");
|
|
134
|
+
|
|
135
|
+
T& result = *((T*)ptr + j);
|
|
136
|
+
|
|
137
|
+
FP_VERIFY_FWD_1(result)
|
|
138
|
+
|
|
139
|
+
return result;
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
template <typename T>
|
|
144
|
+
CUDA_CALLABLE inline array_t<T> view(fabricarray_t<T>& fa, size_t i)
|
|
145
|
+
{
|
|
146
|
+
const fabricbucket_t* bucket = fabricarray_find_bucket(fa, i);
|
|
147
|
+
assert(bucket && "Fabric array index out of range");
|
|
148
|
+
|
|
149
|
+
assert(bucket->lengths && "Missing inner array lengths");
|
|
150
|
+
|
|
151
|
+
size_t index_in_bucket = i - bucket->index_start;
|
|
152
|
+
|
|
153
|
+
void* ptr = *((void**)bucket->ptr + index_in_bucket);
|
|
154
|
+
size_t length = *((size_t*)bucket->lengths + index_in_bucket);
|
|
155
|
+
|
|
156
|
+
return array_t<T>((T*)ptr, int(length));
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
template <typename T>
|
|
161
|
+
CUDA_CALLABLE inline T& index(const indexedfabricarray_t<T>& ifa, size_t i)
|
|
162
|
+
{
|
|
163
|
+
// index lookup
|
|
164
|
+
assert(i < ifa.size);
|
|
165
|
+
i = size_t(ifa.indices[i]);
|
|
166
|
+
|
|
167
|
+
const fabricbucket_t* bucket = fabricarray_find_bucket(ifa.fa, i);
|
|
168
|
+
assert(bucket && "Fabric array index out of range");
|
|
169
|
+
|
|
170
|
+
size_t index_in_bucket = i - bucket->index_start;
|
|
171
|
+
|
|
172
|
+
T& result = *((T*)bucket->ptr + index_in_bucket);
|
|
173
|
+
|
|
174
|
+
FP_VERIFY_FWD_1(result)
|
|
175
|
+
|
|
176
|
+
return result;
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
// indexing for fabric array of arrays
|
|
181
|
+
template <typename T>
|
|
182
|
+
CUDA_CALLABLE inline T& index(const indexedfabricarray_t<T>& ifa, size_t i, size_t j)
|
|
183
|
+
{
|
|
184
|
+
// index lookup
|
|
185
|
+
assert(i < ifa.size);
|
|
186
|
+
i = size_t(ifa.indices[i]);
|
|
187
|
+
|
|
188
|
+
const fabricbucket_t* bucket = fabricarray_find_bucket(ifa.fa, i);
|
|
189
|
+
assert(bucket && "Fabric array index out of range");
|
|
190
|
+
|
|
191
|
+
assert(bucket->lengths && "Missing inner array lengths");
|
|
192
|
+
|
|
193
|
+
size_t index_in_bucket = i - bucket->index_start;
|
|
194
|
+
|
|
195
|
+
void* ptr = *((void**)bucket->ptr + index_in_bucket);
|
|
196
|
+
size_t length = *((size_t*)bucket->lengths + index_in_bucket);
|
|
197
|
+
|
|
198
|
+
assert(j < length && "Fabric array inner index out of range");
|
|
199
|
+
|
|
200
|
+
T& result = *((T*)ptr + j);
|
|
201
|
+
|
|
202
|
+
FP_VERIFY_FWD_1(result)
|
|
203
|
+
|
|
204
|
+
return result;
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
template <typename T>
|
|
209
|
+
CUDA_CALLABLE inline array_t<T> view(indexedfabricarray_t<T>& ifa, size_t i)
|
|
210
|
+
{
|
|
211
|
+
// index lookup
|
|
212
|
+
assert(i < ifa.size);
|
|
213
|
+
i = size_t(ifa.indices[i]);
|
|
214
|
+
|
|
215
|
+
const fabricbucket_t* bucket = fabricarray_find_bucket(ifa.fa, i);
|
|
216
|
+
assert(bucket && "Fabric array index out of range");
|
|
217
|
+
|
|
218
|
+
assert(bucket->lengths && "Missing inner array lengths");
|
|
219
|
+
|
|
220
|
+
size_t index_in_bucket = i - bucket->index_start;
|
|
221
|
+
|
|
222
|
+
void* ptr = *((void**)bucket->ptr + index_in_bucket);
|
|
223
|
+
size_t length = *((size_t*)bucket->lengths + index_in_bucket);
|
|
224
|
+
|
|
225
|
+
return array_t<T>((T*)ptr, int(length));
|
|
226
|
+
}
|
|
227
|
+
|
|
228
|
+
} // namespace wp
|
warp/native/hashgrid.cpp
CHANGED
|
@@ -93,8 +93,8 @@ void hash_grid_reserve_host(uint64_t id, int num_points)
|
|
|
93
93
|
free_host(grid->point_ids);
|
|
94
94
|
|
|
95
95
|
const int num_to_alloc = num_points*3/2;
|
|
96
|
-
grid->point_cells = (int*)alloc_host(2*num_to_alloc*sizeof(int)); // *2 for
|
|
97
|
-
grid->point_ids = (int*)alloc_host(2*num_to_alloc*sizeof(int)); // *2 for
|
|
96
|
+
grid->point_cells = (int*)alloc_host(2*num_to_alloc*sizeof(int)); // *2 for auxiliary radix buffers
|
|
97
|
+
grid->point_ids = (int*)alloc_host(2*num_to_alloc*sizeof(int)); // *2 for auxiliary radix buffers
|
|
98
98
|
|
|
99
99
|
grid->max_points = num_to_alloc;
|
|
100
100
|
}
|
|
@@ -212,8 +212,8 @@ void hash_grid_reserve_device(uint64_t id, int num_points)
|
|
|
212
212
|
free_device(WP_CURRENT_CONTEXT, grid.point_ids);
|
|
213
213
|
|
|
214
214
|
const int num_to_alloc = num_points*3/2;
|
|
215
|
-
grid.point_cells = (int*)alloc_device(WP_CURRENT_CONTEXT, 2*num_to_alloc*sizeof(int)); // *2 for
|
|
216
|
-
grid.point_ids = (int*)alloc_device(WP_CURRENT_CONTEXT, 2*num_to_alloc*sizeof(int)); // *2 for
|
|
215
|
+
grid.point_cells = (int*)alloc_device(WP_CURRENT_CONTEXT, 2*num_to_alloc*sizeof(int)); // *2 for auxiliary radix buffers
|
|
216
|
+
grid.point_ids = (int*)alloc_device(WP_CURRENT_CONTEXT, 2*num_to_alloc*sizeof(int)); // *2 for auxiliary radix buffers
|
|
217
217
|
grid.max_points = num_to_alloc;
|
|
218
218
|
|
|
219
219
|
// ensure we pre-size our sort routine to avoid
|
warp/native/hashgrid.h
CHANGED
|
@@ -75,8 +75,28 @@ CUDA_CALLABLE inline int hash_grid_index(const HashGrid& grid, const vec3& p)
|
|
|
75
75
|
// stores state required to traverse neighboring cells of a point
|
|
76
76
|
struct hash_grid_query_t
|
|
77
77
|
{
|
|
78
|
-
CUDA_CALLABLE hash_grid_query_t()
|
|
79
|
-
|
|
78
|
+
CUDA_CALLABLE hash_grid_query_t()
|
|
79
|
+
: x_start(0),
|
|
80
|
+
y_start(0),
|
|
81
|
+
z_start(0),
|
|
82
|
+
x_end(0),
|
|
83
|
+
y_end(0),
|
|
84
|
+
z_end(0),
|
|
85
|
+
x(0),
|
|
86
|
+
y(0),
|
|
87
|
+
z(0),
|
|
88
|
+
cell(0),
|
|
89
|
+
cell_index(0),
|
|
90
|
+
cell_end(0),
|
|
91
|
+
current(0),
|
|
92
|
+
grid()
|
|
93
|
+
{}
|
|
94
|
+
|
|
95
|
+
// Required for adjoint computations.
|
|
96
|
+
CUDA_CALLABLE inline hash_grid_query_t& operator+=(const hash_grid_query_t& other)
|
|
97
|
+
{
|
|
98
|
+
return *this;
|
|
99
|
+
}
|
|
80
100
|
|
|
81
101
|
int x_start;
|
|
82
102
|
int y_start;
|
warp/native/initializer_array.h
CHANGED
|
@@ -18,12 +18,12 @@ struct initializer_array
|
|
|
18
18
|
{
|
|
19
19
|
const Type storage[Length];
|
|
20
20
|
|
|
21
|
-
const Type operator[](unsigned i)
|
|
21
|
+
CUDA_CALLABLE const Type operator[](unsigned i)
|
|
22
22
|
{
|
|
23
23
|
return storage[i];
|
|
24
24
|
}
|
|
25
25
|
|
|
26
|
-
const Type operator[](unsigned i) const
|
|
26
|
+
CUDA_CALLABLE const Type operator[](unsigned i) const
|
|
27
27
|
{
|
|
28
28
|
return storage[i];
|
|
29
29
|
}
|
warp/native/intersect.h
CHANGED
|
@@ -114,6 +114,21 @@ CUDA_CALLABLE inline vec2 closest_point_to_triangle(const vec3& a, const vec3& b
|
|
|
114
114
|
return vec2(u, v);
|
|
115
115
|
}
|
|
116
116
|
|
|
117
|
+
// Pick the triangle vertex (a, b or c) that lies furthest from point p.
//
// The result encodes the chosen vertex as a vec2: (1, 0) for a, (0, 1) for b
// and (0, 0) for c. Vertex a is chosen only when it is strictly further than
// both b and c; otherwise b is chosen when it is strictly further than c;
// otherwise c.
CUDA_CALLABLE inline vec2 furthest_point_to_triangle(const vec3& a, const vec3& b, const vec3& c, const vec3& p)
{
    // offsets from each vertex to the query point
    const vec3 from_a = p - a;
    const vec3 from_b = p - b;
    const vec3 from_c = p - c;

    // squared distances are enough for comparing
    const float sq_a = dot(from_a, from_a);
    const float sq_b = dot(from_b, from_b);
    const float sq_c = dot(from_c, from_c);

    if (sq_a > sq_b && sq_a > sq_c)
    {
        return vec2(1.0f, 0.0f);  // a is furthest
    }

    if (sq_b > sq_c)
    {
        return vec2(0.0f, 1.0f);  // b is furthest
    }

    return vec2(0.0f, 0.0f);  // c is furthest
}
|
|
117
132
|
|
|
118
133
|
CUDA_CALLABLE inline bool intersect_ray_aabb(const vec3& pos, const vec3& rcp_dir, const vec3& lower, const vec3& upper, float& t)
|
|
119
134
|
{
|
|
@@ -357,8 +372,8 @@ CUDA_CALLABLE inline bool intersect_ray_tri_woop(const vec3& p, const vec3& dir,
|
|
|
357
372
|
}
|
|
358
373
|
|
|
359
374
|
CUDA_CALLABLE inline void adj_intersect_ray_tri_woop(
|
|
360
|
-
const vec3& p, const vec3& dir, const vec3& a, const vec3& b, const vec3& c, float
|
|
361
|
-
vec3& adj_p, vec3& adj_dir, vec3& adj_a, vec3& adj_b, vec3& adj_c, float& adj_t, float& adj_u, float& adj_v, float& adj_sign, vec3
|
|
375
|
+
const vec3& p, const vec3& dir, const vec3& a, const vec3& b, const vec3& c, float t, float u, float v, float sign, const vec3& normal,
|
|
376
|
+
vec3& adj_p, vec3& adj_dir, vec3& adj_a, vec3& adj_b, vec3& adj_c, float& adj_t, float& adj_u, float& adj_v, float& adj_sign, vec3& adj_normal, bool& adj_ret)
|
|
362
377
|
{
|
|
363
378
|
|
|
364
379
|
// todo: precompute for ray
|
|
@@ -854,7 +869,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
|
|
|
854
869
|
wp::adj_sub(var_9, var_71, adj_9, adj_71, adj_73);
|
|
855
870
|
wp::adj_mul(var_21, var_70, adj_21, adj_70, adj_72);
|
|
856
871
|
wp::adj_mul(var_41, var_70, adj_41, adj_70, adj_71);
|
|
857
|
-
wp::adj_div(var_9, var_69, adj_9, adj_69, adj_70);
|
|
872
|
+
wp::adj_div(var_9, var_69, var_70, adj_9, adj_69, adj_70);
|
|
858
873
|
wp::adj_add(var_68, var_21, adj_68, adj_21, adj_69);
|
|
859
874
|
wp::adj_add(var_53, var_41, adj_53, adj_41, adj_68);
|
|
860
875
|
wp::adj_select(var_64, var_50, var_66, adj_64, adj_50, adj_66, adj_67);
|
|
@@ -866,7 +881,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
|
|
|
866
881
|
}
|
|
867
882
|
wp::adj_sub(var_32, var_33, adj_32, adj_33, adj_62);
|
|
868
883
|
wp::adj_sub(var_13, var_12, adj_13, adj_12, adj_60);
|
|
869
|
-
wp::adj_div(var_54, var_57, adj_54, adj_57, adj_58);
|
|
884
|
+
wp::adj_div(var_54, var_57, var_58, adj_54, adj_57, adj_58);
|
|
870
885
|
wp::adj_add(var_55, var_56, adj_55, adj_56, adj_57);
|
|
871
886
|
wp::adj_sub(var_32, var_33, adj_32, adj_33, adj_56);
|
|
872
887
|
wp::adj_sub(var_13, var_12, adj_13, adj_12, adj_55);
|
|
@@ -881,7 +896,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
|
|
|
881
896
|
wp::adj_vec2(var_48, var_5, adj_48, adj_5, adj_49);
|
|
882
897
|
wp::adj_sub(var_9, var_43, adj_9, adj_43, adj_48);
|
|
883
898
|
}
|
|
884
|
-
wp::adj_div(var_4, var_42, adj_4, adj_42, adj_43);
|
|
899
|
+
wp::adj_div(var_4, var_42, var_43, adj_4, adj_42, adj_43);
|
|
885
900
|
wp::adj_sub(var_4, var_33, adj_4, adj_33, adj_42);
|
|
886
901
|
wp::adj_sub(var_39, var_40, adj_39, adj_40, adj_41);
|
|
887
902
|
wp::adj_mul(var_3, var_33, adj_3, adj_33, adj_40);
|
|
@@ -902,7 +917,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
|
|
|
902
917
|
wp::adj_vec2(var_28, var_23, adj_28, adj_23, adj_29);
|
|
903
918
|
wp::adj_sub(var_9, var_23, adj_9, adj_23, adj_28);
|
|
904
919
|
}
|
|
905
|
-
wp::adj_div(var_3, var_22, adj_3, adj_22, adj_23);
|
|
920
|
+
wp::adj_div(var_3, var_22, var_23, adj_3, adj_22, adj_23);
|
|
906
921
|
wp::adj_sub(var_3, var_12, adj_3, adj_12, adj_22);
|
|
907
922
|
wp::adj_sub(var_19, var_20, adj_19, adj_20, adj_21);
|
|
908
923
|
wp::adj_mul(var_12, var_4, adj_12, adj_4, adj_20);
|
|
@@ -974,7 +989,7 @@ def closest_point_edge_edge(
|
|
|
974
989
|
else:
|
|
975
990
|
c = wp.dot(d1, r)
|
|
976
991
|
if e <= epsilon:
|
|
977
|
-
# second segment
|
|
992
|
+
# second segment degenerates into a point
|
|
978
993
|
s = wp.clamp(-c / a, 0.0, 1.0) # t = 0 => s = (b*t-c)/a = -c/a
|
|
979
994
|
t = float(0.0)
|
|
980
995
|
else:
|
warp/native/intersect_adj.h
CHANGED
|
@@ -276,7 +276,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
|
|
|
276
276
|
label1:;
|
|
277
277
|
adj_71 += adj_ret;
|
|
278
278
|
wp::adj_vec3(var_61, var_62, var_70, adj_61, adj_62, adj_70, adj_71);
|
|
279
|
-
wp::adj_length(var_69, adj_69, adj_70);
|
|
279
|
+
wp::adj_length(var_69, var_70, adj_69, adj_70);
|
|
280
280
|
wp::adj_sub(var_68, var_65, adj_68, adj_65, adj_69);
|
|
281
281
|
wp::adj_add(var_p2, var_67, adj_p2, adj_67, adj_68);
|
|
282
282
|
wp::adj_mul(var_66, var_62, adj_66, adj_62, adj_67);
|
|
@@ -297,7 +297,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
|
|
|
297
297
|
wp::adj_select(var_51, var_49, var_54, adj_51, adj_49, adj_54, adj_55);
|
|
298
298
|
if (var_51) {
|
|
299
299
|
wp::adj_clamp(var_53, var_6, var_25, adj_53, adj_6, adj_25, adj_54);
|
|
300
|
-
wp::adj_div(var_52, var_3, adj_52, adj_3, adj_53);
|
|
300
|
+
wp::adj_div(var_52, var_3, var_53, adj_52, adj_3, adj_53);
|
|
301
301
|
wp::adj_sub(var_30, var_21, adj_30, adj_21, adj_52);
|
|
302
302
|
}
|
|
303
303
|
}
|
|
@@ -305,10 +305,10 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
|
|
|
305
305
|
wp::adj_select(var_45, var_41, var_48, adj_45, adj_41, adj_48, adj_49);
|
|
306
306
|
if (var_45) {
|
|
307
307
|
wp::adj_clamp(var_47, var_6, var_25, adj_47, adj_6, adj_25, adj_48);
|
|
308
|
-
wp::adj_div(var_46, var_3, adj_46, adj_3, adj_47);
|
|
308
|
+
wp::adj_div(var_46, var_3, var_47, adj_46, adj_3, adj_47);
|
|
309
309
|
wp::adj_neg(var_21, adj_21, adj_46);
|
|
310
310
|
}
|
|
311
|
-
wp::adj_div(var_43, var_4, adj_43, adj_4, adj_44);
|
|
311
|
+
wp::adj_div(var_43, var_4, var_44, adj_43, adj_4, adj_44);
|
|
312
312
|
wp::adj_add(var_42, var_5, adj_42, adj_5, adj_43);
|
|
313
313
|
wp::adj_mul(var_30, var_41, adj_30, adj_41, adj_42);
|
|
314
314
|
wp::adj_select(var_34, var_6, var_40, adj_34, adj_6, adj_40, adj_41);
|
|
@@ -317,7 +317,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
|
|
|
317
317
|
wp::adj_select(var_34, var_28, var_39, adj_34, adj_28, adj_39, adj_40);
|
|
318
318
|
if (var_34) {
|
|
319
319
|
wp::adj_clamp(var_38, var_6, var_25, adj_38, adj_6, adj_25, adj_39);
|
|
320
|
-
wp::adj_div(var_37, var_33, adj_37, adj_33, adj_38);
|
|
320
|
+
wp::adj_div(var_37, var_33, var_38, adj_37, adj_33, adj_38);
|
|
321
321
|
wp::adj_sub(var_35, var_36, adj_35, adj_36, adj_37);
|
|
322
322
|
wp::adj_mul(var_21, var_4, adj_21, adj_4, adj_36);
|
|
323
323
|
wp::adj_mul(var_30, var_5, adj_30, adj_5, adj_35);
|
|
@@ -332,7 +332,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
|
|
|
332
332
|
if (var_22) {
|
|
333
333
|
wp::adj_cast_float(var_6, adj_6, adj_27);
|
|
334
334
|
wp::adj_clamp(var_24, var_6, var_25, adj_24, adj_6, adj_25, adj_26);
|
|
335
|
-
wp::adj_div(var_23, var_3, adj_23, adj_3, adj_24);
|
|
335
|
+
wp::adj_div(var_23, var_3, var_24, adj_23, adj_3, adj_24);
|
|
336
336
|
wp::adj_neg(var_21, adj_21, adj_23);
|
|
337
337
|
}
|
|
338
338
|
wp::adj_dot(var_0, var_2, adj_0, adj_2, adj_21);
|
|
@@ -341,7 +341,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
|
|
|
341
341
|
wp::adj_select(var_15, var_7, var_16, adj_15, adj_7, adj_16, adj_19);
|
|
342
342
|
if (var_15) {
|
|
343
343
|
wp::adj_cast_float(var_17, adj_17, adj_18);
|
|
344
|
-
wp::adj_div(var_5, var_4, adj_5, adj_4, adj_17);
|
|
344
|
+
wp::adj_div(var_5, var_4, var_17, adj_5, adj_4, adj_17);
|
|
345
345
|
wp::adj_cast_float(var_6, adj_6, adj_16);
|
|
346
346
|
}
|
|
347
347
|
if (var_13) {
|
|
@@ -349,7 +349,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
|
|
|
349
349
|
adj_14 += adj_ret;
|
|
350
350
|
wp::adj_vec3(var_7, var_8, var_10, adj_7, adj_8, adj_10, adj_14);
|
|
351
351
|
}
|
|
352
|
-
wp::adj_length(var_9, adj_9, adj_10);
|
|
352
|
+
wp::adj_length(var_9, var_10, adj_9, adj_10);
|
|
353
353
|
wp::adj_sub(var_p2, var_p1, adj_p2, adj_p1, adj_9);
|
|
354
354
|
wp::adj_cast_float(var_6, adj_6, adj_8);
|
|
355
355
|
wp::adj_cast_float(var_6, adj_6, adj_7);
|
warp/native/intersect_tri.h
CHANGED
|
@@ -32,9 +32,6 @@
|
|
|
32
32
|
OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
33
33
|
*/
|
|
34
34
|
|
|
35
|
-
|
|
36
|
-
#define FABS(x) (float(fabs(x))) /* implement as is fastest on your machine */
|
|
37
|
-
|
|
38
35
|
/* if USE_EPSILON_TEST is true then we do a check:
|
|
39
36
|
if |dv|<EPSILON then dv=0.0;
|
|
40
37
|
else no check is done (which is less robust)
|
|
@@ -106,7 +103,7 @@
|
|
|
106
103
|
#define POINT_IN_TRI(V0,U0,U1,U2) \
|
|
107
104
|
{ \
|
|
108
105
|
float a,b,c,d0,d1,d2; \
|
|
109
|
-
/* is T1
|
|
106
|
+
/* is T1 completely inside T2? */ \
|
|
110
107
|
/* check if V0 is inside tri(U0,U1,U2) */ \
|
|
111
108
|
a=U1[i1]-U0[i1]; \
|
|
112
109
|
b=-(U1[i0]-U0[i0]); \
|
|
@@ -135,9 +132,9 @@ CUDA_CALLABLE inline int coplanar_tri_tri(float N[3],float V0[3],float V1[3],flo
|
|
|
135
132
|
short i0,i1;
|
|
136
133
|
/* first project onto an axis-aligned plane, that maximizes the area */
|
|
137
134
|
/* of the triangles, compute indices: i0,i1. */
|
|
138
|
-
A[0]=
|
|
139
|
-
A[1]=
|
|
140
|
-
A[2]=
|
|
135
|
+
A[0]=fabsf(N[0]);
|
|
136
|
+
A[1]=fabsf(N[1]);
|
|
137
|
+
A[2]=fabsf(N[2]);
|
|
141
138
|
if(A[0]>A[1])
|
|
142
139
|
{
|
|
143
140
|
if(A[0]>A[2])
|
|
@@ -242,9 +239,9 @@ CUDA_CALLABLE inline int NoDivTriTriIsect(float V0[3],float V1[3],float V2[3],
|
|
|
242
239
|
|
|
243
240
|
/* coplanarity robustness check */
|
|
244
241
|
#if USE_EPSILON_TEST==TRUE
|
|
245
|
-
if(
|
|
246
|
-
if(
|
|
247
|
-
if(
|
|
242
|
+
if(fabsf(du0)<EPSILON) du0=0.0;
|
|
243
|
+
if(fabsf(du1)<EPSILON) du1=0.0;
|
|
244
|
+
if(fabsf(du2)<EPSILON) du2=0.0;
|
|
248
245
|
#endif
|
|
249
246
|
du0du1=du0*du1;
|
|
250
247
|
du0du2=du0*du2;
|
|
@@ -265,9 +262,9 @@ CUDA_CALLABLE inline int NoDivTriTriIsect(float V0[3],float V1[3],float V2[3],
|
|
|
265
262
|
dv2=DOT(N2,V2)+d2;
|
|
266
263
|
|
|
267
264
|
#if USE_EPSILON_TEST==TRUE
|
|
268
|
-
if(
|
|
269
|
-
if(
|
|
270
|
-
if(
|
|
265
|
+
if(fabsf(dv0)<EPSILON) dv0=0.0;
|
|
266
|
+
if(fabsf(dv1)<EPSILON) dv1=0.0;
|
|
267
|
+
if(fabsf(dv2)<EPSILON) dv2=0.0;
|
|
271
268
|
#endif
|
|
272
269
|
|
|
273
270
|
dv0dv1=dv0*dv1;
|
|
@@ -280,10 +277,10 @@ CUDA_CALLABLE inline int NoDivTriTriIsect(float V0[3],float V1[3],float V2[3],
|
|
|
280
277
|
CROSS(D,N1,N2);
|
|
281
278
|
|
|
282
279
|
/* compute and index to the largest component of D */
|
|
283
|
-
max=(
|
|
280
|
+
max=fabsf(D[0]);
|
|
284
281
|
index=0;
|
|
285
|
-
bb=(
|
|
286
|
-
cc=(
|
|
282
|
+
bb=fabsf(D[1]);
|
|
283
|
+
cc=fabsf(D[2]);
|
|
287
284
|
if(bb>max) max=bb,index=1;
|
|
288
285
|
if(cc>max) max=cc,index=2;
|
|
289
286
|
|