PyPI - warp-lang - Versions diffs - 0.9.0__py3-none-win_amd64.whl → 0.11.0__py3-none-win_amd64.whl - Mend

warp-lang 0.9.0__py3-none-win_amd64.whl → 0.11.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (315)

warp/__init__.py +15 -7
warp/__init__.pyi +1 -0
warp/bin/warp-clang.dll +0 -0
warp/bin/warp.dll +0 -0
warp/build.py +22 -443
warp/build_dll.py +384 -0
warp/builtins.py +998 -488
warp/codegen.py +1307 -739
warp/config.py +5 -3
warp/constants.py +6 -0
warp/context.py +1291 -548
warp/dlpack.py +31 -31
warp/fabric.py +326 -0
warp/fem/__init__.py +27 -0
warp/fem/cache.py +389 -0
warp/fem/dirichlet.py +181 -0
warp/fem/domain.py +263 -0
warp/fem/field/__init__.py +101 -0
warp/fem/field/field.py +149 -0
warp/fem/field/nodal_field.py +299 -0
warp/fem/field/restriction.py +21 -0
warp/fem/field/test.py +181 -0
warp/fem/field/trial.py +183 -0
warp/fem/geometry/__init__.py +19 -0
warp/fem/geometry/closest_point.py +70 -0
warp/fem/geometry/deformed_geometry.py +271 -0
warp/fem/geometry/element.py +744 -0
warp/fem/geometry/geometry.py +186 -0
warp/fem/geometry/grid_2d.py +373 -0
warp/fem/geometry/grid_3d.py +435 -0
warp/fem/geometry/hexmesh.py +953 -0
warp/fem/geometry/partition.py +376 -0
warp/fem/geometry/quadmesh_2d.py +532 -0
warp/fem/geometry/tetmesh.py +840 -0
warp/fem/geometry/trimesh_2d.py +577 -0
warp/fem/integrate.py +1616 -0
warp/fem/operator.py +191 -0
warp/fem/polynomial.py +213 -0
warp/fem/quadrature/__init__.py +2 -0
warp/fem/quadrature/pic_quadrature.py +245 -0
warp/fem/quadrature/quadrature.py +294 -0
warp/fem/space/__init__.py +292 -0
warp/fem/space/basis_space.py +489 -0
warp/fem/space/collocated_function_space.py +105 -0
warp/fem/space/dof_mapper.py +236 -0
warp/fem/space/function_space.py +145 -0
warp/fem/space/grid_2d_function_space.py +267 -0
warp/fem/space/grid_3d_function_space.py +306 -0
warp/fem/space/hexmesh_function_space.py +352 -0
warp/fem/space/partition.py +350 -0
warp/fem/space/quadmesh_2d_function_space.py +369 -0
warp/fem/space/restriction.py +160 -0
warp/fem/space/shape/__init__.py +15 -0
warp/fem/space/shape/cube_shape_function.py +738 -0
warp/fem/space/shape/shape_function.py +103 -0
warp/fem/space/shape/square_shape_function.py +611 -0
warp/fem/space/shape/tet_shape_function.py +567 -0
warp/fem/space/shape/triangle_shape_function.py +429 -0
warp/fem/space/tetmesh_function_space.py +292 -0
warp/fem/space/topology.py +295 -0
warp/fem/space/trimesh_2d_function_space.py +221 -0
warp/fem/types.py +77 -0
warp/fem/utils.py +495 -0
warp/native/array.h +164 -55
warp/native/builtin.h +150 -174
warp/native/bvh.cpp +75 -328
warp/native/bvh.cu +406 -23
warp/native/bvh.h +37 -45
warp/native/clang/clang.cpp +136 -24
warp/native/crt.cpp +1 -76
warp/native/crt.h +111 -104
warp/native/cuda_crt.h +1049 -0
warp/native/cuda_util.cpp +15 -3
warp/native/cuda_util.h +3 -1
warp/native/cutlass/tools/library/scripts/conv2d_operation.py +463 -0
warp/native/cutlass/tools/library/scripts/conv3d_operation.py +321 -0
warp/native/cutlass/tools/library/scripts/gemm_operation.py +988 -0
warp/native/cutlass/tools/library/scripts/generator.py +4625 -0
warp/native/cutlass/tools/library/scripts/library.py +799 -0
warp/native/cutlass/tools/library/scripts/manifest.py +402 -0
warp/native/cutlass/tools/library/scripts/pycutlass/docs/source/conf.py +96 -0
warp/native/cutlass/tools/library/scripts/pycutlass/profile/conv/conv2d_f16_sm80.py +106 -0
warp/native/cutlass/tools/library/scripts/pycutlass/profile/gemm/gemm_f32_sm80.py +91 -0
warp/native/cutlass/tools/library/scripts/pycutlass/setup.py +80 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/__init__.py +48 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/arguments.py +118 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/c_types.py +241 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/compiler.py +432 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/conv2d_operation.py +631 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/epilogue.py +1026 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/frontend.py +104 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/gemm_operation.py +1276 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/library.py +744 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/memory_manager.py +74 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/operation.py +110 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/parser.py +619 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/reduction_operation.py +398 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/tensor_ref.py +70 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/__init__.py +4 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/conv2d_testbed.py +646 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_grouped_testbed.py +235 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_testbed.py +557 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/profiler.py +70 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/type_hint.py +39 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/__init__.py +1 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/device.py +76 -0
warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/reference_model.py +255 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/__init__.py +0 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +201 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +177 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +98 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +95 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +163 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +187 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +309 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +54 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +253 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +97 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +242 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/run_all_tests.py +10 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/frontend/test_frontend.py +146 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/__init__.py +0 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_bf16_sm80.py +96 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f16_sm80.py +447 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f32_sm80.py +146 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f64_sm80.py +102 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_grouped_sm80.py +203 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_s8_sm80.py +229 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/run_all_tests.py +9 -0
warp/native/cutlass/tools/library/scripts/pycutlass/test/unit/test_sm80.py +453 -0
warp/native/cutlass/tools/library/scripts/rank_2k_operation.py +398 -0
warp/native/cutlass/tools/library/scripts/rank_k_operation.py +387 -0
warp/native/cutlass/tools/library/scripts/rt.py +796 -0
warp/native/cutlass/tools/library/scripts/symm_operation.py +400 -0
warp/native/cutlass/tools/library/scripts/trmm_operation.py +407 -0
warp/native/cutlass_gemm.cu +5 -3
warp/native/exports.h +1240 -949
warp/native/fabric.h +228 -0
warp/native/hashgrid.cpp +4 -4
warp/native/hashgrid.h +22 -2
warp/native/initializer_array.h +2 -2
warp/native/intersect.h +22 -7
warp/native/intersect_adj.h +8 -8
warp/native/intersect_tri.h +13 -16
warp/native/marching.cu +157 -161
warp/native/mat.h +119 -19
warp/native/matnn.h +2 -2
warp/native/mesh.cpp +108 -83
warp/native/mesh.cu +243 -6
warp/native/mesh.h +1547 -458
warp/native/nanovdb/NanoVDB.h +1 -1
warp/native/noise.h +272 -329
warp/native/quat.h +51 -8
warp/native/rand.h +45 -35
warp/native/range.h +6 -2
warp/native/reduce.cpp +157 -0
warp/native/reduce.cu +348 -0
warp/native/runlength_encode.cpp +62 -0
warp/native/runlength_encode.cu +46 -0
warp/native/scan.cu +11 -13
warp/native/scan.h +1 -0
warp/native/solid_angle.h +442 -0
warp/native/sort.cpp +13 -0
warp/native/sort.cu +9 -1
warp/native/sparse.cpp +338 -0
warp/native/sparse.cu +545 -0
warp/native/spatial.h +2 -2
warp/native/temp_buffer.h +30 -0
warp/native/vec.h +126 -24
warp/native/volume.h +120 -0
warp/native/warp.cpp +658 -53
warp/native/warp.cu +660 -68
warp/native/warp.h +112 -12
warp/optim/__init__.py +1 -0
warp/optim/linear.py +922 -0
warp/optim/sgd.py +92 -0
warp/render/render_opengl.py +392 -152
warp/render/render_usd.py +11 -11
warp/sim/__init__.py +2 -2
warp/sim/articulation.py +385 -185
warp/sim/collide.py +21 -8
warp/sim/import_mjcf.py +297 -106
warp/sim/import_urdf.py +389 -210
warp/sim/import_usd.py +198 -97
warp/sim/inertia.py +17 -18
warp/sim/integrator_euler.py +14 -8
warp/sim/integrator_xpbd.py +161 -19
warp/sim/model.py +795 -291
warp/sim/optimizer.py +2 -6
warp/sim/render.py +65 -3
warp/sim/utils.py +3 -0
warp/sparse.py +1227 -0
warp/stubs.py +665 -223
warp/tape.py +66 -15
warp/tests/__main__.py +3 -6
warp/tests/assets/curlnoise_golden.npy +0 -0
warp/tests/assets/pnoise_golden.npy +0 -0
warp/tests/assets/torus.usda +105 -105
warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
warp/tests/aux_test_unresolved_func.py +14 -0
warp/tests/aux_test_unresolved_symbol.py +14 -0
warp/tests/disabled_kinematics.py +239 -0
warp/tests/run_coverage_serial.py +31 -0
warp/tests/test_adam.py +103 -106
warp/tests/test_arithmetic.py +128 -74
warp/tests/test_array.py +1497 -211
warp/tests/test_array_reduce.py +150 -0
warp/tests/test_atomic.py +64 -28
warp/tests/test_bool.py +99 -0
warp/tests/test_builtins_resolution.py +1292 -0
warp/tests/test_bvh.py +75 -43
warp/tests/test_closest_point_edge_edge.py +54 -57
warp/tests/test_codegen.py +233 -128
warp/tests/test_compile_consts.py +28 -20
warp/tests/test_conditional.py +108 -24
warp/tests/test_copy.py +10 -12
warp/tests/test_ctypes.py +112 -88
warp/tests/test_dense.py +21 -14
warp/tests/test_devices.py +98 -0
warp/tests/test_dlpack.py +136 -108
warp/tests/test_examples.py +277 -0
warp/tests/test_fabricarray.py +955 -0
warp/tests/test_fast_math.py +15 -11
warp/tests/test_fem.py +1271 -0
warp/tests/test_fp16.py +53 -19
warp/tests/test_func.py +187 -74
warp/tests/test_generics.py +194 -49
warp/tests/test_grad.py +180 -116
warp/tests/test_grad_customs.py +176 -0
warp/tests/test_hash_grid.py +52 -37
warp/tests/test_import.py +10 -23
warp/tests/test_indexedarray.py +577 -24
warp/tests/test_intersect.py +18 -9
warp/tests/test_large.py +141 -0
warp/tests/test_launch.py +251 -15
warp/tests/test_lerp.py +64 -65
warp/tests/test_linear_solvers.py +154 -0
warp/tests/test_lvalue.py +493 -0
warp/tests/test_marching_cubes.py +12 -13
warp/tests/test_mat.py +508 -2778
warp/tests/test_mat_lite.py +115 -0
warp/tests/test_mat_scalar_ops.py +2889 -0
warp/tests/test_math.py +103 -9
warp/tests/test_matmul.py +305 -69
warp/tests/test_matmul_lite.py +410 -0
warp/tests/test_mesh.py +71 -14
warp/tests/test_mesh_query_aabb.py +41 -25
warp/tests/test_mesh_query_point.py +325 -34
warp/tests/test_mesh_query_ray.py +39 -22
warp/tests/test_mlp.py +30 -22
warp/tests/test_model.py +92 -89
warp/tests/test_modules_lite.py +39 -0
warp/tests/test_multigpu.py +88 -114
warp/tests/test_noise.py +12 -11
warp/tests/test_operators.py +16 -20
warp/tests/test_options.py +11 -11
warp/tests/test_pinned.py +17 -18
warp/tests/test_print.py +32 -11
warp/tests/test_quat.py +275 -129
warp/tests/test_rand.py +18 -16
warp/tests/test_reload.py +38 -34
warp/tests/test_rounding.py +50 -43
warp/tests/test_runlength_encode.py +190 -0
warp/tests/test_smoothstep.py +9 -11
warp/tests/test_snippet.py +143 -0
warp/tests/test_sparse.py +460 -0
warp/tests/test_spatial.py +276 -243
warp/tests/test_streams.py +110 -85
warp/tests/test_struct.py +331 -85
warp/tests/test_tape.py +39 -21
warp/tests/test_torch.py +118 -89
warp/tests/test_transient_module.py +12 -13
warp/tests/test_types.py +614 -0
warp/tests/test_utils.py +494 -0
warp/tests/test_vec.py +354 -1987
warp/tests/test_vec_lite.py +73 -0
warp/tests/test_vec_scalar_ops.py +2099 -0
warp/tests/test_volume.py +457 -293
warp/tests/test_volume_write.py +124 -134
warp/tests/unittest_serial.py +35 -0
warp/tests/unittest_suites.py +341 -0
warp/tests/unittest_utils.py +568 -0
warp/tests/unused_test_misc.py +71 -0
warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
warp/thirdparty/appdirs.py +36 -45
warp/thirdparty/unittest_parallel.py +549 -0
warp/torch.py +72 -30
warp/types.py +1744 -713
warp/utils.py +360 -350
warp_lang-0.11.0.dist-info/LICENSE.md +36 -0
warp_lang-0.11.0.dist-info/METADATA +238 -0
warp_lang-0.11.0.dist-info/RECORD +332 -0
{warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/WHEEL +1 -1
warp/bin/warp-clang.exp +0 -0
warp/bin/warp-clang.lib +0 -0
warp/bin/warp.exp +0 -0
warp/bin/warp.lib +0 -0
warp/tests/test_all.py +0 -215
warp/tests/test_array_scan.py +0 -60
warp/tests/test_base.py +0 -208
warp/tests/test_unresolved_func.py +0 -7
warp/tests/test_unresolved_symbol.py +0 -7
warp_lang-0.9.0.dist-info/METADATA +0 -20
warp_lang-0.9.0.dist-info/RECORD +0 -177
/warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
/warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
/warp/tests/{test_square.py → aux_test_square.py} +0 -0
{warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/top_level.txt +0 -0

warp/native/mesh.cpp CHANGED Viewed

@@ -49,131 +49,156 @@ void mesh_rem_descriptor(uint64_t id)
 } // namespace wp
-uint64_t mesh_create_host(array_t<wp::vec3> points, array_t<wp::vec3> velocities, array_t<int> indices, int num_points, int num_tris)
+void bvh_refit_with_solid_angle_recursive_host(BVH& bvh, int index, Mesh& mesh)
 {
-    Mesh* m = new Mesh(points, velocities, indices, num_points, num_tris);
+    BVHPackedNodeHalf& lower = bvh.node_lowers[index];
+    BVHPackedNodeHalf& upper = bvh.node_uppers[index];
-    m->bounds = new bounds3[num_tris];
+    if (lower.b)
+    {
+        // Leaf, compute properties
+        const int leaf_index = lower.i;
-    for (int i=0; i < num_tris; ++i)
+        precompute_triangle_solid_angle_props(mesh.points[mesh.indices[leaf_index*3+0]], mesh.points[mesh.indices[leaf_index*3+1]], mesh.points[mesh.indices[leaf_index*3+2]], mesh.solid_angle_props[index]);
+        (vec3&)lower = mesh.solid_angle_props[index].box.lower;
+        (vec3&)upper = mesh.solid_angle_props[index].box.upper;
+    }
+    else
     {
-        m->bounds[i].add_point(points[indices[i*3+0]]);
-        m->bounds[i].add_point(points[indices[i*3+1]]);
-        m->bounds[i].add_point(points[indices[i*3+2]]);
+        int left_index = lower.i;
+        int right_index = upper.i;
+        bvh_refit_with_solid_angle_recursive_host(bvh, left_index, mesh);
+        bvh_refit_with_solid_angle_recursive_host(bvh, right_index, mesh);
+        // combine
+        SolidAngleProps* left_child_data = &mesh.solid_angle_props[left_index];
+        SolidAngleProps* right_child_data = (left_index != right_index) ? &mesh.solid_angle_props[right_index] : NULL;
+        combine_precomputed_solid_angle_props(mesh.solid_angle_props[index], left_child_data, right_child_data);
+        // compute union of children
+        const vec3& left_lower = (vec3&)bvh.node_lowers[left_index];
+        const vec3& left_upper = (vec3&)bvh.node_uppers[left_index];
+        const vec3& right_lower = (vec3&)bvh.node_lowers[right_index];
+        const vec3& right_upper = (vec3&)bvh.node_uppers[right_index];
+        // union of child bounds
+        vec3 new_lower = min(left_lower, right_lower);
+        vec3 new_upper = max(left_upper, right_upper);
+        // write new BVH nodes
+        (vec3&)lower = new_lower;
+        (vec3&)upper = new_upper;
     }
-    m->bvh = bvh_create(m->bounds, num_tris);
-    return (uint64_t)m;
 }
-uint64_t mesh_create_device(void* context, array_t<wp::vec3> points, array_t<wp::vec3> velocities, array_t<int> indices, int num_points, int num_tris)
+void bvh_refit_with_solid_angle_host(BVH& bvh, Mesh& mesh)
 {
-    ContextGuard guard(context);
-    Mesh mesh(points, velocities, indices, num_points, num_tris);
-    mesh.context = context ? context : cuda_context_get_current();
+    bvh_refit_with_solid_angle_recursive_host(bvh, 0, mesh);
+}
-    // mesh.points = array_t<vec3>(points, num_points, points_grad);
-    // mesh.velocities = array_t<vec3>(velocities, num_points, velocities_grad);
-    // mesh.indices = array_t<int>(indices, num_tris, 3);
+uint64_t mesh_create_host(array_t<wp::vec3> points, array_t<wp::vec3> velocities, array_t<int> indices, int num_points, int num_tris, int support_winding_number)
+{
+    Mesh* m = new Mesh(points, velocities, indices, num_points, num_tris);
-    // mesh.num_points = num_points;
-    // mesh.num_tris = num_tris;
+    m->lowers = new vec3[num_tris];
+    m->uppers = new vec3[num_tris];
+    float sum = 0.0;
+    for (int i=0; i < num_tris; ++i)
     {
-        // todo: BVH creation only on CPU at the moment so temporarily bring all the data back to host
-        vec3* points_host = (vec3*)alloc_host(sizeof(vec3)*num_points);
-        int* indices_host = (int*)alloc_host(sizeof(int)*num_tris*3);
-        bounds3* bounds_host = (bounds3*)alloc_host(sizeof(bounds3)*num_tris);
-        memcpy_d2h(WP_CURRENT_CONTEXT, points_host, points, sizeof(vec3)*num_points);
-        memcpy_d2h(WP_CURRENT_CONTEXT, indices_host, indices, sizeof(int)*num_tris*3);
-        cuda_context_synchronize(WP_CURRENT_CONTEXT);
-        for (int i=0; i < num_tris; ++i)
-        {
-            bounds_host[i] = bounds3();
-            bounds_host[i].add_point(points_host[indices_host[i*3+0]]);
-            bounds_host[i].add_point(points_host[indices_host[i*3+1]]);
-            bounds_host[i].add_point(points_host[indices_host[i*3+2]]);
-        }
-        BVH bvh_host = bvh_create(bounds_host, num_tris);
-        BVH bvh_device = bvh_clone(WP_CURRENT_CONTEXT, bvh_host);
-        bvh_destroy_host(bvh_host);
-        // save gpu-side copy of bounds
-        mesh.bounds = (bounds3*)alloc_device(WP_CURRENT_CONTEXT, sizeof(bounds3)*num_tris);
-        memcpy_h2d(WP_CURRENT_CONTEXT, mesh.bounds, bounds_host, sizeof(bounds3)*num_tris);
-        free_host(points_host);
-        free_host(indices_host);
-        free_host(bounds_host);
-        mesh.bvh = bvh_device;
+        wp::vec3& p0 = points[indices[i*3+0]];
+        wp::vec3& p1 = points[indices[i*3+1]];
+        wp::vec3& p2 = points[indices[i*3+2]];
+        // compute triangle bounds
+        bounds3 b;
+        b.add_point(p0);
+        b.add_point(p1);
+        b.add_point(p2);
+        m->lowers[i] = b.lower;
+        m->uppers[i] = b.upper;
+        // compute edge lengths
+        sum += length(p0-p1) + length(p0-p2) + length(p2-p1);
     }
+    m->average_edge_length = sum / (num_tris*3);
-    Mesh* mesh_device = (Mesh*)alloc_device(WP_CURRENT_CONTEXT, sizeof(Mesh));
-    memcpy_h2d(WP_CURRENT_CONTEXT, mesh_device, &mesh, sizeof(Mesh));
+    m->bvh = *(wp::BVH*)bvh_create_host(m->lowers, m->uppers, num_tris);
-    // save descriptor
-    uint64_t mesh_id = (uint64_t)mesh_device;
-    mesh_add_descriptor(mesh_id, mesh);
+    if (support_winding_number)
+    {
+        // Let's first compute the sold
+        int num_bvh_nodes = 2*num_tris-1;
+        m->solid_angle_props = new SolidAngleProps[num_bvh_nodes];
+        bvh_refit_with_solid_angle_host(m->bvh, *m);
+    }
-    return mesh_id;
+    return (uint64_t)m;
 }
 void mesh_destroy_host(uint64_t id)
 {
     Mesh* m = (Mesh*)(id);
-    delete[] m->bounds;
+    delete[] m->lowers;
+    delete[] m->uppers;
+    if (m->solid_angle_props) {
+        delete [] m->solid_angle_props;
+    }
     bvh_destroy_host(m->bvh);
     delete m;
 }
-void mesh_destroy_device(uint64_t id)
+void mesh_refit_host(uint64_t id)
 {
-    Mesh mesh;
-    if (mesh_get_descriptor(id, mesh))
+    Mesh* m = (Mesh*)(id);
+    float sum = 0.0;
+    for (int i=0; i < m->num_tris; ++i)
     {
-        ContextGuard guard(mesh.context);
+        wp::vec3 p0 = m->points.data[m->indices.data[i*3+0]];
+        wp::vec3 p1 = m->points.data[m->indices.data[i*3+1]];
+        wp::vec3 p2 = m->points.data[m->indices.data[i*3+2]];
-        bvh_destroy_device(mesh.bvh);
+        // compute triangle bounds
+        bounds3 b;
+        b.add_point(p0);
+        b.add_point(p1);
+        b.add_point(p2);
-        free_device(WP_CURRENT_CONTEXT, mesh.bounds);
-        free_device(WP_CURRENT_CONTEXT, (Mesh*)id);
+        m->lowers[i] = b.lower;
+        m->uppers[i] = b.upper;
-        mesh_rem_descriptor(id);
+        sum += length(p0-p1) + length(p0-p2) + length(p2-p1);
     }
-}
-void mesh_refit_host(uint64_t id)
-{
-    Mesh* m = (Mesh*)(id);
+    m->average_edge_length = sum / (m->num_tris*3);
-    for (int i=0; i < m->num_tris; ++i)
+    if (m->solid_angle_props)
     {
-        m->bounds[i] = bounds3();
-        m->bounds[i].add_point(m->points.data[m->indices.data[i*3+0]]);
-        m->bounds[i].add_point(m->points.data[m->indices.data[i*3+1]]);
-        m->bounds[i].add_point(m->points.data[m->indices.data[i*3+2]]);
+        // If solid angle were used, use refit solid angle
+        bvh_refit_with_solid_angle_host(m->bvh, *m);
+    }
+    else
+    {
+        bvh_refit_host(m->bvh);
     }
-    bvh_refit_host(m->bvh, m->bounds);
 }
 // stubs for non-CUDA platforms
 #if !WP_ENABLE_CUDA
-void mesh_refit_device(uint64_t id)
-{
-}
+WP_API uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::array_t<wp::vec3> velocities, wp::array_t<int> tris, int num_points, int num_tris, int support_winding_number) { return 0; }
+WP_API void mesh_destroy_device(uint64_t id) {}
+WP_API void mesh_refit_device(uint64_t id) {}
 #endif // !WP_ENABLE_CUDA

warp/native/mesh.cu CHANGED Viewed

@@ -10,11 +10,12 @@
 #include "cuda_util.h"
 #include "mesh.h"
 #include "bvh.h"
+#include "scan.h"
 namespace wp
 {
-__global__ void compute_triangle_bounds(int n, const vec3* points, const int* indices, bounds3* b)
+__global__ void compute_triangle_bounds(int n, const vec3* points, const int* indices, vec3* lowers, vec3* uppers)
 {
     const int tid = blockIdx.x*blockDim.x + threadIdx.x;
@@ -32,25 +33,261 @@ __global__ void compute_triangle_bounds(int n, const vec3* points, const int* in
         vec3 lower = min(min(p, q), r);
         vec3 upper = max(max(p, q), r);
-        b[tid] = bounds3(lower, upper);
+        lowers[tid] = lower;
+        uppers[tid] = upper;
     }
 }
+__global__ void compute_mesh_edge_lengths(int n, const vec3* points, const int* indices, float* edge_lengths)
+{
+    const int tid = blockIdx.x*blockDim.x + threadIdx.x;
+    if (tid < n)
+    {
+        // if leaf then update bounds
+        int i = indices[tid*3+0];
+        int j = indices[tid*3+1];
+        int k = indices[tid*3+2];
+        vec3 p = points[i];
+        vec3 q = points[j];
+        vec3 r = points[k];
+        edge_lengths[tid] = length(p-q) + length(p-r) + length(q-r);
+    }
+}
+__global__ void compute_average_mesh_edge_length(int n, float* sum_edge_lengths, Mesh* m)
+{
+    m->average_edge_length = sum_edge_lengths[n - 1] / (3*n);
+}
+__global__ void bvh_refit_with_solid_angle_kernel(int n, const int* __restrict__ parents, int* __restrict__ child_count, BVHPackedNodeHalf* __restrict__ lowers, BVHPackedNodeHalf* __restrict__ uppers, const vec3* points, const int* indices, SolidAngleProps* solid_angle_props)
+{
+    int index = blockDim.x*blockIdx.x + threadIdx.x;
+    if (index < n)
+    {
+        bool leaf = lowers[index].b;
+        if (leaf)
+        {
+            // update the leaf node
+            const int leaf_index = lowers[index].i;
+            precompute_triangle_solid_angle_props(points[indices[leaf_index*3+0]], points[indices[leaf_index*3+1]], points[indices[leaf_index*3+2]], solid_angle_props[index]);
+            make_node(lowers+index, solid_angle_props[index].box.lower, leaf_index, true);
+            make_node(uppers+index, solid_angle_props[index].box.upper, 0, false);
+        }
+        else
+        {
+            // only keep leaf threads
+            return;
+        }
+        // update hierarchy
+        for (;;)
+        {
+            int parent = parents[index];
+            // reached root
+            if (parent == -1)
+                return;
+            // ensure all writes are visible
+            __threadfence();
+            int finished = atomicAdd(&child_count[parent], 1);
+            // if we have are the last thread (such that the parent node is now complete)
+            // then update its bounds and move onto the the next parent in the hierarchy
+            if (finished == 1)
+            {
+                //printf("Compute non-leaf at %d\n", index);
+                const int left_child = lowers[parent].i;
+                const int right_child = uppers[parent].i;
+                vec3 left_lower = vec3(lowers[left_child].x,
+                                       lowers[left_child].y,
+                                       lowers[left_child].z);
+                vec3 left_upper = vec3(uppers[left_child].x,
+                                       uppers[left_child].y,
+                                       uppers[left_child].z);
+                vec3 right_lower = vec3(lowers[right_child].x,
+                                       lowers[right_child].y,
+                                       lowers[right_child].z);
+                vec3 right_upper = vec3(uppers[right_child].x,
+                                       uppers[right_child].y,
+                                       uppers[right_child].z);
+                // union of child bounds
+                vec3 lower = min(left_lower, right_lower);
+                vec3 upper = max(left_upper, right_upper);
+                // write new BVH nodes
+                make_node(lowers+parent, lower, left_child, false);
+                make_node(uppers+parent, upper, right_child, false);
+                // combine
+                SolidAngleProps* left_child_data = &solid_angle_props[left_child];
+                SolidAngleProps* right_child_data = (left_child != right_child) ? &solid_angle_props[right_child] : NULL;
+                combine_precomputed_solid_angle_props(solid_angle_props[parent], left_child_data, right_child_data);
+                // move onto processing the parent
+                index = parent;
+            }
+            else
+            {
+                // parent not ready (we are the first child), terminate thread
+                break;
+            }
+        }
+    }
+}
+void bvh_refit_with_solid_angle_device(BVH& bvh, Mesh& mesh)
+{
+    ContextGuard guard(bvh.context);
+    // clear child counters
+    memset_device(WP_CURRENT_CONTEXT, bvh.node_counts, 0, sizeof(int)*bvh.max_nodes);
+    wp_launch_device(WP_CURRENT_CONTEXT, bvh_refit_with_solid_angle_kernel, bvh.num_items, (bvh.num_items, bvh.node_parents, bvh.node_counts, bvh.node_lowers, bvh.node_uppers, mesh.points, mesh.indices, mesh.solid_angle_props));
+}
 } // namespace wp
-void mesh_refit_device(uint64_t id)
+uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::array_t<wp::vec3> velocities, wp::array_t<int> indices, int num_points, int num_tris, int support_winding_number)
+{
+    ContextGuard guard(context);
+    wp::Mesh mesh(points, velocities, indices, num_points, num_tris);
+    mesh.context = context ? context : cuda_context_get_current();
+    {
+        // // todo: BVH creation only on CPU at the moment so temporarily bring all the data back to host
+        // vec3* points_host = (vec3*)alloc_host(sizeof(vec3)*num_points);
+        // int* indices_host = (int*)alloc_host(sizeof(int)*num_tris*3);
+        // bounds3* bounds_host = (bounds3*)alloc_host(sizeof(bounds3)*num_tris);
+        // memcpy_d2h(WP_CURRENT_CONTEXT, points_host, points, sizeof(vec3)*num_points);
+        // memcpy_d2h(WP_CURRENT_CONTEXT, indices_host, indices, sizeof(int)*num_tris*3);
+        // cuda_context_synchronize(WP_CURRENT_CONTEXT);
+        // float sum = 0.0;
+        // for (int i=0; i < num_tris; ++i)
+        // {
+        //     bounds_host[i] = bounds3();
+        //     wp::vec3 p0 = points_host[indices_host[i*3+0]];
+        //     wp::vec3 p1 = points_host[indices_host[i*3+1]];
+        //     wp::vec3 p2 = points_host[indices_host[i*3+2]];
+        //     bounds_host[i].add_point(p0);
+        //     bounds_host[i].add_point(p1);
+        //     bounds_host[i].add_point(p2);
+        //     sum += length(p0-p1) + length(p0-p2) + length(p2-p1);
+        // }
+        // mesh.average_edge_length = sum / (num_tris*3);
+        // BVH bvh_host = bvh_create(bounds_host, num_tris);
+        // BVH bvh_device = bvh_clone(WP_CURRENT_CONTEXT, bvh_host);
+        // bvh_destroy_host(bvh_host);
+        // create lower upper arrays expected by GPU BVH builder
+        mesh.lowers = (wp::vec3*)alloc_temp_device(WP_CURRENT_CONTEXT, sizeof(wp::vec3)*num_tris);
+        mesh.uppers = (wp::vec3*)alloc_temp_device(WP_CURRENT_CONTEXT, sizeof(wp::vec3)*num_tris);
+        wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_triangle_bounds, num_tris, (num_tris, points.data, indices.data, mesh.lowers, mesh.uppers));
+        uint64_t bvh_id = bvh_create_device(mesh.context, mesh.lowers, mesh.uppers, num_tris);
+        wp::bvh_get_descriptor(bvh_id, mesh.bvh);
+        if (support_winding_number)
+        {
+            int num_bvh_nodes = 2*num_tris;
+            mesh.solid_angle_props = (wp::SolidAngleProps*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::SolidAngleProps)*num_bvh_nodes);
+        }
+    }
+    wp::Mesh* mesh_device = (wp::Mesh*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::Mesh));
+    memcpy_h2d(WP_CURRENT_CONTEXT, mesh_device, &mesh, sizeof(wp::Mesh));
+    // save descriptor
+    uint64_t mesh_id = (uint64_t)mesh_device;
+    mesh_add_descriptor(mesh_id, mesh);
+    if (support_winding_number)
+        mesh_refit_device(mesh_id);
+    return mesh_id;
+}
+void mesh_destroy_device(uint64_t id)
+{
+    wp::Mesh mesh;
+    if (wp::mesh_get_descriptor(id, mesh))
+    {
+        ContextGuard guard(mesh.context);
+        wp::bvh_destroy_device(mesh.bvh);
+        free_device(WP_CURRENT_CONTEXT, mesh.lowers);
+        free_device(WP_CURRENT_CONTEXT, mesh.uppers);
+        free_device(WP_CURRENT_CONTEXT, (wp::Mesh*)id);
+        if (mesh.solid_angle_props) {
+            free_device(WP_CURRENT_CONTEXT, mesh.solid_angle_props);
+        }
+        wp::mesh_rem_descriptor(id);
+    }
+}
+void mesh_update_stats(uint64_t id)
 {
+}
+void mesh_refit_device(uint64_t id)
+{
     // recompute triangle bounds
     wp::Mesh m;
     if (mesh_get_descriptor(id, m))
     {
         ContextGuard guard(m.context);
-        wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_triangle_bounds, m.num_tris, (m.num_tris, m.points, m.indices, m.bounds));
+        wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_triangle_bounds, m.num_tris, (m.num_tris, m.points, m.indices, m.lowers, m.uppers));
-        bvh_refit_device(m.bvh, m.bounds);
-    }
+        if (m.solid_angle_props)
+        {
+            // we compute mesh the average edge length
+            // for use in mesh_query_point_sign_normal()
+            // since it relies on an epsilon for welding
+            // reuse bounds memory temporarily for computing edge lengths
+            float* length_tmp_ptr = (float*)m.lowers;
+            wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_mesh_edge_lengths, m.num_tris, (m.num_tris, m.points, m.indices, length_tmp_ptr));
+            scan_device(length_tmp_ptr, length_tmp_ptr, m.num_tris, true);
+            wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_average_mesh_edge_length, 1, (m.num_tris, length_tmp_ptr, (wp::Mesh*)id));
+            wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_triangle_bounds, m.num_tris, (m.num_tris, m.points, m.indices, m.lowers, m.uppers));
+            // update solid angle data
+            bvh_refit_with_solid_angle_device(m.bvh, m);
+        }
+        else
+        {
+            bvh_refit_device(m.bvh);
+        }
+    }
 }