warp-lang 1.5.0__py3-none-manylinux2014_aarch64.whl → 1.6.0__py3-none-manylinux2014_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +5 -0
- warp/autograd.py +414 -191
- warp/bin/warp-clang.so +0 -0
- warp/bin/warp.so +0 -0
- warp/build.py +40 -12
- warp/build_dll.py +13 -6
- warp/builtins.py +1124 -497
- warp/codegen.py +261 -136
- warp/config.py +1 -1
- warp/context.py +357 -119
- warp/examples/assets/square_cloth.usd +0 -0
- warp/examples/benchmarks/benchmark_gemm.py +27 -18
- warp/examples/benchmarks/benchmark_interop_paddle.py +3 -3
- warp/examples/benchmarks/benchmark_interop_torch.py +3 -3
- warp/examples/core/example_torch.py +18 -34
- warp/examples/fem/example_apic_fluid.py +1 -0
- warp/examples/fem/example_mixed_elasticity.py +1 -1
- warp/examples/optim/example_bounce.py +1 -1
- warp/examples/optim/example_cloth_throw.py +1 -1
- warp/examples/optim/example_diffray.py +4 -15
- warp/examples/optim/example_drone.py +1 -1
- warp/examples/optim/example_softbody_properties.py +392 -0
- warp/examples/optim/example_trajectory.py +1 -3
- warp/examples/optim/example_walker.py +5 -0
- warp/examples/sim/example_cartpole.py +0 -2
- warp/examples/sim/example_cloth.py +3 -1
- warp/examples/sim/example_cloth_self_contact.py +260 -0
- warp/examples/sim/example_granular_collision_sdf.py +4 -5
- warp/examples/sim/example_jacobian_ik.py +0 -2
- warp/examples/sim/example_quadruped.py +5 -2
- warp/examples/tile/example_tile_cholesky.py +79 -0
- warp/examples/tile/example_tile_convolution.py +2 -2
- warp/examples/tile/example_tile_fft.py +2 -2
- warp/examples/tile/example_tile_filtering.py +3 -3
- warp/examples/tile/example_tile_matmul.py +4 -4
- warp/examples/tile/example_tile_mlp.py +12 -12
- warp/examples/tile/example_tile_nbody.py +180 -0
- warp/examples/tile/example_tile_walker.py +319 -0
- warp/fem/geometry/geometry.py +0 -2
- warp/math.py +147 -0
- warp/native/array.h +12 -0
- warp/native/builtin.h +0 -1
- warp/native/bvh.cpp +149 -70
- warp/native/bvh.cu +287 -68
- warp/native/bvh.h +195 -85
- warp/native/clang/clang.cpp +5 -1
- warp/native/coloring.cpp +5 -1
- warp/native/cuda_util.cpp +91 -53
- warp/native/cuda_util.h +5 -0
- warp/native/exports.h +40 -40
- warp/native/intersect.h +17 -0
- warp/native/mat.h +41 -0
- warp/native/mathdx.cpp +19 -0
- warp/native/mesh.cpp +25 -8
- warp/native/mesh.cu +153 -101
- warp/native/mesh.h +482 -403
- warp/native/quat.h +40 -0
- warp/native/solid_angle.h +7 -0
- warp/native/sort.cpp +85 -0
- warp/native/sort.cu +34 -0
- warp/native/sort.h +3 -1
- warp/native/spatial.h +11 -0
- warp/native/tile.h +1187 -669
- warp/native/tile_reduce.h +8 -6
- warp/native/vec.h +41 -0
- warp/native/warp.cpp +8 -1
- warp/native/warp.cu +263 -40
- warp/native/warp.h +19 -5
- warp/optim/linear.py +22 -4
- warp/render/render_opengl.py +130 -64
- warp/sim/__init__.py +6 -1
- warp/sim/collide.py +270 -26
- warp/sim/import_urdf.py +8 -8
- warp/sim/integrator_euler.py +25 -7
- warp/sim/integrator_featherstone.py +154 -35
- warp/sim/integrator_vbd.py +842 -40
- warp/sim/model.py +134 -72
- warp/sparse.py +1 -1
- warp/stubs.py +265 -132
- warp/tape.py +28 -30
- warp/tests/aux_test_module_unload.py +15 -0
- warp/tests/{test_sim_grad.py → flaky_test_sim_grad.py} +104 -63
- warp/tests/test_array.py +74 -0
- warp/tests/test_assert.py +242 -0
- warp/tests/test_codegen.py +14 -61
- warp/tests/test_collision.py +2 -2
- warp/tests/test_coloring.py +12 -2
- warp/tests/test_examples.py +12 -1
- warp/tests/test_func.py +21 -4
- warp/tests/test_grad_debug.py +87 -2
- warp/tests/test_hash_grid.py +1 -1
- warp/tests/test_ipc.py +116 -0
- warp/tests/test_lerp.py +13 -87
- warp/tests/test_mat.py +138 -167
- warp/tests/test_math.py +47 -1
- warp/tests/test_matmul.py +17 -16
- warp/tests/test_matmul_lite.py +10 -15
- warp/tests/test_mesh.py +84 -60
- warp/tests/test_mesh_query_aabb.py +165 -0
- warp/tests/test_mesh_query_point.py +328 -286
- warp/tests/test_mesh_query_ray.py +134 -121
- warp/tests/test_mlp.py +2 -2
- warp/tests/test_operators.py +43 -0
- warp/tests/test_overwrite.py +47 -2
- warp/tests/test_quat.py +77 -0
- warp/tests/test_reload.py +29 -0
- warp/tests/test_sim_grad_bounce_linear.py +204 -0
- warp/tests/test_smoothstep.py +17 -83
- warp/tests/test_static.py +19 -3
- warp/tests/test_tape.py +25 -0
- warp/tests/test_tile.py +178 -191
- warp/tests/test_tile_load.py +356 -0
- warp/tests/test_tile_mathdx.py +61 -8
- warp/tests/test_tile_mlp.py +17 -17
- warp/tests/test_tile_reduce.py +24 -18
- warp/tests/test_tile_shared_memory.py +66 -17
- warp/tests/test_tile_view.py +165 -0
- warp/tests/test_torch.py +35 -0
- warp/tests/test_utils.py +36 -24
- warp/tests/test_vec.py +110 -0
- warp/tests/unittest_suites.py +29 -4
- warp/tests/unittest_utils.py +30 -13
- warp/thirdparty/unittest_parallel.py +2 -2
- warp/types.py +411 -101
- warp/utils.py +10 -7
- {warp_lang-1.5.0.dist-info → warp_lang-1.6.0.dist-info}/METADATA +92 -69
- {warp_lang-1.5.0.dist-info → warp_lang-1.6.0.dist-info}/RECORD +130 -119
- {warp_lang-1.5.0.dist-info → warp_lang-1.6.0.dist-info}/WHEEL +1 -1
- warp/examples/benchmarks/benchmark_tile.py +0 -179
- warp/native/tile_gemm.h +0 -341
- {warp_lang-1.5.0.dist-info → warp_lang-1.6.0.dist-info}/LICENSE.md +0 -0
- {warp_lang-1.5.0.dist-info → warp_lang-1.6.0.dist-info}/top_level.txt +0 -0
warp/native/exports.h
CHANGED
|
@@ -1001,46 +1001,6 @@ WP_API void builtin_spatial_top_spatial_vectord(spatial_vectord& svec, vec3d* re
|
|
|
1001
1001
|
WP_API void builtin_spatial_bottom_spatial_vectorh(spatial_vectorh& svec, vec3h* ret) { *ret = wp::spatial_bottom(svec); }
|
|
1002
1002
|
WP_API void builtin_spatial_bottom_spatial_vectorf(spatial_vectorf& svec, vec3f* ret) { *ret = wp::spatial_bottom(svec); }
|
|
1003
1003
|
WP_API void builtin_spatial_bottom_spatial_vectord(spatial_vectord& svec, vec3d* ret) { *ret = wp::spatial_bottom(svec); }
|
|
1004
|
-
WP_API void builtin_volume_sample_f_uint64_vec3f_int32(uint64 id, vec3f& uvw, int32 sampling_mode, float* ret) { *ret = wp::volume_sample_f(id, uvw, sampling_mode); }
|
|
1005
|
-
WP_API void builtin_volume_sample_grad_f_uint64_vec3f_int32_vec3f(uint64 id, vec3f& uvw, int32 sampling_mode, vec3f& grad, float* ret) { *ret = wp::volume_sample_grad_f(id, uvw, sampling_mode, grad); }
|
|
1006
|
-
WP_API void builtin_volume_lookup_f_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, float* ret) { *ret = wp::volume_lookup_f(id, i, j, k); }
|
|
1007
|
-
WP_API void builtin_volume_sample_v_uint64_vec3f_int32(uint64 id, vec3f& uvw, int32 sampling_mode, vec3f* ret) { *ret = wp::volume_sample_v(id, uvw, sampling_mode); }
|
|
1008
|
-
WP_API void builtin_volume_lookup_v_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, vec3f* ret) { *ret = wp::volume_lookup_v(id, i, j, k); }
|
|
1009
|
-
WP_API void builtin_volume_sample_i_uint64_vec3f(uint64 id, vec3f& uvw, int* ret) { *ret = wp::volume_sample_i(id, uvw); }
|
|
1010
|
-
WP_API void builtin_volume_lookup_i_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, int* ret) { *ret = wp::volume_lookup_i(id, i, j, k); }
|
|
1011
|
-
WP_API void builtin_volume_lookup_index_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, int32* ret) { *ret = wp::volume_lookup_index(id, i, j, k); }
|
|
1012
|
-
WP_API void builtin_volume_index_to_world_uint64_vec3f(uint64 id, vec3f& uvw, vec3f* ret) { *ret = wp::volume_index_to_world(id, uvw); }
|
|
1013
|
-
WP_API void builtin_volume_world_to_index_uint64_vec3f(uint64 id, vec3f& xyz, vec3f* ret) { *ret = wp::volume_world_to_index(id, xyz); }
|
|
1014
|
-
WP_API void builtin_volume_index_to_world_dir_uint64_vec3f(uint64 id, vec3f& uvw, vec3f* ret) { *ret = wp::volume_index_to_world_dir(id, uvw); }
|
|
1015
|
-
WP_API void builtin_volume_world_to_index_dir_uint64_vec3f(uint64 id, vec3f& xyz, vec3f* ret) { *ret = wp::volume_world_to_index_dir(id, xyz); }
|
|
1016
|
-
WP_API void builtin_rand_init_int32(int32 seed, uint32* ret) { *ret = wp::rand_init(seed); }
|
|
1017
|
-
WP_API void builtin_rand_init_int32_int32(int32 seed, int32 offset, uint32* ret) { *ret = wp::rand_init(seed, offset); }
|
|
1018
|
-
WP_API void builtin_randi_uint32(uint32 state, int* ret) { *ret = wp::randi(state); }
|
|
1019
|
-
WP_API void builtin_randi_uint32_int32_int32(uint32 state, int32 low, int32 high, int* ret) { *ret = wp::randi(state, low, high); }
|
|
1020
|
-
WP_API void builtin_randf_uint32(uint32 state, float* ret) { *ret = wp::randf(state); }
|
|
1021
|
-
WP_API void builtin_randf_uint32_float32_float32(uint32 state, float32 low, float32 high, float* ret) { *ret = wp::randf(state, low, high); }
|
|
1022
|
-
WP_API void builtin_randn_uint32(uint32 state, float* ret) { *ret = wp::randn(state); }
|
|
1023
|
-
WP_API void builtin_sample_triangle_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_triangle(state); }
|
|
1024
|
-
WP_API void builtin_sample_unit_ring_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_ring(state); }
|
|
1025
|
-
WP_API void builtin_sample_unit_disk_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_disk(state); }
|
|
1026
|
-
WP_API void builtin_sample_unit_sphere_surface_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_sphere_surface(state); }
|
|
1027
|
-
WP_API void builtin_sample_unit_sphere_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_sphere(state); }
|
|
1028
|
-
WP_API void builtin_sample_unit_hemisphere_surface_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_hemisphere_surface(state); }
|
|
1029
|
-
WP_API void builtin_sample_unit_hemisphere_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_hemisphere(state); }
|
|
1030
|
-
WP_API void builtin_sample_unit_square_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_square(state); }
|
|
1031
|
-
WP_API void builtin_sample_unit_cube_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_cube(state); }
|
|
1032
|
-
WP_API void builtin_poisson_uint32_float32(uint32 state, float32 lam, uint32* ret) { *ret = wp::poisson(state, lam); }
|
|
1033
|
-
WP_API void builtin_noise_uint32_float32(uint32 state, float32 x, float* ret) { *ret = wp::noise(state, x); }
|
|
1034
|
-
WP_API void builtin_noise_uint32_vec2f(uint32 state, vec2f& xy, float* ret) { *ret = wp::noise(state, xy); }
|
|
1035
|
-
WP_API void builtin_noise_uint32_vec3f(uint32 state, vec3f& xyz, float* ret) { *ret = wp::noise(state, xyz); }
|
|
1036
|
-
WP_API void builtin_noise_uint32_vec4f(uint32 state, vec4f& xyzt, float* ret) { *ret = wp::noise(state, xyzt); }
|
|
1037
|
-
WP_API void builtin_pnoise_uint32_float32_int32(uint32 state, float32 x, int32 px, float* ret) { *ret = wp::pnoise(state, x, px); }
|
|
1038
|
-
WP_API void builtin_pnoise_uint32_vec2f_int32_int32(uint32 state, vec2f& xy, int32 px, int32 py, float* ret) { *ret = wp::pnoise(state, xy, px, py); }
|
|
1039
|
-
WP_API void builtin_pnoise_uint32_vec3f_int32_int32_int32(uint32 state, vec3f& xyz, int32 px, int32 py, int32 pz, float* ret) { *ret = wp::pnoise(state, xyz, px, py, pz); }
|
|
1040
|
-
WP_API void builtin_pnoise_uint32_vec4f_int32_int32_int32_int32(uint32 state, vec4f& xyzt, int32 px, int32 py, int32 pz, int32 pt, float* ret) { *ret = wp::pnoise(state, xyzt, px, py, pz, pt); }
|
|
1041
|
-
WP_API void builtin_curlnoise_uint32_vec2f_uint32_float32_float32(uint32 state, vec2f& xy, uint32 octaves, float32 lacunarity, float32 gain, vec2f* ret) { *ret = wp::curlnoise(state, xy, octaves, lacunarity, gain); }
|
|
1042
|
-
WP_API void builtin_curlnoise_uint32_vec3f_uint32_float32_float32(uint32 state, vec3f& xyz, uint32 octaves, float32 lacunarity, float32 gain, vec3f* ret) { *ret = wp::curlnoise(state, xyz, octaves, lacunarity, gain); }
|
|
1043
|
-
WP_API void builtin_curlnoise_uint32_vec4f_uint32_float32_float32(uint32 state, vec4f& xyzt, uint32 octaves, float32 lacunarity, float32 gain, vec3f* ret) { *ret = wp::curlnoise(state, xyzt, octaves, lacunarity, gain); }
|
|
1044
1004
|
WP_API void builtin_assign_vec2h_int32_float16(vec2h& a, int32 i, float16 value, vec2h* ret) { *ret = wp::assign(a, i, value); }
|
|
1045
1005
|
WP_API void builtin_assign_vec3h_int32_float16(vec3h& a, int32 i, float16 value, vec3h* ret) { *ret = wp::assign(a, i, value); }
|
|
1046
1006
|
WP_API void builtin_assign_vec4h_int32_float16(vec4h& a, int32 i, float16 value, vec4h* ret) { *ret = wp::assign(a, i, value); }
|
|
@@ -1104,6 +1064,46 @@ WP_API void builtin_assign_mat22d_int32_vec2d(mat22d& a, int32 i, vec2d& value,
|
|
|
1104
1064
|
WP_API void builtin_assign_mat33d_int32_vec3d(mat33d& a, int32 i, vec3d& value, mat33d* ret) { *ret = wp::assign(a, i, value); }
|
|
1105
1065
|
WP_API void builtin_assign_mat44d_int32_vec4d(mat44d& a, int32 i, vec4d& value, mat44d* ret) { *ret = wp::assign(a, i, value); }
|
|
1106
1066
|
WP_API void builtin_assign_spatial_matrixd_int32_spatial_vectord(spatial_matrixd& a, int32 i, spatial_vectord& value, spatial_matrixd* ret) { *ret = wp::assign(a, i, value); }
|
|
1067
|
+
WP_API void builtin_volume_sample_f_uint64_vec3f_int32(uint64 id, vec3f& uvw, int32 sampling_mode, float* ret) { *ret = wp::volume_sample_f(id, uvw, sampling_mode); }
|
|
1068
|
+
WP_API void builtin_volume_sample_grad_f_uint64_vec3f_int32_vec3f(uint64 id, vec3f& uvw, int32 sampling_mode, vec3f& grad, float* ret) { *ret = wp::volume_sample_grad_f(id, uvw, sampling_mode, grad); }
|
|
1069
|
+
WP_API void builtin_volume_lookup_f_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, float* ret) { *ret = wp::volume_lookup_f(id, i, j, k); }
|
|
1070
|
+
WP_API void builtin_volume_sample_v_uint64_vec3f_int32(uint64 id, vec3f& uvw, int32 sampling_mode, vec3f* ret) { *ret = wp::volume_sample_v(id, uvw, sampling_mode); }
|
|
1071
|
+
WP_API void builtin_volume_lookup_v_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, vec3f* ret) { *ret = wp::volume_lookup_v(id, i, j, k); }
|
|
1072
|
+
WP_API void builtin_volume_sample_i_uint64_vec3f(uint64 id, vec3f& uvw, int* ret) { *ret = wp::volume_sample_i(id, uvw); }
|
|
1073
|
+
WP_API void builtin_volume_lookup_i_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, int* ret) { *ret = wp::volume_lookup_i(id, i, j, k); }
|
|
1074
|
+
WP_API void builtin_volume_lookup_index_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, int32* ret) { *ret = wp::volume_lookup_index(id, i, j, k); }
|
|
1075
|
+
WP_API void builtin_volume_index_to_world_uint64_vec3f(uint64 id, vec3f& uvw, vec3f* ret) { *ret = wp::volume_index_to_world(id, uvw); }
|
|
1076
|
+
WP_API void builtin_volume_world_to_index_uint64_vec3f(uint64 id, vec3f& xyz, vec3f* ret) { *ret = wp::volume_world_to_index(id, xyz); }
|
|
1077
|
+
WP_API void builtin_volume_index_to_world_dir_uint64_vec3f(uint64 id, vec3f& uvw, vec3f* ret) { *ret = wp::volume_index_to_world_dir(id, uvw); }
|
|
1078
|
+
WP_API void builtin_volume_world_to_index_dir_uint64_vec3f(uint64 id, vec3f& xyz, vec3f* ret) { *ret = wp::volume_world_to_index_dir(id, xyz); }
|
|
1079
|
+
WP_API void builtin_rand_init_int32(int32 seed, uint32* ret) { *ret = wp::rand_init(seed); }
|
|
1080
|
+
WP_API void builtin_rand_init_int32_int32(int32 seed, int32 offset, uint32* ret) { *ret = wp::rand_init(seed, offset); }
|
|
1081
|
+
WP_API void builtin_randi_uint32(uint32 state, int* ret) { *ret = wp::randi(state); }
|
|
1082
|
+
WP_API void builtin_randi_uint32_int32_int32(uint32 state, int32 low, int32 high, int* ret) { *ret = wp::randi(state, low, high); }
|
|
1083
|
+
WP_API void builtin_randf_uint32(uint32 state, float* ret) { *ret = wp::randf(state); }
|
|
1084
|
+
WP_API void builtin_randf_uint32_float32_float32(uint32 state, float32 low, float32 high, float* ret) { *ret = wp::randf(state, low, high); }
|
|
1085
|
+
WP_API void builtin_randn_uint32(uint32 state, float* ret) { *ret = wp::randn(state); }
|
|
1086
|
+
WP_API void builtin_sample_triangle_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_triangle(state); }
|
|
1087
|
+
WP_API void builtin_sample_unit_ring_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_ring(state); }
|
|
1088
|
+
WP_API void builtin_sample_unit_disk_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_disk(state); }
|
|
1089
|
+
WP_API void builtin_sample_unit_sphere_surface_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_sphere_surface(state); }
|
|
1090
|
+
WP_API void builtin_sample_unit_sphere_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_sphere(state); }
|
|
1091
|
+
WP_API void builtin_sample_unit_hemisphere_surface_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_hemisphere_surface(state); }
|
|
1092
|
+
WP_API void builtin_sample_unit_hemisphere_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_hemisphere(state); }
|
|
1093
|
+
WP_API void builtin_sample_unit_square_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_square(state); }
|
|
1094
|
+
WP_API void builtin_sample_unit_cube_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_cube(state); }
|
|
1095
|
+
WP_API void builtin_poisson_uint32_float32(uint32 state, float32 lam, uint32* ret) { *ret = wp::poisson(state, lam); }
|
|
1096
|
+
WP_API void builtin_noise_uint32_float32(uint32 state, float32 x, float* ret) { *ret = wp::noise(state, x); }
|
|
1097
|
+
WP_API void builtin_noise_uint32_vec2f(uint32 state, vec2f& xy, float* ret) { *ret = wp::noise(state, xy); }
|
|
1098
|
+
WP_API void builtin_noise_uint32_vec3f(uint32 state, vec3f& xyz, float* ret) { *ret = wp::noise(state, xyz); }
|
|
1099
|
+
WP_API void builtin_noise_uint32_vec4f(uint32 state, vec4f& xyzt, float* ret) { *ret = wp::noise(state, xyzt); }
|
|
1100
|
+
WP_API void builtin_pnoise_uint32_float32_int32(uint32 state, float32 x, int32 px, float* ret) { *ret = wp::pnoise(state, x, px); }
|
|
1101
|
+
WP_API void builtin_pnoise_uint32_vec2f_int32_int32(uint32 state, vec2f& xy, int32 px, int32 py, float* ret) { *ret = wp::pnoise(state, xy, px, py); }
|
|
1102
|
+
WP_API void builtin_pnoise_uint32_vec3f_int32_int32_int32(uint32 state, vec3f& xyz, int32 px, int32 py, int32 pz, float* ret) { *ret = wp::pnoise(state, xyz, px, py, pz); }
|
|
1103
|
+
WP_API void builtin_pnoise_uint32_vec4f_int32_int32_int32_int32(uint32 state, vec4f& xyzt, int32 px, int32 py, int32 pz, int32 pt, float* ret) { *ret = wp::pnoise(state, xyzt, px, py, pz, pt); }
|
|
1104
|
+
WP_API void builtin_curlnoise_uint32_vec2f_uint32_float32_float32(uint32 state, vec2f& xy, uint32 octaves, float32 lacunarity, float32 gain, vec2f* ret) { *ret = wp::curlnoise(state, xy, octaves, lacunarity, gain); }
|
|
1105
|
+
WP_API void builtin_curlnoise_uint32_vec3f_uint32_float32_float32(uint32 state, vec3f& xyz, uint32 octaves, float32 lacunarity, float32 gain, vec3f* ret) { *ret = wp::curlnoise(state, xyz, octaves, lacunarity, gain); }
|
|
1106
|
+
WP_API void builtin_curlnoise_uint32_vec4f_uint32_float32_float32(uint32 state, vec4f& xyzt, uint32 octaves, float32 lacunarity, float32 gain, vec3f* ret) { *ret = wp::curlnoise(state, xyzt, octaves, lacunarity, gain); }
|
|
1107
1107
|
WP_API void builtin_extract_vec2h_int32(vec2h& a, int32 i, float16* ret) { *ret = wp::extract(a, i); }
|
|
1108
1108
|
WP_API void builtin_extract_vec3h_int32(vec3h& a, int32 i, float16* ret) { *ret = wp::extract(a, i); }
|
|
1109
1109
|
WP_API void builtin_extract_vec4h_int32(vec4h& a, int32 i, float16* ret) { *ret = wp::extract(a, i); }
|
warp/native/intersect.h
CHANGED
|
@@ -156,6 +156,23 @@ CUDA_CALLABLE inline bool intersect_ray_aabb(const vec3& pos, const vec3& rcp_di
|
|
|
156
156
|
return hit;
|
|
157
157
|
}
|
|
158
158
|
|
|
159
|
+
CUDA_CALLABLE inline bool intersect_aabb_aabb(const vec3& a_lower, const vec3& a_upper, const vec3& b_lower, const vec3& b_upper)
|
|
160
|
+
{
|
|
161
|
+
if (a_lower[0] > b_upper[0] ||
|
|
162
|
+
a_lower[1] > b_upper[1] ||
|
|
163
|
+
a_lower[2] > b_upper[2] ||
|
|
164
|
+
a_upper[0] < b_lower[0] ||
|
|
165
|
+
a_upper[1] < b_lower[1] ||
|
|
166
|
+
a_upper[2] < b_lower[2])
|
|
167
|
+
{
|
|
168
|
+
return false;
|
|
169
|
+
}
|
|
170
|
+
else
|
|
171
|
+
{
|
|
172
|
+
return true;
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
|
|
159
176
|
|
|
160
177
|
// Moller and Trumbore's method
|
|
161
178
|
CUDA_CALLABLE inline bool intersect_ray_tri_moller(const vec3& p, const vec3& dir, const vec3& a, const vec3& b, const vec3& c, float& t, float& u, float& v, float& w, float& sign, vec3* normal)
|
warp/native/mat.h
CHANGED
|
@@ -394,6 +394,36 @@ inline CUDA_CALLABLE void adj_index(const mat_t<Rows,Cols,Type>& m, int row, int
|
|
|
394
394
|
}
|
|
395
395
|
|
|
396
396
|
|
|
397
|
+
template<unsigned Rows, unsigned Cols, typename Type>
|
|
398
|
+
inline CUDA_CALLABLE void augassign_add(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
|
|
399
|
+
{
|
|
400
|
+
m.data[row][col] += value;
|
|
401
|
+
}
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
template<unsigned Rows, unsigned Cols, typename Type>
|
|
405
|
+
inline CUDA_CALLABLE void adj_augassign_add(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
|
|
406
|
+
mat_t<Rows,Cols,Type>& adj_m, int adj_row, int adj_col, Type& adj_value)
|
|
407
|
+
{
|
|
408
|
+
adj_value += adj_m.data[row][col];
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
|
|
412
|
+
template<unsigned Rows, unsigned Cols, typename Type>
|
|
413
|
+
inline CUDA_CALLABLE void augassign_sub(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
|
|
414
|
+
{
|
|
415
|
+
m.data[row][col] -= value;
|
|
416
|
+
}
|
|
417
|
+
|
|
418
|
+
|
|
419
|
+
template<unsigned Rows, unsigned Cols, typename Type>
|
|
420
|
+
inline CUDA_CALLABLE void adj_augassign_sub(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
|
|
421
|
+
mat_t<Rows,Cols,Type>& adj_m, int adj_row, int adj_col, Type& adj_value)
|
|
422
|
+
{
|
|
423
|
+
adj_value -= adj_m.data[row][col];
|
|
424
|
+
}
|
|
425
|
+
|
|
426
|
+
|
|
397
427
|
template<unsigned Rows, unsigned Cols, typename Type>
|
|
398
428
|
inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
|
|
399
429
|
{
|
|
@@ -1650,4 +1680,15 @@ inline CUDA_CALLABLE void adj_mat44(float m00, float m01, float m02, float m03,
|
|
|
1650
1680
|
a33 += adj_ret.data[3][3];
|
|
1651
1681
|
}
|
|
1652
1682
|
|
|
1683
|
+
template<unsigned Rows, unsigned Cols, typename Type>
|
|
1684
|
+
CUDA_CALLABLE inline int len(const mat_t<Rows,Cols,Type>& x)
|
|
1685
|
+
{
|
|
1686
|
+
return Rows;
|
|
1687
|
+
}
|
|
1688
|
+
|
|
1689
|
+
template<unsigned Rows, unsigned Cols, typename Type>
|
|
1690
|
+
CUDA_CALLABLE inline void adj_len(const mat_t<Rows,Cols,Type>& x, mat_t<Rows,Cols,Type>& adj_x, const int& adj_ret)
|
|
1691
|
+
{
|
|
1692
|
+
}
|
|
1693
|
+
|
|
1653
1694
|
} // namespace wp
|
warp/native/mathdx.cpp
CHANGED
|
@@ -32,6 +32,7 @@ bool cuda_compile_fft(
|
|
|
32
32
|
}
|
|
33
33
|
|
|
34
34
|
WP_API bool cuda_compile_dot(
|
|
35
|
+
const char* fatbin_output_path,
|
|
35
36
|
const char* ltoir_output_path,
|
|
36
37
|
const char* symbol_name,
|
|
37
38
|
int num_include_dirs,
|
|
@@ -54,6 +55,24 @@ WP_API bool cuda_compile_dot(
|
|
|
54
55
|
return false;
|
|
55
56
|
}
|
|
56
57
|
|
|
58
|
+
WP_API bool cuda_compile_solver(
|
|
59
|
+
const char* ltoir_output_path,
|
|
60
|
+
const char* symbol_name,
|
|
61
|
+
int num_include_dirs,
|
|
62
|
+
const char** include_dirs,
|
|
63
|
+
const char* mathdx_include_dir,
|
|
64
|
+
int arch,
|
|
65
|
+
int M,
|
|
66
|
+
int N,
|
|
67
|
+
int function,
|
|
68
|
+
int precision,
|
|
69
|
+
int fill_mode,
|
|
70
|
+
int num_threads)
|
|
71
|
+
{
|
|
72
|
+
printf("CUDA is disabled and/or Warp was not compiled with MathDx support.\n");
|
|
73
|
+
return false;
|
|
74
|
+
}
|
|
75
|
+
|
|
57
76
|
} // extern "C"
|
|
58
77
|
|
|
59
78
|
#endif // !WP_ENABLE_CUDA || !WP_ENABLE_MATHDX
|
warp/native/mesh.cpp
CHANGED
|
@@ -67,11 +67,28 @@ void bvh_refit_with_solid_angle_recursive_host(BVH& bvh, int index, Mesh& mesh)
|
|
|
67
67
|
if (lower.b)
|
|
68
68
|
{
|
|
69
69
|
// Leaf, compute properties
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
70
|
+
const int start = lower.i;
|
|
71
|
+
const int end = upper.i;
|
|
72
|
+
// loops through primitives in the leaf
|
|
73
|
+
for (int primitive_counter = start; primitive_counter < end; primitive_counter++)
|
|
74
|
+
{
|
|
75
|
+
int primitive_index = mesh.bvh.primitive_indices[primitive_counter];
|
|
76
|
+
if (primitive_counter == start)
|
|
77
|
+
{
|
|
78
|
+
precompute_triangle_solid_angle_props(mesh.points[mesh.indices[primitive_index * 3 + 0]], mesh.points[mesh.indices[primitive_index * 3 + 1]],
|
|
79
|
+
mesh.points[mesh.indices[primitive_index * 3 + 2]], mesh.solid_angle_props[index]);
|
|
80
|
+
}
|
|
81
|
+
else
|
|
82
|
+
{
|
|
83
|
+
SolidAngleProps triangle_solid_angle_props;
|
|
84
|
+
precompute_triangle_solid_angle_props(mesh.points[mesh.indices[primitive_index * 3 + 0]], mesh.points[mesh.indices[primitive_index * 3 + 1]],
|
|
85
|
+
mesh.points[mesh.indices[primitive_index * 3 + 2]], triangle_solid_angle_props);
|
|
86
|
+
mesh.solid_angle_props[index] = combine_precomputed_solid_angle_props(&mesh.solid_angle_props[index], &triangle_solid_angle_props);
|
|
87
|
+
}
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
(vec3&)lower = mesh.solid_angle_props[index].box.lower;
|
|
91
|
+
(vec3&)upper = mesh.solid_angle_props[index].box.upper;
|
|
75
92
|
}
|
|
76
93
|
else
|
|
77
94
|
{
|
|
@@ -109,7 +126,7 @@ void bvh_refit_with_solid_angle_host(BVH& bvh, Mesh& mesh)
|
|
|
109
126
|
bvh_refit_with_solid_angle_recursive_host(bvh, 0, mesh);
|
|
110
127
|
}
|
|
111
128
|
|
|
112
|
-
uint64_t mesh_create_host(array_t<wp::vec3> points, array_t<wp::vec3> velocities, array_t<int> indices, int num_points, int num_tris, int support_winding_number)
|
|
129
|
+
uint64_t mesh_create_host(array_t<wp::vec3> points, array_t<wp::vec3> velocities, array_t<int> indices, int num_points, int num_tris, int support_winding_number, int constructor_type)
|
|
113
130
|
{
|
|
114
131
|
Mesh* m = new Mesh(points, velocities, indices, num_points, num_tris);
|
|
115
132
|
|
|
@@ -137,7 +154,7 @@ uint64_t mesh_create_host(array_t<wp::vec3> points, array_t<wp::vec3> velocities
|
|
|
137
154
|
}
|
|
138
155
|
m->average_edge_length = sum / (num_tris*3);
|
|
139
156
|
|
|
140
|
-
wp::bvh_create_host(m->lowers, m->uppers, num_tris, m->bvh);
|
|
157
|
+
wp::bvh_create_host(m->lowers, m->uppers, num_tris, constructor_type, m->bvh);
|
|
141
158
|
|
|
142
159
|
if (support_winding_number)
|
|
143
160
|
{
|
|
@@ -230,7 +247,7 @@ void mesh_set_velocities_host(uint64_t id, wp::array_t<wp::vec3> velocities)
|
|
|
230
247
|
#if !WP_ENABLE_CUDA
|
|
231
248
|
|
|
232
249
|
|
|
233
|
-
WP_API uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::array_t<wp::vec3> velocities, wp::array_t<int> tris, int num_points, int num_tris, int support_winding_number) { return 0; }
|
|
250
|
+
WP_API uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::array_t<wp::vec3> velocities, wp::array_t<int> tris, int num_points, int num_tris, int support_winding_number, int constructor_type) { return 0; }
|
|
234
251
|
WP_API void mesh_destroy_device(uint64_t id) {}
|
|
235
252
|
WP_API void mesh_refit_device(uint64_t id) {}
|
|
236
253
|
WP_API void mesh_set_points_device(uint64_t id, wp::array_t<wp::vec3> points) {};
|
warp/native/mesh.cu
CHANGED
|
@@ -63,23 +63,61 @@ __global__ void compute_average_mesh_edge_length(int n, float* sum_edge_lengths,
|
|
|
63
63
|
m->average_edge_length = sum_edge_lengths[n - 1] / (3*n);
|
|
64
64
|
}
|
|
65
65
|
|
|
66
|
-
__global__ void bvh_refit_with_solid_angle_kernel(int n, const int* __restrict__ parents,
|
|
66
|
+
__global__ void bvh_refit_with_solid_angle_kernel(int n, const int* __restrict__ parents,
|
|
67
|
+
int* __restrict__ child_count, BVHPackedNodeHalf* __restrict__ node_lowers, BVHPackedNodeHalf* __restrict__ node_uppers,
|
|
68
|
+
const vec3* points, const int* indices, const int* primitive_indices, SolidAngleProps* solid_angle_props)
|
|
67
69
|
{
|
|
68
70
|
int index = blockDim.x*blockIdx.x + threadIdx.x;
|
|
69
71
|
|
|
70
72
|
if (index < n)
|
|
71
73
|
{
|
|
72
|
-
bool leaf =
|
|
74
|
+
bool leaf = node_lowers[index].b;
|
|
75
|
+
int parent = parents[index];
|
|
73
76
|
|
|
74
77
|
if (leaf)
|
|
75
78
|
{
|
|
79
|
+
BVHPackedNodeHalf& lower = node_lowers[index];
|
|
80
|
+
BVHPackedNodeHalf& upper = node_uppers[index];
|
|
81
|
+
|
|
76
82
|
// update the leaf node
|
|
77
|
-
|
|
78
|
-
|
|
83
|
+
bool true_leaf = true;
|
|
84
|
+
|
|
85
|
+
if (parent != -1)
|
|
86
|
+
{
|
|
87
|
+
true_leaf = !node_lowers[parent].b;
|
|
88
|
+
}
|
|
79
89
|
|
|
80
|
-
|
|
81
|
-
|
|
90
|
+
if (true_leaf)
|
|
91
|
+
{
|
|
92
|
+
SolidAngleProps node_solid_angle_props;
|
|
93
|
+
|
|
94
|
+
const int start = lower.i;
|
|
95
|
+
const int end = upper.i;
|
|
96
|
+
|
|
97
|
+
// loops through primitives in the leaf
|
|
98
|
+
for (int primitive_counter = start; primitive_counter < end; primitive_counter++)
|
|
99
|
+
{
|
|
100
|
+
int primitive_index = primitive_indices[primitive_counter];
|
|
101
|
+
if (primitive_counter == start)
|
|
102
|
+
{
|
|
103
|
+
precompute_triangle_solid_angle_props(points[indices[primitive_index * 3 + 0]], points[indices[primitive_index * 3 + 1]],
|
|
104
|
+
points[indices[primitive_index * 3 + 2]], node_solid_angle_props);
|
|
105
|
+
}
|
|
106
|
+
else
|
|
107
|
+
{
|
|
108
|
+
SolidAngleProps triangle_solid_angle_props;
|
|
109
|
+
precompute_triangle_solid_angle_props(points[indices[primitive_index * 3 + 0]], points[indices[primitive_index * 3 + 1]],
|
|
110
|
+
points[indices[primitive_index * 3 + 2]], triangle_solid_angle_props);
|
|
111
|
+
node_solid_angle_props = combine_precomputed_solid_angle_props(&node_solid_angle_props, &triangle_solid_angle_props);
|
|
112
|
+
}
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
(vec3&)lower = node_solid_angle_props.box.lower;
|
|
116
|
+
(vec3&)upper = node_solid_angle_props.box.upper;
|
|
117
|
+
solid_angle_props[index] = node_solid_angle_props;
|
|
118
|
+
}
|
|
82
119
|
}
|
|
120
|
+
|
|
83
121
|
else
|
|
84
122
|
{
|
|
85
123
|
// only keep leaf threads
|
|
@@ -89,7 +127,7 @@ __global__ void bvh_refit_with_solid_angle_kernel(int n, const int* __restrict__
|
|
|
89
127
|
// update hierarchy
|
|
90
128
|
for (;;)
|
|
91
129
|
{
|
|
92
|
-
|
|
130
|
+
parent = parents[index];
|
|
93
131
|
|
|
94
132
|
// reached root
|
|
95
133
|
if (parent == -1)
|
|
@@ -104,41 +142,74 @@ __global__ void bvh_refit_with_solid_angle_kernel(int n, const int* __restrict__
|
|
|
104
142
|
// then update its bounds and move onto the next parent in the hierarchy
|
|
105
143
|
if (finished == 1)
|
|
106
144
|
{
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
145
|
+
BVHPackedNodeHalf& parent_lower = node_lowers[parent];
|
|
146
|
+
BVHPackedNodeHalf& parent_upper = node_uppers[parent];
|
|
147
|
+
if (parent_lower.b)
|
|
148
|
+
// a packed leaf node can still be a parent in LBVH, we need to recompute its bounds
|
|
149
|
+
// since we've lost its left and right child node index in the muting process
|
|
150
|
+
{
|
|
151
|
+
int parent_parent = parents[parent];;
|
|
152
|
+
// only need to compute bound when this is a valid leaf node
|
|
153
|
+
bool true_leaf = true;
|
|
154
|
+
|
|
155
|
+
if (parent_parent != -1)
|
|
156
|
+
{
|
|
157
|
+
true_leaf = !node_lowers[parent_parent].b;
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
if (true_leaf)
|
|
161
|
+
{
|
|
162
|
+
SolidAngleProps node_solid_angle_props;
|
|
163
|
+
const int start = parent_lower.i;
|
|
164
|
+
const int end = parent_upper.i;
|
|
165
|
+
// loops through primitives in the leaf
|
|
166
|
+
for (int primitive_counter = start; primitive_counter < end; primitive_counter++)
|
|
167
|
+
{
|
|
168
|
+
int primitive_index = primitive_indices[primitive_counter];
|
|
169
|
+
if (primitive_counter == start)
|
|
170
|
+
{
|
|
171
|
+
precompute_triangle_solid_angle_props(points[indices[primitive_index * 3 + 0]], points[indices[primitive_index * 3 + 1]],
|
|
172
|
+
points[indices[primitive_index * 3 + 2]], node_solid_angle_props);
|
|
173
|
+
}
|
|
174
|
+
else
|
|
175
|
+
{
|
|
176
|
+
SolidAngleProps triangle_solid_angle_props;
|
|
177
|
+
precompute_triangle_solid_angle_props(points[indices[primitive_index * 3 + 0]], points[indices[primitive_index * 3 + 1]],
|
|
178
|
+
points[indices[primitive_index * 3 + 2]], triangle_solid_angle_props);
|
|
179
|
+
node_solid_angle_props = combine_precomputed_solid_angle_props(&node_solid_angle_props, &triangle_solid_angle_props);
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
(vec3&)parent_lower = node_solid_angle_props.box.lower;
|
|
184
|
+
(vec3&)parent_upper = node_solid_angle_props.box.upper;
|
|
185
|
+
solid_angle_props[parent] = node_solid_angle_props;
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
else
|
|
189
|
+
{
|
|
190
|
+
//printf("Compute non-leaf at %d\n", index);
|
|
191
|
+
const int left_child = node_lowers[parent].i;
|
|
192
|
+
const int right_child = node_uppers[parent].i;
|
|
193
|
+
|
|
194
|
+
vec3 left_lower = (vec3&)(node_lowers[left_child]);
|
|
195
|
+
vec3 left_upper = (vec3&)(node_uppers[left_child]);
|
|
196
|
+
vec3 right_lower = (vec3&)(node_lowers[right_child]);
|
|
197
|
+
vec3 right_upper = (vec3&)(node_uppers[right_child]);
|
|
198
|
+
|
|
199
|
+
// union of child bounds
|
|
200
|
+
vec3 lower = min(left_lower, right_lower);
|
|
201
|
+
vec3 upper = max(left_upper, right_upper);
|
|
202
|
+
|
|
203
|
+
// write new BVH nodes
|
|
204
|
+
(vec3&)parent_lower = lower;
|
|
205
|
+
(vec3&)parent_upper = upper;
|
|
206
|
+
|
|
207
|
+
// combine
|
|
208
|
+
SolidAngleProps* left_child_data = &solid_angle_props[left_child];
|
|
209
|
+
SolidAngleProps* right_child_data = (left_child != right_child) ? &solid_angle_props[right_child] : NULL;
|
|
210
|
+
|
|
211
|
+
combine_precomputed_solid_angle_props(solid_angle_props[parent], left_child_data, right_child_data);
|
|
212
|
+
}
|
|
142
213
|
// move onto processing the parent
|
|
143
214
|
index = parent;
|
|
144
215
|
}
|
|
@@ -157,15 +228,15 @@ void bvh_refit_with_solid_angle_device(BVH& bvh, Mesh& mesh)
|
|
|
157
228
|
ContextGuard guard(bvh.context);
|
|
158
229
|
|
|
159
230
|
// clear child counters
|
|
160
|
-
memset_device(WP_CURRENT_CONTEXT, bvh.node_counts, 0, sizeof(int)*bvh.max_nodes);
|
|
161
|
-
|
|
162
|
-
|
|
231
|
+
memset_device(WP_CURRENT_CONTEXT, bvh.node_counts, 0, sizeof(int) * bvh.max_nodes);
|
|
232
|
+
wp_launch_device(WP_CURRENT_CONTEXT, bvh_refit_with_solid_angle_kernel, bvh.num_leaf_nodes,
|
|
233
|
+
(bvh.num_leaf_nodes, bvh.node_parents, bvh.node_counts, bvh.node_lowers, bvh.node_uppers, mesh.points, mesh.indices, bvh.primitive_indices, mesh.solid_angle_props));
|
|
163
234
|
}
|
|
164
235
|
|
|
165
236
|
} // namespace wp
|
|
166
237
|
|
|
167
238
|
|
|
168
|
-
uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::array_t<wp::vec3> velocities, wp::array_t<int> indices, int num_points, int num_tris, int support_winding_number)
|
|
239
|
+
uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::array_t<wp::vec3> velocities, wp::array_t<int> indices, int num_points, int num_tris, int support_winding_number, int constructor_type)
|
|
169
240
|
{
|
|
170
241
|
ContextGuard guard(context);
|
|
171
242
|
|
|
@@ -173,55 +244,38 @@ uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::arr
|
|
|
173
244
|
|
|
174
245
|
mesh.context = context ? context : cuda_context_get_current();
|
|
175
246
|
|
|
247
|
+
// create lower upper arrays expected by GPU BVH builder
|
|
248
|
+
mesh.lowers = (wp::vec3*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::vec3)*num_tris);
|
|
249
|
+
mesh.uppers = (wp::vec3*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::vec3)*num_tris);
|
|
250
|
+
|
|
251
|
+
if (support_winding_number)
|
|
176
252
|
{
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
// int* indices_host = (int*)alloc_host(sizeof(int)*num_tris*3);
|
|
180
|
-
// bounds3* bounds_host = (bounds3*)alloc_host(sizeof(bounds3)*num_tris);
|
|
181
|
-
|
|
182
|
-
// memcpy_d2h(WP_CURRENT_CONTEXT, points_host, points, sizeof(vec3)*num_points);
|
|
183
|
-
// memcpy_d2h(WP_CURRENT_CONTEXT, indices_host, indices, sizeof(int)*num_tris*3);
|
|
184
|
-
// cuda_context_synchronize(WP_CURRENT_CONTEXT);
|
|
185
|
-
|
|
186
|
-
// float sum = 0.0;
|
|
187
|
-
// for (int i=0; i < num_tris; ++i)
|
|
188
|
-
// {
|
|
189
|
-
// bounds_host[i] = bounds3();
|
|
190
|
-
// wp::vec3 p0 = points_host[indices_host[i*3+0]];
|
|
191
|
-
// wp::vec3 p1 = points_host[indices_host[i*3+1]];
|
|
192
|
-
// wp::vec3 p2 = points_host[indices_host[i*3+2]];
|
|
193
|
-
// bounds_host[i].add_point(p0);
|
|
194
|
-
// bounds_host[i].add_point(p1);
|
|
195
|
-
// bounds_host[i].add_point(p2);
|
|
196
|
-
// sum += length(p0-p1) + length(p0-p2) + length(p2-p1);
|
|
197
|
-
// }
|
|
198
|
-
// mesh.average_edge_length = sum / (num_tris*3);
|
|
199
|
-
|
|
200
|
-
// BVH bvh_host = bvh_create(bounds_host, num_tris);
|
|
201
|
-
// BVH bvh_device = bvh_clone(WP_CURRENT_CONTEXT, bvh_host);
|
|
202
|
-
|
|
203
|
-
// bvh_destroy_host(bvh_host);
|
|
204
|
-
|
|
205
|
-
// create lower upper arrays expected by GPU BVH builder
|
|
206
|
-
mesh.lowers = (wp::vec3*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::vec3)*num_tris);
|
|
207
|
-
mesh.uppers = (wp::vec3*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::vec3)*num_tris);
|
|
208
|
-
|
|
209
|
-
wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_triangle_bounds, num_tris, (num_tris, points.data, indices.data, mesh.lowers, mesh.uppers));
|
|
210
|
-
|
|
211
|
-
wp::bvh_create_device(mesh.context, mesh.lowers, mesh.uppers, num_tris, mesh.bvh);
|
|
212
|
-
|
|
213
|
-
if (support_winding_number)
|
|
214
|
-
{
|
|
215
|
-
int num_bvh_nodes = 2*num_tris;
|
|
216
|
-
mesh.solid_angle_props = (wp::SolidAngleProps*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::SolidAngleProps)*num_bvh_nodes);
|
|
217
|
-
}
|
|
253
|
+
int num_bvh_nodes = 2 * num_tris;
|
|
254
|
+
mesh.solid_angle_props = (wp::SolidAngleProps*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::SolidAngleProps) * num_bvh_nodes);
|
|
218
255
|
}
|
|
219
256
|
|
|
220
257
|
wp::Mesh* mesh_device = (wp::Mesh*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::Mesh));
|
|
221
258
|
memcpy_h2d(WP_CURRENT_CONTEXT, mesh_device, &mesh, sizeof(wp::Mesh));
|
|
222
|
-
|
|
259
|
+
|
|
223
260
|
// save descriptor
|
|
224
261
|
uint64_t mesh_id = (uint64_t)mesh_device;
|
|
262
|
+
|
|
263
|
+
// we compute mesh the average edge length
|
|
264
|
+
// for use in mesh_query_point_sign_normal()
|
|
265
|
+
// since it relies on an epsilon for welding
|
|
266
|
+
// reuse bounds memory temporarily for computing edge lengths
|
|
267
|
+
float* length_tmp_ptr = (float*)mesh.lowers;
|
|
268
|
+
wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_mesh_edge_lengths, mesh.num_tris, (mesh.num_tris, mesh.points, mesh.indices, length_tmp_ptr));
|
|
269
|
+
scan_device(length_tmp_ptr, length_tmp_ptr, mesh.num_tris, true);
|
|
270
|
+
wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_average_mesh_edge_length, 1, (mesh.num_tris, length_tmp_ptr, mesh_device));
|
|
271
|
+
|
|
272
|
+
// compute triangle bound and construct BVH
|
|
273
|
+
wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_triangle_bounds, mesh.num_tris, (mesh.num_tris, mesh.points, mesh.indices, mesh.lowers, mesh.uppers));
|
|
274
|
+
wp::bvh_create_device(mesh.context, mesh.lowers, mesh.uppers, num_tris, constructor_type, mesh.bvh);
|
|
275
|
+
|
|
276
|
+
// we need to overwrite mesh.bvh because it is not initialized when we construct it on device
|
|
277
|
+
memcpy_h2d(WP_CURRENT_CONTEXT, &(mesh_device->bvh), &mesh.bvh, sizeof(wp::BVH));
|
|
278
|
+
|
|
225
279
|
mesh_add_descriptor(mesh_id, mesh);
|
|
226
280
|
|
|
227
281
|
if (support_winding_number)
|
|
@@ -263,23 +317,21 @@ void mesh_refit_device(uint64_t id)
|
|
|
263
317
|
{
|
|
264
318
|
ContextGuard guard(m.context);
|
|
265
319
|
|
|
320
|
+
// we compute mesh the average edge length
|
|
321
|
+
// for use in mesh_query_point_sign_normal()
|
|
322
|
+
// since it relies on an epsilon for welding
|
|
323
|
+
|
|
324
|
+
// reuse bounds memory temporarily for computing edge lengths
|
|
325
|
+
float* length_tmp_ptr = (float*)m.lowers;
|
|
326
|
+
wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_mesh_edge_lengths, m.num_tris, (m.num_tris, m.points, m.indices, length_tmp_ptr));
|
|
327
|
+
|
|
328
|
+
scan_device(length_tmp_ptr, length_tmp_ptr, m.num_tris, true);
|
|
329
|
+
|
|
330
|
+
wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_average_mesh_edge_length, 1, (m.num_tris, length_tmp_ptr, (wp::Mesh*)id));
|
|
266
331
|
wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_triangle_bounds, m.num_tris, (m.num_tris, m.points, m.indices, m.lowers, m.uppers));
|
|
267
332
|
|
|
268
333
|
if (m.solid_angle_props)
|
|
269
334
|
{
|
|
270
|
-
// we compute mesh the average edge length
|
|
271
|
-
// for use in mesh_query_point_sign_normal()
|
|
272
|
-
// since it relies on an epsilon for welding
|
|
273
|
-
|
|
274
|
-
// reuse bounds memory temporarily for computing edge lengths
|
|
275
|
-
float* length_tmp_ptr = (float*)m.lowers;
|
|
276
|
-
wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_mesh_edge_lengths, m.num_tris, (m.num_tris, m.points, m.indices, length_tmp_ptr));
|
|
277
|
-
|
|
278
|
-
scan_device(length_tmp_ptr, length_tmp_ptr, m.num_tris, true);
|
|
279
|
-
|
|
280
|
-
wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_average_mesh_edge_length, 1, (m.num_tris, length_tmp_ptr, (wp::Mesh*)id));
|
|
281
|
-
wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_triangle_bounds, m.num_tris, (m.num_tris, m.points, m.indices, m.lowers, m.uppers));
|
|
282
|
-
|
|
283
335
|
// update solid angle data
|
|
284
336
|
bvh_refit_with_solid_angle_device(m.bvh, m);
|
|
285
337
|
}
|