warp-lang 1.5.0__py3-none-manylinux2014_x86_64.whl → 1.6.0__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (132)
  1. warp/__init__.py +5 -0
  2. warp/autograd.py +414 -191
  3. warp/bin/warp-clang.so +0 -0
  4. warp/bin/warp.so +0 -0
  5. warp/build.py +40 -12
  6. warp/build_dll.py +13 -6
  7. warp/builtins.py +1124 -497
  8. warp/codegen.py +261 -136
  9. warp/config.py +1 -1
  10. warp/context.py +357 -119
  11. warp/examples/assets/square_cloth.usd +0 -0
  12. warp/examples/benchmarks/benchmark_gemm.py +27 -18
  13. warp/examples/benchmarks/benchmark_interop_paddle.py +3 -3
  14. warp/examples/benchmarks/benchmark_interop_torch.py +3 -3
  15. warp/examples/core/example_torch.py +18 -34
  16. warp/examples/fem/example_apic_fluid.py +1 -0
  17. warp/examples/fem/example_mixed_elasticity.py +1 -1
  18. warp/examples/optim/example_bounce.py +1 -1
  19. warp/examples/optim/example_cloth_throw.py +1 -1
  20. warp/examples/optim/example_diffray.py +4 -15
  21. warp/examples/optim/example_drone.py +1 -1
  22. warp/examples/optim/example_softbody_properties.py +392 -0
  23. warp/examples/optim/example_trajectory.py +1 -3
  24. warp/examples/optim/example_walker.py +5 -0
  25. warp/examples/sim/example_cartpole.py +0 -2
  26. warp/examples/sim/example_cloth.py +3 -1
  27. warp/examples/sim/example_cloth_self_contact.py +260 -0
  28. warp/examples/sim/example_granular_collision_sdf.py +4 -5
  29. warp/examples/sim/example_jacobian_ik.py +0 -2
  30. warp/examples/sim/example_quadruped.py +5 -2
  31. warp/examples/tile/example_tile_cholesky.py +79 -0
  32. warp/examples/tile/example_tile_convolution.py +2 -2
  33. warp/examples/tile/example_tile_fft.py +2 -2
  34. warp/examples/tile/example_tile_filtering.py +3 -3
  35. warp/examples/tile/example_tile_matmul.py +4 -4
  36. warp/examples/tile/example_tile_mlp.py +12 -12
  37. warp/examples/tile/example_tile_nbody.py +180 -0
  38. warp/examples/tile/example_tile_walker.py +319 -0
  39. warp/fem/geometry/geometry.py +0 -2
  40. warp/math.py +147 -0
  41. warp/native/array.h +12 -0
  42. warp/native/builtin.h +0 -1
  43. warp/native/bvh.cpp +149 -70
  44. warp/native/bvh.cu +287 -68
  45. warp/native/bvh.h +195 -85
  46. warp/native/clang/clang.cpp +5 -1
  47. warp/native/coloring.cpp +5 -1
  48. warp/native/cuda_util.cpp +91 -53
  49. warp/native/cuda_util.h +5 -0
  50. warp/native/exports.h +40 -40
  51. warp/native/intersect.h +17 -0
  52. warp/native/mat.h +41 -0
  53. warp/native/mathdx.cpp +19 -0
  54. warp/native/mesh.cpp +25 -8
  55. warp/native/mesh.cu +153 -101
  56. warp/native/mesh.h +482 -403
  57. warp/native/quat.h +40 -0
  58. warp/native/solid_angle.h +7 -0
  59. warp/native/sort.cpp +85 -0
  60. warp/native/sort.cu +34 -0
  61. warp/native/sort.h +3 -1
  62. warp/native/spatial.h +11 -0
  63. warp/native/tile.h +1187 -669
  64. warp/native/tile_reduce.h +8 -6
  65. warp/native/vec.h +41 -0
  66. warp/native/warp.cpp +8 -1
  67. warp/native/warp.cu +263 -40
  68. warp/native/warp.h +19 -5
  69. warp/optim/linear.py +22 -4
  70. warp/render/render_opengl.py +130 -64
  71. warp/sim/__init__.py +6 -1
  72. warp/sim/collide.py +270 -26
  73. warp/sim/import_urdf.py +8 -8
  74. warp/sim/integrator_euler.py +25 -7
  75. warp/sim/integrator_featherstone.py +154 -35
  76. warp/sim/integrator_vbd.py +842 -40
  77. warp/sim/model.py +134 -72
  78. warp/sparse.py +1 -1
  79. warp/stubs.py +265 -132
  80. warp/tape.py +28 -30
  81. warp/tests/aux_test_module_unload.py +15 -0
  82. warp/tests/{test_sim_grad.py → flaky_test_sim_grad.py} +104 -63
  83. warp/tests/test_array.py +74 -0
  84. warp/tests/test_assert.py +242 -0
  85. warp/tests/test_codegen.py +14 -61
  86. warp/tests/test_collision.py +2 -2
  87. warp/tests/test_coloring.py +12 -2
  88. warp/tests/test_examples.py +12 -1
  89. warp/tests/test_func.py +21 -4
  90. warp/tests/test_grad_debug.py +87 -2
  91. warp/tests/test_hash_grid.py +1 -1
  92. warp/tests/test_ipc.py +116 -0
  93. warp/tests/test_lerp.py +13 -87
  94. warp/tests/test_mat.py +138 -167
  95. warp/tests/test_math.py +47 -1
  96. warp/tests/test_matmul.py +17 -16
  97. warp/tests/test_matmul_lite.py +10 -15
  98. warp/tests/test_mesh.py +84 -60
  99. warp/tests/test_mesh_query_aabb.py +165 -0
  100. warp/tests/test_mesh_query_point.py +328 -286
  101. warp/tests/test_mesh_query_ray.py +134 -121
  102. warp/tests/test_mlp.py +2 -2
  103. warp/tests/test_operators.py +43 -0
  104. warp/tests/test_overwrite.py +47 -2
  105. warp/tests/test_quat.py +77 -0
  106. warp/tests/test_reload.py +29 -0
  107. warp/tests/test_sim_grad_bounce_linear.py +204 -0
  108. warp/tests/test_smoothstep.py +17 -83
  109. warp/tests/test_static.py +19 -3
  110. warp/tests/test_tape.py +25 -0
  111. warp/tests/test_tile.py +178 -191
  112. warp/tests/test_tile_load.py +356 -0
  113. warp/tests/test_tile_mathdx.py +61 -8
  114. warp/tests/test_tile_mlp.py +17 -17
  115. warp/tests/test_tile_reduce.py +24 -18
  116. warp/tests/test_tile_shared_memory.py +66 -17
  117. warp/tests/test_tile_view.py +165 -0
  118. warp/tests/test_torch.py +35 -0
  119. warp/tests/test_utils.py +36 -24
  120. warp/tests/test_vec.py +110 -0
  121. warp/tests/unittest_suites.py +29 -4
  122. warp/tests/unittest_utils.py +30 -13
  123. warp/thirdparty/unittest_parallel.py +2 -2
  124. warp/types.py +411 -101
  125. warp/utils.py +10 -7
  126. {warp_lang-1.5.0.dist-info → warp_lang-1.6.0.dist-info}/METADATA +92 -69
  127. {warp_lang-1.5.0.dist-info → warp_lang-1.6.0.dist-info}/RECORD +130 -119
  128. {warp_lang-1.5.0.dist-info → warp_lang-1.6.0.dist-info}/WHEEL +1 -1
  129. warp/examples/benchmarks/benchmark_tile.py +0 -179
  130. warp/native/tile_gemm.h +0 -341
  131. {warp_lang-1.5.0.dist-info → warp_lang-1.6.0.dist-info}/LICENSE.md +0 -0
  132. {warp_lang-1.5.0.dist-info → warp_lang-1.6.0.dist-info}/top_level.txt +0 -0
warp/native/exports.h CHANGED
@@ -1001,46 +1001,6 @@ WP_API void builtin_spatial_top_spatial_vectord(spatial_vectord& svec, vec3d* re
1001
1001
  WP_API void builtin_spatial_bottom_spatial_vectorh(spatial_vectorh& svec, vec3h* ret) { *ret = wp::spatial_bottom(svec); }
1002
1002
  WP_API void builtin_spatial_bottom_spatial_vectorf(spatial_vectorf& svec, vec3f* ret) { *ret = wp::spatial_bottom(svec); }
1003
1003
  WP_API void builtin_spatial_bottom_spatial_vectord(spatial_vectord& svec, vec3d* ret) { *ret = wp::spatial_bottom(svec); }
1004
- WP_API void builtin_volume_sample_f_uint64_vec3f_int32(uint64 id, vec3f& uvw, int32 sampling_mode, float* ret) { *ret = wp::volume_sample_f(id, uvw, sampling_mode); }
1005
- WP_API void builtin_volume_sample_grad_f_uint64_vec3f_int32_vec3f(uint64 id, vec3f& uvw, int32 sampling_mode, vec3f& grad, float* ret) { *ret = wp::volume_sample_grad_f(id, uvw, sampling_mode, grad); }
1006
- WP_API void builtin_volume_lookup_f_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, float* ret) { *ret = wp::volume_lookup_f(id, i, j, k); }
1007
- WP_API void builtin_volume_sample_v_uint64_vec3f_int32(uint64 id, vec3f& uvw, int32 sampling_mode, vec3f* ret) { *ret = wp::volume_sample_v(id, uvw, sampling_mode); }
1008
- WP_API void builtin_volume_lookup_v_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, vec3f* ret) { *ret = wp::volume_lookup_v(id, i, j, k); }
1009
- WP_API void builtin_volume_sample_i_uint64_vec3f(uint64 id, vec3f& uvw, int* ret) { *ret = wp::volume_sample_i(id, uvw); }
1010
- WP_API void builtin_volume_lookup_i_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, int* ret) { *ret = wp::volume_lookup_i(id, i, j, k); }
1011
- WP_API void builtin_volume_lookup_index_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, int32* ret) { *ret = wp::volume_lookup_index(id, i, j, k); }
1012
- WP_API void builtin_volume_index_to_world_uint64_vec3f(uint64 id, vec3f& uvw, vec3f* ret) { *ret = wp::volume_index_to_world(id, uvw); }
1013
- WP_API void builtin_volume_world_to_index_uint64_vec3f(uint64 id, vec3f& xyz, vec3f* ret) { *ret = wp::volume_world_to_index(id, xyz); }
1014
- WP_API void builtin_volume_index_to_world_dir_uint64_vec3f(uint64 id, vec3f& uvw, vec3f* ret) { *ret = wp::volume_index_to_world_dir(id, uvw); }
1015
- WP_API void builtin_volume_world_to_index_dir_uint64_vec3f(uint64 id, vec3f& xyz, vec3f* ret) { *ret = wp::volume_world_to_index_dir(id, xyz); }
1016
- WP_API void builtin_rand_init_int32(int32 seed, uint32* ret) { *ret = wp::rand_init(seed); }
1017
- WP_API void builtin_rand_init_int32_int32(int32 seed, int32 offset, uint32* ret) { *ret = wp::rand_init(seed, offset); }
1018
- WP_API void builtin_randi_uint32(uint32 state, int* ret) { *ret = wp::randi(state); }
1019
- WP_API void builtin_randi_uint32_int32_int32(uint32 state, int32 low, int32 high, int* ret) { *ret = wp::randi(state, low, high); }
1020
- WP_API void builtin_randf_uint32(uint32 state, float* ret) { *ret = wp::randf(state); }
1021
- WP_API void builtin_randf_uint32_float32_float32(uint32 state, float32 low, float32 high, float* ret) { *ret = wp::randf(state, low, high); }
1022
- WP_API void builtin_randn_uint32(uint32 state, float* ret) { *ret = wp::randn(state); }
1023
- WP_API void builtin_sample_triangle_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_triangle(state); }
1024
- WP_API void builtin_sample_unit_ring_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_ring(state); }
1025
- WP_API void builtin_sample_unit_disk_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_disk(state); }
1026
- WP_API void builtin_sample_unit_sphere_surface_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_sphere_surface(state); }
1027
- WP_API void builtin_sample_unit_sphere_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_sphere(state); }
1028
- WP_API void builtin_sample_unit_hemisphere_surface_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_hemisphere_surface(state); }
1029
- WP_API void builtin_sample_unit_hemisphere_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_hemisphere(state); }
1030
- WP_API void builtin_sample_unit_square_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_square(state); }
1031
- WP_API void builtin_sample_unit_cube_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_cube(state); }
1032
- WP_API void builtin_poisson_uint32_float32(uint32 state, float32 lam, uint32* ret) { *ret = wp::poisson(state, lam); }
1033
- WP_API void builtin_noise_uint32_float32(uint32 state, float32 x, float* ret) { *ret = wp::noise(state, x); }
1034
- WP_API void builtin_noise_uint32_vec2f(uint32 state, vec2f& xy, float* ret) { *ret = wp::noise(state, xy); }
1035
- WP_API void builtin_noise_uint32_vec3f(uint32 state, vec3f& xyz, float* ret) { *ret = wp::noise(state, xyz); }
1036
- WP_API void builtin_noise_uint32_vec4f(uint32 state, vec4f& xyzt, float* ret) { *ret = wp::noise(state, xyzt); }
1037
- WP_API void builtin_pnoise_uint32_float32_int32(uint32 state, float32 x, int32 px, float* ret) { *ret = wp::pnoise(state, x, px); }
1038
- WP_API void builtin_pnoise_uint32_vec2f_int32_int32(uint32 state, vec2f& xy, int32 px, int32 py, float* ret) { *ret = wp::pnoise(state, xy, px, py); }
1039
- WP_API void builtin_pnoise_uint32_vec3f_int32_int32_int32(uint32 state, vec3f& xyz, int32 px, int32 py, int32 pz, float* ret) { *ret = wp::pnoise(state, xyz, px, py, pz); }
1040
- WP_API void builtin_pnoise_uint32_vec4f_int32_int32_int32_int32(uint32 state, vec4f& xyzt, int32 px, int32 py, int32 pz, int32 pt, float* ret) { *ret = wp::pnoise(state, xyzt, px, py, pz, pt); }
1041
- WP_API void builtin_curlnoise_uint32_vec2f_uint32_float32_float32(uint32 state, vec2f& xy, uint32 octaves, float32 lacunarity, float32 gain, vec2f* ret) { *ret = wp::curlnoise(state, xy, octaves, lacunarity, gain); }
1042
- WP_API void builtin_curlnoise_uint32_vec3f_uint32_float32_float32(uint32 state, vec3f& xyz, uint32 octaves, float32 lacunarity, float32 gain, vec3f* ret) { *ret = wp::curlnoise(state, xyz, octaves, lacunarity, gain); }
1043
- WP_API void builtin_curlnoise_uint32_vec4f_uint32_float32_float32(uint32 state, vec4f& xyzt, uint32 octaves, float32 lacunarity, float32 gain, vec3f* ret) { *ret = wp::curlnoise(state, xyzt, octaves, lacunarity, gain); }
1044
1004
  WP_API void builtin_assign_vec2h_int32_float16(vec2h& a, int32 i, float16 value, vec2h* ret) { *ret = wp::assign(a, i, value); }
1045
1005
  WP_API void builtin_assign_vec3h_int32_float16(vec3h& a, int32 i, float16 value, vec3h* ret) { *ret = wp::assign(a, i, value); }
1046
1006
  WP_API void builtin_assign_vec4h_int32_float16(vec4h& a, int32 i, float16 value, vec4h* ret) { *ret = wp::assign(a, i, value); }
@@ -1104,6 +1064,46 @@ WP_API void builtin_assign_mat22d_int32_vec2d(mat22d& a, int32 i, vec2d& value,
1104
1064
  WP_API void builtin_assign_mat33d_int32_vec3d(mat33d& a, int32 i, vec3d& value, mat33d* ret) { *ret = wp::assign(a, i, value); }
1105
1065
  WP_API void builtin_assign_mat44d_int32_vec4d(mat44d& a, int32 i, vec4d& value, mat44d* ret) { *ret = wp::assign(a, i, value); }
1106
1066
  WP_API void builtin_assign_spatial_matrixd_int32_spatial_vectord(spatial_matrixd& a, int32 i, spatial_vectord& value, spatial_matrixd* ret) { *ret = wp::assign(a, i, value); }
1067
+ WP_API void builtin_volume_sample_f_uint64_vec3f_int32(uint64 id, vec3f& uvw, int32 sampling_mode, float* ret) { *ret = wp::volume_sample_f(id, uvw, sampling_mode); }
1068
+ WP_API void builtin_volume_sample_grad_f_uint64_vec3f_int32_vec3f(uint64 id, vec3f& uvw, int32 sampling_mode, vec3f& grad, float* ret) { *ret = wp::volume_sample_grad_f(id, uvw, sampling_mode, grad); }
1069
+ WP_API void builtin_volume_lookup_f_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, float* ret) { *ret = wp::volume_lookup_f(id, i, j, k); }
1070
+ WP_API void builtin_volume_sample_v_uint64_vec3f_int32(uint64 id, vec3f& uvw, int32 sampling_mode, vec3f* ret) { *ret = wp::volume_sample_v(id, uvw, sampling_mode); }
1071
+ WP_API void builtin_volume_lookup_v_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, vec3f* ret) { *ret = wp::volume_lookup_v(id, i, j, k); }
1072
+ WP_API void builtin_volume_sample_i_uint64_vec3f(uint64 id, vec3f& uvw, int* ret) { *ret = wp::volume_sample_i(id, uvw); }
1073
+ WP_API void builtin_volume_lookup_i_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, int* ret) { *ret = wp::volume_lookup_i(id, i, j, k); }
1074
+ WP_API void builtin_volume_lookup_index_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, int32* ret) { *ret = wp::volume_lookup_index(id, i, j, k); }
1075
+ WP_API void builtin_volume_index_to_world_uint64_vec3f(uint64 id, vec3f& uvw, vec3f* ret) { *ret = wp::volume_index_to_world(id, uvw); }
1076
+ WP_API void builtin_volume_world_to_index_uint64_vec3f(uint64 id, vec3f& xyz, vec3f* ret) { *ret = wp::volume_world_to_index(id, xyz); }
1077
+ WP_API void builtin_volume_index_to_world_dir_uint64_vec3f(uint64 id, vec3f& uvw, vec3f* ret) { *ret = wp::volume_index_to_world_dir(id, uvw); }
1078
+ WP_API void builtin_volume_world_to_index_dir_uint64_vec3f(uint64 id, vec3f& xyz, vec3f* ret) { *ret = wp::volume_world_to_index_dir(id, xyz); }
1079
+ WP_API void builtin_rand_init_int32(int32 seed, uint32* ret) { *ret = wp::rand_init(seed); }
1080
+ WP_API void builtin_rand_init_int32_int32(int32 seed, int32 offset, uint32* ret) { *ret = wp::rand_init(seed, offset); }
1081
+ WP_API void builtin_randi_uint32(uint32 state, int* ret) { *ret = wp::randi(state); }
1082
+ WP_API void builtin_randi_uint32_int32_int32(uint32 state, int32 low, int32 high, int* ret) { *ret = wp::randi(state, low, high); }
1083
+ WP_API void builtin_randf_uint32(uint32 state, float* ret) { *ret = wp::randf(state); }
1084
+ WP_API void builtin_randf_uint32_float32_float32(uint32 state, float32 low, float32 high, float* ret) { *ret = wp::randf(state, low, high); }
1085
+ WP_API void builtin_randn_uint32(uint32 state, float* ret) { *ret = wp::randn(state); }
1086
+ WP_API void builtin_sample_triangle_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_triangle(state); }
1087
+ WP_API void builtin_sample_unit_ring_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_ring(state); }
1088
+ WP_API void builtin_sample_unit_disk_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_disk(state); }
1089
+ WP_API void builtin_sample_unit_sphere_surface_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_sphere_surface(state); }
1090
+ WP_API void builtin_sample_unit_sphere_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_sphere(state); }
1091
+ WP_API void builtin_sample_unit_hemisphere_surface_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_hemisphere_surface(state); }
1092
+ WP_API void builtin_sample_unit_hemisphere_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_hemisphere(state); }
1093
+ WP_API void builtin_sample_unit_square_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_square(state); }
1094
+ WP_API void builtin_sample_unit_cube_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_cube(state); }
1095
+ WP_API void builtin_poisson_uint32_float32(uint32 state, float32 lam, uint32* ret) { *ret = wp::poisson(state, lam); }
1096
+ WP_API void builtin_noise_uint32_float32(uint32 state, float32 x, float* ret) { *ret = wp::noise(state, x); }
1097
+ WP_API void builtin_noise_uint32_vec2f(uint32 state, vec2f& xy, float* ret) { *ret = wp::noise(state, xy); }
1098
+ WP_API void builtin_noise_uint32_vec3f(uint32 state, vec3f& xyz, float* ret) { *ret = wp::noise(state, xyz); }
1099
+ WP_API void builtin_noise_uint32_vec4f(uint32 state, vec4f& xyzt, float* ret) { *ret = wp::noise(state, xyzt); }
1100
+ WP_API void builtin_pnoise_uint32_float32_int32(uint32 state, float32 x, int32 px, float* ret) { *ret = wp::pnoise(state, x, px); }
1101
+ WP_API void builtin_pnoise_uint32_vec2f_int32_int32(uint32 state, vec2f& xy, int32 px, int32 py, float* ret) { *ret = wp::pnoise(state, xy, px, py); }
1102
+ WP_API void builtin_pnoise_uint32_vec3f_int32_int32_int32(uint32 state, vec3f& xyz, int32 px, int32 py, int32 pz, float* ret) { *ret = wp::pnoise(state, xyz, px, py, pz); }
1103
+ WP_API void builtin_pnoise_uint32_vec4f_int32_int32_int32_int32(uint32 state, vec4f& xyzt, int32 px, int32 py, int32 pz, int32 pt, float* ret) { *ret = wp::pnoise(state, xyzt, px, py, pz, pt); }
1104
+ WP_API void builtin_curlnoise_uint32_vec2f_uint32_float32_float32(uint32 state, vec2f& xy, uint32 octaves, float32 lacunarity, float32 gain, vec2f* ret) { *ret = wp::curlnoise(state, xy, octaves, lacunarity, gain); }
1105
+ WP_API void builtin_curlnoise_uint32_vec3f_uint32_float32_float32(uint32 state, vec3f& xyz, uint32 octaves, float32 lacunarity, float32 gain, vec3f* ret) { *ret = wp::curlnoise(state, xyz, octaves, lacunarity, gain); }
1106
+ WP_API void builtin_curlnoise_uint32_vec4f_uint32_float32_float32(uint32 state, vec4f& xyzt, uint32 octaves, float32 lacunarity, float32 gain, vec3f* ret) { *ret = wp::curlnoise(state, xyzt, octaves, lacunarity, gain); }
1107
1107
  WP_API void builtin_extract_vec2h_int32(vec2h& a, int32 i, float16* ret) { *ret = wp::extract(a, i); }
1108
1108
  WP_API void builtin_extract_vec3h_int32(vec3h& a, int32 i, float16* ret) { *ret = wp::extract(a, i); }
1109
1109
  WP_API void builtin_extract_vec4h_int32(vec4h& a, int32 i, float16* ret) { *ret = wp::extract(a, i); }
warp/native/intersect.h CHANGED
@@ -156,6 +156,23 @@ CUDA_CALLABLE inline bool intersect_ray_aabb(const vec3& pos, const vec3& rcp_di
156
156
  return hit;
157
157
  }
158
158
 
159
+ CUDA_CALLABLE inline bool intersect_aabb_aabb(const vec3& a_lower, const vec3& a_upper, const vec3& b_lower, const vec3& b_upper)
160
+ {
161
+ if (a_lower[0] > b_upper[0] ||
162
+ a_lower[1] > b_upper[1] ||
163
+ a_lower[2] > b_upper[2] ||
164
+ a_upper[0] < b_lower[0] ||
165
+ a_upper[1] < b_lower[1] ||
166
+ a_upper[2] < b_lower[2])
167
+ {
168
+ return false;
169
+ }
170
+ else
171
+ {
172
+ return true;
173
+ }
174
+ }
175
+
159
176
 
160
177
  // Moller and Trumbore's method
161
178
  CUDA_CALLABLE inline bool intersect_ray_tri_moller(const vec3& p, const vec3& dir, const vec3& a, const vec3& b, const vec3& c, float& t, float& u, float& v, float& w, float& sign, vec3* normal)
warp/native/mat.h CHANGED
@@ -394,6 +394,36 @@ inline CUDA_CALLABLE void adj_index(const mat_t<Rows,Cols,Type>& m, int row, int
394
394
  }
395
395
 
396
396
 
397
+ template<unsigned Rows, unsigned Cols, typename Type>
398
+ inline CUDA_CALLABLE void augassign_add(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
399
+ {
400
+ m.data[row][col] += value;
401
+ }
402
+
403
+
404
+ template<unsigned Rows, unsigned Cols, typename Type>
405
+ inline CUDA_CALLABLE void adj_augassign_add(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
406
+ mat_t<Rows,Cols,Type>& adj_m, int adj_row, int adj_col, Type& adj_value)
407
+ {
408
+ adj_value += adj_m.data[row][col];
409
+ }
410
+
411
+
412
+ template<unsigned Rows, unsigned Cols, typename Type>
413
+ inline CUDA_CALLABLE void augassign_sub(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
414
+ {
415
+ m.data[row][col] -= value;
416
+ }
417
+
418
+
419
+ template<unsigned Rows, unsigned Cols, typename Type>
420
+ inline CUDA_CALLABLE void adj_augassign_sub(mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
421
+ mat_t<Rows,Cols,Type>& adj_m, int adj_row, int adj_col, Type& adj_value)
422
+ {
423
+ adj_value -= adj_m.data[row][col];
424
+ }
425
+
426
+
397
427
  template<unsigned Rows, unsigned Cols, typename Type>
398
428
  inline CUDA_CALLABLE mat_t<Rows,Cols,Type> assign(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
399
429
  {
@@ -1650,4 +1680,15 @@ inline CUDA_CALLABLE void adj_mat44(float m00, float m01, float m02, float m03,
1650
1680
  a33 += adj_ret.data[3][3];
1651
1681
  }
1652
1682
 
1683
+ template<unsigned Rows, unsigned Cols, typename Type>
1684
+ CUDA_CALLABLE inline int len(const mat_t<Rows,Cols,Type>& x)
1685
+ {
1686
+ return Rows;
1687
+ }
1688
+
1689
+ template<unsigned Rows, unsigned Cols, typename Type>
1690
+ CUDA_CALLABLE inline void adj_len(const mat_t<Rows,Cols,Type>& x, mat_t<Rows,Cols,Type>& adj_x, const int& adj_ret)
1691
+ {
1692
+ }
1693
+
1653
1694
  } // namespace wp
warp/native/mathdx.cpp CHANGED
@@ -32,6 +32,7 @@ bool cuda_compile_fft(
32
32
  }
33
33
 
34
34
  WP_API bool cuda_compile_dot(
35
+ const char* fatbin_output_path,
35
36
  const char* ltoir_output_path,
36
37
  const char* symbol_name,
37
38
  int num_include_dirs,
@@ -54,6 +55,24 @@ WP_API bool cuda_compile_dot(
54
55
  return false;
55
56
  }
56
57
 
58
+ WP_API bool cuda_compile_solver(
59
+ const char* ltoir_output_path,
60
+ const char* symbol_name,
61
+ int num_include_dirs,
62
+ const char** include_dirs,
63
+ const char* mathdx_include_dir,
64
+ int arch,
65
+ int M,
66
+ int N,
67
+ int function,
68
+ int precision,
69
+ int fill_mode,
70
+ int num_threads)
71
+ {
72
+ printf("CUDA is disabled and/or Warp was not compiled with MathDx support.\n");
73
+ return false;
74
+ }
75
+
57
76
  } // extern "C"
58
77
 
59
78
  #endif // !WP_ENABLE_CUDA || !WP_ENABLE_MATHDX
warp/native/mesh.cpp CHANGED
@@ -67,11 +67,28 @@ void bvh_refit_with_solid_angle_recursive_host(BVH& bvh, int index, Mesh& mesh)
67
67
  if (lower.b)
68
68
  {
69
69
  // Leaf, compute properties
70
- const int leaf_index = lower.i;
71
-
72
- precompute_triangle_solid_angle_props(mesh.points[mesh.indices[leaf_index*3+0]], mesh.points[mesh.indices[leaf_index*3+1]], mesh.points[mesh.indices[leaf_index*3+2]], mesh.solid_angle_props[index]);
73
- (vec3&)lower = mesh.solid_angle_props[index].box.lower;
74
- (vec3&)upper = mesh.solid_angle_props[index].box.upper;
70
+ const int start = lower.i;
71
+ const int end = upper.i;
72
+ // loops through primitives in the leaf
73
+ for (int primitive_counter = start; primitive_counter < end; primitive_counter++)
74
+ {
75
+ int primitive_index = mesh.bvh.primitive_indices[primitive_counter];
76
+ if (primitive_counter == start)
77
+ {
78
+ precompute_triangle_solid_angle_props(mesh.points[mesh.indices[primitive_index * 3 + 0]], mesh.points[mesh.indices[primitive_index * 3 + 1]],
79
+ mesh.points[mesh.indices[primitive_index * 3 + 2]], mesh.solid_angle_props[index]);
80
+ }
81
+ else
82
+ {
83
+ SolidAngleProps triangle_solid_angle_props;
84
+ precompute_triangle_solid_angle_props(mesh.points[mesh.indices[primitive_index * 3 + 0]], mesh.points[mesh.indices[primitive_index * 3 + 1]],
85
+ mesh.points[mesh.indices[primitive_index * 3 + 2]], triangle_solid_angle_props);
86
+ mesh.solid_angle_props[index] = combine_precomputed_solid_angle_props(&mesh.solid_angle_props[index], &triangle_solid_angle_props);
87
+ }
88
+ }
89
+
90
+ (vec3&)lower = mesh.solid_angle_props[index].box.lower;
91
+ (vec3&)upper = mesh.solid_angle_props[index].box.upper;
75
92
  }
76
93
  else
77
94
  {
@@ -109,7 +126,7 @@ void bvh_refit_with_solid_angle_host(BVH& bvh, Mesh& mesh)
109
126
  bvh_refit_with_solid_angle_recursive_host(bvh, 0, mesh);
110
127
  }
111
128
 
112
- uint64_t mesh_create_host(array_t<wp::vec3> points, array_t<wp::vec3> velocities, array_t<int> indices, int num_points, int num_tris, int support_winding_number)
129
+ uint64_t mesh_create_host(array_t<wp::vec3> points, array_t<wp::vec3> velocities, array_t<int> indices, int num_points, int num_tris, int support_winding_number, int constructor_type)
113
130
  {
114
131
  Mesh* m = new Mesh(points, velocities, indices, num_points, num_tris);
115
132
 
@@ -137,7 +154,7 @@ uint64_t mesh_create_host(array_t<wp::vec3> points, array_t<wp::vec3> velocities
137
154
  }
138
155
  m->average_edge_length = sum / (num_tris*3);
139
156
 
140
- wp::bvh_create_host(m->lowers, m->uppers, num_tris, m->bvh);
157
+ wp::bvh_create_host(m->lowers, m->uppers, num_tris, constructor_type, m->bvh);
141
158
 
142
159
  if (support_winding_number)
143
160
  {
@@ -230,7 +247,7 @@ void mesh_set_velocities_host(uint64_t id, wp::array_t<wp::vec3> velocities)
230
247
  #if !WP_ENABLE_CUDA
231
248
 
232
249
 
233
- WP_API uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::array_t<wp::vec3> velocities, wp::array_t<int> tris, int num_points, int num_tris, int support_winding_number) { return 0; }
250
+ WP_API uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::array_t<wp::vec3> velocities, wp::array_t<int> tris, int num_points, int num_tris, int support_winding_number, int constructor_type) { return 0; }
234
251
  WP_API void mesh_destroy_device(uint64_t id) {}
235
252
  WP_API void mesh_refit_device(uint64_t id) {}
236
253
  WP_API void mesh_set_points_device(uint64_t id, wp::array_t<wp::vec3> points) {};
warp/native/mesh.cu CHANGED
@@ -63,23 +63,61 @@ __global__ void compute_average_mesh_edge_length(int n, float* sum_edge_lengths,
63
63
  m->average_edge_length = sum_edge_lengths[n - 1] / (3*n);
64
64
  }
65
65
 
66
- __global__ void bvh_refit_with_solid_angle_kernel(int n, const int* __restrict__ parents, int* __restrict__ child_count, BVHPackedNodeHalf* __restrict__ lowers, BVHPackedNodeHalf* __restrict__ uppers, const vec3* points, const int* indices, SolidAngleProps* solid_angle_props)
66
+ __global__ void bvh_refit_with_solid_angle_kernel(int n, const int* __restrict__ parents,
67
+ int* __restrict__ child_count, BVHPackedNodeHalf* __restrict__ node_lowers, BVHPackedNodeHalf* __restrict__ node_uppers,
68
+ const vec3* points, const int* indices, const int* primitive_indices, SolidAngleProps* solid_angle_props)
67
69
  {
68
70
  int index = blockDim.x*blockIdx.x + threadIdx.x;
69
71
 
70
72
  if (index < n)
71
73
  {
72
- bool leaf = lowers[index].b;
74
+ bool leaf = node_lowers[index].b;
75
+ int parent = parents[index];
73
76
 
74
77
  if (leaf)
75
78
  {
79
+ BVHPackedNodeHalf& lower = node_lowers[index];
80
+ BVHPackedNodeHalf& upper = node_uppers[index];
81
+
76
82
  // update the leaf node
77
- const int leaf_index = lowers[index].i;
78
- precompute_triangle_solid_angle_props(points[indices[leaf_index*3+0]], points[indices[leaf_index*3+1]], points[indices[leaf_index*3+2]], solid_angle_props[index]);
83
+ bool true_leaf = true;
84
+
85
+ if (parent != -1)
86
+ {
87
+ true_leaf = !node_lowers[parent].b;
88
+ }
79
89
 
80
- make_node(lowers+index, solid_angle_props[index].box.lower, leaf_index, true);
81
- make_node(uppers+index, solid_angle_props[index].box.upper, 0, false);
90
+ if (true_leaf)
91
+ {
92
+ SolidAngleProps node_solid_angle_props;
93
+
94
+ const int start = lower.i;
95
+ const int end = upper.i;
96
+
97
+ // loops through primitives in the leaf
98
+ for (int primitive_counter = start; primitive_counter < end; primitive_counter++)
99
+ {
100
+ int primitive_index = primitive_indices[primitive_counter];
101
+ if (primitive_counter == start)
102
+ {
103
+ precompute_triangle_solid_angle_props(points[indices[primitive_index * 3 + 0]], points[indices[primitive_index * 3 + 1]],
104
+ points[indices[primitive_index * 3 + 2]], node_solid_angle_props);
105
+ }
106
+ else
107
+ {
108
+ SolidAngleProps triangle_solid_angle_props;
109
+ precompute_triangle_solid_angle_props(points[indices[primitive_index * 3 + 0]], points[indices[primitive_index * 3 + 1]],
110
+ points[indices[primitive_index * 3 + 2]], triangle_solid_angle_props);
111
+ node_solid_angle_props = combine_precomputed_solid_angle_props(&node_solid_angle_props, &triangle_solid_angle_props);
112
+ }
113
+ }
114
+
115
+ (vec3&)lower = node_solid_angle_props.box.lower;
116
+ (vec3&)upper = node_solid_angle_props.box.upper;
117
+ solid_angle_props[index] = node_solid_angle_props;
118
+ }
82
119
  }
120
+
83
121
  else
84
122
  {
85
123
  // only keep leaf threads
@@ -89,7 +127,7 @@ __global__ void bvh_refit_with_solid_angle_kernel(int n, const int* __restrict__
89
127
  // update hierarchy
90
128
  for (;;)
91
129
  {
92
- int parent = parents[index];
130
+ parent = parents[index];
93
131
 
94
132
  // reached root
95
133
  if (parent == -1)
@@ -104,41 +142,74 @@ __global__ void bvh_refit_with_solid_angle_kernel(int n, const int* __restrict__
104
142
  // then update its bounds and move onto the next parent in the hierarchy
105
143
  if (finished == 1)
106
144
  {
107
- //printf("Compute non-leaf at %d\n", index);
108
- const int left_child = lowers[parent].i;
109
- const int right_child = uppers[parent].i;
110
-
111
- vec3 left_lower = vec3(lowers[left_child].x,
112
- lowers[left_child].y,
113
- lowers[left_child].z);
114
-
115
- vec3 left_upper = vec3(uppers[left_child].x,
116
- uppers[left_child].y,
117
- uppers[left_child].z);
118
-
119
- vec3 right_lower = vec3(lowers[right_child].x,
120
- lowers[right_child].y,
121
- lowers[right_child].z);
122
-
123
-
124
- vec3 right_upper = vec3(uppers[right_child].x,
125
- uppers[right_child].y,
126
- uppers[right_child].z);
127
-
128
- // union of child bounds
129
- vec3 lower = min(left_lower, right_lower);
130
- vec3 upper = max(left_upper, right_upper);
131
-
132
- // write new BVH nodes
133
- make_node(lowers+parent, lower, left_child, false);
134
- make_node(uppers+parent, upper, right_child, false);
135
-
136
- // combine
137
- SolidAngleProps* left_child_data = &solid_angle_props[left_child];
138
- SolidAngleProps* right_child_data = (left_child != right_child) ? &solid_angle_props[right_child] : NULL;
139
-
140
- combine_precomputed_solid_angle_props(solid_angle_props[parent], left_child_data, right_child_data);
141
-
145
+ BVHPackedNodeHalf& parent_lower = node_lowers[parent];
146
+ BVHPackedNodeHalf& parent_upper = node_uppers[parent];
147
+ if (parent_lower.b)
148
+ // a packed leaf node can still be a parent in LBVH, we need to recompute its bounds
149
+ // since we've lost its left and right child node index in the muting process
150
+ {
151
+ int parent_parent = parents[parent];;
152
+ // only need to compute bound when this is a valid leaf node
153
+ bool true_leaf = true;
154
+
155
+ if (parent_parent != -1)
156
+ {
157
+ true_leaf = !node_lowers[parent_parent].b;
158
+ }
159
+
160
+ if (true_leaf)
161
+ {
162
+ SolidAngleProps node_solid_angle_props;
163
+ const int start = parent_lower.i;
164
+ const int end = parent_upper.i;
165
+ // loops through primitives in the leaf
166
+ for (int primitive_counter = start; primitive_counter < end; primitive_counter++)
167
+ {
168
+ int primitive_index = primitive_indices[primitive_counter];
169
+ if (primitive_counter == start)
170
+ {
171
+ precompute_triangle_solid_angle_props(points[indices[primitive_index * 3 + 0]], points[indices[primitive_index * 3 + 1]],
172
+ points[indices[primitive_index * 3 + 2]], node_solid_angle_props);
173
+ }
174
+ else
175
+ {
176
+ SolidAngleProps triangle_solid_angle_props;
177
+ precompute_triangle_solid_angle_props(points[indices[primitive_index * 3 + 0]], points[indices[primitive_index * 3 + 1]],
178
+ points[indices[primitive_index * 3 + 2]], triangle_solid_angle_props);
179
+ node_solid_angle_props = combine_precomputed_solid_angle_props(&node_solid_angle_props, &triangle_solid_angle_props);
180
+ }
181
+ }
182
+
183
+ (vec3&)parent_lower = node_solid_angle_props.box.lower;
184
+ (vec3&)parent_upper = node_solid_angle_props.box.upper;
185
+ solid_angle_props[parent] = node_solid_angle_props;
186
+ }
187
+ }
188
+ else
189
+ {
190
+ //printf("Compute non-leaf at %d\n", index);
191
+ const int left_child = node_lowers[parent].i;
192
+ const int right_child = node_uppers[parent].i;
193
+
194
+ vec3 left_lower = (vec3&)(node_lowers[left_child]);
195
+ vec3 left_upper = (vec3&)(node_uppers[left_child]);
196
+ vec3 right_lower = (vec3&)(node_lowers[right_child]);
197
+ vec3 right_upper = (vec3&)(node_uppers[right_child]);
198
+
199
+ // union of child bounds
200
+ vec3 lower = min(left_lower, right_lower);
201
+ vec3 upper = max(left_upper, right_upper);
202
+
203
+ // write new BVH nodes
204
+ (vec3&)parent_lower = lower;
205
+ (vec3&)parent_upper = upper;
206
+
207
+ // combine
208
+ SolidAngleProps* left_child_data = &solid_angle_props[left_child];
209
+ SolidAngleProps* right_child_data = (left_child != right_child) ? &solid_angle_props[right_child] : NULL;
210
+
211
+ combine_precomputed_solid_angle_props(solid_angle_props[parent], left_child_data, right_child_data);
212
+ }
142
213
  // move onto processing the parent
143
214
  index = parent;
144
215
  }
@@ -157,15 +228,15 @@ void bvh_refit_with_solid_angle_device(BVH& bvh, Mesh& mesh)
157
228
  ContextGuard guard(bvh.context);
158
229
 
159
230
  // clear child counters
160
- memset_device(WP_CURRENT_CONTEXT, bvh.node_counts, 0, sizeof(int)*bvh.max_nodes);
161
-
162
- wp_launch_device(WP_CURRENT_CONTEXT, bvh_refit_with_solid_angle_kernel, bvh.num_items, (bvh.num_items, bvh.node_parents, bvh.node_counts, bvh.node_lowers, bvh.node_uppers, mesh.points, mesh.indices, mesh.solid_angle_props));
231
+ memset_device(WP_CURRENT_CONTEXT, bvh.node_counts, 0, sizeof(int) * bvh.max_nodes);
232
+ wp_launch_device(WP_CURRENT_CONTEXT, bvh_refit_with_solid_angle_kernel, bvh.num_leaf_nodes,
233
+ (bvh.num_leaf_nodes, bvh.node_parents, bvh.node_counts, bvh.node_lowers, bvh.node_uppers, mesh.points, mesh.indices, bvh.primitive_indices, mesh.solid_angle_props));
163
234
  }
164
235
 
165
236
  } // namespace wp
166
237
 
167
238
 
168
- uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::array_t<wp::vec3> velocities, wp::array_t<int> indices, int num_points, int num_tris, int support_winding_number)
239
+ uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::array_t<wp::vec3> velocities, wp::array_t<int> indices, int num_points, int num_tris, int support_winding_number, int constructor_type)
169
240
  {
170
241
  ContextGuard guard(context);
171
242
 
@@ -173,55 +244,38 @@ uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::arr
173
244
 
174
245
  mesh.context = context ? context : cuda_context_get_current();
175
246
 
247
+ // create lower upper arrays expected by GPU BVH builder
248
+ mesh.lowers = (wp::vec3*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::vec3)*num_tris);
249
+ mesh.uppers = (wp::vec3*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::vec3)*num_tris);
250
+
251
+ if (support_winding_number)
176
252
  {
177
- // // todo: BVH creation only on CPU at the moment so temporarily bring all the data back to host
178
- // vec3* points_host = (vec3*)alloc_host(sizeof(vec3)*num_points);
179
- // int* indices_host = (int*)alloc_host(sizeof(int)*num_tris*3);
180
- // bounds3* bounds_host = (bounds3*)alloc_host(sizeof(bounds3)*num_tris);
181
-
182
- // memcpy_d2h(WP_CURRENT_CONTEXT, points_host, points, sizeof(vec3)*num_points);
183
- // memcpy_d2h(WP_CURRENT_CONTEXT, indices_host, indices, sizeof(int)*num_tris*3);
184
- // cuda_context_synchronize(WP_CURRENT_CONTEXT);
185
-
186
- // float sum = 0.0;
187
- // for (int i=0; i < num_tris; ++i)
188
- // {
189
- // bounds_host[i] = bounds3();
190
- // wp::vec3 p0 = points_host[indices_host[i*3+0]];
191
- // wp::vec3 p1 = points_host[indices_host[i*3+1]];
192
- // wp::vec3 p2 = points_host[indices_host[i*3+2]];
193
- // bounds_host[i].add_point(p0);
194
- // bounds_host[i].add_point(p1);
195
- // bounds_host[i].add_point(p2);
196
- // sum += length(p0-p1) + length(p0-p2) + length(p2-p1);
197
- // }
198
- // mesh.average_edge_length = sum / (num_tris*3);
199
-
200
- // BVH bvh_host = bvh_create(bounds_host, num_tris);
201
- // BVH bvh_device = bvh_clone(WP_CURRENT_CONTEXT, bvh_host);
202
-
203
- // bvh_destroy_host(bvh_host);
204
-
205
- // create lower upper arrays expected by GPU BVH builder
206
- mesh.lowers = (wp::vec3*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::vec3)*num_tris);
207
- mesh.uppers = (wp::vec3*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::vec3)*num_tris);
208
-
209
- wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_triangle_bounds, num_tris, (num_tris, points.data, indices.data, mesh.lowers, mesh.uppers));
210
-
211
- wp::bvh_create_device(mesh.context, mesh.lowers, mesh.uppers, num_tris, mesh.bvh);
212
-
213
- if (support_winding_number)
214
- {
215
- int num_bvh_nodes = 2*num_tris;
216
- mesh.solid_angle_props = (wp::SolidAngleProps*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::SolidAngleProps)*num_bvh_nodes);
217
- }
253
+ int num_bvh_nodes = 2 * num_tris;
254
+ mesh.solid_angle_props = (wp::SolidAngleProps*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::SolidAngleProps) * num_bvh_nodes);
218
255
  }
219
256
 
220
257
  wp::Mesh* mesh_device = (wp::Mesh*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::Mesh));
221
258
  memcpy_h2d(WP_CURRENT_CONTEXT, mesh_device, &mesh, sizeof(wp::Mesh));
222
-
259
+
223
260
  // save descriptor
224
261
  uint64_t mesh_id = (uint64_t)mesh_device;
262
+
263
+ // we compute mesh the average edge length
264
+ // for use in mesh_query_point_sign_normal()
265
+ // since it relies on an epsilon for welding
266
+ // reuse bounds memory temporarily for computing edge lengths
267
+ float* length_tmp_ptr = (float*)mesh.lowers;
268
+ wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_mesh_edge_lengths, mesh.num_tris, (mesh.num_tris, mesh.points, mesh.indices, length_tmp_ptr));
269
+ scan_device(length_tmp_ptr, length_tmp_ptr, mesh.num_tris, true);
270
+ wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_average_mesh_edge_length, 1, (mesh.num_tris, length_tmp_ptr, mesh_device));
271
+
272
+ // compute triangle bound and construct BVH
273
+ wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_triangle_bounds, mesh.num_tris, (mesh.num_tris, mesh.points, mesh.indices, mesh.lowers, mesh.uppers));
274
+ wp::bvh_create_device(mesh.context, mesh.lowers, mesh.uppers, num_tris, constructor_type, mesh.bvh);
275
+
276
+ // we need to overwrite mesh.bvh because it is not initialized when we construct it on device
277
+ memcpy_h2d(WP_CURRENT_CONTEXT, &(mesh_device->bvh), &mesh.bvh, sizeof(wp::BVH));
278
+
225
279
  mesh_add_descriptor(mesh_id, mesh);
226
280
 
227
281
  if (support_winding_number)
@@ -263,23 +317,21 @@ void mesh_refit_device(uint64_t id)
263
317
  {
264
318
  ContextGuard guard(m.context);
265
319
 
320
+ // we compute mesh the average edge length
321
+ // for use in mesh_query_point_sign_normal()
322
+ // since it relies on an epsilon for welding
323
+
324
+ // reuse bounds memory temporarily for computing edge lengths
325
+ float* length_tmp_ptr = (float*)m.lowers;
326
+ wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_mesh_edge_lengths, m.num_tris, (m.num_tris, m.points, m.indices, length_tmp_ptr));
327
+
328
+ scan_device(length_tmp_ptr, length_tmp_ptr, m.num_tris, true);
329
+
330
+ wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_average_mesh_edge_length, 1, (m.num_tris, length_tmp_ptr, (wp::Mesh*)id));
266
331
  wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_triangle_bounds, m.num_tris, (m.num_tris, m.points, m.indices, m.lowers, m.uppers));
267
332
 
268
333
  if (m.solid_angle_props)
269
334
  {
270
- // we compute mesh the average edge length
271
- // for use in mesh_query_point_sign_normal()
272
- // since it relies on an epsilon for welding
273
-
274
- // reuse bounds memory temporarily for computing edge lengths
275
- float* length_tmp_ptr = (float*)m.lowers;
276
- wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_mesh_edge_lengths, m.num_tris, (m.num_tris, m.points, m.indices, length_tmp_ptr));
277
-
278
- scan_device(length_tmp_ptr, length_tmp_ptr, m.num_tris, true);
279
-
280
- wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_average_mesh_edge_length, 1, (m.num_tris, length_tmp_ptr, (wp::Mesh*)id));
281
- wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_triangle_bounds, m.num_tris, (m.num_tris, m.points, m.indices, m.lowers, m.uppers));
282
-
283
335
  // update solid angle data
284
336
  bvh_refit_with_solid_angle_device(m.bvh, m);
285
337
  }