warp-lang 1.5.1__py3-none-manylinux2014_aarch64.whl → 1.6.0__py3-none-manylinux2014_aarch64.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (123)
  1. warp/__init__.py +5 -0
  2. warp/autograd.py +414 -191
  3. warp/bin/warp-clang.so +0 -0
  4. warp/bin/warp.so +0 -0
  5. warp/build.py +40 -12
  6. warp/build_dll.py +13 -6
  7. warp/builtins.py +1076 -480
  8. warp/codegen.py +240 -119
  9. warp/config.py +1 -1
  10. warp/context.py +298 -84
  11. warp/examples/assets/square_cloth.usd +0 -0
  12. warp/examples/benchmarks/benchmark_gemm.py +27 -18
  13. warp/examples/benchmarks/benchmark_interop_paddle.py +3 -3
  14. warp/examples/benchmarks/benchmark_interop_torch.py +3 -3
  15. warp/examples/core/example_torch.py +18 -34
  16. warp/examples/fem/example_apic_fluid.py +1 -0
  17. warp/examples/fem/example_mixed_elasticity.py +1 -1
  18. warp/examples/optim/example_bounce.py +1 -1
  19. warp/examples/optim/example_cloth_throw.py +1 -1
  20. warp/examples/optim/example_diffray.py +4 -15
  21. warp/examples/optim/example_drone.py +1 -1
  22. warp/examples/optim/example_softbody_properties.py +392 -0
  23. warp/examples/optim/example_trajectory.py +1 -3
  24. warp/examples/optim/example_walker.py +5 -0
  25. warp/examples/sim/example_cartpole.py +0 -2
  26. warp/examples/sim/example_cloth_self_contact.py +260 -0
  27. warp/examples/sim/example_granular_collision_sdf.py +4 -5
  28. warp/examples/sim/example_jacobian_ik.py +0 -2
  29. warp/examples/sim/example_quadruped.py +5 -2
  30. warp/examples/tile/example_tile_cholesky.py +79 -0
  31. warp/examples/tile/example_tile_convolution.py +2 -2
  32. warp/examples/tile/example_tile_fft.py +2 -2
  33. warp/examples/tile/example_tile_filtering.py +3 -3
  34. warp/examples/tile/example_tile_matmul.py +4 -4
  35. warp/examples/tile/example_tile_mlp.py +12 -12
  36. warp/examples/tile/example_tile_nbody.py +180 -0
  37. warp/examples/tile/example_tile_walker.py +319 -0
  38. warp/math.py +147 -0
  39. warp/native/array.h +12 -0
  40. warp/native/builtin.h +0 -1
  41. warp/native/bvh.cpp +149 -70
  42. warp/native/bvh.cu +287 -68
  43. warp/native/bvh.h +195 -85
  44. warp/native/clang/clang.cpp +5 -1
  45. warp/native/cuda_util.cpp +35 -0
  46. warp/native/cuda_util.h +5 -0
  47. warp/native/exports.h +40 -40
  48. warp/native/intersect.h +17 -0
  49. warp/native/mat.h +41 -0
  50. warp/native/mathdx.cpp +19 -0
  51. warp/native/mesh.cpp +25 -8
  52. warp/native/mesh.cu +153 -101
  53. warp/native/mesh.h +482 -403
  54. warp/native/quat.h +40 -0
  55. warp/native/solid_angle.h +7 -0
  56. warp/native/sort.cpp +85 -0
  57. warp/native/sort.cu +34 -0
  58. warp/native/sort.h +3 -1
  59. warp/native/spatial.h +11 -0
  60. warp/native/tile.h +1185 -664
  61. warp/native/tile_reduce.h +8 -6
  62. warp/native/vec.h +41 -0
  63. warp/native/warp.cpp +8 -1
  64. warp/native/warp.cu +263 -40
  65. warp/native/warp.h +19 -5
  66. warp/optim/linear.py +22 -4
  67. warp/render/render_opengl.py +124 -59
  68. warp/sim/__init__.py +6 -1
  69. warp/sim/collide.py +270 -26
  70. warp/sim/integrator_euler.py +25 -7
  71. warp/sim/integrator_featherstone.py +154 -35
  72. warp/sim/integrator_vbd.py +842 -40
  73. warp/sim/model.py +111 -53
  74. warp/stubs.py +248 -115
  75. warp/tape.py +28 -30
  76. warp/tests/aux_test_module_unload.py +15 -0
  77. warp/tests/{test_sim_grad.py → flaky_test_sim_grad.py} +104 -63
  78. warp/tests/test_array.py +74 -0
  79. warp/tests/test_assert.py +242 -0
  80. warp/tests/test_codegen.py +14 -61
  81. warp/tests/test_collision.py +2 -2
  82. warp/tests/test_examples.py +9 -0
  83. warp/tests/test_grad_debug.py +87 -2
  84. warp/tests/test_hash_grid.py +1 -1
  85. warp/tests/test_ipc.py +116 -0
  86. warp/tests/test_mat.py +138 -167
  87. warp/tests/test_math.py +47 -1
  88. warp/tests/test_matmul.py +11 -7
  89. warp/tests/test_matmul_lite.py +4 -4
  90. warp/tests/test_mesh.py +84 -60
  91. warp/tests/test_mesh_query_aabb.py +165 -0
  92. warp/tests/test_mesh_query_point.py +328 -286
  93. warp/tests/test_mesh_query_ray.py +134 -121
  94. warp/tests/test_mlp.py +2 -2
  95. warp/tests/test_operators.py +43 -0
  96. warp/tests/test_overwrite.py +2 -2
  97. warp/tests/test_quat.py +77 -0
  98. warp/tests/test_reload.py +29 -0
  99. warp/tests/test_sim_grad_bounce_linear.py +204 -0
  100. warp/tests/test_static.py +16 -0
  101. warp/tests/test_tape.py +25 -0
  102. warp/tests/test_tile.py +134 -191
  103. warp/tests/test_tile_load.py +356 -0
  104. warp/tests/test_tile_mathdx.py +61 -8
  105. warp/tests/test_tile_mlp.py +17 -17
  106. warp/tests/test_tile_reduce.py +24 -18
  107. warp/tests/test_tile_shared_memory.py +66 -17
  108. warp/tests/test_tile_view.py +165 -0
  109. warp/tests/test_torch.py +35 -0
  110. warp/tests/test_utils.py +36 -24
  111. warp/tests/test_vec.py +110 -0
  112. warp/tests/unittest_suites.py +29 -4
  113. warp/tests/unittest_utils.py +30 -11
  114. warp/thirdparty/unittest_parallel.py +2 -2
  115. warp/types.py +409 -99
  116. warp/utils.py +9 -5
  117. {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/METADATA +68 -44
  118. {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/RECORD +121 -110
  119. {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/WHEEL +1 -1
  120. warp/examples/benchmarks/benchmark_tile.py +0 -179
  121. warp/native/tile_gemm.h +0 -341
  122. {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/LICENSE.md +0 -0
  123. {warp_lang-1.5.1.dist-info → warp_lang-1.6.0.dist-info}/top_level.txt +0 -0
warp/native/mesh.h CHANGED
@@ -80,58 +80,31 @@ CUDA_CALLABLE inline Mesh& operator += (Mesh& a, const Mesh& b) {
80
80
 
81
81
  CUDA_CALLABLE inline float distance_to_aabb_sq(const vec3& p, const vec3& lower, const vec3& upper)
82
82
  {
83
- vec3 cp = closest_point_to_aabb(p, lower, upper);
84
-
85
- return length_sq(p-cp);
83
+ const float dx = min(upper[0], max(lower[0], p[0])) - p[0];
84
+ const float dy = min(upper[1], max(lower[1], p[1])) - p[1];
85
+ const float dz = min(upper[2], max(lower[2], p[2])) - p[2];
86
+ return dx * dx + dy * dy + dz * dz;
86
87
  }
87
88
 
88
89
  CUDA_CALLABLE inline float furthest_distance_to_aabb_sq(const vec3& p, const vec3& lower, const vec3& upper)
89
90
  {
90
- vec3 c0 = vec3(lower[0], lower[1], lower[2]);
91
- vec3 c1 = vec3(lower[0], lower[1], upper[2]);
92
- vec3 c2 = vec3(lower[0], upper[1], lower[2]);
93
- vec3 c3 = vec3(lower[0], upper[1], upper[2]);
94
- vec3 c4 = vec3(upper[0], lower[1], lower[2]);
95
- vec3 c5 = vec3(upper[0], lower[1], upper[2]);
96
- vec3 c6 = vec3(upper[0], upper[1], lower[2]);
97
- vec3 c7 = vec3(upper[0], upper[1], upper[2]);
98
-
99
- float max_dist_sq = 0.0;
100
- float d;
101
-
102
- d = length_sq(p-c0);
103
- if (d > max_dist_sq)
104
- max_dist_sq = d;
105
-
106
- d = length_sq(p-c1);
107
- if (d > max_dist_sq)
108
- max_dist_sq = d;
109
-
110
- d = length_sq(p-c2);
111
- if (d > max_dist_sq)
112
- max_dist_sq = d;
113
-
114
- d = length_sq(p-c3);
115
- if (d > max_dist_sq)
116
- max_dist_sq = d;
117
-
118
- d = length_sq(p-c4);
119
- if (d > max_dist_sq)
120
- max_dist_sq = d;
121
-
122
- d = length_sq(p-c5);
123
- if (d > max_dist_sq)
124
- max_dist_sq = d;
125
-
126
- d = length_sq(p-c6);
127
- if (d > max_dist_sq)
128
- max_dist_sq = d;
129
-
130
- d = length_sq(p-c7);
131
- if (d > max_dist_sq)
132
- max_dist_sq = d;
133
-
134
- return max_dist_sq;
91
+ // X-axis
92
+ float dist_lower_x = fabs(p[0] - lower[0]);
93
+ float dist_upper_x = fabs(p[0] - upper[0]);
94
+ float corner_diff_x = (dist_lower_x > dist_upper_x) ? dist_lower_x : dist_upper_x;
95
+
96
+ // Y-axis
97
+ float dist_lower_y = fabs(p[1] - lower[1]);
98
+ float dist_upper_y = fabs(p[1] - upper[1]);
99
+ float corner_diff_y = (dist_lower_y > dist_upper_y) ? dist_lower_y : dist_upper_y;
100
+
101
+ // Z-axis
102
+ float dist_lower_z = fabs(p[2] - lower[2]);
103
+ float dist_upper_z = fabs(p[2] - upper[2]);
104
+ float corner_diff_z = (dist_lower_z > dist_upper_z) ? dist_lower_z : dist_upper_z;
105
+
106
+ // Calculate and return the distance
107
+ return corner_diff_x* corner_diff_x + corner_diff_y * corner_diff_y + corner_diff_z * corner_diff_z;
135
108
  }
136
109
 
137
110
  CUDA_CALLABLE inline float mesh_query_inside(uint64_t id, const vec3& p);
@@ -164,8 +137,8 @@ CUDA_CALLABLE inline bool mesh_query_point(uint64_t id, const vec3& point, float
164
137
  {
165
138
  const int nodeIndex = stack[--count];
166
139
 
167
- BVHPackedNodeHalf lower = mesh.bvh.node_lowers[nodeIndex];
168
- BVHPackedNodeHalf upper = mesh.bvh.node_uppers[nodeIndex];
140
+ BVHPackedNodeHalf lower = bvh_load_node(mesh.bvh.node_lowers, nodeIndex);
141
+ BVHPackedNodeHalf upper = bvh_load_node(mesh.bvh.node_uppers, nodeIndex);
169
142
 
170
143
  // re-test distance
171
144
  float node_dist_sq = distance_to_aabb_sq(point, vec3(lower.x, lower.y, lower.z), vec3(upper.x, upper.y, upper.z));
@@ -182,38 +155,44 @@ CUDA_CALLABLE inline bool mesh_query_point(uint64_t id, const vec3& point, float
182
155
 
183
156
  if (lower.b)
184
157
  {
185
- // compute closest point on tri
186
- int i = mesh.indices[left_index*3+0];
187
- int j = mesh.indices[left_index*3+1];
188
- int k = mesh.indices[left_index*3+2];
158
+ const int start = left_index;
159
+ const int end = right_index;
160
+ // loops through primitives in the leaf
161
+ for (int primitive_counter = start; primitive_counter < end; primitive_counter++)
162
+ {
163
+ int primitive_index = mesh.bvh.primitive_indices[primitive_counter];
164
+ int i = mesh.indices[primitive_index * 3 + 0];
165
+ int j = mesh.indices[primitive_index * 3 + 1];
166
+ int k = mesh.indices[primitive_index * 3 + 2];
189
167
 
190
- vec3 p = mesh.points[i];
191
- vec3 q = mesh.points[j];
192
- vec3 r = mesh.points[k];
193
-
194
- vec3 e0 = q-p;
195
- vec3 e1 = r-p;
196
- vec3 e2 = r-q;
197
- vec3 normal = cross(e0, e1);
198
-
199
- // sliver detection
200
- if (length(normal)/(dot(e0,e0) + dot(e1,e1) + dot(e2,e2)) < 1.e-6f)
201
- continue;
168
+ vec3 p = mesh.points[i];
169
+ vec3 q = mesh.points[j];
170
+ vec3 r = mesh.points[k];
202
171
 
203
- vec2 barycentric = closest_point_to_triangle(p, q, r, point);
204
- float u = barycentric[0];
205
- float v = barycentric[1];
206
- float w = 1.f - u - v;
207
- vec3 c = u*p + v*q + w*r;
172
+ vec3 e0 = q - p;
173
+ vec3 e1 = r - p;
174
+ vec3 e2 = r - q;
175
+ vec3 normal = cross(e0, e1);
208
176
 
209
- float dist_sq = length_sq(c-point);
177
+ // sliver detection
178
+ if (length(normal) / (dot(e0, e0) + dot(e1, e1) + dot(e2, e2)) < 1.e-6f)
179
+ continue;
210
180
 
211
- if (dist_sq < min_dist_sq)
212
- {
213
- min_dist_sq = dist_sq;
214
- min_v = v;
215
- min_w = w;
216
- min_face = left_index;
181
+ vec2 barycentric = closest_point_to_triangle(p, q, r, point);
182
+ float u = barycentric[0];
183
+ float v = barycentric[1];
184
+ float w = 1.f - u - v;
185
+ vec3 c = u * p + v * q + w * r;
186
+
187
+ float dist_sq = length_sq(c - point);
188
+
189
+ if (dist_sq < min_dist_sq)
190
+ {
191
+ min_dist_sq = dist_sq;
192
+ min_v = v;
193
+ min_w = w;
194
+ min_face = primitive_index;
195
+ }
217
196
  }
218
197
 
219
198
  #if BVH_DEBUG
@@ -237,36 +216,33 @@ CUDA_CALLABLE inline bool mesh_query_point(uint64_t id, const vec3& point, float
237
216
  }
238
217
  else
239
218
  {
240
- BVHPackedNodeHalf left_lower = mesh.bvh.node_lowers[left_index];
241
- BVHPackedNodeHalf left_upper = mesh.bvh.node_uppers[left_index];
219
+ BVHPackedNodeHalf left_lower = bvh_load_node(mesh.bvh.node_lowers, left_index);
220
+ BVHPackedNodeHalf left_upper = bvh_load_node(mesh.bvh.node_uppers, left_index);
242
221
 
243
- BVHPackedNodeHalf right_lower = mesh.bvh.node_lowers[right_index];
244
- BVHPackedNodeHalf right_upper = mesh.bvh.node_uppers[right_index];
222
+ BVHPackedNodeHalf right_lower = bvh_load_node(mesh.bvh.node_lowers, right_index);
223
+ BVHPackedNodeHalf right_upper = bvh_load_node(mesh.bvh.node_uppers, right_index);
245
224
 
246
225
  float left_dist_sq = distance_to_aabb_sq(point, vec3(left_lower.x, left_lower.y, left_lower.z), vec3(left_upper.x, left_upper.y, left_upper.z));
247
226
  float right_dist_sq = distance_to_aabb_sq(point, vec3(right_lower.x, right_lower.y, right_lower.z), vec3(right_upper.x, right_upper.y, right_upper.z));
248
227
 
249
- float left_score = left_dist_sq;
250
- float right_score = right_dist_sq;
251
-
252
- if (left_score < right_score)
228
+ wp::vec2i child_indices;
229
+ wp::vec2 child_dist;
230
+ if (left_dist_sq < right_dist_sq)
253
231
  {
254
- // put left on top of the stack
255
- if (right_dist_sq < min_dist_sq)
256
- stack[count++] = right_index;
257
-
258
- if (left_dist_sq < min_dist_sq)
259
- stack[count++] = left_index;
232
+ child_indices = wp::vec2i(right_index, left_index);
233
+ child_dist = wp::vec2(right_dist_sq, left_dist_sq);
260
234
  }
261
235
  else
262
236
  {
263
- // put right on top of the stack
264
- if (left_dist_sq < min_dist_sq)
265
- stack[count++] = left_index;
266
-
267
- if (right_dist_sq < min_dist_sq)
268
- stack[count++] = right_index;
237
+ child_indices = wp::vec2i(left_index, right_index);
238
+ child_dist = wp::vec2(left_dist_sq, right_dist_sq);
269
239
  }
240
+
241
+ if (child_dist[0] < min_dist_sq)
242
+ stack[count++] = child_indices[0];
243
+
244
+ if (child_dist[1] < min_dist_sq)
245
+ stack[count++] = child_indices[1];
270
246
  }
271
247
  }
272
248
 
@@ -349,9 +325,9 @@ CUDA_CALLABLE inline bool mesh_query_point_no_sign(uint64_t id, const vec3& poin
349
325
  {
350
326
  const int nodeIndex = stack[--count];
351
327
 
352
- BVHPackedNodeHalf lower = mesh.bvh.node_lowers[nodeIndex];
353
- BVHPackedNodeHalf upper = mesh.bvh.node_uppers[nodeIndex];
354
-
328
+ BVHPackedNodeHalf lower = bvh_load_node(mesh.bvh.node_lowers, nodeIndex);
329
+ BVHPackedNodeHalf upper = bvh_load_node(mesh.bvh.node_uppers, nodeIndex);
330
+
355
331
  // re-test distance
356
332
  float node_dist_sq = distance_to_aabb_sq(point, vec3(lower.x, lower.y, lower.z), vec3(upper.x, upper.y, upper.z));
357
333
  if (node_dist_sq > min_dist_sq)
@@ -366,39 +342,44 @@ CUDA_CALLABLE inline bool mesh_query_point_no_sign(uint64_t id, const vec3& poin
366
342
  const int right_index = upper.i;
367
343
 
368
344
  if (lower.b)
369
- {
370
- // compute closest point on tri
371
- int i = mesh.indices[left_index*3+0];
372
- int j = mesh.indices[left_index*3+1];
373
- int k = mesh.indices[left_index*3+2];
345
+ {
346
+ const int start = left_index;
347
+ const int end = right_index;
348
+ // loops through primitives in the leaf
349
+ for (int primitive_counter = start; primitive_counter < end; primitive_counter++)
350
+ {
351
+ int primitive_index = mesh.bvh.primitive_indices[primitive_counter];
352
+ int i = mesh.indices[primitive_index * 3 + 0];
353
+ int j = mesh.indices[primitive_index * 3 + 1];
354
+ int k = mesh.indices[primitive_index * 3 + 2];
374
355
 
375
- vec3 p = mesh.points[i];
376
- vec3 q = mesh.points[j];
377
- vec3 r = mesh.points[k];
378
-
379
- vec3 e0 = q-p;
380
- vec3 e1 = r-p;
381
- vec3 e2 = r-q;
382
- vec3 normal = cross(e0, e1);
383
-
384
- // sliver detection
385
- if (length(normal)/(dot(e0,e0) + dot(e1,e1) + dot(e2,e2)) < 1.e-6f)
386
- continue;
356
+ vec3 p = mesh.points[i];
357
+ vec3 q = mesh.points[j];
358
+ vec3 r = mesh.points[k];
359
+ vec3 e0 = q - p;
360
+ vec3 e1 = r - p;
361
+ vec3 e2 = r - q;
362
+ vec3 normal = cross(e0, e1);
387
363
 
388
- vec2 barycentric = closest_point_to_triangle(p, q, r, point);
389
- float u = barycentric[0];
390
- float v = barycentric[1];
391
- float w = 1.f - u - v;
392
- vec3 c = u*p + v*q + w*r;
364
+ // sliver detection
365
+ if (length(normal) / (dot(e0, e0) + dot(e1, e1) + dot(e2, e2)) < 1.e-6f)
366
+ continue;
393
367
 
394
- float dist_sq = length_sq(c-point);
368
+ vec2 barycentric = closest_point_to_triangle(p, q, r, point);
369
+ float u = barycentric[0];
370
+ float v = barycentric[1];
371
+ float w = 1.f - u - v;
372
+ vec3 c = u * p + v * q + w * r;
395
373
 
396
- if (dist_sq < min_dist_sq)
397
- {
398
- min_dist_sq = dist_sq;
399
- min_v = v;
400
- min_w = w;
401
- min_face = left_index;
374
+ float dist_sq = length_sq(c - point);
375
+
376
+ if (dist_sq < min_dist_sq)
377
+ {
378
+ min_dist_sq = dist_sq;
379
+ min_v = v;
380
+ min_w = w;
381
+ min_face = primitive_index;
382
+ }
402
383
  }
403
384
 
404
385
  #if BVH_DEBUG
@@ -422,36 +403,33 @@ CUDA_CALLABLE inline bool mesh_query_point_no_sign(uint64_t id, const vec3& poin
422
403
  }
423
404
  else
424
405
  {
425
- BVHPackedNodeHalf left_lower = mesh.bvh.node_lowers[left_index];
426
- BVHPackedNodeHalf left_upper = mesh.bvh.node_uppers[left_index];
406
+ BVHPackedNodeHalf left_lower = bvh_load_node(mesh.bvh.node_lowers, left_index);
407
+ BVHPackedNodeHalf left_upper = bvh_load_node(mesh.bvh.node_uppers, left_index);
427
408
 
428
- BVHPackedNodeHalf right_lower = mesh.bvh.node_lowers[right_index];
429
- BVHPackedNodeHalf right_upper = mesh.bvh.node_uppers[right_index];
409
+ BVHPackedNodeHalf right_lower = bvh_load_node(mesh.bvh.node_lowers, right_index);
410
+ BVHPackedNodeHalf right_upper = bvh_load_node(mesh.bvh.node_uppers, right_index);
430
411
 
431
412
  float left_dist_sq = distance_to_aabb_sq(point, vec3(left_lower.x, left_lower.y, left_lower.z), vec3(left_upper.x, left_upper.y, left_upper.z));
432
413
  float right_dist_sq = distance_to_aabb_sq(point, vec3(right_lower.x, right_lower.y, right_lower.z), vec3(right_upper.x, right_upper.y, right_upper.z));
433
414
 
434
- float left_score = left_dist_sq;
435
- float right_score = right_dist_sq;
436
-
437
- if (left_score < right_score)
415
+ wp::vec2i child_indices;
416
+ wp::vec2 child_dist;
417
+ if (left_dist_sq < right_dist_sq)
438
418
  {
439
- // put left on top of the stack
440
- if (right_dist_sq < min_dist_sq)
441
- stack[count++] = right_index;
442
-
443
- if (left_dist_sq < min_dist_sq)
444
- stack[count++] = left_index;
419
+ child_indices = wp::vec2i(right_index, left_index);
420
+ child_dist = wp::vec2(right_dist_sq, left_dist_sq);
445
421
  }
446
422
  else
447
423
  {
448
- // put right on top of the stack
449
- if (left_dist_sq < min_dist_sq)
450
- stack[count++] = left_index;
451
-
452
- if (right_dist_sq < min_dist_sq)
453
- stack[count++] = right_index;
424
+ child_indices = wp::vec2i(left_index, right_index);
425
+ child_dist = wp::vec2(left_dist_sq, right_dist_sq);
454
426
  }
427
+
428
+ if (child_dist[0] < min_dist_sq)
429
+ stack[count++] = child_indices[0];
430
+
431
+ if (child_dist[1] < min_dist_sq)
432
+ stack[count++] = child_indices[1];
455
433
  }
456
434
  }
457
435
 
@@ -513,10 +491,10 @@ CUDA_CALLABLE inline bool mesh_query_furthest_point_no_sign(uint64_t id, const v
513
491
 
514
492
  int count = 1;
515
493
 
516
- float max_dist_sq = min_dist*min_dist;
517
- int min_face;
518
- float min_v;
519
- float min_w;
494
+ float min_dist_sq = min_dist*min_dist;
495
+ int max_face;
496
+ float max_v;
497
+ float max_w;
520
498
 
521
499
  #if BVH_DEBUG
522
500
  int tests = 0;
@@ -531,14 +509,14 @@ CUDA_CALLABLE inline bool mesh_query_furthest_point_no_sign(uint64_t id, const v
531
509
  {
532
510
  const int nodeIndex = stack[--count];
533
511
 
534
- BVHPackedNodeHalf lower = mesh.bvh.node_lowers[nodeIndex];
535
- BVHPackedNodeHalf upper = mesh.bvh.node_uppers[nodeIndex];
512
+ BVHPackedNodeHalf lower = bvh_load_node(mesh.bvh.node_lowers, nodeIndex);
513
+ BVHPackedNodeHalf upper = bvh_load_node(mesh.bvh.node_uppers, nodeIndex);
536
514
 
537
515
  // re-test distance
538
516
  float node_dist_sq = furthest_distance_to_aabb_sq(point, vec3(lower.x, lower.y, lower.z), vec3(upper.x, upper.y, upper.z));
539
517
 
540
518
  // if maximum distance to this node is less than our existing furthest max then skip
541
- if (node_dist_sq < max_dist_sq)
519
+ if (node_dist_sq < min_dist_sq)
542
520
  {
543
521
  #if BVH_DEBUG
544
522
  secondary_culls++;
@@ -551,38 +529,44 @@ CUDA_CALLABLE inline bool mesh_query_furthest_point_no_sign(uint64_t id, const v
551
529
 
552
530
  if (lower.b)
553
531
  {
554
- // compute closest point on tri
555
- int i = mesh.indices[left_index*3+0];
556
- int j = mesh.indices[left_index*3+1];
557
- int k = mesh.indices[left_index*3+2];
532
+ const int start = left_index;
533
+ const int end = right_index;
534
+ // loops through primitives in the leaf
535
+ for (int primitive_counter = start; primitive_counter < end; primitive_counter++)
536
+ {
537
+ int primitive_index = mesh.bvh.primitive_indices[primitive_counter];
538
+ int i = mesh.indices[primitive_index * 3 + 0];
539
+ int j = mesh.indices[primitive_index * 3 + 1];
540
+ int k = mesh.indices[primitive_index * 3 + 2];
558
541
 
559
- vec3 p = mesh.points[i];
560
- vec3 q = mesh.points[j];
561
- vec3 r = mesh.points[k];
542
+ vec3 p = mesh.points[i];
543
+ vec3 q = mesh.points[j];
544
+ vec3 r = mesh.points[k];
562
545
 
563
- vec3 e0 = q-p;
564
- vec3 e1 = r-p;
565
- vec3 e2 = r-q;
566
- vec3 normal = cross(e0, e1);
546
+ vec3 e0 = q-p;
547
+ vec3 e1 = r-p;
548
+ vec3 e2 = r-q;
549
+ vec3 normal = cross(e0, e1);
567
550
 
568
- // sliver detection
569
- if (length(normal)/(dot(e0,e0) + dot(e1,e1) + dot(e2,e2)) < 1.e-6f)
570
- continue;
551
+ // sliver detection
552
+ if (length(normal)/(dot(e0,e0) + dot(e1,e1) + dot(e2,e2)) < 1.e-6f)
553
+ continue;
571
554
 
572
- vec2 barycentric = furthest_point_to_triangle(p, q, r, point);
573
- float u = barycentric[0];
574
- float v = barycentric[1];
575
- float w = 1.f - u - v;
576
- vec3 c = u*p + v*q + w*r;
555
+ vec2 barycentric = furthest_point_to_triangle(p, q, r, point);
556
+ float u = barycentric[0];
557
+ float v = barycentric[1];
558
+ float w = 1.f - u - v;
559
+ vec3 c = u*p + v*q + w*r;
577
560
 
578
- float dist_sq = length_sq(c-point);
561
+ float dist_sq = length_sq(c-point);
579
562
 
580
- if (dist_sq > max_dist_sq)
581
- {
582
- max_dist_sq = dist_sq;
583
- min_v = v;
584
- min_w = w;
585
- min_face = left_index;
563
+ if (dist_sq > min_dist_sq)
564
+ {
565
+ min_dist_sq = dist_sq;
566
+ max_v = v;
567
+ max_w = w;
568
+ max_face = primitive_index;
569
+ }
586
570
  }
587
571
 
588
572
  #if BVH_DEBUG
@@ -606,36 +590,33 @@ CUDA_CALLABLE inline bool mesh_query_furthest_point_no_sign(uint64_t id, const v
606
590
  }
607
591
  else
608
592
  {
609
- BVHPackedNodeHalf left_lower = mesh.bvh.node_lowers[left_index];
610
- BVHPackedNodeHalf left_upper = mesh.bvh.node_uppers[left_index];
593
+ BVHPackedNodeHalf left_lower = bvh_load_node(mesh.bvh.node_lowers, left_index);
594
+ BVHPackedNodeHalf left_upper = bvh_load_node(mesh.bvh.node_uppers, left_index);
611
595
 
612
- BVHPackedNodeHalf right_lower = mesh.bvh.node_lowers[right_index];
613
- BVHPackedNodeHalf right_upper = mesh.bvh.node_uppers[right_index];
596
+ BVHPackedNodeHalf right_lower = bvh_load_node(mesh.bvh.node_lowers, right_index);
597
+ BVHPackedNodeHalf right_upper = bvh_load_node(mesh.bvh.node_uppers, right_index);
614
598
 
615
599
  float left_dist_sq = furthest_distance_to_aabb_sq(point, vec3(left_lower.x, left_lower.y, left_lower.z), vec3(left_upper.x, left_upper.y, left_upper.z));
616
600
  float right_dist_sq = furthest_distance_to_aabb_sq(point, vec3(right_lower.x, right_lower.y, right_lower.z), vec3(right_upper.x, right_upper.y, right_upper.z));
617
601
 
618
- float left_score = left_dist_sq;
619
- float right_score = right_dist_sq;
620
-
621
- if (left_score > right_score)
602
+ wp::vec2i child_indices;
603
+ wp::vec2 child_dist;
604
+ if (left_dist_sq > right_dist_sq)
622
605
  {
623
- // put left on top of the stack
624
- if (right_dist_sq > max_dist_sq)
625
- stack[count++] = right_index;
626
-
627
- if (left_dist_sq > max_dist_sq)
628
- stack[count++] = left_index;
606
+ child_indices = wp::vec2i(right_index, left_index);
607
+ child_dist = wp::vec2(right_dist_sq, left_dist_sq);
629
608
  }
630
609
  else
631
610
  {
632
- // put right on top of the stack
633
- if (left_dist_sq > max_dist_sq)
634
- stack[count++] = left_index;
635
-
636
- if (right_dist_sq > max_dist_sq)
637
- stack[count++] = right_index;
611
+ child_indices = wp::vec2i(left_index, right_index);
612
+ child_dist = wp::vec2(left_dist_sq, right_dist_sq);
638
613
  }
614
+
615
+ if (child_dist[0] > min_dist_sq)
616
+ stack[count++] = child_indices[0];
617
+
618
+ if (child_dist[1] > min_dist_sq)
619
+ stack[count++] = child_indices[1];
639
620
  }
640
621
  }
641
622
 
@@ -655,7 +636,7 @@ CUDA_CALLABLE inline bool mesh_query_furthest_point_no_sign(uint64_t id, const v
655
636
  {
656
637
  max_tests = tests;
657
638
  max_point = point;
658
- max_point_dist = sqrtf(max_dist_sq);
639
+ max_point_dist = sqrtf(min_dist_sq);
659
640
 
660
641
  printf("max_tests: %d max_point: %f %f %f max_point_dist: %f max_second_culls: %d\n", max_tests, max_point[0], max_point[1], max_point[2], max_point_dist, max_secondary_culls);
661
642
 
@@ -673,11 +654,11 @@ CUDA_CALLABLE inline bool mesh_query_furthest_point_no_sign(uint64_t id, const v
673
654
  #endif
674
655
 
675
656
  // check if we found a point, and write outputs
676
- if (max_dist_sq > min_dist*min_dist)
657
+ if (min_dist_sq > min_dist*min_dist)
677
658
  {
678
- u = 1.0f - min_v - min_w;
679
- v = min_v;
680
- face = min_face;
659
+ u = 1.0f - max_v - max_w;
660
+ v = max_v;
661
+ face = max_face;
681
662
 
682
663
  return true;
683
664
  }
@@ -700,6 +681,7 @@ CUDA_CALLABLE inline bool mesh_query_point_sign_normal(uint64_t id, const vec3&
700
681
  float min_v;
701
682
  float min_w;
702
683
  vec3 accumulated_angle_weighted_normal;
684
+
703
685
  #if BVH_DEBUG
704
686
  int tests = 0;
705
687
  int secondary_culls = 0;
@@ -709,11 +691,13 @@ CUDA_CALLABLE inline bool mesh_query_point_sign_normal(uint64_t id, const vec3&
709
691
  #endif
710
692
  float epsilon_min_dist = mesh.average_edge_length * epsilon;
711
693
  float epsilon_min_dist_sq = epsilon_min_dist*epsilon_min_dist;
694
+
712
695
  while (count)
713
696
  {
714
697
  const int nodeIndex = stack[--count];
715
- BVHPackedNodeHalf lower = mesh.bvh.node_lowers[nodeIndex];
716
- BVHPackedNodeHalf upper = mesh.bvh.node_uppers[nodeIndex];
698
+ BVHPackedNodeHalf lower = bvh_load_node(mesh.bvh.node_lowers, nodeIndex);
699
+ BVHPackedNodeHalf upper = bvh_load_node(mesh.bvh.node_uppers, nodeIndex);
700
+
717
701
  // re-test distance
718
702
  float node_dist_sq = distance_to_aabb_sq(point, vec3(lower.x, lower.y, lower.z), vec3(upper.x, upper.y, upper.z));
719
703
  if (node_dist_sq > (min_dist + epsilon_min_dist)*(min_dist + epsilon_min_dist))
@@ -723,92 +707,109 @@ CUDA_CALLABLE inline bool mesh_query_point_sign_normal(uint64_t id, const vec3&
723
707
  #endif
724
708
  continue;
725
709
  }
710
+
726
711
  const int left_index = lower.i;
727
712
  const int right_index = upper.i;
713
+
728
714
  if (lower.b)
729
715
  {
730
- // compute closest point on tri
731
- int i = mesh.indices[left_index*3+0];
732
- int j = mesh.indices[left_index*3+1];
733
- int k = mesh.indices[left_index*3+2];
734
- vec3 p = mesh.points[i];
735
- vec3 q = mesh.points[j];
736
- vec3 r = mesh.points[k];
737
- vec3 e0 = q-p;
738
- vec3 e1 = r-p;
739
- vec3 e2 = r-q;
740
- vec3 normal = cross(e0, e1);
741
- // sliver detection
742
- float e0_norm_sq = dot(e0,e0);
743
- float e1_norm_sq = dot(e1,e1);
744
- float e2_norm_sq = dot(e2,e2);
745
- if (length(normal)/(e0_norm_sq + e1_norm_sq + e2_norm_sq) < 1.e-6f)
746
- continue;
747
- vec2 barycentric = closest_point_to_triangle(p, q, r, point);
748
- float u = barycentric[0];
749
- float v = barycentric[1];
750
- float w = 1.f - u - v;
751
- vec3 c = u*p + v*q + w*r;
752
- float dist = sqrtf(length_sq(c-point));
753
- if (dist < min_dist + epsilon_min_dist)
716
+ const int start = left_index;
717
+ const int end = right_index;
718
+ // loops through primitives in the leaf
719
+ for (int primitive_counter = start; primitive_counter < end; primitive_counter++)
754
720
  {
755
- float weight = 0.0f;
756
- vec3 cp = c-p;
757
- vec3 cq = c-q;
758
- vec3 cr = c-r;
759
- float len_cp_sq = length_sq(cp);
760
- float len_cq_sq = length_sq(cq);
761
- float len_cr_sq = length_sq(cr);
762
-
763
- // Check if near vertex
764
- if (len_cp_sq < epsilon_min_dist_sq)
765
- {
766
- // Vertex 0 is the closest feature
767
- weight = acosf(dot(normalize(e0), normalize(e1)));
768
- } else
769
- if (len_cq_sq < epsilon_min_dist_sq)
770
- {
771
- // Vertex 1 is the closest feature
772
- weight = acosf(dot(normalize(e2), normalize(-e0)));
773
- } else
774
- if (len_cr_sq < epsilon_min_dist_sq)
775
- {
776
- // Vertex 2 is the closest feature
777
- weight = acosf(dot(normalize(-e1), normalize(-e2)));
778
- } else
779
- {
780
- float e0cp = dot(e0, cp);
781
- float e2cq = dot(e2, cq);
782
- float e1cp = dot(e1, cp);
783
-
784
- if ((len_cp_sq*e0_norm_sq-e0cp*e0cp < epsilon_min_dist_sq*e0_norm_sq) ||
785
- (len_cq_sq*e2_norm_sq-e2cq*e2cq < epsilon_min_dist_sq*e2_norm_sq) ||
786
- (len_cp_sq*e1_norm_sq-e1cp*e1cp < epsilon_min_dist_sq*e1_norm_sq)) {
787
- // One of the edge
788
- weight = 3.14159265359f; // PI
789
- } else {
790
- weight = 2.0f*3.14159265359f; // 2*PI
791
- }
792
- }
721
+ int primitive_index = mesh.bvh.primitive_indices[primitive_counter];
722
+ int i = mesh.indices[primitive_index * 3 + 0];
723
+ int j = mesh.indices[primitive_index * 3 + 1];
724
+ int k = mesh.indices[primitive_index * 3 + 2];
725
+
726
+ vec3 p = mesh.points[i];
727
+ vec3 q = mesh.points[j];
728
+ vec3 r = mesh.points[k];
793
729
 
794
- if (dist > min_dist - epsilon_min_dist)
730
+ vec3 e0 = q-p;
731
+ vec3 e1 = r-p;
732
+ vec3 e2 = r-q;
733
+ vec3 normal = cross(e0, e1);
734
+
735
+ // sliver detection
736
+ float e0_norm_sq = dot(e0,e0);
737
+ float e1_norm_sq = dot(e1,e1);
738
+ float e2_norm_sq = dot(e2,e2);
739
+ if (length(normal)/(e0_norm_sq + e1_norm_sq + e2_norm_sq) < 1.e-6f)
740
+ continue;
741
+
742
+ vec2 barycentric = closest_point_to_triangle(p, q, r, point);
743
+ float u = barycentric[0];
744
+ float v = barycentric[1];
745
+ float w = 1.f - u - v;
746
+ vec3 c = u*p + v*q + w*r;
747
+ float dist = sqrtf(length_sq(c-point));
748
+ if (dist < min_dist + epsilon_min_dist)
795
749
  {
796
- // Treat as equal
797
- accumulated_angle_weighted_normal += weight*normalize(normal);
798
- if (dist < min_dist)
750
+ float weight = 0.0f;
751
+ vec3 cp = c - p;
752
+ vec3 cq = c - q;
753
+ vec3 cr = c - r;
754
+ float len_cp_sq = length_sq(cp);
755
+ float len_cq_sq = length_sq(cq);
756
+ float len_cr_sq = length_sq(cr);
757
+
758
+ // Check if near vertex
759
+ if (len_cp_sq < epsilon_min_dist_sq)
760
+ {
761
+ // Vertex 0 is the closest feature
762
+ weight = acosf(dot(normalize(e0), normalize(e1)));
763
+ }
764
+ else
765
+ if (len_cq_sq < epsilon_min_dist_sq)
766
+ {
767
+ // Vertex 1 is the closest feature
768
+ weight = acosf(dot(normalize(e2), normalize(-e0)));
769
+ }
770
+ else
771
+ if (len_cr_sq < epsilon_min_dist_sq)
772
+ {
773
+ // Vertex 2 is the closest feature
774
+ weight = acosf(dot(normalize(-e1), normalize(-e2)));
775
+ }
776
+ else
777
+ {
778
+ float e0cp = dot(e0, cp);
779
+ float e2cq = dot(e2, cq);
780
+ float e1cp = dot(e1, cp);
781
+
782
+ if ((len_cp_sq * e0_norm_sq - e0cp * e0cp < epsilon_min_dist_sq * e0_norm_sq) ||
783
+ (len_cq_sq * e2_norm_sq - e2cq * e2cq < epsilon_min_dist_sq * e2_norm_sq) ||
784
+ (len_cp_sq * e1_norm_sq - e1cp * e1cp < epsilon_min_dist_sq * e1_norm_sq)) {
785
+ // One of the edge
786
+ weight = 3.14159265359f; // PI
787
+ }
788
+ else {
789
+ weight = 2.0f * 3.14159265359f; // 2*PI
790
+ }
791
+ }
792
+
793
+ if (dist > min_dist - epsilon_min_dist)
799
794
  {
795
+ // Treat as equal
796
+ accumulated_angle_weighted_normal += weight * normalize(normal);
797
+ if (dist < min_dist)
798
+ {
799
+ min_dist = dist;
800
+ min_v = v;
801
+ min_w = w;
802
+ min_face = primitive_index;
803
+ }
804
+ }
805
+ else {
806
+ // Less
800
807
  min_dist = dist;
801
808
  min_v = v;
802
809
  min_w = w;
803
- min_face = left_index;
810
+ min_face = primitive_index;
811
+ accumulated_angle_weighted_normal = weight * normalize(normal);
804
812
  }
805
- } else {
806
- // Less
807
- min_dist = dist;
808
- min_v = v;
809
- min_w = w;
810
- min_face = left_index;
811
- accumulated_angle_weighted_normal = weight*normalize(normal);
812
813
  }
813
814
  }
814
815
  #if BVH_DEBUG
@@ -828,33 +829,33 @@ CUDA_CALLABLE inline bool mesh_query_point_sign_normal(uint64_t id, const vec3&
828
829
  }
829
830
  else
830
831
  {
831
- BVHPackedNodeHalf left_lower = mesh.bvh.node_lowers[left_index];
832
- BVHPackedNodeHalf left_upper = mesh.bvh.node_uppers[left_index];
833
- BVHPackedNodeHalf right_lower = mesh.bvh.node_lowers[right_index];
834
- BVHPackedNodeHalf right_upper = mesh.bvh.node_uppers[right_index];
832
+ BVHPackedNodeHalf left_lower = bvh_load_node(mesh.bvh.node_lowers, left_index);
833
+ BVHPackedNodeHalf left_upper = bvh_load_node(mesh.bvh.node_uppers, left_index);
834
+
835
+ BVHPackedNodeHalf right_lower = bvh_load_node(mesh.bvh.node_lowers, right_index);
836
+ BVHPackedNodeHalf right_upper = bvh_load_node(mesh.bvh.node_uppers, right_index);
835
837
 
836
838
  float left_dist_sq = distance_to_aabb_sq(point, vec3(left_lower.x, left_lower.y, left_lower.z), vec3(left_upper.x, left_upper.y, left_upper.z));
837
839
  float right_dist_sq = distance_to_aabb_sq(point, vec3(right_lower.x, right_lower.y, right_lower.z), vec3(right_upper.x, right_upper.y, right_upper.z));
838
840
 
839
- float left_score = left_dist_sq;
840
- float right_score = right_dist_sq;
841
-
842
- if (left_score < right_score)
841
+ wp::vec2i child_indices;
842
+ wp::vec2 child_dist;
843
+ if (left_dist_sq < right_dist_sq)
843
844
  {
844
- // put left on top of the stack
845
- if (right_dist_sq < (min_dist + epsilon_min_dist) * (min_dist + epsilon_min_dist))
846
- stack[count++] = right_index;
847
- if (left_dist_sq < (min_dist + epsilon_min_dist) * (min_dist + epsilon_min_dist))
848
- stack[count++] = left_index;
845
+ child_indices = wp::vec2i(right_index, left_index);
846
+ child_dist = wp::vec2(right_dist_sq, left_dist_sq);
849
847
  }
850
848
  else
851
849
  {
852
- // put right on top of the stack
853
- if (left_dist_sq < (min_dist + epsilon_min_dist) * (min_dist + epsilon_min_dist))
854
- stack[count++] = left_index;
855
- if (right_dist_sq < (min_dist + epsilon_min_dist) * (min_dist + epsilon_min_dist))
856
- stack[count++] = right_index;
850
+ child_indices = wp::vec2i(left_index, right_index);
851
+ child_dist = wp::vec2(left_dist_sq, right_dist_sq);
857
852
  }
853
+
854
+ if (child_dist[0] < (min_dist + epsilon_min_dist) * (min_dist + epsilon_min_dist))
855
+ stack[count++] = child_indices[0];
856
+
857
+ if (child_dist[1] < (min_dist + epsilon_min_dist) * (min_dist + epsilon_min_dist))
858
+ stack[count++] = child_indices[1];
858
859
  }
859
860
  }
860
861
  #if BVH_DEBUG
@@ -928,17 +929,29 @@ CUDA_CALLABLE inline float solid_angle_iterative(uint64_t id, const vec3& p, con
928
929
  while (count)
929
930
  {
930
931
  const int nodeIndex = stack[count - 1];
931
- BVHPackedNodeHalf lower = mesh.bvh.node_lowers[nodeIndex];
932
- BVHPackedNodeHalf upper = mesh.bvh.node_uppers[nodeIndex];
933
-
932
+ BVHPackedNodeHalf lower = bvh_load_node(mesh.bvh.node_lowers, nodeIndex);
933
+ BVHPackedNodeHalf upper = bvh_load_node(mesh.bvh.node_uppers, nodeIndex);
934
+
934
935
  const int left_index = lower.i;
935
936
  const int right_index = upper.i;
936
937
  if (lower.b)
937
938
  {
938
939
  // compute closest point on tri
939
- const int leaf_index = left_index;
940
- angle[count - 1] = robust_solid_angle(mesh.points[mesh.indices[leaf_index*3+0]], mesh.points[mesh.indices[leaf_index*3+1]], mesh.points[mesh.indices[leaf_index*3+2]], p);
941
- //printf("Leaf %d, got %f\n", leaf_index, my_data[count - 1]);
940
+ const int start = left_index;
941
+ const int end = right_index;
942
+ angle[count - 1] = 0.f;
943
+ for (int primitive_counter = start; primitive_counter < end; primitive_counter++)
944
+ {
945
+ int primitive_index = mesh.bvh.primitive_indices[primitive_counter];
946
+ int i = mesh.indices[primitive_index * 3 + 0];
947
+ int j = mesh.indices[primitive_index * 3 + 1];
948
+ int k = mesh.indices[primitive_index * 3 + 2];
949
+ angle[count - 1] += robust_solid_angle(
950
+ mesh.points[i],
951
+ mesh.points[j],
952
+ mesh.points[k], p);
953
+ //printf("Leaf %d, got %f\n", leaf_index, my_data[count - 1]);
954
+ }
942
955
  count--;
943
956
  }
944
957
  else
@@ -1018,8 +1031,8 @@ CUDA_CALLABLE inline bool mesh_query_point_sign_winding_number(uint64_t id, cons
1018
1031
  {
1019
1032
  const int nodeIndex = stack[--count];
1020
1033
 
1021
- BVHPackedNodeHalf lower = mesh.bvh.node_lowers[nodeIndex];
1022
- BVHPackedNodeHalf upper = mesh.bvh.node_uppers[nodeIndex];
1034
+ BVHPackedNodeHalf lower = bvh_load_node(mesh.bvh.node_lowers, nodeIndex);
1035
+ BVHPackedNodeHalf upper = bvh_load_node(mesh.bvh.node_uppers, nodeIndex);
1023
1036
 
1024
1037
  // re-test distance
1025
1038
  float node_dist_sq = distance_to_aabb_sq(point, vec3(lower.x, lower.y, lower.z), vec3(upper.x, upper.y, upper.z));
@@ -1036,40 +1049,45 @@ CUDA_CALLABLE inline bool mesh_query_point_sign_winding_number(uint64_t id, cons
1036
1049
 
1037
1050
  if (lower.b)
1038
1051
  {
1039
- // compute closest point on tri
1040
- int i = mesh.indices[left_index*3+0];
1041
- int j = mesh.indices[left_index*3+1];
1042
- int k = mesh.indices[left_index*3+2];
1052
+ const int start = left_index;
1053
+ const int end = right_index;
1054
+ // loops through primitives in the leaf
1055
+ for (int primitive_counter = start; primitive_counter < end; primitive_counter++)
1056
+ {
1057
+ int primitive_index = mesh.bvh.primitive_indices[primitive_counter];
1058
+ int i = mesh.indices[primitive_index * 3 + 0];
1059
+ int j = mesh.indices[primitive_index * 3 + 1];
1060
+ int k = mesh.indices[primitive_index * 3 + 2];
1043
1061
 
1044
- vec3 p = mesh.points[i];
1045
- vec3 q = mesh.points[j];
1046
- vec3 r = mesh.points[k];
1047
-
1048
- vec3 e0 = q-p;
1049
- vec3 e1 = r-p;
1050
- vec3 e2 = r-q;
1051
- vec3 normal = cross(e0, e1);
1052
-
1053
- // sliver detection
1054
- if (length(normal)/(dot(e0,e0) + dot(e1,e1) + dot(e2,e2)) < 1.e-6f)
1055
- continue;
1062
+ vec3 p = mesh.points[i];
1063
+ vec3 q = mesh.points[j];
1064
+ vec3 r = mesh.points[k];
1056
1065
 
1057
- vec2 barycentric = closest_point_to_triangle(p, q, r, point);
1058
- float u = barycentric[0];
1059
- float v = barycentric[1];
1060
- float w = 1.f - u - v;
1061
- vec3 c = u*p + v*q + w*r;
1066
+ vec3 e0 = q - p;
1067
+ vec3 e1 = r - p;
1068
+ vec3 e2 = r - q;
1069
+ vec3 normal = cross(e0, e1);
1062
1070
 
1063
- float dist_sq = length_sq(c-point);
1071
+ // sliver detection
1072
+ if (length(normal) / (dot(e0, e0) + dot(e1, e1) + dot(e2, e2)) < 1.e-6f)
1073
+ continue;
1064
1074
 
1065
- if (dist_sq < min_dist_sq)
1066
- {
1067
- min_dist_sq = dist_sq;
1068
- min_v = v;
1069
- min_w = w;
1070
- min_face = left_index;
1071
- }
1075
+ vec2 barycentric = closest_point_to_triangle(p, q, r, point);
1076
+ float u = barycentric[0];
1077
+ float v = barycentric[1];
1078
+ float w = 1.f - u - v;
1079
+ vec3 c = u * p + v * q + w * r;
1072
1080
 
1081
+ float dist_sq = length_sq(c - point);
1082
+
1083
+ if (dist_sq < min_dist_sq)
1084
+ {
1085
+ min_dist_sq = dist_sq;
1086
+ min_v = v;
1087
+ min_w = w;
1088
+ min_face = primitive_index;
1089
+ }
1090
+ }
1073
1091
  #if BVH_DEBUG
1074
1092
 
1075
1093
  tests++;
@@ -1091,36 +1109,33 @@ CUDA_CALLABLE inline bool mesh_query_point_sign_winding_number(uint64_t id, cons
1091
1109
  }
1092
1110
  else
1093
1111
  {
1094
- BVHPackedNodeHalf left_lower = mesh.bvh.node_lowers[left_index];
1095
- BVHPackedNodeHalf left_upper = mesh.bvh.node_uppers[left_index];
1112
+ BVHPackedNodeHalf left_lower = bvh_load_node(mesh.bvh.node_lowers, left_index);
1113
+ BVHPackedNodeHalf left_upper = bvh_load_node(mesh.bvh.node_uppers, left_index);
1096
1114
 
1097
- BVHPackedNodeHalf right_lower = mesh.bvh.node_lowers[right_index];
1098
- BVHPackedNodeHalf right_upper = mesh.bvh.node_uppers[right_index];
1115
+ BVHPackedNodeHalf right_lower = bvh_load_node(mesh.bvh.node_lowers, right_index);
1116
+ BVHPackedNodeHalf right_upper = bvh_load_node(mesh.bvh.node_uppers, right_index);
1099
1117
 
1100
1118
  float left_dist_sq = distance_to_aabb_sq(point, vec3(left_lower.x, left_lower.y, left_lower.z), vec3(left_upper.x, left_upper.y, left_upper.z));
1101
1119
  float right_dist_sq = distance_to_aabb_sq(point, vec3(right_lower.x, right_lower.y, right_lower.z), vec3(right_upper.x, right_upper.y, right_upper.z));
1102
1120
 
1103
- float left_score = left_dist_sq;
1104
- float right_score = right_dist_sq;
1105
-
1106
- if (left_score < right_score)
1121
+ wp::vec2i child_indices;
1122
+ wp::vec2 child_dist;
1123
+ if (left_dist_sq < right_dist_sq)
1107
1124
  {
1108
- // put left on top of the stack
1109
- if (right_dist_sq < min_dist_sq)
1110
- stack[count++] = right_index;
1111
-
1112
- if (left_dist_sq < min_dist_sq)
1113
- stack[count++] = left_index;
1125
+ child_indices = wp::vec2i(right_index, left_index);
1126
+ child_dist = wp::vec2(right_dist_sq, left_dist_sq);
1114
1127
  }
1115
1128
  else
1116
1129
  {
1117
- // put right on top of the stack
1118
- if (left_dist_sq < min_dist_sq)
1119
- stack[count++] = left_index;
1120
-
1121
- if (right_dist_sq < min_dist_sq)
1122
- stack[count++] = right_index;
1130
+ child_indices = wp::vec2i(left_index, right_index);
1131
+ child_dist = wp::vec2(left_dist_sq, right_dist_sq);
1123
1132
  }
1133
+
1134
+ if (child_dist[0] < min_dist_sq)
1135
+ stack[count++] = child_indices[0];
1136
+
1137
+ if (child_dist[1] < min_dist_sq)
1138
+ stack[count++] = child_indices[1];
1124
1139
  }
1125
1140
  }
1126
1141
 
@@ -1364,8 +1379,8 @@ CUDA_CALLABLE inline bool mesh_query_ray(uint64_t id, const vec3& start, const v
1364
1379
  {
1365
1380
  const int nodeIndex = stack[--count];
1366
1381
 
1367
- BVHPackedNodeHalf lower = mesh.bvh.node_lowers[nodeIndex];
1368
- BVHPackedNodeHalf upper = mesh.bvh.node_uppers[nodeIndex];
1382
+ BVHPackedNodeHalf lower = bvh_load_node(mesh.bvh.node_lowers, nodeIndex);
1383
+ BVHPackedNodeHalf upper = bvh_load_node(mesh.bvh.node_uppers, nodeIndex);
1369
1384
 
1370
1385
  // todo: switch to robust ray-aabb, or expand bounds in build stage
1371
1386
  float eps = 1.e-3f;
@@ -1379,28 +1394,34 @@ CUDA_CALLABLE inline bool mesh_query_ray(uint64_t id, const vec3& start, const v
1379
1394
 
1380
1395
  if (lower.b)
1381
1396
  {
1382
- // compute closest point on tri
1383
- int i = mesh.indices[left_index*3+0];
1384
- int j = mesh.indices[left_index*3+1];
1385
- int k = mesh.indices[left_index*3+2];
1397
+ const int start_index = left_index;
1398
+ const int end_index = right_index;
1399
+ // loops through primitives in the leaf
1400
+ for (int primitive_counter = start_index; primitive_counter < end_index ; primitive_counter++)
1401
+ {
1402
+ int primitive_index = mesh.bvh.primitive_indices[primitive_counter];
1403
+ int i = mesh.indices[primitive_index * 3 + 0];
1404
+ int j = mesh.indices[primitive_index * 3 + 1];
1405
+ int k = mesh.indices[primitive_index * 3 + 2];
1386
1406
 
1387
- vec3 p = mesh.points[i];
1388
- vec3 q = mesh.points[j];
1389
- vec3 r = mesh.points[k];
1407
+ vec3 p = mesh.points[i];
1408
+ vec3 q = mesh.points[j];
1409
+ vec3 r = mesh.points[k];
1390
1410
 
1391
- float t, u, v, sign;
1392
- vec3 n;
1393
-
1394
- if (intersect_ray_tri_woop(start, dir, p, q, r, t, u, v, sign, &n))
1395
- {
1396
- if (t < min_t && t >= 0.0f)
1411
+ float t, u, v, sign;
1412
+ vec3 n;
1413
+
1414
+ if (intersect_ray_tri_woop(start, dir, p, q, r, t, u, v, sign, &n))
1397
1415
  {
1398
- min_t = t;
1399
- min_face = left_index;
1400
- min_u = u;
1401
- min_v = v;
1402
- min_sign = sign;
1403
- min_normal = n;
1416
+ if (t < min_t && t >= 0.0f)
1417
+ {
1418
+ min_t = t;
1419
+ min_face = primitive_index;
1420
+ min_u = u;
1421
+ min_v = v;
1422
+ min_sign = sign;
1423
+ min_normal = n;
1424
+ }
1404
1425
  }
1405
1426
  }
1406
1427
  }
@@ -1482,13 +1503,13 @@ struct mesh_query_ray_t
1482
1503
  return *this;
1483
1504
  }
1484
1505
 
1485
- bool result;
1486
1506
  float sign;
1487
1507
  int face;
1488
1508
  float t;
1489
1509
  float u;
1490
1510
  float v;
1491
1511
  vec3 normal;
1512
+ bool result;
1492
1513
  };
1493
1514
 
1494
1515
  CUDA_CALLABLE inline mesh_query_ray_t mesh_query_ray(uint64_t id, const vec3& start, const vec3& dir, float max_t)
@@ -1528,7 +1549,7 @@ CUDA_CALLABLE inline float mesh_query_inside(uint64_t id, const vec3& p)
1528
1549
  }
1529
1550
  }
1530
1551
 
1531
- if (vote == 3)
1552
+ if (vote >= 2)
1532
1553
  return -1.0f;
1533
1554
  else
1534
1555
  return 1.0f;
@@ -1544,7 +1565,8 @@ struct mesh_query_aabb_t
1544
1565
  count(0),
1545
1566
  input_lower(),
1546
1567
  input_upper(),
1547
- face(0)
1568
+ face(0),
1569
+ primitive_counter(-1)
1548
1570
  {}
1549
1571
 
1550
1572
  // Required for adjoint computations.
@@ -1563,6 +1585,9 @@ struct mesh_query_aabb_t
1563
1585
  wp::vec3 input_lower;
1564
1586
  wp::vec3 input_upper;
1565
1587
 
1588
+ // >= 0 if currently in a packed leaf node
1589
+ int primitive_counter;
1590
+
1566
1591
  // Face
1567
1592
  int face;
1568
1593
  };
@@ -1593,13 +1618,10 @@ CUDA_CALLABLE inline mesh_query_aabb_t mesh_query_aabb(
1593
1618
  while (query.count)
1594
1619
  {
1595
1620
  const int nodeIndex = query.stack[--query.count];
1596
- BVHPackedNodeHalf node_lower = mesh.bvh.node_lowers[nodeIndex];
1597
- BVHPackedNodeHalf node_upper = mesh.bvh.node_uppers[nodeIndex];
1621
+ BVHPackedNodeHalf node_lower = bvh_load_node(mesh.bvh.node_lowers, nodeIndex);
1622
+ BVHPackedNodeHalf node_upper = bvh_load_node(mesh.bvh.node_uppers, nodeIndex);
1598
1623
 
1599
- wp::vec3 lower_pos(node_lower.x, node_lower.y, node_lower.z);
1600
- wp::vec3 upper_pos(node_upper.x, node_upper.y, node_upper.z);
1601
- wp::bounds3 current_bounds(lower_pos, upper_pos);
1602
- if (!input_bounds.overlaps(current_bounds))
1624
+ if (!input_bounds.overlaps((vec3&)node_lower, (vec3&)node_upper))
1603
1625
  {
1604
1626
  // Skip this box, it doesn't overlap with our target box.
1605
1627
  continue;
@@ -1611,8 +1633,9 @@ CUDA_CALLABLE inline mesh_query_aabb_t mesh_query_aabb(
1611
1633
  // Make bounds from this AABB
1612
1634
  if (node_lower.b)
1613
1635
  {
1614
- // found very first triangle index.
1636
+ // Reached a leaf node, point to its first primitive
1615
1637
  // Back up one level and return
1638
+ query.primitive_counter = left_index;
1616
1639
  query.stack[query.count++] = nodeIndex;
1617
1640
  return query;
1618
1641
  }
@@ -1636,19 +1659,54 @@ CUDA_CALLABLE inline void adj_mesh_query_aabb(uint64_t id, const vec3& lower, co
1636
1659
  CUDA_CALLABLE inline bool mesh_query_aabb_next(mesh_query_aabb_t& query, int& index)
1637
1660
  {
1638
1661
  Mesh mesh = query.mesh;
1639
-
1662
+
1640
1663
  wp::bounds3 input_bounds(query.input_lower, query.input_upper);
1664
+
1665
+ if (query.primitive_counter != -1)
1666
+ // currently in a leaf node which is the last node in the stack
1667
+ {
1668
+ const int node_index = query.stack[query.count - 1];
1669
+ BVHPackedNodeHalf node_lower = bvh_load_node(mesh.bvh.node_lowers, node_index);
1670
+ BVHPackedNodeHalf node_upper = bvh_load_node(mesh.bvh.node_uppers, node_index);
1671
+
1672
+ const int end = node_upper.i;
1673
+ for (int primitive_counter = query.primitive_counter; primitive_counter < end; primitive_counter++)
1674
+ {
1675
+ int primitive_index = mesh.bvh.primitive_indices[primitive_counter];
1676
+ if (input_bounds.overlaps(mesh.lowers[primitive_index], mesh.uppers[primitive_index]))
1677
+ {
1678
+ if (primitive_counter < end - 1)
1679
+ // still need to come back to this leaf node for the leftover primitives
1680
+ {
1681
+ query.primitive_counter = primitive_counter + 1;
1682
+ }
1683
+ else
1684
+ // no need to come back to this leaf node
1685
+ {
1686
+ query.count--;
1687
+ query.primitive_counter = -1;
1688
+ }
1689
+ index = primitive_index;
1690
+ query.face = primitive_index;
1691
+
1692
+ return true;
1693
+ }
1694
+ }
1695
+ // if we reach here it means we have finished the current leaf node without finding intersections
1696
+ query.primitive_counter = -1;
1697
+ // remove the leaf node from the back of the stack because it is finished
1698
+ // and continue the bvh traversal
1699
+ query.count--;
1700
+ }
1701
+
1641
1702
  // Navigate through the bvh, find the first overlapping leaf node.
1642
1703
  while (query.count)
1643
1704
  {
1644
- const int nodeIndex = query.stack[--query.count];
1645
- BVHPackedNodeHalf node_lower = mesh.bvh.node_lowers[nodeIndex];
1646
- BVHPackedNodeHalf node_upper = mesh.bvh.node_uppers[nodeIndex];
1705
+ const int node_index = query.stack[--query.count];
1706
+ BVHPackedNodeHalf node_lower = bvh_load_node(mesh.bvh.node_lowers, node_index);
1707
+ BVHPackedNodeHalf node_upper = bvh_load_node(mesh.bvh.node_uppers, node_index);
1647
1708
 
1648
- wp::vec3 lower_pos(node_lower.x, node_lower.y, node_lower.z);
1649
- wp::vec3 upper_pos(node_upper.x, node_upper.y, node_upper.z);
1650
- wp::bounds3 current_bounds(lower_pos, upper_pos);
1651
- if (!input_bounds.overlaps(current_bounds))
1709
+ if (!input_bounds.overlaps((vec3&)node_lower, (vec3&)node_upper))
1652
1710
  {
1653
1711
  // Skip this box, it doesn't overlap with our target box.
1654
1712
  continue;
@@ -1660,14 +1718,35 @@ CUDA_CALLABLE inline bool mesh_query_aabb_next(mesh_query_aabb_t& query, int& in
1660
1718
  // Make bounds from this AABB
1661
1719
  if (node_lower.b)
1662
1720
  {
1663
- // found very first triangle index
1664
- query.face = left_index;
1665
- index = left_index;
1666
- return true;
1721
+ // found leaf, loop through its content primitives
1722
+ const int start = left_index;
1723
+ const int end = right_index;
1724
+
1725
+ for (int primitive_counter = start; primitive_counter < end; primitive_counter++)
1726
+ {
1727
+ int primitive_index = mesh.bvh.primitive_indices[primitive_counter];
1728
+ if (input_bounds.overlaps(mesh.lowers[primitive_index], mesh.uppers[primitive_index]))
1729
+ {
1730
+ if (primitive_counter < end - 1)
1731
+ // still need to come back to this leaf node for the leftover primitives
1732
+ {
1733
+ query.primitive_counter = primitive_counter + 1;
1734
+ query.stack[query.count++] = node_index;
1735
+ }
1736
+ else
1737
+ // no need to come back to this leaf node
1738
+ {
1739
+ query.primitive_counter = -1;
1740
+ }
1741
+ index = primitive_index;
1742
+ query.face = primitive_index;
1743
+
1744
+ return true;
1745
+ }
1746
+ }
1667
1747
  }
1668
1748
  else
1669
1749
  {
1670
-
1671
1750
  query.stack[query.count++] = left_index;
1672
1751
  query.stack[query.count++] = right_index;
1673
1752
  }