warp-lang 1.5.1__py3-none-manylinux2014_x86_64.whl → 1.6.1__py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (131) hide show
  1. warp/__init__.py +5 -0
  2. warp/autograd.py +414 -191
  3. warp/bin/warp-clang.so +0 -0
  4. warp/bin/warp.so +0 -0
  5. warp/build.py +40 -12
  6. warp/build_dll.py +13 -6
  7. warp/builtins.py +1077 -481
  8. warp/codegen.py +250 -122
  9. warp/config.py +65 -21
  10. warp/context.py +500 -149
  11. warp/examples/assets/square_cloth.usd +0 -0
  12. warp/examples/benchmarks/benchmark_gemm.py +27 -18
  13. warp/examples/benchmarks/benchmark_interop_paddle.py +3 -3
  14. warp/examples/benchmarks/benchmark_interop_torch.py +3 -3
  15. warp/examples/core/example_marching_cubes.py +1 -1
  16. warp/examples/core/example_mesh.py +1 -1
  17. warp/examples/core/example_torch.py +18 -34
  18. warp/examples/core/example_wave.py +1 -1
  19. warp/examples/fem/example_apic_fluid.py +1 -0
  20. warp/examples/fem/example_mixed_elasticity.py +1 -1
  21. warp/examples/optim/example_bounce.py +1 -1
  22. warp/examples/optim/example_cloth_throw.py +1 -1
  23. warp/examples/optim/example_diffray.py +4 -15
  24. warp/examples/optim/example_drone.py +1 -1
  25. warp/examples/optim/example_softbody_properties.py +392 -0
  26. warp/examples/optim/example_trajectory.py +1 -3
  27. warp/examples/optim/example_walker.py +5 -0
  28. warp/examples/sim/example_cartpole.py +0 -2
  29. warp/examples/sim/example_cloth_self_contact.py +314 -0
  30. warp/examples/sim/example_granular_collision_sdf.py +4 -5
  31. warp/examples/sim/example_jacobian_ik.py +0 -2
  32. warp/examples/sim/example_quadruped.py +5 -2
  33. warp/examples/tile/example_tile_cholesky.py +79 -0
  34. warp/examples/tile/example_tile_convolution.py +2 -2
  35. warp/examples/tile/example_tile_fft.py +2 -2
  36. warp/examples/tile/example_tile_filtering.py +3 -3
  37. warp/examples/tile/example_tile_matmul.py +4 -4
  38. warp/examples/tile/example_tile_mlp.py +12 -12
  39. warp/examples/tile/example_tile_nbody.py +191 -0
  40. warp/examples/tile/example_tile_walker.py +319 -0
  41. warp/math.py +147 -0
  42. warp/native/array.h +12 -0
  43. warp/native/builtin.h +0 -1
  44. warp/native/bvh.cpp +149 -70
  45. warp/native/bvh.cu +287 -68
  46. warp/native/bvh.h +195 -85
  47. warp/native/clang/clang.cpp +6 -2
  48. warp/native/crt.h +1 -0
  49. warp/native/cuda_util.cpp +35 -0
  50. warp/native/cuda_util.h +5 -0
  51. warp/native/exports.h +40 -40
  52. warp/native/intersect.h +17 -0
  53. warp/native/mat.h +57 -3
  54. warp/native/mathdx.cpp +19 -0
  55. warp/native/mesh.cpp +25 -8
  56. warp/native/mesh.cu +153 -101
  57. warp/native/mesh.h +482 -403
  58. warp/native/quat.h +40 -0
  59. warp/native/solid_angle.h +7 -0
  60. warp/native/sort.cpp +85 -0
  61. warp/native/sort.cu +34 -0
  62. warp/native/sort.h +3 -1
  63. warp/native/spatial.h +11 -0
  64. warp/native/tile.h +1189 -664
  65. warp/native/tile_reduce.h +8 -6
  66. warp/native/vec.h +41 -0
  67. warp/native/warp.cpp +8 -1
  68. warp/native/warp.cu +263 -40
  69. warp/native/warp.h +19 -5
  70. warp/optim/linear.py +22 -4
  71. warp/render/render_opengl.py +132 -59
  72. warp/render/render_usd.py +10 -2
  73. warp/sim/__init__.py +6 -1
  74. warp/sim/collide.py +289 -32
  75. warp/sim/import_urdf.py +20 -5
  76. warp/sim/integrator_euler.py +25 -7
  77. warp/sim/integrator_featherstone.py +147 -35
  78. warp/sim/integrator_vbd.py +842 -40
  79. warp/sim/model.py +173 -112
  80. warp/sim/render.py +2 -2
  81. warp/stubs.py +249 -116
  82. warp/tape.py +28 -30
  83. warp/tests/aux_test_module_unload.py +15 -0
  84. warp/tests/{test_sim_grad.py → flaky_test_sim_grad.py} +104 -63
  85. warp/tests/test_array.py +100 -0
  86. warp/tests/test_assert.py +242 -0
  87. warp/tests/test_codegen.py +14 -61
  88. warp/tests/test_collision.py +8 -8
  89. warp/tests/test_examples.py +16 -1
  90. warp/tests/test_grad_debug.py +87 -2
  91. warp/tests/test_hash_grid.py +1 -1
  92. warp/tests/test_ipc.py +116 -0
  93. warp/tests/test_launch.py +77 -26
  94. warp/tests/test_mat.py +213 -168
  95. warp/tests/test_math.py +47 -1
  96. warp/tests/test_matmul.py +11 -7
  97. warp/tests/test_matmul_lite.py +4 -4
  98. warp/tests/test_mesh.py +84 -60
  99. warp/tests/test_mesh_query_aabb.py +165 -0
  100. warp/tests/test_mesh_query_point.py +328 -286
  101. warp/tests/test_mesh_query_ray.py +134 -121
  102. warp/tests/test_mlp.py +2 -2
  103. warp/tests/test_operators.py +43 -0
  104. warp/tests/test_overwrite.py +6 -5
  105. warp/tests/test_quat.py +77 -0
  106. warp/tests/test_reload.py +29 -0
  107. warp/tests/test_sim_grad_bounce_linear.py +204 -0
  108. warp/tests/test_static.py +16 -0
  109. warp/tests/test_tape.py +25 -0
  110. warp/tests/test_tile.py +134 -191
  111. warp/tests/test_tile_load.py +399 -0
  112. warp/tests/test_tile_mathdx.py +61 -8
  113. warp/tests/test_tile_mlp.py +17 -17
  114. warp/tests/test_tile_reduce.py +24 -18
  115. warp/tests/test_tile_shared_memory.py +66 -17
  116. warp/tests/test_tile_view.py +165 -0
  117. warp/tests/test_torch.py +35 -0
  118. warp/tests/test_utils.py +36 -24
  119. warp/tests/test_vec.py +110 -0
  120. warp/tests/unittest_suites.py +29 -4
  121. warp/tests/unittest_utils.py +30 -11
  122. warp/thirdparty/unittest_parallel.py +5 -2
  123. warp/types.py +419 -111
  124. warp/utils.py +9 -5
  125. {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/METADATA +86 -45
  126. {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/RECORD +129 -118
  127. {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/WHEEL +1 -1
  128. warp/examples/benchmarks/benchmark_tile.py +0 -179
  129. warp/native/tile_gemm.h +0 -341
  130. {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/LICENSE.md +0 -0
  131. {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/top_level.txt +0 -0
warp/native/bvh.h CHANGED
@@ -11,6 +11,14 @@
11
11
  #include "builtin.h"
12
12
  #include "intersect.h"
13
13
 
14
+ #define BVH_LEAF_SIZE (4)
15
+ #define SAH_NUM_BUCKETS (16)
16
+ #define USE_LOAD4
17
+
18
+ #define BVH_CONSTRUCTOR_SAH (0)
19
+ #define BVH_CONSTRUCTOR_MEDIAN (1)
20
+ #define BVH_CONSTRUCTOR_LBVH (2)
21
+
14
22
  namespace wp
15
23
  {
16
24
 
@@ -72,12 +80,38 @@ struct bounds3
72
80
  }
73
81
  }
74
82
 
83
+ CUDA_CALLABLE inline bool overlaps(const vec3& b_lower, const vec3& b_upper) const
84
+ {
85
+ if (lower[0] > b_upper[0] ||
86
+ lower[1] > b_upper[1] ||
87
+ lower[2] > b_upper[2] ||
88
+ upper[0] < b_lower[0] ||
89
+ upper[1] < b_lower[1] ||
90
+ upper[2] < b_lower[2])
91
+ {
92
+ return false;
93
+ }
94
+ else
95
+ {
96
+ return true;
97
+ }
98
+ }
99
+
75
100
  CUDA_CALLABLE inline void add_point(const vec3& p)
76
101
  {
77
102
  lower = min(lower, p);
78
103
  upper = max(upper, p);
79
104
  }
80
105
 
106
+ CUDA_CALLABLE inline void add_bounds(const vec3& lower_other, const vec3& upper_other)
107
+ {
108
+ // lower_other will only impact the lower of the new bounds
109
+ // upper_other will only impact the upper of the new bounds
110
+ // this costs only half of the computation of adding lower_other and upper_other separately
111
+ lower = min(lower, lower_other);
112
+ upper = max(upper, upper_other);
113
+ }
114
+
81
115
  CUDA_CALLABLE inline float area() const
82
116
  {
83
117
  vec3 e = upper-lower;
@@ -108,6 +142,13 @@ struct BVHPackedNodeHalf
108
142
  float x;
109
143
  float y;
110
144
  float z;
145
+ // For non-leaf nodes:
146
+ // - 'lower.i' represents the index of the left child node.
147
+ // - 'upper.i' represents the index of the right child node.
148
+ //
149
+ // For leaf nodes:
150
+ // - 'lower.i' indicates the start index of the primitives in 'primitive_indices'.
151
+ // - 'upper.i' indicates the index just after the last primitive in 'primitive_indices'
111
152
  unsigned int i : 31;
112
153
  unsigned int b : 1;
113
154
  };
@@ -120,11 +161,15 @@ struct BVH
120
161
  // used for fast refits
121
162
  int* node_parents;
122
163
  int* node_counts;
164
+ // reordered primitive indices corresponds to the ordering of leaf nodes
165
+ int* primitive_indices;
123
166
 
124
167
  int max_depth;
125
168
  int max_nodes;
126
169
  int num_nodes;
127
-
170
+ // since we use packed leaf nodes, the number of them is no longer the number of items, but variable
171
+ int num_leaf_nodes;
172
+
128
173
  // pointer (CPU or GPU) to a single integer index in node_lowers, node_uppers
129
174
  // representing the root of the tree, this is not always the first node
130
175
  // for bottom-up builders
@@ -161,6 +206,24 @@ CUDA_CALLABLE inline void make_node(volatile BVHPackedNodeHalf* n, const vec3& b
161
206
  n->b = (unsigned int)(leaf?1:0);
162
207
  }
163
208
 
209
+ #ifdef __CUDA_ARCH__
210
+ __device__ inline wp::BVHPackedNodeHalf bvh_load_node(const wp::BVHPackedNodeHalf* nodes, int index)
211
+ {
212
+ #ifdef USE_LOAD4
213
+ //return (const wp::BVHPackedNodeHalf&)(__ldg((const float4*)(nodes)+index));
214
+ return (const wp::BVHPackedNodeHalf&)(*((const float4*)(nodes)+index));
215
+ #else
216
+ return nodes[index];
217
+ #endif // USE_LOAD4
218
+
219
+ }
220
+ #else
221
+ inline wp::BVHPackedNodeHalf bvh_load_node(const wp::BVHPackedNodeHalf* nodes, int index)
222
+ {
223
+ return nodes[index];
224
+ }
225
+ #endif // __CUDACC__
226
+
164
227
  CUDA_CALLABLE inline int clz(int x)
165
228
  {
166
229
  int n;
@@ -215,7 +278,8 @@ struct bvh_query_t
215
278
  is_ray(false),
216
279
  input_lower(),
217
280
  input_upper(),
218
- bounds_nr(0)
281
+ bounds_nr(0),
282
+ primitive_counter(-1)
219
283
  {}
220
284
 
221
285
  // Required for adjoint computations.
@@ -230,22 +294,37 @@ struct bvh_query_t
230
294
  int stack[32];
231
295
  int count;
232
296
 
297
+ // >= 0 if currently in a packed leaf node
298
+ int primitive_counter;
299
+
233
300
  // inputs
234
- bool is_ray;
235
301
  wp::vec3 input_lower; // start for ray
236
302
  wp::vec3 input_upper; // dir for ray
237
303
 
238
304
  int bounds_nr;
305
+ bool is_ray;
239
306
  };
240
307
 
308
+ CUDA_CALLABLE inline bool bvh_query_intersection_test(const bvh_query_t& query, const vec3& node_lower, const vec3& node_upper)
309
+ {
310
+ if (query.is_ray)
311
+ {
312
+ float t = 0.0f;
313
+ return intersect_ray_aabb(query.input_lower, query.input_upper, node_lower, node_upper, t);
314
+ }
315
+ else
316
+ {
317
+ return intersect_aabb_aabb(query.input_lower, query.input_upper, node_lower, node_upper);
318
+ }
319
+ }
241
320
 
242
321
  CUDA_CALLABLE inline bvh_query_t bvh_query(
243
- uint64_t id, bool is_ray, const vec3& lower, const vec3& upper)
322
+ uint64_t id, bool is_ray, const vec3& lower, const vec3& upper)
244
323
  {
245
- // This routine traverses the BVH tree until it finds
324
+ // This routine traverses the BVH tree until it finds
246
325
  // the first overlapping bound.
247
326
 
248
- // initialize empty
327
+ // initialize empty
249
328
  bvh_query_t query;
250
329
 
251
330
  query.bounds_nr = -1;
@@ -255,57 +334,41 @@ CUDA_CALLABLE inline bvh_query_t bvh_query(
255
334
  query.bvh = bvh;
256
335
  query.is_ray = is_ray;
257
336
 
258
- // optimization: make the latest
337
+ // optimization: make the latest
259
338
  query.stack[0] = *bvh.root;
260
339
  query.count = 1;
261
- query.input_lower = lower;
262
- query.input_upper = upper;
340
+ query.input_lower = lower;
341
+ query.input_upper = upper;
263
342
 
264
- wp::bounds3 input_bounds(query.input_lower, query.input_upper);
265
-
266
- // Navigate through the bvh, find the first overlapping leaf node.
267
- while (query.count)
268
- {
343
+ // Navigate through the bvh, find the first overlapping leaf node.
344
+ while (query.count)
345
+ {
269
346
  const int node_index = query.stack[--query.count];
347
+ BVHPackedNodeHalf node_lower = bvh_load_node(bvh.node_lowers, node_index);
348
+ BVHPackedNodeHalf node_upper = bvh_load_node(bvh.node_uppers, node_index);
270
349
 
271
- BVHPackedNodeHalf node_lower = bvh.node_lowers[node_index];
272
- BVHPackedNodeHalf node_upper = bvh.node_uppers[node_index];
273
-
274
- wp::vec3 lower_pos(node_lower.x, node_lower.y, node_lower.z);
275
- wp::vec3 upper_pos(node_upper.x, node_upper.y, node_upper.z);
276
- wp::bounds3 current_bounds(lower_pos, upper_pos);
277
-
278
- if (query.is_ray)
279
- {
280
- float t = 0.0f;
281
- if (!intersect_ray_aabb(query.input_lower, query.input_upper, current_bounds.lower, current_bounds.upper, t))
282
- // Skip this box, it doesn't overlap with our ray.
283
- continue;
284
- }
285
- else
350
+ if (!bvh_query_intersection_test(query, (vec3&)node_lower, (vec3&)node_upper))
286
351
  {
287
- if (!input_bounds.overlaps(current_bounds))
288
- // Skip this box, it doesn't overlap with our target box.
289
- continue;
352
+ continue;
290
353
  }
291
354
 
292
355
  const int left_index = node_lower.i;
293
356
  const int right_index = node_upper.i;
294
-
295
- // Make bounds from this AABB
296
- if (node_lower.b)
297
- {
298
- // found very first leaf index.
357
+ // Make bounds from this AABB
358
+ if (node_lower.b)
359
+ {
360
+ // Reached a leaf node, point to its first primitive
299
361
  // Back up one level and return
362
+ query.primitive_counter = left_index;
300
363
  query.stack[query.count++] = node_index;
301
364
  return query;
302
- }
303
- else
304
- {
305
- query.stack[query.count++] = left_index;
306
- query.stack[query.count++] = right_index;
307
365
  }
308
- }
366
+ else
367
+ {
368
+ query.stack[query.count++] = left_index;
369
+ query.stack[query.count++] = right_index;
370
+ }
371
+ }
309
372
 
310
373
  return query;
311
374
  }
@@ -338,52 +401,100 @@ CUDA_CALLABLE inline void adj_bvh_query_ray(uint64_t id, const vec3& start, cons
338
401
 
339
402
  CUDA_CALLABLE inline bool bvh_query_next(bvh_query_t& query, int& index)
340
403
  {
341
- BVH bvh = query.bvh;
342
-
343
- wp::bounds3 input_bounds(query.input_lower, query.input_upper);
344
-
345
- // Navigate through the bvh, find the first overlapping leaf node.
346
- while (query.count)
347
- {
348
- const int node_index = query.stack[--query.count];
349
- BVHPackedNodeHalf node_lower = bvh.node_lowers[node_index];
350
- BVHPackedNodeHalf node_upper = bvh.node_uppers[node_index];
404
+ BVH bvh = query.bvh;
351
405
 
352
- wp::vec3 lower_pos(node_lower.x, node_lower.y, node_lower.z);
353
- wp::vec3 upper_pos(node_upper.x, node_upper.y, node_upper.z);
354
- wp::bounds3 current_bounds(lower_pos, upper_pos);
406
+ if (query.primitive_counter != -1)
407
+ // currently in a leaf node which is the last node in the stack
408
+ {
409
+ const int node_index = query.stack[query.count - 1];
410
+ BVHPackedNodeHalf node_lower = bvh_load_node(bvh.node_lowers, node_index);
411
+ BVHPackedNodeHalf node_upper = bvh_load_node(bvh.node_uppers, node_index);
355
412
 
356
- if (query.is_ray)
413
+ const int end = node_upper.i;
414
+ for (int primitive_counter = query.primitive_counter; primitive_counter < end; primitive_counter++)
357
415
  {
358
- float t = 0.0f;
359
- if (!intersect_ray_aabb(query.input_lower, query.input_upper, current_bounds.lower, current_bounds.upper, t))
360
- // Skip this box, it doesn't overlap with our ray.
361
- continue;
416
+ int primitive_index = bvh.primitive_indices[primitive_counter];
417
+ if (bvh_query_intersection_test(query, bvh.item_lowers[primitive_index], bvh.item_uppers[primitive_index]))
418
+ {
419
+ if (primitive_counter < end - 1)
420
+ // still need to come back to this leaf node for the leftover primitives
421
+ {
422
+ query.primitive_counter = primitive_counter + 1;
423
+ }
424
+ else
425
+ // no need to come back to this leaf node
426
+ {
427
+ query.count--;
428
+ query.primitive_counter = -1;
429
+ }
430
+ index = primitive_index;
431
+ query.bounds_nr = primitive_index;
432
+
433
+ return true;
434
+ }
362
435
  }
363
- else {
364
- if (!input_bounds.overlaps(current_bounds))
365
- // Skip this box, it doesn't overlap with our target box.
366
- continue;
436
+ // if we reach here that means we have finished the current leaf node without finding intersections
437
+ query.primitive_counter = -1;
438
+ // remove the leaf node from the back of the stack because it is finished
439
+ // and continue the bvh traversal
440
+ query.count--;
441
+ }
442
+
443
+ // Navigate through the bvh, find the first overlapping leaf node.
444
+ while (query.count)
445
+ {
446
+ const int node_index = query.stack[--query.count];
447
+ BVHPackedNodeHalf node_lower = bvh_load_node(bvh.node_lowers, node_index);
448
+ BVHPackedNodeHalf node_upper = bvh_load_node(bvh.node_uppers, node_index);
449
+
450
+ const int left_index = node_lower.i;
451
+ const int right_index = node_upper.i;
452
+
453
+ wp::vec3 lower_pos(node_lower.x, node_lower.y, node_lower.z);
454
+ wp::vec3 upper_pos(node_upper.x, node_upper.y, node_upper.z);
455
+ wp::bounds3 current_bounds(lower_pos, upper_pos);
456
+
457
+ if (!bvh_query_intersection_test(query, (vec3&)node_lower, (vec3&)node_upper))
458
+ {
459
+ continue;
367
460
  }
368
461
 
369
- const int left_index = node_lower.i;
370
- const int right_index = node_upper.i;
371
-
372
- if (node_lower.b)
373
- {
374
- // found leaf
375
- query.bounds_nr = left_index;
376
- index = left_index;
377
- return true;
378
- }
379
- else
380
- {
381
-
382
- query.stack[query.count++] = left_index;
383
- query.stack[query.count++] = right_index;
384
- }
385
- }
386
- return false;
462
+ if (node_lower.b)
463
+ {
464
+ // found leaf, loop through its content primitives
465
+ const int start = left_index;
466
+ const int end = right_index;
467
+
468
+ for (int primitive_counter = start; primitive_counter < end; primitive_counter++)
469
+ {
470
+ int primitive_index = bvh.primitive_indices[primitive_counter];
471
+ if (bvh_query_intersection_test(query, bvh.item_lowers[primitive_index], bvh.item_uppers[primitive_index]))
472
+ {
473
+ if (primitive_counter < end - 1)
474
+ // still need to come back to this leaf node for the leftover primitives
475
+ {
476
+ query.primitive_counter = primitive_counter + 1;
477
+ query.stack[query.count++] = node_index;
478
+ }
479
+ else
480
+ // no need to come back to this leaf node
481
+ {
482
+ query.primitive_counter = -1;
483
+ }
484
+ index = primitive_index;
485
+ query.bounds_nr = primitive_index;
486
+
487
+ return true;
488
+ }
489
+ }
490
+ }
491
+ else
492
+ {
493
+ query.stack[query.count++] = left_index;
494
+ query.stack[query.count++] = right_index;
495
+ }
496
+ }
497
+ return false;
387
498
  }
388
499
 
389
500
 
@@ -421,7 +532,7 @@ CUDA_CALLABLE void bvh_rem_descriptor(uint64_t id);
421
532
 
422
533
  #if !__CUDA_ARCH__
423
534
 
424
- void bvh_create_host(vec3* lowers, vec3* uppers, int num_items, BVH& bvh);
535
+ void bvh_create_host(vec3* lowers, vec3* uppers, int num_items, int constructor_type, BVH& bvh);
425
536
  void bvh_destroy_host(wp::BVH& bvh);
426
537
  void bvh_refit_host(wp::BVH& bvh);
427
538
 
@@ -431,4 +542,3 @@ void bvh_refit_device(uint64_t id);
431
542
  #endif
432
543
 
433
544
  } // namespace wp
434
-
@@ -218,7 +218,7 @@ static std::unique_ptr<llvm::Module> cuda_to_llvm(const std::string& input_file,
218
218
 
219
219
  extern "C" {
220
220
 
221
- WP_API int compile_cpp(const char* cpp_src, const char *input_file, const char* include_dir, const char* output_file, bool debug, bool verify_fp)
221
+ WP_API int compile_cpp(const char* cpp_src, const char *input_file, const char* include_dir, const char* output_file, bool debug, bool verify_fp, bool fuse_fp)
222
222
  {
223
223
  initialize_llvm();
224
224
 
@@ -236,6 +236,10 @@ WP_API int compile_cpp(const char* cpp_src, const char *input_file, const char*
236
236
  const char* CPU = "generic";
237
237
  const char* features = "";
238
238
  llvm::TargetOptions target_options;
239
+ if (fuse_fp)
240
+ target_options.AllowFPOpFusion = llvm::FPOpFusion::Standard;
241
+ else
242
+ target_options.AllowFPOpFusion = llvm::FPOpFusion::Strict;
239
243
  llvm::Reloc::Model relocation_model = llvm::Reloc::PIC_; // Position Independent Code
240
244
  llvm::CodeModel::Model code_model = llvm::CodeModel::Large; // Don't make assumptions about displacement sizes
241
245
  llvm::TargetMachine* target_machine = target->createTargetMachine(target_triple, CPU, features, target_options, relocation_model, code_model);
@@ -409,7 +413,7 @@ WP_API int load_obj(const char* object_file, const char* module_name)
409
413
  SYMBOL(sinhf), SYMBOL_T(sinh, double(*)(double)),
410
414
  SYMBOL(coshf), SYMBOL_T(cosh, double(*)(double)),
411
415
  SYMBOL(tanhf), SYMBOL_T(tanh, double(*)(double)),
412
- SYMBOL(fmaf),
416
+ SYMBOL(fmaf), SYMBOL_T(fma, double(*)(double, double, double)),
413
417
  SYMBOL(memcpy), SYMBOL(memset), SYMBOL(memmove),
414
418
  SYMBOL(_wp_assert),
415
419
  SYMBOL(_wp_isfinite),
warp/native/crt.h CHANGED
@@ -303,6 +303,7 @@ double cosh(double);
303
303
  float tanhf(float);
304
304
  double tanh(double);
305
305
  float fmaf(float, float, float);
306
+ double fma(double, double, double);
306
307
 
307
308
  // stddef.h
308
309
  #if defined(_WIN32)
warp/native/cuda_util.cpp CHANGED
@@ -102,6 +102,11 @@ static PFN_cuGraphicsGLRegisterBuffer_v3000 pfn_cuGraphicsGLRegisterBuffer;
102
102
  static PFN_cuGraphicsUnregisterResource_v3000 pfn_cuGraphicsUnregisterResource;
103
103
  static PFN_cuModuleGetGlobal_v3020 pfn_cuModuleGetGlobal;
104
104
  static PFN_cuFuncSetAttribute_v9000 pfn_cuFuncSetAttribute;
105
+ static PFN_cuIpcGetEventHandle_v4010 pfn_cuIpcGetEventHandle;
106
+ static PFN_cuIpcOpenEventHandle_v4010 pfn_cuIpcOpenEventHandle;
107
+ static PFN_cuIpcGetMemHandle_v4010 pfn_cuIpcGetMemHandle;
108
+ static PFN_cuIpcOpenMemHandle_v11000 pfn_cuIpcOpenMemHandle;
109
+ static PFN_cuIpcCloseMemHandle_v4010 pfn_cuIpcCloseMemHandle;
105
110
 
106
111
  static bool cuda_driver_initialized = false;
107
112
 
@@ -238,6 +243,11 @@ bool init_cuda_driver()
238
243
  get_driver_entry_point("cuGraphicsUnregisterResource", 3000, &(void*&)pfn_cuGraphicsUnregisterResource);
239
244
  get_driver_entry_point("cuModuleGetGlobal", 3020, &(void*&)pfn_cuModuleGetGlobal);
240
245
  get_driver_entry_point("cuFuncSetAttribute", 9000, &(void*&)pfn_cuFuncSetAttribute);
246
+ get_driver_entry_point("cuIpcGetEventHandle", 4010, &(void*&)pfn_cuIpcGetEventHandle);
247
+ get_driver_entry_point("cuIpcOpenEventHandle", 4010, &(void*&)pfn_cuIpcOpenEventHandle);
248
+ get_driver_entry_point("cuIpcGetMemHandle", 4010, &(void*&)pfn_cuIpcGetMemHandle);
249
+ get_driver_entry_point("cuIpcOpenMemHandle", 11000, &(void*&)pfn_cuIpcOpenMemHandle);
250
+ get_driver_entry_point("cuIpcCloseMemHandle", 4010, &(void*&)pfn_cuIpcCloseMemHandle);
241
251
 
242
252
  if (pfn_cuInit)
243
253
  cuda_driver_initialized = check_cu(pfn_cuInit(0));
@@ -585,4 +595,29 @@ CUresult cuFuncSetAttribute_f(CUfunction hfunc, CUfunction_attribute attrib, int
585
595
  return pfn_cuFuncSetAttribute ? pfn_cuFuncSetAttribute(hfunc, attrib, value) : DRIVER_ENTRY_POINT_ERROR;
586
596
  }
587
597
 
598
+ CUresult cuIpcGetEventHandle_f(CUipcEventHandle *pHandle, CUevent event)
599
+ {
600
+ return pfn_cuIpcGetEventHandle ? pfn_cuIpcGetEventHandle(pHandle, event) : DRIVER_ENTRY_POINT_ERROR;
601
+ }
602
+
603
+ CUresult cuIpcOpenEventHandle_f(CUevent *phEvent, CUipcEventHandle handle)
604
+ {
605
+ return pfn_cuIpcOpenEventHandle ? pfn_cuIpcOpenEventHandle(phEvent, handle) : DRIVER_ENTRY_POINT_ERROR;
606
+ }
607
+
608
+ CUresult cuIpcGetMemHandle_f(CUipcMemHandle *pHandle, CUdeviceptr dptr)
609
+ {
610
+ return pfn_cuIpcGetMemHandle ? pfn_cuIpcGetMemHandle(pHandle, dptr) : DRIVER_ENTRY_POINT_ERROR;
611
+ }
612
+
613
+ CUresult cuIpcOpenMemHandle_f(CUdeviceptr *pdptr, CUipcMemHandle handle, unsigned int flags)
614
+ {
615
+ return pfn_cuIpcOpenMemHandle ? pfn_cuIpcOpenMemHandle(pdptr, handle, flags) : DRIVER_ENTRY_POINT_ERROR;
616
+ }
617
+
618
+ CUresult cuIpcCloseMemHandle_f(CUdeviceptr dptr)
619
+ {
620
+ return pfn_cuIpcCloseMemHandle ? pfn_cuIpcCloseMemHandle(dptr) : DRIVER_ENTRY_POINT_ERROR;
621
+ }
622
+
588
623
  #endif // WP_ENABLE_CUDA
warp/native/cuda_util.h CHANGED
@@ -101,6 +101,11 @@ CUresult cuGraphicsGLRegisterBuffer_f(CUgraphicsResource *pCudaResource, unsigne
101
101
  CUresult cuGraphicsUnregisterResource_f(CUgraphicsResource resource);
102
102
  CUresult cuModuleGetGlobal_f(CUdeviceptr* dptr, size_t* bytes, CUmodule hmod, const char* name );
103
103
  CUresult cuFuncSetAttribute_f(CUfunction hfunc, CUfunction_attribute attrib, int value);
104
+ CUresult cuIpcGetEventHandle_f(CUipcEventHandle *pHandle, CUevent event);
105
+ CUresult cuIpcOpenEventHandle_f(CUevent *phEvent, CUipcEventHandle handle);
106
+ CUresult cuIpcGetMemHandle_f(CUipcMemHandle *pHandle, CUdeviceptr dptr);
107
+ CUresult cuIpcOpenMemHandle_f(CUdeviceptr *pdptr, CUipcMemHandle handle, unsigned int flags);
108
+ CUresult cuIpcCloseMemHandle_f(CUdeviceptr dptr);
104
109
 
105
110
  bool init_cuda_driver();
106
111
  bool is_cuda_driver_initialized();
warp/native/exports.h CHANGED
@@ -1001,46 +1001,6 @@ WP_API void builtin_spatial_top_spatial_vectord(spatial_vectord& svec, vec3d* re
1001
1001
  WP_API void builtin_spatial_bottom_spatial_vectorh(spatial_vectorh& svec, vec3h* ret) { *ret = wp::spatial_bottom(svec); }
1002
1002
  WP_API void builtin_spatial_bottom_spatial_vectorf(spatial_vectorf& svec, vec3f* ret) { *ret = wp::spatial_bottom(svec); }
1003
1003
  WP_API void builtin_spatial_bottom_spatial_vectord(spatial_vectord& svec, vec3d* ret) { *ret = wp::spatial_bottom(svec); }
1004
- WP_API void builtin_volume_sample_f_uint64_vec3f_int32(uint64 id, vec3f& uvw, int32 sampling_mode, float* ret) { *ret = wp::volume_sample_f(id, uvw, sampling_mode); }
1005
- WP_API void builtin_volume_sample_grad_f_uint64_vec3f_int32_vec3f(uint64 id, vec3f& uvw, int32 sampling_mode, vec3f& grad, float* ret) { *ret = wp::volume_sample_grad_f(id, uvw, sampling_mode, grad); }
1006
- WP_API void builtin_volume_lookup_f_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, float* ret) { *ret = wp::volume_lookup_f(id, i, j, k); }
1007
- WP_API void builtin_volume_sample_v_uint64_vec3f_int32(uint64 id, vec3f& uvw, int32 sampling_mode, vec3f* ret) { *ret = wp::volume_sample_v(id, uvw, sampling_mode); }
1008
- WP_API void builtin_volume_lookup_v_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, vec3f* ret) { *ret = wp::volume_lookup_v(id, i, j, k); }
1009
- WP_API void builtin_volume_sample_i_uint64_vec3f(uint64 id, vec3f& uvw, int* ret) { *ret = wp::volume_sample_i(id, uvw); }
1010
- WP_API void builtin_volume_lookup_i_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, int* ret) { *ret = wp::volume_lookup_i(id, i, j, k); }
1011
- WP_API void builtin_volume_lookup_index_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, int32* ret) { *ret = wp::volume_lookup_index(id, i, j, k); }
1012
- WP_API void builtin_volume_index_to_world_uint64_vec3f(uint64 id, vec3f& uvw, vec3f* ret) { *ret = wp::volume_index_to_world(id, uvw); }
1013
- WP_API void builtin_volume_world_to_index_uint64_vec3f(uint64 id, vec3f& xyz, vec3f* ret) { *ret = wp::volume_world_to_index(id, xyz); }
1014
- WP_API void builtin_volume_index_to_world_dir_uint64_vec3f(uint64 id, vec3f& uvw, vec3f* ret) { *ret = wp::volume_index_to_world_dir(id, uvw); }
1015
- WP_API void builtin_volume_world_to_index_dir_uint64_vec3f(uint64 id, vec3f& xyz, vec3f* ret) { *ret = wp::volume_world_to_index_dir(id, xyz); }
1016
- WP_API void builtin_rand_init_int32(int32 seed, uint32* ret) { *ret = wp::rand_init(seed); }
1017
- WP_API void builtin_rand_init_int32_int32(int32 seed, int32 offset, uint32* ret) { *ret = wp::rand_init(seed, offset); }
1018
- WP_API void builtin_randi_uint32(uint32 state, int* ret) { *ret = wp::randi(state); }
1019
- WP_API void builtin_randi_uint32_int32_int32(uint32 state, int32 low, int32 high, int* ret) { *ret = wp::randi(state, low, high); }
1020
- WP_API void builtin_randf_uint32(uint32 state, float* ret) { *ret = wp::randf(state); }
1021
- WP_API void builtin_randf_uint32_float32_float32(uint32 state, float32 low, float32 high, float* ret) { *ret = wp::randf(state, low, high); }
1022
- WP_API void builtin_randn_uint32(uint32 state, float* ret) { *ret = wp::randn(state); }
1023
- WP_API void builtin_sample_triangle_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_triangle(state); }
1024
- WP_API void builtin_sample_unit_ring_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_ring(state); }
1025
- WP_API void builtin_sample_unit_disk_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_disk(state); }
1026
- WP_API void builtin_sample_unit_sphere_surface_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_sphere_surface(state); }
1027
- WP_API void builtin_sample_unit_sphere_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_sphere(state); }
1028
- WP_API void builtin_sample_unit_hemisphere_surface_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_hemisphere_surface(state); }
1029
- WP_API void builtin_sample_unit_hemisphere_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_hemisphere(state); }
1030
- WP_API void builtin_sample_unit_square_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_square(state); }
1031
- WP_API void builtin_sample_unit_cube_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_cube(state); }
1032
- WP_API void builtin_poisson_uint32_float32(uint32 state, float32 lam, uint32* ret) { *ret = wp::poisson(state, lam); }
1033
- WP_API void builtin_noise_uint32_float32(uint32 state, float32 x, float* ret) { *ret = wp::noise(state, x); }
1034
- WP_API void builtin_noise_uint32_vec2f(uint32 state, vec2f& xy, float* ret) { *ret = wp::noise(state, xy); }
1035
- WP_API void builtin_noise_uint32_vec3f(uint32 state, vec3f& xyz, float* ret) { *ret = wp::noise(state, xyz); }
1036
- WP_API void builtin_noise_uint32_vec4f(uint32 state, vec4f& xyzt, float* ret) { *ret = wp::noise(state, xyzt); }
1037
- WP_API void builtin_pnoise_uint32_float32_int32(uint32 state, float32 x, int32 px, float* ret) { *ret = wp::pnoise(state, x, px); }
1038
- WP_API void builtin_pnoise_uint32_vec2f_int32_int32(uint32 state, vec2f& xy, int32 px, int32 py, float* ret) { *ret = wp::pnoise(state, xy, px, py); }
1039
- WP_API void builtin_pnoise_uint32_vec3f_int32_int32_int32(uint32 state, vec3f& xyz, int32 px, int32 py, int32 pz, float* ret) { *ret = wp::pnoise(state, xyz, px, py, pz); }
1040
- WP_API void builtin_pnoise_uint32_vec4f_int32_int32_int32_int32(uint32 state, vec4f& xyzt, int32 px, int32 py, int32 pz, int32 pt, float* ret) { *ret = wp::pnoise(state, xyzt, px, py, pz, pt); }
1041
- WP_API void builtin_curlnoise_uint32_vec2f_uint32_float32_float32(uint32 state, vec2f& xy, uint32 octaves, float32 lacunarity, float32 gain, vec2f* ret) { *ret = wp::curlnoise(state, xy, octaves, lacunarity, gain); }
1042
- WP_API void builtin_curlnoise_uint32_vec3f_uint32_float32_float32(uint32 state, vec3f& xyz, uint32 octaves, float32 lacunarity, float32 gain, vec3f* ret) { *ret = wp::curlnoise(state, xyz, octaves, lacunarity, gain); }
1043
- WP_API void builtin_curlnoise_uint32_vec4f_uint32_float32_float32(uint32 state, vec4f& xyzt, uint32 octaves, float32 lacunarity, float32 gain, vec3f* ret) { *ret = wp::curlnoise(state, xyzt, octaves, lacunarity, gain); }
1044
1004
  WP_API void builtin_assign_vec2h_int32_float16(vec2h& a, int32 i, float16 value, vec2h* ret) { *ret = wp::assign(a, i, value); }
1045
1005
  WP_API void builtin_assign_vec3h_int32_float16(vec3h& a, int32 i, float16 value, vec3h* ret) { *ret = wp::assign(a, i, value); }
1046
1006
  WP_API void builtin_assign_vec4h_int32_float16(vec4h& a, int32 i, float16 value, vec4h* ret) { *ret = wp::assign(a, i, value); }
@@ -1104,6 +1064,46 @@ WP_API void builtin_assign_mat22d_int32_vec2d(mat22d& a, int32 i, vec2d& value,
1104
1064
  // Double-precision matrix assignment wrappers taking an index `i` and a vector `value`
  // (mat33d/vec3d, mat44d/vec4d, spatial_matrixd/spatial_vectord).
  // Each forwards (a, i, value) to wp::assign and stores the returned matrix in *ret.
  // NOTE(review): presumably `i` selects a row - confirm against wp::assign.
  WP_API void builtin_assign_mat33d_int32_vec3d(mat33d& a, int32 i, vec3d& value, mat33d* ret) { *ret = wp::assign(a, i, value); }
1105
1065
  WP_API void builtin_assign_mat44d_int32_vec4d(mat44d& a, int32 i, vec4d& value, mat44d* ret) { *ret = wp::assign(a, i, value); }
1106
1066
  WP_API void builtin_assign_spatial_matrixd_int32_spatial_vectord(spatial_matrixd& a, int32 i, spatial_vectord& value, spatial_matrixd* ret) { *ret = wp::assign(a, i, value); }
1067
// Added by this hunk: ret-pointer wrappers for the wp::volume_* functions.
// Every wrapper takes the volume as a uint64 `id`, forwards its arguments unchanged to the
// wp:: function named in its body, and stores the returned value in *ret.
// Result types per signature: *_f -> float, *_v -> vec3f, *_i -> int, lookup_index -> int32,
// the four index/world transforms -> vec3f.
+ WP_API void builtin_volume_sample_f_uint64_vec3f_int32(uint64 id, vec3f& uvw, int32 sampling_mode, float* ret) { *ret = wp::volume_sample_f(id, uvw, sampling_mode); }
1068
// `grad` is handed to wp::volume_sample_grad_f by non-const reference.
// NOTE(review): presumably it receives the gradient as a second output - confirm.
+ WP_API void builtin_volume_sample_grad_f_uint64_vec3f_int32_vec3f(uint64 id, vec3f& uvw, int32 sampling_mode, vec3f& grad, float* ret) { *ret = wp::volume_sample_grad_f(id, uvw, sampling_mode, grad); }
1069
+ WP_API void builtin_volume_lookup_f_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, float* ret) { *ret = wp::volume_lookup_f(id, i, j, k); }
1070
+ WP_API void builtin_volume_sample_v_uint64_vec3f_int32(uint64 id, vec3f& uvw, int32 sampling_mode, vec3f* ret) { *ret = wp::volume_sample_v(id, uvw, sampling_mode); }
1071
+ WP_API void builtin_volume_lookup_v_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, vec3f* ret) { *ret = wp::volume_lookup_v(id, i, j, k); }
1072
// Unlike the _f and _v samplers above, the integer sampler takes no sampling_mode argument.
+ WP_API void builtin_volume_sample_i_uint64_vec3f(uint64 id, vec3f& uvw, int* ret) { *ret = wp::volume_sample_i(id, uvw); }
1073
+ WP_API void builtin_volume_lookup_i_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, int* ret) { *ret = wp::volume_lookup_i(id, i, j, k); }
1074
+ WP_API void builtin_volume_lookup_index_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, int32* ret) { *ret = wp::volume_lookup_index(id, i, j, k); }
1075
+ WP_API void builtin_volume_index_to_world_uint64_vec3f(uint64 id, vec3f& uvw, vec3f* ret) { *ret = wp::volume_index_to_world(id, uvw); }
1076
+ WP_API void builtin_volume_world_to_index_uint64_vec3f(uint64 id, vec3f& xyz, vec3f* ret) { *ret = wp::volume_world_to_index(id, xyz); }
1077
+ WP_API void builtin_volume_index_to_world_dir_uint64_vec3f(uint64 id, vec3f& uvw, vec3f* ret) { *ret = wp::volume_index_to_world_dir(id, uvw); }
1078
+ WP_API void builtin_volume_world_to_index_dir_uint64_vec3f(uint64 id, vec3f& xyz, vec3f* ret) { *ret = wp::volume_world_to_index_dir(id, xyz); }
1079
// Added by this hunk: ret-pointer wrappers for the random-number and sampling functions.
// Each forwards its arguments unchanged to the wp:: function named in its body and stores
// the returned value in *ret.
// The rand_init wrappers write a uint32 result; every other wrapper here takes a uint32 `state` by value.
+ WP_API void builtin_rand_init_int32(int32 seed, uint32* ret) { *ret = wp::rand_init(seed); }
1080
+ WP_API void builtin_rand_init_int32_int32(int32 seed, int32 offset, uint32* ret) { *ret = wp::rand_init(seed, offset); }
1081
// Scalar draws: randi -> int, randf / randn -> float; the three-argument forms pass (low, high) through.
+ WP_API void builtin_randi_uint32(uint32 state, int* ret) { *ret = wp::randi(state); }
1082
+ WP_API void builtin_randi_uint32_int32_int32(uint32 state, int32 low, int32 high, int* ret) { *ret = wp::randi(state, low, high); }
1083
+ WP_API void builtin_randf_uint32(uint32 state, float* ret) { *ret = wp::randf(state); }
1084
+ WP_API void builtin_randf_uint32_float32_float32(uint32 state, float32 low, float32 high, float* ret) { *ret = wp::randf(state, low, high); }
1085
+ WP_API void builtin_randn_uint32(uint32 state, float* ret) { *ret = wp::randn(state); }
1086
// Geometric samplers: the triangle / ring / disk / square variants write a vec2f,
// the sphere / hemisphere / cube variants write a vec3f.
+ WP_API void builtin_sample_triangle_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_triangle(state); }
1087
+ WP_API void builtin_sample_unit_ring_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_ring(state); }
1088
+ WP_API void builtin_sample_unit_disk_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_disk(state); }
1089
+ WP_API void builtin_sample_unit_sphere_surface_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_sphere_surface(state); }
1090
+ WP_API void builtin_sample_unit_sphere_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_sphere(state); }
1091
+ WP_API void builtin_sample_unit_hemisphere_surface_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_hemisphere_surface(state); }
1092
+ WP_API void builtin_sample_unit_hemisphere_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_hemisphere(state); }
1093
+ WP_API void builtin_sample_unit_square_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_square(state); }
1094
+ WP_API void builtin_sample_unit_cube_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_cube(state); }
1095
// Poisson draw with parameter `lam`; the result is written as uint32.
+ WP_API void builtin_poisson_uint32_float32(uint32 state, float32 lam, uint32* ret) { *ret = wp::poisson(state, lam); }
1096
// Added by this hunk: ret-pointer wrappers for the wp::noise, wp::pnoise and wp::curlnoise
// overloads. Each forwards its arguments unchanged to the wp:: function named in its body
// and stores the returned value in *ret.
// noise: one overload per input type (float32, vec2f, vec3f, vec4f), float result.
+ WP_API void builtin_noise_uint32_float32(uint32 state, float32 x, float* ret) { *ret = wp::noise(state, x); }
1097
+ WP_API void builtin_noise_uint32_vec2f(uint32 state, vec2f& xy, float* ret) { *ret = wp::noise(state, xy); }
1098
+ WP_API void builtin_noise_uint32_vec3f(uint32 state, vec3f& xyz, float* ret) { *ret = wp::noise(state, xyz); }
1099
+ WP_API void builtin_noise_uint32_vec4f(uint32 state, vec4f& xyzt, float* ret) { *ret = wp::noise(state, xyzt); }
1100
// pnoise: same input types as noise plus one int32 argument per input component (px, py, pz, pt).
// NOTE(review): presumably these are the periods of the periodic noise - confirm against wp::pnoise.
+ WP_API void builtin_pnoise_uint32_float32_int32(uint32 state, float32 x, int32 px, float* ret) { *ret = wp::pnoise(state, x, px); }
1101
+ WP_API void builtin_pnoise_uint32_vec2f_int32_int32(uint32 state, vec2f& xy, int32 px, int32 py, float* ret) { *ret = wp::pnoise(state, xy, px, py); }
1102
+ WP_API void builtin_pnoise_uint32_vec3f_int32_int32_int32(uint32 state, vec3f& xyz, int32 px, int32 py, int32 pz, float* ret) { *ret = wp::pnoise(state, xyz, px, py, pz); }
1103
+ WP_API void builtin_pnoise_uint32_vec4f_int32_int32_int32_int32(uint32 state, vec4f& xyzt, int32 px, int32 py, int32 pz, int32 pt, float* ret) { *ret = wp::pnoise(state, xyzt, px, py, pz, pt); }
1104
// curlnoise: takes (octaves, lacunarity, gain) and writes a vector result.
// Per the signatures, the vec2f overload writes a vec2f while both the vec3f and vec4f overloads write a vec3f.
+ WP_API void builtin_curlnoise_uint32_vec2f_uint32_float32_float32(uint32 state, vec2f& xy, uint32 octaves, float32 lacunarity, float32 gain, vec2f* ret) { *ret = wp::curlnoise(state, xy, octaves, lacunarity, gain); }
1105
+ WP_API void builtin_curlnoise_uint32_vec3f_uint32_float32_float32(uint32 state, vec3f& xyz, uint32 octaves, float32 lacunarity, float32 gain, vec3f* ret) { *ret = wp::curlnoise(state, xyz, octaves, lacunarity, gain); }
1106
+ WP_API void builtin_curlnoise_uint32_vec4f_uint32_float32_float32(uint32 state, vec4f& xyzt, uint32 octaves, float32 lacunarity, float32 gain, vec3f* ret) { *ret = wp::curlnoise(state, xyzt, octaves, lacunarity, gain); }
1107
1107
  // Half-precision vector element read wrappers (vec2h / vec3h / vec4h).
  // Each forwards (a, i) to wp::extract and stores the returned float16 in *ret.
  WP_API void builtin_extract_vec2h_int32(vec2h& a, int32 i, float16* ret) { *ret = wp::extract(a, i); }
1108
1108
  WP_API void builtin_extract_vec3h_int32(vec3h& a, int32 i, float16* ret) { *ret = wp::extract(a, i); }
1109
1109
  WP_API void builtin_extract_vec4h_int32(vec4h& a, int32 i, float16* ret) { *ret = wp::extract(a, i); }
warp/native/intersect.h CHANGED
@@ -156,6 +156,23 @@ CUDA_CALLABLE inline bool intersect_ray_aabb(const vec3& pos, const vec3& rcp_di
156
156
  return hit;
157
157
  }
158
158
 
159
+ CUDA_CALLABLE inline bool intersect_aabb_aabb(const vec3& a_lower, const vec3& a_upper, const vec3& b_lower, const vec3& b_upper)
160
+ {
161
+ if (a_lower[0] > b_upper[0] ||
162
+ a_lower[1] > b_upper[1] ||
163
+ a_lower[2] > b_upper[2] ||
164
+ a_upper[0] < b_lower[0] ||
165
+ a_upper[1] < b_lower[1] ||
166
+ a_upper[2] < b_lower[2])
167
+ {
168
+ return false;
169
+ }
170
+ else
171
+ {
172
+ return true;
173
+ }
174
+ }
175
+
159
176
 
160
177
  // Moller and Trumbore's method
161
178
  CUDA_CALLABLE inline bool intersect_ray_tri_moller(const vec3& p, const vec3& dir, const vec3& a, const vec3& b, const vec3& c, float& t, float& u, float& v, float& w, float& sign, vec3* normal)