warp-lang 1.5.1__py3-none-macosx_10_13_universal2.whl → 1.6.1__py3-none-macosx_10_13_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +5 -0
- warp/autograd.py +414 -191
- warp/bin/libwarp-clang.dylib +0 -0
- warp/bin/libwarp.dylib +0 -0
- warp/build.py +40 -12
- warp/build_dll.py +13 -6
- warp/builtins.py +1077 -481
- warp/codegen.py +250 -122
- warp/config.py +65 -21
- warp/context.py +500 -149
- warp/examples/assets/square_cloth.usd +0 -0
- warp/examples/benchmarks/benchmark_gemm.py +27 -18
- warp/examples/benchmarks/benchmark_interop_paddle.py +3 -3
- warp/examples/benchmarks/benchmark_interop_torch.py +3 -3
- warp/examples/core/example_marching_cubes.py +1 -1
- warp/examples/core/example_mesh.py +1 -1
- warp/examples/core/example_torch.py +18 -34
- warp/examples/core/example_wave.py +1 -1
- warp/examples/fem/example_apic_fluid.py +1 -0
- warp/examples/fem/example_mixed_elasticity.py +1 -1
- warp/examples/optim/example_bounce.py +1 -1
- warp/examples/optim/example_cloth_throw.py +1 -1
- warp/examples/optim/example_diffray.py +4 -15
- warp/examples/optim/example_drone.py +1 -1
- warp/examples/optim/example_softbody_properties.py +392 -0
- warp/examples/optim/example_trajectory.py +1 -3
- warp/examples/optim/example_walker.py +5 -0
- warp/examples/sim/example_cartpole.py +0 -2
- warp/examples/sim/example_cloth_self_contact.py +314 -0
- warp/examples/sim/example_granular_collision_sdf.py +4 -5
- warp/examples/sim/example_jacobian_ik.py +0 -2
- warp/examples/sim/example_quadruped.py +5 -2
- warp/examples/tile/example_tile_cholesky.py +79 -0
- warp/examples/tile/example_tile_convolution.py +2 -2
- warp/examples/tile/example_tile_fft.py +2 -2
- warp/examples/tile/example_tile_filtering.py +3 -3
- warp/examples/tile/example_tile_matmul.py +4 -4
- warp/examples/tile/example_tile_mlp.py +12 -12
- warp/examples/tile/example_tile_nbody.py +191 -0
- warp/examples/tile/example_tile_walker.py +319 -0
- warp/math.py +147 -0
- warp/native/array.h +12 -0
- warp/native/builtin.h +0 -1
- warp/native/bvh.cpp +149 -70
- warp/native/bvh.cu +287 -68
- warp/native/bvh.h +195 -85
- warp/native/clang/clang.cpp +6 -2
- warp/native/crt.h +1 -0
- warp/native/cuda_util.cpp +35 -0
- warp/native/cuda_util.h +5 -0
- warp/native/exports.h +40 -40
- warp/native/intersect.h +17 -0
- warp/native/mat.h +57 -3
- warp/native/mathdx.cpp +19 -0
- warp/native/mesh.cpp +25 -8
- warp/native/mesh.cu +153 -101
- warp/native/mesh.h +482 -403
- warp/native/quat.h +40 -0
- warp/native/solid_angle.h +7 -0
- warp/native/sort.cpp +85 -0
- warp/native/sort.cu +34 -0
- warp/native/sort.h +3 -1
- warp/native/spatial.h +11 -0
- warp/native/tile.h +1189 -664
- warp/native/tile_reduce.h +8 -6
- warp/native/vec.h +41 -0
- warp/native/warp.cpp +8 -1
- warp/native/warp.cu +263 -40
- warp/native/warp.h +19 -5
- warp/optim/linear.py +22 -4
- warp/render/render_opengl.py +132 -59
- warp/render/render_usd.py +10 -2
- warp/sim/__init__.py +6 -1
- warp/sim/collide.py +289 -32
- warp/sim/import_urdf.py +20 -5
- warp/sim/integrator_euler.py +25 -7
- warp/sim/integrator_featherstone.py +147 -35
- warp/sim/integrator_vbd.py +842 -40
- warp/sim/model.py +173 -112
- warp/sim/render.py +2 -2
- warp/stubs.py +249 -116
- warp/tape.py +28 -30
- warp/tests/aux_test_module_unload.py +15 -0
- warp/tests/{test_sim_grad.py → flaky_test_sim_grad.py} +104 -63
- warp/tests/test_array.py +100 -0
- warp/tests/test_assert.py +242 -0
- warp/tests/test_codegen.py +14 -61
- warp/tests/test_collision.py +8 -8
- warp/tests/test_examples.py +16 -1
- warp/tests/test_grad_debug.py +87 -2
- warp/tests/test_hash_grid.py +1 -1
- warp/tests/test_ipc.py +116 -0
- warp/tests/test_launch.py +77 -26
- warp/tests/test_mat.py +213 -168
- warp/tests/test_math.py +47 -1
- warp/tests/test_matmul.py +11 -7
- warp/tests/test_matmul_lite.py +4 -4
- warp/tests/test_mesh.py +84 -60
- warp/tests/test_mesh_query_aabb.py +165 -0
- warp/tests/test_mesh_query_point.py +328 -286
- warp/tests/test_mesh_query_ray.py +134 -121
- warp/tests/test_mlp.py +2 -2
- warp/tests/test_operators.py +43 -0
- warp/tests/test_overwrite.py +6 -5
- warp/tests/test_quat.py +77 -0
- warp/tests/test_reload.py +29 -0
- warp/tests/test_sim_grad_bounce_linear.py +204 -0
- warp/tests/test_static.py +16 -0
- warp/tests/test_tape.py +25 -0
- warp/tests/test_tile.py +134 -191
- warp/tests/test_tile_load.py +399 -0
- warp/tests/test_tile_mathdx.py +61 -8
- warp/tests/test_tile_mlp.py +17 -17
- warp/tests/test_tile_reduce.py +24 -18
- warp/tests/test_tile_shared_memory.py +66 -17
- warp/tests/test_tile_view.py +165 -0
- warp/tests/test_torch.py +35 -0
- warp/tests/test_utils.py +36 -24
- warp/tests/test_vec.py +110 -0
- warp/tests/unittest_suites.py +29 -4
- warp/tests/unittest_utils.py +30 -11
- warp/thirdparty/unittest_parallel.py +5 -2
- warp/types.py +419 -111
- warp/utils.py +9 -5
- {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/METADATA +86 -45
- {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/RECORD +129 -118
- {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/WHEEL +1 -1
- warp/examples/benchmarks/benchmark_tile.py +0 -179
- warp/native/tile_gemm.h +0 -341
- {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/LICENSE.md +0 -0
- {warp_lang-1.5.1.dist-info → warp_lang-1.6.1.dist-info}/top_level.txt +0 -0
warp/native/bvh.h
CHANGED
|
@@ -11,6 +11,14 @@
|
|
|
11
11
|
#include "builtin.h"
|
|
12
12
|
#include "intersect.h"
|
|
13
13
|
|
|
14
|
+
#define BVH_LEAF_SIZE (4)
|
|
15
|
+
#define SAH_NUM_BUCKETS (16)
|
|
16
|
+
#define USE_LOAD4
|
|
17
|
+
|
|
18
|
+
#define BVH_CONSTRUCTOR_SAH (0)
|
|
19
|
+
#define BVH_CONSTRUCTOR_MEDIAN (1)
|
|
20
|
+
#define BVH_CONSTRUCTOR_LBVH (2)
|
|
21
|
+
|
|
14
22
|
namespace wp
|
|
15
23
|
{
|
|
16
24
|
|
|
@@ -72,12 +80,38 @@ struct bounds3
|
|
|
72
80
|
}
|
|
73
81
|
}
|
|
74
82
|
|
|
83
|
+
CUDA_CALLABLE inline bool overlaps(const vec3& b_lower, const vec3& b_upper) const
|
|
84
|
+
{
|
|
85
|
+
if (lower[0] > b_upper[0] ||
|
|
86
|
+
lower[1] > b_upper[1] ||
|
|
87
|
+
lower[2] > b_upper[2] ||
|
|
88
|
+
upper[0] < b_lower[0] ||
|
|
89
|
+
upper[1] < b_lower[1] ||
|
|
90
|
+
upper[2] < b_lower[2])
|
|
91
|
+
{
|
|
92
|
+
return false;
|
|
93
|
+
}
|
|
94
|
+
else
|
|
95
|
+
{
|
|
96
|
+
return true;
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
|
|
75
100
|
CUDA_CALLABLE inline void add_point(const vec3& p)
|
|
76
101
|
{
|
|
77
102
|
lower = min(lower, p);
|
|
78
103
|
upper = max(upper, p);
|
|
79
104
|
}
|
|
80
105
|
|
|
106
|
+
CUDA_CALLABLE inline void add_bounds(const vec3& lower_other, const vec3& upper_other)
|
|
107
|
+
{
|
|
108
|
+
// lower_other will only impact the lower of the new bounds
|
|
109
|
+
// upper_other will only impact the upper of the new bounds
|
|
110
|
+
// this costs only half of the computation of adding lower_other and upper_other separately
|
|
111
|
+
lower = min(lower, lower_other);
|
|
112
|
+
upper = max(upper, upper_other);
|
|
113
|
+
}
|
|
114
|
+
|
|
81
115
|
CUDA_CALLABLE inline float area() const
|
|
82
116
|
{
|
|
83
117
|
vec3 e = upper-lower;
|
|
@@ -108,6 +142,13 @@ struct BVHPackedNodeHalf
|
|
|
108
142
|
float x;
|
|
109
143
|
float y;
|
|
110
144
|
float z;
|
|
145
|
+
// For non-leaf nodes:
|
|
146
|
+
// - 'lower.i' represents the index of the left child node.
|
|
147
|
+
// - 'upper.i' represents the index of the right child node.
|
|
148
|
+
//
|
|
149
|
+
// For leaf nodes:
|
|
150
|
+
// - 'lower.i' indicates the start index of the primitives in 'primitive_indices'.
|
|
151
|
+
// - 'upper.i' indicates the index just after the last primitive in 'primitive_indices'
|
|
111
152
|
unsigned int i : 31;
|
|
112
153
|
unsigned int b : 1;
|
|
113
154
|
};
|
|
@@ -120,11 +161,15 @@ struct BVH
|
|
|
120
161
|
// used for fast refits
|
|
121
162
|
int* node_parents;
|
|
122
163
|
int* node_counts;
|
|
164
|
+
// reordered primitive indices, corresponding to the ordering of leaf nodes
|
|
165
|
+
int* primitive_indices;
|
|
123
166
|
|
|
124
167
|
int max_depth;
|
|
125
168
|
int max_nodes;
|
|
126
169
|
int num_nodes;
|
|
127
|
-
|
|
170
|
+
// since we use packed leaf nodes, the number of them is no longer the number of items, but variable
|
|
171
|
+
int num_leaf_nodes;
|
|
172
|
+
|
|
128
173
|
// pointer (CPU or GPU) to a single integer index in node_lowers, node_uppers
|
|
129
174
|
// representing the root of the tree, this is not always the first node
|
|
130
175
|
// for bottom-up builders
|
|
@@ -161,6 +206,24 @@ CUDA_CALLABLE inline void make_node(volatile BVHPackedNodeHalf* n, const vec3& b
|
|
|
161
206
|
n->b = (unsigned int)(leaf?1:0);
|
|
162
207
|
}
|
|
163
208
|
|
|
209
|
+
#ifdef __CUDA_ARCH__
|
|
210
|
+
__device__ inline wp::BVHPackedNodeHalf bvh_load_node(const wp::BVHPackedNodeHalf* nodes, int index)
|
|
211
|
+
{
|
|
212
|
+
#ifdef USE_LOAD4
|
|
213
|
+
//return (const wp::BVHPackedNodeHalf&)(__ldg((const float4*)(nodes)+index));
|
|
214
|
+
return (const wp::BVHPackedNodeHalf&)(*((const float4*)(nodes)+index));
|
|
215
|
+
#else
|
|
216
|
+
return nodes[index];
|
|
217
|
+
#endif // USE_LOAD4
|
|
218
|
+
|
|
219
|
+
}
|
|
220
|
+
#else
|
|
221
|
+
inline wp::BVHPackedNodeHalf bvh_load_node(const wp::BVHPackedNodeHalf* nodes, int index)
|
|
222
|
+
{
|
|
223
|
+
return nodes[index];
|
|
224
|
+
}
|
|
225
|
+
#endif // __CUDA_ARCH__
|
|
226
|
+
|
|
164
227
|
CUDA_CALLABLE inline int clz(int x)
|
|
165
228
|
{
|
|
166
229
|
int n;
|
|
@@ -215,7 +278,8 @@ struct bvh_query_t
|
|
|
215
278
|
is_ray(false),
|
|
216
279
|
input_lower(),
|
|
217
280
|
input_upper(),
|
|
218
|
-
bounds_nr(0)
|
|
281
|
+
bounds_nr(0),
|
|
282
|
+
primitive_counter(-1)
|
|
219
283
|
{}
|
|
220
284
|
|
|
221
285
|
// Required for adjoint computations.
|
|
@@ -230,22 +294,37 @@ struct bvh_query_t
|
|
|
230
294
|
int stack[32];
|
|
231
295
|
int count;
|
|
232
296
|
|
|
297
|
+
// >= 0 if currently in a packed leaf node
|
|
298
|
+
int primitive_counter;
|
|
299
|
+
|
|
233
300
|
// inputs
|
|
234
|
-
bool is_ray;
|
|
235
301
|
wp::vec3 input_lower; // start for ray
|
|
236
302
|
wp::vec3 input_upper; // dir for ray
|
|
237
303
|
|
|
238
304
|
int bounds_nr;
|
|
305
|
+
bool is_ray;
|
|
239
306
|
};
|
|
240
307
|
|
|
308
|
+
CUDA_CALLABLE inline bool bvh_query_intersection_test(const bvh_query_t& query, const vec3& node_lower, const vec3& node_upper)
|
|
309
|
+
{
|
|
310
|
+
if (query.is_ray)
|
|
311
|
+
{
|
|
312
|
+
float t = 0.0f;
|
|
313
|
+
return intersect_ray_aabb(query.input_lower, query.input_upper, node_lower, node_upper, t);
|
|
314
|
+
}
|
|
315
|
+
else
|
|
316
|
+
{
|
|
317
|
+
return intersect_aabb_aabb(query.input_lower, query.input_upper, node_lower, node_upper);
|
|
318
|
+
}
|
|
319
|
+
}
|
|
241
320
|
|
|
242
321
|
CUDA_CALLABLE inline bvh_query_t bvh_query(
|
|
243
|
-
|
|
322
|
+
uint64_t id, bool is_ray, const vec3& lower, const vec3& upper)
|
|
244
323
|
{
|
|
245
|
-
|
|
324
|
+
// This routine traverses the BVH tree until it finds
|
|
246
325
|
// the first overlapping bound.
|
|
247
326
|
|
|
248
|
-
|
|
327
|
+
// initialize empty
|
|
249
328
|
bvh_query_t query;
|
|
250
329
|
|
|
251
330
|
query.bounds_nr = -1;
|
|
@@ -255,57 +334,41 @@ CUDA_CALLABLE inline bvh_query_t bvh_query(
|
|
|
255
334
|
query.bvh = bvh;
|
|
256
335
|
query.is_ray = is_ray;
|
|
257
336
|
|
|
258
|
-
|
|
337
|
+
// optimization: make the latest
|
|
259
338
|
query.stack[0] = *bvh.root;
|
|
260
339
|
query.count = 1;
|
|
261
|
-
|
|
262
|
-
|
|
340
|
+
query.input_lower = lower;
|
|
341
|
+
query.input_upper = upper;
|
|
263
342
|
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
while (query.count)
|
|
268
|
-
{
|
|
343
|
+
// Navigate through the bvh, find the first overlapping leaf node.
|
|
344
|
+
while (query.count)
|
|
345
|
+
{
|
|
269
346
|
const int node_index = query.stack[--query.count];
|
|
347
|
+
BVHPackedNodeHalf node_lower = bvh_load_node(bvh.node_lowers, node_index);
|
|
348
|
+
BVHPackedNodeHalf node_upper = bvh_load_node(bvh.node_uppers, node_index);
|
|
270
349
|
|
|
271
|
-
|
|
272
|
-
BVHPackedNodeHalf node_upper = bvh.node_uppers[node_index];
|
|
273
|
-
|
|
274
|
-
wp::vec3 lower_pos(node_lower.x, node_lower.y, node_lower.z);
|
|
275
|
-
wp::vec3 upper_pos(node_upper.x, node_upper.y, node_upper.z);
|
|
276
|
-
wp::bounds3 current_bounds(lower_pos, upper_pos);
|
|
277
|
-
|
|
278
|
-
if (query.is_ray)
|
|
279
|
-
{
|
|
280
|
-
float t = 0.0f;
|
|
281
|
-
if (!intersect_ray_aabb(query.input_lower, query.input_upper, current_bounds.lower, current_bounds.upper, t))
|
|
282
|
-
// Skip this box, it doesn't overlap with our ray.
|
|
283
|
-
continue;
|
|
284
|
-
}
|
|
285
|
-
else
|
|
350
|
+
if (!bvh_query_intersection_test(query, (vec3&)node_lower, (vec3&)node_upper))
|
|
286
351
|
{
|
|
287
|
-
|
|
288
|
-
// Skip this box, it doesn't overlap with our target box.
|
|
289
|
-
continue;
|
|
352
|
+
continue;
|
|
290
353
|
}
|
|
291
354
|
|
|
292
355
|
const int left_index = node_lower.i;
|
|
293
356
|
const int right_index = node_upper.i;
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
// found very first leaf index.
|
|
357
|
+
// Make bounds from this AABB
|
|
358
|
+
if (node_lower.b)
|
|
359
|
+
{
|
|
360
|
+
// Reached a leaf node, point to its first primitive
|
|
299
361
|
// Back up one level and return
|
|
362
|
+
query.primitive_counter = left_index;
|
|
300
363
|
query.stack[query.count++] = node_index;
|
|
301
364
|
return query;
|
|
302
|
-
}
|
|
303
|
-
else
|
|
304
|
-
{
|
|
305
|
-
query.stack[query.count++] = left_index;
|
|
306
|
-
query.stack[query.count++] = right_index;
|
|
307
365
|
}
|
|
308
|
-
|
|
366
|
+
else
|
|
367
|
+
{
|
|
368
|
+
query.stack[query.count++] = left_index;
|
|
369
|
+
query.stack[query.count++] = right_index;
|
|
370
|
+
}
|
|
371
|
+
}
|
|
309
372
|
|
|
310
373
|
return query;
|
|
311
374
|
}
|
|
@@ -338,52 +401,100 @@ CUDA_CALLABLE inline void adj_bvh_query_ray(uint64_t id, const vec3& start, cons
|
|
|
338
401
|
|
|
339
402
|
CUDA_CALLABLE inline bool bvh_query_next(bvh_query_t& query, int& index)
|
|
340
403
|
{
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
wp::bounds3 input_bounds(query.input_lower, query.input_upper);
|
|
344
|
-
|
|
345
|
-
// Navigate through the bvh, find the first overlapping leaf node.
|
|
346
|
-
while (query.count)
|
|
347
|
-
{
|
|
348
|
-
const int node_index = query.stack[--query.count];
|
|
349
|
-
BVHPackedNodeHalf node_lower = bvh.node_lowers[node_index];
|
|
350
|
-
BVHPackedNodeHalf node_upper = bvh.node_uppers[node_index];
|
|
404
|
+
BVH bvh = query.bvh;
|
|
351
405
|
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
406
|
+
if (query.primitive_counter != -1)
|
|
407
|
+
// currently in a leaf node which is the last node in the stack
|
|
408
|
+
{
|
|
409
|
+
const int node_index = query.stack[query.count - 1];
|
|
410
|
+
BVHPackedNodeHalf node_lower = bvh_load_node(bvh.node_lowers, node_index);
|
|
411
|
+
BVHPackedNodeHalf node_upper = bvh_load_node(bvh.node_uppers, node_index);
|
|
355
412
|
|
|
356
|
-
|
|
413
|
+
const int end = node_upper.i;
|
|
414
|
+
for (int primitive_counter = query.primitive_counter; primitive_counter < end; primitive_counter++)
|
|
357
415
|
{
|
|
358
|
-
|
|
359
|
-
if (
|
|
360
|
-
|
|
361
|
-
|
|
416
|
+
int primitive_index = bvh.primitive_indices[primitive_counter];
|
|
417
|
+
if (bvh_query_intersection_test(query, bvh.item_lowers[primitive_index], bvh.item_uppers[primitive_index]))
|
|
418
|
+
{
|
|
419
|
+
if (primitive_counter < end - 1)
|
|
420
|
+
// still need to come back to this leaf node for the leftover primitives
|
|
421
|
+
{
|
|
422
|
+
query.primitive_counter = primitive_counter + 1;
|
|
423
|
+
}
|
|
424
|
+
else
|
|
425
|
+
// no need to come back to this leaf node
|
|
426
|
+
{
|
|
427
|
+
query.count--;
|
|
428
|
+
query.primitive_counter = -1;
|
|
429
|
+
}
|
|
430
|
+
index = primitive_index;
|
|
431
|
+
query.bounds_nr = primitive_index;
|
|
432
|
+
|
|
433
|
+
return true;
|
|
434
|
+
}
|
|
362
435
|
}
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
436
|
+
// if we reach here that means we have finished the current leaf node without finding intersections
|
|
437
|
+
query.primitive_counter = -1;
|
|
438
|
+
// remove the leaf node from the back of the stack because it is finished
|
|
439
|
+
// and continue the bvh traversal
|
|
440
|
+
query.count--;
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
// Navigate through the bvh, find the first overlapping leaf node.
|
|
444
|
+
while (query.count)
|
|
445
|
+
{
|
|
446
|
+
const int node_index = query.stack[--query.count];
|
|
447
|
+
BVHPackedNodeHalf node_lower = bvh_load_node(bvh.node_lowers, node_index);
|
|
448
|
+
BVHPackedNodeHalf node_upper = bvh_load_node(bvh.node_uppers, node_index);
|
|
449
|
+
|
|
450
|
+
const int left_index = node_lower.i;
|
|
451
|
+
const int right_index = node_upper.i;
|
|
452
|
+
|
|
453
|
+
wp::vec3 lower_pos(node_lower.x, node_lower.y, node_lower.z);
|
|
454
|
+
wp::vec3 upper_pos(node_upper.x, node_upper.y, node_upper.z);
|
|
455
|
+
wp::bounds3 current_bounds(lower_pos, upper_pos);
|
|
456
|
+
|
|
457
|
+
if (!bvh_query_intersection_test(query, (vec3&)node_lower, (vec3&)node_upper))
|
|
458
|
+
{
|
|
459
|
+
continue;
|
|
367
460
|
}
|
|
368
461
|
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
462
|
+
if (node_lower.b)
|
|
463
|
+
{
|
|
464
|
+
// found leaf, loop through its content primitives
|
|
465
|
+
const int start = left_index;
|
|
466
|
+
const int end = right_index;
|
|
467
|
+
|
|
468
|
+
for (int primitive_counter = start; primitive_counter < end; primitive_counter++)
|
|
469
|
+
{
|
|
470
|
+
int primitive_index = bvh.primitive_indices[primitive_counter];
|
|
471
|
+
if (bvh_query_intersection_test(query, bvh.item_lowers[primitive_index], bvh.item_uppers[primitive_index]))
|
|
472
|
+
{
|
|
473
|
+
if (primitive_counter < end - 1)
|
|
474
|
+
// still need to come back to this leaf node for the leftover primitives
|
|
475
|
+
{
|
|
476
|
+
query.primitive_counter = primitive_counter + 1;
|
|
477
|
+
query.stack[query.count++] = node_index;
|
|
478
|
+
}
|
|
479
|
+
else
|
|
480
|
+
// no need to come back to this leaf node
|
|
481
|
+
{
|
|
482
|
+
query.primitive_counter = -1;
|
|
483
|
+
}
|
|
484
|
+
index = primitive_index;
|
|
485
|
+
query.bounds_nr = primitive_index;
|
|
486
|
+
|
|
487
|
+
return true;
|
|
488
|
+
}
|
|
489
|
+
}
|
|
490
|
+
}
|
|
491
|
+
else
|
|
492
|
+
{
|
|
493
|
+
query.stack[query.count++] = left_index;
|
|
494
|
+
query.stack[query.count++] = right_index;
|
|
495
|
+
}
|
|
496
|
+
}
|
|
497
|
+
return false;
|
|
387
498
|
}
|
|
388
499
|
|
|
389
500
|
|
|
@@ -421,7 +532,7 @@ CUDA_CALLABLE void bvh_rem_descriptor(uint64_t id);
|
|
|
421
532
|
|
|
422
533
|
#if !__CUDA_ARCH__
|
|
423
534
|
|
|
424
|
-
void bvh_create_host(vec3* lowers, vec3* uppers, int num_items, BVH& bvh);
|
|
535
|
+
void bvh_create_host(vec3* lowers, vec3* uppers, int num_items, int constructor_type, BVH& bvh);
|
|
425
536
|
void bvh_destroy_host(wp::BVH& bvh);
|
|
426
537
|
void bvh_refit_host(wp::BVH& bvh);
|
|
427
538
|
|
|
@@ -431,4 +542,3 @@ void bvh_refit_device(uint64_t id);
|
|
|
431
542
|
#endif
|
|
432
543
|
|
|
433
544
|
} // namespace wp
|
|
434
|
-
|
warp/native/clang/clang.cpp
CHANGED
|
@@ -218,7 +218,7 @@ static std::unique_ptr<llvm::Module> cuda_to_llvm(const std::string& input_file,
|
|
|
218
218
|
|
|
219
219
|
extern "C" {
|
|
220
220
|
|
|
221
|
-
WP_API int compile_cpp(const char* cpp_src, const char *input_file, const char* include_dir, const char* output_file, bool debug, bool verify_fp)
|
|
221
|
+
WP_API int compile_cpp(const char* cpp_src, const char *input_file, const char* include_dir, const char* output_file, bool debug, bool verify_fp, bool fuse_fp)
|
|
222
222
|
{
|
|
223
223
|
initialize_llvm();
|
|
224
224
|
|
|
@@ -236,6 +236,10 @@ WP_API int compile_cpp(const char* cpp_src, const char *input_file, const char*
|
|
|
236
236
|
const char* CPU = "generic";
|
|
237
237
|
const char* features = "";
|
|
238
238
|
llvm::TargetOptions target_options;
|
|
239
|
+
if (fuse_fp)
|
|
240
|
+
target_options.AllowFPOpFusion = llvm::FPOpFusion::Standard;
|
|
241
|
+
else
|
|
242
|
+
target_options.AllowFPOpFusion = llvm::FPOpFusion::Strict;
|
|
239
243
|
llvm::Reloc::Model relocation_model = llvm::Reloc::PIC_; // Position Independent Code
|
|
240
244
|
llvm::CodeModel::Model code_model = llvm::CodeModel::Large; // Don't make assumptions about displacement sizes
|
|
241
245
|
llvm::TargetMachine* target_machine = target->createTargetMachine(target_triple, CPU, features, target_options, relocation_model, code_model);
|
|
@@ -409,7 +413,7 @@ WP_API int load_obj(const char* object_file, const char* module_name)
|
|
|
409
413
|
SYMBOL(sinhf), SYMBOL_T(sinh, double(*)(double)),
|
|
410
414
|
SYMBOL(coshf), SYMBOL_T(cosh, double(*)(double)),
|
|
411
415
|
SYMBOL(tanhf), SYMBOL_T(tanh, double(*)(double)),
|
|
412
|
-
SYMBOL(fmaf),
|
|
416
|
+
SYMBOL(fmaf), SYMBOL_T(fma, double(*)(double, double, double)),
|
|
413
417
|
SYMBOL(memcpy), SYMBOL(memset), SYMBOL(memmove),
|
|
414
418
|
SYMBOL(_wp_assert),
|
|
415
419
|
SYMBOL(_wp_isfinite),
|
warp/native/crt.h
CHANGED
warp/native/cuda_util.cpp
CHANGED
|
@@ -102,6 +102,11 @@ static PFN_cuGraphicsGLRegisterBuffer_v3000 pfn_cuGraphicsGLRegisterBuffer;
|
|
|
102
102
|
static PFN_cuGraphicsUnregisterResource_v3000 pfn_cuGraphicsUnregisterResource;
|
|
103
103
|
static PFN_cuModuleGetGlobal_v3020 pfn_cuModuleGetGlobal;
|
|
104
104
|
static PFN_cuFuncSetAttribute_v9000 pfn_cuFuncSetAttribute;
|
|
105
|
+
static PFN_cuIpcGetEventHandle_v4010 pfn_cuIpcGetEventHandle;
|
|
106
|
+
static PFN_cuIpcOpenEventHandle_v4010 pfn_cuIpcOpenEventHandle;
|
|
107
|
+
static PFN_cuIpcGetMemHandle_v4010 pfn_cuIpcGetMemHandle;
|
|
108
|
+
static PFN_cuIpcOpenMemHandle_v11000 pfn_cuIpcOpenMemHandle;
|
|
109
|
+
static PFN_cuIpcCloseMemHandle_v4010 pfn_cuIpcCloseMemHandle;
|
|
105
110
|
|
|
106
111
|
static bool cuda_driver_initialized = false;
|
|
107
112
|
|
|
@@ -238,6 +243,11 @@ bool init_cuda_driver()
|
|
|
238
243
|
get_driver_entry_point("cuGraphicsUnregisterResource", 3000, &(void*&)pfn_cuGraphicsUnregisterResource);
|
|
239
244
|
get_driver_entry_point("cuModuleGetGlobal", 3020, &(void*&)pfn_cuModuleGetGlobal);
|
|
240
245
|
get_driver_entry_point("cuFuncSetAttribute", 9000, &(void*&)pfn_cuFuncSetAttribute);
|
|
246
|
+
get_driver_entry_point("cuIpcGetEventHandle", 4010, &(void*&)pfn_cuIpcGetEventHandle);
|
|
247
|
+
get_driver_entry_point("cuIpcOpenEventHandle", 4010, &(void*&)pfn_cuIpcOpenEventHandle);
|
|
248
|
+
get_driver_entry_point("cuIpcGetMemHandle", 4010, &(void*&)pfn_cuIpcGetMemHandle);
|
|
249
|
+
get_driver_entry_point("cuIpcOpenMemHandle", 11000, &(void*&)pfn_cuIpcOpenMemHandle);
|
|
250
|
+
get_driver_entry_point("cuIpcCloseMemHandle", 4010, &(void*&)pfn_cuIpcCloseMemHandle);
|
|
241
251
|
|
|
242
252
|
if (pfn_cuInit)
|
|
243
253
|
cuda_driver_initialized = check_cu(pfn_cuInit(0));
|
|
@@ -585,4 +595,29 @@ CUresult cuFuncSetAttribute_f(CUfunction hfunc, CUfunction_attribute attrib, int
|
|
|
585
595
|
return pfn_cuFuncSetAttribute ? pfn_cuFuncSetAttribute(hfunc, attrib, value) : DRIVER_ENTRY_POINT_ERROR;
|
|
586
596
|
}
|
|
587
597
|
|
|
598
|
+
CUresult cuIpcGetEventHandle_f(CUipcEventHandle *pHandle, CUevent event)
|
|
599
|
+
{
|
|
600
|
+
return pfn_cuIpcGetEventHandle ? pfn_cuIpcGetEventHandle(pHandle, event) : DRIVER_ENTRY_POINT_ERROR;
|
|
601
|
+
}
|
|
602
|
+
|
|
603
|
+
CUresult cuIpcOpenEventHandle_f(CUevent *phEvent, CUipcEventHandle handle)
|
|
604
|
+
{
|
|
605
|
+
return pfn_cuIpcOpenEventHandle ? pfn_cuIpcOpenEventHandle(phEvent, handle) : DRIVER_ENTRY_POINT_ERROR;
|
|
606
|
+
}
|
|
607
|
+
|
|
608
|
+
CUresult cuIpcGetMemHandle_f(CUipcMemHandle *pHandle, CUdeviceptr dptr)
|
|
609
|
+
{
|
|
610
|
+
return pfn_cuIpcGetMemHandle ? pfn_cuIpcGetMemHandle(pHandle, dptr) : DRIVER_ENTRY_POINT_ERROR;
|
|
611
|
+
}
|
|
612
|
+
|
|
613
|
+
CUresult cuIpcOpenMemHandle_f(CUdeviceptr *pdptr, CUipcMemHandle handle, unsigned int flags)
|
|
614
|
+
{
|
|
615
|
+
return pfn_cuIpcOpenMemHandle ? pfn_cuIpcOpenMemHandle(pdptr, handle, flags) : DRIVER_ENTRY_POINT_ERROR;
|
|
616
|
+
}
|
|
617
|
+
|
|
618
|
+
CUresult cuIpcCloseMemHandle_f(CUdeviceptr dptr)
|
|
619
|
+
{
|
|
620
|
+
return pfn_cuIpcCloseMemHandle ? pfn_cuIpcCloseMemHandle(dptr) : DRIVER_ENTRY_POINT_ERROR;
|
|
621
|
+
}
|
|
622
|
+
|
|
588
623
|
#endif // WP_ENABLE_CUDA
|
warp/native/cuda_util.h
CHANGED
|
@@ -101,6 +101,11 @@ CUresult cuGraphicsGLRegisterBuffer_f(CUgraphicsResource *pCudaResource, unsigne
|
|
|
101
101
|
CUresult cuGraphicsUnregisterResource_f(CUgraphicsResource resource);
|
|
102
102
|
CUresult cuModuleGetGlobal_f(CUdeviceptr* dptr, size_t* bytes, CUmodule hmod, const char* name );
|
|
103
103
|
CUresult cuFuncSetAttribute_f(CUfunction hfunc, CUfunction_attribute attrib, int value);
|
|
104
|
+
CUresult cuIpcGetEventHandle_f(CUipcEventHandle *pHandle, CUevent event);
|
|
105
|
+
CUresult cuIpcOpenEventHandle_f(CUevent *phEvent, CUipcEventHandle handle);
|
|
106
|
+
CUresult cuIpcGetMemHandle_f(CUipcMemHandle *pHandle, CUdeviceptr dptr);
|
|
107
|
+
CUresult cuIpcOpenMemHandle_f(CUdeviceptr *pdptr, CUipcMemHandle handle, unsigned int flags);
|
|
108
|
+
CUresult cuIpcCloseMemHandle_f(CUdeviceptr dptr);
|
|
104
109
|
|
|
105
110
|
bool init_cuda_driver();
|
|
106
111
|
bool is_cuda_driver_initialized();
|
warp/native/exports.h
CHANGED
|
@@ -1001,46 +1001,6 @@ WP_API void builtin_spatial_top_spatial_vectord(spatial_vectord& svec, vec3d* re
|
|
|
1001
1001
|
WP_API void builtin_spatial_bottom_spatial_vectorh(spatial_vectorh& svec, vec3h* ret) { *ret = wp::spatial_bottom(svec); }
|
|
1002
1002
|
WP_API void builtin_spatial_bottom_spatial_vectorf(spatial_vectorf& svec, vec3f* ret) { *ret = wp::spatial_bottom(svec); }
|
|
1003
1003
|
WP_API void builtin_spatial_bottom_spatial_vectord(spatial_vectord& svec, vec3d* ret) { *ret = wp::spatial_bottom(svec); }
|
|
1004
|
-
WP_API void builtin_volume_sample_f_uint64_vec3f_int32(uint64 id, vec3f& uvw, int32 sampling_mode, float* ret) { *ret = wp::volume_sample_f(id, uvw, sampling_mode); }
|
|
1005
|
-
WP_API void builtin_volume_sample_grad_f_uint64_vec3f_int32_vec3f(uint64 id, vec3f& uvw, int32 sampling_mode, vec3f& grad, float* ret) { *ret = wp::volume_sample_grad_f(id, uvw, sampling_mode, grad); }
|
|
1006
|
-
WP_API void builtin_volume_lookup_f_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, float* ret) { *ret = wp::volume_lookup_f(id, i, j, k); }
|
|
1007
|
-
WP_API void builtin_volume_sample_v_uint64_vec3f_int32(uint64 id, vec3f& uvw, int32 sampling_mode, vec3f* ret) { *ret = wp::volume_sample_v(id, uvw, sampling_mode); }
|
|
1008
|
-
WP_API void builtin_volume_lookup_v_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, vec3f* ret) { *ret = wp::volume_lookup_v(id, i, j, k); }
|
|
1009
|
-
WP_API void builtin_volume_sample_i_uint64_vec3f(uint64 id, vec3f& uvw, int* ret) { *ret = wp::volume_sample_i(id, uvw); }
|
|
1010
|
-
WP_API void builtin_volume_lookup_i_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, int* ret) { *ret = wp::volume_lookup_i(id, i, j, k); }
|
|
1011
|
-
WP_API void builtin_volume_lookup_index_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, int32* ret) { *ret = wp::volume_lookup_index(id, i, j, k); }
|
|
1012
|
-
WP_API void builtin_volume_index_to_world_uint64_vec3f(uint64 id, vec3f& uvw, vec3f* ret) { *ret = wp::volume_index_to_world(id, uvw); }
|
|
1013
|
-
WP_API void builtin_volume_world_to_index_uint64_vec3f(uint64 id, vec3f& xyz, vec3f* ret) { *ret = wp::volume_world_to_index(id, xyz); }
|
|
1014
|
-
WP_API void builtin_volume_index_to_world_dir_uint64_vec3f(uint64 id, vec3f& uvw, vec3f* ret) { *ret = wp::volume_index_to_world_dir(id, uvw); }
|
|
1015
|
-
WP_API void builtin_volume_world_to_index_dir_uint64_vec3f(uint64 id, vec3f& xyz, vec3f* ret) { *ret = wp::volume_world_to_index_dir(id, xyz); }
|
|
1016
|
-
WP_API void builtin_rand_init_int32(int32 seed, uint32* ret) { *ret = wp::rand_init(seed); }
|
|
1017
|
-
WP_API void builtin_rand_init_int32_int32(int32 seed, int32 offset, uint32* ret) { *ret = wp::rand_init(seed, offset); }
|
|
1018
|
-
WP_API void builtin_randi_uint32(uint32 state, int* ret) { *ret = wp::randi(state); }
|
|
1019
|
-
WP_API void builtin_randi_uint32_int32_int32(uint32 state, int32 low, int32 high, int* ret) { *ret = wp::randi(state, low, high); }
|
|
1020
|
-
WP_API void builtin_randf_uint32(uint32 state, float* ret) { *ret = wp::randf(state); }
|
|
1021
|
-
WP_API void builtin_randf_uint32_float32_float32(uint32 state, float32 low, float32 high, float* ret) { *ret = wp::randf(state, low, high); }
|
|
1022
|
-
WP_API void builtin_randn_uint32(uint32 state, float* ret) { *ret = wp::randn(state); }
|
|
1023
|
-
WP_API void builtin_sample_triangle_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_triangle(state); }
|
|
1024
|
-
WP_API void builtin_sample_unit_ring_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_ring(state); }
|
|
1025
|
-
WP_API void builtin_sample_unit_disk_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_disk(state); }
|
|
1026
|
-
WP_API void builtin_sample_unit_sphere_surface_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_sphere_surface(state); }
|
|
1027
|
-
WP_API void builtin_sample_unit_sphere_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_sphere(state); }
|
|
1028
|
-
WP_API void builtin_sample_unit_hemisphere_surface_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_hemisphere_surface(state); }
|
|
1029
|
-
WP_API void builtin_sample_unit_hemisphere_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_hemisphere(state); }
|
|
1030
|
-
WP_API void builtin_sample_unit_square_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_square(state); }
|
|
1031
|
-
WP_API void builtin_sample_unit_cube_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_cube(state); }
|
|
1032
|
-
WP_API void builtin_poisson_uint32_float32(uint32 state, float32 lam, uint32* ret) { *ret = wp::poisson(state, lam); }
|
|
1033
|
-
WP_API void builtin_noise_uint32_float32(uint32 state, float32 x, float* ret) { *ret = wp::noise(state, x); }
|
|
1034
|
-
WP_API void builtin_noise_uint32_vec2f(uint32 state, vec2f& xy, float* ret) { *ret = wp::noise(state, xy); }
|
|
1035
|
-
WP_API void builtin_noise_uint32_vec3f(uint32 state, vec3f& xyz, float* ret) { *ret = wp::noise(state, xyz); }
|
|
1036
|
-
WP_API void builtin_noise_uint32_vec4f(uint32 state, vec4f& xyzt, float* ret) { *ret = wp::noise(state, xyzt); }
|
|
1037
|
-
WP_API void builtin_pnoise_uint32_float32_int32(uint32 state, float32 x, int32 px, float* ret) { *ret = wp::pnoise(state, x, px); }
|
|
1038
|
-
WP_API void builtin_pnoise_uint32_vec2f_int32_int32(uint32 state, vec2f& xy, int32 px, int32 py, float* ret) { *ret = wp::pnoise(state, xy, px, py); }
|
|
1039
|
-
WP_API void builtin_pnoise_uint32_vec3f_int32_int32_int32(uint32 state, vec3f& xyz, int32 px, int32 py, int32 pz, float* ret) { *ret = wp::pnoise(state, xyz, px, py, pz); }
|
|
1040
|
-
WP_API void builtin_pnoise_uint32_vec4f_int32_int32_int32_int32(uint32 state, vec4f& xyzt, int32 px, int32 py, int32 pz, int32 pt, float* ret) { *ret = wp::pnoise(state, xyzt, px, py, pz, pt); }
|
|
1041
|
-
WP_API void builtin_curlnoise_uint32_vec2f_uint32_float32_float32(uint32 state, vec2f& xy, uint32 octaves, float32 lacunarity, float32 gain, vec2f* ret) { *ret = wp::curlnoise(state, xy, octaves, lacunarity, gain); }
|
|
1042
|
-
WP_API void builtin_curlnoise_uint32_vec3f_uint32_float32_float32(uint32 state, vec3f& xyz, uint32 octaves, float32 lacunarity, float32 gain, vec3f* ret) { *ret = wp::curlnoise(state, xyz, octaves, lacunarity, gain); }
|
|
1043
|
-
WP_API void builtin_curlnoise_uint32_vec4f_uint32_float32_float32(uint32 state, vec4f& xyzt, uint32 octaves, float32 lacunarity, float32 gain, vec3f* ret) { *ret = wp::curlnoise(state, xyzt, octaves, lacunarity, gain); }
|
|
1044
1004
|
WP_API void builtin_assign_vec2h_int32_float16(vec2h& a, int32 i, float16 value, vec2h* ret) { *ret = wp::assign(a, i, value); }
|
|
1045
1005
|
WP_API void builtin_assign_vec3h_int32_float16(vec3h& a, int32 i, float16 value, vec3h* ret) { *ret = wp::assign(a, i, value); }
|
|
1046
1006
|
WP_API void builtin_assign_vec4h_int32_float16(vec4h& a, int32 i, float16 value, vec4h* ret) { *ret = wp::assign(a, i, value); }
|
|
@@ -1104,6 +1064,46 @@ WP_API void builtin_assign_mat22d_int32_vec2d(mat22d& a, int32 i, vec2d& value,
|
|
|
1104
1064
|
WP_API void builtin_assign_mat33d_int32_vec3d(mat33d& a, int32 i, vec3d& value, mat33d* ret) { *ret = wp::assign(a, i, value); }
|
|
1105
1065
|
WP_API void builtin_assign_mat44d_int32_vec4d(mat44d& a, int32 i, vec4d& value, mat44d* ret) { *ret = wp::assign(a, i, value); }
|
|
1106
1066
|
WP_API void builtin_assign_spatial_matrixd_int32_spatial_vectord(spatial_matrixd& a, int32 i, spatial_vectord& value, spatial_matrixd* ret) { *ret = wp::assign(a, i, value); }
|
|
1067
|
+
WP_API void builtin_volume_sample_f_uint64_vec3f_int32(uint64 id, vec3f& uvw, int32 sampling_mode, float* ret) { *ret = wp::volume_sample_f(id, uvw, sampling_mode); }
|
|
1068
|
+
WP_API void builtin_volume_sample_grad_f_uint64_vec3f_int32_vec3f(uint64 id, vec3f& uvw, int32 sampling_mode, vec3f& grad, float* ret) { *ret = wp::volume_sample_grad_f(id, uvw, sampling_mode, grad); }
|
|
1069
|
+
WP_API void builtin_volume_lookup_f_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, float* ret) { *ret = wp::volume_lookup_f(id, i, j, k); }
|
|
1070
|
+
WP_API void builtin_volume_sample_v_uint64_vec3f_int32(uint64 id, vec3f& uvw, int32 sampling_mode, vec3f* ret) { *ret = wp::volume_sample_v(id, uvw, sampling_mode); }
|
|
1071
|
+
WP_API void builtin_volume_lookup_v_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, vec3f* ret) { *ret = wp::volume_lookup_v(id, i, j, k); }
|
|
1072
|
+
WP_API void builtin_volume_sample_i_uint64_vec3f(uint64 id, vec3f& uvw, int* ret) { *ret = wp::volume_sample_i(id, uvw); }
|
|
1073
|
+
WP_API void builtin_volume_lookup_i_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, int* ret) { *ret = wp::volume_lookup_i(id, i, j, k); }
|
|
1074
|
+
WP_API void builtin_volume_lookup_index_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, int32* ret) { *ret = wp::volume_lookup_index(id, i, j, k); }
|
|
1075
|
+
WP_API void builtin_volume_index_to_world_uint64_vec3f(uint64 id, vec3f& uvw, vec3f* ret) { *ret = wp::volume_index_to_world(id, uvw); }
|
|
1076
|
+
WP_API void builtin_volume_world_to_index_uint64_vec3f(uint64 id, vec3f& xyz, vec3f* ret) { *ret = wp::volume_world_to_index(id, xyz); }
|
|
1077
|
+
WP_API void builtin_volume_index_to_world_dir_uint64_vec3f(uint64 id, vec3f& uvw, vec3f* ret) { *ret = wp::volume_index_to_world_dir(id, uvw); }
|
|
1078
|
+
WP_API void builtin_volume_world_to_index_dir_uint64_vec3f(uint64 id, vec3f& xyz, vec3f* ret) { *ret = wp::volume_world_to_index_dir(id, xyz); }
|
|
1079
|
+
WP_API void builtin_rand_init_int32(int32 seed, uint32* ret) { *ret = wp::rand_init(seed); }
|
|
1080
|
+
WP_API void builtin_rand_init_int32_int32(int32 seed, int32 offset, uint32* ret) { *ret = wp::rand_init(seed, offset); }
|
|
1081
|
+
WP_API void builtin_randi_uint32(uint32 state, int* ret) { *ret = wp::randi(state); }
|
|
1082
|
+
WP_API void builtin_randi_uint32_int32_int32(uint32 state, int32 low, int32 high, int* ret) { *ret = wp::randi(state, low, high); }
|
|
1083
|
+
WP_API void builtin_randf_uint32(uint32 state, float* ret) { *ret = wp::randf(state); }
|
|
1084
|
+
WP_API void builtin_randf_uint32_float32_float32(uint32 state, float32 low, float32 high, float* ret) { *ret = wp::randf(state, low, high); }
|
|
1085
|
+
WP_API void builtin_randn_uint32(uint32 state, float* ret) { *ret = wp::randn(state); }
|
|
1086
|
+
WP_API void builtin_sample_triangle_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_triangle(state); }
|
|
1087
|
+
WP_API void builtin_sample_unit_ring_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_ring(state); }
|
|
1088
|
+
WP_API void builtin_sample_unit_disk_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_disk(state); }
|
|
1089
|
+
WP_API void builtin_sample_unit_sphere_surface_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_sphere_surface(state); }
|
|
1090
|
+
WP_API void builtin_sample_unit_sphere_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_sphere(state); }
|
|
1091
|
+
WP_API void builtin_sample_unit_hemisphere_surface_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_hemisphere_surface(state); }
|
|
1092
|
+
WP_API void builtin_sample_unit_hemisphere_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_hemisphere(state); }
|
|
1093
|
+
WP_API void builtin_sample_unit_square_uint32(uint32 state, vec2f* ret) { *ret = wp::sample_unit_square(state); }
|
|
1094
|
+
WP_API void builtin_sample_unit_cube_uint32(uint32 state, vec3f* ret) { *ret = wp::sample_unit_cube(state); }
|
|
1095
|
+
WP_API void builtin_poisson_uint32_float32(uint32 state, float32 lam, uint32* ret) { *ret = wp::poisson(state, lam); }
|
|
1096
|
+
WP_API void builtin_noise_uint32_float32(uint32 state, float32 x, float* ret) { *ret = wp::noise(state, x); }
|
|
1097
|
+
WP_API void builtin_noise_uint32_vec2f(uint32 state, vec2f& xy, float* ret) { *ret = wp::noise(state, xy); }
|
|
1098
|
+
WP_API void builtin_noise_uint32_vec3f(uint32 state, vec3f& xyz, float* ret) { *ret = wp::noise(state, xyz); }
|
|
1099
|
+
WP_API void builtin_noise_uint32_vec4f(uint32 state, vec4f& xyzt, float* ret) { *ret = wp::noise(state, xyzt); }
|
|
1100
|
+
WP_API void builtin_pnoise_uint32_float32_int32(uint32 state, float32 x, int32 px, float* ret) { *ret = wp::pnoise(state, x, px); }
|
|
1101
|
+
WP_API void builtin_pnoise_uint32_vec2f_int32_int32(uint32 state, vec2f& xy, int32 px, int32 py, float* ret) { *ret = wp::pnoise(state, xy, px, py); }
|
|
1102
|
+
WP_API void builtin_pnoise_uint32_vec3f_int32_int32_int32(uint32 state, vec3f& xyz, int32 px, int32 py, int32 pz, float* ret) { *ret = wp::pnoise(state, xyz, px, py, pz); }
|
|
1103
|
+
WP_API void builtin_pnoise_uint32_vec4f_int32_int32_int32_int32(uint32 state, vec4f& xyzt, int32 px, int32 py, int32 pz, int32 pt, float* ret) { *ret = wp::pnoise(state, xyzt, px, py, pz, pt); }
|
|
1104
|
+
WP_API void builtin_curlnoise_uint32_vec2f_uint32_float32_float32(uint32 state, vec2f& xy, uint32 octaves, float32 lacunarity, float32 gain, vec2f* ret) { *ret = wp::curlnoise(state, xy, octaves, lacunarity, gain); }
|
|
1105
|
+
WP_API void builtin_curlnoise_uint32_vec3f_uint32_float32_float32(uint32 state, vec3f& xyz, uint32 octaves, float32 lacunarity, float32 gain, vec3f* ret) { *ret = wp::curlnoise(state, xyz, octaves, lacunarity, gain); }
|
|
1106
|
+
WP_API void builtin_curlnoise_uint32_vec4f_uint32_float32_float32(uint32 state, vec4f& xyzt, uint32 octaves, float32 lacunarity, float32 gain, vec3f* ret) { *ret = wp::curlnoise(state, xyzt, octaves, lacunarity, gain); }
|
|
1107
1107
|
WP_API void builtin_extract_vec2h_int32(vec2h& a, int32 i, float16* ret) { *ret = wp::extract(a, i); }
|
|
1108
1108
|
WP_API void builtin_extract_vec3h_int32(vec3h& a, int32 i, float16* ret) { *ret = wp::extract(a, i); }
|
|
1109
1109
|
WP_API void builtin_extract_vec4h_int32(vec4h& a, int32 i, float16* ret) { *ret = wp::extract(a, i); }
|
warp/native/intersect.h
CHANGED
|
@@ -156,6 +156,23 @@ CUDA_CALLABLE inline bool intersect_ray_aabb(const vec3& pos, const vec3& rcp_di
|
|
|
156
156
|
return hit;
|
|
157
157
|
}
|
|
158
158
|
|
|
159
|
+
CUDA_CALLABLE inline bool intersect_aabb_aabb(const vec3& a_lower, const vec3& a_upper, const vec3& b_lower, const vec3& b_upper)
|
|
160
|
+
{
|
|
161
|
+
if (a_lower[0] > b_upper[0] ||
|
|
162
|
+
a_lower[1] > b_upper[1] ||
|
|
163
|
+
a_lower[2] > b_upper[2] ||
|
|
164
|
+
a_upper[0] < b_lower[0] ||
|
|
165
|
+
a_upper[1] < b_lower[1] ||
|
|
166
|
+
a_upper[2] < b_lower[2])
|
|
167
|
+
{
|
|
168
|
+
return false;
|
|
169
|
+
}
|
|
170
|
+
else
|
|
171
|
+
{
|
|
172
|
+
return true;
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
|
|
159
176
|
|
|
160
177
|
// Moller and Trumbore's method
|
|
161
178
|
CUDA_CALLABLE inline bool intersect_ray_tri_moller(const vec3& p, const vec3& dir, const vec3& a, const vec3& b, const vec3& c, float& t, float& u, float& v, float& w, float& sign, vec3* normal)
|