warp-lang 1.6.2__py3-none-macosx_10_13_universal2.whl → 1.7.0__py3-none-macosx_10_13_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (179)
  1. warp/__init__.py +7 -1
  2. warp/bin/libwarp-clang.dylib +0 -0
  3. warp/bin/libwarp.dylib +0 -0
  4. warp/build.py +410 -0
  5. warp/build_dll.py +6 -14
  6. warp/builtins.py +452 -362
  7. warp/codegen.py +179 -119
  8. warp/config.py +42 -6
  9. warp/context.py +490 -271
  10. warp/dlpack.py +8 -6
  11. warp/examples/assets/nonuniform.usd +0 -0
  12. warp/examples/assets/nvidia_logo.png +0 -0
  13. warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
  14. warp/examples/core/example_sample_mesh.py +300 -0
  15. warp/examples/fem/example_apic_fluid.py +1 -1
  16. warp/examples/fem/example_burgers.py +2 -2
  17. warp/examples/fem/example_deformed_geometry.py +1 -1
  18. warp/examples/fem/example_distortion_energy.py +1 -1
  19. warp/examples/fem/example_magnetostatics.py +6 -6
  20. warp/examples/fem/utils.py +9 -3
  21. warp/examples/interop/example_jax_callable.py +116 -0
  22. warp/examples/interop/example_jax_ffi_callback.py +132 -0
  23. warp/examples/interop/example_jax_kernel.py +205 -0
  24. warp/examples/optim/example_fluid_checkpoint.py +497 -0
  25. warp/examples/tile/example_tile_matmul.py +2 -4
  26. warp/fem/__init__.py +11 -1
  27. warp/fem/adaptivity.py +4 -4
  28. warp/fem/field/nodal_field.py +22 -68
  29. warp/fem/field/virtual.py +62 -23
  30. warp/fem/geometry/adaptive_nanogrid.py +9 -10
  31. warp/fem/geometry/closest_point.py +1 -1
  32. warp/fem/geometry/deformed_geometry.py +5 -2
  33. warp/fem/geometry/geometry.py +5 -0
  34. warp/fem/geometry/grid_2d.py +12 -12
  35. warp/fem/geometry/grid_3d.py +12 -15
  36. warp/fem/geometry/hexmesh.py +5 -7
  37. warp/fem/geometry/nanogrid.py +9 -11
  38. warp/fem/geometry/quadmesh.py +13 -13
  39. warp/fem/geometry/tetmesh.py +3 -4
  40. warp/fem/geometry/trimesh.py +3 -8
  41. warp/fem/integrate.py +262 -93
  42. warp/fem/linalg.py +5 -5
  43. warp/fem/quadrature/pic_quadrature.py +37 -22
  44. warp/fem/quadrature/quadrature.py +194 -25
  45. warp/fem/space/__init__.py +1 -1
  46. warp/fem/space/basis_function_space.py +4 -2
  47. warp/fem/space/basis_space.py +25 -18
  48. warp/fem/space/hexmesh_function_space.py +2 -2
  49. warp/fem/space/partition.py +6 -2
  50. warp/fem/space/quadmesh_function_space.py +8 -8
  51. warp/fem/space/shape/cube_shape_function.py +23 -23
  52. warp/fem/space/shape/square_shape_function.py +12 -12
  53. warp/fem/space/shape/triangle_shape_function.py +1 -1
  54. warp/fem/space/tetmesh_function_space.py +3 -3
  55. warp/fem/space/trimesh_function_space.py +2 -2
  56. warp/fem/utils.py +12 -6
  57. warp/jax.py +14 -1
  58. warp/jax_experimental/__init__.py +16 -0
  59. warp/{jax_experimental.py → jax_experimental/custom_call.py} +14 -27
  60. warp/jax_experimental/ffi.py +698 -0
  61. warp/jax_experimental/xla_ffi.py +602 -0
  62. warp/math.py +89 -0
  63. warp/native/array.h +13 -0
  64. warp/native/builtin.h +29 -3
  65. warp/native/bvh.cpp +3 -1
  66. warp/native/bvh.cu +42 -14
  67. warp/native/bvh.h +2 -1
  68. warp/native/clang/clang.cpp +30 -3
  69. warp/native/cuda_util.cpp +14 -0
  70. warp/native/cuda_util.h +2 -0
  71. warp/native/exports.h +68 -63
  72. warp/native/intersect.h +26 -26
  73. warp/native/intersect_adj.h +33 -33
  74. warp/native/marching.cu +1 -1
  75. warp/native/mat.h +513 -9
  76. warp/native/mesh.h +10 -10
  77. warp/native/quat.h +99 -11
  78. warp/native/rand.h +6 -0
  79. warp/native/sort.cpp +122 -59
  80. warp/native/sort.cu +152 -15
  81. warp/native/sort.h +8 -1
  82. warp/native/sparse.cpp +43 -22
  83. warp/native/sparse.cu +52 -17
  84. warp/native/svd.h +116 -0
  85. warp/native/tile.h +301 -105
  86. warp/native/tile_reduce.h +46 -3
  87. warp/native/vec.h +68 -7
  88. warp/native/volume.cpp +85 -113
  89. warp/native/volume_builder.cu +25 -10
  90. warp/native/volume_builder.h +6 -0
  91. warp/native/warp.cpp +5 -6
  92. warp/native/warp.cu +99 -10
  93. warp/native/warp.h +19 -10
  94. warp/optim/linear.py +10 -10
  95. warp/sim/articulation.py +4 -4
  96. warp/sim/collide.py +21 -10
  97. warp/sim/import_mjcf.py +449 -155
  98. warp/sim/import_urdf.py +32 -12
  99. warp/sim/integrator_euler.py +5 -5
  100. warp/sim/integrator_featherstone.py +3 -10
  101. warp/sim/integrator_vbd.py +207 -2
  102. warp/sim/integrator_xpbd.py +5 -5
  103. warp/sim/model.py +42 -13
  104. warp/sim/utils.py +2 -2
  105. warp/sparse.py +642 -555
  106. warp/stubs.py +216 -19
  107. warp/tests/__main__.py +0 -15
  108. warp/tests/cuda/__init__.py +0 -0
  109. warp/tests/{test_mempool.py → cuda/test_mempool.py} +39 -0
  110. warp/tests/{test_streams.py → cuda/test_streams.py} +71 -0
  111. warp/tests/geometry/__init__.py +0 -0
  112. warp/tests/{test_mesh_query_point.py → geometry/test_mesh_query_point.py} +66 -63
  113. warp/tests/{test_mesh_query_ray.py → geometry/test_mesh_query_ray.py} +1 -1
  114. warp/tests/{test_volume.py → geometry/test_volume.py} +41 -6
  115. warp/tests/interop/__init__.py +0 -0
  116. warp/tests/{test_dlpack.py → interop/test_dlpack.py} +28 -5
  117. warp/tests/sim/__init__.py +0 -0
  118. warp/tests/{disabled_kinematics.py → sim/disabled_kinematics.py} +9 -10
  119. warp/tests/{test_collision.py → sim/test_collision.py} +2 -2
  120. warp/tests/{test_model.py → sim/test_model.py} +40 -0
  121. warp/tests/{test_sim_kinematics.py → sim/test_sim_kinematics.py} +2 -1
  122. warp/tests/sim/test_vbd.py +597 -0
  123. warp/tests/test_bool.py +1 -1
  124. warp/tests/test_examples.py +28 -36
  125. warp/tests/test_fem.py +23 -4
  126. warp/tests/test_linear_solvers.py +0 -11
  127. warp/tests/test_mat.py +233 -79
  128. warp/tests/test_mat_scalar_ops.py +4 -4
  129. warp/tests/test_overwrite.py +0 -60
  130. warp/tests/test_quat.py +67 -46
  131. warp/tests/test_rand.py +44 -37
  132. warp/tests/test_sparse.py +47 -6
  133. warp/tests/test_spatial.py +75 -0
  134. warp/tests/test_static.py +1 -1
  135. warp/tests/test_utils.py +84 -4
  136. warp/tests/test_vec.py +46 -34
  137. warp/tests/tile/__init__.py +0 -0
  138. warp/tests/{test_tile.py → tile/test_tile.py} +136 -51
  139. warp/tests/{test_tile_load.py → tile/test_tile_load.py} +1 -1
  140. warp/tests/{test_tile_mathdx.py → tile/test_tile_mathdx.py} +9 -6
  141. warp/tests/{test_tile_mlp.py → tile/test_tile_mlp.py} +25 -14
  142. warp/tests/{test_tile_reduce.py → tile/test_tile_reduce.py} +60 -1
  143. warp/tests/{test_tile_view.py → tile/test_tile_view.py} +1 -1
  144. warp/tests/unittest_serial.py +1 -0
  145. warp/tests/unittest_suites.py +45 -59
  146. warp/tests/unittest_utils.py +2 -1
  147. warp/thirdparty/unittest_parallel.py +3 -1
  148. warp/types.py +110 -658
  149. warp/utils.py +137 -72
  150. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/METADATA +29 -7
  151. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/RECORD +172 -162
  152. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/WHEEL +1 -1
  153. warp/examples/optim/example_walker.py +0 -317
  154. warp/native/cutlass_gemm.cpp +0 -43
  155. warp/native/cutlass_gemm.cu +0 -382
  156. warp/tests/test_matmul.py +0 -511
  157. warp/tests/test_matmul_lite.py +0 -411
  158. warp/tests/test_vbd.py +0 -386
  159. warp/tests/unused_test_misc.py +0 -77
  160. /warp/tests/{test_async.py → cuda/test_async.py} +0 -0
  161. /warp/tests/{test_ipc.py → cuda/test_ipc.py} +0 -0
  162. /warp/tests/{test_multigpu.py → cuda/test_multigpu.py} +0 -0
  163. /warp/tests/{test_peer.py → cuda/test_peer.py} +0 -0
  164. /warp/tests/{test_pinned.py → cuda/test_pinned.py} +0 -0
  165. /warp/tests/{test_bvh.py → geometry/test_bvh.py} +0 -0
  166. /warp/tests/{test_hash_grid.py → geometry/test_hash_grid.py} +0 -0
  167. /warp/tests/{test_marching_cubes.py → geometry/test_marching_cubes.py} +0 -0
  168. /warp/tests/{test_mesh.py → geometry/test_mesh.py} +0 -0
  169. /warp/tests/{test_mesh_query_aabb.py → geometry/test_mesh_query_aabb.py} +0 -0
  170. /warp/tests/{test_volume_write.py → geometry/test_volume_write.py} +0 -0
  171. /warp/tests/{test_jax.py → interop/test_jax.py} +0 -0
  172. /warp/tests/{test_paddle.py → interop/test_paddle.py} +0 -0
  173. /warp/tests/{test_torch.py → interop/test_torch.py} +0 -0
  174. /warp/tests/{flaky_test_sim_grad.py → sim/flaky_test_sim_grad.py} +0 -0
  175. /warp/tests/{test_coloring.py → sim/test_coloring.py} +0 -0
  176. /warp/tests/{test_sim_grad_bounce_linear.py → sim/test_sim_grad_bounce_linear.py} +0 -0
  177. /warp/tests/{test_tile_shared_memory.py → tile/test_tile_shared_memory.py} +0 -0
  178. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info/licenses}/LICENSE.md +0 -0
  179. {warp_lang-1.6.2.dist-info → warp_lang-1.7.0.dist-info}/top_level.txt +0 -0
warp/native/array.h CHANGED
@@ -811,6 +811,19 @@ CUDA_CALLABLE inline void adj_select(const array_t<T1>& arr, const T2& a, const
811
811
  adj_a += adj_ret;
812
812
  }
813
813
 
814
+ // where operator to check for array being null, opposite convention compared to select
815
+ template <typename T1, typename T2>
816
+ CUDA_CALLABLE inline T2 where(const array_t<T1>& arr, const T2& a, const T2& b) { return arr.data?a:b; }
817
+
818
+ template <typename T1, typename T2>
819
+ CUDA_CALLABLE inline void adj_where(const array_t<T1>& arr, const T2& a, const T2& b, const array_t<T1>& adj_cond, T2& adj_a, T2& adj_b, const T2& adj_ret)
820
+ {
821
+ if (arr.data)
822
+ adj_a += adj_ret;
823
+ else
824
+ adj_b += adj_ret;
825
+ }
826
+
814
827
  // stub for the case where we have an nested array inside a struct and
815
828
  // atomic add the whole struct onto an array (e.g.: during backwards pass)
816
829
  template <typename T>
warp/native/builtin.h CHANGED
@@ -233,6 +233,16 @@ inline CUDA_CALLABLE half operator * (half a,half b)
233
233
  return float_to_half( half_to_float(a) * half_to_float(b) );
234
234
  }
235
235
 
236
+ inline CUDA_CALLABLE half operator * (half a,float b)
237
+ {
238
+ return float_to_half( half_to_float(a) * b );
239
+ }
240
+
241
+ inline CUDA_CALLABLE half operator * (float a,half b)
242
+ {
243
+ return float_to_half( a * half_to_float(b) );
244
+ }
245
+
236
246
  inline CUDA_CALLABLE half operator * (half a,double b)
237
247
  {
238
248
  return float_to_half( half_to_float(a) * b );
@@ -1084,6 +1094,23 @@ CUDA_CALLABLE inline void adj_select(const C& cond, const T& a, const T& b, C& a
1084
1094
  adj_a += adj_ret;
1085
1095
  }
1086
1096
 
1097
+ template <typename C, typename T>
1098
+ CUDA_CALLABLE inline T where(const C& cond, const T& a, const T& b)
1099
+ {
1100
+ // The double NOT operator !! casts to bool without compiler warnings.
1101
+ return (!!cond) ? a : b;
1102
+ }
1103
+
1104
+ template <typename C, typename T>
1105
+ CUDA_CALLABLE inline void adj_where(const C& cond, const T& a, const T& b, C& adj_cond, T& adj_a, T& adj_b, const T& adj_ret)
1106
+ {
1107
+ // The double NOT operator !! casts to bool without compiler warnings.
1108
+ if (!!cond)
1109
+ adj_a += adj_ret;
1110
+ else
1111
+ adj_b += adj_ret;
1112
+ }
1113
+
1087
1114
  template <typename T>
1088
1115
  CUDA_CALLABLE inline T copy(const T& src)
1089
1116
  {
@@ -1767,8 +1794,7 @@ inline CUDA_CALLABLE void adj_expect_near(const vec3& actual, const vec3& expect
1767
1794
  #include "noise.h"
1768
1795
  #include "matnn.h"
1769
1796
 
1770
- // only include in kernels for now
1771
- #if defined(__CUDACC_RTC__)
1797
+ #if !defined(WP_ENABLE_CUDA) // only include in kernels for now
1772
1798
  #include "tile.h"
1773
1799
  #include "tile_reduce.h"
1774
- #endif
1800
+ #endif //!defined(WP_ENABLE_CUDA)
warp/native/bvh.cpp CHANGED
@@ -285,7 +285,9 @@ int TopDownBVHBuilder::build_recursive(BVH& bvh, const vec3* lowers, const vec3*
285
285
 
286
286
  bounds3 b = calc_bounds(lowers, uppers, bvh.primitive_indices, start, end);
287
287
 
288
- if (n <= BVH_LEAF_SIZE)
288
+ // If the depth exceeds BVH_QUERY_STACK_SIZE, an out-of-bounds access bug may occur during querying.
289
+ // In that case, we merge the following nodes into a single large leaf node.
290
+ if (n <= BVH_LEAF_SIZE || depth >= BVH_QUERY_STACK_SIZE - 1)
289
291
  {
290
292
  bvh.node_lowers[node_index] = make_node(b.lower, start, true);
291
293
  bvh.node_uppers[node_index] = make_node(b.upper, end, false);
warp/native/bvh.cu CHANGED
@@ -40,7 +40,7 @@ namespace wp
40
40
  // for LBVH: this will start with some muted leaf nodes, but that is okay, we can still trace up because there parents information is still valid
41
41
  // the only thing worth mentioning is that when the parent leaf node is also a leaf node, we need to recompute its bounds, since their child information are lost
42
42
  // for a compact tree such as those from SAH or Median constructor, there is no muted leaf nodes
43
- __global__ void bvh_refit_kernel(int n, const int* __restrict__ parents, int* __restrict__ child_count, int* __restrict__ primitive_indices, BVHPackedNodeHalf* __restrict__ node_lowers, BVHPackedNodeHalf* __restrict__ node_uppers, const vec3* item_lowers, const vec3* item_uppers)
43
+ __global__ void bvh_refit_kernel(int n, const int* __restrict__ parents, int* __restrict__ child_count, const int* __restrict__ primitive_indices, BVHPackedNodeHalf* __restrict__ node_lowers, BVHPackedNodeHalf* __restrict__ node_uppers, const vec3* __restrict__ item_lowers, const vec3* __restrict__ item_uppers)
44
44
  {
45
45
  int index = blockDim.x*blockIdx.x + threadIdx.x;
46
46
 
@@ -257,7 +257,7 @@ __global__ void build_leaves(const vec3* __restrict__ item_lowers, const vec3* _
257
257
  // there is one thread launched per-leaf node, each thread calculates it's parent node and assigns
258
258
  // itself to either the left or right parent slot, the last child to complete the parent and moves
259
259
  // up the hierarchy
260
- __global__ void build_hierarchy(int n, int* root, const int* __restrict__ deltas, int* __restrict__ num_children, volatile int* __restrict__ range_lefts, volatile int* __restrict__ range_rights, volatile int* __restrict__ parents, volatile BVHPackedNodeHalf* __restrict__ lowers, volatile BVHPackedNodeHalf* __restrict__ uppers)
260
+ __global__ void build_hierarchy(int n, int* root, const int* __restrict__ deltas, int* __restrict__ num_children, const int* __restrict__ primitive_indices, volatile int* __restrict__ range_lefts, volatile int* __restrict__ range_rights, volatile int* __restrict__ parents, volatile BVHPackedNodeHalf* __restrict__ lowers, volatile BVHPackedNodeHalf* __restrict__ uppers)
261
261
  {
262
262
  int index = blockDim.x*blockIdx.x + threadIdx.x;
263
263
 
@@ -283,13 +283,34 @@ __global__ void build_hierarchy(int n, int* root, const int* __restrict__ deltas
283
283
 
284
284
  int parent;
285
285
 
286
- if (left == 0 || (right != n-1 && deltas[right] < deltas[left-1]))
286
+ bool parent_right = false;
287
+ if (left == 0)
288
+ {
289
+ parent_right = true;
290
+ }
291
+ else if ((right != n - 1 && deltas[right] <= deltas[left - 1]))
292
+ {
293
+ // tie breaking, this avoid always choosing the right node which can result in a very deep tree
294
+ // generate a pseudo-random binary value to randomly choose left or right groupings
295
+ // since the primitives with same Morton code are not sorted at all, determining order based on primitive_indices may also be unreliable.
296
+ // Here, the decision is made using the XOR result of whether the keys before and after the internal node are divisible by 2.
297
+ if (deltas[right] == deltas[left - 1])
298
+ {
299
+ parent_right = (primitive_indices[left - 1] % 2) ^ (primitive_indices[right] % 2);
300
+ }
301
+ else
302
+ {
303
+ parent_right = true;
304
+ }
305
+ }
306
+
307
+ if (parent_right)
287
308
  {
288
309
  parent = right + internal_offset;
289
310
 
290
311
  // set parent left child
291
312
  parents[index] = parent;
292
- lowers[parent].i = index;
313
+ lowers[parent].i = index;
293
314
  range_lefts[parent] = left;
294
315
 
295
316
  // ensure above writes are visible to all threads
@@ -363,27 +384,34 @@ __global__ void build_hierarchy(int n, int* root, const int* __restrict__ deltas
363
384
  * <= BVH_LEAF_SIZE into a new leaf node. This process is done using the new kernel function called
364
385
  * mark_packed_leaf_nodes .
365
386
  */
366
- __global__ void mark_packed_leaf_nodes(int n, volatile int* __restrict__ range_lefts, volatile int* __restrict__ range_rights,
367
- volatile BVHPackedNodeHalf* __restrict__ lowers, volatile BVHPackedNodeHalf* __restrict__ uppers)
387
+ __global__ void mark_packed_leaf_nodes(int n, const int* __restrict__ range_lefts, const int* __restrict__ range_rights, const int* __restrict__ parents,
388
+ BVHPackedNodeHalf* __restrict__ lowers, BVHPackedNodeHalf* __restrict__ uppers)
368
389
  {
369
390
  int node_index = blockDim.x * blockIdx.x + threadIdx.x;
370
391
  if (node_index < n)
371
392
  {
372
- // mark the node as leaf if its range is less than LEAF_SIZE_LBVH
393
+ // mark the node as leaf if its range is less than LEAF_SIZE_LBVH or it is deeper than BVH_QUERY_STACK_SIZE
373
394
  // this will forever mute its child nodes so that they will never be accessed
374
395
 
396
+ // calculate depth
397
+ int depth = 1;
398
+ int parent = parents[node_index];
399
+ while (parent != -1)
400
+ {
401
+ int old_parent = parent;
402
+ parent = parents[parent];
403
+ depth++;
404
+ }
405
+
375
406
  int left = range_lefts[node_index];
376
407
  // the LBVH constructor's range is defined as left <= i <= right
377
408
  // we need to convert it to our convention: left <= i < right
378
409
  int right = range_rights[node_index] + 1;
379
- // printf("node %d (left %d right %d)", node_index, left, right);
380
- if (right - left <= BVH_LEAF_SIZE)
410
+ if (right - left <= BVH_LEAF_SIZE || depth >= BVH_QUERY_STACK_SIZE)
381
411
  {
382
412
  lowers[node_index].b = 1;
383
413
  lowers[node_index].i = left;
384
414
  uppers[node_index].i = right;
385
-
386
- // printf("node %d (left %d right %d) is set to child\n", node_index, left, right);
387
415
  }
388
416
  }
389
417
  }
@@ -516,8 +544,8 @@ void LinearBVHBuilderGPU::build(BVH& bvh, const vec3* item_lowers, const vec3* i
516
544
  memset_device(WP_CURRENT_CONTEXT, num_children, 0, sizeof(int)*bvh.max_nodes);
517
545
 
518
546
  // build the tree and internal node bounds
519
- wp_launch_device(WP_CURRENT_CONTEXT, build_hierarchy, num_items, (num_items, bvh.root, deltas, num_children, range_lefts, range_rights, bvh.node_parents, bvh.node_lowers, bvh.node_uppers));
520
- wp_launch_device(WP_CURRENT_CONTEXT, mark_packed_leaf_nodes, bvh.max_nodes, (bvh.max_nodes, range_lefts, range_rights, bvh.node_lowers, bvh.node_uppers));
547
+ wp_launch_device(WP_CURRENT_CONTEXT, build_hierarchy, num_items, (num_items, bvh.root, deltas, num_children, bvh.primitive_indices, range_lefts, range_rights, bvh.node_parents, bvh.node_lowers, bvh.node_uppers));
548
+ wp_launch_device(WP_CURRENT_CONTEXT, mark_packed_leaf_nodes, bvh.max_nodes, (bvh.max_nodes, range_lefts, range_rights, bvh.node_parents, bvh.node_lowers, bvh.node_uppers));
521
549
 
522
550
  // free temporary memory
523
551
  free_device(WP_CURRENT_CONTEXT, indices);
@@ -673,7 +701,7 @@ void bvh_create_device(void* context, vec3* lowers, vec3* uppers, int num_items,
673
701
  else if (constructor_type == BVH_CONSTRUCTOR_LBVH)
674
702
  {
675
703
  bvh_device_on_host.num_items = num_items;
676
- bvh_device_on_host.max_nodes = 2 * num_items;
704
+ bvh_device_on_host.max_nodes = 2 * num_items - 1;
677
705
  bvh_device_on_host.num_leaf_nodes = num_items;
678
706
  bvh_device_on_host.node_lowers = (BVHPackedNodeHalf*)alloc_device(WP_CURRENT_CONTEXT, sizeof(BVHPackedNodeHalf) * bvh_device_on_host.max_nodes);
679
707
  memset_device(WP_CURRENT_CONTEXT, bvh_device_on_host.node_lowers, 0, sizeof(BVHPackedNodeHalf) * bvh_device_on_host.max_nodes);
warp/native/bvh.h CHANGED
@@ -23,6 +23,7 @@
23
23
  #define BVH_LEAF_SIZE (4)
24
24
  #define SAH_NUM_BUCKETS (16)
25
25
  #define USE_LOAD4
26
+ #define BVH_QUERY_STACK_SIZE (32)
26
27
 
27
28
  #define BVH_CONSTRUCTOR_SAH (0)
28
29
  #define BVH_CONSTRUCTOR_MEDIAN (1)
@@ -300,7 +301,7 @@ struct bvh_query_t
300
301
  BVH bvh;
301
302
 
302
303
  // BVH traversal stack:
303
- int stack[32];
304
+ int stack[BVH_QUERY_STACK_SIZE];
304
305
  int count;
305
306
 
306
307
  // >= 0 if currently in a packed leaf node
warp/native/clang/clang.cpp CHANGED
@@ -58,10 +58,30 @@
58
58
  #if defined(_WIN64)
59
59
  extern "C" void __chkstk();
60
60
  #elif defined(__APPLE__)
61
- extern "C" void __bzero(void*, size_t);
61
+
62
+ #if defined(__MACH__) && defined(__aarch64__)
63
+ extern "C" void _bzero(void *s, size_t n) {
64
+ memset(s, 0, n);
65
+ }
66
+ extern "C" void __bzero(void *s, size_t n) {
67
+ memset(s, 0, n);
68
+ }
69
+
70
+ extern "C" void _memset_pattern16(void *s, const void *pattern, size_t n);
71
+ extern "C" void __memset_pattern16(void *s, const void *pattern, size_t n);
72
+
73
+ #else
74
+ // // Intel Mac's define bzero in libSystem.dylib
75
+ extern "C" void __bzero(void *s, size_t n);
76
+
77
+ extern "C" void _memset_pattern16(void *s, const void *pattern, size_t n);
78
+ extern "C" void __memset_pattern16(void *s, const void *pattern, size_t n);
79
+
80
+ #endif
81
+
62
82
  extern "C" __double2 __sincos_stret(double);
63
83
  extern "C" __float2 __sincosf_stret(float);
64
- #endif
84
+ #endif // defined(__APPLE__)
65
85
 
66
86
  extern "C" {
67
87
 
@@ -434,7 +454,14 @@ WP_API int load_obj(const char* object_file, const char* module_name)
434
454
  // triggering the stack overflow guards.
435
455
  SYMBOL(__chkstk),
436
456
  #elif defined(__APPLE__)
437
- SYMBOL(__bzero),
457
+ #if defined(__MACH__) && defined(__aarch64__)
458
+ SYMBOL(bzero),
459
+ SYMBOL(_bzero),
460
+ #else
461
+ // Intel Mac
462
+ SYMBOL(__bzero),
463
+ #endif
464
+ SYMBOL(memset_pattern16),
438
465
  SYMBOL(__sincos_stret), SYMBOL(__sincosf_stret),
439
466
  #else
440
467
  SYMBOL(sincosf), SYMBOL_T(sincos, void(*)(double,double*,double*)),
warp/native/cuda_util.cpp CHANGED
@@ -86,6 +86,7 @@ static PFN_cuCtxEnablePeerAccess_v4000 pfn_cuCtxEnablePeerAccess;
86
86
  static PFN_cuCtxDisablePeerAccess_v4000 pfn_cuCtxDisablePeerAccess;
87
87
  static PFN_cuStreamCreate_v2000 pfn_cuStreamCreate;
88
88
  static PFN_cuStreamDestroy_v4000 pfn_cuStreamDestroy;
89
+ static PFN_cuStreamQuery_v2000 pfn_cuStreamQuery;
89
90
  static PFN_cuStreamSynchronize_v2000 pfn_cuStreamSynchronize;
90
91
  static PFN_cuStreamWaitEvent_v3020 pfn_cuStreamWaitEvent;
91
92
  static PFN_cuStreamGetCtx_v9020 pfn_cuStreamGetCtx;
@@ -95,6 +96,7 @@ static PFN_cuStreamCreateWithPriority_v5050 pfn_cuStreamCreateWithPriority;
95
96
  static PFN_cuStreamGetPriority_v5050 pfn_cuStreamGetPriority;
96
97
  static PFN_cuEventCreate_v2000 pfn_cuEventCreate;
97
98
  static PFN_cuEventDestroy_v4000 pfn_cuEventDestroy;
99
+ static PFN_cuEventQuery_v2000 pfn_cuEventQuery;
98
100
  static PFN_cuEventRecord_v2000 pfn_cuEventRecord;
99
101
  static PFN_cuEventRecordWithFlags_v11010 pfn_cuEventRecordWithFlags;
100
102
  static PFN_cuEventSynchronize_v2000 pfn_cuEventSynchronize;
@@ -227,6 +229,7 @@ bool init_cuda_driver()
227
229
  get_driver_entry_point("cuCtxDisablePeerAccess", 4000, &(void*&)pfn_cuCtxDisablePeerAccess);
228
230
  get_driver_entry_point("cuStreamCreate", 2000, &(void*&)pfn_cuStreamCreate);
229
231
  get_driver_entry_point("cuStreamDestroy", 4000, &(void*&)pfn_cuStreamDestroy);
232
+ get_driver_entry_point("cuStreamQuery", 2000, &(void*&)pfn_cuStreamQuery);
230
233
  get_driver_entry_point("cuStreamSynchronize", 2000, &(void*&)pfn_cuStreamSynchronize);
231
234
  get_driver_entry_point("cuStreamWaitEvent", 3020, &(void*&)pfn_cuStreamWaitEvent);
232
235
  get_driver_entry_point("cuStreamGetCtx", 9020, &(void*&)pfn_cuStreamGetCtx);
@@ -236,6 +239,7 @@ bool init_cuda_driver()
236
239
  get_driver_entry_point("cuStreamGetPriority", 5050, &(void*&)pfn_cuStreamGetPriority);
237
240
  get_driver_entry_point("cuEventCreate", 2000, &(void*&)pfn_cuEventCreate);
238
241
  get_driver_entry_point("cuEventDestroy", 4000, &(void*&)pfn_cuEventDestroy);
242
+ get_driver_entry_point("cuEventQuery", 2000, &(void*&)pfn_cuEventQuery);
239
243
  get_driver_entry_point("cuEventRecord", 2000, &(void*&)pfn_cuEventRecord);
240
244
  get_driver_entry_point("cuEventRecordWithFlags", 11010, &(void*&)pfn_cuEventRecordWithFlags);
241
245
  get_driver_entry_point("cuEventSynchronize", 2000, &(void*&)pfn_cuEventSynchronize);
@@ -479,6 +483,11 @@ CUresult cuStreamDestroy_f(CUstream stream)
479
483
  return pfn_cuStreamDestroy ? pfn_cuStreamDestroy(stream) : DRIVER_ENTRY_POINT_ERROR;
480
484
  }
481
485
 
486
+ CUresult cuStreamQuery_f(CUstream stream)
487
+ {
488
+ return pfn_cuStreamQuery ? pfn_cuStreamQuery(stream) : DRIVER_ENTRY_POINT_ERROR;
489
+ }
490
+
482
491
  CUresult cuStreamSynchronize_f(CUstream stream)
483
492
  {
484
493
  return pfn_cuStreamSynchronize ? pfn_cuStreamSynchronize(stream) : DRIVER_ENTRY_POINT_ERROR;
@@ -524,6 +533,11 @@ CUresult cuEventDestroy_f(CUevent event)
524
533
  return pfn_cuEventDestroy ? pfn_cuEventDestroy(event) : DRIVER_ENTRY_POINT_ERROR;
525
534
  }
526
535
 
536
+ CUresult cuEventQuery_f(CUevent event)
537
+ {
538
+ return pfn_cuEventQuery ? pfn_cuEventQuery(event) : DRIVER_ENTRY_POINT_ERROR;
539
+ }
540
+
527
541
  CUresult cuEventRecord_f(CUevent event, CUstream stream)
528
542
  {
529
543
  return pfn_cuEventRecord ? pfn_cuEventRecord(event, stream) : DRIVER_ENTRY_POINT_ERROR;
warp/native/cuda_util.h CHANGED
@@ -85,6 +85,7 @@ CUresult cuCtxEnablePeerAccess_f(CUcontext peer_ctx, unsigned int flags);
85
85
  CUresult cuCtxDisablePeerAccess_f(CUcontext peer_ctx);
86
86
  CUresult cuStreamCreate_f(CUstream* stream, unsigned int flags);
87
87
  CUresult cuStreamDestroy_f(CUstream stream);
88
+ CUresult cuStreamQuery_f(CUstream stream);
88
89
  CUresult cuStreamSynchronize_f(CUstream stream);
89
90
  CUresult cuStreamWaitEvent_f(CUstream stream, CUevent event, unsigned int flags);
90
91
  CUresult cuStreamGetCtx_f(CUstream stream, CUcontext* pctx);
@@ -94,6 +95,7 @@ CUresult cuStreamCreateWithPriority_f(CUstream* phStream, unsigned int flags, in
94
95
  CUresult cuStreamGetPriority_f(CUstream hStream, int* priority);
95
96
  CUresult cuEventCreate_f(CUevent* event, unsigned int flags);
96
97
  CUresult cuEventDestroy_f(CUevent event);
98
+ CUresult cuEventQuery_f(CUevent event);
97
99
  CUresult cuEventRecord_f(CUevent event, CUstream stream);
98
100
  CUresult cuEventRecordWithFlags_f(CUevent event, CUstream stream, unsigned int flags);
99
101
  CUresult cuEventSynchronize_f(CUevent event);
warp/native/exports.h CHANGED
@@ -960,6 +960,9 @@ WP_API void builtin_quat_from_axis_angle_vec3d_float64(vec3d& axis, float64 angl
960
960
  WP_API void builtin_quat_from_matrix_mat33h(mat33h& mat, quath* ret) { *ret = wp::quat_from_matrix(mat); }
961
961
  WP_API void builtin_quat_from_matrix_mat33f(mat33f& mat, quatf* ret) { *ret = wp::quat_from_matrix(mat); }
962
962
  WP_API void builtin_quat_from_matrix_mat33d(mat33d& mat, quatd* ret) { *ret = wp::quat_from_matrix(mat); }
963
+ WP_API void builtin_quat_from_matrix_mat44h(mat44h& mat, quath* ret) { *ret = wp::quat_from_matrix(mat); }
964
+ WP_API void builtin_quat_from_matrix_mat44f(mat44f& mat, quatf* ret) { *ret = wp::quat_from_matrix(mat); }
965
+ WP_API void builtin_quat_from_matrix_mat44d(mat44d& mat, quatd* ret) { *ret = wp::quat_from_matrix(mat); }
963
966
  WP_API void builtin_quat_rpy_float16_float16_float16(float16 roll, float16 pitch, float16 yaw, quath* ret) { *ret = wp::quat_rpy(roll, pitch, yaw); }
964
967
  WP_API void builtin_quat_rpy_float32_float32_float32(float32 roll, float32 pitch, float32 yaw, quatf* ret) { *ret = wp::quat_rpy(roll, pitch, yaw); }
965
968
  WP_API void builtin_quat_rpy_float64_float64_float64(float64 roll, float64 pitch, float64 yaw, quatd* ret) { *ret = wp::quat_rpy(roll, pitch, yaw); }
@@ -1018,69 +1021,6 @@ WP_API void builtin_spatial_top_spatial_vectord(spatial_vectord& svec, vec3d* re
1018
1021
  WP_API void builtin_spatial_bottom_spatial_vectorh(spatial_vectorh& svec, vec3h* ret) { *ret = wp::spatial_bottom(svec); }
1019
1022
  WP_API void builtin_spatial_bottom_spatial_vectorf(spatial_vectorf& svec, vec3f* ret) { *ret = wp::spatial_bottom(svec); }
1020
1023
  WP_API void builtin_spatial_bottom_spatial_vectord(spatial_vectord& svec, vec3d* ret) { *ret = wp::spatial_bottom(svec); }
1021
- WP_API void builtin_assign_vec2h_int32_float16(vec2h& a, int32 i, float16 value, vec2h* ret) { *ret = wp::assign(a, i, value); }
1022
- WP_API void builtin_assign_vec3h_int32_float16(vec3h& a, int32 i, float16 value, vec3h* ret) { *ret = wp::assign(a, i, value); }
1023
- WP_API void builtin_assign_vec4h_int32_float16(vec4h& a, int32 i, float16 value, vec4h* ret) { *ret = wp::assign(a, i, value); }
1024
- WP_API void builtin_assign_spatial_vectorh_int32_float16(spatial_vectorh& a, int32 i, float16 value, spatial_vectorh* ret) { *ret = wp::assign(a, i, value); }
1025
- WP_API void builtin_assign_vec2f_int32_float32(vec2f& a, int32 i, float32 value, vec2f* ret) { *ret = wp::assign(a, i, value); }
1026
- WP_API void builtin_assign_vec3f_int32_float32(vec3f& a, int32 i, float32 value, vec3f* ret) { *ret = wp::assign(a, i, value); }
1027
- WP_API void builtin_assign_vec4f_int32_float32(vec4f& a, int32 i, float32 value, vec4f* ret) { *ret = wp::assign(a, i, value); }
1028
- WP_API void builtin_assign_spatial_vectorf_int32_float32(spatial_vectorf& a, int32 i, float32 value, spatial_vectorf* ret) { *ret = wp::assign(a, i, value); }
1029
- WP_API void builtin_assign_vec2d_int32_float64(vec2d& a, int32 i, float64 value, vec2d* ret) { *ret = wp::assign(a, i, value); }
1030
- WP_API void builtin_assign_vec3d_int32_float64(vec3d& a, int32 i, float64 value, vec3d* ret) { *ret = wp::assign(a, i, value); }
1031
- WP_API void builtin_assign_vec4d_int32_float64(vec4d& a, int32 i, float64 value, vec4d* ret) { *ret = wp::assign(a, i, value); }
1032
- WP_API void builtin_assign_spatial_vectord_int32_float64(spatial_vectord& a, int32 i, float64 value, spatial_vectord* ret) { *ret = wp::assign(a, i, value); }
1033
- WP_API void builtin_assign_vec2s_int32_int16(vec2s& a, int32 i, int16 value, vec2s* ret) { *ret = wp::assign(a, i, value); }
1034
- WP_API void builtin_assign_vec3s_int32_int16(vec3s& a, int32 i, int16 value, vec3s* ret) { *ret = wp::assign(a, i, value); }
1035
- WP_API void builtin_assign_vec4s_int32_int16(vec4s& a, int32 i, int16 value, vec4s* ret) { *ret = wp::assign(a, i, value); }
1036
- WP_API void builtin_assign_vec2i_int32_int32(vec2i& a, int32 i, int32 value, vec2i* ret) { *ret = wp::assign(a, i, value); }
1037
- WP_API void builtin_assign_vec3i_int32_int32(vec3i& a, int32 i, int32 value, vec3i* ret) { *ret = wp::assign(a, i, value); }
1038
- WP_API void builtin_assign_vec4i_int32_int32(vec4i& a, int32 i, int32 value, vec4i* ret) { *ret = wp::assign(a, i, value); }
1039
- WP_API void builtin_assign_vec2l_int32_int64(vec2l& a, int32 i, int64 value, vec2l* ret) { *ret = wp::assign(a, i, value); }
1040
- WP_API void builtin_assign_vec3l_int32_int64(vec3l& a, int32 i, int64 value, vec3l* ret) { *ret = wp::assign(a, i, value); }
1041
- WP_API void builtin_assign_vec4l_int32_int64(vec4l& a, int32 i, int64 value, vec4l* ret) { *ret = wp::assign(a, i, value); }
1042
- WP_API void builtin_assign_vec2b_int32_int8(vec2b& a, int32 i, int8 value, vec2b* ret) { *ret = wp::assign(a, i, value); }
1043
- WP_API void builtin_assign_vec3b_int32_int8(vec3b& a, int32 i, int8 value, vec3b* ret) { *ret = wp::assign(a, i, value); }
1044
- WP_API void builtin_assign_vec4b_int32_int8(vec4b& a, int32 i, int8 value, vec4b* ret) { *ret = wp::assign(a, i, value); }
1045
- WP_API void builtin_assign_vec2us_int32_uint16(vec2us& a, int32 i, uint16 value, vec2us* ret) { *ret = wp::assign(a, i, value); }
1046
- WP_API void builtin_assign_vec3us_int32_uint16(vec3us& a, int32 i, uint16 value, vec3us* ret) { *ret = wp::assign(a, i, value); }
1047
- WP_API void builtin_assign_vec4us_int32_uint16(vec4us& a, int32 i, uint16 value, vec4us* ret) { *ret = wp::assign(a, i, value); }
1048
- WP_API void builtin_assign_vec2ui_int32_uint32(vec2ui& a, int32 i, uint32 value, vec2ui* ret) { *ret = wp::assign(a, i, value); }
1049
- WP_API void builtin_assign_vec3ui_int32_uint32(vec3ui& a, int32 i, uint32 value, vec3ui* ret) { *ret = wp::assign(a, i, value); }
1050
- WP_API void builtin_assign_vec4ui_int32_uint32(vec4ui& a, int32 i, uint32 value, vec4ui* ret) { *ret = wp::assign(a, i, value); }
1051
- WP_API void builtin_assign_vec2ul_int32_uint64(vec2ul& a, int32 i, uint64 value, vec2ul* ret) { *ret = wp::assign(a, i, value); }
1052
- WP_API void builtin_assign_vec3ul_int32_uint64(vec3ul& a, int32 i, uint64 value, vec3ul* ret) { *ret = wp::assign(a, i, value); }
1053
- WP_API void builtin_assign_vec4ul_int32_uint64(vec4ul& a, int32 i, uint64 value, vec4ul* ret) { *ret = wp::assign(a, i, value); }
1054
- WP_API void builtin_assign_vec2ub_int32_uint8(vec2ub& a, int32 i, uint8 value, vec2ub* ret) { *ret = wp::assign(a, i, value); }
1055
- WP_API void builtin_assign_vec3ub_int32_uint8(vec3ub& a, int32 i, uint8 value, vec3ub* ret) { *ret = wp::assign(a, i, value); }
1056
- WP_API void builtin_assign_vec4ub_int32_uint8(vec4ub& a, int32 i, uint8 value, vec4ub* ret) { *ret = wp::assign(a, i, value); }
1057
- WP_API void builtin_assign_quath_int32_float16(quath& a, int32 i, float16 value, quath* ret) { *ret = wp::assign(a, i, value); }
1058
- WP_API void builtin_assign_quatf_int32_float32(quatf& a, int32 i, float32 value, quatf* ret) { *ret = wp::assign(a, i, value); }
1059
- WP_API void builtin_assign_quatd_int32_float64(quatd& a, int32 i, float64 value, quatd* ret) { *ret = wp::assign(a, i, value); }
1060
- WP_API void builtin_assign_mat22h_int32_int32_float16(mat22h& a, int32 i, int32 j, float16 value, mat22h* ret) { *ret = wp::assign(a, i, j, value); }
1061
- WP_API void builtin_assign_mat33h_int32_int32_float16(mat33h& a, int32 i, int32 j, float16 value, mat33h* ret) { *ret = wp::assign(a, i, j, value); }
1062
- WP_API void builtin_assign_mat44h_int32_int32_float16(mat44h& a, int32 i, int32 j, float16 value, mat44h* ret) { *ret = wp::assign(a, i, j, value); }
1063
- WP_API void builtin_assign_spatial_matrixh_int32_int32_float16(spatial_matrixh& a, int32 i, int32 j, float16 value, spatial_matrixh* ret) { *ret = wp::assign(a, i, j, value); }
1064
- WP_API void builtin_assign_mat22f_int32_int32_float32(mat22f& a, int32 i, int32 j, float32 value, mat22f* ret) { *ret = wp::assign(a, i, j, value); }
1065
- WP_API void builtin_assign_mat33f_int32_int32_float32(mat33f& a, int32 i, int32 j, float32 value, mat33f* ret) { *ret = wp::assign(a, i, j, value); }
1066
- WP_API void builtin_assign_mat44f_int32_int32_float32(mat44f& a, int32 i, int32 j, float32 value, mat44f* ret) { *ret = wp::assign(a, i, j, value); }
1067
- WP_API void builtin_assign_spatial_matrixf_int32_int32_float32(spatial_matrixf& a, int32 i, int32 j, float32 value, spatial_matrixf* ret) { *ret = wp::assign(a, i, j, value); }
1068
- WP_API void builtin_assign_mat22d_int32_int32_float64(mat22d& a, int32 i, int32 j, float64 value, mat22d* ret) { *ret = wp::assign(a, i, j, value); }
1069
- WP_API void builtin_assign_mat33d_int32_int32_float64(mat33d& a, int32 i, int32 j, float64 value, mat33d* ret) { *ret = wp::assign(a, i, j, value); }
1070
- WP_API void builtin_assign_mat44d_int32_int32_float64(mat44d& a, int32 i, int32 j, float64 value, mat44d* ret) { *ret = wp::assign(a, i, j, value); }
1071
- WP_API void builtin_assign_spatial_matrixd_int32_int32_float64(spatial_matrixd& a, int32 i, int32 j, float64 value, spatial_matrixd* ret) { *ret = wp::assign(a, i, j, value); }
1072
- WP_API void builtin_assign_mat22h_int32_vec2h(mat22h& a, int32 i, vec2h& value, mat22h* ret) { *ret = wp::assign(a, i, value); }
1073
- WP_API void builtin_assign_mat33h_int32_vec3h(mat33h& a, int32 i, vec3h& value, mat33h* ret) { *ret = wp::assign(a, i, value); }
1074
- WP_API void builtin_assign_mat44h_int32_vec4h(mat44h& a, int32 i, vec4h& value, mat44h* ret) { *ret = wp::assign(a, i, value); }
1075
- WP_API void builtin_assign_spatial_matrixh_int32_spatial_vectorh(spatial_matrixh& a, int32 i, spatial_vectorh& value, spatial_matrixh* ret) { *ret = wp::assign(a, i, value); }
1076
- WP_API void builtin_assign_mat22f_int32_vec2f(mat22f& a, int32 i, vec2f& value, mat22f* ret) { *ret = wp::assign(a, i, value); }
1077
- WP_API void builtin_assign_mat33f_int32_vec3f(mat33f& a, int32 i, vec3f& value, mat33f* ret) { *ret = wp::assign(a, i, value); }
1078
- WP_API void builtin_assign_mat44f_int32_vec4f(mat44f& a, int32 i, vec4f& value, mat44f* ret) { *ret = wp::assign(a, i, value); }
1079
- WP_API void builtin_assign_spatial_matrixf_int32_spatial_vectorf(spatial_matrixf& a, int32 i, spatial_vectorf& value, spatial_matrixf* ret) { *ret = wp::assign(a, i, value); }
1080
- WP_API void builtin_assign_mat22d_int32_vec2d(mat22d& a, int32 i, vec2d& value, mat22d* ret) { *ret = wp::assign(a, i, value); }
1081
- WP_API void builtin_assign_mat33d_int32_vec3d(mat33d& a, int32 i, vec3d& value, mat33d* ret) { *ret = wp::assign(a, i, value); }
1082
- WP_API void builtin_assign_mat44d_int32_vec4d(mat44d& a, int32 i, vec4d& value, mat44d* ret) { *ret = wp::assign(a, i, value); }
1083
- WP_API void builtin_assign_spatial_matrixd_int32_spatial_vectord(spatial_matrixd& a, int32 i, spatial_vectord& value, spatial_matrixd* ret) { *ret = wp::assign(a, i, value); }
1084
1024
  WP_API void builtin_volume_sample_f_uint64_vec3f_int32(uint64 id, vec3f& uvw, int32 sampling_mode, float* ret) { *ret = wp::volume_sample_f(id, uvw, sampling_mode); }
1085
1025
  WP_API void builtin_volume_sample_grad_f_uint64_vec3f_int32_vec3f(uint64 id, vec3f& uvw, int32 sampling_mode, vec3f& grad, float* ret) { *ret = wp::volume_sample_grad_f(id, uvw, sampling_mode, grad); }
1086
1026
  WP_API void builtin_volume_lookup_f_uint64_int32_int32_int32(uint64 id, int32 i, int32 j, int32 k, float* ret) { *ret = wp::volume_lookup_f(id, i, j, k); }
@@ -1097,6 +1037,8 @@ WP_API void builtin_rand_init_int32(int32 seed, uint32* ret) { *ret = wp::rand_i
1097
1037
  WP_API void builtin_rand_init_int32_int32(int32 seed, int32 offset, uint32* ret) { *ret = wp::rand_init(seed, offset); }
1098
1038
  WP_API void builtin_randi_uint32(uint32 state, int* ret) { *ret = wp::randi(state); }
1099
1039
  WP_API void builtin_randi_uint32_int32_int32(uint32 state, int32 low, int32 high, int* ret) { *ret = wp::randi(state, low, high); }
1040
+ WP_API void builtin_randu_uint32(uint32 state, uint32* ret) { *ret = wp::randu(state); }
1041
+ WP_API void builtin_randu_uint32_uint32_uint32(uint32 state, uint32 low, uint32 high, uint32* ret) { *ret = wp::randu(state, low, high); }
1100
1042
  WP_API void builtin_randf_uint32(uint32 state, float* ret) { *ret = wp::randf(state); }
1101
1043
  WP_API void builtin_randf_uint32_float32_float32(uint32 state, float32 low, float32 high, float* ret) { *ret = wp::randf(state, low, high); }
1102
1044
  WP_API void builtin_randn_uint32(uint32 state, float* ret) { *ret = wp::randn(state); }
@@ -1188,6 +1130,69 @@ WP_API void builtin_extract_transformh_int32(transformh& a, int32 i, float16* re
1188
1130
  WP_API void builtin_extract_transformf_int32(transformf& a, int32 i, float32* ret) { *ret = wp::extract(a, i); }
1189
1131
  WP_API void builtin_extract_transformd_int32(transformd& a, int32 i, float64* ret) { *ret = wp::extract(a, i); }
1190
1132
  WP_API void builtin_extract_shape_t_int32(shape_t s, int32 i, int* ret) { *ret = wp::extract(s, i); }
1133
+ WP_API void builtin_assign_copy_vec2h_int32_float16(vec2h& a, int32 i, float16 value, vec2h* ret) { *ret = wp::assign_copy(a, i, value); }
1134
+ WP_API void builtin_assign_copy_vec3h_int32_float16(vec3h& a, int32 i, float16 value, vec3h* ret) { *ret = wp::assign_copy(a, i, value); }
1135
+ WP_API void builtin_assign_copy_vec4h_int32_float16(vec4h& a, int32 i, float16 value, vec4h* ret) { *ret = wp::assign_copy(a, i, value); }
1136
+ WP_API void builtin_assign_copy_spatial_vectorh_int32_float16(spatial_vectorh& a, int32 i, float16 value, spatial_vectorh* ret) { *ret = wp::assign_copy(a, i, value); }
1137
+ WP_API void builtin_assign_copy_vec2f_int32_float32(vec2f& a, int32 i, float32 value, vec2f* ret) { *ret = wp::assign_copy(a, i, value); }
1138
+ WP_API void builtin_assign_copy_vec3f_int32_float32(vec3f& a, int32 i, float32 value, vec3f* ret) { *ret = wp::assign_copy(a, i, value); }
1139
+ WP_API void builtin_assign_copy_vec4f_int32_float32(vec4f& a, int32 i, float32 value, vec4f* ret) { *ret = wp::assign_copy(a, i, value); }
1140
+ WP_API void builtin_assign_copy_spatial_vectorf_int32_float32(spatial_vectorf& a, int32 i, float32 value, spatial_vectorf* ret) { *ret = wp::assign_copy(a, i, value); }
1141
+ WP_API void builtin_assign_copy_vec2d_int32_float64(vec2d& a, int32 i, float64 value, vec2d* ret) { *ret = wp::assign_copy(a, i, value); }
1142
+ WP_API void builtin_assign_copy_vec3d_int32_float64(vec3d& a, int32 i, float64 value, vec3d* ret) { *ret = wp::assign_copy(a, i, value); }
1143
+ WP_API void builtin_assign_copy_vec4d_int32_float64(vec4d& a, int32 i, float64 value, vec4d* ret) { *ret = wp::assign_copy(a, i, value); }
1144
+ WP_API void builtin_assign_copy_spatial_vectord_int32_float64(spatial_vectord& a, int32 i, float64 value, spatial_vectord* ret) { *ret = wp::assign_copy(a, i, value); }
1145
+ WP_API void builtin_assign_copy_vec2s_int32_int16(vec2s& a, int32 i, int16 value, vec2s* ret) { *ret = wp::assign_copy(a, i, value); }
1146
+ WP_API void builtin_assign_copy_vec3s_int32_int16(vec3s& a, int32 i, int16 value, vec3s* ret) { *ret = wp::assign_copy(a, i, value); }
1147
+ WP_API void builtin_assign_copy_vec4s_int32_int16(vec4s& a, int32 i, int16 value, vec4s* ret) { *ret = wp::assign_copy(a, i, value); }
1148
+ WP_API void builtin_assign_copy_vec2i_int32_int32(vec2i& a, int32 i, int32 value, vec2i* ret) { *ret = wp::assign_copy(a, i, value); }
1149
+ WP_API void builtin_assign_copy_vec3i_int32_int32(vec3i& a, int32 i, int32 value, vec3i* ret) { *ret = wp::assign_copy(a, i, value); }
1150
+ WP_API void builtin_assign_copy_vec4i_int32_int32(vec4i& a, int32 i, int32 value, vec4i* ret) { *ret = wp::assign_copy(a, i, value); }
1151
+ WP_API void builtin_assign_copy_vec2l_int32_int64(vec2l& a, int32 i, int64 value, vec2l* ret) { *ret = wp::assign_copy(a, i, value); }
1152
+ WP_API void builtin_assign_copy_vec3l_int32_int64(vec3l& a, int32 i, int64 value, vec3l* ret) { *ret = wp::assign_copy(a, i, value); }
1153
+ WP_API void builtin_assign_copy_vec4l_int32_int64(vec4l& a, int32 i, int64 value, vec4l* ret) { *ret = wp::assign_copy(a, i, value); }
1154
+ WP_API void builtin_assign_copy_vec2b_int32_int8(vec2b& a, int32 i, int8 value, vec2b* ret) { *ret = wp::assign_copy(a, i, value); }
1155
+ WP_API void builtin_assign_copy_vec3b_int32_int8(vec3b& a, int32 i, int8 value, vec3b* ret) { *ret = wp::assign_copy(a, i, value); }
1156
+ WP_API void builtin_assign_copy_vec4b_int32_int8(vec4b& a, int32 i, int8 value, vec4b* ret) { *ret = wp::assign_copy(a, i, value); }
1157
+ WP_API void builtin_assign_copy_vec2us_int32_uint16(vec2us& a, int32 i, uint16 value, vec2us* ret) { *ret = wp::assign_copy(a, i, value); }
1158
+ WP_API void builtin_assign_copy_vec3us_int32_uint16(vec3us& a, int32 i, uint16 value, vec3us* ret) { *ret = wp::assign_copy(a, i, value); }
1159
+ WP_API void builtin_assign_copy_vec4us_int32_uint16(vec4us& a, int32 i, uint16 value, vec4us* ret) { *ret = wp::assign_copy(a, i, value); }
1160
+ WP_API void builtin_assign_copy_vec2ui_int32_uint32(vec2ui& a, int32 i, uint32 value, vec2ui* ret) { *ret = wp::assign_copy(a, i, value); }
1161
+ WP_API void builtin_assign_copy_vec3ui_int32_uint32(vec3ui& a, int32 i, uint32 value, vec3ui* ret) { *ret = wp::assign_copy(a, i, value); }
1162
+ WP_API void builtin_assign_copy_vec4ui_int32_uint32(vec4ui& a, int32 i, uint32 value, vec4ui* ret) { *ret = wp::assign_copy(a, i, value); }
1163
+ WP_API void builtin_assign_copy_vec2ul_int32_uint64(vec2ul& a, int32 i, uint64 value, vec2ul* ret) { *ret = wp::assign_copy(a, i, value); }
1164
+ WP_API void builtin_assign_copy_vec3ul_int32_uint64(vec3ul& a, int32 i, uint64 value, vec3ul* ret) { *ret = wp::assign_copy(a, i, value); }
1165
+ WP_API void builtin_assign_copy_vec4ul_int32_uint64(vec4ul& a, int32 i, uint64 value, vec4ul* ret) { *ret = wp::assign_copy(a, i, value); }
1166
+ WP_API void builtin_assign_copy_vec2ub_int32_uint8(vec2ub& a, int32 i, uint8 value, vec2ub* ret) { *ret = wp::assign_copy(a, i, value); }
1167
+ WP_API void builtin_assign_copy_vec3ub_int32_uint8(vec3ub& a, int32 i, uint8 value, vec3ub* ret) { *ret = wp::assign_copy(a, i, value); }
1168
+ WP_API void builtin_assign_copy_vec4ub_int32_uint8(vec4ub& a, int32 i, uint8 value, vec4ub* ret) { *ret = wp::assign_copy(a, i, value); }
1169
+ WP_API void builtin_assign_copy_quath_int32_float16(quath& a, int32 i, float16 value, quath* ret) { *ret = wp::assign_copy(a, i, value); }
1170
+ WP_API void builtin_assign_copy_quatf_int32_float32(quatf& a, int32 i, float32 value, quatf* ret) { *ret = wp::assign_copy(a, i, value); }
1171
+ WP_API void builtin_assign_copy_quatd_int32_float64(quatd& a, int32 i, float64 value, quatd* ret) { *ret = wp::assign_copy(a, i, value); }
1172
+ WP_API void builtin_assign_copy_mat22h_int32_int32_float16(mat22h& a, int32 i, int32 j, float16 value, mat22h* ret) { *ret = wp::assign_copy(a, i, j, value); }
1173
+ WP_API void builtin_assign_copy_mat33h_int32_int32_float16(mat33h& a, int32 i, int32 j, float16 value, mat33h* ret) { *ret = wp::assign_copy(a, i, j, value); }
1174
+ WP_API void builtin_assign_copy_mat44h_int32_int32_float16(mat44h& a, int32 i, int32 j, float16 value, mat44h* ret) { *ret = wp::assign_copy(a, i, j, value); }
1175
+ WP_API void builtin_assign_copy_spatial_matrixh_int32_int32_float16(spatial_matrixh& a, int32 i, int32 j, float16 value, spatial_matrixh* ret) { *ret = wp::assign_copy(a, i, j, value); }
1176
+ WP_API void builtin_assign_copy_mat22f_int32_int32_float32(mat22f& a, int32 i, int32 j, float32 value, mat22f* ret) { *ret = wp::assign_copy(a, i, j, value); }
1177
+ WP_API void builtin_assign_copy_mat33f_int32_int32_float32(mat33f& a, int32 i, int32 j, float32 value, mat33f* ret) { *ret = wp::assign_copy(a, i, j, value); }
1178
+ WP_API void builtin_assign_copy_mat44f_int32_int32_float32(mat44f& a, int32 i, int32 j, float32 value, mat44f* ret) { *ret = wp::assign_copy(a, i, j, value); }
1179
+ WP_API void builtin_assign_copy_spatial_matrixf_int32_int32_float32(spatial_matrixf& a, int32 i, int32 j, float32 value, spatial_matrixf* ret) { *ret = wp::assign_copy(a, i, j, value); }
1180
+ WP_API void builtin_assign_copy_mat22d_int32_int32_float64(mat22d& a, int32 i, int32 j, float64 value, mat22d* ret) { *ret = wp::assign_copy(a, i, j, value); }
1181
+ WP_API void builtin_assign_copy_mat33d_int32_int32_float64(mat33d& a, int32 i, int32 j, float64 value, mat33d* ret) { *ret = wp::assign_copy(a, i, j, value); }
1182
+ WP_API void builtin_assign_copy_mat44d_int32_int32_float64(mat44d& a, int32 i, int32 j, float64 value, mat44d* ret) { *ret = wp::assign_copy(a, i, j, value); }
1183
+ WP_API void builtin_assign_copy_spatial_matrixd_int32_int32_float64(spatial_matrixd& a, int32 i, int32 j, float64 value, spatial_matrixd* ret) { *ret = wp::assign_copy(a, i, j, value); }
1184
+ WP_API void builtin_assign_copy_mat22h_int32_vec2h(mat22h& a, int32 i, vec2h& value, mat22h* ret) { *ret = wp::assign_copy(a, i, value); }
1185
+ WP_API void builtin_assign_copy_mat33h_int32_vec3h(mat33h& a, int32 i, vec3h& value, mat33h* ret) { *ret = wp::assign_copy(a, i, value); }
1186
+ WP_API void builtin_assign_copy_mat44h_int32_vec4h(mat44h& a, int32 i, vec4h& value, mat44h* ret) { *ret = wp::assign_copy(a, i, value); }
1187
+ WP_API void builtin_assign_copy_spatial_matrixh_int32_spatial_vectorh(spatial_matrixh& a, int32 i, spatial_vectorh& value, spatial_matrixh* ret) { *ret = wp::assign_copy(a, i, value); }
1188
+ WP_API void builtin_assign_copy_mat22f_int32_vec2f(mat22f& a, int32 i, vec2f& value, mat22f* ret) { *ret = wp::assign_copy(a, i, value); }
1189
+ WP_API void builtin_assign_copy_mat33f_int32_vec3f(mat33f& a, int32 i, vec3f& value, mat33f* ret) { *ret = wp::assign_copy(a, i, value); }
1190
+ WP_API void builtin_assign_copy_mat44f_int32_vec4f(mat44f& a, int32 i, vec4f& value, mat44f* ret) { *ret = wp::assign_copy(a, i, value); }
1191
+ WP_API void builtin_assign_copy_spatial_matrixf_int32_spatial_vectorf(spatial_matrixf& a, int32 i, spatial_vectorf& value, spatial_matrixf* ret) { *ret = wp::assign_copy(a, i, value); }
1192
+ WP_API void builtin_assign_copy_mat22d_int32_vec2d(mat22d& a, int32 i, vec2d& value, mat22d* ret) { *ret = wp::assign_copy(a, i, value); }
1193
+ WP_API void builtin_assign_copy_mat33d_int32_vec3d(mat33d& a, int32 i, vec3d& value, mat33d* ret) { *ret = wp::assign_copy(a, i, value); }
1194
+ WP_API void builtin_assign_copy_mat44d_int32_vec4d(mat44d& a, int32 i, vec4d& value, mat44d* ret) { *ret = wp::assign_copy(a, i, value); }
1195
+ WP_API void builtin_assign_copy_spatial_matrixd_int32_spatial_vectord(spatial_matrixd& a, int32 i, spatial_vectord& value, spatial_matrixd* ret) { *ret = wp::assign_copy(a, i, value); }
1191
1196
  WP_API void builtin_lerp_float16_float16_float16(float16 a, float16 b, float16 t, float16* ret) { *ret = wp::lerp(a, b, t); }
1192
1197
  WP_API void builtin_lerp_float32_float32_float32(float32 a, float32 b, float32 t, float32* ret) { *ret = wp::lerp(a, b, t); }
1193
1198
  WP_API void builtin_lerp_float64_float64_float64(float64 a, float64 b, float64 t, float64* ret) { *ret = wp::lerp(a, b, t); }