warp-lang 1.7.2rc1__py3-none-macosx_10_13_universal2.whl → 1.8.1__py3-none-macosx_10_13_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (192)
  1. warp/__init__.py +3 -1
  2. warp/__init__.pyi +3489 -1
  3. warp/autograd.py +45 -122
  4. warp/bin/libwarp.dylib +0 -0
  5. warp/build.py +241 -252
  6. warp/build_dll.py +130 -26
  7. warp/builtins.py +1907 -384
  8. warp/codegen.py +272 -104
  9. warp/config.py +12 -1
  10. warp/constants.py +1 -1
  11. warp/context.py +770 -238
  12. warp/dlpack.py +1 -1
  13. warp/examples/benchmarks/benchmark_cloth.py +2 -2
  14. warp/examples/benchmarks/benchmark_tile_sort.py +155 -0
  15. warp/examples/core/example_sample_mesh.py +1 -1
  16. warp/examples/core/example_spin_lock.py +93 -0
  17. warp/examples/core/example_work_queue.py +118 -0
  18. warp/examples/fem/example_adaptive_grid.py +5 -5
  19. warp/examples/fem/example_apic_fluid.py +1 -1
  20. warp/examples/fem/example_burgers.py +1 -1
  21. warp/examples/fem/example_convection_diffusion.py +9 -6
  22. warp/examples/fem/example_darcy_ls_optimization.py +489 -0
  23. warp/examples/fem/example_deformed_geometry.py +1 -1
  24. warp/examples/fem/example_diffusion.py +2 -2
  25. warp/examples/fem/example_diffusion_3d.py +1 -1
  26. warp/examples/fem/example_distortion_energy.py +1 -1
  27. warp/examples/fem/example_elastic_shape_optimization.py +387 -0
  28. warp/examples/fem/example_magnetostatics.py +5 -3
  29. warp/examples/fem/example_mixed_elasticity.py +5 -3
  30. warp/examples/fem/example_navier_stokes.py +11 -9
  31. warp/examples/fem/example_nonconforming_contact.py +5 -3
  32. warp/examples/fem/example_streamlines.py +8 -3
  33. warp/examples/fem/utils.py +9 -8
  34. warp/examples/interop/example_jax_callable.py +34 -4
  35. warp/examples/interop/example_jax_ffi_callback.py +2 -2
  36. warp/examples/interop/example_jax_kernel.py +27 -1
  37. warp/examples/optim/example_drone.py +1 -1
  38. warp/examples/sim/example_cloth.py +1 -1
  39. warp/examples/sim/example_cloth_self_contact.py +48 -54
  40. warp/examples/tile/example_tile_block_cholesky.py +502 -0
  41. warp/examples/tile/example_tile_cholesky.py +2 -1
  42. warp/examples/tile/example_tile_convolution.py +1 -1
  43. warp/examples/tile/example_tile_filtering.py +1 -1
  44. warp/examples/tile/example_tile_matmul.py +1 -1
  45. warp/examples/tile/example_tile_mlp.py +2 -0
  46. warp/fabric.py +7 -7
  47. warp/fem/__init__.py +5 -0
  48. warp/fem/adaptivity.py +1 -1
  49. warp/fem/cache.py +152 -63
  50. warp/fem/dirichlet.py +2 -2
  51. warp/fem/domain.py +136 -6
  52. warp/fem/field/field.py +141 -99
  53. warp/fem/field/nodal_field.py +85 -39
  54. warp/fem/field/virtual.py +99 -52
  55. warp/fem/geometry/adaptive_nanogrid.py +91 -86
  56. warp/fem/geometry/closest_point.py +13 -0
  57. warp/fem/geometry/deformed_geometry.py +102 -40
  58. warp/fem/geometry/element.py +56 -2
  59. warp/fem/geometry/geometry.py +323 -22
  60. warp/fem/geometry/grid_2d.py +157 -62
  61. warp/fem/geometry/grid_3d.py +116 -20
  62. warp/fem/geometry/hexmesh.py +86 -20
  63. warp/fem/geometry/nanogrid.py +166 -86
  64. warp/fem/geometry/partition.py +59 -25
  65. warp/fem/geometry/quadmesh.py +86 -135
  66. warp/fem/geometry/tetmesh.py +47 -119
  67. warp/fem/geometry/trimesh.py +77 -270
  68. warp/fem/integrate.py +181 -95
  69. warp/fem/linalg.py +25 -58
  70. warp/fem/operator.py +124 -27
  71. warp/fem/quadrature/pic_quadrature.py +36 -14
  72. warp/fem/quadrature/quadrature.py +40 -16
  73. warp/fem/space/__init__.py +1 -1
  74. warp/fem/space/basis_function_space.py +66 -46
  75. warp/fem/space/basis_space.py +17 -4
  76. warp/fem/space/dof_mapper.py +1 -1
  77. warp/fem/space/function_space.py +2 -2
  78. warp/fem/space/grid_2d_function_space.py +4 -1
  79. warp/fem/space/hexmesh_function_space.py +4 -2
  80. warp/fem/space/nanogrid_function_space.py +3 -1
  81. warp/fem/space/partition.py +11 -2
  82. warp/fem/space/quadmesh_function_space.py +4 -1
  83. warp/fem/space/restriction.py +5 -2
  84. warp/fem/space/shape/__init__.py +10 -8
  85. warp/fem/space/tetmesh_function_space.py +4 -1
  86. warp/fem/space/topology.py +52 -21
  87. warp/fem/space/trimesh_function_space.py +4 -1
  88. warp/fem/utils.py +53 -8
  89. warp/jax.py +1 -2
  90. warp/jax_experimental/ffi.py +210 -67
  91. warp/jax_experimental/xla_ffi.py +37 -24
  92. warp/math.py +171 -1
  93. warp/native/array.h +103 -4
  94. warp/native/builtin.h +182 -35
  95. warp/native/coloring.cpp +6 -2
  96. warp/native/cuda_util.cpp +1 -1
  97. warp/native/exports.h +118 -63
  98. warp/native/intersect.h +5 -5
  99. warp/native/mat.h +8 -13
  100. warp/native/mathdx.cpp +11 -5
  101. warp/native/matnn.h +1 -123
  102. warp/native/mesh.h +1 -1
  103. warp/native/quat.h +34 -6
  104. warp/native/rand.h +7 -7
  105. warp/native/sparse.cpp +121 -258
  106. warp/native/sparse.cu +181 -274
  107. warp/native/spatial.h +305 -17
  108. warp/native/svd.h +23 -8
  109. warp/native/tile.h +603 -73
  110. warp/native/tile_radix_sort.h +1112 -0
  111. warp/native/tile_reduce.h +239 -13
  112. warp/native/tile_scan.h +240 -0
  113. warp/native/tuple.h +189 -0
  114. warp/native/vec.h +10 -20
  115. warp/native/warp.cpp +36 -4
  116. warp/native/warp.cu +588 -52
  117. warp/native/warp.h +47 -74
  118. warp/optim/linear.py +5 -1
  119. warp/paddle.py +7 -8
  120. warp/py.typed +0 -0
  121. warp/render/render_opengl.py +110 -80
  122. warp/render/render_usd.py +124 -62
  123. warp/sim/__init__.py +9 -0
  124. warp/sim/collide.py +253 -80
  125. warp/sim/graph_coloring.py +8 -1
  126. warp/sim/import_mjcf.py +4 -3
  127. warp/sim/import_usd.py +11 -7
  128. warp/sim/integrator.py +5 -2
  129. warp/sim/integrator_euler.py +1 -1
  130. warp/sim/integrator_featherstone.py +1 -1
  131. warp/sim/integrator_vbd.py +761 -322
  132. warp/sim/integrator_xpbd.py +1 -1
  133. warp/sim/model.py +265 -260
  134. warp/sim/utils.py +10 -7
  135. warp/sparse.py +303 -166
  136. warp/tape.py +54 -51
  137. warp/tests/cuda/test_conditional_captures.py +1046 -0
  138. warp/tests/cuda/test_streams.py +1 -1
  139. warp/tests/geometry/test_volume.py +2 -2
  140. warp/tests/interop/test_dlpack.py +9 -9
  141. warp/tests/interop/test_jax.py +0 -1
  142. warp/tests/run_coverage_serial.py +1 -1
  143. warp/tests/sim/disabled_kinematics.py +2 -2
  144. warp/tests/sim/{test_vbd.py → test_cloth.py} +378 -112
  145. warp/tests/sim/test_collision.py +159 -51
  146. warp/tests/sim/test_coloring.py +91 -2
  147. warp/tests/test_array.py +254 -2
  148. warp/tests/test_array_reduce.py +2 -2
  149. warp/tests/test_assert.py +53 -0
  150. warp/tests/test_atomic_cas.py +312 -0
  151. warp/tests/test_codegen.py +142 -19
  152. warp/tests/test_conditional.py +47 -1
  153. warp/tests/test_ctypes.py +0 -20
  154. warp/tests/test_devices.py +8 -0
  155. warp/tests/test_fabricarray.py +4 -2
  156. warp/tests/test_fem.py +58 -25
  157. warp/tests/test_func.py +42 -1
  158. warp/tests/test_grad.py +1 -1
  159. warp/tests/test_lerp.py +1 -3
  160. warp/tests/test_map.py +481 -0
  161. warp/tests/test_mat.py +23 -24
  162. warp/tests/test_quat.py +28 -15
  163. warp/tests/test_rounding.py +10 -38
  164. warp/tests/test_runlength_encode.py +7 -7
  165. warp/tests/test_smoothstep.py +1 -1
  166. warp/tests/test_sparse.py +83 -2
  167. warp/tests/test_spatial.py +507 -1
  168. warp/tests/test_static.py +48 -0
  169. warp/tests/test_struct.py +2 -2
  170. warp/tests/test_tape.py +38 -0
  171. warp/tests/test_tuple.py +265 -0
  172. warp/tests/test_types.py +2 -2
  173. warp/tests/test_utils.py +24 -18
  174. warp/tests/test_vec.py +38 -408
  175. warp/tests/test_vec_constructors.py +325 -0
  176. warp/tests/tile/test_tile.py +438 -131
  177. warp/tests/tile/test_tile_mathdx.py +518 -14
  178. warp/tests/tile/test_tile_matmul.py +179 -0
  179. warp/tests/tile/test_tile_reduce.py +307 -5
  180. warp/tests/tile/test_tile_shared_memory.py +136 -7
  181. warp/tests/tile/test_tile_sort.py +121 -0
  182. warp/tests/unittest_suites.py +14 -6
  183. warp/types.py +462 -308
  184. warp/utils.py +647 -86
  185. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/METADATA +20 -6
  186. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/RECORD +189 -175
  187. warp/stubs.py +0 -3381
  188. warp/tests/sim/test_xpbd.py +0 -399
  189. warp/tests/test_mlp.py +0 -282
  190. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/WHEEL +0 -0
  191. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/licenses/LICENSE.md +0 -0
  192. {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.1.dist-info}/top_level.txt +0 -0
warp/native/builtin.h CHANGED
@@ -52,6 +52,11 @@
52
52
  __device__ void __debugbreak() {}
53
53
  #endif
54
54
 
55
+ #if defined(__clang__) && defined(__CUDA__) && defined(__CUDA_ARCH__)
56
+ // clang compiling CUDA code, device mode (NOTE: Used when building core library with Clang)
57
+ #include <cuda_fp16.h>
58
+ #endif
59
+
55
60
  namespace wp
56
61
  {
57
62
 
@@ -177,14 +182,14 @@ CUDA_CALLABLE inline float half_to_float(half x)
177
182
  #elif defined(__clang__)
178
183
 
179
184
  // _Float16 is Clang's native half-precision floating-point type
180
- inline half float_to_half(float x)
185
+ CUDA_CALLABLE inline half float_to_half(float x)
181
186
  {
182
187
 
183
188
  _Float16 f16 = static_cast<_Float16>(x);
184
189
  return *reinterpret_cast<half*>(&f16);
185
190
  }
186
191
 
187
- inline float half_to_float(half h)
192
+ CUDA_CALLABLE inline float half_to_float(half h)
188
193
  {
189
194
  _Float16 f16 = *reinterpret_cast<_Float16*>(&h);
190
195
  return static_cast<float>(f16);
@@ -263,16 +268,20 @@ inline CUDA_CALLABLE half operator / (half a,half b)
263
268
 
264
269
 
265
270
  template <typename T>
266
- CUDA_CALLABLE float cast_float(T x) { return (float)(x); }
271
+ CUDA_CALLABLE inline float cast_float(T x) { return (float)(x); }
267
272
 
268
273
  template <typename T>
269
- CUDA_CALLABLE int cast_int(T x) { return (int)(x); }
274
+ CUDA_CALLABLE inline int cast_int(T x) { return (int)(x); }
270
275
 
271
276
  template <typename T>
272
- CUDA_CALLABLE void adj_cast_float(T x, T& adj_x, float adj_ret) { adj_x += T(adj_ret); }
277
+ CUDA_CALLABLE inline void adj_cast_float(T x, T& adj_x, float adj_ret) {}
278
+
279
+ CUDA_CALLABLE inline void adj_cast_float(float16 x, float16& adj_x, float adj_ret) { adj_x += float16(adj_ret); }
280
+ CUDA_CALLABLE inline void adj_cast_float(float32 x, float32& adj_x, float adj_ret) { adj_x += float32(adj_ret); }
281
+ CUDA_CALLABLE inline void adj_cast_float(float64 x, float64& adj_x, float adj_ret) { adj_x += float64(adj_ret); }
273
282
 
274
283
  template <typename T>
275
- CUDA_CALLABLE void adj_cast_int(T x, T& adj_x, int adj_ret) { adj_x += adj_ret; }
284
+ CUDA_CALLABLE inline void adj_cast_int(T x, T& adj_x, int adj_ret) {}
276
285
 
277
286
  template <typename T>
278
287
  CUDA_CALLABLE inline void adj_int8(T, T&, int8) {}
@@ -1221,6 +1230,15 @@ inline CUDA_CALLABLE launch_coord_t launch_coord(size_t linear, const launch_bou
1221
1230
  return coord;
1222
1231
  }
1223
1232
 
1233
+ inline CUDA_CALLABLE int block_dim()
1234
+ {
1235
+ #if defined(__CUDA_ARCH__)
1236
+ return blockDim.x;
1237
+ #else
1238
+ return 1;
1239
+ #endif
1240
+ }
1241
+
1224
1242
  inline CUDA_CALLABLE int tid(size_t index, const launch_bounds_t& bounds)
1225
1243
  {
1226
1244
  // For the 1-D tid() we need to warn the user if we're about to provide a truncated index
@@ -1301,34 +1319,35 @@ inline CUDA_CALLABLE float16 atomic_add(float16* buf, float16 value)
1301
1319
  float16 old = buf[0];
1302
1320
  buf[0] += value;
1303
1321
  return old;
1304
- #elif defined(__clang__) // CUDA compiled by Clang
1305
- __half r = atomicAdd(reinterpret_cast<__half*>(buf), *reinterpret_cast<__half*>(&value));
1306
- return *reinterpret_cast<float16*>(&r);
1307
1322
  #else // CUDA compiled by NVRTC
1308
- //return atomicAdd(buf, value);
1309
-
1310
- /* Define __PTR for atomicAdd prototypes below, undef after done */
1311
- #if (defined(_MSC_VER) && defined(_WIN64)) || defined(__LP64__) || defined(__CUDACC_RTC__)
1312
- #define __PTR "l"
1313
- #else
1314
- #define __PTR "r"
1315
- #endif /*(defined(_MSC_VER) && defined(_WIN64)) || defined(__LP64__) || defined(__CUDACC_RTC__)*/
1316
-
1317
- half r = 0.0;
1318
-
1319
1323
  #if __CUDA_ARCH__ >= 700
1320
-
1321
- asm volatile ("{ atom.add.noftz.f16 %0,[%1],%2; }\n"
1322
- : "=h"(r.u)
1323
- : __PTR(buf), "h"(value.u)
1324
- : "memory");
1324
+ #if defined(__clang__) // CUDA compiled by Clang
1325
+ __half r = atomicAdd(reinterpret_cast<__half*>(buf), *reinterpret_cast<__half*>(&value));
1326
+ return *reinterpret_cast<float16*>(&r);
1327
+ #else // CUDA compiled by NVRTC
1328
+ /* Define __PTR for atomicAdd prototypes below, undef after done */
1329
+ #if (defined(_MSC_VER) && defined(_WIN64)) || defined(__LP64__) || defined(__CUDACC_RTC__)
1330
+ #define __PTR "l"
1331
+ #else
1332
+ #define __PTR "r"
1333
+ #endif /*(defined(_MSC_VER) && defined(_WIN64)) || defined(__LP64__) || defined(__CUDACC_RTC__)*/
1334
+
1335
+ half r = 0.0;
1336
+
1337
+ asm volatile ("{ atom.add.noftz.f16 %0,[%1],%2; }\n"
1338
+ : "=h"(r.u)
1339
+ : __PTR(buf), "h"(value.u)
1340
+ : "memory");
1341
+
1342
+ return r;
1343
+
1344
+ #undef __PTR
1345
+ #endif
1346
+ #else
1347
+ // No native __half atomic support on compute capability < 7.0
1348
+ return float16(0.0f);
1325
1349
  #endif
1326
-
1327
- return r;
1328
-
1329
- #undef __PTR
1330
-
1331
- #endif // CUDA compiled by NVRTC
1350
+ #endif
1332
1351
  }
1333
1352
 
1334
1353
  template<>
@@ -1508,6 +1527,129 @@ CUDA_CALLABLE inline void adj_atomic_minmax(uint64* buf, uint64* adj_buf, const
1508
1527
  CUDA_CALLABLE inline void adj_atomic_minmax(bool* buf, bool* adj_buf, const bool &value, bool &adj_value) { }
1509
1528
 
1510
1529
 
1530
+ template<typename T>
1531
+ inline CUDA_CALLABLE T atomic_cas(T* address, T compare, T val)
1532
+ {
1533
+ #if defined(__CUDA_ARCH__)
1534
+ return atomicCAS(address, compare, val);
1535
+ #else
1536
+ T old = *address;
1537
+ if (old == compare)
1538
+ {
1539
+ *address = val;
1540
+ }
1541
+ return old;
1542
+ #endif
1543
+ }
1544
+
1545
+ template<>
1546
+ inline CUDA_CALLABLE float atomic_cas(float* address, float compare, float val)
1547
+ {
1548
+ #if defined(__CUDA_ARCH__)
1549
+ auto result = atomicCAS(reinterpret_cast<unsigned int*>(address),
1550
+ reinterpret_cast<unsigned int&>(compare),
1551
+ reinterpret_cast<unsigned int&>(val));
1552
+ return reinterpret_cast<float&>(result);
1553
+ #else
1554
+ float old = *address;
1555
+ if (old == compare)
1556
+ {
1557
+ *address = val;
1558
+ }
1559
+ return old;
1560
+ #endif
1561
+ }
1562
+
1563
+ template<>
1564
+ inline CUDA_CALLABLE double atomic_cas(double* address, double compare, double val)
1565
+ {
1566
+ #if defined(__CUDA_ARCH__)
1567
+ auto result = atomicCAS(reinterpret_cast<unsigned long long int *>(address),
1568
+ reinterpret_cast<unsigned long long int &>(compare),
1569
+ reinterpret_cast<unsigned long long int &>(val));
1570
+ return reinterpret_cast<double&>(result);
1571
+ #else
1572
+ double old = *address;
1573
+ if (old == compare)
1574
+ {
1575
+ *address = val;
1576
+ }
1577
+ return old;
1578
+ #endif
1579
+ }
1580
+
1581
+ template<>
1582
+ inline CUDA_CALLABLE int64 atomic_cas(int64* address, int64 compare, int64 val)
1583
+ {
1584
+ #if defined(__CUDA_ARCH__)
1585
+ auto result = atomicCAS(reinterpret_cast<unsigned long long int *>(address),
1586
+ reinterpret_cast<unsigned long long int &>(compare),
1587
+ reinterpret_cast<unsigned long long int &>(val));
1588
+ return reinterpret_cast<int64&>(result);
1589
+ #else
1590
+ int64 old = *address;
1591
+ if (old == compare)
1592
+ {
1593
+ *address = val;
1594
+ }
1595
+ return old;
1596
+ #endif
1597
+ }
1598
+
1599
+ template<typename T>
1600
+ inline CUDA_CALLABLE T atomic_exch(T* address, T val)
1601
+ {
1602
+ #if defined(__CUDA_ARCH__)
1603
+ return atomicExch(address, val);
1604
+ #else
1605
+ T old = *address;
1606
+ *address = val;
1607
+ return old;
1608
+ #endif
1609
+ }
1610
+
1611
+ template<>
1612
+ inline CUDA_CALLABLE double atomic_exch(double* address, double val)
1613
+ {
1614
+ #if defined(__CUDA_ARCH__)
1615
+ auto result = atomicExch(reinterpret_cast<unsigned long long int*>(address),
1616
+ reinterpret_cast<unsigned long long int&>(val));
1617
+ return reinterpret_cast<double&>(result);
1618
+ #else
1619
+ double old = *address;
1620
+ *address = val;
1621
+ return old;
1622
+ #endif
1623
+ }
1624
+
1625
+ template<>
1626
+ inline CUDA_CALLABLE int64 atomic_exch(int64* address, int64 val)
1627
+ {
1628
+ #if defined(__CUDA_ARCH__)
1629
+ auto result = atomicExch(reinterpret_cast<unsigned long long int*>(address),
1630
+ reinterpret_cast<unsigned long long int&>(val));
1631
+ return reinterpret_cast<int64&>(result);
1632
+ #else
1633
+ int64 old = *address;
1634
+ *address = val;
1635
+ return old;
1636
+ #endif
1637
+ }
1638
+
1639
+
1640
+ template<typename T>
1641
+ CUDA_CALLABLE inline void adj_atomic_cas(T* address, T compare, T val, T* adj_address, T& adj_compare, T& adj_val, T adj_ret)
1642
+ {
1643
+ // Not implemented
1644
+ }
1645
+
1646
+ template<typename T>
1647
+ CUDA_CALLABLE inline void adj_atomic_exch(T* address, T val, T* adj_address, T& adj_val, T adj_ret)
1648
+ {
1649
+ // Not implemented
1650
+ }
1651
+
1652
+
1511
1653
  } // namespace wp
1512
1654
 
1513
1655
 
@@ -1778,8 +1920,9 @@ inline CUDA_CALLABLE void expect_near(const T& actual, const T& expected, const
1778
1920
  if (abs(actual - expected) > tolerance)
1779
1921
  {
1780
1922
  printf("Error, expect_near() failed with tolerance "); print(tolerance);
1781
- printf("\t Expected: "); print(expected);
1782
- printf("\t Actual: "); print(actual);
1923
+ printf(" Expected: "); print(expected);
1924
+ printf(" Actual: "); print(actual);
1925
+ printf(" Absolute difference: "); print(abs(actual - expected));
1783
1926
  }
1784
1927
  }
1785
1928
 
@@ -1789,8 +1932,9 @@ inline CUDA_CALLABLE void expect_near(const vec3& actual, const vec3& expected,
1789
1932
  if (diff > tolerance)
1790
1933
  {
1791
1934
  printf("Error, expect_near() failed with tolerance "); print(tolerance);
1792
- printf("\t Expected: "); print(expected);
1793
- printf("\t Actual: "); print(actual);
1935
+ printf(" Expected: "); print(expected);
1936
+ printf(" Actual: "); print(actual);
1937
+ printf(" Max absolute difference: "); print(diff);
1794
1938
  }
1795
1939
  }
1796
1940
 
@@ -1810,6 +1954,7 @@ inline CUDA_CALLABLE void adj_expect_near(const vec3& actual, const vec3& expect
1810
1954
 
1811
1955
  // include array.h so we have the print, isfinite functions for the inner array types defined
1812
1956
  #include "array.h"
1957
+ #include "tuple.h"
1813
1958
  #include "mesh.h"
1814
1959
  #include "bvh.h"
1815
1960
  #include "svd.h"
@@ -1823,4 +1968,6 @@ inline CUDA_CALLABLE void adj_expect_near(const vec3& actual, const vec3& expect
1823
1968
  #if !defined(WP_ENABLE_CUDA) // only include in kernels for now
1824
1969
  #include "tile.h"
1825
1970
  #include "tile_reduce.h"
1971
+ #include "tile_scan.h"
1972
+ #include "tile_radix_sort.h"
1826
1973
  #endif //!defined(WP_ENABLE_CUDA)
warp/native/coloring.cpp CHANGED
@@ -209,9 +209,13 @@ float balance_color_groups(float target_max_min_ratio,
209
209
  do
210
210
  {
211
211
  int biggest_group = -1, smallest_group = -1;
212
-
212
+ float prev_max_min_ratio = max_min_ratio;
213
213
  max_min_ratio = find_largest_smallest_groups(color_groups, biggest_group, smallest_group);
214
214
 
215
+ if (prev_max_min_ratio > 0 && prev_max_min_ratio < max_min_ratio) {
216
+ return max_min_ratio;
217
+ }
218
+
215
219
  // graph is not optimizable anymore or target ratio reached
216
220
  if (color_groups[biggest_group].size() - color_groups[smallest_group].size() <= 2
217
221
  || max_min_ratio < target_max_min_ratio)
@@ -372,7 +376,7 @@ public:
372
376
  // we need to update max_weight because weight_buckets[max_weight] became empty
373
377
  {
374
378
  int new_max_weight = 0;
375
- for (size_t bucket_idx = max_weight - 1; bucket_idx >= 0; bucket_idx--)
379
+ for (int bucket_idx = max_weight - 1; bucket_idx >= 0; bucket_idx--)
376
380
  {
377
381
  if (weight_buckets[bucket_idx].size())
378
382
  {
warp/native/cuda_util.cpp CHANGED
@@ -212,7 +212,7 @@ bool init_cuda_driver()
212
212
  get_driver_entry_point("cuDeviceGetCount", 2000, &(void*&)pfn_cuDeviceGetCount);
213
213
  get_driver_entry_point("cuDeviceGetName", 2000, &(void*&)pfn_cuDeviceGetName);
214
214
  get_driver_entry_point("cuDeviceGetAttribute", 2000, &(void*&)pfn_cuDeviceGetAttribute);
215
- get_driver_entry_point("cuDeviceGetUuid", 110400, &(void*&)pfn_cuDeviceGetUuid);
215
+ get_driver_entry_point("cuDeviceGetUuid", 11040, &(void*&)pfn_cuDeviceGetUuid);
216
216
  get_driver_entry_point("cuDevicePrimaryCtxRetain", 7000, &(void*&)pfn_cuDevicePrimaryCtxRetain);
217
217
  get_driver_entry_point("cuDevicePrimaryCtxRelease", 11000, &(void*&)pfn_cuDevicePrimaryCtxRelease);
218
218
  get_driver_entry_point("cuDeviceCanAccessPeer", 4000, &(void*&)pfn_cuDeviceCanAccessPeer);
warp/native/exports.h CHANGED
@@ -953,10 +953,25 @@ WP_API void builtin_cw_div_mat22d_mat22d(mat22d& a, mat22d& b, mat22d* ret) { *r
953
953
  WP_API void builtin_cw_div_mat33d_mat33d(mat33d& a, mat33d& b, mat33d* ret) { *ret = wp::cw_div(a, b); }
954
954
  WP_API void builtin_cw_div_mat44d_mat44d(mat44d& a, mat44d& b, mat44d* ret) { *ret = wp::cw_div(a, b); }
955
955
  WP_API void builtin_cw_div_spatial_matrixd_spatial_matrixd(spatial_matrixd& a, spatial_matrixd& b, spatial_matrixd* ret) { *ret = wp::cw_div(a, b); }
956
+ WP_API void builtin_svd3_mat33h(mat33h& A, mat33h& ret_0, vec3h& ret_1, mat33h& ret_2) { wp::svd3(A, ret_0, ret_1, ret_2); }
957
+ WP_API void builtin_svd3_mat33f(mat33f& A, mat33f& ret_0, vec3f& ret_1, mat33f& ret_2) { wp::svd3(A, ret_0, ret_1, ret_2); }
958
+ WP_API void builtin_svd3_mat33d(mat33d& A, mat33d& ret_0, vec3d& ret_1, mat33d& ret_2) { wp::svd3(A, ret_0, ret_1, ret_2); }
959
+ WP_API void builtin_svd2_mat22h(mat22h& A, mat22h& ret_0, vec2h& ret_1, mat22h& ret_2) { wp::svd2(A, ret_0, ret_1, ret_2); }
960
+ WP_API void builtin_svd2_mat22f(mat22f& A, mat22f& ret_0, vec2f& ret_1, mat22f& ret_2) { wp::svd2(A, ret_0, ret_1, ret_2); }
961
+ WP_API void builtin_svd2_mat22d(mat22d& A, mat22d& ret_0, vec2d& ret_1, mat22d& ret_2) { wp::svd2(A, ret_0, ret_1, ret_2); }
962
+ WP_API void builtin_qr3_mat33h(mat33h& A, mat33h& ret_0, mat33h& ret_1) { wp::qr3(A, ret_0, ret_1); }
963
+ WP_API void builtin_qr3_mat33f(mat33f& A, mat33f& ret_0, mat33f& ret_1) { wp::qr3(A, ret_0, ret_1); }
964
+ WP_API void builtin_qr3_mat33d(mat33d& A, mat33d& ret_0, mat33d& ret_1) { wp::qr3(A, ret_0, ret_1); }
965
+ WP_API void builtin_eig3_mat33h(mat33h& A, mat33h& ret_0, vec3h& ret_1) { wp::eig3(A, ret_0, ret_1); }
966
+ WP_API void builtin_eig3_mat33f(mat33f& A, mat33f& ret_0, vec3f& ret_1) { wp::eig3(A, ret_0, ret_1); }
967
+ WP_API void builtin_eig3_mat33d(mat33d& A, mat33d& ret_0, vec3d& ret_1) { wp::eig3(A, ret_0, ret_1); }
956
968
  WP_API void builtin_quat_identity(quatf* ret) { *ret = wp::quat_identity(); }
957
969
  WP_API void builtin_quat_from_axis_angle_vec3h_float16(vec3h& axis, float16 angle, quath* ret) { *ret = wp::quat_from_axis_angle(axis, angle); }
958
970
  WP_API void builtin_quat_from_axis_angle_vec3f_float32(vec3f& axis, float32 angle, quatf* ret) { *ret = wp::quat_from_axis_angle(axis, angle); }
959
971
  WP_API void builtin_quat_from_axis_angle_vec3d_float64(vec3d& axis, float64 angle, quatd* ret) { *ret = wp::quat_from_axis_angle(axis, angle); }
972
+ WP_API void builtin_quat_to_axis_angle_quath(quath& quat, vec3h& ret_0, float16& ret_1) { wp::quat_to_axis_angle(quat, ret_0, ret_1); }
973
+ WP_API void builtin_quat_to_axis_angle_quatf(quatf& quat, vec3f& ret_0, float32& ret_1) { wp::quat_to_axis_angle(quat, ret_0, ret_1); }
974
+ WP_API void builtin_quat_to_axis_angle_quatd(quatd& quat, vec3d& ret_0, float64& ret_1) { wp::quat_to_axis_angle(quat, ret_0, ret_1); }
960
975
  WP_API void builtin_quat_from_matrix_mat33h(mat33h& mat, quath* ret) { *ret = wp::quat_from_matrix(mat); }
961
976
  WP_API void builtin_quat_from_matrix_mat33f(mat33f& mat, quatf* ret) { *ret = wp::quat_from_matrix(mat); }
962
977
  WP_API void builtin_quat_from_matrix_mat33d(mat33d& mat, quatd* ret) { *ret = wp::quat_from_matrix(mat); }
@@ -988,6 +1003,12 @@ WP_API void builtin_transform_get_translation_transformd(transformd& xform, vec3
988
1003
  WP_API void builtin_transform_get_rotation_transformh(transformh& xform, quath* ret) { *ret = wp::transform_get_rotation(xform); }
989
1004
  WP_API void builtin_transform_get_rotation_transformf(transformf& xform, quatf* ret) { *ret = wp::transform_get_rotation(xform); }
990
1005
  WP_API void builtin_transform_get_rotation_transformd(transformd& xform, quatd* ret) { *ret = wp::transform_get_rotation(xform); }
1006
+ WP_API void builtin_transform_set_translation_transformh_vec3h(transformh& xform, vec3h& p) { wp::transform_set_translation(xform, p); }
1007
+ WP_API void builtin_transform_set_translation_transformf_vec3f(transformf& xform, vec3f& p) { wp::transform_set_translation(xform, p); }
1008
+ WP_API void builtin_transform_set_translation_transformd_vec3d(transformd& xform, vec3d& p) { wp::transform_set_translation(xform, p); }
1009
+ WP_API void builtin_transform_set_rotation_transformh_quath(transformh& xform, quath& q) { wp::transform_set_rotation(xform, q); }
1010
+ WP_API void builtin_transform_set_rotation_transformf_quatf(transformf& xform, quatf& q) { wp::transform_set_rotation(xform, q); }
1011
+ WP_API void builtin_transform_set_rotation_transformd_quatd(transformd& xform, quatd& q) { wp::transform_set_rotation(xform, q); }
991
1012
  WP_API void builtin_transform_multiply_transformh_transformh(transformh& a, transformh& b, transformh* ret) { *ret = wp::transform_multiply(a, b); }
992
1013
  WP_API void builtin_transform_multiply_transformf_transformf(transformf& a, transformf& b, transformf* ret) { *ret = wp::transform_multiply(a, b); }
993
1014
  WP_API void builtin_transform_multiply_transformd_transformd(transformd& a, transformd& b, transformd* ret) { *ret = wp::transform_multiply(a, b); }
@@ -1063,6 +1084,7 @@ WP_API void builtin_pnoise_uint32_vec4f_int32_int32_int32_int32(uint32 state, ve
1063
1084
  WP_API void builtin_curlnoise_uint32_vec2f_uint32_float32_float32(uint32 state, vec2f& xy, uint32 octaves, float32 lacunarity, float32 gain, vec2f* ret) { *ret = wp::curlnoise(state, xy, octaves, lacunarity, gain); }
1064
1085
  WP_API void builtin_curlnoise_uint32_vec3f_uint32_float32_float32(uint32 state, vec3f& xyz, uint32 octaves, float32 lacunarity, float32 gain, vec3f* ret) { *ret = wp::curlnoise(state, xyz, octaves, lacunarity, gain); }
1065
1086
  WP_API void builtin_curlnoise_uint32_vec4f_uint32_float32_float32(uint32 state, vec4f& xyzt, uint32 octaves, float32 lacunarity, float32 gain, vec3f* ret) { *ret = wp::curlnoise(state, xyzt, octaves, lacunarity, gain); }
1087
+ WP_API void builtin_block_dim(int* ret) { *ret = wp::block_dim(); }
1066
1088
  WP_API void builtin_extract_vec2h_int32(vec2h& a, int32 i, float16* ret) { *ret = wp::extract(a, i); }
1067
1089
  WP_API void builtin_extract_vec3h_int32(vec3h& a, int32 i, float16* ret) { *ret = wp::extract(a, i); }
1068
1090
  WP_API void builtin_extract_vec4h_int32(vec4h& a, int32 i, float16* ret) { *ret = wp::extract(a, i); }
@@ -1130,69 +1152,72 @@ WP_API void builtin_extract_transformh_int32(transformh& a, int32 i, float16* re
1130
1152
  WP_API void builtin_extract_transformf_int32(transformf& a, int32 i, float32* ret) { *ret = wp::extract(a, i); }
1131
1153
  WP_API void builtin_extract_transformd_int32(transformd& a, int32 i, float64* ret) { *ret = wp::extract(a, i); }
1132
1154
  WP_API void builtin_extract_shape_t_int32(shape_t s, int32 i, int* ret) { *ret = wp::extract(s, i); }
1133
- WP_API void builtin_assign_copy_vec2h_int32_float16(vec2h& a, int32 i, float16 value, vec2h* ret) { *ret = wp::assign_copy(a, i, value); }
1134
- WP_API void builtin_assign_copy_vec3h_int32_float16(vec3h& a, int32 i, float16 value, vec3h* ret) { *ret = wp::assign_copy(a, i, value); }
1135
- WP_API void builtin_assign_copy_vec4h_int32_float16(vec4h& a, int32 i, float16 value, vec4h* ret) { *ret = wp::assign_copy(a, i, value); }
1136
- WP_API void builtin_assign_copy_spatial_vectorh_int32_float16(spatial_vectorh& a, int32 i, float16 value, spatial_vectorh* ret) { *ret = wp::assign_copy(a, i, value); }
1137
- WP_API void builtin_assign_copy_vec2f_int32_float32(vec2f& a, int32 i, float32 value, vec2f* ret) { *ret = wp::assign_copy(a, i, value); }
1138
- WP_API void builtin_assign_copy_vec3f_int32_float32(vec3f& a, int32 i, float32 value, vec3f* ret) { *ret = wp::assign_copy(a, i, value); }
1139
- WP_API void builtin_assign_copy_vec4f_int32_float32(vec4f& a, int32 i, float32 value, vec4f* ret) { *ret = wp::assign_copy(a, i, value); }
1140
- WP_API void builtin_assign_copy_spatial_vectorf_int32_float32(spatial_vectorf& a, int32 i, float32 value, spatial_vectorf* ret) { *ret = wp::assign_copy(a, i, value); }
1141
- WP_API void builtin_assign_copy_vec2d_int32_float64(vec2d& a, int32 i, float64 value, vec2d* ret) { *ret = wp::assign_copy(a, i, value); }
1142
- WP_API void builtin_assign_copy_vec3d_int32_float64(vec3d& a, int32 i, float64 value, vec3d* ret) { *ret = wp::assign_copy(a, i, value); }
1143
- WP_API void builtin_assign_copy_vec4d_int32_float64(vec4d& a, int32 i, float64 value, vec4d* ret) { *ret = wp::assign_copy(a, i, value); }
1144
- WP_API void builtin_assign_copy_spatial_vectord_int32_float64(spatial_vectord& a, int32 i, float64 value, spatial_vectord* ret) { *ret = wp::assign_copy(a, i, value); }
1145
- WP_API void builtin_assign_copy_vec2s_int32_int16(vec2s& a, int32 i, int16 value, vec2s* ret) { *ret = wp::assign_copy(a, i, value); }
1146
- WP_API void builtin_assign_copy_vec3s_int32_int16(vec3s& a, int32 i, int16 value, vec3s* ret) { *ret = wp::assign_copy(a, i, value); }
1147
- WP_API void builtin_assign_copy_vec4s_int32_int16(vec4s& a, int32 i, int16 value, vec4s* ret) { *ret = wp::assign_copy(a, i, value); }
1148
- WP_API void builtin_assign_copy_vec2i_int32_int32(vec2i& a, int32 i, int32 value, vec2i* ret) { *ret = wp::assign_copy(a, i, value); }
1149
- WP_API void builtin_assign_copy_vec3i_int32_int32(vec3i& a, int32 i, int32 value, vec3i* ret) { *ret = wp::assign_copy(a, i, value); }
1150
- WP_API void builtin_assign_copy_vec4i_int32_int32(vec4i& a, int32 i, int32 value, vec4i* ret) { *ret = wp::assign_copy(a, i, value); }
1151
- WP_API void builtin_assign_copy_vec2l_int32_int64(vec2l& a, int32 i, int64 value, vec2l* ret) { *ret = wp::assign_copy(a, i, value); }
1152
- WP_API void builtin_assign_copy_vec3l_int32_int64(vec3l& a, int32 i, int64 value, vec3l* ret) { *ret = wp::assign_copy(a, i, value); }
1153
- WP_API void builtin_assign_copy_vec4l_int32_int64(vec4l& a, int32 i, int64 value, vec4l* ret) { *ret = wp::assign_copy(a, i, value); }
1154
- WP_API void builtin_assign_copy_vec2b_int32_int8(vec2b& a, int32 i, int8 value, vec2b* ret) { *ret = wp::assign_copy(a, i, value); }
1155
- WP_API void builtin_assign_copy_vec3b_int32_int8(vec3b& a, int32 i, int8 value, vec3b* ret) { *ret = wp::assign_copy(a, i, value); }
1156
- WP_API void builtin_assign_copy_vec4b_int32_int8(vec4b& a, int32 i, int8 value, vec4b* ret) { *ret = wp::assign_copy(a, i, value); }
1157
- WP_API void builtin_assign_copy_vec2us_int32_uint16(vec2us& a, int32 i, uint16 value, vec2us* ret) { *ret = wp::assign_copy(a, i, value); }
1158
- WP_API void builtin_assign_copy_vec3us_int32_uint16(vec3us& a, int32 i, uint16 value, vec3us* ret) { *ret = wp::assign_copy(a, i, value); }
1159
- WP_API void builtin_assign_copy_vec4us_int32_uint16(vec4us& a, int32 i, uint16 value, vec4us* ret) { *ret = wp::assign_copy(a, i, value); }
1160
- WP_API void builtin_assign_copy_vec2ui_int32_uint32(vec2ui& a, int32 i, uint32 value, vec2ui* ret) { *ret = wp::assign_copy(a, i, value); }
1161
- WP_API void builtin_assign_copy_vec3ui_int32_uint32(vec3ui& a, int32 i, uint32 value, vec3ui* ret) { *ret = wp::assign_copy(a, i, value); }
1162
- WP_API void builtin_assign_copy_vec4ui_int32_uint32(vec4ui& a, int32 i, uint32 value, vec4ui* ret) { *ret = wp::assign_copy(a, i, value); }
1163
- WP_API void builtin_assign_copy_vec2ul_int32_uint64(vec2ul& a, int32 i, uint64 value, vec2ul* ret) { *ret = wp::assign_copy(a, i, value); }
1164
- WP_API void builtin_assign_copy_vec3ul_int32_uint64(vec3ul& a, int32 i, uint64 value, vec3ul* ret) { *ret = wp::assign_copy(a, i, value); }
1165
- WP_API void builtin_assign_copy_vec4ul_int32_uint64(vec4ul& a, int32 i, uint64 value, vec4ul* ret) { *ret = wp::assign_copy(a, i, value); }
1166
- WP_API void builtin_assign_copy_vec2ub_int32_uint8(vec2ub& a, int32 i, uint8 value, vec2ub* ret) { *ret = wp::assign_copy(a, i, value); }
1167
- WP_API void builtin_assign_copy_vec3ub_int32_uint8(vec3ub& a, int32 i, uint8 value, vec3ub* ret) { *ret = wp::assign_copy(a, i, value); }
1168
- WP_API void builtin_assign_copy_vec4ub_int32_uint8(vec4ub& a, int32 i, uint8 value, vec4ub* ret) { *ret = wp::assign_copy(a, i, value); }
1169
- WP_API void builtin_assign_copy_quath_int32_float16(quath& a, int32 i, float16 value, quath* ret) { *ret = wp::assign_copy(a, i, value); }
1170
- WP_API void builtin_assign_copy_quatf_int32_float32(quatf& a, int32 i, float32 value, quatf* ret) { *ret = wp::assign_copy(a, i, value); }
1171
- WP_API void builtin_assign_copy_quatd_int32_float64(quatd& a, int32 i, float64 value, quatd* ret) { *ret = wp::assign_copy(a, i, value); }
1172
- WP_API void builtin_assign_copy_mat22h_int32_int32_float16(mat22h& a, int32 i, int32 j, float16 value, mat22h* ret) { *ret = wp::assign_copy(a, i, j, value); }
1173
- WP_API void builtin_assign_copy_mat33h_int32_int32_float16(mat33h& a, int32 i, int32 j, float16 value, mat33h* ret) { *ret = wp::assign_copy(a, i, j, value); }
1174
- WP_API void builtin_assign_copy_mat44h_int32_int32_float16(mat44h& a, int32 i, int32 j, float16 value, mat44h* ret) { *ret = wp::assign_copy(a, i, j, value); }
1175
- WP_API void builtin_assign_copy_spatial_matrixh_int32_int32_float16(spatial_matrixh& a, int32 i, int32 j, float16 value, spatial_matrixh* ret) { *ret = wp::assign_copy(a, i, j, value); }
1176
- WP_API void builtin_assign_copy_mat22f_int32_int32_float32(mat22f& a, int32 i, int32 j, float32 value, mat22f* ret) { *ret = wp::assign_copy(a, i, j, value); }
1177
- WP_API void builtin_assign_copy_mat33f_int32_int32_float32(mat33f& a, int32 i, int32 j, float32 value, mat33f* ret) { *ret = wp::assign_copy(a, i, j, value); }
1178
- WP_API void builtin_assign_copy_mat44f_int32_int32_float32(mat44f& a, int32 i, int32 j, float32 value, mat44f* ret) { *ret = wp::assign_copy(a, i, j, value); }
1179
- WP_API void builtin_assign_copy_spatial_matrixf_int32_int32_float32(spatial_matrixf& a, int32 i, int32 j, float32 value, spatial_matrixf* ret) { *ret = wp::assign_copy(a, i, j, value); }
1180
- WP_API void builtin_assign_copy_mat22d_int32_int32_float64(mat22d& a, int32 i, int32 j, float64 value, mat22d* ret) { *ret = wp::assign_copy(a, i, j, value); }
1181
- WP_API void builtin_assign_copy_mat33d_int32_int32_float64(mat33d& a, int32 i, int32 j, float64 value, mat33d* ret) { *ret = wp::assign_copy(a, i, j, value); }
1182
- WP_API void builtin_assign_copy_mat44d_int32_int32_float64(mat44d& a, int32 i, int32 j, float64 value, mat44d* ret) { *ret = wp::assign_copy(a, i, j, value); }
1183
- WP_API void builtin_assign_copy_spatial_matrixd_int32_int32_float64(spatial_matrixd& a, int32 i, int32 j, float64 value, spatial_matrixd* ret) { *ret = wp::assign_copy(a, i, j, value); }
1184
- WP_API void builtin_assign_copy_mat22h_int32_vec2h(mat22h& a, int32 i, vec2h& value, mat22h* ret) { *ret = wp::assign_copy(a, i, value); }
1185
- WP_API void builtin_assign_copy_mat33h_int32_vec3h(mat33h& a, int32 i, vec3h& value, mat33h* ret) { *ret = wp::assign_copy(a, i, value); }
1186
- WP_API void builtin_assign_copy_mat44h_int32_vec4h(mat44h& a, int32 i, vec4h& value, mat44h* ret) { *ret = wp::assign_copy(a, i, value); }
1187
- WP_API void builtin_assign_copy_spatial_matrixh_int32_spatial_vectorh(spatial_matrixh& a, int32 i, spatial_vectorh& value, spatial_matrixh* ret) { *ret = wp::assign_copy(a, i, value); }
1188
- WP_API void builtin_assign_copy_mat22f_int32_vec2f(mat22f& a, int32 i, vec2f& value, mat22f* ret) { *ret = wp::assign_copy(a, i, value); }
1189
- WP_API void builtin_assign_copy_mat33f_int32_vec3f(mat33f& a, int32 i, vec3f& value, mat33f* ret) { *ret = wp::assign_copy(a, i, value); }
1190
- WP_API void builtin_assign_copy_mat44f_int32_vec4f(mat44f& a, int32 i, vec4f& value, mat44f* ret) { *ret = wp::assign_copy(a, i, value); }
1191
- WP_API void builtin_assign_copy_spatial_matrixf_int32_spatial_vectorf(spatial_matrixf& a, int32 i, spatial_vectorf& value, spatial_matrixf* ret) { *ret = wp::assign_copy(a, i, value); }
1192
- WP_API void builtin_assign_copy_mat22d_int32_vec2d(mat22d& a, int32 i, vec2d& value, mat22d* ret) { *ret = wp::assign_copy(a, i, value); }
1193
- WP_API void builtin_assign_copy_mat33d_int32_vec3d(mat33d& a, int32 i, vec3d& value, mat33d* ret) { *ret = wp::assign_copy(a, i, value); }
1194
- WP_API void builtin_assign_copy_mat44d_int32_vec4d(mat44d& a, int32 i, vec4d& value, mat44d* ret) { *ret = wp::assign_copy(a, i, value); }
1195
- WP_API void builtin_assign_copy_spatial_matrixd_int32_spatial_vectord(spatial_matrixd& a, int32 i, spatial_vectord& value, spatial_matrixd* ret) { *ret = wp::assign_copy(a, i, value); }
1155
+ WP_API void builtin_expect_eq_int8_int8(int8 a, int8 b) { wp::expect_eq(a, b); }
1156
+ WP_API void builtin_expect_eq_uint8_uint8(uint8 a, uint8 b) { wp::expect_eq(a, b); }
1157
+ WP_API void builtin_expect_eq_int16_int16(int16 a, int16 b) { wp::expect_eq(a, b); }
1158
+ WP_API void builtin_expect_eq_uint16_uint16(uint16 a, uint16 b) { wp::expect_eq(a, b); }
1159
+ WP_API void builtin_expect_eq_int32_int32(int32 a, int32 b) { wp::expect_eq(a, b); }
1160
+ WP_API void builtin_expect_eq_uint32_uint32(uint32 a, uint32 b) { wp::expect_eq(a, b); }
1161
+ WP_API void builtin_expect_eq_int64_int64(int64 a, int64 b) { wp::expect_eq(a, b); }
1162
+ WP_API void builtin_expect_eq_uint64_uint64(uint64 a, uint64 b) { wp::expect_eq(a, b); }
1163
+ WP_API void builtin_expect_eq_float16_float16(float16 a, float16 b) { wp::expect_eq(a, b); }
1164
+ WP_API void builtin_expect_eq_float32_float32(float32 a, float32 b) { wp::expect_eq(a, b); }
1165
+ WP_API void builtin_expect_eq_float64_float64(float64 a, float64 b) { wp::expect_eq(a, b); }
1166
+ WP_API void builtin_expect_eq_quath_quath(quath& a, quath& b) { wp::expect_eq(a, b); }
1167
+ WP_API void builtin_expect_eq_quatf_quatf(quatf& a, quatf& b) { wp::expect_eq(a, b); }
1168
+ WP_API void builtin_expect_eq_quatd_quatd(quatd& a, quatd& b) { wp::expect_eq(a, b); }
1169
+ WP_API void builtin_expect_eq_transformh_transformh(transformh& a, transformh& b) { wp::expect_eq(a, b); }
1170
+ WP_API void builtin_expect_eq_transformf_transformf(transformf& a, transformf& b) { wp::expect_eq(a, b); }
1171
+ WP_API void builtin_expect_eq_transformd_transformd(transformd& a, transformd& b) { wp::expect_eq(a, b); }
1172
+ WP_API void builtin_expect_eq_bool_bool(bool a, bool b) { wp::expect_eq(a, b); }
1173
+ WP_API void builtin_expect_eq_vec2h_vec2h(vec2h& a, vec2h& b) { wp::expect_eq(a, b); }
1174
+ WP_API void builtin_expect_eq_vec3h_vec3h(vec3h& a, vec3h& b) { wp::expect_eq(a, b); }
1175
+ WP_API void builtin_expect_eq_vec4h_vec4h(vec4h& a, vec4h& b) { wp::expect_eq(a, b); }
1176
+ WP_API void builtin_expect_eq_spatial_vectorh_spatial_vectorh(spatial_vectorh& a, spatial_vectorh& b) { wp::expect_eq(a, b); }
1177
+ WP_API void builtin_expect_eq_vec2f_vec2f(vec2f& a, vec2f& b) { wp::expect_eq(a, b); }
1178
+ WP_API void builtin_expect_eq_vec3f_vec3f(vec3f& a, vec3f& b) { wp::expect_eq(a, b); }
1179
+ WP_API void builtin_expect_eq_vec4f_vec4f(vec4f& a, vec4f& b) { wp::expect_eq(a, b); }
1180
+ WP_API void builtin_expect_eq_spatial_vectorf_spatial_vectorf(spatial_vectorf& a, spatial_vectorf& b) { wp::expect_eq(a, b); }
1181
+ WP_API void builtin_expect_eq_vec2d_vec2d(vec2d& a, vec2d& b) { wp::expect_eq(a, b); }
1182
+ WP_API void builtin_expect_eq_vec3d_vec3d(vec3d& a, vec3d& b) { wp::expect_eq(a, b); }
1183
+ WP_API void builtin_expect_eq_vec4d_vec4d(vec4d& a, vec4d& b) { wp::expect_eq(a, b); }
1184
+ WP_API void builtin_expect_eq_spatial_vectord_spatial_vectord(spatial_vectord& a, spatial_vectord& b) { wp::expect_eq(a, b); }
1185
+ WP_API void builtin_expect_eq_vec2s_vec2s(vec2s& a, vec2s& b) { wp::expect_eq(a, b); }
1186
+ WP_API void builtin_expect_eq_vec3s_vec3s(vec3s& a, vec3s& b) { wp::expect_eq(a, b); }
1187
+ WP_API void builtin_expect_eq_vec4s_vec4s(vec4s& a, vec4s& b) { wp::expect_eq(a, b); }
1188
+ WP_API void builtin_expect_eq_vec2i_vec2i(vec2i& a, vec2i& b) { wp::expect_eq(a, b); }
1189
+ WP_API void builtin_expect_eq_vec3i_vec3i(vec3i& a, vec3i& b) { wp::expect_eq(a, b); }
1190
+ WP_API void builtin_expect_eq_vec4i_vec4i(vec4i& a, vec4i& b) { wp::expect_eq(a, b); }
1191
+ WP_API void builtin_expect_eq_vec2l_vec2l(vec2l& a, vec2l& b) { wp::expect_eq(a, b); }
1192
+ WP_API void builtin_expect_eq_vec3l_vec3l(vec3l& a, vec3l& b) { wp::expect_eq(a, b); }
1193
+ WP_API void builtin_expect_eq_vec4l_vec4l(vec4l& a, vec4l& b) { wp::expect_eq(a, b); }
1194
+ WP_API void builtin_expect_eq_vec2b_vec2b(vec2b& a, vec2b& b) { wp::expect_eq(a, b); }
1195
+ WP_API void builtin_expect_eq_vec3b_vec3b(vec3b& a, vec3b& b) { wp::expect_eq(a, b); }
1196
+ WP_API void builtin_expect_eq_vec4b_vec4b(vec4b& a, vec4b& b) { wp::expect_eq(a, b); }
1197
+ WP_API void builtin_expect_eq_vec2us_vec2us(vec2us& a, vec2us& b) { wp::expect_eq(a, b); }
1198
+ WP_API void builtin_expect_eq_vec3us_vec3us(vec3us& a, vec3us& b) { wp::expect_eq(a, b); }
1199
+ WP_API void builtin_expect_eq_vec4us_vec4us(vec4us& a, vec4us& b) { wp::expect_eq(a, b); }
1200
+ WP_API void builtin_expect_eq_vec2ui_vec2ui(vec2ui& a, vec2ui& b) { wp::expect_eq(a, b); }
1201
+ WP_API void builtin_expect_eq_vec3ui_vec3ui(vec3ui& a, vec3ui& b) { wp::expect_eq(a, b); }
1202
+ WP_API void builtin_expect_eq_vec4ui_vec4ui(vec4ui& a, vec4ui& b) { wp::expect_eq(a, b); }
1203
+ WP_API void builtin_expect_eq_vec2ul_vec2ul(vec2ul& a, vec2ul& b) { wp::expect_eq(a, b); }
1204
+ WP_API void builtin_expect_eq_vec3ul_vec3ul(vec3ul& a, vec3ul& b) { wp::expect_eq(a, b); }
1205
+ WP_API void builtin_expect_eq_vec4ul_vec4ul(vec4ul& a, vec4ul& b) { wp::expect_eq(a, b); }
1206
+ WP_API void builtin_expect_eq_vec2ub_vec2ub(vec2ub& a, vec2ub& b) { wp::expect_eq(a, b); }
1207
+ WP_API void builtin_expect_eq_vec3ub_vec3ub(vec3ub& a, vec3ub& b) { wp::expect_eq(a, b); }
1208
+ WP_API void builtin_expect_eq_vec4ub_vec4ub(vec4ub& a, vec4ub& b) { wp::expect_eq(a, b); }
1209
+ WP_API void builtin_expect_eq_mat22h_mat22h(mat22h& a, mat22h& b) { wp::expect_eq(a, b); }
1210
+ WP_API void builtin_expect_eq_mat33h_mat33h(mat33h& a, mat33h& b) { wp::expect_eq(a, b); }
1211
+ WP_API void builtin_expect_eq_mat44h_mat44h(mat44h& a, mat44h& b) { wp::expect_eq(a, b); }
1212
+ WP_API void builtin_expect_eq_spatial_matrixh_spatial_matrixh(spatial_matrixh& a, spatial_matrixh& b) { wp::expect_eq(a, b); }
1213
+ WP_API void builtin_expect_eq_mat22f_mat22f(mat22f& a, mat22f& b) { wp::expect_eq(a, b); }
1214
+ WP_API void builtin_expect_eq_mat33f_mat33f(mat33f& a, mat33f& b) { wp::expect_eq(a, b); }
1215
+ WP_API void builtin_expect_eq_mat44f_mat44f(mat44f& a, mat44f& b) { wp::expect_eq(a, b); }
1216
+ WP_API void builtin_expect_eq_spatial_matrixf_spatial_matrixf(spatial_matrixf& a, spatial_matrixf& b) { wp::expect_eq(a, b); }
1217
+ WP_API void builtin_expect_eq_mat22d_mat22d(mat22d& a, mat22d& b) { wp::expect_eq(a, b); }
1218
+ WP_API void builtin_expect_eq_mat33d_mat33d(mat33d& a, mat33d& b) { wp::expect_eq(a, b); }
1219
+ WP_API void builtin_expect_eq_mat44d_mat44d(mat44d& a, mat44d& b) { wp::expect_eq(a, b); }
1220
+ WP_API void builtin_expect_eq_spatial_matrixd_spatial_matrixd(spatial_matrixd& a, spatial_matrixd& b) { wp::expect_eq(a, b); }
1196
1221
  WP_API void builtin_lerp_float16_float16_float16(float16 a, float16 b, float16 t, float16* ret) { *ret = wp::lerp(a, b, t); }
1197
1222
  WP_API void builtin_lerp_float32_float32_float32(float32 a, float32 b, float32 t, float32* ret) { *ret = wp::lerp(a, b, t); }
1198
1223
  WP_API void builtin_lerp_float64_float64_float64(float64 a, float64 b, float64 t, float64* ret) { *ret = wp::lerp(a, b, t); }
@@ -1229,6 +1254,36 @@ WP_API void builtin_lerp_transformd_transformd_float64(transformd& a, transformd
1229
1254
  WP_API void builtin_smoothstep_float16_float16_float16(float16 a, float16 b, float16 x, float16* ret) { *ret = wp::smoothstep(a, b, x); }
1230
1255
  WP_API void builtin_smoothstep_float32_float32_float32(float32 a, float32 b, float32 x, float32* ret) { *ret = wp::smoothstep(a, b, x); }
1231
1256
  WP_API void builtin_smoothstep_float64_float64_float64(float64 a, float64 b, float64 x, float64* ret) { *ret = wp::smoothstep(a, b, x); }
1257
+ WP_API void builtin_expect_near_float16_float16_float16(float16 a, float16 b, float16 tolerance) { wp::expect_near(a, b, tolerance); }
1258
+ WP_API void builtin_expect_near_float32_float32_float32(float32 a, float32 b, float32 tolerance) { wp::expect_near(a, b, tolerance); }
1259
+ WP_API void builtin_expect_near_float64_float64_float64(float64 a, float64 b, float64 tolerance) { wp::expect_near(a, b, tolerance); }
1260
+ WP_API void builtin_expect_near_vec2h_vec2h_float16(vec2h& a, vec2h& b, float16 tolerance) { wp::expect_near(a, b, tolerance); }
1261
+ WP_API void builtin_expect_near_vec3h_vec3h_float16(vec3h& a, vec3h& b, float16 tolerance) { wp::expect_near(a, b, tolerance); }
1262
+ WP_API void builtin_expect_near_vec4h_vec4h_float16(vec4h& a, vec4h& b, float16 tolerance) { wp::expect_near(a, b, tolerance); }
1263
+ WP_API void builtin_expect_near_spatial_vectorh_spatial_vectorh_float16(spatial_vectorh& a, spatial_vectorh& b, float16 tolerance) { wp::expect_near(a, b, tolerance); }
1264
+ WP_API void builtin_expect_near_vec2f_vec2f_float32(vec2f& a, vec2f& b, float32 tolerance) { wp::expect_near(a, b, tolerance); }
1265
+ WP_API void builtin_expect_near_vec3f_vec3f_float32(vec3f& a, vec3f& b, float32 tolerance) { wp::expect_near(a, b, tolerance); }
1266
+ WP_API void builtin_expect_near_vec4f_vec4f_float32(vec4f& a, vec4f& b, float32 tolerance) { wp::expect_near(a, b, tolerance); }
1267
+ WP_API void builtin_expect_near_spatial_vectorf_spatial_vectorf_float32(spatial_vectorf& a, spatial_vectorf& b, float32 tolerance) { wp::expect_near(a, b, tolerance); }
1268
+ WP_API void builtin_expect_near_vec2d_vec2d_float64(vec2d& a, vec2d& b, float64 tolerance) { wp::expect_near(a, b, tolerance); }
1269
+ WP_API void builtin_expect_near_vec3d_vec3d_float64(vec3d& a, vec3d& b, float64 tolerance) { wp::expect_near(a, b, tolerance); }
1270
+ WP_API void builtin_expect_near_vec4d_vec4d_float64(vec4d& a, vec4d& b, float64 tolerance) { wp::expect_near(a, b, tolerance); }
1271
+ WP_API void builtin_expect_near_spatial_vectord_spatial_vectord_float64(spatial_vectord& a, spatial_vectord& b, float64 tolerance) { wp::expect_near(a, b, tolerance); }
1272
+ WP_API void builtin_expect_near_quath_quath_float16(quath& a, quath& b, float16 tolerance) { wp::expect_near(a, b, tolerance); }
1273
+ WP_API void builtin_expect_near_quatf_quatf_float32(quatf& a, quatf& b, float32 tolerance) { wp::expect_near(a, b, tolerance); }
1274
+ WP_API void builtin_expect_near_quatd_quatd_float64(quatd& a, quatd& b, float64 tolerance) { wp::expect_near(a, b, tolerance); }
1275
+ WP_API void builtin_expect_near_mat22h_mat22h_float16(mat22h& a, mat22h& b, float16 tolerance) { wp::expect_near(a, b, tolerance); }
1276
+ WP_API void builtin_expect_near_mat33h_mat33h_float16(mat33h& a, mat33h& b, float16 tolerance) { wp::expect_near(a, b, tolerance); }
1277
+ WP_API void builtin_expect_near_mat44h_mat44h_float16(mat44h& a, mat44h& b, float16 tolerance) { wp::expect_near(a, b, tolerance); }
1278
+ WP_API void builtin_expect_near_spatial_matrixh_spatial_matrixh_float16(spatial_matrixh& a, spatial_matrixh& b, float16 tolerance) { wp::expect_near(a, b, tolerance); }
1279
+ WP_API void builtin_expect_near_mat22f_mat22f_float32(mat22f& a, mat22f& b, float32 tolerance) { wp::expect_near(a, b, tolerance); }
1280
+ WP_API void builtin_expect_near_mat33f_mat33f_float32(mat33f& a, mat33f& b, float32 tolerance) { wp::expect_near(a, b, tolerance); }
1281
+ WP_API void builtin_expect_near_mat44f_mat44f_float32(mat44f& a, mat44f& b, float32 tolerance) { wp::expect_near(a, b, tolerance); }
1282
+ WP_API void builtin_expect_near_spatial_matrixf_spatial_matrixf_float32(spatial_matrixf& a, spatial_matrixf& b, float32 tolerance) { wp::expect_near(a, b, tolerance); }
1283
+ WP_API void builtin_expect_near_mat22d_mat22d_float64(mat22d& a, mat22d& b, float64 tolerance) { wp::expect_near(a, b, tolerance); }
1284
+ WP_API void builtin_expect_near_mat33d_mat33d_float64(mat33d& a, mat33d& b, float64 tolerance) { wp::expect_near(a, b, tolerance); }
1285
+ WP_API void builtin_expect_near_mat44d_mat44d_float64(mat44d& a, mat44d& b, float64 tolerance) { wp::expect_near(a, b, tolerance); }
1286
+ WP_API void builtin_expect_near_spatial_matrixd_spatial_matrixd_float64(spatial_matrixd& a, spatial_matrixd& b, float64 tolerance) { wp::expect_near(a, b, tolerance); }
1232
1287
  WP_API void builtin_add_float16_float16(float16 a, float16 b, float16* ret) { *ret = wp::add(a, b); }
1233
1288
  WP_API void builtin_add_float32_float32(float32 a, float32 b, float32* ret) { *ret = wp::add(a, b); }
1234
1289
  WP_API void builtin_add_float64_float64(float64 a, float64 b, float64* ret) { *ret = wp::add(a, b); }
warp/native/intersect.h CHANGED
@@ -316,7 +316,7 @@ CUDA_CALLABLE inline bool intersect_ray_tri_woop(const vec3& p, const vec3& dir,
316
316
 
317
317
  if (dir[kz] < 0.0f)
318
318
  {
319
- float tmp = kx;
319
+ int tmp = kx;
320
320
  kx = ky;
321
321
  ky = tmp;
322
322
  }
@@ -410,7 +410,7 @@ CUDA_CALLABLE inline void adj_intersect_ray_tri_woop(
410
410
 
411
411
  if (dir[kz] < 0.0f)
412
412
  {
413
- float tmp = kx;
413
+ int tmp = kx;
414
414
  kx = ky;
415
415
  ky = tmp;
416
416
  }
@@ -665,7 +665,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
665
665
  bool var_25;
666
666
  bool var_26;
667
667
  bool var_27;
668
- float32 var_28;
668
+ float32 var_28 = 0.0;
669
669
  vec2 var_29;
670
670
  vec2 var_30;
671
671
  vec3 var_31;
@@ -685,7 +685,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
685
685
  bool var_45;
686
686
  bool var_46;
687
687
  bool var_47;
688
- float32 var_48;
688
+ float32 var_48 = 0.0;
689
689
  vec2 var_49;
690
690
  vec2 var_50;
691
691
  float32 var_51;
@@ -702,7 +702,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
702
702
  float32 var_62;
703
703
  bool var_63;
704
704
  bool var_64;
705
- float32 var_65;
705
+ float32 var_65 = 0.0;
706
706
  vec2 var_66;
707
707
  // vec2 var_67;
708
708
  float32 var_68;