warp-lang 1.9.0__py3-none-win_amd64.whl → 1.10.0rc2__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (350) hide show
  1. warp/__init__.py +301 -287
  2. warp/__init__.pyi +2220 -313
  3. warp/_src/__init__.py +14 -0
  4. warp/_src/autograd.py +1075 -0
  5. warp/_src/build.py +618 -0
  6. warp/_src/build_dll.py +640 -0
  7. warp/{builtins.py → _src/builtins.py} +1497 -226
  8. warp/_src/codegen.py +4359 -0
  9. warp/{config.py → _src/config.py} +178 -169
  10. warp/_src/constants.py +57 -0
  11. warp/_src/context.py +8294 -0
  12. warp/_src/dlpack.py +462 -0
  13. warp/_src/fabric.py +355 -0
  14. warp/_src/fem/__init__.py +14 -0
  15. warp/_src/fem/adaptivity.py +508 -0
  16. warp/_src/fem/cache.py +687 -0
  17. warp/_src/fem/dirichlet.py +188 -0
  18. warp/{fem → _src/fem}/domain.py +40 -30
  19. warp/_src/fem/field/__init__.py +131 -0
  20. warp/_src/fem/field/field.py +701 -0
  21. warp/{fem → _src/fem}/field/nodal_field.py +30 -15
  22. warp/{fem → _src/fem}/field/restriction.py +1 -1
  23. warp/{fem → _src/fem}/field/virtual.py +53 -27
  24. warp/_src/fem/geometry/__init__.py +32 -0
  25. warp/{fem → _src/fem}/geometry/adaptive_nanogrid.py +77 -163
  26. warp/_src/fem/geometry/closest_point.py +97 -0
  27. warp/{fem → _src/fem}/geometry/deformed_geometry.py +14 -22
  28. warp/{fem → _src/fem}/geometry/element.py +32 -10
  29. warp/{fem → _src/fem}/geometry/geometry.py +48 -20
  30. warp/{fem → _src/fem}/geometry/grid_2d.py +12 -23
  31. warp/{fem → _src/fem}/geometry/grid_3d.py +12 -23
  32. warp/{fem → _src/fem}/geometry/hexmesh.py +40 -63
  33. warp/{fem → _src/fem}/geometry/nanogrid.py +255 -248
  34. warp/{fem → _src/fem}/geometry/partition.py +121 -63
  35. warp/{fem → _src/fem}/geometry/quadmesh.py +26 -45
  36. warp/{fem → _src/fem}/geometry/tetmesh.py +40 -63
  37. warp/{fem → _src/fem}/geometry/trimesh.py +26 -45
  38. warp/{fem → _src/fem}/integrate.py +164 -158
  39. warp/_src/fem/linalg.py +383 -0
  40. warp/_src/fem/operator.py +396 -0
  41. warp/_src/fem/polynomial.py +229 -0
  42. warp/{fem → _src/fem}/quadrature/pic_quadrature.py +15 -20
  43. warp/{fem → _src/fem}/quadrature/quadrature.py +95 -47
  44. warp/_src/fem/space/__init__.py +248 -0
  45. warp/{fem → _src/fem}/space/basis_function_space.py +20 -11
  46. warp/_src/fem/space/basis_space.py +679 -0
  47. warp/{fem → _src/fem}/space/dof_mapper.py +3 -3
  48. warp/{fem → _src/fem}/space/function_space.py +14 -13
  49. warp/{fem → _src/fem}/space/grid_2d_function_space.py +4 -7
  50. warp/{fem → _src/fem}/space/grid_3d_function_space.py +4 -4
  51. warp/{fem → _src/fem}/space/hexmesh_function_space.py +4 -10
  52. warp/{fem → _src/fem}/space/nanogrid_function_space.py +3 -9
  53. warp/{fem → _src/fem}/space/partition.py +117 -60
  54. warp/{fem → _src/fem}/space/quadmesh_function_space.py +4 -10
  55. warp/{fem → _src/fem}/space/restriction.py +66 -33
  56. warp/_src/fem/space/shape/__init__.py +152 -0
  57. warp/{fem → _src/fem}/space/shape/cube_shape_function.py +9 -9
  58. warp/{fem → _src/fem}/space/shape/shape_function.py +8 -9
  59. warp/{fem → _src/fem}/space/shape/square_shape_function.py +6 -6
  60. warp/{fem → _src/fem}/space/shape/tet_shape_function.py +3 -3
  61. warp/{fem → _src/fem}/space/shape/triangle_shape_function.py +3 -3
  62. warp/{fem → _src/fem}/space/tetmesh_function_space.py +3 -9
  63. warp/_src/fem/space/topology.py +459 -0
  64. warp/{fem → _src/fem}/space/trimesh_function_space.py +3 -9
  65. warp/_src/fem/types.py +112 -0
  66. warp/_src/fem/utils.py +486 -0
  67. warp/_src/jax.py +186 -0
  68. warp/_src/jax_experimental/__init__.py +14 -0
  69. warp/_src/jax_experimental/custom_call.py +387 -0
  70. warp/_src/jax_experimental/ffi.py +1284 -0
  71. warp/_src/jax_experimental/xla_ffi.py +656 -0
  72. warp/_src/marching_cubes.py +708 -0
  73. warp/_src/math.py +414 -0
  74. warp/_src/optim/__init__.py +14 -0
  75. warp/_src/optim/adam.py +163 -0
  76. warp/_src/optim/linear.py +1606 -0
  77. warp/_src/optim/sgd.py +112 -0
  78. warp/_src/paddle.py +406 -0
  79. warp/_src/render/__init__.py +14 -0
  80. warp/_src/render/imgui_manager.py +289 -0
  81. warp/_src/render/render_opengl.py +3636 -0
  82. warp/_src/render/render_usd.py +937 -0
  83. warp/_src/render/utils.py +160 -0
  84. warp/_src/sparse.py +2716 -0
  85. warp/_src/tape.py +1206 -0
  86. warp/{thirdparty → _src/thirdparty}/unittest_parallel.py +9 -2
  87. warp/_src/torch.py +391 -0
  88. warp/_src/types.py +5870 -0
  89. warp/_src/utils.py +1693 -0
  90. warp/autograd.py +12 -1054
  91. warp/bin/warp-clang.dll +0 -0
  92. warp/bin/warp.dll +0 -0
  93. warp/build.py +8 -588
  94. warp/build_dll.py +6 -471
  95. warp/codegen.py +6 -4246
  96. warp/constants.py +6 -39
  97. warp/context.py +12 -7851
  98. warp/dlpack.py +6 -444
  99. warp/examples/distributed/example_jacobi_mpi.py +4 -5
  100. warp/examples/fem/example_adaptive_grid.py +1 -1
  101. warp/examples/fem/example_apic_fluid.py +1 -1
  102. warp/examples/fem/example_burgers.py +8 -8
  103. warp/examples/fem/example_diffusion.py +1 -1
  104. warp/examples/fem/example_distortion_energy.py +1 -1
  105. warp/examples/fem/example_mixed_elasticity.py +2 -2
  106. warp/examples/fem/example_navier_stokes.py +1 -1
  107. warp/examples/fem/example_nonconforming_contact.py +7 -7
  108. warp/examples/fem/example_stokes.py +1 -1
  109. warp/examples/fem/example_stokes_transfer.py +1 -1
  110. warp/examples/fem/utils.py +2 -2
  111. warp/examples/interop/example_jax_callable.py +1 -1
  112. warp/examples/interop/example_jax_ffi_callback.py +1 -1
  113. warp/examples/interop/example_jax_kernel.py +3 -2
  114. warp/examples/tile/example_tile_mcgp.py +191 -0
  115. warp/fabric.py +6 -337
  116. warp/fem/__init__.py +159 -97
  117. warp/fem/adaptivity.py +7 -489
  118. warp/fem/cache.py +9 -648
  119. warp/fem/dirichlet.py +6 -184
  120. warp/fem/field/__init__.py +8 -109
  121. warp/fem/field/field.py +7 -652
  122. warp/fem/geometry/__init__.py +7 -18
  123. warp/fem/geometry/closest_point.py +11 -77
  124. warp/fem/linalg.py +18 -366
  125. warp/fem/operator.py +11 -369
  126. warp/fem/polynomial.py +9 -209
  127. warp/fem/space/__init__.py +5 -211
  128. warp/fem/space/basis_space.py +6 -662
  129. warp/fem/space/shape/__init__.py +41 -118
  130. warp/fem/space/topology.py +6 -437
  131. warp/fem/types.py +6 -81
  132. warp/fem/utils.py +11 -444
  133. warp/jax.py +8 -165
  134. warp/jax_experimental/__init__.py +14 -1
  135. warp/jax_experimental/custom_call.py +8 -342
  136. warp/jax_experimental/ffi.py +17 -853
  137. warp/jax_experimental/xla_ffi.py +5 -596
  138. warp/marching_cubes.py +5 -689
  139. warp/math.py +16 -393
  140. warp/native/array.h +385 -37
  141. warp/native/builtin.h +316 -39
  142. warp/native/bvh.cpp +43 -9
  143. warp/native/bvh.cu +62 -27
  144. warp/native/bvh.h +310 -309
  145. warp/native/clang/clang.cpp +102 -97
  146. warp/native/coloring.cpp +0 -1
  147. warp/native/crt.h +208 -0
  148. warp/native/exports.h +156 -0
  149. warp/native/hashgrid.cu +2 -0
  150. warp/native/intersect.h +24 -1
  151. warp/native/intersect_tri.h +44 -35
  152. warp/native/mat.h +1456 -276
  153. warp/native/mesh.cpp +4 -4
  154. warp/native/mesh.cu +4 -2
  155. warp/native/mesh.h +176 -61
  156. warp/native/quat.h +0 -52
  157. warp/native/scan.cu +2 -0
  158. warp/native/sort.cu +22 -13
  159. warp/native/sort.h +2 -0
  160. warp/native/sparse.cu +7 -3
  161. warp/native/spatial.h +12 -0
  162. warp/native/tile.h +837 -70
  163. warp/native/tile_radix_sort.h +1 -1
  164. warp/native/tile_reduce.h +394 -46
  165. warp/native/tile_scan.h +4 -4
  166. warp/native/vec.h +469 -53
  167. warp/native/version.h +23 -0
  168. warp/native/volume.cpp +1 -1
  169. warp/native/volume.cu +1 -0
  170. warp/native/volume.h +1 -1
  171. warp/native/volume_builder.cu +2 -0
  172. warp/native/warp.cpp +60 -32
  173. warp/native/warp.cu +313 -201
  174. warp/native/warp.h +14 -11
  175. warp/optim/__init__.py +6 -3
  176. warp/optim/adam.py +6 -145
  177. warp/optim/linear.py +14 -1585
  178. warp/optim/sgd.py +6 -94
  179. warp/paddle.py +6 -388
  180. warp/render/__init__.py +8 -4
  181. warp/render/imgui_manager.py +7 -267
  182. warp/render/render_opengl.py +6 -3616
  183. warp/render/render_usd.py +6 -918
  184. warp/render/utils.py +6 -142
  185. warp/sparse.py +37 -2563
  186. warp/tape.py +6 -1188
  187. warp/tests/__main__.py +1 -1
  188. warp/tests/cuda/test_async.py +4 -4
  189. warp/tests/cuda/test_conditional_captures.py +1 -1
  190. warp/tests/cuda/test_multigpu.py +1 -1
  191. warp/tests/cuda/test_streams.py +58 -1
  192. warp/tests/geometry/test_bvh.py +157 -22
  193. warp/tests/geometry/test_hash_grid.py +38 -0
  194. warp/tests/geometry/test_marching_cubes.py +0 -1
  195. warp/tests/geometry/test_mesh.py +5 -3
  196. warp/tests/geometry/test_mesh_query_aabb.py +5 -12
  197. warp/tests/geometry/test_mesh_query_point.py +5 -2
  198. warp/tests/geometry/test_mesh_query_ray.py +15 -3
  199. warp/tests/geometry/test_volume_write.py +5 -5
  200. warp/tests/interop/test_dlpack.py +14 -14
  201. warp/tests/interop/test_jax.py +1382 -79
  202. warp/tests/interop/test_paddle.py +1 -1
  203. warp/tests/test_adam.py +0 -1
  204. warp/tests/test_arithmetic.py +9 -9
  205. warp/tests/test_array.py +529 -100
  206. warp/tests/test_array_reduce.py +3 -3
  207. warp/tests/test_atomic.py +12 -8
  208. warp/tests/test_atomic_bitwise.py +209 -0
  209. warp/tests/test_atomic_cas.py +4 -4
  210. warp/tests/test_bool.py +2 -2
  211. warp/tests/test_builtins_resolution.py +5 -571
  212. warp/tests/test_codegen.py +34 -15
  213. warp/tests/test_conditional.py +1 -1
  214. warp/tests/test_context.py +6 -6
  215. warp/tests/test_copy.py +242 -161
  216. warp/tests/test_ctypes.py +3 -3
  217. warp/tests/test_devices.py +24 -2
  218. warp/tests/test_examples.py +16 -84
  219. warp/tests/test_fabricarray.py +35 -35
  220. warp/tests/test_fast_math.py +0 -2
  221. warp/tests/test_fem.py +60 -14
  222. warp/tests/test_fixedarray.py +3 -3
  223. warp/tests/test_func.py +8 -5
  224. warp/tests/test_generics.py +1 -1
  225. warp/tests/test_indexedarray.py +24 -24
  226. warp/tests/test_intersect.py +39 -9
  227. warp/tests/test_large.py +1 -1
  228. warp/tests/test_lerp.py +3 -1
  229. warp/tests/test_linear_solvers.py +1 -1
  230. warp/tests/test_map.py +49 -4
  231. warp/tests/test_mat.py +52 -62
  232. warp/tests/test_mat_constructors.py +4 -5
  233. warp/tests/test_mat_lite.py +1 -1
  234. warp/tests/test_mat_scalar_ops.py +121 -121
  235. warp/tests/test_math.py +34 -0
  236. warp/tests/test_module_aot.py +4 -4
  237. warp/tests/test_modules_lite.py +28 -2
  238. warp/tests/test_print.py +11 -11
  239. warp/tests/test_quat.py +93 -58
  240. warp/tests/test_runlength_encode.py +1 -1
  241. warp/tests/test_scalar_ops.py +38 -10
  242. warp/tests/test_smoothstep.py +1 -1
  243. warp/tests/test_sparse.py +126 -15
  244. warp/tests/test_spatial.py +105 -87
  245. warp/tests/test_special_values.py +6 -6
  246. warp/tests/test_static.py +7 -7
  247. warp/tests/test_struct.py +13 -2
  248. warp/tests/test_triangle_closest_point.py +48 -1
  249. warp/tests/test_tuple.py +96 -0
  250. warp/tests/test_types.py +82 -9
  251. warp/tests/test_utils.py +52 -52
  252. warp/tests/test_vec.py +29 -29
  253. warp/tests/test_vec_constructors.py +5 -5
  254. warp/tests/test_vec_scalar_ops.py +97 -97
  255. warp/tests/test_version.py +75 -0
  256. warp/tests/tile/test_tile.py +239 -0
  257. warp/tests/tile/test_tile_atomic_bitwise.py +403 -0
  258. warp/tests/tile/test_tile_cholesky.py +7 -4
  259. warp/tests/tile/test_tile_load.py +26 -2
  260. warp/tests/tile/test_tile_mathdx.py +3 -3
  261. warp/tests/tile/test_tile_matmul.py +1 -1
  262. warp/tests/tile/test_tile_mlp.py +2 -4
  263. warp/tests/tile/test_tile_reduce.py +214 -13
  264. warp/tests/unittest_suites.py +6 -14
  265. warp/tests/unittest_utils.py +10 -9
  266. warp/tests/walkthrough_debug.py +3 -1
  267. warp/torch.py +6 -373
  268. warp/types.py +29 -5750
  269. warp/utils.py +10 -1659
  270. {warp_lang-1.9.0.dist-info → warp_lang-1.10.0rc2.dist-info}/METADATA +47 -103
  271. warp_lang-1.10.0rc2.dist-info/RECORD +468 -0
  272. warp_lang-1.10.0rc2.dist-info/licenses/licenses/Gaia-LICENSE.txt +6 -0
  273. warp_lang-1.10.0rc2.dist-info/licenses/licenses/appdirs-LICENSE.txt +22 -0
  274. warp_lang-1.10.0rc2.dist-info/licenses/licenses/asset_pixel_jpg-LICENSE.txt +3 -0
  275. warp_lang-1.10.0rc2.dist-info/licenses/licenses/cuda-LICENSE.txt +1582 -0
  276. warp_lang-1.10.0rc2.dist-info/licenses/licenses/dlpack-LICENSE.txt +201 -0
  277. warp_lang-1.10.0rc2.dist-info/licenses/licenses/fp16-LICENSE.txt +28 -0
  278. warp_lang-1.10.0rc2.dist-info/licenses/licenses/libmathdx-LICENSE.txt +220 -0
  279. warp_lang-1.10.0rc2.dist-info/licenses/licenses/llvm-LICENSE.txt +279 -0
  280. warp_lang-1.10.0rc2.dist-info/licenses/licenses/moller-LICENSE.txt +16 -0
  281. warp_lang-1.10.0rc2.dist-info/licenses/licenses/nanovdb-LICENSE.txt +2 -0
  282. warp_lang-1.10.0rc2.dist-info/licenses/licenses/nvrtc-LICENSE.txt +1592 -0
  283. warp_lang-1.10.0rc2.dist-info/licenses/licenses/svd-LICENSE.txt +23 -0
  284. warp_lang-1.10.0rc2.dist-info/licenses/licenses/unittest_parallel-LICENSE.txt +21 -0
  285. warp_lang-1.10.0rc2.dist-info/licenses/licenses/usd-LICENSE.txt +213 -0
  286. warp_lang-1.10.0rc2.dist-info/licenses/licenses/windingnumber-LICENSE.txt +21 -0
  287. warp/examples/assets/cartpole.urdf +0 -110
  288. warp/examples/assets/crazyflie.usd +0 -0
  289. warp/examples/assets/nv_ant.xml +0 -92
  290. warp/examples/assets/nv_humanoid.xml +0 -183
  291. warp/examples/assets/quadruped.urdf +0 -268
  292. warp/examples/optim/example_bounce.py +0 -266
  293. warp/examples/optim/example_cloth_throw.py +0 -228
  294. warp/examples/optim/example_drone.py +0 -870
  295. warp/examples/optim/example_inverse_kinematics.py +0 -182
  296. warp/examples/optim/example_inverse_kinematics_torch.py +0 -191
  297. warp/examples/optim/example_softbody_properties.py +0 -400
  298. warp/examples/optim/example_spring_cage.py +0 -245
  299. warp/examples/optim/example_trajectory.py +0 -227
  300. warp/examples/sim/example_cartpole.py +0 -143
  301. warp/examples/sim/example_cloth.py +0 -225
  302. warp/examples/sim/example_cloth_self_contact.py +0 -316
  303. warp/examples/sim/example_granular.py +0 -130
  304. warp/examples/sim/example_granular_collision_sdf.py +0 -202
  305. warp/examples/sim/example_jacobian_ik.py +0 -244
  306. warp/examples/sim/example_particle_chain.py +0 -124
  307. warp/examples/sim/example_quadruped.py +0 -203
  308. warp/examples/sim/example_rigid_chain.py +0 -203
  309. warp/examples/sim/example_rigid_contact.py +0 -195
  310. warp/examples/sim/example_rigid_force.py +0 -133
  311. warp/examples/sim/example_rigid_gyroscopic.py +0 -115
  312. warp/examples/sim/example_rigid_soft_contact.py +0 -140
  313. warp/examples/sim/example_soft_body.py +0 -196
  314. warp/examples/tile/example_tile_walker.py +0 -327
  315. warp/sim/__init__.py +0 -74
  316. warp/sim/articulation.py +0 -793
  317. warp/sim/collide.py +0 -2570
  318. warp/sim/graph_coloring.py +0 -307
  319. warp/sim/import_mjcf.py +0 -791
  320. warp/sim/import_snu.py +0 -227
  321. warp/sim/import_urdf.py +0 -579
  322. warp/sim/import_usd.py +0 -898
  323. warp/sim/inertia.py +0 -357
  324. warp/sim/integrator.py +0 -245
  325. warp/sim/integrator_euler.py +0 -2000
  326. warp/sim/integrator_featherstone.py +0 -2101
  327. warp/sim/integrator_vbd.py +0 -2487
  328. warp/sim/integrator_xpbd.py +0 -3295
  329. warp/sim/model.py +0 -4821
  330. warp/sim/particles.py +0 -121
  331. warp/sim/render.py +0 -431
  332. warp/sim/utils.py +0 -431
  333. warp/tests/sim/disabled_kinematics.py +0 -244
  334. warp/tests/sim/test_cloth.py +0 -863
  335. warp/tests/sim/test_collision.py +0 -743
  336. warp/tests/sim/test_coloring.py +0 -347
  337. warp/tests/sim/test_inertia.py +0 -161
  338. warp/tests/sim/test_model.py +0 -226
  339. warp/tests/sim/test_sim_grad.py +0 -287
  340. warp/tests/sim/test_sim_grad_bounce_linear.py +0 -212
  341. warp/tests/sim/test_sim_kinematics.py +0 -98
  342. warp/thirdparty/__init__.py +0 -0
  343. warp_lang-1.9.0.dist-info/RECORD +0 -456
  344. /warp/{fem → _src/fem}/quadrature/__init__.py +0 -0
  345. /warp/{tests/sim → _src/thirdparty}/__init__.py +0 -0
  346. /warp/{thirdparty → _src/thirdparty}/appdirs.py +0 -0
  347. /warp/{thirdparty → _src/thirdparty}/dlpack.py +0 -0
  348. {warp_lang-1.9.0.dist-info → warp_lang-1.10.0rc2.dist-info}/WHEEL +0 -0
  349. {warp_lang-1.9.0.dist-info → warp_lang-1.10.0rc2.dist-info}/licenses/LICENSE.md +0 -0
  350. {warp_lang-1.9.0.dist-info → warp_lang-1.10.0rc2.dist-info}/top_level.txt +0 -0
warp/native/vec.h CHANGED
@@ -149,6 +149,17 @@ using vec2d = vec_t<2,double>;
149
149
  using vec3d = vec_t<3,double>;
150
150
  using vec4d = vec_t<4,double>;
151
151
 
152
+ // Type trait to detect if a type is a vec_t
153
+ template<typename T>
154
+ struct is_vector {
155
+ static constexpr bool value = false;
156
+ };
157
+
158
+ template<unsigned Length, typename Type>
159
+ struct is_vector<vec_t<Length, Type>> {
160
+ static constexpr bool value = true;
161
+ };
162
+
152
163
  template<unsigned Length, typename Type>
153
164
  inline CUDA_CALLABLE vec_t<Length, Type> operator - (const vec_t<Length, Type>& x)
154
165
  {
@@ -343,17 +354,6 @@ inline CUDA_CALLABLE vec_t<Length, Type> add(vec_t<Length, Type> a, vec_t<Length
343
354
  return ret;
344
355
  }
345
356
 
346
- template<unsigned Length, typename Type>
347
- inline CUDA_CALLABLE vec_t<Length, Type> add(Type a, vec_t<Length, Type> b)
348
- {
349
- vec_t<Length, Type> ret;
350
- for( unsigned i=0; i < Length; ++i )
351
- {
352
- ret[i] = a + b[i];
353
- }
354
- return ret;
355
- }
356
-
357
357
  template<typename Type>
358
358
  inline CUDA_CALLABLE vec_t<2, Type> add(vec_t<2, Type> a, vec_t<2, Type> b)
359
359
  {
@@ -378,18 +378,6 @@ inline CUDA_CALLABLE vec_t<Length, Type> sub(vec_t<Length, Type> a, vec_t<Length
378
378
  return ret;
379
379
  }
380
380
 
381
- template<unsigned Length, typename Type>
382
- inline CUDA_CALLABLE vec_t<Length, Type> sub(Type a, vec_t<Length, Type> b)
383
- {
384
- vec_t<Length, Type> ret;
385
- for (unsigned i=0; i < Length; ++i)
386
- {
387
- ret[i] = Type(a - b[i]);
388
- }
389
-
390
- return ret;
391
- }
392
-
393
381
  template<typename Type>
394
382
  inline CUDA_CALLABLE vec_t<2, Type> sub(vec_t<2, Type> a, vec_t<2, Type> b)
395
383
  {
@@ -427,6 +415,139 @@ inline CUDA_CALLABLE vec_t<3, Type> mod(vec_t<3, Type> a, vec_t<3, Type> b)
427
415
  return vec_t<3, Type>(mod(a.c[0], b.c[0]), mod(a.c[1], b.c[1]), mod(a.c[2], b.c[2]));
428
416
  }
429
417
 
418
+ // bitwise AND
419
+ template<unsigned Length, typename Type>
420
+ inline CUDA_CALLABLE vec_t<Length, Type> bit_and(vec_t<Length, Type> a, vec_t<Length, Type> b)
421
+ {
422
+ vec_t<Length, Type> ret;
423
+ for( unsigned i=0; i < Length; ++i )
424
+ {
425
+ ret[i] = Type(a[i] & b[i]);
426
+ }
427
+ return ret;
428
+ }
429
+
430
+ template<typename Type>
431
+ inline CUDA_CALLABLE vec_t<2, Type> bit_and(vec_t<2, Type> a, vec_t<2, Type> b)
432
+ {
433
+ return vec_t<2, Type>( a.c[0] & b.c[0], a.c[1] & b.c[1]);
434
+ }
435
+
436
+ template<typename Type>
437
+ inline CUDA_CALLABLE vec_t<3, Type> bit_and(vec_t<3, Type> a, vec_t<3, Type> b)
438
+ {
439
+ return vec_t<3, Type>( a.c[0] & b.c[0], a.c[1] & b.c[1], a.c[2] & b.c[2]);
440
+ }
441
+
442
+ // bitwise OR
443
+ template<unsigned Length, typename Type>
444
+ inline CUDA_CALLABLE vec_t<Length, Type> bit_or(vec_t<Length, Type> a, vec_t<Length, Type> b)
445
+ {
446
+ vec_t<Length, Type> ret;
447
+ for( unsigned i=0; i < Length; ++i )
448
+ {
449
+ ret[i] = Type(a[i] | b[i]);
450
+ }
451
+ return ret;
452
+ }
453
+
454
+ template<typename Type>
455
+ inline CUDA_CALLABLE vec_t<2, Type> bit_or(vec_t<2, Type> a, vec_t<2, Type> b)
456
+ {
457
+ return vec_t<2, Type>( a.c[0] | b.c[0], a.c[1] | b.c[1]);
458
+ }
459
+
460
+ template<typename Type>
461
+ inline CUDA_CALLABLE vec_t<3, Type> bit_or(vec_t<3, Type> a, vec_t<3, Type> b)
462
+ {
463
+ return vec_t<3, Type>( a.c[0] | b.c[0], a.c[1] | b.c[1], a.c[2] | b.c[2]);
464
+ }
465
+
466
+ // bitwise XOR
467
+ template<unsigned Length, typename Type>
468
+ inline CUDA_CALLABLE vec_t<Length, Type> bit_xor(vec_t<Length, Type> a, vec_t<Length, Type> b)
469
+ {
470
+ vec_t<Length, Type> ret;
471
+ for( unsigned i=0; i < Length; ++i )
472
+ {
473
+ ret[i] = Type(a[i] ^ b[i]);
474
+ }
475
+ return ret;
476
+ }
477
+
478
+ template<typename Type>
479
+ inline CUDA_CALLABLE vec_t<2, Type> bit_xor(vec_t<2, Type> a, vec_t<2, Type> b)
480
+ {
481
+ return vec_t<2, Type>( a.c[0] ^ b.c[0], a.c[1] ^ b.c[1]);
482
+ }
483
+
484
+ template<typename Type>
485
+ inline CUDA_CALLABLE vec_t<3, Type> bit_xor(vec_t<3, Type> a, vec_t<3, Type> b)
486
+ {
487
+ return vec_t<3, Type>( a.c[0] ^ b.c[0], a.c[1] ^ b.c[1], a.c[2] ^ b.c[2]);
488
+ }
489
+
490
+ // left shift
491
+ template<unsigned Length, typename Type>
492
+ inline CUDA_CALLABLE vec_t<Length, Type> lshift(vec_t<Length, Type> a, vec_t<Length, Type> b)
493
+ {
494
+ vec_t<Length, Type> ret;
495
+ for( unsigned i=0; i < Length; ++i )
496
+ {
497
+ ret[i] = Type(a[i] << b[i]);
498
+ }
499
+ return ret;
500
+ }
501
+
502
+ template<typename Type>
503
+ inline CUDA_CALLABLE vec_t<2, Type> lshift(vec_t<2, Type> a, vec_t<2, Type> b)
504
+ {
505
+ return vec_t<2, Type>( a.c[0] << b.c[0], a.c[1] << b.c[1]);
506
+ }
507
+
508
+ template<typename Type>
509
+ inline CUDA_CALLABLE vec_t<3, Type> lshift(vec_t<3, Type> a, vec_t<3, Type> b)
510
+ {
511
+ return vec_t<3, Type>( a.c[0] << b.c[0], a.c[1] << b.c[1], a.c[2] << b.c[2]);
512
+ }
513
+
514
+ // right shift
515
+ template<unsigned Length, typename Type>
516
+ inline CUDA_CALLABLE vec_t<Length, Type> rshift(vec_t<Length, Type> a, vec_t<Length, Type> b)
517
+ {
518
+ vec_t<Length, Type> ret;
519
+ for( unsigned i=0; i < Length; ++i )
520
+ {
521
+ ret[i] = Type(a[i] >> b[i]);
522
+ }
523
+ return ret;
524
+ }
525
+
526
+ template<typename Type>
527
+ inline CUDA_CALLABLE vec_t<2, Type> rshift(vec_t<2, Type> a, vec_t<2, Type> b)
528
+ {
529
+ return vec_t<2, Type>( a.c[0] >> b.c[0], a.c[1] >> b.c[1]);
530
+ }
531
+
532
+ template<typename Type>
533
+ inline CUDA_CALLABLE vec_t<3, Type> rshift(vec_t<3, Type> a, vec_t<3, Type> b)
534
+ {
535
+ return vec_t<3, Type>( a.c[0] >> b.c[0], a.c[1] >> b.c[1], a.c[2] >> b.c[2]);
536
+ }
537
+
538
+ // invert
539
+ template<unsigned Length, typename Type>
540
+ inline CUDA_CALLABLE vec_t<Length,Type> invert(vec_t<Length,Type> v)
541
+ {
542
+ vec_t<Length,Type> ret;
543
+ for (unsigned i=0; i < Length; ++i)
544
+ {
545
+ ret[i] = ~v[i];
546
+ }
547
+
548
+ return ret;
549
+ }
550
+
430
551
  // dot product:
431
552
  template<unsigned Length, typename Type>
432
553
  inline CUDA_CALLABLE Type dot(vec_t<Length, Type> a, vec_t<Length, Type> b)
@@ -747,6 +868,183 @@ inline CUDA_CALLABLE void adj_sub_inplace(
747
868
  }
748
869
 
749
870
 
871
+ template<unsigned Length, typename Type>
872
+ inline CUDA_CALLABLE void bit_and_inplace(vec_t<Length, Type>& v, int idx, Type value)
873
+ {
874
+ #ifndef NDEBUG
875
+ if (idx < -(int)Length || idx >= (int)Length)
876
+ {
877
+ printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
878
+ assert(0);
879
+ }
880
+ #endif
881
+
882
+ if (idx < 0)
883
+ {
884
+ idx += Length;
885
+ }
886
+
887
+ v[idx] &= value;
888
+ }
889
+
890
+
891
+ template<unsigned SliceLength, unsigned Length, typename Type>
892
+ inline CUDA_CALLABLE void bit_and_inplace(vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a)
893
+ {
894
+ assert(slice.start >= 0 && slice.start <= (int)Length);
895
+ assert(slice.stop >= -1 && slice.stop <= (int)Length);
896
+ assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
897
+ assert(slice_get_length(slice) == SliceLength);
898
+
899
+ bool is_reversed = slice.step < 0;
900
+
901
+ int ii = 0;
902
+ for (
903
+ int i = slice.start;
904
+ is_reversed ? (i > slice.stop) : (i < slice.stop);
905
+ i += slice.step
906
+ )
907
+ {
908
+ v[i] &= a[ii];
909
+ ++ii;
910
+ }
911
+
912
+ assert(ii == SliceLength);
913
+ }
914
+
915
+
916
+ template<unsigned Length, typename Type>
917
+ inline CUDA_CALLABLE void adj_bit_and_inplace(
918
+ vec_t<Length, Type>& v, int idx, Type value,
919
+ vec_t<Length, Type>& adj_v, int adj_idx, Type& adj_value
920
+ ) {}
921
+
922
+
923
+ template<unsigned SliceLength, unsigned Length, typename Type>
924
+ inline CUDA_CALLABLE void adj_bit_and_inplace(
925
+ const vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a,
926
+ vec_t<Length, Type>& adj_v, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
927
+ ) {}
928
+
929
+
930
+ template<unsigned Length, typename Type>
931
+ inline CUDA_CALLABLE void bit_or_inplace(vec_t<Length, Type>& v, int idx, Type value)
932
+ {
933
+ #ifndef NDEBUG
934
+ if (idx < -(int)Length || idx >= (int)Length)
935
+ {
936
+ printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
937
+ assert(0);
938
+ }
939
+ #endif
940
+
941
+ if (idx < 0)
942
+ {
943
+ idx += Length;
944
+ }
945
+
946
+ v[idx] |= value;
947
+ }
948
+
949
+
950
+ template<unsigned SliceLength, unsigned Length, typename Type>
951
+ inline CUDA_CALLABLE void bit_or_inplace(vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a)
952
+ {
953
+ assert(slice.start >= 0 && slice.start <= (int)Length);
954
+ assert(slice.stop >= -1 && slice.stop <= (int)Length);
955
+ assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
956
+ assert(slice_get_length(slice) == SliceLength);
957
+
958
+ bool is_reversed = slice.step < 0;
959
+
960
+ int ii = 0;
961
+ for (
962
+ int i = slice.start;
963
+ is_reversed ? (i > slice.stop) : (i < slice.stop);
964
+ i += slice.step
965
+ )
966
+ {
967
+ v[i] |= a[ii];
968
+ ++ii;
969
+ }
970
+
971
+ assert(ii == SliceLength);
972
+ }
973
+
974
+
975
+ template<unsigned Length, typename Type>
976
+ inline CUDA_CALLABLE void adj_bit_or_inplace(
977
+ vec_t<Length, Type>& v, int idx, Type value,
978
+ vec_t<Length, Type>& adj_v, int adj_idx, Type& adj_value
979
+ ) {}
980
+
981
+
982
+ template<unsigned SliceLength, unsigned Length, typename Type>
983
+ inline CUDA_CALLABLE void adj_bit_or_inplace(
984
+ const vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a,
985
+ vec_t<Length, Type>& adj_v, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
986
+ ) {}
987
+
988
+
989
+ template<unsigned Length, typename Type>
990
+ inline CUDA_CALLABLE void bit_xor_inplace(vec_t<Length, Type>& v, int idx, Type value)
991
+ {
992
+ #ifndef NDEBUG
993
+ if (idx < -(int)Length || idx >= (int)Length)
994
+ {
995
+ printf("vec index %d out of bounds at %s %d\n", idx, __FILE__, __LINE__);
996
+ assert(0);
997
+ }
998
+ #endif
999
+
1000
+ if (idx < 0)
1001
+ {
1002
+ idx += Length;
1003
+ }
1004
+
1005
+ v[idx] ^= value;
1006
+ }
1007
+
1008
+
1009
+ template<unsigned SliceLength, unsigned Length, typename Type>
1010
+ inline CUDA_CALLABLE void bit_xor_inplace(vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a)
1011
+ {
1012
+ assert(slice.start >= 0 && slice.start <= (int)Length);
1013
+ assert(slice.stop >= -1 && slice.stop <= (int)Length);
1014
+ assert(slice.step != 0 && slice.step < 0 ? slice.start >= slice.stop : slice.start <= slice.stop);
1015
+ assert(slice_get_length(slice) == SliceLength);
1016
+
1017
+ bool is_reversed = slice.step < 0;
1018
+
1019
+ int ii = 0;
1020
+ for (
1021
+ int i = slice.start;
1022
+ is_reversed ? (i > slice.stop) : (i < slice.stop);
1023
+ i += slice.step
1024
+ )
1025
+ {
1026
+ v[i] ^= a[ii];
1027
+ ++ii;
1028
+ }
1029
+
1030
+ assert(ii == SliceLength);
1031
+ }
1032
+
1033
+
1034
+ template<unsigned Length, typename Type>
1035
+ inline CUDA_CALLABLE void adj_bit_xor_inplace(
1036
+ vec_t<Length, Type>& v, int idx, Type value,
1037
+ vec_t<Length, Type>& adj_v, int adj_idx, Type& adj_value
1038
+ ) {}
1039
+
1040
+
1041
+ template<unsigned SliceLength, unsigned Length, typename Type>
1042
+ inline CUDA_CALLABLE void adj_bit_xor_inplace(
1043
+ const vec_t<Length, Type>& v, slice_t slice, const vec_t<SliceLength, Type> &a,
1044
+ vec_t<Length, Type>& adj_v, slice_t& adj_slice, vec_t<SliceLength, Type>& adj_a
1045
+ ) {}
1046
+
1047
+
750
1048
  template<unsigned Length, typename Type>
751
1049
  inline CUDA_CALLABLE void assign_inplace(vec_t<Length, Type>& v, int idx, Type value)
752
1050
  {
@@ -1303,21 +1601,6 @@ inline CUDA_CALLABLE void adj_add(vec_t<Length, Type> a, vec_t<Length, Type> b,
1303
1601
  adj_b += adj_ret;
1304
1602
  }
1305
1603
 
1306
- template<unsigned Length, typename Type>
1307
- inline CUDA_CALLABLE void adj_add(
1308
- Type a, vec_t<Length, Type> b,
1309
- Type& adj_a, vec_t<Length, Type>& adj_b,
1310
- const vec_t<Length, Type>& adj_ret
1311
- )
1312
- {
1313
- for (unsigned i = 0; i < Length; ++i)
1314
- {
1315
- adj_a += adj_ret.c[i];
1316
- }
1317
-
1318
- adj_b += adj_ret;
1319
- }
1320
-
1321
1604
  template<typename Type>
1322
1605
  inline CUDA_CALLABLE void adj_add(vec_t<2, Type> a, vec_t<2, Type> b, vec_t<2, Type>& adj_a, vec_t<2, Type>& adj_b, const vec_t<2, Type>& adj_ret)
1323
1606
  {
@@ -1345,21 +1628,6 @@ inline CUDA_CALLABLE void adj_sub(vec_t<Length, Type> a, vec_t<Length, Type> b,
1345
1628
  adj_b -= adj_ret;
1346
1629
  }
1347
1630
 
1348
- template<unsigned Length, typename Type>
1349
- inline CUDA_CALLABLE void adj_sub(
1350
- Type a, vec_t<Length, Type> b,
1351
- Type& adj_a, vec_t<Length, Type>& adj_b,
1352
- const vec_t<Length, Type>& adj_ret
1353
- )
1354
- {
1355
- for (unsigned i = 0; i < Length; ++i)
1356
- {
1357
- adj_a += adj_ret.c[i];
1358
- }
1359
-
1360
- adj_b -= adj_ret;
1361
- }
1362
-
1363
1631
  template<typename Type>
1364
1632
  inline CUDA_CALLABLE void adj_sub(vec_t<2, Type> a, vec_t<2, Type> b, vec_t<2, Type>& adj_a, vec_t<2, Type>& adj_b, const vec_t<2, Type>& adj_ret)
1365
1633
  {
@@ -1395,6 +1663,90 @@ inline CUDA_CALLABLE void adj_mod(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, T
1395
1663
  {
1396
1664
  }
1397
1665
 
1666
+ template<unsigned Length, typename Type>
1667
+ inline CUDA_CALLABLE void adj_bit_and(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret)
1668
+ {
1669
+ }
1670
+
1671
+ template<typename Type>
1672
+ inline CUDA_CALLABLE void adj_bit_and(vec_t<2, Type> a, vec_t<2, Type> b, vec_t<2, Type>& adj_a, vec_t<2, Type>& adj_b, const vec_t<2, Type>& adj_ret)
1673
+ {
1674
+ }
1675
+
1676
+ template<typename Type>
1677
+ inline CUDA_CALLABLE void adj_bit_and(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, Type>& adj_a, vec_t<3, Type>& adj_b, const vec_t<3, Type>& adj_ret)
1678
+ {
1679
+ }
1680
+
1681
+ template<unsigned Length, typename Type>
1682
+ inline CUDA_CALLABLE void adj_bit_or(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret)
1683
+ {
1684
+ }
1685
+
1686
+ template<typename Type>
1687
+ inline CUDA_CALLABLE void adj_bit_or(vec_t<2, Type> a, vec_t<2, Type> b, vec_t<2, Type>& adj_a, vec_t<2, Type>& adj_b, const vec_t<2, Type>& adj_ret)
1688
+ {
1689
+ }
1690
+
1691
+ template<typename Type>
1692
+ inline CUDA_CALLABLE void adj_bit_or(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, Type>& adj_a, vec_t<3, Type>& adj_b, const vec_t<3, Type>& adj_ret)
1693
+ {
1694
+ }
1695
+
1696
+ template<unsigned Length, typename Type>
1697
+ inline CUDA_CALLABLE void adj_bit_xor(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret)
1698
+ {
1699
+ }
1700
+
1701
+ template<typename Type>
1702
+ inline CUDA_CALLABLE void adj_bit_xor(vec_t<2, Type> a, vec_t<2, Type> b, vec_t<2, Type>& adj_a, vec_t<2, Type>& adj_b, const vec_t<2, Type>& adj_ret)
1703
+ {
1704
+ }
1705
+
1706
+ template<typename Type>
1707
+ inline CUDA_CALLABLE void adj_bit_xor(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, Type>& adj_a, vec_t<3, Type>& adj_b, const vec_t<3, Type>& adj_ret)
1708
+ {
1709
+ }
1710
+
1711
+ template<unsigned Length, typename Type>
1712
+ inline CUDA_CALLABLE void adj_lshift(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret)
1713
+ {
1714
+ }
1715
+
1716
+ template<typename Type>
1717
+ inline CUDA_CALLABLE void adj_lshift(vec_t<2, Type> a, vec_t<2, Type> b, vec_t<2, Type>& adj_a, vec_t<2, Type>& adj_b, const vec_t<2, Type>& adj_ret)
1718
+ {
1719
+ }
1720
+
1721
+ template<typename Type>
1722
+ inline CUDA_CALLABLE void adj_lshift(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, Type>& adj_a, vec_t<3, Type>& adj_b, const vec_t<3, Type>& adj_ret)
1723
+ {
1724
+ }
1725
+
1726
+ template<unsigned Length, typename Type>
1727
+ inline CUDA_CALLABLE void adj_rshift(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret)
1728
+ {
1729
+ }
1730
+
1731
+ template<typename Type>
1732
+ inline CUDA_CALLABLE void adj_rshift(vec_t<2, Type> a, vec_t<2, Type> b, vec_t<2, Type>& adj_a, vec_t<2, Type>& adj_b, const vec_t<2, Type>& adj_ret)
1733
+ {
1734
+ }
1735
+
1736
+ template<typename Type>
1737
+ inline CUDA_CALLABLE void adj_rshift(vec_t<3, Type> a, vec_t<3, Type> b, vec_t<3, Type>& adj_a, vec_t<3, Type>& adj_b, const vec_t<3, Type>& adj_ret)
1738
+ {
1739
+ }
1740
+
1741
+ template<unsigned Length, typename Type>
1742
+ inline CUDA_CALLABLE void adj_invert(
1743
+ const vec_t<Length,Type>& v,
1744
+ vec_t<Length,Type>& adj_v,
1745
+ const vec_t<Length,Type>& adj_ret
1746
+ )
1747
+ {
1748
+ }
1749
+
1398
1750
  template<unsigned Length, typename Type>
1399
1751
  inline CUDA_CALLABLE void adj_dot(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const Type adj_ret)
1400
1752
  {
@@ -1679,6 +2031,42 @@ inline CUDA_CALLABLE vec_t<Length, Type> atomic_max(vec_t<Length, Type> * addr,
1679
2031
  return ret;
1680
2032
  }
1681
2033
 
2034
+ template<unsigned Length, typename Type>
2035
+ inline CUDA_CALLABLE vec_t<Length, Type> atomic_and(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
2036
+ {
2037
+ vec_t<Length, Type> ret;
2038
+ for( unsigned i=0; i < Length; ++i )
2039
+ {
2040
+ ret[i] = atomic_and(&(addr -> c[i]), value[i]);
2041
+ }
2042
+
2043
+ return ret;
2044
+ }
2045
+
2046
+ template<unsigned Length, typename Type>
2047
+ inline CUDA_CALLABLE vec_t<Length, Type> atomic_or(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
2048
+ {
2049
+ vec_t<Length, Type> ret;
2050
+ for( unsigned i=0; i < Length; ++i )
2051
+ {
2052
+ ret[i] = atomic_or(&(addr -> c[i]), value[i]);
2053
+ }
2054
+
2055
+ return ret;
2056
+ }
2057
+
2058
+ template<unsigned Length, typename Type>
2059
+ inline CUDA_CALLABLE vec_t<Length, Type> atomic_xor(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
2060
+ {
2061
+ vec_t<Length, Type> ret;
2062
+ for( unsigned i=0; i < Length; ++i )
2063
+ {
2064
+ ret[i] = atomic_xor(&(addr -> c[i]), value[i]);
2065
+ }
2066
+
2067
+ return ret;
2068
+ }
2069
+
1682
2070
  template<unsigned Length, typename Type>
1683
2071
  inline CUDA_CALLABLE void adj_atomic_minmax(
1684
2072
  vec_t<Length,Type> *addr,
@@ -1736,6 +2124,34 @@ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_add(vec_t<Length,
1736
2124
  template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_add(vec_t<Length, int64>* buf, const vec_t<Length, int64> &value) { }
1737
2125
  template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_add(vec_t<Length, uint64>* buf, const vec_t<Length, uint64> &value) { }
1738
2126
 
2127
+ // for bitwise operations we do not accumulate gradients
2128
+ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_and(vec_t<Length, int8>* buf, const vec_t<Length, int8> &value) { }
2129
+ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_and(vec_t<Length, uint8>* buf, const vec_t<Length, uint8> &value) { }
2130
+ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_and(vec_t<Length, int16>* buf, const vec_t<Length, int16> &value) { }
2131
+ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_and(vec_t<Length, uint16>* buf, const vec_t<Length, uint16> &value) { }
2132
+ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_and(vec_t<Length, int32>* buf, const vec_t<Length, int32> &value) { }
2133
+ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_and(vec_t<Length, uint32>* buf, const vec_t<Length, uint32> &value) { }
2134
+ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_and(vec_t<Length, int64>* buf, const vec_t<Length, int64> &value) { }
2135
+ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_and(vec_t<Length, uint64>* buf, const vec_t<Length, uint64> &value) { }
2136
+
2137
+ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_or(vec_t<Length, int8>* buf, const vec_t<Length, int8> &value) { }
2138
+ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_or(vec_t<Length, uint8>* buf, const vec_t<Length, uint8> &value) { }
2139
+ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_or(vec_t<Length, int16>* buf, const vec_t<Length, int16> &value) { }
2140
+ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_or(vec_t<Length, uint16>* buf, const vec_t<Length, uint16> &value) { }
2141
+ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_or(vec_t<Length, int32>* buf, const vec_t<Length, int32> &value) { }
2142
+ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_or(vec_t<Length, uint32>* buf, const vec_t<Length, uint32> &value) { }
2143
+ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_or(vec_t<Length, int64>* buf, const vec_t<Length, int64> &value) { }
2144
+ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_or(vec_t<Length, uint64>* buf, const vec_t<Length, uint64> &value) { }
2145
+
2146
+ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_xor(vec_t<Length, int8>* buf, const vec_t<Length, int8> &value) { }
2147
+ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_xor(vec_t<Length, uint8>* buf, const vec_t<Length, uint8> &value) { }
2148
+ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_xor(vec_t<Length, int16>* buf, const vec_t<Length, int16> &value) { }
2149
+ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_xor(vec_t<Length, uint16>* buf, const vec_t<Length, uint16> &value) { }
2150
+ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_xor(vec_t<Length, int32>* buf, const vec_t<Length, int32> &value) { }
2151
+ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_xor(vec_t<Length, uint32>* buf, const vec_t<Length, uint32> &value) { }
2152
+ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_xor(vec_t<Length, int64>* buf, const vec_t<Length, int64> &value) { }
2153
+ template<unsigned Length> CUDA_CALLABLE inline void adj_atomic_xor(vec_t<Length, uint64>* buf, const vec_t<Length, uint64> &value) { }
2154
+
1739
2155
 
1740
2156
  // adjoints for some of the constructors, used in intersect.h
1741
2157
  inline CUDA_CALLABLE void adj_vec2(float x, float y, float& adj_x, float& adj_y, const vec2& adj_ret)
warp/native/version.h ADDED
@@ -0,0 +1,23 @@
1
+ /*
2
+ * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3
+ * SPDX-License-Identifier: Apache-2.0
4
+ *
5
+ * Licensed under the Apache License, Version 2.0 (the "License");
6
+ * you may not use this file except in compliance with the License.
7
+ * You may obtain a copy of the License at
8
+ *
9
+ * http://www.apache.org/licenses/LICENSE-2.0
10
+ *
11
+ * Unless required by applicable law or agreed to in writing, software
12
+ * distributed under the License is distributed on an "AS IS" BASIS,
13
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ * See the License for the specific language governing permissions and
15
+ * limitations under the License.
16
+ */
17
+
18
+ #ifndef WP_VERSION_H
19
+ #define WP_VERSION_H
20
+
21
+ #define WP_VERSION_STRING "1.10.0rc2"
22
+
23
+ #endif // WP_VERSION_H
warp/native/volume.cpp CHANGED
@@ -205,7 +205,7 @@ void wp_volume_get_buffer_info(uint64_t id, void** buf, uint64_t* size)
205
205
 
206
206
  void wp_volume_get_voxel_size(uint64_t id, float* dx, float* dy, float* dz)
207
207
  {
208
- *dx = *dx = *dz = 0.0f;
208
+ *dx = *dy = *dz = 0.0f;
209
209
 
210
210
  const VolumeDesc* volume;
211
211
  if (volume_get_descriptor(id, volume))
warp/native/volume.cu CHANGED
@@ -19,6 +19,7 @@
19
19
  #include "volume_impl.h"
20
20
  #include "warp.h"
21
21
 
22
+ extern CUcontext get_current_context();
22
23
 
23
24
  __global__ void volume_get_leaf_coords(const uint32_t leaf_count, pnanovdb_coord_t *leaf_coords,
24
25
  const pnanovdb_buf_t buf)
warp/native/volume.h CHANGED
@@ -161,7 +161,7 @@ CUDA_CALLABLE inline void pnano_read(T &result, pnanovdb_buf_t buf, PNANOVDB_INO
161
161
  pnano_read<T>(result, buf, address);
162
162
  }
163
163
 
164
- /// regular grid accessor (values stored in leafs)
164
+ /// regular grid accessor (values stored in leaves)
165
165
 
166
166
  struct value_accessor_base
167
167
  {
@@ -16,6 +16,8 @@
16
16
  */
17
17
 
18
18
  #include "volume_builder.h"
19
+ #include "cuda_util.h"
20
+ #include "warp.h"
19
21
 
20
22
  #include <nanovdb/tools/cuda/PointsToGrid.cuh>
21
23