warp-lang 1.9.1-py3-none-win_amd64.whl → 1.10.0rc2-py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of warp-lang has been flagged as possibly problematic by the registry.

Files changed (346)
  1. warp/__init__.py +301 -287
  2. warp/__init__.pyi +794 -305
  3. warp/_src/__init__.py +14 -0
  4. warp/_src/autograd.py +1075 -0
  5. warp/_src/build.py +618 -0
  6. warp/_src/build_dll.py +640 -0
  7. warp/{builtins.py → _src/builtins.py} +1382 -377
  8. warp/_src/codegen.py +4359 -0
  9. warp/{config.py → _src/config.py} +178 -169
  10. warp/_src/constants.py +57 -0
  11. warp/_src/context.py +8294 -0
  12. warp/_src/dlpack.py +462 -0
  13. warp/_src/fabric.py +355 -0
  14. warp/_src/fem/__init__.py +14 -0
  15. warp/_src/fem/adaptivity.py +508 -0
  16. warp/_src/fem/cache.py +687 -0
  17. warp/_src/fem/dirichlet.py +188 -0
  18. warp/{fem → _src/fem}/domain.py +40 -30
  19. warp/_src/fem/field/__init__.py +131 -0
  20. warp/_src/fem/field/field.py +701 -0
  21. warp/{fem → _src/fem}/field/nodal_field.py +30 -15
  22. warp/{fem → _src/fem}/field/restriction.py +1 -1
  23. warp/{fem → _src/fem}/field/virtual.py +53 -27
  24. warp/_src/fem/geometry/__init__.py +32 -0
  25. warp/{fem → _src/fem}/geometry/adaptive_nanogrid.py +77 -163
  26. warp/_src/fem/geometry/closest_point.py +97 -0
  27. warp/{fem → _src/fem}/geometry/deformed_geometry.py +14 -22
  28. warp/{fem → _src/fem}/geometry/element.py +32 -10
  29. warp/{fem → _src/fem}/geometry/geometry.py +48 -20
  30. warp/{fem → _src/fem}/geometry/grid_2d.py +12 -23
  31. warp/{fem → _src/fem}/geometry/grid_3d.py +12 -23
  32. warp/{fem → _src/fem}/geometry/hexmesh.py +40 -63
  33. warp/{fem → _src/fem}/geometry/nanogrid.py +255 -248
  34. warp/{fem → _src/fem}/geometry/partition.py +121 -63
  35. warp/{fem → _src/fem}/geometry/quadmesh.py +26 -45
  36. warp/{fem → _src/fem}/geometry/tetmesh.py +40 -63
  37. warp/{fem → _src/fem}/geometry/trimesh.py +26 -45
  38. warp/{fem → _src/fem}/integrate.py +164 -158
  39. warp/_src/fem/linalg.py +383 -0
  40. warp/_src/fem/operator.py +396 -0
  41. warp/_src/fem/polynomial.py +229 -0
  42. warp/{fem → _src/fem}/quadrature/pic_quadrature.py +15 -20
  43. warp/{fem → _src/fem}/quadrature/quadrature.py +95 -47
  44. warp/_src/fem/space/__init__.py +248 -0
  45. warp/{fem → _src/fem}/space/basis_function_space.py +20 -11
  46. warp/_src/fem/space/basis_space.py +679 -0
  47. warp/{fem → _src/fem}/space/dof_mapper.py +3 -3
  48. warp/{fem → _src/fem}/space/function_space.py +14 -13
  49. warp/{fem → _src/fem}/space/grid_2d_function_space.py +4 -7
  50. warp/{fem → _src/fem}/space/grid_3d_function_space.py +4 -4
  51. warp/{fem → _src/fem}/space/hexmesh_function_space.py +4 -10
  52. warp/{fem → _src/fem}/space/nanogrid_function_space.py +3 -9
  53. warp/{fem → _src/fem}/space/partition.py +117 -60
  54. warp/{fem → _src/fem}/space/quadmesh_function_space.py +4 -10
  55. warp/{fem → _src/fem}/space/restriction.py +66 -33
  56. warp/_src/fem/space/shape/__init__.py +152 -0
  57. warp/{fem → _src/fem}/space/shape/cube_shape_function.py +9 -9
  58. warp/{fem → _src/fem}/space/shape/shape_function.py +8 -9
  59. warp/{fem → _src/fem}/space/shape/square_shape_function.py +6 -6
  60. warp/{fem → _src/fem}/space/shape/tet_shape_function.py +3 -3
  61. warp/{fem → _src/fem}/space/shape/triangle_shape_function.py +3 -3
  62. warp/{fem → _src/fem}/space/tetmesh_function_space.py +3 -9
  63. warp/_src/fem/space/topology.py +459 -0
  64. warp/{fem → _src/fem}/space/trimesh_function_space.py +3 -9
  65. warp/_src/fem/types.py +112 -0
  66. warp/_src/fem/utils.py +486 -0
  67. warp/_src/jax.py +186 -0
  68. warp/_src/jax_experimental/__init__.py +14 -0
  69. warp/_src/jax_experimental/custom_call.py +387 -0
  70. warp/_src/jax_experimental/ffi.py +1284 -0
  71. warp/_src/jax_experimental/xla_ffi.py +656 -0
  72. warp/_src/marching_cubes.py +708 -0
  73. warp/_src/math.py +414 -0
  74. warp/_src/optim/__init__.py +14 -0
  75. warp/_src/optim/adam.py +163 -0
  76. warp/_src/optim/linear.py +1606 -0
  77. warp/_src/optim/sgd.py +112 -0
  78. warp/_src/paddle.py +406 -0
  79. warp/_src/render/__init__.py +14 -0
  80. warp/_src/render/imgui_manager.py +289 -0
  81. warp/_src/render/render_opengl.py +3636 -0
  82. warp/_src/render/render_usd.py +937 -0
  83. warp/_src/render/utils.py +160 -0
  84. warp/_src/sparse.py +2716 -0
  85. warp/_src/tape.py +1206 -0
  86. warp/{thirdparty → _src/thirdparty}/unittest_parallel.py +9 -2
  87. warp/_src/torch.py +391 -0
  88. warp/_src/types.py +5870 -0
  89. warp/_src/utils.py +1693 -0
  90. warp/autograd.py +12 -1054
  91. warp/bin/warp-clang.dll +0 -0
  92. warp/bin/warp.dll +0 -0
  93. warp/build.py +8 -588
  94. warp/build_dll.py +6 -721
  95. warp/codegen.py +6 -4251
  96. warp/constants.py +6 -39
  97. warp/context.py +12 -8062
  98. warp/dlpack.py +6 -444
  99. warp/examples/distributed/example_jacobi_mpi.py +4 -5
  100. warp/examples/fem/example_adaptive_grid.py +1 -1
  101. warp/examples/fem/example_apic_fluid.py +1 -1
  102. warp/examples/fem/example_burgers.py +8 -8
  103. warp/examples/fem/example_diffusion.py +1 -1
  104. warp/examples/fem/example_distortion_energy.py +1 -1
  105. warp/examples/fem/example_mixed_elasticity.py +2 -2
  106. warp/examples/fem/example_navier_stokes.py +1 -1
  107. warp/examples/fem/example_nonconforming_contact.py +7 -7
  108. warp/examples/fem/example_stokes.py +1 -1
  109. warp/examples/fem/example_stokes_transfer.py +1 -1
  110. warp/examples/fem/utils.py +2 -2
  111. warp/examples/interop/example_jax_callable.py +1 -1
  112. warp/examples/interop/example_jax_ffi_callback.py +1 -1
  113. warp/examples/interop/example_jax_kernel.py +1 -1
  114. warp/examples/tile/example_tile_mcgp.py +191 -0
  115. warp/fabric.py +6 -337
  116. warp/fem/__init__.py +159 -97
  117. warp/fem/adaptivity.py +7 -489
  118. warp/fem/cache.py +9 -648
  119. warp/fem/dirichlet.py +6 -184
  120. warp/fem/field/__init__.py +8 -109
  121. warp/fem/field/field.py +7 -652
  122. warp/fem/geometry/__init__.py +7 -18
  123. warp/fem/geometry/closest_point.py +11 -77
  124. warp/fem/linalg.py +18 -366
  125. warp/fem/operator.py +11 -369
  126. warp/fem/polynomial.py +9 -209
  127. warp/fem/space/__init__.py +5 -211
  128. warp/fem/space/basis_space.py +6 -662
  129. warp/fem/space/shape/__init__.py +41 -118
  130. warp/fem/space/topology.py +6 -437
  131. warp/fem/types.py +6 -81
  132. warp/fem/utils.py +11 -444
  133. warp/jax.py +8 -165
  134. warp/jax_experimental/__init__.py +14 -1
  135. warp/jax_experimental/custom_call.py +8 -365
  136. warp/jax_experimental/ffi.py +17 -873
  137. warp/jax_experimental/xla_ffi.py +5 -605
  138. warp/marching_cubes.py +5 -689
  139. warp/math.py +16 -393
  140. warp/native/array.h +385 -37
  141. warp/native/builtin.h +314 -37
  142. warp/native/bvh.cpp +43 -9
  143. warp/native/bvh.cu +62 -27
  144. warp/native/bvh.h +310 -309
  145. warp/native/clang/clang.cpp +102 -97
  146. warp/native/coloring.cpp +0 -1
  147. warp/native/crt.h +208 -0
  148. warp/native/exports.h +156 -0
  149. warp/native/hashgrid.cu +2 -0
  150. warp/native/intersect.h +24 -1
  151. warp/native/intersect_tri.h +44 -35
  152. warp/native/mat.h +1456 -276
  153. warp/native/mesh.cpp +4 -4
  154. warp/native/mesh.cu +4 -2
  155. warp/native/mesh.h +176 -61
  156. warp/native/quat.h +0 -52
  157. warp/native/scan.cu +2 -0
  158. warp/native/sparse.cu +7 -3
  159. warp/native/spatial.h +12 -0
  160. warp/native/tile.h +681 -89
  161. warp/native/tile_radix_sort.h +1 -1
  162. warp/native/tile_reduce.h +394 -46
  163. warp/native/tile_scan.h +4 -4
  164. warp/native/vec.h +469 -0
  165. warp/native/version.h +23 -0
  166. warp/native/volume.cpp +1 -1
  167. warp/native/volume.cu +1 -0
  168. warp/native/volume.h +1 -1
  169. warp/native/volume_builder.cu +2 -0
  170. warp/native/warp.cpp +57 -29
  171. warp/native/warp.cu +253 -171
  172. warp/native/warp.h +11 -8
  173. warp/optim/__init__.py +6 -3
  174. warp/optim/adam.py +6 -145
  175. warp/optim/linear.py +14 -1585
  176. warp/optim/sgd.py +6 -94
  177. warp/paddle.py +6 -388
  178. warp/render/__init__.py +8 -4
  179. warp/render/imgui_manager.py +7 -267
  180. warp/render/render_opengl.py +6 -3618
  181. warp/render/render_usd.py +6 -919
  182. warp/render/utils.py +6 -142
  183. warp/sparse.py +37 -2563
  184. warp/tape.py +6 -1188
  185. warp/tests/__main__.py +1 -1
  186. warp/tests/cuda/test_async.py +4 -4
  187. warp/tests/cuda/test_conditional_captures.py +1 -1
  188. warp/tests/cuda/test_multigpu.py +1 -1
  189. warp/tests/cuda/test_streams.py +58 -1
  190. warp/tests/geometry/test_bvh.py +157 -22
  191. warp/tests/geometry/test_marching_cubes.py +0 -1
  192. warp/tests/geometry/test_mesh.py +5 -3
  193. warp/tests/geometry/test_mesh_query_aabb.py +5 -12
  194. warp/tests/geometry/test_mesh_query_point.py +5 -2
  195. warp/tests/geometry/test_mesh_query_ray.py +15 -3
  196. warp/tests/geometry/test_volume_write.py +5 -5
  197. warp/tests/interop/test_dlpack.py +14 -14
  198. warp/tests/interop/test_jax.py +772 -49
  199. warp/tests/interop/test_paddle.py +1 -1
  200. warp/tests/test_adam.py +0 -1
  201. warp/tests/test_arithmetic.py +9 -9
  202. warp/tests/test_array.py +527 -100
  203. warp/tests/test_array_reduce.py +3 -3
  204. warp/tests/test_atomic.py +12 -8
  205. warp/tests/test_atomic_bitwise.py +209 -0
  206. warp/tests/test_atomic_cas.py +4 -4
  207. warp/tests/test_bool.py +2 -2
  208. warp/tests/test_builtins_resolution.py +5 -571
  209. warp/tests/test_codegen.py +33 -14
  210. warp/tests/test_conditional.py +1 -1
  211. warp/tests/test_context.py +6 -6
  212. warp/tests/test_copy.py +242 -161
  213. warp/tests/test_ctypes.py +3 -3
  214. warp/tests/test_devices.py +24 -2
  215. warp/tests/test_examples.py +16 -84
  216. warp/tests/test_fabricarray.py +35 -35
  217. warp/tests/test_fast_math.py +0 -2
  218. warp/tests/test_fem.py +56 -10
  219. warp/tests/test_fixedarray.py +3 -3
  220. warp/tests/test_func.py +8 -5
  221. warp/tests/test_generics.py +1 -1
  222. warp/tests/test_indexedarray.py +24 -24
  223. warp/tests/test_intersect.py +39 -9
  224. warp/tests/test_large.py +1 -1
  225. warp/tests/test_lerp.py +3 -1
  226. warp/tests/test_linear_solvers.py +1 -1
  227. warp/tests/test_map.py +35 -4
  228. warp/tests/test_mat.py +52 -62
  229. warp/tests/test_mat_constructors.py +4 -5
  230. warp/tests/test_mat_lite.py +1 -1
  231. warp/tests/test_mat_scalar_ops.py +121 -121
  232. warp/tests/test_math.py +34 -0
  233. warp/tests/test_module_aot.py +4 -4
  234. warp/tests/test_modules_lite.py +28 -2
  235. warp/tests/test_print.py +11 -11
  236. warp/tests/test_quat.py +93 -58
  237. warp/tests/test_runlength_encode.py +1 -1
  238. warp/tests/test_scalar_ops.py +38 -10
  239. warp/tests/test_smoothstep.py +1 -1
  240. warp/tests/test_sparse.py +126 -15
  241. warp/tests/test_spatial.py +105 -87
  242. warp/tests/test_special_values.py +6 -6
  243. warp/tests/test_static.py +7 -7
  244. warp/tests/test_struct.py +13 -2
  245. warp/tests/test_triangle_closest_point.py +48 -1
  246. warp/tests/test_types.py +27 -15
  247. warp/tests/test_utils.py +52 -52
  248. warp/tests/test_vec.py +29 -29
  249. warp/tests/test_vec_constructors.py +5 -5
  250. warp/tests/test_vec_scalar_ops.py +97 -97
  251. warp/tests/test_version.py +75 -0
  252. warp/tests/tile/test_tile.py +178 -0
  253. warp/tests/tile/test_tile_atomic_bitwise.py +403 -0
  254. warp/tests/tile/test_tile_cholesky.py +7 -4
  255. warp/tests/tile/test_tile_load.py +26 -2
  256. warp/tests/tile/test_tile_mathdx.py +3 -3
  257. warp/tests/tile/test_tile_matmul.py +1 -1
  258. warp/tests/tile/test_tile_mlp.py +2 -4
  259. warp/tests/tile/test_tile_reduce.py +214 -13
  260. warp/tests/unittest_suites.py +6 -14
  261. warp/tests/unittest_utils.py +10 -9
  262. warp/tests/walkthrough_debug.py +3 -1
  263. warp/torch.py +6 -373
  264. warp/types.py +29 -5764
  265. warp/utils.py +10 -1659
  266. {warp_lang-1.9.1.dist-info → warp_lang-1.10.0rc2.dist-info}/METADATA +46 -99
  267. warp_lang-1.10.0rc2.dist-info/RECORD +468 -0
  268. warp_lang-1.10.0rc2.dist-info/licenses/licenses/Gaia-LICENSE.txt +6 -0
  269. warp_lang-1.10.0rc2.dist-info/licenses/licenses/appdirs-LICENSE.txt +22 -0
  270. warp_lang-1.10.0rc2.dist-info/licenses/licenses/asset_pixel_jpg-LICENSE.txt +3 -0
  271. warp_lang-1.10.0rc2.dist-info/licenses/licenses/cuda-LICENSE.txt +1582 -0
  272. warp_lang-1.10.0rc2.dist-info/licenses/licenses/dlpack-LICENSE.txt +201 -0
  273. warp_lang-1.10.0rc2.dist-info/licenses/licenses/fp16-LICENSE.txt +28 -0
  274. warp_lang-1.10.0rc2.dist-info/licenses/licenses/libmathdx-LICENSE.txt +220 -0
  275. warp_lang-1.10.0rc2.dist-info/licenses/licenses/llvm-LICENSE.txt +279 -0
  276. warp_lang-1.10.0rc2.dist-info/licenses/licenses/moller-LICENSE.txt +16 -0
  277. warp_lang-1.10.0rc2.dist-info/licenses/licenses/nanovdb-LICENSE.txt +2 -0
  278. warp_lang-1.10.0rc2.dist-info/licenses/licenses/nvrtc-LICENSE.txt +1592 -0
  279. warp_lang-1.10.0rc2.dist-info/licenses/licenses/svd-LICENSE.txt +23 -0
  280. warp_lang-1.10.0rc2.dist-info/licenses/licenses/unittest_parallel-LICENSE.txt +21 -0
  281. warp_lang-1.10.0rc2.dist-info/licenses/licenses/usd-LICENSE.txt +213 -0
  282. warp_lang-1.10.0rc2.dist-info/licenses/licenses/windingnumber-LICENSE.txt +21 -0
  283. warp/examples/assets/cartpole.urdf +0 -110
  284. warp/examples/assets/crazyflie.usd +0 -0
  285. warp/examples/assets/nv_ant.xml +0 -92
  286. warp/examples/assets/nv_humanoid.xml +0 -183
  287. warp/examples/assets/quadruped.urdf +0 -268
  288. warp/examples/optim/example_bounce.py +0 -266
  289. warp/examples/optim/example_cloth_throw.py +0 -228
  290. warp/examples/optim/example_drone.py +0 -870
  291. warp/examples/optim/example_inverse_kinematics.py +0 -182
  292. warp/examples/optim/example_inverse_kinematics_torch.py +0 -191
  293. warp/examples/optim/example_softbody_properties.py +0 -400
  294. warp/examples/optim/example_spring_cage.py +0 -245
  295. warp/examples/optim/example_trajectory.py +0 -227
  296. warp/examples/sim/example_cartpole.py +0 -143
  297. warp/examples/sim/example_cloth.py +0 -225
  298. warp/examples/sim/example_cloth_self_contact.py +0 -316
  299. warp/examples/sim/example_granular.py +0 -130
  300. warp/examples/sim/example_granular_collision_sdf.py +0 -202
  301. warp/examples/sim/example_jacobian_ik.py +0 -244
  302. warp/examples/sim/example_particle_chain.py +0 -124
  303. warp/examples/sim/example_quadruped.py +0 -203
  304. warp/examples/sim/example_rigid_chain.py +0 -203
  305. warp/examples/sim/example_rigid_contact.py +0 -195
  306. warp/examples/sim/example_rigid_force.py +0 -133
  307. warp/examples/sim/example_rigid_gyroscopic.py +0 -115
  308. warp/examples/sim/example_rigid_soft_contact.py +0 -140
  309. warp/examples/sim/example_soft_body.py +0 -196
  310. warp/examples/tile/example_tile_walker.py +0 -327
  311. warp/sim/__init__.py +0 -74
  312. warp/sim/articulation.py +0 -793
  313. warp/sim/collide.py +0 -2570
  314. warp/sim/graph_coloring.py +0 -307
  315. warp/sim/import_mjcf.py +0 -791
  316. warp/sim/import_snu.py +0 -227
  317. warp/sim/import_urdf.py +0 -579
  318. warp/sim/import_usd.py +0 -898
  319. warp/sim/inertia.py +0 -357
  320. warp/sim/integrator.py +0 -245
  321. warp/sim/integrator_euler.py +0 -2000
  322. warp/sim/integrator_featherstone.py +0 -2101
  323. warp/sim/integrator_vbd.py +0 -2487
  324. warp/sim/integrator_xpbd.py +0 -3295
  325. warp/sim/model.py +0 -4821
  326. warp/sim/particles.py +0 -121
  327. warp/sim/render.py +0 -431
  328. warp/sim/utils.py +0 -431
  329. warp/tests/sim/disabled_kinematics.py +0 -244
  330. warp/tests/sim/test_cloth.py +0 -863
  331. warp/tests/sim/test_collision.py +0 -743
  332. warp/tests/sim/test_coloring.py +0 -347
  333. warp/tests/sim/test_inertia.py +0 -161
  334. warp/tests/sim/test_model.py +0 -226
  335. warp/tests/sim/test_sim_grad.py +0 -287
  336. warp/tests/sim/test_sim_grad_bounce_linear.py +0 -212
  337. warp/tests/sim/test_sim_kinematics.py +0 -98
  338. warp/thirdparty/__init__.py +0 -0
  339. warp_lang-1.9.1.dist-info/RECORD +0 -456
  340. /warp/{fem → _src/fem}/quadrature/__init__.py +0 -0
  341. /warp/{tests/sim → _src/thirdparty}/__init__.py +0 -0
  342. /warp/{thirdparty → _src/thirdparty}/appdirs.py +0 -0
  343. /warp/{thirdparty → _src/thirdparty}/dlpack.py +0 -0
  344. {warp_lang-1.9.1.dist-info → warp_lang-1.10.0rc2.dist-info}/WHEEL +0 -0
  345. {warp_lang-1.9.1.dist-info → warp_lang-1.10.0rc2.dist-info}/licenses/LICENSE.md +0 -0
  346. {warp_lang-1.9.1.dist-info → warp_lang-1.10.0rc2.dist-info}/top_level.txt +0 -0
warp/tests/tile/test_tile_atomic_bitwise.py (new file)
@@ -0,0 +1,403 @@
+ # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ # SPDX-License-Identifier: Apache-2.0
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ # http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ import unittest
+
+ import numpy as np
+
+ import warp as wp
+ from warp.tests.unittest_utils import *
+
+
+ @wp.kernel
+ def test_tile_atomic_bitwise_scalar_kernel(
+     a: wp.array(dtype=wp.uint32), b: wp.array(dtype=wp.uint32), c: wp.array(dtype=wp.uint32), op_type: int
+ ):
+     word_idx, bit_idx = wp.tid()
+     block_dim = wp.block_dim()
+     assert block_dim == 32
+     s = wp.tile_zeros(shape=1, dtype=wp.uint32)
+     # write to tile first, then write only once to the array
+     s[0] = a[word_idx]
+     if op_type < 3:
+         bit_mask = wp.uint32(1) << wp.uint32(bit_idx)
+         if op_type == 0:
+             s[0] &= (b[word_idx] & bit_mask) | ~bit_mask
+         elif op_type == 1:
+             s[0] |= b[word_idx] & bit_mask
+         elif op_type == 2:
+             s[0] ^= b[word_idx] & bit_mask
+     else:
+         # inter-tile operations
+         s_bit_mask = wp.tile_zeros(shape=32, dtype=wp.uint32)
+         s_bit_mask[(bit_idx + 1) % 32] = wp.uint32(1) << wp.uint32((bit_idx + 1) % 32)
+         if op_type == 3:
+             s[0] &= (b[word_idx] & s_bit_mask[bit_idx]) | ~s_bit_mask[bit_idx]
+         elif op_type == 4:
+             s[0] |= b[word_idx] & s_bit_mask[bit_idx]
+         elif op_type == 5:
+             s[0] ^= b[word_idx] & s_bit_mask[bit_idx]
+     c[word_idx] = s[0]
+
+
+ @wp.kernel
+ def test_tile_atomic_bitwise_scalar_tilewise_kernel(
+     a: wp.array(dtype=wp.uint32), b: wp.array(dtype=wp.uint32), c: wp.array(dtype=wp.uint32), op_type: int
+ ):
+     batch_idx, _ = wp.tid()
+     block_dim = wp.block_dim()
+     assert block_dim == 32
+     # Each tile is responsible for a batch of 32 elements
+     s1 = wp.tile_load(a, shape=32, offset=batch_idx * 32)
+     s2 = wp.tile_load(b, shape=32, offset=batch_idx * 32)
+     # inter-tile operations (batch-wise)
+     if op_type < 9:
+         if op_type == 6:
+             s1 &= s2
+         elif op_type == 7:
+             s1 |= s2
+         elif op_type == 8:
+             s1 ^= s2
+         wp.tile_store(c, s1, offset=batch_idx * 32)
+     else:
+         if op_type == 9:
+             s3 = s1 & s2
+         elif op_type == 10:
+             s3 = s1 | s2
+         elif op_type == 11:
+             s3 = s1 ^ s2
+         wp.tile_store(c, s3, offset=batch_idx * 32)
+
+
+ def test_tile_atomic_bitwise_scalar(test, device):
+     n = 1024
+     rng = np.random.default_rng(42)
+
+     a = rng.integers(0, np.iinfo(np.uint32).max, size=n, dtype=np.uint32)
+     b = rng.integers(0, np.iinfo(np.uint32).max, size=n, dtype=np.uint32)
+
+     expected_and = a & b
+     expected_or = a | b
+     expected_xor = a ^ b
+
+     with wp.ScopedDevice(device):
+         a_wp = wp.array(a, dtype=wp.uint32, device=device)
+         b_wp = wp.array(b, dtype=wp.uint32, device=device)
+         c_wp = wp.zeros(shape=n, dtype=wp.uint32, device=device)
+
+         wp.launch_tiled(test_tile_atomic_bitwise_scalar_kernel, dim=n, inputs=[a_wp, b_wp, c_wp, 0], block_dim=32)
+         assert_np_equal(c_wp.numpy(), expected_and)
+         wp.launch_tiled(test_tile_atomic_bitwise_scalar_kernel, dim=n, inputs=[a_wp, b_wp, c_wp, 1], block_dim=32)
+         assert_np_equal(c_wp.numpy(), expected_or)
+         wp.launch_tiled(test_tile_atomic_bitwise_scalar_kernel, dim=n, inputs=[a_wp, b_wp, c_wp, 2], block_dim=32)
+         assert_np_equal(c_wp.numpy(), expected_xor)
+         wp.launch_tiled(test_tile_atomic_bitwise_scalar_kernel, dim=n, inputs=[a_wp, b_wp, c_wp, 3], block_dim=32)
+         assert_np_equal(c_wp.numpy(), expected_and)
+         wp.launch_tiled(test_tile_atomic_bitwise_scalar_kernel, dim=n, inputs=[a_wp, b_wp, c_wp, 4], block_dim=32)
+         assert_np_equal(c_wp.numpy(), expected_or)
+         wp.launch_tiled(test_tile_atomic_bitwise_scalar_kernel, dim=n, inputs=[a_wp, b_wp, c_wp, 5], block_dim=32)
+         assert_np_equal(c_wp.numpy(), expected_xor)
+
+         wp.launch_tiled(
+             test_tile_atomic_bitwise_scalar_tilewise_kernel, dim=n // 32, inputs=[a_wp, b_wp, c_wp, 6], block_dim=32
+         )
+         assert_np_equal(c_wp.numpy(), expected_and)
+         wp.launch_tiled(
+             test_tile_atomic_bitwise_scalar_tilewise_kernel, dim=n // 32, inputs=[a_wp, b_wp, c_wp, 7], block_dim=32
+         )
+         assert_np_equal(c_wp.numpy(), expected_or)
+         wp.launch_tiled(
+             test_tile_atomic_bitwise_scalar_tilewise_kernel, dim=n // 32, inputs=[a_wp, b_wp, c_wp, 8], block_dim=32
+         )
+         assert_np_equal(c_wp.numpy(), expected_xor)
+         wp.launch_tiled(
+             test_tile_atomic_bitwise_scalar_tilewise_kernel, dim=n // 32, inputs=[a_wp, b_wp, c_wp, 9], block_dim=32
+         )
+         assert_np_equal(c_wp.numpy(), expected_and)
+         wp.launch_tiled(
+             test_tile_atomic_bitwise_scalar_tilewise_kernel, dim=n // 32, inputs=[a_wp, b_wp, c_wp, 10], block_dim=32
+         )
+         assert_np_equal(c_wp.numpy(), expected_or)
+         wp.launch_tiled(
+             test_tile_atomic_bitwise_scalar_tilewise_kernel, dim=n // 32, inputs=[a_wp, b_wp, c_wp, 11], block_dim=32
+         )
+         assert_np_equal(c_wp.numpy(), expected_xor)
+
+
+ @wp.kernel
+ def test_tile_atomic_bitwise_vector_kernel(
+     a: wp.array(dtype=wp.vec3ui), b: wp.array(dtype=wp.vec3ui), c: wp.array(dtype=wp.vec3ui), op_type: int
+ ):
+     word_idx, bit_idx = wp.tid()
+     block_dim = wp.block_dim()
+     assert block_dim == 32
+     s = wp.tile_zeros(shape=1, dtype=wp.vec3ui)
+     # write to tile first, then write only once to the array
+     s[0] = a[word_idx]
+     if op_type < 3:
+         bit_mask = wp.vec3ui(wp.uint32(1)) << wp.vec3ui(wp.uint32(bit_idx))
+         if op_type == 0:
+             s[0] &= (b[word_idx] & bit_mask) | ~bit_mask
+         elif op_type == 1:
+             s[0] |= b[word_idx] & bit_mask
+         elif op_type == 2:
+             s[0] ^= b[word_idx] & bit_mask
+     else:
+         # inter-tile operations
+         s_bit_mask = wp.tile_zeros(shape=32, dtype=wp.vec3ui)
+         s_bit_mask[(bit_idx + 1) % 32] = wp.vec3ui(wp.uint32(1) << wp.uint32((bit_idx + 1) % 32))
+         if op_type == 3:
+             s[0] &= (b[word_idx] & s_bit_mask[bit_idx]) | ~s_bit_mask[bit_idx]
+         elif op_type == 4:
+             s[0] |= b[word_idx] & s_bit_mask[bit_idx]
+         elif op_type == 5:
+             s[0] ^= b[word_idx] & s_bit_mask[bit_idx]
+     c[word_idx] = s[0]
+
+
+ @wp.kernel
+ def test_tile_atomic_bitwise_vector_tilewise_kernel(
+     a: wp.array(dtype=wp.vec3ui), b: wp.array(dtype=wp.vec3ui), c: wp.array(dtype=wp.vec3ui), op_type: int
+ ):
+     batch_idx, _ = wp.tid()
+     block_dim = wp.block_dim()
+     assert block_dim == 32
+     # Each tile is responsible for a batch of 32 elements
+     s1 = wp.tile_load(a, shape=32, offset=batch_idx * 32)
+     s2 = wp.tile_load(b, shape=32, offset=batch_idx * 32)
+     # inter-tile operations (batch-wise)
+     if op_type < 9:
+         if op_type == 6:
+             s1 &= s2
+         elif op_type == 7:
+             s1 |= s2
+         elif op_type == 8:
+             s1 ^= s2
+         wp.tile_store(c, s1, offset=batch_idx * 32)
+     else:
+         if op_type == 9:
+             s3 = s1 & s2
+         elif op_type == 10:
+             s3 = s1 | s2
+         elif op_type == 11:
+             s3 = s1 ^ s2
+         wp.tile_store(c, s3, offset=batch_idx * 32)
+
+
+ def test_tile_atomic_bitwise_vector(test, device):
+     n = 1024
+     rng = np.random.default_rng(42)
+
+     a = rng.integers(0, np.iinfo(np.uint32).max, size=(n, 3), dtype=np.uint32)
+     b = rng.integers(0, np.iinfo(np.uint32).max, size=(n, 3), dtype=np.uint32)
+
+     expected_and = a & b
+     expected_or = a | b
+     expected_xor = a ^ b
+
+     with wp.ScopedDevice(device):
+         a_wp = wp.array(a, dtype=wp.vec3ui, device=device)
+         b_wp = wp.array(b, dtype=wp.vec3ui, device=device)
+         c_wp = wp.zeros(shape=n, dtype=wp.vec3ui, device=device)
+
+         wp.launch_tiled(test_tile_atomic_bitwise_vector_kernel, dim=n, inputs=[a_wp, b_wp, c_wp, 0], block_dim=32)
+         assert_np_equal(c_wp.numpy(), expected_and)
+         wp.launch_tiled(test_tile_atomic_bitwise_vector_kernel, dim=n, inputs=[a_wp, b_wp, c_wp, 1], block_dim=32)
+         assert_np_equal(c_wp.numpy(), expected_or)
+         wp.launch_tiled(test_tile_atomic_bitwise_vector_kernel, dim=n, inputs=[a_wp, b_wp, c_wp, 2], block_dim=32)
+         assert_np_equal(c_wp.numpy(), expected_xor)
+         wp.launch_tiled(test_tile_atomic_bitwise_vector_kernel, dim=n, inputs=[a_wp, b_wp, c_wp, 3], block_dim=32)
+         assert_np_equal(c_wp.numpy(), expected_and)
+         wp.launch_tiled(test_tile_atomic_bitwise_vector_kernel, dim=n, inputs=[a_wp, b_wp, c_wp, 4], block_dim=32)
+         assert_np_equal(c_wp.numpy(), expected_or)
+         wp.launch_tiled(test_tile_atomic_bitwise_vector_kernel, dim=n, inputs=[a_wp, b_wp, c_wp, 5], block_dim=32)
+         assert_np_equal(c_wp.numpy(), expected_xor)
+
+         wp.launch_tiled(
+             test_tile_atomic_bitwise_vector_tilewise_kernel, dim=n // 32, inputs=[a_wp, b_wp, c_wp, 6], block_dim=32
+         )
+         assert_np_equal(c_wp.numpy(), expected_and)
+         wp.launch_tiled(
+             test_tile_atomic_bitwise_vector_tilewise_kernel, dim=n // 32, inputs=[a_wp, b_wp, c_wp, 7], block_dim=32
+         )
+         assert_np_equal(c_wp.numpy(), expected_or)
+         wp.launch_tiled(
+             test_tile_atomic_bitwise_vector_tilewise_kernel, dim=n // 32, inputs=[a_wp, b_wp, c_wp, 8], block_dim=32
+         )
+         assert_np_equal(c_wp.numpy(), expected_xor)
+         wp.launch_tiled(
+             test_tile_atomic_bitwise_vector_tilewise_kernel, dim=n // 32, inputs=[a_wp, b_wp, c_wp, 9], block_dim=32
+         )
+         assert_np_equal(c_wp.numpy(), expected_and)
+         wp.launch_tiled(
+             test_tile_atomic_bitwise_vector_tilewise_kernel, dim=n // 32, inputs=[a_wp, b_wp, c_wp, 10], block_dim=32
+         )
+         assert_np_equal(c_wp.numpy(), expected_or)
+         wp.launch_tiled(
+             test_tile_atomic_bitwise_vector_tilewise_kernel, dim=n // 32, inputs=[a_wp, b_wp, c_wp, 11], block_dim=32
+         )
+         assert_np_equal(c_wp.numpy(), expected_xor)
+
+
+ mat33ui = wp._src.types.matrix(shape=(3, 3), dtype=wp.uint32)
+
+
+ @wp.kernel
+ def test_tile_atomic_bitwise_matrix_kernel(
+     a: wp.array(dtype=mat33ui), b: wp.array(dtype=mat33ui), c: wp.array(dtype=mat33ui), op_type: int
+ ):
+     word_idx, bit_idx = wp.tid()
+     block_dim = wp.block_dim()
+     assert block_dim == 32
+     s = wp.tile_zeros(shape=1, dtype=mat33ui)
+     # write to tile first, then write only once to the array
+     s[0] = a[word_idx]
+     if op_type < 3:
+         bit_mask = mat33ui(wp.uint32(1)) << mat33ui(wp.uint32(bit_idx))
+         if op_type == 0:
+             s[0] &= (b[word_idx] & bit_mask) | ~bit_mask
+         elif op_type == 1:
+             s[0] |= b[word_idx] & bit_mask
+         elif op_type == 2:
+             s[0] ^= b[word_idx] & bit_mask
+     else:
+         # inter-tile operations
+         s_bit_mask = wp.tile_zeros(shape=32, dtype=mat33ui)
+         s_bit_mask[(bit_idx + 1) % 32] = mat33ui(wp.uint32(1) << wp.uint32((bit_idx + 1) % 32))
+         if op_type == 3:
+             s[0] &= (b[word_idx] & s_bit_mask[bit_idx]) | ~s_bit_mask[bit_idx]
+         elif op_type == 4:
+             s[0] |= b[word_idx] & s_bit_mask[bit_idx]
+         elif op_type == 5:
+             s[0] ^= b[word_idx] & s_bit_mask[bit_idx]
+     c[word_idx] = s[0]
+
+
+ @wp.kernel
+ def test_tile_atomic_bitwise_matrix_tilewise_kernel(
+     a: wp.array(dtype=mat33ui), b: wp.array(dtype=mat33ui), c: wp.array(dtype=mat33ui), op_type: int
+ ):
+     batch_idx, _ = wp.tid()
+     block_dim = wp.block_dim()
+     assert block_dim == 32
+     # Each tile is responsible for a batch of 32 elements
+     s1 = wp.tile_load(a, shape=32, offset=batch_idx * 32)
+     s2 = wp.tile_load(b, shape=32, offset=batch_idx * 32)
+     # inter-tile operations (batch-wise)
+     if op_type < 9:
+         if op_type == 6:
+             s1 &= s2
+         elif op_type == 7:
+             s1 |= s2
+         elif op_type == 8:
+             s1 ^= s2
+         wp.tile_store(c, s1, offset=batch_idx * 32)
+     else:
+         if op_type == 9:
+             s3 = s1 & s2
+         elif op_type == 10:
+             s3 = s1 | s2
+         elif op_type == 11:
+             s3 = s1 ^ s2
+         wp.tile_store(c, s3, offset=batch_idx * 32)
+
+
+ def test_tile_atomic_bitwise_matrix(test, device):
+     n = 1024
+     rng = np.random.default_rng(42)
+
+     a = rng.integers(0, np.iinfo(np.uint32).max, size=(n, 3, 3), dtype=np.uint32)
+     b = rng.integers(0, np.iinfo(np.uint32).max, size=(n, 3, 3), dtype=np.uint32)
+
+     expected_and = a & b
+     expected_or = a | b
+     expected_xor = a ^ b
+
+     with wp.ScopedDevice(device):
+         a_wp = wp.array(a, dtype=mat33ui, device=device)
+         b_wp = wp.array(b, dtype=mat33ui, device=device)
+         c_wp = wp.zeros(shape=n, dtype=mat33ui, device=device)
+
+         wp.launch_tiled(test_tile_atomic_bitwise_matrix_kernel, dim=n, inputs=[a_wp, b_wp, c_wp, 0], block_dim=32)
+         assert_np_equal(c_wp.numpy(), expected_and)
+         wp.launch_tiled(test_tile_atomic_bitwise_matrix_kernel, dim=n, inputs=[a_wp, b_wp, c_wp, 1], block_dim=32)
+         assert_np_equal(c_wp.numpy(), expected_or)
+         wp.launch_tiled(test_tile_atomic_bitwise_matrix_kernel, dim=n, inputs=[a_wp, b_wp, c_wp, 2], block_dim=32)
+         assert_np_equal(c_wp.numpy(), expected_xor)
+         wp.launch_tiled(test_tile_atomic_bitwise_matrix_kernel, dim=n, inputs=[a_wp, b_wp, c_wp, 3], block_dim=32)
+         assert_np_equal(c_wp.numpy(), expected_and)
+         wp.launch_tiled(test_tile_atomic_bitwise_matrix_kernel, dim=n, inputs=[a_wp, b_wp, c_wp, 4], block_dim=32)
+         assert_np_equal(c_wp.numpy(), expected_or)
+         wp.launch_tiled(test_tile_atomic_bitwise_matrix_kernel, dim=n, inputs=[a_wp, b_wp, c_wp, 5], block_dim=32)
+         assert_np_equal(c_wp.numpy(), expected_xor)
+
+         wp.launch_tiled(
+             test_tile_atomic_bitwise_matrix_tilewise_kernel, dim=n // 32, inputs=[a_wp, b_wp, c_wp, 6], block_dim=32
+         )
+         assert_np_equal(c_wp.numpy(), expected_and)
+         wp.launch_tiled(
+             test_tile_atomic_bitwise_matrix_tilewise_kernel, dim=n // 32, inputs=[a_wp, b_wp, c_wp, 7], block_dim=32
+         )
+         assert_np_equal(c_wp.numpy(), expected_or)
+         wp.launch_tiled(
+             test_tile_atomic_bitwise_matrix_tilewise_kernel, dim=n // 32, inputs=[a_wp, b_wp, c_wp, 8], block_dim=32
+         )
+         assert_np_equal(c_wp.numpy(), expected_xor)
+         wp.launch_tiled(
+             test_tile_atomic_bitwise_matrix_tilewise_kernel, dim=n // 32, inputs=[a_wp, b_wp, c_wp, 9], block_dim=32
+         )
+         assert_np_equal(c_wp.numpy(), expected_and)
+         wp.launch_tiled(
+             test_tile_atomic_bitwise_matrix_tilewise_kernel, dim=n // 32, inputs=[a_wp, b_wp, c_wp, 10], block_dim=32
+         )
+         assert_np_equal(c_wp.numpy(), expected_or)
+         wp.launch_tiled(
+             test_tile_atomic_bitwise_matrix_tilewise_kernel, dim=n // 32, inputs=[a_wp, b_wp, c_wp, 11], block_dim=32
+         )
+         assert_np_equal(c_wp.numpy(), expected_xor)
+
+
+ devices = get_cuda_test_devices()
+
+
+ class TestTileAtomicBitwise(unittest.TestCase):
+     pass
+
+
+ add_function_test(
+     TestTileAtomicBitwise,
+     "test_tile_atomic_bitwise_scalar",
+     test_tile_atomic_bitwise_scalar,
+     devices=devices,
+ )
+
+ add_function_test(
+     TestTileAtomicBitwise,
+     "test_tile_atomic_bitwise_vector",
+     test_tile_atomic_bitwise_vector,
+     devices=devices,
+ )
+
+ add_function_test(
+     TestTileAtomicBitwise,
+     "test_tile_atomic_bitwise_matrix",
+     test_tile_atomic_bitwise_matrix,
+     devices=devices,
+ )
+
+
+ if __name__ == "__main__":
+     wp.clear_kernel_cache()
+     unittest.main(verbosity=2)
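
The new test file above drives Warp's tile-level bitwise operators (&, |, ^ and their in-place forms) for uint32 scalars, wp.vec3ui vectors, and a 3x3 uint32 matrix type. The following is a minimal usage sketch distilled from it, not code from the diff: it assumes a device with tile support and Warp 1.10 semantics, and the kernel and array names are illustrative only.

import numpy as np
import warp as wp

@wp.kernel
def tile_xor_kernel(a: wp.array(dtype=wp.uint32), b: wp.array(dtype=wp.uint32), out: wp.array(dtype=wp.uint32)):
    # Each tile handles a batch of 32 words, mirroring the tilewise kernels above.
    batch_idx, _ = wp.tid()
    ta = wp.tile_load(a, shape=32, offset=batch_idx * 32)
    tb = wp.tile_load(b, shape=32, offset=batch_idx * 32)
    ta ^= tb  # in-place bitwise op on whole tiles (op_type == 8 in the test above)
    wp.tile_store(out, ta, offset=batch_idx * 32)

rng = np.random.default_rng(0)
a_np = rng.integers(0, np.iinfo(np.uint32).max, size=1024, dtype=np.uint32)
b_np = rng.integers(0, np.iinfo(np.uint32).max, size=1024, dtype=np.uint32)
a = wp.array(a_np, dtype=wp.uint32)
b = wp.array(b_np, dtype=wp.uint32)
out = wp.zeros(shape=1024, dtype=wp.uint32)
wp.launch_tiled(tile_xor_kernel, dim=1024 // 32, inputs=[a, b, out], block_dim=32)
assert np.array_equal(out.numpy(), a_np ^ b_np)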
warp/tests/tile/test_tile_cholesky.py
@@ -20,7 +20,7 @@ import numpy as np
  import warp as wp
  from warp.tests.unittest_utils import *

- wp.init()  # For wp.context.runtime.core.wp_is_mathdx_enabled()
+ wp.init()  # For wp._src.context.runtime.core.wp_is_mathdx_enabled()

  TILE_M = wp.constant(8)
  TILE_N = wp.constant(4)
@@ -490,7 +490,7 @@ def test_tile_upper_solve(L: wp.array2d(dtype=float), y: wp.array(dtype=float),


  def test_tile_cholesky_singular_matrices(test, device):
-     if not wp.context.runtime.core.wp_is_mathdx_enabled():
+     if not wp._src.context.runtime.core.wp_is_mathdx_enabled():
          test.skipTest("MathDx is not enabled")

      rng = np.random.default_rng(42)
@@ -527,8 +527,11 @@ cuda_devices = get_cuda_test_devices()


  @unittest.skipUnless(
-     not wp.context.runtime.core.wp_is_mathdx_enabled()
-     or (wp.context.runtime.core.wp_is_mathdx_enabled() and wp.context.runtime.core.wp_cuda_toolkit_version() >= 12060),
+     not wp._src.context.runtime.core.wp_is_mathdx_enabled()
+     or (
+         wp._src.context.runtime.core.wp_is_mathdx_enabled()
+         and wp._src.context.runtime.core.wp_cuda_toolkit_version() >= 12060
+     ),
      "MathDx is not enabled or is enabled but CUDA toolkit version is less than 12.6",
  )
  class TestTileCholesky(unittest.TestCase):
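
These hunks follow the move of Warp's implementation modules under warp._src: the tests now reach the runtime through wp._src.context instead of wp.context, and the MathDx/CUDA toolkit gate is reflowed. A condensed sketch of that gating pattern is shown below; it uses only the calls visible in the hunks, which are internal runtime handles that may change between releases, and the test class name is illustrative.

import unittest
import warp as wp

wp.init()  # required before querying wp._src.context.runtime

core = wp._src.context.runtime.core
mathdx_enabled = core.wp_is_mathdx_enabled()
toolkit_ok = core.wp_cuda_toolkit_version() >= 12060  # CUDA 12.6

@unittest.skipUnless(not mathdx_enabled or toolkit_ok, "MathDx is enabled but CUDA toolkit version is less than 12.6")
class MyTileLinearAlgebraTest(unittest.TestCase):
    pass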
warp/tests/tile/test_tile_load.py
@@ -40,6 +40,7 @@ def tile_load_1d_kernel(
      input: wp.array1d(dtype=float),
      out_full: wp.array1d(dtype=float),
      out_padded: wp.array1d(dtype=float),
+     out_sliced: wp.array1d(dtype=float),
      out_offset: wp.array1d(dtype=float),
  ):
      full0 = wp.tile_load(input, TILE_M)
@@ -50,8 +51,13 @@
      padded1 = wp.tile_load(input, shape=TILE_M, offset=TILE_OFFSET)
      padded2 = wp.tile_load(input, shape=(TILE_M,), offset=(TILE_OFFSET,))

+     sliced0 = wp.tile_load(input[::2], TILE_M)
+     sliced1 = wp.tile_load(input[::2], shape=TILE_M)
+     sliced2 = wp.tile_load(input[::2], shape=(TILE_M,))
+
      wp.tile_store(out_full, full0)
      wp.tile_store(out_padded, padded0)
+     wp.tile_store(out_sliced, sliced0)
      wp.tile_store(out_offset, full0, offset=(TILE_OFFSET,))


@@ -60,13 +66,16 @@ def tile_load_2d_kernel(
      input: wp.array2d(dtype=float),
      out_full: wp.array2d(dtype=float),
      out_padded: wp.array2d(dtype=float),
+     out_sliced: wp.array2d(dtype=float),
      out_offset: wp.array2d(dtype=float),
  ):
      full0 = wp.tile_load(input, shape=(TILE_M, TILE_N))
      padded0 = wp.tile_load(input, shape=(TILE_M, TILE_N), offset=(TILE_OFFSET, TILE_OFFSET))
+     sliced0 = wp.tile_load(input[::2, ::2], shape=(TILE_M, TILE_N))

      wp.tile_store(out_full, full0)
      wp.tile_store(out_padded, padded0)
+     wp.tile_store(out_sliced, sliced0)
      wp.tile_store(out_offset, full0, offset=(TILE_OFFSET, TILE_OFFSET))


@@ -75,13 +84,16 @@ def tile_load_3d_kernel(
      input: wp.array3d(dtype=float),
      out_full: wp.array3d(dtype=float),
      out_padded: wp.array3d(dtype=float),
+     out_sliced: wp.array3d(dtype=float),
      out_offset: wp.array3d(dtype=float),
  ):
      full0 = wp.tile_load(input, shape=(TILE_M, TILE_N, TILE_O))
      padded0 = wp.tile_load(input, shape=(TILE_M, TILE_N, TILE_O), offset=(TILE_OFFSET, TILE_OFFSET, TILE_OFFSET))
+     sliced0 = wp.tile_load(input[::2, ::2, ::2], shape=(TILE_M, TILE_N, TILE_O))

      wp.tile_store(out_full, full0)
      wp.tile_store(out_padded, padded0)
+     wp.tile_store(out_sliced, sliced0)
      wp.tile_store(out_offset, full0, offset=(TILE_OFFSET, TILE_OFFSET, TILE_OFFSET))


@@ -90,15 +102,18 @@ def tile_load_4d_kernel(
      input: wp.array4d(dtype=float),
      out_full: wp.array4d(dtype=float),
      out_padded: wp.array4d(dtype=float),
+     out_sliced: wp.array4d(dtype=float),
      out_offset: wp.array4d(dtype=float),
  ):
      full0 = wp.tile_load(input, shape=(TILE_M, TILE_N, TILE_O, TILE_P))
      padded0 = wp.tile_load(
          input, shape=(TILE_M, TILE_N, TILE_O, TILE_P), offset=(TILE_OFFSET, TILE_OFFSET, TILE_OFFSET, TILE_OFFSET)
      )
+     sliced0 = wp.tile_load(input[::2, ::2, ::2, ::2], shape=(TILE_M, TILE_N, TILE_O, TILE_P))

      wp.tile_store(out_full, full0)
      wp.tile_store(out_padded, padded0)
+     wp.tile_store(out_sliced, sliced0)
      wp.tile_store(out_offset, full0, offset=(TILE_OFFSET, TILE_OFFSET, TILE_OFFSET, TILE_OFFSET))


@@ -112,13 +127,14 @@ def test_tile_load(kernel, ndim):
      input = wp.array(rng.random(shape), dtype=float, requires_grad=True, device=device)
      output_full = wp.zeros(shape, dtype=float, device=device)
      output_padded = wp.zeros(shape, dtype=float, device=device)
+     output_sliced = wp.zeros(shape, dtype=float, device=device)
      output_offset = wp.zeros(shape, dtype=float, device=device)

      with wp.Tape() as tape:
          wp.launch_tiled(
              kernel,
              dim=[1],
-             inputs=[input, output_full, output_padded, output_offset],
+             inputs=[input, output_full, output_padded, output_sliced, output_offset],
              block_dim=TILE_DIM,
              device=device,
          )
@@ -134,8 +150,16 @@
      ref_offset = np.zeros_like(ref_full)
      ref_offset[src_slice] = ref_full[dest_slice]

+     # construct a slice for the source/dest sliced arrays
+     src_slice = tuple(slice(0, dim, 2) for dim in shape)
+     dest_slice = tuple(slice(0, (dim + 1) // 2) for dim in shape)
+
+     ref_sliced = np.zeros_like(ref_full)
+     ref_sliced[dest_slice] = ref_full[src_slice]
+
      assert_np_equal(output_full.numpy(), ref_full)
      assert_np_equal(output_padded.numpy(), ref_padded)
+     assert_np_equal(output_sliced.numpy(), ref_sliced)
      assert_np_equal(output_offset.numpy(), ref_offset)

      output_full.grad = wp.ones_like(output_full)
@@ -570,7 +594,7 @@ def test_tile_assign(kernel, ndim):
      input = wp.array(rng.random(shape), dtype=float, requires_grad=True, device=device)
      output = wp.zeros_like(input)

-     with wp.Tape() as tape:
+     with wp.Tape():
          wp.launch(
              kernel,
              dim=shape,
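
The tile_load changes above add coverage for loading tiles from strided slices of an array (e.g. input[::2]) alongside the existing full, padded, and offset loads. Below is a minimal sketch of that sliced-load path, not code from the diff: the kernel and array names are illustrative, and it assumes a device with tile support.

import numpy as np
import warp as wp

TILE_M = wp.constant(8)

@wp.kernel
def load_every_other(src: wp.array1d(dtype=float), dst: wp.array1d(dtype=float)):
    t = wp.tile_load(src[::2], shape=TILE_M)  # tile sourced from a strided view of the array
    wp.tile_store(dst, t)

src = wp.array(np.arange(16, dtype=np.float32), dtype=float)
dst = wp.zeros(8, dtype=float)
wp.launch_tiled(load_every_other, dim=[1], inputs=[src, dst], block_dim=32)
assert np.array_equal(dst.numpy(), np.arange(0.0, 16.0, 2.0, dtype=np.float32))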
warp/tests/tile/test_tile_mathdx.py
@@ -21,7 +21,7 @@ import numpy as np
  import warp as wp
  from warp.tests.unittest_utils import *

- wp.init()  # For wp.context.runtime.core.wp_is_mathdx_enabled()
+ wp.init()  # For wp._src.context.runtime.core.wp_is_mathdx_enabled()

  TILE_M = wp.constant(8)
  TILE_N = wp.constant(4)
@@ -92,7 +92,7 @@ def tile_math_fft_kernel_vec2d(gx: wp.array2d(dtype=wp.vec2d), gy: wp.array2d(dt
      wp.tile_store(gy, xy)


- @unittest.skipUnless(wp.context.runtime.core.wp_is_mathdx_enabled(), "Warp was not built with MathDx support")
+ @unittest.skipUnless(wp._src.context.runtime.core.wp_is_mathdx_enabled(), "Warp was not built with MathDx support")
  def test_tile_math_fft(test, device, wp_dtype):
      np_real_dtype = {wp.vec2f: np.float32, wp.vec2d: np.float64}[wp_dtype]
      np_cplx_dtype = {wp.vec2f: np.complex64, wp.vec2d: np.complex128}[wp_dtype]
@@ -113,7 +113,7 @@ def test_tile_math_fft(test, device, wp_dtype):
      X_c64 = X.view(np_cplx_dtype).reshape(fft_size, fft_size)
      Y_c64 = np.fft.fft(X_c64, axis=-1)

-     with wp.Tape() as tape:
+     with wp.Tape():
          wp.launch_tiled(kernel, dim=[1, 1], inputs=[X_wp, Y_wp], block_dim=TILE_DIM, device=device)

      Y_wp_c64 = Y_wp.numpy().view(np_cplx_dtype).reshape(fft_size, fft_size)
warp/tests/tile/test_tile_matmul.py
@@ -60,7 +60,7 @@ def test_tile_grouped_gemm(test, device):
      B_wp = wp.array(B, requires_grad=True, device=device)
      C_wp = wp.zeros((batch_count, TILE_M, TILE_N), requires_grad=True, device=device)

-     with wp.Tape() as tape:
+     with wp.Tape():
          wp.launch_tiled(
              tile_grouped_gemm, dim=[batch_count], inputs=[A_wp, B_wp, C_wp], block_dim=TILE_DIM, device=device
          )
warp/tests/tile/test_tile_mlp.py
@@ -43,7 +43,7 @@ def create_array(rng, dim_in, dim_hid, dtype=float):
  def test_multi_layer_nn(test, device):
      import torch as tc

-     if device.is_cuda and not wp.context.runtime.core.wp_is_mathdx_enabled():
+     if device.is_cuda and not wp._src.context.runtime.core.wp_is_mathdx_enabled():
          test.skipTest("Skipping test on CUDA device without MathDx (tolerance)")

      NUM_FREQ = wp.constant(8)
@@ -63,7 +63,7 @@ def test_multi_layer_nn(test, device):
      NUM_THREADS = 32

      dtype = wp.float16
-     npdtype = wp.types.warp_type_to_np_dtype[dtype]
+     npdtype = wp._src.types.warp_type_to_np_dtype[dtype]

      @wp.func
      def relu(x: dtype):
@@ -188,7 +188,6 @@ def test_multi_layer_nn(test, device):
      optimizer_inputs = [p.flatten() for p in params]
      optimizer = warp.optim.Adam(optimizer_inputs, lr=0.01)

-     num_batches = int((IMG_WIDTH * IMG_HEIGHT) / BATCH_SIZE)
      max_epochs = 30

      # create randomized batch indices
@@ -288,7 +287,6 @@ def test_single_layer_nn(test, device):
      import torch as tc

      DIM_IN = 8
-     DIM_HID = 32
      DIM_OUT = 16

      NUM_BLOCKS = 56