warp-lang 1.10.0__py3-none-macosx_11_0_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (468) hide show
  1. warp/__init__.py +334 -0
  2. warp/__init__.pyi +5856 -0
  3. warp/_src/__init__.py +14 -0
  4. warp/_src/autograd.py +1077 -0
  5. warp/_src/build.py +620 -0
  6. warp/_src/build_dll.py +642 -0
  7. warp/_src/builtins.py +10555 -0
  8. warp/_src/codegen.py +4361 -0
  9. warp/_src/config.py +178 -0
  10. warp/_src/constants.py +59 -0
  11. warp/_src/context.py +8352 -0
  12. warp/_src/dlpack.py +464 -0
  13. warp/_src/fabric.py +362 -0
  14. warp/_src/fem/__init__.py +14 -0
  15. warp/_src/fem/adaptivity.py +510 -0
  16. warp/_src/fem/cache.py +689 -0
  17. warp/_src/fem/dirichlet.py +190 -0
  18. warp/_src/fem/domain.py +553 -0
  19. warp/_src/fem/field/__init__.py +131 -0
  20. warp/_src/fem/field/field.py +703 -0
  21. warp/_src/fem/field/nodal_field.py +403 -0
  22. warp/_src/fem/field/restriction.py +39 -0
  23. warp/_src/fem/field/virtual.py +1021 -0
  24. warp/_src/fem/geometry/__init__.py +32 -0
  25. warp/_src/fem/geometry/adaptive_nanogrid.py +782 -0
  26. warp/_src/fem/geometry/closest_point.py +99 -0
  27. warp/_src/fem/geometry/deformed_geometry.py +277 -0
  28. warp/_src/fem/geometry/element.py +854 -0
  29. warp/_src/fem/geometry/geometry.py +693 -0
  30. warp/_src/fem/geometry/grid_2d.py +478 -0
  31. warp/_src/fem/geometry/grid_3d.py +539 -0
  32. warp/_src/fem/geometry/hexmesh.py +956 -0
  33. warp/_src/fem/geometry/nanogrid.py +660 -0
  34. warp/_src/fem/geometry/partition.py +483 -0
  35. warp/_src/fem/geometry/quadmesh.py +597 -0
  36. warp/_src/fem/geometry/tetmesh.py +762 -0
  37. warp/_src/fem/geometry/trimesh.py +588 -0
  38. warp/_src/fem/integrate.py +2507 -0
  39. warp/_src/fem/linalg.py +385 -0
  40. warp/_src/fem/operator.py +398 -0
  41. warp/_src/fem/polynomial.py +231 -0
  42. warp/_src/fem/quadrature/__init__.py +17 -0
  43. warp/_src/fem/quadrature/pic_quadrature.py +318 -0
  44. warp/_src/fem/quadrature/quadrature.py +665 -0
  45. warp/_src/fem/space/__init__.py +248 -0
  46. warp/_src/fem/space/basis_function_space.py +499 -0
  47. warp/_src/fem/space/basis_space.py +681 -0
  48. warp/_src/fem/space/dof_mapper.py +253 -0
  49. warp/_src/fem/space/function_space.py +312 -0
  50. warp/_src/fem/space/grid_2d_function_space.py +179 -0
  51. warp/_src/fem/space/grid_3d_function_space.py +229 -0
  52. warp/_src/fem/space/hexmesh_function_space.py +255 -0
  53. warp/_src/fem/space/nanogrid_function_space.py +199 -0
  54. warp/_src/fem/space/partition.py +435 -0
  55. warp/_src/fem/space/quadmesh_function_space.py +222 -0
  56. warp/_src/fem/space/restriction.py +221 -0
  57. warp/_src/fem/space/shape/__init__.py +152 -0
  58. warp/_src/fem/space/shape/cube_shape_function.py +1107 -0
  59. warp/_src/fem/space/shape/shape_function.py +134 -0
  60. warp/_src/fem/space/shape/square_shape_function.py +928 -0
  61. warp/_src/fem/space/shape/tet_shape_function.py +829 -0
  62. warp/_src/fem/space/shape/triangle_shape_function.py +674 -0
  63. warp/_src/fem/space/tetmesh_function_space.py +270 -0
  64. warp/_src/fem/space/topology.py +461 -0
  65. warp/_src/fem/space/trimesh_function_space.py +193 -0
  66. warp/_src/fem/types.py +114 -0
  67. warp/_src/fem/utils.py +488 -0
  68. warp/_src/jax.py +188 -0
  69. warp/_src/jax_experimental/__init__.py +14 -0
  70. warp/_src/jax_experimental/custom_call.py +389 -0
  71. warp/_src/jax_experimental/ffi.py +1286 -0
  72. warp/_src/jax_experimental/xla_ffi.py +658 -0
  73. warp/_src/marching_cubes.py +710 -0
  74. warp/_src/math.py +416 -0
  75. warp/_src/optim/__init__.py +14 -0
  76. warp/_src/optim/adam.py +165 -0
  77. warp/_src/optim/linear.py +1608 -0
  78. warp/_src/optim/sgd.py +114 -0
  79. warp/_src/paddle.py +408 -0
  80. warp/_src/render/__init__.py +14 -0
  81. warp/_src/render/imgui_manager.py +291 -0
  82. warp/_src/render/render_opengl.py +3638 -0
  83. warp/_src/render/render_usd.py +939 -0
  84. warp/_src/render/utils.py +162 -0
  85. warp/_src/sparse.py +2718 -0
  86. warp/_src/tape.py +1208 -0
  87. warp/_src/thirdparty/__init__.py +0 -0
  88. warp/_src/thirdparty/appdirs.py +598 -0
  89. warp/_src/thirdparty/dlpack.py +145 -0
  90. warp/_src/thirdparty/unittest_parallel.py +676 -0
  91. warp/_src/torch.py +393 -0
  92. warp/_src/types.py +5888 -0
  93. warp/_src/utils.py +1695 -0
  94. warp/autograd.py +33 -0
  95. warp/bin/libwarp-clang.dylib +0 -0
  96. warp/bin/libwarp.dylib +0 -0
  97. warp/build.py +29 -0
  98. warp/build_dll.py +24 -0
  99. warp/codegen.py +24 -0
  100. warp/constants.py +24 -0
  101. warp/context.py +33 -0
  102. warp/dlpack.py +24 -0
  103. warp/examples/__init__.py +24 -0
  104. warp/examples/assets/bear.usd +0 -0
  105. warp/examples/assets/bunny.usd +0 -0
  106. warp/examples/assets/cube.usd +0 -0
  107. warp/examples/assets/nonuniform.usd +0 -0
  108. warp/examples/assets/nvidia_logo.png +0 -0
  109. warp/examples/assets/pixel.jpg +0 -0
  110. warp/examples/assets/rocks.nvdb +0 -0
  111. warp/examples/assets/rocks.usd +0 -0
  112. warp/examples/assets/sphere.usd +0 -0
  113. warp/examples/assets/square_cloth.usd +0 -0
  114. warp/examples/benchmarks/benchmark_api.py +389 -0
  115. warp/examples/benchmarks/benchmark_cloth.py +296 -0
  116. warp/examples/benchmarks/benchmark_cloth_cupy.py +96 -0
  117. warp/examples/benchmarks/benchmark_cloth_jax.py +105 -0
  118. warp/examples/benchmarks/benchmark_cloth_numba.py +161 -0
  119. warp/examples/benchmarks/benchmark_cloth_numpy.py +85 -0
  120. warp/examples/benchmarks/benchmark_cloth_paddle.py +94 -0
  121. warp/examples/benchmarks/benchmark_cloth_pytorch.py +94 -0
  122. warp/examples/benchmarks/benchmark_cloth_taichi.py +120 -0
  123. warp/examples/benchmarks/benchmark_cloth_warp.py +153 -0
  124. warp/examples/benchmarks/benchmark_gemm.py +164 -0
  125. warp/examples/benchmarks/benchmark_interop_paddle.py +166 -0
  126. warp/examples/benchmarks/benchmark_interop_torch.py +166 -0
  127. warp/examples/benchmarks/benchmark_launches.py +301 -0
  128. warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
  129. warp/examples/benchmarks/benchmark_tile_sort.py +155 -0
  130. warp/examples/browse.py +37 -0
  131. warp/examples/core/example_cupy.py +86 -0
  132. warp/examples/core/example_dem.py +241 -0
  133. warp/examples/core/example_fluid.py +299 -0
  134. warp/examples/core/example_graph_capture.py +150 -0
  135. warp/examples/core/example_marching_cubes.py +195 -0
  136. warp/examples/core/example_mesh.py +180 -0
  137. warp/examples/core/example_mesh_intersect.py +211 -0
  138. warp/examples/core/example_nvdb.py +182 -0
  139. warp/examples/core/example_raycast.py +111 -0
  140. warp/examples/core/example_raymarch.py +205 -0
  141. warp/examples/core/example_render_opengl.py +290 -0
  142. warp/examples/core/example_sample_mesh.py +300 -0
  143. warp/examples/core/example_sph.py +411 -0
  144. warp/examples/core/example_spin_lock.py +93 -0
  145. warp/examples/core/example_torch.py +211 -0
  146. warp/examples/core/example_wave.py +269 -0
  147. warp/examples/core/example_work_queue.py +118 -0
  148. warp/examples/distributed/example_jacobi_mpi.py +506 -0
  149. warp/examples/fem/example_adaptive_grid.py +286 -0
  150. warp/examples/fem/example_apic_fluid.py +469 -0
  151. warp/examples/fem/example_burgers.py +261 -0
  152. warp/examples/fem/example_convection_diffusion.py +181 -0
  153. warp/examples/fem/example_convection_diffusion_dg.py +225 -0
  154. warp/examples/fem/example_darcy_ls_optimization.py +489 -0
  155. warp/examples/fem/example_deformed_geometry.py +172 -0
  156. warp/examples/fem/example_diffusion.py +196 -0
  157. warp/examples/fem/example_diffusion_3d.py +225 -0
  158. warp/examples/fem/example_diffusion_mgpu.py +225 -0
  159. warp/examples/fem/example_distortion_energy.py +228 -0
  160. warp/examples/fem/example_elastic_shape_optimization.py +387 -0
  161. warp/examples/fem/example_magnetostatics.py +242 -0
  162. warp/examples/fem/example_mixed_elasticity.py +293 -0
  163. warp/examples/fem/example_navier_stokes.py +263 -0
  164. warp/examples/fem/example_nonconforming_contact.py +300 -0
  165. warp/examples/fem/example_stokes.py +213 -0
  166. warp/examples/fem/example_stokes_transfer.py +262 -0
  167. warp/examples/fem/example_streamlines.py +357 -0
  168. warp/examples/fem/utils.py +1047 -0
  169. warp/examples/interop/example_jax_callable.py +146 -0
  170. warp/examples/interop/example_jax_ffi_callback.py +132 -0
  171. warp/examples/interop/example_jax_kernel.py +232 -0
  172. warp/examples/optim/example_diffray.py +561 -0
  173. warp/examples/optim/example_fluid_checkpoint.py +497 -0
  174. warp/examples/tile/example_tile_block_cholesky.py +502 -0
  175. warp/examples/tile/example_tile_cholesky.py +88 -0
  176. warp/examples/tile/example_tile_convolution.py +66 -0
  177. warp/examples/tile/example_tile_fft.py +55 -0
  178. warp/examples/tile/example_tile_filtering.py +113 -0
  179. warp/examples/tile/example_tile_matmul.py +85 -0
  180. warp/examples/tile/example_tile_mcgp.py +191 -0
  181. warp/examples/tile/example_tile_mlp.py +385 -0
  182. warp/examples/tile/example_tile_nbody.py +199 -0
  183. warp/fabric.py +24 -0
  184. warp/fem/__init__.py +173 -0
  185. warp/fem/adaptivity.py +26 -0
  186. warp/fem/cache.py +30 -0
  187. warp/fem/dirichlet.py +24 -0
  188. warp/fem/field/__init__.py +24 -0
  189. warp/fem/field/field.py +26 -0
  190. warp/fem/geometry/__init__.py +21 -0
  191. warp/fem/geometry/closest_point.py +31 -0
  192. warp/fem/linalg.py +38 -0
  193. warp/fem/operator.py +32 -0
  194. warp/fem/polynomial.py +29 -0
  195. warp/fem/space/__init__.py +22 -0
  196. warp/fem/space/basis_space.py +24 -0
  197. warp/fem/space/shape/__init__.py +68 -0
  198. warp/fem/space/topology.py +24 -0
  199. warp/fem/types.py +24 -0
  200. warp/fem/utils.py +32 -0
  201. warp/jax.py +29 -0
  202. warp/jax_experimental/__init__.py +29 -0
  203. warp/jax_experimental/custom_call.py +29 -0
  204. warp/jax_experimental/ffi.py +39 -0
  205. warp/jax_experimental/xla_ffi.py +24 -0
  206. warp/marching_cubes.py +24 -0
  207. warp/math.py +37 -0
  208. warp/native/array.h +1687 -0
  209. warp/native/builtin.h +2327 -0
  210. warp/native/bvh.cpp +562 -0
  211. warp/native/bvh.cu +826 -0
  212. warp/native/bvh.h +555 -0
  213. warp/native/clang/clang.cpp +541 -0
  214. warp/native/coloring.cpp +622 -0
  215. warp/native/crt.cpp +51 -0
  216. warp/native/crt.h +568 -0
  217. warp/native/cuda_crt.h +1058 -0
  218. warp/native/cuda_util.cpp +677 -0
  219. warp/native/cuda_util.h +313 -0
  220. warp/native/error.cpp +77 -0
  221. warp/native/error.h +36 -0
  222. warp/native/exports.h +2023 -0
  223. warp/native/fabric.h +246 -0
  224. warp/native/hashgrid.cpp +311 -0
  225. warp/native/hashgrid.cu +89 -0
  226. warp/native/hashgrid.h +240 -0
  227. warp/native/initializer_array.h +41 -0
  228. warp/native/intersect.h +1253 -0
  229. warp/native/intersect_adj.h +375 -0
  230. warp/native/intersect_tri.h +348 -0
  231. warp/native/mat.h +5189 -0
  232. warp/native/mathdx.cpp +93 -0
  233. warp/native/matnn.h +221 -0
  234. warp/native/mesh.cpp +266 -0
  235. warp/native/mesh.cu +406 -0
  236. warp/native/mesh.h +2097 -0
  237. warp/native/nanovdb/GridHandle.h +533 -0
  238. warp/native/nanovdb/HostBuffer.h +591 -0
  239. warp/native/nanovdb/NanoVDB.h +6246 -0
  240. warp/native/nanovdb/NodeManager.h +323 -0
  241. warp/native/nanovdb/PNanoVDB.h +3390 -0
  242. warp/native/noise.h +859 -0
  243. warp/native/quat.h +1664 -0
  244. warp/native/rand.h +342 -0
  245. warp/native/range.h +145 -0
  246. warp/native/reduce.cpp +174 -0
  247. warp/native/reduce.cu +363 -0
  248. warp/native/runlength_encode.cpp +79 -0
  249. warp/native/runlength_encode.cu +61 -0
  250. warp/native/scan.cpp +47 -0
  251. warp/native/scan.cu +55 -0
  252. warp/native/scan.h +23 -0
  253. warp/native/solid_angle.h +466 -0
  254. warp/native/sort.cpp +251 -0
  255. warp/native/sort.cu +286 -0
  256. warp/native/sort.h +35 -0
  257. warp/native/sparse.cpp +241 -0
  258. warp/native/sparse.cu +435 -0
  259. warp/native/spatial.h +1306 -0
  260. warp/native/svd.h +727 -0
  261. warp/native/temp_buffer.h +46 -0
  262. warp/native/tile.h +4124 -0
  263. warp/native/tile_radix_sort.h +1112 -0
  264. warp/native/tile_reduce.h +838 -0
  265. warp/native/tile_scan.h +240 -0
  266. warp/native/tuple.h +189 -0
  267. warp/native/vec.h +2199 -0
  268. warp/native/version.h +23 -0
  269. warp/native/volume.cpp +501 -0
  270. warp/native/volume.cu +68 -0
  271. warp/native/volume.h +970 -0
  272. warp/native/volume_builder.cu +483 -0
  273. warp/native/volume_builder.h +52 -0
  274. warp/native/volume_impl.h +70 -0
  275. warp/native/warp.cpp +1143 -0
  276. warp/native/warp.cu +4604 -0
  277. warp/native/warp.h +358 -0
  278. warp/optim/__init__.py +20 -0
  279. warp/optim/adam.py +24 -0
  280. warp/optim/linear.py +35 -0
  281. warp/optim/sgd.py +24 -0
  282. warp/paddle.py +24 -0
  283. warp/py.typed +0 -0
  284. warp/render/__init__.py +22 -0
  285. warp/render/imgui_manager.py +29 -0
  286. warp/render/render_opengl.py +24 -0
  287. warp/render/render_usd.py +24 -0
  288. warp/render/utils.py +24 -0
  289. warp/sparse.py +51 -0
  290. warp/tape.py +24 -0
  291. warp/tests/__init__.py +1 -0
  292. warp/tests/__main__.py +4 -0
  293. warp/tests/assets/curlnoise_golden.npy +0 -0
  294. warp/tests/assets/mlp_golden.npy +0 -0
  295. warp/tests/assets/pixel.npy +0 -0
  296. warp/tests/assets/pnoise_golden.npy +0 -0
  297. warp/tests/assets/spiky.usd +0 -0
  298. warp/tests/assets/test_grid.nvdb +0 -0
  299. warp/tests/assets/test_index_grid.nvdb +0 -0
  300. warp/tests/assets/test_int32_grid.nvdb +0 -0
  301. warp/tests/assets/test_vec_grid.nvdb +0 -0
  302. warp/tests/assets/torus.nvdb +0 -0
  303. warp/tests/assets/torus.usda +105 -0
  304. warp/tests/aux_test_class_kernel.py +34 -0
  305. warp/tests/aux_test_compile_consts_dummy.py +18 -0
  306. warp/tests/aux_test_conditional_unequal_types_kernels.py +29 -0
  307. warp/tests/aux_test_dependent.py +29 -0
  308. warp/tests/aux_test_grad_customs.py +29 -0
  309. warp/tests/aux_test_instancing_gc.py +26 -0
  310. warp/tests/aux_test_module_aot.py +7 -0
  311. warp/tests/aux_test_module_unload.py +23 -0
  312. warp/tests/aux_test_name_clash1.py +40 -0
  313. warp/tests/aux_test_name_clash2.py +40 -0
  314. warp/tests/aux_test_reference.py +9 -0
  315. warp/tests/aux_test_reference_reference.py +8 -0
  316. warp/tests/aux_test_square.py +16 -0
  317. warp/tests/aux_test_unresolved_func.py +22 -0
  318. warp/tests/aux_test_unresolved_symbol.py +22 -0
  319. warp/tests/cuda/__init__.py +0 -0
  320. warp/tests/cuda/test_async.py +676 -0
  321. warp/tests/cuda/test_conditional_captures.py +1147 -0
  322. warp/tests/cuda/test_ipc.py +124 -0
  323. warp/tests/cuda/test_mempool.py +233 -0
  324. warp/tests/cuda/test_multigpu.py +169 -0
  325. warp/tests/cuda/test_peer.py +139 -0
  326. warp/tests/cuda/test_pinned.py +84 -0
  327. warp/tests/cuda/test_streams.py +691 -0
  328. warp/tests/geometry/__init__.py +0 -0
  329. warp/tests/geometry/test_bvh.py +335 -0
  330. warp/tests/geometry/test_hash_grid.py +259 -0
  331. warp/tests/geometry/test_marching_cubes.py +294 -0
  332. warp/tests/geometry/test_mesh.py +318 -0
  333. warp/tests/geometry/test_mesh_query_aabb.py +392 -0
  334. warp/tests/geometry/test_mesh_query_point.py +935 -0
  335. warp/tests/geometry/test_mesh_query_ray.py +323 -0
  336. warp/tests/geometry/test_volume.py +1103 -0
  337. warp/tests/geometry/test_volume_write.py +346 -0
  338. warp/tests/interop/__init__.py +0 -0
  339. warp/tests/interop/test_dlpack.py +730 -0
  340. warp/tests/interop/test_jax.py +1673 -0
  341. warp/tests/interop/test_paddle.py +800 -0
  342. warp/tests/interop/test_torch.py +1001 -0
  343. warp/tests/run_coverage_serial.py +39 -0
  344. warp/tests/test_adam.py +162 -0
  345. warp/tests/test_arithmetic.py +1096 -0
  346. warp/tests/test_array.py +3756 -0
  347. warp/tests/test_array_reduce.py +156 -0
  348. warp/tests/test_assert.py +303 -0
  349. warp/tests/test_atomic.py +336 -0
  350. warp/tests/test_atomic_bitwise.py +209 -0
  351. warp/tests/test_atomic_cas.py +312 -0
  352. warp/tests/test_bool.py +220 -0
  353. warp/tests/test_builtins_resolution.py +732 -0
  354. warp/tests/test_closest_point_edge_edge.py +327 -0
  355. warp/tests/test_codegen.py +974 -0
  356. warp/tests/test_codegen_instancing.py +1495 -0
  357. warp/tests/test_compile_consts.py +215 -0
  358. warp/tests/test_conditional.py +298 -0
  359. warp/tests/test_context.py +35 -0
  360. warp/tests/test_copy.py +319 -0
  361. warp/tests/test_ctypes.py +618 -0
  362. warp/tests/test_dense.py +73 -0
  363. warp/tests/test_devices.py +127 -0
  364. warp/tests/test_enum.py +136 -0
  365. warp/tests/test_examples.py +424 -0
  366. warp/tests/test_fabricarray.py +998 -0
  367. warp/tests/test_fast_math.py +72 -0
  368. warp/tests/test_fem.py +2204 -0
  369. warp/tests/test_fixedarray.py +229 -0
  370. warp/tests/test_fp16.py +136 -0
  371. warp/tests/test_func.py +501 -0
  372. warp/tests/test_future_annotations.py +100 -0
  373. warp/tests/test_generics.py +656 -0
  374. warp/tests/test_grad.py +893 -0
  375. warp/tests/test_grad_customs.py +339 -0
  376. warp/tests/test_grad_debug.py +341 -0
  377. warp/tests/test_implicit_init.py +411 -0
  378. warp/tests/test_import.py +45 -0
  379. warp/tests/test_indexedarray.py +1140 -0
  380. warp/tests/test_intersect.py +103 -0
  381. warp/tests/test_iter.py +76 -0
  382. warp/tests/test_large.py +177 -0
  383. warp/tests/test_launch.py +411 -0
  384. warp/tests/test_lerp.py +151 -0
  385. warp/tests/test_linear_solvers.py +223 -0
  386. warp/tests/test_lvalue.py +427 -0
  387. warp/tests/test_map.py +526 -0
  388. warp/tests/test_mat.py +3515 -0
  389. warp/tests/test_mat_assign_copy.py +178 -0
  390. warp/tests/test_mat_constructors.py +573 -0
  391. warp/tests/test_mat_lite.py +122 -0
  392. warp/tests/test_mat_scalar_ops.py +2913 -0
  393. warp/tests/test_math.py +212 -0
  394. warp/tests/test_module_aot.py +287 -0
  395. warp/tests/test_module_hashing.py +258 -0
  396. warp/tests/test_modules_lite.py +70 -0
  397. warp/tests/test_noise.py +252 -0
  398. warp/tests/test_operators.py +299 -0
  399. warp/tests/test_options.py +129 -0
  400. warp/tests/test_overwrite.py +551 -0
  401. warp/tests/test_print.py +408 -0
  402. warp/tests/test_quat.py +2653 -0
  403. warp/tests/test_quat_assign_copy.py +145 -0
  404. warp/tests/test_rand.py +339 -0
  405. warp/tests/test_reload.py +303 -0
  406. warp/tests/test_rounding.py +157 -0
  407. warp/tests/test_runlength_encode.py +196 -0
  408. warp/tests/test_scalar_ops.py +133 -0
  409. warp/tests/test_smoothstep.py +108 -0
  410. warp/tests/test_snippet.py +318 -0
  411. warp/tests/test_sparse.py +845 -0
  412. warp/tests/test_spatial.py +2859 -0
  413. warp/tests/test_spatial_assign_copy.py +160 -0
  414. warp/tests/test_special_values.py +361 -0
  415. warp/tests/test_static.py +640 -0
  416. warp/tests/test_struct.py +901 -0
  417. warp/tests/test_tape.py +242 -0
  418. warp/tests/test_transient_module.py +93 -0
  419. warp/tests/test_triangle_closest_point.py +192 -0
  420. warp/tests/test_tuple.py +361 -0
  421. warp/tests/test_types.py +615 -0
  422. warp/tests/test_utils.py +594 -0
  423. warp/tests/test_vec.py +1408 -0
  424. warp/tests/test_vec_assign_copy.py +143 -0
  425. warp/tests/test_vec_constructors.py +325 -0
  426. warp/tests/test_vec_lite.py +80 -0
  427. warp/tests/test_vec_scalar_ops.py +2327 -0
  428. warp/tests/test_verify_fp.py +100 -0
  429. warp/tests/test_version.py +75 -0
  430. warp/tests/tile/__init__.py +0 -0
  431. warp/tests/tile/test_tile.py +1519 -0
  432. warp/tests/tile/test_tile_atomic_bitwise.py +403 -0
  433. warp/tests/tile/test_tile_cholesky.py +608 -0
  434. warp/tests/tile/test_tile_load.py +724 -0
  435. warp/tests/tile/test_tile_mathdx.py +156 -0
  436. warp/tests/tile/test_tile_matmul.py +179 -0
  437. warp/tests/tile/test_tile_mlp.py +400 -0
  438. warp/tests/tile/test_tile_reduce.py +950 -0
  439. warp/tests/tile/test_tile_shared_memory.py +376 -0
  440. warp/tests/tile/test_tile_sort.py +121 -0
  441. warp/tests/tile/test_tile_view.py +173 -0
  442. warp/tests/unittest_serial.py +47 -0
  443. warp/tests/unittest_suites.py +430 -0
  444. warp/tests/unittest_utils.py +469 -0
  445. warp/tests/walkthrough_debug.py +95 -0
  446. warp/torch.py +24 -0
  447. warp/types.py +51 -0
  448. warp/utils.py +31 -0
  449. warp_lang-1.10.0.dist-info/METADATA +459 -0
  450. warp_lang-1.10.0.dist-info/RECORD +468 -0
  451. warp_lang-1.10.0.dist-info/WHEEL +5 -0
  452. warp_lang-1.10.0.dist-info/licenses/LICENSE.md +176 -0
  453. warp_lang-1.10.0.dist-info/licenses/licenses/Gaia-LICENSE.txt +6 -0
  454. warp_lang-1.10.0.dist-info/licenses/licenses/appdirs-LICENSE.txt +22 -0
  455. warp_lang-1.10.0.dist-info/licenses/licenses/asset_pixel_jpg-LICENSE.txt +3 -0
  456. warp_lang-1.10.0.dist-info/licenses/licenses/cuda-LICENSE.txt +1582 -0
  457. warp_lang-1.10.0.dist-info/licenses/licenses/dlpack-LICENSE.txt +201 -0
  458. warp_lang-1.10.0.dist-info/licenses/licenses/fp16-LICENSE.txt +28 -0
  459. warp_lang-1.10.0.dist-info/licenses/licenses/libmathdx-LICENSE.txt +220 -0
  460. warp_lang-1.10.0.dist-info/licenses/licenses/llvm-LICENSE.txt +279 -0
  461. warp_lang-1.10.0.dist-info/licenses/licenses/moller-LICENSE.txt +16 -0
  462. warp_lang-1.10.0.dist-info/licenses/licenses/nanovdb-LICENSE.txt +2 -0
  463. warp_lang-1.10.0.dist-info/licenses/licenses/nvrtc-LICENSE.txt +1592 -0
  464. warp_lang-1.10.0.dist-info/licenses/licenses/svd-LICENSE.txt +23 -0
  465. warp_lang-1.10.0.dist-info/licenses/licenses/unittest_parallel-LICENSE.txt +21 -0
  466. warp_lang-1.10.0.dist-info/licenses/licenses/usd-LICENSE.txt +213 -0
  467. warp_lang-1.10.0.dist-info/licenses/licenses/windingnumber-LICENSE.txt +21 -0
  468. warp_lang-1.10.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,1519 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import unittest
17
+ from typing import Any
18
+
19
+ import numpy as np
20
+
21
+ import warp as wp
22
+ from warp.tests.unittest_utils import *
23
+
24
# Tile extents used as tile_load/tile_store shapes and offsets by the kernels
# below (TILE_K is not referenced in the visible part of this file).
TILE_M = wp.constant(8)
TILE_N = wp.constant(4)
TILE_K = wp.constant(8)

# num threads per-tile; passed as block_dim to wp.launch_tiled
TILE_DIM = 64
30
+
31
+
32
@wp.kernel
def tile_copy_1d_kernel(A: wp.array(dtype=float), B: wp.array(dtype=float)):
    # Copy one TILE_N-long segment of A into the same position of B.
    # The launch index selects which segment this tile handles.
    tile_idx = wp.tid()
    start = tile_idx * TILE_N

    segment = wp.tile_load(A, shape=TILE_N, offset=start)
    wp.tile_store(B, segment, offset=start)
39
+
40
+
41
def test_tile_copy_1d(test, device):
    """Launch the 1D tile copy kernel and check the copied values and gradients.

    Args:
        test: Test-case object supplied by the test harness (unused here).
        device: Device on which arrays are allocated and the kernel is launched.
    """
    rng = np.random.default_rng(42)

    N = TILE_N * 5

    # Draw source first, then destination, so the random stream is consumed
    # in a fixed order.
    src_np = rng.random(N, dtype=np.float32)
    dst_np = rng.random(N, dtype=np.float32)

    src = wp.array(src_np, requires_grad=True, device=device)
    dst = wp.array(dst_np, requires_grad=True, device=device)

    tape = wp.Tape()
    with tape:
        wp.launch_tiled(
            tile_copy_1d_kernel,
            dim=[N // TILE_N],
            inputs=[src, dst],
            block_dim=TILE_DIM,
            device=device,
        )

    # forward: destination must match the source exactly
    assert_array_equal(dst, src)

    # backward: seed the output gradient with ones and propagate
    dst.grad = wp.ones_like(dst, device=device)
    tape.backward()

    assert_array_equal(dst.grad, src.grad)
69
+
70
+
71
@wp.kernel
def tile_copy_2d_kernel(A: wp.array2d(dtype=float), B: wp.array2d(dtype=float)):
    # Copy one (TILE_M, TILE_N) tile of A into the same position of B.
    # The 2D launch index selects the tile's row and column.
    tile_row, tile_col = wp.tid()
    row = tile_row * TILE_M
    col = tile_col * TILE_N

    block = wp.tile_load(A, shape=(TILE_M, TILE_N), offset=(row, col))
    wp.tile_store(B, block, offset=(row, col))
78
+
79
+
80
def test_tile_copy_2d(test, device):
    """Launch the 2D tile copy kernel and check the copied values and gradients.

    Args:
        test: Test-case object supplied by the test harness (unused here).
        device: Device on which arrays are allocated and the kernel is launched.
    """
    rng = np.random.default_rng(42)

    M = TILE_M * 7
    N = TILE_N * 5

    # Draw source first, then destination, so the random stream is consumed
    # in a fixed order.
    src_np = rng.random((M, N), dtype=np.float32)
    dst_np = rng.random((M, N), dtype=np.float32)

    src = wp.array(src_np, requires_grad=True, device=device)
    dst = wp.array(dst_np, requires_grad=True, device=device)

    tape = wp.Tape()
    with tape:
        wp.launch_tiled(
            tile_copy_2d_kernel,
            dim=[M // TILE_M, N // TILE_N],
            inputs=[src, dst],
            block_dim=TILE_DIM,
            device=device,
        )

    # forward: destination must match the source exactly
    assert_array_equal(dst, src)

    # backward: seed the output gradient with ones and propagate
    dst.grad = wp.ones_like(dst, device=device)
    tape.backward()

    assert_array_equal(dst.grad, src.grad)
109
+
110
+
111
@wp.func
def unary_func(x: wp.float32):
    # float32 variant of the element-wise function passed to wp.tile_map.
    result = wp.sin(x)
    return result
114
+
115
+
116
@wp.func
def unary_func(x: wp.float64):
    # float64 variant of the element-wise function passed to wp.tile_map.
    result = wp.sin(x)
    return result
119
+
120
+
121
@wp.kernel
def tile_unary_map_user_func(input: wp.array2d(dtype=Any), output: wp.array2d(dtype=Any)):
    # Apply the user-defined unary_func to one (TILE_M, TILE_N) tile of input
    # and store the mapped tile at the same position in output.
    tile_row, tile_col = wp.tid()
    row = tile_row * TILE_M
    col = tile_col * TILE_N

    src = wp.tile_load(input, shape=(TILE_M, TILE_N), offset=(row, col))

    mapped = wp.tile_map(unary_func, src)

    wp.tile_store(output, mapped, offset=(row, col))
131
+
132
+
133
@wp.kernel
def tile_unary_map_builtin_func(input: wp.array2d(dtype=Any), output: wp.array2d(dtype=Any)):
    # Apply the built-in wp.sin to one (TILE_M, TILE_N) tile of input and
    # store the mapped tile at the same position in output.
    tile_row, tile_col = wp.tid()
    row = tile_row * TILE_M
    col = tile_col * TILE_N

    src = wp.tile_load(input, shape=(TILE_M, TILE_N), offset=(row, col))

    mapped = wp.tile_map(wp.sin, src)

    wp.tile_store(output, mapped, offset=(row, col))
143
+
144
+
145
def test_tile_unary_map(test, device):
    """Check tile_map with a unary sine function against NumPy for float32 and float64.

    Both the user-defined and the built-in kernels are run for each dtype; the
    forward result is compared with ``np.sin`` and the input gradient with
    ``np.cos``.

    Args:
        test: Test-case object supplied by the test harness (unused here).
        device: Device on which arrays are allocated and kernels are launched.
    """
    rng = np.random.default_rng(42)

    M = TILE_M * 7
    N = TILE_N * 5

    def run(kernel, dtype):
        x = rng.random((M, N), dtype=dtype)
        expected = np.sin(x)
        expected_grad = np.cos(x)

        x_wp = wp.array(x, requires_grad=True, device=device)
        y_wp = wp.zeros_like(x_wp, requires_grad=True, device=device)

        tape = wp.Tape()
        with tape:
            wp.launch_tiled(
                kernel,
                dim=[M // TILE_M, N // TILE_N],
                inputs=[x_wp, y_wp],
                block_dim=TILE_DIM,
                device=device,
            )

        # double precision gets the tighter tolerance
        if dtype == np.float64:
            tol = 1.0e-6
        else:
            tol = 1.0e-4

        # forward
        assert_np_equal(y_wp.numpy(), expected, tol=tol)

        # backward: seed the output gradient with ones and propagate
        y_wp.grad = wp.ones_like(y_wp, device=device)
        tape.backward()

        assert_np_equal(x_wp.grad.numpy(), expected_grad, tol=tol)

    # Order matters: each run draws from the shared rng.
    for dtype in (np.float32, np.float64):
        for kernel in (tile_unary_map_user_func, tile_unary_map_builtin_func):
            run(kernel, dtype)
185
+
186
+
187
@wp.func
def unary_func_mixed_types(x: int) -> float:
    # Takes an int and returns the sine of its float conversion.
    as_float = float(x)
    return wp.sin(as_float)
190
+
191
+
192
@wp.kernel
def tile_unary_map_mixed_types(input: wp.array2d(dtype=int), output: wp.array2d(dtype=float)):
    # Map an int tile to a float tile through unary_func_mixed_types and store
    # the result at the same tile position in output.
    tile_row, tile_col = wp.tid()
    row = tile_row * TILE_M
    col = tile_col * TILE_N

    src = wp.tile_load(input, shape=(TILE_M, TILE_N), offset=(row, col))

    mapped = wp.tile_map(unary_func_mixed_types, src)

    wp.tile_store(output, mapped, offset=(row, col))
202
+
203
+
204
def test_tile_unary_map_mixed_types(test, device):
    """Check tile_map with an int -> float unary function against NumPy.

    The forward result is compared with ``np.sin`` of the inputs converted to
    float32. The backward pass is executed but the input gradient is not
    asserted (see the note at the end of the function).

    Args:
        test: Test-case object supplied by the test harness (unused here).
        device: Device on which arrays are allocated and the kernel is launched.
    """
    rng = np.random.default_rng(42)

    M = TILE_M * 7
    N = TILE_N * 5

    A = rng.integers(0, 100, size=(M, N), dtype=np.int32)
    B = np.sin(A.astype(np.float32))

    A_wp = wp.array(A, requires_grad=True, device=device)
    B_wp = wp.zeros((M, N), dtype=float, requires_grad=True, device=device)

    with wp.Tape() as tape:
        wp.launch_tiled(
            tile_unary_map_mixed_types,
            dim=[M // TILE_M, N // TILE_N],
            inputs=[A_wp, B_wp],
            block_dim=TILE_DIM,
            device=device,
        )

    # verify forward pass
    assert_np_equal(B_wp.numpy(), B, tol=1.0e-4)

    # run the backward pass to make sure it executes
    B_wp.grad = wp.ones_like(B_wp, device=device)
    tape.backward()

    # NOTE: A_wp.grad is intentionally not compared against cos(A). The
    # gradients of the int-typed input are stored as ints and cannot capture
    # the correct (fractional) values.
236
+
237
+
238
@wp.func
def binary_func(x: wp.float32, y: wp.float32):
    # float32 variant of the element-wise addition passed to wp.tile_map.
    total = x + y
    return total
241
+
242
+
243
@wp.func
def binary_func(x: wp.float64, y: wp.float64):
    # float64 variant of the element-wise addition passed to wp.tile_map.
    total = x + y
    return total
246
+
247
+
248
@wp.kernel
def tile_binary_map_user_func(
    input_a: wp.array2d(dtype=Any), input_b: wp.array2d(dtype=Any), output: wp.array2d(dtype=Any)
):
    # Combine matching (TILE_M, TILE_N) tiles of input_a and input_b with the
    # user-defined binary_func and store the result tile in output.
    tile_row, tile_col = wp.tid()
    row = tile_row * TILE_M
    col = tile_col * TILE_N

    lhs = wp.tile_load(input_a, shape=(TILE_M, TILE_N), offset=(row, col))
    rhs = wp.tile_load(input_b, shape=(TILE_M, TILE_N), offset=(row, col))

    combined = wp.tile_map(binary_func, lhs, rhs)

    wp.tile_store(output, combined, offset=(row, col))
261
+
262
+
263
@wp.kernel
def tile_binary_map_builtin_func(
    input_a: wp.array2d(dtype=Any), input_b: wp.array2d(dtype=Any), output: wp.array2d(dtype=Any)
):
    # Combine matching (TILE_M, TILE_N) tiles of input_a and input_b with the
    # built-in wp.add and store the result tile in output.
    tile_row, tile_col = wp.tid()
    row = tile_row * TILE_M
    col = tile_col * TILE_N

    lhs = wp.tile_load(input_a, shape=(TILE_M, TILE_N), offset=(row, col))
    rhs = wp.tile_load(input_b, shape=(TILE_M, TILE_N), offset=(row, col))

    combined = wp.tile_map(wp.add, lhs, rhs)

    wp.tile_store(output, combined, offset=(row, col))
276
+
277
+
278
def test_tile_binary_map(test, device):
    """Check tile_map with a binary addition function against NumPy for float32 and float64.

    Both the built-in and the user-defined kernels are run for each dtype; the
    forward result is compared with ``A + B`` and both input gradients with
    arrays of ones.

    Args:
        test: Test-case object supplied by the test harness (unused here).
        device: Device on which arrays are allocated and kernels are launched.
    """
    rng = np.random.default_rng(42)

    M = TILE_M * 7
    N = TILE_N * 5

    def run(kernel, dtype):
        # Draw the left operand first, then the right, to keep the random
        # stream order fixed.
        lhs = rng.random((M, N), dtype=dtype)
        rhs = rng.random((M, N), dtype=dtype)
        expected = lhs + rhs

        expected_lhs_grad = np.ones_like(lhs)
        expected_rhs_grad = np.ones_like(rhs)

        lhs_wp = wp.array(lhs, requires_grad=True, device=device)
        rhs_wp = wp.array(rhs, requires_grad=True, device=device)
        out_wp = wp.zeros_like(lhs_wp, requires_grad=True, device=device)

        tape = wp.Tape()
        with tape:
            wp.launch_tiled(
                kernel,
                dim=[M // TILE_M, N // TILE_N],
                inputs=[lhs_wp, rhs_wp, out_wp],
                block_dim=TILE_DIM,
                device=device,
            )

        # double precision gets the tighter tolerance
        if dtype == np.float64:
            tol = 1.0e-6
        else:
            tol = 1.0e-4

        # forward
        assert_np_equal(out_wp.numpy(), expected, tol=tol)

        # backward: seed the output gradient with ones and propagate
        out_wp.grad = wp.ones_like(out_wp, device=device)
        tape.backward()

        assert_np_equal(lhs_wp.grad.numpy(), expected_lhs_grad, tol=tol)
        assert_np_equal(rhs_wp.grad.numpy(), expected_rhs_grad, tol=tol)

    # Order matters: each run draws from the shared rng.
    for dtype in (np.float32, np.float64):
        for kernel in (tile_binary_map_builtin_func, tile_binary_map_user_func):
            run(kernel, dtype)
322
+
323
+
324
@wp.func
def binary_func_mixed_types(x: int, y: float) -> float:
    # Adds the sine of the int argument (converted to float) to the float argument.
    sine_of_x = wp.sin(float(x))
    return sine_of_x + y
327
+
328
+
329
@wp.kernel
def tile_binary_map_mixed_types(
    input_a: wp.array2d(dtype=int), input_b: wp.array2d(dtype=float), output: wp.array2d(dtype=float)
):
    # Combine an int tile and a float tile through binary_func_mixed_types and
    # store the float result tile in output.
    tile_row, tile_col = wp.tid()
    row = tile_row * TILE_M
    col = tile_col * TILE_N

    lhs = wp.tile_load(input_a, shape=(TILE_M, TILE_N), offset=(row, col))
    rhs = wp.tile_load(input_b, shape=(TILE_M, TILE_N), offset=(row, col))

    combined = wp.tile_map(binary_func_mixed_types, lhs, rhs)

    wp.tile_store(output, combined, offset=(row, col))
342
+
343
+
344
def test_tile_binary_map_mixed_types(test, device):
    """Check ``wp.tile_map`` with a binary function taking an int and a float tile.

    The forward result is compared with ``sin(A) + B``; on the backward pass
    only the gradient of the float input ``B`` is verified (see the note at
    the end of the function).
    """
    rng = np.random.default_rng(42)

    M = TILE_M * 7
    N = TILE_N * 5

    A = rng.integers(0, 100, size=(M, N), dtype=np.int32)
    B = rng.random((M, N), dtype=np.float32)
    C = np.sin(A.astype(np.float32)) + B

    B_grad = np.ones_like(B)

    A_wp = wp.array(A, requires_grad=True, device=device)
    B_wp = wp.array(B, requires_grad=True, device=device)
    C_wp = wp.zeros_like(B_wp, requires_grad=True, device=device)

    with wp.Tape() as tape:
        wp.launch_tiled(
            tile_binary_map_mixed_types,
            dim=[int(M / TILE_M), int(N / TILE_N)],
            inputs=[A_wp, B_wp, C_wp],
            block_dim=TILE_DIM,
            device=device,
        )

    # verify forward pass
    assert_np_equal(C_wp.numpy(), C, tol=1.0e-6)

    # verify backward pass
    C_wp.grad = wp.ones_like(C_wp, device=device)
    tape.backward()

    # NOTE: the gradient of A is intentionally not checked. Analytically it
    # would be cos(A), but A is an integer array, so its gradients are stored
    # as ints and cannot capture the correct values.
    assert_np_equal(B_wp.grad.numpy(), B_grad)
380
+
381
+
382
# Exercises the tile arithmetic operators on one (TILE_M, TILE_N) tile per
# launch index: negation, scalar multiply from either side, and tile addition.
@wp.kernel
def tile_operators(input: wp.array3d(dtype=float), output: wp.array3d(dtype=float)):
    # output tile index
    batch = wp.tid()

    src = wp.tile_load(input[batch], shape=(TILE_M, TILE_N))

    # unary minus
    negated = -src

    # scalar on the right-hand side
    halved = negated * 0.5

    # scalar on the left-hand side
    quartered = 0.5 * halved

    # tile + tile
    result = src + quartered

    wp.tile_store(output[batch], result)
402
+
403
+
404
def test_tile_operators(test, device):
    """Run ``tile_operators`` over a batch of tiles and check values and gradients.

    The kernel computes ``a + 0.5 * (-a * 0.5)``, so both the output scale and
    the input gradient are expected to be 0.75.
    """
    batch_count = 56

    rng = np.random.default_rng(42)
    host_in = rng.random((batch_count, TILE_M, TILE_N), dtype=np.float32)
    host_expected = host_in * 0.75

    input_wp = wp.array(host_in, requires_grad=True, device=device)
    output_wp = wp.zeros_like(input_wp, requires_grad=True, device=device)

    with wp.Tape() as tape:
        wp.launch_tiled(
            tile_operators,
            dim=[batch_count],
            inputs=[input_wp, output_wp],
            block_dim=TILE_DIM,
            device=device,
        )

    # forward values
    assert_np_equal(output_wp.numpy(), host_expected)

    # backward pass with a unit seed on the output
    output_wp.grad.fill_(1.0)
    tape.backward()

    assert_np_equal(input_wp.grad.numpy(), np.full_like(host_in, 0.75))
429
+
430
+
431
# Generic kernel: builds a tile from the first element of x with
# preserve_type=True and stores that tile into y.
@wp.kernel
def test_tile_tile_preserve_type_kernel(x: wp.array(dtype=Any), y: wp.array(dtype=Any)):
    value = x[0]
    tiled = wp.tile(value, preserve_type=True)
    wp.tile_store(y, tiled)


# register one overload per element type used by test_tile_tile
for _elem_type in (float, wp.vec3, wp.quat, wp.mat33):
    wp.overload(
        test_tile_tile_preserve_type_kernel,
        {"x": wp.array(dtype=_elem_type), "y": wp.array(dtype=_elem_type)},
    )
del _elem_type
442
+
443
+
444
# Builds a tile from the scalar x[0] with the default wp.tile() settings and
# stores it into the 1D float array y.
@wp.kernel
def test_tile_tile_scalar_expansion_kernel(x: wp.array(dtype=float), y: wp.array(dtype=float)):
    scalar = x[0]
    tiled = wp.tile(scalar)
    wp.tile_store(y, tiled)
449
+
450
+
451
# Builds a tile from the vec3 x[0] with the default wp.tile() settings and
# stores it into the 2D float array y.
@wp.kernel
def test_tile_tile_vec_expansion_kernel(x: wp.array(dtype=wp.vec3), y: wp.array2d(dtype=float)):
    vec = x[0]
    tiled = wp.tile(vec)
    wp.tile_store(y, tiled)
456
+
457
+
458
# Builds a tile from the mat33 x[0] with the default wp.tile() settings and
# stores it into the 3D float array y.
@wp.kernel
def test_tile_tile_mat_expansion_kernel(x: wp.array(dtype=wp.mat33), y: wp.array3d(dtype=float)):
    mat = x[0]
    tiled = wp.tile(mat)
    wp.tile_store(y, tiled)
463
+
464
+
465
def test_tile_tile(test, device):
    """Check ``wp.tile`` with preserved element types and with expansion to floats.

    Every case launches ``TILE_DIM`` threads that all read ``x[0]``, so the
    output is expected to be all ones and the gradient of ``x[0]`` to equal
    ``TILE_DIM`` in every component.
    """

    def launch_and_backprop(kernel, x_dtype, y_shape, y_dtype):
        # single-element input, output shaped for the tile being stored
        x = wp.ones(1, dtype=x_dtype, requires_grad=True, device=device)
        y = wp.zeros(y_shape, dtype=y_dtype, requires_grad=True, device=device)

        tape = wp.Tape()
        with tape:
            wp.launch(
                kernel,
                dim=[TILE_DIM],
                inputs=[x],
                outputs=[y],
                block_dim=TILE_DIM,
                device=device,
            )

        y.grad = wp.ones_like(y)

        tape.backward()

        return x, y

    # preserve type
    for dtype in (float, wp.vec3, wp.quat, wp.mat33):
        x, y = launch_and_backprop(test_tile_tile_preserve_type_kernel, dtype, TILE_DIM, dtype)

        assert_np_equal(y.numpy(), wp.full(TILE_DIM, dtype(1.0), dtype=dtype, device="cpu").numpy())
        assert_np_equal(x.grad.numpy(), wp.full((1,), dtype(TILE_DIM), dtype=dtype, device="cpu").numpy())

    # scalar, vec and mat expansion: (kernel, input dtype, output shape)
    expansion_cases = (
        (test_tile_tile_scalar_expansion_kernel, float, TILE_DIM),
        (test_tile_tile_vec_expansion_kernel, wp.vec3, (3, TILE_DIM)),
        (test_tile_tile_mat_expansion_kernel, wp.mat33, (3, 3, TILE_DIM)),
    )

    for kernel, x_dtype, y_shape in expansion_cases:
        x, y = launch_and_backprop(kernel, x_dtype, y_shape, float)

        assert_np_equal(y.numpy(), wp.full(y_shape, 1.0, dtype=float, device="cpu").numpy())
        assert_np_equal(x.grad.numpy(), wp.full((1,), wp.float32(TILE_DIM), dtype=x_dtype, device="cpu").numpy())
559
+
560
+
561
# Generic kernel: each thread tiles its own element of x with
# preserve_type=True, untiles it again and writes the result to y.
@wp.kernel
def test_tile_untile_preserve_type_kernel(x: wp.array(dtype=Any), y: wp.array(dtype=Any)):
    tid = wp.tid()
    value = x[tid]
    tiled = wp.tile(value, preserve_type=True)
    restored = wp.untile(tiled)
    y[tid] = restored


# register one overload per element type used by test_tile_untile
for _elem_type in (float, wp.vec3, wp.quat, wp.mat33):
    wp.overload(
        test_tile_untile_preserve_type_kernel,
        {"x": wp.array(dtype=_elem_type), "y": wp.array(dtype=_elem_type)},
    )
del _elem_type
574
+
575
+
576
# Generic kernel: each thread tiles its own element of x with the default
# wp.tile() settings, untiles it again and writes the result to y.
@wp.kernel
def test_tile_untile_kernel(x: wp.array(dtype=Any), y: wp.array(dtype=Any)):
    tid = wp.tid()
    value = x[tid]
    tiled = wp.tile(value)
    restored = wp.untile(tiled)
    y[tid] = restored


# register one overload per element type used by test_tile_untile
for _elem_type in (float, wp.vec3, wp.mat33):
    wp.overload(
        test_tile_untile_kernel,
        {"x": wp.array(dtype=_elem_type), "y": wp.array(dtype=_elem_type)},
    )
del _elem_type
588
+
589
+
590
def test_tile_untile(test, device):
    """Check that ``wp.tile`` followed by ``wp.untile`` round-trips values and gradients.

    Both the type-preserving kernel and the default kernel are expected to
    copy ``x`` into ``y`` and to pass the output gradient straight through.
    """

    def roundtrip(kernel, dtype):
        x = wp.ones(TILE_DIM, dtype=dtype, requires_grad=True, device=device)
        y = wp.zeros_like(x)

        tape = wp.Tape()
        with tape:
            wp.launch(
                kernel,
                dim=TILE_DIM,
                inputs=[x],
                outputs=[y],
                block_dim=TILE_DIM,
                device=device,
            )

        y.grad = wp.ones_like(y)

        tape.backward()

        assert_np_equal(y.numpy(), x.numpy())
        assert_np_equal(x.grad.numpy(), y.grad.numpy())

    # type-preserving tiles
    for dtype in (float, wp.vec3, wp.quat, wp.mat33):
        roundtrip(test_tile_untile_preserve_type_kernel, dtype)

    # default tiles
    for dtype in (float, wp.vec3, wp.mat33):
        roundtrip(test_tile_untile_kernel, dtype)
636
+
637
+
638
# Takes a (TILE_M, TILE_N) float tile and returns half of its wp.tile_sum().
@wp.func
def tile_sum_func(a: wp.tile(dtype=float, shape=(TILE_M, TILE_N))):
    total = wp.tile_sum(a)
    return total * 0.5
641
+
642
+
643
# Loads one (TILE_M, TILE_N) tile per launch index, reduces it with
# tile_sum_func and stores the result at the matching offset of `output`.
@wp.kernel
def tile_sum_kernel(input: wp.array3d(dtype=float), output: wp.array(dtype=float)):
    # output tile index
    batch = wp.tid()

    src = wp.tile_load(input[batch], shape=(TILE_M, TILE_N))
    half_sum = tile_sum_func(src)

    wp.tile_store(output, half_sum, offset=batch)
652
+
653
+
654
def test_tile_sum(test, device):
    """Check ``tile_sum_kernel`` values and gradients when recorded on a tape.

    Each output entry is expected to be half the sum of its input tile, and
    every input element is expected to receive a gradient of 0.5.
    """
    batch_count = 56

    rng = np.random.default_rng(42)
    host_in = rng.random((batch_count, TILE_M, TILE_N), dtype=np.float32)

    input_wp = wp.array(host_in, requires_grad=True, device=device)
    output_wp = wp.zeros(batch_count, requires_grad=True, device=device)

    with wp.Tape() as tape:
        wp.launch_tiled(
            tile_sum_kernel,
            dim=[batch_count],
            inputs=[input_wp, output_wp],
            block_dim=TILE_DIM,
            device=device,
        )

    # compare each per-tile result with the NumPy reference
    for computed, tile in zip(output_wp.numpy(), host_in):
        reference = np.sum(tile) * 0.5
        test.assertAlmostEqual(computed, reference, places=5)

    output_wp.grad.fill_(1.0)

    tape.backward()

    assert_np_equal(input_wp.grad.numpy(), np.full_like(host_in, 0.5))
686
+
687
+
688
def test_tile_sum_launch(test, device):
    """Check ``tile_sum_kernel`` through a recorded launch and an explicit adjoint launch.

    The forward pass goes through the object returned by ``wp.launch_tiled``
    with ``record_cmd=True``; the backward pass is a second ``wp.launch_tiled``
    call with ``adjoint=True`` instead of a tape.
    """
    batch_count = 56

    rng = np.random.default_rng(42)
    host_in = rng.random((batch_count, TILE_M, TILE_N), dtype=np.float32)

    input_wp = wp.array(host_in, requires_grad=True, device=device)
    output_wp = wp.zeros(batch_count, requires_grad=True, device=device)

    # forward: record the launch, then execute it
    recorded = wp.launch_tiled(
        tile_sum_kernel,
        dim=[batch_count],
        inputs=[input_wp, output_wp],
        block_dim=TILE_DIM,
        device=device,
        record_cmd=True,
    )
    recorded.launch()

    # compare each per-tile result with the NumPy reference
    for computed, tile in zip(output_wp.numpy(), host_in):
        reference = np.sum(tile) * 0.5
        test.assertAlmostEqual(computed, reference, places=5)

    output_wp.grad.fill_(1.0)

    # backward: launch the adjoint kernel directly
    wp.launch_tiled(
        tile_sum_kernel,
        dim=[batch_count],
        inputs=[input_wp, output_wp],
        adj_inputs=[input_wp.grad, output_wp.grad],
        block_dim=TILE_DIM,
        device=device,
        adjoint=True,
    )

    assert_np_equal(input_wp.grad.numpy(), np.full_like(host_in, 0.5))
729
+
730
+
731
# Each thread extracts element (x, y) of its (TILE_M, TILE_N) sub-tile of `a`
# and atomically adds it to b[i, j].
@wp.kernel(module="unique")
def test_tile_extract_kernel(a: wp.array2d(dtype=float), b: wp.array2d(dtype=float)):
    i, j, x, y = wp.tid()

    # element offset of sub-tile (i, j)
    row0 = i * TILE_M
    col0 = j * TILE_N

    sub_tile = wp.tile_load(a, shape=(TILE_M, TILE_N), offset=(row0, col0))

    # compute sum of array sub tile
    element = wp.tile_extract(sub_tile, x, y)
    wp.atomic_add(b, i, j, element)
739
+
740
+
741
# Loads a tile of vec3 values and writes component 1 of element i to y[i].
@wp.kernel
def test_tile_extract_vec_kernel(x: wp.array(dtype=wp.vec3), y: wp.array(dtype=float)):
    tid = wp.tid()

    vec_tile = wp.tile_load(x, shape=TILE_M)

    component = vec_tile[tid][1]

    y[tid] = component
750
+
751
+
752
# Loads a tile of mat33 values and writes entry (1, 1) of element i to y[i].
@wp.kernel
def test_tile_extract_mat_kernel(x: wp.array(dtype=wp.mat33), y: wp.array(dtype=float)):
    tid = wp.tid()

    mat_tile = wp.tile_load(x, shape=TILE_M)

    entry = mat_tile[tid][1, 1]

    y[tid] = entry
761
+
762
+
763
def test_tile_extract(test, device):
    """Check tile element extraction for scalar, vec3 and mat33 tiles, with gradients."""
    block_dim = 16

    host_in = np.arange(TILE_M * TILE_N * 4).reshape((TILE_M * 2, TILE_N * 2))

    a = wp.array(host_in, dtype=float, requires_grad=True, device=device)
    b = wp.zeros((2, 2), dtype=float, requires_grad=True, device=device)

    with wp.Tape() as tape:
        wp.launch(
            test_tile_extract_kernel,
            dim=[2, 2, TILE_M, TILE_N],
            inputs=[a, b],
            block_dim=block_dim,
            device=device,
        )

    # compute sum of each sub-block
    rows, cols = host_in.shape
    block_sums = host_in.reshape(2, rows // 2, 2, cols // 2).sum(axis=(1, 3))

    assert_np_equal(b.numpy(), block_sums)

    b.grad.fill_(1.0)

    tape.backward()

    # every input element is extracted exactly once
    assert_np_equal(a.grad.numpy(), np.ones_like(host_in))

    # vector element test
    x = wp.ones(TILE_M, dtype=wp.vec3, requires_grad=True, device=device)
    y = wp.zeros(TILE_M, dtype=float, requires_grad=True, device=device)

    with wp.Tape() as tape:
        wp.launch(test_tile_extract_vec_kernel, dim=[TILE_M], inputs=[x, y], block_dim=TILE_DIM, device=device)

    y.grad = wp.ones_like(y)

    tape.backward()

    # only component 1 is read, so only it receives a gradient
    vec_grad_ref = np.zeros((TILE_M, 3), dtype=float)
    vec_grad_ref[:, 1] = 1.0

    assert_np_equal(x.grad.numpy(), vec_grad_ref)
    assert_np_equal(y.numpy(), np.ones(TILE_M, dtype=float))

    # matrix element test
    x = wp.ones(TILE_M, dtype=wp.mat33, requires_grad=True, device=device)
    y = wp.zeros(TILE_M, dtype=float, requires_grad=True, device=device)

    with wp.Tape() as tape:
        wp.launch(test_tile_extract_mat_kernel, dim=[TILE_M], inputs=[x, y], block_dim=TILE_DIM, device=device)

    y.grad = wp.ones_like(y)

    tape.backward()

    # only entry (1, 1) is read, so only it receives a gradient
    mat_grad_ref = np.zeros((TILE_M, 3, 3), dtype=float)
    mat_grad_ref[:, 1, 1] = 1.0

    assert_np_equal(y.numpy(), np.ones(TILE_M, dtype=float))
    assert_np_equal(x.grad.numpy(), mat_grad_ref)
821
+
822
+
823
# Every thread of sub-tile (i, j) extracts the same element (0, 0) of that
# sub-tile and atomically adds it to b[i, j].
@wp.kernel(module="unique")
def test_tile_extract_repeated_kernel(a: wp.array2d(dtype=float), b: wp.array2d(dtype=float)):
    i, j, x, y = wp.tid()

    # element offset of sub-tile (i, j)
    row0 = i * TILE_M
    col0 = j * TILE_N

    sub_tile = wp.tile_load(a, shape=(TILE_M, TILE_N), offset=(row0, col0))

    # each thread extracts the first element of the sub-tile
    # and accumulates the value onto the output
    first = wp.tile_extract(sub_tile, 0, 0)
    wp.atomic_add(b, i, j, first)
832
+
833
+
834
def test_tile_extract_repeated(test, device):
    """Check extraction of the same tile element by every thread, with gradients.

    Each of the ``TILE_M * TILE_N`` threads per sub-tile adds the sub-tile's
    first element to the output, so both the output and the gradient of that
    element are scaled by the thread count.
    """
    block_dim = 16

    host_in = np.arange(TILE_M * TILE_N * 4).reshape((TILE_M * 2, TILE_N * 2))

    a = wp.array(host_in, dtype=float, requires_grad=True, device=device)
    b = wp.zeros((2, 2), dtype=float, requires_grad=True, device=device)

    with wp.Tape() as tape:
        wp.launch(
            test_tile_extract_repeated_kernel,
            dim=[2, 2, TILE_M, TILE_N],
            inputs=[a, b],
            block_dim=block_dim,
            device=device,
        )

    # first element of each of the four sub-tiles, in row-major order
    corners = ((0, 0), (0, TILE_N), (TILE_M, 0), (TILE_M, TILE_N))

    # each thread adds the first element to the output
    scale = TILE_M * TILE_N
    expected_sums = np.array([host_in[r, c] for r, c in corners]).reshape(2, 2) * scale

    assert_np_equal(b.numpy(), expected_sums)

    b.grad.fill_(1.0)

    tape.backward()

    # only the corner elements were read, once per thread of their sub-tile
    expected_grad = np.zeros_like(host_in)
    for r, c in corners:
        expected_grad[r, c] = scale

    assert_np_equal(a.grad.numpy(), expected_grad)
868
+
869
+
870
# Each thread writes x[j] into element j of a zero-initialized float tile;
# the tile is then atomically added onto y.
@wp.kernel
def test_tile_assign_kernel(x: wp.array(dtype=float), y: wp.array(dtype=float)):
    i, j = wp.tid()

    staged = wp.tile_zeros(shape=(TILE_M,), dtype=float)

    staged[j] = x[j]

    wp.tile_atomic_add(y, staged, offset=(0,))
879
+
880
+
881
# Each thread writes x[i] into component 1 of element i of a zero-initialized
# vec3 tile; the tile is then atomically added onto y.
@wp.kernel
def test_tile_assign_vec_kernel(x: wp.array(dtype=float), y: wp.array(dtype=wp.vec3)):
    tid = wp.tid()

    staged = wp.tile_zeros(shape=(TILE_M,), dtype=wp.vec3)

    staged[tid][1] = x[tid]

    wp.tile_atomic_add(y, staged, offset=(0,))
890
+
891
+
892
# Each thread writes x[i] into entry (1, 1) of element i of a zero-initialized
# mat33 tile; the tile is then atomically added onto y.
@wp.kernel
def test_tile_assign_mat_kernel(x: wp.array(dtype=float), y: wp.array(dtype=wp.mat33)):
    tid = wp.tid()

    staged = wp.tile_zeros(shape=(TILE_M,), dtype=wp.mat33)

    staged[tid][1, 1] = x[tid]

    wp.tile_atomic_add(y, staged, offset=(0,))
901
+
902
+
903
def test_tile_assign(test, device):
    """Check element assignment into scalar, vec3 and mat33 tiles, with gradients."""

    def run_assign(kernel, launch_dim, out_dtype):
        # input filled with 2.0, output zero-initialized with the requested dtype
        x = wp.full(TILE_M, 2.0, dtype=float, device=device, requires_grad=True)
        y = wp.zeros(TILE_M, dtype=out_dtype, device=device, requires_grad=True)

        tape = wp.Tape()
        with tape:
            wp.launch(kernel, dim=launch_dim, inputs=[x], outputs=[y], block_dim=TILE_DIM, device=device)

        y.grad = wp.ones_like(y)
        tape.backward()

        return x, y

    # scalar element test
    x, y = run_assign(test_tile_assign_kernel, [1, TILE_M], float)

    assert_np_equal(y.numpy(), np.full(TILE_M, 2.0, dtype=np.float32))
    assert_np_equal(x.grad.numpy(), np.full(TILE_M, 1.0, dtype=np.float32))

    # vector element test
    x, y = run_assign(test_tile_assign_vec_kernel, [TILE_M], wp.vec3)

    vec_ref = np.zeros((TILE_M, 3), dtype=float)
    vec_ref[:, 1] = 2.0

    assert_np_equal(y.numpy(), vec_ref)
    assert_np_equal(x.grad.numpy(), np.full(TILE_M, 1.0, dtype=np.float32))

    # matrix element test
    x, y = run_assign(test_tile_assign_mat_kernel, [TILE_M], wp.mat33)

    mat_ref = np.zeros((TILE_M, 3, 3), dtype=float)
    mat_ref[:, 1, 1] = 2.0

    assert_np_equal(y.numpy(), mat_ref)
    assert_np_equal(x.grad.numpy(), np.full(TILE_M, 1.0, dtype=np.float32))
950
+
951
+
952
# Stores one of four tiles into z, chosen at run time by `select`:
#   0 -> x loaded with storage="register"
#   1 -> y loaded with storage="register"
#   2 -> x loaded with storage="shared"
#   anything else -> y loaded with storage="shared"
@wp.kernel
def test_tile_where_kernel(select: int, x: wp.array(dtype=float), y: wp.array(dtype=float), z: wp.array(dtype=float)):
    # all four candidate tiles are loaded up front, before the branch
    x_reg = wp.tile_load(x, shape=(TILE_M,), storage="register")
    y_reg = wp.tile_load(y, shape=(TILE_M,), storage="register")

    x_shared = wp.tile_load(x, shape=(TILE_M,), storage="shared")
    y_shared = wp.tile_load(y, shape=(TILE_M,), storage="shared")

    # the same local `s` is assigned a different tile in each branch
    if select == 0:
        s = x_reg
    elif select == 1:
        s = y_reg
    elif select == 2:
        s = x_shared
    else:
        s = y_shared

    wp.tile_store(z, s)
970
+
971
+
972
def test_tile_where(test, device):
    """Check ``test_tile_where_kernel`` for each of its four ``select`` values.

    Selecting an ``x`` tile must give an output of ones with the gradient
    flowing only into ``x``; selecting a ``y`` tile must give twos with the
    gradient flowing only into ``y``.
    """
    x = wp.full((TILE_M,), 1.0, dtype=float, device=device, requires_grad=True)
    y = wp.full((TILE_M,), 2.0, dtype=float, device=device, requires_grad=True)
    z = wp.zeros(TILE_M, dtype=float, device=device, requires_grad=True)

    zeros_ref = np.full(TILE_M, 0.0, dtype=np.float32)
    ones_ref = np.full(TILE_M, 1.0, dtype=np.float32)
    twos_ref = np.full(TILE_M, 2.0, dtype=np.float32)

    # indexed by select value: (expected z, expected x.grad, expected y.grad)
    cases = (
        (ones_ref, ones_ref, zeros_ref),
        (twos_ref, zeros_ref, ones_ref),
        (ones_ref, ones_ref, zeros_ref),
        (twos_ref, zeros_ref, ones_ref),
    )

    for select, (z_ref, x_grad_ref, y_grad_ref) in enumerate(cases):
        tape = wp.Tape()
        with tape:
            wp.launch_tiled(
                test_tile_where_kernel,
                dim=[1],
                inputs=[select, x, y],
                outputs=[z],
                block_dim=32,
                device=device,
            )

        z.grad = wp.ones_like(z)

        tape.backward()

        assert_np_equal(z.numpy(), z_ref)
        assert_np_equal(x.grad.numpy(), x_grad_ref)
        assert_np_equal(y.grad.numpy(), y_grad_ref)

        # clear the gradients before the next case
        tape.zero()
1010
+
1011
+
1012
# Loads a (TILE_M, TILE_N) tile, transposes it and stores the result.
@wp.kernel
def test_tile_transpose_kernel(input: wp.array2d(dtype=float), output: wp.array2d(dtype=float)):
    src = wp.tile_load(input, shape=(TILE_M, TILE_N))
    flipped = wp.tile_transpose(src)

    wp.tile_store(output, flipped)
1018
+
1019
+
1020
def test_tile_transpose(test, device):
    """Compare ``wp.tile_transpose`` of a random tile with NumPy's ``.T``."""
    rng = np.random.default_rng(42)
    host_in = rng.random((TILE_M, TILE_N), dtype=np.float32)

    src = wp.array(host_in, device=device)
    dst = wp.zeros_like(src.transpose(), device=device)

    wp.launch_tiled(test_tile_transpose_kernel, dim=[1], inputs=[src, dst], block_dim=TILE_DIM, device=device)

    assert_np_equal(dst.numpy(), src.numpy().T)
1028
+
1029
+
1030
# Broadcasts a one-element tile to shape (10,) and adds it to a (10,) tile.
@wp.kernel
def test_tile_broadcast_add_1d_kernel(
    input_a: wp.array(dtype=float), input_b: wp.array(dtype=float), output: wp.array(dtype=float)
):
    full = wp.tile_load(input_a, shape=(10,))
    single = wp.tile_load(input_b, shape=(1,))

    expanded = wp.tile_broadcast(single, shape=(10,))
    total = full + expanded

    wp.tile_store(output, total)
1041
+
1042
+
1043
def test_tile_broadcast_add_1d(test, device):
    """Compare a 1D tile broadcast-add with NumPy broadcasting."""
    length = 10

    # implicit 1-dim ([1], 1)
    values = wp.array(np.arange(0, length, dtype=np.float32), device=device)
    addend = wp.array(np.ones(1, dtype=np.float32), device=device)
    result = wp.zeros((length,), dtype=float, device=device)

    wp.launch_tiled(
        test_tile_broadcast_add_1d_kernel,
        dim=[1],
        inputs=[values, addend, result],
        block_dim=TILE_DIM,
        device=device,
    )

    assert_np_equal(result.numpy(), values.numpy() + addend.numpy())
1054
+
1055
+
1056
# Broadcasts a length-10 tile to shape (10, 10) and adds it to a (10, 10) tile.
@wp.kernel
def test_tile_broadcast_add_2d_kernel(
    input_a: wp.array2d(dtype=float), input_b: wp.array(dtype=float), output: wp.array2d(dtype=float)
):
    # implicit 1-dim ([1], 10)
    grid = wp.tile_load(input_a, shape=(10, 10))
    row = wp.tile_load(input_b, shape=10)

    expanded = wp.tile_broadcast(row, shape=(10, 10))
    total = grid + expanded

    wp.tile_store(output, total)
1068
+
1069
+
1070
def test_tile_broadcast_add_2d(test, device):
    """Compare a 1D-to-2D tile broadcast-add with NumPy broadcasting."""
    rows = 10
    cols = 10

    grid = wp.array(np.ones((rows, cols), dtype=np.float32), device=device)
    row_values = wp.array(np.arange(0, cols, dtype=np.float32), device=device)
    result = wp.zeros((rows, cols), dtype=float, device=device)

    wp.launch_tiled(
        test_tile_broadcast_add_2d_kernel,
        dim=[1],
        inputs=[grid, row_values, result],
        block_dim=TILE_DIM,
        device=device,
    )

    assert_np_equal(result.numpy(), grid.numpy() + row_values.numpy())
1081
+
1082
+
1083
# Broadcasts a (4, 10, 1) tile to shape (4, 10, 12) and adds it to a
# (4, 10, 12) tile.
@wp.kernel
def test_tile_broadcast_add_3d_kernel(
    input_a: wp.array3d(dtype=float), input_b: wp.array3d(dtype=float), output: wp.array3d(dtype=float)
):
    volume = wp.tile_load(input_a, shape=(4, 10, 12))
    slab = wp.tile_load(input_b, shape=(4, 10, 1))

    expanded = wp.tile_broadcast(slab, shape=(4, 10, 12))
    total = volume + expanded

    wp.tile_store(output, total)
1094
+
1095
+
1096
def test_tile_broadcast_add_3d(test, device):
    """Compare a 3D tile broadcast-add along the last axis with NumPy broadcasting."""
    dim0 = 4
    dim1 = 10
    dim2 = 12

    # explicit 1-dim (dim0, dim1, 1) to (dim0, dim1, dim2)
    volume = wp.array(np.ones((dim0, dim1, dim2), dtype=np.float32), device=device)
    slab_host = np.arange(0, dim0 * dim1, dtype=np.float32).reshape((dim0, dim1, 1))
    slab = wp.array(slab_host, device=device)
    result = wp.zeros((dim0, dim1, dim2), dtype=float, device=device)

    wp.launch_tiled(
        test_tile_broadcast_add_3d_kernel,
        dim=[1],
        inputs=[volume, slab, result],
        block_dim=TILE_DIM,
        device=device,
    )

    assert_np_equal(result.numpy(), volume.numpy() + slab.numpy())
1108
+
1109
+
1110
# Broadcasts a (4, 1, 5, 1) tile to shape (4, 10, 5, 6) and adds it to a
# (4, 10, 5, 6) tile.
@wp.kernel
def test_tile_broadcast_add_4d_kernel(
    input_a: wp.array4d(dtype=float), input_b: wp.array4d(dtype=float), output: wp.array4d(dtype=float)
):
    full = wp.tile_load(input_a, shape=(4, 10, 5, 6))
    thin = wp.tile_load(input_b, shape=(4, 1, 5, 1))
    expanded = wp.tile_broadcast(thin, shape=(4, 10, 5, 6))
    total = full + expanded

    wp.tile_store(output, total)
1120
+
1121
+
1122
def test_tile_broadcast_add_4d(test, device):
    """Compare a 4D tile broadcast-add along two axes with NumPy broadcasting."""
    dim0 = 4
    dim1 = 10
    dim2 = 5
    dim3 = 6

    # explicit 1-dims (dim0, 1, dim2, 1) to (dim0, dim1, dim2, dim3)
    full = wp.array(np.ones((dim0, dim1, dim2, dim3), dtype=np.float32), device=device)
    thin_host = np.arange(0, dim0 * dim2, dtype=np.float32).reshape((dim0, 1, dim2, 1))
    thin = wp.array(thin_host, device=device)
    result = wp.zeros((dim0, dim1, dim2, dim3), dtype=float, device=device)

    wp.launch_tiled(
        test_tile_broadcast_add_4d_kernel,
        dim=[1],
        inputs=[full, thin, result],
        block_dim=TILE_DIM,
        device=device,
    )

    assert_np_equal(result.numpy(), full.numpy() + thin.numpy())
1136
+
1137
+
1138
# Broadcasts a length-5 tile to (5, 5), adds a (5, 5) tile of ones and stores
# the sum into b.
@wp.kernel
def test_tile_broadcast_grad_kernel(a: wp.array(dtype=float), b: wp.array2d(dtype=float)):
    row = wp.tile_load(a, shape=5)
    expanded = wp.tile_broadcast(row, shape=(5, 5))

    ones = wp.tile_ones(dtype=float, shape=(5, 5))
    total = ones + expanded

    wp.tile_store(b, total)
1147
+
1148
+
1149
def test_tile_broadcast_grad(test, device):
    """Check values and gradients of a broadcast from shape (5,) to (5, 5).

    Each input element appears in five output elements, so with a unit
    output gradient it is expected to receive a gradient of 5.
    """
    row = wp.array(np.arange(0, 5, dtype=np.float32), requires_grad=True, device=device)
    grid = wp.array(np.ones((5, 5), dtype=np.float32), requires_grad=True, device=device)

    with wp.Tape() as tape:
        wp.launch_tiled(
            test_tile_broadcast_grad_kernel,
            dim=[1],
            inputs=[row, grid],
            block_dim=TILE_DIM,
            device=device,
        )

    grid.grad = wp.ones_like(grid, device=device)
    tape.backward()

    assert_np_equal(grid.numpy(), row.numpy() + np.ones((5, 5)))
    assert_np_equal(row.grad.numpy(), np.ones(5) * 5.0)
1161
+
1162
+
1163
# Loads a (1, TILE_M, 1) tile, squeezes axis 2 explicitly, squeezes again
# without an axis argument, and stores the result into the 1D array y.
@wp.kernel
def test_tile_squeeze_kernel(x: wp.array3d(dtype=float), y: wp.array(dtype=float)):
    loaded = wp.tile_load(x, shape=(1, TILE_M, 1), offset=(0, 0, 0))
    without_last = wp.tile_squeeze(loaded, axis=(2,))
    flat = wp.tile_squeeze(without_last)

    wp.tile_store(y, flat, offset=(0,))
1170
+
1171
+
1172
def test_tile_squeeze(test, device):
    """Check that ``wp.tile_squeeze`` passes values and gradients through unchanged."""
    src = wp.ones((1, TILE_M, 1), dtype=float, device=device, requires_grad=True)
    dst = wp.zeros((TILE_M,), dtype=float, device=device, requires_grad=True)

    tape = wp.Tape()
    with tape:
        wp.launch_tiled(
            test_tile_squeeze_kernel,
            dim=1,
            inputs=[src],
            outputs=[dst],
            block_dim=TILE_DIM,
            device=device,
        )

    dst.grad = wp.ones_like(dst)
    tape.backward()

    assert_np_equal(dst.numpy(), np.ones((TILE_M,), dtype=np.float32))
    assert_np_equal(src.grad.numpy(), np.ones((1, TILE_M, 1), dtype=np.float32))
1185
+
1186
+
1187
# Loads a (TILE_M, TILE_N) tile, reshapes it to (TILE_M * TILE_N, 1) with an
# explicit static size, reshapes again with -1 for the first dimension, and
# stores the result.
@wp.kernel
def test_tile_reshape_kernel(x: wp.array2d(dtype=float), y: wp.array2d(dtype=float)):
    loaded = wp.tile_load(x, shape=(TILE_M, TILE_N), offset=(0, 0))
    column = wp.tile_reshape(loaded, shape=(wp.static(TILE_M * TILE_N), 1))
    column_again = wp.tile_reshape(column, shape=(-1, 1))

    wp.tile_store(y, column_again, offset=(0, 0))
1194
+
1195
+
1196
def test_tile_reshape(test, device):
    """Check that ``wp.tile_reshape`` passes values and gradients through unchanged."""
    src = wp.ones((TILE_M, TILE_N), dtype=float, device=device, requires_grad=True)
    dst = wp.zeros((TILE_M * TILE_N, 1), dtype=float, device=device, requires_grad=True)

    tape = wp.Tape()
    with tape:
        wp.launch_tiled(
            test_tile_reshape_kernel,
            dim=1,
            inputs=[src],
            outputs=[dst],
            block_dim=TILE_DIM,
            device=device,
        )

    dst.grad = wp.ones_like(dst)
    tape.backward()

    assert_np_equal(dst.numpy(), np.ones((TILE_M * TILE_N, 1), dtype=np.float32))
    assert_np_equal(src.grad.numpy(), np.ones((TILE_M, TILE_N), dtype=np.float32))
1209
+
1210
+
1211
# Loads a (TILE_M, TILE_N) tile of any element type, converts it to float32
# with wp.tile_astype and stores the result.
@wp.kernel
def test_tile_astype_kernel(x: wp.array2d(dtype=Any), y: wp.array2d(dtype=wp.float32)):
    loaded = wp.tile_load(x, shape=(TILE_M, TILE_N))
    as_float = wp.tile_astype(loaded, dtype=wp.float32)
    wp.tile_store(y, as_float)
1216
+
1217
+
1218
def test_tile_astype(test, device):
    """Check ``wp.tile_astype`` for int32 -> float32 and float64 -> float32.

    Only the float64 case is differentiated; its input gradient is expected
    to be all ones.
    """
    tile_shape = (TILE_M, TILE_N)

    # int32 -> float32, forward only
    ints_host = np.arange(TILE_M * TILE_N, dtype=np.int32).reshape(tile_shape)
    x = wp.array(ints_host, dtype=wp.int32, device=device)
    y = wp.zeros(tile_shape, dtype=wp.float32, device=device)

    wp.launch_tiled(test_tile_astype_kernel, dim=1, inputs=[x], outputs=[y], block_dim=TILE_DIM, device=device)

    assert_np_equal(y.numpy(), ints_host.astype(np.float32))

    # float64 -> float32, forward and backward
    doubles_host = np.arange(TILE_M * TILE_N, dtype=np.float64).reshape(tile_shape)
    x = wp.array(doubles_host, dtype=wp.float64, requires_grad=True, device=device)
    y = wp.zeros(tile_shape, dtype=wp.float32, requires_grad=True, device=device)

    tape = wp.Tape()
    with tape:
        wp.launch_tiled(test_tile_astype_kernel, dim=1, inputs=[x], outputs=[y], block_dim=TILE_DIM, device=device)

    y.grad = wp.ones_like(y)

    tape.backward()

    assert_np_equal(y.numpy(), doubles_host.astype(np.float32))
    assert_np_equal(x.grad.numpy(), np.ones_like(doubles_host))
1241
+
1242
+
1243
# Identity function: returns its argument unchanged. In this file it is
# called with a broadcast tile from test_tile_func_return_kernel.
@wp.func
def test_tile_func_return_func(tile: Any):
    return tile
1246
+
1247
+
1248
# Loads a (TILE_M, 1) tile, broadcasts it to (TILE_M, TILE_K), passes the
# broadcast tile through test_tile_func_return_func and stores what comes back.
@wp.kernel
def test_tile_func_return_kernel(x: wp.array2d(dtype=wp.float32), y: wp.array2d(dtype=wp.float32)):
    column = wp.tile_load(x, shape=(TILE_M, 1))
    expanded = wp.tile_broadcast(column, shape=(TILE_M, TILE_K))
    returned = test_tile_func_return_func(expanded)
    wp.tile_store(y, returned)
1254
+
1255
+
1256
def test_tile_func_return(test, device):
    """Check values and gradients of a broadcast tile returned from a ``wp.func``.

    Each input element is broadcast across ``TILE_K`` columns, so with a unit
    output gradient it is expected to receive a gradient of ``TILE_K``.
    """
    src = wp.ones(shape=(TILE_M, 1), dtype=wp.float32, requires_grad=True, device=device)
    dst = wp.zeros(shape=(TILE_M, TILE_K), dtype=wp.float32, requires_grad=True, device=device)

    tape = wp.Tape()
    with tape:
        wp.launch_tiled(
            test_tile_func_return_kernel,
            dim=[1, 1],
            inputs=[src],
            outputs=[dst],
            block_dim=TILE_DIM,
            device=device,
        )

    dst.grad = wp.ones_like(dst)
    tape.backward()

    expected_values = np.ones((TILE_M, TILE_K), dtype=np.float32)
    expected_grad = np.ones((TILE_M, 1), dtype=np.float32) * TILE_K

    assert_np_equal(dst.numpy(), expected_values)
    assert_np_equal(src.grad.numpy(), expected_grad)
1271
+
1272
+
1273
# Checks len() of a (TILE_M, TILE_N) tile inside wp.static(): the kernel
# expects it to equal TILE_M and also writes it to out[0].
@wp.kernel
def tile_len_kernel(
    a: wp.array(dtype=float, ndim=2),
    out: wp.array(dtype=int),
):
    x = wp.tile_load(a, shape=(TILE_M, TILE_N))

    # NOTE(review): `length` is never read afterwards; presumably kept to
    # exercise assigning the static length to a local variable - confirm.
    length = wp.static(len(x))
    wp.expect_eq(wp.static(len(x)), TILE_M)
    out[0] = wp.static(len(x))
1283
+
1284
+
1285
def test_tile_len(test, device):
    """Check that ``tile_len_kernel`` reports ``TILE_M`` as the tile length."""
    tile_source = wp.zeros((TILE_M, TILE_N), dtype=float, device=device)
    reported = wp.empty(1, dtype=int, device=device)

    wp.launch_tiled(
        tile_len_kernel,
        dim=(1,),
        inputs=(tile_source,),
        outputs=(reported,),
        block_dim=TILE_DIM,
        device=device,
    )

    reported_length = reported.numpy()[0]
    test.assertEqual(reported_length, TILE_M)
1291
+
1292
+
1293
# Warp struct with one scalar and one vector field; used in this file as the
# element type of a tile built with wp.tile_full in
# test_tile_construction_kernel.
@wp.struct
class TestStruct:
    # float32 scalar field
    x: wp.float32
    # vec3 field
    y: wp.vec3
1297
+
1298
+
1299
# Builds length-TILE_M tiles with each tile constructor (zeros, ones, arange
# and full with scalar, vec3, mat33 and struct values) and stores each tile
# into its own output array.
@wp.kernel
def test_tile_construction_kernel(
    out_zeros: wp.array(dtype=float),
    out_ones: wp.array(dtype=float),
    out_arange: wp.array(dtype=float),
    out_full_twos: wp.array(dtype=float),
    out_full_vecs: wp.array(dtype=wp.vec3),
    out_full_mats: wp.array(dtype=wp.mat33),
    out_full_structs: wp.array(dtype=TestStruct),
):
    zeros_tile = wp.tile_zeros(TILE_M, dtype=float)
    ones_tile = wp.tile_ones(TILE_M, dtype=float)
    arange_tile = wp.tile_arange(TILE_M, dtype=float)
    twos_tile = wp.tile_full(TILE_M, value=2.0, dtype=float)
    vecs_tile = wp.tile_full(TILE_M, value=wp.vec3(1.0), dtype=wp.vec3)
    mats_tile = wp.tile_full(TILE_M, value=wp.mat33(1.0), dtype=wp.mat33)

    # struct value replicated across the struct tile
    fill_value = TestStruct()
    fill_value.x = wp.float32(2.0)
    fill_value.y = wp.vec3(1.0)
    structs_tile = wp.tile_full(TILE_M, value=fill_value, dtype=TestStruct)

    wp.tile_store(out_zeros, zeros_tile)
    wp.tile_store(out_ones, ones_tile)
    wp.tile_store(out_arange, arange_tile)
    wp.tile_store(out_full_twos, twos_tile)
    wp.tile_store(out_full_vecs, vecs_tile)
    wp.tile_store(out_full_mats, mats_tile)
    wp.tile_store(out_full_structs, structs_tile)
1328
+
1329
+
1330
def test_tile_construction(test, device):
    """Run ``test_tile_construction_kernel`` and compare every output with NumPy.

    Seven length-``TILE_M`` output arrays are allocated (four scalar, one
    ``vec3``, one ``mat33``, one ``TestStruct``), filled by a single tiled
    launch, and checked against the equivalent NumPy constructions.
    """

    def make_output(dtype):
        # Uninitialized buffer that the kernel is expected to overwrite.
        return wp.empty(TILE_M, dtype=dtype, device=device)

    buf_zeros = make_output(float)
    buf_ones = make_output(float)
    buf_arange = make_output(float)
    buf_twos = make_output(float)
    buf_vecs = make_output(wp.vec3)
    buf_mats = make_output(wp.mat33)
    buf_structs = make_output(TestStruct)

    wp.launch_tiled(
        test_tile_construction_kernel,
        dim=1,
        inputs=[],
        outputs=[buf_zeros, buf_ones, buf_arange, buf_twos, buf_vecs, buf_mats, buf_structs],
        block_dim=TILE_DIM,
        device=device,
    )

    assert_np_equal(buf_zeros.numpy(), np.zeros(TILE_M, dtype=float))
    assert_np_equal(buf_ones.numpy(), np.ones(TILE_M, dtype=float))
    assert_np_equal(buf_twos.numpy(), np.full(TILE_M, 2.0, dtype=float))
    assert_np_equal(buf_vecs.numpy(), np.ones((TILE_M, 3), dtype=float))
    assert_np_equal(buf_mats.numpy(), np.ones((TILE_M, 3, 3), dtype=float))

    # The struct array is read back once and checked field by field.
    struct_values = buf_structs.numpy()
    assert_np_equal(struct_values["x"], np.full(TILE_M, 2.0, dtype=float))
    assert_np_equal(struct_values["y"], np.ones((TILE_M, 3), dtype=float))

    assert_np_equal(buf_arange.numpy(), np.arange(TILE_M, dtype=float))
1356
+
1357
+
1358
@wp.kernel
def test_tile_print_kernel():
    # Two 4x3 tiles of ones: the first explicitly requests shared storage, the
    # second leaves the storage argument at its default (register tile).
    shared_tile = wp.tile_ones(shape=(4, 3), dtype=float, storage="shared")
    register_tile = wp.tile_ones(shape=(4, 3), dtype=float)

    # Print both tiles, shared first.
    print(shared_tile)
    print(register_tile)
1367
+
1368
+
1369
def test_tile_print(test, device):
    """Launch ``test_tile_print_kernel`` on ``device`` and synchronize afterwards."""
    wp.launch_tiled(
        test_tile_print_kernel,
        dim=1,
        inputs=[],
        block_dim=64,
        device=device,
    )

    # Synchronize before returning so the launch has finished by then.
    wp.synchronize()
1372
+
1373
+
1374
@wp.kernel
def test_tile_add_inplace_kernel(
    input_a: wp.array2d(dtype=float),
    input_b: wp.array2d(dtype=float),
    output_reg: wp.array2d(dtype=float),
    output_shared: wp.array2d(dtype=float),
):
    # Exercises `+=` between tiles for every register/shared combination of
    # destination and source tile.
    ti, tj = wp.tid()

    # Origin of this block's (TILE_M, TILE_N) tile inside the 2D arrays.
    row0 = ti * TILE_M
    col0 = tj * TILE_N

    # Each input is loaded twice: once as a register tile, once as a shared tile.
    lhs_reg = wp.tile_load(input_a, shape=(TILE_M, TILE_N), offset=(row0, col0), storage="register")
    rhs_reg = wp.tile_load(input_b, shape=(TILE_M, TILE_N), offset=(row0, col0), storage="register")
    lhs_shared = wp.tile_load(input_a, shape=(TILE_M, TILE_N), offset=(row0, col0), storage="shared")
    rhs_shared = wp.tile_load(input_b, shape=(TILE_M, TILE_N), offset=(row0, col0), storage="shared")

    # register += register, register += shared
    lhs_reg += rhs_reg
    lhs_reg += rhs_shared
    # shared += register, shared += shared
    lhs_shared += rhs_reg
    lhs_shared += rhs_shared

    wp.tile_store(output_reg, lhs_reg, offset=(row0, col0))
    wp.tile_store(output_shared, lhs_shared, offset=(row0, col0))
1395
+
1396
+
1397
@wp.kernel
def test_tile_sub_inplace_kernel(
    input_a: wp.array2d(dtype=float),
    input_b: wp.array2d(dtype=float),
    output_reg: wp.array2d(dtype=float),
    output_shared: wp.array2d(dtype=float),
):
    # Exercises `-=` between tiles for every register/shared combination of
    # destination and source tile.
    ti, tj = wp.tid()

    # Origin of this block's (TILE_M, TILE_N) tile inside the 2D arrays.
    row0 = ti * TILE_M
    col0 = tj * TILE_N

    # Each input is loaded twice: once as a register tile, once as a shared tile.
    lhs_reg = wp.tile_load(input_a, shape=(TILE_M, TILE_N), offset=(row0, col0), storage="register")
    rhs_reg = wp.tile_load(input_b, shape=(TILE_M, TILE_N), offset=(row0, col0), storage="register")
    lhs_shared = wp.tile_load(input_a, shape=(TILE_M, TILE_N), offset=(row0, col0), storage="shared")
    rhs_shared = wp.tile_load(input_b, shape=(TILE_M, TILE_N), offset=(row0, col0), storage="shared")

    # register -= register, register -= shared
    lhs_reg -= rhs_reg
    lhs_reg -= rhs_shared
    # shared -= register, shared -= shared
    lhs_shared -= rhs_reg
    lhs_shared -= rhs_shared

    wp.tile_store(output_reg, lhs_reg, offset=(row0, col0))
    wp.tile_store(output_shared, lhs_shared, offset=(row0, col0))
1418
+
1419
+
1420
def test_tile_inplace(test, device):
    """Check in-place tile addition and subtraction, forward and backward.

    Both kernels run on a 2x2 grid of ``(TILE_M, TILE_N)`` tiles with the first
    operand all zeros and the second all ones. Each kernel applies the second
    operand twice to a register tile and twice to a shared tile, so the outputs
    are expected to be ``+2`` (add) or ``-2`` (sub). With output gradients of
    ones, the first operand's gradient is expected to be ``2`` and the second
    operand's ``+4`` / ``-4``.
    """
    rows = TILE_M * 2
    cols = TILE_N * 2
    tiles_m = rows // TILE_M
    tiles_n = cols // TILE_N

    lhs = wp.zeros((rows, cols), requires_grad=True, device=device)
    rhs = wp.ones_like(lhs, requires_grad=True, device=device)
    out_reg = wp.zeros_like(lhs, requires_grad=True, device=device)
    out_shared = wp.zeros_like(lhs, requires_grad=True, device=device)

    # Host-side reference values shared by the assertions below.
    zeros_ref = np.zeros((rows, cols))
    ones_ref = np.ones((rows, cols))

    # ---- in-place addition ----
    with wp.Tape() as tape:
        wp.launch_tiled(
            test_tile_add_inplace_kernel,
            dim=[tiles_m, tiles_n],
            inputs=[lhs, rhs, out_reg, out_shared],
            block_dim=TILE_DIM,
            device=device,
        )

    # The inputs must be untouched; both outputs hold 0 + 1 + 1.
    assert_np_equal(lhs.numpy(), zeros_ref)
    assert_np_equal(rhs.numpy(), ones_ref)
    assert_np_equal(out_reg.numpy(), 2.0 * ones_ref)
    assert_np_equal(out_shared.numpy(), 2.0 * ones_ref)

    out_reg.grad = wp.ones_like(out_reg, device=device)
    out_shared.grad = wp.ones_like(out_shared, device=device)
    tape.backward()

    assert_np_equal(lhs.grad.numpy(), 2.0 * ones_ref)
    assert_np_equal(rhs.grad.numpy(), 4.0 * ones_ref)

    # Reset gradients and array contents before the subtraction pass.
    tape.zero()

    lhs.zero_()
    rhs.fill_(1.0)
    out_reg.zero_()
    out_shared.zero_()

    # ---- in-place subtraction ----
    with wp.Tape() as tape:
        wp.launch_tiled(
            test_tile_sub_inplace_kernel,
            dim=[tiles_m, tiles_n],
            inputs=[lhs, rhs, out_reg, out_shared],
            block_dim=TILE_DIM,
            device=device,
        )

    # The inputs must be untouched; both outputs hold 0 - 1 - 1.
    assert_np_equal(lhs.numpy(), zeros_ref)
    assert_np_equal(rhs.numpy(), ones_ref)
    assert_np_equal(out_reg.numpy(), -2.0 * ones_ref)
    assert_np_equal(out_shared.numpy(), -2.0 * ones_ref)

    out_reg.grad = wp.ones_like(out_reg, device=device)
    out_shared.grad = wp.ones_like(out_shared, device=device)
    tape.backward()

    assert_np_equal(lhs.grad.numpy(), 2.0 * ones_ref)
    assert_np_equal(rhs.grad.numpy(), -4.0 * ones_ref)
1477
+
1478
+
1479
# Default device list passed to the test registrations in this module.
devices = get_test_devices()


class TestTile(unittest.TestCase):
    """Empty test case; test methods are attached to it via ``add_function_test``."""
1484
+
1485
+
1486
def _register_tile_tests():
    """Attach every enabled tile test to ``TestTile``.

    Each table row is ``(registered name, test function, device list, extra
    keyword arguments for add_function_test)``.
    """
    registrations = (
        ("test_tile_copy_1d", test_tile_copy_1d, devices, {}),
        ("test_tile_copy_2d", test_tile_copy_2d, devices, {}),
        ("test_tile_unary_map", test_tile_unary_map, devices, {}),
        ("test_tile_unary_map_mixed_types", test_tile_unary_map_mixed_types, devices, {}),
        ("test_tile_binary_map", test_tile_binary_map, devices, {}),
        ("test_tile_binary_map_mixed_types", test_tile_binary_map_mixed_types, devices, {}),
        ("test_tile_transpose", test_tile_transpose, devices, {}),
        ("test_tile_operators", test_tile_operators, devices, {}),
        # Registered for CUDA devices only.
        ("test_tile_tile", test_tile_tile, get_cuda_test_devices(), {}),
        ("test_tile_untile", test_tile_untile, devices, {}),
        ("test_tile_sum", test_tile_sum, devices, {"check_output": False}),
        ("test_tile_sum_launch", test_tile_sum_launch, devices, {}),
        ("test_tile_extract", test_tile_extract, devices, {}),
        ("test_tile_extract_repeated", test_tile_extract_repeated, devices, {}),
        ("test_tile_assign", test_tile_assign, devices, {}),
        ("test_tile_where", test_tile_where, devices, {}),
        ("test_tile_broadcast_add_1d", test_tile_broadcast_add_1d, devices, {}),
        ("test_tile_broadcast_add_2d", test_tile_broadcast_add_2d, devices, {}),
        ("test_tile_broadcast_add_3d", test_tile_broadcast_add_3d, devices, {}),
        ("test_tile_broadcast_add_4d", test_tile_broadcast_add_4d, devices, {}),
        ("test_tile_broadcast_grad", test_tile_broadcast_grad, devices, {}),
        ("test_tile_squeeze", test_tile_squeeze, devices, {}),
        ("test_tile_reshape", test_tile_reshape, devices, {}),
        ("test_tile_len", test_tile_len, devices, {}),
        ("test_tile_construction", test_tile_construction, devices, {}),
    )

    for test_name, test_func, test_devices, extra_kwargs in registrations:
        add_function_test(TestTile, test_name, test_func, devices=test_devices, **extra_kwargs)


_register_tile_tests()

# NOTE(review): the registrations below are disabled in this file; the reason
# is not visible here, so they are kept as-is.
# add_function_test(TestTile, "test_tile_print", test_tile_print, devices=devices, check_output=False)
# add_function_test(TestTile, "test_tile_inplace", test_tile_inplace, devices=devices)
# add_function_test(TestTile, "test_tile_astype", test_tile_astype, devices=devices)
# add_function_test(TestTile, "test_tile_func_return", test_tile_func_return, devices=devices)
1515
+
1516
+
1517
if __name__ == "__main__":
    # Clear Warp's kernel cache before running the suite as a script.
    wp.clear_kernel_cache()
    # Verbose output; stop at the first failing test.
    unittest.main(failfast=True, verbosity=2)