warp-lang 0.9.0__py3-none-win_amd64.whl → 0.11.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (315) hide show
  1. warp/__init__.py +15 -7
  2. warp/__init__.pyi +1 -0
  3. warp/bin/warp-clang.dll +0 -0
  4. warp/bin/warp.dll +0 -0
  5. warp/build.py +22 -443
  6. warp/build_dll.py +384 -0
  7. warp/builtins.py +998 -488
  8. warp/codegen.py +1307 -739
  9. warp/config.py +5 -3
  10. warp/constants.py +6 -0
  11. warp/context.py +1291 -548
  12. warp/dlpack.py +31 -31
  13. warp/fabric.py +326 -0
  14. warp/fem/__init__.py +27 -0
  15. warp/fem/cache.py +389 -0
  16. warp/fem/dirichlet.py +181 -0
  17. warp/fem/domain.py +263 -0
  18. warp/fem/field/__init__.py +101 -0
  19. warp/fem/field/field.py +149 -0
  20. warp/fem/field/nodal_field.py +299 -0
  21. warp/fem/field/restriction.py +21 -0
  22. warp/fem/field/test.py +181 -0
  23. warp/fem/field/trial.py +183 -0
  24. warp/fem/geometry/__init__.py +19 -0
  25. warp/fem/geometry/closest_point.py +70 -0
  26. warp/fem/geometry/deformed_geometry.py +271 -0
  27. warp/fem/geometry/element.py +744 -0
  28. warp/fem/geometry/geometry.py +186 -0
  29. warp/fem/geometry/grid_2d.py +373 -0
  30. warp/fem/geometry/grid_3d.py +435 -0
  31. warp/fem/geometry/hexmesh.py +953 -0
  32. warp/fem/geometry/partition.py +376 -0
  33. warp/fem/geometry/quadmesh_2d.py +532 -0
  34. warp/fem/geometry/tetmesh.py +840 -0
  35. warp/fem/geometry/trimesh_2d.py +577 -0
  36. warp/fem/integrate.py +1616 -0
  37. warp/fem/operator.py +191 -0
  38. warp/fem/polynomial.py +213 -0
  39. warp/fem/quadrature/__init__.py +2 -0
  40. warp/fem/quadrature/pic_quadrature.py +245 -0
  41. warp/fem/quadrature/quadrature.py +294 -0
  42. warp/fem/space/__init__.py +292 -0
  43. warp/fem/space/basis_space.py +489 -0
  44. warp/fem/space/collocated_function_space.py +105 -0
  45. warp/fem/space/dof_mapper.py +236 -0
  46. warp/fem/space/function_space.py +145 -0
  47. warp/fem/space/grid_2d_function_space.py +267 -0
  48. warp/fem/space/grid_3d_function_space.py +306 -0
  49. warp/fem/space/hexmesh_function_space.py +352 -0
  50. warp/fem/space/partition.py +350 -0
  51. warp/fem/space/quadmesh_2d_function_space.py +369 -0
  52. warp/fem/space/restriction.py +160 -0
  53. warp/fem/space/shape/__init__.py +15 -0
  54. warp/fem/space/shape/cube_shape_function.py +738 -0
  55. warp/fem/space/shape/shape_function.py +103 -0
  56. warp/fem/space/shape/square_shape_function.py +611 -0
  57. warp/fem/space/shape/tet_shape_function.py +567 -0
  58. warp/fem/space/shape/triangle_shape_function.py +429 -0
  59. warp/fem/space/tetmesh_function_space.py +292 -0
  60. warp/fem/space/topology.py +295 -0
  61. warp/fem/space/trimesh_2d_function_space.py +221 -0
  62. warp/fem/types.py +77 -0
  63. warp/fem/utils.py +495 -0
  64. warp/native/array.h +164 -55
  65. warp/native/builtin.h +150 -174
  66. warp/native/bvh.cpp +75 -328
  67. warp/native/bvh.cu +406 -23
  68. warp/native/bvh.h +37 -45
  69. warp/native/clang/clang.cpp +136 -24
  70. warp/native/crt.cpp +1 -76
  71. warp/native/crt.h +111 -104
  72. warp/native/cuda_crt.h +1049 -0
  73. warp/native/cuda_util.cpp +15 -3
  74. warp/native/cuda_util.h +3 -1
  75. warp/native/cutlass/tools/library/scripts/conv2d_operation.py +463 -0
  76. warp/native/cutlass/tools/library/scripts/conv3d_operation.py +321 -0
  77. warp/native/cutlass/tools/library/scripts/gemm_operation.py +988 -0
  78. warp/native/cutlass/tools/library/scripts/generator.py +4625 -0
  79. warp/native/cutlass/tools/library/scripts/library.py +799 -0
  80. warp/native/cutlass/tools/library/scripts/manifest.py +402 -0
  81. warp/native/cutlass/tools/library/scripts/pycutlass/docs/source/conf.py +96 -0
  82. warp/native/cutlass/tools/library/scripts/pycutlass/profile/conv/conv2d_f16_sm80.py +106 -0
  83. warp/native/cutlass/tools/library/scripts/pycutlass/profile/gemm/gemm_f32_sm80.py +91 -0
  84. warp/native/cutlass/tools/library/scripts/pycutlass/setup.py +80 -0
  85. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/__init__.py +48 -0
  86. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/arguments.py +118 -0
  87. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/c_types.py +241 -0
  88. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/compiler.py +432 -0
  89. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/conv2d_operation.py +631 -0
  90. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/epilogue.py +1026 -0
  91. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/frontend.py +104 -0
  92. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/gemm_operation.py +1276 -0
  93. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/library.py +744 -0
  94. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/memory_manager.py +74 -0
  95. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/operation.py +110 -0
  96. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/parser.py +619 -0
  97. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/reduction_operation.py +398 -0
  98. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/tensor_ref.py +70 -0
  99. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/__init__.py +4 -0
  100. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/conv2d_testbed.py +646 -0
  101. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_grouped_testbed.py +235 -0
  102. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_testbed.py +557 -0
  103. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/profiler.py +70 -0
  104. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/type_hint.py +39 -0
  105. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/__init__.py +1 -0
  106. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/device.py +76 -0
  107. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/reference_model.py +255 -0
  108. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/__init__.py +0 -0
  109. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +201 -0
  110. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +177 -0
  111. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +98 -0
  112. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +95 -0
  113. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +163 -0
  114. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +187 -0
  115. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +309 -0
  116. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +54 -0
  117. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  118. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  119. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +253 -0
  120. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +97 -0
  121. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +242 -0
  122. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  123. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  124. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/run_all_tests.py +10 -0
  125. warp/native/cutlass/tools/library/scripts/pycutlass/test/frontend/test_frontend.py +146 -0
  126. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/__init__.py +0 -0
  127. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_bf16_sm80.py +96 -0
  128. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f16_sm80.py +447 -0
  129. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f32_sm80.py +146 -0
  130. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f64_sm80.py +102 -0
  131. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_grouped_sm80.py +203 -0
  132. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_s8_sm80.py +229 -0
  133. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/run_all_tests.py +9 -0
  134. warp/native/cutlass/tools/library/scripts/pycutlass/test/unit/test_sm80.py +453 -0
  135. warp/native/cutlass/tools/library/scripts/rank_2k_operation.py +398 -0
  136. warp/native/cutlass/tools/library/scripts/rank_k_operation.py +387 -0
  137. warp/native/cutlass/tools/library/scripts/rt.py +796 -0
  138. warp/native/cutlass/tools/library/scripts/symm_operation.py +400 -0
  139. warp/native/cutlass/tools/library/scripts/trmm_operation.py +407 -0
  140. warp/native/cutlass_gemm.cu +5 -3
  141. warp/native/exports.h +1240 -949
  142. warp/native/fabric.h +228 -0
  143. warp/native/hashgrid.cpp +4 -4
  144. warp/native/hashgrid.h +22 -2
  145. warp/native/initializer_array.h +2 -2
  146. warp/native/intersect.h +22 -7
  147. warp/native/intersect_adj.h +8 -8
  148. warp/native/intersect_tri.h +13 -16
  149. warp/native/marching.cu +157 -161
  150. warp/native/mat.h +119 -19
  151. warp/native/matnn.h +2 -2
  152. warp/native/mesh.cpp +108 -83
  153. warp/native/mesh.cu +243 -6
  154. warp/native/mesh.h +1547 -458
  155. warp/native/nanovdb/NanoVDB.h +1 -1
  156. warp/native/noise.h +272 -329
  157. warp/native/quat.h +51 -8
  158. warp/native/rand.h +45 -35
  159. warp/native/range.h +6 -2
  160. warp/native/reduce.cpp +157 -0
  161. warp/native/reduce.cu +348 -0
  162. warp/native/runlength_encode.cpp +62 -0
  163. warp/native/runlength_encode.cu +46 -0
  164. warp/native/scan.cu +11 -13
  165. warp/native/scan.h +1 -0
  166. warp/native/solid_angle.h +442 -0
  167. warp/native/sort.cpp +13 -0
  168. warp/native/sort.cu +9 -1
  169. warp/native/sparse.cpp +338 -0
  170. warp/native/sparse.cu +545 -0
  171. warp/native/spatial.h +2 -2
  172. warp/native/temp_buffer.h +30 -0
  173. warp/native/vec.h +126 -24
  174. warp/native/volume.h +120 -0
  175. warp/native/warp.cpp +658 -53
  176. warp/native/warp.cu +660 -68
  177. warp/native/warp.h +112 -12
  178. warp/optim/__init__.py +1 -0
  179. warp/optim/linear.py +922 -0
  180. warp/optim/sgd.py +92 -0
  181. warp/render/render_opengl.py +392 -152
  182. warp/render/render_usd.py +11 -11
  183. warp/sim/__init__.py +2 -2
  184. warp/sim/articulation.py +385 -185
  185. warp/sim/collide.py +21 -8
  186. warp/sim/import_mjcf.py +297 -106
  187. warp/sim/import_urdf.py +389 -210
  188. warp/sim/import_usd.py +198 -97
  189. warp/sim/inertia.py +17 -18
  190. warp/sim/integrator_euler.py +14 -8
  191. warp/sim/integrator_xpbd.py +161 -19
  192. warp/sim/model.py +795 -291
  193. warp/sim/optimizer.py +2 -6
  194. warp/sim/render.py +65 -3
  195. warp/sim/utils.py +3 -0
  196. warp/sparse.py +1227 -0
  197. warp/stubs.py +665 -223
  198. warp/tape.py +66 -15
  199. warp/tests/__main__.py +3 -6
  200. warp/tests/assets/curlnoise_golden.npy +0 -0
  201. warp/tests/assets/pnoise_golden.npy +0 -0
  202. warp/tests/assets/torus.usda +105 -105
  203. warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
  204. warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
  205. warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
  206. warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
  207. warp/tests/aux_test_unresolved_func.py +14 -0
  208. warp/tests/aux_test_unresolved_symbol.py +14 -0
  209. warp/tests/disabled_kinematics.py +239 -0
  210. warp/tests/run_coverage_serial.py +31 -0
  211. warp/tests/test_adam.py +103 -106
  212. warp/tests/test_arithmetic.py +128 -74
  213. warp/tests/test_array.py +1497 -211
  214. warp/tests/test_array_reduce.py +150 -0
  215. warp/tests/test_atomic.py +64 -28
  216. warp/tests/test_bool.py +99 -0
  217. warp/tests/test_builtins_resolution.py +1292 -0
  218. warp/tests/test_bvh.py +75 -43
  219. warp/tests/test_closest_point_edge_edge.py +54 -57
  220. warp/tests/test_codegen.py +233 -128
  221. warp/tests/test_compile_consts.py +28 -20
  222. warp/tests/test_conditional.py +108 -24
  223. warp/tests/test_copy.py +10 -12
  224. warp/tests/test_ctypes.py +112 -88
  225. warp/tests/test_dense.py +21 -14
  226. warp/tests/test_devices.py +98 -0
  227. warp/tests/test_dlpack.py +136 -108
  228. warp/tests/test_examples.py +277 -0
  229. warp/tests/test_fabricarray.py +955 -0
  230. warp/tests/test_fast_math.py +15 -11
  231. warp/tests/test_fem.py +1271 -0
  232. warp/tests/test_fp16.py +53 -19
  233. warp/tests/test_func.py +187 -74
  234. warp/tests/test_generics.py +194 -49
  235. warp/tests/test_grad.py +180 -116
  236. warp/tests/test_grad_customs.py +176 -0
  237. warp/tests/test_hash_grid.py +52 -37
  238. warp/tests/test_import.py +10 -23
  239. warp/tests/test_indexedarray.py +577 -24
  240. warp/tests/test_intersect.py +18 -9
  241. warp/tests/test_large.py +141 -0
  242. warp/tests/test_launch.py +251 -15
  243. warp/tests/test_lerp.py +64 -65
  244. warp/tests/test_linear_solvers.py +154 -0
  245. warp/tests/test_lvalue.py +493 -0
  246. warp/tests/test_marching_cubes.py +12 -13
  247. warp/tests/test_mat.py +508 -2778
  248. warp/tests/test_mat_lite.py +115 -0
  249. warp/tests/test_mat_scalar_ops.py +2889 -0
  250. warp/tests/test_math.py +103 -9
  251. warp/tests/test_matmul.py +305 -69
  252. warp/tests/test_matmul_lite.py +410 -0
  253. warp/tests/test_mesh.py +71 -14
  254. warp/tests/test_mesh_query_aabb.py +41 -25
  255. warp/tests/test_mesh_query_point.py +325 -34
  256. warp/tests/test_mesh_query_ray.py +39 -22
  257. warp/tests/test_mlp.py +30 -22
  258. warp/tests/test_model.py +92 -89
  259. warp/tests/test_modules_lite.py +39 -0
  260. warp/tests/test_multigpu.py +88 -114
  261. warp/tests/test_noise.py +12 -11
  262. warp/tests/test_operators.py +16 -20
  263. warp/tests/test_options.py +11 -11
  264. warp/tests/test_pinned.py +17 -18
  265. warp/tests/test_print.py +32 -11
  266. warp/tests/test_quat.py +275 -129
  267. warp/tests/test_rand.py +18 -16
  268. warp/tests/test_reload.py +38 -34
  269. warp/tests/test_rounding.py +50 -43
  270. warp/tests/test_runlength_encode.py +190 -0
  271. warp/tests/test_smoothstep.py +9 -11
  272. warp/tests/test_snippet.py +143 -0
  273. warp/tests/test_sparse.py +460 -0
  274. warp/tests/test_spatial.py +276 -243
  275. warp/tests/test_streams.py +110 -85
  276. warp/tests/test_struct.py +331 -85
  277. warp/tests/test_tape.py +39 -21
  278. warp/tests/test_torch.py +118 -89
  279. warp/tests/test_transient_module.py +12 -13
  280. warp/tests/test_types.py +614 -0
  281. warp/tests/test_utils.py +494 -0
  282. warp/tests/test_vec.py +354 -1987
  283. warp/tests/test_vec_lite.py +73 -0
  284. warp/tests/test_vec_scalar_ops.py +2099 -0
  285. warp/tests/test_volume.py +457 -293
  286. warp/tests/test_volume_write.py +124 -134
  287. warp/tests/unittest_serial.py +35 -0
  288. warp/tests/unittest_suites.py +341 -0
  289. warp/tests/unittest_utils.py +568 -0
  290. warp/tests/unused_test_misc.py +71 -0
  291. warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
  292. warp/thirdparty/appdirs.py +36 -45
  293. warp/thirdparty/unittest_parallel.py +549 -0
  294. warp/torch.py +72 -30
  295. warp/types.py +1744 -713
  296. warp/utils.py +360 -350
  297. warp_lang-0.11.0.dist-info/LICENSE.md +36 -0
  298. warp_lang-0.11.0.dist-info/METADATA +238 -0
  299. warp_lang-0.11.0.dist-info/RECORD +332 -0
  300. {warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/WHEEL +1 -1
  301. warp/bin/warp-clang.exp +0 -0
  302. warp/bin/warp-clang.lib +0 -0
  303. warp/bin/warp.exp +0 -0
  304. warp/bin/warp.lib +0 -0
  305. warp/tests/test_all.py +0 -215
  306. warp/tests/test_array_scan.py +0 -60
  307. warp/tests/test_base.py +0 -208
  308. warp/tests/test_unresolved_func.py +0 -7
  309. warp/tests/test_unresolved_symbol.py +0 -7
  310. warp_lang-0.9.0.dist-info/METADATA +0 -20
  311. warp_lang-0.9.0.dist-info/RECORD +0 -177
  312. /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
  313. /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
  314. /warp/tests/{test_square.py → aux_test_square.py} +0 -0
  315. {warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/top_level.txt +0 -0
warp/fem/utils.py ADDED
@@ -0,0 +1,495 @@
1
+ from typing import Any, Tuple
2
+
3
+ import numpy as np
4
+
5
+ import warp as wp
6
+ from warp.fem.cache import (
7
+ Temporary,
8
+ TemporaryStore,
9
+ borrow_temporary,
10
+ borrow_temporary_like,
11
+ )
12
+ from warp.utils import array_scan, radix_sort_pairs, runlength_encode
13
+
14
+
15
@wp.func
def generalized_outer(x: Any, y: Any):
    """Generalized outer product allowing for the first argument to be a scalar"""
    # Generic fallback: defer to the built-in outer product.
    return wp.outer(x, y)


# Overload for a float32 first argument and a vec2 second argument:
# the product is written as a plain scalar * vector multiplication.
@wp.func
def generalized_outer(x: wp.float32, y: wp.vec2):
    return x * y


# Same as the overload above, for a vec3 second argument.
@wp.func
def generalized_outer(x: wp.float32, y: wp.vec3):
    return x * y
29
+
30
+
31
@wp.func
def generalized_inner(x: Any, y: Any):
    """Generalized inner product allowing for the first argument to be a tensor"""
    # Generic fallback: defer to the built-in dot product.
    return wp.dot(x, y)


# Overload for a 2x2 matrix first argument: sums x[i] * y[i] over both indices.
# NOTE(review): x[i] presumably denotes the i-th row of the matrix, which would make
# the result a vec2 -- confirm against Warp's matrix indexing rules.
@wp.func
def generalized_inner(x: wp.mat22, y: wp.vec2):
    return x[0] * y[0] + x[1] * y[1]


# Overload for a 3x3 matrix first argument; same construction with three terms.
@wp.func
def generalized_inner(x: wp.mat33, y: wp.vec3):
    return x[0] * y[0] + x[1] * y[1] + x[2] * y[2]
45
+
46
+
47
@wp.func
def apply_right(x: Any, y: Any):
    """Performs x y multiplication with y a square matrix and x either a row-vector or a matrix.
    Will be removed once native @ operator is implemented.
    """
    # Generic fallback: rely on the `*` operator of the argument types.
    return x * y


# Overload for a vec2 left operand and a 2x2 matrix: sums x[i] * y[i] over both indices.
# NOTE(review): y[i] presumably denotes the i-th row of the matrix, making this the
# row-vector times matrix product -- confirm against Warp's matrix indexing rules.
@wp.func
def apply_right(x: wp.vec2, y: wp.mat22):
    return x[0] * y[0] + x[1] * y[1]


# Overload for a vec3 left operand and a 3x3 matrix; same construction with three terms.
@wp.func
def apply_right(x: wp.vec3, y: wp.mat33):
    return x[0] * y[0] + x[1] * y[1] + x[2] * y[2]
63
+
64
+
65
@wp.func
def unit_element(template_type: Any, coord: int):
    """Returns an instance of `template_type` with a single coordinate set to 1 in the canonical basis"""

    # Build a zero-initialized value of the same type as the template argument,
    # then set the requested entry to one.
    t = type(template_type)(0.0)
    t[coord] = 1.0
    return t


# Scalar overload: `coord` is not used and the result is always 1.0.
@wp.func
def unit_element(template_type: wp.float32, coord: int):
    return 1.0


# 2x2 matrix overload: `coord` is decoded as a flat index,
# with row = coord // 2 and col = coord - 2 * row.
@wp.func
def unit_element(template_type: wp.mat22, coord: int):
    t = wp.mat22(0.0)
    row = coord // 2
    col = coord - 2 * row
    t[row, col] = 1.0
    return t


# 3x3 matrix overload: `coord` is decoded as a flat index,
# with row = coord // 3 and col = coord - 3 * row.
@wp.func
def unit_element(template_type: wp.mat33, coord: int):
    t = wp.mat33(0.0)
    row = coord // 3
    col = coord - 3 * row
    t[row, col] = 1.0
    return t
95
+
96
+
97
@wp.func
def symmetric_part(x: Any):
    """Symmetric part of a square tensor"""
    # Average of the tensor and its transpose.
    return 0.5 * (x + wp.transpose(x))
101
+
102
+
103
@wp.func
def skew_part(x: wp.mat22):
    """Skew part of a 2x2 tensor as corresponding rotation angle"""
    # Half the difference of the two off-diagonal entries.
    return 0.5 * (x[1, 0] - x[0, 1])


@wp.func
def skew_part(x: wp.mat33):
    """Skew part of a 3x3 tensor as the corresponding rotation vector"""
    # Each component is half the difference of one pair of mirrored off-diagonal entries.
    a = 0.5 * (x[2, 1] - x[1, 2])
    b = 0.5 * (x[0, 2] - x[2, 0])
    c = 0.5 * (x[1, 0] - x[0, 1])
    return wp.vec3(a, b, c)
116
+
117
+
118
def compress_node_indices(
    node_count: int, node_indices: wp.array(dtype=int), temporary_store: TemporaryStore = None
) -> Tuple[Temporary, Temporary, int, Temporary]:
    """
    Compress an unsorted list of node indices into:
     - a node_offsets array, giving for each node the start offset of corresponding indices in sorted_array_indices
     - a sorted_array_indices array, listing the indices in the input array corresponding to each node
     - the number of unique node indices
     - a unique_node_indices array containing the sorted list of unique node indices (i.e. the list of indices i for which node_offsets[i] < node_offsets[i+1])

    Args:
        node_count: total number of nodes; the offsets array is allocated with ``node_count + 1`` entries
        node_indices: array of node indices to compress
        temporary_store: store passed to every ``borrow_temporary`` / ``borrow_temporary_like`` call

    Returns:
        Tuple ``(node_offsets, sorted_array_indices, unique_node_count, unique_node_indices)``.
        The three array results are returned as temporaries that are not released by this function.
    """

    index_count = node_indices.size

    # Key and value buffers for the sort are allocated with twice the number of indices.
    # NOTE(review): presumably `radix_sort_pairs` needs the second half as scratch space -- confirm.
    sorted_node_indices_temp = borrow_temporary(
        temporary_store, shape=2 * index_count, dtype=int, device=node_indices.device
    )
    sorted_array_indices_temp = borrow_temporary_like(sorted_node_indices_temp, temporary_store)

    sorted_node_indices = sorted_node_indices_temp.array
    sorted_array_indices = sorted_array_indices_temp.array

    wp.copy(dest=sorted_node_indices, src=node_indices, count=index_count)

    # Fill the value buffer with `flat index // indices_per_element`, i.e. for a
    # multi-dimensional input each entry maps back to its leading-dimensions element.
    indices_per_element = 1 if node_indices.ndim == 1 else node_indices.shape[-1]
    wp.launch(
        kernel=_iota_kernel,
        dim=index_count,
        inputs=[sorted_array_indices, indices_per_element],
        device=sorted_array_indices.device,
    )

    # Sort indices
    radix_sort_pairs(sorted_node_indices, sorted_array_indices, count=index_count)

    # Build prefix sum of number of elements per node
    unique_node_indices_temp = borrow_temporary(
        temporary_store, shape=index_count, dtype=int, device=node_indices.device
    )
    node_element_counts_temp = borrow_temporary(
        temporary_store, shape=index_count, dtype=int, device=node_indices.device
    )

    unique_node_indices = unique_node_indices_temp.array
    node_element_counts = node_element_counts_temp.array

    # Single-entry device buffer receiving the number of runs found by the encoder
    unique_node_count_dev = borrow_temporary(temporary_store, shape=(1,), dtype=int, device=sorted_node_indices.device)
    runlength_encode(
        sorted_node_indices,
        unique_node_indices,
        node_element_counts,
        value_count=index_count,
        run_count=unique_node_count_dev.array,
    )

    # Transfer unique node count to host
    if node_indices.device.is_cuda:
        # Copy through a pinned host buffer and synchronize the device stream before reading the value
        unique_node_count_host = borrow_temporary(temporary_store, shape=(1,), dtype=int, pinned=True, device="cpu")
        wp.copy(src=unique_node_count_dev.array, dest=unique_node_count_host.array, count=1)
        wp.synchronize_stream(wp.get_stream(node_indices.device))
        unique_node_count_dev.release()
        unique_node_count = int(unique_node_count_host.array.numpy()[0])
        unique_node_count_host.release()
    else:
        unique_node_count = int(unique_node_count_dev.array.numpy()[0])
        unique_node_count_dev.release()

    # Scatter seen run counts to global array of element count per node
    node_offsets_temp = borrow_temporary(
        temporary_store, shape=(node_count + 1), device=node_element_counts.device, dtype=int
    )
    node_offsets = node_offsets_temp.array

    # Counts are written at index `node + 1` by the kernel, so entry 0 stays zero
    node_offsets.zero_()
    wp.launch(
        kernel=_scatter_node_counts,
        dim=unique_node_count,
        inputs=[node_element_counts, unique_node_indices, node_offsets],
        device=node_offsets.device,
    )

    # Prefix sum of number of elements per node
    array_scan(node_offsets, node_offsets, inclusive=True)

    # Intermediate buffers are no longer needed; the returned temporaries stay borrowed
    sorted_node_indices_temp.release()
    node_element_counts_temp.release()

    return node_offsets_temp, sorted_array_indices_temp, unique_node_count, unique_node_indices_temp
205
+
206
+
207
def masked_indices(
    mask: wp.array, missing_index=-1, temporary_store: TemporaryStore = None
) -> Tuple[Temporary, Temporary]:
    """
    From an array of boolean masks (must be either 0 or 1), returns:
      - The list of indices for which the mask is 1
      - A map associating to each element of the input mask array its local index if non-zero, or missing_index if zero.

    Args:
        mask: array of 0/1 values
        missing_index: value stored in the map for elements whose mask is zero
        temporary_store: store passed to every ``borrow_temporary`` / ``borrow_temporary_like`` call

    Both results are returned as temporaries that are not released by this function.
    """

    offsets_temp = borrow_temporary_like(mask, temporary_store)
    offsets = offsets_temp.array

    # Inclusive scan of the mask: the last entry is the total number of non-zero elements
    wp.utils.array_scan(mask, offsets, inclusive=True)

    # Get back total counts on host
    # NOTE(review): for an empty mask `offsets.shape[0] - 1` is -1 and `numpy()[-1]` would
    # index an empty array -- confirm that callers never pass an empty mask.
    if offsets.device.is_cuda:
        masked_count_temp = borrow_temporary(temporary_store, shape=1, dtype=int, pinned=True, device="cpu")
        wp.copy(dest=masked_count_temp.array, src=offsets, src_offset=offsets.shape[0] - 1, count=1)
        wp.synchronize_stream(wp.get_stream(offsets.device))
        masked_count = int(masked_count_temp.array.numpy()[0])
        masked_count_temp.release()
    else:
        masked_count = int(offsets.numpy()[-1])

    # Convert counts to indices
    indices_temp = borrow_temporary(temporary_store, shape=masked_count, device=mask.device, dtype=int)

    # `offsets` is passed both as the scan input and as the output map: each thread reads
    # its own scan entry and then overwrites that same entry with the map value.
    wp.launch(
        kernel=_masked_indices_kernel,
        dim=offsets.shape,
        inputs=[missing_index, mask, offsets, indices_temp.array, offsets],
        device=mask.device,
    )

    return indices_temp, offsets_temp
242
+
243
+
244
def array_axpy(x: wp.array, y: wp.array, alpha: float = 1.0, beta: float = 1.0):
    """Performs y = alpha*x + beta*y

    The update is done in place on ``y`` by launching ``_array_axpy_kernel`` over ``x.shape``.

    Args:
        x: first operand
        y: second operand, overwritten with the result
        alpha: coefficient applied to ``x``
        beta: coefficient applied to ``y``

    Raises:
        ValueError: if ``x`` and ``y`` differ in data type, shape or device
    """

    # Convert the coefficients to the scalar type underlying the array data type,
    # so that the kernel arguments match the arrays' precision
    dtype = wp.types.type_scalar_type(x.dtype)

    alpha = dtype(alpha)
    beta = dtype(beta)

    if not wp.types.types_equal(x.dtype, y.dtype) or x.shape != y.shape or x.device != y.device:
        raise ValueError("x and y arrays must have same data type, shape and device")

    wp.launch(kernel=_array_axpy_kernel, dim=x.shape, device=x.device, inputs=[x, y, alpha, beta])
256
+
257
+
258
# Fills `indices` with the thread index integer-divided by `divisor`,
# i.e. each value is repeated over `divisor` consecutive entries.
@wp.kernel
def _iota_kernel(indices: wp.array(dtype=int), divisor: int):
    indices[wp.tid()] = wp.tid() // divisor
261
+
262
+
263
# Writes the i-th run count into `node_counts` at the position of the i-th unique node
# index, shifted by one entry (slot `node + 1`), leaving entry 0 untouched.
@wp.kernel
def _scatter_node_counts(
    unique_counts: wp.array(dtype=int), unique_node_indices: wp.array(dtype=int), node_counts: wp.array(dtype=int)
):
    i = wp.tid()
    node_counts[1 + unique_node_indices[i]] = unique_counts[i]
269
+
270
+
271
# Builds the two-way mapping between global indices and masked (compacted) indices.
#   missing_index:    value written to `global_to_masked` where the mask is zero
#   mask:             0/1 value per element
#   offsets:          per-element offsets; entry i minus one is used as the masked index of element i
#   masked_to_global: output, global index for each masked index
#   global_to_masked: output, masked index (or `missing_index`) for each global index
@wp.kernel
def _masked_indices_kernel(
    missing_index: int,
    mask: wp.array(dtype=int),
    offsets: wp.array(dtype=int),
    masked_to_global: wp.array(dtype=int),
    global_to_masked: wp.array(dtype=int),
):
    i = wp.tid()

    if mask[i] == 0:
        global_to_masked[i] = missing_index
    else:
        # offsets[i] is read before global_to_masked[i] is written, and only entry i is touched
        masked_idx = offsets[i] - 1
        global_to_masked[i] = masked_idx
        masked_to_global[masked_idx] = i
287
+
288
+
289
# Element-wise in-place update y[i] = beta * y[i] + alpha * x[i]; `x` is only read.
@wp.kernel
def _array_axpy_kernel(x: wp.array(dtype=Any), y: wp.array(dtype=Any), alpha: Any, beta: Any):
    i = wp.tid()
    y[i] = beta * y[i] + alpha * x[i]
293
+
294
+
295
def grid_to_tris(Nx: int, Ny: int):
    """Builds the triangle connectivity of a dense 2D grid, splitting every cell in two along a diagonal.

    Vertices are numbered as ``(Ny + 1) * i + j`` for the grid node ``(i, j)``.
    The resulting triangles will be oriented counter-clockwise assuming that `y` is the fastest moving index direction

    Args:
        Nx: Resolution of the grid along `x` dimension
        Ny: Resolution of the grid along `y` dimension

    Returns:
        Array of shape (2 * Nx * Ny, 3) containing vertex indices for each triangle
    """

    cell_i, cell_j = np.meshgrid(np.arange(Nx, dtype=int), np.arange(Ny, dtype=int), indexing="ij")

    # Global vertex index of each of the four cell corners
    v00 = (Ny + 1) * cell_i + cell_j
    v10 = (Ny + 1) * (cell_i + 1) + cell_j
    v11 = (Ny + 1) * (cell_i + 1) + (cell_j + 1)
    v01 = (Ny + 1) * (cell_i) + (cell_j + 1)

    # Two triangles per cell, both fanning out from the (0, 0) corner
    per_cell = np.array([v00, v10, v11, v00, v11, v01])

    # Reversing the axes puts the six corner indices of a cell last, ready to be split in two rows
    return np.transpose(per_cell).reshape((-1, 3))
324
+
325
+
326
def grid_to_tets(Nx: int, Ny: int, Nz: int):
    """Builds the tetrahedron connectivity of a dense 3D grid by dividing each cell into five tetrahedrons.

    Vertices are numbered as ``(Ny + 1) * (Nz + 1) * i + (Nz + 1) * j + k`` for the grid node ``(i, j, k)``.
    The reference five-tet split is mirrored along each axis for cells with an odd index along that axis.
    The resulting tets have positive volume assuming that `z` is the fastest moving index direction

    Args:
        Nx: Resolution of the grid along `x` dimension
        Ny: Resolution of the grid along `y` dimension
        Nz: Resolution of the grid along `z` dimension

    Returns:
        Array of shape (5 * Nx * Ny * Nz, 4) containing vertex indices for each tet
    """

    cell_i, cell_j, cell_k = np.meshgrid(
        np.arange(Nx, dtype=int), np.arange(Ny, dtype=int), np.arange(Nz, dtype=int), indexing="ij"
    )

    # Local corner c = 4 * dx + 2 * dy + dz sits at offset (dx, dy, dz) from the cell origin
    corner_bits = [(dx, dy, dz) for dx in (0, 1) for dy in (0, 1) for dz in (0, 1)]

    # Global node index of each of the 8 corners of every cell, shape (8, Nx, Ny, Nz)
    cell_nodes = np.array(
        [
            (Ny + 1) * (Nz + 1) * (cell_i + dx) + (Nz + 1) * (cell_j + dy) + cell_k + dz
            for dx, dy, dz in corner_bits
        ]
    )

    # Reference decomposition of a cell into 5 tets, as local corner indices
    reference_tets = np.array(
        [
            [0, 1, 2, 4],
            [3, 2, 1, 7],
            [5, 1, 7, 4],
            [6, 7, 4, 2],
            [4, 1, 2, 7],
        ]
    )

    # Same decomposition expressed as (dx, dy, dz) corner coordinates, shape (5, 4, 3)
    reference_coords = np.array(corner_bits)[reference_tets]

    # Mirror the corner coordinates along every axis on which the cell index is odd;
    # the trailing unit axes broadcast each cell's parity bit over its (5, 4) tet corners
    flipped_x = reference_coords[..., 0] ^ (cell_i % 2)[..., np.newaxis, np.newaxis]
    flipped_y = reference_coords[..., 1] ^ (cell_j % 2)[..., np.newaxis, np.newaxis]
    flipped_z = reference_coords[..., 2] ^ (cell_k % 2)[..., np.newaxis, np.newaxis]

    # Back to local corner indices, one row per tet
    local_corners = (4 * flipped_x + 2 * flipped_y + flipped_z).reshape(-1, 4)

    # Duplicate each cell's corner nodes once per tet so that column t belongs to tet t
    tets_per_cell = reference_tets.shape[0]
    per_tet_nodes = np.repeat(cell_nodes.reshape((8, -1)), tets_per_cell, axis=1)

    # Look up the global node index of each of the four corners of every tet
    tet_ids = np.arange(local_corners.shape[0])
    return np.transpose([per_tet_nodes[local_corners[:, c], tet_ids] for c in range(4)])
422
+
423
+
424
def grid_to_quads(Nx: int, Ny: int):
    """Builds the quadrilateral connectivity of a dense 2D grid.

    Vertices are numbered as ``(Ny + 1) * i + j`` for the grid node ``(i, j)``.
    The resulting quads will be indexed counter-clockwise

    Args:
        Nx: Resolution of the grid along `x` dimension
        Ny: Resolution of the grid along `y` dimension

    Returns:
        Array of shape (Nx * Ny, 4) containing vertex indices for each quadrilateral
    """

    # (dx, dy) offset of each quad corner relative to the cell origin
    corner_offsets = np.array(
        [
            [0, 0],
            [1, 0],
            [1, 1],
            [0, 1],
        ]
    )

    cell_i, cell_j = np.meshgrid(np.arange(0, Nx), np.arange(0, Ny), indexing="ij")

    # Grid coordinates of every corner of every cell, shape (Nx, Ny, 4)
    corner_i = cell_i[..., np.newaxis] + corner_offsets[:, 0]
    corner_j = cell_j[..., np.newaxis] + corner_offsets[:, 1]

    # Flatten the 2D node coordinates to global vertex indices
    flat_indices = corner_i * (Ny + 1) + corner_j

    return flat_indices.reshape(-1, 4)
456
+
457
+
458
def grid_to_hexes(Nx: int, Ny: int, Nz: int):
    """Builds the hexahedron connectivity of a dense 3D grid.

    Vertices are numbered as ``(Nz + 1) * (Ny + 1) * i + (Nz + 1) * j + k`` for the grid node ``(i, j, k)``.
    The resulting hexes will be indexed following usual convention assuming that `z` is the fastest moving index direction
    (counter-clockwise bottom vertices, then counter-clockwise top vertices)

    Args:
        Nx: Resolution of the grid along `x` dimension
        Ny: Resolution of the grid along `y` dimension
        Nz: Resolution of the grid along `z` dimension

    Returns:
        Array of shape (Nx * Ny * Nz, 8) containing vertex indices for each hexahedron
    """

    # (dx, dy, dz) offset of each hex corner relative to the cell origin
    corner_offsets = np.array(
        [
            [0, 0, 0],
            [1, 0, 0],
            [1, 1, 0],
            [0, 1, 0],
            [0, 0, 1],
            [1, 0, 1],
            [1, 1, 1],
            [0, 1, 1],
        ]
    )

    cell_i, cell_j, cell_k = np.meshgrid(np.arange(0, Nx), np.arange(0, Ny), np.arange(0, Nz), indexing="ij")

    # Grid coordinates of every corner of every cell, shape (Nx, Ny, Nz, 8)
    corner_i = cell_i[..., np.newaxis] + corner_offsets[:, 0]
    corner_j = cell_j[..., np.newaxis] + corner_offsets[:, 1]
    corner_k = cell_k[..., np.newaxis] + corner_offsets[:, 2]

    # Flatten the 3D node coordinates to global vertex indices
    flat_indices = corner_i * (Nz + 1) * (Ny + 1) + corner_j * (Nz + 1) + corner_k

    return flat_indices.reshape(-1, 8)