warp-lang 0.10.1__py3-none-win_amd64.whl → 0.11.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (300) hide show
  1. warp/__init__.py +10 -4
  2. warp/__init__.pyi +1 -0
  3. warp/bin/warp-clang.dll +0 -0
  4. warp/bin/warp.dll +0 -0
  5. warp/build.py +5 -3
  6. warp/build_dll.py +29 -9
  7. warp/builtins.py +868 -507
  8. warp/codegen.py +1074 -638
  9. warp/config.py +3 -3
  10. warp/constants.py +6 -0
  11. warp/context.py +715 -222
  12. warp/fabric.py +326 -0
  13. warp/fem/__init__.py +27 -0
  14. warp/fem/cache.py +389 -0
  15. warp/fem/dirichlet.py +181 -0
  16. warp/fem/domain.py +263 -0
  17. warp/fem/field/__init__.py +101 -0
  18. warp/fem/field/field.py +149 -0
  19. warp/fem/field/nodal_field.py +299 -0
  20. warp/fem/field/restriction.py +21 -0
  21. warp/fem/field/test.py +181 -0
  22. warp/fem/field/trial.py +183 -0
  23. warp/fem/geometry/__init__.py +19 -0
  24. warp/fem/geometry/closest_point.py +70 -0
  25. warp/fem/geometry/deformed_geometry.py +271 -0
  26. warp/fem/geometry/element.py +744 -0
  27. warp/fem/geometry/geometry.py +186 -0
  28. warp/fem/geometry/grid_2d.py +373 -0
  29. warp/fem/geometry/grid_3d.py +435 -0
  30. warp/fem/geometry/hexmesh.py +953 -0
  31. warp/fem/geometry/partition.py +376 -0
  32. warp/fem/geometry/quadmesh_2d.py +532 -0
  33. warp/fem/geometry/tetmesh.py +840 -0
  34. warp/fem/geometry/trimesh_2d.py +577 -0
  35. warp/fem/integrate.py +1616 -0
  36. warp/fem/operator.py +191 -0
  37. warp/fem/polynomial.py +213 -0
  38. warp/fem/quadrature/__init__.py +2 -0
  39. warp/fem/quadrature/pic_quadrature.py +245 -0
  40. warp/fem/quadrature/quadrature.py +294 -0
  41. warp/fem/space/__init__.py +292 -0
  42. warp/fem/space/basis_space.py +489 -0
  43. warp/fem/space/collocated_function_space.py +105 -0
  44. warp/fem/space/dof_mapper.py +236 -0
  45. warp/fem/space/function_space.py +145 -0
  46. warp/fem/space/grid_2d_function_space.py +267 -0
  47. warp/fem/space/grid_3d_function_space.py +306 -0
  48. warp/fem/space/hexmesh_function_space.py +352 -0
  49. warp/fem/space/partition.py +350 -0
  50. warp/fem/space/quadmesh_2d_function_space.py +369 -0
  51. warp/fem/space/restriction.py +160 -0
  52. warp/fem/space/shape/__init__.py +15 -0
  53. warp/fem/space/shape/cube_shape_function.py +738 -0
  54. warp/fem/space/shape/shape_function.py +103 -0
  55. warp/fem/space/shape/square_shape_function.py +611 -0
  56. warp/fem/space/shape/tet_shape_function.py +567 -0
  57. warp/fem/space/shape/triangle_shape_function.py +429 -0
  58. warp/fem/space/tetmesh_function_space.py +292 -0
  59. warp/fem/space/topology.py +295 -0
  60. warp/fem/space/trimesh_2d_function_space.py +221 -0
  61. warp/fem/types.py +77 -0
  62. warp/fem/utils.py +495 -0
  63. warp/native/array.h +147 -44
  64. warp/native/builtin.h +122 -149
  65. warp/native/bvh.cpp +73 -325
  66. warp/native/bvh.cu +406 -23
  67. warp/native/bvh.h +34 -43
  68. warp/native/clang/clang.cpp +13 -8
  69. warp/native/crt.h +2 -0
  70. warp/native/cuda_crt.h +5 -0
  71. warp/native/cuda_util.cpp +15 -3
  72. warp/native/cuda_util.h +3 -1
  73. warp/native/cutlass/tools/library/scripts/conv2d_operation.py +463 -0
  74. warp/native/cutlass/tools/library/scripts/conv3d_operation.py +321 -0
  75. warp/native/cutlass/tools/library/scripts/gemm_operation.py +988 -0
  76. warp/native/cutlass/tools/library/scripts/generator.py +4625 -0
  77. warp/native/cutlass/tools/library/scripts/library.py +799 -0
  78. warp/native/cutlass/tools/library/scripts/manifest.py +402 -0
  79. warp/native/cutlass/tools/library/scripts/pycutlass/docs/source/conf.py +96 -0
  80. warp/native/cutlass/tools/library/scripts/pycutlass/profile/conv/conv2d_f16_sm80.py +106 -0
  81. warp/native/cutlass/tools/library/scripts/pycutlass/profile/gemm/gemm_f32_sm80.py +91 -0
  82. warp/native/cutlass/tools/library/scripts/pycutlass/setup.py +80 -0
  83. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/__init__.py +48 -0
  84. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/arguments.py +118 -0
  85. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/c_types.py +241 -0
  86. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/compiler.py +432 -0
  87. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/conv2d_operation.py +631 -0
  88. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/epilogue.py +1026 -0
  89. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/frontend.py +104 -0
  90. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/gemm_operation.py +1276 -0
  91. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/library.py +744 -0
  92. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/memory_manager.py +74 -0
  93. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/operation.py +110 -0
  94. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/parser.py +619 -0
  95. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/reduction_operation.py +398 -0
  96. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/tensor_ref.py +70 -0
  97. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/__init__.py +4 -0
  98. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/conv2d_testbed.py +646 -0
  99. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_grouped_testbed.py +235 -0
  100. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_testbed.py +557 -0
  101. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/profiler.py +70 -0
  102. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/type_hint.py +39 -0
  103. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/__init__.py +1 -0
  104. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/device.py +76 -0
  105. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/reference_model.py +255 -0
  106. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/__init__.py +0 -0
  107. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +201 -0
  108. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +177 -0
  109. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +98 -0
  110. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +95 -0
  111. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +163 -0
  112. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +187 -0
  113. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +309 -0
  114. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +54 -0
  115. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  116. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  117. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +253 -0
  118. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +97 -0
  119. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +242 -0
  120. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  121. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  122. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/run_all_tests.py +10 -0
  123. warp/native/cutlass/tools/library/scripts/pycutlass/test/frontend/test_frontend.py +146 -0
  124. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/__init__.py +0 -0
  125. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_bf16_sm80.py +96 -0
  126. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f16_sm80.py +447 -0
  127. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f32_sm80.py +146 -0
  128. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f64_sm80.py +102 -0
  129. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_grouped_sm80.py +203 -0
  130. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_s8_sm80.py +229 -0
  131. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/run_all_tests.py +9 -0
  132. warp/native/cutlass/tools/library/scripts/pycutlass/test/unit/test_sm80.py +453 -0
  133. warp/native/cutlass/tools/library/scripts/rank_2k_operation.py +398 -0
  134. warp/native/cutlass/tools/library/scripts/rank_k_operation.py +387 -0
  135. warp/native/cutlass/tools/library/scripts/rt.py +796 -0
  136. warp/native/cutlass/tools/library/scripts/symm_operation.py +400 -0
  137. warp/native/cutlass/tools/library/scripts/trmm_operation.py +407 -0
  138. warp/native/cutlass_gemm.cu +5 -3
  139. warp/native/exports.h +1240 -952
  140. warp/native/fabric.h +228 -0
  141. warp/native/hashgrid.cpp +4 -4
  142. warp/native/hashgrid.h +22 -2
  143. warp/native/intersect.h +22 -7
  144. warp/native/intersect_adj.h +8 -8
  145. warp/native/intersect_tri.h +1 -1
  146. warp/native/marching.cu +157 -161
  147. warp/native/mat.h +80 -19
  148. warp/native/matnn.h +2 -2
  149. warp/native/mesh.cpp +33 -108
  150. warp/native/mesh.cu +114 -23
  151. warp/native/mesh.h +446 -46
  152. warp/native/noise.h +272 -329
  153. warp/native/quat.h +51 -8
  154. warp/native/rand.h +45 -35
  155. warp/native/range.h +6 -2
  156. warp/native/reduce.cpp +1 -1
  157. warp/native/reduce.cu +10 -12
  158. warp/native/runlength_encode.cu +6 -10
  159. warp/native/scan.cu +8 -11
  160. warp/native/sparse.cpp +4 -4
  161. warp/native/sparse.cu +164 -154
  162. warp/native/spatial.h +2 -2
  163. warp/native/temp_buffer.h +14 -30
  164. warp/native/vec.h +107 -23
  165. warp/native/volume.h +120 -0
  166. warp/native/warp.cpp +560 -30
  167. warp/native/warp.cu +431 -44
  168. warp/native/warp.h +13 -4
  169. warp/optim/__init__.py +1 -0
  170. warp/optim/linear.py +922 -0
  171. warp/optim/sgd.py +92 -0
  172. warp/render/render_opengl.py +335 -119
  173. warp/render/render_usd.py +11 -11
  174. warp/sim/__init__.py +2 -2
  175. warp/sim/articulation.py +385 -185
  176. warp/sim/collide.py +8 -0
  177. warp/sim/import_mjcf.py +297 -106
  178. warp/sim/import_urdf.py +389 -210
  179. warp/sim/import_usd.py +198 -97
  180. warp/sim/inertia.py +17 -18
  181. warp/sim/integrator_euler.py +14 -8
  182. warp/sim/integrator_xpbd.py +158 -16
  183. warp/sim/model.py +795 -291
  184. warp/sim/render.py +3 -3
  185. warp/sim/utils.py +3 -0
  186. warp/sparse.py +640 -150
  187. warp/stubs.py +606 -267
  188. warp/tape.py +61 -10
  189. warp/tests/__main__.py +3 -6
  190. warp/tests/assets/curlnoise_golden.npy +0 -0
  191. warp/tests/assets/pnoise_golden.npy +0 -0
  192. warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
  193. warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
  194. warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
  195. warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
  196. warp/tests/aux_test_unresolved_func.py +14 -0
  197. warp/tests/aux_test_unresolved_symbol.py +14 -0
  198. warp/tests/disabled_kinematics.py +239 -0
  199. warp/tests/run_coverage_serial.py +31 -0
  200. warp/tests/test_adam.py +103 -106
  201. warp/tests/test_arithmetic.py +128 -74
  202. warp/tests/test_array.py +212 -97
  203. warp/tests/test_array_reduce.py +57 -23
  204. warp/tests/test_atomic.py +64 -28
  205. warp/tests/test_bool.py +99 -0
  206. warp/tests/test_builtins_resolution.py +1292 -0
  207. warp/tests/test_bvh.py +42 -18
  208. warp/tests/test_closest_point_edge_edge.py +54 -57
  209. warp/tests/test_codegen.py +208 -130
  210. warp/tests/test_compile_consts.py +28 -20
  211. warp/tests/test_conditional.py +108 -24
  212. warp/tests/test_copy.py +10 -12
  213. warp/tests/test_ctypes.py +112 -88
  214. warp/tests/test_dense.py +21 -14
  215. warp/tests/test_devices.py +98 -0
  216. warp/tests/test_dlpack.py +75 -75
  217. warp/tests/test_examples.py +277 -0
  218. warp/tests/test_fabricarray.py +955 -0
  219. warp/tests/test_fast_math.py +15 -11
  220. warp/tests/test_fem.py +1271 -0
  221. warp/tests/test_fp16.py +53 -19
  222. warp/tests/test_func.py +187 -86
  223. warp/tests/test_generics.py +194 -49
  224. warp/tests/test_grad.py +178 -109
  225. warp/tests/test_grad_customs.py +176 -0
  226. warp/tests/test_hash_grid.py +52 -37
  227. warp/tests/test_import.py +10 -23
  228. warp/tests/test_indexedarray.py +32 -31
  229. warp/tests/test_intersect.py +18 -9
  230. warp/tests/test_large.py +141 -0
  231. warp/tests/test_launch.py +14 -41
  232. warp/tests/test_lerp.py +64 -65
  233. warp/tests/test_linear_solvers.py +154 -0
  234. warp/tests/test_lvalue.py +493 -0
  235. warp/tests/test_marching_cubes.py +12 -13
  236. warp/tests/test_mat.py +517 -2898
  237. warp/tests/test_mat_lite.py +115 -0
  238. warp/tests/test_mat_scalar_ops.py +2889 -0
  239. warp/tests/test_math.py +103 -9
  240. warp/tests/test_matmul.py +305 -69
  241. warp/tests/test_matmul_lite.py +410 -0
  242. warp/tests/test_mesh.py +71 -14
  243. warp/tests/test_mesh_query_aabb.py +41 -25
  244. warp/tests/test_mesh_query_point.py +140 -22
  245. warp/tests/test_mesh_query_ray.py +39 -22
  246. warp/tests/test_mlp.py +30 -22
  247. warp/tests/test_model.py +92 -89
  248. warp/tests/test_modules_lite.py +39 -0
  249. warp/tests/test_multigpu.py +88 -114
  250. warp/tests/test_noise.py +12 -11
  251. warp/tests/test_operators.py +16 -20
  252. warp/tests/test_options.py +11 -11
  253. warp/tests/test_pinned.py +17 -18
  254. warp/tests/test_print.py +32 -11
  255. warp/tests/test_quat.py +275 -129
  256. warp/tests/test_rand.py +18 -16
  257. warp/tests/test_reload.py +38 -34
  258. warp/tests/test_rounding.py +50 -43
  259. warp/tests/test_runlength_encode.py +168 -20
  260. warp/tests/test_smoothstep.py +9 -11
  261. warp/tests/test_snippet.py +143 -0
  262. warp/tests/test_sparse.py +261 -63
  263. warp/tests/test_spatial.py +276 -243
  264. warp/tests/test_streams.py +110 -85
  265. warp/tests/test_struct.py +268 -63
  266. warp/tests/test_tape.py +39 -21
  267. warp/tests/test_torch.py +118 -89
  268. warp/tests/test_transient_module.py +12 -13
  269. warp/tests/test_types.py +614 -0
  270. warp/tests/test_utils.py +494 -0
  271. warp/tests/test_vec.py +354 -2050
  272. warp/tests/test_vec_lite.py +73 -0
  273. warp/tests/test_vec_scalar_ops.py +2099 -0
  274. warp/tests/test_volume.py +457 -293
  275. warp/tests/test_volume_write.py +124 -134
  276. warp/tests/unittest_serial.py +35 -0
  277. warp/tests/unittest_suites.py +341 -0
  278. warp/tests/unittest_utils.py +568 -0
  279. warp/tests/unused_test_misc.py +71 -0
  280. warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
  281. warp/thirdparty/appdirs.py +36 -45
  282. warp/thirdparty/unittest_parallel.py +549 -0
  283. warp/torch.py +9 -6
  284. warp/types.py +1089 -366
  285. warp/utils.py +93 -387
  286. warp_lang-0.11.0.dist-info/METADATA +238 -0
  287. warp_lang-0.11.0.dist-info/RECORD +332 -0
  288. {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/WHEEL +1 -1
  289. warp/tests/test_all.py +0 -219
  290. warp/tests/test_array_scan.py +0 -60
  291. warp/tests/test_base.py +0 -208
  292. warp/tests/test_unresolved_func.py +0 -7
  293. warp/tests/test_unresolved_symbol.py +0 -7
  294. warp_lang-0.10.1.dist-info/METADATA +0 -21
  295. warp_lang-0.10.1.dist-info/RECORD +0 -188
  296. /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
  297. /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
  298. /warp/tests/{test_square.py → aux_test_square.py} +0 -0
  299. {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/LICENSE.md +0 -0
  300. {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/top_level.txt +0 -0
warp/fem/utils.py ADDED
@@ -0,0 +1,495 @@
1
+ from typing import Any, Tuple
2
+
3
+ import numpy as np
4
+
5
+ import warp as wp
6
+ from warp.fem.cache import (
7
+ Temporary,
8
+ TemporaryStore,
9
+ borrow_temporary,
10
+ borrow_temporary_like,
11
+ )
12
+ from warp.utils import array_scan, radix_sort_pairs, runlength_encode
13
+
14
+
@wp.func
def generalized_outer(x: Any, y: Any):
    """Outer product of `x` and `y`, with overloads accepting a scalar `x`.

    This generic overload forwards to `wp.outer`; the overloads defined
    after it handle a `wp.float32` first argument.
    """
    product = wp.outer(x, y)
    return product


@wp.func
def generalized_outer(x: wp.float32, y: wp.vec2):
    # Scalar first argument: the product is `y` scaled by `x`
    scaled = x * y
    return scaled


@wp.func
def generalized_outer(x: wp.float32, y: wp.vec3):
    # Scalar first argument: the product is `y` scaled by `x`
    scaled = x * y
    return scaled
29
+
30
+
@wp.func
def generalized_inner(x: Any, y: Any):
    """Inner product of `x` and `y`, with overloads accepting a matrix `x`.

    This generic overload forwards to `wp.dot`; the overloads defined
    after it handle a `wp.mat22` / `wp.mat33` first argument.
    """
    result = wp.dot(x, y)
    return result


@wp.func
def generalized_inner(x: wp.mat22, y: wp.vec2):
    # Sum of `x[i] * y[i]`: each `x[i]` weighted by the matching component of `y`
    first = x[0] * y[0]
    second = x[1] * y[1]
    return first + second


@wp.func
def generalized_inner(x: wp.mat33, y: wp.vec3):
    # Sum of `x[i] * y[i]`: each `x[i]` weighted by the matching component of `y`
    first = x[0] * y[0]
    second = x[1] * y[1]
    third = x[2] * y[2]
    return first + second + third
45
+
46
+
@wp.func
def apply_right(x: Any, y: Any):
    """Computes the product `x y`, where `y` is a square matrix and `x` is
    either a row-vector or a matrix.

    This generic overload uses the `*` operator; the overloads defined after
    it handle a vector `x`.
    Will be removed once native @ operator is implemented.
    """
    product = x * y
    return product


@wp.func
def apply_right(x: wp.vec2, y: wp.mat22):
    # Row-vector times matrix: sum of `x[i] * y[i]`
    first = x[0] * y[0]
    second = x[1] * y[1]
    return first + second


@wp.func
def apply_right(x: wp.vec3, y: wp.mat33):
    # Row-vector times matrix: sum of `x[i] * y[i]`
    first = x[0] * y[0]
    second = x[1] * y[1]
    third = x[2] * y[2]
    return first + second + third
63
+
64
+
@wp.func
def unit_element(template_type: Any, coord: int):
    """Returns an instance of `template_type` with a single coordinate set to 1 in the canonical basis

    `template_type` is only used to select the type of the result; the
    generic overload zero-initializes that type and sets entry `coord` to 1.
    """

    basis = type(template_type)(0.0)
    basis[coord] = 1.0
    return basis


@wp.func
def unit_element(template_type: wp.float32, coord: int):
    # Scalars have a single coordinate, so `coord` is ignored
    return 1.0


@wp.func
def unit_element(template_type: wp.mat22, coord: int):
    # `coord` enumerates the 2x2 entries row by row
    row = coord // 2
    col = coord - 2 * row

    basis = wp.mat22(0.0)
    basis[row, col] = 1.0
    return basis


@wp.func
def unit_element(template_type: wp.mat33, coord: int):
    # `coord` enumerates the 3x3 entries row by row
    row = coord // 3
    col = coord - 3 * row

    basis = wp.mat33(0.0)
    basis[row, col] = 1.0
    return basis
95
+
96
+
@wp.func
def symmetric_part(x: Any):
    """Symmetric part of a square tensor, i.e. the average of `x` and its transpose"""
    transposed = wp.transpose(x)
    return 0.5 * (x + transposed)
101
+
102
+
@wp.func
def skew_part(x: wp.mat22):
    """Skew part of a 2x2 tensor as corresponding rotation angle"""
    off_diagonal_difference = x[1, 0] - x[0, 1]
    return 0.5 * off_diagonal_difference


@wp.func
def skew_part(x: wp.mat33):
    """Skew part of a 3x3 tensor as the corresponding rotation vector"""
    # Each component is half the difference of a pair of mirrored off-diagonal entries
    return wp.vec3(
        0.5 * (x[2, 1] - x[1, 2]),
        0.5 * (x[0, 2] - x[2, 0]),
        0.5 * (x[1, 0] - x[0, 1]),
    )
116
+
117
+
def compress_node_indices(
    node_count: int, node_indices: wp.array(dtype=int), temporary_store: TemporaryStore = None
) -> Tuple[Temporary, Temporary, int, Temporary]:
    """
    Groups the entries of an unsorted array of node indices by node.

    Args:
        node_count: Number of nodes; the offsets array gets `node_count + 1` entries
        node_indices: Unsorted array of node indices
        temporary_store: Store from which the temporary arrays are borrowed

    Returns a tuple made of:
     - a node_offsets temporary, giving for each node the start offset of corresponding indices in sorted_array_indices
     - a sorted_array_indices temporary, listing the indices in the input array corresponding to each node
       (flat position divided by the size of the last dimension when the input is not one-dimensional)
     - the number of unique node indices
     - a unique_node_indices temporary containing the sorted list of unique node indices
       (i.e. the list of indices i for which node_offsets[i] < node_offsets[i+1])

    The first, second and fourth temporaries are returned unreleased.
    """

    device = node_indices.device
    entry_count = node_indices.size

    # Key/value buffers for the pair sort, borrowed at twice the entry count
    # (presumably scratch space expected by radix_sort_pairs -- confirm against warp.utils)
    keys_temp = borrow_temporary(temporary_store, shape=2 * entry_count, dtype=int, device=device)
    values_temp = borrow_temporary_like(keys_temp, temporary_store)

    keys = keys_temp.array
    values = values_temp.array

    # Keys are the node indices themselves
    wp.copy(dest=keys, src=node_indices, count=entry_count)

    # Values are the flat entry positions divided by the number of entries per element
    if node_indices.ndim == 1:
        entries_per_element = 1
    else:
        entries_per_element = node_indices.shape[-1]
    wp.launch(
        kernel=_iota_kernel,
        dim=entry_count,
        inputs=[values, entries_per_element],
        device=values.device,
    )

    # Sort entries by node index
    radix_sort_pairs(keys, values, count=entry_count)

    # Collapse runs of identical node indices into (unique index, run length) pairs
    unique_nodes_temp = borrow_temporary(temporary_store, shape=entry_count, dtype=int, device=device)
    run_lengths_temp = borrow_temporary(temporary_store, shape=entry_count, dtype=int, device=device)

    unique_nodes = unique_nodes_temp.array
    run_lengths = run_lengths_temp.array

    run_count_temp = borrow_temporary(temporary_store, shape=(1,), dtype=int, device=keys.device)
    runlength_encode(
        keys,
        unique_nodes,
        run_lengths,
        value_count=entry_count,
        run_count=run_count_temp.array,
    )

    # Bring the number of runs back to the host
    if device.is_cuda:
        # Go through a pinned host buffer and wait on the device stream before reading it
        pinned_count_temp = borrow_temporary(temporary_store, shape=(1,), dtype=int, pinned=True, device="cpu")
        wp.copy(src=run_count_temp.array, dest=pinned_count_temp.array, count=1)
        wp.synchronize_stream(wp.get_stream(device))
        run_count_temp.release()
        unique_node_count = int(pinned_count_temp.array.numpy()[0])
        pinned_count_temp.release()
    else:
        unique_node_count = int(run_count_temp.array.numpy()[0])
        run_count_temp.release()

    # Scatter run lengths into a zeroed per-node array, shifted by one slot,
    # so that the inclusive prefix sum below yields start offsets
    offsets_temp = borrow_temporary(temporary_store, shape=(node_count + 1), device=run_lengths.device, dtype=int)
    offsets = offsets_temp.array

    offsets.zero_()
    wp.launch(
        kernel=_scatter_node_counts,
        dim=unique_node_count,
        inputs=[run_lengths, unique_nodes, offsets],
        device=offsets.device,
    )

    # In-place prefix sum of the number of entries per node
    array_scan(offsets, offsets, inclusive=True)

    # Sort keys and run lengths are no longer needed
    keys_temp.release()
    run_lengths_temp.release()

    return offsets_temp, values_temp, unique_node_count, unique_nodes_temp
205
+
206
+
def masked_indices(
    mask: wp.array, missing_index=-1, temporary_store: TemporaryStore = None
) -> Tuple[Temporary, Temporary]:
    """
    Builds index maps between an array and its masked subset.

    Args:
        mask: Array of boolean masks (must be either 0 or 1)
        missing_index: Value stored in the map for elements whose mask is zero
        temporary_store: Store from which the temporary arrays are borrowed

    Returns a tuple made of:
     - A temporary holding the list of indices for which the mask is 1
     - A temporary holding a map associating to each element of the input mask array
       its local index if non-zero, or missing_index if zero.
    """

    # This buffer first receives the inclusive prefix sum of the mask, then is
    # overwritten in place with the global-to-masked map by the kernel below
    map_temp = borrow_temporary_like(mask, temporary_store)
    running_count = map_temp.array

    array_scan(mask, running_count, inclusive=True)

    # The last prefix-sum entry is the number of non-zero mask values; read it on the host
    # NOTE(review): an empty mask would address entry -1 here; callers presumably
    # never pass one -- confirm
    if running_count.device.is_cuda:
        host_count_temp = borrow_temporary(temporary_store, shape=1, dtype=int, pinned=True, device="cpu")
        last_entry = running_count.shape[0] - 1
        wp.copy(dest=host_count_temp.array, src=running_count, src_offset=last_entry, count=1)
        wp.synchronize_stream(wp.get_stream(running_count.device))
        selected_count = int(host_count_temp.array.numpy()[0])
        host_count_temp.release()
    else:
        selected_count = int(running_count.numpy()[-1])

    # Convert counts to indices
    selected_temp = borrow_temporary(temporary_store, shape=selected_count, device=mask.device, dtype=int)

    # The prefix-sum buffer is passed both as the offsets input and as the map output
    wp.launch(
        kernel=_masked_indices_kernel,
        dim=running_count.shape,
        inputs=[missing_index, mask, running_count, selected_temp.array, running_count],
        device=mask.device,
    )

    return selected_temp, map_temp
242
+
243
+
def array_axpy(x: wp.array, y: wp.array, alpha: float = 1.0, beta: float = 1.0):
    """Performs y = alpha*x + beta*y, overwriting `y` in place.

    Args:
        x: Input array, left unmodified
        y: Array updated in place; must match `x` in data type, shape and device
        alpha: Coefficient applied to `x`
        beta: Coefficient applied to the previous contents of `y`

    Raises:
        ValueError: if `x` and `y` differ in data type, shape or device
    """

    # Validate before doing any conversion work
    if not wp.types.types_equal(x.dtype, y.dtype) or x.shape != y.shape or x.device != y.device:
        raise ValueError("x and y arrays must have same data type, shape and device")

    # Cast the coefficients to the scalar type underlying the array data type
    scalar_type = wp.types.type_scalar_type(x.dtype)
    alpha = scalar_type(alpha)
    beta = scalar_type(beta)

    wp.launch(kernel=_array_axpy_kernel, dim=x.shape, device=x.device, inputs=[x, y, alpha, beta])
256
+
257
+
@wp.kernel
def _iota_kernel(indices: wp.array(dtype=int), divisor: int):
    # Each thread stores its own index, integer-divided by `divisor`
    tid = wp.tid()
    indices[tid] = tid // divisor
261
+
262
+
@wp.kernel
def _scatter_node_counts(
    unique_counts: wp.array(dtype=int), unique_node_indices: wp.array(dtype=int), node_counts: wp.array(dtype=int)
):
    # One thread per unique node: write its count one slot past the node index
    run = wp.tid()
    node = unique_node_indices[run]
    node_counts[node + 1] = unique_counts[run]
269
+
270
+
@wp.kernel
def _masked_indices_kernel(
    missing_index: int,
    mask: wp.array(dtype=int),
    offsets: wp.array(dtype=int),
    masked_to_global: wp.array(dtype=int),
    global_to_masked: wp.array(dtype=int),
):
    # One thread per element; each thread reads and writes only entry `element`
    # of `offsets` and `global_to_masked`
    element = wp.tid()

    if mask[element] != 0:
        # `offsets[element]` is one more than the element's masked index
        local = offsets[element] - 1
        global_to_masked[element] = local
        masked_to_global[local] = element
    else:
        global_to_masked[element] = missing_index
287
+
288
+
@wp.kernel
def _array_axpy_kernel(x: wp.array(dtype=Any), y: wp.array(dtype=Any), alpha: Any, beta: Any):
    # Per-entry update y <- beta * y + alpha * x
    entry = wp.tid()
    scaled_y = beta * y[entry]
    scaled_x = alpha * x[entry]
    y[entry] = scaled_y + scaled_x
293
+
294
+
def grid_to_tris(Nx: int, Ny: int):
    """Constructs a triangular mesh topology by dividing each cell of a dense 2D grid into two triangles.

    Vertices are numbered `(Ny + 1) * i + j`, so `y` is the fastest moving index direction;
    under that numbering the resulting triangles are oriented counter-clockwise.

    Args:
        Nx: Resolution of the grid along `x` dimension
        Ny: Resolution of the grid along `y` dimension

    Returns:
        Array of shape (2 * Nx * Ny, 3) containing vertex indices for each triangle
    """

    cell_x, cell_y = np.meshgrid(np.arange(Nx, dtype=int), np.arange(Ny, dtype=int), indexing="ij")

    # Vertex indices of the four corners of every cell
    row_stride = Ny + 1
    lower_left = row_stride * cell_x + cell_y
    lower_right = row_stride * (cell_x + 1) + cell_y
    upper_right = row_stride * (cell_x + 1) + (cell_y + 1)
    upper_left = row_stride * (cell_x) + (cell_y + 1)

    # Two triangles per cell, both containing the lower-left to upper-right diagonal
    corners = np.array([lower_left, lower_right, upper_right, lower_left, upper_right, upper_left])

    # Reversing the axes puts the six corner indices of a cell next to each other
    return corners.T.reshape((-1, 3))
324
+
325
+
def grid_to_tets(Nx: int, Ny: int, Nz: int):
    """Constructs a tetrahedral mesh topology by dividing each cell of a dense 3D grid into five tetrahedrons

    Vertices are numbered `(Ny + 1) * (Nz + 1) * i + (Nz + 1) * j + k`, so `z` is the fastest
    moving index direction. Every cell uses the same five-tet pattern, mirrored along each
    axis for which the cell index is odd.

    NOTE(review): this function was previously documented as producing tets of positive
    volume; mirroring keeps the vertex order, so confirm how consumers treat the orientation
    of mirrored cells.

    Args:
        Nx: Resolution of the grid along `x` dimension
        Ny: Resolution of the grid along `y` dimension
        Nz: Resolution of the grid along `z` dimension

    Returns:
        Array of shape (5 * Nx * Ny * Nz, 4) containing vertex indices for each tet
    """

    cell_x, cell_y, cell_z = np.meshgrid(
        np.arange(Nx, dtype=int), np.arange(Ny, dtype=int), np.arange(Nz, dtype=int), indexing="ij"
    )

    # (x, y, z) offsets of the eight corners of a cell; row c is local corner c = 4x + 2y + z
    corner_bits = np.array(
        [
            [0, 0, 0],
            [0, 0, 1],
            [0, 1, 0],
            [0, 1, 1],
            [1, 0, 0],
            [1, 0, 1],
            [1, 1, 0],
            [1, 1, 1],
        ]
    )

    # Global node index of each local corner, for every cell
    stride_y = Nz + 1
    stride_x = (Ny + 1) * stride_y
    cell_nodes = np.array(
        [
            stride_x * (cell_x + dx) + stride_y * (cell_y + dy) + cell_z + dz
            for dx, dy, dz in corner_bits.tolist()
        ]
    )

    # Reference decomposition of a cell into 5 tets, as local corner indices
    reference_tets = np.array(
        [
            [0, 1, 2, 4],
            [3, 2, 1, 7],
            [5, 1, 7, 4],
            [6, 7, 4, 2],
            [4, 1, 2, 7],
        ]
    )
    tets_per_cell = reference_tets.shape[0]

    # Corner offsets of the reference tets, shape (5, 4, 3)
    tet_bits = corner_bits[reference_tets]

    # Flip the offsets along each axis where the cell index is odd;
    # broadcasting yields one (5, 4) table per cell
    flipped_x = tet_bits[..., 0] ^ (cell_x % 2)[..., np.newaxis, np.newaxis]
    flipped_y = tet_bits[..., 1] ^ (cell_y % 2)[..., np.newaxis, np.newaxis]
    flipped_z = tet_bits[..., 2] ^ (cell_z % 2)[..., np.newaxis, np.newaxis]

    # Back to local corner indices, one row per tet
    local_corners = (4 * flipped_x + 2 * flipped_y + flipped_z).reshape(-1, 4)

    # Repeat each cell's corner nodes once per tet so that columns line up with the tet rows
    tet_nodes = np.repeat(cell_nodes.reshape((8, -1)), tets_per_cell, axis=1)

    # Look up the global node of every tet corner
    tet_rows = np.arange(local_corners.shape[0])
    return np.transpose(tet_nodes[local_corners.T, tet_rows])
422
+
423
+
def grid_to_quads(Nx: int, Ny: int):
    """Constructs a quadrilateral mesh topology from a dense 2D grid

    Vertices are numbered `(Ny + 1) * i + j`; the four vertices of each quad
    are listed counter-clockwise, starting from the corner with the lowest indices.

    Args:
        Nx: Resolution of the grid along `x` dimension
        Ny: Resolution of the grid along `y` dimension

    Returns:
        Array of shape (Nx * Ny, 4) containing vertex indices for each quadrilateral
    """

    # (x, y) offsets of the quad corners
    corner_offsets = np.array(
        [
            [0, 0],
            [1, 0],
            [1, 1],
            [0, 1],
        ]
    )
    corner_dx, corner_dy = corner_offsets.T

    cell_x, cell_y = np.meshgrid(np.arange(0, Nx), np.arange(0, Ny), indexing="ij")

    # Grid coordinates of every corner of every cell, shape (Nx, Ny, 4)
    vertex_x = cell_x[..., np.newaxis] + corner_dx
    vertex_y = cell_y[..., np.newaxis] + corner_dy

    vertex_indices = vertex_x * (Ny + 1) + vertex_y

    return vertex_indices.reshape(-1, 4)
456
+
457
+
def grid_to_hexes(Nx: int, Ny: int, Nz: int):
    """Constructs a hexahedral mesh topology from a dense 3D grid

    Vertices are numbered `(Ny + 1) * (Nz + 1) * i + (Nz + 1) * j + k`, so `z` is the fastest
    moving index direction. Each hex lists its four `z = 0` vertices counter-clockwise,
    then its four `z = 1` vertices in the same order.

    Args:
        Nx: Resolution of the grid along `x` dimension
        Ny: Resolution of the grid along `y` dimension
        Nz: Resolution of the grid along `z` dimension

    Returns:
        Array of shape (Nx * Ny * Nz, 8) containing vertex indices for each hexahedron
    """

    # (x, y, z) offsets of the hex corners
    corner_offsets = np.array(
        [
            [0, 0, 0],
            [1, 0, 0],
            [1, 1, 0],
            [0, 1, 0],
            [0, 0, 1],
            [1, 0, 1],
            [1, 1, 1],
            [0, 1, 1],
        ]
    )
    corner_dx, corner_dy, corner_dz = corner_offsets.T

    cell_x, cell_y, cell_z = np.meshgrid(np.arange(0, Nx), np.arange(0, Ny), np.arange(0, Nz), indexing="ij")

    # Grid coordinates of every corner of every cell, shape (Nx, Ny, Nz, 8)
    vertex_x = cell_x[..., np.newaxis] + corner_dx
    vertex_y = cell_y[..., np.newaxis] + corner_dy
    vertex_z = cell_z[..., np.newaxis] + corner_dz

    vertex_indices = vertex_x * (Nz + 1) * (Ny + 1) + vertex_y * (Nz + 1) + vertex_z

    return vertex_indices.reshape(-1, 8)