warp-lang 0.10.1__py3-none-win_amd64.whl → 0.11.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (300) hide show
  1. warp/__init__.py +10 -4
  2. warp/__init__.pyi +1 -0
  3. warp/bin/warp-clang.dll +0 -0
  4. warp/bin/warp.dll +0 -0
  5. warp/build.py +5 -3
  6. warp/build_dll.py +29 -9
  7. warp/builtins.py +868 -507
  8. warp/codegen.py +1074 -638
  9. warp/config.py +3 -3
  10. warp/constants.py +6 -0
  11. warp/context.py +715 -222
  12. warp/fabric.py +326 -0
  13. warp/fem/__init__.py +27 -0
  14. warp/fem/cache.py +389 -0
  15. warp/fem/dirichlet.py +181 -0
  16. warp/fem/domain.py +263 -0
  17. warp/fem/field/__init__.py +101 -0
  18. warp/fem/field/field.py +149 -0
  19. warp/fem/field/nodal_field.py +299 -0
  20. warp/fem/field/restriction.py +21 -0
  21. warp/fem/field/test.py +181 -0
  22. warp/fem/field/trial.py +183 -0
  23. warp/fem/geometry/__init__.py +19 -0
  24. warp/fem/geometry/closest_point.py +70 -0
  25. warp/fem/geometry/deformed_geometry.py +271 -0
  26. warp/fem/geometry/element.py +744 -0
  27. warp/fem/geometry/geometry.py +186 -0
  28. warp/fem/geometry/grid_2d.py +373 -0
  29. warp/fem/geometry/grid_3d.py +435 -0
  30. warp/fem/geometry/hexmesh.py +953 -0
  31. warp/fem/geometry/partition.py +376 -0
  32. warp/fem/geometry/quadmesh_2d.py +532 -0
  33. warp/fem/geometry/tetmesh.py +840 -0
  34. warp/fem/geometry/trimesh_2d.py +577 -0
  35. warp/fem/integrate.py +1616 -0
  36. warp/fem/operator.py +191 -0
  37. warp/fem/polynomial.py +213 -0
  38. warp/fem/quadrature/__init__.py +2 -0
  39. warp/fem/quadrature/pic_quadrature.py +245 -0
  40. warp/fem/quadrature/quadrature.py +294 -0
  41. warp/fem/space/__init__.py +292 -0
  42. warp/fem/space/basis_space.py +489 -0
  43. warp/fem/space/collocated_function_space.py +105 -0
  44. warp/fem/space/dof_mapper.py +236 -0
  45. warp/fem/space/function_space.py +145 -0
  46. warp/fem/space/grid_2d_function_space.py +267 -0
  47. warp/fem/space/grid_3d_function_space.py +306 -0
  48. warp/fem/space/hexmesh_function_space.py +352 -0
  49. warp/fem/space/partition.py +350 -0
  50. warp/fem/space/quadmesh_2d_function_space.py +369 -0
  51. warp/fem/space/restriction.py +160 -0
  52. warp/fem/space/shape/__init__.py +15 -0
  53. warp/fem/space/shape/cube_shape_function.py +738 -0
  54. warp/fem/space/shape/shape_function.py +103 -0
  55. warp/fem/space/shape/square_shape_function.py +611 -0
  56. warp/fem/space/shape/tet_shape_function.py +567 -0
  57. warp/fem/space/shape/triangle_shape_function.py +429 -0
  58. warp/fem/space/tetmesh_function_space.py +292 -0
  59. warp/fem/space/topology.py +295 -0
  60. warp/fem/space/trimesh_2d_function_space.py +221 -0
  61. warp/fem/types.py +77 -0
  62. warp/fem/utils.py +495 -0
  63. warp/native/array.h +147 -44
  64. warp/native/builtin.h +122 -149
  65. warp/native/bvh.cpp +73 -325
  66. warp/native/bvh.cu +406 -23
  67. warp/native/bvh.h +34 -43
  68. warp/native/clang/clang.cpp +13 -8
  69. warp/native/crt.h +2 -0
  70. warp/native/cuda_crt.h +5 -0
  71. warp/native/cuda_util.cpp +15 -3
  72. warp/native/cuda_util.h +3 -1
  73. warp/native/cutlass/tools/library/scripts/conv2d_operation.py +463 -0
  74. warp/native/cutlass/tools/library/scripts/conv3d_operation.py +321 -0
  75. warp/native/cutlass/tools/library/scripts/gemm_operation.py +988 -0
  76. warp/native/cutlass/tools/library/scripts/generator.py +4625 -0
  77. warp/native/cutlass/tools/library/scripts/library.py +799 -0
  78. warp/native/cutlass/tools/library/scripts/manifest.py +402 -0
  79. warp/native/cutlass/tools/library/scripts/pycutlass/docs/source/conf.py +96 -0
  80. warp/native/cutlass/tools/library/scripts/pycutlass/profile/conv/conv2d_f16_sm80.py +106 -0
  81. warp/native/cutlass/tools/library/scripts/pycutlass/profile/gemm/gemm_f32_sm80.py +91 -0
  82. warp/native/cutlass/tools/library/scripts/pycutlass/setup.py +80 -0
  83. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/__init__.py +48 -0
  84. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/arguments.py +118 -0
  85. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/c_types.py +241 -0
  86. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/compiler.py +432 -0
  87. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/conv2d_operation.py +631 -0
  88. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/epilogue.py +1026 -0
  89. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/frontend.py +104 -0
  90. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/gemm_operation.py +1276 -0
  91. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/library.py +744 -0
  92. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/memory_manager.py +74 -0
  93. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/operation.py +110 -0
  94. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/parser.py +619 -0
  95. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/reduction_operation.py +398 -0
  96. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/tensor_ref.py +70 -0
  97. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/__init__.py +4 -0
  98. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/conv2d_testbed.py +646 -0
  99. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_grouped_testbed.py +235 -0
  100. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_testbed.py +557 -0
  101. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/profiler.py +70 -0
  102. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/type_hint.py +39 -0
  103. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/__init__.py +1 -0
  104. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/device.py +76 -0
  105. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/reference_model.py +255 -0
  106. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/__init__.py +0 -0
  107. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +201 -0
  108. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +177 -0
  109. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +98 -0
  110. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +95 -0
  111. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +163 -0
  112. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +187 -0
  113. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +309 -0
  114. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +54 -0
  115. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  116. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  117. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +253 -0
  118. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +97 -0
  119. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +242 -0
  120. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  121. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  122. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/run_all_tests.py +10 -0
  123. warp/native/cutlass/tools/library/scripts/pycutlass/test/frontend/test_frontend.py +146 -0
  124. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/__init__.py +0 -0
  125. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_bf16_sm80.py +96 -0
  126. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f16_sm80.py +447 -0
  127. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f32_sm80.py +146 -0
  128. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f64_sm80.py +102 -0
  129. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_grouped_sm80.py +203 -0
  130. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_s8_sm80.py +229 -0
  131. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/run_all_tests.py +9 -0
  132. warp/native/cutlass/tools/library/scripts/pycutlass/test/unit/test_sm80.py +453 -0
  133. warp/native/cutlass/tools/library/scripts/rank_2k_operation.py +398 -0
  134. warp/native/cutlass/tools/library/scripts/rank_k_operation.py +387 -0
  135. warp/native/cutlass/tools/library/scripts/rt.py +796 -0
  136. warp/native/cutlass/tools/library/scripts/symm_operation.py +400 -0
  137. warp/native/cutlass/tools/library/scripts/trmm_operation.py +407 -0
  138. warp/native/cutlass_gemm.cu +5 -3
  139. warp/native/exports.h +1240 -952
  140. warp/native/fabric.h +228 -0
  141. warp/native/hashgrid.cpp +4 -4
  142. warp/native/hashgrid.h +22 -2
  143. warp/native/intersect.h +22 -7
  144. warp/native/intersect_adj.h +8 -8
  145. warp/native/intersect_tri.h +1 -1
  146. warp/native/marching.cu +157 -161
  147. warp/native/mat.h +80 -19
  148. warp/native/matnn.h +2 -2
  149. warp/native/mesh.cpp +33 -108
  150. warp/native/mesh.cu +114 -23
  151. warp/native/mesh.h +446 -46
  152. warp/native/noise.h +272 -329
  153. warp/native/quat.h +51 -8
  154. warp/native/rand.h +45 -35
  155. warp/native/range.h +6 -2
  156. warp/native/reduce.cpp +1 -1
  157. warp/native/reduce.cu +10 -12
  158. warp/native/runlength_encode.cu +6 -10
  159. warp/native/scan.cu +8 -11
  160. warp/native/sparse.cpp +4 -4
  161. warp/native/sparse.cu +164 -154
  162. warp/native/spatial.h +2 -2
  163. warp/native/temp_buffer.h +14 -30
  164. warp/native/vec.h +107 -23
  165. warp/native/volume.h +120 -0
  166. warp/native/warp.cpp +560 -30
  167. warp/native/warp.cu +431 -44
  168. warp/native/warp.h +13 -4
  169. warp/optim/__init__.py +1 -0
  170. warp/optim/linear.py +922 -0
  171. warp/optim/sgd.py +92 -0
  172. warp/render/render_opengl.py +335 -119
  173. warp/render/render_usd.py +11 -11
  174. warp/sim/__init__.py +2 -2
  175. warp/sim/articulation.py +385 -185
  176. warp/sim/collide.py +8 -0
  177. warp/sim/import_mjcf.py +297 -106
  178. warp/sim/import_urdf.py +389 -210
  179. warp/sim/import_usd.py +198 -97
  180. warp/sim/inertia.py +17 -18
  181. warp/sim/integrator_euler.py +14 -8
  182. warp/sim/integrator_xpbd.py +158 -16
  183. warp/sim/model.py +795 -291
  184. warp/sim/render.py +3 -3
  185. warp/sim/utils.py +3 -0
  186. warp/sparse.py +640 -150
  187. warp/stubs.py +606 -267
  188. warp/tape.py +61 -10
  189. warp/tests/__main__.py +3 -6
  190. warp/tests/assets/curlnoise_golden.npy +0 -0
  191. warp/tests/assets/pnoise_golden.npy +0 -0
  192. warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
  193. warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
  194. warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
  195. warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
  196. warp/tests/aux_test_unresolved_func.py +14 -0
  197. warp/tests/aux_test_unresolved_symbol.py +14 -0
  198. warp/tests/disabled_kinematics.py +239 -0
  199. warp/tests/run_coverage_serial.py +31 -0
  200. warp/tests/test_adam.py +103 -106
  201. warp/tests/test_arithmetic.py +128 -74
  202. warp/tests/test_array.py +212 -97
  203. warp/tests/test_array_reduce.py +57 -23
  204. warp/tests/test_atomic.py +64 -28
  205. warp/tests/test_bool.py +99 -0
  206. warp/tests/test_builtins_resolution.py +1292 -0
  207. warp/tests/test_bvh.py +42 -18
  208. warp/tests/test_closest_point_edge_edge.py +54 -57
  209. warp/tests/test_codegen.py +208 -130
  210. warp/tests/test_compile_consts.py +28 -20
  211. warp/tests/test_conditional.py +108 -24
  212. warp/tests/test_copy.py +10 -12
  213. warp/tests/test_ctypes.py +112 -88
  214. warp/tests/test_dense.py +21 -14
  215. warp/tests/test_devices.py +98 -0
  216. warp/tests/test_dlpack.py +75 -75
  217. warp/tests/test_examples.py +277 -0
  218. warp/tests/test_fabricarray.py +955 -0
  219. warp/tests/test_fast_math.py +15 -11
  220. warp/tests/test_fem.py +1271 -0
  221. warp/tests/test_fp16.py +53 -19
  222. warp/tests/test_func.py +187 -86
  223. warp/tests/test_generics.py +194 -49
  224. warp/tests/test_grad.py +178 -109
  225. warp/tests/test_grad_customs.py +176 -0
  226. warp/tests/test_hash_grid.py +52 -37
  227. warp/tests/test_import.py +10 -23
  228. warp/tests/test_indexedarray.py +32 -31
  229. warp/tests/test_intersect.py +18 -9
  230. warp/tests/test_large.py +141 -0
  231. warp/tests/test_launch.py +14 -41
  232. warp/tests/test_lerp.py +64 -65
  233. warp/tests/test_linear_solvers.py +154 -0
  234. warp/tests/test_lvalue.py +493 -0
  235. warp/tests/test_marching_cubes.py +12 -13
  236. warp/tests/test_mat.py +517 -2898
  237. warp/tests/test_mat_lite.py +115 -0
  238. warp/tests/test_mat_scalar_ops.py +2889 -0
  239. warp/tests/test_math.py +103 -9
  240. warp/tests/test_matmul.py +305 -69
  241. warp/tests/test_matmul_lite.py +410 -0
  242. warp/tests/test_mesh.py +71 -14
  243. warp/tests/test_mesh_query_aabb.py +41 -25
  244. warp/tests/test_mesh_query_point.py +140 -22
  245. warp/tests/test_mesh_query_ray.py +39 -22
  246. warp/tests/test_mlp.py +30 -22
  247. warp/tests/test_model.py +92 -89
  248. warp/tests/test_modules_lite.py +39 -0
  249. warp/tests/test_multigpu.py +88 -114
  250. warp/tests/test_noise.py +12 -11
  251. warp/tests/test_operators.py +16 -20
  252. warp/tests/test_options.py +11 -11
  253. warp/tests/test_pinned.py +17 -18
  254. warp/tests/test_print.py +32 -11
  255. warp/tests/test_quat.py +275 -129
  256. warp/tests/test_rand.py +18 -16
  257. warp/tests/test_reload.py +38 -34
  258. warp/tests/test_rounding.py +50 -43
  259. warp/tests/test_runlength_encode.py +168 -20
  260. warp/tests/test_smoothstep.py +9 -11
  261. warp/tests/test_snippet.py +143 -0
  262. warp/tests/test_sparse.py +261 -63
  263. warp/tests/test_spatial.py +276 -243
  264. warp/tests/test_streams.py +110 -85
  265. warp/tests/test_struct.py +268 -63
  266. warp/tests/test_tape.py +39 -21
  267. warp/tests/test_torch.py +118 -89
  268. warp/tests/test_transient_module.py +12 -13
  269. warp/tests/test_types.py +614 -0
  270. warp/tests/test_utils.py +494 -0
  271. warp/tests/test_vec.py +354 -2050
  272. warp/tests/test_vec_lite.py +73 -0
  273. warp/tests/test_vec_scalar_ops.py +2099 -0
  274. warp/tests/test_volume.py +457 -293
  275. warp/tests/test_volume_write.py +124 -134
  276. warp/tests/unittest_serial.py +35 -0
  277. warp/tests/unittest_suites.py +341 -0
  278. warp/tests/unittest_utils.py +568 -0
  279. warp/tests/unused_test_misc.py +71 -0
  280. warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
  281. warp/thirdparty/appdirs.py +36 -45
  282. warp/thirdparty/unittest_parallel.py +549 -0
  283. warp/torch.py +9 -6
  284. warp/types.py +1089 -366
  285. warp/utils.py +93 -387
  286. warp_lang-0.11.0.dist-info/METADATA +238 -0
  287. warp_lang-0.11.0.dist-info/RECORD +332 -0
  288. {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/WHEEL +1 -1
  289. warp/tests/test_all.py +0 -219
  290. warp/tests/test_array_scan.py +0 -60
  291. warp/tests/test_base.py +0 -208
  292. warp/tests/test_unresolved_func.py +0 -7
  293. warp/tests/test_unresolved_symbol.py +0 -7
  294. warp_lang-0.10.1.dist-info/METADATA +0 -21
  295. warp_lang-0.10.1.dist-info/RECORD +0 -188
  296. /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
  297. /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
  298. /warp/tests/{test_square.py → aux_test_square.py} +0 -0
  299. {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/LICENSE.md +0 -0
  300. {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/top_level.txt +0 -0
warp/fem/cache.py ADDED
@@ -0,0 +1,389 @@
1
+ from typing import Callable, Optional, Union, Tuple, Dict, Any
2
+ from copy import copy
3
+ import bisect
4
+ import re
5
+
6
+
7
+ import warp as wp
8
+
9
+
10
# Module-level registries of the Warp objects generated by this file.
# Entries are stored under the sanitized string produced by `_make_key`.
_kernel_cache = dict()
_struct_cache = dict()
_func_cache = dict()

# Matches every run of characters other than ASCII letters, digits and underscores;
# `_make_key` strips those runs from the key.
_key_re = re.compile("[^0-9a-zA-Z_]+")
15
+
16
+
17
def _make_key(obj, suffix: str, use_qualified_name):
    """Build the sanitized cache key for ``obj`` specialized by ``suffix``.

    The key has the form ``<name>_<suffix>`` with every character other than ASCII letters,
    digits and underscores removed. ``<name>`` is ``obj.__module__`` and ``obj.__qualname__``
    separated by a dot when ``use_qualified_name`` is truthy, and ``obj.__name__`` otherwise.
    """
    if use_qualified_name:
        base_name = f"{obj.__module__}.{obj.__qualname__}"
    else:
        base_name = obj.__name__

    raw_key = f"{base_name}_{suffix}"
    return _key_re.sub("", raw_key)
20
+
21
+
22
def get_func(func, suffix: str, use_qualified_name: bool = False):
    """Return the ``wp.Function`` registered for ``func`` and ``suffix``, creating it on first use.

    The function is looked up in the module-level function cache under the key built by
    :func:`_make_key`. On a cache miss a new ``wp.Function`` is created with an empty namespace
    in the Warp module named ``func.__module__``. On a cache hit the stored object is returned
    and ``func`` itself is not inspected any further.
    """
    key = _make_key(func, suffix, use_qualified_name)

    if key in _func_cache:
        return _func_cache[key]

    owning_module = wp.get_module(func.__module__)
    warp_function = wp.Function(func=func, key=key, namespace="", module=owning_module)
    _func_cache[key] = warp_function
    return warp_function
36
+
37
+
38
def dynamic_func(suffix: str, use_qualified_name=False):
    """Decorator factory forwarding the decorated callable to :func:`get_func`.

    ``suffix`` and ``use_qualified_name`` are captured and passed along unchanged, so the
    decorated name ends up bound to whatever :func:`get_func` returns.
    """

    def decorator(func: Callable):
        return get_func(func, suffix=suffix, use_qualified_name=use_qualified_name)

    return decorator
43
+
44
+
45
def get_kernel(
    func,
    suffix: str,
    use_qualified_name: bool = False,
    kernel_options: Optional[Dict[str, Any]] = None,
):
    """Return the ``wp.Kernel`` registered for ``func`` and ``suffix``, creating it on first use.

    Args:
        func: Python function to build the kernel from
        suffix: string appended to the function name to form the cache key
        use_qualified_name: whether the key is built from the module and qualified name of ``func`` rather than its bare name
        kernel_options: options overriding those copied from the module named ``func.__module__``; ``None`` means no override

    On a cache hit the stored kernel is returned as-is; ``func`` and ``kernel_options`` are then ignored.
    """
    key = _make_key(func, suffix, use_qualified_name)

    if key not in _kernel_cache:
        # Avoid creating too long file names -- can lead to issues on Windows
        # We could hash the key, but prefer to keep it human-readable
        # (slicing leaves names of 128 characters or fewer untouched)
        module_name = f"{func.__module__}.dyn.{key}"[:128]
        module = wp.get_module(module_name)

        # Start from a copy of the defining module's options so that overrides stay local to the new module
        module.options = copy(wp.get_module(func.__module__).options)
        if kernel_options:
            module.options.update(kernel_options)

        _kernel_cache[key] = wp.Kernel(func=func, key=key, module=module)
    return _kernel_cache[key]
63
+
64
+
65
def dynamic_kernel(suffix: str, use_qualified_name=False, kernel_options: Optional[Dict[str, Any]] = None):
    """Decorator factory forwarding the decorated callable to :func:`get_kernel`.

    Args:
        suffix: string appended to the function name to form the cache key
        use_qualified_name: forwarded unchanged to :func:`get_kernel`
        kernel_options: kernel options forwarded to :func:`get_kernel`; ``None`` is forwarded as an empty dictionary
    """
    # Normalize once here so that get_kernel always receives a dictionary
    options = {} if kernel_options is None else kernel_options

    def wrap_kernel(func: Callable):
        return get_kernel(func, suffix=suffix, use_qualified_name=use_qualified_name, kernel_options=options)

    return wrap_kernel
70
+
71
+
72
def get_struct(struct: type, suffix: str, use_qualified_name: bool = False):
    """Return the ``wp.codegen.Struct`` registered for ``struct`` and ``suffix``, creating it on first use.

    Note that ``struct.__qualname__`` is overwritten with the cache key on every call,
    including calls that end up returning an already cached struct.
    """
    key = _make_key(struct, suffix, use_qualified_name)
    # used in codegen
    struct.__qualname__ = key

    if key in _struct_cache:
        return _struct_cache[key]

    owning_module = wp.get_module(struct.__module__)
    warp_struct = wp.codegen.Struct(cls=struct, key=key, module=owning_module)
    _struct_cache[key] = warp_struct
    return warp_struct
86
+
87
+
88
def dynamic_struct(suffix: str, use_qualified_name=False):
    """Decorator factory forwarding the decorated class to :func:`get_struct`.

    ``suffix`` and ``use_qualified_name`` are captured and passed along unchanged.
    """

    def decorator(struct: type):
        return get_struct(struct, suffix=suffix, use_qualified_name=use_qualified_name)

    return decorator
93
+
94
+
95
def get_integrand_function(
    integrand: "warp.fem.operator.Integrand",
    suffix: str,
    func=None,
    annotations=None,
    code_transformers=None,
):
    """Return the ``wp.Function`` registered for ``integrand`` and ``suffix``, creating it on first use.

    The cache key is always built from the qualified name of ``integrand.func``.

    Args:
        integrand: integrand providing the default Python function and the module owning the new function
        suffix: string appended to the integrand function name to form the cache key
        func: Python function to wrap instead of ``integrand.func``, if not ``None``
        annotations: forwarded to ``wp.Function`` as ``overloaded_annotations``
        code_transformers: forwarded to ``wp.Function``; ``None`` is forwarded as an empty list

    On a cache hit the stored function is returned and all arguments other than ``integrand`` and ``suffix`` are ignored.
    """
    key = _make_key(integrand.func, suffix, use_qualified_name=True)

    if key not in _func_cache:
        _func_cache[key] = wp.Function(
            func=integrand.func if func is None else func,
            key=key,
            namespace="",
            module=integrand.module,
            overloaded_annotations=annotations,
            # Fresh list per call rather than a default list shared between all calls
            code_transformers=[] if code_transformers is None else code_transformers,
        )

    return _func_cache[key]
115
+
116
+
117
def get_integrand_kernel(
    integrand: "warp.fem.operator.Integrand",
    suffix: str,
    kernel_fn: Optional[Callable] = None,
    kernel_options: Optional[Dict[str, Any]] = None,
    code_transformers=None,
):
    """Return the ``wp.Kernel`` registered for ``integrand`` and ``suffix``, creating it if possible.

    The cache key is always built from the qualified name of ``integrand.func``.

    Args:
        integrand: integrand whose module name, name and module options are used for the kernel's module
        suffix: string appended to the integrand function name to form the cache key
        kernel_fn: Python function to build the kernel from when it is not cached yet
        kernel_options: options overriding those copied from ``integrand.module``; ``None`` means no override
        code_transformers: forwarded to ``wp.Kernel``; ``None`` is forwarded as an empty list

    Returns:
        The cached or newly created kernel, or ``None`` if the kernel is not cached and ``kernel_fn`` is ``None``.
    """
    key = _make_key(integrand.func, suffix, use_qualified_name=True)

    if key not in _kernel_cache:
        if kernel_fn is None:
            # Pure lookup: nothing cached and nothing to build the kernel from
            return None

        module = wp.get_module(f"{integrand.module.name}.{integrand.name}")
        # Start from a copy of the integrand module's options so that overrides stay local to the new module
        module.options = copy(integrand.module.options)
        if kernel_options:
            module.options.update(kernel_options)

        _kernel_cache[key] = wp.Kernel(
            func=kernel_fn,
            key=key,
            module=module,
            # Fresh list per call rather than a default list shared between all calls
            code_transformers=[] if code_transformers is None else code_transformers,
        )
    return _kernel_cache[key]
136
+
137
+
138
def cached_arg_value(func: Callable):
    """Decorator to be applied to member methods assembling Arg structs, so that the result gets
    automatically cached for the lifetime of the parent object

    The decorated method is called as ``func(obj, device)`` with ``device`` resolved through
    ``wp.get_device``. Results are stored per device ordinal in a dictionary kept on the object
    under the attribute ``_<method name>_cache``.
    The returned wrapper keeps the name and docstring of the decorated method.
    """
    import functools  # local import so that the file's import block does not need to change

    cache_attr = f"_{func.__name__}_cache"

    @functools.wraps(func)
    def get_arg(obj, device):
        cache = getattr(obj, cache_attr, None)
        if cache is None:
            # First call on this object: create its per-device cache
            cache = {}
            setattr(obj, cache_attr, cache)

        device = wp.get_device(device)
        if device.ordinal not in cache:
            cache[device.ordinal] = func(obj, device)

        return cache[device.ordinal]

    return get_arg
158
+
159
+
160
# Vector types already created by `cached_vec_type`, keyed by (length, dtype)
_cached_vec_types = {}
# Matrix types already created by `cached_mat_type`, keyed by (*shape, dtype)
_cached_mat_types = {}
162
+
163
+
164
def cached_vec_type(length, dtype):
    """Return the vector type created by ``wp.vec`` for ``length`` and ``dtype``.

    The type is created once per ``(length, dtype)`` pair; later calls return the stored type.
    """
    key = (length, dtype)

    if key in _cached_vec_types:
        return _cached_vec_types[key]

    vec_type = wp.vec(length=length, dtype=dtype)
    _cached_vec_types[key] = vec_type
    return vec_type
170
+
171
+
172
def cached_mat_type(shape, dtype):
    """Return the matrix type created by ``wp.mat`` for ``shape`` and ``dtype``.

    The type is created once per combination of shape entries and ``dtype``;
    later calls return the stored type.
    """
    key = (*shape, dtype)

    if key in _cached_mat_types:
        return _cached_mat_types[key]

    mat_type = wp.mat(shape=shape, dtype=dtype)
    _cached_mat_types[key] = mat_type
    return mat_type
178
+
179
+
180
class Temporary:
    """Handle over a temporary array borrowed from a :class:`TemporaryStore`.

    Unless the handle has been detached with :meth:`detach`, the underlying array is handed back
    to its pool when the handle is destroyed. It can also be handed back earlier by calling :meth:`release`.
    A handle created without a pool simply wraps its array and never returns it anywhere.
    """

    def __init__(self, array: wp.array, pool: Optional["TemporaryStore.Pool"] = None, shape=None, dtype=None):
        # The raw array is what the pool tracks; the view is what users of the handle get to see
        self._raw_array = array
        self._array_view = array
        self._pool = pool

        if not (shape is None and dtype is None):
            self._view_as(shape=shape, dtype=dtype)

    def detach(self) -> wp.array:
        """Removes the temporary from its pool for good and returns the array view"""
        owner = self._pool
        if owner is not None:
            owner.detach(self._raw_array)

        self._pool = None
        return self._array_view

    def release(self):
        """Hands the temporary array back to the pool; does nothing if already released or detached"""
        owner = self._pool
        if owner is not None:
            owner.redeem(self._raw_array)

        self._pool = None

    @property
    def array(self) -> wp.array:
        """The array, viewed with the shape and data type requested at construction."""
        return self._array_view

    def _view_as(self, shape, dtype) -> "Temporary":
        """Replaces the array view by a non-owning array of the given shape and dtype over the raw array's memory."""

        def _make_view(source):
            # Gradients get the same treatment, recursively
            grad_view = None if source.grad is None else _make_view(source.grad)
            return wp.types.array(
                ptr=source.ptr,
                dtype=dtype,
                shape=shape,
                device=source.device,
                pinned=source.pinned,
                capacity=source.capacity,
                copy=False,
                owner=False,
                grad=grad_view,
            )

        self._array_view = _make_view(self._raw_array)
        return self

    def __del__(self):
        self.release()
235
+
236
+
237
class TemporaryStore:
    """
    Shared pool of temporary arrays that are kept alive and reused across invocations of ``warp.fem`` functions.

    A :class:`TemporaryStore` instance may either be passed explicitly to ``warp.fem`` functions that accept such an argument, for instance :func:`.integrate.integrate`,
    or can be set globally as the default store using :func:`set_default_temporary_store`.

    By default, there is no default temporary store, so that temporary allocations are not persisted.
    """

    _default_store: "TemporaryStore" = None

    class Pool:
        """Arrays sharing one value type, device and pinned flag, available for borrowing."""

        def __init__(self, dtype, device, pinned: bool):
            self.dtype = dtype
            self.device = device
            self.pinned = pinned

            self._pool = []  # Currently available arrays for borrowing, ordered by size
            self._pool_sizes = []  # Sizes of available arrays for borrowing, ascending
            self._allocs = {}  # All allocated arrays, including borrowed ones

        def borrow(self, shape, dtype, requires_grad: bool):
            """Returns a :class:`Temporary` viewing a pooled or newly allocated array as ``shape`` and ``dtype``."""
            if isinstance(shape, int):
                shape = (shape,)

            # Number of elements requested
            size = 1
            for extent in shape:
                size *= extent

            # Smallest available array holding at least `size` elements, if any
            slot = bisect.bisect_left(self._pool_sizes, size)
            if slot < len(self._pool):
                reused = self._pool.pop(slot)
                self._pool_sizes.pop(slot)
                if requires_grad and reused.grad is None:
                    reused.requires_grad = True
                return Temporary(pool=self, array=reused, shape=shape, dtype=dtype)

            # Nothing big enough: allocate, growing past the largest available array when there is one
            if self._pool:
                grow_factor = 1.5
                size = max(int(self._pool_sizes[-1] * grow_factor), size)

            fresh = wp.empty(
                shape=(size,), dtype=self.dtype, pinned=self.pinned, device=self.device, requires_grad=requires_grad
            )
            self._allocs[fresh.ptr] = fresh
            return Temporary(pool=self, array=fresh, shape=shape, dtype=dtype)

        def redeem(self, array):
            """Puts ``array`` back among the available arrays, keeping them ordered by size."""
            returned_size = array.size
            slot = bisect.bisect_left(self._pool_sizes, returned_size)
            self._pool.insert(slot, array)
            self._pool_sizes.insert(slot, returned_size)

        def detach(self, array):
            """Stops tracking ``array`` as one of this pool's allocations."""
            del self._allocs[array.ptr]

    def __init__(self):
        self.clear()

    def clear(self):
        """Drops all pools held by this store."""
        self._temporaries = {}

    def borrow(self, shape, dtype, pinned: bool = False, device=None, requires_grad: bool = False) -> "Temporary":
        """Borrows a temporary from the pool matching ``dtype``, ``pinned`` and ``device``, creating that pool if needed."""
        dtype = wp.types.type_to_warp(dtype)
        device = wp.get_device(device)

        type_length = wp.types.type_length(dtype)
        key = (dtype._type_, type_length, pinned, device.ordinal)

        pool = self._temporaries.get(key)
        if pool is None:
            # Types longer than one scalar are pooled as vectors of the same length
            if type_length > 1:
                value_type = cached_vec_type(length=type_length, dtype=wp.types.type_scalar_type(dtype))
            else:
                value_type = dtype
            pool = TemporaryStore.Pool(value_type, device, pinned=pinned)
            self._temporaries[key] = pool

        return pool.borrow(dtype=dtype, shape=shape, requires_grad=requires_grad)
325
+
326
+
327
def set_default_temporary_store(temporary_store: Optional[TemporaryStore]):
    """Installs ``temporary_store`` as the global default :class:`TemporaryStore` for temporary allocations in ``warp.fem`` functions.

    Passing ``None`` removes the default store; temporary allocations are then not persisted
    unless a :class:`TemporaryStore` is provided at a per-function granularity.
    """

    TemporaryStore._default_store = temporary_store
334
+
335
+
336
def borrow_temporary(
    temporary_store: Optional[TemporaryStore],
    shape: Union[int, Tuple[int, ...]],
    dtype: type,
    pinned: bool = False,
    requires_grad: bool = False,
    device=None,
) -> Temporary:
    """
    Borrows and returns a temporary array with specified attributes from a shared pool.

    If an array with sufficient capacity and matching desired attributes is already available in the pool, it will be returned.
    Otherwise, a new allocation will be performed.

    Args:
        temporary_store: the shared pool to borrow the temporary from. If `temporary_store` is ``None``, the global default temporary store, if set, will be used.
        shape: desired dimensions for the temporary array
        dtype: desired data type for the temporary array
        pinned: whether a pinned allocation is desired
        requires_grad: whether the temporary array is requested with gradient support
        device: device on which the memory should be allocated; if ``None``, the current device will be used.

    Returns:
        A :class:`Temporary` handle. When neither `temporary_store` nor a default store is available,
        the handle wraps a newly allocated array that does not belong to any pool.
    """

    store = TemporaryStore._default_store if temporary_store is None else temporary_store

    if store is None:
        # No pool to borrow from: plain allocation wrapped in a pool-less handle
        return Temporary(
            array=wp.empty(shape=shape, dtype=dtype, pinned=pinned, device=device, requires_grad=requires_grad)
        )

    return store.borrow(shape=shape, dtype=dtype, device=device, pinned=pinned, requires_grad=requires_grad)
367
+
368
+
369
def borrow_temporary_like(
    array: Union[wp.array, Temporary],
    temporary_store: Optional[TemporaryStore],
) -> Temporary:
    """
    Borrows and returns a temporary array whose shape, data type, pinned flag, device and
    gradient requirement are copied from another array or temporary.

    Args:
        array: Warp or temporary array to read the desired attributes from
        temporary_store: the shared pool to borrow the temporary from. If `temporary_store` is ``None``, the global default temporary store, if set, will be used.
    """
    # A Temporary is described by the array view it exposes
    template = array.array if isinstance(array, Temporary) else array

    return borrow_temporary(
        temporary_store=temporary_store,
        shape=template.shape,
        dtype=template.dtype,
        pinned=template.pinned,
        device=template.device,
        requires_grad=template.requires_grad,
    )
warp/fem/dirichlet.py ADDED
@@ -0,0 +1,181 @@
1
+ from typing import Any, Optional
2
+
3
+ import warp as wp
4
+
5
+ from warp.types import type_length, type_is_matrix
6
+ from warp.sparse import BsrMatrix, bsr_copy, bsr_mv, bsr_mm, bsr_assign, bsr_axpy
7
+
8
+ from .utils import array_axpy
9
+
10
+
11
def normalize_dirichlet_projector(projector_matrix: BsrMatrix, fixed_value: Optional[wp.array] = None):
    """
    Rescales in-place the blocks of a Dirichlet projector so that it becomes idempotent.

    If `fixed_value` is provided, the same per-row scaling is applied in-place to its entries.

    Args:
        projector_matrix: square block-sparse projector, expected to hold at most one non-zero block per row
        fixed_value: optional array of prescribed values, with one entry per row of blocks

    Raises:
        ValueError: if the projector is not square or stores more blocks than it has rows,
          or if the length of `fixed_value` differs from the number of rows of blocks
    """

    row_count = projector_matrix.nrow

    if projector_matrix.ncol != row_count or projector_matrix.nnz > row_count:
        raise ValueError("Projector must be a square diagonal matrix, with at most one non-zero block per row")

    block_values = projector_matrix.values
    if not type_is_matrix(block_values.dtype):
        # Blocks are not stored with a matrix dtype: build a non-owning alias over the same
        # memory, typed as matrices of the projector's block shape
        block_values = wp.array(
            data=None,
            ptr=block_values.ptr,
            capacity=block_values.capacity,
            owner=False,
            device=block_values.device,
            dtype=wp.mat(shape=projector_matrix.block_shape, dtype=projector_matrix.scalar_type),
            shape=block_values.shape[0],
        )

    if fixed_value is None:
        kernel = _normalize_dirichlet_projector_kernel
        extra_inputs = []
    else:
        if fixed_value.shape[0] != row_count:
            raise ValueError("Fixed value array must be of length equal to the number of rows of blocks")

        if type_length(fixed_value.dtype) == 1:
            # Scalar array: alias it (without taking ownership) as a 1d array of vectors
            # whose length is the number of rows of a block
            fixed_value = wp.array(
                data=None,
                ptr=fixed_value.ptr,
                capacity=fixed_value.capacity,
                owner=False,
                device=fixed_value.device,
                dtype=wp.vec(length=projector_matrix.block_shape[0], dtype=projector_matrix.scalar_type),
                shape=fixed_value.shape[0],
            )

        kernel = _normalize_dirichlet_projector_and_values_kernel
        extra_inputs = [fixed_value]

    # One thread per row of blocks
    wp.launch(
        kernel=kernel,
        dim=row_count,
        device=block_values.device,
        inputs=[projector_matrix.offsets, projector_matrix.columns, block_values, *extra_inputs],
    )
62
+
63
+
64
def project_system_rhs(
    system_matrix: BsrMatrix, system_rhs: wp.array, projector_matrix: BsrMatrix, fixed_value: Optional[wp.array] = None
):
    """Projects in-place the right-hand-side of a linear system to enforce Dirichlet boundary conditions

    ``rhs = (I - projector) * ( rhs - system * projector * fixed_value) + projector * fixed_value``

    Args:
        system_matrix: matrix of the linear system; only read by this function
        system_rhs: right-hand-side of the linear system, overwritten with the projected value
        projector_matrix: Dirichlet projector
        fixed_value: optional prescribed values; when ``None`` the ``projector * fixed_value`` term is zero
    """

    # Work on a copy of the original right-hand-side, as `system_rhs` is overwritten below
    shifted_rhs = wp.empty_like(system_rhs)
    shifted_rhs.assign(system_rhs)

    # system_rhs <- projector * fixed_value
    if fixed_value is not None:
        bsr_mv(A=projector_matrix, x=fixed_value, y=system_rhs, alpha=1.0, beta=0.0)
    else:
        system_rhs.zero_()

    # shifted_rhs <- rhs - system * projector * fixed_value
    bsr_mv(A=system_matrix, x=system_rhs, y=shifted_rhs, alpha=-1.0, beta=1.0)

    # Accumulate (I - projector) * shifted_rhs on top of projector * fixed_value, in two steps:
    # first add shifted_rhs, then subtract projector * shifted_rhs
    array_axpy(x=shifted_rhs, y=system_rhs, alpha=1.0, beta=1.0)
    bsr_mv(A=projector_matrix, x=shifted_rhs, y=system_rhs, alpha=-1.0, beta=1.0)
86
+
87
+
88
def project_system_matrix(system_matrix: BsrMatrix, projector_matrix: BsrMatrix):
    """Projects in-place the matrix of a linear system to enforce Dirichlet boundary conditions

    ``system = (I - projector) * system * (I - projector) + projector``

    Args:
        system_matrix: matrix of the linear system, overwritten with the projected matrix
        projector_matrix: Dirichlet projector
    """

    # left_projected <- (I - projector) * system
    left_projected = bsr_copy(system_matrix)
    bsr_mm(x=projector_matrix, y=system_matrix, z=left_projected, alpha=-1.0, beta=1.0)

    # system <- (I - projector) * system + projector
    bsr_assign(dest=system_matrix, src=left_projected)
    bsr_axpy(x=projector_matrix, y=system_matrix)

    # system <- system - (I - projector) * system * projector
    bsr_mm(x=left_projected, y=projector_matrix, z=system_matrix, alpha=-1.0, beta=1.0)
100
+
101
+
102
def project_linear_system(
    system_matrix: BsrMatrix,
    system_rhs: wp.array,
    projector_matrix: BsrMatrix,
    fixed_value: Optional[wp.array] = None,
    normalize_projector=True,
):
    """
    Projects in-place both sides of a linear system to enforce Dirichlet boundary conditions

    Args:
        system_matrix: left-hand-side matrix of the linear system
        system_rhs: right-hand-side of the linear system
        projector_matrix: Dirichlet projector
        fixed_value: optional prescribed values
        normalize_projector: if True, first rescale `projector_matrix` (and `fixed_value`, if provided)
          so that the projector is idempotent
    """
    if normalize_projector:
        normalize_dirichlet_projector(projector_matrix=projector_matrix, fixed_value=fixed_value)

    # The right-hand-side projection reads the system matrix, so it must run
    # before the matrix itself is overwritten by its projection
    project_system_rhs(
        system_matrix=system_matrix,
        system_rhs=system_rhs,
        projector_matrix=projector_matrix,
        fixed_value=fixed_value,
    )
    project_system_matrix(system_matrix=system_matrix, projector_matrix=projector_matrix)
119
+
120
+
121
# Rescales in-place the diagonal block of each row of a block-sparse projector.
# Launched with one thread per row of blocks; rows that have no block, or no block
# on the diagonal, are left untouched.
@wp.kernel
def _normalize_dirichlet_projector_kernel(
    offsets: wp.array(dtype=int),
    columns: wp.array(dtype=int),
    block_values: wp.array(dtype=Any),
):
    row = wp.tid()

    # Range of block indices belonging to this row
    beg = offsets[row]
    end = offsets[row + 1]

    # Empty row: nothing to normalize
    if beg == end:
        return

    # Look for the diagonal block of this row
    # (presumably a binary search assuming sorted column indices within the row -- TODO confirm)
    diag = wp.lower_bound(columns, beg, end, row)

    if diag < end and columns[diag] == row:
        P = block_values[diag]

        P_sq = P * P
        trace_P = wp.trace(P)
        trace_P_sq = wp.trace(P_sq)

        if wp.nonzero(trace_P_sq):
            # If P = s * Q with Q idempotent, then trace(P) / trace(P * P) = 1 / s,
            # so the rescaled block is Q
            scale = trace_P / trace_P_sq
            block_values[diag] = scale * P
        else:
            # P - P yields a zero block of the same type as P
            # (presumably written this way because the block dtype is generic)
            block_values[diag] = P - P
149
+
150
+
151
# Rescales in-place the diagonal block of each row of a block-sparse projector,
# and applies the same scaling to the fixed value of that row.
# Launched with one thread per row of blocks; rows that have no block, or no block
# on the diagonal, are left untouched, as is their fixed value.
@wp.kernel
def _normalize_dirichlet_projector_and_values_kernel(
    offsets: wp.array(dtype=int),
    columns: wp.array(dtype=int),
    block_values: wp.array(dtype=Any),
    fixed_values: wp.array(dtype=Any),
):
    row = wp.tid()

    # Range of block indices belonging to this row
    beg = offsets[row]
    end = offsets[row + 1]

    # Empty row: nothing to normalize
    if beg == end:
        return

    # Look for the diagonal block of this row
    # (presumably a binary search assuming sorted column indices within the row -- TODO confirm)
    diag = wp.lower_bound(columns, beg, end, row)

    if diag < end and columns[diag] == row:
        P = block_values[diag]

        P_sq = P * P
        trace_P = wp.trace(P)
        trace_P_sq = wp.trace(P_sq)

        if wp.nonzero(trace_P_sq):
            # If P = s * Q with Q idempotent, then trace(P) / trace(P * P) = 1 / s,
            # so the rescaled block is Q; the fixed value gets the same factor
            scale = trace_P / trace_P_sq
            block_values[diag] = scale * P
            fixed_values[row] = scale * fixed_values[row]
        else:
            # x - x yields a zero of the same type as x
            # (presumably written this way because the dtypes are generic);
            # both the block and the fixed value are zeroed
            block_values[diag] = P - P
            fixed_values[row] = fixed_values[row] - fixed_values[row]