warp-lang 1.7.0__py3-none-manylinux_2_34_aarch64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (429)
  1. warp/__init__.py +139 -0
  2. warp/__init__.pyi +1 -0
  3. warp/autograd.py +1142 -0
  4. warp/bin/warp-clang.so +0 -0
  5. warp/bin/warp.so +0 -0
  6. warp/build.py +557 -0
  7. warp/build_dll.py +405 -0
  8. warp/builtins.py +6855 -0
  9. warp/codegen.py +3969 -0
  10. warp/config.py +158 -0
  11. warp/constants.py +57 -0
  12. warp/context.py +6812 -0
  13. warp/dlpack.py +462 -0
  14. warp/examples/__init__.py +24 -0
  15. warp/examples/assets/bear.usd +0 -0
  16. warp/examples/assets/bunny.usd +0 -0
  17. warp/examples/assets/cartpole.urdf +110 -0
  18. warp/examples/assets/crazyflie.usd +0 -0
  19. warp/examples/assets/cube.usd +0 -0
  20. warp/examples/assets/nonuniform.usd +0 -0
  21. warp/examples/assets/nv_ant.xml +92 -0
  22. warp/examples/assets/nv_humanoid.xml +183 -0
  23. warp/examples/assets/nvidia_logo.png +0 -0
  24. warp/examples/assets/pixel.jpg +0 -0
  25. warp/examples/assets/quadruped.urdf +268 -0
  26. warp/examples/assets/rocks.nvdb +0 -0
  27. warp/examples/assets/rocks.usd +0 -0
  28. warp/examples/assets/sphere.usd +0 -0
  29. warp/examples/assets/square_cloth.usd +0 -0
  30. warp/examples/benchmarks/benchmark_api.py +389 -0
  31. warp/examples/benchmarks/benchmark_cloth.py +296 -0
  32. warp/examples/benchmarks/benchmark_cloth_cupy.py +96 -0
  33. warp/examples/benchmarks/benchmark_cloth_jax.py +105 -0
  34. warp/examples/benchmarks/benchmark_cloth_numba.py +161 -0
  35. warp/examples/benchmarks/benchmark_cloth_numpy.py +85 -0
  36. warp/examples/benchmarks/benchmark_cloth_paddle.py +94 -0
  37. warp/examples/benchmarks/benchmark_cloth_pytorch.py +94 -0
  38. warp/examples/benchmarks/benchmark_cloth_taichi.py +120 -0
  39. warp/examples/benchmarks/benchmark_cloth_warp.py +153 -0
  40. warp/examples/benchmarks/benchmark_gemm.py +164 -0
  41. warp/examples/benchmarks/benchmark_interop_paddle.py +166 -0
  42. warp/examples/benchmarks/benchmark_interop_torch.py +166 -0
  43. warp/examples/benchmarks/benchmark_launches.py +301 -0
  44. warp/examples/benchmarks/benchmark_tile_load_store.py +103 -0
  45. warp/examples/browse.py +37 -0
  46. warp/examples/core/example_cupy.py +86 -0
  47. warp/examples/core/example_dem.py +241 -0
  48. warp/examples/core/example_fluid.py +299 -0
  49. warp/examples/core/example_graph_capture.py +150 -0
  50. warp/examples/core/example_marching_cubes.py +194 -0
  51. warp/examples/core/example_mesh.py +180 -0
  52. warp/examples/core/example_mesh_intersect.py +211 -0
  53. warp/examples/core/example_nvdb.py +182 -0
  54. warp/examples/core/example_raycast.py +111 -0
  55. warp/examples/core/example_raymarch.py +205 -0
  56. warp/examples/core/example_render_opengl.py +193 -0
  57. warp/examples/core/example_sample_mesh.py +300 -0
  58. warp/examples/core/example_sph.py +411 -0
  59. warp/examples/core/example_torch.py +211 -0
  60. warp/examples/core/example_wave.py +269 -0
  61. warp/examples/fem/example_adaptive_grid.py +286 -0
  62. warp/examples/fem/example_apic_fluid.py +423 -0
  63. warp/examples/fem/example_burgers.py +261 -0
  64. warp/examples/fem/example_convection_diffusion.py +178 -0
  65. warp/examples/fem/example_convection_diffusion_dg.py +204 -0
  66. warp/examples/fem/example_deformed_geometry.py +172 -0
  67. warp/examples/fem/example_diffusion.py +196 -0
  68. warp/examples/fem/example_diffusion_3d.py +225 -0
  69. warp/examples/fem/example_diffusion_mgpu.py +220 -0
  70. warp/examples/fem/example_distortion_energy.py +228 -0
  71. warp/examples/fem/example_magnetostatics.py +240 -0
  72. warp/examples/fem/example_mixed_elasticity.py +291 -0
  73. warp/examples/fem/example_navier_stokes.py +261 -0
  74. warp/examples/fem/example_nonconforming_contact.py +298 -0
  75. warp/examples/fem/example_stokes.py +213 -0
  76. warp/examples/fem/example_stokes_transfer.py +262 -0
  77. warp/examples/fem/example_streamlines.py +352 -0
  78. warp/examples/fem/utils.py +1000 -0
  79. warp/examples/interop/example_jax_callable.py +116 -0
  80. warp/examples/interop/example_jax_ffi_callback.py +132 -0
  81. warp/examples/interop/example_jax_kernel.py +205 -0
  82. warp/examples/optim/example_bounce.py +266 -0
  83. warp/examples/optim/example_cloth_throw.py +228 -0
  84. warp/examples/optim/example_diffray.py +561 -0
  85. warp/examples/optim/example_drone.py +870 -0
  86. warp/examples/optim/example_fluid_checkpoint.py +497 -0
  87. warp/examples/optim/example_inverse_kinematics.py +182 -0
  88. warp/examples/optim/example_inverse_kinematics_torch.py +191 -0
  89. warp/examples/optim/example_softbody_properties.py +400 -0
  90. warp/examples/optim/example_spring_cage.py +245 -0
  91. warp/examples/optim/example_trajectory.py +227 -0
  92. warp/examples/sim/example_cartpole.py +143 -0
  93. warp/examples/sim/example_cloth.py +225 -0
  94. warp/examples/sim/example_cloth_self_contact.py +322 -0
  95. warp/examples/sim/example_granular.py +130 -0
  96. warp/examples/sim/example_granular_collision_sdf.py +202 -0
  97. warp/examples/sim/example_jacobian_ik.py +244 -0
  98. warp/examples/sim/example_particle_chain.py +124 -0
  99. warp/examples/sim/example_quadruped.py +203 -0
  100. warp/examples/sim/example_rigid_chain.py +203 -0
  101. warp/examples/sim/example_rigid_contact.py +195 -0
  102. warp/examples/sim/example_rigid_force.py +133 -0
  103. warp/examples/sim/example_rigid_gyroscopic.py +115 -0
  104. warp/examples/sim/example_rigid_soft_contact.py +140 -0
  105. warp/examples/sim/example_soft_body.py +196 -0
  106. warp/examples/tile/example_tile_cholesky.py +87 -0
  107. warp/examples/tile/example_tile_convolution.py +66 -0
  108. warp/examples/tile/example_tile_fft.py +55 -0
  109. warp/examples/tile/example_tile_filtering.py +113 -0
  110. warp/examples/tile/example_tile_matmul.py +85 -0
  111. warp/examples/tile/example_tile_mlp.py +383 -0
  112. warp/examples/tile/example_tile_nbody.py +199 -0
  113. warp/examples/tile/example_tile_walker.py +327 -0
  114. warp/fabric.py +355 -0
  115. warp/fem/__init__.py +106 -0
  116. warp/fem/adaptivity.py +508 -0
  117. warp/fem/cache.py +572 -0
  118. warp/fem/dirichlet.py +202 -0
  119. warp/fem/domain.py +411 -0
  120. warp/fem/field/__init__.py +125 -0
  121. warp/fem/field/field.py +619 -0
  122. warp/fem/field/nodal_field.py +326 -0
  123. warp/fem/field/restriction.py +37 -0
  124. warp/fem/field/virtual.py +848 -0
  125. warp/fem/geometry/__init__.py +32 -0
  126. warp/fem/geometry/adaptive_nanogrid.py +857 -0
  127. warp/fem/geometry/closest_point.py +84 -0
  128. warp/fem/geometry/deformed_geometry.py +221 -0
  129. warp/fem/geometry/element.py +776 -0
  130. warp/fem/geometry/geometry.py +362 -0
  131. warp/fem/geometry/grid_2d.py +392 -0
  132. warp/fem/geometry/grid_3d.py +452 -0
  133. warp/fem/geometry/hexmesh.py +911 -0
  134. warp/fem/geometry/nanogrid.py +571 -0
  135. warp/fem/geometry/partition.py +389 -0
  136. warp/fem/geometry/quadmesh.py +663 -0
  137. warp/fem/geometry/tetmesh.py +855 -0
  138. warp/fem/geometry/trimesh.py +806 -0
  139. warp/fem/integrate.py +2335 -0
  140. warp/fem/linalg.py +419 -0
  141. warp/fem/operator.py +293 -0
  142. warp/fem/polynomial.py +229 -0
  143. warp/fem/quadrature/__init__.py +17 -0
  144. warp/fem/quadrature/pic_quadrature.py +299 -0
  145. warp/fem/quadrature/quadrature.py +591 -0
  146. warp/fem/space/__init__.py +228 -0
  147. warp/fem/space/basis_function_space.py +468 -0
  148. warp/fem/space/basis_space.py +667 -0
  149. warp/fem/space/dof_mapper.py +251 -0
  150. warp/fem/space/function_space.py +309 -0
  151. warp/fem/space/grid_2d_function_space.py +177 -0
  152. warp/fem/space/grid_3d_function_space.py +227 -0
  153. warp/fem/space/hexmesh_function_space.py +257 -0
  154. warp/fem/space/nanogrid_function_space.py +201 -0
  155. warp/fem/space/partition.py +367 -0
  156. warp/fem/space/quadmesh_function_space.py +223 -0
  157. warp/fem/space/restriction.py +179 -0
  158. warp/fem/space/shape/__init__.py +143 -0
  159. warp/fem/space/shape/cube_shape_function.py +1105 -0
  160. warp/fem/space/shape/shape_function.py +133 -0
  161. warp/fem/space/shape/square_shape_function.py +926 -0
  162. warp/fem/space/shape/tet_shape_function.py +834 -0
  163. warp/fem/space/shape/triangle_shape_function.py +672 -0
  164. warp/fem/space/tetmesh_function_space.py +271 -0
  165. warp/fem/space/topology.py +424 -0
  166. warp/fem/space/trimesh_function_space.py +194 -0
  167. warp/fem/types.py +99 -0
  168. warp/fem/utils.py +420 -0
  169. warp/jax.py +187 -0
  170. warp/jax_experimental/__init__.py +16 -0
  171. warp/jax_experimental/custom_call.py +351 -0
  172. warp/jax_experimental/ffi.py +698 -0
  173. warp/jax_experimental/xla_ffi.py +602 -0
  174. warp/math.py +244 -0
  175. warp/native/array.h +1145 -0
  176. warp/native/builtin.h +1800 -0
  177. warp/native/bvh.cpp +492 -0
  178. warp/native/bvh.cu +791 -0
  179. warp/native/bvh.h +554 -0
  180. warp/native/clang/clang.cpp +536 -0
  181. warp/native/coloring.cpp +613 -0
  182. warp/native/crt.cpp +51 -0
  183. warp/native/crt.h +362 -0
  184. warp/native/cuda_crt.h +1058 -0
  185. warp/native/cuda_util.cpp +646 -0
  186. warp/native/cuda_util.h +307 -0
  187. warp/native/error.cpp +77 -0
  188. warp/native/error.h +36 -0
  189. warp/native/exports.h +1878 -0
  190. warp/native/fabric.h +245 -0
  191. warp/native/hashgrid.cpp +311 -0
  192. warp/native/hashgrid.cu +87 -0
  193. warp/native/hashgrid.h +240 -0
  194. warp/native/initializer_array.h +41 -0
  195. warp/native/intersect.h +1230 -0
  196. warp/native/intersect_adj.h +375 -0
  197. warp/native/intersect_tri.h +339 -0
  198. warp/native/marching.cpp +19 -0
  199. warp/native/marching.cu +514 -0
  200. warp/native/marching.h +19 -0
  201. warp/native/mat.h +2220 -0
  202. warp/native/mathdx.cpp +87 -0
  203. warp/native/matnn.h +343 -0
  204. warp/native/mesh.cpp +266 -0
  205. warp/native/mesh.cu +404 -0
  206. warp/native/mesh.h +1980 -0
  207. warp/native/nanovdb/GridHandle.h +366 -0
  208. warp/native/nanovdb/HostBuffer.h +590 -0
  209. warp/native/nanovdb/NanoVDB.h +6624 -0
  210. warp/native/nanovdb/PNanoVDB.h +3390 -0
  211. warp/native/noise.h +859 -0
  212. warp/native/quat.h +1371 -0
  213. warp/native/rand.h +342 -0
  214. warp/native/range.h +139 -0
  215. warp/native/reduce.cpp +174 -0
  216. warp/native/reduce.cu +364 -0
  217. warp/native/runlength_encode.cpp +79 -0
  218. warp/native/runlength_encode.cu +61 -0
  219. warp/native/scan.cpp +47 -0
  220. warp/native/scan.cu +53 -0
  221. warp/native/scan.h +23 -0
  222. warp/native/solid_angle.h +466 -0
  223. warp/native/sort.cpp +251 -0
  224. warp/native/sort.cu +277 -0
  225. warp/native/sort.h +33 -0
  226. warp/native/sparse.cpp +378 -0
  227. warp/native/sparse.cu +524 -0
  228. warp/native/spatial.h +657 -0
  229. warp/native/svd.h +702 -0
  230. warp/native/temp_buffer.h +46 -0
  231. warp/native/tile.h +2584 -0
  232. warp/native/tile_reduce.h +264 -0
  233. warp/native/vec.h +1426 -0
  234. warp/native/volume.cpp +501 -0
  235. warp/native/volume.cu +67 -0
  236. warp/native/volume.h +969 -0
  237. warp/native/volume_builder.cu +477 -0
  238. warp/native/volume_builder.h +52 -0
  239. warp/native/volume_impl.h +70 -0
  240. warp/native/warp.cpp +1082 -0
  241. warp/native/warp.cu +3636 -0
  242. warp/native/warp.h +381 -0
  243. warp/optim/__init__.py +17 -0
  244. warp/optim/adam.py +163 -0
  245. warp/optim/linear.py +1137 -0
  246. warp/optim/sgd.py +112 -0
  247. warp/paddle.py +407 -0
  248. warp/render/__init__.py +18 -0
  249. warp/render/render_opengl.py +3518 -0
  250. warp/render/render_usd.py +784 -0
  251. warp/render/utils.py +160 -0
  252. warp/sim/__init__.py +65 -0
  253. warp/sim/articulation.py +793 -0
  254. warp/sim/collide.py +2395 -0
  255. warp/sim/graph_coloring.py +300 -0
  256. warp/sim/import_mjcf.py +790 -0
  257. warp/sim/import_snu.py +227 -0
  258. warp/sim/import_urdf.py +579 -0
  259. warp/sim/import_usd.py +894 -0
  260. warp/sim/inertia.py +324 -0
  261. warp/sim/integrator.py +242 -0
  262. warp/sim/integrator_euler.py +1997 -0
  263. warp/sim/integrator_featherstone.py +2101 -0
  264. warp/sim/integrator_vbd.py +2048 -0
  265. warp/sim/integrator_xpbd.py +3292 -0
  266. warp/sim/model.py +4791 -0
  267. warp/sim/particles.py +121 -0
  268. warp/sim/render.py +427 -0
  269. warp/sim/utils.py +428 -0
  270. warp/sparse.py +2057 -0
  271. warp/stubs.py +3333 -0
  272. warp/tape.py +1203 -0
  273. warp/tests/__init__.py +1 -0
  274. warp/tests/__main__.py +4 -0
  275. warp/tests/assets/curlnoise_golden.npy +0 -0
  276. warp/tests/assets/mlp_golden.npy +0 -0
  277. warp/tests/assets/pixel.npy +0 -0
  278. warp/tests/assets/pnoise_golden.npy +0 -0
  279. warp/tests/assets/spiky.usd +0 -0
  280. warp/tests/assets/test_grid.nvdb +0 -0
  281. warp/tests/assets/test_index_grid.nvdb +0 -0
  282. warp/tests/assets/test_int32_grid.nvdb +0 -0
  283. warp/tests/assets/test_vec_grid.nvdb +0 -0
  284. warp/tests/assets/torus.nvdb +0 -0
  285. warp/tests/assets/torus.usda +105 -0
  286. warp/tests/aux_test_class_kernel.py +34 -0
  287. warp/tests/aux_test_compile_consts_dummy.py +18 -0
  288. warp/tests/aux_test_conditional_unequal_types_kernels.py +29 -0
  289. warp/tests/aux_test_dependent.py +29 -0
  290. warp/tests/aux_test_grad_customs.py +29 -0
  291. warp/tests/aux_test_instancing_gc.py +26 -0
  292. warp/tests/aux_test_module_unload.py +23 -0
  293. warp/tests/aux_test_name_clash1.py +40 -0
  294. warp/tests/aux_test_name_clash2.py +40 -0
  295. warp/tests/aux_test_reference.py +9 -0
  296. warp/tests/aux_test_reference_reference.py +8 -0
  297. warp/tests/aux_test_square.py +16 -0
  298. warp/tests/aux_test_unresolved_func.py +22 -0
  299. warp/tests/aux_test_unresolved_symbol.py +22 -0
  300. warp/tests/cuda/__init__.py +0 -0
  301. warp/tests/cuda/test_async.py +676 -0
  302. warp/tests/cuda/test_ipc.py +124 -0
  303. warp/tests/cuda/test_mempool.py +233 -0
  304. warp/tests/cuda/test_multigpu.py +169 -0
  305. warp/tests/cuda/test_peer.py +139 -0
  306. warp/tests/cuda/test_pinned.py +84 -0
  307. warp/tests/cuda/test_streams.py +634 -0
  308. warp/tests/geometry/__init__.py +0 -0
  309. warp/tests/geometry/test_bvh.py +200 -0
  310. warp/tests/geometry/test_hash_grid.py +221 -0
  311. warp/tests/geometry/test_marching_cubes.py +74 -0
  312. warp/tests/geometry/test_mesh.py +316 -0
  313. warp/tests/geometry/test_mesh_query_aabb.py +399 -0
  314. warp/tests/geometry/test_mesh_query_point.py +932 -0
  315. warp/tests/geometry/test_mesh_query_ray.py +311 -0
  316. warp/tests/geometry/test_volume.py +1103 -0
  317. warp/tests/geometry/test_volume_write.py +346 -0
  318. warp/tests/interop/__init__.py +0 -0
  319. warp/tests/interop/test_dlpack.py +729 -0
  320. warp/tests/interop/test_jax.py +371 -0
  321. warp/tests/interop/test_paddle.py +800 -0
  322. warp/tests/interop/test_torch.py +1001 -0
  323. warp/tests/run_coverage_serial.py +39 -0
  324. warp/tests/sim/__init__.py +0 -0
  325. warp/tests/sim/disabled_kinematics.py +244 -0
  326. warp/tests/sim/flaky_test_sim_grad.py +290 -0
  327. warp/tests/sim/test_collision.py +604 -0
  328. warp/tests/sim/test_coloring.py +258 -0
  329. warp/tests/sim/test_model.py +224 -0
  330. warp/tests/sim/test_sim_grad_bounce_linear.py +212 -0
  331. warp/tests/sim/test_sim_kinematics.py +98 -0
  332. warp/tests/sim/test_vbd.py +597 -0
  333. warp/tests/test_adam.py +163 -0
  334. warp/tests/test_arithmetic.py +1096 -0
  335. warp/tests/test_array.py +2972 -0
  336. warp/tests/test_array_reduce.py +156 -0
  337. warp/tests/test_assert.py +250 -0
  338. warp/tests/test_atomic.py +153 -0
  339. warp/tests/test_bool.py +220 -0
  340. warp/tests/test_builtins_resolution.py +1298 -0
  341. warp/tests/test_closest_point_edge_edge.py +327 -0
  342. warp/tests/test_codegen.py +810 -0
  343. warp/tests/test_codegen_instancing.py +1495 -0
  344. warp/tests/test_compile_consts.py +215 -0
  345. warp/tests/test_conditional.py +252 -0
  346. warp/tests/test_context.py +42 -0
  347. warp/tests/test_copy.py +238 -0
  348. warp/tests/test_ctypes.py +638 -0
  349. warp/tests/test_dense.py +73 -0
  350. warp/tests/test_devices.py +97 -0
  351. warp/tests/test_examples.py +482 -0
  352. warp/tests/test_fabricarray.py +996 -0
  353. warp/tests/test_fast_math.py +74 -0
  354. warp/tests/test_fem.py +2003 -0
  355. warp/tests/test_fp16.py +136 -0
  356. warp/tests/test_func.py +454 -0
  357. warp/tests/test_future_annotations.py +98 -0
  358. warp/tests/test_generics.py +656 -0
  359. warp/tests/test_grad.py +893 -0
  360. warp/tests/test_grad_customs.py +339 -0
  361. warp/tests/test_grad_debug.py +341 -0
  362. warp/tests/test_implicit_init.py +411 -0
  363. warp/tests/test_import.py +45 -0
  364. warp/tests/test_indexedarray.py +1140 -0
  365. warp/tests/test_intersect.py +73 -0
  366. warp/tests/test_iter.py +76 -0
  367. warp/tests/test_large.py +177 -0
  368. warp/tests/test_launch.py +411 -0
  369. warp/tests/test_lerp.py +151 -0
  370. warp/tests/test_linear_solvers.py +193 -0
  371. warp/tests/test_lvalue.py +427 -0
  372. warp/tests/test_mat.py +2089 -0
  373. warp/tests/test_mat_lite.py +122 -0
  374. warp/tests/test_mat_scalar_ops.py +2913 -0
  375. warp/tests/test_math.py +178 -0
  376. warp/tests/test_mlp.py +282 -0
  377. warp/tests/test_module_hashing.py +258 -0
  378. warp/tests/test_modules_lite.py +44 -0
  379. warp/tests/test_noise.py +252 -0
  380. warp/tests/test_operators.py +299 -0
  381. warp/tests/test_options.py +129 -0
  382. warp/tests/test_overwrite.py +551 -0
  383. warp/tests/test_print.py +339 -0
  384. warp/tests/test_quat.py +2315 -0
  385. warp/tests/test_rand.py +339 -0
  386. warp/tests/test_reload.py +302 -0
  387. warp/tests/test_rounding.py +185 -0
  388. warp/tests/test_runlength_encode.py +196 -0
  389. warp/tests/test_scalar_ops.py +105 -0
  390. warp/tests/test_smoothstep.py +108 -0
  391. warp/tests/test_snippet.py +318 -0
  392. warp/tests/test_sparse.py +582 -0
  393. warp/tests/test_spatial.py +2229 -0
  394. warp/tests/test_special_values.py +361 -0
  395. warp/tests/test_static.py +592 -0
  396. warp/tests/test_struct.py +734 -0
  397. warp/tests/test_tape.py +204 -0
  398. warp/tests/test_transient_module.py +93 -0
  399. warp/tests/test_triangle_closest_point.py +145 -0
  400. warp/tests/test_types.py +562 -0
  401. warp/tests/test_utils.py +588 -0
  402. warp/tests/test_vec.py +1487 -0
  403. warp/tests/test_vec_lite.py +80 -0
  404. warp/tests/test_vec_scalar_ops.py +2327 -0
  405. warp/tests/test_verify_fp.py +100 -0
  406. warp/tests/tile/__init__.py +0 -0
  407. warp/tests/tile/test_tile.py +780 -0
  408. warp/tests/tile/test_tile_load.py +407 -0
  409. warp/tests/tile/test_tile_mathdx.py +208 -0
  410. warp/tests/tile/test_tile_mlp.py +402 -0
  411. warp/tests/tile/test_tile_reduce.py +447 -0
  412. warp/tests/tile/test_tile_shared_memory.py +247 -0
  413. warp/tests/tile/test_tile_view.py +173 -0
  414. warp/tests/unittest_serial.py +47 -0
  415. warp/tests/unittest_suites.py +427 -0
  416. warp/tests/unittest_utils.py +468 -0
  417. warp/tests/walkthrough_debug.py +93 -0
  418. warp/thirdparty/__init__.py +0 -0
  419. warp/thirdparty/appdirs.py +598 -0
  420. warp/thirdparty/dlpack.py +145 -0
  421. warp/thirdparty/unittest_parallel.py +570 -0
  422. warp/torch.py +391 -0
  423. warp/types.py +5230 -0
  424. warp/utils.py +1137 -0
  425. warp_lang-1.7.0.dist-info/METADATA +516 -0
  426. warp_lang-1.7.0.dist-info/RECORD +429 -0
  427. warp_lang-1.7.0.dist-info/WHEEL +5 -0
  428. warp_lang-1.7.0.dist-info/licenses/LICENSE.md +202 -0
  429. warp_lang-1.7.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,389 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ import gc
17
+ import statistics as stats
18
+
19
+ import warp as wp
20
+
21
# --- Benchmark configuration -------------------------------------------------

# Value passed to wp.set_mempool_enabled() for every CUDA device.
ENABLE_MEMPOOLS = False
# Value passed to wp.set_peer_access_enabled() for every pair of CUDA devices.
ENABLE_PEER_ACCESS = False
# Value passed to wp.set_mempool_access_enabled() for every pair of CUDA devices.
ENABLE_MEMPOOL_ACCESS = False
# When True, MEMPOOL_RELEASE_THRESHOLD is applied to every CUDA device.
ENABLE_MEMPOOL_RELEASE_THRESHOLD = False

# Value handed to wp.set_mempool_release_threshold(): 1024**3
# (1 GiB if the API interprets it as bytes -- TODO confirm against the Warp docs).
MEMPOOL_RELEASE_THRESHOLD = 1024 * 1024 * 1024

# When True, each benchmark also calls wp.synchronize_device() on the device(s) it uses.
DO_SYNC = False
# Forwarded as print= to every wp.ScopedTimer.
VERBOSE = False
# Forwarded as use_nvtx= to every wp.ScopedTimer.
USE_NVTX = False

# Number of float elements in each benchmark array.
num_elems = 10000
# Number of timed repetitions per benchmark.
num_runs = 10000
# Default number of samples print_stat() drops from each end of the sorted timings.
trim_runs = 2500
35
+
36
+
37
@wp.kernel
def inc_kernel(a: wp.array(dtype=float)):
    # Each thread reads its own index and adds 1.0 to that element of `a`.
    tid = wp.tid()
    a[tid] = a[tid] + 1.0
41
+
42
+
43
# configure devices
# Apply the mempool / peer-access settings from the configuration flags to
# every CUDA device (and every ordered pair of devices). Failures are reported
# and skipped so the benchmark still runs where a feature is unsupported.
for target_device in wp.get_cuda_devices():
    try:
        wp.set_mempool_enabled(target_device, ENABLE_MEMPOOLS)
        if ENABLE_MEMPOOL_RELEASE_THRESHOLD:
            wp.set_mempool_release_threshold(target_device, MEMPOOL_RELEASE_THRESHOLD)
    except Exception as e:
        print(f"Error: {e}")

    # NOTE(review): nesting reconstructed from the use of `target_device`
    # below; the pair includes target_device == peer_device.
    for peer_device in wp.get_cuda_devices():
        try:
            wp.set_peer_access_enabled(target_device, peer_device, ENABLE_PEER_ACCESS)
        except Exception as e:
            print(f"Error: {e}")

        try:
            wp.set_mempool_access_enabled(target_device, peer_device, ENABLE_MEMPOOL_ACCESS)
        except Exception as e:
            print(f"Error: {e}")
62
+
63
cuda_device_count = wp.get_cuda_device_count()

cuda0 = wp.get_device("cuda:0")

# preallocate some arrays
# Host buffers (pageable and pinned) plus three buffers on cuda:0; the copy
# benchmarks below reuse these instead of allocating inside the timed region.
arr_host = wp.zeros(num_elems, dtype=float, device="cpu", pinned=False)
arr_host_pinned = wp.zeros(num_elems, dtype=float, device="cpu", pinned=True)
arr_cuda0 = wp.zeros(num_elems, dtype=float, device=cuda0)
arr_cuda0_src = wp.zeros(num_elems, dtype=float, device=cuda0)
arr_cuda0_dst = wp.zeros(num_elems, dtype=float, device=cuda0)

# mgpu support
# cuda1 / arr_cuda1 only exist when a second CUDA device is present.
if cuda_device_count > 1:
    cuda1 = wp.get_device("cuda:1")
    arr_cuda1 = wp.zeros(num_elems, dtype=float, device=cuda1)

# Extra stream on cuda:0 used by the "*_stream" benchmarks.
stream0 = wp.Stream(cuda0)

# preload module
wp.force_load(cuda0)
if cuda_device_count > 1:
    wp.force_load(cuda1)

# capture graph
# Record a single inc_kernel launch over arr_cuda0 for the graph benchmarks.
with wp.ScopedDevice(cuda0):
    wp.capture_begin()
    wp.launch(inc_kernel, dim=arr_cuda0.size, inputs=[arr_cuda0])
    graph0 = wp.capture_end()


# Slots that keep the arrays created by test_alloc / test_zeros alive until
# test_free (or the explicit cleanup loop) drops them.
g_allocs = [None] * num_runs
94
+
95
+
96
def test_alloc(num_elems, device, idx):
    """Time one ``wp.empty`` allocation on ``device``.

    Args:
        num_elems: Number of float elements to allocate.
        device: Device passed to ``wp.empty`` (and synchronized when ``DO_SYNC`` is set).
        idx: Slot of the module-level ``g_allocs`` list that receives the new array.

    Returns:
        The ``elapsed`` value of the ``wp.ScopedTimer`` wrapped around the call.
    """
    # Synchronize before starting the timer (presumably so earlier work is not measured).
    wp.synchronize()

    with wp.ScopedTimer("alloc", print=VERBOSE, use_nvtx=USE_NVTX) as timer:
        g_allocs[idx] = wp.empty(num_elems, dtype=float, device=device)

        # Inside the timer so DO_SYNC affects the measurement (nesting reconstructed -- confirm).
        if DO_SYNC:
            wp.synchronize_device(device)

    return timer.elapsed
106
+
107
+
108
def test_free(device, idx):
    """Time dropping the reference held in ``g_allocs[idx]``.

    Args:
        device: Device synchronized inside the timed region when ``DO_SYNC`` is set.
        idx: Slot of the module-level ``g_allocs`` list to clear.

    Returns:
        The ``elapsed`` value of the ``wp.ScopedTimer`` wrapped around the assignment.
    """
    # Synchronize before starting the timer (presumably so earlier work is not measured).
    wp.synchronize()

    with wp.ScopedTimer("free", print=VERBOSE, use_nvtx=USE_NVTX) as timer:
        # Overwriting the slot releases this list's reference to the array.
        g_allocs[idx] = None

        # Inside the timer so DO_SYNC affects the measurement (nesting reconstructed -- confirm).
        if DO_SYNC:
            wp.synchronize_device(device)

    return timer.elapsed
118
+
119
+
120
def test_zeros(num_elems, device, idx):
    """Time one ``wp.zeros`` allocation on ``device``.

    Args:
        num_elems: Number of float elements to allocate.
        device: Device passed to ``wp.zeros`` (and synchronized when ``DO_SYNC`` is set).
        idx: Slot of the module-level ``g_allocs`` list that receives the new array.

    Returns:
        The ``elapsed`` value of the ``wp.ScopedTimer`` wrapped around the call.
    """
    # Synchronize before starting the timer (presumably so earlier work is not measured).
    wp.synchronize()

    with wp.ScopedTimer("zeros", print=VERBOSE, use_nvtx=USE_NVTX) as timer:
        g_allocs[idx] = wp.zeros(num_elems, dtype=float, device=device)

        # Inside the timer so DO_SYNC affects the measurement (nesting reconstructed -- confirm).
        if DO_SYNC:
            wp.synchronize_device(device)

    return timer.elapsed
130
+
131
+
132
def test_h2d(num_elems, device):
    """Time one ``wp.copy`` into ``arr_cuda0`` from the non-pinned host array ``arr_host``.

    Args:
        num_elems: Not used by the body; the preallocated module-level arrays are copied.
        device: Device synchronized inside the timed region when ``DO_SYNC`` is set.

    Returns:
        The ``elapsed`` value of the ``wp.ScopedTimer`` wrapped around the copy.
    """
    # Synchronize before starting the timer (presumably so earlier work is not measured).
    wp.synchronize()

    with wp.ScopedTimer("h2d", print=VERBOSE, use_nvtx=USE_NVTX) as timer:
        wp.copy(arr_cuda0, arr_host)

        # Inside the timer so DO_SYNC affects the measurement (nesting reconstructed -- confirm).
        if DO_SYNC:
            wp.synchronize_device(device)

    return timer.elapsed
142
+
143
+
144
def test_d2h(num_elems, device):
    """Time one ``wp.copy`` into the non-pinned host array ``arr_host`` from ``arr_cuda0``.

    Args:
        num_elems: Not used by the body; the preallocated module-level arrays are copied.
        device: Device synchronized inside the timed region when ``DO_SYNC`` is set.

    Returns:
        The ``elapsed`` value of the ``wp.ScopedTimer`` wrapped around the copy.
    """
    # Synchronize before starting the timer (presumably so earlier work is not measured).
    wp.synchronize()

    with wp.ScopedTimer("d2h", print=VERBOSE, use_nvtx=USE_NVTX) as timer:
        wp.copy(arr_host, arr_cuda0)

        # Inside the timer so DO_SYNC affects the measurement (nesting reconstructed -- confirm).
        if DO_SYNC:
            wp.synchronize_device(device)

    return timer.elapsed
154
+
155
+
156
def test_h2d_pinned(num_elems, device):
    """Time one ``wp.copy`` into ``arr_cuda0`` from the pinned host array ``arr_host_pinned``.

    Args:
        num_elems: Not used by the body; the preallocated module-level arrays are copied.
        device: Device synchronized inside the timed region when ``DO_SYNC`` is set.

    Returns:
        The ``elapsed`` value of the ``wp.ScopedTimer`` wrapped around the copy.
    """
    # Synchronize before starting the timer (presumably so earlier work is not measured).
    wp.synchronize()

    with wp.ScopedTimer("h2d pinned", print=VERBOSE, use_nvtx=USE_NVTX) as timer:
        wp.copy(arr_cuda0, arr_host_pinned)

        # Inside the timer so DO_SYNC affects the measurement (nesting reconstructed -- confirm).
        if DO_SYNC:
            wp.synchronize_device(device)

    return timer.elapsed
166
+
167
+
168
def test_d2h_pinned(num_elems, device):
    """Time one ``wp.copy`` into the pinned host array ``arr_host_pinned`` from ``arr_cuda0``.

    Args:
        num_elems: Not used by the body; the preallocated module-level arrays are copied.
        device: Device synchronized inside the timed region when ``DO_SYNC`` is set.

    Returns:
        The ``elapsed`` value of the ``wp.ScopedTimer`` wrapped around the copy.
    """
    # Synchronize before starting the timer (presumably so earlier work is not measured).
    wp.synchronize()

    with wp.ScopedTimer("d2h pinned", print=VERBOSE, use_nvtx=USE_NVTX) as timer:
        wp.copy(arr_host_pinned, arr_cuda0)

        # Inside the timer so DO_SYNC affects the measurement (nesting reconstructed -- confirm).
        if DO_SYNC:
            wp.synchronize_device(device)

    return timer.elapsed
178
+
179
+
180
def test_d2d(num_elems, device):
    """Time one ``wp.copy`` between two arrays on cuda:0 (``arr_cuda0_src`` -> ``arr_cuda0_dst``).

    Args:
        num_elems: Not used by the body; the preallocated module-level arrays are copied.
        device: Device synchronized inside the timed region when ``DO_SYNC`` is set.

    Returns:
        The ``elapsed`` value of the ``wp.ScopedTimer`` wrapped around the copy.
    """
    # Synchronize before starting the timer (presumably so earlier work is not measured).
    wp.synchronize()

    with wp.ScopedTimer("d2d", print=VERBOSE, use_nvtx=USE_NVTX) as timer:
        wp.copy(arr_cuda0_dst, arr_cuda0_src)

        # Inside the timer so DO_SYNC affects the measurement (nesting reconstructed -- confirm).
        if DO_SYNC:
            wp.synchronize_device(device)

    return timer.elapsed
190
+
191
+
192
def test_p2p(num_elems, src_device, dst_device):
    """Time one ``wp.copy`` into ``arr_cuda0`` from ``arr_cuda1`` (an array on the second GPU).

    ``arr_cuda1`` is only created when more than one CUDA device is present,
    so this must only be called in that case.

    Args:
        num_elems: Not used by the body; the preallocated module-level arrays are copied.
        src_device: First device synchronized when ``DO_SYNC`` is set.
        dst_device: Second device synchronized when ``DO_SYNC`` is set.

    Returns:
        The ``elapsed`` value of the ``wp.ScopedTimer`` wrapped around the copy.
    """
    # Synchronize before starting the timer (presumably so earlier work is not measured).
    wp.synchronize()

    with wp.ScopedTimer("p2p", print=VERBOSE, use_nvtx=USE_NVTX) as timer:
        wp.copy(arr_cuda0, arr_cuda1)

        # Inside the timer so DO_SYNC affects the measurement (nesting reconstructed -- confirm).
        if DO_SYNC:
            wp.synchronize_device(src_device)
            wp.synchronize_device(dst_device)

    return timer.elapsed
203
+
204
+
205
def test_p2p_stream(num_elems, src_device, dst_device):
    """Time one ``wp.copy`` into ``arr_cuda0`` from ``arr_cuda1`` issued on ``stream0``.

    ``arr_cuda1`` is only created when more than one CUDA device is present,
    so this must only be called in that case.

    Args:
        num_elems: Not used by the body; the preallocated module-level arrays are copied.
        src_device: First device synchronized when ``DO_SYNC`` is set.
        dst_device: Second device synchronized when ``DO_SYNC`` is set.

    Returns:
        The ``elapsed`` value of the ``wp.ScopedTimer`` wrapped around the copy.
    """
    stream = stream0

    # Synchronize before starting the timer (presumably so earlier work is not measured).
    wp.synchronize()

    with wp.ScopedTimer("p2p stream", print=VERBOSE, use_nvtx=USE_NVTX) as timer:
        wp.copy(arr_cuda0, arr_cuda1, stream=stream)

        # Inside the timer so DO_SYNC affects the measurement (nesting reconstructed -- confirm).
        if DO_SYNC:
            wp.synchronize_device(src_device)
            wp.synchronize_device(dst_device)

    return timer.elapsed
218
+
219
+
220
def test_launch(num_elems, device):
    """Time one ``wp.launch`` of ``inc_kernel`` over ``arr_cuda0`` on ``device``.

    Args:
        num_elems: Not used by the body; the launch dimension is ``arr_cuda0.size``.
        device: Device passed to ``wp.launch`` (and synchronized when ``DO_SYNC`` is set).

    Returns:
        The ``elapsed`` value of the ``wp.ScopedTimer`` wrapped around the launch.
    """
    a = arr_cuda0

    # Synchronize before starting the timer (presumably so earlier work is not measured).
    wp.synchronize()

    with wp.ScopedTimer("launch", print=VERBOSE, use_nvtx=USE_NVTX) as timer:
        wp.launch(inc_kernel, dim=a.size, inputs=[a], device=device)

        # Inside the timer so DO_SYNC affects the measurement (nesting reconstructed -- confirm).
        if DO_SYNC:
            wp.synchronize_device(device)

    return timer.elapsed
232
+
233
+
234
def test_launch_stream(num_elems, device):
    """Time one ``wp.launch`` of ``inc_kernel`` over ``arr_cuda0`` issued on ``stream0``.

    Args:
        num_elems: Not used by the body; the launch dimension is ``arr_cuda0.size``.
        device: Only used for the optional ``DO_SYNC`` synchronization; the
            launch itself is given ``stream=stream0`` rather than a device.

    Returns:
        The ``elapsed`` value of the ``wp.ScopedTimer`` wrapped around the launch.
    """
    a = arr_cuda0
    stream = stream0

    # Synchronize before starting the timer (presumably so earlier work is not measured).
    wp.synchronize()

    with wp.ScopedTimer("launch stream", print=VERBOSE, use_nvtx=USE_NVTX) as timer:
        wp.launch(inc_kernel, dim=a.size, inputs=[a], stream=stream)

        # Inside the timer so DO_SYNC affects the measurement (nesting reconstructed -- confirm).
        if DO_SYNC:
            wp.synchronize_device(device)

    return timer.elapsed
247
+
248
+
249
def test_graph(num_elems, device):
    """Time one ``wp.capture_launch`` of the pre-captured ``graph0``.

    Args:
        num_elems: Not used by the body.
        device: Device synchronized inside the timed region when ``DO_SYNC`` is set.

    Returns:
        The ``elapsed`` value of the ``wp.ScopedTimer`` wrapped around the launch.
    """
    # Synchronize before starting the timer (presumably so earlier work is not measured).
    wp.synchronize()

    with wp.ScopedTimer("graph", print=VERBOSE, use_nvtx=USE_NVTX) as timer:
        wp.capture_launch(graph0)

        # Inside the timer so DO_SYNC affects the measurement (nesting reconstructed -- confirm).
        if DO_SYNC:
            wp.synchronize_device(device)

    return timer.elapsed
259
+
260
+
261
def test_graph_stream(num_elems, device):
    """Time one ``wp.capture_launch`` of the pre-captured ``graph0`` on ``stream0``.

    Args:
        num_elems: Not used by the body.
        device: Device synchronized inside the timed region when ``DO_SYNC`` is set.

    Returns:
        The ``elapsed`` value of the ``wp.ScopedTimer`` wrapped around the launch.
    """
    # Synchronize before starting the timer (presumably so earlier work is not measured).
    wp.synchronize()

    # Label is "graph stream" (not "graph") so verbose/NVTX output can be told
    # apart from test_graph, matching the "launch"/"launch stream" naming.
    with wp.ScopedTimer("graph stream", print=VERBOSE, use_nvtx=USE_NVTX) as timer:
        wp.capture_launch(graph0, stream=stream0)

        # Inside the timer so DO_SYNC affects the measurement (nesting reconstructed -- confirm).
        if DO_SYNC:
            wp.synchronize_device(device)

    return timer.elapsed
271
+
272
+
273
# One timing slot per run for every benchmark. The P2P lists stay all-zero
# when only one CUDA device is present, because that section is skipped.
alloc_times = [0] * num_runs
free_times = [0] * num_runs
zeros_times = [0] * num_runs
d2h_times = [0] * num_runs
h2d_times = [0] * num_runs
d2h_pinned_times = [0] * num_runs
h2d_pinned_times = [0] * num_runs
d2d_times = [0] * num_runs
p2p_times = [0] * num_runs
p2p_stream_times = [0] * num_runs
launch_times = [0] * num_runs
launch_stream_times = [0] * num_runs
graph_times = [0] * num_runs
graph_stream_times = [0] * num_runs

wp.set_device(cuda0)

# The garbage collector is switched off around every timed call and back on
# afterwards (presumably to keep collections out of the measurements).

# alloc
for i in range(num_runs):
    gc.disable()
    alloc_times[i] = test_alloc(num_elems, cuda0, i)
    gc.enable()

# free
for i in range(num_runs):
    gc.disable()
    free_times[i] = test_free(cuda0, i)
    gc.enable()

# zeros
for i in range(num_runs):
    gc.disable()
    zeros_times[i] = test_zeros(num_elems, cuda0, i)
    gc.enable()

# free zeros
# Untimed cleanup of the arrays created by the zeros benchmark.
for i in range(num_runs):
    g_allocs[i] = None

# h2d, d2h pageable copy
for i in range(num_runs):
    gc.disable()
    h2d_times[i] = test_h2d(num_elems, cuda0)
    d2h_times[i] = test_d2h(num_elems, cuda0)
    gc.enable()

# h2d, d2h pinned copy
for i in range(num_runs):
    gc.disable()
    h2d_pinned_times[i] = test_h2d_pinned(num_elems, cuda0)
    d2h_pinned_times[i] = test_d2h_pinned(num_elems, cuda0)
    gc.enable()

# d2d copy
for i in range(num_runs):
    gc.disable()
    d2d_times[i] = test_d2d(num_elems, cuda0)
    gc.enable()

# p2p copy
if cuda_device_count > 1:
    for i in range(num_runs):
        gc.disable()
        p2p_times[i] = test_p2p(num_elems, cuda1, cuda0)
        p2p_stream_times[i] = test_p2p_stream(num_elems, cuda1, cuda0)
        gc.enable()

# launch
for i in range(num_runs):
    gc.disable()
    launch_times[i] = test_launch(num_elems, cuda0)
    launch_stream_times[i] = test_launch_stream(num_elems, cuda0)
    gc.enable()

# graph
for i in range(num_runs):
    gc.disable()
    graph_times[i] = test_graph(num_elems, cuda0)
    graph_stream_times[i] = test_graph_stream(num_elems, cuda0)
    gc.enable()
353
+
354
+
355
def print_stat(name, data, trim=trim_runs):
    """Print the trimmed mean of ``data`` next to ``name``.

    Args:
        name: Label printed left-aligned in a 15-character column.
        data: Sequence of timing samples; the caller's sequence is not modified.
        trim: Number of samples discarded from each end of the sorted data
            before averaging. Defaults to the module-level ``trim_runs``.

    Raises:
        ValueError: If ``data`` does not contain more than ``2 * trim`` samples.
    """
    # Explicit check rather than ``assert`` so it is still enforced under ``python -O``.
    if len(data) - 2 * trim <= 0:
        raise ValueError(f"need more than {2 * trim} samples to trim {trim} from each end, got {len(data)}")
    if trim > 0:
        # Sort a copy and drop the `trim` smallest and `trim` largest samples.
        data = sorted(data)[trim:-trim]
    # The mean is scaled by 1e6 and printed without decimals.
    print(f"{name:15s} {1000000 * stats.mean(data):.0f}")
360
+
361
+
362
# Summary table: one trimmed-mean line per benchmark.
# The two P2P rows print 0 when the P2P section was skipped (single CUDA device).
print("=========================")
print_stat("Alloc", alloc_times)
print_stat("Free", free_times)
print_stat("Zeros", zeros_times)
print_stat("H2D", h2d_times)
print_stat("D2H", d2h_times)
print_stat("H2D pinned", h2d_pinned_times)
print_stat("D2H pinned", d2h_pinned_times)
print_stat("D2D", d2d_times)
print_stat("P2P", p2p_times)
print_stat("P2P stream", p2p_stream_times)
print_stat("Launch", launch_times)
print_stat("Launch stream", launch_stream_times)
print_stat("Graph", graph_times)
print_stat("Graph stream", graph_stream_times)
377
+
378
+
379
+ # ========= profiling ==========
380
+
381
+ # from pyinstrument import Profiler
382
+ # profiler = Profiler()
383
+ # profiler.start()
384
+ # for i in range(10):
385
+ # # test_alloc(num_elems, cuda0)
386
+ # # test_h2d(num_elems, cuda0)
387
+ # test_p2p(num_elems, cuda0, cuda1)
388
+ # profiler.stop()
389
+ # print(profiler.output_text(show_all=True))
@@ -0,0 +1,296 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
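+ # standard-library imports; this file's own directory is added to sys.path further down so the sibling benchmark_cloth_* modules can be imported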
17
+ import csv
18
+ import os
19
+ import sys
20
+
21
+ import numpy as np
22
+
23
+ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), ".")))
24
+
25
+ from pxr import Usd, UsdGeom
26
+
27
+ import warp as wp
28
+
29
+
30
class Cloth:
    """Rectangular mass-spring cloth laid out as a ``dx`` by ``dy`` particle grid.

    Particles are stored row by row (index ``y * dx + x``) at
    ``lower + radius * (x, 0, y)``. After construction the object exposes:

    * ``triangles``: flat Python list of vertex indices, two triangles per cell
    * ``positions`` / ``velocities``: float32 arrays with one row per particle
    * ``inv_masses``: float32 array, ``1 / mass`` or ``0`` for pinned corners
    * ``spring_indices``: int32 array holding two particle indices per spring
    * ``spring_lengths`` / ``spring_stiffness`` / ``spring_damping``: float32
      arrays with one entry per spring
    * ``num_particles``, ``num_springs``, ``num_tris``: element counts

    Args:
        lower: Position of the particle at grid coordinate (0, 0).
        dx: Number of particles along the grid's x direction.
        dy: Number of particles along the grid's y direction.
        radius: Spacing between neighbouring particles.
        stretch_stiffness: Stiffness of springs between direct neighbours.
        bend_stiffness: Stiffness of springs that skip one particle along a
            row or column; no such springs are created unless this is > 0.
        shear_stiffness: Stiffness of the diagonal springs; no such springs
            are created unless this is > 0.
        mass: Mass of each free particle.
        fix_corners: When true, the two end particles of row ``y == 0`` get an
            inverse mass of zero.
    """

    def __init__(
        self, lower, dx, dy, radius, stretch_stiffness, bend_stiffness, shear_stiffness, mass, fix_corners=True
    ):
        self.triangles = []

        self.positions = []
        self.velocities = []
        self.inv_masses = []

        self.spring_indices = []
        self.spring_lengths = []
        self.spring_stiffness = []
        self.spring_damping = []

        def node(col, row):
            # flat particle index of grid coordinate (col, row)
            return row * dx + col

        def add_spring(a, b, stiffness, damp=10.0):
            # the rest length is the distance between the two particles as placed
            rest = np.linalg.norm(np.array(self.positions[a]) - np.array(self.positions[b]))

            self.spring_indices.extend((a, b))
            self.spring_lengths.append(rest)
            self.spring_stiffness.append(stiffness)
            self.spring_damping.append(damp)

        # particles, triangles and inverse masses, row by row
        for row in range(dy):
            for col in range(dx):
                self.positions.append(np.array(lower) + radius * np.array((float(col), 0.0, float(row))))
                self.velocities.append(np.zeros(3))

                if col > 0 and row > 0:
                    # split the cell whose last-visited corner is (col, row)
                    # into two triangles sharing the diagonal from (col - 1, row - 1)
                    self.triangles.extend(
                        (
                            node(col - 1, row - 1),
                            node(col, row - 1),
                            node(col, row),
                            node(col - 1, row - 1),
                            node(col, row),
                            node(col - 1, row),
                        )
                    )

                pinned = fix_corners and row == 0 and (col == 0 or col == dx - 1)
                self.inv_masses.append(0.0 if pinned else 1.0 / mass)

        # springs along rows, plus both diagonals towards the previous row
        for row in range(dy):
            for col in range(dx):
                here = node(col, row)

                if col > 0:
                    add_spring(here, node(col - 1, row), stretch_stiffness)

                if col > 1 and bend_stiffness > 0.0:
                    add_spring(here, node(col - 2, row), bend_stiffness)

                if row > 0 and col < dx - 1 and shear_stiffness > 0.0:
                    add_spring(here, node(col + 1, row - 1), shear_stiffness)

                if row > 0 and col > 0 and shear_stiffness > 0.0:
                    add_spring(here, node(col - 1, row - 1), shear_stiffness)

        # springs along columns
        for col in range(dx):
            for row in range(dy):
                here = node(col, row)

                if row > 0:
                    add_spring(here, node(col, row - 1), stretch_stiffness)

                if row > 1 and bend_stiffness > 0.0:
                    add_spring(here, node(col, row - 2), bend_stiffness)

        # freeze the Python lists into typed numpy arrays
        for attr, dtype in (
            ("positions", np.float32),
            ("velocities", np.float32),
            ("inv_masses", np.float32),
            ("spring_lengths", np.float32),
            ("spring_indices", np.int32),
            ("spring_stiffness", np.float32),
            ("spring_damping", np.float32),
        ):
            setattr(self, attr, np.array(getattr(self, attr), dtype=dtype))

        self.num_particles = len(self.positions)
        self.num_springs = len(self.spring_lengths)
        self.num_tris = len(self.triangles) // 3
126
+
127
+
128
def run_benchmark(mode, dim, timers, render=False):
    """Time a ``dim`` x ``dim`` cloth simulation on the backend named by ``mode``.

    The backend module is imported lazily, so only the selected framework has
    to be installed. Backend construction plus one warm-up step are timed under
    the label "Initialization"; each of the following frames is timed under
    "Dim (<dim>^2)".

    Args:
        mode: Backend name; one of the keys of the ``backends`` table below.
        dim: Number of cloth particles along each side of the square grid.
        timers: Dictionary passed to ``wp.ScopedTimer`` as its ``dict`` argument.
        render: When true, write the per-frame positions to "benchmark.usd".

    Raises:
        RuntimeError: If ``mode`` does not name a known backend.
    """
    # simulation parameters
    sim_fps = 60.0
    sim_substeps = 16
    sim_duration = 1.0
    sim_frames = int(sim_duration * sim_fps)
    sim_dt = 1.0 / sim_fps
    sim_time = 0.0

    # square cloth with identical stretch / bend / shear stiffness
    cloth = Cloth(
        lower=(0.0, 0.0, 0.0),
        dx=dim,
        dy=dim,
        radius=0.1,
        stretch_stiffness=1000.0,
        bend_stiffness=1000.0,
        shear_stiffness=1000.0,
        mass=0.1,
        fix_corners=True,
    )

    if render:
        # set up a USD mesh for visualization
        stage = Usd.Stage.CreateNew("benchmark.usd")
        stage.SetStartTimeCode(0.0)
        stage.SetEndTimeCode(sim_duration * sim_fps)
        stage.SetTimeCodesPerSecond(sim_fps)

        mesh = UsdGeom.Mesh.Define(stage, "/root")
        mesh.GetPointsAttr().Set(cloth.positions, 0.0)
        mesh.GetFaceVertexIndicesAttr().Set(cloth.triangles, 0.0)
        mesh.GetFaceVertexCountsAttr().Set([3] * cloth.num_tris, 0.0)

    # mode -> (module name, integrator class, extra constructor args,
    #          value for JAX_PLATFORM_NAME or None)
    backends = {
        "warp_cpu": ("benchmark_cloth_warp", "WpIntegrator", ("cpu",), None),
        "warp_gpu": ("benchmark_cloth_warp", "WpIntegrator", ("cuda",), None),
        "taichi_cpu": ("benchmark_cloth_taichi", "TiIntegrator", ("cpu",), None),
        "taichi_gpu": ("benchmark_cloth_taichi", "TiIntegrator", ("cuda",), None),
        "numpy": ("benchmark_cloth_numpy", "NpIntegrator", (), None),
        "cupy": ("benchmark_cloth_cupy", "CpIntegrator", (), None),
        "numba": ("benchmark_cloth_numba", "NbIntegrator", (), None),
        "torch_cpu": ("benchmark_cloth_pytorch", "TrIntegrator", ("cpu",), None),
        "torch_gpu": ("benchmark_cloth_pytorch", "TrIntegrator", ("cuda",), None),
        "jax_cpu": ("benchmark_cloth_jax", "JxIntegrator", (), "cpu"),
        "jax_gpu": ("benchmark_cloth_jax", "JxIntegrator", (), "gpu"),
        "paddle_cpu": ("benchmark_cloth_paddle", "TrIntegrator", ("cpu",), None),
        "paddle_gpu": ("benchmark_cloth_paddle", "TrIntegrator", ("gpu",), None),
    }

    with wp.ScopedTimer("Initialization", dict=timers):
        if mode not in backends:
            raise RuntimeError("Unknown simulation backend")

        module_name, class_name, extra_args, jax_platform = backends[mode]

        # the platform variable has to be in place before the JAX backend is imported
        if jax_platform is not None:
            os.environ["JAX_PLATFORM_NAME"] = jax_platform

        backend = __import__(module_name)
        integrator = getattr(backend, class_name)(cloth, *extra_args)

        # run one warm-up iteration to accurately measure initialization time (some engines do lazy init)
        positions = integrator.simulate(sim_dt, sim_substeps)

    label = "Dim ({}^2)".format(dim)

    # timed simulation frames
    for _frame in range(sim_frames):
        with wp.ScopedTimer(label, dict=timers):
            positions = integrator.simulate(sim_dt, sim_substeps)

        if render:
            mesh.GetPointsAttr().Set(positions, sim_time * sim_fps)

        sim_time += sim_dt

    if render:
        stage.Save()
261
+
262
+
263
# timing samples collected by wp.ScopedTimer, keyed by timer label
timers = {}

# backend comes from the first command-line argument, defaulting to Warp on the GPU
mode = sys.argv[1] if len(sys.argv) > 1 else "warp_gpu"

# benchmark three grid resolutions without rendering
for dim in (32, 64, 128):
    run_benchmark(mode, dim, timers, render=False)
274
+
275
# write results

# one summary line per timer label on stdout
for label, samples in timers.items():
    print(
        "{:16} min: {:8.2f} max: {:8.2f} avg: {:8.2f}".format(
            label, np.min(samples), np.max(samples), np.mean(samples)
        )
    )

# Append this run to benchmark.csv in the current working directory.
# newline="" lets the csv module control row endings itself (avoids "\r\r\n"
# on Windows), and the with-block closes the file even if a timer label is
# missing or a write fails.
with open("benchmark.csv", "a", newline="") as report:
    writer = csv.writer(report, delimiter=",")

    # a file that is still empty needs its header row first
    if report.tell() == 0:
        writer.writerow(["Name", "Init", "Dim (32^2)", "Dim (64^2)", "Dim (128^2)"])

    # "Initialization" is sampled once per run_benchmark call; report the slowest one
    writer.writerow(
        [
            mode,
            np.max(timers["Initialization"]),
            np.mean(timers["Dim (32^2)"]),
            np.mean(timers["Dim (64^2)"]),
            np.mean(timers["Dim (128^2)"]),
        ]
    )