warp-lang 0.10.1__py3-none-win_amd64.whl → 0.11.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (300) hide show
  1. warp/__init__.py +10 -4
  2. warp/__init__.pyi +1 -0
  3. warp/bin/warp-clang.dll +0 -0
  4. warp/bin/warp.dll +0 -0
  5. warp/build.py +5 -3
  6. warp/build_dll.py +29 -9
  7. warp/builtins.py +868 -507
  8. warp/codegen.py +1074 -638
  9. warp/config.py +3 -3
  10. warp/constants.py +6 -0
  11. warp/context.py +715 -222
  12. warp/fabric.py +326 -0
  13. warp/fem/__init__.py +27 -0
  14. warp/fem/cache.py +389 -0
  15. warp/fem/dirichlet.py +181 -0
  16. warp/fem/domain.py +263 -0
  17. warp/fem/field/__init__.py +101 -0
  18. warp/fem/field/field.py +149 -0
  19. warp/fem/field/nodal_field.py +299 -0
  20. warp/fem/field/restriction.py +21 -0
  21. warp/fem/field/test.py +181 -0
  22. warp/fem/field/trial.py +183 -0
  23. warp/fem/geometry/__init__.py +19 -0
  24. warp/fem/geometry/closest_point.py +70 -0
  25. warp/fem/geometry/deformed_geometry.py +271 -0
  26. warp/fem/geometry/element.py +744 -0
  27. warp/fem/geometry/geometry.py +186 -0
  28. warp/fem/geometry/grid_2d.py +373 -0
  29. warp/fem/geometry/grid_3d.py +435 -0
  30. warp/fem/geometry/hexmesh.py +953 -0
  31. warp/fem/geometry/partition.py +376 -0
  32. warp/fem/geometry/quadmesh_2d.py +532 -0
  33. warp/fem/geometry/tetmesh.py +840 -0
  34. warp/fem/geometry/trimesh_2d.py +577 -0
  35. warp/fem/integrate.py +1616 -0
  36. warp/fem/operator.py +191 -0
  37. warp/fem/polynomial.py +213 -0
  38. warp/fem/quadrature/__init__.py +2 -0
  39. warp/fem/quadrature/pic_quadrature.py +245 -0
  40. warp/fem/quadrature/quadrature.py +294 -0
  41. warp/fem/space/__init__.py +292 -0
  42. warp/fem/space/basis_space.py +489 -0
  43. warp/fem/space/collocated_function_space.py +105 -0
  44. warp/fem/space/dof_mapper.py +236 -0
  45. warp/fem/space/function_space.py +145 -0
  46. warp/fem/space/grid_2d_function_space.py +267 -0
  47. warp/fem/space/grid_3d_function_space.py +306 -0
  48. warp/fem/space/hexmesh_function_space.py +352 -0
  49. warp/fem/space/partition.py +350 -0
  50. warp/fem/space/quadmesh_2d_function_space.py +369 -0
  51. warp/fem/space/restriction.py +160 -0
  52. warp/fem/space/shape/__init__.py +15 -0
  53. warp/fem/space/shape/cube_shape_function.py +738 -0
  54. warp/fem/space/shape/shape_function.py +103 -0
  55. warp/fem/space/shape/square_shape_function.py +611 -0
  56. warp/fem/space/shape/tet_shape_function.py +567 -0
  57. warp/fem/space/shape/triangle_shape_function.py +429 -0
  58. warp/fem/space/tetmesh_function_space.py +292 -0
  59. warp/fem/space/topology.py +295 -0
  60. warp/fem/space/trimesh_2d_function_space.py +221 -0
  61. warp/fem/types.py +77 -0
  62. warp/fem/utils.py +495 -0
  63. warp/native/array.h +147 -44
  64. warp/native/builtin.h +122 -149
  65. warp/native/bvh.cpp +73 -325
  66. warp/native/bvh.cu +406 -23
  67. warp/native/bvh.h +34 -43
  68. warp/native/clang/clang.cpp +13 -8
  69. warp/native/crt.h +2 -0
  70. warp/native/cuda_crt.h +5 -0
  71. warp/native/cuda_util.cpp +15 -3
  72. warp/native/cuda_util.h +3 -1
  73. warp/native/cutlass/tools/library/scripts/conv2d_operation.py +463 -0
  74. warp/native/cutlass/tools/library/scripts/conv3d_operation.py +321 -0
  75. warp/native/cutlass/tools/library/scripts/gemm_operation.py +988 -0
  76. warp/native/cutlass/tools/library/scripts/generator.py +4625 -0
  77. warp/native/cutlass/tools/library/scripts/library.py +799 -0
  78. warp/native/cutlass/tools/library/scripts/manifest.py +402 -0
  79. warp/native/cutlass/tools/library/scripts/pycutlass/docs/source/conf.py +96 -0
  80. warp/native/cutlass/tools/library/scripts/pycutlass/profile/conv/conv2d_f16_sm80.py +106 -0
  81. warp/native/cutlass/tools/library/scripts/pycutlass/profile/gemm/gemm_f32_sm80.py +91 -0
  82. warp/native/cutlass/tools/library/scripts/pycutlass/setup.py +80 -0
  83. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/__init__.py +48 -0
  84. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/arguments.py +118 -0
  85. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/c_types.py +241 -0
  86. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/compiler.py +432 -0
  87. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/conv2d_operation.py +631 -0
  88. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/epilogue.py +1026 -0
  89. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/frontend.py +104 -0
  90. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/gemm_operation.py +1276 -0
  91. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/library.py +744 -0
  92. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/memory_manager.py +74 -0
  93. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/operation.py +110 -0
  94. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/parser.py +619 -0
  95. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/reduction_operation.py +398 -0
  96. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/tensor_ref.py +70 -0
  97. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/__init__.py +4 -0
  98. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/conv2d_testbed.py +646 -0
  99. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_grouped_testbed.py +235 -0
  100. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_testbed.py +557 -0
  101. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/profiler.py +70 -0
  102. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/type_hint.py +39 -0
  103. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/__init__.py +1 -0
  104. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/device.py +76 -0
  105. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/reference_model.py +255 -0
  106. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/__init__.py +0 -0
  107. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +201 -0
  108. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +177 -0
  109. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +98 -0
  110. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +95 -0
  111. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +163 -0
  112. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +187 -0
  113. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +309 -0
  114. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +54 -0
  115. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  116. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  117. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +253 -0
  118. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +97 -0
  119. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +242 -0
  120. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  121. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  122. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/run_all_tests.py +10 -0
  123. warp/native/cutlass/tools/library/scripts/pycutlass/test/frontend/test_frontend.py +146 -0
  124. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/__init__.py +0 -0
  125. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_bf16_sm80.py +96 -0
  126. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f16_sm80.py +447 -0
  127. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f32_sm80.py +146 -0
  128. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f64_sm80.py +102 -0
  129. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_grouped_sm80.py +203 -0
  130. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_s8_sm80.py +229 -0
  131. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/run_all_tests.py +9 -0
  132. warp/native/cutlass/tools/library/scripts/pycutlass/test/unit/test_sm80.py +453 -0
  133. warp/native/cutlass/tools/library/scripts/rank_2k_operation.py +398 -0
  134. warp/native/cutlass/tools/library/scripts/rank_k_operation.py +387 -0
  135. warp/native/cutlass/tools/library/scripts/rt.py +796 -0
  136. warp/native/cutlass/tools/library/scripts/symm_operation.py +400 -0
  137. warp/native/cutlass/tools/library/scripts/trmm_operation.py +407 -0
  138. warp/native/cutlass_gemm.cu +5 -3
  139. warp/native/exports.h +1240 -952
  140. warp/native/fabric.h +228 -0
  141. warp/native/hashgrid.cpp +4 -4
  142. warp/native/hashgrid.h +22 -2
  143. warp/native/intersect.h +22 -7
  144. warp/native/intersect_adj.h +8 -8
  145. warp/native/intersect_tri.h +1 -1
  146. warp/native/marching.cu +157 -161
  147. warp/native/mat.h +80 -19
  148. warp/native/matnn.h +2 -2
  149. warp/native/mesh.cpp +33 -108
  150. warp/native/mesh.cu +114 -23
  151. warp/native/mesh.h +446 -46
  152. warp/native/noise.h +272 -329
  153. warp/native/quat.h +51 -8
  154. warp/native/rand.h +45 -35
  155. warp/native/range.h +6 -2
  156. warp/native/reduce.cpp +1 -1
  157. warp/native/reduce.cu +10 -12
  158. warp/native/runlength_encode.cu +6 -10
  159. warp/native/scan.cu +8 -11
  160. warp/native/sparse.cpp +4 -4
  161. warp/native/sparse.cu +164 -154
  162. warp/native/spatial.h +2 -2
  163. warp/native/temp_buffer.h +14 -30
  164. warp/native/vec.h +107 -23
  165. warp/native/volume.h +120 -0
  166. warp/native/warp.cpp +560 -30
  167. warp/native/warp.cu +431 -44
  168. warp/native/warp.h +13 -4
  169. warp/optim/__init__.py +1 -0
  170. warp/optim/linear.py +922 -0
  171. warp/optim/sgd.py +92 -0
  172. warp/render/render_opengl.py +335 -119
  173. warp/render/render_usd.py +11 -11
  174. warp/sim/__init__.py +2 -2
  175. warp/sim/articulation.py +385 -185
  176. warp/sim/collide.py +8 -0
  177. warp/sim/import_mjcf.py +297 -106
  178. warp/sim/import_urdf.py +389 -210
  179. warp/sim/import_usd.py +198 -97
  180. warp/sim/inertia.py +17 -18
  181. warp/sim/integrator_euler.py +14 -8
  182. warp/sim/integrator_xpbd.py +158 -16
  183. warp/sim/model.py +795 -291
  184. warp/sim/render.py +3 -3
  185. warp/sim/utils.py +3 -0
  186. warp/sparse.py +640 -150
  187. warp/stubs.py +606 -267
  188. warp/tape.py +61 -10
  189. warp/tests/__main__.py +3 -6
  190. warp/tests/assets/curlnoise_golden.npy +0 -0
  191. warp/tests/assets/pnoise_golden.npy +0 -0
  192. warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
  193. warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
  194. warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
  195. warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
  196. warp/tests/aux_test_unresolved_func.py +14 -0
  197. warp/tests/aux_test_unresolved_symbol.py +14 -0
  198. warp/tests/disabled_kinematics.py +239 -0
  199. warp/tests/run_coverage_serial.py +31 -0
  200. warp/tests/test_adam.py +103 -106
  201. warp/tests/test_arithmetic.py +128 -74
  202. warp/tests/test_array.py +212 -97
  203. warp/tests/test_array_reduce.py +57 -23
  204. warp/tests/test_atomic.py +64 -28
  205. warp/tests/test_bool.py +99 -0
  206. warp/tests/test_builtins_resolution.py +1292 -0
  207. warp/tests/test_bvh.py +42 -18
  208. warp/tests/test_closest_point_edge_edge.py +54 -57
  209. warp/tests/test_codegen.py +208 -130
  210. warp/tests/test_compile_consts.py +28 -20
  211. warp/tests/test_conditional.py +108 -24
  212. warp/tests/test_copy.py +10 -12
  213. warp/tests/test_ctypes.py +112 -88
  214. warp/tests/test_dense.py +21 -14
  215. warp/tests/test_devices.py +98 -0
  216. warp/tests/test_dlpack.py +75 -75
  217. warp/tests/test_examples.py +277 -0
  218. warp/tests/test_fabricarray.py +955 -0
  219. warp/tests/test_fast_math.py +15 -11
  220. warp/tests/test_fem.py +1271 -0
  221. warp/tests/test_fp16.py +53 -19
  222. warp/tests/test_func.py +187 -86
  223. warp/tests/test_generics.py +194 -49
  224. warp/tests/test_grad.py +178 -109
  225. warp/tests/test_grad_customs.py +176 -0
  226. warp/tests/test_hash_grid.py +52 -37
  227. warp/tests/test_import.py +10 -23
  228. warp/tests/test_indexedarray.py +32 -31
  229. warp/tests/test_intersect.py +18 -9
  230. warp/tests/test_large.py +141 -0
  231. warp/tests/test_launch.py +14 -41
  232. warp/tests/test_lerp.py +64 -65
  233. warp/tests/test_linear_solvers.py +154 -0
  234. warp/tests/test_lvalue.py +493 -0
  235. warp/tests/test_marching_cubes.py +12 -13
  236. warp/tests/test_mat.py +517 -2898
  237. warp/tests/test_mat_lite.py +115 -0
  238. warp/tests/test_mat_scalar_ops.py +2889 -0
  239. warp/tests/test_math.py +103 -9
  240. warp/tests/test_matmul.py +305 -69
  241. warp/tests/test_matmul_lite.py +410 -0
  242. warp/tests/test_mesh.py +71 -14
  243. warp/tests/test_mesh_query_aabb.py +41 -25
  244. warp/tests/test_mesh_query_point.py +140 -22
  245. warp/tests/test_mesh_query_ray.py +39 -22
  246. warp/tests/test_mlp.py +30 -22
  247. warp/tests/test_model.py +92 -89
  248. warp/tests/test_modules_lite.py +39 -0
  249. warp/tests/test_multigpu.py +88 -114
  250. warp/tests/test_noise.py +12 -11
  251. warp/tests/test_operators.py +16 -20
  252. warp/tests/test_options.py +11 -11
  253. warp/tests/test_pinned.py +17 -18
  254. warp/tests/test_print.py +32 -11
  255. warp/tests/test_quat.py +275 -129
  256. warp/tests/test_rand.py +18 -16
  257. warp/tests/test_reload.py +38 -34
  258. warp/tests/test_rounding.py +50 -43
  259. warp/tests/test_runlength_encode.py +168 -20
  260. warp/tests/test_smoothstep.py +9 -11
  261. warp/tests/test_snippet.py +143 -0
  262. warp/tests/test_sparse.py +261 -63
  263. warp/tests/test_spatial.py +276 -243
  264. warp/tests/test_streams.py +110 -85
  265. warp/tests/test_struct.py +268 -63
  266. warp/tests/test_tape.py +39 -21
  267. warp/tests/test_torch.py +118 -89
  268. warp/tests/test_transient_module.py +12 -13
  269. warp/tests/test_types.py +614 -0
  270. warp/tests/test_utils.py +494 -0
  271. warp/tests/test_vec.py +354 -2050
  272. warp/tests/test_vec_lite.py +73 -0
  273. warp/tests/test_vec_scalar_ops.py +2099 -0
  274. warp/tests/test_volume.py +457 -293
  275. warp/tests/test_volume_write.py +124 -134
  276. warp/tests/unittest_serial.py +35 -0
  277. warp/tests/unittest_suites.py +341 -0
  278. warp/tests/unittest_utils.py +568 -0
  279. warp/tests/unused_test_misc.py +71 -0
  280. warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
  281. warp/thirdparty/appdirs.py +36 -45
  282. warp/thirdparty/unittest_parallel.py +549 -0
  283. warp/torch.py +9 -6
  284. warp/types.py +1089 -366
  285. warp/utils.py +93 -387
  286. warp_lang-0.11.0.dist-info/METADATA +238 -0
  287. warp_lang-0.11.0.dist-info/RECORD +332 -0
  288. {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/WHEEL +1 -1
  289. warp/tests/test_all.py +0 -219
  290. warp/tests/test_array_scan.py +0 -60
  291. warp/tests/test_base.py +0 -208
  292. warp/tests/test_unresolved_func.py +0 -7
  293. warp/tests/test_unresolved_symbol.py +0 -7
  294. warp_lang-0.10.1.dist-info/METADATA +0 -21
  295. warp_lang-0.10.1.dist-info/RECORD +0 -188
  296. /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
  297. /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
  298. /warp/tests/{test_square.py → aux_test_square.py} +0 -0
  299. {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/LICENSE.md +0 -0
  300. {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/top_level.txt +0 -0
warp/native/mesh.cpp CHANGED
@@ -103,7 +103,8 @@ uint64_t mesh_create_host(array_t<wp::vec3> points, array_t<wp::vec3> velocities
103
103
  {
104
104
  Mesh* m = new Mesh(points, velocities, indices, num_points, num_tris);
105
105
 
106
- m->bounds = new bounds3[num_tris];
106
+ m->lowers = new vec3[num_tris];
107
+ m->uppers = new vec3[num_tris];
107
108
 
108
109
  float sum = 0.0;
109
110
  for (int i=0; i < num_tris; ++i)
@@ -111,15 +112,23 @@ uint64_t mesh_create_host(array_t<wp::vec3> points, array_t<wp::vec3> velocities
111
112
  wp::vec3& p0 = points[indices[i*3+0]];
112
113
  wp::vec3& p1 = points[indices[i*3+1]];
113
114
  wp::vec3& p2 = points[indices[i*3+2]];
114
- m->bounds[i].add_point(p0);
115
- m->bounds[i].add_point(p1);
116
- m->bounds[i].add_point(p2);
115
+
116
+ // compute triangle bounds
117
+ bounds3 b;
118
+ b.add_point(p0);
119
+ b.add_point(p1);
120
+ b.add_point(p2);
121
+
122
+ m->lowers[i] = b.lower;
123
+ m->uppers[i] = b.upper;
124
+
125
+ // compute edge lengths
117
126
  sum += length(p0-p1) + length(p0-p2) + length(p2-p1);
118
127
  }
119
128
  m->average_edge_length = sum / (num_tris*3);
120
129
 
121
- m->bvh = bvh_create(m->bounds, num_tris);
122
-
130
+ m->bvh = *(wp::BVH*)bvh_create_host(m->lowers, m->uppers, num_tris);
131
+
123
132
  if (support_winding_number)
124
133
  {
125
134
  // Let's first compute the sold
@@ -131,86 +140,14 @@ uint64_t mesh_create_host(array_t<wp::vec3> points, array_t<wp::vec3> velocities
131
140
  return (uint64_t)m;
132
141
  }
133
142
 
134
- uint64_t mesh_create_device(void* context, array_t<wp::vec3> points, array_t<wp::vec3> velocities, array_t<int> indices, int num_points, int num_tris, int support_winding_number)
135
- {
136
- ContextGuard guard(context);
137
-
138
- Mesh mesh(points, velocities, indices, num_points, num_tris);
139
-
140
- mesh.context = context ? context : cuda_context_get_current();
141
-
142
- // mesh.points = array_t<vec3>(points, num_points, points_grad);
143
- // mesh.velocities = array_t<vec3>(velocities, num_points, velocities_grad);
144
- // mesh.indices = array_t<int>(indices, num_tris, 3);
145
-
146
- // mesh.num_points = num_points;
147
- // mesh.num_tris = num_tris;
148
-
149
- {
150
- // todo: BVH creation only on CPU at the moment so temporarily bring all the data back to host
151
- vec3* points_host = (vec3*)alloc_host(sizeof(vec3)*num_points);
152
- int* indices_host = (int*)alloc_host(sizeof(int)*num_tris*3);
153
- bounds3* bounds_host = (bounds3*)alloc_host(sizeof(bounds3)*num_tris);
154
-
155
- memcpy_d2h(WP_CURRENT_CONTEXT, points_host, points, sizeof(vec3)*num_points);
156
- memcpy_d2h(WP_CURRENT_CONTEXT, indices_host, indices, sizeof(int)*num_tris*3);
157
- cuda_context_synchronize(WP_CURRENT_CONTEXT);
158
-
159
- float sum = 0.0;
160
- for (int i=0; i < num_tris; ++i)
161
- {
162
- bounds_host[i] = bounds3();
163
- wp::vec3 p0 = points_host[indices_host[i*3+0]];
164
- wp::vec3 p1 = points_host[indices_host[i*3+1]];
165
- wp::vec3 p2 = points_host[indices_host[i*3+2]];
166
- bounds_host[i].add_point(p0);
167
- bounds_host[i].add_point(p1);
168
- bounds_host[i].add_point(p2);
169
- sum += length(p0-p1) + length(p0-p2) + length(p2-p1);
170
- }
171
- mesh.average_edge_length = sum / (num_tris*3);
172
-
173
- BVH bvh_host = bvh_create(bounds_host, num_tris);
174
- BVH bvh_device = bvh_clone(WP_CURRENT_CONTEXT, bvh_host);
175
-
176
- bvh_destroy_host(bvh_host);
177
-
178
- // save gpu-side copy of bounds
179
- mesh.bounds = (bounds3*)alloc_device(WP_CURRENT_CONTEXT, sizeof(bounds3)*num_tris);
180
- memcpy_h2d(WP_CURRENT_CONTEXT, mesh.bounds, bounds_host, sizeof(bounds3)*num_tris);
181
-
182
- free_host(points_host);
183
- free_host(indices_host);
184
- free_host(bounds_host);
185
-
186
- mesh.bvh = bvh_device;
187
-
188
- if (support_winding_number)
189
- {
190
- int num_bvh_nodes = 2*num_tris-1;
191
- mesh.solid_angle_props = (SolidAngleProps*)alloc_device(WP_CURRENT_CONTEXT, sizeof(SolidAngleProps)*num_bvh_nodes);
192
- }
193
- }
194
-
195
- Mesh* mesh_device = (Mesh*)alloc_device(WP_CURRENT_CONTEXT, sizeof(Mesh));
196
- memcpy_h2d(WP_CURRENT_CONTEXT, mesh_device, &mesh, sizeof(Mesh));
197
-
198
- // save descriptor
199
- uint64_t mesh_id = (uint64_t)mesh_device;
200
- mesh_add_descriptor(mesh_id, mesh);
201
-
202
- if (support_winding_number)
203
- {
204
- mesh_refit_device(mesh_id);
205
- }
206
- return mesh_id;
207
- }
208
143
 
209
144
  void mesh_destroy_host(uint64_t id)
210
145
  {
211
146
  Mesh* m = (Mesh*)(id);
212
147
 
213
- delete[] m->bounds;
148
+ delete[] m->lowers;
149
+ delete[] m->uppers;
150
+
214
151
  if (m->solid_angle_props) {
215
152
  delete [] m->solid_angle_props;
216
153
  }
@@ -219,25 +156,6 @@ void mesh_destroy_host(uint64_t id)
219
156
  delete m;
220
157
  }
221
158
 
222
- void mesh_destroy_device(uint64_t id)
223
- {
224
- Mesh mesh;
225
- if (mesh_get_descriptor(id, mesh))
226
- {
227
- ContextGuard guard(mesh.context);
228
-
229
- bvh_destroy_device(mesh.bvh);
230
-
231
- free_device(WP_CURRENT_CONTEXT, mesh.bounds);
232
- free_device(WP_CURRENT_CONTEXT, (Mesh*)id);
233
-
234
- if (mesh.solid_angle_props) {
235
- free_device(WP_CURRENT_CONTEXT, mesh.solid_angle_props);
236
- }
237
- mesh_rem_descriptor(id);
238
- }
239
- }
240
-
241
159
  void mesh_refit_host(uint64_t id)
242
160
  {
243
161
  Mesh* m = (Mesh*)(id);
@@ -245,13 +163,19 @@ void mesh_refit_host(uint64_t id)
245
163
  float sum = 0.0;
246
164
  for (int i=0; i < m->num_tris; ++i)
247
165
  {
248
- m->bounds[i] = bounds3();
249
166
  wp::vec3 p0 = m->points.data[m->indices.data[i*3+0]];
250
167
  wp::vec3 p1 = m->points.data[m->indices.data[i*3+1]];
251
168
  wp::vec3 p2 = m->points.data[m->indices.data[i*3+2]];
252
- m->bounds[i].add_point(p0);
253
- m->bounds[i].add_point(p1);
254
- m->bounds[i].add_point(p2);
169
+
170
+ // compute triangle bounds
171
+ bounds3 b;
172
+ b.add_point(p0);
173
+ b.add_point(p1);
174
+ b.add_point(p2);
175
+
176
+ m->lowers[i] = b.lower;
177
+ m->uppers[i] = b.upper;
178
+
255
179
  sum += length(p0-p1) + length(p0-p2) + length(p2-p1);
256
180
  }
257
181
  m->average_edge_length = sum / (m->num_tris*3);
@@ -263,7 +187,7 @@ void mesh_refit_host(uint64_t id)
263
187
  }
264
188
  else
265
189
  {
266
- bvh_refit_host(m->bvh, m->bounds);
190
+ bvh_refit_host(m->bvh);
267
191
  }
268
192
  }
269
193
 
@@ -271,9 +195,10 @@ void mesh_refit_host(uint64_t id)
271
195
  // stubs for non-CUDA platforms
272
196
  #if !WP_ENABLE_CUDA
273
197
 
274
- void mesh_refit_device(uint64_t id)
275
- {
276
- }
198
+
199
+ WP_API uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::array_t<wp::vec3> velocities, wp::array_t<int> tris, int num_points, int num_tris, int support_winding_number) { return 0; }
200
+ WP_API void mesh_destroy_device(uint64_t id) {}
201
+ WP_API void mesh_refit_device(uint64_t id) {}
277
202
 
278
203
 
279
204
  #endif // !WP_ENABLE_CUDA
warp/native/mesh.cu CHANGED
@@ -10,13 +10,12 @@
10
10
  #include "cuda_util.h"
11
11
  #include "mesh.h"
12
12
  #include "bvh.h"
13
- #include "thrust/device_ptr.h"
14
- #include <thrust/scan.h>
13
+ #include "scan.h"
15
14
 
16
15
  namespace wp
17
16
  {
18
17
 
19
- __global__ void compute_triangle_bounds(int n, const vec3* points, const int* indices, bounds3* b)
18
+ __global__ void compute_triangle_bounds(int n, const vec3* points, const int* indices, vec3* lowers, vec3* uppers)
20
19
  {
21
20
  const int tid = blockIdx.x*blockDim.x + threadIdx.x;
22
21
 
@@ -34,7 +33,8 @@ __global__ void compute_triangle_bounds(int n, const vec3* points, const int* in
34
33
  vec3 lower = min(min(p, q), r);
35
34
  vec3 upper = max(max(p, q), r);
36
35
 
37
- b[tid] = bounds3(lower, upper);
36
+ lowers[tid] = lower;
37
+ uppers[tid] = upper;
38
38
  }
39
39
  }
40
40
 
@@ -159,44 +159,135 @@ void bvh_refit_with_solid_angle_device(BVH& bvh, Mesh& mesh)
159
159
  // clear child counters
160
160
  memset_device(WP_CURRENT_CONTEXT, bvh.node_counts, 0, sizeof(int)*bvh.max_nodes);
161
161
 
162
- wp_launch_device(WP_CURRENT_CONTEXT, bvh_refit_with_solid_angle_kernel, bvh.max_nodes, (bvh.max_nodes, bvh.node_parents, bvh.node_counts, bvh.node_lowers, bvh.node_uppers, mesh.points, mesh.indices, mesh.solid_angle_props));
162
+ wp_launch_device(WP_CURRENT_CONTEXT, bvh_refit_with_solid_angle_kernel, bvh.num_items, (bvh.num_items, bvh.node_parents, bvh.node_counts, bvh.node_lowers, bvh.node_uppers, mesh.points, mesh.indices, mesh.solid_angle_props));
163
163
  }
164
+
164
165
  } // namespace wp
165
166
 
166
- void mesh_refit_device(uint64_t id)
167
+
168
+ uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::array_t<wp::vec3> velocities, wp::array_t<int> indices, int num_points, int num_tris, int support_winding_number)
169
+ {
170
+ ContextGuard guard(context);
171
+
172
+ wp::Mesh mesh(points, velocities, indices, num_points, num_tris);
173
+
174
+ mesh.context = context ? context : cuda_context_get_current();
175
+
176
+ {
177
+ // // todo: BVH creation only on CPU at the moment so temporarily bring all the data back to host
178
+ // vec3* points_host = (vec3*)alloc_host(sizeof(vec3)*num_points);
179
+ // int* indices_host = (int*)alloc_host(sizeof(int)*num_tris*3);
180
+ // bounds3* bounds_host = (bounds3*)alloc_host(sizeof(bounds3)*num_tris);
181
+
182
+ // memcpy_d2h(WP_CURRENT_CONTEXT, points_host, points, sizeof(vec3)*num_points);
183
+ // memcpy_d2h(WP_CURRENT_CONTEXT, indices_host, indices, sizeof(int)*num_tris*3);
184
+ // cuda_context_synchronize(WP_CURRENT_CONTEXT);
185
+
186
+ // float sum = 0.0;
187
+ // for (int i=0; i < num_tris; ++i)
188
+ // {
189
+ // bounds_host[i] = bounds3();
190
+ // wp::vec3 p0 = points_host[indices_host[i*3+0]];
191
+ // wp::vec3 p1 = points_host[indices_host[i*3+1]];
192
+ // wp::vec3 p2 = points_host[indices_host[i*3+2]];
193
+ // bounds_host[i].add_point(p0);
194
+ // bounds_host[i].add_point(p1);
195
+ // bounds_host[i].add_point(p2);
196
+ // sum += length(p0-p1) + length(p0-p2) + length(p2-p1);
197
+ // }
198
+ // mesh.average_edge_length = sum / (num_tris*3);
199
+
200
+ // BVH bvh_host = bvh_create(bounds_host, num_tris);
201
+ // BVH bvh_device = bvh_clone(WP_CURRENT_CONTEXT, bvh_host);
202
+
203
+ // bvh_destroy_host(bvh_host);
204
+
205
+ // create lower upper arrays expected by GPU BVH builder
206
+ mesh.lowers = (wp::vec3*)alloc_temp_device(WP_CURRENT_CONTEXT, sizeof(wp::vec3)*num_tris);
207
+ mesh.uppers = (wp::vec3*)alloc_temp_device(WP_CURRENT_CONTEXT, sizeof(wp::vec3)*num_tris);
208
+
209
+ wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_triangle_bounds, num_tris, (num_tris, points.data, indices.data, mesh.lowers, mesh.uppers));
210
+
211
+ uint64_t bvh_id = bvh_create_device(mesh.context, mesh.lowers, mesh.uppers, num_tris);
212
+ wp::bvh_get_descriptor(bvh_id, mesh.bvh);
213
+
214
+ if (support_winding_number)
215
+ {
216
+ int num_bvh_nodes = 2*num_tris;
217
+ mesh.solid_angle_props = (wp::SolidAngleProps*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::SolidAngleProps)*num_bvh_nodes);
218
+ }
219
+ }
220
+
221
+ wp::Mesh* mesh_device = (wp::Mesh*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::Mesh));
222
+ memcpy_h2d(WP_CURRENT_CONTEXT, mesh_device, &mesh, sizeof(wp::Mesh));
223
+
224
+ // save descriptor
225
+ uint64_t mesh_id = (uint64_t)mesh_device;
226
+ mesh_add_descriptor(mesh_id, mesh);
227
+
228
+ if (support_winding_number)
229
+ mesh_refit_device(mesh_id);
230
+
231
+ return mesh_id;
232
+ }
233
+
234
+ void mesh_destroy_device(uint64_t id)
167
235
  {
236
+ wp::Mesh mesh;
237
+ if (wp::mesh_get_descriptor(id, mesh))
238
+ {
239
+ ContextGuard guard(mesh.context);
240
+
241
+ wp::bvh_destroy_device(mesh.bvh);
242
+
243
+ free_device(WP_CURRENT_CONTEXT, mesh.lowers);
244
+ free_device(WP_CURRENT_CONTEXT, mesh.uppers);
245
+ free_device(WP_CURRENT_CONTEXT, (wp::Mesh*)id);
168
246
 
247
+ if (mesh.solid_angle_props) {
248
+ free_device(WP_CURRENT_CONTEXT, mesh.solid_angle_props);
249
+ }
250
+ wp::mesh_rem_descriptor(id);
251
+ }
252
+ }
253
+
254
+ void mesh_update_stats(uint64_t id)
255
+ {
256
+
257
+ }
258
+
259
+ void mesh_refit_device(uint64_t id)
260
+ {
169
261
  // recompute triangle bounds
170
262
  wp::Mesh m;
171
263
  if (mesh_get_descriptor(id, m))
172
264
  {
173
265
  ContextGuard guard(m.context);
174
266
 
175
- // we compute mesh the average edge length
176
- // for use in mesh_query_point_sign_normal()
177
- // since it relies on an epsilon for welding
178
-
179
- // re-use bounds memory temporarily for computing edge lengths
180
- float* length_tmp_ptr = (float*)m.bounds;
181
- wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_mesh_edge_lengths, m.num_tris, (m.num_tris, m.points, m.indices, length_tmp_ptr));
182
-
183
- thrust::inclusive_scan(
184
- thrust::device_ptr<float>(length_tmp_ptr),
185
- thrust::device_ptr<float>(length_tmp_ptr + m.num_tris),
186
- thrust::device_ptr<float>(length_tmp_ptr));
187
-
188
- wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_average_mesh_edge_length, 1, (m.num_tris, length_tmp_ptr, (wp::Mesh*)id));
189
- wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_triangle_bounds, m.num_tris, (m.num_tris, m.points, m.indices, m.bounds));
267
+ wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_triangle_bounds, m.num_tris, (m.num_tris, m.points, m.indices, m.lowers, m.uppers));
190
268
 
191
269
  if (m.solid_angle_props)
192
270
  {
271
+ // we compute mesh the average edge length
272
+ // for use in mesh_query_point_sign_normal()
273
+ // since it relies on an epsilon for welding
274
+
275
+ // reuse bounds memory temporarily for computing edge lengths
276
+ float* length_tmp_ptr = (float*)m.lowers;
277
+ wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_mesh_edge_lengths, m.num_tris, (m.num_tris, m.points, m.indices, length_tmp_ptr));
278
+
279
+ scan_device(length_tmp_ptr, length_tmp_ptr, m.num_tris, true);
280
+
281
+ wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_average_mesh_edge_length, 1, (m.num_tris, length_tmp_ptr, (wp::Mesh*)id));
282
+ wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_triangle_bounds, m.num_tris, (m.num_tris, m.points, m.indices, m.lowers, m.uppers));
283
+
284
+ // update solid angle data
193
285
  bvh_refit_with_solid_angle_device(m.bvh, m);
194
286
  }
195
287
  else
196
288
  {
197
- bvh_refit_device(m.bvh, m.bounds);
289
+ bvh_refit_device(m.bvh);
198
290
  }
199
291
  }
200
-
201
292
  }
202
293