warp-lang 0.9.0__py3-none-win_amd64.whl → 0.11.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of warp-lang might be problematic. Click here for more details.

Files changed (315)
  1. warp/__init__.py +15 -7
  2. warp/__init__.pyi +1 -0
  3. warp/bin/warp-clang.dll +0 -0
  4. warp/bin/warp.dll +0 -0
  5. warp/build.py +22 -443
  6. warp/build_dll.py +384 -0
  7. warp/builtins.py +998 -488
  8. warp/codegen.py +1307 -739
  9. warp/config.py +5 -3
  10. warp/constants.py +6 -0
  11. warp/context.py +1291 -548
  12. warp/dlpack.py +31 -31
  13. warp/fabric.py +326 -0
  14. warp/fem/__init__.py +27 -0
  15. warp/fem/cache.py +389 -0
  16. warp/fem/dirichlet.py +181 -0
  17. warp/fem/domain.py +263 -0
  18. warp/fem/field/__init__.py +101 -0
  19. warp/fem/field/field.py +149 -0
  20. warp/fem/field/nodal_field.py +299 -0
  21. warp/fem/field/restriction.py +21 -0
  22. warp/fem/field/test.py +181 -0
  23. warp/fem/field/trial.py +183 -0
  24. warp/fem/geometry/__init__.py +19 -0
  25. warp/fem/geometry/closest_point.py +70 -0
  26. warp/fem/geometry/deformed_geometry.py +271 -0
  27. warp/fem/geometry/element.py +744 -0
  28. warp/fem/geometry/geometry.py +186 -0
  29. warp/fem/geometry/grid_2d.py +373 -0
  30. warp/fem/geometry/grid_3d.py +435 -0
  31. warp/fem/geometry/hexmesh.py +953 -0
  32. warp/fem/geometry/partition.py +376 -0
  33. warp/fem/geometry/quadmesh_2d.py +532 -0
  34. warp/fem/geometry/tetmesh.py +840 -0
  35. warp/fem/geometry/trimesh_2d.py +577 -0
  36. warp/fem/integrate.py +1616 -0
  37. warp/fem/operator.py +191 -0
  38. warp/fem/polynomial.py +213 -0
  39. warp/fem/quadrature/__init__.py +2 -0
  40. warp/fem/quadrature/pic_quadrature.py +245 -0
  41. warp/fem/quadrature/quadrature.py +294 -0
  42. warp/fem/space/__init__.py +292 -0
  43. warp/fem/space/basis_space.py +489 -0
  44. warp/fem/space/collocated_function_space.py +105 -0
  45. warp/fem/space/dof_mapper.py +236 -0
  46. warp/fem/space/function_space.py +145 -0
  47. warp/fem/space/grid_2d_function_space.py +267 -0
  48. warp/fem/space/grid_3d_function_space.py +306 -0
  49. warp/fem/space/hexmesh_function_space.py +352 -0
  50. warp/fem/space/partition.py +350 -0
  51. warp/fem/space/quadmesh_2d_function_space.py +369 -0
  52. warp/fem/space/restriction.py +160 -0
  53. warp/fem/space/shape/__init__.py +15 -0
  54. warp/fem/space/shape/cube_shape_function.py +738 -0
  55. warp/fem/space/shape/shape_function.py +103 -0
  56. warp/fem/space/shape/square_shape_function.py +611 -0
  57. warp/fem/space/shape/tet_shape_function.py +567 -0
  58. warp/fem/space/shape/triangle_shape_function.py +429 -0
  59. warp/fem/space/tetmesh_function_space.py +292 -0
  60. warp/fem/space/topology.py +295 -0
  61. warp/fem/space/trimesh_2d_function_space.py +221 -0
  62. warp/fem/types.py +77 -0
  63. warp/fem/utils.py +495 -0
  64. warp/native/array.h +164 -55
  65. warp/native/builtin.h +150 -174
  66. warp/native/bvh.cpp +75 -328
  67. warp/native/bvh.cu +406 -23
  68. warp/native/bvh.h +37 -45
  69. warp/native/clang/clang.cpp +136 -24
  70. warp/native/crt.cpp +1 -76
  71. warp/native/crt.h +111 -104
  72. warp/native/cuda_crt.h +1049 -0
  73. warp/native/cuda_util.cpp +15 -3
  74. warp/native/cuda_util.h +3 -1
  75. warp/native/cutlass/tools/library/scripts/conv2d_operation.py +463 -0
  76. warp/native/cutlass/tools/library/scripts/conv3d_operation.py +321 -0
  77. warp/native/cutlass/tools/library/scripts/gemm_operation.py +988 -0
  78. warp/native/cutlass/tools/library/scripts/generator.py +4625 -0
  79. warp/native/cutlass/tools/library/scripts/library.py +799 -0
  80. warp/native/cutlass/tools/library/scripts/manifest.py +402 -0
  81. warp/native/cutlass/tools/library/scripts/pycutlass/docs/source/conf.py +96 -0
  82. warp/native/cutlass/tools/library/scripts/pycutlass/profile/conv/conv2d_f16_sm80.py +106 -0
  83. warp/native/cutlass/tools/library/scripts/pycutlass/profile/gemm/gemm_f32_sm80.py +91 -0
  84. warp/native/cutlass/tools/library/scripts/pycutlass/setup.py +80 -0
  85. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/__init__.py +48 -0
  86. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/arguments.py +118 -0
  87. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/c_types.py +241 -0
  88. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/compiler.py +432 -0
  89. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/conv2d_operation.py +631 -0
  90. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/epilogue.py +1026 -0
  91. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/frontend.py +104 -0
  92. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/gemm_operation.py +1276 -0
  93. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/library.py +744 -0
  94. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/memory_manager.py +74 -0
  95. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/operation.py +110 -0
  96. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/parser.py +619 -0
  97. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/reduction_operation.py +398 -0
  98. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/tensor_ref.py +70 -0
  99. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/__init__.py +4 -0
  100. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/conv2d_testbed.py +646 -0
  101. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_grouped_testbed.py +235 -0
  102. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_testbed.py +557 -0
  103. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/profiler.py +70 -0
  104. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/type_hint.py +39 -0
  105. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/__init__.py +1 -0
  106. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/device.py +76 -0
  107. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/reference_model.py +255 -0
  108. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/__init__.py +0 -0
  109. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +201 -0
  110. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +177 -0
  111. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +98 -0
  112. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +95 -0
  113. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +163 -0
  114. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +187 -0
  115. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +309 -0
  116. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +54 -0
  117. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  118. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  119. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +253 -0
  120. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +97 -0
  121. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +242 -0
  122. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  123. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  124. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/run_all_tests.py +10 -0
  125. warp/native/cutlass/tools/library/scripts/pycutlass/test/frontend/test_frontend.py +146 -0
  126. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/__init__.py +0 -0
  127. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_bf16_sm80.py +96 -0
  128. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f16_sm80.py +447 -0
  129. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f32_sm80.py +146 -0
  130. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f64_sm80.py +102 -0
  131. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_grouped_sm80.py +203 -0
  132. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_s8_sm80.py +229 -0
  133. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/run_all_tests.py +9 -0
  134. warp/native/cutlass/tools/library/scripts/pycutlass/test/unit/test_sm80.py +453 -0
  135. warp/native/cutlass/tools/library/scripts/rank_2k_operation.py +398 -0
  136. warp/native/cutlass/tools/library/scripts/rank_k_operation.py +387 -0
  137. warp/native/cutlass/tools/library/scripts/rt.py +796 -0
  138. warp/native/cutlass/tools/library/scripts/symm_operation.py +400 -0
  139. warp/native/cutlass/tools/library/scripts/trmm_operation.py +407 -0
  140. warp/native/cutlass_gemm.cu +5 -3
  141. warp/native/exports.h +1240 -949
  142. warp/native/fabric.h +228 -0
  143. warp/native/hashgrid.cpp +4 -4
  144. warp/native/hashgrid.h +22 -2
  145. warp/native/initializer_array.h +2 -2
  146. warp/native/intersect.h +22 -7
  147. warp/native/intersect_adj.h +8 -8
  148. warp/native/intersect_tri.h +13 -16
  149. warp/native/marching.cu +157 -161
  150. warp/native/mat.h +119 -19
  151. warp/native/matnn.h +2 -2
  152. warp/native/mesh.cpp +108 -83
  153. warp/native/mesh.cu +243 -6
  154. warp/native/mesh.h +1547 -458
  155. warp/native/nanovdb/NanoVDB.h +1 -1
  156. warp/native/noise.h +272 -329
  157. warp/native/quat.h +51 -8
  158. warp/native/rand.h +45 -35
  159. warp/native/range.h +6 -2
  160. warp/native/reduce.cpp +157 -0
  161. warp/native/reduce.cu +348 -0
  162. warp/native/runlength_encode.cpp +62 -0
  163. warp/native/runlength_encode.cu +46 -0
  164. warp/native/scan.cu +11 -13
  165. warp/native/scan.h +1 -0
  166. warp/native/solid_angle.h +442 -0
  167. warp/native/sort.cpp +13 -0
  168. warp/native/sort.cu +9 -1
  169. warp/native/sparse.cpp +338 -0
  170. warp/native/sparse.cu +545 -0
  171. warp/native/spatial.h +2 -2
  172. warp/native/temp_buffer.h +30 -0
  173. warp/native/vec.h +126 -24
  174. warp/native/volume.h +120 -0
  175. warp/native/warp.cpp +658 -53
  176. warp/native/warp.cu +660 -68
  177. warp/native/warp.h +112 -12
  178. warp/optim/__init__.py +1 -0
  179. warp/optim/linear.py +922 -0
  180. warp/optim/sgd.py +92 -0
  181. warp/render/render_opengl.py +392 -152
  182. warp/render/render_usd.py +11 -11
  183. warp/sim/__init__.py +2 -2
  184. warp/sim/articulation.py +385 -185
  185. warp/sim/collide.py +21 -8
  186. warp/sim/import_mjcf.py +297 -106
  187. warp/sim/import_urdf.py +389 -210
  188. warp/sim/import_usd.py +198 -97
  189. warp/sim/inertia.py +17 -18
  190. warp/sim/integrator_euler.py +14 -8
  191. warp/sim/integrator_xpbd.py +161 -19
  192. warp/sim/model.py +795 -291
  193. warp/sim/optimizer.py +2 -6
  194. warp/sim/render.py +65 -3
  195. warp/sim/utils.py +3 -0
  196. warp/sparse.py +1227 -0
  197. warp/stubs.py +665 -223
  198. warp/tape.py +66 -15
  199. warp/tests/__main__.py +3 -6
  200. warp/tests/assets/curlnoise_golden.npy +0 -0
  201. warp/tests/assets/pnoise_golden.npy +0 -0
  202. warp/tests/assets/torus.usda +105 -105
  203. warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
  204. warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
  205. warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
  206. warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
  207. warp/tests/aux_test_unresolved_func.py +14 -0
  208. warp/tests/aux_test_unresolved_symbol.py +14 -0
  209. warp/tests/disabled_kinematics.py +239 -0
  210. warp/tests/run_coverage_serial.py +31 -0
  211. warp/tests/test_adam.py +103 -106
  212. warp/tests/test_arithmetic.py +128 -74
  213. warp/tests/test_array.py +1497 -211
  214. warp/tests/test_array_reduce.py +150 -0
  215. warp/tests/test_atomic.py +64 -28
  216. warp/tests/test_bool.py +99 -0
  217. warp/tests/test_builtins_resolution.py +1292 -0
  218. warp/tests/test_bvh.py +75 -43
  219. warp/tests/test_closest_point_edge_edge.py +54 -57
  220. warp/tests/test_codegen.py +233 -128
  221. warp/tests/test_compile_consts.py +28 -20
  222. warp/tests/test_conditional.py +108 -24
  223. warp/tests/test_copy.py +10 -12
  224. warp/tests/test_ctypes.py +112 -88
  225. warp/tests/test_dense.py +21 -14
  226. warp/tests/test_devices.py +98 -0
  227. warp/tests/test_dlpack.py +136 -108
  228. warp/tests/test_examples.py +277 -0
  229. warp/tests/test_fabricarray.py +955 -0
  230. warp/tests/test_fast_math.py +15 -11
  231. warp/tests/test_fem.py +1271 -0
  232. warp/tests/test_fp16.py +53 -19
  233. warp/tests/test_func.py +187 -74
  234. warp/tests/test_generics.py +194 -49
  235. warp/tests/test_grad.py +180 -116
  236. warp/tests/test_grad_customs.py +176 -0
  237. warp/tests/test_hash_grid.py +52 -37
  238. warp/tests/test_import.py +10 -23
  239. warp/tests/test_indexedarray.py +577 -24
  240. warp/tests/test_intersect.py +18 -9
  241. warp/tests/test_large.py +141 -0
  242. warp/tests/test_launch.py +251 -15
  243. warp/tests/test_lerp.py +64 -65
  244. warp/tests/test_linear_solvers.py +154 -0
  245. warp/tests/test_lvalue.py +493 -0
  246. warp/tests/test_marching_cubes.py +12 -13
  247. warp/tests/test_mat.py +508 -2778
  248. warp/tests/test_mat_lite.py +115 -0
  249. warp/tests/test_mat_scalar_ops.py +2889 -0
  250. warp/tests/test_math.py +103 -9
  251. warp/tests/test_matmul.py +305 -69
  252. warp/tests/test_matmul_lite.py +410 -0
  253. warp/tests/test_mesh.py +71 -14
  254. warp/tests/test_mesh_query_aabb.py +41 -25
  255. warp/tests/test_mesh_query_point.py +325 -34
  256. warp/tests/test_mesh_query_ray.py +39 -22
  257. warp/tests/test_mlp.py +30 -22
  258. warp/tests/test_model.py +92 -89
  259. warp/tests/test_modules_lite.py +39 -0
  260. warp/tests/test_multigpu.py +88 -114
  261. warp/tests/test_noise.py +12 -11
  262. warp/tests/test_operators.py +16 -20
  263. warp/tests/test_options.py +11 -11
  264. warp/tests/test_pinned.py +17 -18
  265. warp/tests/test_print.py +32 -11
  266. warp/tests/test_quat.py +275 -129
  267. warp/tests/test_rand.py +18 -16
  268. warp/tests/test_reload.py +38 -34
  269. warp/tests/test_rounding.py +50 -43
  270. warp/tests/test_runlength_encode.py +190 -0
  271. warp/tests/test_smoothstep.py +9 -11
  272. warp/tests/test_snippet.py +143 -0
  273. warp/tests/test_sparse.py +460 -0
  274. warp/tests/test_spatial.py +276 -243
  275. warp/tests/test_streams.py +110 -85
  276. warp/tests/test_struct.py +331 -85
  277. warp/tests/test_tape.py +39 -21
  278. warp/tests/test_torch.py +118 -89
  279. warp/tests/test_transient_module.py +12 -13
  280. warp/tests/test_types.py +614 -0
  281. warp/tests/test_utils.py +494 -0
  282. warp/tests/test_vec.py +354 -1987
  283. warp/tests/test_vec_lite.py +73 -0
  284. warp/tests/test_vec_scalar_ops.py +2099 -0
  285. warp/tests/test_volume.py +457 -293
  286. warp/tests/test_volume_write.py +124 -134
  287. warp/tests/unittest_serial.py +35 -0
  288. warp/tests/unittest_suites.py +341 -0
  289. warp/tests/unittest_utils.py +568 -0
  290. warp/tests/unused_test_misc.py +71 -0
  291. warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
  292. warp/thirdparty/appdirs.py +36 -45
  293. warp/thirdparty/unittest_parallel.py +549 -0
  294. warp/torch.py +72 -30
  295. warp/types.py +1744 -713
  296. warp/utils.py +360 -350
  297. warp_lang-0.11.0.dist-info/LICENSE.md +36 -0
  298. warp_lang-0.11.0.dist-info/METADATA +238 -0
  299. warp_lang-0.11.0.dist-info/RECORD +332 -0
  300. {warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/WHEEL +1 -1
  301. warp/bin/warp-clang.exp +0 -0
  302. warp/bin/warp-clang.lib +0 -0
  303. warp/bin/warp.exp +0 -0
  304. warp/bin/warp.lib +0 -0
  305. warp/tests/test_all.py +0 -215
  306. warp/tests/test_array_scan.py +0 -60
  307. warp/tests/test_base.py +0 -208
  308. warp/tests/test_unresolved_func.py +0 -7
  309. warp/tests/test_unresolved_symbol.py +0 -7
  310. warp_lang-0.9.0.dist-info/METADATA +0 -20
  311. warp_lang-0.9.0.dist-info/RECORD +0 -177
  312. /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
  313. /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
  314. /warp/tests/{test_square.py → aux_test_square.py} +0 -0
  315. {warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/top_level.txt +0 -0
warp/native/mesh.cpp CHANGED
@@ -49,131 +49,156 @@ void mesh_rem_descriptor(uint64_t id)
49
49
 
50
50
  } // namespace wp
51
51
 
52
- uint64_t mesh_create_host(array_t<wp::vec3> points, array_t<wp::vec3> velocities, array_t<int> indices, int num_points, int num_tris)
52
+ void bvh_refit_with_solid_angle_recursive_host(BVH& bvh, int index, Mesh& mesh)
53
53
  {
54
- Mesh* m = new Mesh(points, velocities, indices, num_points, num_tris);
54
+ BVHPackedNodeHalf& lower = bvh.node_lowers[index];
55
+ BVHPackedNodeHalf& upper = bvh.node_uppers[index];
55
56
 
56
- m->bounds = new bounds3[num_tris];
57
+ if (lower.b)
58
+ {
59
+ // Leaf, compute properties
60
+ const int leaf_index = lower.i;
57
61
 
58
- for (int i=0; i < num_tris; ++i)
62
+ precompute_triangle_solid_angle_props(mesh.points[mesh.indices[leaf_index*3+0]], mesh.points[mesh.indices[leaf_index*3+1]], mesh.points[mesh.indices[leaf_index*3+2]], mesh.solid_angle_props[index]);
63
+ (vec3&)lower = mesh.solid_angle_props[index].box.lower;
64
+ (vec3&)upper = mesh.solid_angle_props[index].box.upper;
65
+ }
66
+ else
59
67
  {
60
- m->bounds[i].add_point(points[indices[i*3+0]]);
61
- m->bounds[i].add_point(points[indices[i*3+1]]);
62
- m->bounds[i].add_point(points[indices[i*3+2]]);
68
+ int left_index = lower.i;
69
+ int right_index = upper.i;
70
+
71
+ bvh_refit_with_solid_angle_recursive_host(bvh, left_index, mesh);
72
+ bvh_refit_with_solid_angle_recursive_host(bvh, right_index, mesh);
73
+
74
+ // combine
75
+ SolidAngleProps* left_child_data = &mesh.solid_angle_props[left_index];
76
+ SolidAngleProps* right_child_data = (left_index != right_index) ? &mesh.solid_angle_props[right_index] : NULL;
77
+
78
+ combine_precomputed_solid_angle_props(mesh.solid_angle_props[index], left_child_data, right_child_data);
79
+
80
+ // compute union of children
81
+ const vec3& left_lower = (vec3&)bvh.node_lowers[left_index];
82
+ const vec3& left_upper = (vec3&)bvh.node_uppers[left_index];
83
+
84
+ const vec3& right_lower = (vec3&)bvh.node_lowers[right_index];
85
+ const vec3& right_upper = (vec3&)bvh.node_uppers[right_index];
86
+
87
+ // union of child bounds
88
+ vec3 new_lower = min(left_lower, right_lower);
89
+ vec3 new_upper = max(left_upper, right_upper);
90
+
91
+ // write new BVH nodes
92
+ (vec3&)lower = new_lower;
93
+ (vec3&)upper = new_upper;
63
94
  }
64
-
65
- m->bvh = bvh_create(m->bounds, num_tris);
66
-
67
- return (uint64_t)m;
68
95
  }
69
96
 
70
- uint64_t mesh_create_device(void* context, array_t<wp::vec3> points, array_t<wp::vec3> velocities, array_t<int> indices, int num_points, int num_tris)
97
+ void bvh_refit_with_solid_angle_host(BVH& bvh, Mesh& mesh)
71
98
  {
72
- ContextGuard guard(context);
73
-
74
- Mesh mesh(points, velocities, indices, num_points, num_tris);
75
-
76
- mesh.context = context ? context : cuda_context_get_current();
99
+ bvh_refit_with_solid_angle_recursive_host(bvh, 0, mesh);
100
+ }
77
101
 
78
- // mesh.points = array_t<vec3>(points, num_points, points_grad);
79
- // mesh.velocities = array_t<vec3>(velocities, num_points, velocities_grad);
80
- // mesh.indices = array_t<int>(indices, num_tris, 3);
102
+ uint64_t mesh_create_host(array_t<wp::vec3> points, array_t<wp::vec3> velocities, array_t<int> indices, int num_points, int num_tris, int support_winding_number)
103
+ {
104
+ Mesh* m = new Mesh(points, velocities, indices, num_points, num_tris);
81
105
 
82
- // mesh.num_points = num_points;
83
- // mesh.num_tris = num_tris;
106
+ m->lowers = new vec3[num_tris];
107
+ m->uppers = new vec3[num_tris];
84
108
 
109
+ float sum = 0.0;
110
+ for (int i=0; i < num_tris; ++i)
85
111
  {
86
- // todo: BVH creation only on CPU at the moment so temporarily bring all the data back to host
87
- vec3* points_host = (vec3*)alloc_host(sizeof(vec3)*num_points);
88
- int* indices_host = (int*)alloc_host(sizeof(int)*num_tris*3);
89
- bounds3* bounds_host = (bounds3*)alloc_host(sizeof(bounds3)*num_tris);
90
-
91
- memcpy_d2h(WP_CURRENT_CONTEXT, points_host, points, sizeof(vec3)*num_points);
92
- memcpy_d2h(WP_CURRENT_CONTEXT, indices_host, indices, sizeof(int)*num_tris*3);
93
- cuda_context_synchronize(WP_CURRENT_CONTEXT);
94
-
95
- for (int i=0; i < num_tris; ++i)
96
- {
97
- bounds_host[i] = bounds3();
98
- bounds_host[i].add_point(points_host[indices_host[i*3+0]]);
99
- bounds_host[i].add_point(points_host[indices_host[i*3+1]]);
100
- bounds_host[i].add_point(points_host[indices_host[i*3+2]]);
101
- }
102
-
103
- BVH bvh_host = bvh_create(bounds_host, num_tris);
104
- BVH bvh_device = bvh_clone(WP_CURRENT_CONTEXT, bvh_host);
105
-
106
- bvh_destroy_host(bvh_host);
107
-
108
- // save gpu-side copy of bounds
109
- mesh.bounds = (bounds3*)alloc_device(WP_CURRENT_CONTEXT, sizeof(bounds3)*num_tris);
110
- memcpy_h2d(WP_CURRENT_CONTEXT, mesh.bounds, bounds_host, sizeof(bounds3)*num_tris);
111
-
112
- free_host(points_host);
113
- free_host(indices_host);
114
- free_host(bounds_host);
115
-
116
- mesh.bvh = bvh_device;
112
+ wp::vec3& p0 = points[indices[i*3+0]];
113
+ wp::vec3& p1 = points[indices[i*3+1]];
114
+ wp::vec3& p2 = points[indices[i*3+2]];
115
+
116
+ // compute triangle bounds
117
+ bounds3 b;
118
+ b.add_point(p0);
119
+ b.add_point(p1);
120
+ b.add_point(p2);
121
+
122
+ m->lowers[i] = b.lower;
123
+ m->uppers[i] = b.upper;
124
+
125
+ // compute edge lengths
126
+ sum += length(p0-p1) + length(p0-p2) + length(p2-p1);
117
127
  }
128
+ m->average_edge_length = sum / (num_tris*3);
118
129
 
119
- Mesh* mesh_device = (Mesh*)alloc_device(WP_CURRENT_CONTEXT, sizeof(Mesh));
120
- memcpy_h2d(WP_CURRENT_CONTEXT, mesh_device, &mesh, sizeof(Mesh));
130
+ m->bvh = *(wp::BVH*)bvh_create_host(m->lowers, m->uppers, num_tris);
121
131
 
122
- // save descriptor
123
- uint64_t mesh_id = (uint64_t)mesh_device;
124
- mesh_add_descriptor(mesh_id, mesh);
132
+ if (support_winding_number)
133
+ {
134
+ // Let's first compute the sold
135
+ int num_bvh_nodes = 2*num_tris-1;
136
+ m->solid_angle_props = new SolidAngleProps[num_bvh_nodes];
137
+ bvh_refit_with_solid_angle_host(m->bvh, *m);
138
+ }
125
139
 
126
- return mesh_id;
140
+ return (uint64_t)m;
127
141
  }
128
142
 
143
+
129
144
  void mesh_destroy_host(uint64_t id)
130
145
  {
131
146
  Mesh* m = (Mesh*)(id);
132
147
 
133
- delete[] m->bounds;
148
+ delete[] m->lowers;
149
+ delete[] m->uppers;
150
+
151
+ if (m->solid_angle_props) {
152
+ delete [] m->solid_angle_props;
153
+ }
134
154
  bvh_destroy_host(m->bvh);
135
155
 
136
156
  delete m;
137
157
  }
138
158
 
139
- void mesh_destroy_device(uint64_t id)
159
+ void mesh_refit_host(uint64_t id)
140
160
  {
141
- Mesh mesh;
142
- if (mesh_get_descriptor(id, mesh))
161
+ Mesh* m = (Mesh*)(id);
162
+
163
+ float sum = 0.0;
164
+ for (int i=0; i < m->num_tris; ++i)
143
165
  {
144
- ContextGuard guard(mesh.context);
166
+ wp::vec3 p0 = m->points.data[m->indices.data[i*3+0]];
167
+ wp::vec3 p1 = m->points.data[m->indices.data[i*3+1]];
168
+ wp::vec3 p2 = m->points.data[m->indices.data[i*3+2]];
145
169
 
146
- bvh_destroy_device(mesh.bvh);
170
+ // compute triangle bounds
171
+ bounds3 b;
172
+ b.add_point(p0);
173
+ b.add_point(p1);
174
+ b.add_point(p2);
147
175
 
148
- free_device(WP_CURRENT_CONTEXT, mesh.bounds);
149
- free_device(WP_CURRENT_CONTEXT, (Mesh*)id);
176
+ m->lowers[i] = b.lower;
177
+ m->uppers[i] = b.upper;
150
178
 
151
- mesh_rem_descriptor(id);
179
+ sum += length(p0-p1) + length(p0-p2) + length(p2-p1);
152
180
  }
153
- }
154
-
155
- void mesh_refit_host(uint64_t id)
156
- {
157
- Mesh* m = (Mesh*)(id);
181
+ m->average_edge_length = sum / (m->num_tris*3);
158
182
 
159
- for (int i=0; i < m->num_tris; ++i)
183
+ if (m->solid_angle_props)
160
184
  {
161
- m->bounds[i] = bounds3();
162
- m->bounds[i].add_point(m->points.data[m->indices.data[i*3+0]]);
163
- m->bounds[i].add_point(m->points.data[m->indices.data[i*3+1]]);
164
- m->bounds[i].add_point(m->points.data[m->indices.data[i*3+2]]);
185
+ // If solid angle were used, use refit solid angle
186
+ bvh_refit_with_solid_angle_host(m->bvh, *m);
187
+ }
188
+ else
189
+ {
190
+ bvh_refit_host(m->bvh);
165
191
  }
166
-
167
- bvh_refit_host(m->bvh, m->bounds);
168
192
  }
169
193
 
170
194
 
171
195
  // stubs for non-CUDA platforms
172
196
  #if !WP_ENABLE_CUDA
173
197
 
174
- void mesh_refit_device(uint64_t id)
175
- {
176
- }
198
+
199
+ WP_API uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::array_t<wp::vec3> velocities, wp::array_t<int> tris, int num_points, int num_tris, int support_winding_number) { return 0; }
200
+ WP_API void mesh_destroy_device(uint64_t id) {}
201
+ WP_API void mesh_refit_device(uint64_t id) {}
177
202
 
178
203
 
179
204
  #endif // !WP_ENABLE_CUDA
warp/native/mesh.cu CHANGED
@@ -10,11 +10,12 @@
10
10
  #include "cuda_util.h"
11
11
  #include "mesh.h"
12
12
  #include "bvh.h"
13
+ #include "scan.h"
13
14
 
14
15
  namespace wp
15
16
  {
16
17
 
17
- __global__ void compute_triangle_bounds(int n, const vec3* points, const int* indices, bounds3* b)
18
+ __global__ void compute_triangle_bounds(int n, const vec3* points, const int* indices, vec3* lowers, vec3* uppers)
18
19
  {
19
20
  const int tid = blockIdx.x*blockDim.x + threadIdx.x;
20
21
 
@@ -32,25 +33,261 @@ __global__ void compute_triangle_bounds(int n, const vec3* points, const int* in
32
33
  vec3 lower = min(min(p, q), r);
33
34
  vec3 upper = max(max(p, q), r);
34
35
 
35
- b[tid] = bounds3(lower, upper);
36
+ lowers[tid] = lower;
37
+ uppers[tid] = upper;
36
38
  }
37
39
  }
38
40
 
41
+ __global__ void compute_mesh_edge_lengths(int n, const vec3* points, const int* indices, float* edge_lengths)
42
+ {
43
+ const int tid = blockIdx.x*blockDim.x + threadIdx.x;
44
+
45
+ if (tid < n)
46
+ {
47
+ // if leaf then update bounds
48
+ int i = indices[tid*3+0];
49
+ int j = indices[tid*3+1];
50
+ int k = indices[tid*3+2];
51
+
52
+ vec3 p = points[i];
53
+ vec3 q = points[j];
54
+ vec3 r = points[k];
55
+
56
+
57
+ edge_lengths[tid] = length(p-q) + length(p-r) + length(q-r);
58
+ }
59
+ }
60
+
61
+ __global__ void compute_average_mesh_edge_length(int n, float* sum_edge_lengths, Mesh* m)
62
+ {
63
+ m->average_edge_length = sum_edge_lengths[n - 1] / (3*n);
64
+ }
65
+
66
+ __global__ void bvh_refit_with_solid_angle_kernel(int n, const int* __restrict__ parents, int* __restrict__ child_count, BVHPackedNodeHalf* __restrict__ lowers, BVHPackedNodeHalf* __restrict__ uppers, const vec3* points, const int* indices, SolidAngleProps* solid_angle_props)
67
+ {
68
+ int index = blockDim.x*blockIdx.x + threadIdx.x;
69
+
70
+ if (index < n)
71
+ {
72
+ bool leaf = lowers[index].b;
73
+
74
+ if (leaf)
75
+ {
76
+ // update the leaf node
77
+ const int leaf_index = lowers[index].i;
78
+ precompute_triangle_solid_angle_props(points[indices[leaf_index*3+0]], points[indices[leaf_index*3+1]], points[indices[leaf_index*3+2]], solid_angle_props[index]);
79
+
80
+ make_node(lowers+index, solid_angle_props[index].box.lower, leaf_index, true);
81
+ make_node(uppers+index, solid_angle_props[index].box.upper, 0, false);
82
+ }
83
+ else
84
+ {
85
+ // only keep leaf threads
86
+ return;
87
+ }
88
+
89
+ // update hierarchy
90
+ for (;;)
91
+ {
92
+ int parent = parents[index];
93
+
94
+ // reached root
95
+ if (parent == -1)
96
+ return;
97
+
98
+ // ensure all writes are visible
99
+ __threadfence();
100
+
101
+ int finished = atomicAdd(&child_count[parent], 1);
102
+
103
+ // if we have are the last thread (such that the parent node is now complete)
104
+ // then update its bounds and move onto the the next parent in the hierarchy
105
+ if (finished == 1)
106
+ {
107
+ //printf("Compute non-leaf at %d\n", index);
108
+ const int left_child = lowers[parent].i;
109
+ const int right_child = uppers[parent].i;
110
+
111
+ vec3 left_lower = vec3(lowers[left_child].x,
112
+ lowers[left_child].y,
113
+ lowers[left_child].z);
114
+
115
+ vec3 left_upper = vec3(uppers[left_child].x,
116
+ uppers[left_child].y,
117
+ uppers[left_child].z);
118
+
119
+ vec3 right_lower = vec3(lowers[right_child].x,
120
+ lowers[right_child].y,
121
+ lowers[right_child].z);
122
+
123
+
124
+ vec3 right_upper = vec3(uppers[right_child].x,
125
+ uppers[right_child].y,
126
+ uppers[right_child].z);
127
+
128
+ // union of child bounds
129
+ vec3 lower = min(left_lower, right_lower);
130
+ vec3 upper = max(left_upper, right_upper);
131
+
132
+ // write new BVH nodes
133
+ make_node(lowers+parent, lower, left_child, false);
134
+ make_node(uppers+parent, upper, right_child, false);
135
+
136
+ // combine
137
+ SolidAngleProps* left_child_data = &solid_angle_props[left_child];
138
+ SolidAngleProps* right_child_data = (left_child != right_child) ? &solid_angle_props[right_child] : NULL;
139
+
140
+ combine_precomputed_solid_angle_props(solid_angle_props[parent], left_child_data, right_child_data);
141
+
142
+ // move onto processing the parent
143
+ index = parent;
144
+ }
145
+ else
146
+ {
147
+ // parent not ready (we are the first child), terminate thread
148
+ break;
149
+ }
150
+ }
151
+ }
152
+ }
153
+
154
+
155
+ void bvh_refit_with_solid_angle_device(BVH& bvh, Mesh& mesh)
156
+ {
157
+ ContextGuard guard(bvh.context);
158
+
159
+ // clear child counters
160
+ memset_device(WP_CURRENT_CONTEXT, bvh.node_counts, 0, sizeof(int)*bvh.max_nodes);
161
+
162
+ wp_launch_device(WP_CURRENT_CONTEXT, bvh_refit_with_solid_angle_kernel, bvh.num_items, (bvh.num_items, bvh.node_parents, bvh.node_counts, bvh.node_lowers, bvh.node_uppers, mesh.points, mesh.indices, mesh.solid_angle_props));
163
+ }
164
+
39
165
  } // namespace wp
40
166
 
41
- void mesh_refit_device(uint64_t id)
167
+
168
// Creates a mesh on the device and returns its id.
//
// Steps performed here:
//   1. build a host-side wp::Mesh from the given arrays and counts
//   2. allocate per-triangle lower/upper bound arrays (num_tris vec3 each) and fill
//      them with the compute_triangle_bounds kernel
//   3. create a BVH over those bounds with bvh_create_device and store its
//      descriptor in mesh.bvh
//   4. if support_winding_number is non-zero, allocate 2*num_tris SolidAngleProps
//   5. copy the Mesh struct to device memory and register it with mesh_add_descriptor
//   6. if support_winding_number is non-zero, call mesh_refit_device on the new id
//
// context: may be null, in which case mesh.context falls back to
//          cuda_context_get_current().
// returns: the device address of the copied wp::Mesh, cast to uint64_t.
//
// NOTE(review): lowers/uppers are obtained from alloc_temp_device, while
// mesh_destroy_device releases them with free_device - confirm that pairing is valid.
// NOTE(review): none of the allocation results are checked before use.
+ uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::array_t<wp::vec3> velocities, wp::array_t<int> indices, int num_points, int num_tris, int support_winding_number)
169
+ {
170
+ ContextGuard guard(context);
171
+
172
+ wp::Mesh mesh(points, velocities, indices, num_points, num_tris);
173
+
174
+ mesh.context = context ? context : cuda_context_get_current();
175
+
176
+ {
177
// (disabled) the commented-out lines below are a host-side BVH build path; only
// the device-side path after them is compiled
+ // // todo: BVH creation only on CPU at the moment so temporarily bring all the data back to host
178
+ // vec3* points_host = (vec3*)alloc_host(sizeof(vec3)*num_points);
179
+ // int* indices_host = (int*)alloc_host(sizeof(int)*num_tris*3);
180
+ // bounds3* bounds_host = (bounds3*)alloc_host(sizeof(bounds3)*num_tris);
181
+
182
+ // memcpy_d2h(WP_CURRENT_CONTEXT, points_host, points, sizeof(vec3)*num_points);
183
+ // memcpy_d2h(WP_CURRENT_CONTEXT, indices_host, indices, sizeof(int)*num_tris*3);
184
+ // cuda_context_synchronize(WP_CURRENT_CONTEXT);
185
+
186
+ // float sum = 0.0;
187
+ // for (int i=0; i < num_tris; ++i)
188
+ // {
189
+ // bounds_host[i] = bounds3();
190
+ // wp::vec3 p0 = points_host[indices_host[i*3+0]];
191
+ // wp::vec3 p1 = points_host[indices_host[i*3+1]];
192
+ // wp::vec3 p2 = points_host[indices_host[i*3+2]];
193
+ // bounds_host[i].add_point(p0);
194
+ // bounds_host[i].add_point(p1);
195
+ // bounds_host[i].add_point(p2);
196
+ // sum += length(p0-p1) + length(p0-p2) + length(p2-p1);
197
+ // }
198
+ // mesh.average_edge_length = sum / (num_tris*3);
199
+
200
+ // BVH bvh_host = bvh_create(bounds_host, num_tris);
201
+ // BVH bvh_device = bvh_clone(WP_CURRENT_CONTEXT, bvh_host);
202
+
203
+ // bvh_destroy_host(bvh_host);
204
+
205
+ // create lower upper arrays expected by GPU BVH builder
206
+ mesh.lowers = (wp::vec3*)alloc_temp_device(WP_CURRENT_CONTEXT, sizeof(wp::vec3)*num_tris);
207
+ mesh.uppers = (wp::vec3*)alloc_temp_device(WP_CURRENT_CONTEXT, sizeof(wp::vec3)*num_tris);
208
+
209
+ wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_triangle_bounds, num_tris, (num_tris, points.data, indices.data, mesh.lowers, mesh.uppers));
210
+
211
+ uint64_t bvh_id = bvh_create_device(mesh.context, mesh.lowers, mesh.uppers, num_tris);
212
+ wp::bvh_get_descriptor(bvh_id, mesh.bvh);
213
+
214
+ if (support_winding_number)
215
+ {
216
// one SolidAngleProps slot per BVH node; 2*num_tris is used as the node count here
+ int num_bvh_nodes = 2*num_tris;
217
+ mesh.solid_angle_props = (wp::SolidAngleProps*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::SolidAngleProps)*num_bvh_nodes);
218
+ }
219
+ }
220
+
221
+ wp::Mesh* mesh_device = (wp::Mesh*)alloc_device(WP_CURRENT_CONTEXT, sizeof(wp::Mesh));
222
+ memcpy_h2d(WP_CURRENT_CONTEXT, mesh_device, &mesh, sizeof(wp::Mesh));
223
+
224
+ // save descriptor
225
+ uint64_t mesh_id = (uint64_t)mesh_device;
226
+ mesh_add_descriptor(mesh_id, mesh);
227
+
228
// the refit runs after the descriptor is registered because mesh_refit_device
// looks the mesh up by id
+ if (support_winding_number)
229
+ mesh_refit_device(mesh_id);
230
+
231
+ return mesh_id;
232
+ }
233
+
234
// Releases the device resources of the mesh registered under `id`.
// If mesh_get_descriptor finds no entry for `id`, the function does nothing.
// Otherwise, under the mesh's own context, it destroys the BVH, frees the
// lowers/uppers arrays, frees the device-side Mesh struct (`id` reinterpreted as a
// pointer), frees solid_angle_props when that pointer is non-null, and finally
// removes the descriptor.
// NOTE(review): there is no guard against calling this twice with the same id beyond
// the descriptor lookup; it relies on mesh_rem_descriptor making the second lookup fail.
+ void mesh_destroy_device(uint64_t id)
235
+ {
236
+ wp::Mesh mesh;
237
+ if (wp::mesh_get_descriptor(id, mesh))
238
+ {
239
+ ContextGuard guard(mesh.context);
240
+
241
+ wp::bvh_destroy_device(mesh.bvh);
242
+
243
+ free_device(WP_CURRENT_CONTEXT, mesh.lowers);
244
+ free_device(WP_CURRENT_CONTEXT, mesh.uppers);
245
+ free_device(WP_CURRENT_CONTEXT, (wp::Mesh*)id);
246
+
247
+ if (mesh.solid_angle_props) {
248
+ free_device(WP_CURRENT_CONTEXT, mesh.solid_angle_props);
249
+ }
250
+ wp::mesh_rem_descriptor(id);
251
+ }
252
+ }
253
+
254
// The body is empty in this revision: calling it has no effect and `id` is unused.
+ void mesh_update_stats(uint64_t id)
42
255
  {
256
+
257
+ }
43
258
 
259
// Recomputes per-triangle bounds for the mesh registered under `id` and refits its BVH.
// If mesh_get_descriptor finds no entry for `id`, the function does nothing.
// When the mesh has solid_angle_props, it additionally recomputes the average edge
// length (written to the device-side Mesh at `id`) and refits through
// bvh_refit_with_solid_angle_device; otherwise it uses the plain bvh_refit_device.
// NOTE(review): in the solid-angle branch m.lowers is reused as float scratch space
// and the bounds are then recomputed, so the first compute_triangle_bounds launch
// looks redundant on that path - confirm before removing.
+ void mesh_refit_device(uint64_t id)
260
+ {
44
261
  // recompute triangle bounds
45
262
  wp::Mesh m;
46
263
  if (mesh_get_descriptor(id, m))
47
264
  {
48
265
  ContextGuard guard(m.context);
49
266
 
50
- wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_triangle_bounds, m.num_tris, (m.num_tris, m.points, m.indices, m.bounds));
267
+ wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_triangle_bounds, m.num_tris, (m.num_tris, m.points, m.indices, m.lowers, m.uppers));
51
268
 
52
- bvh_refit_device(m.bvh, m.bounds);
53
- }
269
+ if (m.solid_angle_props)
270
+ {
271
+ // we compute the average edge length of the mesh
272
+ // for use in mesh_query_point_sign_normal()
273
+ // since it relies on an epsilon for welding
54
274
 
275
+ // reuse bounds memory temporarily for computing edge lengths
276
// num_tris floats fit inside the num_tris vec3 entries that m.lowers holds
+ float* length_tmp_ptr = (float*)m.lowers;
277
+ wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_mesh_edge_lengths, m.num_tris, (m.num_tris, m.points, m.indices, length_tmp_ptr));
278
+
279
// in-place scan over the per-triangle lengths; the trailing `true` is presumably
// the inclusive-scan flag - TODO confirm against scan_device's declaration
+ scan_device(length_tmp_ptr, length_tmp_ptr, m.num_tris, true);
280
+
281
// single-thread launch that receives the scanned lengths and the device Mesh pointer
+ wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_average_mesh_edge_length, 1, (m.num_tris, length_tmp_ptr, (wp::Mesh*)id));
282
// restore the triangle bounds that the scratch use of m.lowers overwrote
+ wp_launch_device(WP_CURRENT_CONTEXT, wp::compute_triangle_bounds, m.num_tris, (m.num_tris, m.points, m.indices, m.lowers, m.uppers));
283
+
284
+ // update solid angle data
285
+ bvh_refit_with_solid_angle_device(m.bvh, m);
286
+ }
287
+ else
288
+ {
289
+ bvh_refit_device(m.bvh);
290
+ }
291
+ }
55
292
  }
56
293