warp-lang 0.10.1__py3-none-win_amd64.whl → 0.11.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (300)
  1. warp/__init__.py +10 -4
  2. warp/__init__.pyi +1 -0
  3. warp/bin/warp-clang.dll +0 -0
  4. warp/bin/warp.dll +0 -0
  5. warp/build.py +5 -3
  6. warp/build_dll.py +29 -9
  7. warp/builtins.py +868 -507
  8. warp/codegen.py +1074 -638
  9. warp/config.py +3 -3
  10. warp/constants.py +6 -0
  11. warp/context.py +715 -222
  12. warp/fabric.py +326 -0
  13. warp/fem/__init__.py +27 -0
  14. warp/fem/cache.py +389 -0
  15. warp/fem/dirichlet.py +181 -0
  16. warp/fem/domain.py +263 -0
  17. warp/fem/field/__init__.py +101 -0
  18. warp/fem/field/field.py +149 -0
  19. warp/fem/field/nodal_field.py +299 -0
  20. warp/fem/field/restriction.py +21 -0
  21. warp/fem/field/test.py +181 -0
  22. warp/fem/field/trial.py +183 -0
  23. warp/fem/geometry/__init__.py +19 -0
  24. warp/fem/geometry/closest_point.py +70 -0
  25. warp/fem/geometry/deformed_geometry.py +271 -0
  26. warp/fem/geometry/element.py +744 -0
  27. warp/fem/geometry/geometry.py +186 -0
  28. warp/fem/geometry/grid_2d.py +373 -0
  29. warp/fem/geometry/grid_3d.py +435 -0
  30. warp/fem/geometry/hexmesh.py +953 -0
  31. warp/fem/geometry/partition.py +376 -0
  32. warp/fem/geometry/quadmesh_2d.py +532 -0
  33. warp/fem/geometry/tetmesh.py +840 -0
  34. warp/fem/geometry/trimesh_2d.py +577 -0
  35. warp/fem/integrate.py +1616 -0
  36. warp/fem/operator.py +191 -0
  37. warp/fem/polynomial.py +213 -0
  38. warp/fem/quadrature/__init__.py +2 -0
  39. warp/fem/quadrature/pic_quadrature.py +245 -0
  40. warp/fem/quadrature/quadrature.py +294 -0
  41. warp/fem/space/__init__.py +292 -0
  42. warp/fem/space/basis_space.py +489 -0
  43. warp/fem/space/collocated_function_space.py +105 -0
  44. warp/fem/space/dof_mapper.py +236 -0
  45. warp/fem/space/function_space.py +145 -0
  46. warp/fem/space/grid_2d_function_space.py +267 -0
  47. warp/fem/space/grid_3d_function_space.py +306 -0
  48. warp/fem/space/hexmesh_function_space.py +352 -0
  49. warp/fem/space/partition.py +350 -0
  50. warp/fem/space/quadmesh_2d_function_space.py +369 -0
  51. warp/fem/space/restriction.py +160 -0
  52. warp/fem/space/shape/__init__.py +15 -0
  53. warp/fem/space/shape/cube_shape_function.py +738 -0
  54. warp/fem/space/shape/shape_function.py +103 -0
  55. warp/fem/space/shape/square_shape_function.py +611 -0
  56. warp/fem/space/shape/tet_shape_function.py +567 -0
  57. warp/fem/space/shape/triangle_shape_function.py +429 -0
  58. warp/fem/space/tetmesh_function_space.py +292 -0
  59. warp/fem/space/topology.py +295 -0
  60. warp/fem/space/trimesh_2d_function_space.py +221 -0
  61. warp/fem/types.py +77 -0
  62. warp/fem/utils.py +495 -0
  63. warp/native/array.h +147 -44
  64. warp/native/builtin.h +122 -149
  65. warp/native/bvh.cpp +73 -325
  66. warp/native/bvh.cu +406 -23
  67. warp/native/bvh.h +34 -43
  68. warp/native/clang/clang.cpp +13 -8
  69. warp/native/crt.h +2 -0
  70. warp/native/cuda_crt.h +5 -0
  71. warp/native/cuda_util.cpp +15 -3
  72. warp/native/cuda_util.h +3 -1
  73. warp/native/cutlass/tools/library/scripts/conv2d_operation.py +463 -0
  74. warp/native/cutlass/tools/library/scripts/conv3d_operation.py +321 -0
  75. warp/native/cutlass/tools/library/scripts/gemm_operation.py +988 -0
  76. warp/native/cutlass/tools/library/scripts/generator.py +4625 -0
  77. warp/native/cutlass/tools/library/scripts/library.py +799 -0
  78. warp/native/cutlass/tools/library/scripts/manifest.py +402 -0
  79. warp/native/cutlass/tools/library/scripts/pycutlass/docs/source/conf.py +96 -0
  80. warp/native/cutlass/tools/library/scripts/pycutlass/profile/conv/conv2d_f16_sm80.py +106 -0
  81. warp/native/cutlass/tools/library/scripts/pycutlass/profile/gemm/gemm_f32_sm80.py +91 -0
  82. warp/native/cutlass/tools/library/scripts/pycutlass/setup.py +80 -0
  83. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/__init__.py +48 -0
  84. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/arguments.py +118 -0
  85. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/c_types.py +241 -0
  86. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/compiler.py +432 -0
  87. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/conv2d_operation.py +631 -0
  88. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/epilogue.py +1026 -0
  89. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/frontend.py +104 -0
  90. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/gemm_operation.py +1276 -0
  91. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/library.py +744 -0
  92. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/memory_manager.py +74 -0
  93. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/operation.py +110 -0
  94. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/parser.py +619 -0
  95. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/reduction_operation.py +398 -0
  96. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/tensor_ref.py +70 -0
  97. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/__init__.py +4 -0
  98. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/conv2d_testbed.py +646 -0
  99. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_grouped_testbed.py +235 -0
  100. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_testbed.py +557 -0
  101. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/profiler.py +70 -0
  102. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/type_hint.py +39 -0
  103. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/__init__.py +1 -0
  104. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/device.py +76 -0
  105. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/reference_model.py +255 -0
  106. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/__init__.py +0 -0
  107. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +201 -0
  108. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +177 -0
  109. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +98 -0
  110. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +95 -0
  111. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +163 -0
  112. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +187 -0
  113. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +309 -0
  114. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +54 -0
  115. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  116. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  117. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +253 -0
  118. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +97 -0
  119. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +242 -0
  120. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  121. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  122. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/run_all_tests.py +10 -0
  123. warp/native/cutlass/tools/library/scripts/pycutlass/test/frontend/test_frontend.py +146 -0
  124. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/__init__.py +0 -0
  125. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_bf16_sm80.py +96 -0
  126. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f16_sm80.py +447 -0
  127. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f32_sm80.py +146 -0
  128. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f64_sm80.py +102 -0
  129. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_grouped_sm80.py +203 -0
  130. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_s8_sm80.py +229 -0
  131. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/run_all_tests.py +9 -0
  132. warp/native/cutlass/tools/library/scripts/pycutlass/test/unit/test_sm80.py +453 -0
  133. warp/native/cutlass/tools/library/scripts/rank_2k_operation.py +398 -0
  134. warp/native/cutlass/tools/library/scripts/rank_k_operation.py +387 -0
  135. warp/native/cutlass/tools/library/scripts/rt.py +796 -0
  136. warp/native/cutlass/tools/library/scripts/symm_operation.py +400 -0
  137. warp/native/cutlass/tools/library/scripts/trmm_operation.py +407 -0
  138. warp/native/cutlass_gemm.cu +5 -3
  139. warp/native/exports.h +1240 -952
  140. warp/native/fabric.h +228 -0
  141. warp/native/hashgrid.cpp +4 -4
  142. warp/native/hashgrid.h +22 -2
  143. warp/native/intersect.h +22 -7
  144. warp/native/intersect_adj.h +8 -8
  145. warp/native/intersect_tri.h +1 -1
  146. warp/native/marching.cu +157 -161
  147. warp/native/mat.h +80 -19
  148. warp/native/matnn.h +2 -2
  149. warp/native/mesh.cpp +33 -108
  150. warp/native/mesh.cu +114 -23
  151. warp/native/mesh.h +446 -46
  152. warp/native/noise.h +272 -329
  153. warp/native/quat.h +51 -8
  154. warp/native/rand.h +45 -35
  155. warp/native/range.h +6 -2
  156. warp/native/reduce.cpp +1 -1
  157. warp/native/reduce.cu +10 -12
  158. warp/native/runlength_encode.cu +6 -10
  159. warp/native/scan.cu +8 -11
  160. warp/native/sparse.cpp +4 -4
  161. warp/native/sparse.cu +164 -154
  162. warp/native/spatial.h +2 -2
  163. warp/native/temp_buffer.h +14 -30
  164. warp/native/vec.h +107 -23
  165. warp/native/volume.h +120 -0
  166. warp/native/warp.cpp +560 -30
  167. warp/native/warp.cu +431 -44
  168. warp/native/warp.h +13 -4
  169. warp/optim/__init__.py +1 -0
  170. warp/optim/linear.py +922 -0
  171. warp/optim/sgd.py +92 -0
  172. warp/render/render_opengl.py +335 -119
  173. warp/render/render_usd.py +11 -11
  174. warp/sim/__init__.py +2 -2
  175. warp/sim/articulation.py +385 -185
  176. warp/sim/collide.py +8 -0
  177. warp/sim/import_mjcf.py +297 -106
  178. warp/sim/import_urdf.py +389 -210
  179. warp/sim/import_usd.py +198 -97
  180. warp/sim/inertia.py +17 -18
  181. warp/sim/integrator_euler.py +14 -8
  182. warp/sim/integrator_xpbd.py +158 -16
  183. warp/sim/model.py +795 -291
  184. warp/sim/render.py +3 -3
  185. warp/sim/utils.py +3 -0
  186. warp/sparse.py +640 -150
  187. warp/stubs.py +606 -267
  188. warp/tape.py +61 -10
  189. warp/tests/__main__.py +3 -6
  190. warp/tests/assets/curlnoise_golden.npy +0 -0
  191. warp/tests/assets/pnoise_golden.npy +0 -0
  192. warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
  193. warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
  194. warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
  195. warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
  196. warp/tests/aux_test_unresolved_func.py +14 -0
  197. warp/tests/aux_test_unresolved_symbol.py +14 -0
  198. warp/tests/disabled_kinematics.py +239 -0
  199. warp/tests/run_coverage_serial.py +31 -0
  200. warp/tests/test_adam.py +103 -106
  201. warp/tests/test_arithmetic.py +128 -74
  202. warp/tests/test_array.py +212 -97
  203. warp/tests/test_array_reduce.py +57 -23
  204. warp/tests/test_atomic.py +64 -28
  205. warp/tests/test_bool.py +99 -0
  206. warp/tests/test_builtins_resolution.py +1292 -0
  207. warp/tests/test_bvh.py +42 -18
  208. warp/tests/test_closest_point_edge_edge.py +54 -57
  209. warp/tests/test_codegen.py +208 -130
  210. warp/tests/test_compile_consts.py +28 -20
  211. warp/tests/test_conditional.py +108 -24
  212. warp/tests/test_copy.py +10 -12
  213. warp/tests/test_ctypes.py +112 -88
  214. warp/tests/test_dense.py +21 -14
  215. warp/tests/test_devices.py +98 -0
  216. warp/tests/test_dlpack.py +75 -75
  217. warp/tests/test_examples.py +277 -0
  218. warp/tests/test_fabricarray.py +955 -0
  219. warp/tests/test_fast_math.py +15 -11
  220. warp/tests/test_fem.py +1271 -0
  221. warp/tests/test_fp16.py +53 -19
  222. warp/tests/test_func.py +187 -86
  223. warp/tests/test_generics.py +194 -49
  224. warp/tests/test_grad.py +178 -109
  225. warp/tests/test_grad_customs.py +176 -0
  226. warp/tests/test_hash_grid.py +52 -37
  227. warp/tests/test_import.py +10 -23
  228. warp/tests/test_indexedarray.py +32 -31
  229. warp/tests/test_intersect.py +18 -9
  230. warp/tests/test_large.py +141 -0
  231. warp/tests/test_launch.py +14 -41
  232. warp/tests/test_lerp.py +64 -65
  233. warp/tests/test_linear_solvers.py +154 -0
  234. warp/tests/test_lvalue.py +493 -0
  235. warp/tests/test_marching_cubes.py +12 -13
  236. warp/tests/test_mat.py +517 -2898
  237. warp/tests/test_mat_lite.py +115 -0
  238. warp/tests/test_mat_scalar_ops.py +2889 -0
  239. warp/tests/test_math.py +103 -9
  240. warp/tests/test_matmul.py +305 -69
  241. warp/tests/test_matmul_lite.py +410 -0
  242. warp/tests/test_mesh.py +71 -14
  243. warp/tests/test_mesh_query_aabb.py +41 -25
  244. warp/tests/test_mesh_query_point.py +140 -22
  245. warp/tests/test_mesh_query_ray.py +39 -22
  246. warp/tests/test_mlp.py +30 -22
  247. warp/tests/test_model.py +92 -89
  248. warp/tests/test_modules_lite.py +39 -0
  249. warp/tests/test_multigpu.py +88 -114
  250. warp/tests/test_noise.py +12 -11
  251. warp/tests/test_operators.py +16 -20
  252. warp/tests/test_options.py +11 -11
  253. warp/tests/test_pinned.py +17 -18
  254. warp/tests/test_print.py +32 -11
  255. warp/tests/test_quat.py +275 -129
  256. warp/tests/test_rand.py +18 -16
  257. warp/tests/test_reload.py +38 -34
  258. warp/tests/test_rounding.py +50 -43
  259. warp/tests/test_runlength_encode.py +168 -20
  260. warp/tests/test_smoothstep.py +9 -11
  261. warp/tests/test_snippet.py +143 -0
  262. warp/tests/test_sparse.py +261 -63
  263. warp/tests/test_spatial.py +276 -243
  264. warp/tests/test_streams.py +110 -85
  265. warp/tests/test_struct.py +268 -63
  266. warp/tests/test_tape.py +39 -21
  267. warp/tests/test_torch.py +118 -89
  268. warp/tests/test_transient_module.py +12 -13
  269. warp/tests/test_types.py +614 -0
  270. warp/tests/test_utils.py +494 -0
  271. warp/tests/test_vec.py +354 -2050
  272. warp/tests/test_vec_lite.py +73 -0
  273. warp/tests/test_vec_scalar_ops.py +2099 -0
  274. warp/tests/test_volume.py +457 -293
  275. warp/tests/test_volume_write.py +124 -134
  276. warp/tests/unittest_serial.py +35 -0
  277. warp/tests/unittest_suites.py +341 -0
  278. warp/tests/unittest_utils.py +568 -0
  279. warp/tests/unused_test_misc.py +71 -0
  280. warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
  281. warp/thirdparty/appdirs.py +36 -45
  282. warp/thirdparty/unittest_parallel.py +549 -0
  283. warp/torch.py +9 -6
  284. warp/types.py +1089 -366
  285. warp/utils.py +93 -387
  286. warp_lang-0.11.0.dist-info/METADATA +238 -0
  287. warp_lang-0.11.0.dist-info/RECORD +332 -0
  288. {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/WHEEL +1 -1
  289. warp/tests/test_all.py +0 -219
  290. warp/tests/test_array_scan.py +0 -60
  291. warp/tests/test_base.py +0 -208
  292. warp/tests/test_unresolved_func.py +0 -7
  293. warp/tests/test_unresolved_symbol.py +0 -7
  294. warp_lang-0.10.1.dist-info/METADATA +0 -21
  295. warp_lang-0.10.1.dist-info/RECORD +0 -188
  296. /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
  297. /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
  298. /warp/tests/{test_square.py → aux_test_square.py} +0 -0
  299. {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/LICENSE.md +0 -0
  300. {warp_lang-0.10.1.dist-info → warp_lang-0.11.0.dist-info}/top_level.txt +0 -0
warp/native/bvh.cpp CHANGED
@@ -27,34 +27,34 @@ class MedianBVHBuilder
 {
 public:
 
-    void build(BVH& bvh, const bounds3* items, int n);
+    void build(BVH& bvh, const vec3* lowers, const vec3* uppers, int n);
 
 private:
 
-    bounds3 calc_bounds(const bounds3* bounds, const int* indices, int start, int end);
+    bounds3 calc_bounds(const vec3* lowers, const vec3* uppers, const int* indices, int start, int end);
 
-    int partition_median(const bounds3* bounds, int* indices, int start, int end, bounds3 range_bounds);
-    int partition_midpoint(const bounds3* bounds, int* indices, int start, int end, bounds3 range_bounds);
-    int partition_sah(const bounds3* bounds, int* indices, int start, int end, bounds3 range_bounds);
+    int partition_median(const vec3* lowers, const vec3* uppers, int* indices, int start, int end, bounds3 range_bounds);
+    int partition_midpoint(const vec3* lowers, const vec3* uppers, int* indices, int start, int end, bounds3 range_bounds);
+    int partition_sah(const vec3* lowers, const vec3* uppers, int* indices, int start, int end, bounds3 range_bounds);
 
-    int build_recursive(BVH& bvh, const bounds3* bounds, int* indices, int start, int end, int depth, int parent);
+    int build_recursive(BVH& bvh, const vec3* lowers, const vec3* uppers, int* indices, int start, int end, int depth, int parent);
 };
 
 //////////////////////////////////////////////////////////////////////
 
-void MedianBVHBuilder::build(BVH& bvh, const bounds3* items, int n)
+void MedianBVHBuilder::build(BVH& bvh, const vec3* lowers, const vec3* uppers, int n)
 {
     bvh.max_depth = 0;
     bvh.max_nodes = 2*n-1;
-    bvh.num_nodes = 0;
 
     bvh.node_lowers = new BVHPackedNodeHalf[bvh.max_nodes];
     bvh.node_uppers = new BVHPackedNodeHalf[bvh.max_nodes];
     bvh.node_parents = new int[bvh.max_nodes];
     bvh.node_counts = NULL;
-
+
     // root is always in first slot for top down builders
-    bvh.root = 0;
+    bvh.root = new int[1];
+    bvh.root[0] = 0;
 
     if (n == 0)
         return;
@@ -63,35 +63,42 @@ void MedianBVHBuilder::build(BVH& bvh, const bounds3* items, int n)
     for (int i=0; i < n; ++i)
         indices[i] = i;
 
-    build_recursive(bvh, items, &indices[0], 0, n, 0, -1);
+    build_recursive(bvh, lowers, uppers, &indices[0], 0, n, 0, -1);
 }
 
 
-bounds3 MedianBVHBuilder::calc_bounds(const bounds3* bounds, const int* indices, int start, int end)
+bounds3 MedianBVHBuilder::calc_bounds(const vec3* lowers, const vec3* uppers, const int* indices, int start, int end)
 {
     bounds3 u;
 
     for (int i=start; i < end; ++i)
-        u = bounds_union(u, bounds[indices[i]]);
+    {
+        u.add_point(lowers[indices[i]]);
+        u.add_point(uppers[indices[i]]);
+    }
 
     return u;
 }
 
 struct PartitionPredicateMedian
 {
-    PartitionPredicateMedian(const bounds3* bounds, int a) : bounds(bounds), axis(a) {}
+    PartitionPredicateMedian(const vec3* lowers, const vec3* uppers, int a) : lowers(lowers), uppers(uppers), axis(a) {}
 
     bool operator()(int a, int b) const
     {
-        return bounds[a].center()[axis] < bounds[b].center()[axis];
+        vec3 a_center = 0.5f*(lowers[a] + uppers[a]);
+        vec3 b_center = 0.5f*(lowers[b] + uppers[b]);
+
+        return a_center[axis] < b_center[axis];
    }
 
-    const bounds3* bounds;
+    const vec3* lowers;
+    const vec3* uppers;
    int axis;
 };
 
 
-int MedianBVHBuilder::partition_median(const bounds3* bounds, int* indices, int start, int end, bounds3 range_bounds)
+int MedianBVHBuilder::partition_median(const vec3* lowers, const vec3* uppers, int* indices, int start, int end, bounds3 range_bounds)
 {
     assert(end-start >= 2);
 
@@ -101,27 +108,31 @@ int MedianBVHBuilder::partition_median(const bounds3* bounds, int* indices, int
 
     const int k = (start+end)/2;
 
-    std::nth_element(&indices[start], &indices[k], &indices[end], PartitionPredicateMedian(&bounds[0], axis));
+    std::nth_element(&indices[start], &indices[k], &indices[end], PartitionPredicateMedian(lowers, uppers, axis));
 
     return k;
 }
 
 struct PartitionPredictateMidPoint
 {
-    PartitionPredictateMidPoint(const bounds3* bounds, int a, float m) : bounds(bounds), axis(a), mid(m) {}
+    PartitionPredictateMidPoint(const vec3* lowers, const vec3* uppers, int a, float m) : lowers(lowers), uppers(uppers), axis(a), mid(m) {}
 
     bool operator()(int index) const
     {
-        return bounds[index].center()[axis] <= mid;
+        vec3 center = 0.5f*(lowers[index] + uppers[index]);
+
+        return center[axis] <= mid;
    }
 
-    const bounds3* bounds;
+    const vec3* lowers;
+    const vec3* uppers;
+
    int axis;
    float mid;
 };
 
 
-int MedianBVHBuilder::partition_midpoint(const bounds3* bounds, int* indices, int start, int end, bounds3 range_bounds)
+int MedianBVHBuilder::partition_midpoint(const vec3* lowers, const vec3* uppers, int* indices, int start, int end, bounds3 range_bounds)
 {
     assert(end-start >= 2);
 
@@ -131,7 +142,7 @@ int MedianBVHBuilder::partition_midpoint(const bounds3* bounds, int* indices, in
     int axis = longest_axis(edges);
     float mid = center[axis];
 
-    int* upper = std::partition(indices+start, indices+end, PartitionPredictateMidPoint(&bounds[0], axis, mid));
+    int* upper = std::partition(indices+start, indices+end, PartitionPredictateMidPoint(lowers, uppers, axis, mid));
 
     int k = upper-indices;
 
@@ -139,7 +150,6 @@ int MedianBVHBuilder::partition_midpoint(const bounds3* bounds, int* indices, in
     if (k == start || k == end)
         k = (start+end)/2;
 
-
     return k;
 }
 
@@ -199,7 +209,7 @@ int MedianBVHBuilder::partition_sah(const bounds3* bounds, int* indices, int sta
 }
 #endif
 
-int MedianBVHBuilder::build_recursive(BVH& bvh, const bounds3* bounds, int* indices, int start, int end, int depth, int parent)
+int MedianBVHBuilder::build_recursive(BVH& bvh, const vec3* lowers, const vec3* uppers, int* indices, int start, int end, int depth, int parent)
 {
     assert(start < end);
 
@@ -211,7 +221,7 @@ int MedianBVHBuilder::build_recursive(BVH& bvh, const bounds3* bounds, int* indi
     if (depth > bvh.max_depth)
         bvh.max_depth = depth;
 
-    bounds3 b = calc_bounds(bounds, indices, start, end);
+    bounds3 b = calc_bounds(lowers, uppers, indices, start, end);
 
     const int kMaxItemsPerLeaf = 1;
 
@@ -224,7 +234,7 @@ int MedianBVHBuilder::build_recursive(BVH& bvh, const bounds3* bounds, int* indi
     else
     {
         //int split = partition_midpoint(bounds, indices, start, end, b);
-        int split = partition_median(bounds, indices, start, end, b);
+        int split = partition_median(lowers, uppers, indices, start, end, b);
         //int split = partition_sah(bounds, indices, start, end, b);
 
         if (split == start || split == end)
@@ -233,8 +243,8 @@ int MedianBVHBuilder::build_recursive(BVH& bvh, const bounds3* bounds, int* indi
             split = (start+end)/2;
         }
 
-        int left_child = build_recursive(bvh, bounds, indices, start, split, depth+1, node_index);
-        int right_child = build_recursive(bvh, bounds, indices, split, end, depth+1, node_index);
+        int left_child = build_recursive(bvh, lowers, uppers, indices, start, split, depth+1, node_index);
+        int right_child = build_recursive(bvh, lowers, uppers, indices, split, end, depth+1, node_index);
 
         bvh.node_lowers[node_index] = make_node(b.lower, left_child, false);
         bvh.node_uppers[node_index] = make_node(b.upper, right_child, false);
@@ -244,218 +254,8 @@ int MedianBVHBuilder::build_recursive(BVH& bvh, const bounds3* bounds, int* indi
     return node_index;
 }
 
-class LinearBVHBuilderCPU
-{
-public:
-
-    void build(BVH& bvh, const bounds3* items, int n);
-
-private:
-
-    // calculate Morton codes
-    struct KeyIndexPair
-    {
-        uint32_t key;
-        int index;
-
-        inline bool operator < (const KeyIndexPair& rhs) const { return key < rhs.key; }
-    };
-
-    bounds3 calc_bounds(const bounds3* bounds, const KeyIndexPair* keys, int start, int end);
-    int find_split(const KeyIndexPair* pairs, int start, int end);
-    int build_recursive(BVH& bvh, const KeyIndexPair* keys, const bounds3* bounds, int start, int end, int depth);
-
-};
-
-
-// disable std::sort workaround for macOS error
-#if 0
-void LinearBVHBuilderCPU::build(BVH& bvh, const bounds3* items, int n)
-{
-    memset(&bvh, 0, sizeof(BVH));
-
-    bvh.max_nodes = 2*n-1;
-
-    bvh.node_lowers = new BVHPackedNodeHalf[bvh.max_nodes];
-    bvh.node_uppers = new BVHPackedNodeHalf[bvh.max_nodes];
-    bvh.num_nodes = 0;
-
-    // root is always in first slot for top down builders
-    bvh.root = 0;
-
-    std::vector<KeyIndexPair> keys;
-    keys.reserve(n);
-
-    bounds3 totalbounds3;
-    for (int i=0; i < n; ++i)
-        totalbounds3 = bounds_union(totalbounds3, items[i]);
-
-    // ensure non-zero edge length in all dimensions
-    totalbounds3.expand(0.001f);
-
-    vec3 edges = totalbounds3.edges();
-    vec3 invEdges = cw_div(vec3(1.0f), edges);
-
-    for (int i=0; i < n; ++i)
-    {
-        vec3 center = items[i].center();
-        vec3 local = cw_mul(center-totalbounds3.lower, invEdges);
-
-        KeyIndexPair l;
-        l.key = morton3<1024>(local.x, local.y, local.z);
-        l.index = i;
-
-        keys.push_back(l);
-    }
-
-    // sort by key
-    std::sort(keys.begin(), keys.end());
-
-    build_recursive(bvh, &keys[0], items, 0, n, 0);
-
-    printf("Created BVH for %d items with %d nodes, max depth of %d\n", n, bvh.num_nodes, bvh.max_depth);
-}
-#endif
-
-inline bounds3 LinearBVHBuilderCPU::calc_bounds(const bounds3* bounds, const KeyIndexPair* keys, int start, int end)
-{
-    bounds3 u;
-
-    for (int i=start; i < end; ++i)
-        u = bounds_union(u, bounds[keys[i].index]);
 
-    return u;
-}
-
-inline int LinearBVHBuilderCPU::find_split(const KeyIndexPair* pairs, int start, int end)
-{
-    if (pairs[start].key == pairs[end-1].key)
-        return (start+end)/2;
-
-    // find split point between keys, xor here means all bits
-    // of the result are zero up until the first differing bit
-    int common_prefix = clz(pairs[start].key ^ pairs[end-1].key);
-
-    // use binary search to find the point at which this bit changes
-    // from zero to a 1
-    const int mask = 1 << (31-common_prefix);
-
-    while (end-start > 0)
-    {
-        int index = (start+end)/2;
-
-        if (pairs[index].key&mask)
-        {
-            end = index;
-        }
-        else
-            start = index+1;
-    }
-
-    assert(start == end);
-
-    return start;
-}
-
-int LinearBVHBuilderCPU::build_recursive(BVH& bvh, const KeyIndexPair* keys, const bounds3* bounds, int start, int end, int depth)
-{
-    assert(start < end);
-
-    const int n = end-start;
-    const int nodeIndex = bvh.num_nodes++;
-
-    assert(nodeIndex < bvh.max_nodes);
-
-    if (depth > bvh.max_depth)
-        bvh.max_depth = depth;
-
-    bounds3 b = calc_bounds(bounds, keys, start, end);
-
-    const int kMaxItemsPerLeaf = 1;
-
-    if (n <= kMaxItemsPerLeaf)
-    {
-        bvh.node_lowers[nodeIndex] = make_node(b.lower, keys[start].index, true);
-        bvh.node_uppers[nodeIndex] = make_node(b.upper, keys[start].index, false);
-    }
-    else
-    {
-        int split = find_split(keys, start, end);
-
-        int leftChild = build_recursive(bvh, keys, bounds, start, split, depth+1);
-        int rightChild = build_recursive(bvh, keys, bounds, split, end, depth+1);
-
-        bvh.node_lowers[nodeIndex] = make_node(b.lower, leftChild, false);
-        bvh.node_uppers[nodeIndex] = make_node(b.upper, rightChild, false);
-    }
-
-    return nodeIndex;
-}
-
-
-
-// create only happens on host currently, use bvh_clone() to transfer BVH To device
-BVH bvh_create(const bounds3* bounds, int num_bounds)
-{
-    BVH bvh;
-    memset(&bvh, 0, sizeof(bvh));
-
-    MedianBVHBuilder builder;
-    //LinearBVHBuilderCPU builder;
-    builder.build(bvh, bounds, num_bounds);
-
-    return bvh;
-}
-
-void bvh_destroy_host(BVH& bvh)
-{
-    delete[] bvh.node_lowers;
-    delete[] bvh.node_uppers;
-    delete[] bvh.node_parents;
-    delete[] bvh.bounds;
-
-    bvh.node_lowers = NULL;
-    bvh.node_uppers = NULL;
-    bvh.max_nodes = 0;
-    bvh.num_nodes = 0;
-    bvh.num_bounds = 0;
-}
-
-void bvh_destroy_device(BVH& bvh)
-{
-    ContextGuard guard(bvh.context);
-
-    free_device(WP_CURRENT_CONTEXT, bvh.node_lowers); bvh.node_lowers = NULL;
-    free_device(WP_CURRENT_CONTEXT, bvh.node_uppers); bvh.node_uppers = NULL;
-    free_device(WP_CURRENT_CONTEXT, bvh.node_parents); bvh.node_parents = NULL;
-    free_device(WP_CURRENT_CONTEXT, bvh.node_counts); bvh.node_counts = NULL;
-    free_device(WP_CURRENT_CONTEXT, bvh.bounds); bvh.bounds = NULL;
-}
-
-BVH bvh_clone(void* context, const BVH& bvh_host)
-{
-    ContextGuard guard(context);
-
-    BVH bvh_device = bvh_host;
-
-    bvh_device.context = context ? context : cuda_context_get_current();
-
-    bvh_device.node_lowers = (BVHPackedNodeHalf*)alloc_device(WP_CURRENT_CONTEXT, sizeof(BVHPackedNodeHalf)*bvh_host.max_nodes);
-    bvh_device.node_uppers = (BVHPackedNodeHalf*)alloc_device(WP_CURRENT_CONTEXT, sizeof(BVHPackedNodeHalf)*bvh_host.max_nodes);
-    bvh_device.node_parents = (int*)alloc_device(WP_CURRENT_CONTEXT, sizeof(int)*bvh_host.max_nodes);
-    bvh_device.node_counts = (int*)alloc_device(WP_CURRENT_CONTEXT, sizeof(int)*bvh_host.max_nodes);
-    bvh_device.bounds = (bounds3*)alloc_device(WP_CURRENT_CONTEXT, sizeof(bounds3)*bvh_host.num_bounds);
-
-    // copy host data to device
-    memcpy_h2d(WP_CURRENT_CONTEXT, bvh_device.node_lowers, bvh_host.node_lowers, sizeof(BVHPackedNodeHalf)*bvh_host.max_nodes);
-    memcpy_h2d(WP_CURRENT_CONTEXT, bvh_device.node_uppers, bvh_host.node_uppers, sizeof(BVHPackedNodeHalf)*bvh_host.max_nodes);
-    memcpy_h2d(WP_CURRENT_CONTEXT, bvh_device.node_parents, bvh_host.node_parents, sizeof(int)*bvh_host.max_nodes);
-    memcpy_h2d(WP_CURRENT_CONTEXT, bvh_device.bounds, bvh_host.bounds, sizeof(bounds3)*bvh_host.num_bounds);
-
-    return bvh_device;
-}
-
-void bvh_refit_recursive(BVH& bvh, int index, const bounds3* bounds)
+void bvh_refit_recursive(BVH& bvh, int index)
 {
     BVHPackedNodeHalf& lower = bvh.node_lowers[index];
     BVHPackedNodeHalf& upper = bvh.node_uppers[index];
@@ -464,16 +264,17 @@ void bvh_refit_recursive(BVH& bvh, int index, const bounds3* bounds)
     {
         const int leaf_index = lower.i;
 
-        (vec3&)lower = bounds[leaf_index].lower;
-        (vec3&)upper = bounds[leaf_index].upper;
+        // update leaf from items
+        (vec3&)lower = bvh.item_lowers[leaf_index];
+        (vec3&)upper = bvh.item_uppers[leaf_index];
    }
    else
    {
        int left_index = lower.i;
        int right_index = upper.i;
 
-        bvh_refit_recursive(bvh, left_index, bounds);
-        bvh_refit_recursive(bvh, right_index, bounds);
+        bvh_refit_recursive(bvh, left_index);
+        bvh_refit_recursive(bvh, right_index);
 
        // compute union of children
        const vec3& left_lower = (vec3&)bvh.node_lowers[left_index];
@@ -492,9 +293,9 @@ void bvh_refit_recursive(BVH& bvh, int index, const bounds3* bounds)
    }
 }
 
-void bvh_refit_host(BVH& bvh, const bounds3* b)
+void bvh_refit_host(BVH& bvh)
 {
-    bvh_refit_recursive(bvh, 0, b);
+    bvh_refit_recursive(bvh, 0);
 }
 
 
@@ -537,87 +338,46 @@ void bvh_rem_descriptor(uint64_t id)
 
 }
 
+
+void bvh_destroy_host(BVH& bvh)
+{
+    delete[] bvh.node_lowers;
+    delete[] bvh.node_uppers;
+    delete[] bvh.node_parents;
+    delete[] bvh.root;
+
+    bvh.node_lowers = NULL;
+    bvh.node_uppers = NULL;
+    bvh.node_parents = NULL;
+    bvh.root = NULL;
+
+    bvh.max_nodes = 0;
+    bvh.num_items = 0;
+}
+
 } // namespace wp
 
-uint64_t bvh_create_host(vec3* lowers, vec3* uppers, int num_bounds)
+uint64_t bvh_create_host(vec3* lowers, vec3* uppers, int num_items)
 {
     BVH* bvh = new BVH();
     memset(bvh, 0, sizeof(BVH));
 
     bvh->context = NULL;
 
-    bvh->lowers = lowers;
-    bvh->uppers = uppers;
-    bvh->num_bounds = num_bounds;
-
-    bvh->bounds = new bounds3[num_bounds];
-
-    for (int i=0; i < num_bounds; ++i)
-    {
-        bvh->bounds[i].lower = lowers[i];
-        bvh->bounds[i].upper = uppers[i];
-    }
+    bvh->item_lowers = lowers;
+    bvh->item_uppers = uppers;
+    bvh->num_items = num_items;
 
     MedianBVHBuilder builder;
-    builder.build(*bvh, bvh->bounds, num_bounds);
+    builder.build(*bvh, lowers, uppers, num_items);
 
     return (uint64_t)bvh;
 }
 
-uint64_t bvh_create_device(void* context, vec3* lowers, vec3* uppers, int num_bounds)
-{
-    ContextGuard guard(context);
-
-    // todo: BVH creation only on CPU at the moment so temporarily bring all the data back to host
-    vec3* lowers_host = (vec3*)alloc_host(sizeof(vec3)*num_bounds);
-    vec3* uppers_host = (vec3*)alloc_host(sizeof(vec3)*num_bounds);
-    bounds3* bounds_host = (bounds3*)alloc_host(sizeof(bounds3)*num_bounds);
-
-    memcpy_d2h(WP_CURRENT_CONTEXT, lowers_host, lowers, sizeof(vec3)*num_bounds);
-    memcpy_d2h(WP_CURRENT_CONTEXT, uppers_host, uppers, sizeof(vec3)*num_bounds);
-    cuda_context_synchronize(WP_CURRENT_CONTEXT);
-
-    for (int i=0; i < num_bounds; ++i)
-    {
-        bounds_host[i] = bounds3();
-        bounds_host[i].lower = lowers_host[i];
-        bounds_host[i].upper = uppers_host[i];
-    }
-
-    BVH bvh_host = bvh_create(bounds_host, num_bounds);
-    bvh_host.context = context ? context : cuda_context_get_current();
-    bvh_host.bounds = bounds_host;
-    bvh_host.num_bounds = num_bounds;
-    BVH bvh_device_clone = bvh_clone(WP_CURRENT_CONTEXT, bvh_host);
-
-    bvh_device_clone.lowers = lowers; // managed by the user
-    bvh_device_clone.uppers = uppers; // managed by the user
-
-    BVH* bvh_device = (BVH*)alloc_device(WP_CURRENT_CONTEXT, sizeof(BVH));
-    memcpy_h2d(WP_CURRENT_CONTEXT, bvh_device, &bvh_device_clone, sizeof(BVH));
-
-    bvh_destroy_host(bvh_host);
-    free_host(lowers_host);
-    free_host(uppers_host);
-
-    uint64_t bvh_id = (uint64_t)bvh_device;
-    bvh_add_descriptor(bvh_id, bvh_device_clone);
-
-    return bvh_id;
-}
-
 void bvh_refit_host(uint64_t id)
 {
     BVH* bvh = (BVH*)(id);
-
-    for (int i=0; i < bvh->num_bounds; ++i)
-    {
-        bvh->bounds[i] = bounds3();
-        bvh->bounds[i].lower = bvh->lowers[i];
-        bvh->bounds[i].upper = bvh->uppers[i];
-    }
-
-    bvh_refit_host(*bvh, bvh->bounds);
+    bvh_refit_host(*bvh);
 }
 
 void bvh_destroy_host(uint64_t id)
@@ -628,23 +388,11 @@ void bvh_destroy_host(uint64_t id)
 }
 
 
-void bvh_destroy_device(uint64_t id)
-{
-    BVH bvh;
-    if (bvh_get_descriptor(id, bvh))
-    {
-        bvh_destroy_device(bvh);
-        mesh_rem_descriptor(id);
-    }
-}
-
 // stubs for non-CUDA platforms
 #if !WP_ENABLE_CUDA
 
-void bvh_refit_device(uint64_t id)
-{
-}
-
-
+uint64_t bvh_create_device(void* context, wp::vec3* lowers, wp::vec3* uppers, int num_items) { return 0; }
+void bvh_refit_device(uint64_t id) {}
+void bvh_destroy_device(uint64_t id) {}
 
 #endif // !WP_ENABLE_CUDA
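
Note: in this refactor the builder and refit path no longer go through an intermediate bounds3 array; the BVH keeps the caller's lower/upper arrays as item_lowers/item_uppers and reads them directly. The following is a minimal sketch of how the host-side entry points shown in this diff (bvh_create_host, bvh_refit_host, bvh_destroy_host) could be driven from C++. Only those three signatures and wp::vec3 come from the diff; the include path, the example AABB data, and the main() scaffolding are illustrative assumptions, not part of the package.

// Sketch: exercising the refactored host BVH API (assumes the exported
// declarations are visible, e.g. via warp's native headers).
#include <cstdint>
#include <vector>

#include "warp.h"  // assumed header exposing bvh_create_host / bvh_refit_host / bvh_destroy_host

int main()
{
    // Per-item AABBs are passed as two separate arrays; the BVH stores the
    // pointers (item_lowers/item_uppers), so they must outlive the BVH.
    std::vector<wp::vec3> lowers = { wp::vec3(0.f, 0.f, 0.f), wp::vec3(2.f, 0.f, 0.f) };
    std::vector<wp::vec3> uppers = { wp::vec3(1.f, 1.f, 1.f), wp::vec3(3.f, 1.f, 1.f) };

    uint64_t id = bvh_create_host(lowers.data(), uppers.data(), (int)lowers.size());

    // Move the second item, then refit: bvh_refit_host(id) reads the updated
    // item_lowers/item_uppers in place, without rebuilding a bounds3 buffer.
    lowers[1] = wp::vec3(4.f, 0.f, 0.f);
    uppers[1] = wp::vec3(5.f, 1.f, 1.f);
    bvh_refit_host(id);

    bvh_destroy_host(id);
    return 0;
}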