warp-lang 0.9.0__py3-none-win_amd64.whl → 0.11.0__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (315)
  1. warp/__init__.py +15 -7
  2. warp/__init__.pyi +1 -0
  3. warp/bin/warp-clang.dll +0 -0
  4. warp/bin/warp.dll +0 -0
  5. warp/build.py +22 -443
  6. warp/build_dll.py +384 -0
  7. warp/builtins.py +998 -488
  8. warp/codegen.py +1307 -739
  9. warp/config.py +5 -3
  10. warp/constants.py +6 -0
  11. warp/context.py +1291 -548
  12. warp/dlpack.py +31 -31
  13. warp/fabric.py +326 -0
  14. warp/fem/__init__.py +27 -0
  15. warp/fem/cache.py +389 -0
  16. warp/fem/dirichlet.py +181 -0
  17. warp/fem/domain.py +263 -0
  18. warp/fem/field/__init__.py +101 -0
  19. warp/fem/field/field.py +149 -0
  20. warp/fem/field/nodal_field.py +299 -0
  21. warp/fem/field/restriction.py +21 -0
  22. warp/fem/field/test.py +181 -0
  23. warp/fem/field/trial.py +183 -0
  24. warp/fem/geometry/__init__.py +19 -0
  25. warp/fem/geometry/closest_point.py +70 -0
  26. warp/fem/geometry/deformed_geometry.py +271 -0
  27. warp/fem/geometry/element.py +744 -0
  28. warp/fem/geometry/geometry.py +186 -0
  29. warp/fem/geometry/grid_2d.py +373 -0
  30. warp/fem/geometry/grid_3d.py +435 -0
  31. warp/fem/geometry/hexmesh.py +953 -0
  32. warp/fem/geometry/partition.py +376 -0
  33. warp/fem/geometry/quadmesh_2d.py +532 -0
  34. warp/fem/geometry/tetmesh.py +840 -0
  35. warp/fem/geometry/trimesh_2d.py +577 -0
  36. warp/fem/integrate.py +1616 -0
  37. warp/fem/operator.py +191 -0
  38. warp/fem/polynomial.py +213 -0
  39. warp/fem/quadrature/__init__.py +2 -0
  40. warp/fem/quadrature/pic_quadrature.py +245 -0
  41. warp/fem/quadrature/quadrature.py +294 -0
  42. warp/fem/space/__init__.py +292 -0
  43. warp/fem/space/basis_space.py +489 -0
  44. warp/fem/space/collocated_function_space.py +105 -0
  45. warp/fem/space/dof_mapper.py +236 -0
  46. warp/fem/space/function_space.py +145 -0
  47. warp/fem/space/grid_2d_function_space.py +267 -0
  48. warp/fem/space/grid_3d_function_space.py +306 -0
  49. warp/fem/space/hexmesh_function_space.py +352 -0
  50. warp/fem/space/partition.py +350 -0
  51. warp/fem/space/quadmesh_2d_function_space.py +369 -0
  52. warp/fem/space/restriction.py +160 -0
  53. warp/fem/space/shape/__init__.py +15 -0
  54. warp/fem/space/shape/cube_shape_function.py +738 -0
  55. warp/fem/space/shape/shape_function.py +103 -0
  56. warp/fem/space/shape/square_shape_function.py +611 -0
  57. warp/fem/space/shape/tet_shape_function.py +567 -0
  58. warp/fem/space/shape/triangle_shape_function.py +429 -0
  59. warp/fem/space/tetmesh_function_space.py +292 -0
  60. warp/fem/space/topology.py +295 -0
  61. warp/fem/space/trimesh_2d_function_space.py +221 -0
  62. warp/fem/types.py +77 -0
  63. warp/fem/utils.py +495 -0
  64. warp/native/array.h +164 -55
  65. warp/native/builtin.h +150 -174
  66. warp/native/bvh.cpp +75 -328
  67. warp/native/bvh.cu +406 -23
  68. warp/native/bvh.h +37 -45
  69. warp/native/clang/clang.cpp +136 -24
  70. warp/native/crt.cpp +1 -76
  71. warp/native/crt.h +111 -104
  72. warp/native/cuda_crt.h +1049 -0
  73. warp/native/cuda_util.cpp +15 -3
  74. warp/native/cuda_util.h +3 -1
  75. warp/native/cutlass/tools/library/scripts/conv2d_operation.py +463 -0
  76. warp/native/cutlass/tools/library/scripts/conv3d_operation.py +321 -0
  77. warp/native/cutlass/tools/library/scripts/gemm_operation.py +988 -0
  78. warp/native/cutlass/tools/library/scripts/generator.py +4625 -0
  79. warp/native/cutlass/tools/library/scripts/library.py +799 -0
  80. warp/native/cutlass/tools/library/scripts/manifest.py +402 -0
  81. warp/native/cutlass/tools/library/scripts/pycutlass/docs/source/conf.py +96 -0
  82. warp/native/cutlass/tools/library/scripts/pycutlass/profile/conv/conv2d_f16_sm80.py +106 -0
  83. warp/native/cutlass/tools/library/scripts/pycutlass/profile/gemm/gemm_f32_sm80.py +91 -0
  84. warp/native/cutlass/tools/library/scripts/pycutlass/setup.py +80 -0
  85. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/__init__.py +48 -0
  86. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/arguments.py +118 -0
  87. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/c_types.py +241 -0
  88. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/compiler.py +432 -0
  89. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/conv2d_operation.py +631 -0
  90. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/epilogue.py +1026 -0
  91. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/frontend.py +104 -0
  92. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/gemm_operation.py +1276 -0
  93. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/library.py +744 -0
  94. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/memory_manager.py +74 -0
  95. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/operation.py +110 -0
  96. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/parser.py +619 -0
  97. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/reduction_operation.py +398 -0
  98. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/tensor_ref.py +70 -0
  99. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/__init__.py +4 -0
  100. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/conv2d_testbed.py +646 -0
  101. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_grouped_testbed.py +235 -0
  102. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/gemm_testbed.py +557 -0
  103. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/test/profiler.py +70 -0
  104. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/type_hint.py +39 -0
  105. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/__init__.py +1 -0
  106. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/device.py +76 -0
  107. warp/native/cutlass/tools/library/scripts/pycutlass/src/pycutlass/utils/reference_model.py +255 -0
  108. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/__init__.py +0 -0
  109. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +201 -0
  110. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +177 -0
  111. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +98 -0
  112. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_dgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +95 -0
  113. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_few_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +163 -0
  114. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_fixed_channels_f16nhwc_f16nhwc_f16nhwc_tensor_op_f32_sm80.py +187 -0
  115. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +309 -0
  116. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +54 -0
  117. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  118. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_fprop_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  119. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +253 -0
  120. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.py +97 -0
  121. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.py +242 -0
  122. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_f32nhwc_f32nhwc_f32nhwc_simt_f32_sm80.py +96 -0
  123. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/conv2d_wgrad_implicit_gemm_tf32nhwc_tf32nhwc_f32nhwc_tensor_op_f32_sm80.py +107 -0
  124. warp/native/cutlass/tools/library/scripts/pycutlass/test/conv/run_all_tests.py +10 -0
  125. warp/native/cutlass/tools/library/scripts/pycutlass/test/frontend/test_frontend.py +146 -0
  126. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/__init__.py +0 -0
  127. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_bf16_sm80.py +96 -0
  128. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f16_sm80.py +447 -0
  129. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f32_sm80.py +146 -0
  130. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_f64_sm80.py +102 -0
  131. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_grouped_sm80.py +203 -0
  132. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/gemm_s8_sm80.py +229 -0
  133. warp/native/cutlass/tools/library/scripts/pycutlass/test/gemm/run_all_tests.py +9 -0
  134. warp/native/cutlass/tools/library/scripts/pycutlass/test/unit/test_sm80.py +453 -0
  135. warp/native/cutlass/tools/library/scripts/rank_2k_operation.py +398 -0
  136. warp/native/cutlass/tools/library/scripts/rank_k_operation.py +387 -0
  137. warp/native/cutlass/tools/library/scripts/rt.py +796 -0
  138. warp/native/cutlass/tools/library/scripts/symm_operation.py +400 -0
  139. warp/native/cutlass/tools/library/scripts/trmm_operation.py +407 -0
  140. warp/native/cutlass_gemm.cu +5 -3
  141. warp/native/exports.h +1240 -949
  142. warp/native/fabric.h +228 -0
  143. warp/native/hashgrid.cpp +4 -4
  144. warp/native/hashgrid.h +22 -2
  145. warp/native/initializer_array.h +2 -2
  146. warp/native/intersect.h +22 -7
  147. warp/native/intersect_adj.h +8 -8
  148. warp/native/intersect_tri.h +13 -16
  149. warp/native/marching.cu +157 -161
  150. warp/native/mat.h +119 -19
  151. warp/native/matnn.h +2 -2
  152. warp/native/mesh.cpp +108 -83
  153. warp/native/mesh.cu +243 -6
  154. warp/native/mesh.h +1547 -458
  155. warp/native/nanovdb/NanoVDB.h +1 -1
  156. warp/native/noise.h +272 -329
  157. warp/native/quat.h +51 -8
  158. warp/native/rand.h +45 -35
  159. warp/native/range.h +6 -2
  160. warp/native/reduce.cpp +157 -0
  161. warp/native/reduce.cu +348 -0
  162. warp/native/runlength_encode.cpp +62 -0
  163. warp/native/runlength_encode.cu +46 -0
  164. warp/native/scan.cu +11 -13
  165. warp/native/scan.h +1 -0
  166. warp/native/solid_angle.h +442 -0
  167. warp/native/sort.cpp +13 -0
  168. warp/native/sort.cu +9 -1
  169. warp/native/sparse.cpp +338 -0
  170. warp/native/sparse.cu +545 -0
  171. warp/native/spatial.h +2 -2
  172. warp/native/temp_buffer.h +30 -0
  173. warp/native/vec.h +126 -24
  174. warp/native/volume.h +120 -0
  175. warp/native/warp.cpp +658 -53
  176. warp/native/warp.cu +660 -68
  177. warp/native/warp.h +112 -12
  178. warp/optim/__init__.py +1 -0
  179. warp/optim/linear.py +922 -0
  180. warp/optim/sgd.py +92 -0
  181. warp/render/render_opengl.py +392 -152
  182. warp/render/render_usd.py +11 -11
  183. warp/sim/__init__.py +2 -2
  184. warp/sim/articulation.py +385 -185
  185. warp/sim/collide.py +21 -8
  186. warp/sim/import_mjcf.py +297 -106
  187. warp/sim/import_urdf.py +389 -210
  188. warp/sim/import_usd.py +198 -97
  189. warp/sim/inertia.py +17 -18
  190. warp/sim/integrator_euler.py +14 -8
  191. warp/sim/integrator_xpbd.py +161 -19
  192. warp/sim/model.py +795 -291
  193. warp/sim/optimizer.py +2 -6
  194. warp/sim/render.py +65 -3
  195. warp/sim/utils.py +3 -0
  196. warp/sparse.py +1227 -0
  197. warp/stubs.py +665 -223
  198. warp/tape.py +66 -15
  199. warp/tests/__main__.py +3 -6
  200. warp/tests/assets/curlnoise_golden.npy +0 -0
  201. warp/tests/assets/pnoise_golden.npy +0 -0
  202. warp/tests/assets/torus.usda +105 -105
  203. warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
  204. warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
  205. warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
  206. warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
  207. warp/tests/aux_test_unresolved_func.py +14 -0
  208. warp/tests/aux_test_unresolved_symbol.py +14 -0
  209. warp/tests/disabled_kinematics.py +239 -0
  210. warp/tests/run_coverage_serial.py +31 -0
  211. warp/tests/test_adam.py +103 -106
  212. warp/tests/test_arithmetic.py +128 -74
  213. warp/tests/test_array.py +1497 -211
  214. warp/tests/test_array_reduce.py +150 -0
  215. warp/tests/test_atomic.py +64 -28
  216. warp/tests/test_bool.py +99 -0
  217. warp/tests/test_builtins_resolution.py +1292 -0
  218. warp/tests/test_bvh.py +75 -43
  219. warp/tests/test_closest_point_edge_edge.py +54 -57
  220. warp/tests/test_codegen.py +233 -128
  221. warp/tests/test_compile_consts.py +28 -20
  222. warp/tests/test_conditional.py +108 -24
  223. warp/tests/test_copy.py +10 -12
  224. warp/tests/test_ctypes.py +112 -88
  225. warp/tests/test_dense.py +21 -14
  226. warp/tests/test_devices.py +98 -0
  227. warp/tests/test_dlpack.py +136 -108
  228. warp/tests/test_examples.py +277 -0
  229. warp/tests/test_fabricarray.py +955 -0
  230. warp/tests/test_fast_math.py +15 -11
  231. warp/tests/test_fem.py +1271 -0
  232. warp/tests/test_fp16.py +53 -19
  233. warp/tests/test_func.py +187 -74
  234. warp/tests/test_generics.py +194 -49
  235. warp/tests/test_grad.py +180 -116
  236. warp/tests/test_grad_customs.py +176 -0
  237. warp/tests/test_hash_grid.py +52 -37
  238. warp/tests/test_import.py +10 -23
  239. warp/tests/test_indexedarray.py +577 -24
  240. warp/tests/test_intersect.py +18 -9
  241. warp/tests/test_large.py +141 -0
  242. warp/tests/test_launch.py +251 -15
  243. warp/tests/test_lerp.py +64 -65
  244. warp/tests/test_linear_solvers.py +154 -0
  245. warp/tests/test_lvalue.py +493 -0
  246. warp/tests/test_marching_cubes.py +12 -13
  247. warp/tests/test_mat.py +508 -2778
  248. warp/tests/test_mat_lite.py +115 -0
  249. warp/tests/test_mat_scalar_ops.py +2889 -0
  250. warp/tests/test_math.py +103 -9
  251. warp/tests/test_matmul.py +305 -69
  252. warp/tests/test_matmul_lite.py +410 -0
  253. warp/tests/test_mesh.py +71 -14
  254. warp/tests/test_mesh_query_aabb.py +41 -25
  255. warp/tests/test_mesh_query_point.py +325 -34
  256. warp/tests/test_mesh_query_ray.py +39 -22
  257. warp/tests/test_mlp.py +30 -22
  258. warp/tests/test_model.py +92 -89
  259. warp/tests/test_modules_lite.py +39 -0
  260. warp/tests/test_multigpu.py +88 -114
  261. warp/tests/test_noise.py +12 -11
  262. warp/tests/test_operators.py +16 -20
  263. warp/tests/test_options.py +11 -11
  264. warp/tests/test_pinned.py +17 -18
  265. warp/tests/test_print.py +32 -11
  266. warp/tests/test_quat.py +275 -129
  267. warp/tests/test_rand.py +18 -16
  268. warp/tests/test_reload.py +38 -34
  269. warp/tests/test_rounding.py +50 -43
  270. warp/tests/test_runlength_encode.py +190 -0
  271. warp/tests/test_smoothstep.py +9 -11
  272. warp/tests/test_snippet.py +143 -0
  273. warp/tests/test_sparse.py +460 -0
  274. warp/tests/test_spatial.py +276 -243
  275. warp/tests/test_streams.py +110 -85
  276. warp/tests/test_struct.py +331 -85
  277. warp/tests/test_tape.py +39 -21
  278. warp/tests/test_torch.py +118 -89
  279. warp/tests/test_transient_module.py +12 -13
  280. warp/tests/test_types.py +614 -0
  281. warp/tests/test_utils.py +494 -0
  282. warp/tests/test_vec.py +354 -1987
  283. warp/tests/test_vec_lite.py +73 -0
  284. warp/tests/test_vec_scalar_ops.py +2099 -0
  285. warp/tests/test_volume.py +457 -293
  286. warp/tests/test_volume_write.py +124 -134
  287. warp/tests/unittest_serial.py +35 -0
  288. warp/tests/unittest_suites.py +341 -0
  289. warp/tests/unittest_utils.py +568 -0
  290. warp/tests/unused_test_misc.py +71 -0
  291. warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
  292. warp/thirdparty/appdirs.py +36 -45
  293. warp/thirdparty/unittest_parallel.py +549 -0
  294. warp/torch.py +72 -30
  295. warp/types.py +1744 -713
  296. warp/utils.py +360 -350
  297. warp_lang-0.11.0.dist-info/LICENSE.md +36 -0
  298. warp_lang-0.11.0.dist-info/METADATA +238 -0
  299. warp_lang-0.11.0.dist-info/RECORD +332 -0
  300. {warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/WHEEL +1 -1
  301. warp/bin/warp-clang.exp +0 -0
  302. warp/bin/warp-clang.lib +0 -0
  303. warp/bin/warp.exp +0 -0
  304. warp/bin/warp.lib +0 -0
  305. warp/tests/test_all.py +0 -215
  306. warp/tests/test_array_scan.py +0 -60
  307. warp/tests/test_base.py +0 -208
  308. warp/tests/test_unresolved_func.py +0 -7
  309. warp/tests/test_unresolved_symbol.py +0 -7
  310. warp_lang-0.9.0.dist-info/METADATA +0 -20
  311. warp_lang-0.9.0.dist-info/RECORD +0 -177
  312. /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
  313. /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
  314. /warp/tests/{test_square.py → aux_test_square.py} +0 -0
  315. {warp_lang-0.9.0.dist-info → warp_lang-0.11.0.dist-info}/top_level.txt +0 -0
warp/native/bvh.cpp CHANGED
@@ -27,35 +27,34 @@ class MedianBVHBuilder
 {
 public:
 
-    void build(BVH& bvh, const bounds3* items, int n);
+    void build(BVH& bvh, const vec3* lowers, const vec3* uppers, int n);
 
 private:
 
-    bounds3 calc_bounds(const bounds3* bounds, const int* indices, int start, int end);
+    bounds3 calc_bounds(const vec3* lowers, const vec3* uppers, const int* indices, int start, int end);
 
-    int partition_median(const bounds3* bounds, int* indices, int start, int end, bounds3 range_bounds);
-    int partition_midpoint(const bounds3* bounds, int* indices, int start, int end, bounds3 range_bounds);
-    int partition_sah(const bounds3* bounds, int* indices, int start, int end, bounds3 range_bounds);
+    int partition_median(const vec3* lowers, const vec3* uppers, int* indices, int start, int end, bounds3 range_bounds);
+    int partition_midpoint(const vec3* lowers, const vec3* uppers, int* indices, int start, int end, bounds3 range_bounds);
+    int partition_sah(const vec3* lowers, const vec3* uppers, int* indices, int start, int end, bounds3 range_bounds);
 
-    int build_recursive(BVH& bvh, const bounds3* bounds, int* indices, int start, int end, int depth, int parent);
+    int build_recursive(BVH& bvh, const vec3* lowers, const vec3* uppers, int* indices, int start, int end, int depth, int parent);
 };
 
 //////////////////////////////////////////////////////////////////////
 
-void MedianBVHBuilder::build(BVH& bvh, const bounds3* items, int n)
+void MedianBVHBuilder::build(BVH& bvh, const vec3* lowers, const vec3* uppers, int n)
 {
-    memset(&bvh, 0, sizeof(BVH));
-
+    bvh.max_depth = 0;
     bvh.max_nodes = 2*n-1;
 
     bvh.node_lowers = new BVHPackedNodeHalf[bvh.max_nodes];
     bvh.node_uppers = new BVHPackedNodeHalf[bvh.max_nodes];
     bvh.node_parents = new int[bvh.max_nodes];
-
-    bvh.num_nodes = 0;
-
+    bvh.node_counts = NULL;
+
     // root is always in first slot for top down builders
-    bvh.root = 0;
+    bvh.root = new int[1];
+    bvh.root[0] = 0;
 
     if (n == 0)
         return;
@@ -64,35 +63,42 @@ void MedianBVHBuilder::build(BVH& bvh, const bounds3* items, int n)
     for (int i=0; i < n; ++i)
         indices[i] = i;
 
-    build_recursive(bvh, items, &indices[0], 0, n, 0, -1);
+    build_recursive(bvh, lowers, uppers, &indices[0], 0, n, 0, -1);
 }
 
 
-bounds3 MedianBVHBuilder::calc_bounds(const bounds3* bounds, const int* indices, int start, int end)
+bounds3 MedianBVHBuilder::calc_bounds(const vec3* lowers, const vec3* uppers, const int* indices, int start, int end)
 {
     bounds3 u;
 
     for (int i=start; i < end; ++i)
-        u = bounds_union(u, bounds[indices[i]]);
+    {
+        u.add_point(lowers[indices[i]]);
+        u.add_point(uppers[indices[i]]);
+    }
 
     return u;
 }
 
 struct PartitionPredicateMedian
 {
-    PartitionPredicateMedian(const bounds3* bounds, int a) : bounds(bounds), axis(a) {}
+    PartitionPredicateMedian(const vec3* lowers, const vec3* uppers, int a) : lowers(lowers), uppers(uppers), axis(a) {}
 
     bool operator()(int a, int b) const
     {
-        return bounds[a].center()[axis] < bounds[b].center()[axis];
+        vec3 a_center = 0.5f*(lowers[a] + uppers[a]);
+        vec3 b_center = 0.5f*(lowers[b] + uppers[b]);
+
+        return a_center[axis] < b_center[axis];
    }
 
-    const bounds3* bounds;
+    const vec3* lowers;
+    const vec3* uppers;
    int axis;
 };
 
 
-int MedianBVHBuilder::partition_median(const bounds3* bounds, int* indices, int start, int end, bounds3 range_bounds)
+int MedianBVHBuilder::partition_median(const vec3* lowers, const vec3* uppers, int* indices, int start, int end, bounds3 range_bounds)
 {
     assert(end-start >= 2);
 
@@ -102,27 +108,31 @@ int MedianBVHBuilder::partition_median(const bounds3* bounds, int* indices, int
 
     const int k = (start+end)/2;
 
-    std::nth_element(&indices[start], &indices[k], &indices[end], PartitionPredicateMedian(&bounds[0], axis));
+    std::nth_element(&indices[start], &indices[k], &indices[end], PartitionPredicateMedian(lowers, uppers, axis));
 
     return k;
 }
 
 struct PartitionPredictateMidPoint
 {
-    PartitionPredictateMidPoint(const bounds3* bounds, int a, float m) : bounds(bounds), axis(a), mid(m) {}
+    PartitionPredictateMidPoint(const vec3* lowers, const vec3* uppers, int a, float m) : lowers(lowers), uppers(uppers), axis(a), mid(m) {}
 
     bool operator()(int index) const
     {
-        return bounds[index].center()[axis] <= mid;
+        vec3 center = 0.5f*(lowers[index] + uppers[index]);
+
+        return center[axis] <= mid;
    }
 
-    const bounds3* bounds;
+    const vec3* lowers;
+    const vec3* uppers;
+
    int axis;
    float mid;
 };
 
 
-int MedianBVHBuilder::partition_midpoint(const bounds3* bounds, int* indices, int start, int end, bounds3 range_bounds)
+int MedianBVHBuilder::partition_midpoint(const vec3* lowers, const vec3* uppers, int* indices, int start, int end, bounds3 range_bounds)
 {
     assert(end-start >= 2);
 
@@ -132,7 +142,7 @@ int MedianBVHBuilder::partition_midpoint(const bounds3* bounds, int* indices, in
     int axis = longest_axis(edges);
     float mid = center[axis];
 
-    int* upper = std::partition(indices+start, indices+end, PartitionPredictateMidPoint(&bounds[0], axis, mid));
+    int* upper = std::partition(indices+start, indices+end, PartitionPredictateMidPoint(lowers, uppers, axis, mid));
 
     int k = upper-indices;
 
@@ -140,7 +150,6 @@ int MedianBVHBuilder::partition_midpoint(const bounds3* bounds, int* indices, in
     if (k == start || k == end)
         k = (start+end)/2;
 
-
     return k;
 }
 
@@ -200,7 +209,7 @@ int MedianBVHBuilder::partition_sah(const bounds3* bounds, int* indices, int sta
 }
 #endif
 
-int MedianBVHBuilder::build_recursive(BVH& bvh, const bounds3* bounds, int* indices, int start, int end, int depth, int parent)
+int MedianBVHBuilder::build_recursive(BVH& bvh, const vec3* lowers, const vec3* uppers, int* indices, int start, int end, int depth, int parent)
 {
     assert(start < end);
 
@@ -212,7 +221,7 @@ int MedianBVHBuilder::build_recursive(BVH& bvh, const bounds3* bounds, int* indi
     if (depth > bvh.max_depth)
         bvh.max_depth = depth;
 
-    bounds3 b = calc_bounds(bounds, indices, start, end);
+    bounds3 b = calc_bounds(lowers, uppers, indices, start, end);
 
     const int kMaxItemsPerLeaf = 1;
 
@@ -225,7 +234,7 @@ int MedianBVHBuilder::build_recursive(BVH& bvh, const bounds3* bounds, int* indi
     else
     {
         //int split = partition_midpoint(bounds, indices, start, end, b);
-        int split = partition_median(bounds, indices, start, end, b);
+        int split = partition_median(lowers, uppers, indices, start, end, b);
         //int split = partition_sah(bounds, indices, start, end, b);
 
         if (split == start || split == end)
@@ -234,8 +243,8 @@ int MedianBVHBuilder::build_recursive(BVH& bvh, const bounds3* bounds, int* indi
             split = (start+end)/2;
         }
 
-        int left_child = build_recursive(bvh, bounds, indices, start, split, depth+1, node_index);
-        int right_child = build_recursive(bvh, bounds, indices, split, end, depth+1, node_index);
+        int left_child = build_recursive(bvh, lowers, uppers, indices, start, split, depth+1, node_index);
+        int right_child = build_recursive(bvh, lowers, uppers, indices, split, end, depth+1, node_index);
 
         bvh.node_lowers[node_index] = make_node(b.lower, left_child, false);
         bvh.node_uppers[node_index] = make_node(b.upper, right_child, false);
@@ -245,218 +254,8 @@ int MedianBVHBuilder::build_recursive(BVH& bvh, const bounds3* bounds, int* indi
     return node_index;
 }
 
-class LinearBVHBuilderCPU
-{
-public:
-
-    void build(BVH& bvh, const bounds3* items, int n);
-
-private:
-
-    // calculate Morton codes
-    struct KeyIndexPair
-    {
-        uint32_t key;
-        int index;
-
-        inline bool operator < (const KeyIndexPair& rhs) const { return key < rhs.key; }
-    };
-
-    bounds3 calc_bounds(const bounds3* bounds, const KeyIndexPair* keys, int start, int end);
-    int find_split(const KeyIndexPair* pairs, int start, int end);
-    int build_recursive(BVH& bvh, const KeyIndexPair* keys, const bounds3* bounds, int start, int end, int depth);
-
-};
-
-
-// disable std::sort workaround for macOS error
-#if 0
-void LinearBVHBuilderCPU::build(BVH& bvh, const bounds3* items, int n)
-{
-    memset(&bvh, 0, sizeof(BVH));
-
-    bvh.max_nodes = 2*n-1;
-
-    bvh.node_lowers = new BVHPackedNodeHalf[bvh.max_nodes];
-    bvh.node_uppers = new BVHPackedNodeHalf[bvh.max_nodes];
-    bvh.num_nodes = 0;
-
-    // root is always in first slot for top down builders
-    bvh.root = 0;
-
-    std::vector<KeyIndexPair> keys;
-    keys.reserve(n);
-
-    bounds3 totalbounds3;
-    for (int i=0; i < n; ++i)
-        totalbounds3 = bounds_union(totalbounds3, items[i]);
-
-    // ensure non-zero edge length in all dimensions
-    totalbounds3.expand(0.001f);
-
-    vec3 edges = totalbounds3.edges();
-    vec3 invEdges = cw_div(vec3(1.0f), edges);
-
-    for (int i=0; i < n; ++i)
-    {
-        vec3 center = items[i].center();
-        vec3 local = cw_mul(center-totalbounds3.lower, invEdges);
 
-        KeyIndexPair l;
-        l.key = morton3<1024>(local.x, local.y, local.z);
-        l.index = i;
-
-        keys.push_back(l);
-    }
-
-    // sort by key
-    std::sort(keys.begin(), keys.end());
-
-    build_recursive(bvh, &keys[0], items, 0, n, 0);
-
-    printf("Created BVH for %d items with %d nodes, max depth of %d\n", n, bvh.num_nodes, bvh.max_depth);
-}
-#endif
-
-inline bounds3 LinearBVHBuilderCPU::calc_bounds(const bounds3* bounds, const KeyIndexPair* keys, int start, int end)
-{
-    bounds3 u;
-
-    for (int i=start; i < end; ++i)
-        u = bounds_union(u, bounds[keys[i].index]);
-
-    return u;
-}
-
-inline int LinearBVHBuilderCPU::find_split(const KeyIndexPair* pairs, int start, int end)
-{
-    if (pairs[start].key == pairs[end-1].key)
-        return (start+end)/2;
-
-    // find split point between keys, xor here means all bits
-    // of the result are zero up until the first differing bit
-    int common_prefix = clz(pairs[start].key ^ pairs[end-1].key);
-
-    // use binary search to find the point at which this bit changes
-    // from zero to a 1
-    const int mask = 1 << (31-common_prefix);
-
-    while (end-start > 0)
-    {
-        int index = (start+end)/2;
-
-        if (pairs[index].key&mask)
-        {
-            end = index;
-        }
-        else
-            start = index+1;
-    }
-
-    assert(start == end);
-
-    return start;
-}
-
-int LinearBVHBuilderCPU::build_recursive(BVH& bvh, const KeyIndexPair* keys, const bounds3* bounds, int start, int end, int depth)
-{
-    assert(start < end);
-
-    const int n = end-start;
-    const int nodeIndex = bvh.num_nodes++;
-
-    assert(nodeIndex < bvh.max_nodes);
-
-    if (depth > bvh.max_depth)
-        bvh.max_depth = depth;
-
-    bounds3 b = calc_bounds(bounds, keys, start, end);
-
-    const int kMaxItemsPerLeaf = 1;
-
-    if (n <= kMaxItemsPerLeaf)
-    {
-        bvh.node_lowers[nodeIndex] = make_node(b.lower, keys[start].index, true);
-        bvh.node_uppers[nodeIndex] = make_node(b.upper, keys[start].index, false);
-    }
-    else
-    {
-        int split = find_split(keys, start, end);
-
-        int leftChild = build_recursive(bvh, keys, bounds, start, split, depth+1);
-        int rightChild = build_recursive(bvh, keys, bounds, split, end, depth+1);
-
-        bvh.node_lowers[nodeIndex] = make_node(b.lower, leftChild, false);
-        bvh.node_uppers[nodeIndex] = make_node(b.upper, rightChild, false);
-    }
-
-    return nodeIndex;
-}
-
-
-
-// create only happens on host currently, use bvh_clone() to transfer BVH To device
-BVH bvh_create(const bounds3* bounds, int num_bounds)
-{
-    BVH bvh;
-    memset(&bvh, 0, sizeof(bvh));
-
-    MedianBVHBuilder builder;
-    //LinearBVHBuilderCPU builder;
-    builder.build(bvh, bounds, num_bounds);
-
-    return bvh;
-}
-
-void bvh_destroy_host(BVH& bvh)
-{
-    delete[] bvh.node_lowers;
-    delete[] bvh.node_uppers;
-    delete[] bvh.node_parents;
-    delete[] bvh.bounds;
-
-    bvh.node_lowers = NULL;
-    bvh.node_uppers = NULL;
-    bvh.max_nodes = 0;
-    bvh.num_nodes = 0;
-    bvh.num_bounds = 0;
-}
-
-void bvh_destroy_device(BVH& bvh)
-{
-    ContextGuard guard(bvh.context);
-
-    free_device(WP_CURRENT_CONTEXT, bvh.node_lowers); bvh.node_lowers = NULL;
-    free_device(WP_CURRENT_CONTEXT, bvh.node_uppers); bvh.node_uppers = NULL;
-    free_device(WP_CURRENT_CONTEXT, bvh.node_parents); bvh.node_parents = NULL;
-    free_device(WP_CURRENT_CONTEXT, bvh.node_counts); bvh.node_counts = NULL;
-    free_device(WP_CURRENT_CONTEXT, bvh.bounds); bvh.bounds = NULL;
-}
-
-BVH bvh_clone(void* context, const BVH& bvh_host)
-{
-    ContextGuard guard(context);
-
-    BVH bvh_device = bvh_host;
-
-    bvh_device.context = context ? context : cuda_context_get_current();
-
-    bvh_device.node_lowers = (BVHPackedNodeHalf*)alloc_device(WP_CURRENT_CONTEXT, sizeof(BVHPackedNodeHalf)*bvh_host.max_nodes);
-    bvh_device.node_uppers = (BVHPackedNodeHalf*)alloc_device(WP_CURRENT_CONTEXT, sizeof(BVHPackedNodeHalf)*bvh_host.max_nodes);
-    bvh_device.node_parents = (int*)alloc_device(WP_CURRENT_CONTEXT, sizeof(int)*bvh_host.max_nodes);
-    bvh_device.node_counts = (int*)alloc_device(WP_CURRENT_CONTEXT, sizeof(int)*bvh_host.max_nodes);
-    bvh_device.bounds = (bounds3*)alloc_device(WP_CURRENT_CONTEXT, sizeof(bounds3)*bvh_host.num_bounds);
-
-    // copy host data to device
-    memcpy_h2d(WP_CURRENT_CONTEXT, bvh_device.node_lowers, bvh_host.node_lowers, sizeof(BVHPackedNodeHalf)*bvh_host.max_nodes);
-    memcpy_h2d(WP_CURRENT_CONTEXT, bvh_device.node_uppers, bvh_host.node_uppers, sizeof(BVHPackedNodeHalf)*bvh_host.max_nodes);
-    memcpy_h2d(WP_CURRENT_CONTEXT, bvh_device.node_parents, bvh_host.node_parents, sizeof(int)*bvh_host.max_nodes);
-    memcpy_h2d(WP_CURRENT_CONTEXT, bvh_device.bounds, bvh_host.bounds, sizeof(bounds3)*bvh_host.num_bounds);
-
-    return bvh_device;
-}
-
-void bvh_refit_recursive(BVH& bvh, int index, const bounds3* bounds)
+void bvh_refit_recursive(BVH& bvh, int index)
 {
     BVHPackedNodeHalf& lower = bvh.node_lowers[index];
     BVHPackedNodeHalf& upper = bvh.node_uppers[index];
@@ -465,16 +264,17 @@ void bvh_refit_recursive(BVH& bvh, int index, const bounds3* bounds)
     {
         const int leaf_index = lower.i;
 
-        (vec3&)lower = bounds[leaf_index].lower;
-        (vec3&)upper = bounds[leaf_index].upper;
+        // update leaf from items
+        (vec3&)lower = bvh.item_lowers[leaf_index];
+        (vec3&)upper = bvh.item_uppers[leaf_index];
     }
     else
     {
         int left_index = lower.i;
         int right_index = upper.i;
 
-        bvh_refit_recursive(bvh, left_index, bounds);
-        bvh_refit_recursive(bvh, right_index, bounds);
+        bvh_refit_recursive(bvh, left_index);
+        bvh_refit_recursive(bvh, right_index);
 
         // compute union of children
        const vec3& left_lower = (vec3&)bvh.node_lowers[left_index];
@@ -493,9 +293,9 @@ void bvh_refit_recursive(BVH& bvh, int index, const bounds3* bounds)
    }
 }
 
-void bvh_refit_host(BVH& bvh, const bounds3* b)
+void bvh_refit_host(BVH& bvh)
 {
-    bvh_refit_recursive(bvh, 0, b);
+    bvh_refit_recursive(bvh, 0);
 }
 
 
@@ -538,87 +338,46 @@ void bvh_rem_descriptor(uint64_t id)
 
 }
 
+
+void bvh_destroy_host(BVH& bvh)
+{
+    delete[] bvh.node_lowers;
+    delete[] bvh.node_uppers;
+    delete[] bvh.node_parents;
+    delete[] bvh.root;
+
+    bvh.node_lowers = NULL;
+    bvh.node_uppers = NULL;
+    bvh.node_parents = NULL;
+    bvh.root = NULL;
+
+    bvh.max_nodes = 0;
+    bvh.num_items = 0;
+}
+
 } // namespace wp
 
-uint64_t bvh_create_host(vec3* lowers, vec3* uppers, int num_bounds)
+uint64_t bvh_create_host(vec3* lowers, vec3* uppers, int num_items)
 {
     BVH* bvh = new BVH();
     memset(bvh, 0, sizeof(BVH));
 
     bvh->context = NULL;
 
-    bvh->lowers = lowers;
-    bvh->uppers = uppers;
-    bvh->num_bounds = num_bounds;
-
-    bvh->bounds = new bounds3[num_bounds];
-
-    for (int i=0; i < num_bounds; ++i)
-    {
-        bvh->bounds[i].lower = lowers[i];
-        bvh->bounds[i].upper = uppers[i];
-    }
+    bvh->item_lowers = lowers;
+    bvh->item_uppers = uppers;
+    bvh->num_items = num_items;
 
     MedianBVHBuilder builder;
-    builder.build(*bvh, bvh->bounds, num_bounds);
+    builder.build(*bvh, lowers, uppers, num_items);
 
     return (uint64_t)bvh;
 }
 
-uint64_t bvh_create_device(void* context, vec3* lowers, vec3* uppers, int num_bounds)
-{
-    ContextGuard guard(context);
-
-    // todo: BVH creation only on CPU at the moment so temporarily bring all the data back to host
-    vec3* lowers_host = (vec3*)alloc_host(sizeof(vec3)*num_bounds);
-    vec3* uppers_host = (vec3*)alloc_host(sizeof(vec3)*num_bounds);
-    bounds3* bounds_host = (bounds3*)alloc_host(sizeof(bounds3)*num_bounds);
-
-    memcpy_d2h(WP_CURRENT_CONTEXT, lowers_host, lowers, sizeof(vec3)*num_bounds);
-    memcpy_d2h(WP_CURRENT_CONTEXT, uppers_host, uppers, sizeof(vec3)*num_bounds);
-    cuda_context_synchronize(WP_CURRENT_CONTEXT);
-
-    for (int i=0; i < num_bounds; ++i)
-    {
-        bounds_host[i] = bounds3();
-        bounds_host[i].lower = lowers_host[i];
-        bounds_host[i].upper = uppers_host[i];
-    }
-
-    BVH bvh_host = bvh_create(bounds_host, num_bounds);
-    bvh_host.context = context ? context : cuda_context_get_current();
-    bvh_host.bounds = bounds_host;
-    bvh_host.num_bounds = num_bounds;
-    BVH bvh_device_clone = bvh_clone(WP_CURRENT_CONTEXT, bvh_host);
-
-    bvh_device_clone.lowers = lowers; // managed by the user
-    bvh_device_clone.uppers = uppers; // managed by the user
-
-    BVH* bvh_device = (BVH*)alloc_device(WP_CURRENT_CONTEXT, sizeof(BVH));
-    memcpy_h2d(WP_CURRENT_CONTEXT, bvh_device, &bvh_device_clone, sizeof(BVH));
-
-    bvh_destroy_host(bvh_host);
-    free_host(lowers_host);
-    free_host(uppers_host);
-
-    uint64_t bvh_id = (uint64_t)bvh_device;
-    bvh_add_descriptor(bvh_id, bvh_device_clone);
-
-    return bvh_id;
-}
-
 void bvh_refit_host(uint64_t id)
 {
     BVH* bvh = (BVH*)(id);
-
-    for (int i=0; i < bvh->num_bounds; ++i)
-    {
-        bvh->bounds[i] = bounds3();
-        bvh->bounds[i].lower = bvh->lowers[i];
-        bvh->bounds[i].upper = bvh->uppers[i];
-    }
-
-    bvh_refit_host(*bvh, bvh->bounds);
+    bvh_refit_host(*bvh);
 }
 
 void bvh_destroy_host(uint64_t id)
@@ -629,23 +388,11 @@ void bvh_destroy_host(uint64_t id)
 }
 
 
-void bvh_destroy_device(uint64_t id)
-{
-    BVH bvh;
-    if (bvh_get_descriptor(id, bvh))
-    {
-        bvh_destroy_device(bvh);
-        mesh_rem_descriptor(id);
-    }
-}
-
 // stubs for non-CUDA platforms
 #if !WP_ENABLE_CUDA
 
-void bvh_refit_device(uint64_t id)
-{
-}
-
-
+uint64_t bvh_create_device(void* context, wp::vec3* lowers, wp::vec3* uppers, int num_items) { return 0; }
+void bvh_refit_device(uint64_t id) {}
+void bvh_destroy_device(uint64_t id) {}
 
 #endif // !WP_ENABLE_CUDA
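
The hunks above change the host-side BVH entry points to take separate lower/upper bound arrays instead of a packed bounds3 array, and refitting now reads the item bounds stored on the BVH itself (item_lowers/item_uppers). A minimal caller-side sketch of the new API follows; the three prototypes are copied from the diff, but the placeholder vec3 definition and the example data are assumptions for illustration only, and the real declarations and type come from the package's native headers rather than this snippet.

    #include <cstdint>
    #include <vector>

    // placeholder stand-in for wp::vec3 (the real type lives in warp/native/vec.h)
    namespace wp { struct vec3 { float x, y, z; }; }

    // prototypes as they appear in the 0.11.0 diff above
    uint64_t bvh_create_host(wp::vec3* lowers, wp::vec3* uppers, int num_items);
    void bvh_refit_host(uint64_t id);
    void bvh_destroy_host(uint64_t id);

    int main()
    {
        // per-item bounds; the BVH keeps pointers to these arrays (item_lowers/item_uppers),
        // so they must outlive the BVH and can be mutated in place before a refit
        std::vector<wp::vec3> lowers = {{0, 0, 0}, {1, 0, 0}};
        std::vector<wp::vec3> uppers = {{1, 1, 1}, {2, 1, 1}};

        uint64_t id = bvh_create_host(lowers.data(), uppers.data(), (int)lowers.size());

        uppers[0] = {1.5f, 1.5f, 1.5f};  // move an item...
        bvh_refit_host(id);              // ...and refit; no bounds3 array is passed any more

        bvh_destroy_host(id);
        return 0;
    }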