PyPI - warp-lang - Versions diffs - 1.0.0b2__py3-none-win_amd64.whl → 1.0.0b6__py3-none-win_amd64.whl - Mend

warp-lang 1.0.0b2__py3-none-win_amd64.whl → 1.0.0b6__py3-none-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of warp-lang might be problematic. Click here for more details.

Files changed (271)

docs/conf.py +17 -5
examples/env/env_ant.py +1 -1
examples/env/env_cartpole.py +1 -1
examples/env/env_humanoid.py +1 -1
examples/env/env_usd.py +4 -1
examples/env/environment.py +8 -9
examples/example_dem.py +34 -33
examples/example_diffray.py +364 -337
examples/example_fluid.py +32 -23
examples/example_jacobian_ik.py +97 -93
examples/example_marching_cubes.py +6 -16
examples/example_mesh.py +6 -16
examples/example_mesh_intersect.py +16 -14
examples/example_nvdb.py +14 -16
examples/example_raycast.py +14 -13
examples/example_raymarch.py +16 -23
examples/example_render_opengl.py +19 -10
examples/example_sim_cartpole.py +82 -78
examples/example_sim_cloth.py +45 -48
examples/example_sim_fk_grad.py +51 -44
examples/example_sim_fk_grad_torch.py +47 -40
examples/example_sim_grad_bounce.py +108 -133
examples/example_sim_grad_cloth.py +99 -113
examples/example_sim_granular.py +5 -6
examples/{example_sim_sdf_shape.py → example_sim_granular_collision_sdf.py} +37 -26
examples/example_sim_neo_hookean.py +51 -55
examples/example_sim_particle_chain.py +4 -4
examples/example_sim_quadruped.py +126 -81
examples/example_sim_rigid_chain.py +54 -61
examples/example_sim_rigid_contact.py +66 -70
examples/example_sim_rigid_fem.py +3 -3
examples/example_sim_rigid_force.py +1 -1
examples/example_sim_rigid_gyroscopic.py +3 -4
examples/example_sim_rigid_kinematics.py +28 -39
examples/example_sim_trajopt.py +112 -110
examples/example_sph.py +9 -8
examples/example_wave.py +7 -7
examples/fem/bsr_utils.py +30 -17
examples/fem/example_apic_fluid.py +85 -69
examples/fem/example_convection_diffusion.py +97 -93
examples/fem/example_convection_diffusion_dg.py +142 -149
examples/fem/example_convection_diffusion_dg0.py +141 -136
examples/fem/example_deformed_geometry.py +146 -0
examples/fem/example_diffusion.py +115 -84
examples/fem/example_diffusion_3d.py +116 -86
examples/fem/example_diffusion_mgpu.py +102 -79
examples/fem/example_mixed_elasticity.py +139 -100
examples/fem/example_navier_stokes.py +175 -162
examples/fem/example_stokes.py +143 -111
examples/fem/example_stokes_transfer.py +186 -157
examples/fem/mesh_utils.py +59 -97
examples/fem/plot_utils.py +138 -17
tools/ci/publishing/build_nodes_info.py +54 -0
warp/__init__.py +4 -3
warp/__init__.pyi +1 -0
warp/bin/warp-clang.dll +0 -0
warp/bin/warp.dll +0 -0
warp/build.py +5 -3
warp/build_dll.py +29 -9
warp/builtins.py +836 -492
warp/codegen.py +864 -553
warp/config.py +3 -1
warp/context.py +389 -172
warp/fem/__init__.py +24 -6
warp/fem/cache.py +318 -25
warp/fem/dirichlet.py +7 -3
warp/fem/domain.py +14 -0
warp/fem/field/__init__.py +30 -38
warp/fem/field/field.py +149 -0
warp/fem/field/nodal_field.py +244 -138
warp/fem/field/restriction.py +8 -6
warp/fem/field/test.py +127 -59
warp/fem/field/trial.py +117 -60
warp/fem/geometry/__init__.py +5 -1
warp/fem/geometry/deformed_geometry.py +271 -0
warp/fem/geometry/element.py +24 -1
warp/fem/geometry/geometry.py +86 -14
warp/fem/geometry/grid_2d.py +112 -54
warp/fem/geometry/grid_3d.py +134 -65
warp/fem/geometry/hexmesh.py +953 -0
warp/fem/geometry/partition.py +85 -33
warp/fem/geometry/quadmesh_2d.py +532 -0
warp/fem/geometry/tetmesh.py +451 -115
warp/fem/geometry/trimesh_2d.py +197 -92
warp/fem/integrate.py +534 -268
warp/fem/operator.py +58 -31
warp/fem/polynomial.py +11 -0
warp/fem/quadrature/__init__.py +1 -1
warp/fem/quadrature/pic_quadrature.py +150 -58
warp/fem/quadrature/quadrature.py +209 -57
warp/fem/space/__init__.py +230 -53
warp/fem/space/basis_space.py +489 -0
warp/fem/space/collocated_function_space.py +105 -0
warp/fem/space/dof_mapper.py +49 -2
warp/fem/space/function_space.py +90 -39
warp/fem/space/grid_2d_function_space.py +149 -496
warp/fem/space/grid_3d_function_space.py +173 -538
warp/fem/space/hexmesh_function_space.py +352 -0
warp/fem/space/partition.py +129 -76
warp/fem/space/quadmesh_2d_function_space.py +369 -0
warp/fem/space/restriction.py +46 -34
warp/fem/space/shape/__init__.py +15 -0
warp/fem/space/shape/cube_shape_function.py +738 -0
warp/fem/space/shape/shape_function.py +103 -0
warp/fem/space/shape/square_shape_function.py +611 -0
warp/fem/space/shape/tet_shape_function.py +567 -0
warp/fem/space/shape/triangle_shape_function.py +429 -0
warp/fem/space/tetmesh_function_space.py +132 -1039
warp/fem/space/topology.py +295 -0
warp/fem/space/trimesh_2d_function_space.py +104 -742
warp/fem/types.py +13 -11
warp/fem/utils.py +335 -60
warp/native/array.h +120 -34
warp/native/builtin.h +101 -72
warp/native/bvh.cpp +73 -325
warp/native/bvh.cu +406 -23
warp/native/bvh.h +22 -40
warp/native/clang/clang.cpp +1 -0
warp/native/crt.h +2 -0
warp/native/cuda_util.cpp +8 -3
warp/native/cuda_util.h +1 -0
warp/native/exports.h +1522 -1243
warp/native/intersect.h +19 -4
warp/native/intersect_adj.h +8 -8
warp/native/mat.h +76 -17
warp/native/mesh.cpp +33 -108
warp/native/mesh.cu +114 -18
warp/native/mesh.h +395 -40
warp/native/noise.h +272 -329
warp/native/quat.h +51 -8
warp/native/rand.h +44 -34
warp/native/reduce.cpp +1 -1
warp/native/sparse.cpp +4 -4
warp/native/sparse.cu +163 -155
warp/native/spatial.h +2 -2
warp/native/temp_buffer.h +18 -14
warp/native/vec.h +103 -21
warp/native/warp.cpp +2 -1
warp/native/warp.cu +28 -3
warp/native/warp.h +4 -3
warp/render/render_opengl.py +261 -109
warp/sim/__init__.py +1 -2
warp/sim/articulation.py +385 -185
warp/sim/import_mjcf.py +59 -48
warp/sim/import_urdf.py +15 -15
warp/sim/import_usd.py +174 -102
warp/sim/inertia.py +17 -18
warp/sim/integrator_xpbd.py +4 -3
warp/sim/model.py +330 -250
warp/sim/render.py +1 -1
warp/sparse.py +625 -152
warp/stubs.py +341 -309
warp/tape.py +9 -6
warp/tests/__main__.py +3 -6
warp/tests/assets/curlnoise_golden.npy +0 -0
warp/tests/assets/pnoise_golden.npy +0 -0
warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
warp/tests/aux_test_unresolved_func.py +14 -0
warp/tests/aux_test_unresolved_symbol.py +14 -0
warp/tests/disabled_kinematics.py +239 -0
warp/tests/run_coverage_serial.py +31 -0
warp/tests/test_adam.py +103 -106
warp/tests/test_arithmetic.py +94 -74
warp/tests/test_array.py +82 -101
warp/tests/test_array_reduce.py +57 -23
warp/tests/test_atomic.py +64 -28
warp/tests/test_bool.py +22 -12
warp/tests/test_builtins_resolution.py +1292 -0
warp/tests/test_bvh.py +18 -18
warp/tests/test_closest_point_edge_edge.py +54 -57
warp/tests/test_codegen.py +165 -134
warp/tests/test_compile_consts.py +28 -20
warp/tests/test_conditional.py +108 -24
warp/tests/test_copy.py +10 -12
warp/tests/test_ctypes.py +112 -88
warp/tests/test_dense.py +21 -14
warp/tests/test_devices.py +98 -0
warp/tests/test_dlpack.py +75 -75
warp/tests/test_examples.py +237 -0
warp/tests/test_fabricarray.py +22 -24
warp/tests/test_fast_math.py +15 -11
warp/tests/test_fem.py +1034 -124
warp/tests/test_fp16.py +23 -16
warp/tests/test_func.py +187 -86
warp/tests/test_generics.py +194 -49
warp/tests/test_grad.py +123 -181
warp/tests/test_grad_customs.py +176 -0
warp/tests/test_hash_grid.py +35 -34
warp/tests/test_import.py +10 -23
warp/tests/test_indexedarray.py +24 -25
warp/tests/test_intersect.py +18 -9
warp/tests/test_large.py +141 -0
warp/tests/test_launch.py +14 -41
warp/tests/test_lerp.py +64 -65
warp/tests/test_lvalue.py +493 -0
warp/tests/test_marching_cubes.py +12 -13
warp/tests/test_mat.py +517 -2898
warp/tests/test_mat_lite.py +115 -0
warp/tests/test_mat_scalar_ops.py +2889 -0
warp/tests/test_math.py +103 -9
warp/tests/test_matmul.py +304 -69
warp/tests/test_matmul_lite.py +410 -0
warp/tests/test_mesh.py +60 -22
warp/tests/test_mesh_query_aabb.py +21 -25
warp/tests/test_mesh_query_point.py +111 -22
warp/tests/test_mesh_query_ray.py +12 -24
warp/tests/test_mlp.py +30 -22
warp/tests/test_model.py +92 -89
warp/tests/test_modules_lite.py +39 -0
warp/tests/test_multigpu.py +88 -114
warp/tests/test_noise.py +12 -11
warp/tests/test_operators.py +16 -20
warp/tests/test_options.py +11 -11
warp/tests/test_pinned.py +17 -18
warp/tests/test_print.py +32 -11
warp/tests/test_quat.py +275 -129
warp/tests/test_rand.py +18 -16
warp/tests/test_reload.py +38 -34
warp/tests/test_rounding.py +50 -43
warp/tests/test_runlength_encode.py +168 -20
warp/tests/test_smoothstep.py +9 -11
warp/tests/test_snippet.py +143 -0
warp/tests/test_sparse.py +261 -63
warp/tests/test_spatial.py +276 -243
warp/tests/test_streams.py +110 -85
warp/tests/test_struct.py +268 -63
warp/tests/test_tape.py +39 -21
warp/tests/test_torch.py +90 -86
warp/tests/test_transient_module.py +10 -12
warp/tests/test_types.py +363 -0
warp/tests/test_utils.py +451 -0
warp/tests/test_vec.py +354 -2050
warp/tests/test_vec_lite.py +73 -0
warp/tests/test_vec_scalar_ops.py +2099 -0
warp/tests/test_volume.py +418 -376
warp/tests/test_volume_write.py +124 -134
warp/tests/unittest_serial.py +35 -0
warp/tests/unittest_suites.py +291 -0
warp/tests/unittest_utils.py +342 -0
warp/tests/{test_misc.py → unused_test_misc.py} +13 -5
warp/tests/{test_debug.py → walkthough_debug.py} +3 -17
warp/thirdparty/appdirs.py +36 -45
warp/thirdparty/unittest_parallel.py +589 -0
warp/types.py +622 -211
warp/utils.py +54 -393
warp_lang-1.0.0b6.dist-info/METADATA +238 -0
warp_lang-1.0.0b6.dist-info/RECORD +409 -0
{warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/WHEEL +1 -1
examples/example_cache_management.py +0 -40
examples/example_multigpu.py +0 -54
examples/example_struct.py +0 -65
examples/fem/example_stokes_transfer_3d.py +0 -210
warp/bin/warp-clang.so +0 -0
warp/bin/warp.so +0 -0
warp/fem/field/discrete_field.py +0 -80
warp/fem/space/nodal_function_space.py +0 -233
warp/tests/test_all.py +0 -223
warp/tests/test_array_scan.py +0 -60
warp/tests/test_base.py +0 -208
warp/tests/test_unresolved_func.py +0 -7
warp/tests/test_unresolved_symbol.py +0 -7
warp_lang-1.0.0b2.dist-info/METADATA +0 -26
warp_lang-1.0.0b2.dist-info/RECORD +0 -380
/warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
/warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
/warp/tests/{test_square.py → aux_test_square.py} +0 -0
{warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/LICENSE.md +0 -0
{warp_lang-1.0.0b2.dist-info → warp_lang-1.0.0b6.dist-info}/top_level.txt +0 -0

warp/native/intersect.h CHANGED

@@ -114,6 +114,21 @@ CUDA_CALLABLE inline vec2 closest_point_to_triangle(const vec3& a, const vec3& b
 	return vec2(u, v);
 }
+CUDA_CALLABLE inline vec2 furthest_point_to_triangle(const vec3& a, const vec3& b, const vec3& c, const vec3& p)
+{
+    vec3 pa = p-a;
+    vec3 pb = p-b;
+    vec3 pc = p-c;
+    float dist_a = dot(pa, pa);
+    float dist_b = dot(pb, pb);
+    float dist_c = dot(pc, pc);
+    if (dist_a > dist_b && dist_a > dist_c)
+        return vec2(1.0f, 0.0f); // a is furthest
+    if (dist_b > dist_c)
+        return vec2(0.0f, 1.0f); // b is furthest
+    return vec2(0.0f, 0.0f); // c is furthest
+}
 CUDA_CALLABLE inline bool intersect_ray_aabb(const vec3& pos, const vec3& rcp_dir, const vec3& lower, const vec3& upper, float& t)
 {
@@ -854,7 +869,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
     wp::adj_sub(var_9, var_71, adj_9, adj_71, adj_73);
     wp::adj_mul(var_21, var_70, adj_21, adj_70, adj_72);
     wp::adj_mul(var_41, var_70, adj_41, adj_70, adj_71);
-    wp::adj_div(var_9, var_69, adj_9, adj_69, adj_70);
+    wp::adj_div(var_9, var_69, var_70, adj_9, adj_69, adj_70);
     wp::adj_add(var_68, var_21, adj_68, adj_21, adj_69);
     wp::adj_add(var_53, var_41, adj_53, adj_41, adj_68);
     wp::adj_select(var_64, var_50, var_66, adj_64, adj_50, adj_66, adj_67);
@@ -866,7 +881,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
     }
     wp::adj_sub(var_32, var_33, adj_32, adj_33, adj_62);
     wp::adj_sub(var_13, var_12, adj_13, adj_12, adj_60);
-    wp::adj_div(var_54, var_57, adj_54, adj_57, adj_58);
+    wp::adj_div(var_54, var_57, var_58, adj_54, adj_57, adj_58);
     wp::adj_add(var_55, var_56, adj_55, adj_56, adj_57);
     wp::adj_sub(var_32, var_33, adj_32, adj_33, adj_56);
     wp::adj_sub(var_13, var_12, adj_13, adj_12, adj_55);
@@ -881,7 +896,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
     	wp::adj_vec2(var_48, var_5, adj_48, adj_5, adj_49);
     	wp::adj_sub(var_9, var_43, adj_9, adj_43, adj_48);
     }
-    wp::adj_div(var_4, var_42, adj_4, adj_42, adj_43);
+    wp::adj_div(var_4, var_42, var_43, adj_4, adj_42, adj_43);
     wp::adj_sub(var_4, var_33, adj_4, adj_33, adj_42);
     wp::adj_sub(var_39, var_40, adj_39, adj_40, adj_41);
     wp::adj_mul(var_3, var_33, adj_3, adj_33, adj_40);
@@ -902,7 +917,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
     	wp::adj_vec2(var_28, var_23, adj_28, adj_23, adj_29);
     	wp::adj_sub(var_9, var_23, adj_9, adj_23, adj_28);
     }
-    wp::adj_div(var_3, var_22, adj_3, adj_22, adj_23);
+    wp::adj_div(var_3, var_22, var_23, adj_3, adj_22, adj_23);
     wp::adj_sub(var_3, var_12, adj_3, adj_12, adj_22);
     wp::adj_sub(var_19, var_20, adj_19, adj_20, adj_21);
     wp::adj_mul(var_12, var_4, adj_12, adj_4, adj_20);

warp/native/intersect_adj.h CHANGED

@@ -276,7 +276,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
     label1:;
     adj_71 += adj_ret;
     wp::adj_vec3(var_61, var_62, var_70, adj_61, adj_62, adj_70, adj_71);
-    wp::adj_length(var_69, adj_69, adj_70);
+    wp::adj_length(var_69, var_70, adj_69, adj_70);
     wp::adj_sub(var_68, var_65, adj_68, adj_65, adj_69);
     wp::adj_add(var_p2, var_67, adj_p2, adj_67, adj_68);
     wp::adj_mul(var_66, var_62, adj_66, adj_62, adj_67);
@@ -297,7 +297,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
     			wp::adj_select(var_51, var_49, var_54, adj_51, adj_49, adj_54, adj_55);
     			if (var_51) {
     				wp::adj_clamp(var_53, var_6, var_25, adj_53, adj_6, adj_25, adj_54);
-    				wp::adj_div(var_52, var_3, adj_52, adj_3, adj_53);
+    				wp::adj_div(var_52, var_3, var_53, adj_52, adj_3, adj_53);
     				wp::adj_sub(var_30, var_21, adj_30, adj_21, adj_52);
     			}
     		}
@@ -305,10 +305,10 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
     		wp::adj_select(var_45, var_41, var_48, adj_45, adj_41, adj_48, adj_49);
     		if (var_45) {
     			wp::adj_clamp(var_47, var_6, var_25, adj_47, adj_6, adj_25, adj_48);
-    			wp::adj_div(var_46, var_3, adj_46, adj_3, adj_47);
+    			wp::adj_div(var_46, var_3, var_47, adj_46, adj_3, adj_47);
     			wp::adj_neg(var_21, adj_21, adj_46);
     		}
-    		wp::adj_div(var_43, var_4, adj_43, adj_4, adj_44);
+    		wp::adj_div(var_43, var_4, var_44, adj_43, adj_4, adj_44);
     		wp::adj_add(var_42, var_5, adj_42, adj_5, adj_43);
     		wp::adj_mul(var_30, var_41, adj_30, adj_41, adj_42);
     		wp::adj_select(var_34, var_6, var_40, adj_34, adj_6, adj_40, adj_41);
@@ -317,7 +317,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
     		wp::adj_select(var_34, var_28, var_39, adj_34, adj_28, adj_39, adj_40);
     		if (var_34) {
     			wp::adj_clamp(var_38, var_6, var_25, adj_38, adj_6, adj_25, adj_39);
-    			wp::adj_div(var_37, var_33, adj_37, adj_33, adj_38);
+    			wp::adj_div(var_37, var_33, var_38, adj_37, adj_33, adj_38);
     			wp::adj_sub(var_35, var_36, adj_35, adj_36, adj_37);
     			wp::adj_mul(var_21, var_4, adj_21, adj_4, adj_36);
     			wp::adj_mul(var_30, var_5, adj_30, adj_5, adj_35);
@@ -332,7 +332,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
     	if (var_22) {
     		wp::adj_cast_float(var_6, adj_6, adj_27);
     		wp::adj_clamp(var_24, var_6, var_25, adj_24, adj_6, adj_25, adj_26);
-    		wp::adj_div(var_23, var_3, adj_23, adj_3, adj_24);
+    		wp::adj_div(var_23, var_3, var_24, adj_23, adj_3, adj_24);
     		wp::adj_neg(var_21, adj_21, adj_23);
     	}
     	wp::adj_dot(var_0, var_2, adj_0, adj_2, adj_21);
@@ -341,7 +341,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
     wp::adj_select(var_15, var_7, var_16, adj_15, adj_7, adj_16, adj_19);
     if (var_15) {
     	wp::adj_cast_float(var_17, adj_17, adj_18);
-    	wp::adj_div(var_5, var_4, adj_5, adj_4, adj_17);
+    	wp::adj_div(var_5, var_4, var_17, adj_5, adj_4, adj_17);
     	wp::adj_cast_float(var_6, adj_6, adj_16);
     }
     if (var_13) {
@@ -349,7 +349,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
     	adj_14 += adj_ret;
     	wp::adj_vec3(var_7, var_8, var_10, adj_7, adj_8, adj_10, adj_14);
     }
-    wp::adj_length(var_9, adj_9, adj_10);
+    wp::adj_length(var_9, var_10, adj_9, adj_10);
     wp::adj_sub(var_p2, var_p1, adj_p2, adj_p1, adj_9);
     wp::adj_cast_float(var_6, adj_6, adj_8);
     wp::adj_cast_float(var_6, adj_6, adj_7);

warp/native/mat.h CHANGED

@@ -298,7 +298,19 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> atomic_max(mat_t<Rows,Cols,Type> * ad
 }
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE vec_t<Cols,Type> index(const mat_t<Rows,Cols,Type>& m, int row)
+inline CUDA_CALLABLE void adj_atomic_minmax(
+    mat_t<Rows,Cols,Type> *addr,
+    mat_t<Rows,Cols,Type> *adj_addr,
+    const mat_t<Rows,Cols,Type> &value,
+    mat_t<Rows,Cols,Type> &adj_value)
+{
+    for (unsigned i=0; i < Rows; ++i)
+        for (unsigned j=0; j < Cols; ++j)
+            adj_atomic_minmax(&addr->data[i][j], &adj_addr->data[i][j], value.data[i][j], adj_value.data[i][j]);
+}
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE vec_t<Cols,Type> extract(const mat_t<Rows,Cols,Type>& m, int row)
 {
     vec_t<Cols,Type> ret;
     for(unsigned i=0; i < Cols; ++i)
@@ -309,7 +321,7 @@ inline CUDA_CALLABLE vec_t<Cols,Type> index(const mat_t<Rows,Cols,Type>& m, int
 }
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE Type index(const mat_t<Rows,Cols,Type>& m, int row, int col)
+inline CUDA_CALLABLE Type extract(const mat_t<Rows,Cols,Type>& m, int row, int col)
 {
 #ifndef NDEBUG
     if (row < 0 || row >= Rows)
@@ -327,7 +339,7 @@ inline CUDA_CALLABLE Type index(const mat_t<Rows,Cols,Type>& m, int row, int col
 }
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void indexset(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols, Type> value)
+inline CUDA_CALLABLE vec_t<Cols, Type>* index(mat_t<Rows,Cols,Type>& m, int row)
 {
 #ifndef NDEBUG
     if (row < 0 || row >= Rows)
@@ -337,12 +349,11 @@ inline CUDA_CALLABLE void indexset(mat_t<Rows,Cols,Type>& m, int row, vec_t<Cols
     }
 #endif
-    for(unsigned i=0; i < Cols; ++i)
-        m.data[row][i] = value[i];
+    return reinterpret_cast<vec_t<Cols, Type>*>(&m.data[row]);
 }
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void indexset(mat_t<Rows,Cols,Type>& m, int row, int col, Type value)
+inline CUDA_CALLABLE Type* index(mat_t<Rows,Cols,Type>& m, int row, int col)
 {
 #ifndef NDEBUG
     if (row < 0 || row >= Rows)
@@ -356,18 +367,19 @@ inline CUDA_CALLABLE void indexset(mat_t<Rows,Cols,Type>& m, int row, int col, T
         assert(0);
     }
 #endif
-    m.data[row][col] = value;
+    return &m.data[row][col];
 }
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void adj_indexset(const mat_t<Rows,Cols,Type>& m, int row, const vec_t<Cols, Type>& value,
+inline CUDA_CALLABLE void adj_index(const mat_t<Rows,Cols,Type>& m, int row,
                                        const mat_t<Rows,Cols,Type>& adj_m, int adj_row, const vec_t<Cols, Type>& adj_value)
 {
     // nop
 }
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void adj_indexset(const mat_t<Rows,Cols,Type>& m, int row, int col, Type value,
+inline CUDA_CALLABLE void adj_index(const mat_t<Rows,Cols,Type>& m, int row, int col,
                                        const mat_t<Rows,Cols,Type>& adj_m, int adj_row, int adj_col, Type adj_value)
 {
     // nop
@@ -425,7 +437,22 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> div(const mat_t<Rows,Cols,Type>& a, T
         }
     }
-    return t;
+    return t;
+}
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE mat_t<Rows,Cols,Type> div(Type b, const mat_t<Rows,Cols,Type>& a)
+{
+    mat_t<Rows,Cols,Type> t;
+    for (unsigned i=0; i < Rows; ++i)
+    {
+        for (unsigned j=0; j < Cols; ++j)
+        {
+            t.data[i][j] = b / a.data[i][j];
+        }
+    }
+    return t;
 }
 template<unsigned Rows, unsigned Cols, typename Type>
@@ -440,7 +467,7 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> mul(const mat_t<Rows,Cols,Type>& a, T
         }
     }
-    return t;
+    return t;
 }
 template<unsigned Rows, unsigned Cols, typename Type>
@@ -473,6 +500,17 @@ inline CUDA_CALLABLE vec_t<Rows,Type> mul(const mat_t<Rows,Cols,Type>& a, const
     return r;
 }
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE vec_t<Cols,Type> mul(const vec_t<Rows,Type>& b, const mat_t<Rows,Cols,Type>& a)
+{
+    vec_t<Cols,Type> r = a.get_row(0)*b[0];
+    for( unsigned i=1; i < Rows; ++i )
+    {
+        r += a.get_row(i)*b[i];
+    }
+    return r;
+}
 template<unsigned Rows, unsigned Cols, unsigned ColsOut, typename Type>
 inline CUDA_CALLABLE mat_t<Rows,ColsOut,Type> mul(const mat_t<Rows,Cols,Type>& a, const mat_t<Cols,ColsOut,Type>& b)
 {
@@ -861,14 +899,14 @@ inline CUDA_CALLABLE vec_t<3,Type> transform_vector(const mat_t<4,4,Type>& m, co
 }
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void adj_index(const mat_t<Rows,Cols,Type>& m, int row, mat_t<Rows,Cols,Type>& adj_m, int& adj_row, const vec_t<Cols,Type>& adj_ret)
+inline CUDA_CALLABLE void adj_extract(const mat_t<Rows,Cols,Type>& m, int row, mat_t<Rows,Cols,Type>& adj_m, int& adj_row, const vec_t<Cols,Type>& adj_ret)
 {
     for( unsigned col=0; col < Cols; ++col )
         adj_m.data[row][col] += adj_ret[col];
 }
 template<unsigned Rows, unsigned Cols, typename Type>
-inline void CUDA_CALLABLE adj_index(const mat_t<Rows,Cols,Type>& m, int row, int col, mat_t<Rows,Cols,Type>& adj_m, int& adj_row, int& adj_col, Type adj_ret)
+inline void CUDA_CALLABLE adj_extract(const mat_t<Rows,Cols,Type>& m, int row, int col, mat_t<Rows,Cols,Type>& adj_m, int& adj_row, int& adj_col, Type adj_ret)
 {
 #ifndef NDEBUG
     if (row < 0 || row > Rows)
@@ -932,6 +970,20 @@ inline CUDA_CALLABLE void adj_div(const mat_t<Rows,Cols,Type>& a, Type s, mat_t<
     }
 }
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_div(Type s, const mat_t<Rows,Cols,Type>& a, Type& adj_s, mat_t<Rows,Cols,Type>& adj_a, const mat_t<Rows,Cols,Type>& adj_ret)
+{
+    adj_s -= tensordot(a , adj_ret)/ (s * s); // - a / s^2
+    for (unsigned i=0; i < Rows; ++i)
+    {
+        for (unsigned j=0; j < Cols; ++j)
+        {
+            adj_a.data[i][j] += s / adj_ret.data[i][j];
+        }
+    }
+}
 template<unsigned Rows, unsigned Cols, typename Type>
 inline CUDA_CALLABLE void adj_mul(const mat_t<Rows,Cols,Type>& a, Type b, mat_t<Rows,Cols,Type>& adj_a, Type& adj_b, const mat_t<Rows,Cols,Type>& adj_ret)
 {
@@ -965,6 +1017,13 @@ inline CUDA_CALLABLE void adj_mul(const mat_t<Rows,Cols,Type>& a, const vec_t<Co
     adj_b += mul(transpose(a), adj_ret);
 }
+template<unsigned Rows, unsigned Cols, typename Type>
+inline CUDA_CALLABLE void adj_mul(const vec_t<Rows,Type>& b, const mat_t<Rows,Cols,Type>& a, vec_t<Rows,Type>& adj_b, mat_t<Rows,Cols,Type>& adj_a, const vec_t<Cols,Type>& adj_ret)
+{
+    adj_a += outer(b, adj_ret);
+    adj_b += mul(adj_ret, transpose(a));
+}
 template<unsigned Rows, unsigned Cols, unsigned ColsOut, typename Type>
 inline CUDA_CALLABLE void adj_mul(const mat_t<Rows,Cols,Type>& a, const mat_t<Cols,ColsOut,Type>& b, mat_t<Rows,Cols,Type>& adj_a, mat_t<Cols,ColsOut,Type>& adj_b, const mat_t<Rows,ColsOut,Type>& adj_ret)
 {
@@ -1105,10 +1164,10 @@ inline CUDA_CALLABLE void adj_determinant(const mat_t<4,4,Type>& m, mat_t<4,4,Ty
 }
 template<unsigned Rows, typename Type>
-inline CUDA_CALLABLE void adj_inverse(const mat_t<Rows,Rows,Type>& m, mat_t<Rows,Rows,Type>& adj_m, const mat_t<Rows,Rows,Type>& adj_ret)
+inline CUDA_CALLABLE void adj_inverse(const mat_t<Rows,Rows,Type>& m, mat_t<Rows,Rows,Type>& ret, mat_t<Rows,Rows,Type>& adj_m, const mat_t<Rows,Rows,Type>& adj_ret)
 {
     // todo: how to cache this from the forward pass?
-    mat_t<Rows,Rows,Type> invt = transpose(inverse(m));
+    mat_t<Rows,Rows,Type> invt = transpose(ret);
     // see https://people.maths.ox.ac.uk/gilesm/files/NA-08-01.pdf 2.2.3
     adj_m -= mul(mul(invt, adj_ret), invt);
@@ -1150,10 +1209,10 @@ inline CUDA_CALLABLE void adj_cw_mul(const mat_t<Rows,Cols,Type>& a, const mat_t
 }
 template<unsigned Rows, unsigned Cols, typename Type>
-inline CUDA_CALLABLE void adj_cw_div(const mat_t<Rows,Cols,Type>& a, const mat_t<Rows,Cols,Type>& b, mat_t<Rows,Cols,Type>& adj_a, mat_t<Rows,Cols,Type>& adj_b, const mat_t<Rows,Cols,Type>& adj_ret)
+inline CUDA_CALLABLE void adj_cw_div(const mat_t<Rows,Cols,Type>& a, const mat_t<Rows,Cols,Type>& b, mat_t<Rows,Cols,Type>& ret, mat_t<Rows,Cols,Type>& adj_a, mat_t<Rows,Cols,Type>& adj_b, const mat_t<Rows,Cols,Type>& adj_ret)
 {
   adj_a += cw_div(adj_ret, b);
-  adj_b -= cw_mul(adj_ret, cw_div(cw_div(a, b), b));
+  adj_b -= cw_mul(adj_ret, cw_div(ret, b));
 }
 // adjoint for the constant constructor:

warp/native/mesh.cpp CHANGED

@@ -103,7 +103,8 @@ uint64_t mesh_create_host(array_t<wp::vec3> points, array_t<wp::vec3> velocities
 {
     Mesh* m = new Mesh(points, velocities, indices, num_points, num_tris);
-    m->bounds = new bounds3[num_tris];
+    m->lowers = new vec3[num_tris];
+    m->uppers = new vec3[num_tris];
     float sum = 0.0;
     for (int i=0; i < num_tris; ++i)
@@ -111,15 +112,23 @@ uint64_t mesh_create_host(array_t<wp::vec3> points, array_t<wp::vec3> velocities
         wp::vec3& p0 = points[indices[i*3+0]];
         wp::vec3& p1 = points[indices[i*3+1]];
         wp::vec3& p2 = points[indices[i*3+2]];
-        m->bounds[i].add_point(p0);
-        m->bounds[i].add_point(p1);
-        m->bounds[i].add_point(p2);
+        // compute triangle bounds
+        bounds3 b;
+        b.add_point(p0);
+        b.add_point(p1);
+        b.add_point(p2);
+        m->lowers[i] = b.lower;
+        m->uppers[i] = b.upper;
+        // compute edge lengths
         sum += length(p0-p1) + length(p0-p2) + length(p2-p1);
     }
     m->average_edge_length = sum / (num_tris*3);
-    m->bvh = bvh_create(m->bounds, num_tris);
+    m->bvh = *(wp::BVH*)bvh_create_host(m->lowers, m->uppers, num_tris);
     if (support_winding_number)
     {
         // Let's first compute the sold
@@ -131,86 +140,14 @@ uint64_t mesh_create_host(array_t<wp::vec3> points, array_t<wp::vec3> velocities
     return (uint64_t)m;
 }
-uint64_t mesh_create_device(void* context, array_t<wp::vec3> points, array_t<wp::vec3> velocities, array_t<int> indices, int num_points, int num_tris, int support_winding_number)
-{
-    ContextGuard guard(context);
-    Mesh mesh(points, velocities, indices, num_points, num_tris);
-    mesh.context = context ? context : cuda_context_get_current();
-    // mesh.points = array_t<vec3>(points, num_points, points_grad);
-    // mesh.velocities = array_t<vec3>(velocities, num_points, velocities_grad);
-    // mesh.indices = array_t<int>(indices, num_tris, 3);
-    // mesh.num_points = num_points;
-    // mesh.num_tris = num_tris;
-    {
-        // todo: BVH creation only on CPU at the moment so temporarily bring all the data back to host
-        vec3* points_host = (vec3*)alloc_host(sizeof(vec3)*num_points);
-        int* indices_host = (int*)alloc_host(sizeof(int)*num_tris*3);
-        bounds3* bounds_host = (bounds3*)alloc_host(sizeof(bounds3)*num_tris);
-        memcpy_d2h(WP_CURRENT_CONTEXT, points_host, points, sizeof(vec3)*num_points);
-        memcpy_d2h(WP_CURRENT_CONTEXT, indices_host, indices, sizeof(int)*num_tris*3);
-        cuda_context_synchronize(WP_CURRENT_CONTEXT);
-        float sum = 0.0;
-        for (int i=0; i < num_tris; ++i)
-        {
-            bounds_host[i] = bounds3();
-            wp::vec3 p0 = points_host[indices_host[i*3+0]];
-            wp::vec3 p1 = points_host[indices_host[i*3+1]];
-            wp::vec3 p2 = points_host[indices_host[i*3+2]];
-            bounds_host[i].add_point(p0);
-            bounds_host[i].add_point(p1);
-            bounds_host[i].add_point(p2);
-            sum += length(p0-p1) + length(p0-p2) + length(p2-p1);
-        }
-        mesh.average_edge_length = sum / (num_tris*3);
-        BVH bvh_host = bvh_create(bounds_host, num_tris);
-        BVH bvh_device = bvh_clone(WP_CURRENT_CONTEXT, bvh_host);
-        bvh_destroy_host(bvh_host);
-        // save gpu-side copy of bounds
-        mesh.bounds = (bounds3*)alloc_device(WP_CURRENT_CONTEXT, sizeof(bounds3)*num_tris);
-        memcpy_h2d(WP_CURRENT_CONTEXT, mesh.bounds, bounds_host, sizeof(bounds3)*num_tris);
-        free_host(points_host);
-        free_host(indices_host);
-        free_host(bounds_host);
-        mesh.bvh = bvh_device;
-        if (support_winding_number)
-        {
-            int num_bvh_nodes = 2*num_tris-1;
-            mesh.solid_angle_props = (SolidAngleProps*)alloc_device(WP_CURRENT_CONTEXT, sizeof(SolidAngleProps)*num_bvh_nodes);
-        }
-    }
-    Mesh* mesh_device = (Mesh*)alloc_device(WP_CURRENT_CONTEXT, sizeof(Mesh));
-    memcpy_h2d(WP_CURRENT_CONTEXT, mesh_device, &mesh, sizeof(Mesh));
-    // save descriptor
-    uint64_t mesh_id = (uint64_t)mesh_device;
-    mesh_add_descriptor(mesh_id, mesh);
-    if (support_winding_number)
-    {
-        mesh_refit_device(mesh_id);
-    }
-    return mesh_id;
-}
 void mesh_destroy_host(uint64_t id)
 {
     Mesh* m = (Mesh*)(id);
-    delete[] m->bounds;
+    delete[] m->lowers;
+    delete[] m->uppers;
     if (m->solid_angle_props) {
         delete [] m->solid_angle_props;
     }
@@ -219,25 +156,6 @@ void mesh_destroy_host(uint64_t id)
     delete m;
 }
-void mesh_destroy_device(uint64_t id)
-{
-    Mesh mesh;
-    if (mesh_get_descriptor(id, mesh))
-    {
-        ContextGuard guard(mesh.context);
-        bvh_destroy_device(mesh.bvh);
-        free_device(WP_CURRENT_CONTEXT, mesh.bounds);
-        free_device(WP_CURRENT_CONTEXT, (Mesh*)id);
-        if (mesh.solid_angle_props) {
-            free_device(WP_CURRENT_CONTEXT, mesh.solid_angle_props);
-        }
-        mesh_rem_descriptor(id);
-    }
-}
 void mesh_refit_host(uint64_t id)
 {
     Mesh* m = (Mesh*)(id);
@@ -245,13 +163,19 @@ void mesh_refit_host(uint64_t id)
     float sum = 0.0;
     for (int i=0; i < m->num_tris; ++i)
     {
-        m->bounds[i] = bounds3();
         wp::vec3 p0 = m->points.data[m->indices.data[i*3+0]];
         wp::vec3 p1 = m->points.data[m->indices.data[i*3+1]];
         wp::vec3 p2 = m->points.data[m->indices.data[i*3+2]];
-        m->bounds[i].add_point(p0);
-        m->bounds[i].add_point(p1);
-        m->bounds[i].add_point(p2);
+        // compute triangle bounds
+        bounds3 b;
+        b.add_point(p0);
+        b.add_point(p1);
+        b.add_point(p2);
+        m->lowers[i] = b.lower;
+        m->uppers[i] = b.upper;
         sum += length(p0-p1) + length(p0-p2) + length(p2-p1);
     }
     m->average_edge_length = sum / (m->num_tris*3);
@@ -263,7 +187,7 @@ void mesh_refit_host(uint64_t id)
     }
     else
     {
-        bvh_refit_host(m->bvh, m->bounds);
+        bvh_refit_host(m->bvh);
     }
 }
@@ -271,9 +195,10 @@ void mesh_refit_host(uint64_t id)
 // stubs for non-CUDA platforms
 #if !WP_ENABLE_CUDA
-void mesh_refit_device(uint64_t id)
-{
-}
+WP_API uint64_t mesh_create_device(void* context, wp::array_t<wp::vec3> points, wp::array_t<wp::vec3> velocities, wp::array_t<int> tris, int num_points, int num_tris, int support_winding_number) { return 0; }
+WP_API void mesh_destroy_device(uint64_t id) {}
+WP_API void mesh_refit_device(uint64_t id) {}
 #endif // !WP_ENABLE_CUDA