warp-lang 1.0.0b5-py3-none-manylinux2014_x86_64.whl → 1.0.0b6-py3-none-manylinux2014_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187)
  1. docs/conf.py +3 -4
  2. examples/env/env_ant.py +1 -1
  3. examples/env/env_cartpole.py +1 -1
  4. examples/env/env_humanoid.py +1 -1
  5. examples/example_dem.py +28 -26
  6. examples/example_diffray.py +37 -30
  7. examples/example_fluid.py +7 -3
  8. examples/example_jacobian_ik.py +1 -1
  9. examples/example_mesh_intersect.py +10 -7
  10. examples/example_nvdb.py +3 -3
  11. examples/example_render_opengl.py +19 -10
  12. examples/example_sim_cartpole.py +9 -5
  13. examples/example_sim_cloth.py +29 -25
  14. examples/example_sim_fk_grad.py +2 -2
  15. examples/example_sim_fk_grad_torch.py +3 -3
  16. examples/example_sim_grad_bounce.py +11 -8
  17. examples/example_sim_grad_cloth.py +12 -9
  18. examples/example_sim_granular.py +2 -2
  19. examples/example_sim_granular_collision_sdf.py +13 -13
  20. examples/example_sim_neo_hookean.py +3 -3
  21. examples/example_sim_particle_chain.py +2 -2
  22. examples/example_sim_quadruped.py +8 -5
  23. examples/example_sim_rigid_chain.py +8 -5
  24. examples/example_sim_rigid_contact.py +13 -10
  25. examples/example_sim_rigid_fem.py +2 -2
  26. examples/example_sim_rigid_gyroscopic.py +2 -2
  27. examples/example_sim_rigid_kinematics.py +1 -1
  28. examples/example_sim_trajopt.py +3 -2
  29. examples/fem/example_apic_fluid.py +5 -7
  30. examples/fem/example_diffusion_mgpu.py +18 -16
  31. warp/__init__.py +3 -2
  32. warp/bin/warp.so +0 -0
  33. warp/build_dll.py +29 -9
  34. warp/builtins.py +206 -7
  35. warp/codegen.py +58 -38
  36. warp/config.py +3 -1
  37. warp/context.py +234 -128
  38. warp/fem/__init__.py +2 -2
  39. warp/fem/cache.py +2 -1
  40. warp/fem/field/nodal_field.py +18 -17
  41. warp/fem/geometry/hexmesh.py +11 -6
  42. warp/fem/geometry/quadmesh_2d.py +16 -12
  43. warp/fem/geometry/tetmesh.py +19 -8
  44. warp/fem/geometry/trimesh_2d.py +18 -7
  45. warp/fem/integrate.py +341 -196
  46. warp/fem/quadrature/__init__.py +1 -1
  47. warp/fem/quadrature/pic_quadrature.py +138 -53
  48. warp/fem/quadrature/quadrature.py +81 -9
  49. warp/fem/space/__init__.py +1 -1
  50. warp/fem/space/basis_space.py +169 -51
  51. warp/fem/space/grid_2d_function_space.py +2 -2
  52. warp/fem/space/grid_3d_function_space.py +2 -2
  53. warp/fem/space/hexmesh_function_space.py +2 -2
  54. warp/fem/space/partition.py +9 -6
  55. warp/fem/space/quadmesh_2d_function_space.py +2 -2
  56. warp/fem/space/shape/cube_shape_function.py +27 -15
  57. warp/fem/space/shape/square_shape_function.py +29 -18
  58. warp/fem/space/tetmesh_function_space.py +2 -2
  59. warp/fem/space/topology.py +10 -0
  60. warp/fem/space/trimesh_2d_function_space.py +2 -2
  61. warp/fem/utils.py +10 -5
  62. warp/native/array.h +49 -8
  63. warp/native/builtin.h +31 -14
  64. warp/native/cuda_util.cpp +8 -3
  65. warp/native/cuda_util.h +1 -0
  66. warp/native/exports.h +1177 -1108
  67. warp/native/intersect.h +4 -4
  68. warp/native/intersect_adj.h +8 -8
  69. warp/native/mat.h +65 -6
  70. warp/native/mesh.h +126 -5
  71. warp/native/quat.h +28 -4
  72. warp/native/vec.h +76 -14
  73. warp/native/warp.cu +1 -6
  74. warp/render/render_opengl.py +261 -109
  75. warp/sim/import_mjcf.py +13 -7
  76. warp/sim/import_urdf.py +14 -14
  77. warp/sim/inertia.py +17 -18
  78. warp/sim/model.py +67 -67
  79. warp/sim/render.py +1 -1
  80. warp/sparse.py +6 -6
  81. warp/stubs.py +19 -81
  82. warp/tape.py +1 -1
  83. warp/tests/__main__.py +3 -6
  84. warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
  85. warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
  86. warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
  87. warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
  88. warp/tests/aux_test_unresolved_func.py +14 -0
  89. warp/tests/aux_test_unresolved_symbol.py +14 -0
  90. warp/tests/{test_kinematics.py → disabled_kinematics.py} +10 -12
  91. warp/tests/run_coverage_serial.py +31 -0
  92. warp/tests/test_adam.py +102 -106
  93. warp/tests/test_arithmetic.py +39 -40
  94. warp/tests/test_array.py +46 -48
  95. warp/tests/test_array_reduce.py +25 -19
  96. warp/tests/test_atomic.py +62 -26
  97. warp/tests/test_bool.py +16 -11
  98. warp/tests/test_builtins_resolution.py +1292 -0
  99. warp/tests/test_bvh.py +9 -12
  100. warp/tests/test_closest_point_edge_edge.py +53 -57
  101. warp/tests/test_codegen.py +164 -134
  102. warp/tests/test_compile_consts.py +13 -19
  103. warp/tests/test_conditional.py +30 -32
  104. warp/tests/test_copy.py +9 -12
  105. warp/tests/test_ctypes.py +90 -98
  106. warp/tests/test_dense.py +20 -14
  107. warp/tests/test_devices.py +34 -35
  108. warp/tests/test_dlpack.py +74 -75
  109. warp/tests/test_examples.py +215 -97
  110. warp/tests/test_fabricarray.py +15 -21
  111. warp/tests/test_fast_math.py +14 -11
  112. warp/tests/test_fem.py +280 -97
  113. warp/tests/test_fp16.py +19 -15
  114. warp/tests/test_func.py +177 -194
  115. warp/tests/test_generics.py +71 -77
  116. warp/tests/test_grad.py +83 -32
  117. warp/tests/test_grad_customs.py +7 -9
  118. warp/tests/test_hash_grid.py +6 -10
  119. warp/tests/test_import.py +9 -23
  120. warp/tests/test_indexedarray.py +19 -21
  121. warp/tests/test_intersect.py +15 -9
  122. warp/tests/test_large.py +17 -19
  123. warp/tests/test_launch.py +14 -17
  124. warp/tests/test_lerp.py +63 -63
  125. warp/tests/test_lvalue.py +84 -35
  126. warp/tests/test_marching_cubes.py +9 -13
  127. warp/tests/test_mat.py +388 -3004
  128. warp/tests/test_mat_lite.py +9 -12
  129. warp/tests/test_mat_scalar_ops.py +2889 -0
  130. warp/tests/test_math.py +10 -11
  131. warp/tests/test_matmul.py +104 -100
  132. warp/tests/test_matmul_lite.py +72 -98
  133. warp/tests/test_mesh.py +35 -32
  134. warp/tests/test_mesh_query_aabb.py +18 -25
  135. warp/tests/test_mesh_query_point.py +39 -23
  136. warp/tests/test_mesh_query_ray.py +9 -21
  137. warp/tests/test_mlp.py +8 -9
  138. warp/tests/test_model.py +89 -93
  139. warp/tests/test_modules_lite.py +15 -25
  140. warp/tests/test_multigpu.py +87 -114
  141. warp/tests/test_noise.py +10 -12
  142. warp/tests/test_operators.py +14 -21
  143. warp/tests/test_options.py +10 -11
  144. warp/tests/test_pinned.py +16 -18
  145. warp/tests/test_print.py +16 -20
  146. warp/tests/test_quat.py +121 -88
  147. warp/tests/test_rand.py +12 -13
  148. warp/tests/test_reload.py +27 -32
  149. warp/tests/test_rounding.py +7 -10
  150. warp/tests/test_runlength_encode.py +105 -106
  151. warp/tests/test_smoothstep.py +8 -9
  152. warp/tests/test_snippet.py +13 -22
  153. warp/tests/test_sparse.py +30 -29
  154. warp/tests/test_spatial.py +179 -174
  155. warp/tests/test_streams.py +100 -107
  156. warp/tests/test_struct.py +98 -67
  157. warp/tests/test_tape.py +11 -17
  158. warp/tests/test_torch.py +89 -86
  159. warp/tests/test_transient_module.py +9 -12
  160. warp/tests/test_types.py +328 -50
  161. warp/tests/test_utils.py +217 -218
  162. warp/tests/test_vec.py +133 -2133
  163. warp/tests/test_vec_lite.py +8 -11
  164. warp/tests/test_vec_scalar_ops.py +2099 -0
  165. warp/tests/test_volume.py +391 -382
  166. warp/tests/test_volume_write.py +122 -135
  167. warp/tests/unittest_serial.py +35 -0
  168. warp/tests/unittest_suites.py +291 -0
  169. warp/tests/{test_base.py → unittest_utils.py} +138 -25
  170. warp/tests/{test_misc.py → unused_test_misc.py} +13 -5
  171. warp/tests/{test_debug.py → walkthough_debug.py} +2 -15
  172. warp/thirdparty/unittest_parallel.py +257 -54
  173. warp/types.py +119 -98
  174. warp/utils.py +14 -0
  175. {warp_lang-1.0.0b5.dist-info → warp_lang-1.0.0b6.dist-info}/METADATA +2 -1
  176. {warp_lang-1.0.0b5.dist-info → warp_lang-1.0.0b6.dist-info}/RECORD +182 -178
  177. {warp_lang-1.0.0b5.dist-info → warp_lang-1.0.0b6.dist-info}/WHEEL +1 -1
  178. warp/tests/test_all.py +0 -239
  179. warp/tests/test_conditional_unequal_types_kernels.py +0 -14
  180. warp/tests/test_coverage.py +0 -38
  181. warp/tests/test_unresolved_func.py +0 -7
  182. warp/tests/test_unresolved_symbol.py +0 -7
  183. /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
  184. /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
  185. /warp/tests/{test_square.py → aux_test_square.py} +0 -0
  186. {warp_lang-1.0.0b5.dist-info → warp_lang-1.0.0b6.dist-info}/LICENSE.md +0 -0
  187. {warp_lang-1.0.0b5.dist-info → warp_lang-1.0.0b6.dist-info}/top_level.txt +0 -0
warp/native/intersect.h CHANGED
@@ -869,7 +869,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
  wp::adj_sub(var_9, var_71, adj_9, adj_71, adj_73);
  wp::adj_mul(var_21, var_70, adj_21, adj_70, adj_72);
  wp::adj_mul(var_41, var_70, adj_41, adj_70, adj_71);
- wp::adj_div(var_9, var_69, adj_9, adj_69, adj_70);
+ wp::adj_div(var_9, var_69, var_70, adj_9, adj_69, adj_70);
  wp::adj_add(var_68, var_21, adj_68, adj_21, adj_69);
  wp::adj_add(var_53, var_41, adj_53, adj_41, adj_68);
  wp::adj_select(var_64, var_50, var_66, adj_64, adj_50, adj_66, adj_67);
@@ -881,7 +881,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
  }
  wp::adj_sub(var_32, var_33, adj_32, adj_33, adj_62);
  wp::adj_sub(var_13, var_12, adj_13, adj_12, adj_60);
- wp::adj_div(var_54, var_57, adj_54, adj_57, adj_58);
+ wp::adj_div(var_54, var_57, var_58, adj_54, adj_57, adj_58);
  wp::adj_add(var_55, var_56, adj_55, adj_56, adj_57);
  wp::adj_sub(var_32, var_33, adj_32, adj_33, adj_56);
  wp::adj_sub(var_13, var_12, adj_13, adj_12, adj_55);
@@ -896,7 +896,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
  wp::adj_vec2(var_48, var_5, adj_48, adj_5, adj_49);
  wp::adj_sub(var_9, var_43, adj_9, adj_43, adj_48);
  }
- wp::adj_div(var_4, var_42, adj_4, adj_42, adj_43);
+ wp::adj_div(var_4, var_42, var_43, adj_4, adj_42, adj_43);
  wp::adj_sub(var_4, var_33, adj_4, adj_33, adj_42);
  wp::adj_sub(var_39, var_40, adj_39, adj_40, adj_41);
  wp::adj_mul(var_3, var_33, adj_3, adj_33, adj_40);
@@ -917,7 +917,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
  wp::adj_vec2(var_28, var_23, adj_28, adj_23, adj_29);
  wp::adj_sub(var_9, var_23, adj_9, adj_23, adj_28);
  }
- wp::adj_div(var_3, var_22, adj_3, adj_22, adj_23);
+ wp::adj_div(var_3, var_22, var_23, adj_3, adj_22, adj_23);
  wp::adj_sub(var_3, var_12, adj_3, adj_12, adj_22);
  wp::adj_sub(var_19, var_20, adj_19, adj_20, adj_21);
  wp::adj_mul(var_12, var_4, adj_12, adj_4, adj_20);
@@ -276,7 +276,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
  label1:;
  adj_71 += adj_ret;
  wp::adj_vec3(var_61, var_62, var_70, adj_61, adj_62, adj_70, adj_71);
- wp::adj_length(var_69, adj_69, adj_70);
+ wp::adj_length(var_69, var_70, adj_69, adj_70);
  wp::adj_sub(var_68, var_65, adj_68, adj_65, adj_69);
  wp::adj_add(var_p2, var_67, adj_p2, adj_67, adj_68);
  wp::adj_mul(var_66, var_62, adj_66, adj_62, adj_67);
@@ -297,7 +297,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
  wp::adj_select(var_51, var_49, var_54, adj_51, adj_49, adj_54, adj_55);
  if (var_51) {
  wp::adj_clamp(var_53, var_6, var_25, adj_53, adj_6, adj_25, adj_54);
- wp::adj_div(var_52, var_3, adj_52, adj_3, adj_53);
+ wp::adj_div(var_52, var_3, var_53, adj_52, adj_3, adj_53);
  wp::adj_sub(var_30, var_21, adj_30, adj_21, adj_52);
  }
  }
@@ -305,10 +305,10 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
  wp::adj_select(var_45, var_41, var_48, adj_45, adj_41, adj_48, adj_49);
  if (var_45) {
  wp::adj_clamp(var_47, var_6, var_25, adj_47, adj_6, adj_25, adj_48);
- wp::adj_div(var_46, var_3, adj_46, adj_3, adj_47);
+ wp::adj_div(var_46, var_3, var_47, adj_46, adj_3, adj_47);
  wp::adj_neg(var_21, adj_21, adj_46);
  }
- wp::adj_div(var_43, var_4, adj_43, adj_4, adj_44);
+ wp::adj_div(var_43, var_4, var_44, adj_43, adj_4, adj_44);
  wp::adj_add(var_42, var_5, adj_42, adj_5, adj_43);
  wp::adj_mul(var_30, var_41, adj_30, adj_41, adj_42);
  wp::adj_select(var_34, var_6, var_40, adj_34, adj_6, adj_40, adj_41);
@@ -317,7 +317,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
  wp::adj_select(var_34, var_28, var_39, adj_34, adj_28, adj_39, adj_40);
  if (var_34) {
  wp::adj_clamp(var_38, var_6, var_25, adj_38, adj_6, adj_25, adj_39);
- wp::adj_div(var_37, var_33, adj_37, adj_33, adj_38);
+ wp::adj_div(var_37, var_33, var_38, adj_37, adj_33, adj_38);
  wp::adj_sub(var_35, var_36, adj_35, adj_36, adj_37);
  wp::adj_mul(var_21, var_4, adj_21, adj_4, adj_36);
  wp::adj_mul(var_30, var_5, adj_30, adj_5, adj_35);
@@ -332,7 +332,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
  if (var_22) {
  wp::adj_cast_float(var_6, adj_6, adj_27);
  wp::adj_clamp(var_24, var_6, var_25, adj_24, adj_6, adj_25, adj_26);
- wp::adj_div(var_23, var_3, adj_23, adj_3, adj_24);
+ wp::adj_div(var_23, var_3, var_24, adj_23, adj_3, adj_24);
  wp::adj_neg(var_21, adj_21, adj_23);
  }
  wp::adj_dot(var_0, var_2, adj_0, adj_2, adj_21);
@@ -341,7 +341,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
  wp::adj_select(var_15, var_7, var_16, adj_15, adj_7, adj_16, adj_19);
  if (var_15) {
  wp::adj_cast_float(var_17, adj_17, adj_18);
- wp::adj_div(var_5, var_4, adj_5, adj_4, adj_17);
+ wp::adj_div(var_5, var_4, var_17, adj_5, adj_4, adj_17);
  wp::adj_cast_float(var_6, adj_6, adj_16);
  }
  if (var_13) {
@@ -349,7 +349,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
  adj_14 += adj_ret;
  wp::adj_vec3(var_7, var_8, var_10, adj_7, adj_8, adj_10, adj_14);
  }
- wp::adj_length(var_9, adj_9, adj_10);
+ wp::adj_length(var_9, var_10, adj_9, adj_10);
  wp::adj_sub(var_p2, var_p1, adj_p2, adj_p1, adj_9);
  wp::adj_cast_float(var_6, adj_6, adj_8);
  wp::adj_cast_float(var_6, adj_6, adj_7);
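
Note: the rewritten calls above follow the adjoint convention changed in this release, where the generated backward code passes the forward-pass result (for example var_70, the value produced by the corresponding wp::div or wp::length call) into the adjoint so it does not have to be recomputed. The following is a minimal, standalone C++ sketch of that convention for scalar division; the function and variable names are illustrative only and are not part of Warp's headers.

    // Standalone sketch (not Warp's headers): the adjoint of y = a / b
    // receives the primal result y so the backward pass can reuse it.
    #include <cstdio>

    // forward: y = a / b
    float div_fwd(float a, float b) { return a / b; }

    // backward: given y = a / b and the incoming gradient adj_y,
    // accumulate gradients for a and b without recomputing a / b.
    void adj_div_sketch(float a, float b, float y, float& adj_a, float& adj_b, float adj_y)
    {
        adj_a += adj_y / b;       // d(a/b)/da = 1/b
        adj_b -= y / b * adj_y;   // d(a/b)/db = -a/b^2 = -(y/b)
    }

    int main()
    {
        float a = 3.0f, b = 2.0f;
        float y = div_fwd(a, b);
        float adj_a = 0.0f, adj_b = 0.0f;
        adj_div_sketch(a, b, y, adj_a, adj_b, /*adj_y=*/1.0f);
        printf("y=%f dA=%f dB=%f\n", y, adj_a, adj_b);
        return 0;
    }
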
warp/native/mat.h CHANGED
@@ -297,6 +297,18 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> atomic_max(mat_t<Rows,Cols,Type> * ad
  return m;
  }

+ template<unsigned Rows, unsigned Cols, typename Type>
+ inline CUDA_CALLABLE void adj_atomic_minmax(
+ mat_t<Rows,Cols,Type> *addr,
+ mat_t<Rows,Cols,Type> *adj_addr,
+ const mat_t<Rows,Cols,Type> &value,
+ mat_t<Rows,Cols,Type> &adj_value)
+ {
+ for (unsigned i=0; i < Rows; ++i)
+ for (unsigned j=0; j < Cols; ++j)
+ adj_atomic_minmax(&addr->data[i][j], &adj_addr->data[i][j], value.data[i][j], adj_value.data[i][j]);
+ }
+
  template<unsigned Rows, unsigned Cols, typename Type>
  inline CUDA_CALLABLE vec_t<Cols,Type> extract(const mat_t<Rows,Cols,Type>& m, int row)
  {
@@ -425,7 +437,22 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> div(const mat_t<Rows,Cols,Type>& a, T
  }
  }

- return t;
+ return t;
+ }
+
+ template<unsigned Rows, unsigned Cols, typename Type>
+ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> div(Type b, const mat_t<Rows,Cols,Type>& a)
+ {
+ mat_t<Rows,Cols,Type> t;
+ for (unsigned i=0; i < Rows; ++i)
+ {
+ for (unsigned j=0; j < Cols; ++j)
+ {
+ t.data[i][j] = b / a.data[i][j];
+ }
+ }
+
+ return t;
  }

  template<unsigned Rows, unsigned Cols, typename Type>
@@ -440,7 +467,7 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> mul(const mat_t<Rows,Cols,Type>& a, T
  }
  }

- return t;
+ return t;
  }

  template<unsigned Rows, unsigned Cols, typename Type>
@@ -473,6 +500,17 @@ inline CUDA_CALLABLE vec_t<Rows,Type> mul(const mat_t<Rows,Cols,Type>& a, const
  return r;
  }

+ template<unsigned Rows, unsigned Cols, typename Type>
+ inline CUDA_CALLABLE vec_t<Cols,Type> mul(const vec_t<Rows,Type>& b, const mat_t<Rows,Cols,Type>& a)
+ {
+ vec_t<Cols,Type> r = a.get_row(0)*b[0];
+ for( unsigned i=1; i < Rows; ++i )
+ {
+ r += a.get_row(i)*b[i];
+ }
+ return r;
+ }
+
  template<unsigned Rows, unsigned Cols, unsigned ColsOut, typename Type>
  inline CUDA_CALLABLE mat_t<Rows,ColsOut,Type> mul(const mat_t<Rows,Cols,Type>& a, const mat_t<Cols,ColsOut,Type>& b)
  {
@@ -932,6 +970,20 @@ inline CUDA_CALLABLE void adj_div(const mat_t<Rows,Cols,Type>& a, Type s, mat_t<
  }
  }

+ template<unsigned Rows, unsigned Cols, typename Type>
+ inline CUDA_CALLABLE void adj_div(Type s, const mat_t<Rows,Cols,Type>& a, Type& adj_s, mat_t<Rows,Cols,Type>& adj_a, const mat_t<Rows,Cols,Type>& adj_ret)
+ {
+ adj_s -= tensordot(a , adj_ret)/ (s * s); // - a / s^2
+
+ for (unsigned i=0; i < Rows; ++i)
+ {
+ for (unsigned j=0; j < Cols; ++j)
+ {
+ adj_a.data[i][j] += s / adj_ret.data[i][j];
+ }
+ }
+ }
+
  template<unsigned Rows, unsigned Cols, typename Type>
  inline CUDA_CALLABLE void adj_mul(const mat_t<Rows,Cols,Type>& a, Type b, mat_t<Rows,Cols,Type>& adj_a, Type& adj_b, const mat_t<Rows,Cols,Type>& adj_ret)
  {
@@ -965,6 +1017,13 @@ inline CUDA_CALLABLE void adj_mul(const mat_t<Rows,Cols,Type>& a, const vec_t<Co
  adj_b += mul(transpose(a), adj_ret);
  }

+ template<unsigned Rows, unsigned Cols, typename Type>
+ inline CUDA_CALLABLE void adj_mul(const vec_t<Rows,Type>& b, const mat_t<Rows,Cols,Type>& a, vec_t<Rows,Type>& adj_b, mat_t<Rows,Cols,Type>& adj_a, const vec_t<Cols,Type>& adj_ret)
+ {
+ adj_a += outer(b, adj_ret);
+ adj_b += mul(adj_ret, transpose(a));
+ }
+
  template<unsigned Rows, unsigned Cols, unsigned ColsOut, typename Type>
  inline CUDA_CALLABLE void adj_mul(const mat_t<Rows,Cols,Type>& a, const mat_t<Cols,ColsOut,Type>& b, mat_t<Rows,Cols,Type>& adj_a, mat_t<Cols,ColsOut,Type>& adj_b, const mat_t<Rows,ColsOut,Type>& adj_ret)
  {
@@ -1105,10 +1164,10 @@ inline CUDA_CALLABLE void adj_determinant(const mat_t<4,4,Type>& m, mat_t<4,4,Ty
  }

  template<unsigned Rows, typename Type>
- inline CUDA_CALLABLE void adj_inverse(const mat_t<Rows,Rows,Type>& m, mat_t<Rows,Rows,Type>& adj_m, const mat_t<Rows,Rows,Type>& adj_ret)
+ inline CUDA_CALLABLE void adj_inverse(const mat_t<Rows,Rows,Type>& m, mat_t<Rows,Rows,Type>& ret, mat_t<Rows,Rows,Type>& adj_m, const mat_t<Rows,Rows,Type>& adj_ret)
  {
  // todo: how to cache this from the forward pass?
- mat_t<Rows,Rows,Type> invt = transpose(inverse(m));
+ mat_t<Rows,Rows,Type> invt = transpose(ret);

  // see https://people.maths.ox.ac.uk/gilesm/files/NA-08-01.pdf 2.2.3
  adj_m -= mul(mul(invt, adj_ret), invt);
@@ -1150,10 +1209,10 @@ inline CUDA_CALLABLE void adj_cw_mul(const mat_t<Rows,Cols,Type>& a, const mat_t
  }

  template<unsigned Rows, unsigned Cols, typename Type>
- inline CUDA_CALLABLE void adj_cw_div(const mat_t<Rows,Cols,Type>& a, const mat_t<Rows,Cols,Type>& b, mat_t<Rows,Cols,Type>& adj_a, mat_t<Rows,Cols,Type>& adj_b, const mat_t<Rows,Cols,Type>& adj_ret)
+ inline CUDA_CALLABLE void adj_cw_div(const mat_t<Rows,Cols,Type>& a, const mat_t<Rows,Cols,Type>& b, mat_t<Rows,Cols,Type>& ret, mat_t<Rows,Cols,Type>& adj_a, mat_t<Rows,Cols,Type>& adj_b, const mat_t<Rows,Cols,Type>& adj_ret)
  {
  adj_a += cw_div(adj_ret, b);
- adj_b -= cw_mul(adj_ret, cw_div(cw_div(a, b), b));
+ adj_b -= cw_mul(adj_ret, cw_div(ret, b));
  }

  // adjoint for the constant constructor:
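
Note: besides the cached-result adjoints, these hunks add a scalar-by-matrix division overload (div(Type b, const mat_t&)) and a row-vector-times-matrix overload (mul(vec_t<Rows>, mat_t<Rows,Cols>)) that accumulates scaled matrix rows. The standalone C++ sketch below illustrates that same row-accumulation computation with plain arrays; it is not Warp code, just the arithmetic the new overload performs.

    // r[j] = sum_i v[i] * m[i][j]  (row vector times matrix)
    #include <cstdio>

    int main()
    {
        const float m[2][3] = {{1.f, 2.f, 3.f}, {4.f, 5.f, 6.f}};
        const float v[2]    = {1.f, 10.f};

        float r[3] = {0.f, 0.f, 0.f};
        for (int i = 0; i < 2; ++i)
            for (int j = 0; j < 3; ++j)
                r[j] += v[i] * m[i][j];   // accumulate v[i] times row i of m

        printf("%g %g %g\n", r[0], r[1], r[2]);  // expected: 41 52 63
        return 0;
    }
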
warp/native/mesh.h CHANGED
@@ -1181,7 +1181,7 @@ CUDA_CALLABLE inline bool mesh_query_point_sign_winding_number(uint64_t id, cons
  }
  }

- CUDA_CALLABLE inline void adj_mesh_query_point_no_sign(uint64_t id, const vec3& point, float max_dist, int& face, float& u, float& v,
+ CUDA_CALLABLE inline void adj_mesh_query_point_no_sign(uint64_t id, const vec3& point, float max_dist, const int& face, const float& u, const float& v,
  uint64_t adj_id, vec3& adj_point, float& adj_max_dist, int& adj_face, float& adj_u, float& adj_v, bool& adj_ret)
  {
  Mesh mesh = mesh_get(id);
@@ -1202,7 +1202,7 @@ CUDA_CALLABLE inline void adj_mesh_query_point_no_sign(uint64_t id, const vec3&
  adj_closest_point_to_triangle(p, q, r, point, adj_p, adj_q, adj_r, adj_point, adj_uv);
  }

- CUDA_CALLABLE inline void adj_mesh_query_furthest_point_no_sign(uint64_t id, const vec3& point, float min_dist, int& face, float& u, float& v,
+ CUDA_CALLABLE inline void adj_mesh_query_furthest_point_no_sign(uint64_t id, const vec3& point, float min_dist, const int& face, const float& u, const float& v,
  uint64_t adj_id, vec3& adj_point, float& adj_min_dist, int& adj_face, float& adj_u, float& adj_v, bool& adj_ret)
  {
  Mesh mesh = mesh_get(id);
@@ -1223,24 +1223,116 @@ CUDA_CALLABLE inline void adj_mesh_query_furthest_point_no_sign(uint64_t id, con
  adj_closest_point_to_triangle(p, q, r, point, adj_p, adj_q, adj_r, adj_point, adj_uv); // Todo for Miles :>
  }

- CUDA_CALLABLE inline void adj_mesh_query_point(uint64_t id, const vec3& point, float max_dist, float& inside, int& face, float& u, float& v,
+ CUDA_CALLABLE inline void adj_mesh_query_point(uint64_t id, const vec3& point, float max_dist, const float& inside, const int& face, const float& u, const float& v,
  uint64_t adj_id, vec3& adj_point, float& adj_max_dist, float& adj_inside, int& adj_face, float& adj_u, float& adj_v, bool& adj_ret)
  {
  adj_mesh_query_point_no_sign(id, point, max_dist, face, u, v, adj_id, adj_point, adj_max_dist, adj_face, adj_u, adj_v, adj_ret);
  }

- CUDA_CALLABLE inline void adj_mesh_query_point_sign_normal(uint64_t id, const vec3& point, float max_dist, float& inside, int& face, float& u, float& v, const float epsilon,
+ CUDA_CALLABLE inline void adj_mesh_query_point_sign_normal(uint64_t id, const vec3& point, float max_dist, const float& inside, const int& face, const float& u, const float& v, const float epsilon,
  uint64_t adj_id, vec3& adj_point, float& adj_max_dist, float& adj_inside, int& adj_face, float& adj_u, float& adj_v, float& adj_epsilon, bool& adj_ret)
  {
  adj_mesh_query_point_no_sign(id, point, max_dist, face, u, v, adj_id, adj_point, adj_max_dist, adj_face, adj_u, adj_v, adj_ret);
  }

- CUDA_CALLABLE inline void adj_mesh_query_point_sign_winding_number(uint64_t id, const vec3& point, float max_dist, float& inside, int& face, float& u, float& v, const float accuracy, const float winding_number_threshold,
+ CUDA_CALLABLE inline void adj_mesh_query_point_sign_winding_number(uint64_t id, const vec3& point, float max_dist, const float& inside, const int& face, const float& u, const float& v, const float accuracy, const float winding_number_threshold,
  uint64_t adj_id, vec3& adj_point, float& adj_max_dist, float& adj_inside, int& adj_face, float& adj_u, float& adj_v, float& adj_accuracy, float& adj_winding_number_threshold, bool& adj_ret)
  {
  adj_mesh_query_point_no_sign(id, point, max_dist, face, u, v, adj_id, adj_point, adj_max_dist, adj_face, adj_u, adj_v, adj_ret);
  }

+
+ // Stores the result of querying the closest point on a mesh.
+ struct mesh_query_point_t
+ {
+ CUDA_CALLABLE mesh_query_point_t()
+ {
+ }
+
+ CUDA_CALLABLE mesh_query_point_t(int)
+ {
+ // For backward pass.
+ }
+
+ bool result;
+ float sign;
+ int face;
+ float u;
+ float v;
+ };
+
+ CUDA_CALLABLE inline mesh_query_point_t mesh_query_point(uint64_t id, const vec3& point, float max_dist)
+ {
+ mesh_query_point_t query;
+ query.result = mesh_query_point(id, point, max_dist, query.sign, query.face, query.u, query.v);
+ return query;
+ }
+
+ CUDA_CALLABLE inline mesh_query_point_t mesh_query_point_no_sign(uint64_t id, const vec3& point, float max_dist)
+ {
+ mesh_query_point_t query;
+ query.sign = 0.0;
+ query.result = mesh_query_point_no_sign(id, point, max_dist, query.face, query.u, query.v);
+ return query;
+ }
+
+ CUDA_CALLABLE inline mesh_query_point_t mesh_query_furthest_point_no_sign(uint64_t id, const vec3& point, float min_dist)
+ {
+ mesh_query_point_t query;
+ query.sign = 0.0;
+ query.result = mesh_query_furthest_point_no_sign(id, point, min_dist, query.face, query.u, query.v);
+ return query;
+ }
+
+ CUDA_CALLABLE inline mesh_query_point_t mesh_query_point_sign_normal(uint64_t id, const vec3& point, float max_dist, const float epsilon = 1e-3f)
+ {
+ mesh_query_point_t query;
+ query.result = mesh_query_point_sign_normal(id, point, max_dist, query.sign, query.face, query.u, query.v, epsilon);
+ return query;
+ }
+
+ CUDA_CALLABLE inline mesh_query_point_t mesh_query_point_sign_winding_number(uint64_t id, const vec3& point, float max_dist, float accuracy, float winding_number_threshold)
+ {
+ mesh_query_point_t query;
+ query.result = mesh_query_point_sign_winding_number(id, point, max_dist, query.sign, query.face, query.u, query.v, accuracy, winding_number_threshold);
+ return query;
+ }
+
+ CUDA_CALLABLE inline void adj_mesh_query_point(uint64_t id, const vec3& point, float max_dist, const mesh_query_point_t& ret,
+ uint64_t adj_id, vec3& adj_point, float& adj_max_dist, mesh_query_point_t& adj_ret)
+ {
+ adj_mesh_query_point(id, point, max_dist, ret.sign, ret.face, ret.u, ret.v,
+ adj_id, adj_point, adj_max_dist, adj_ret.sign, adj_ret.face, adj_ret.u, adj_ret.v, adj_ret.result);
+ }
+
+ CUDA_CALLABLE inline void adj_mesh_query_point_no_sign(uint64_t id, const vec3& point, float max_dist, const mesh_query_point_t& ret,
+ uint64_t adj_id, vec3& adj_point, float& adj_max_dist, mesh_query_point_t& adj_ret)
+ {
+ adj_mesh_query_point_no_sign(id, point, max_dist, ret.face, ret.u, ret.v,
+ adj_id, adj_point, adj_max_dist, adj_ret.face, adj_ret.u, adj_ret.v, adj_ret.result);
+ }
+
+ CUDA_CALLABLE inline void adj_mesh_query_furthest_point_no_sign(uint64_t id, const vec3& point, float min_dist, const mesh_query_point_t& ret,
+ uint64_t adj_id, vec3& adj_point, float& adj_min_dist, mesh_query_point_t& adj_ret)
+ {
+ adj_mesh_query_furthest_point_no_sign(id, point, min_dist, ret.face, ret.u, ret.v,
+ adj_id, adj_point, adj_min_dist, adj_ret.face, adj_ret.u, adj_ret.v, adj_ret.result);
+ }
+
+ CUDA_CALLABLE inline void adj_mesh_query_point_sign_normal(uint64_t id, const vec3& point, float max_dist, float epsilon, const mesh_query_point_t& ret,
+ uint64_t adj_id, vec3& adj_point, float& adj_max_dist, float& adj_epsilon, mesh_query_point_t& adj_ret)
+ {
+ adj_mesh_query_point_sign_normal(id, point, max_dist, ret.sign, ret.face, ret.u, ret.v, epsilon,
+ adj_id, adj_point, adj_max_dist, adj_ret.sign, adj_ret.face, adj_ret.u, adj_ret.v, epsilon, adj_ret.result);
+ }
+
+ CUDA_CALLABLE inline void adj_mesh_query_point_sign_winding_number(uint64_t id, const vec3& point, float max_dist, float accuracy, float winding_number_threshold, const mesh_query_point_t& ret,
+ uint64_t adj_id, vec3& adj_point, float& adj_max_dist, float& adj_accuracy, float& adj_winding_number_threshold, mesh_query_point_t& adj_ret)
+ {
+ adj_mesh_query_point_sign_winding_number(id, point, max_dist, ret.sign, ret.face, ret.u, ret.v, accuracy, winding_number_threshold,
+ adj_id, adj_point, adj_max_dist, adj_ret.sign, adj_ret.face, adj_ret.u, adj_ret.v, adj_accuracy, adj_winding_number_threshold, adj_ret.result);
+ }
+
  CUDA_CALLABLE inline bool mesh_query_ray(uint64_t id, const vec3& start, const vec3& dir, float max_t, float& t, float& u, float& v, float& sign, vec3& normal, int& face)
  {
  Mesh mesh = mesh_get(id);
@@ -1353,6 +1445,35 @@ CUDA_CALLABLE inline void adj_mesh_query_ray(
  }


+ // Stores the result of querying the closest point on a mesh.
+ struct mesh_query_ray_t
+ {
+ CUDA_CALLABLE mesh_query_ray_t()
+ {
+ }
+
+ CUDA_CALLABLE mesh_query_ray_t(int)
+ {
+ // For backward pass.
+ }
+
+ bool result;
+ float sign;
+ int face;
+ float t;
+ float u;
+ float v;
+ vec3 normal;
+ };
+
+ CUDA_CALLABLE inline mesh_query_ray_t mesh_query_ray(uint64_t id, const vec3& start, const vec3& dir, float max_t)
+ {
+ mesh_query_ray_t query;
+ query.result = mesh_query_ray(id, start, dir, max_t, query.t, query.u, query.v, query.sign, query.normal, query.face);
+ return query;
+ }
+
+
  // determine if a point is inside (ret < 0 ) or outside the mesh (ret > 0)
  CUDA_CALLABLE inline float mesh_query_inside(uint64_t id, const vec3& p)
  {
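
Note: the new mesh_query_point_t and mesh_query_ray_t structs above wrap the existing out-parameter query functions so a single value carries the hit flag together with all of the outputs. The standalone C++ sketch below shows that wrapping pattern with a dummy query; names such as query_point_legacy and query_point_result are invented for illustration and are not Warp's API.

    #include <cstdio>

    // stand-in for an out-parameter query such as mesh_query_point
    bool query_point_legacy(float x, float& sign, int& face, float& u, float& v)
    {
        sign = 1.0f; face = 42; u = 0.25f; v = 0.5f;   // dummy values
        return x >= 0.0f;
    }

    struct query_point_result        // mirrors the idea of mesh_query_point_t
    {
        bool result;
        float sign;
        int face;
        float u, v;
    };

    // wrapper: forward the call and collect the outputs into one struct
    query_point_result query_point(float x)
    {
        query_point_result q;
        q.result = query_point_legacy(x, q.sign, q.face, q.u, q.v);
        return q;
    }

    int main()
    {
        query_point_result q = query_point(1.0f);
        printf("hit=%d face=%d u=%g v=%g\n", q.result, q.face, q.u, q.v);
        return 0;
    }
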
warp/native/quat.h CHANGED
@@ -225,12 +225,24 @@ inline CUDA_CALLABLE quat_t<Type> div(quat_t<Type> q, Type s)
  return quat_t<Type>(q.x/s, q.y/s, q.z/s, q.w/s);
  }

+ template<typename Type>
+ inline CUDA_CALLABLE quat_t<Type> div(Type s, quat_t<Type> q)
+ {
+ return quat_t<Type>(s/q.x, s/q.y, s/q.z, s/q.w);
+ }
+
  template<typename Type>
  inline CUDA_CALLABLE quat_t<Type> operator / (quat_t<Type> a, Type s)
  {
  return div(a,s);
  }

+ template<typename Type>
+ inline CUDA_CALLABLE quat_t<Type> operator / (Type s, quat_t<Type> a)
+ {
+ return div(s,a);
+ }
+
  template<typename Type>
  inline CUDA_CALLABLE quat_t<Type> operator*(Type s, const quat_t<Type>& a)
  {
@@ -523,9 +535,14 @@ inline CUDA_CALLABLE void tensordot(const quat_t<Type>& a, const quat_t<Type>& b
  }

  template<typename Type>
- inline CUDA_CALLABLE void adj_length(const quat_t<Type>& a, quat_t<Type>& adj_a, const Type adj_ret)
+ inline CUDA_CALLABLE void adj_length(const quat_t<Type>& a, Type ret, quat_t<Type>& adj_a, const Type adj_ret)
  {
- adj_a += normalize(a)*adj_ret;
+ if (ret > Type(kEps))
+ {
+ Type inv_l = Type(1)/ret;
+
+ adj_a += quat_t<Type>(a.x*inv_l, a.y*inv_l, a.z*inv_l, a.w*inv_l) * adj_ret;
+ }
  }

  template<typename Type>
@@ -608,6 +625,13 @@ inline CUDA_CALLABLE void adj_div(quat_t<Type> a, Type s, quat_t<Type>& adj_a, T
  adj_a += adj_ret / s;
  }

+ template<typename Type>
+ inline CUDA_CALLABLE void adj_div(Type s, quat_t<Type> a, Type& adj_s, quat_t<Type>& adj_a, const quat_t<Type>& adj_ret)
+ {
+ adj_s -= dot(a, adj_ret)/ (s * s); // - a / s^2
+ adj_a += s / adj_ret;
+ }
+
  template<typename Type>
  inline CUDA_CALLABLE void adj_quat_rotate(const quat_t<Type>& q, const vec_t<3,Type>& p, quat_t<Type>& adj_q, vec_t<3,Type>& adj_p, const vec_t<3,Type>& adj_ret)
  {
@@ -677,7 +701,7 @@ inline CUDA_CALLABLE void adj_quat_rotate_inv(const quat_t<Type>& q, const vec_t
  }

  template<typename Type>
- inline CUDA_CALLABLE void adj_quat_slerp(const quat_t<Type>& q0, const quat_t<Type>& q1, Type t, quat_t<Type>& adj_q0, quat_t<Type>& adj_q1, Type& adj_t, const quat_t<Type>& adj_ret)
+ inline CUDA_CALLABLE void adj_quat_slerp(const quat_t<Type>& q0, const quat_t<Type>& q1, Type t, quat_t<Type>& ret, quat_t<Type>& adj_q0, quat_t<Type>& adj_q1, Type& adj_t, const quat_t<Type>& adj_ret)
  {
  vec_t<3,Type> axis;
  Type angle;
@@ -688,7 +712,7 @@ inline CUDA_CALLABLE void adj_quat_slerp(const quat_t<Type>& q0, const quat_t<Ty
  angle = angle * 0.5;

  // adj_t
- adj_t += dot(mul(quat_slerp(q0, q1, t), quat_t<Type>(angle*axis[0], angle*axis[1], angle*axis[2], Type(0))), adj_ret);
+ adj_t += dot(mul(ret, quat_t<Type>(angle*axis[0], angle*axis[1], angle*axis[2], Type(0))), adj_ret);

  // adj_q0
  quat_t<Type> q_inc_x_q0;
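
Note: adj_length now receives the cached length from the forward pass and skips the update when it is at or below kEps, avoiding a division by zero at q = 0. Below is a standalone C++ sketch of that guarded adjoint for a generic 4-component value; kEpsSketch is a stand-in tolerance chosen here, not Warp's kEps constant.

    #include <cmath>
    #include <cstdio>

    const float kEpsSketch = 1e-6f;   // stand-in tolerance, not Warp's kEps

    float length4(const float q[4])
    {
        return std::sqrt(q[0]*q[0] + q[1]*q[1] + q[2]*q[2] + q[3]*q[3]);
    }

    // d|q|/dq = q / |q|, reusing the cached forward result and guarded near zero
    void adj_length4(const float q[4], float ret, float adj_q[4], float adj_ret)
    {
        if (ret > kEpsSketch)
        {
            float inv_l = 1.0f / ret;
            for (int i = 0; i < 4; ++i)
                adj_q[i] += q[i] * inv_l * adj_ret;
        }
    }

    int main()
    {
        float q[4] = {0.f, 3.f, 0.f, 4.f};
        float ret = length4(q);                    // 5
        float adj_q[4] = {0.f, 0.f, 0.f, 0.f};
        adj_length4(q, ret, adj_q, 1.0f);          // gradient = q / |q|
        printf("|q|=%g grad=(%g, %g, %g, %g)\n", ret, adj_q[0], adj_q[1], adj_q[2], adj_q[3]);
        return 0;
    }
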
warp/native/vec.h CHANGED
@@ -284,12 +284,41 @@ inline CUDA_CALLABLE vec_t<2, Type> div(vec_t<2, Type> a, Type s)
  return vec_t<2, Type>(a.c[0]/s,a.c[1]/s);
  }

+ template<unsigned Length, typename Type>
+ inline CUDA_CALLABLE vec_t<Length, Type> div(Type s, vec_t<Length, Type> a)
+ {
+ vec_t<Length, Type> ret;
+ for (unsigned i=0; i < Length; ++i)
+ {
+ ret[i] = s / a[i];
+ }
+ return ret;
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE vec_t<3, Type> div(Type s, vec_t<3, Type> a)
+ {
+ return vec_t<3, Type>(s/a.c[0],s/a.c[1],s/a.c[2]);
+ }
+
+ template<typename Type>
+ inline CUDA_CALLABLE vec_t<2, Type> div(Type s, vec_t<2, Type> a)
+ {
+ return vec_t<2, Type>(s/a.c[0],s/a.c[1]);
+ }
+
  template<unsigned Length, typename Type>
  inline CUDA_CALLABLE vec_t<Length, Type> operator / (vec_t<Length, Type> a, Type s)
  {
  return div(a,s);
  }

+ template<unsigned Length, typename Type>
+ inline CUDA_CALLABLE vec_t<Length, Type> operator / (Type s, vec_t<Length, Type> a)
+ {
+ return div(s, a);
+ }
+
  // component wise division
  template<unsigned Length, typename Type>
  inline CUDA_CALLABLE vec_t<Length, Type> cw_div(vec_t<Length, Type> a, vec_t<Length, Type> b)
@@ -735,9 +764,30 @@ inline CUDA_CALLABLE void adj_div(vec_t<Length, Type> a, Type s, vec_t<Length, T
  }

  template<unsigned Length, typename Type>
- inline CUDA_CALLABLE void adj_cw_div(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret) {
+ inline CUDA_CALLABLE void adj_div(Type s, vec_t<Length, Type> a, Type& adj_s, vec_t<Length, Type>& adj_a, const vec_t<Length, Type>& adj_ret)
+ {
+
+ adj_s -= dot(a , adj_ret)/ (s * s); // - a / s^2
+
+ for( unsigned i=0; i < Length; ++i )
+ {
+ adj_a[i] += s / adj_ret[i];
+ }
+
+ #if FP_CHECK
+ if (!isfinite(a) || !isfinite(s) || !isfinite(adj_a) || !isfinite(adj_s) || !isfinite(adj_ret))
+ {
+ // \TODO: How shall we implement this error message?
+ // printf("adj_div((%f %f %f %f), %f, (%f %f %f %f), %f, (%f %f %f %f)\n", a.x, a.y, a.z, a.w, s, adj_a.x, adj_a.y, adj_a.z, adj_a.w, adj_s, adj_ret.x, adj_ret.y, adj_ret.z, adj_ret.w);
+ assert(0);
+ }
+ #endif
+ }
+
+ template<unsigned Length, typename Type>
+ inline CUDA_CALLABLE void adj_cw_div(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& ret, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret) {
  adj_a += cw_div(adj_ret, b);
- adj_b -= cw_mul(adj_ret, cw_div(cw_div(a, b), b));
+ adj_b -= cw_mul(adj_ret, cw_div(ret, b));
  }

  template<unsigned Length, typename Type>
@@ -850,9 +900,12 @@ inline CUDA_CALLABLE void adj_extract(const vec_t<Length, Type> & a, int idx, ve
  }

  template<unsigned Length, typename Type>
- inline CUDA_CALLABLE void adj_length(vec_t<Length, Type> a, vec_t<Length, Type>& adj_a, const Type adj_ret)
+ inline CUDA_CALLABLE void adj_length(vec_t<Length, Type> a, Type ret, vec_t<Length, Type>& adj_a, const Type adj_ret)
  {
- adj_a += normalize(a)*adj_ret;
+ if (ret > Type(kEps))
+ {
+ adj_a += div(a, ret) * adj_ret;
+ }

  #if FP_CHECK
  if (!isfinite(adj_a))
@@ -880,7 +933,7 @@ inline CUDA_CALLABLE void adj_length_sq(vec_t<Length, Type> a, vec_t<Length, Typ
  }

  template<unsigned Length, typename Type>
- inline CUDA_CALLABLE void adj_normalize(vec_t<Length, Type> a, vec_t<Length, Type>& adj_a, const vec_t<Length, Type>& adj_ret)
+ inline CUDA_CALLABLE void adj_normalize(vec_t<Length, Type> a, vec_t<Length, Type>& ret, vec_t<Length, Type>& adj_a, const vec_t<Length, Type>& adj_ret)
  {
  Type d = length(a);

@@ -888,9 +941,7 @@ inline CUDA_CALLABLE void adj_normalize(vec_t<Length, Type> a, vec_t<Length, Typ
  {
  Type invd = Type(1.0f)/d;

- vec_t<Length, Type> ahat = normalize(a);
-
- adj_a += (adj_ret*invd - ahat*(dot(ahat, adj_ret))*invd);
+ adj_a += (adj_ret*invd - ret*(dot(ret, adj_ret))*invd);

  #if FP_CHECK
  if (!isfinite(adj_a))
@@ -951,8 +1002,8 @@ inline CUDA_CALLABLE void adj_max(const vec_t<Length,Type> &v, vec_t<Length,Type

  // Do I need to specialize these for different lengths?
  template<unsigned Length, typename Type>
- inline CUDA_CALLABLE vec_t<Length, Type> atomic_add(vec_t<Length, Type> * addr, vec_t<Length, Type> value) {
-
+ inline CUDA_CALLABLE vec_t<Length, Type> atomic_add(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
+ {
  vec_t<Length, Type> ret;
  for( unsigned i=0; i < Length; ++i )
  {
@@ -963,8 +1014,8 @@ inline CUDA_CALLABLE vec_t<Length, Type> atomic_add(vec_t<Length, Type> * addr,
  }

  template<unsigned Length, typename Type>
- inline CUDA_CALLABLE vec_t<Length, Type> atomic_min(vec_t<Length, Type> * addr, vec_t<Length, Type> value) {
-
+ inline CUDA_CALLABLE vec_t<Length, Type> atomic_min(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
+ {
  vec_t<Length, Type> ret;
  for( unsigned i=0; i < Length; ++i )
  {
@@ -975,8 +1026,8 @@ inline CUDA_CALLABLE vec_t<Length, Type> atomic_min(vec_t<Length, Type> * addr,
  }

  template<unsigned Length, typename Type>
- inline CUDA_CALLABLE vec_t<Length, Type> atomic_max(vec_t<Length, Type> * addr, vec_t<Length, Type> value) {
-
+ inline CUDA_CALLABLE vec_t<Length, Type> atomic_max(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
+ {
  vec_t<Length, Type> ret;
  for( unsigned i=0; i < Length; ++i )
  {
@@ -986,6 +1037,17 @@ inline CUDA_CALLABLE vec_t<Length, Type> atomic_max(vec_t<Length, Type> * addr,
  return ret;
  }

+ template<unsigned Length, typename Type>
+ inline CUDA_CALLABLE void adj_atomic_minmax(
+ vec_t<Length,Type> *addr,
+ vec_t<Length,Type> *adj_addr,
+ const vec_t<Length,Type> &value,
+ vec_t<Length,Type> &adj_value)
+ {
+ for (unsigned i=0; i < Length; ++i)
+ adj_atomic_minmax(&(addr->c[i]), &(adj_addr->c[i]), value[i], adj_value[i]);
+ }
+
  // ok, the original implementation of this didn't take the absolute values.
  // I wouldn't consider this expected behavior. It looks like it's only
  // being used for bounding boxes at the moment, where this doesn't matter,
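
Note: adj_normalize now reuses the cached unit vector ret from the forward pass instead of calling normalize(a) again; for n = a/|a| the gradient is (adj_ret - n * dot(n, adj_ret)) / |a|. The standalone C++ sketch below evaluates that expression for a concrete 3-vector; it is illustrative only and does not use Warp's types.

    #include <cmath>
    #include <cstdio>

    int main()
    {
        const float a[3]       = {3.f, 0.f, 4.f};
        const float adj_ret[3] = {1.f, 0.f, 0.f};   // incoming gradient

        float d = std::sqrt(a[0]*a[0] + a[1]*a[1] + a[2]*a[2]);   // |a| = 5
        float n[3] = {a[0]/d, a[1]/d, a[2]/d};                    // cached forward result

        float dot_n_adj = n[0]*adj_ret[0] + n[1]*adj_ret[1] + n[2]*adj_ret[2];

        float adj_a[3];
        for (int i = 0; i < 3; ++i)
            adj_a[i] = (adj_ret[i] - n[i] * dot_n_adj) / d;       // project out the radial part

        printf("adj_a = (%g, %g, %g)\n", adj_a[0], adj_a[1], adj_a[2]);
        return 0;
    }
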
warp/native/warp.cu CHANGED
@@ -1143,12 +1143,7 @@ int cuda_toolkit_version()

  bool cuda_driver_is_initialized()
  {
- CUcontext ctx;
-
- // result can be: CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED
- CUresult result = cuCtxGetCurrent_f(&ctx);
-
- return result == CUDA_SUCCESS;
+ return is_cuda_driver_initialized();
  }

  int nvrtc_supported_arch_count()