warp-lang 1.0.0b5__py3-none-manylinux2014_x86_64.whl → 1.0.0b6__py3-none-manylinux2014_x86_64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/conf.py +3 -4
- examples/env/env_ant.py +1 -1
- examples/env/env_cartpole.py +1 -1
- examples/env/env_humanoid.py +1 -1
- examples/example_dem.py +28 -26
- examples/example_diffray.py +37 -30
- examples/example_fluid.py +7 -3
- examples/example_jacobian_ik.py +1 -1
- examples/example_mesh_intersect.py +10 -7
- examples/example_nvdb.py +3 -3
- examples/example_render_opengl.py +19 -10
- examples/example_sim_cartpole.py +9 -5
- examples/example_sim_cloth.py +29 -25
- examples/example_sim_fk_grad.py +2 -2
- examples/example_sim_fk_grad_torch.py +3 -3
- examples/example_sim_grad_bounce.py +11 -8
- examples/example_sim_grad_cloth.py +12 -9
- examples/example_sim_granular.py +2 -2
- examples/example_sim_granular_collision_sdf.py +13 -13
- examples/example_sim_neo_hookean.py +3 -3
- examples/example_sim_particle_chain.py +2 -2
- examples/example_sim_quadruped.py +8 -5
- examples/example_sim_rigid_chain.py +8 -5
- examples/example_sim_rigid_contact.py +13 -10
- examples/example_sim_rigid_fem.py +2 -2
- examples/example_sim_rigid_gyroscopic.py +2 -2
- examples/example_sim_rigid_kinematics.py +1 -1
- examples/example_sim_trajopt.py +3 -2
- examples/fem/example_apic_fluid.py +5 -7
- examples/fem/example_diffusion_mgpu.py +18 -16
- warp/__init__.py +3 -2
- warp/bin/warp.so +0 -0
- warp/build_dll.py +29 -9
- warp/builtins.py +206 -7
- warp/codegen.py +58 -38
- warp/config.py +3 -1
- warp/context.py +234 -128
- warp/fem/__init__.py +2 -2
- warp/fem/cache.py +2 -1
- warp/fem/field/nodal_field.py +18 -17
- warp/fem/geometry/hexmesh.py +11 -6
- warp/fem/geometry/quadmesh_2d.py +16 -12
- warp/fem/geometry/tetmesh.py +19 -8
- warp/fem/geometry/trimesh_2d.py +18 -7
- warp/fem/integrate.py +341 -196
- warp/fem/quadrature/__init__.py +1 -1
- warp/fem/quadrature/pic_quadrature.py +138 -53
- warp/fem/quadrature/quadrature.py +81 -9
- warp/fem/space/__init__.py +1 -1
- warp/fem/space/basis_space.py +169 -51
- warp/fem/space/grid_2d_function_space.py +2 -2
- warp/fem/space/grid_3d_function_space.py +2 -2
- warp/fem/space/hexmesh_function_space.py +2 -2
- warp/fem/space/partition.py +9 -6
- warp/fem/space/quadmesh_2d_function_space.py +2 -2
- warp/fem/space/shape/cube_shape_function.py +27 -15
- warp/fem/space/shape/square_shape_function.py +29 -18
- warp/fem/space/tetmesh_function_space.py +2 -2
- warp/fem/space/topology.py +10 -0
- warp/fem/space/trimesh_2d_function_space.py +2 -2
- warp/fem/utils.py +10 -5
- warp/native/array.h +49 -8
- warp/native/builtin.h +31 -14
- warp/native/cuda_util.cpp +8 -3
- warp/native/cuda_util.h +1 -0
- warp/native/exports.h +1177 -1108
- warp/native/intersect.h +4 -4
- warp/native/intersect_adj.h +8 -8
- warp/native/mat.h +65 -6
- warp/native/mesh.h +126 -5
- warp/native/quat.h +28 -4
- warp/native/vec.h +76 -14
- warp/native/warp.cu +1 -6
- warp/render/render_opengl.py +261 -109
- warp/sim/import_mjcf.py +13 -7
- warp/sim/import_urdf.py +14 -14
- warp/sim/inertia.py +17 -18
- warp/sim/model.py +67 -67
- warp/sim/render.py +1 -1
- warp/sparse.py +6 -6
- warp/stubs.py +19 -81
- warp/tape.py +1 -1
- warp/tests/__main__.py +3 -6
- warp/tests/{test_class_kernel.py → aux_test_class_kernel.py} +9 -1
- warp/tests/aux_test_conditional_unequal_types_kernels.py +21 -0
- warp/tests/{test_dependent.py → aux_test_dependent.py} +2 -2
- warp/tests/{test_reference.py → aux_test_reference.py} +1 -1
- warp/tests/aux_test_unresolved_func.py +14 -0
- warp/tests/aux_test_unresolved_symbol.py +14 -0
- warp/tests/{test_kinematics.py → disabled_kinematics.py} +10 -12
- warp/tests/run_coverage_serial.py +31 -0
- warp/tests/test_adam.py +102 -106
- warp/tests/test_arithmetic.py +39 -40
- warp/tests/test_array.py +46 -48
- warp/tests/test_array_reduce.py +25 -19
- warp/tests/test_atomic.py +62 -26
- warp/tests/test_bool.py +16 -11
- warp/tests/test_builtins_resolution.py +1292 -0
- warp/tests/test_bvh.py +9 -12
- warp/tests/test_closest_point_edge_edge.py +53 -57
- warp/tests/test_codegen.py +164 -134
- warp/tests/test_compile_consts.py +13 -19
- warp/tests/test_conditional.py +30 -32
- warp/tests/test_copy.py +9 -12
- warp/tests/test_ctypes.py +90 -98
- warp/tests/test_dense.py +20 -14
- warp/tests/test_devices.py +34 -35
- warp/tests/test_dlpack.py +74 -75
- warp/tests/test_examples.py +215 -97
- warp/tests/test_fabricarray.py +15 -21
- warp/tests/test_fast_math.py +14 -11
- warp/tests/test_fem.py +280 -97
- warp/tests/test_fp16.py +19 -15
- warp/tests/test_func.py +177 -194
- warp/tests/test_generics.py +71 -77
- warp/tests/test_grad.py +83 -32
- warp/tests/test_grad_customs.py +7 -9
- warp/tests/test_hash_grid.py +6 -10
- warp/tests/test_import.py +9 -23
- warp/tests/test_indexedarray.py +19 -21
- warp/tests/test_intersect.py +15 -9
- warp/tests/test_large.py +17 -19
- warp/tests/test_launch.py +14 -17
- warp/tests/test_lerp.py +63 -63
- warp/tests/test_lvalue.py +84 -35
- warp/tests/test_marching_cubes.py +9 -13
- warp/tests/test_mat.py +388 -3004
- warp/tests/test_mat_lite.py +9 -12
- warp/tests/test_mat_scalar_ops.py +2889 -0
- warp/tests/test_math.py +10 -11
- warp/tests/test_matmul.py +104 -100
- warp/tests/test_matmul_lite.py +72 -98
- warp/tests/test_mesh.py +35 -32
- warp/tests/test_mesh_query_aabb.py +18 -25
- warp/tests/test_mesh_query_point.py +39 -23
- warp/tests/test_mesh_query_ray.py +9 -21
- warp/tests/test_mlp.py +8 -9
- warp/tests/test_model.py +89 -93
- warp/tests/test_modules_lite.py +15 -25
- warp/tests/test_multigpu.py +87 -114
- warp/tests/test_noise.py +10 -12
- warp/tests/test_operators.py +14 -21
- warp/tests/test_options.py +10 -11
- warp/tests/test_pinned.py +16 -18
- warp/tests/test_print.py +16 -20
- warp/tests/test_quat.py +121 -88
- warp/tests/test_rand.py +12 -13
- warp/tests/test_reload.py +27 -32
- warp/tests/test_rounding.py +7 -10
- warp/tests/test_runlength_encode.py +105 -106
- warp/tests/test_smoothstep.py +8 -9
- warp/tests/test_snippet.py +13 -22
- warp/tests/test_sparse.py +30 -29
- warp/tests/test_spatial.py +179 -174
- warp/tests/test_streams.py +100 -107
- warp/tests/test_struct.py +98 -67
- warp/tests/test_tape.py +11 -17
- warp/tests/test_torch.py +89 -86
- warp/tests/test_transient_module.py +9 -12
- warp/tests/test_types.py +328 -50
- warp/tests/test_utils.py +217 -218
- warp/tests/test_vec.py +133 -2133
- warp/tests/test_vec_lite.py +8 -11
- warp/tests/test_vec_scalar_ops.py +2099 -0
- warp/tests/test_volume.py +391 -382
- warp/tests/test_volume_write.py +122 -135
- warp/tests/unittest_serial.py +35 -0
- warp/tests/unittest_suites.py +291 -0
- warp/tests/{test_base.py → unittest_utils.py} +138 -25
- warp/tests/{test_misc.py → unused_test_misc.py} +13 -5
- warp/tests/{test_debug.py → walkthough_debug.py} +2 -15
- warp/thirdparty/unittest_parallel.py +257 -54
- warp/types.py +119 -98
- warp/utils.py +14 -0
- {warp_lang-1.0.0b5.dist-info → warp_lang-1.0.0b6.dist-info}/METADATA +2 -1
- {warp_lang-1.0.0b5.dist-info → warp_lang-1.0.0b6.dist-info}/RECORD +182 -178
- {warp_lang-1.0.0b5.dist-info → warp_lang-1.0.0b6.dist-info}/WHEEL +1 -1
- warp/tests/test_all.py +0 -239
- warp/tests/test_conditional_unequal_types_kernels.py +0 -14
- warp/tests/test_coverage.py +0 -38
- warp/tests/test_unresolved_func.py +0 -7
- warp/tests/test_unresolved_symbol.py +0 -7
- /warp/tests/{test_compile_consts_dummy.py → aux_test_compile_consts_dummy.py} +0 -0
- /warp/tests/{test_reference_reference.py → aux_test_reference_reference.py} +0 -0
- /warp/tests/{test_square.py → aux_test_square.py} +0 -0
- {warp_lang-1.0.0b5.dist-info → warp_lang-1.0.0b6.dist-info}/LICENSE.md +0 -0
- {warp_lang-1.0.0b5.dist-info → warp_lang-1.0.0b6.dist-info}/top_level.txt +0 -0
warp/native/intersect.h
CHANGED
|
@@ -869,7 +869,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
|
|
|
869
869
|
wp::adj_sub(var_9, var_71, adj_9, adj_71, adj_73);
|
|
870
870
|
wp::adj_mul(var_21, var_70, adj_21, adj_70, adj_72);
|
|
871
871
|
wp::adj_mul(var_41, var_70, adj_41, adj_70, adj_71);
|
|
872
|
-
wp::adj_div(var_9, var_69, adj_9, adj_69, adj_70);
|
|
872
|
+
wp::adj_div(var_9, var_69, var_70, adj_9, adj_69, adj_70);
|
|
873
873
|
wp::adj_add(var_68, var_21, adj_68, adj_21, adj_69);
|
|
874
874
|
wp::adj_add(var_53, var_41, adj_53, adj_41, adj_68);
|
|
875
875
|
wp::adj_select(var_64, var_50, var_66, adj_64, adj_50, adj_66, adj_67);
|
|
@@ -881,7 +881,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
|
|
|
881
881
|
}
|
|
882
882
|
wp::adj_sub(var_32, var_33, adj_32, adj_33, adj_62);
|
|
883
883
|
wp::adj_sub(var_13, var_12, adj_13, adj_12, adj_60);
|
|
884
|
-
wp::adj_div(var_54, var_57, adj_54, adj_57, adj_58);
|
|
884
|
+
wp::adj_div(var_54, var_57, var_58, adj_54, adj_57, adj_58);
|
|
885
885
|
wp::adj_add(var_55, var_56, adj_55, adj_56, adj_57);
|
|
886
886
|
wp::adj_sub(var_32, var_33, adj_32, adj_33, adj_56);
|
|
887
887
|
wp::adj_sub(var_13, var_12, adj_13, adj_12, adj_55);
|
|
@@ -896,7 +896,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
|
|
|
896
896
|
wp::adj_vec2(var_48, var_5, adj_48, adj_5, adj_49);
|
|
897
897
|
wp::adj_sub(var_9, var_43, adj_9, adj_43, adj_48);
|
|
898
898
|
}
|
|
899
|
-
wp::adj_div(var_4, var_42, adj_4, adj_42, adj_43);
|
|
899
|
+
wp::adj_div(var_4, var_42, var_43, adj_4, adj_42, adj_43);
|
|
900
900
|
wp::adj_sub(var_4, var_33, adj_4, adj_33, adj_42);
|
|
901
901
|
wp::adj_sub(var_39, var_40, adj_39, adj_40, adj_41);
|
|
902
902
|
wp::adj_mul(var_3, var_33, adj_3, adj_33, adj_40);
|
|
@@ -917,7 +917,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
|
|
|
917
917
|
wp::adj_vec2(var_28, var_23, adj_28, adj_23, adj_29);
|
|
918
918
|
wp::adj_sub(var_9, var_23, adj_9, adj_23, adj_28);
|
|
919
919
|
}
|
|
920
|
-
wp::adj_div(var_3, var_22, adj_3, adj_22, adj_23);
|
|
920
|
+
wp::adj_div(var_3, var_22, var_23, adj_3, adj_22, adj_23);
|
|
921
921
|
wp::adj_sub(var_3, var_12, adj_3, adj_12, adj_22);
|
|
922
922
|
wp::adj_sub(var_19, var_20, adj_19, adj_20, adj_21);
|
|
923
923
|
wp::adj_mul(var_12, var_4, adj_12, adj_4, adj_20);
|
warp/native/intersect_adj.h
CHANGED
|
@@ -276,7 +276,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
|
|
|
276
276
|
label1:;
|
|
277
277
|
adj_71 += adj_ret;
|
|
278
278
|
wp::adj_vec3(var_61, var_62, var_70, adj_61, adj_62, adj_70, adj_71);
|
|
279
|
-
wp::adj_length(var_69, adj_69, adj_70);
|
|
279
|
+
wp::adj_length(var_69, var_70, adj_69, adj_70);
|
|
280
280
|
wp::adj_sub(var_68, var_65, adj_68, adj_65, adj_69);
|
|
281
281
|
wp::adj_add(var_p2, var_67, adj_p2, adj_67, adj_68);
|
|
282
282
|
wp::adj_mul(var_66, var_62, adj_66, adj_62, adj_67);
|
|
@@ -297,7 +297,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
|
|
|
297
297
|
wp::adj_select(var_51, var_49, var_54, adj_51, adj_49, adj_54, adj_55);
|
|
298
298
|
if (var_51) {
|
|
299
299
|
wp::adj_clamp(var_53, var_6, var_25, adj_53, adj_6, adj_25, adj_54);
|
|
300
|
-
wp::adj_div(var_52, var_3, adj_52, adj_3, adj_53);
|
|
300
|
+
wp::adj_div(var_52, var_3, var_53, adj_52, adj_3, adj_53);
|
|
301
301
|
wp::adj_sub(var_30, var_21, adj_30, adj_21, adj_52);
|
|
302
302
|
}
|
|
303
303
|
}
|
|
@@ -305,10 +305,10 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
|
|
|
305
305
|
wp::adj_select(var_45, var_41, var_48, adj_45, adj_41, adj_48, adj_49);
|
|
306
306
|
if (var_45) {
|
|
307
307
|
wp::adj_clamp(var_47, var_6, var_25, adj_47, adj_6, adj_25, adj_48);
|
|
308
|
-
wp::adj_div(var_46, var_3, adj_46, adj_3, adj_47);
|
|
308
|
+
wp::adj_div(var_46, var_3, var_47, adj_46, adj_3, adj_47);
|
|
309
309
|
wp::adj_neg(var_21, adj_21, adj_46);
|
|
310
310
|
}
|
|
311
|
-
wp::adj_div(var_43, var_4, adj_43, adj_4, adj_44);
|
|
311
|
+
wp::adj_div(var_43, var_4, var_44, adj_43, adj_4, adj_44);
|
|
312
312
|
wp::adj_add(var_42, var_5, adj_42, adj_5, adj_43);
|
|
313
313
|
wp::adj_mul(var_30, var_41, adj_30, adj_41, adj_42);
|
|
314
314
|
wp::adj_select(var_34, var_6, var_40, adj_34, adj_6, adj_40, adj_41);
|
|
@@ -317,7 +317,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
|
|
|
317
317
|
wp::adj_select(var_34, var_28, var_39, adj_34, adj_28, adj_39, adj_40);
|
|
318
318
|
if (var_34) {
|
|
319
319
|
wp::adj_clamp(var_38, var_6, var_25, adj_38, adj_6, adj_25, adj_39);
|
|
320
|
-
wp::adj_div(var_37, var_33, adj_37, adj_33, adj_38);
|
|
320
|
+
wp::adj_div(var_37, var_33, var_38, adj_37, adj_33, adj_38);
|
|
321
321
|
wp::adj_sub(var_35, var_36, adj_35, adj_36, adj_37);
|
|
322
322
|
wp::adj_mul(var_21, var_4, adj_21, adj_4, adj_36);
|
|
323
323
|
wp::adj_mul(var_30, var_5, adj_30, adj_5, adj_35);
|
|
@@ -332,7 +332,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
|
|
|
332
332
|
if (var_22) {
|
|
333
333
|
wp::adj_cast_float(var_6, adj_6, adj_27);
|
|
334
334
|
wp::adj_clamp(var_24, var_6, var_25, adj_24, adj_6, adj_25, adj_26);
|
|
335
|
-
wp::adj_div(var_23, var_3, adj_23, adj_3, adj_24);
|
|
335
|
+
wp::adj_div(var_23, var_3, var_24, adj_23, adj_3, adj_24);
|
|
336
336
|
wp::adj_neg(var_21, adj_21, adj_23);
|
|
337
337
|
}
|
|
338
338
|
wp::adj_dot(var_0, var_2, adj_0, adj_2, adj_21);
|
|
@@ -341,7 +341,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
|
|
|
341
341
|
wp::adj_select(var_15, var_7, var_16, adj_15, adj_7, adj_16, adj_19);
|
|
342
342
|
if (var_15) {
|
|
343
343
|
wp::adj_cast_float(var_17, adj_17, adj_18);
|
|
344
|
-
wp::adj_div(var_5, var_4, adj_5, adj_4, adj_17);
|
|
344
|
+
wp::adj_div(var_5, var_4, var_17, adj_5, adj_4, adj_17);
|
|
345
345
|
wp::adj_cast_float(var_6, adj_6, adj_16);
|
|
346
346
|
}
|
|
347
347
|
if (var_13) {
|
|
@@ -349,7 +349,7 @@ static CUDA_CALLABLE void adj_closest_point_edge_edge(vec3 var_p1,
|
|
|
349
349
|
adj_14 += adj_ret;
|
|
350
350
|
wp::adj_vec3(var_7, var_8, var_10, adj_7, adj_8, adj_10, adj_14);
|
|
351
351
|
}
|
|
352
|
-
wp::adj_length(var_9, adj_9, adj_10);
|
|
352
|
+
wp::adj_length(var_9, var_10, adj_9, adj_10);
|
|
353
353
|
wp::adj_sub(var_p2, var_p1, adj_p2, adj_p1, adj_9);
|
|
354
354
|
wp::adj_cast_float(var_6, adj_6, adj_8);
|
|
355
355
|
wp::adj_cast_float(var_6, adj_6, adj_7);
|
warp/native/mat.h
CHANGED
|
@@ -297,6 +297,18 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> atomic_max(mat_t<Rows,Cols,Type> * ad
|
|
|
297
297
|
return m;
|
|
298
298
|
}
|
|
299
299
|
|
|
300
|
+
template<unsigned Rows, unsigned Cols, typename Type>
|
|
301
|
+
inline CUDA_CALLABLE void adj_atomic_minmax(
|
|
302
|
+
mat_t<Rows,Cols,Type> *addr,
|
|
303
|
+
mat_t<Rows,Cols,Type> *adj_addr,
|
|
304
|
+
const mat_t<Rows,Cols,Type> &value,
|
|
305
|
+
mat_t<Rows,Cols,Type> &adj_value)
|
|
306
|
+
{
|
|
307
|
+
for (unsigned i=0; i < Rows; ++i)
|
|
308
|
+
for (unsigned j=0; j < Cols; ++j)
|
|
309
|
+
adj_atomic_minmax(&addr->data[i][j], &adj_addr->data[i][j], value.data[i][j], adj_value.data[i][j]);
|
|
310
|
+
}
|
|
311
|
+
|
|
300
312
|
template<unsigned Rows, unsigned Cols, typename Type>
|
|
301
313
|
inline CUDA_CALLABLE vec_t<Cols,Type> extract(const mat_t<Rows,Cols,Type>& m, int row)
|
|
302
314
|
{
|
|
@@ -425,7 +437,22 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> div(const mat_t<Rows,Cols,Type>& a, T
|
|
|
425
437
|
}
|
|
426
438
|
}
|
|
427
439
|
|
|
428
|
-
return t;
|
|
440
|
+
return t;
|
|
441
|
+
}
|
|
442
|
+
|
|
443
|
+
template<unsigned Rows, unsigned Cols, typename Type>
|
|
444
|
+
inline CUDA_CALLABLE mat_t<Rows,Cols,Type> div(Type b, const mat_t<Rows,Cols,Type>& a)
|
|
445
|
+
{
|
|
446
|
+
mat_t<Rows,Cols,Type> t;
|
|
447
|
+
for (unsigned i=0; i < Rows; ++i)
|
|
448
|
+
{
|
|
449
|
+
for (unsigned j=0; j < Cols; ++j)
|
|
450
|
+
{
|
|
451
|
+
t.data[i][j] = b / a.data[i][j];
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
|
|
455
|
+
return t;
|
|
429
456
|
}
|
|
430
457
|
|
|
431
458
|
template<unsigned Rows, unsigned Cols, typename Type>
|
|
@@ -440,7 +467,7 @@ inline CUDA_CALLABLE mat_t<Rows,Cols,Type> mul(const mat_t<Rows,Cols,Type>& a, T
|
|
|
440
467
|
}
|
|
441
468
|
}
|
|
442
469
|
|
|
443
|
-
return t;
|
|
470
|
+
return t;
|
|
444
471
|
}
|
|
445
472
|
|
|
446
473
|
template<unsigned Rows, unsigned Cols, typename Type>
|
|
@@ -473,6 +500,17 @@ inline CUDA_CALLABLE vec_t<Rows,Type> mul(const mat_t<Rows,Cols,Type>& a, const
|
|
|
473
500
|
return r;
|
|
474
501
|
}
|
|
475
502
|
|
|
503
|
+
template<unsigned Rows, unsigned Cols, typename Type>
|
|
504
|
+
inline CUDA_CALLABLE vec_t<Cols,Type> mul(const vec_t<Rows,Type>& b, const mat_t<Rows,Cols,Type>& a)
|
|
505
|
+
{
|
|
506
|
+
vec_t<Cols,Type> r = a.get_row(0)*b[0];
|
|
507
|
+
for( unsigned i=1; i < Rows; ++i )
|
|
508
|
+
{
|
|
509
|
+
r += a.get_row(i)*b[i];
|
|
510
|
+
}
|
|
511
|
+
return r;
|
|
512
|
+
}
|
|
513
|
+
|
|
476
514
|
template<unsigned Rows, unsigned Cols, unsigned ColsOut, typename Type>
|
|
477
515
|
inline CUDA_CALLABLE mat_t<Rows,ColsOut,Type> mul(const mat_t<Rows,Cols,Type>& a, const mat_t<Cols,ColsOut,Type>& b)
|
|
478
516
|
{
|
|
@@ -932,6 +970,20 @@ inline CUDA_CALLABLE void adj_div(const mat_t<Rows,Cols,Type>& a, Type s, mat_t<
|
|
|
932
970
|
}
|
|
933
971
|
}
|
|
934
972
|
|
|
973
|
+
template<unsigned Rows, unsigned Cols, typename Type>
|
|
974
|
+
inline CUDA_CALLABLE void adj_div(Type s, const mat_t<Rows,Cols,Type>& a, Type& adj_s, mat_t<Rows,Cols,Type>& adj_a, const mat_t<Rows,Cols,Type>& adj_ret)
|
|
975
|
+
{
|
|
976
|
+
adj_s -= tensordot(a , adj_ret)/ (s * s); // - a / s^2
|
|
977
|
+
|
|
978
|
+
for (unsigned i=0; i < Rows; ++i)
|
|
979
|
+
{
|
|
980
|
+
for (unsigned j=0; j < Cols; ++j)
|
|
981
|
+
{
|
|
982
|
+
adj_a.data[i][j] += s / adj_ret.data[i][j];
|
|
983
|
+
}
|
|
984
|
+
}
|
|
985
|
+
}
|
|
986
|
+
|
|
935
987
|
template<unsigned Rows, unsigned Cols, typename Type>
|
|
936
988
|
inline CUDA_CALLABLE void adj_mul(const mat_t<Rows,Cols,Type>& a, Type b, mat_t<Rows,Cols,Type>& adj_a, Type& adj_b, const mat_t<Rows,Cols,Type>& adj_ret)
|
|
937
989
|
{
|
|
@@ -965,6 +1017,13 @@ inline CUDA_CALLABLE void adj_mul(const mat_t<Rows,Cols,Type>& a, const vec_t<Co
|
|
|
965
1017
|
adj_b += mul(transpose(a), adj_ret);
|
|
966
1018
|
}
|
|
967
1019
|
|
|
1020
|
+
template<unsigned Rows, unsigned Cols, typename Type>
|
|
1021
|
+
inline CUDA_CALLABLE void adj_mul(const vec_t<Rows,Type>& b, const mat_t<Rows,Cols,Type>& a, vec_t<Rows,Type>& adj_b, mat_t<Rows,Cols,Type>& adj_a, const vec_t<Cols,Type>& adj_ret)
|
|
1022
|
+
{
|
|
1023
|
+
adj_a += outer(b, adj_ret);
|
|
1024
|
+
adj_b += mul(adj_ret, transpose(a));
|
|
1025
|
+
}
|
|
1026
|
+
|
|
968
1027
|
template<unsigned Rows, unsigned Cols, unsigned ColsOut, typename Type>
|
|
969
1028
|
inline CUDA_CALLABLE void adj_mul(const mat_t<Rows,Cols,Type>& a, const mat_t<Cols,ColsOut,Type>& b, mat_t<Rows,Cols,Type>& adj_a, mat_t<Cols,ColsOut,Type>& adj_b, const mat_t<Rows,ColsOut,Type>& adj_ret)
|
|
970
1029
|
{
|
|
@@ -1105,10 +1164,10 @@ inline CUDA_CALLABLE void adj_determinant(const mat_t<4,4,Type>& m, mat_t<4,4,Ty
|
|
|
1105
1164
|
}
|
|
1106
1165
|
|
|
1107
1166
|
template<unsigned Rows, typename Type>
|
|
1108
|
-
inline CUDA_CALLABLE void adj_inverse(const mat_t<Rows,Rows,Type>& m, mat_t<Rows,Rows,Type>& adj_m, const mat_t<Rows,Rows,Type>& adj_ret)
|
|
1167
|
+
inline CUDA_CALLABLE void adj_inverse(const mat_t<Rows,Rows,Type>& m, mat_t<Rows,Rows,Type>& ret, mat_t<Rows,Rows,Type>& adj_m, const mat_t<Rows,Rows,Type>& adj_ret)
|
|
1109
1168
|
{
|
|
1110
1169
|
// todo: how to cache this from the forward pass?
|
|
1111
|
-
mat_t<Rows,Rows,Type> invt = transpose(inverse(m));
|
|
1170
|
+
mat_t<Rows,Rows,Type> invt = transpose(ret);
|
|
1112
1171
|
|
|
1113
1172
|
// see https://people.maths.ox.ac.uk/gilesm/files/NA-08-01.pdf 2.2.3
|
|
1114
1173
|
adj_m -= mul(mul(invt, adj_ret), invt);
|
|
@@ -1150,10 +1209,10 @@ inline CUDA_CALLABLE void adj_cw_mul(const mat_t<Rows,Cols,Type>& a, const mat_t
|
|
|
1150
1209
|
}
|
|
1151
1210
|
|
|
1152
1211
|
template<unsigned Rows, unsigned Cols, typename Type>
|
|
1153
|
-
inline CUDA_CALLABLE void adj_cw_div(const mat_t<Rows,Cols,Type>& a, const mat_t<Rows,Cols,Type>& b, mat_t<Rows,Cols,Type>& adj_a, mat_t<Rows,Cols,Type>& adj_b, const mat_t<Rows,Cols,Type>& adj_ret)
|
|
1212
|
+
inline CUDA_CALLABLE void adj_cw_div(const mat_t<Rows,Cols,Type>& a, const mat_t<Rows,Cols,Type>& b, mat_t<Rows,Cols,Type>& ret, mat_t<Rows,Cols,Type>& adj_a, mat_t<Rows,Cols,Type>& adj_b, const mat_t<Rows,Cols,Type>& adj_ret)
|
|
1154
1213
|
{
|
|
1155
1214
|
adj_a += cw_div(adj_ret, b);
|
|
1156
|
-
adj_b -= cw_mul(adj_ret, cw_div(cw_div(a, b), b));
|
|
1215
|
+
adj_b -= cw_mul(adj_ret, cw_div(ret, b));
|
|
1157
1216
|
}
|
|
1158
1217
|
|
|
1159
1218
|
// adjoint for the constant constructor:
|
warp/native/mesh.h
CHANGED
|
@@ -1181,7 +1181,7 @@ CUDA_CALLABLE inline bool mesh_query_point_sign_winding_number(uint64_t id, cons
|
|
|
1181
1181
|
}
|
|
1182
1182
|
}
|
|
1183
1183
|
|
|
1184
|
-
CUDA_CALLABLE inline void adj_mesh_query_point_no_sign(uint64_t id, const vec3& point, float max_dist, int& face, float& u, float& v,
|
|
1184
|
+
CUDA_CALLABLE inline void adj_mesh_query_point_no_sign(uint64_t id, const vec3& point, float max_dist, const int& face, const float& u, const float& v,
|
|
1185
1185
|
uint64_t adj_id, vec3& adj_point, float& adj_max_dist, int& adj_face, float& adj_u, float& adj_v, bool& adj_ret)
|
|
1186
1186
|
{
|
|
1187
1187
|
Mesh mesh = mesh_get(id);
|
|
@@ -1202,7 +1202,7 @@ CUDA_CALLABLE inline void adj_mesh_query_point_no_sign(uint64_t id, const vec3&
|
|
|
1202
1202
|
adj_closest_point_to_triangle(p, q, r, point, adj_p, adj_q, adj_r, adj_point, adj_uv);
|
|
1203
1203
|
}
|
|
1204
1204
|
|
|
1205
|
-
CUDA_CALLABLE inline void adj_mesh_query_furthest_point_no_sign(uint64_t id, const vec3& point, float min_dist, int& face, float& u, float& v,
|
|
1205
|
+
CUDA_CALLABLE inline void adj_mesh_query_furthest_point_no_sign(uint64_t id, const vec3& point, float min_dist, const int& face, const float& u, const float& v,
|
|
1206
1206
|
uint64_t adj_id, vec3& adj_point, float& adj_min_dist, int& adj_face, float& adj_u, float& adj_v, bool& adj_ret)
|
|
1207
1207
|
{
|
|
1208
1208
|
Mesh mesh = mesh_get(id);
|
|
@@ -1223,24 +1223,116 @@ CUDA_CALLABLE inline void adj_mesh_query_furthest_point_no_sign(uint64_t id, con
|
|
|
1223
1223
|
adj_closest_point_to_triangle(p, q, r, point, adj_p, adj_q, adj_r, adj_point, adj_uv); // Todo for Miles :>
|
|
1224
1224
|
}
|
|
1225
1225
|
|
|
1226
|
-
CUDA_CALLABLE inline void adj_mesh_query_point(uint64_t id, const vec3& point, float max_dist, float& inside, int& face, float& u, float& v,
|
|
1226
|
+
CUDA_CALLABLE inline void adj_mesh_query_point(uint64_t id, const vec3& point, float max_dist, const float& inside, const int& face, const float& u, const float& v,
|
|
1227
1227
|
uint64_t adj_id, vec3& adj_point, float& adj_max_dist, float& adj_inside, int& adj_face, float& adj_u, float& adj_v, bool& adj_ret)
|
|
1228
1228
|
{
|
|
1229
1229
|
adj_mesh_query_point_no_sign(id, point, max_dist, face, u, v, adj_id, adj_point, adj_max_dist, adj_face, adj_u, adj_v, adj_ret);
|
|
1230
1230
|
}
|
|
1231
1231
|
|
|
1232
|
-
CUDA_CALLABLE inline void adj_mesh_query_point_sign_normal(uint64_t id, const vec3& point, float max_dist, float& inside, int& face, float& u, float& v, const float epsilon,
|
|
1232
|
+
CUDA_CALLABLE inline void adj_mesh_query_point_sign_normal(uint64_t id, const vec3& point, float max_dist, const float& inside, const int& face, const float& u, const float& v, const float epsilon,
|
|
1233
1233
|
uint64_t adj_id, vec3& adj_point, float& adj_max_dist, float& adj_inside, int& adj_face, float& adj_u, float& adj_v, float& adj_epsilon, bool& adj_ret)
|
|
1234
1234
|
{
|
|
1235
1235
|
adj_mesh_query_point_no_sign(id, point, max_dist, face, u, v, adj_id, adj_point, adj_max_dist, adj_face, adj_u, adj_v, adj_ret);
|
|
1236
1236
|
}
|
|
1237
1237
|
|
|
1238
|
-
CUDA_CALLABLE inline void adj_mesh_query_point_sign_winding_number(uint64_t id, const vec3& point, float max_dist, float& inside, int& face, float& u, float& v, const float accuracy, const float winding_number_threshold,
|
|
1238
|
+
CUDA_CALLABLE inline void adj_mesh_query_point_sign_winding_number(uint64_t id, const vec3& point, float max_dist, const float& inside, const int& face, const float& u, const float& v, const float accuracy, const float winding_number_threshold,
|
|
1239
1239
|
uint64_t adj_id, vec3& adj_point, float& adj_max_dist, float& adj_inside, int& adj_face, float& adj_u, float& adj_v, float& adj_accuracy, float& adj_winding_number_threshold, bool& adj_ret)
|
|
1240
1240
|
{
|
|
1241
1241
|
adj_mesh_query_point_no_sign(id, point, max_dist, face, u, v, adj_id, adj_point, adj_max_dist, adj_face, adj_u, adj_v, adj_ret);
|
|
1242
1242
|
}
|
|
1243
1243
|
|
|
1244
|
+
|
|
1245
|
+
// Stores the result of querying the closest point on a mesh.
|
|
1246
|
+
struct mesh_query_point_t
|
|
1247
|
+
{
|
|
1248
|
+
CUDA_CALLABLE mesh_query_point_t()
|
|
1249
|
+
{
|
|
1250
|
+
}
|
|
1251
|
+
|
|
1252
|
+
CUDA_CALLABLE mesh_query_point_t(int)
|
|
1253
|
+
{
|
|
1254
|
+
// For backward pass.
|
|
1255
|
+
}
|
|
1256
|
+
|
|
1257
|
+
bool result;
|
|
1258
|
+
float sign;
|
|
1259
|
+
int face;
|
|
1260
|
+
float u;
|
|
1261
|
+
float v;
|
|
1262
|
+
};
|
|
1263
|
+
|
|
1264
|
+
CUDA_CALLABLE inline mesh_query_point_t mesh_query_point(uint64_t id, const vec3& point, float max_dist)
|
|
1265
|
+
{
|
|
1266
|
+
mesh_query_point_t query;
|
|
1267
|
+
query.result = mesh_query_point(id, point, max_dist, query.sign, query.face, query.u, query.v);
|
|
1268
|
+
return query;
|
|
1269
|
+
}
|
|
1270
|
+
|
|
1271
|
+
CUDA_CALLABLE inline mesh_query_point_t mesh_query_point_no_sign(uint64_t id, const vec3& point, float max_dist)
|
|
1272
|
+
{
|
|
1273
|
+
mesh_query_point_t query;
|
|
1274
|
+
query.sign = 0.0;
|
|
1275
|
+
query.result = mesh_query_point_no_sign(id, point, max_dist, query.face, query.u, query.v);
|
|
1276
|
+
return query;
|
|
1277
|
+
}
|
|
1278
|
+
|
|
1279
|
+
CUDA_CALLABLE inline mesh_query_point_t mesh_query_furthest_point_no_sign(uint64_t id, const vec3& point, float min_dist)
|
|
1280
|
+
{
|
|
1281
|
+
mesh_query_point_t query;
|
|
1282
|
+
query.sign = 0.0;
|
|
1283
|
+
query.result = mesh_query_furthest_point_no_sign(id, point, min_dist, query.face, query.u, query.v);
|
|
1284
|
+
return query;
|
|
1285
|
+
}
|
|
1286
|
+
|
|
1287
|
+
CUDA_CALLABLE inline mesh_query_point_t mesh_query_point_sign_normal(uint64_t id, const vec3& point, float max_dist, const float epsilon = 1e-3f)
|
|
1288
|
+
{
|
|
1289
|
+
mesh_query_point_t query;
|
|
1290
|
+
query.result = mesh_query_point_sign_normal(id, point, max_dist, query.sign, query.face, query.u, query.v, epsilon);
|
|
1291
|
+
return query;
|
|
1292
|
+
}
|
|
1293
|
+
|
|
1294
|
+
CUDA_CALLABLE inline mesh_query_point_t mesh_query_point_sign_winding_number(uint64_t id, const vec3& point, float max_dist, float accuracy, float winding_number_threshold)
|
|
1295
|
+
{
|
|
1296
|
+
mesh_query_point_t query;
|
|
1297
|
+
query.result = mesh_query_point_sign_winding_number(id, point, max_dist, query.sign, query.face, query.u, query.v, accuracy, winding_number_threshold);
|
|
1298
|
+
return query;
|
|
1299
|
+
}
|
|
1300
|
+
|
|
1301
|
+
CUDA_CALLABLE inline void adj_mesh_query_point(uint64_t id, const vec3& point, float max_dist, const mesh_query_point_t& ret,
|
|
1302
|
+
uint64_t adj_id, vec3& adj_point, float& adj_max_dist, mesh_query_point_t& adj_ret)
|
|
1303
|
+
{
|
|
1304
|
+
adj_mesh_query_point(id, point, max_dist, ret.sign, ret.face, ret.u, ret.v,
|
|
1305
|
+
adj_id, adj_point, adj_max_dist, adj_ret.sign, adj_ret.face, adj_ret.u, adj_ret.v, adj_ret.result);
|
|
1306
|
+
}
|
|
1307
|
+
|
|
1308
|
+
CUDA_CALLABLE inline void adj_mesh_query_point_no_sign(uint64_t id, const vec3& point, float max_dist, const mesh_query_point_t& ret,
|
|
1309
|
+
uint64_t adj_id, vec3& adj_point, float& adj_max_dist, mesh_query_point_t& adj_ret)
|
|
1310
|
+
{
|
|
1311
|
+
adj_mesh_query_point_no_sign(id, point, max_dist, ret.face, ret.u, ret.v,
|
|
1312
|
+
adj_id, adj_point, adj_max_dist, adj_ret.face, adj_ret.u, adj_ret.v, adj_ret.result);
|
|
1313
|
+
}
|
|
1314
|
+
|
|
1315
|
+
CUDA_CALLABLE inline void adj_mesh_query_furthest_point_no_sign(uint64_t id, const vec3& point, float min_dist, const mesh_query_point_t& ret,
|
|
1316
|
+
uint64_t adj_id, vec3& adj_point, float& adj_min_dist, mesh_query_point_t& adj_ret)
|
|
1317
|
+
{
|
|
1318
|
+
adj_mesh_query_furthest_point_no_sign(id, point, min_dist, ret.face, ret.u, ret.v,
|
|
1319
|
+
adj_id, adj_point, adj_min_dist, adj_ret.face, adj_ret.u, adj_ret.v, adj_ret.result);
|
|
1320
|
+
}
|
|
1321
|
+
|
|
1322
|
+
CUDA_CALLABLE inline void adj_mesh_query_point_sign_normal(uint64_t id, const vec3& point, float max_dist, float epsilon, const mesh_query_point_t& ret,
|
|
1323
|
+
uint64_t adj_id, vec3& adj_point, float& adj_max_dist, float& adj_epsilon, mesh_query_point_t& adj_ret)
|
|
1324
|
+
{
|
|
1325
|
+
adj_mesh_query_point_sign_normal(id, point, max_dist, ret.sign, ret.face, ret.u, ret.v, epsilon,
|
|
1326
|
+
adj_id, adj_point, adj_max_dist, adj_ret.sign, adj_ret.face, adj_ret.u, adj_ret.v, epsilon, adj_ret.result);
|
|
1327
|
+
}
|
|
1328
|
+
|
|
1329
|
+
CUDA_CALLABLE inline void adj_mesh_query_point_sign_winding_number(uint64_t id, const vec3& point, float max_dist, float accuracy, float winding_number_threshold, const mesh_query_point_t& ret,
|
|
1330
|
+
uint64_t adj_id, vec3& adj_point, float& adj_max_dist, float& adj_accuracy, float& adj_winding_number_threshold, mesh_query_point_t& adj_ret)
|
|
1331
|
+
{
|
|
1332
|
+
adj_mesh_query_point_sign_winding_number(id, point, max_dist, ret.sign, ret.face, ret.u, ret.v, accuracy, winding_number_threshold,
|
|
1333
|
+
adj_id, adj_point, adj_max_dist, adj_ret.sign, adj_ret.face, adj_ret.u, adj_ret.v, adj_accuracy, adj_winding_number_threshold, adj_ret.result);
|
|
1334
|
+
}
|
|
1335
|
+
|
|
1244
1336
|
CUDA_CALLABLE inline bool mesh_query_ray(uint64_t id, const vec3& start, const vec3& dir, float max_t, float& t, float& u, float& v, float& sign, vec3& normal, int& face)
|
|
1245
1337
|
{
|
|
1246
1338
|
Mesh mesh = mesh_get(id);
|
|
@@ -1353,6 +1445,35 @@ CUDA_CALLABLE inline void adj_mesh_query_ray(
|
|
|
1353
1445
|
}
|
|
1354
1446
|
|
|
1355
1447
|
|
|
1448
|
+
// Stores the result of querying the closest point on a mesh.
|
|
1449
|
+
struct mesh_query_ray_t
|
|
1450
|
+
{
|
|
1451
|
+
CUDA_CALLABLE mesh_query_ray_t()
|
|
1452
|
+
{
|
|
1453
|
+
}
|
|
1454
|
+
|
|
1455
|
+
CUDA_CALLABLE mesh_query_ray_t(int)
|
|
1456
|
+
{
|
|
1457
|
+
// For backward pass.
|
|
1458
|
+
}
|
|
1459
|
+
|
|
1460
|
+
bool result;
|
|
1461
|
+
float sign;
|
|
1462
|
+
int face;
|
|
1463
|
+
float t;
|
|
1464
|
+
float u;
|
|
1465
|
+
float v;
|
|
1466
|
+
vec3 normal;
|
|
1467
|
+
};
|
|
1468
|
+
|
|
1469
|
+
CUDA_CALLABLE inline mesh_query_ray_t mesh_query_ray(uint64_t id, const vec3& start, const vec3& dir, float max_t)
|
|
1470
|
+
{
|
|
1471
|
+
mesh_query_ray_t query;
|
|
1472
|
+
query.result = mesh_query_ray(id, start, dir, max_t, query.t, query.u, query.v, query.sign, query.normal, query.face);
|
|
1473
|
+
return query;
|
|
1474
|
+
}
|
|
1475
|
+
|
|
1476
|
+
|
|
1356
1477
|
// determine if a point is inside (ret < 0 ) or outside the mesh (ret > 0)
|
|
1357
1478
|
CUDA_CALLABLE inline float mesh_query_inside(uint64_t id, const vec3& p)
|
|
1358
1479
|
{
|
warp/native/quat.h
CHANGED
|
@@ -225,12 +225,24 @@ inline CUDA_CALLABLE quat_t<Type> div(quat_t<Type> q, Type s)
|
|
|
225
225
|
return quat_t<Type>(q.x/s, q.y/s, q.z/s, q.w/s);
|
|
226
226
|
}
|
|
227
227
|
|
|
228
|
+
template<typename Type>
|
|
229
|
+
inline CUDA_CALLABLE quat_t<Type> div(Type s, quat_t<Type> q)
|
|
230
|
+
{
|
|
231
|
+
return quat_t<Type>(s/q.x, s/q.y, s/q.z, s/q.w);
|
|
232
|
+
}
|
|
233
|
+
|
|
228
234
|
template<typename Type>
|
|
229
235
|
inline CUDA_CALLABLE quat_t<Type> operator / (quat_t<Type> a, Type s)
|
|
230
236
|
{
|
|
231
237
|
return div(a,s);
|
|
232
238
|
}
|
|
233
239
|
|
|
240
|
+
template<typename Type>
|
|
241
|
+
inline CUDA_CALLABLE quat_t<Type> operator / (Type s, quat_t<Type> a)
|
|
242
|
+
{
|
|
243
|
+
return div(s,a);
|
|
244
|
+
}
|
|
245
|
+
|
|
234
246
|
template<typename Type>
|
|
235
247
|
inline CUDA_CALLABLE quat_t<Type> operator*(Type s, const quat_t<Type>& a)
|
|
236
248
|
{
|
|
@@ -523,9 +535,14 @@ inline CUDA_CALLABLE void tensordot(const quat_t<Type>& a, const quat_t<Type>& b
|
|
|
523
535
|
}
|
|
524
536
|
|
|
525
537
|
template<typename Type>
|
|
526
|
-
inline CUDA_CALLABLE void adj_length(const quat_t<Type>& a, quat_t<Type>& adj_a, const Type adj_ret)
|
|
538
|
+
inline CUDA_CALLABLE void adj_length(const quat_t<Type>& a, Type ret, quat_t<Type>& adj_a, const Type adj_ret)
|
|
527
539
|
{
|
|
528
|
-
|
|
540
|
+
if (ret > Type(kEps))
|
|
541
|
+
{
|
|
542
|
+
Type inv_l = Type(1)/ret;
|
|
543
|
+
|
|
544
|
+
adj_a += quat_t<Type>(a.x*inv_l, a.y*inv_l, a.z*inv_l, a.w*inv_l) * adj_ret;
|
|
545
|
+
}
|
|
529
546
|
}
|
|
530
547
|
|
|
531
548
|
template<typename Type>
|
|
@@ -608,6 +625,13 @@ inline CUDA_CALLABLE void adj_div(quat_t<Type> a, Type s, quat_t<Type>& adj_a, T
|
|
|
608
625
|
adj_a += adj_ret / s;
|
|
609
626
|
}
|
|
610
627
|
|
|
628
|
+
template<typename Type>
|
|
629
|
+
inline CUDA_CALLABLE void adj_div(Type s, quat_t<Type> a, Type& adj_s, quat_t<Type>& adj_a, const quat_t<Type>& adj_ret)
|
|
630
|
+
{
|
|
631
|
+
adj_s -= dot(a, adj_ret)/ (s * s); // - a / s^2
|
|
632
|
+
adj_a += s / adj_ret;
|
|
633
|
+
}
|
|
634
|
+
|
|
611
635
|
template<typename Type>
|
|
612
636
|
inline CUDA_CALLABLE void adj_quat_rotate(const quat_t<Type>& q, const vec_t<3,Type>& p, quat_t<Type>& adj_q, vec_t<3,Type>& adj_p, const vec_t<3,Type>& adj_ret)
|
|
613
637
|
{
|
|
@@ -677,7 +701,7 @@ inline CUDA_CALLABLE void adj_quat_rotate_inv(const quat_t<Type>& q, const vec_t
|
|
|
677
701
|
}
|
|
678
702
|
|
|
679
703
|
template<typename Type>
|
|
680
|
-
inline CUDA_CALLABLE void adj_quat_slerp(const quat_t<Type>& q0, const quat_t<Type>& q1, Type t, quat_t<Type>& adj_q0, quat_t<Type>& adj_q1, Type& adj_t, const quat_t<Type>& adj_ret)
|
|
704
|
+
inline CUDA_CALLABLE void adj_quat_slerp(const quat_t<Type>& q0, const quat_t<Type>& q1, Type t, quat_t<Type>& ret, quat_t<Type>& adj_q0, quat_t<Type>& adj_q1, Type& adj_t, const quat_t<Type>& adj_ret)
|
|
681
705
|
{
|
|
682
706
|
vec_t<3,Type> axis;
|
|
683
707
|
Type angle;
|
|
@@ -688,7 +712,7 @@ inline CUDA_CALLABLE void adj_quat_slerp(const quat_t<Type>& q0, const quat_t<Ty
|
|
|
688
712
|
angle = angle * 0.5;
|
|
689
713
|
|
|
690
714
|
// adj_t
|
|
691
|
-
adj_t += dot(mul(
|
|
715
|
+
adj_t += dot(mul(ret, quat_t<Type>(angle*axis[0], angle*axis[1], angle*axis[2], Type(0))), adj_ret);
|
|
692
716
|
|
|
693
717
|
// adj_q0
|
|
694
718
|
quat_t<Type> q_inc_x_q0;
|
warp/native/vec.h
CHANGED
|
@@ -284,12 +284,41 @@ inline CUDA_CALLABLE vec_t<2, Type> div(vec_t<2, Type> a, Type s)
|
|
|
284
284
|
return vec_t<2, Type>(a.c[0]/s,a.c[1]/s);
|
|
285
285
|
}
|
|
286
286
|
|
|
287
|
+
template<unsigned Length, typename Type>
|
|
288
|
+
inline CUDA_CALLABLE vec_t<Length, Type> div(Type s, vec_t<Length, Type> a)
|
|
289
|
+
{
|
|
290
|
+
vec_t<Length, Type> ret;
|
|
291
|
+
for (unsigned i=0; i < Length; ++i)
|
|
292
|
+
{
|
|
293
|
+
ret[i] = s / a[i];
|
|
294
|
+
}
|
|
295
|
+
return ret;
|
|
296
|
+
}
|
|
297
|
+
|
|
298
|
+
template<typename Type>
|
|
299
|
+
inline CUDA_CALLABLE vec_t<3, Type> div(Type s, vec_t<3, Type> a)
|
|
300
|
+
{
|
|
301
|
+
return vec_t<3, Type>(s/a.c[0],s/a.c[1],s/a.c[2]);
|
|
302
|
+
}
|
|
303
|
+
|
|
304
|
+
template<typename Type>
|
|
305
|
+
inline CUDA_CALLABLE vec_t<2, Type> div(Type s, vec_t<2, Type> a)
|
|
306
|
+
{
|
|
307
|
+
return vec_t<2, Type>(s/a.c[0],s/a.c[1]);
|
|
308
|
+
}
|
|
309
|
+
|
|
287
310
|
template<unsigned Length, typename Type>
|
|
288
311
|
inline CUDA_CALLABLE vec_t<Length, Type> operator / (vec_t<Length, Type> a, Type s)
|
|
289
312
|
{
|
|
290
313
|
return div(a,s);
|
|
291
314
|
}
|
|
292
315
|
|
|
316
|
+
template<unsigned Length, typename Type>
|
|
317
|
+
inline CUDA_CALLABLE vec_t<Length, Type> operator / (Type s, vec_t<Length, Type> a)
|
|
318
|
+
{
|
|
319
|
+
return div(s, a);
|
|
320
|
+
}
|
|
321
|
+
|
|
293
322
|
// component wise division
|
|
294
323
|
template<unsigned Length, typename Type>
|
|
295
324
|
inline CUDA_CALLABLE vec_t<Length, Type> cw_div(vec_t<Length, Type> a, vec_t<Length, Type> b)
|
|
@@ -735,9 +764,30 @@ inline CUDA_CALLABLE void adj_div(vec_t<Length, Type> a, Type s, vec_t<Length, T
|
|
|
735
764
|
}
|
|
736
765
|
|
|
737
766
|
template<unsigned Length, typename Type>
|
|
738
|
-
inline CUDA_CALLABLE void
|
|
767
|
+
inline CUDA_CALLABLE void adj_div(Type s, vec_t<Length, Type> a, Type& adj_s, vec_t<Length, Type>& adj_a, const vec_t<Length, Type>& adj_ret)
|
|
768
|
+
{
|
|
769
|
+
|
|
770
|
+
adj_s -= dot(a , adj_ret)/ (s * s); // - a / s^2
|
|
771
|
+
|
|
772
|
+
for( unsigned i=0; i < Length; ++i )
|
|
773
|
+
{
|
|
774
|
+
adj_a[i] += s / adj_ret[i];
|
|
775
|
+
}
|
|
776
|
+
|
|
777
|
+
#if FP_CHECK
|
|
778
|
+
if (!isfinite(a) || !isfinite(s) || !isfinite(adj_a) || !isfinite(adj_s) || !isfinite(adj_ret))
|
|
779
|
+
{
|
|
780
|
+
// \TODO: How shall we implement this error message?
|
|
781
|
+
// printf("adj_div((%f %f %f %f), %f, (%f %f %f %f), %f, (%f %f %f %f)\n", a.x, a.y, a.z, a.w, s, adj_a.x, adj_a.y, adj_a.z, adj_a.w, adj_s, adj_ret.x, adj_ret.y, adj_ret.z, adj_ret.w);
|
|
782
|
+
assert(0);
|
|
783
|
+
}
|
|
784
|
+
#endif
|
|
785
|
+
}
|
|
786
|
+
|
|
787
|
+
template<unsigned Length, typename Type>
|
|
788
|
+
inline CUDA_CALLABLE void adj_cw_div(vec_t<Length, Type> a, vec_t<Length, Type> b, vec_t<Length, Type>& ret, vec_t<Length, Type>& adj_a, vec_t<Length, Type>& adj_b, const vec_t<Length, Type>& adj_ret) {
|
|
739
789
|
adj_a += cw_div(adj_ret, b);
|
|
740
|
-
adj_b -= cw_mul(adj_ret, cw_div(
|
|
790
|
+
adj_b -= cw_mul(adj_ret, cw_div(ret, b));
|
|
741
791
|
}
|
|
742
792
|
|
|
743
793
|
template<unsigned Length, typename Type>
|
|
@@ -850,9 +900,12 @@ inline CUDA_CALLABLE void adj_extract(const vec_t<Length, Type> & a, int idx, ve
|
|
|
850
900
|
}
|
|
851
901
|
|
|
852
902
|
template<unsigned Length, typename Type>
|
|
853
|
-
inline CUDA_CALLABLE void adj_length(vec_t<Length, Type> a, vec_t<Length, Type>& adj_a, const Type adj_ret)
|
|
903
|
+
inline CUDA_CALLABLE void adj_length(vec_t<Length, Type> a, Type ret, vec_t<Length, Type>& adj_a, const Type adj_ret)
|
|
854
904
|
{
|
|
855
|
-
|
|
905
|
+
if (ret > Type(kEps))
|
|
906
|
+
{
|
|
907
|
+
adj_a += div(a, ret) * adj_ret;
|
|
908
|
+
}
|
|
856
909
|
|
|
857
910
|
#if FP_CHECK
|
|
858
911
|
if (!isfinite(adj_a))
|
|
@@ -880,7 +933,7 @@ inline CUDA_CALLABLE void adj_length_sq(vec_t<Length, Type> a, vec_t<Length, Typ
|
|
|
880
933
|
}
|
|
881
934
|
|
|
882
935
|
template<unsigned Length, typename Type>
|
|
883
|
-
inline CUDA_CALLABLE void adj_normalize(vec_t<Length, Type> a, vec_t<Length, Type>& adj_a, const vec_t<Length, Type>& adj_ret)
|
|
936
|
+
inline CUDA_CALLABLE void adj_normalize(vec_t<Length, Type> a, vec_t<Length, Type>& ret, vec_t<Length, Type>& adj_a, const vec_t<Length, Type>& adj_ret)
|
|
884
937
|
{
|
|
885
938
|
Type d = length(a);
|
|
886
939
|
|
|
@@ -888,9 +941,7 @@ inline CUDA_CALLABLE void adj_normalize(vec_t<Length, Type> a, vec_t<Length, Typ
|
|
|
888
941
|
{
|
|
889
942
|
Type invd = Type(1.0f)/d;
|
|
890
943
|
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
adj_a += (adj_ret*invd - ahat*(dot(ahat, adj_ret))*invd);
|
|
944
|
+
adj_a += (adj_ret*invd - ret*(dot(ret, adj_ret))*invd);
|
|
894
945
|
|
|
895
946
|
#if FP_CHECK
|
|
896
947
|
if (!isfinite(adj_a))
|
|
@@ -951,8 +1002,8 @@ inline CUDA_CALLABLE void adj_max(const vec_t<Length,Type> &v, vec_t<Length,Type
|
|
|
951
1002
|
|
|
952
1003
|
// Do I need to specialize these for different lengths?
|
|
953
1004
|
template<unsigned Length, typename Type>
|
|
954
|
-
inline CUDA_CALLABLE vec_t<Length, Type> atomic_add(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
|
|
955
|
-
|
|
1005
|
+
inline CUDA_CALLABLE vec_t<Length, Type> atomic_add(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
|
|
1006
|
+
{
|
|
956
1007
|
vec_t<Length, Type> ret;
|
|
957
1008
|
for( unsigned i=0; i < Length; ++i )
|
|
958
1009
|
{
|
|
@@ -963,8 +1014,8 @@ inline CUDA_CALLABLE vec_t<Length, Type> atomic_add(vec_t<Length, Type> * addr,
|
|
|
963
1014
|
}
|
|
964
1015
|
|
|
965
1016
|
template<unsigned Length, typename Type>
|
|
966
|
-
inline CUDA_CALLABLE vec_t<Length, Type> atomic_min(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
|
|
967
|
-
|
|
1017
|
+
inline CUDA_CALLABLE vec_t<Length, Type> atomic_min(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
|
|
1018
|
+
{
|
|
968
1019
|
vec_t<Length, Type> ret;
|
|
969
1020
|
for( unsigned i=0; i < Length; ++i )
|
|
970
1021
|
{
|
|
@@ -975,8 +1026,8 @@ inline CUDA_CALLABLE vec_t<Length, Type> atomic_min(vec_t<Length, Type> * addr,
|
|
|
975
1026
|
}
|
|
976
1027
|
|
|
977
1028
|
template<unsigned Length, typename Type>
|
|
978
|
-
inline CUDA_CALLABLE vec_t<Length, Type> atomic_max(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
|
|
979
|
-
|
|
1029
|
+
inline CUDA_CALLABLE vec_t<Length, Type> atomic_max(vec_t<Length, Type> * addr, vec_t<Length, Type> value)
|
|
1030
|
+
{
|
|
980
1031
|
vec_t<Length, Type> ret;
|
|
981
1032
|
for( unsigned i=0; i < Length; ++i )
|
|
982
1033
|
{
|
|
@@ -986,6 +1037,17 @@ inline CUDA_CALLABLE vec_t<Length, Type> atomic_max(vec_t<Length, Type> * addr,
|
|
|
986
1037
|
return ret;
|
|
987
1038
|
}
|
|
988
1039
|
|
|
1040
|
+
template<unsigned Length, typename Type>
|
|
1041
|
+
inline CUDA_CALLABLE void adj_atomic_minmax(
|
|
1042
|
+
vec_t<Length,Type> *addr,
|
|
1043
|
+
vec_t<Length,Type> *adj_addr,
|
|
1044
|
+
const vec_t<Length,Type> &value,
|
|
1045
|
+
vec_t<Length,Type> &adj_value)
|
|
1046
|
+
{
|
|
1047
|
+
for (unsigned i=0; i < Length; ++i)
|
|
1048
|
+
adj_atomic_minmax(&(addr->c[i]), &(adj_addr->c[i]), value[i], adj_value[i]);
|
|
1049
|
+
}
|
|
1050
|
+
|
|
989
1051
|
// ok, the original implementation of this didn't take the absolute values.
|
|
990
1052
|
// I wouldn't consider this expected behavior. It looks like it's only
|
|
991
1053
|
// being used for bounding boxes at the moment, where this doesn't matter,
|
warp/native/warp.cu
CHANGED
|
@@ -1143,12 +1143,7 @@ int cuda_toolkit_version()
|
|
|
1143
1143
|
|
|
1144
1144
|
bool cuda_driver_is_initialized()
|
|
1145
1145
|
{
|
|
1146
|
-
|
|
1147
|
-
|
|
1148
|
-
// result can be: CUDA_SUCCESS, CUDA_ERROR_DEINITIALIZED, CUDA_ERROR_NOT_INITIALIZED
|
|
1149
|
-
CUresult result = cuCtxGetCurrent_f(&ctx);
|
|
1150
|
-
|
|
1151
|
-
return result == CUDA_SUCCESS;
|
|
1146
|
+
return is_cuda_driver_initialized();
|
|
1152
1147
|
}
|
|
1153
1148
|
|
|
1154
1149
|
int nvrtc_supported_arch_count()
|