warp-lang 1.7.2rc1__py3-none-manylinux_2_34_aarch64.whl → 1.8.0__py3-none-manylinux_2_34_aarch64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of warp-lang might be problematic. Click here for more details.
- warp/__init__.py +3 -1
- warp/__init__.pyi +3489 -1
- warp/autograd.py +45 -122
- warp/bin/warp-clang.so +0 -0
- warp/bin/warp.so +0 -0
- warp/build.py +241 -252
- warp/build_dll.py +125 -26
- warp/builtins.py +1907 -384
- warp/codegen.py +257 -101
- warp/config.py +12 -1
- warp/constants.py +1 -1
- warp/context.py +657 -223
- warp/dlpack.py +1 -1
- warp/examples/benchmarks/benchmark_cloth.py +2 -2
- warp/examples/benchmarks/benchmark_tile_sort.py +155 -0
- warp/examples/core/example_sample_mesh.py +1 -1
- warp/examples/core/example_spin_lock.py +93 -0
- warp/examples/core/example_work_queue.py +118 -0
- warp/examples/fem/example_adaptive_grid.py +5 -5
- warp/examples/fem/example_apic_fluid.py +1 -1
- warp/examples/fem/example_burgers.py +1 -1
- warp/examples/fem/example_convection_diffusion.py +9 -6
- warp/examples/fem/example_darcy_ls_optimization.py +489 -0
- warp/examples/fem/example_deformed_geometry.py +1 -1
- warp/examples/fem/example_diffusion.py +2 -2
- warp/examples/fem/example_diffusion_3d.py +1 -1
- warp/examples/fem/example_distortion_energy.py +1 -1
- warp/examples/fem/example_elastic_shape_optimization.py +387 -0
- warp/examples/fem/example_magnetostatics.py +5 -3
- warp/examples/fem/example_mixed_elasticity.py +5 -3
- warp/examples/fem/example_navier_stokes.py +11 -9
- warp/examples/fem/example_nonconforming_contact.py +5 -3
- warp/examples/fem/example_streamlines.py +8 -3
- warp/examples/fem/utils.py +9 -8
- warp/examples/interop/example_jax_ffi_callback.py +2 -2
- warp/examples/optim/example_drone.py +1 -1
- warp/examples/sim/example_cloth.py +1 -1
- warp/examples/sim/example_cloth_self_contact.py +48 -54
- warp/examples/tile/example_tile_block_cholesky.py +502 -0
- warp/examples/tile/example_tile_cholesky.py +2 -1
- warp/examples/tile/example_tile_convolution.py +1 -1
- warp/examples/tile/example_tile_filtering.py +1 -1
- warp/examples/tile/example_tile_matmul.py +1 -1
- warp/examples/tile/example_tile_mlp.py +2 -0
- warp/fabric.py +7 -7
- warp/fem/__init__.py +5 -0
- warp/fem/adaptivity.py +1 -1
- warp/fem/cache.py +152 -63
- warp/fem/dirichlet.py +2 -2
- warp/fem/domain.py +136 -6
- warp/fem/field/field.py +141 -99
- warp/fem/field/nodal_field.py +85 -39
- warp/fem/field/virtual.py +97 -52
- warp/fem/geometry/adaptive_nanogrid.py +91 -86
- warp/fem/geometry/closest_point.py +13 -0
- warp/fem/geometry/deformed_geometry.py +102 -40
- warp/fem/geometry/element.py +56 -2
- warp/fem/geometry/geometry.py +323 -22
- warp/fem/geometry/grid_2d.py +157 -62
- warp/fem/geometry/grid_3d.py +116 -20
- warp/fem/geometry/hexmesh.py +86 -20
- warp/fem/geometry/nanogrid.py +166 -86
- warp/fem/geometry/partition.py +59 -25
- warp/fem/geometry/quadmesh.py +86 -135
- warp/fem/geometry/tetmesh.py +47 -119
- warp/fem/geometry/trimesh.py +77 -270
- warp/fem/integrate.py +107 -52
- warp/fem/linalg.py +25 -58
- warp/fem/operator.py +124 -27
- warp/fem/quadrature/pic_quadrature.py +36 -14
- warp/fem/quadrature/quadrature.py +40 -16
- warp/fem/space/__init__.py +1 -1
- warp/fem/space/basis_function_space.py +66 -46
- warp/fem/space/basis_space.py +17 -4
- warp/fem/space/dof_mapper.py +1 -1
- warp/fem/space/function_space.py +2 -2
- warp/fem/space/grid_2d_function_space.py +4 -1
- warp/fem/space/hexmesh_function_space.py +4 -2
- warp/fem/space/nanogrid_function_space.py +3 -1
- warp/fem/space/partition.py +11 -2
- warp/fem/space/quadmesh_function_space.py +4 -1
- warp/fem/space/restriction.py +5 -2
- warp/fem/space/shape/__init__.py +10 -8
- warp/fem/space/tetmesh_function_space.py +4 -1
- warp/fem/space/topology.py +52 -21
- warp/fem/space/trimesh_function_space.py +4 -1
- warp/fem/utils.py +53 -8
- warp/jax.py +1 -2
- warp/jax_experimental/ffi.py +12 -17
- warp/jax_experimental/xla_ffi.py +37 -24
- warp/math.py +171 -1
- warp/native/array.h +99 -0
- warp/native/builtin.h +174 -31
- warp/native/coloring.cpp +1 -1
- warp/native/exports.h +118 -63
- warp/native/intersect.h +3 -3
- warp/native/mat.h +5 -10
- warp/native/mathdx.cpp +11 -5
- warp/native/matnn.h +1 -123
- warp/native/quat.h +28 -4
- warp/native/sparse.cpp +121 -258
- warp/native/sparse.cu +181 -274
- warp/native/spatial.h +305 -17
- warp/native/tile.h +583 -72
- warp/native/tile_radix_sort.h +1108 -0
- warp/native/tile_reduce.h +237 -2
- warp/native/tile_scan.h +240 -0
- warp/native/tuple.h +189 -0
- warp/native/vec.h +6 -16
- warp/native/warp.cpp +36 -4
- warp/native/warp.cu +574 -51
- warp/native/warp.h +47 -74
- warp/optim/linear.py +5 -1
- warp/paddle.py +7 -8
- warp/py.typed +0 -0
- warp/render/render_opengl.py +58 -29
- warp/render/render_usd.py +124 -61
- warp/sim/__init__.py +9 -0
- warp/sim/collide.py +252 -78
- warp/sim/graph_coloring.py +8 -1
- warp/sim/import_mjcf.py +4 -3
- warp/sim/import_usd.py +11 -7
- warp/sim/integrator.py +5 -2
- warp/sim/integrator_euler.py +1 -1
- warp/sim/integrator_featherstone.py +1 -1
- warp/sim/integrator_vbd.py +751 -320
- warp/sim/integrator_xpbd.py +1 -1
- warp/sim/model.py +265 -260
- warp/sim/utils.py +10 -7
- warp/sparse.py +303 -166
- warp/tape.py +52 -51
- warp/tests/cuda/test_conditional_captures.py +1046 -0
- warp/tests/cuda/test_streams.py +1 -1
- warp/tests/geometry/test_volume.py +2 -2
- warp/tests/interop/test_dlpack.py +9 -9
- warp/tests/interop/test_jax.py +0 -1
- warp/tests/run_coverage_serial.py +1 -1
- warp/tests/sim/disabled_kinematics.py +2 -2
- warp/tests/sim/{test_vbd.py → test_cloth.py} +296 -113
- warp/tests/sim/test_collision.py +159 -51
- warp/tests/sim/test_coloring.py +15 -1
- warp/tests/test_array.py +254 -2
- warp/tests/test_array_reduce.py +2 -2
- warp/tests/test_atomic_cas.py +299 -0
- warp/tests/test_codegen.py +142 -19
- warp/tests/test_conditional.py +47 -1
- warp/tests/test_ctypes.py +0 -20
- warp/tests/test_devices.py +8 -0
- warp/tests/test_fabricarray.py +4 -2
- warp/tests/test_fem.py +58 -25
- warp/tests/test_func.py +42 -1
- warp/tests/test_grad.py +1 -1
- warp/tests/test_lerp.py +1 -3
- warp/tests/test_map.py +481 -0
- warp/tests/test_mat.py +1 -24
- warp/tests/test_quat.py +6 -15
- warp/tests/test_rounding.py +10 -38
- warp/tests/test_runlength_encode.py +7 -7
- warp/tests/test_smoothstep.py +1 -1
- warp/tests/test_sparse.py +51 -2
- warp/tests/test_spatial.py +507 -1
- warp/tests/test_struct.py +2 -2
- warp/tests/test_tuple.py +265 -0
- warp/tests/test_types.py +2 -2
- warp/tests/test_utils.py +24 -18
- warp/tests/tile/test_tile.py +420 -1
- warp/tests/tile/test_tile_mathdx.py +518 -14
- warp/tests/tile/test_tile_reduce.py +213 -0
- warp/tests/tile/test_tile_shared_memory.py +130 -1
- warp/tests/tile/test_tile_sort.py +117 -0
- warp/tests/unittest_suites.py +4 -6
- warp/types.py +462 -308
- warp/utils.py +647 -86
- {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.0.dist-info}/METADATA +20 -6
- {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.0.dist-info}/RECORD +178 -166
- warp/stubs.py +0 -3381
- warp/tests/sim/test_xpbd.py +0 -399
- warp/tests/test_mlp.py +0 -282
- {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.0.dist-info}/WHEEL +0 -0
- {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.0.dist-info}/licenses/LICENSE.md +0 -0
- {warp_lang-1.7.2rc1.dist-info → warp_lang-1.8.0.dist-info}/top_level.txt +0 -0
warp/native/exports.h
CHANGED
|
@@ -953,10 +953,25 @@ WP_API void builtin_cw_div_mat22d_mat22d(mat22d& a, mat22d& b, mat22d* ret) { *r
|
|
|
953
953
|
WP_API void builtin_cw_div_mat33d_mat33d(mat33d& a, mat33d& b, mat33d* ret) { *ret = wp::cw_div(a, b); }
|
|
954
954
|
WP_API void builtin_cw_div_mat44d_mat44d(mat44d& a, mat44d& b, mat44d* ret) { *ret = wp::cw_div(a, b); }
|
|
955
955
|
WP_API void builtin_cw_div_spatial_matrixd_spatial_matrixd(spatial_matrixd& a, spatial_matrixd& b, spatial_matrixd* ret) { *ret = wp::cw_div(a, b); }
|
|
956
|
+
WP_API void builtin_svd3_mat33h(mat33h& A, mat33h& ret_0, vec3h& ret_1, mat33h& ret_2) { wp::svd3(A, ret_0, ret_1, ret_2); }
|
|
957
|
+
WP_API void builtin_svd3_mat33f(mat33f& A, mat33f& ret_0, vec3f& ret_1, mat33f& ret_2) { wp::svd3(A, ret_0, ret_1, ret_2); }
|
|
958
|
+
WP_API void builtin_svd3_mat33d(mat33d& A, mat33d& ret_0, vec3d& ret_1, mat33d& ret_2) { wp::svd3(A, ret_0, ret_1, ret_2); }
|
|
959
|
+
WP_API void builtin_svd2_mat22h(mat22h& A, mat22h& ret_0, vec2h& ret_1, mat22h& ret_2) { wp::svd2(A, ret_0, ret_1, ret_2); }
|
|
960
|
+
WP_API void builtin_svd2_mat22f(mat22f& A, mat22f& ret_0, vec2f& ret_1, mat22f& ret_2) { wp::svd2(A, ret_0, ret_1, ret_2); }
|
|
961
|
+
WP_API void builtin_svd2_mat22d(mat22d& A, mat22d& ret_0, vec2d& ret_1, mat22d& ret_2) { wp::svd2(A, ret_0, ret_1, ret_2); }
|
|
962
|
+
WP_API void builtin_qr3_mat33h(mat33h& A, mat33h& ret_0, mat33h& ret_1) { wp::qr3(A, ret_0, ret_1); }
|
|
963
|
+
WP_API void builtin_qr3_mat33f(mat33f& A, mat33f& ret_0, mat33f& ret_1) { wp::qr3(A, ret_0, ret_1); }
|
|
964
|
+
WP_API void builtin_qr3_mat33d(mat33d& A, mat33d& ret_0, mat33d& ret_1) { wp::qr3(A, ret_0, ret_1); }
|
|
965
|
+
WP_API void builtin_eig3_mat33h(mat33h& A, mat33h& ret_0, vec3h& ret_1) { wp::eig3(A, ret_0, ret_1); }
|
|
966
|
+
WP_API void builtin_eig3_mat33f(mat33f& A, mat33f& ret_0, vec3f& ret_1) { wp::eig3(A, ret_0, ret_1); }
|
|
967
|
+
WP_API void builtin_eig3_mat33d(mat33d& A, mat33d& ret_0, vec3d& ret_1) { wp::eig3(A, ret_0, ret_1); }
|
|
956
968
|
WP_API void builtin_quat_identity(quatf* ret) { *ret = wp::quat_identity(); }
|
|
957
969
|
WP_API void builtin_quat_from_axis_angle_vec3h_float16(vec3h& axis, float16 angle, quath* ret) { *ret = wp::quat_from_axis_angle(axis, angle); }
|
|
958
970
|
WP_API void builtin_quat_from_axis_angle_vec3f_float32(vec3f& axis, float32 angle, quatf* ret) { *ret = wp::quat_from_axis_angle(axis, angle); }
|
|
959
971
|
WP_API void builtin_quat_from_axis_angle_vec3d_float64(vec3d& axis, float64 angle, quatd* ret) { *ret = wp::quat_from_axis_angle(axis, angle); }
|
|
972
|
+
WP_API void builtin_quat_to_axis_angle_quath(quath& quat, vec3h& ret_0, float16& ret_1) { wp::quat_to_axis_angle(quat, ret_0, ret_1); }
|
|
973
|
+
WP_API void builtin_quat_to_axis_angle_quatf(quatf& quat, vec3f& ret_0, float32& ret_1) { wp::quat_to_axis_angle(quat, ret_0, ret_1); }
|
|
974
|
+
WP_API void builtin_quat_to_axis_angle_quatd(quatd& quat, vec3d& ret_0, float64& ret_1) { wp::quat_to_axis_angle(quat, ret_0, ret_1); }
|
|
960
975
|
WP_API void builtin_quat_from_matrix_mat33h(mat33h& mat, quath* ret) { *ret = wp::quat_from_matrix(mat); }
|
|
961
976
|
WP_API void builtin_quat_from_matrix_mat33f(mat33f& mat, quatf* ret) { *ret = wp::quat_from_matrix(mat); }
|
|
962
977
|
WP_API void builtin_quat_from_matrix_mat33d(mat33d& mat, quatd* ret) { *ret = wp::quat_from_matrix(mat); }
|
|
@@ -988,6 +1003,12 @@ WP_API void builtin_transform_get_translation_transformd(transformd& xform, vec3
|
|
|
988
1003
|
WP_API void builtin_transform_get_rotation_transformh(transformh& xform, quath* ret) { *ret = wp::transform_get_rotation(xform); }
|
|
989
1004
|
WP_API void builtin_transform_get_rotation_transformf(transformf& xform, quatf* ret) { *ret = wp::transform_get_rotation(xform); }
|
|
990
1005
|
WP_API void builtin_transform_get_rotation_transformd(transformd& xform, quatd* ret) { *ret = wp::transform_get_rotation(xform); }
|
|
1006
|
+
WP_API void builtin_transform_set_translation_transformh_vec3h(transformh& xform, vec3h& p) { wp::transform_set_translation(xform, p); }
|
|
1007
|
+
WP_API void builtin_transform_set_translation_transformf_vec3f(transformf& xform, vec3f& p) { wp::transform_set_translation(xform, p); }
|
|
1008
|
+
WP_API void builtin_transform_set_translation_transformd_vec3d(transformd& xform, vec3d& p) { wp::transform_set_translation(xform, p); }
|
|
1009
|
+
WP_API void builtin_transform_set_rotation_transformh_quath(transformh& xform, quath& q) { wp::transform_set_rotation(xform, q); }
|
|
1010
|
+
WP_API void builtin_transform_set_rotation_transformf_quatf(transformf& xform, quatf& q) { wp::transform_set_rotation(xform, q); }
|
|
1011
|
+
WP_API void builtin_transform_set_rotation_transformd_quatd(transformd& xform, quatd& q) { wp::transform_set_rotation(xform, q); }
|
|
991
1012
|
WP_API void builtin_transform_multiply_transformh_transformh(transformh& a, transformh& b, transformh* ret) { *ret = wp::transform_multiply(a, b); }
|
|
992
1013
|
WP_API void builtin_transform_multiply_transformf_transformf(transformf& a, transformf& b, transformf* ret) { *ret = wp::transform_multiply(a, b); }
|
|
993
1014
|
WP_API void builtin_transform_multiply_transformd_transformd(transformd& a, transformd& b, transformd* ret) { *ret = wp::transform_multiply(a, b); }
|
|
@@ -1063,6 +1084,7 @@ WP_API void builtin_pnoise_uint32_vec4f_int32_int32_int32_int32(uint32 state, ve
|
|
|
1063
1084
|
WP_API void builtin_curlnoise_uint32_vec2f_uint32_float32_float32(uint32 state, vec2f& xy, uint32 octaves, float32 lacunarity, float32 gain, vec2f* ret) { *ret = wp::curlnoise(state, xy, octaves, lacunarity, gain); }
|
|
1064
1085
|
WP_API void builtin_curlnoise_uint32_vec3f_uint32_float32_float32(uint32 state, vec3f& xyz, uint32 octaves, float32 lacunarity, float32 gain, vec3f* ret) { *ret = wp::curlnoise(state, xyz, octaves, lacunarity, gain); }
|
|
1065
1086
|
WP_API void builtin_curlnoise_uint32_vec4f_uint32_float32_float32(uint32 state, vec4f& xyzt, uint32 octaves, float32 lacunarity, float32 gain, vec3f* ret) { *ret = wp::curlnoise(state, xyzt, octaves, lacunarity, gain); }
|
|
1087
|
+
WP_API void builtin_block_dim(int* ret) { *ret = wp::block_dim(); }
|
|
1066
1088
|
WP_API void builtin_extract_vec2h_int32(vec2h& a, int32 i, float16* ret) { *ret = wp::extract(a, i); }
|
|
1067
1089
|
WP_API void builtin_extract_vec3h_int32(vec3h& a, int32 i, float16* ret) { *ret = wp::extract(a, i); }
|
|
1068
1090
|
WP_API void builtin_extract_vec4h_int32(vec4h& a, int32 i, float16* ret) { *ret = wp::extract(a, i); }
|
|
@@ -1130,69 +1152,72 @@ WP_API void builtin_extract_transformh_int32(transformh& a, int32 i, float16* re
|
|
|
1130
1152
|
WP_API void builtin_extract_transformf_int32(transformf& a, int32 i, float32* ret) { *ret = wp::extract(a, i); }
|
|
1131
1153
|
WP_API void builtin_extract_transformd_int32(transformd& a, int32 i, float64* ret) { *ret = wp::extract(a, i); }
|
|
1132
1154
|
WP_API void builtin_extract_shape_t_int32(shape_t s, int32 i, int* ret) { *ret = wp::extract(s, i); }
|
|
1133
|
-
WP_API void
|
|
1134
|
-
WP_API void
|
|
1135
|
-
WP_API void
|
|
1136
|
-
WP_API void
|
|
1137
|
-
WP_API void
|
|
1138
|
-
WP_API void
|
|
1139
|
-
WP_API void
|
|
1140
|
-
WP_API void
|
|
1141
|
-
WP_API void
|
|
1142
|
-
WP_API void
|
|
1143
|
-
WP_API void
|
|
1144
|
-
WP_API void
|
|
1145
|
-
WP_API void
|
|
1146
|
-
WP_API void
|
|
1147
|
-
WP_API void
|
|
1148
|
-
WP_API void
|
|
1149
|
-
WP_API void
|
|
1150
|
-
WP_API void
|
|
1151
|
-
WP_API void
|
|
1152
|
-
WP_API void
|
|
1153
|
-
WP_API void
|
|
1154
|
-
WP_API void
|
|
1155
|
-
WP_API void
|
|
1156
|
-
WP_API void
|
|
1157
|
-
WP_API void
|
|
1158
|
-
WP_API void
|
|
1159
|
-
WP_API void
|
|
1160
|
-
WP_API void
|
|
1161
|
-
WP_API void
|
|
1162
|
-
WP_API void
|
|
1163
|
-
WP_API void
|
|
1164
|
-
WP_API void
|
|
1165
|
-
WP_API void
|
|
1166
|
-
WP_API void
|
|
1167
|
-
WP_API void
|
|
1168
|
-
WP_API void
|
|
1169
|
-
WP_API void
|
|
1170
|
-
WP_API void
|
|
1171
|
-
WP_API void
|
|
1172
|
-
WP_API void
|
|
1173
|
-
WP_API void
|
|
1174
|
-
WP_API void
|
|
1175
|
-
WP_API void
|
|
1176
|
-
WP_API void
|
|
1177
|
-
WP_API void
|
|
1178
|
-
WP_API void
|
|
1179
|
-
WP_API void
|
|
1180
|
-
WP_API void
|
|
1181
|
-
WP_API void
|
|
1182
|
-
WP_API void
|
|
1183
|
-
WP_API void
|
|
1184
|
-
WP_API void
|
|
1185
|
-
WP_API void
|
|
1186
|
-
WP_API void
|
|
1187
|
-
WP_API void
|
|
1188
|
-
WP_API void
|
|
1189
|
-
WP_API void
|
|
1190
|
-
WP_API void
|
|
1191
|
-
WP_API void
|
|
1192
|
-
WP_API void
|
|
1193
|
-
WP_API void
|
|
1194
|
-
WP_API void
|
|
1195
|
-
WP_API void
|
|
1155
|
+
WP_API void builtin_expect_eq_int8_int8(int8 a, int8 b) { wp::expect_eq(a, b); }
|
|
1156
|
+
WP_API void builtin_expect_eq_uint8_uint8(uint8 a, uint8 b) { wp::expect_eq(a, b); }
|
|
1157
|
+
WP_API void builtin_expect_eq_int16_int16(int16 a, int16 b) { wp::expect_eq(a, b); }
|
|
1158
|
+
WP_API void builtin_expect_eq_uint16_uint16(uint16 a, uint16 b) { wp::expect_eq(a, b); }
|
|
1159
|
+
WP_API void builtin_expect_eq_int32_int32(int32 a, int32 b) { wp::expect_eq(a, b); }
|
|
1160
|
+
WP_API void builtin_expect_eq_uint32_uint32(uint32 a, uint32 b) { wp::expect_eq(a, b); }
|
|
1161
|
+
WP_API void builtin_expect_eq_int64_int64(int64 a, int64 b) { wp::expect_eq(a, b); }
|
|
1162
|
+
WP_API void builtin_expect_eq_uint64_uint64(uint64 a, uint64 b) { wp::expect_eq(a, b); }
|
|
1163
|
+
WP_API void builtin_expect_eq_float16_float16(float16 a, float16 b) { wp::expect_eq(a, b); }
|
|
1164
|
+
WP_API void builtin_expect_eq_float32_float32(float32 a, float32 b) { wp::expect_eq(a, b); }
|
|
1165
|
+
WP_API void builtin_expect_eq_float64_float64(float64 a, float64 b) { wp::expect_eq(a, b); }
|
|
1166
|
+
WP_API void builtin_expect_eq_quath_quath(quath& a, quath& b) { wp::expect_eq(a, b); }
|
|
1167
|
+
WP_API void builtin_expect_eq_quatf_quatf(quatf& a, quatf& b) { wp::expect_eq(a, b); }
|
|
1168
|
+
WP_API void builtin_expect_eq_quatd_quatd(quatd& a, quatd& b) { wp::expect_eq(a, b); }
|
|
1169
|
+
WP_API void builtin_expect_eq_transformh_transformh(transformh& a, transformh& b) { wp::expect_eq(a, b); }
|
|
1170
|
+
WP_API void builtin_expect_eq_transformf_transformf(transformf& a, transformf& b) { wp::expect_eq(a, b); }
|
|
1171
|
+
WP_API void builtin_expect_eq_transformd_transformd(transformd& a, transformd& b) { wp::expect_eq(a, b); }
|
|
1172
|
+
WP_API void builtin_expect_eq_bool_bool(bool a, bool b) { wp::expect_eq(a, b); }
|
|
1173
|
+
WP_API void builtin_expect_eq_vec2h_vec2h(vec2h& a, vec2h& b) { wp::expect_eq(a, b); }
|
|
1174
|
+
WP_API void builtin_expect_eq_vec3h_vec3h(vec3h& a, vec3h& b) { wp::expect_eq(a, b); }
|
|
1175
|
+
WP_API void builtin_expect_eq_vec4h_vec4h(vec4h& a, vec4h& b) { wp::expect_eq(a, b); }
|
|
1176
|
+
WP_API void builtin_expect_eq_spatial_vectorh_spatial_vectorh(spatial_vectorh& a, spatial_vectorh& b) { wp::expect_eq(a, b); }
|
|
1177
|
+
WP_API void builtin_expect_eq_vec2f_vec2f(vec2f& a, vec2f& b) { wp::expect_eq(a, b); }
|
|
1178
|
+
WP_API void builtin_expect_eq_vec3f_vec3f(vec3f& a, vec3f& b) { wp::expect_eq(a, b); }
|
|
1179
|
+
WP_API void builtin_expect_eq_vec4f_vec4f(vec4f& a, vec4f& b) { wp::expect_eq(a, b); }
|
|
1180
|
+
WP_API void builtin_expect_eq_spatial_vectorf_spatial_vectorf(spatial_vectorf& a, spatial_vectorf& b) { wp::expect_eq(a, b); }
|
|
1181
|
+
WP_API void builtin_expect_eq_vec2d_vec2d(vec2d& a, vec2d& b) { wp::expect_eq(a, b); }
|
|
1182
|
+
WP_API void builtin_expect_eq_vec3d_vec3d(vec3d& a, vec3d& b) { wp::expect_eq(a, b); }
|
|
1183
|
+
WP_API void builtin_expect_eq_vec4d_vec4d(vec4d& a, vec4d& b) { wp::expect_eq(a, b); }
|
|
1184
|
+
WP_API void builtin_expect_eq_spatial_vectord_spatial_vectord(spatial_vectord& a, spatial_vectord& b) { wp::expect_eq(a, b); }
|
|
1185
|
+
WP_API void builtin_expect_eq_vec2s_vec2s(vec2s& a, vec2s& b) { wp::expect_eq(a, b); }
|
|
1186
|
+
WP_API void builtin_expect_eq_vec3s_vec3s(vec3s& a, vec3s& b) { wp::expect_eq(a, b); }
|
|
1187
|
+
WP_API void builtin_expect_eq_vec4s_vec4s(vec4s& a, vec4s& b) { wp::expect_eq(a, b); }
|
|
1188
|
+
WP_API void builtin_expect_eq_vec2i_vec2i(vec2i& a, vec2i& b) { wp::expect_eq(a, b); }
|
|
1189
|
+
WP_API void builtin_expect_eq_vec3i_vec3i(vec3i& a, vec3i& b) { wp::expect_eq(a, b); }
|
|
1190
|
+
WP_API void builtin_expect_eq_vec4i_vec4i(vec4i& a, vec4i& b) { wp::expect_eq(a, b); }
|
|
1191
|
+
WP_API void builtin_expect_eq_vec2l_vec2l(vec2l& a, vec2l& b) { wp::expect_eq(a, b); }
|
|
1192
|
+
WP_API void builtin_expect_eq_vec3l_vec3l(vec3l& a, vec3l& b) { wp::expect_eq(a, b); }
|
|
1193
|
+
WP_API void builtin_expect_eq_vec4l_vec4l(vec4l& a, vec4l& b) { wp::expect_eq(a, b); }
|
|
1194
|
+
WP_API void builtin_expect_eq_vec2b_vec2b(vec2b& a, vec2b& b) { wp::expect_eq(a, b); }
|
|
1195
|
+
WP_API void builtin_expect_eq_vec3b_vec3b(vec3b& a, vec3b& b) { wp::expect_eq(a, b); }
|
|
1196
|
+
WP_API void builtin_expect_eq_vec4b_vec4b(vec4b& a, vec4b& b) { wp::expect_eq(a, b); }
|
|
1197
|
+
WP_API void builtin_expect_eq_vec2us_vec2us(vec2us& a, vec2us& b) { wp::expect_eq(a, b); }
|
|
1198
|
+
WP_API void builtin_expect_eq_vec3us_vec3us(vec3us& a, vec3us& b) { wp::expect_eq(a, b); }
|
|
1199
|
+
WP_API void builtin_expect_eq_vec4us_vec4us(vec4us& a, vec4us& b) { wp::expect_eq(a, b); }
|
|
1200
|
+
WP_API void builtin_expect_eq_vec2ui_vec2ui(vec2ui& a, vec2ui& b) { wp::expect_eq(a, b); }
|
|
1201
|
+
WP_API void builtin_expect_eq_vec3ui_vec3ui(vec3ui& a, vec3ui& b) { wp::expect_eq(a, b); }
|
|
1202
|
+
WP_API void builtin_expect_eq_vec4ui_vec4ui(vec4ui& a, vec4ui& b) { wp::expect_eq(a, b); }
|
|
1203
|
+
WP_API void builtin_expect_eq_vec2ul_vec2ul(vec2ul& a, vec2ul& b) { wp::expect_eq(a, b); }
|
|
1204
|
+
WP_API void builtin_expect_eq_vec3ul_vec3ul(vec3ul& a, vec3ul& b) { wp::expect_eq(a, b); }
|
|
1205
|
+
WP_API void builtin_expect_eq_vec4ul_vec4ul(vec4ul& a, vec4ul& b) { wp::expect_eq(a, b); }
|
|
1206
|
+
WP_API void builtin_expect_eq_vec2ub_vec2ub(vec2ub& a, vec2ub& b) { wp::expect_eq(a, b); }
|
|
1207
|
+
WP_API void builtin_expect_eq_vec3ub_vec3ub(vec3ub& a, vec3ub& b) { wp::expect_eq(a, b); }
|
|
1208
|
+
WP_API void builtin_expect_eq_vec4ub_vec4ub(vec4ub& a, vec4ub& b) { wp::expect_eq(a, b); }
|
|
1209
|
+
WP_API void builtin_expect_eq_mat22h_mat22h(mat22h& a, mat22h& b) { wp::expect_eq(a, b); }
|
|
1210
|
+
WP_API void builtin_expect_eq_mat33h_mat33h(mat33h& a, mat33h& b) { wp::expect_eq(a, b); }
|
|
1211
|
+
WP_API void builtin_expect_eq_mat44h_mat44h(mat44h& a, mat44h& b) { wp::expect_eq(a, b); }
|
|
1212
|
+
WP_API void builtin_expect_eq_spatial_matrixh_spatial_matrixh(spatial_matrixh& a, spatial_matrixh& b) { wp::expect_eq(a, b); }
|
|
1213
|
+
WP_API void builtin_expect_eq_mat22f_mat22f(mat22f& a, mat22f& b) { wp::expect_eq(a, b); }
|
|
1214
|
+
WP_API void builtin_expect_eq_mat33f_mat33f(mat33f& a, mat33f& b) { wp::expect_eq(a, b); }
|
|
1215
|
+
WP_API void builtin_expect_eq_mat44f_mat44f(mat44f& a, mat44f& b) { wp::expect_eq(a, b); }
|
|
1216
|
+
WP_API void builtin_expect_eq_spatial_matrixf_spatial_matrixf(spatial_matrixf& a, spatial_matrixf& b) { wp::expect_eq(a, b); }
|
|
1217
|
+
WP_API void builtin_expect_eq_mat22d_mat22d(mat22d& a, mat22d& b) { wp::expect_eq(a, b); }
|
|
1218
|
+
WP_API void builtin_expect_eq_mat33d_mat33d(mat33d& a, mat33d& b) { wp::expect_eq(a, b); }
|
|
1219
|
+
WP_API void builtin_expect_eq_mat44d_mat44d(mat44d& a, mat44d& b) { wp::expect_eq(a, b); }
|
|
1220
|
+
WP_API void builtin_expect_eq_spatial_matrixd_spatial_matrixd(spatial_matrixd& a, spatial_matrixd& b) { wp::expect_eq(a, b); }
|
|
1196
1221
|
WP_API void builtin_lerp_float16_float16_float16(float16 a, float16 b, float16 t, float16* ret) { *ret = wp::lerp(a, b, t); }
|
|
1197
1222
|
WP_API void builtin_lerp_float32_float32_float32(float32 a, float32 b, float32 t, float32* ret) { *ret = wp::lerp(a, b, t); }
|
|
1198
1223
|
WP_API void builtin_lerp_float64_float64_float64(float64 a, float64 b, float64 t, float64* ret) { *ret = wp::lerp(a, b, t); }
|
|
@@ -1229,6 +1254,36 @@ WP_API void builtin_lerp_transformd_transformd_float64(transformd& a, transformd
|
|
|
1229
1254
|
WP_API void builtin_smoothstep_float16_float16_float16(float16 a, float16 b, float16 x, float16* ret) { *ret = wp::smoothstep(a, b, x); }
|
|
1230
1255
|
WP_API void builtin_smoothstep_float32_float32_float32(float32 a, float32 b, float32 x, float32* ret) { *ret = wp::smoothstep(a, b, x); }
|
|
1231
1256
|
WP_API void builtin_smoothstep_float64_float64_float64(float64 a, float64 b, float64 x, float64* ret) { *ret = wp::smoothstep(a, b, x); }
|
|
1257
|
+
WP_API void builtin_expect_near_float16_float16_float16(float16 a, float16 b, float16 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1258
|
+
WP_API void builtin_expect_near_float32_float32_float32(float32 a, float32 b, float32 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1259
|
+
WP_API void builtin_expect_near_float64_float64_float64(float64 a, float64 b, float64 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1260
|
+
WP_API void builtin_expect_near_vec2h_vec2h_float16(vec2h& a, vec2h& b, float16 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1261
|
+
WP_API void builtin_expect_near_vec3h_vec3h_float16(vec3h& a, vec3h& b, float16 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1262
|
+
WP_API void builtin_expect_near_vec4h_vec4h_float16(vec4h& a, vec4h& b, float16 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1263
|
+
WP_API void builtin_expect_near_spatial_vectorh_spatial_vectorh_float16(spatial_vectorh& a, spatial_vectorh& b, float16 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1264
|
+
WP_API void builtin_expect_near_vec2f_vec2f_float32(vec2f& a, vec2f& b, float32 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1265
|
+
WP_API void builtin_expect_near_vec3f_vec3f_float32(vec3f& a, vec3f& b, float32 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1266
|
+
WP_API void builtin_expect_near_vec4f_vec4f_float32(vec4f& a, vec4f& b, float32 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1267
|
+
WP_API void builtin_expect_near_spatial_vectorf_spatial_vectorf_float32(spatial_vectorf& a, spatial_vectorf& b, float32 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1268
|
+
WP_API void builtin_expect_near_vec2d_vec2d_float64(vec2d& a, vec2d& b, float64 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1269
|
+
WP_API void builtin_expect_near_vec3d_vec3d_float64(vec3d& a, vec3d& b, float64 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1270
|
+
WP_API void builtin_expect_near_vec4d_vec4d_float64(vec4d& a, vec4d& b, float64 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1271
|
+
WP_API void builtin_expect_near_spatial_vectord_spatial_vectord_float64(spatial_vectord& a, spatial_vectord& b, float64 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1272
|
+
WP_API void builtin_expect_near_quath_quath_float16(quath& a, quath& b, float16 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1273
|
+
WP_API void builtin_expect_near_quatf_quatf_float32(quatf& a, quatf& b, float32 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1274
|
+
WP_API void builtin_expect_near_quatd_quatd_float64(quatd& a, quatd& b, float64 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1275
|
+
WP_API void builtin_expect_near_mat22h_mat22h_float16(mat22h& a, mat22h& b, float16 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1276
|
+
WP_API void builtin_expect_near_mat33h_mat33h_float16(mat33h& a, mat33h& b, float16 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1277
|
+
WP_API void builtin_expect_near_mat44h_mat44h_float16(mat44h& a, mat44h& b, float16 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1278
|
+
WP_API void builtin_expect_near_spatial_matrixh_spatial_matrixh_float16(spatial_matrixh& a, spatial_matrixh& b, float16 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1279
|
+
WP_API void builtin_expect_near_mat22f_mat22f_float32(mat22f& a, mat22f& b, float32 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1280
|
+
WP_API void builtin_expect_near_mat33f_mat33f_float32(mat33f& a, mat33f& b, float32 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1281
|
+
WP_API void builtin_expect_near_mat44f_mat44f_float32(mat44f& a, mat44f& b, float32 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1282
|
+
WP_API void builtin_expect_near_spatial_matrixf_spatial_matrixf_float32(spatial_matrixf& a, spatial_matrixf& b, float32 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1283
|
+
WP_API void builtin_expect_near_mat22d_mat22d_float64(mat22d& a, mat22d& b, float64 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1284
|
+
WP_API void builtin_expect_near_mat33d_mat33d_float64(mat33d& a, mat33d& b, float64 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1285
|
+
WP_API void builtin_expect_near_mat44d_mat44d_float64(mat44d& a, mat44d& b, float64 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1286
|
+
WP_API void builtin_expect_near_spatial_matrixd_spatial_matrixd_float64(spatial_matrixd& a, spatial_matrixd& b, float64 tolerance) { wp::expect_near(a, b, tolerance); }
|
|
1232
1287
|
WP_API void builtin_add_float16_float16(float16 a, float16 b, float16* ret) { *ret = wp::add(a, b); }
|
|
1233
1288
|
WP_API void builtin_add_float32_float32(float32 a, float32 b, float32* ret) { *ret = wp::add(a, b); }
|
|
1234
1289
|
WP_API void builtin_add_float64_float64(float64 a, float64 b, float64* ret) { *ret = wp::add(a, b); }
|
warp/native/intersect.h
CHANGED
|
@@ -665,7 +665,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
|
|
|
665
665
|
bool var_25;
|
|
666
666
|
bool var_26;
|
|
667
667
|
bool var_27;
|
|
668
|
-
float32 var_28;
|
|
668
|
+
float32 var_28 = 0.0;
|
|
669
669
|
vec2 var_29;
|
|
670
670
|
vec2 var_30;
|
|
671
671
|
vec3 var_31;
|
|
@@ -685,7 +685,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
|
|
|
685
685
|
bool var_45;
|
|
686
686
|
bool var_46;
|
|
687
687
|
bool var_47;
|
|
688
|
-
float32 var_48;
|
|
688
|
+
float32 var_48 = 0.0;
|
|
689
689
|
vec2 var_49;
|
|
690
690
|
vec2 var_50;
|
|
691
691
|
float32 var_51;
|
|
@@ -702,7 +702,7 @@ CUDA_CALLABLE inline void adj_closest_point_to_triangle(
|
|
|
702
702
|
float32 var_62;
|
|
703
703
|
bool var_63;
|
|
704
704
|
bool var_64;
|
|
705
|
-
float32 var_65;
|
|
705
|
+
float32 var_65 = 0.0;
|
|
706
706
|
vec2 var_66;
|
|
707
707
|
// vec2 var_67;
|
|
708
708
|
float32 var_68;
|
warp/native/mat.h
CHANGED
|
@@ -389,23 +389,17 @@ inline CUDA_CALLABLE bool operator==(const mat_t<Rows,Cols,Type>& a, const mat_t
|
|
|
389
389
|
return true;
|
|
390
390
|
}
|
|
391
391
|
|
|
392
|
-
|
|
393
|
-
// negation:
|
|
394
392
|
template<unsigned Rows, unsigned Cols, typename Type>
|
|
395
|
-
inline CUDA_CALLABLE mat_t<Rows,Cols,Type> operator - (mat_t<Rows,Cols,Type
|
|
393
|
+
inline CUDA_CALLABLE mat_t<Rows,Cols,Type> operator - (const mat_t<Rows,Cols,Type>& x)
|
|
396
394
|
{
|
|
397
|
-
// NB: this constructor will initialize all ret's components to 0, which is
|
|
398
|
-
// unnecessary...
|
|
399
395
|
mat_t<Rows,Cols,Type> ret;
|
|
400
396
|
for (unsigned i=0; i < Rows; ++i)
|
|
401
397
|
for (unsigned j=0; j < Cols; ++j)
|
|
402
|
-
ret.data[i][j] = -
|
|
398
|
+
ret.data[i][j] = -x.data[i][j];
|
|
403
399
|
|
|
404
|
-
// Wonder if this does a load of copying when it returns... hopefully not as it's inlined?
|
|
405
400
|
return ret;
|
|
406
401
|
}
|
|
407
402
|
|
|
408
|
-
|
|
409
403
|
template<unsigned Rows, unsigned Cols, typename Type>
|
|
410
404
|
CUDA_CALLABLE inline mat_t<Rows,Cols,Type> pos(const mat_t<Rows,Cols,Type>& x)
|
|
411
405
|
{
|
|
@@ -2206,8 +2200,9 @@ inline CUDA_CALLABLE void expect_near(const mat_t<Rows,Cols,Type>& actual, const
|
|
|
2206
2200
|
if (diff > tolerance)
|
|
2207
2201
|
{
|
|
2208
2202
|
printf("Error, expect_near() failed with tolerance "); print(tolerance);
|
|
2209
|
-
printf("
|
|
2210
|
-
printf("
|
|
2203
|
+
printf(" Expected: "); print(expected);
|
|
2204
|
+
printf(" Actual: "); print(actual);
|
|
2205
|
+
printf(" Max absolute difference: "); print(diff);
|
|
2211
2206
|
}
|
|
2212
2207
|
}
|
|
2213
2208
|
|
warp/native/mathdx.cpp
CHANGED
|
@@ -26,7 +26,8 @@ extern "C"
|
|
|
26
26
|
WP_API
|
|
27
27
|
bool cuda_compile_fft(
|
|
28
28
|
const char* ltoir_output_path,
|
|
29
|
-
const char* symbol_name,
|
|
29
|
+
const char* symbol_name,
|
|
30
|
+
int num_include_dirs,
|
|
30
31
|
const char** include_dirs,
|
|
31
32
|
const char* mathdx_include_dir,
|
|
32
33
|
int arch,
|
|
@@ -41,7 +42,6 @@ bool cuda_compile_fft(
|
|
|
41
42
|
}
|
|
42
43
|
|
|
43
44
|
WP_API bool cuda_compile_dot(
|
|
44
|
-
const char* fatbin_output_path,
|
|
45
45
|
const char* ltoir_output_path,
|
|
46
46
|
const char* symbol_name,
|
|
47
47
|
int num_include_dirs,
|
|
@@ -55,9 +55,9 @@ WP_API bool cuda_compile_dot(
|
|
|
55
55
|
int precision_B,
|
|
56
56
|
int precision_C,
|
|
57
57
|
int type,
|
|
58
|
-
int
|
|
59
|
-
int
|
|
60
|
-
int
|
|
58
|
+
int arrangement_A,
|
|
59
|
+
int arrangement_B,
|
|
60
|
+
int arrangement_C,
|
|
61
61
|
int num_threads)
|
|
62
62
|
{
|
|
63
63
|
printf("CUDA is disabled and/or Warp was not compiled with MathDx support.\n");
|
|
@@ -65,6 +65,7 @@ WP_API bool cuda_compile_dot(
|
|
|
65
65
|
}
|
|
66
66
|
|
|
67
67
|
WP_API bool cuda_compile_solver(
|
|
68
|
+
const char* fatbin_output_path,
|
|
68
69
|
const char* ltoir_output_path,
|
|
69
70
|
const char* symbol_name,
|
|
70
71
|
int num_include_dirs,
|
|
@@ -73,8 +74,13 @@ WP_API bool cuda_compile_solver(
|
|
|
73
74
|
int arch,
|
|
74
75
|
int M,
|
|
75
76
|
int N,
|
|
77
|
+
int NRHS,
|
|
76
78
|
int function,
|
|
79
|
+
int side,
|
|
80
|
+
int diag,
|
|
77
81
|
int precision,
|
|
82
|
+
int arrangement_A,
|
|
83
|
+
int arrangement_B,
|
|
78
84
|
int fill_mode,
|
|
79
85
|
int num_threads)
|
|
80
86
|
{
|
warp/native/matnn.h
CHANGED
|
@@ -218,126 +218,4 @@ CUDA_CALLABLE inline void adj_dense_solve(int n,
|
|
|
218
218
|
}
|
|
219
219
|
}
|
|
220
220
|
|
|
221
|
-
|
|
222
|
-
template <typename F>
|
|
223
|
-
CUDA_CALLABLE inline void mlp(const array_t<float>& weights, const array_t<float>& bias, F activation, int index, const array_t<float>& x, array_t<float>& out)
|
|
224
|
-
{
|
|
225
|
-
const int m = weights.shape[0];
|
|
226
|
-
const int n = weights.shape[1];
|
|
227
|
-
const int b = x.shape[1];
|
|
228
|
-
|
|
229
|
-
for (int i=0; i < m; ++i)
|
|
230
|
-
{
|
|
231
|
-
float tmp = bias.data[i];
|
|
232
|
-
|
|
233
|
-
for(int j=0; j < n; ++j)
|
|
234
|
-
{
|
|
235
|
-
tmp += weights.data[i*n + j]*x.data[index + b*j];
|
|
236
|
-
}
|
|
237
|
-
|
|
238
|
-
out.data[index + b*i] = activation(tmp);
|
|
239
|
-
}
|
|
240
|
-
}
|
|
241
|
-
|
|
242
|
-
template <typename F, typename AdjF>
|
|
243
|
-
CUDA_CALLABLE inline void adj_mlp(const array_t<float>& weights, const array_t<float>& bias, F activation, int index, const array_t<float>& x, array_t<float>& out,
|
|
244
|
-
array_t<float>& adj_weights, array_t<float>& adj_bias, AdjF adj_activation, int adj_index, array_t<float>& adj_x, array_t<float>& adj_out)
|
|
245
|
-
{
|
|
246
|
-
const int m = weights.shape[0];
|
|
247
|
-
const int n = weights.shape[1];
|
|
248
|
-
const int b = x.shape[1];
|
|
249
|
-
|
|
250
|
-
for (int i=0; i < m; ++i)
|
|
251
|
-
{
|
|
252
|
-
// recompute forward pass so we don't have to store pre-activation outputs
|
|
253
|
-
float tmp = bias.data[i];
|
|
254
|
-
|
|
255
|
-
for(int j=0; j < n; ++j)
|
|
256
|
-
{
|
|
257
|
-
tmp += weights.data[i*n + j]*x.data[index + b*j];
|
|
258
|
-
}
|
|
259
|
-
|
|
260
|
-
// adjoint w.r.t to activation
|
|
261
|
-
float adj_f = 0.0f;
|
|
262
|
-
|
|
263
|
-
if (adj_out.data)
|
|
264
|
-
adj_activation(tmp, adj_f, adj_out.data[index + b*i]);
|
|
265
|
-
|
|
266
|
-
for (int j=0; j < n; ++j)
|
|
267
|
-
{
|
|
268
|
-
// adjoint w.r.t M_i
|
|
269
|
-
if (adj_weights.data)
|
|
270
|
-
atomic_add(&adj_weights.data[i*n + j], x.data[index + b*j]*adj_f); // todo: reduce these atomic stores using warp/block level reductions
|
|
271
|
-
|
|
272
|
-
// adjoint w.r.t x
|
|
273
|
-
if (adj_x.data)
|
|
274
|
-
atomic_add(&adj_x.data[index + b*j], weights.data[i*n + j]*adj_f);
|
|
275
|
-
}
|
|
276
|
-
|
|
277
|
-
// adjoint w.r.t b
|
|
278
|
-
if (adj_bias.data)
|
|
279
|
-
atomic_add(&adj_bias.data[i], adj_f);
|
|
280
|
-
|
|
281
|
-
}
|
|
282
|
-
}
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
// template <typename F>
|
|
286
|
-
// CUDA_CALLABLE inline void mlp(const array_t<float>& weights, const array_t<float>& bias, F activation, int m, int n, int b, int index, const array_t<float>& x, array_t<float>& out)
|
|
287
|
-
// {
|
|
288
|
-
// x += index*n;
|
|
289
|
-
// out += index*m;
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
// for (int i=0; i < m; ++i)
|
|
293
|
-
// {
|
|
294
|
-
// float tmp = bias[i];
|
|
295
|
-
|
|
296
|
-
// for(int j=0; j < n; ++j)
|
|
297
|
-
// {
|
|
298
|
-
// tmp += weights[i*n + j]*x[j];
|
|
299
|
-
// }
|
|
300
|
-
|
|
301
|
-
// out[i] = activation(tmp);
|
|
302
|
-
// }
|
|
303
|
-
// }
|
|
304
|
-
|
|
305
|
-
// template <typename F, typename AdjF>
|
|
306
|
-
// CUDA_CALLABLE inline void adj_mlp(const array_t<float>& weights, const array_t<float>& bias, F activation, int m, int n, int b, int index, const array_t<float>& x, const array_t<float>& out,
|
|
307
|
-
// array_t<float>& adj_weights, array_t<float>& adj_bias, AdjF adj_activation, int adj_m, int adj_n, int adj_b, int adj_index, array_t<float>& adj_x, array_t<float>& adj_out)
|
|
308
|
-
// {
|
|
309
|
-
// x += index*n;
|
|
310
|
-
// out += index*m;
|
|
311
|
-
|
|
312
|
-
// adj_x += index*n;
|
|
313
|
-
// adj_out += index*m;
|
|
314
|
-
|
|
315
|
-
// for (int i=0; i < m; ++i)
|
|
316
|
-
// {
|
|
317
|
-
// // recompute forward pass so we don't have to store pre-activation outputs
|
|
318
|
-
// float tmp = bias[i];
|
|
319
|
-
|
|
320
|
-
// for(int j=0; j < n; ++j)
|
|
321
|
-
// {
|
|
322
|
-
// tmp += weights[i*n + j]*x[index + b*j];
|
|
323
|
-
// }
|
|
324
|
-
|
|
325
|
-
// // adjoint w.r.t to activation
|
|
326
|
-
// float adj_f = 0.0f;
|
|
327
|
-
// adj_activation(tmp, adj_f, adj_out[index + b*i]);
|
|
328
|
-
|
|
329
|
-
// for (int j=0; j < n; ++j)
|
|
330
|
-
// {
|
|
331
|
-
// // adjoint w.r.t M_i
|
|
332
|
-
// adj_weights[i*n + j] += x[j]*adj_f;
|
|
333
|
-
|
|
334
|
-
// // adjoint w.r.t x
|
|
335
|
-
// adj_x[index + b*j] += weights[i*n + j]*adj_f;
|
|
336
|
-
// }
|
|
337
|
-
|
|
338
|
-
// // adjoint w.r.t b
|
|
339
|
-
// adj_bias[i] += adj_f;
|
|
340
|
-
// }
|
|
341
|
-
// }
|
|
342
|
-
|
|
343
|
-
} // namespace wp
|
|
221
|
+
} // namespace wp
|
warp/native/quat.h
CHANGED
|
@@ -274,8 +274,32 @@ inline CUDA_CALLABLE quat_t<Type> add(const quat_t<Type>& a, const quat_t<Type>&
|
|
|
274
274
|
template<typename Type>
|
|
275
275
|
inline CUDA_CALLABLE quat_t<Type> sub(const quat_t<Type>& a, const quat_t<Type>& b)
|
|
276
276
|
{
|
|
277
|
-
return quat_t<Type>(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);
|
|
277
|
+
return quat_t<Type>(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w);
|
|
278
|
+
}
|
|
279
|
+
|
|
280
|
+
template<typename Type>
|
|
281
|
+
inline CUDA_CALLABLE quat_t<Type> operator - (const quat_t<Type>& q)
|
|
282
|
+
{
|
|
283
|
+
return quat_t<Type>(-q.x, -q.y, -q.z, -q.w);
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
template<typename Type>
|
|
287
|
+
CUDA_CALLABLE inline quat_t<Type> pos(const quat_t<Type>& q)
|
|
288
|
+
{
|
|
289
|
+
return q;
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
template<typename Type>
|
|
293
|
+
CUDA_CALLABLE inline quat_t<Type> neg(const quat_t<Type>& q)
|
|
294
|
+
{
|
|
295
|
+
return -q;
|
|
296
|
+
}
|
|
278
297
|
|
|
298
|
+
template<typename Type>
|
|
299
|
+
CUDA_CALLABLE inline void adj_neg(const quat_t<Type>& q, quat_t<Type>& adj_q, const quat_t<Type>& adj_ret)
|
|
300
|
+
{
|
|
301
|
+
adj_q -= adj_ret;
|
|
302
|
+
}
|
|
279
303
|
|
|
280
304
|
template<typename Type>
|
|
281
305
|
inline CUDA_CALLABLE quat_t<Type> mul(const quat_t<Type>& a, const quat_t<Type>& b)
|
|
@@ -298,7 +322,6 @@ inline CUDA_CALLABLE quat_t<Type> mul(Type s, const quat_t<Type>& a)
|
|
|
298
322
|
return mul(a, s);
|
|
299
323
|
}
|
|
300
324
|
|
|
301
|
-
// division
|
|
302
325
|
template<typename Type>
|
|
303
326
|
inline CUDA_CALLABLE quat_t<Type> div(quat_t<Type> q, Type s)
|
|
304
327
|
{
|
|
@@ -1357,8 +1380,9 @@ inline CUDA_CALLABLE void expect_near(const quat_t<Type>& actual, const quat_t<T
|
|
|
1357
1380
|
if (diff > tolerance)
|
|
1358
1381
|
{
|
|
1359
1382
|
printf("Error, expect_near() failed with tolerance "); print(tolerance);
|
|
1360
|
-
printf("
|
|
1361
|
-
printf("
|
|
1383
|
+
printf(" Expected: "); print(expected);
|
|
1384
|
+
printf(" Actual: "); print(actual);
|
|
1385
|
+
printf(" Max absolute difference: "); print(diff);
|
|
1362
1386
|
}
|
|
1363
1387
|
}
|
|
1364
1388
|
|