tf-models-nightly 2.17.0.dev20240617__py2.py3-none-any.whl → 2.20.0.dev20251205__py2.py3-none-any.whl

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (1257):
  1. official/__init__.py +1 -1
  2. official/common/__init__.py +1 -1
  3. official/common/dataset_fn.py +1 -1
  4. official/common/distribute_utils.py +27 -3
  5. official/common/distribute_utils_test.py +13 -12
  6. official/common/flags.py +24 -6
  7. official/common/registry_imports.py +1 -1
  8. official/common/streamz_counters.py +1 -1
  9. official/core/__init__.py +1 -1
  10. official/core/actions.py +1 -1
  11. official/core/actions_test.py +1 -1
  12. official/core/base_task.py +1 -1
  13. official/core/base_trainer.py +1 -1
  14. official/core/base_trainer_test.py +1 -1
  15. official/core/config_definitions.py +1 -1
  16. official/core/exp_factory.py +1 -1
  17. official/core/export_base.py +1 -1
  18. official/core/export_base_test.py +1 -1
  19. official/core/file_writers.py +1 -1
  20. official/core/file_writers_test.py +1 -1
  21. official/core/input_reader.py +1 -1
  22. official/core/registry.py +1 -1
  23. official/core/registry_test.py +1 -1
  24. official/core/savedmodel_checkpoint_manager.py +1 -1
  25. official/core/savedmodel_checkpoint_manager_test.py +1 -1
  26. official/core/task_factory.py +1 -1
  27. official/core/test_utils.py +1 -1
  28. official/core/tf_example_builder.py +1 -1
  29. official/core/tf_example_builder_test.py +1 -1
  30. official/core/tf_example_feature_key.py +1 -1
  31. official/core/tf_example_feature_key_test.py +1 -1
  32. official/core/train_lib.py +1 -3
  33. official/core/train_lib_test.py +1 -1
  34. official/core/train_utils.py +1 -1
  35. official/core/train_utils_test.py +1 -1
  36. official/legacy/__init__.py +1 -1
  37. official/legacy/albert/__init__.py +1 -1
  38. official/legacy/albert/configs.py +1 -1
  39. official/legacy/bert/__init__.py +1 -1
  40. official/legacy/bert/bert_models.py +1 -1
  41. official/legacy/bert/bert_models_test.py +1 -1
  42. official/legacy/bert/common_flags.py +1 -1
  43. official/legacy/bert/configs.py +1 -1
  44. official/legacy/bert/export_tfhub.py +1 -2
  45. official/legacy/bert/export_tfhub_test.py +1 -1
  46. official/legacy/bert/input_pipeline.py +1 -1
  47. official/legacy/bert/model_saving_utils.py +1 -1
  48. official/legacy/bert/model_training_utils.py +1 -1
  49. official/legacy/bert/model_training_utils_test.py +1 -1
  50. official/legacy/bert/run_classifier.py +1 -2
  51. official/legacy/bert/run_pretraining.py +1 -2
  52. official/legacy/bert/run_squad.py +1 -2
  53. official/legacy/bert/run_squad_helper.py +1 -1
  54. official/legacy/bert/serving.py +1 -1
  55. official/legacy/detection/__init__.py +1 -1
  56. official/legacy/detection/configs/__init__.py +1 -1
  57. official/legacy/detection/configs/base_config.py +1 -1
  58. official/legacy/detection/configs/factory.py +1 -1
  59. official/legacy/detection/configs/maskrcnn_config.py +1 -1
  60. official/legacy/detection/configs/olnmask_config.py +1 -1
  61. official/legacy/detection/configs/retinanet_config.py +1 -1
  62. official/legacy/detection/configs/shapemask_config.py +1 -1
  63. official/legacy/detection/dataloader/__init__.py +1 -1
  64. official/legacy/detection/dataloader/anchor.py +1 -1
  65. official/legacy/detection/dataloader/factory.py +1 -1
  66. official/legacy/detection/dataloader/input_reader.py +1 -1
  67. official/legacy/detection/dataloader/maskrcnn_parser.py +1 -1
  68. official/legacy/detection/dataloader/mode_keys.py +1 -1
  69. official/legacy/detection/dataloader/olnmask_parser.py +1 -1
  70. official/legacy/detection/dataloader/retinanet_parser.py +1 -1
  71. official/legacy/detection/dataloader/shapemask_parser.py +1 -1
  72. official/legacy/detection/dataloader/tf_example_decoder.py +1 -1
  73. official/legacy/detection/evaluation/__init__.py +1 -1
  74. official/legacy/detection/evaluation/coco_evaluator.py +1 -1
  75. official/legacy/detection/evaluation/coco_utils.py +1 -1
  76. official/legacy/detection/evaluation/factory.py +1 -1
  77. official/legacy/detection/executor/__init__.py +1 -1
  78. official/legacy/detection/executor/detection_executor.py +1 -1
  79. official/legacy/detection/executor/distributed_executor.py +1 -1
  80. official/legacy/detection/main.py +1 -1
  81. official/legacy/detection/modeling/__init__.py +1 -1
  82. official/legacy/detection/modeling/architecture/__init__.py +1 -1
  83. official/legacy/detection/modeling/architecture/factory.py +1 -1
  84. official/legacy/detection/modeling/architecture/fpn.py +1 -1
  85. official/legacy/detection/modeling/architecture/heads.py +1 -1
  86. official/legacy/detection/modeling/architecture/identity.py +1 -1
  87. official/legacy/detection/modeling/architecture/nn_blocks.py +1 -1
  88. official/legacy/detection/modeling/architecture/nn_ops.py +1 -1
  89. official/legacy/detection/modeling/architecture/resnet.py +1 -1
  90. official/legacy/detection/modeling/architecture/spinenet.py +1 -1
  91. official/legacy/detection/modeling/base_model.py +1 -1
  92. official/legacy/detection/modeling/checkpoint_utils.py +1 -1
  93. official/legacy/detection/modeling/factory.py +1 -1
  94. official/legacy/detection/modeling/learning_rates.py +1 -1
  95. official/legacy/detection/modeling/losses.py +1 -1
  96. official/legacy/detection/modeling/maskrcnn_model.py +1 -1
  97. official/legacy/detection/modeling/olnmask_model.py +1 -1
  98. official/legacy/detection/modeling/optimizers.py +1 -1
  99. official/legacy/detection/modeling/retinanet_model.py +1 -1
  100. official/legacy/detection/modeling/shapemask_model.py +1 -1
  101. official/legacy/detection/ops/__init__.py +1 -1
  102. official/legacy/detection/ops/nms.py +1 -1
  103. official/legacy/detection/ops/postprocess_ops.py +1 -1
  104. official/legacy/detection/ops/roi_ops.py +1 -1
  105. official/legacy/detection/ops/spatial_transform_ops.py +1 -1
  106. official/legacy/detection/ops/target_ops.py +1 -1
  107. official/legacy/detection/utils/__init__.py +1 -1
  108. official/legacy/detection/utils/box_utils.py +1 -1
  109. official/legacy/detection/utils/class_utils.py +1 -1
  110. official/legacy/detection/utils/dataloader_utils.py +1 -1
  111. official/legacy/detection/utils/input_utils.py +1 -1
  112. official/legacy/detection/utils/mask_utils.py +1 -1
  113. official/legacy/image_classification/__init__.py +1 -1
  114. official/legacy/image_classification/augment.py +1 -1
  115. official/legacy/image_classification/augment_test.py +1 -1
  116. official/legacy/image_classification/callbacks.py +1 -1
  117. official/legacy/image_classification/classifier_trainer.py +1 -1
  118. official/legacy/image_classification/classifier_trainer_test.py +1 -1
  119. official/legacy/image_classification/classifier_trainer_util_test.py +1 -1
  120. official/legacy/image_classification/configs/__init__.py +1 -1
  121. official/legacy/image_classification/configs/base_configs.py +1 -1
  122. official/legacy/image_classification/configs/configs.py +1 -1
  123. official/legacy/image_classification/dataset_factory.py +1 -1
  124. official/legacy/image_classification/efficientnet/__init__.py +1 -1
  125. official/legacy/image_classification/efficientnet/common_modules.py +1 -1
  126. official/legacy/image_classification/efficientnet/efficientnet_config.py +1 -1
  127. official/legacy/image_classification/efficientnet/efficientnet_model.py +1 -1
  128. official/legacy/image_classification/efficientnet/tfhub_export.py +1 -1
  129. official/legacy/image_classification/learning_rate.py +1 -1
  130. official/legacy/image_classification/learning_rate_test.py +1 -1
  131. official/legacy/image_classification/mnist_main.py +1 -2
  132. official/legacy/image_classification/mnist_test.py +1 -1
  133. official/legacy/image_classification/optimizer_factory.py +1 -1
  134. official/legacy/image_classification/optimizer_factory_test.py +1 -1
  135. official/legacy/image_classification/preprocessing.py +1 -1
  136. official/legacy/image_classification/resnet/__init__.py +1 -1
  137. official/legacy/image_classification/resnet/common.py +1 -1
  138. official/legacy/image_classification/resnet/imagenet_preprocessing.py +1 -1
  139. official/legacy/image_classification/resnet/resnet_config.py +1 -1
  140. official/legacy/image_classification/resnet/resnet_ctl_imagenet_main.py +1 -2
  141. official/legacy/image_classification/resnet/resnet_model.py +1 -1
  142. official/legacy/image_classification/resnet/resnet_runnable.py +1 -1
  143. official/legacy/image_classification/resnet/tfhub_export.py +1 -2
  144. official/legacy/image_classification/test_utils.py +1 -1
  145. official/legacy/image_classification/vgg/__init__.py +1 -1
  146. official/legacy/image_classification/vgg/vgg_config.py +1 -1
  147. official/legacy/image_classification/vgg/vgg_model.py +1 -1
  148. official/legacy/transformer/__init__.py +1 -1
  149. official/legacy/transformer/attention_layer.py +1 -1
  150. official/legacy/transformer/beam_search_v1.py +1 -1
  151. official/legacy/transformer/compute_bleu.py +1 -1
  152. official/legacy/transformer/compute_bleu_test.py +1 -1
  153. official/legacy/transformer/data_download.py +1 -1
  154. official/legacy/transformer/data_pipeline.py +1 -1
  155. official/legacy/transformer/embedding_layer.py +1 -1
  156. official/legacy/transformer/ffn_layer.py +1 -1
  157. official/legacy/transformer/metrics.py +1 -1
  158. official/legacy/transformer/misc.py +1 -1
  159. official/legacy/transformer/model_params.py +1 -1
  160. official/legacy/transformer/model_utils.py +1 -1
  161. official/legacy/transformer/model_utils_test.py +1 -1
  162. official/legacy/transformer/optimizer.py +1 -1
  163. official/legacy/transformer/transformer.py +1 -1
  164. official/legacy/transformer/transformer_forward_test.py +1 -1
  165. official/legacy/transformer/transformer_layers_test.py +1 -1
  166. official/legacy/transformer/transformer_main.py +1 -5
  167. official/legacy/transformer/transformer_main_test.py +1 -1
  168. official/legacy/transformer/transformer_test.py +1 -1
  169. official/legacy/transformer/translate.py +1 -2
  170. official/legacy/transformer/utils/__init__.py +1 -1
  171. official/legacy/transformer/utils/metrics.py +1 -1
  172. official/legacy/transformer/utils/tokenizer.py +1 -1
  173. official/legacy/transformer/utils/tokenizer_test.py +1 -1
  174. official/legacy/xlnet/__init__.py +1 -1
  175. official/legacy/xlnet/classifier_utils.py +1 -1
  176. official/legacy/xlnet/common_flags.py +1 -1
  177. official/legacy/xlnet/data_utils.py +1 -1
  178. official/legacy/xlnet/optimization.py +1 -1
  179. official/legacy/xlnet/preprocess_classification_data.py +1 -2
  180. official/legacy/xlnet/preprocess_pretrain_data.py +1 -2
  181. official/legacy/xlnet/preprocess_squad_data.py +1 -2
  182. official/legacy/xlnet/preprocess_utils.py +1 -1
  183. official/legacy/xlnet/run_classifier.py +1 -2
  184. official/legacy/xlnet/run_pretrain.py +1 -2
  185. official/legacy/xlnet/run_squad.py +1 -2
  186. official/legacy/xlnet/squad_utils.py +1 -1
  187. official/legacy/xlnet/training_utils.py +1 -1
  188. official/legacy/xlnet/xlnet_config.py +1 -1
  189. official/legacy/xlnet/xlnet_modeling.py +1 -1
  190. official/modeling/__init__.py +1 -1
  191. official/modeling/activations/__init__.py +1 -1
  192. official/modeling/activations/gelu.py +1 -1
  193. official/modeling/activations/gelu_test.py +1 -1
  194. official/modeling/activations/mish.py +1 -1
  195. official/modeling/activations/mish_test.py +1 -1
  196. official/modeling/activations/relu.py +1 -1
  197. official/modeling/activations/relu_test.py +1 -1
  198. official/modeling/activations/sigmoid.py +1 -1
  199. official/modeling/activations/sigmoid_test.py +1 -1
  200. official/modeling/activations/swish.py +1 -1
  201. official/modeling/activations/swish_test.py +1 -1
  202. official/modeling/grad_utils.py +1 -1
  203. official/modeling/grad_utils_test.py +1 -1
  204. official/modeling/hyperparams/__init__.py +1 -1
  205. official/modeling/hyperparams/base_config.py +27 -19
  206. official/modeling/hyperparams/base_config_test.py +32 -1
  207. official/modeling/hyperparams/oneof.py +1 -1
  208. official/modeling/hyperparams/oneof_test.py +1 -1
  209. official/modeling/hyperparams/params_dict.py +1 -1
  210. official/modeling/hyperparams/params_dict_test.py +1 -1
  211. official/modeling/multitask/__init__.py +1 -1
  212. official/modeling/multitask/base_model.py +1 -1
  213. official/modeling/multitask/base_trainer.py +1 -1
  214. official/modeling/multitask/base_trainer_test.py +1 -1
  215. official/modeling/multitask/configs.py +3 -3
  216. official/modeling/multitask/evaluator.py +1 -1
  217. official/modeling/multitask/evaluator_test.py +1 -1
  218. official/modeling/multitask/interleaving_trainer.py +1 -1
  219. official/modeling/multitask/interleaving_trainer_test.py +1 -1
  220. official/modeling/multitask/multitask.py +1 -1
  221. official/modeling/multitask/task_sampler.py +1 -1
  222. official/modeling/multitask/task_sampler_test.py +1 -1
  223. official/modeling/multitask/test_utils.py +1 -1
  224. official/modeling/multitask/train_lib.py +81 -14
  225. official/modeling/multitask/train_lib_test.py +1 -1
  226. official/modeling/optimization/__init__.py +1 -1
  227. official/modeling/optimization/adafactor_optimizer.py +1 -1
  228. official/modeling/optimization/configs/__init__.py +1 -1
  229. official/modeling/optimization/configs/learning_rate_config.py +1 -1
  230. official/modeling/optimization/configs/optimization_config.py +1 -1
  231. official/modeling/optimization/configs/optimization_config_test.py +1 -1
  232. official/modeling/optimization/configs/optimizer_config.py +1 -1
  233. official/modeling/optimization/ema_optimizer.py +1 -1
  234. official/modeling/optimization/lamb.py +1 -1
  235. official/modeling/optimization/lamb_test.py +1 -1
  236. official/modeling/optimization/lars.py +1 -1
  237. official/modeling/optimization/legacy_adamw.py +1 -1
  238. official/modeling/optimization/lr_schedule.py +1 -1
  239. official/modeling/optimization/lr_schedule_test.py +1 -1
  240. official/modeling/optimization/optimizer_factory.py +1 -1
  241. official/modeling/optimization/optimizer_factory_test.py +1 -1
  242. official/modeling/optimization/slide_optimizer.py +1 -1
  243. official/modeling/performance.py +1 -1
  244. official/modeling/privacy/__init__.py +1 -1
  245. official/modeling/privacy/configs.py +1 -1
  246. official/modeling/privacy/configs_test.py +1 -1
  247. official/modeling/privacy/ops.py +1 -1
  248. official/modeling/privacy/ops_test.py +1 -1
  249. official/modeling/tf_utils.py +1 -1
  250. official/modeling/tf_utils_test.py +1 -1
  251. official/nlp/__init__.py +1 -1
  252. official/nlp/configs/__init__.py +1 -1
  253. official/nlp/configs/bert.py +1 -1
  254. official/nlp/configs/electra.py +1 -1
  255. official/nlp/configs/encoders.py +1 -1
  256. official/nlp/configs/encoders_test.py +1 -1
  257. official/nlp/configs/experiment_configs.py +1 -1
  258. official/nlp/configs/finetuning_experiments.py +1 -1
  259. official/nlp/configs/pretraining_experiments.py +1 -1
  260. official/nlp/configs/wmt_transformer_experiments.py +1 -1
  261. official/nlp/continuous_finetune_lib.py +1 -1
  262. official/nlp/continuous_finetune_lib_test.py +1 -1
  263. official/nlp/data/__init__.py +1 -1
  264. official/nlp/data/classifier_data_lib.py +1 -1
  265. official/nlp/data/classifier_data_lib_test.py +1 -1
  266. official/nlp/data/create_finetuning_data.py +1 -2
  267. official/nlp/data/create_pretraining_data.py +1 -3
  268. official/nlp/data/create_pretraining_data_test.py +1 -1
  269. official/nlp/data/create_xlnet_pretraining_data.py +1 -3
  270. official/nlp/data/create_xlnet_pretraining_data_test.py +1 -1
  271. official/nlp/data/data_loader.py +1 -1
  272. official/nlp/data/data_loader_factory.py +1 -1
  273. official/nlp/data/data_loader_factory_test.py +1 -1
  274. official/nlp/data/dual_encoder_dataloader.py +1 -1
  275. official/nlp/data/dual_encoder_dataloader_test.py +1 -1
  276. official/nlp/data/pretrain_dataloader.py +1 -1
  277. official/nlp/data/pretrain_dataloader_test.py +1 -1
  278. official/nlp/data/pretrain_dynamic_dataloader.py +1 -1
  279. official/nlp/data/pretrain_dynamic_dataloader_test.py +1 -1
  280. official/nlp/data/pretrain_text_dataloader.py +1 -1
  281. official/nlp/data/question_answering_dataloader.py +1 -1
  282. official/nlp/data/question_answering_dataloader_test.py +1 -1
  283. official/nlp/data/sentence_prediction_dataloader.py +1 -1
  284. official/nlp/data/sentence_prediction_dataloader_test.py +1 -1
  285. official/nlp/data/sentence_retrieval_lib.py +1 -1
  286. official/nlp/data/squad_lib.py +1 -1
  287. official/nlp/data/squad_lib_sp.py +1 -1
  288. official/nlp/data/tagging_data_lib.py +1 -1
  289. official/nlp/data/tagging_data_lib_test.py +1 -1
  290. official/nlp/data/tagging_dataloader.py +1 -1
  291. official/nlp/data/tagging_dataloader_test.py +1 -1
  292. official/nlp/data/train_sentencepiece.py +1 -1
  293. official/nlp/data/wmt_dataloader.py +1 -1
  294. official/nlp/data/wmt_dataloader_test.py +1 -1
  295. official/nlp/metrics/__init__.py +1 -1
  296. official/nlp/metrics/bleu.py +1 -1
  297. official/nlp/metrics/bleu_test.py +1 -1
  298. official/nlp/modeling/__init__.py +1 -1
  299. official/nlp/modeling/layers/__init__.py +1 -1
  300. official/nlp/modeling/layers/attention.py +1 -1
  301. official/nlp/modeling/layers/attention_test.py +1 -1
  302. official/nlp/modeling/layers/bigbird_attention.py +1 -1
  303. official/nlp/modeling/layers/bigbird_attention_test.py +1 -1
  304. official/nlp/modeling/layers/block_diag_feedforward.py +1 -1
  305. official/nlp/modeling/layers/block_diag_feedforward_test.py +1 -1
  306. official/nlp/modeling/layers/block_sparse_attention.py +187 -44
  307. official/nlp/modeling/layers/block_sparse_attention_test.py +137 -7
  308. official/nlp/modeling/layers/cls_head.py +1 -1
  309. official/nlp/modeling/layers/cls_head_test.py +1 -1
  310. official/nlp/modeling/layers/factorized_embedding.py +1 -1
  311. official/nlp/modeling/layers/factorized_embedding_test.py +1 -1
  312. official/nlp/modeling/layers/gated_feedforward.py +2 -2
  313. official/nlp/modeling/layers/gated_feedforward_test.py +1 -1
  314. official/nlp/modeling/layers/gaussian_process.py +1 -1
  315. official/nlp/modeling/layers/gaussian_process_test.py +1 -1
  316. official/nlp/modeling/layers/kernel_attention.py +1 -1
  317. official/nlp/modeling/layers/kernel_attention_test.py +1 -1
  318. official/nlp/modeling/layers/masked_lm.py +1 -1
  319. official/nlp/modeling/layers/masked_lm_test.py +1 -1
  320. official/nlp/modeling/layers/masked_softmax.py +1 -1
  321. official/nlp/modeling/layers/masked_softmax_test.py +1 -1
  322. official/nlp/modeling/layers/mat_mul_with_margin.py +1 -2
  323. official/nlp/modeling/layers/mat_mul_with_margin_test.py +1 -1
  324. official/nlp/modeling/layers/mixing.py +1 -1
  325. official/nlp/modeling/layers/mixing_test.py +1 -1
  326. official/nlp/modeling/layers/mobile_bert_layers.py +1 -1
  327. official/nlp/modeling/layers/mobile_bert_layers_test.py +1 -1
  328. official/nlp/modeling/layers/moe.py +1 -1
  329. official/nlp/modeling/layers/moe_test.py +1 -1
  330. official/nlp/modeling/layers/multi_channel_attention.py +1 -1
  331. official/nlp/modeling/layers/multi_channel_attention_test.py +1 -1
  332. official/nlp/modeling/layers/multi_query_attention.py +222 -4
  333. official/nlp/modeling/layers/multi_query_attention_test.py +201 -1
  334. official/nlp/modeling/layers/on_device_embedding.py +1 -1
  335. official/nlp/modeling/layers/on_device_embedding_test.py +1 -1
  336. official/nlp/modeling/layers/pack_optimization.py +1 -1
  337. official/nlp/modeling/layers/pack_optimization_test.py +1 -1
  338. official/nlp/modeling/layers/per_dim_scale_attention.py +1 -1
  339. official/nlp/modeling/layers/per_dim_scale_attention_test.py +1 -1
  340. official/nlp/modeling/layers/position_embedding.py +1 -1
  341. official/nlp/modeling/layers/position_embedding_test.py +1 -1
  342. official/nlp/modeling/layers/relative_attention.py +1 -1
  343. official/nlp/modeling/layers/relative_attention_test.py +1 -1
  344. official/nlp/modeling/layers/reuse_attention.py +1 -1
  345. official/nlp/modeling/layers/reuse_attention_test.py +1 -1
  346. official/nlp/modeling/layers/reuse_transformer.py +1 -1
  347. official/nlp/modeling/layers/reuse_transformer_test.py +1 -1
  348. official/nlp/modeling/layers/rezero_transformer.py +20 -1
  349. official/nlp/modeling/layers/rezero_transformer_test.py +1 -1
  350. official/nlp/modeling/layers/routing.py +1 -1
  351. official/nlp/modeling/layers/routing_test.py +1 -1
  352. official/nlp/modeling/layers/self_attention_mask.py +1 -1
  353. official/nlp/modeling/layers/spectral_normalization.py +1 -1
  354. official/nlp/modeling/layers/spectral_normalization_test.py +1 -1
  355. official/nlp/modeling/layers/talking_heads_attention.py +1 -1
  356. official/nlp/modeling/layers/talking_heads_attention_test.py +1 -1
  357. official/nlp/modeling/layers/text_layers.py +1 -1
  358. official/nlp/modeling/layers/text_layers_test.py +1 -1
  359. official/nlp/modeling/layers/tn_expand_condense.py +1 -1
  360. official/nlp/modeling/layers/tn_expand_condense_test.py +1 -1
  361. official/nlp/modeling/layers/tn_transformer_expand_condense.py +1 -3
  362. official/nlp/modeling/layers/tn_transformer_test.py +1 -1
  363. official/nlp/modeling/layers/transformer.py +1 -1
  364. official/nlp/modeling/layers/transformer_encoder_block.py +273 -52
  365. official/nlp/modeling/layers/transformer_encoder_block_test.py +215 -11
  366. official/nlp/modeling/layers/transformer_scaffold.py +1 -1
  367. official/nlp/modeling/layers/transformer_scaffold_test.py +1 -1
  368. official/nlp/modeling/layers/transformer_test.py +1 -1
  369. official/nlp/modeling/layers/transformer_xl.py +1 -1
  370. official/nlp/modeling/layers/transformer_xl_test.py +1 -1
  371. official/nlp/modeling/layers/util.py +1 -1
  372. official/nlp/modeling/losses/__init__.py +1 -1
  373. official/nlp/modeling/losses/weighted_sparse_categorical_crossentropy.py +1 -1
  374. official/nlp/modeling/losses/weighted_sparse_categorical_crossentropy_test.py +1 -1
  375. official/nlp/modeling/models/__init__.py +1 -1
  376. official/nlp/modeling/models/bert_classifier.py +1 -1
  377. official/nlp/modeling/models/bert_classifier_test.py +1 -1
  378. official/nlp/modeling/models/bert_pretrainer.py +1 -1
  379. official/nlp/modeling/models/bert_pretrainer_test.py +1 -1
  380. official/nlp/modeling/models/bert_span_labeler.py +1 -1
  381. official/nlp/modeling/models/bert_span_labeler_test.py +1 -1
  382. official/nlp/modeling/models/bert_token_classifier.py +1 -1
  383. official/nlp/modeling/models/bert_token_classifier_test.py +1 -1
  384. official/nlp/modeling/models/dual_encoder.py +1 -1
  385. official/nlp/modeling/models/dual_encoder_test.py +1 -1
  386. official/nlp/modeling/models/electra_pretrainer.py +1 -1
  387. official/nlp/modeling/models/electra_pretrainer_test.py +1 -1
  388. official/nlp/modeling/models/seq2seq_transformer.py +1 -1
  389. official/nlp/modeling/models/seq2seq_transformer_test.py +1 -1
  390. official/nlp/modeling/models/t5.py +1 -1
  391. official/nlp/modeling/models/t5_test.py +1 -1
  392. official/nlp/modeling/models/xlnet.py +1 -1
  393. official/nlp/modeling/models/xlnet_test.py +1 -1
  394. official/nlp/modeling/networks/__init__.py +1 -1
  395. official/nlp/modeling/networks/albert_encoder.py +1 -1
  396. official/nlp/modeling/networks/albert_encoder_test.py +1 -1
  397. official/nlp/modeling/networks/bert_dense_encoder_test.py +1 -2
  398. official/nlp/modeling/networks/bert_encoder.py +1 -1
  399. official/nlp/modeling/networks/bert_encoder_test.py +1 -2
  400. official/nlp/modeling/networks/classification.py +1 -1
  401. official/nlp/modeling/networks/classification_test.py +1 -1
  402. official/nlp/modeling/networks/encoder_scaffold.py +1 -1
  403. official/nlp/modeling/networks/encoder_scaffold_test.py +1 -1
  404. official/nlp/modeling/networks/fnet.py +1 -1
  405. official/nlp/modeling/networks/fnet_test.py +1 -1
  406. official/nlp/modeling/networks/funnel_transformer.py +1 -1
  407. official/nlp/modeling/networks/funnel_transformer_test.py +1 -1
  408. official/nlp/modeling/networks/mobile_bert_encoder.py +6 -4
  409. official/nlp/modeling/networks/mobile_bert_encoder_test.py +1 -1
  410. official/nlp/modeling/networks/packed_sequence_embedding.py +1 -1
  411. official/nlp/modeling/networks/packed_sequence_embedding_test.py +1 -3
  412. official/nlp/modeling/networks/span_labeling.py +1 -1
  413. official/nlp/modeling/networks/span_labeling_test.py +1 -1
  414. official/nlp/modeling/networks/sparse_mixer.py +1 -1
  415. official/nlp/modeling/networks/sparse_mixer_test.py +1 -1
  416. official/nlp/modeling/networks/xlnet_base.py +1 -1
  417. official/nlp/modeling/networks/xlnet_base_test.py +1 -1
  418. official/nlp/modeling/ops/__init__.py +1 -1
  419. official/nlp/modeling/ops/beam_search.py +1 -1
  420. official/nlp/modeling/ops/beam_search_test.py +1 -1
  421. official/nlp/modeling/ops/decoding_module.py +1 -1
  422. official/nlp/modeling/ops/decoding_module_test.py +1 -1
  423. official/nlp/modeling/ops/sampling_module.py +3 -3
  424. official/nlp/modeling/ops/segment_extractor.py +1 -1
  425. official/nlp/modeling/ops/segment_extractor_test.py +1 -1
  426. official/nlp/optimization.py +1 -1
  427. official/nlp/serving/__init__.py +1 -1
  428. official/nlp/serving/export_savedmodel.py +1 -1
  429. official/nlp/serving/export_savedmodel_test.py +1 -1
  430. official/nlp/serving/export_savedmodel_util.py +1 -1
  431. official/nlp/serving/serving_modules.py +1 -1
  432. official/nlp/serving/serving_modules_test.py +1 -1
  433. official/nlp/tasks/__init__.py +1 -1
  434. official/nlp/tasks/dual_encoder.py +1 -2
  435. official/nlp/tasks/dual_encoder_test.py +1 -1
  436. official/nlp/tasks/electra_task.py +1 -1
  437. official/nlp/tasks/electra_task_test.py +1 -1
  438. official/nlp/tasks/masked_lm.py +1 -1
  439. official/nlp/tasks/masked_lm_determinism_test.py +1 -1
  440. official/nlp/tasks/masked_lm_test.py +1 -1
  441. official/nlp/tasks/question_answering.py +1 -1
  442. official/nlp/tasks/question_answering_test.py +1 -1
  443. official/nlp/tasks/sentence_prediction.py +1 -1
  444. official/nlp/tasks/sentence_prediction_test.py +1 -1
  445. official/nlp/tasks/tagging.py +1 -1
  446. official/nlp/tasks/tagging_test.py +1 -1
  447. official/nlp/tasks/translation.py +1 -1
  448. official/nlp/tasks/translation_test.py +1 -1
  449. official/nlp/tasks/utils.py +1 -1
  450. official/nlp/tools/__init__.py +1 -1
  451. official/nlp/tools/export_tfhub.py +1 -1
  452. official/nlp/tools/export_tfhub_lib.py +1 -2
  453. official/nlp/tools/export_tfhub_lib_test.py +1 -1
  454. official/nlp/tools/squad_evaluate_v1_1.py +1 -1
  455. official/nlp/tools/squad_evaluate_v2_0.py +1 -1
  456. official/nlp/tools/tf1_bert_checkpoint_converter_lib.py +1 -1
  457. official/nlp/tools/tf2_albert_encoder_checkpoint_converter.py +1 -1
  458. official/nlp/tools/tf2_bert_encoder_checkpoint_converter.py +1 -1
  459. official/nlp/tools/tokenization.py +1 -1
  460. official/nlp/tools/tokenization_test.py +1 -1
  461. official/nlp/train.py +1 -1
  462. official/projects/__init__.py +1 -1
  463. official/projects/bigbird/__init__.py +1 -1
  464. official/projects/bigbird/encoder.py +1 -1
  465. official/projects/bigbird/encoder_test.py +1 -1
  466. official/projects/bigbird/experiment_configs.py +1 -1
  467. official/projects/bigbird/recompute_grad.py +1 -1
  468. official/projects/bigbird/recomputing_dropout.py +1 -1
  469. official/projects/bigbird/stateless_dropout.py +1 -1
  470. official/projects/centernet/__init__.py +1 -1
  471. official/projects/centernet/common/__init__.py +1 -1
  472. official/projects/centernet/common/registry_imports.py +1 -1
  473. official/projects/centernet/configs/__init__.py +1 -1
  474. official/projects/centernet/configs/backbones.py +1 -1
  475. official/projects/centernet/configs/centernet.py +1 -1
  476. official/projects/centernet/configs/centernet_test.py +1 -1
  477. official/projects/centernet/dataloaders/__init__.py +1 -1
  478. official/projects/centernet/dataloaders/centernet_input.py +1 -1
  479. official/projects/centernet/losses/__init__.py +1 -1
  480. official/projects/centernet/losses/centernet_losses.py +1 -1
  481. official/projects/centernet/losses/centernet_losses_test.py +1 -1
  482. official/projects/centernet/modeling/__init__.py +1 -1
  483. official/projects/centernet/modeling/backbones/__init__.py +1 -1
  484. official/projects/centernet/modeling/backbones/hourglass.py +1 -1
  485. official/projects/centernet/modeling/backbones/hourglass_test.py +1 -1
  486. official/projects/centernet/modeling/centernet_model.py +2 -2
  487. official/projects/centernet/modeling/centernet_model_test.py +1 -1
  488. official/projects/centernet/modeling/heads/__init__.py +1 -1
  489. official/projects/centernet/modeling/heads/centernet_head.py +2 -2
  490. official/projects/centernet/modeling/heads/centernet_head_test.py +1 -1
  491. official/projects/centernet/modeling/layers/__init__.py +1 -1
  492. official/projects/centernet/modeling/layers/cn_nn_blocks.py +1 -1
  493. official/projects/centernet/modeling/layers/cn_nn_blocks_test.py +1 -1
  494. official/projects/centernet/modeling/layers/detection_generator.py +1 -1
  495. official/projects/centernet/modeling/layers/detection_generator_test.py +1 -1
  496. official/projects/centernet/ops/__init__.py +1 -1
  497. official/projects/centernet/ops/box_list.py +1 -1
  498. official/projects/centernet/ops/box_list_ops.py +1 -1
  499. official/projects/centernet/ops/loss_ops.py +1 -1
  500. official/projects/centernet/ops/nms_ops.py +1 -1
  501. official/projects/centernet/ops/preprocess_ops.py +1 -1
  502. official/projects/centernet/ops/target_assigner.py +1 -1
  503. official/projects/centernet/ops/target_assigner_test.py +1 -1
  504. official/projects/centernet/tasks/__init__.py +1 -1
  505. official/projects/centernet/tasks/centernet.py +1 -1
  506. official/projects/centernet/train.py +1 -1
  507. official/projects/centernet/utils/__init__.py +1 -1
  508. official/projects/centernet/utils/checkpoints/__init__.py +1 -1
  509. official/projects/centernet/utils/checkpoints/config_classes.py +1 -1
  510. official/projects/centernet/utils/checkpoints/config_data.py +1 -1
  511. official/projects/centernet/utils/checkpoints/load_weights.py +1 -1
  512. official/projects/centernet/utils/checkpoints/read_checkpoints.py +1 -1
  513. official/projects/centernet/utils/tf2_centernet_checkpoint_converter.py +1 -1
  514. official/projects/deepmac_maskrcnn/__init__.py +1 -1
  515. official/projects/deepmac_maskrcnn/common/__init__.py +1 -1
  516. official/projects/deepmac_maskrcnn/common/registry_imports.py +1 -1
  517. official/projects/deepmac_maskrcnn/configs/__init__.py +1 -1
  518. official/projects/deepmac_maskrcnn/configs/deep_mask_head_rcnn.py +1 -1
  519. official/projects/deepmac_maskrcnn/configs/deep_mask_head_rcnn_config_test.py +1 -1
  520. official/projects/deepmac_maskrcnn/modeling/__init__.py +1 -1
  521. official/projects/deepmac_maskrcnn/modeling/heads/__init__.py +1 -1
  522. official/projects/deepmac_maskrcnn/modeling/heads/hourglass_network.py +1 -1
  523. official/projects/deepmac_maskrcnn/modeling/heads/instance_heads.py +1 -3
  524. official/projects/deepmac_maskrcnn/modeling/heads/instance_heads_test.py +1 -2
  525. official/projects/deepmac_maskrcnn/modeling/maskrcnn_model.py +1 -3
  526. official/projects/deepmac_maskrcnn/modeling/maskrcnn_model_test.py +1 -3
  527. official/projects/deepmac_maskrcnn/serving/__init__.py +1 -1
  528. official/projects/deepmac_maskrcnn/serving/detection.py +1 -1
  529. official/projects/deepmac_maskrcnn/serving/detection_test.py +1 -1
  530. official/projects/deepmac_maskrcnn/serving/export_saved_model.py +1 -1
  531. official/projects/deepmac_maskrcnn/tasks/__init__.py +1 -1
  532. official/projects/deepmac_maskrcnn/tasks/deep_mask_head_rcnn.py +1 -1
  533. official/projects/deepmac_maskrcnn/train.py +1 -1
  534. official/projects/detr/__init__.py +14 -0
  535. official/projects/detr/configs/__init__.py +14 -0
  536. official/projects/detr/configs/detr.py +277 -0
  537. official/projects/detr/configs/detr_test.py +51 -0
  538. official/projects/detr/dataloaders/__init__.py +14 -0
  539. official/projects/detr/dataloaders/coco.py +157 -0
  540. official/projects/detr/dataloaders/coco_test.py +111 -0
  541. official/projects/detr/dataloaders/detr_input.py +175 -0
  542. official/projects/detr/experiments/__init__.py +14 -0
  543. official/projects/detr/modeling/__init__.py +14 -0
  544. official/projects/detr/modeling/detr.py +345 -0
  545. official/projects/detr/modeling/detr_test.py +70 -0
  546. official/projects/detr/modeling/transformer.py +849 -0
  547. official/projects/detr/modeling/transformer_test.py +263 -0
  548. official/projects/detr/ops/__init__.py +14 -0
  549. official/projects/detr/ops/matchers.py +489 -0
  550. official/projects/detr/ops/matchers_test.py +95 -0
  551. official/projects/detr/optimization.py +151 -0
  552. official/projects/detr/serving/__init__.py +14 -0
  553. official/projects/detr/serving/export_module.py +103 -0
  554. official/projects/detr/serving/export_module_test.py +98 -0
  555. official/projects/detr/serving/export_saved_model.py +109 -0
  556. official/projects/detr/tasks/__init__.py +14 -0
  557. official/projects/detr/tasks/detection.py +433 -0
  558. official/projects/detr/tasks/detection_test.py +203 -0
  559. official/projects/detr/train.py +70 -0
  560. official/projects/maskconver/__init__.py +14 -0
  561. official/projects/maskconver/configs/__init__.py +14 -0
  562. official/projects/maskconver/configs/backbones.py +43 -0
  563. official/projects/maskconver/configs/decoders.py +36 -0
  564. official/projects/maskconver/configs/maskconver.py +523 -0
  565. official/projects/maskconver/configs/multiscale_maskconver.py +215 -0
  566. official/projects/maskconver/tasks/__init__.py +14 -0
  567. official/projects/maskconver/tasks/maskconver.py +641 -0
  568. official/projects/maskconver/tasks/multiscale_maskconver.py +278 -0
  569. official/projects/maskconver/train.py +30 -0
  570. official/projects/maxvit/__init__.py +1 -1
  571. official/projects/maxvit/configs/__init__.py +1 -1
  572. official/projects/maxvit/configs/backbones.py +1 -1
  573. official/projects/maxvit/configs/image_classification.py +1 -1
  574. official/projects/maxvit/configs/image_classification_test.py +1 -1
  575. official/projects/maxvit/configs/rcnn.py +1 -1
  576. official/projects/maxvit/configs/rcnn_test.py +1 -1
  577. official/projects/maxvit/configs/retinanet.py +1 -1
  578. official/projects/maxvit/configs/retinanet_test.py +1 -1
  579. official/projects/maxvit/configs/semantic_segmentation.py +1 -1
  580. official/projects/maxvit/configs/semantic_segmentation_test.py +1 -1
  581. official/projects/maxvit/modeling/__init__.py +1 -1
  582. official/projects/maxvit/modeling/common_ops.py +14 -1
  583. official/projects/maxvit/modeling/layers.py +1 -1
  584. official/projects/maxvit/modeling/maxvit.py +2 -2
  585. official/projects/maxvit/modeling/maxvit_test.py +1 -1
  586. official/projects/maxvit/registry_imports.py +1 -1
  587. official/projects/maxvit/train.py +1 -1
  588. official/projects/maxvit/train_test.py +1 -1
  589. official/projects/mobilebert/__init__.py +1 -1
  590. official/projects/mobilebert/distillation.py +1 -1
  591. official/projects/mobilebert/distillation_test.py +1 -1
  592. official/projects/mobilebert/export_tfhub.py +1 -1
  593. official/projects/mobilebert/model_utils.py +1 -1
  594. official/projects/mobilebert/run_distillation.py +1 -1
  595. official/projects/mobilebert/tf2_model_checkpoint_converter.py +1 -1
  596. official/projects/mobilebert/utils.py +1 -1
  597. official/projects/movinet/__init__.py +1 -1
  598. official/projects/movinet/configs/__init__.py +1 -1
  599. official/projects/movinet/configs/movinet.py +1 -1
  600. official/projects/movinet/configs/movinet_test.py +1 -1
  601. official/projects/movinet/modeling/__init__.py +1 -1
  602. official/projects/movinet/modeling/movinet.py +1 -1
  603. official/projects/movinet/modeling/movinet_layers.py +1 -1
  604. official/projects/movinet/modeling/movinet_layers_test.py +1 -1
  605. official/projects/movinet/modeling/movinet_model.py +1 -1
  606. official/projects/movinet/modeling/movinet_model_test.py +1 -1
  607. official/projects/movinet/modeling/movinet_test.py +1 -1
  608. official/projects/movinet/tools/__init__.py +1 -1
  609. official/projects/movinet/tools/convert_3d_2plus1d.py +1 -1
  610. official/projects/movinet/tools/convert_3d_2plus1d_test.py +1 -1
  611. official/projects/movinet/tools/export_saved_model.py +1 -1
  612. official/projects/movinet/tools/export_saved_model_test.py +6 -3
  613. official/projects/movinet/tools/quantize_movinet.py +1 -1
  614. official/projects/movinet/train.py +1 -1
  615. official/projects/movinet/train_test.py +1 -1
  616. official/projects/nhnet/__init__.py +1 -1
  617. official/projects/nhnet/configs.py +1 -1
  618. official/projects/nhnet/configs_test.py +1 -1
  619. official/projects/nhnet/decoder.py +1 -1
  620. official/projects/nhnet/decoder_test.py +1 -1
  621. official/projects/nhnet/evaluation.py +1 -3
  622. official/projects/nhnet/input_pipeline.py +1 -1
  623. official/projects/nhnet/models.py +1 -1
  624. official/projects/nhnet/models_test.py +1 -1
  625. official/projects/nhnet/optimizer.py +1 -1
  626. official/projects/nhnet/raw_data_process.py +1 -1
  627. official/projects/nhnet/raw_data_processor.py +1 -1
  628. official/projects/nhnet/trainer.py +1 -6
  629. official/projects/nhnet/trainer_test.py +1 -1
  630. official/projects/nhnet/utils.py +1 -1
  631. official/projects/panoptic/__init__.py +1 -1
  632. official/projects/panoptic/configs/__init__.py +1 -1
  633. official/projects/panoptic/configs/panoptic_deeplab.py +5 -6
  634. official/projects/panoptic/configs/panoptic_maskrcnn.py +1 -1
  635. official/projects/panoptic/tasks/__init__.py +1 -1
  636. official/projects/panoptic/tasks/panoptic_deeplab.py +1 -1
  637. official/projects/panoptic/tasks/panoptic_maskrcnn.py +3 -1
  638. official/projects/panoptic/train.py +1 -1
  639. official/projects/qat/__init__.py +1 -1
  640. official/projects/qat/nlp/__init__.py +1 -1
  641. official/projects/qat/nlp/configs/__init__.py +1 -1
  642. official/projects/qat/nlp/configs/finetuning_experiments.py +1 -1
  643. official/projects/qat/nlp/modeling/__init__.py +1 -1
  644. official/projects/qat/nlp/modeling/layers/__init__.py +1 -1
  645. official/projects/qat/nlp/modeling/layers/mobile_bert_layers.py +1 -1
  646. official/projects/qat/nlp/modeling/layers/multi_head_attention.py +1 -1
  647. official/projects/qat/nlp/modeling/layers/transformer_encoder_block.py +1 -1
  648. official/projects/qat/nlp/modeling/layers/transformer_encoder_block_test.py +1 -1
  649. official/projects/qat/nlp/modeling/models/__init__.py +1 -1
  650. official/projects/qat/nlp/modeling/models/bert_span_labeler.py +1 -1
  651. official/projects/qat/nlp/modeling/networks/__init__.py +1 -1
  652. official/projects/qat/nlp/modeling/networks/span_labeling.py +1 -1
  653. official/projects/qat/nlp/pretrained_checkpoint_converter.py +1 -3
  654. official/projects/qat/nlp/quantization/__init__.py +1 -1
  655. official/projects/qat/nlp/quantization/configs.py +1 -1
  656. official/projects/qat/nlp/quantization/configs_test.py +1 -2
  657. official/projects/qat/nlp/quantization/helper.py +1 -1
  658. official/projects/qat/nlp/quantization/schemes.py +1 -3
  659. official/projects/qat/nlp/quantization/wrappers.py +1 -1
  660. official/projects/qat/nlp/registry_imports.py +1 -1
  661. official/projects/qat/nlp/tasks/__init__.py +1 -1
  662. official/projects/qat/nlp/tasks/question_answering.py +1 -1
  663. official/projects/qat/nlp/tasks/question_answering_test.py +1 -1
  664. official/projects/qat/nlp/train.py +1 -1
  665. official/projects/qat/vision/__init__.py +1 -1
  666. official/projects/qat/vision/configs/__init__.py +1 -1
  667. official/projects/qat/vision/configs/common.py +1 -1
  668. official/projects/qat/vision/configs/image_classification.py +1 -1
  669. official/projects/qat/vision/configs/image_classification_test.py +1 -1
  670. official/projects/qat/vision/configs/retinanet.py +1 -1
  671. official/projects/qat/vision/configs/retinanet_test.py +1 -1
  672. official/projects/qat/vision/configs/semantic_segmentation.py +1 -1
  673. official/projects/qat/vision/configs/semantic_segmentation_test.py +1 -1
  674. official/projects/qat/vision/modeling/__init__.py +1 -1
  675. official/projects/qat/vision/modeling/factory.py +1 -3
  676. official/projects/qat/vision/modeling/factory_test.py +1 -3
  677. official/projects/qat/vision/modeling/heads/__init__.py +1 -1
  678. official/projects/qat/vision/modeling/heads/dense_prediction_heads.py +1 -3
  679. official/projects/qat/vision/modeling/heads/dense_prediction_heads_test.py +1 -2
  680. official/projects/qat/vision/modeling/layers/__init__.py +1 -1
  681. official/projects/qat/vision/modeling/layers/nn_blocks.py +1 -3
  682. official/projects/qat/vision/modeling/layers/nn_blocks_test.py +1 -2
  683. official/projects/qat/vision/modeling/layers/nn_layers.py +2 -2
  684. official/projects/qat/vision/modeling/layers/nn_layers_test.py +1 -2
  685. official/projects/qat/vision/modeling/segmentation_model.py +1 -2
  686. official/projects/qat/vision/n_bit/__init__.py +1 -1
  687. official/projects/qat/vision/n_bit/configs.py +1 -1
  688. official/projects/qat/vision/n_bit/configs_test.py +1 -3
  689. official/projects/qat/vision/n_bit/nn_blocks.py +1 -3
  690. official/projects/qat/vision/n_bit/nn_blocks_test.py +1 -2
  691. official/projects/qat/vision/n_bit/nn_layers.py +1 -1
  692. official/projects/qat/vision/n_bit/schemes.py +1 -3
  693. official/projects/qat/vision/quantization/__init__.py +1 -1
  694. official/projects/qat/vision/quantization/configs.py +1 -1
  695. official/projects/qat/vision/quantization/configs_test.py +1 -3
  696. official/projects/qat/vision/quantization/helper.py +1 -1
  697. official/projects/qat/vision/quantization/helper_test.py +1 -1
  698. official/projects/qat/vision/quantization/layer_transforms.py +1 -1
  699. official/projects/qat/vision/quantization/schemes.py +1 -3
  700. official/projects/qat/vision/registry_imports.py +1 -1
  701. official/projects/qat/vision/serving/__init__.py +1 -1
  702. official/projects/qat/vision/serving/export_module.py +1 -1
  703. official/projects/qat/vision/serving/export_saved_model.py +1 -1
  704. official/projects/qat/vision/serving/export_tflite.py +1 -1
  705. official/projects/qat/vision/tasks/__init__.py +1 -1
  706. official/projects/qat/vision/tasks/image_classification.py +1 -1
  707. official/projects/qat/vision/tasks/image_classification_test.py +1 -1
  708. official/projects/qat/vision/tasks/retinanet.py +1 -1
  709. official/projects/qat/vision/tasks/retinanet_test.py +1 -1
  710. official/projects/qat/vision/tasks/semantic_segmentation.py +1 -1
  711. official/projects/qat/vision/train.py +1 -1
  712. official/projects/roformer/__init__.py +1 -1
  713. official/projects/roformer/roformer.py +1 -1
  714. official/projects/roformer/roformer_attention.py +1 -1
  715. official/projects/roformer/roformer_attention_test.py +1 -1
  716. official/projects/roformer/roformer_encoder.py +1 -1
  717. official/projects/roformer/roformer_encoder_block.py +1 -1
  718. official/projects/roformer/roformer_encoder_block_test.py +1 -1
  719. official/projects/roformer/roformer_encoder_test.py +1 -1
  720. official/projects/roformer/roformer_experiments.py +1 -1
  721. official/projects/roformer/train.py +1 -1
  722. official/projects/teams/__init__.py +1 -1
  723. official/projects/teams/teams.py +1 -1
  724. official/projects/teams/teams_experiments.py +1 -1
  725. official/projects/teams/teams_pretrainer.py +1 -1
  726. official/projects/teams/teams_pretrainer_test.py +1 -1
  727. official/projects/teams/teams_task.py +1 -1
  728. official/projects/teams/teams_task_test.py +1 -1
  729. official/projects/teams/train.py +1 -1
  730. official/projects/triviaqa/__init__.py +1 -1
  731. official/projects/triviaqa/dataset.py +1 -1
  732. official/projects/triviaqa/download_and_prepare.py +1 -1
  733. official/projects/triviaqa/evaluate.py +1 -1
  734. official/projects/triviaqa/evaluation.py +1 -1
  735. official/projects/triviaqa/inputs.py +1 -1
  736. official/projects/triviaqa/modeling.py +1 -1
  737. official/projects/triviaqa/predict.py +1 -1
  738. official/projects/triviaqa/prediction.py +1 -1
  739. official/projects/triviaqa/preprocess.py +1 -1
  740. official/projects/triviaqa/sentencepiece_pb2.py +1 -1
  741. official/projects/triviaqa/train.py +1 -1
  742. official/projects/video_ssl/__init__.py +1 -1
  743. official/projects/video_ssl/configs/__init__.py +1 -1
  744. official/projects/video_ssl/configs/video_ssl.py +1 -1
  745. official/projects/video_ssl/configs/video_ssl_test.py +1 -1
  746. official/projects/video_ssl/dataloaders/__init__.py +1 -1
  747. official/projects/video_ssl/dataloaders/video_ssl_input.py +1 -1
  748. official/projects/video_ssl/dataloaders/video_ssl_input_test.py +1 -2
  749. official/projects/video_ssl/losses/__init__.py +1 -1
  750. official/projects/video_ssl/losses/losses.py +1 -2
  751. official/projects/video_ssl/modeling/__init__.py +1 -1
  752. official/projects/video_ssl/modeling/video_ssl_model.py +1 -3
  753. official/projects/video_ssl/ops/__init__.py +1 -1
  754. official/projects/video_ssl/ops/video_ssl_preprocess_ops.py +1 -1
  755. official/projects/video_ssl/ops/video_ssl_preprocess_ops_test.py +1 -1
  756. official/projects/video_ssl/tasks/__init__.py +1 -1
  757. official/projects/video_ssl/tasks/linear_eval.py +1 -1
  758. official/projects/video_ssl/tasks/pretrain.py +1 -1
  759. official/projects/video_ssl/tasks/pretrain_test.py +1 -1
  760. official/projects/video_ssl/train.py +1 -1
  761. official/projects/volumetric_models/__init__.py +1 -1
  762. official/projects/volumetric_models/configs/__init__.py +1 -1
  763. official/projects/volumetric_models/configs/backbones.py +1 -1
  764. official/projects/volumetric_models/configs/decoders.py +1 -1
  765. official/projects/volumetric_models/configs/semantic_segmentation_3d.py +1 -1
  766. official/projects/volumetric_models/configs/semantic_segmentation_3d_test.py +1 -1
  767. official/projects/volumetric_models/dataloaders/__init__.py +1 -1
  768. official/projects/volumetric_models/dataloaders/segmentation_input_3d.py +1 -1
  769. official/projects/volumetric_models/dataloaders/segmentation_input_3d_test.py +1 -1
  770. official/projects/volumetric_models/evaluation/__init__.py +1 -1
  771. official/projects/volumetric_models/evaluation/segmentation_metrics.py +1 -1
  772. official/projects/volumetric_models/evaluation/segmentation_metrics_test.py +1 -1
  773. official/projects/volumetric_models/losses/__init__.py +1 -1
  774. official/projects/volumetric_models/losses/segmentation_losses.py +1 -1
  775. official/projects/volumetric_models/losses/segmentation_losses_test.py +1 -1
  776. official/projects/volumetric_models/modeling/__init__.py +1 -1
  777. official/projects/volumetric_models/modeling/backbones/__init__.py +1 -1
  778. official/projects/volumetric_models/modeling/backbones/unet_3d.py +1 -2
  779. official/projects/volumetric_models/modeling/backbones/unet_3d_test.py +1 -2
  780. official/projects/volumetric_models/modeling/decoders/__init__.py +1 -1
  781. official/projects/volumetric_models/modeling/decoders/factory.py +1 -3
  782. official/projects/volumetric_models/modeling/decoders/factory_test.py +1 -1
  783. official/projects/volumetric_models/modeling/decoders/unet_3d_decoder.py +1 -1
  784. official/projects/volumetric_models/modeling/decoders/unet_3d_decoder_test.py +1 -2
  785. official/projects/volumetric_models/modeling/factory.py +1 -3
  786. official/projects/volumetric_models/modeling/factory_test.py +1 -1
  787. official/projects/volumetric_models/modeling/heads/__init__.py +1 -1
  788. official/projects/volumetric_models/modeling/heads/segmentation_heads_3d.py +1 -1
  789. official/projects/volumetric_models/modeling/heads/segmentation_heads_3d_test.py +1 -1
  790. official/projects/volumetric_models/modeling/nn_blocks_3d.py +2 -3
  791. official/projects/volumetric_models/modeling/nn_blocks_3d_test.py +1 -2
  792. official/projects/volumetric_models/modeling/segmentation_model_test.py +1 -1
  793. official/projects/volumetric_models/registry_imports.py +1 -1
  794. official/projects/volumetric_models/serving/__init__.py +1 -1
  795. official/projects/volumetric_models/serving/export_saved_model.py +1 -1
  796. official/projects/volumetric_models/serving/semantic_segmentation_3d.py +1 -1
  797. official/projects/volumetric_models/serving/semantic_segmentation_3d_test.py +3 -3
  798. official/projects/volumetric_models/tasks/__init__.py +1 -1
  799. official/projects/volumetric_models/tasks/semantic_segmentation_3d.py +1 -1
  800. official/projects/volumetric_models/tasks/semantic_segmentation_3d_test.py +1 -1
  801. official/projects/volumetric_models/train.py +1 -1
  802. official/projects/volumetric_models/train_test.py +1 -1
  803. official/projects/waste_identification_ml/__init__.py +1 -1
  804. official/projects/waste_identification_ml/data_generation/__init__.py +1 -1
  805. official/projects/waste_identification_ml/data_generation/utils.py +1 -1
  806. official/projects/waste_identification_ml/data_generation/utils_test.py +1 -1
  807. official/projects/yolo/__init__.py +1 -1
  808. official/projects/yolo/common/__init__.py +1 -1
  809. official/projects/yolo/common/registry_imports.py +1 -1
  810. official/projects/yolo/configs/__init__.py +1 -1
  811. official/projects/yolo/configs/backbones.py +1 -1
  812. official/projects/yolo/configs/darknet_classification.py +1 -1
  813. official/projects/yolo/configs/decoders.py +1 -1
  814. official/projects/yolo/configs/yolo.py +1 -1
  815. official/projects/yolo/configs/yolov7.py +17 -1
  816. official/projects/yolo/dataloaders/__init__.py +1 -1
  817. official/projects/yolo/dataloaders/classification_input.py +1 -1
  818. official/projects/yolo/dataloaders/tf_example_decoder.py +1 -1
  819. official/projects/yolo/dataloaders/yolo_input.py +1 -1
  820. official/projects/yolo/losses/__init__.py +1 -1
  821. official/projects/yolo/losses/yolo_loss.py +1 -1
  822. official/projects/yolo/losses/yolo_loss_test.py +1 -1
  823. official/projects/yolo/losses/yolov7_loss.py +1 -1
  824. official/projects/yolo/losses/yolov7_loss_test.py +1 -1
  825. official/projects/yolo/modeling/__init__.py +1 -1
  826. official/projects/yolo/modeling/backbones/__init__.py +1 -1
  827. official/projects/yolo/modeling/backbones/darknet.py +1 -1
  828. official/projects/yolo/modeling/backbones/darknet_test.py +1 -1
  829. official/projects/yolo/modeling/backbones/yolov7.py +69 -1
  830. official/projects/yolo/modeling/backbones/yolov7_test.py +1 -1
  831. official/projects/yolo/modeling/decoders/__init__.py +1 -1
  832. official/projects/yolo/modeling/decoders/yolo_decoder.py +1 -1
  833. official/projects/yolo/modeling/decoders/yolo_decoder_test.py +1 -2
  834. official/projects/yolo/modeling/decoders/yolov7.py +90 -1
  835. official/projects/yolo/modeling/decoders/yolov7_test.py +1 -1
  836. official/projects/yolo/modeling/factory.py +1 -1
  837. official/projects/yolo/modeling/factory_test.py +1 -1
  838. official/projects/yolo/modeling/heads/__init__.py +1 -1
  839. official/projects/yolo/modeling/heads/yolo_head.py +1 -1
  840. official/projects/yolo/modeling/heads/yolo_head_test.py +1 -2
  841. official/projects/yolo/modeling/heads/yolov7_head.py +1 -1
  842. official/projects/yolo/modeling/heads/yolov7_head_test.py +1 -1
  843. official/projects/yolo/modeling/layers/__init__.py +1 -1
  844. official/projects/yolo/modeling/layers/detection_generator.py +1 -1
  845. official/projects/yolo/modeling/layers/detection_generator_test.py +1 -1
  846. official/projects/yolo/modeling/layers/nn_blocks.py +1 -1
  847. official/projects/yolo/modeling/layers/nn_blocks_test.py +1 -1
  848. official/projects/yolo/modeling/yolo_model.py +2 -2
  849. official/projects/yolo/modeling/yolov7_model.py +2 -2
  850. official/projects/yolo/ops/__init__.py +1 -1
  851. official/projects/yolo/ops/anchor.py +1 -1
  852. official/projects/yolo/ops/box_ops.py +1 -1
  853. official/projects/yolo/ops/box_ops_test.py +1 -1
  854. official/projects/yolo/ops/initializer_ops.py +1 -1
  855. official/projects/yolo/ops/kmeans_anchors.py +1 -1
  856. official/projects/yolo/ops/kmeans_anchors_test.py +1 -1
  857. official/projects/yolo/ops/loss_utils.py +1 -1
  858. official/projects/yolo/ops/math_ops.py +1 -1
  859. official/projects/yolo/ops/mosaic.py +1 -1
  860. official/projects/yolo/ops/preprocessing_ops.py +1 -1
  861. official/projects/yolo/ops/preprocessing_ops_test.py +1 -1
  862. official/projects/yolo/optimization/__init__.py +1 -1
  863. official/projects/yolo/optimization/configs/__init__.py +1 -1
  864. official/projects/yolo/optimization/configs/optimization_config.py +1 -1
  865. official/projects/yolo/optimization/configs/optimizer_config.py +1 -1
  866. official/projects/yolo/optimization/optimizer_factory.py +1 -1
  867. official/projects/yolo/optimization/sgd_torch.py +1 -1
  868. official/projects/yolo/serving/__init__.py +1 -1
  869. official/projects/yolo/serving/export_module_factory.py +1 -1
  870. official/projects/yolo/serving/export_saved_model.py +1 -1
  871. official/projects/yolo/serving/export_tflite.py +1 -1
  872. official/projects/yolo/serving/model_fn.py +1 -1
  873. official/projects/yolo/tasks/__init__.py +1 -1
  874. official/projects/yolo/tasks/image_classification.py +1 -1
  875. official/projects/yolo/tasks/task_utils.py +1 -1
  876. official/projects/yolo/tasks/yolo.py +1 -1
  877. official/projects/yolo/tasks/yolov7.py +1 -1
  878. official/projects/yolo/train.py +1 -1
  879. official/projects/yt8m/__init__.py +1 -1
  880. official/projects/yt8m/configs/__init__.py +1 -1
  881. official/projects/yt8m/configs/yt8m.py +1 -1
  882. official/projects/yt8m/configs/yt8m_test.py +1 -1
  883. official/projects/yt8m/modeling/__init__.py +1 -1
  884. official/projects/yt8m/modeling/backbones/__init__.py +1 -1
  885. official/projects/yt8m/modeling/backbones/dbof.py +1 -1
  886. official/projects/yt8m/modeling/backbones/dbof_test.py +1 -1
  887. official/projects/yt8m/modeling/heads/__init__.py +1 -1
  888. official/projects/yt8m/modeling/heads/logistic.py +1 -1
  889. official/projects/yt8m/modeling/heads/moe.py +1 -1
  890. official/projects/yt8m/modeling/nn_layers.py +1 -1
  891. official/projects/yt8m/modeling/nn_layers_test.py +1 -1
  892. official/projects/yt8m/modeling/yt8m_model.py +1 -1
  893. official/projects/yt8m/modeling/yt8m_model_test.py +1 -1
  894. official/projects/yt8m/modeling/yt8m_model_utils.py +1 -1
  895. official/projects/yt8m/modeling/yt8m_model_utils_test.py +1 -1
  896. official/projects/yt8m/tasks/__init__.py +1 -1
  897. official/projects/yt8m/tasks/yt8m_task.py +1 -1
  898. official/projects/yt8m/train.py +1 -1
  899. official/projects/yt8m/train_test.py +1 -1
  900. official/recommendation/__init__.py +1 -1
  901. official/recommendation/constants.py +1 -1
  902. official/recommendation/create_ncf_data.py +1 -2
  903. official/recommendation/data_pipeline.py +1 -1
  904. official/recommendation/data_preprocessing.py +1 -1
  905. official/recommendation/data_test.py +4 -4
  906. official/recommendation/movielens.py +1 -2
  907. official/recommendation/ncf_common.py +1 -1
  908. official/recommendation/ncf_input_pipeline.py +1 -1
  909. official/recommendation/ncf_keras_main.py +1 -1
  910. official/recommendation/ncf_test.py +1 -1
  911. official/recommendation/neumf_model.py +1 -1
  912. official/recommendation/popen_helper.py +1 -1
  913. official/recommendation/ranking/__init__.py +1 -1
  914. official/recommendation/ranking/common.py +1 -1
  915. official/recommendation/ranking/configs/__init__.py +1 -1
  916. official/recommendation/ranking/configs/config.py +14 -1
  917. official/recommendation/ranking/configs/config_test.py +1 -1
  918. official/recommendation/ranking/data/__init__.py +1 -1
  919. official/recommendation/ranking/data/data_pipeline.py +9 -2
  920. official/recommendation/ranking/data/data_pipeline_multi_hot.py +8 -2
  921. official/recommendation/ranking/data/data_pipeline_multi_hot_test.py +12 -6
  922. official/recommendation/ranking/data/data_pipeline_test.py +18 -8
  923. official/recommendation/ranking/task.py +102 -19
  924. official/recommendation/ranking/task_test.py +1 -1
  925. official/recommendation/ranking/train.py +1 -1
  926. official/recommendation/ranking/train_test.py +76 -31
  927. official/recommendation/stat_utils.py +1 -1
  928. official/recommendation/uplift/__init__.py +1 -1
  929. official/recommendation/uplift/keras_test_case.py +1 -1
  930. official/recommendation/uplift/keys.py +1 -1
  931. official/recommendation/uplift/layers/__init__.py +1 -1
  932. official/recommendation/uplift/layers/encoders/__init__.py +1 -1
  933. official/recommendation/uplift/layers/encoders/concat_features.py +1 -1
  934. official/recommendation/uplift/layers/encoders/concat_features_test.py +1 -1
  935. official/recommendation/uplift/layers/heads/__init__.py +1 -1
  936. official/recommendation/uplift/layers/heads/two_tower_logits_head.py +1 -1
  937. official/recommendation/uplift/layers/heads/two_tower_logits_head_test.py +1 -1
  938. official/recommendation/uplift/layers/uplift_networks/__init__.py +1 -1
  939. official/recommendation/uplift/layers/uplift_networks/base_uplift_networks.py +1 -1
  940. official/recommendation/uplift/layers/uplift_networks/two_tower_output_head.py +1 -1
  941. official/recommendation/uplift/layers/uplift_networks/two_tower_output_head_test.py +1 -1
  942. official/recommendation/uplift/layers/uplift_networks/two_tower_uplift_network.py +1 -1
  943. official/recommendation/uplift/layers/uplift_networks/two_tower_uplift_network_test.py +1 -1
  944. official/recommendation/uplift/losses/__init__.py +1 -1
  945. official/recommendation/uplift/losses/true_logits_loss.py +1 -1
  946. official/recommendation/uplift/losses/true_logits_loss_test.py +1 -1
  947. official/recommendation/uplift/metrics/__init__.py +1 -1
  948. official/recommendation/uplift/metrics/label_mean.py +1 -1
  949. official/recommendation/uplift/metrics/label_mean_test.py +1 -1
  950. official/recommendation/uplift/metrics/label_variance.py +1 -1
  951. official/recommendation/uplift/metrics/label_variance_test.py +1 -1
  952. official/recommendation/uplift/metrics/loss_metric.py +1 -1
  953. official/recommendation/uplift/metrics/loss_metric_test.py +1 -1
  954. official/recommendation/uplift/metrics/metric_configs.py +1 -1
  955. official/recommendation/uplift/metrics/poisson_metrics.py +1 -1
  956. official/recommendation/uplift/metrics/poisson_metrics_test.py +1 -1
  957. official/recommendation/uplift/metrics/sliced_metric.py +1 -1
  958. official/recommendation/uplift/metrics/sliced_metric_test.py +1 -1
  959. official/recommendation/uplift/metrics/treatment_fraction.py +1 -1
  960. official/recommendation/uplift/metrics/treatment_fraction_test.py +1 -1
  961. official/recommendation/uplift/metrics/treatment_sliced_metric.py +1 -1
  962. official/recommendation/uplift/metrics/treatment_sliced_metric_test.py +1 -1
  963. official/recommendation/uplift/metrics/uplift_mean.py +1 -1
  964. official/recommendation/uplift/metrics/uplift_mean_test.py +1 -1
  965. official/recommendation/uplift/metrics/variance.py +1 -1
  966. official/recommendation/uplift/metrics/variance_test.py +12 -10
  967. official/recommendation/uplift/models/__init__.py +1 -1
  968. official/recommendation/uplift/models/two_tower_uplift_model.py +1 -1
  969. official/recommendation/uplift/models/two_tower_uplift_model_test.py +1 -1
  970. official/recommendation/uplift/types.py +1 -1
  971. official/recommendation/uplift/utils.py +3 -3
  972. official/recommendation/uplift/utils_test.py +1 -1
  973. official/utils/__init__.py +1 -1
  974. official/utils/docs/__init__.py +1 -1
  975. official/utils/docs/build_orbit_api_docs.py +1 -1
  976. official/utils/docs/build_tfm_api_docs.py +1 -1
  977. official/utils/flags/__init__.py +1 -1
  978. official/utils/flags/_base.py +1 -1
  979. official/utils/flags/_benchmark.py +1 -1
  980. official/utils/flags/_conventions.py +1 -1
  981. official/utils/flags/_device.py +1 -1
  982. official/utils/flags/_distribution.py +1 -1
  983. official/utils/flags/_misc.py +1 -1
  984. official/utils/flags/_performance.py +1 -1
  985. official/utils/flags/core.py +1 -1
  986. official/utils/flags/flags_test.py +1 -1
  987. official/utils/hyperparams_flags.py +1 -1
  988. official/utils/misc/__init__.py +1 -1
  989. official/utils/misc/keras_utils.py +1 -1
  990. official/utils/misc/model_helpers.py +1 -1
  991. official/utils/misc/model_helpers_test.py +3 -3
  992. official/utils/testing/__init__.py +1 -1
  993. official/utils/testing/integration.py +1 -1
  994. official/utils/testing/mock_task.py +1 -1
  995. official/vision/__init__.py +1 -1
  996. official/vision/configs/__init__.py +1 -1
  997. official/vision/configs/backbones.py +3 -1
  998. official/vision/configs/backbones_3d.py +1 -2
  999. official/vision/configs/common.py +1 -3
  1000. official/vision/configs/decoders.py +1 -3
  1001. official/vision/configs/image_classification.py +1 -1
  1002. official/vision/configs/image_classification_test.py +1 -1
  1003. official/vision/configs/maskrcnn.py +1 -1
  1004. official/vision/configs/maskrcnn_test.py +1 -1
  1005. official/vision/configs/retinanet.py +2 -1
  1006. official/vision/configs/retinanet_test.py +1 -1
  1007. official/vision/configs/semantic_segmentation.py +7 -8
  1008. official/vision/configs/semantic_segmentation_test.py +1 -1
  1009. official/vision/configs/video_classification.py +1 -1
  1010. official/vision/configs/video_classification_test.py +1 -1
  1011. official/vision/data/__init__.py +1 -1
  1012. official/vision/data/create_coco_tf_record.py +1 -1
  1013. official/vision/data/fake_feature_generator.py +5 -2
  1014. official/vision/data/image_utils.py +1 -1
  1015. official/vision/data/image_utils_test.py +1 -1
  1016. official/vision/data/process_coco_few_shot_json_files.py +1 -1
  1017. official/vision/data/tf_example_builder.py +1 -1
  1018. official/vision/data/tf_example_builder_test.py +1 -1
  1019. official/vision/data/tf_example_feature_key.py +1 -1
  1020. official/vision/data/tfrecord_lib.py +1 -1
  1021. official/vision/data/tfrecord_lib_test.py +1 -1
  1022. official/vision/dataloaders/__init__.py +1 -1
  1023. official/vision/dataloaders/classification_input.py +1 -2
  1024. official/vision/dataloaders/decoder.py +1 -1
  1025. official/vision/dataloaders/input_reader.py +1 -1
  1026. official/vision/dataloaders/input_reader_factory.py +1 -1
  1027. official/vision/dataloaders/maskrcnn_input.py +1 -2
  1028. official/vision/dataloaders/parser.py +1 -1
  1029. official/vision/dataloaders/retinanet_input.py +1 -3
  1030. official/vision/dataloaders/segmentation_input.py +9 -4
  1031. official/vision/dataloaders/tf_example_decoder.py +1 -1
  1032. official/vision/dataloaders/tf_example_decoder_test.py +1 -2
  1033. official/vision/dataloaders/tf_example_label_map_decoder.py +1 -2
  1034. official/vision/dataloaders/tf_example_label_map_decoder_test.py +1 -2
  1035. official/vision/dataloaders/tfds_classification_decoders.py +1 -1
  1036. official/vision/dataloaders/tfds_detection_decoders.py +1 -1
  1037. official/vision/dataloaders/tfds_factory.py +1 -1
  1038. official/vision/dataloaders/tfds_factory_test.py +1 -1
  1039. official/vision/dataloaders/tfds_segmentation_decoders.py +1 -1
  1040. official/vision/dataloaders/tfexample_utils.py +1 -1
  1041. official/vision/dataloaders/utils.py +1 -2
  1042. official/vision/dataloaders/utils_test.py +1 -3
  1043. official/vision/dataloaders/video_input.py +1 -1
  1044. official/vision/dataloaders/video_input_test.py +1 -2
  1045. official/vision/evaluation/__init__.py +1 -1
  1046. official/vision/evaluation/coco_evaluator.py +1 -2
  1047. official/vision/evaluation/coco_utils.py +1 -3
  1048. official/vision/evaluation/coco_utils_test.py +1 -1
  1049. official/vision/evaluation/instance_metrics.py +1 -1
  1050. official/vision/evaluation/instance_metrics_test.py +1 -1
  1051. official/vision/evaluation/iou.py +1 -1
  1052. official/vision/evaluation/iou_test.py +1 -1
  1053. official/vision/evaluation/panoptic_quality.py +1 -1
  1054. official/vision/evaluation/panoptic_quality_evaluator.py +1 -1
  1055. official/vision/evaluation/panoptic_quality_evaluator_test.py +1 -1
  1056. official/vision/evaluation/panoptic_quality_test.py +1 -1
  1057. official/vision/evaluation/segmentation_metrics.py +1 -1
  1058. official/vision/evaluation/segmentation_metrics_test.py +1 -1
  1059. official/vision/evaluation/wod_detection_evaluator.py +1 -1
  1060. official/vision/losses/__init__.py +1 -1
  1061. official/vision/losses/focal_loss.py +1 -1
  1062. official/vision/losses/loss_utils.py +1 -1
  1063. official/vision/losses/maskrcnn_losses.py +1 -2
  1064. official/vision/losses/maskrcnn_losses_test.py +1 -1
  1065. official/vision/losses/retinanet_losses.py +1 -2
  1066. official/vision/losses/segmentation_losses.py +1 -1
  1067. official/vision/losses/segmentation_losses_test.py +1 -1
  1068. official/vision/modeling/__init__.py +1 -1
  1069. official/vision/modeling/backbones/__init__.py +1 -1
  1070. official/vision/modeling/backbones/efficientnet.py +1 -3
  1071. official/vision/modeling/backbones/efficientnet_test.py +1 -2
  1072. official/vision/modeling/backbones/factory.py +1 -3
  1073. official/vision/modeling/backbones/factory_test.py +1 -2
  1074. official/vision/modeling/backbones/mobiledet.py +1 -1
  1075. official/vision/modeling/backbones/mobiledet_test.py +1 -1
  1076. official/vision/modeling/backbones/mobilenet.py +73 -3
  1077. official/vision/modeling/backbones/mobilenet_test.py +12 -3
  1078. official/vision/modeling/backbones/resnet.py +1 -2
  1079. official/vision/modeling/backbones/resnet_3d.py +1 -2
  1080. official/vision/modeling/backbones/resnet_3d_test.py +1 -2
  1081. official/vision/modeling/backbones/resnet_deeplab.py +5 -4
  1082. official/vision/modeling/backbones/resnet_deeplab_test.py +21 -10
  1083. official/vision/modeling/backbones/resnet_test.py +1 -2
  1084. official/vision/modeling/backbones/resnet_unet.py +1 -2
  1085. official/vision/modeling/backbones/resnet_unet_test.py +1 -3
  1086. official/vision/modeling/backbones/revnet.py +1 -2
  1087. official/vision/modeling/backbones/revnet_test.py +1 -2
  1088. official/vision/modeling/backbones/spinenet.py +1 -3
  1089. official/vision/modeling/backbones/spinenet_mobile.py +1 -3
  1090. official/vision/modeling/backbones/spinenet_mobile_test.py +1 -2
  1091. official/vision/modeling/backbones/spinenet_test.py +1 -2
  1092. official/vision/modeling/backbones/vit.py +53 -27
  1093. official/vision/modeling/backbones/vit_specs.py +1 -1
  1094. official/vision/modeling/backbones/vit_test.py +12 -1
  1095. official/vision/modeling/classification_model.py +1 -2
  1096. official/vision/modeling/classification_model_test.py +1 -2
  1097. official/vision/modeling/decoders/__init__.py +1 -1
  1098. official/vision/modeling/decoders/aspp.py +1 -3
  1099. official/vision/modeling/decoders/aspp_test.py +1 -2
  1100. official/vision/modeling/decoders/factory.py +1 -3
  1101. official/vision/modeling/decoders/factory_test.py +1 -1
  1102. official/vision/modeling/decoders/fpn.py +1 -2
  1103. official/vision/modeling/decoders/fpn_test.py +1 -2
  1104. official/vision/modeling/decoders/nasfpn.py +1 -3
  1105. official/vision/modeling/decoders/nasfpn_test.py +1 -2
  1106. official/vision/modeling/factory.py +1 -1
  1107. official/vision/modeling/factory_3d.py +1 -2
  1108. official/vision/modeling/factory_test.py +1 -2
  1109. official/vision/modeling/heads/__init__.py +1 -1
  1110. official/vision/modeling/heads/dense_prediction_heads.py +1 -3
  1111. official/vision/modeling/heads/dense_prediction_heads_test.py +1 -3
  1112. official/vision/modeling/heads/instance_heads.py +3 -4
  1113. official/vision/modeling/heads/instance_heads_test.py +1 -2
  1114. official/vision/modeling/heads/segmentation_heads.py +2 -2
  1115. official/vision/modeling/heads/segmentation_heads_test.py +1 -2
  1116. official/vision/modeling/layers/__init__.py +1 -1
  1117. official/vision/modeling/layers/box_sampler.py +1 -2
  1118. official/vision/modeling/layers/deeplab.py +1 -1
  1119. official/vision/modeling/layers/deeplab_test.py +1 -1
  1120. official/vision/modeling/layers/detection_generator.py +1 -3
  1121. official/vision/modeling/layers/detection_generator_test.py +1 -3
  1122. official/vision/modeling/layers/edgetpu.py +1 -1
  1123. official/vision/modeling/layers/edgetpu_test.py +1 -1
  1124. official/vision/modeling/layers/mask_sampler.py +1 -2
  1125. official/vision/modeling/layers/nn_blocks.py +1 -2
  1126. official/vision/modeling/layers/nn_blocks_3d.py +1 -2
  1127. official/vision/modeling/layers/nn_blocks_3d_test.py +1 -2
  1128. official/vision/modeling/layers/nn_blocks_test.py +1 -3
  1129. official/vision/modeling/layers/nn_layers.py +1 -1
  1130. official/vision/modeling/layers/nn_layers_test.py +1 -2
  1131. official/vision/modeling/layers/roi_aligner.py +7 -5
  1132. official/vision/modeling/layers/roi_aligner_test.py +1 -2
  1133. official/vision/modeling/layers/roi_generator.py +1 -2
  1134. official/vision/modeling/layers/roi_sampler.py +1 -2
  1135. official/vision/modeling/maskrcnn_model.py +1 -1
  1136. official/vision/modeling/maskrcnn_model_test.py +1 -2
  1137. official/vision/modeling/models/__init__.py +1 -1
  1138. official/vision/modeling/retinanet_model.py +9 -8
  1139. official/vision/modeling/retinanet_model_test.py +1 -2
  1140. official/vision/modeling/segmentation_model.py +4 -4
  1141. official/vision/modeling/segmentation_model_test.py +1 -1
  1142. official/vision/modeling/video_classification_model.py +1 -1
  1143. official/vision/modeling/video_classification_model_test.py +1 -2
  1144. official/vision/ops/__init__.py +1 -1
  1145. official/vision/ops/anchor.py +1 -3
  1146. official/vision/ops/anchor_generator.py +1 -1
  1147. official/vision/ops/anchor_generator_test.py +1 -1
  1148. official/vision/ops/anchor_test.py +1 -2
  1149. official/vision/ops/augment.py +4 -16
  1150. official/vision/ops/augment_test.py +1 -1
  1151. official/vision/ops/box_matcher.py +1 -1
  1152. official/vision/ops/box_matcher_test.py +1 -1
  1153. official/vision/ops/box_ops.py +1 -2
  1154. official/vision/ops/iou_similarity.py +1 -1
  1155. official/vision/ops/iou_similarity_test.py +1 -1
  1156. official/vision/ops/mask_ops.py +1 -3
  1157. official/vision/ops/mask_ops_test.py +1 -2
  1158. official/vision/ops/nms.py +1 -2
  1159. official/vision/ops/preprocess_ops.py +40 -11
  1160. official/vision/ops/preprocess_ops_3d.py +6 -3
  1161. official/vision/ops/preprocess_ops_3d_test.py +1 -1
  1162. official/vision/ops/preprocess_ops_test.py +13 -7
  1163. official/vision/ops/sampling_ops.py +1 -2
  1164. official/vision/ops/spatial_transform_ops.py +1 -1
  1165. official/vision/ops/target_gather.py +1 -1
  1166. official/vision/ops/target_gather_test.py +1 -1
  1167. official/vision/registry_imports.py +1 -1
  1168. official/vision/serving/__init__.py +1 -1
  1169. official/vision/serving/detection.py +21 -1
  1170. official/vision/serving/detection_test.py +39 -1
  1171. official/vision/serving/export_base.py +1 -1
  1172. official/vision/serving/export_base_v2.py +1 -1
  1173. official/vision/serving/export_base_v2_test.py +1 -1
  1174. official/vision/serving/export_module_factory.py +1 -1
  1175. official/vision/serving/export_module_factory_test.py +1 -1
  1176. official/vision/serving/export_saved_model.py +1 -1
  1177. official/vision/serving/export_saved_model_lib.py +1 -1
  1178. official/vision/serving/export_saved_model_lib_test.py +1 -1
  1179. official/vision/serving/export_saved_model_lib_v2.py +1 -1
  1180. official/vision/serving/export_tfhub.py +1 -2
  1181. official/vision/serving/export_tfhub_lib.py +1 -3
  1182. official/vision/serving/export_tflite.py +1 -1
  1183. official/vision/serving/export_tflite_lib.py +1 -1
  1184. official/vision/serving/export_utils.py +1 -1
  1185. official/vision/serving/image_classification.py +1 -1
  1186. official/vision/serving/image_classification_test.py +1 -1
  1187. official/vision/serving/semantic_segmentation.py +6 -3
  1188. official/vision/serving/semantic_segmentation_test.py +71 -7
  1189. official/vision/serving/video_classification.py +1 -1
  1190. official/vision/serving/video_classification_test.py +1 -1
  1191. official/vision/tasks/__init__.py +1 -1
  1192. official/vision/tasks/image_classification.py +1 -1
  1193. official/vision/tasks/maskrcnn.py +1 -1
  1194. official/vision/tasks/retinanet.py +1 -1
  1195. official/vision/tasks/semantic_segmentation.py +1 -1
  1196. official/vision/tasks/video_classification.py +1 -1
  1197. official/vision/train.py +1 -1
  1198. official/vision/train_spatial_partitioning.py +1 -1
  1199. official/vision/utils/__init__.py +1 -1
  1200. official/vision/utils/object_detection/__init__.py +1 -1
  1201. official/vision/utils/object_detection/argmax_matcher.py +1 -1
  1202. official/vision/utils/object_detection/balanced_positive_negative_sampler.py +1 -1
  1203. official/vision/utils/object_detection/box_coder.py +1 -1
  1204. official/vision/utils/object_detection/box_list.py +1 -1
  1205. official/vision/utils/object_detection/box_list_ops.py +1 -1
  1206. official/vision/utils/object_detection/faster_rcnn_box_coder.py +1 -1
  1207. official/vision/utils/object_detection/matcher.py +1 -1
  1208. official/vision/utils/object_detection/minibatch_sampler.py +1 -1
  1209. official/vision/utils/object_detection/ops.py +1 -1
  1210. official/vision/utils/object_detection/preprocessor.py +1 -1
  1211. official/vision/utils/object_detection/region_similarity_calculator.py +1 -1
  1212. official/vision/utils/object_detection/shape_utils.py +1 -1
  1213. official/vision/utils/object_detection/target_assigner.py +1 -1
  1214. official/vision/utils/object_detection/visualization_utils.py +6 -1
  1215. official/vision/utils/ops_test.py +1 -1
  1216. official/vision/utils/summary_manager.py +1 -1
  1217. orbit/__init__.py +1 -1
  1218. orbit/actions/__init__.py +1 -1
  1219. orbit/actions/conditional_action.py +3 -2
  1220. orbit/actions/conditional_action_test.py +1 -1
  1221. orbit/actions/export_saved_model.py +1 -1
  1222. orbit/actions/export_saved_model_test.py +1 -1
  1223. orbit/actions/new_best_metric.py +2 -2
  1224. orbit/actions/new_best_metric_test.py +2 -2
  1225. orbit/actions/save_checkpoint_if_preempted.py +1 -1
  1226. orbit/controller.py +1 -1
  1227. orbit/controller_test.py +1 -1
  1228. orbit/examples/__init__.py +1 -1
  1229. orbit/examples/single_task/__init__.py +1 -1
  1230. orbit/examples/single_task/single_task_evaluator.py +1 -1
  1231. orbit/examples/single_task/single_task_evaluator_test.py +1 -1
  1232. orbit/examples/single_task/single_task_trainer.py +1 -1
  1233. orbit/examples/single_task/single_task_trainer_test.py +1 -1
  1234. orbit/runner.py +1 -1
  1235. orbit/standard_runner.py +1 -1
  1236. orbit/standard_runner_test.py +1 -1
  1237. orbit/utils/__init__.py +1 -1
  1238. orbit/utils/common.py +1 -1
  1239. orbit/utils/common_test.py +1 -1
  1240. orbit/utils/epoch_helper.py +1 -1
  1241. orbit/utils/loop_fns.py +7 -2
  1242. orbit/utils/summary_manager.py +1 -1
  1243. orbit/utils/summary_manager_interface.py +1 -1
  1244. orbit/utils/tpu_summaries.py +1 -1
  1245. orbit/utils/tpu_summaries_test.py +1 -1
  1246. tensorflow_models/__init__.py +1 -1
  1247. tensorflow_models/nlp/__init__.py +1 -1
  1248. tensorflow_models/tensorflow_models_test.py +1 -1
  1249. tensorflow_models/uplift/__init__.py +1 -1
  1250. tensorflow_models/vision/__init__.py +1 -1
  1251. {tf_models_nightly-2.17.0.dev20240617.dist-info → tf_models_nightly-2.20.0.dev20251205.dist-info}/METADATA +1 -1
  1252. tf_models_nightly-2.20.0.dev20251205.dist-info/RECORD +1256 -0
  1253. tf_models_nightly-2.17.0.dev20240617.dist-info/RECORD +0 -1220
  1254. {tf_models_nightly-2.17.0.dev20240617.dist-info → tf_models_nightly-2.20.0.dev20251205.dist-info}/AUTHORS +0 -0
  1255. {tf_models_nightly-2.17.0.dev20240617.dist-info → tf_models_nightly-2.20.0.dev20251205.dist-info}/LICENSE +0 -0
  1256. {tf_models_nightly-2.17.0.dev20240617.dist-info → tf_models_nightly-2.20.0.dev20251205.dist-info}/WHEEL +0 -0
  1257. {tf_models_nightly-2.17.0.dev20240617.dist-info → tf_models_nightly-2.20.0.dev20251205.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,849 @@
1
+ # Copyright 2025 The TensorFlow Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Specialized Transformers for DETR.
16
+
17
+ The position embeddings are added to the query and key for every self- and
18
+ cross-attention layer.
19
+ """
20
+
21
+ import tensorflow as tf, tf_keras
22
+
23
+ from official.modeling import tf_utils
24
+ from official.nlp.modeling import layers
25
+ from official.nlp.modeling import models
26
+
27
+
28
class TransformerEncoder(tf_keras.layers.Layer):
  """Stack of encoder blocks followed by a final layer normalization.

  The encoder chains `num_layers` `TransformerEncoderBlock` layers. Each block
  is composed of the sublayers:
    1. Self-attention layer
    2. Feedforward network (which is 2 fully-connected layers)
  """

  def __init__(self,
               num_layers=6,
               num_attention_heads=8,
               intermediate_size=2048,
               activation="relu",
               dropout_rate=0.0,
               attention_dropout_rate=0.0,
               use_bias=False,
               norm_first=True,
               norm_epsilon=1e-6,
               intermediate_dropout=0.0,
               **kwargs):
    """Stores the hyperparameters; sublayers are created later in `build()`.

    Args:
      num_layers: Number of encoder blocks.
      num_attention_heads: Number of attention heads in each block.
      intermediate_size: Size of the intermediate (feedforward) layer.
      activation: Activation for the intermediate layer.
      dropout_rate: Dropout probability, forwarded to each block as its
        `output_dropout`.
      attention_dropout_rate: Dropout probability for attention layers.
      use_bias: Whether to enable use_bias in attention layer. If set False,
        use_bias in attention layer is disabled.
      norm_first: Whether to normalize inputs to attention and intermediate
        dense layers. If set False, output of attention and intermediate dense
        layers is normalized.
      norm_epsilon: Epsilon value to initialize normalization layers.
      intermediate_dropout: Dropout probability for the intermediate dropout
        layer of each block.
      **kwargs: Keyword arguments passed to `tf_keras.layers.Layer`.
    """
    super().__init__(**kwargs)
    # Architecture sizes are public attributes; the rest are private.
    self.num_layers = num_layers
    self.num_attention_heads = num_attention_heads
    self._intermediate_size = intermediate_size
    self._activation = activation
    self._dropout_rate = dropout_rate
    self._attention_dropout_rate = attention_dropout_rate
    self._use_bias = use_bias
    self._norm_first = norm_first
    self._norm_epsilon = norm_epsilon
    self._intermediate_dropout = intermediate_dropout

  def build(self, input_shape):
    """Creates the encoder blocks and the output layer normalization.

    Args:
      input_shape: Shape of the encoder input. Index 2 of it is handed to the
        attention initializer factory.
    """
    # Settings that are identical for every block in the stack.
    shared_kwargs = dict(
        num_attention_heads=self.num_attention_heads,
        inner_dim=self._intermediate_size,
        inner_activation=self._activation,
        output_dropout=self._dropout_rate,
        attention_dropout=self._attention_dropout_rate,
        use_bias=self._use_bias,
        norm_first=self._norm_first,
        norm_epsilon=self._norm_epsilon,
        inner_dropout=self._intermediate_dropout)

    self.encoder_layers = []
    for index in range(self.num_layers):
      # A new initializer is requested for each block on every iteration.
      initializer = tf_utils.clone_initializer(
          models.seq2seq_transformer.attention_initializer(input_shape[2]))
      self.encoder_layers.append(
          TransformerEncoderBlock(
              attention_initializer=initializer,
              name=("layer_%d" % index),
              **shared_kwargs))

    # The final normalization is always created with a float32 dtype.
    self.output_normalization = tf_keras.layers.LayerNormalization(
        epsilon=self._norm_epsilon, dtype="float32")
    super().build(input_shape)

  def get_config(self):
    """Returns the base layer config extended with the constructor arguments."""
    merged = dict(super().get_config())
    merged.update({
        "num_layers": self.num_layers,
        "num_attention_heads": self.num_attention_heads,
        "intermediate_size": self._intermediate_size,
        "activation": self._activation,
        "dropout_rate": self._dropout_rate,
        "attention_dropout_rate": self._attention_dropout_rate,
        "use_bias": self._use_bias,
        "norm_first": self._norm_first,
        "norm_epsilon": self._norm_epsilon,
        "intermediate_dropout": self._intermediate_dropout,
    })
    return merged

  def call(self, encoder_inputs, attention_mask=None, pos_embed=None):
    """Runs the inputs through every block, then normalizes the result.

    Args:
      encoder_inputs: A tensor with shape `(batch_size, input_length,
        hidden_size)`.
      attention_mask: A mask for the encoder self-attention layer with shape
        `(batch_size, input_length, input_length)`.
      pos_embed: Position embedding handed to every encoder block.

    Returns:
      Output of encoder which is a `float32` tensor with shape
      `(batch_size, input_length, hidden_size)`.
    """
    hidden = encoder_inputs
    for index in range(self.num_layers):
      block = self.encoder_layers[index]
      hidden = block([hidden, attention_mask, pos_embed])
    return self.output_normalization(hidden)
141
+
142
+
143
class TransformerEncoderBlock(tf_keras.layers.Layer):
  """TransformerEncoderBlock layer.

  This layer implements the Transformer Encoder from
  "Attention Is All You Need". (https://arxiv.org/abs/1706.03762),
  which combines a `tf_keras.layers.MultiHeadAttention` layer with a
  two-layer feedforward network. The only difference: position embedding is
  added to the query and key of self-attention.

  References:
    [Attention Is All You Need](https://arxiv.org/abs/1706.03762)
    [BERT: Pre-training of Deep Bidirectional Transformers for Language
     Understanding](https://arxiv.org/abs/1810.04805)
  """

  def __init__(self,
               num_attention_heads,
               inner_dim,
               inner_activation,
               output_range=None,
               kernel_initializer="glorot_uniform",
               bias_initializer="zeros",
               kernel_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               kernel_constraint=None,
               bias_constraint=None,
               use_bias=True,
               norm_first=False,
               norm_epsilon=1e-12,
               output_dropout=0.0,
               attention_dropout=0.0,
               inner_dropout=0.0,
               attention_initializer=None,
               attention_axes=None,
               **kwargs):
    """Initializes `TransformerEncoderBlock`.

    Args:
      num_attention_heads: Number of attention heads.
      inner_dim: The output dimension of the first Dense layer in a two-layer
        feedforward network.
      inner_activation: The activation for the first Dense layer in a two-layer
        feedforward network.
      output_range: the sequence output range, [0, output_range) for slicing the
        target sequence. `None` means the target sequence is not sliced.
      kernel_initializer: Initializer for dense layer kernels.
      bias_initializer: Initializer for dense layer biases.
      kernel_regularizer: Regularizer for dense layer kernels.
      bias_regularizer: Regularizer for dense layer biases.
      activity_regularizer: Regularizer for dense layer activity.
      kernel_constraint: Constraint for dense layer kernels.
      bias_constraint: Constraint for dense layer biases.
      use_bias: Whether to enable use_bias in attention layer. If set False,
        use_bias in attention layer is disabled.
      norm_first: Whether to normalize inputs to attention and intermediate
        dense layers. If set False, output of attention and intermediate dense
        layers is normalized.
      norm_epsilon: Epsilon value to initialize normalization layers.
      output_dropout: Dropout probability for the post-attention and output
        dropout.
      attention_dropout: Dropout probability for within the attention layer.
      inner_dropout: Dropout probability for the first Dense layer in a
        two-layer feedforward network.
      attention_initializer: Initializer for kernels of attention layers. If set
        `None`, attention layers use kernel_initializer as initializer for
        kernel.
      attention_axes: axes over which the attention is applied. `None` means
        attention over all axes, but batch, heads, and features.
      **kwargs: keyword arguments passed to `tf_keras.layers.Layer`.
    """
    super().__init__(**kwargs)

    self._num_heads = num_attention_heads
    self._inner_dim = inner_dim
    self._inner_activation = inner_activation
    # `_attention_dropout` and `_output_dropout` hold the raw rates only until
    # `build()` rebinds them to `Dropout` layers. The `*_rate` attributes keep
    # the numeric values for `build()` and `get_config()`.
    self._attention_dropout = attention_dropout
    self._attention_dropout_rate = attention_dropout
    self._output_dropout = output_dropout
    self._output_dropout_rate = output_dropout
    self._output_range = output_range
    self._kernel_initializer = tf_keras.initializers.get(kernel_initializer)
    self._bias_initializer = tf_keras.initializers.get(bias_initializer)
    self._kernel_regularizer = tf_keras.regularizers.get(kernel_regularizer)
    self._bias_regularizer = tf_keras.regularizers.get(bias_regularizer)
    self._activity_regularizer = tf_keras.regularizers.get(activity_regularizer)
    self._kernel_constraint = tf_keras.constraints.get(kernel_constraint)
    self._bias_constraint = tf_keras.constraints.get(bias_constraint)
    self._use_bias = use_bias
    self._norm_first = norm_first
    self._norm_epsilon = norm_epsilon
    self._inner_dropout = inner_dropout
    if attention_initializer:
      self._attention_initializer = tf_keras.initializers.get(
          attention_initializer)
    else:
      # Fall back to the dense kernel initializer for the attention kernels.
      self._attention_initializer = tf_utils.clone_initializer(
          self._kernel_initializer)
    self._attention_axes = attention_axes

  def build(self, input_shape):
    """Creates the attention, feedforward, dropout and normalization layers.

    Args:
      input_shape: Either a `tf.TensorShape` (single input tensor) or a
        list/tuple of shapes whose first entry is the input tensor's shape.

    Raises:
      ValueError: If `input_shape` has an unsupported type, or if the last
        input dimension is not a multiple of the number of attention heads.
    """
    if isinstance(input_shape, tf.TensorShape):
      input_tensor_shape = input_shape
    elif isinstance(input_shape, (list, tuple)):
      input_tensor_shape = tf.TensorShape(input_shape[0])
    else:
      raise ValueError(
          "The type of input shape argument is not supported, got: %s" %
          type(input_shape))
    einsum_equation = "abc,cd->abd"
    if len(input_tensor_shape.as_list()) > 3:
      # Inputs with extra leading axes need the ellipsis form of the equation.
      einsum_equation = "...bc,cd->...bd"
    hidden_size = input_tensor_shape[-1]
    if hidden_size % self._num_heads != 0:
      raise ValueError(
          "The input size (%d) is not a multiple of the number of attention "
          "heads (%d)" % (hidden_size, self._num_heads))
    self._attention_head_size = int(hidden_size // self._num_heads)
    common_kwargs = dict(
        bias_initializer=self._bias_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activity_regularizer=self._activity_regularizer,
        kernel_constraint=self._kernel_constraint,
        bias_constraint=self._bias_constraint)
    # Read rates from the `*_rate` attributes: `_attention_dropout` and
    # `_output_dropout` are rebound to layers below, so using them here would
    # pass a layer as a rate if `build()` ever ran a second time.
    self._attention_layer = tf_keras.layers.MultiHeadAttention(
        num_heads=self._num_heads,
        key_dim=self._attention_head_size,
        dropout=self._attention_dropout_rate,
        use_bias=self._use_bias,
        kernel_initializer=self._attention_initializer,
        attention_axes=self._attention_axes,
        name="self_attention",
        **common_kwargs)
    # The post-attention dropout uses the output dropout rate.
    self._attention_dropout = tf_keras.layers.Dropout(
        rate=self._output_dropout_rate)
    # Use float32 in layernorm for numeric stability.
    # It is probably safe in mixed_float16, but we haven't validated this yet.
    self._attention_layer_norm = (
        tf_keras.layers.LayerNormalization(
            name="self_attention_layer_norm",
            axis=-1,
            epsilon=self._norm_epsilon,
            dtype=tf.float32))
    self._intermediate_dense = tf_keras.layers.EinsumDense(
        einsum_equation,
        output_shape=(None, self._inner_dim),
        bias_axes="d",
        kernel_initializer=tf_utils.clone_initializer(self._kernel_initializer),
        name="intermediate",
        **common_kwargs)
    policy = tf_keras.mixed_precision.global_policy()
    if policy.name == "mixed_bfloat16":
      # bfloat16 causes BERT with the LAMB optimizer to not converge
      # as well, so we use float32.
      # TODO(b/154538392): Investigate this.
      policy = tf.float32
    self._intermediate_activation_layer = tf_keras.layers.Activation(
        self._inner_activation, dtype=policy)
    self._inner_dropout_layer = tf_keras.layers.Dropout(
        rate=self._inner_dropout)
    self._output_dense = tf_keras.layers.EinsumDense(
        einsum_equation,
        output_shape=(None, hidden_size),
        bias_axes="d",
        name="output",
        kernel_initializer=tf_utils.clone_initializer(self._kernel_initializer),
        **common_kwargs)
    self._output_dropout = tf_keras.layers.Dropout(
        rate=self._output_dropout_rate)
    # Use float32 in layernorm for numeric stability.
    self._output_layer_norm = tf_keras.layers.LayerNormalization(
        name="output_layer_norm",
        axis=-1,
        epsilon=self._norm_epsilon,
        dtype=tf.float32)

    super().build(input_shape)

  def get_config(self):
    """Returns the base layer config extended with the constructor arguments."""
    config = {
        "num_attention_heads": self._num_heads,
        "inner_dim": self._inner_dim,
        "inner_activation": self._inner_activation,
        "output_dropout": self._output_dropout_rate,
        "attention_dropout": self._attention_dropout_rate,
        "output_range": self._output_range,
        "kernel_initializer": tf_utils.serialize_initializer(
            self._kernel_initializer, use_legacy_format=True
        ),
        "bias_initializer": tf_utils.serialize_initializer(
            self._bias_initializer, use_legacy_format=True
        ),
        "kernel_regularizer": tf_utils.serialize_regularizer(
            self._kernel_regularizer, use_legacy_format=True
        ),
        "bias_regularizer": tf_utils.serialize_regularizer(
            self._bias_regularizer, use_legacy_format=True
        ),
        "activity_regularizer": tf_utils.serialize_regularizer(
            self._activity_regularizer, use_legacy_format=True
        ),
        "kernel_constraint": tf_utils.serialize_constraint(
            self._kernel_constraint, use_legacy_format=True
        ),
        "bias_constraint": tf_utils.serialize_constraint(
            self._bias_constraint, use_legacy_format=True
        ),
        "use_bias": self._use_bias,
        "norm_first": self._norm_first,
        "norm_epsilon": self._norm_epsilon,
        "inner_dropout": self._inner_dropout,
        "attention_initializer": tf_utils.serialize_initializer(
            self._attention_initializer, use_legacy_format=True
        ),
        "attention_axes": self._attention_axes,
    }
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def call(self, inputs):
    """Transformer self-attention encoder block call.

    Args:
      inputs: a single tensor or a list of tensors. `input tensor` as the single
        sequence of embeddings. [`input tensor`, `attention mask`] to have the
        additional attention mask. [`input tensor`, `attention mask`, `position
        embedding`] to additionally add a position embedding to the query and
        key of self-attention. The mask and the position embedding may each be
        `None`.

    Returns:
      An output tensor with the same dimensions as input/query tensor.

    Raises:
      ValueError: If `inputs` is a list or tuple whose length is not 2 or 3.
    """
    if isinstance(inputs, (list, tuple)):
      if len(inputs) == 3:
        input_tensor, attention_mask, pos_embed = inputs
      elif len(inputs) == 2:
        input_tensor, attention_mask = inputs
        pos_embed = None
      else:
        raise ValueError(
            "Unexpected inputs to %s with length at %d" %
            (self.__class__, len(inputs)))
    else:
      input_tensor, attention_mask, pos_embed = inputs, None, None

    if self._output_range:
      if self._norm_first:
        # Keep the un-normalized slice for the residual connection.
        source_tensor = input_tensor[:, 0:self._output_range, :]
        input_tensor = self._attention_layer_norm(input_tensor)
      target_tensor = input_tensor[:, 0:self._output_range, :]
      if attention_mask is not None:
        attention_mask = attention_mask[:, 0:self._output_range, :]
    else:
      if self._norm_first:
        source_tensor = input_tensor
        input_tensor = self._attention_layer_norm(input_tensor)
      target_tensor = input_tensor

    # The position embedding goes into the query and key only; the value stays
    # free of it. Without an embedding the block is plain self-attention.
    # NOTE(review): with `output_range` set, `target_tensor` is sliced but
    # `pos_embed` is not, so the sum relies on compatible shapes -- confirm
    # against callers before using `output_range` together with `pos_embed`.
    if pos_embed is None:
      query = target_tensor
      key = input_tensor
    else:
      query = target_tensor + pos_embed
      key = input_tensor + pos_embed
    attention_output = self._attention_layer(
        query=query,
        key=key,
        value=input_tensor,
        attention_mask=attention_mask)
    attention_output = self._attention_dropout(attention_output)
    if self._norm_first:
      attention_output = source_tensor + attention_output
    else:
      attention_output = self._attention_layer_norm(target_tensor +
                                                    attention_output)
    if self._norm_first:
      source_attention_output = attention_output
      attention_output = self._output_layer_norm(attention_output)
    inner_output = self._intermediate_dense(attention_output)
    inner_output = self._intermediate_activation_layer(inner_output)
    inner_output = self._inner_dropout_layer(inner_output)
    layer_output = self._output_dense(inner_output)
    layer_output = self._output_dropout(layer_output)

    if self._norm_first:
      return source_attention_output + layer_output

    # During mixed precision training, layer norm output is always fp32 for now.
    # Casts fp32 for the subsequent add.
    layer_output = tf.cast(layer_output, tf.float32)
    return self._output_layer_norm(layer_output + attention_output)
423
+
424
+
425
class TransformerDecoder(tf_keras.layers.Layer):
  """Transformer decoder.

  Like the encoder, the decoder is a stack of N identical layers. Every layer
  is a `TransformerDecoderBlock` made of these sublayers:
    1. Self-attention layer.
    2. Multi-headed attention layer combining encoder outputs with results
       from the previous self-attention layer.
    3. Feedforward network (2 fully-connected layers).

  A single float32 layer normalization is applied to the stack output.
  """

  def __init__(self,
               num_layers=6,
               num_attention_heads=8,
               intermediate_size=2048,
               activation="relu",
               dropout_rate=0.0,
               attention_dropout_rate=0.0,
               use_bias=False,
               norm_first=True,
               norm_epsilon=1e-6,
               intermediate_dropout=0.0,
               **kwargs):
    """Initialize a Transformer decoder.

    Args:
      num_layers: Number of decoder blocks in the stack.
      num_attention_heads: Number of attention heads in every block.
      intermediate_size: Size of the intermediate (feedforward) layer.
      activation: Activation for the intermediate layer.
      dropout_rate: Dropout probability.
      attention_dropout_rate: Dropout probability for attention layers.
      use_bias: Whether to enable use_bias in attention layer. If set `False`,
        use_bias in attention layer is disabled.
      norm_first: Whether to normalize inputs to attention and intermediate
        dense layers. If set `False`, output of attention and intermediate
        dense layers is normalized.
      norm_epsilon: Epsilon value to initialize normalization layers.
      intermediate_dropout: Dropout probability for intermediate_dropout_layer.
      **kwargs: Keyword arguments passed to tf_keras.layers.Layer.
    """
    super().__init__(**kwargs)
    self.num_layers = num_layers
    self.num_attention_heads = num_attention_heads
    self._intermediate_size = intermediate_size
    self._activation = activation
    self._dropout_rate = dropout_rate
    self._attention_dropout_rate = attention_dropout_rate
    self._use_bias = use_bias
    self._norm_first = norm_first
    self._norm_epsilon = norm_epsilon
    self._intermediate_dropout = intermediate_dropout

  def build(self, input_shape):
    """Creates the decoder blocks and the final layer normalization.

    Args:
      input_shape: Shape of the layer input; its element at index 2 is passed
        to the attention initializer factory for every block.
    """
    # One block per layer, named "layer_<index>". Each block receives its own
    # cloned attention initializer instance.
    self.decoder_layers = [
        TransformerDecoderBlock(
            num_attention_heads=self.num_attention_heads,
            intermediate_size=self._intermediate_size,
            intermediate_activation=self._activation,
            dropout_rate=self._dropout_rate,
            attention_dropout_rate=self._attention_dropout_rate,
            use_bias=self._use_bias,
            norm_first=self._norm_first,
            norm_epsilon=self._norm_epsilon,
            intermediate_dropout=self._intermediate_dropout,
            attention_initializer=tf_utils.clone_initializer(
                models.seq2seq_transformer.attention_initializer(
                    input_shape[2])),
            name=("layer_%d" % block_index))
        for block_index in range(self.num_layers)
    ]
    # The final normalization is kept in float32 regardless of the layer's
    # own dtype policy.
    self.output_normalization = tf_keras.layers.LayerNormalization(
        epsilon=self._norm_epsilon, dtype="float32")
    super().build(input_shape)

  def get_config(self):
    """Returns the base layer config extended with the decoder settings."""
    decoder_config = {
        "num_layers": self.num_layers,
        "num_attention_heads": self.num_attention_heads,
        "intermediate_size": self._intermediate_size,
        "activation": self._activation,
        "dropout_rate": self._dropout_rate,
        "attention_dropout_rate": self._attention_dropout_rate,
        "use_bias": self._use_bias,
        "norm_first": self._norm_first,
        "norm_epsilon": self._norm_epsilon,
        "intermediate_dropout": self._intermediate_dropout
    }
    parent_config = super().get_config()
    # Decoder entries win over same-named entries from the parent config.
    return {**parent_config, **decoder_config}

  def call(self,
           target,
           memory,
           self_attention_mask=None,
           cross_attention_mask=None,
           cache=None,
           decode_loop_step=None,
           return_all_decoder_outputs=False,
           input_pos_embed=None,
           memory_pos_embed=None):
    """Return the output of the decoder layer stacks.

    Args:
      target: A tensor with shape `(batch_size, target_length, hidden_size)`.
      memory: A tensor with shape `(batch_size, input_length, hidden_size)`.
      self_attention_mask: A tensor with shape `(batch_size, target_length,
        target_length)`, the mask for decoder self-attention layer.
      cross_attention_mask: A tensor with shape `(batch_size, target_length,
        input_length)` which is the mask for encoder-decoder attention layer.
      cache: (Used for fast decoding) A nested dictionary storing previous
        decoder self-attention values, keyed by the layer index as a string.
        The items are:
          {layer_n: {"k": A tensor with shape `(batch_size, i, key_channels)`,
                     "v": A tensor with shape `(batch_size, i, value_channels)`},
           ...}
        When given, each entry is overwritten in place with the cache
        returned by the corresponding block.
      decode_loop_step: An integer, the step number of the decoding loop. Used
        only for autoregressive inference on TPU.
      return_all_decoder_outputs: Return all decoder layer outputs. Note that
        the outputs are layer normed. This is useful when introducing per
        layer auxiliary loss.
      input_pos_embed: A tensor that each block adds to the query and key of
        its self-attention layer and to the query of its cross-attention
        layer.
      memory_pos_embed: A tensor that each block adds to `memory` to form the
        key of its cross-attention layer.

    Returns:
      Output of decoder: a float32 tensor with shape
      `(batch_size, target_length, hidden_size)`. If
      `return_all_decoder_outputs` is true, a list with one such normalized
      tensor per layer is returned instead.
    """
    hidden_states = target
    normalized_per_layer = []
    for layer_idx in range(self.num_layers):
      block_inputs = [
          hidden_states, memory, cross_attention_mask, self_attention_mask,
          input_pos_embed, memory_pos_embed
      ]
      decoder_block = self.decoder_layers[layer_idx]
      if cache is None:
        # No incremental decoding: the cache returned by the block is dropped.
        hidden_states, _ = decoder_block(block_inputs)
      else:
        # Gets the cache for decoding and stores the updated one back.
        cache_key = str(layer_idx)
        hidden_states, cache[cache_key] = decoder_block(
            block_inputs,
            cache=cache[cache_key],
            decode_loop_step=decode_loop_step)
      if return_all_decoder_outputs:
        normalized_per_layer.append(self.output_normalization(hidden_states))

    if return_all_decoder_outputs:
      return normalized_per_layer
    return self.output_normalization(hidden_states)
579
+
580
+
581
class TransformerDecoderBlock(tf_keras.layers.Layer):
  """Single transformer layer for decoder.

  It has three sub-layers:
    (1) a multi-head self-attention mechanism.
    (2) an encoder-decoder attention.
    (3) a position-wise fully connected feed-forward network.

  Positional embeddings supplied with the call inputs are added to the
  attention queries and keys only; attention values never receive them.
  """

  def __init__(self,
               num_attention_heads,
               intermediate_size,
               intermediate_activation,
               dropout_rate=0.0,
               attention_dropout_rate=0.0,
               kernel_initializer="glorot_uniform",
               bias_initializer="zeros",
               kernel_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               kernel_constraint=None,
               bias_constraint=None,
               use_bias=True,
               norm_first=False,
               norm_epsilon=1e-12,
               intermediate_dropout=0.0,
               attention_initializer=None,
               **kwargs):
    """Initialize a Transformer decoder block.

    Args:
      num_attention_heads: Number of attention heads.
      intermediate_size: Size of the intermediate layer.
      intermediate_activation: Activation for the intermediate layer.
      dropout_rate: Dropout probability for the post-attention and output
        dropout.
      attention_dropout_rate: Dropout probability for within the attention
        layer.
      kernel_initializer: Initializer for dense layer kernels.
      bias_initializer: Initializer for dense layer biases.
      kernel_regularizer: Regularizer for dense layer kernels.
      bias_regularizer: Regularizer for dense layer biases.
      activity_regularizer: Regularizer for dense layer activity.
      kernel_constraint: Constraint for dense layer kernels.
      bias_constraint: Constraint for dense layer biases.
      use_bias: Whether to enable use_bias in attention layer. If set False,
        use_bias in attention layer is disabled.
      norm_first: Whether to normalize inputs to attention and intermediate
        dense layers. If set False, output of attention and intermediate dense
        layers is normalized.
      norm_epsilon: Epsilon value to initialize normalization layers.
      intermediate_dropout: Dropout probability for intermediate_dropout_layer.
      attention_initializer: Initializer for kernels of attention layers. If
        set `None`, attention layers use kernel_initializer as initializer for
        kernel.
      **kwargs: Keyword arguments passed to tf_keras.layers.Layer.
    """
    super().__init__(**kwargs)
    self.num_attention_heads = num_attention_heads
    self.intermediate_size = intermediate_size
    self.intermediate_activation = tf_keras.activations.get(
        intermediate_activation)
    self.dropout_rate = dropout_rate
    self.attention_dropout_rate = attention_dropout_rate
    self._kernel_initializer = tf_keras.initializers.get(kernel_initializer)
    self._bias_initializer = tf_keras.initializers.get(bias_initializer)
    self._kernel_regularizer = tf_keras.regularizers.get(kernel_regularizer)
    self._bias_regularizer = tf_keras.regularizers.get(bias_regularizer)
    self._activity_regularizer = tf_keras.regularizers.get(activity_regularizer)
    self._kernel_constraint = tf_keras.constraints.get(kernel_constraint)
    self._bias_constraint = tf_keras.constraints.get(bias_constraint)
    self._use_bias = use_bias
    self._norm_first = norm_first
    self._norm_epsilon = norm_epsilon
    self._intermediate_dropout = intermediate_dropout
    if attention_initializer:
      self._attention_initializer = tf_keras.initializers.get(
          attention_initializer)
    else:
      # Fall back to a clone of the dense kernel initializer.
      self._attention_initializer = tf_utils.clone_initializer(
          self._kernel_initializer)
    # Class used to build the encoder-decoder attention layer in `build`.
    self._cross_attention_cls = layers.attention.MultiHeadAttention

  def build(self, input_shape):
    """Creates the attention, feed-forward, dropout and normalization layers.

    Args:
      input_shape: Sequence of input shapes; only the first entry (the target
        tensor shape, `[batch, sequence, width]`) is inspected here.

    Raises:
      ValueError: If the target shape is not three-dimensional, or if its
        last dimension is not a multiple of `num_attention_heads`.
    """
    target_tensor_shape = tf.TensorShape(input_shape[0])
    if len(target_tensor_shape.as_list()) != 3:
      raise ValueError("TransformerLayer expects a three-dimensional input of "
                       "shape [batch, sequence, width].")
    hidden_size = target_tensor_shape[2]
    if hidden_size % self.num_attention_heads != 0:
      raise ValueError(
          "The hidden size (%d) is not a multiple of the number of attention "
          "heads (%d)" % (hidden_size, self.num_attention_heads))
    self.attention_head_size = int(hidden_size) // self.num_attention_heads
    # Keyword arguments shared by every attention and dense layer below.
    common_kwargs = dict(
        bias_initializer=self._bias_initializer,
        kernel_regularizer=self._kernel_regularizer,
        bias_regularizer=self._bias_regularizer,
        activity_regularizer=self._activity_regularizer,
        kernel_constraint=self._kernel_constraint,
        bias_constraint=self._bias_constraint)
    # Self attention.
    self.self_attention = layers.attention.CachedAttention(
        num_heads=self.num_attention_heads,
        key_dim=self.attention_head_size,
        dropout=self.attention_dropout_rate,
        use_bias=self._use_bias,
        kernel_initializer=self._attention_initializer,
        name="self_attention",
        **common_kwargs)
    # NOTE: this projection is created here but is not referenced by `call`
    # in this class.
    self.self_attention_output_dense = tf_keras.layers.EinsumDense(
        "abc,cd->abd",
        output_shape=(None, hidden_size),
        bias_axes="d",
        kernel_initializer=tf_utils.clone_initializer(self._kernel_initializer),
        name="output",
        **common_kwargs)
    self.self_attention_dropout = tf_keras.layers.Dropout(
        rate=self.dropout_rate)
    self.self_attention_layer_norm = (
        tf_keras.layers.LayerNormalization(
            name="self_attention_layer_norm",
            axis=-1,
            epsilon=self._norm_epsilon,
            dtype="float32"))
    # Encoder-decoder attention.
    self.encdec_attention = self._cross_attention_cls(
        num_heads=self.num_attention_heads,
        key_dim=self.attention_head_size,
        dropout=self.attention_dropout_rate,
        output_shape=hidden_size,
        use_bias=self._use_bias,
        kernel_initializer=self._attention_initializer,
        name="attention/encdec",
        **common_kwargs)

    self.encdec_attention_dropout = tf_keras.layers.Dropout(
        rate=self.dropout_rate)
    self.encdec_attention_layer_norm = (
        tf_keras.layers.LayerNormalization(
            name="attention/encdec_output_layer_norm",
            axis=-1,
            epsilon=self._norm_epsilon,
            dtype="float32"))

    # Feed-forward projection.
    self.intermediate_dense = tf_keras.layers.EinsumDense(
        "abc,cd->abd",
        output_shape=(None, self.intermediate_size),
        bias_axes="d",
        kernel_initializer=tf_utils.clone_initializer(self._kernel_initializer),
        name="intermediate",
        **common_kwargs)
    self.intermediate_activation_layer = tf_keras.layers.Activation(
        self.intermediate_activation)
    self._intermediate_dropout_layer = tf_keras.layers.Dropout(
        rate=self._intermediate_dropout)
    self.output_dense = tf_keras.layers.EinsumDense(
        "abc,cd->abd",
        output_shape=(None, hidden_size),
        bias_axes="d",
        kernel_initializer=tf_utils.clone_initializer(self._kernel_initializer),
        name="output",
        **common_kwargs)
    self.output_dropout = tf_keras.layers.Dropout(rate=self.dropout_rate)
    self.output_layer_norm = tf_keras.layers.LayerNormalization(
        name="output_layer_norm",
        axis=-1,
        epsilon=self._norm_epsilon,
        dtype="float32")
    super().build(input_shape)

  def get_config(self):
    """Returns the base layer config extended with this block's settings.

    Activation, initializers, regularizers and constraints are serialized
    through the `tf_utils` helpers with `use_legacy_format=True`.
    """
    config = {
        "num_attention_heads": self.num_attention_heads,
        "intermediate_size": self.intermediate_size,
        "intermediate_activation": tf_utils.serialize_activation(
            self.intermediate_activation, use_legacy_format=True
        ),
        "dropout_rate": self.dropout_rate,
        "attention_dropout_rate": self.attention_dropout_rate,
        "kernel_initializer": tf_utils.serialize_initializer(
            self._kernel_initializer, use_legacy_format=True
        ),
        "bias_initializer": tf_utils.serialize_initializer(
            self._bias_initializer, use_legacy_format=True
        ),
        "kernel_regularizer": tf_utils.serialize_regularizer(
            self._kernel_regularizer, use_legacy_format=True
        ),
        "bias_regularizer": tf_utils.serialize_regularizer(
            self._bias_regularizer, use_legacy_format=True
        ),
        "activity_regularizer": tf_utils.serialize_regularizer(
            self._activity_regularizer, use_legacy_format=True
        ),
        "kernel_constraint": tf_utils.serialize_constraint(
            self._kernel_constraint, use_legacy_format=True
        ),
        "bias_constraint": tf_utils.serialize_constraint(
            self._bias_constraint, use_legacy_format=True
        ),
        "use_bias": self._use_bias,
        "norm_first": self._norm_first,
        "norm_epsilon": self._norm_epsilon,
        "intermediate_dropout": self._intermediate_dropout,
        "attention_initializer": tf_utils.serialize_initializer(
            self._attention_initializer, use_legacy_format=True
        ),
    }
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(config.items()))

  def common_layers_with_encoder(self):
    """Gets layer objects that can make a Transformer encoder block."""
    return [
        self.self_attention, self.self_attention_layer_norm,
        self.intermediate_dense, self.output_dense, self.output_layer_norm
    ]

  @staticmethod
  def _add_pos_embed(tensor, pos_embed):
    """Returns `tensor + pos_embed`, or `tensor` itself if `pos_embed` is None."""
    if pos_embed is None:
      return tensor
    return tensor + pos_embed

  def call(self, inputs, cache=None, decode_loop_step=None):
    """Runs self-attention, encoder-decoder attention and the feed-forward net.

    Args:
      inputs: A sequence of exactly six items, in this order:
        `input_tensor`, `memory`, `attention_mask` (mask for the
        encoder-decoder attention), `self_attention_mask`, `input_pos_embed`
        and `memory_pos_embed`. Either positional embedding may be `None`, in
        which case the corresponding addition is skipped.
      cache: Optional cache forwarded to, and returned updated by, the
        self-attention layer.
      decode_loop_step: Optional decoding step forwarded to the
        self-attention layer.

    Returns:
      A tuple `(layer_output, cache)` where `cache` is the value returned by
      the self-attention layer.
    """
    (input_tensor, memory, attention_mask, self_attention_mask,
     input_pos_embed, memory_pos_embed) = inputs
    # With `norm_first`, every sub-layer normalizes its input and the residual
    # is taken from the un-normalized tensor; otherwise the sum of input and
    # sub-layer output is normalized.
    source_tensor = input_tensor
    if self._norm_first:
      input_tensor = self.self_attention_layer_norm(input_tensor)
    # The positional embedding goes into query and key only, not the value.
    self_attention_output, cache = self.self_attention(
        query=self._add_pos_embed(input_tensor, input_pos_embed),
        key=self._add_pos_embed(input_tensor, input_pos_embed),
        value=input_tensor,
        attention_mask=self_attention_mask,
        cache=cache,
        decode_loop_step=decode_loop_step)
    self_attention_output = self.self_attention_dropout(self_attention_output)
    if self._norm_first:
      self_attention_output = source_tensor + self_attention_output
    else:
      self_attention_output = self.self_attention_layer_norm(
          input_tensor + self_attention_output)
    if self._norm_first:
      source_self_attention_output = self_attention_output
      self_attention_output = self.encdec_attention_layer_norm(
          self_attention_output)
    # Cross-attention: the query carries the target positional embedding, the
    # key carries the memory positional embedding, the value is raw memory.
    cross_attn_inputs = dict(
        query=self._add_pos_embed(self_attention_output, input_pos_embed),
        key=self._add_pos_embed(memory, memory_pos_embed),
        value=memory,
        attention_mask=attention_mask)
    attention_output = self.encdec_attention(**cross_attn_inputs)
    attention_output = self.encdec_attention_dropout(attention_output)
    if self._norm_first:
      attention_output = source_self_attention_output + attention_output
    else:
      attention_output = self.encdec_attention_layer_norm(
          self_attention_output + attention_output)
    if self._norm_first:
      source_attention_output = attention_output
      attention_output = self.output_layer_norm(attention_output)

    # Feed-forward network with its own residual connection.
    intermediate_output = self.intermediate_dense(attention_output)
    intermediate_output = self.intermediate_activation_layer(
        intermediate_output)
    intermediate_output = self._intermediate_dropout_layer(intermediate_output)
    layer_output = self.output_dense(intermediate_output)
    layer_output = self.output_dropout(layer_output)
    if self._norm_first:
      layer_output = source_attention_output + layer_output
    else:
      layer_output = self.output_layer_norm(layer_output + attention_output)
    return layer_output, cache