tf-models-nightly 2.20.0.dev20251114__py2.py3-none-any.whl → 2.20.0.dev20251116__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -121,6 +121,7 @@ class TransformerEncoderBlock(tf_keras.layers.Layer):
121
121
  lowrank_query_seq_proj_dim=None,
122
122
  enable_talking_heads=False,
123
123
  enable_gqa_optimization=False,
124
+ softmax_robust_masking=False,
124
125
  **kwargs,
125
126
  ):
126
127
  """Initializes `TransformerEncoderBlock`.
@@ -209,6 +210,8 @@ class TransformerEncoderBlock(tf_keras.layers.Layer):
209
210
  https://arxiv.org/pdf/2003.02436.
210
211
  enable_gqa_optimization: Enable GQA optimization in multi-query attention.
211
212
  This flag is valid only when num_kv_heads is set for GQA.
213
+ softmax_robust_masking: If true, will use a more numerically robust
214
+ masking impl for softmax.
212
215
  **kwargs: keyword arguments.
213
216
  """
214
217
  util.filter_kwargs(kwargs)
@@ -253,6 +256,7 @@ class TransformerEncoderBlock(tf_keras.layers.Layer):
253
256
  self._lowrank_query_seq_proj_dim = lowrank_query_seq_proj_dim
254
257
  self._enable_talking_heads = enable_talking_heads
255
258
  self._enable_gqa_optimization = enable_gqa_optimization
259
+ self._softmax_robust_masking = softmax_robust_masking
256
260
  if (
257
261
  self._src_block_size is not None
258
262
  and self._num_kv_heads is not None
@@ -314,6 +318,7 @@ class TransformerEncoderBlock(tf_keras.layers.Layer):
314
318
  bias_initializer=tf_utils.clone_initializer(self._bias_initializer),
315
319
  attention_axes=self._attention_axes,
316
320
  output_shape=self._output_last_dim,
321
+ softmax_robust_masking=self._softmax_robust_masking,
317
322
  name="self_attention",
318
323
  )
319
324
  common_kwargs = dict(
@@ -512,6 +517,7 @@ class TransformerEncoderBlock(tf_keras.layers.Layer):
512
517
  "linformer_dim": self._linformer_dim,
513
518
  "linformer_shared_kv_projection": self._linformer_shared_kv_projection,
514
519
  "lowrank_query_seq_proj_dim": self._lowrank_query_seq_proj_dim,
520
+ "softmax_robust_masking": self._softmax_robust_masking
515
521
  }
516
522
  base_config = super().get_config()
517
523
  return dict(list(base_config.items()) + list(config.items()))
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: tf-models-nightly
3
- Version: 2.20.0.dev20251114
3
+ Version: 2.20.0.dev20251116
4
4
  Summary: TensorFlow Official Models
5
5
  Home-page: https://github.com/tensorflow/models
6
6
  Author: Google Inc.
@@ -363,7 +363,7 @@ official/nlp/modeling/layers/tn_expand_condense_test.py,sha256=QWq1dJqQUPe5n69K3
363
363
  official/nlp/modeling/layers/tn_transformer_expand_condense.py,sha256=omzTkCBEk2TOkHEYDEBwve6WsOitX7IIJHzeKXdqDq0,11012
364
364
  official/nlp/modeling/layers/tn_transformer_test.py,sha256=pSCONEZRI4J9_6QLTJ3g_ynUYLrRXsJ1c2YMSiOV_60,8893
365
365
  official/nlp/modeling/layers/transformer.py,sha256=VjUO-gVj_PnavbT_vSrg5NDKMr0SRSiqSg5ktd42m5M,20087
366
- official/nlp/modeling/layers/transformer_encoder_block.py,sha256=BiL8ErBs-m0UZ6ONVJV0ncfWX3LhMhPetIhfH2VvuD4,29910
366
+ official/nlp/modeling/layers/transformer_encoder_block.py,sha256=78nIg3hVFKLeqJ-Hdg4j1scfskZAnm2CSHqlTeoSRcc,30236
367
367
  official/nlp/modeling/layers/transformer_encoder_block_test.py,sha256=g7oMDPvwg6Fv75SBdm6BInXPI8r5GcItBRjLFGuObyg,37821
368
368
  official/nlp/modeling/layers/transformer_scaffold.py,sha256=qmzhCJvbbFVF9zDqnfO4Zs2JDXwKhK7iEBOhsU6-KpQ,15704
369
369
  official/nlp/modeling/layers/transformer_scaffold_test.py,sha256=dRJwesTBKm-mF5mDHrHfVpVNnxa-Wx-fj_4ZHDPTpE0,19920
@@ -1248,9 +1248,9 @@ tensorflow_models/tensorflow_models_test.py,sha256=yiAneltAW3NHSj3fUSvHNBjfq0MGZ
1248
1248
  tensorflow_models/nlp/__init__.py,sha256=8uQd4wI6Zc4IJMPjtQifMeWVbPFkTxqYh66wfivCOL4,807
1249
1249
  tensorflow_models/uplift/__init__.py,sha256=NzaweFf4ZmhRb2l_fuV6bP-2N8oSO3xu6xJqVb1UmpY,999
1250
1250
  tensorflow_models/vision/__init__.py,sha256=ks420Ooqzi0hU7HnQpM5rylLaE-YcJdJkBx_umVaXlE,833
1251
- tf_models_nightly-2.20.0.dev20251114.dist-info/AUTHORS,sha256=1dG3fXVu9jlo7bul8xuix5F5vOnczMk7_yWn4y70uw0,337
1252
- tf_models_nightly-2.20.0.dev20251114.dist-info/LICENSE,sha256=WxeBS_DejPZQabxtfMOM_xn8qoZNJDQjrT7z2wG1I4U,11512
1253
- tf_models_nightly-2.20.0.dev20251114.dist-info/METADATA,sha256=0ImfIyWGCsTpCzpIt8ishPQk_wsbj7V3CgYr99XgD1A,1432
1254
- tf_models_nightly-2.20.0.dev20251114.dist-info/WHEEL,sha256=kGT74LWyRUZrL4VgLh6_g12IeVl_9u9ZVhadrgXZUEY,110
1255
- tf_models_nightly-2.20.0.dev20251114.dist-info/top_level.txt,sha256=gum2FfO5R4cvjl2-QtP-S1aNmsvIZaFFT6VFzU0f4-g,33
1256
- tf_models_nightly-2.20.0.dev20251114.dist-info/RECORD,,
1251
+ tf_models_nightly-2.20.0.dev20251116.dist-info/AUTHORS,sha256=1dG3fXVu9jlo7bul8xuix5F5vOnczMk7_yWn4y70uw0,337
1252
+ tf_models_nightly-2.20.0.dev20251116.dist-info/LICENSE,sha256=WxeBS_DejPZQabxtfMOM_xn8qoZNJDQjrT7z2wG1I4U,11512
1253
+ tf_models_nightly-2.20.0.dev20251116.dist-info/METADATA,sha256=O3KI3rsF_AQuFE494hYOCSC1ZiYV9Bf8BZ09QYXO2m4,1432
1254
+ tf_models_nightly-2.20.0.dev20251116.dist-info/WHEEL,sha256=kGT74LWyRUZrL4VgLh6_g12IeVl_9u9ZVhadrgXZUEY,110
1255
+ tf_models_nightly-2.20.0.dev20251116.dist-info/top_level.txt,sha256=gum2FfO5R4cvjl2-QtP-S1aNmsvIZaFFT6VFzU0f4-g,33
1256
+ tf_models_nightly-2.20.0.dev20251116.dist-info/RECORD,,