tf-models-nightly 2.20.0.dev20251106__py2.py3-none-any.whl → 2.20.0.dev20251115__py2.py3-none-any.whl
This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- official/nlp/modeling/layers/transformer_encoder_block.py +6 -0
- {tf_models_nightly-2.20.0.dev20251106.dist-info → tf_models_nightly-2.20.0.dev20251115.dist-info}/METADATA +1 -1
- {tf_models_nightly-2.20.0.dev20251106.dist-info → tf_models_nightly-2.20.0.dev20251115.dist-info}/RECORD +7 -7
- {tf_models_nightly-2.20.0.dev20251106.dist-info → tf_models_nightly-2.20.0.dev20251115.dist-info}/AUTHORS +0 -0
- {tf_models_nightly-2.20.0.dev20251106.dist-info → tf_models_nightly-2.20.0.dev20251115.dist-info}/LICENSE +0 -0
- {tf_models_nightly-2.20.0.dev20251106.dist-info → tf_models_nightly-2.20.0.dev20251115.dist-info}/WHEEL +0 -0
- {tf_models_nightly-2.20.0.dev20251106.dist-info → tf_models_nightly-2.20.0.dev20251115.dist-info}/top_level.txt +0 -0

official/nlp/modeling/layers/transformer_encoder_block.py

@@ -121,6 +121,7 @@ class TransformerEncoderBlock(tf_keras.layers.Layer):
       lowrank_query_seq_proj_dim=None,
       enable_talking_heads=False,
       enable_gqa_optimization=False,
+      softmax_robust_masking=False,
       **kwargs,
   ):
     """Initializes `TransformerEncoderBlock`.
@@ -209,6 +210,8 @@ class TransformerEncoderBlock(tf_keras.layers.Layer):
         https://arxiv.org/pdf/2003.02436.
       enable_gqa_optimization: Enable GQA optimization in multi-query attention.
         This flag is valid only when num_kv_heads is set for GQA.
+      softmax_robust_masking: If true, will use a more numerically robust
+        masking impl for softmax.
       **kwargs: keyword arguments.
     """
     util.filter_kwargs(kwargs)
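This hunk only documents the new flag; the masking change itself lives in the underlying attention layer, which is not part of this diff. As a rough, hypothetical illustration of what a "more numerically robust masking impl for softmax" usually refers to (the function names and details below are assumptions, not the library's code): instead of adding a fixed large negative constant to masked logits, clamp masked positions to the dtype's lowest finite value and zero out rows where every position is masked.

```python
import tensorflow as tf

def masked_softmax_naive(logits, mask):
  # Common pattern: add a large negative bias to masked positions. Softmax is
  # shift-invariant, so a fully masked row silently attends as if nothing
  # were masked, and in float16 the -1e9 constant overflows to -inf.
  return tf.nn.softmax(logits + (1.0 - mask) * -1e9, axis=-1)

def masked_softmax_robust(logits, mask):
  # Hypothetical sketch: clamp masked positions to the dtype's lowest finite
  # value and return zeros (instead of NaN or spurious weights) for rows
  # where every position is masked.
  bool_mask = tf.cast(mask, tf.bool)
  lowest = tf.constant(logits.dtype.min, dtype=logits.dtype)
  weights = tf.nn.softmax(tf.where(bool_mask, logits, lowest), axis=-1)
  any_valid = tf.reduce_any(bool_mask, axis=-1, keepdims=True)
  return tf.where(any_valid, weights, tf.zeros_like(weights))

logits = tf.constant([[2.0, 1.0, 0.0],
                      [5.0, 3.0, 1.0]])
mask = tf.constant([[1.0, 1.0, 0.0],
                    [0.0, 0.0, 0.0]])  # second row: everything masked
print(masked_softmax_naive(logits, mask).numpy())   # row 2 attends as if unmasked
print(masked_softmax_robust(logits, mask).numpy())  # row 2 = all zeros
```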
@@ -253,6 +256,7 @@ class TransformerEncoderBlock(tf_keras.layers.Layer):
     self._lowrank_query_seq_proj_dim = lowrank_query_seq_proj_dim
     self._enable_talking_heads = enable_talking_heads
     self._enable_gqa_optimization = enable_gqa_optimization
+    self._softmax_robust_masking = softmax_robust_masking
     if (
         self._src_block_size is not None
         and self._num_kv_heads is not None
@@ -314,6 +318,7 @@ class TransformerEncoderBlock(tf_keras.layers.Layer):
         bias_initializer=tf_utils.clone_initializer(self._bias_initializer),
         attention_axes=self._attention_axes,
         output_shape=self._output_last_dim,
+        softmax_robust_masking=self._softmax_robust_masking,
         name="self_attention",
     )
     common_kwargs = dict(
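The stored flag is forwarded into the block's self-attention layer when it is built. A minimal usage sketch, assuming a nightly build that contains this change; the hyperparameter values are arbitrary, and only `softmax_robust_masking` comes from this diff:

```python
import tensorflow as tf
from official.nlp.modeling.layers.transformer_encoder_block import (
    TransformerEncoderBlock,
)

block = TransformerEncoderBlock(
    num_attention_heads=8,
    inner_dim=2048,
    inner_activation="relu",
    softmax_robust_masking=True,  # new in this diff; defaults to False
)

x = tf.random.normal([2, 16, 512])   # [batch, seq_len, hidden]
mask = tf.ones([2, 16, 16])          # [batch, seq_len, seq_len] self-attention mask
y = block([x, mask])
print(y.shape)  # (2, 16, 512)
```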
@@ -512,6 +517,7 @@ class TransformerEncoderBlock(tf_keras.layers.Layer):
         "linformer_dim": self._linformer_dim,
         "linformer_shared_kv_projection": self._linformer_shared_kv_projection,
         "lowrank_query_seq_proj_dim": self._lowrank_query_seq_proj_dim,
+        "softmax_robust_masking": self._softmax_robust_masking
     }
     base_config = super().get_config()
     return dict(list(base_config.items()) + list(config.items()))
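Because the flag is also written into `get_config()`, it should survive a config round trip when the layer is rebuilt from a serialized model. A short sketch, reusing the arbitrary hyperparameters from the previous example:

```python
from official.nlp.modeling.layers.transformer_encoder_block import (
    TransformerEncoderBlock,
)

block = TransformerEncoderBlock(
    num_attention_heads=8,
    inner_dim=2048,
    inner_activation="relu",
    softmax_robust_masking=True,
)

config = block.get_config()
assert config["softmax_robust_masking"] is True

# Rebuild the layer from its config; the rebuilt block should carry the flag.
rebuilt = TransformerEncoderBlock.from_config(config)
assert rebuilt.get_config()["softmax_robust_masking"] is True
```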

{tf_models_nightly-2.20.0.dev20251106.dist-info → tf_models_nightly-2.20.0.dev20251115.dist-info}/RECORD

@@ -363,7 +363,7 @@ official/nlp/modeling/layers/tn_expand_condense_test.py,sha256=QWq1dJqQUPe5n69K3
 official/nlp/modeling/layers/tn_transformer_expand_condense.py,sha256=omzTkCBEk2TOkHEYDEBwve6WsOitX7IIJHzeKXdqDq0,11012
 official/nlp/modeling/layers/tn_transformer_test.py,sha256=pSCONEZRI4J9_6QLTJ3g_ynUYLrRXsJ1c2YMSiOV_60,8893
 official/nlp/modeling/layers/transformer.py,sha256=VjUO-gVj_PnavbT_vSrg5NDKMr0SRSiqSg5ktd42m5M,20087
-official/nlp/modeling/layers/transformer_encoder_block.py,sha256=
+official/nlp/modeling/layers/transformer_encoder_block.py,sha256=78nIg3hVFKLeqJ-Hdg4j1scfskZAnm2CSHqlTeoSRcc,30236
 official/nlp/modeling/layers/transformer_encoder_block_test.py,sha256=g7oMDPvwg6Fv75SBdm6BInXPI8r5GcItBRjLFGuObyg,37821
 official/nlp/modeling/layers/transformer_scaffold.py,sha256=qmzhCJvbbFVF9zDqnfO4Zs2JDXwKhK7iEBOhsU6-KpQ,15704
 official/nlp/modeling/layers/transformer_scaffold_test.py,sha256=dRJwesTBKm-mF5mDHrHfVpVNnxa-Wx-fj_4ZHDPTpE0,19920
@@ -1248,9 +1248,9 @@ tensorflow_models/tensorflow_models_test.py,sha256=yiAneltAW3NHSj3fUSvHNBjfq0MGZ
 tensorflow_models/nlp/__init__.py,sha256=8uQd4wI6Zc4IJMPjtQifMeWVbPFkTxqYh66wfivCOL4,807
 tensorflow_models/uplift/__init__.py,sha256=NzaweFf4ZmhRb2l_fuV6bP-2N8oSO3xu6xJqVb1UmpY,999
 tensorflow_models/vision/__init__.py,sha256=ks420Ooqzi0hU7HnQpM5rylLaE-YcJdJkBx_umVaXlE,833
-tf_models_nightly-2.20.0.
-tf_models_nightly-2.20.0.
-tf_models_nightly-2.20.0.
-tf_models_nightly-2.20.0.
-tf_models_nightly-2.20.0.
-tf_models_nightly-2.20.0.
+tf_models_nightly-2.20.0.dev20251115.dist-info/AUTHORS,sha256=1dG3fXVu9jlo7bul8xuix5F5vOnczMk7_yWn4y70uw0,337
+tf_models_nightly-2.20.0.dev20251115.dist-info/LICENSE,sha256=WxeBS_DejPZQabxtfMOM_xn8qoZNJDQjrT7z2wG1I4U,11512
+tf_models_nightly-2.20.0.dev20251115.dist-info/METADATA,sha256=rTMmXBZgHPaClZSHnwTsEju3i1Bw4JG4s-hiq5_VNgA,1432
+tf_models_nightly-2.20.0.dev20251115.dist-info/WHEEL,sha256=kGT74LWyRUZrL4VgLh6_g12IeVl_9u9ZVhadrgXZUEY,110
+tf_models_nightly-2.20.0.dev20251115.dist-info/top_level.txt,sha256=gum2FfO5R4cvjl2-QtP-S1aNmsvIZaFFT6VFzU0f4-g,33
+tf_models_nightly-2.20.0.dev20251115.dist-info/RECORD,,