tf-models-nightly 2.19.0.dev20241104__py2.py3-none-any.whl → 2.19.0.dev20241106__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- official/nlp/modeling/layers/rezero_transformer.py +19 -0
- {tf_models_nightly-2.19.0.dev20241104.dist-info → tf_models_nightly-2.19.0.dev20241106.dist-info}/METADATA +1 -1
- {tf_models_nightly-2.19.0.dev20241104.dist-info → tf_models_nightly-2.19.0.dev20241106.dist-info}/RECORD +7 -7
- {tf_models_nightly-2.19.0.dev20241104.dist-info → tf_models_nightly-2.19.0.dev20241106.dist-info}/AUTHORS +0 -0
- {tf_models_nightly-2.19.0.dev20241104.dist-info → tf_models_nightly-2.19.0.dev20241106.dist-info}/LICENSE +0 -0
- {tf_models_nightly-2.19.0.dev20241104.dist-info → tf_models_nightly-2.19.0.dev20241106.dist-info}/WHEEL +0 -0
- {tf_models_nightly-2.19.0.dev20241104.dist-info → tf_models_nightly-2.19.0.dev20241106.dist-info}/top_level.txt +0 -0
@@ -82,6 +82,10 @@ class ReZeroTransformer(tf_keras.layers.Layer):
       num_kv_heads=None,
       src_block_size=None,
       tgt_block_size=None,
+      linformer_dim=None,
+      linformer_shared_kv_projection=True,
+      use_sigmoid_attn=False,
+      sigmoid_attn_bias=None,
       **kwargs):
     # attention_dropout will override attention_dropout_rate.
     # This is to unify the input params with TransformerEncoderBlock.
@@ -115,6 +119,15 @@ class ReZeroTransformer(tf_keras.layers.Layer):
     self._num_kv_heads = num_kv_heads
     self._src_block_size = src_block_size
     self._tgt_block_size = tgt_block_size
+    self._linformer_dim = linformer_dim
+    self._linformer_shared_kv_projection = linformer_shared_kv_projection
+    self._use_sigmoid_attn = use_sigmoid_attn
+    self._sigmoid_attn_bias = sigmoid_attn_bias
+    if self._linformer_dim is not None or self._use_sigmoid_attn:
+      raise ValueError(
+          "Linformer and Sigmoid attention are not supported in ReZero"
+          " Transformer."
+      )
     if self._num_kv_heads is not None and self._src_block_size is not None:
       raise ValueError(
           "Block sparse attention does not support Multi-query attention."
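The new constructor arguments default to the disabled state (`linformer_dim=None`, `use_sigmoid_attn=False`), so existing call sites are unaffected, while requesting either unsupported feature now fails fast in `__init__`. A minimal sketch of that behavior; the other constructor arguments used here (`num_attention_heads`, `inner_dim`, `inner_activation`) are assumptions for illustration and may differ from the layer's actual required parameters:

```python
from official.nlp.modeling.layers import rezero_transformer

# Assumed constructor arguments for illustration only; the exact required
# parameters of ReZeroTransformer may differ.
common_kwargs = dict(
    num_attention_heads=2,
    inner_dim=128,
    inner_activation="relu",
)

# Default construction: the new arguments keep their "disabled" values
# (linformer_dim=None, use_sigmoid_attn=False), so behavior is unchanged.
layer = rezero_transformer.ReZeroTransformer(**common_kwargs)

# Passing either unsupported feature is rejected at construction time.
try:
  rezero_transformer.ReZeroTransformer(linformer_dim=64, **common_kwargs)
except ValueError as e:
  print(e)  # Linformer and Sigmoid attention are not supported in ReZero Transformer.
```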
@@ -284,6 +297,12 @@ class ReZeroTransformer(tf_keras.layers.Layer):
             tf_keras.constraints.serialize(self._kernel_constraint),
         "bias_constraint":
             tf_keras.constraints.serialize(self._bias_constraint),
+        "linformer_dim": self._linformer_dim,
+        "linformer_shared_kv_projection": (
+            self._linformer_shared_kv_projection
+        ),
+        "use_sigmoid_attn": self._use_sigmoid_attn,
+        "sigmoid_attn_bias": self._sigmoid_attn_bias,
     }
     base_config = super().get_config()
     return dict(list(base_config.items()) + list(config.items()))
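The same four fields are now included in `get_config()`, so a layer round-trips through the standard Keras `get_config()`/`from_config()` path with the new settings preserved. A sketch, again using assumed constructor arguments for illustration:

```python
from official.nlp.modeling.layers import rezero_transformer

# Assumed constructor arguments for illustration only.
layer = rezero_transformer.ReZeroTransformer(
    num_attention_heads=2, inner_dim=128, inner_activation="relu")

config = layer.get_config()
# The four new fields appear alongside the existing config entries.
assert config["linformer_dim"] is None
assert config["linformer_shared_kv_projection"] is True
assert config["use_sigmoid_attn"] is False
assert config["sigmoid_attn_bias"] is None

# A layer rebuilt from the config carries the same (default) settings.
restored = rezero_transformer.ReZeroTransformer.from_config(config)
assert restored.get_config()["use_sigmoid_attn"] is False
```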
@@ -347,7 +347,7 @@ official/nlp/modeling/layers/reuse_attention.py,sha256=qvAC-Dr2uPbpQWOvaf0RVN7t6
 official/nlp/modeling/layers/reuse_attention_test.py,sha256=rKr-dl05DqQesYdvYaCYYahIp0ObP4Xgi4Lno4jsl3Y,14329
 official/nlp/modeling/layers/reuse_transformer.py,sha256=S0IxI8LzjAnZ5L3MDy32oanI3oGQhxQjdkC3ff-zlmc,15697
 official/nlp/modeling/layers/reuse_transformer_test.py,sha256=GXuJWfNrqsOwxAi0xSyuziD3kreVWGPCr0LHmxxe0Mk,17201
-official/nlp/modeling/layers/rezero_transformer.py,sha256=
+official/nlp/modeling/layers/rezero_transformer.py,sha256=hS6LHmIJCZxdRpdeJ__c0jT3mqO9u4Kl5kvj2pQvtqg,15074
 official/nlp/modeling/layers/rezero_transformer_test.py,sha256=-Ib-PpZvWgP5aiH3EhyY69OxXLldxCMsnT8oAhojXzI,8620
 official/nlp/modeling/layers/routing.py,sha256=hV6RHVBU0lEgwx180Q78mDncuXTcyWPyaVBqatWCtQw,4469
 official/nlp/modeling/layers/routing_test.py,sha256=ViRCnFWPdwM4Kam0k8aDZbyoJqeqyIiQKEmlhNE7LgI,2226
@@ -1222,9 +1222,9 @@ tensorflow_models/tensorflow_models_test.py,sha256=nc6A9K53OGqF25xN5St8EiWvdVbda
 tensorflow_models/nlp/__init__.py,sha256=4tA5Pf4qaFwT-fIFOpX7x7FHJpnyJT-5UgOeFYTyMlc,807
 tensorflow_models/uplift/__init__.py,sha256=mqfa55gweOdpKoaQyid4A_4u7xw__FcQeSIF0k_pYmI,999
 tensorflow_models/vision/__init__.py,sha256=zBorY_v5xva1uI-qxhZO3Qh-Dii-Suq6wEYh6hKHDfc,833
-tf_models_nightly-2.19.0.
-tf_models_nightly-2.19.0.
-tf_models_nightly-2.19.0.
-tf_models_nightly-2.19.0.
-tf_models_nightly-2.19.0.
-tf_models_nightly-2.19.0.
+tf_models_nightly-2.19.0.dev20241106.dist-info/AUTHORS,sha256=1dG3fXVu9jlo7bul8xuix5F5vOnczMk7_yWn4y70uw0,337
+tf_models_nightly-2.19.0.dev20241106.dist-info/LICENSE,sha256=WxeBS_DejPZQabxtfMOM_xn8qoZNJDQjrT7z2wG1I4U,11512
+tf_models_nightly-2.19.0.dev20241106.dist-info/METADATA,sha256=8UVCGstkfhDyihQWM5BSpY31l8yTY_NzJ_NRkfUr08o,1432
+tf_models_nightly-2.19.0.dev20241106.dist-info/WHEEL,sha256=kGT74LWyRUZrL4VgLh6_g12IeVl_9u9ZVhadrgXZUEY,110
+tf_models_nightly-2.19.0.dev20241106.dist-info/top_level.txt,sha256=gum2FfO5R4cvjl2-QtP-S1aNmsvIZaFFT6VFzU0f4-g,33
+tf_models_nightly-2.19.0.dev20241106.dist-info/RECORD,,
The remaining dist-info files (AUTHORS, LICENSE, WHEEL, top_level.txt) are unchanged between the two versions.