tf-models-nightly 2.17.0.dev20240610__py2.py3-none-any.whl → 2.17.0.dev20240611__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
official/nlp/modeling/layers/__init__.py

@@ -23,6 +23,7 @@ from official.nlp.modeling.layers.attention import *
 from official.nlp.modeling.layers.bigbird_attention import BigBirdAttention
 from official.nlp.modeling.layers.bigbird_attention import BigBirdMasks
 from official.nlp.modeling.layers.block_diag_feedforward import BlockDiagFeedforward
+from official.nlp.modeling.layers.block_sparse_attention import MultiHeadAttention as BlockSparseAttention
 from official.nlp.modeling.layers.cls_head import *
 from official.nlp.modeling.layers.factorized_embedding import FactorizedEmbedding
 from official.nlp.modeling.layers.gated_feedforward import GatedFeedforward
@@ -44,6 +45,7 @@ from official.nlp.modeling.layers.moe import FeedForwardExperts
 from official.nlp.modeling.layers.moe import MoeLayer
 from official.nlp.modeling.layers.moe import MoeLayerWithBackbone
 from official.nlp.modeling.layers.multi_channel_attention import *
+from official.nlp.modeling.layers.multi_query_attention import MultiHeadAttention as MultiQueryAttention
 from official.nlp.modeling.layers.on_device_embedding import OnDeviceEmbedding
 from official.nlp.modeling.layers.pack_optimization import PackBertEmbeddings
 from official.nlp.modeling.layers.pack_optimization import StridedReZeroTransformer
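With these two aliases exported, both attention variants become importable directly from the layers package as drop-in variants of Keras MultiHeadAttention. Below is a minimal usage sketch, assuming the hunk above lands in official/nlp/modeling/layers/__init__.py (as the RECORD changes at the end of this diff indicate); the keyword arguments shown are the ones this diff itself forwards (num_heads, key_dim, src_block_size, tgt_block_size, num_kv_heads), with illustrative values, not a complete signature.

from official.nlp.modeling import layers

# Block sparse attention: attention is computed within fixed-size blocks of
# the source (query) and target (key/value) sequences.
block_sparse = layers.BlockSparseAttention(
    num_heads=8, key_dim=64, src_block_size=16, tgt_block_size=16)

# Multi-query / grouped-query attention: a reduced number of key/value heads
# is shared across the query heads (num_kv_heads=1 is classic multi-query).
multi_query = layers.MultiQueryAttention(
    num_heads=8, key_dim=64, num_kv_heads=1)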
official/nlp/modeling/layers/pack_optimization.py

@@ -72,6 +72,10 @@ class StridedTransformerEncoderBlock(
     if self._output_range is not None:
       raise ValueError('StridedTransformerEncoderBlock does not '
                        'support `output_range` argument.')
+    # TODO(b/337888023): Support block sparse attention with strided inputs.
+    if self._src_block_size is not None:
+      raise ValueError('StridedTransformerEncoderBlock does not '
+                       'support block sparse attention.')
 
   def call(self, inputs, stride: tf.Tensor):
     if isinstance(inputs, (list, tuple)):
@@ -137,6 +141,10 @@ class StridedReZeroTransformer(rezero_transformer.ReZeroTransformer):
     if self._output_range is not None:
       raise ValueError(f'{self.__class__} does not '
                        'support `output_range` argument.')
+    # TODO(b/337888023): Support block sparse attention with strided inputs.
+    if self._src_block_size is not None:
+      raise ValueError(f'{self.__class__} does not '
+                       'support block sparse attention.')
 
   def call(self, inputs, stride: tf.Tensor):
     if isinstance(inputs, (list, tuple)):
official/nlp/modeling/layers/rezero_transformer.py

@@ -21,6 +21,8 @@ import gin
 import tensorflow as tf, tf_keras
 
 from official.modeling import tf_utils
+from official.nlp.modeling.layers import block_sparse_attention
+from official.nlp.modeling.layers import multi_query_attention
 from official.nlp.modeling.layers import util
 
 
@@ -53,6 +55,12 @@ class ReZeroTransformer(tf_keras.layers.Layer):
     bias_constraint: Constraint for dense layer kernels.
     use_layer_norm: If add layer_norm on top of the ReZero.
     share_rezero: If attention layer and FFN layer share the same alpha.
+    num_kv_heads: Number of key-value heads for multi-query attention. Refer to
+      `multi_query_attention.MultiHeadAttention` for more details.
+    src_block_size: Source block size. Refer to
+      `block_sparse_attention.MultiHeadAttention` for more details.
+    tgt_block_size: Target block size. Refer to
+      `block_sparse_attention.MultiHeadAttention` for more details.
   """
 
   def __init__(self,
@@ -71,6 +79,9 @@ class ReZeroTransformer(tf_keras.layers.Layer):
                bias_constraint=None,
                use_layer_norm=False,
                share_rezero=True,
+               num_kv_heads=None,
+               src_block_size=None,
+               tgt_block_size=None,
                **kwargs):
     # attention_dropout will override attention_dropout_rate.
     # This is to unify the input params with TransformerEncoderBlock.
@@ -101,6 +112,14 @@ class ReZeroTransformer(tf_keras.layers.Layer):
     self._bias_constraint = tf_keras.constraints.get(bias_constraint)
     self._use_layer_norm = use_layer_norm
     self._share_rezero = share_rezero
+    self._num_kv_heads = num_kv_heads
+    self._src_block_size = src_block_size
+    self._tgt_block_size = tgt_block_size
+    if self._num_kv_heads is not None and self._src_block_size is not None:
+      raise ValueError(
+          "Block sparse attention does not support Multi-query attention."
+          " Specify only one of them."
+      )
 
   def build(self, input_shape):
     if isinstance(input_shape, tf.TensorShape):
@@ -109,53 +128,77 @@ class ReZeroTransformer(tf_keras.layers.Layer):
       input_tensor_shape = tf.TensorShape(input_shape[0])
     else:
       raise ValueError(
-          "The type of input shape argument is not supported, got: %s" %
-          type(input_shape))
+          "The type of input shape argument is not supported, got: %s"
+          % type(input_shape)
+      )
 
     if len(input_tensor_shape.as_list()) != 3:
-      raise ValueError("TransformerLayer expects a three-dimensional input of "
-                       "shape [batch, sequence, width].")
+      raise ValueError(
+          "TransformerLayer expects a three-dimensional input of "
+          "shape [batch, sequence, width]."
+      )
     batch_size, sequence_length, hidden_size = input_tensor_shape
 
     if len(input_shape) == 2:
       mask_tensor_shape = tf.TensorShape(input_shape[1])
       expected_mask_tensor_shape = tf.TensorShape(
-          [batch_size, sequence_length, sequence_length])
+          [batch_size, sequence_length, sequence_length]
+      )
       if not expected_mask_tensor_shape.is_compatible_with(mask_tensor_shape):
-        raise ValueError("When passing a mask tensor to TransformerLayer, the "
-                         "mask tensor must be of shape [batch, "
-                         "sequence_length, sequence_length] (here %s). Got a "
-                         "mask tensor of shape %s." %
-                         (expected_mask_tensor_shape, mask_tensor_shape))
+        raise ValueError(
+            "When passing a mask tensor to TransformerLayer, the "
+            "mask tensor must be of shape [batch, "
+            "sequence_length, sequence_length] (here %s). Got a "
+            "mask tensor of shape %s."
+            % (expected_mask_tensor_shape, mask_tensor_shape)
+        )
     if hidden_size % self._num_heads != 0:
       raise ValueError(
           "The input size (%d) is not a multiple of the number of attention "
-          "heads (%d)" % (hidden_size, self._num_heads))
+          "heads (%d)" % (hidden_size, self._num_heads)
+      )
     self._attention_head_size = int(hidden_size // self._num_heads)
     common_kwargs = dict(
         kernel_regularizer=self._kernel_regularizer,
         bias_regularizer=self._bias_regularizer,
         activity_regularizer=self._activity_regularizer,
        kernel_constraint=self._kernel_constraint,
-        bias_constraint=self._bias_constraint)
-    self._attention_layer = tf_keras.layers.MultiHeadAttention(
+        bias_constraint=self._bias_constraint,
+    )
+    attention_kwargs = dict(
        num_heads=self._num_heads,
        key_dim=self._attention_head_size,
        dropout=self._attention_dropout_rate,
        name="self_attention",
        kernel_initializer=tf_utils.clone_initializer(self._kernel_initializer),
        bias_initializer=tf_utils.clone_initializer(self._bias_initializer),
-        **common_kwargs)
+    )
+    if self._src_block_size is not None:
+      attention_kwargs.update(
+          src_block_size=self._src_block_size,
+          tgt_block_size=self._tgt_block_size,
+          name="block_sparse_attention",
+      )
+      attention_fn = block_sparse_attention.MultiHeadAttention
+    elif self._num_kv_heads is not None:
+      attention_kwargs.update(
+          num_kv_heads=self._num_kv_heads,
+          name="multi_query_attention",
+      )
+      attention_fn = multi_query_attention.MultiHeadAttention
+    else:
+      attention_fn = tf_keras.layers.MultiHeadAttention
+    self._attention_layer = attention_fn(**attention_kwargs, **common_kwargs)
     self._attention_dropout = tf_keras.layers.Dropout(rate=self._dropout_rate)
     if self._use_layer_norm:
       # Use float32 in layernorm for numeric stability.
       # It is probably safe in mixed_float16, but we haven't validated this yet.
-      self._attention_layer_norm = (
-          tf_keras.layers.LayerNormalization(
-              name="self_attention_layer_norm",
-              axis=-1,
-              epsilon=1e-12,
-              dtype=tf.float32))
+      self._attention_layer_norm = tf_keras.layers.LayerNormalization(
+          name="self_attention_layer_norm",
+          axis=-1,
+          epsilon=1e-12,
+          dtype=tf.float32,
+      )
     self._intermediate_dense = tf_keras.layers.EinsumDense(
         "abc,cd->abd",
         output_shape=(None, self._inner_dim),
@@ -221,6 +264,12 @@ class ReZeroTransformer(tf_keras.layers.Layer):
             self._use_layer_norm,
         "share_rezero":
             self._share_rezero,
+        "num_kv_heads":
+            self._num_kv_heads,
+        "src_block_size":
+            self._src_block_size,
+        "tgt_block_size":
+            self._tgt_block_size,
         "kernel_initializer":
             tf_keras.initializers.serialize(self._kernel_initializer),
         "bias_initializer":
official/nlp/modeling/layers/rezero_transformer_test.py

@@ -141,6 +141,69 @@ class TransformerWithReZeroLayerTest(tf.test.TestCase, parameterized.TestCase):
     output = test_layer(inputs)
     self.assertEqual(output.shape, q_tensor.shape)
 
+  @parameterized.named_parameters(('_mqa', 1),
+                                  ('_gqa', 5))
+  def test_rezero_with_kv_heads(self, num_kv_heads):
+    tf_keras.mixed_precision.set_global_policy('mixed_float16')
+    test_layer = rezero_transformer.ReZeroTransformer(
+        num_attention_heads=10,
+        intermediate_size=2048,
+        intermediate_activation='relu',
+        num_kv_heads=num_kv_heads,
+    )
+    sequence_length = 21
+    width = 80
+    # Create a 3-dimensional input (the first dimension is implicit).
+    data_tensor = tf_keras.Input(shape=(sequence_length, width))
+    # Create a 2-dimensional input (the first dimension is implicit).
+    mask_tensor = tf_keras.Input(shape=(sequence_length, sequence_length))
+    output_tensor = test_layer([data_tensor, mask_tensor])
+
+    # Create a model from the test layer.
+    model = tf_keras.Model([data_tensor, mask_tensor], output_tensor)
+
+    # Invoke the model on test data. We can't validate the output data itself
+    # (the NN is too complex) but this will rule out structural runtime errors.
+    batch_size = 6
+    input_data = (10 * np.random.random_sample(
+        (batch_size, sequence_length, width)))
+    # The attention mask should be of shape (batch, from_seq_len, to_seq_len),
+    # which here is (batch, sequence_length, sequence_length)
+    mask_data = np.random.randint(
+        2, size=(batch_size, sequence_length, sequence_length))
+    _ = model.predict([input_data, mask_data])
+
+  def test_rezero_with_block_sparse_attention(self):
+    tf_keras.mixed_precision.set_global_policy('mixed_float16')
+    test_layer = rezero_transformer.ReZeroTransformer(
+        num_attention_heads=10,
+        intermediate_size=2048,
+        intermediate_activation='relu',
+        src_block_size=3,
+        tgt_block_size=3,
+    )
+    sequence_length = 21
+    width = 80
+    # Create a 3-dimensional input (the first dimension is implicit).
+    data_tensor = tf_keras.Input(shape=(sequence_length, width))
+    # Create a 2-dimensional input (the first dimension is implicit).
+    mask_tensor = tf_keras.Input(shape=(sequence_length, sequence_length))
+    output_tensor = test_layer([data_tensor, mask_tensor])
+
+    # Create a model from the test layer.
+    model = tf_keras.Model([data_tensor, mask_tensor], output_tensor)
+
+    # Invoke the model on test data. We can't validate the output data itself
+    # (the NN is too complex) but this will rule out structural runtime errors.
+    batch_size = 6
+    input_data = (10 * np.random.random_sample(
+        (batch_size, sequence_length, width)))
+    # The attention mask should be of shape (batch, from_seq_len, to_seq_len),
+    # which here is (batch, sequence_length, sequence_length)
+    mask_data = np.random.randint(
+        2, size=(batch_size, sequence_length, sequence_length))
+    _ = model.predict([input_data, mask_data])
+
 
 if __name__ == '__main__':
   tf.test.main()
official/nlp/modeling/layers/transformer_encoder_block.py

@@ -18,6 +18,8 @@ from absl import logging
 import tensorflow as tf, tf_keras
 
 from official.modeling import tf_utils
+from official.nlp.modeling.layers import block_sparse_attention
+from official.nlp.modeling.layers import multi_query_attention
 from official.nlp.modeling.layers import util
 
 
@@ -107,6 +109,9 @@ class TransformerEncoderBlock(tf_keras.layers.Layer):
               output_last_dim=None,
               diff_q_kv_att_layer_norm=False,
               return_attention_scores=False,
+              num_kv_heads=None,
+              src_block_size=None,
+              tgt_block_size=None,
               **kwargs):
    """Initializes `TransformerEncoderBlock`.
 
@@ -174,6 +179,12 @@ class TransformerEncoderBlock(tf_keras.layers.Layer):
      return_attention_scores: If `True`, the output of this layer will be a
        tuple and additionally contain the attention scores in the shape of
        `[batch_size, num_attention_heads, seq_dim, seq_dim]`.
+      num_kv_heads: Number of key-value heads for multi-query attention. Refer
+        to `multi_query_attention.MultiHeadAttention` for more details.
+      src_block_size: Source block size. Refer to
+        `block_sparse_attention.MultiHeadAttention` for more details.
+      tgt_block_size: Target block size. Refer to
+        `block_sparse_attention.MultiHeadAttention` for more details.
      **kwargs: keyword arguments.
    """
    util.filter_kwargs(kwargs)
@@ -208,6 +219,14 @@ class TransformerEncoderBlock(tf_keras.layers.Layer):
    self._output_last_dim = output_last_dim
    self._diff_q_kv_att_layer_norm = diff_q_kv_att_layer_norm
    self._return_attention_scores = return_attention_scores
+    self._num_kv_heads = num_kv_heads
+    self._src_block_size = src_block_size
+    self._tgt_block_size = tgt_block_size
+    if self._num_kv_heads is not None and self._src_block_size is not None:
+      raise ValueError(
+          "Block sparse attention does not support Multi-query attention."
+          " Specify only one of them."
+      )
    if attention_initializer:
      self._attention_initializer = tf_keras.initializers.get(
          attention_initializer)
@@ -244,12 +263,7 @@ class TransformerEncoderBlock(tf_keras.layers.Layer):
    else:
      last_output_shape = self._output_last_dim
 
-    common_kwargs = dict(
-        bias_regularizer=self._bias_regularizer,
-        activity_regularizer=self._activity_regularizer,
-        kernel_constraint=self._kernel_constraint,
-        bias_constraint=self._bias_constraint)
-    self._attention_layer = tf_keras.layers.MultiHeadAttention(
+    attention_layer_kwargs = dict(
        num_heads=self._num_heads,
        key_dim=self._key_dim,
        value_dim=self._value_dim,
@@ -260,7 +274,30 @@ class TransformerEncoderBlock(tf_keras.layers.Layer):
        attention_axes=self._attention_axes,
        output_shape=self._output_last_dim,
        name="self_attention",
-        **common_kwargs
+    )
+    common_kwargs = dict(
+        bias_regularizer=self._bias_regularizer,
+        activity_regularizer=self._activity_regularizer,
+        kernel_constraint=self._kernel_constraint,
+        bias_constraint=self._bias_constraint,
+    )
+    if self._src_block_size is not None:
+      attention_layer_kwargs.update(
+          src_block_size=self._src_block_size,
+          tgt_block_size=self._tgt_block_size,
+          name="block_sparse_attention",
+      )
+      attention_fn = block_sparse_attention.MultiHeadAttention
+    elif self._num_kv_heads is not None:
+      attention_layer_kwargs.update(
+          num_kv_heads=self._num_kv_heads,
+          name="multi_query_attention",
+      )
+      attention_fn = multi_query_attention.MultiHeadAttention
+    else:
+      attention_fn = tf_keras.layers.MultiHeadAttention
+    self._attention_layer = attention_fn(
+        **attention_layer_kwargs, **common_kwargs
    )
    self._attention_dropout = tf_keras.layers.Dropout(
        rate=self._attention_dropout_rate
@@ -373,6 +410,9 @@ class TransformerEncoderBlock(tf_keras.layers.Layer):
        "value_dim": self._value_dim,
        "output_last_dim": self._output_last_dim,
        "diff_q_kv_att_layer_norm": self._diff_q_kv_att_layer_norm,
+        "num_kv_heads": self._num_kv_heads,
+        "src_block_size": self._src_block_size,
+        "tgt_block_size": self._tgt_block_size,
    }
    base_config = super().get_config()
    return dict(list(base_config.items()) + list(config.items()))
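The same three constructor arguments are now accepted by TransformerEncoderBlock and routed to the matching attention implementation. A hedged sketch of the two configurations (argument values are illustrative; the blocked attention-scores shape is taken from the test added below, which picks block sizes that divide the sequence length evenly):

from official.nlp.modeling.layers import transformer_encoder_block

# Grouped-query attention inside the encoder block: 8 query heads share
# 4 key/value heads.
gqa_block = transformer_encoder_block.TransformerEncoderBlock(
    num_attention_heads=8,
    inner_dim=2048,
    inner_activation='relu',
    num_kv_heads=4,
)

# Block sparse attention: with return_attention_scores=True the scores come
# back blocked as
# [batch, heads, seq_len // src_block_size, src_block_size, tgt_block_size]
# rather than the dense [batch, heads, seq_len, seq_len].
sparse_block = transformer_encoder_block.TransformerEncoderBlock(
    num_attention_heads=8,
    inner_dim=2048,
    inner_activation='relu',
    src_block_size=7,
    tgt_block_size=7,
    return_attention_scores=True,
)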
official/nlp/modeling/layers/transformer_encoder_block_test.py

@@ -712,6 +712,84 @@ class TransformerArgumentTest(tf.test.TestCase, parameterized.TestCase):
     self.assertEqual(output_tensor.shape.as_list(),
                      expected_layer_output_shape)
 
+  @parameterized.named_parameters(
+      ('mqa', 1),
+      ('gqa', 4),
+  )
+  def test_attention_with_kv_heads(self, num_kv_heads):
+    num_attention_heads = 8
+    sequence_length = 21
+    width = 80
+
+    test_layer = TransformerEncoderBlock(
+        num_attention_heads=num_attention_heads,
+        inner_dim=2048,
+        inner_activation='relu',
+        return_attention_scores=True,
+        num_kv_heads=num_kv_heads,
+    )
+    # Create a 3-dimensional input (the first dimension is implicit).
+    data_tensor = tf_keras.Input(shape=(sequence_length, width))
+    output_tensor = test_layer(data_tensor)
+
+    expected_layer_output_shape = [None, sequence_length, width]
+    expected_attention_scores_shape = [
+        None,
+        num_attention_heads,
+        sequence_length,
+        sequence_length,
+    ]
+
+    self.assertIsInstance(output_tensor, tuple)
+    self.assertLen(output_tensor, 2)
+    # First is the standard output.
+    self.assertEqual(
+        output_tensor[0].shape.as_list(), expected_layer_output_shape
+    )
+    # Second is the attention scores.
+    self.assertEqual(
+        output_tensor[1].shape.as_list(), expected_attention_scores_shape
+    )
+
+  def test_block_sparse_attention(self):
+    num_attention_heads = 8
+    sequence_length = 21
+    width = 80
+    src_block_size = 7
+    tgt_block_size = 7
+
+    test_layer = TransformerEncoderBlock(
+        num_attention_heads=num_attention_heads,
+        inner_dim=2048,
+        inner_activation='relu',
+        return_attention_scores=True,
+        src_block_size=src_block_size,
+        tgt_block_size=tgt_block_size,
+    )
+    # Create a 3-dimensional input (the first dimension is implicit).
+    data_tensor = tf_keras.Input(shape=(sequence_length, width))
+    output_tensor = test_layer(data_tensor)
+
+    expected_layer_output_shape = [None, sequence_length, width]
+    expected_attention_scores_shape = [
+        None,
+        num_attention_heads,
+        sequence_length//src_block_size,
+        src_block_size,
+        tgt_block_size,
+    ]
+
+    self.assertIsInstance(output_tensor, tuple)
+    self.assertLen(output_tensor, 2)
+    # First is the standard output.
+    self.assertEqual(
+        output_tensor[0].shape.as_list(), expected_layer_output_shape
+    )
+    # Second is the attention scores.
+    self.assertEqual(
+        output_tensor[1].shape.as_list(), expected_attention_scores_shape
+    )
+
 
 if __name__ == '__main__':
   tf.test.main()
tf_models_nightly-2.17.0.dev20240610.dist-info/METADATA → tf_models_nightly-2.17.0.dev20240611.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: tf-models-nightly
-Version: 2.17.0.dev20240610
+Version: 2.17.0.dev20240611
 Summary: TensorFlow Official Models
 Home-page: https://github.com/tensorflow/models
 Author: Google Inc.
tf_models_nightly-2.17.0.dev20240610.dist-info/RECORD → tf_models_nightly-2.17.0.dev20240611.dist-info/RECORD

@@ -298,7 +298,7 @@ official/nlp/metrics/__init__.py,sha256=7oiypy0N82PDw9aSdcJBLVoGTd_oRSUOdvuJhMv4
 official/nlp/metrics/bleu.py,sha256=XOTTbjC3B9lt8-MLvNX02tjA94wfsUVse6KJ5CWPzfk,6587
 official/nlp/metrics/bleu_test.py,sha256=0j4pZ1MSIcndvUNZa25oXCu4UFOE367KaL7oRNCzLCI,2508
 official/nlp/modeling/__init__.py,sha256=SQozaRl78tYS6xvGCfM3msABe2VL20x_mL2vIln1Sn0,1062
-official/nlp/modeling/layers/__init__.py,sha256=no0uyA68hsEDa_UuUdhC4jXeqb4lj-3dp6j0GTDzrIQ,4864
+official/nlp/modeling/layers/__init__.py,sha256=vsVNp7WcO4o500l7Zq_-_BIqYbK4fKRMEEtCRJCSP2E,5076
 official/nlp/modeling/layers/attention.py,sha256=3-jG3m_L9Y41BY35c4uTFG_Ywlfk4SOwUEtmqfSoKkk,3906
 official/nlp/modeling/layers/attention_test.py,sha256=c7KezuYUze8PWAPuwYow8KTQNRyuuJgwICSsFTyJ2nQ,3536
 official/nlp/modeling/layers/bigbird_attention.py,sha256=dzutgRoQt2DFsYMpMILv_QF0O_FMDbiLQ3T-7c1Zpcs,21111
@@ -335,7 +335,7 @@ official/nlp/modeling/layers/multi_query_attention.py,sha256=fFPBa9IBVj_O5x5OfGu
 official/nlp/modeling/layers/multi_query_attention_test.py,sha256=3VFF2hz85YExWPwdbhYWaSrIaSOkC1x7axdGfXr0W90,8512
 official/nlp/modeling/layers/on_device_embedding.py,sha256=FgsHyRXf5TWVTyo4OeKImmrTnn4uOPJgS3AGKzKMWYY,4582
 official/nlp/modeling/layers/on_device_embedding_test.py,sha256=M-LUba4QXV37s9Cx7aH8LL3bz_YotC6qITmWRI7Fhjk,8589
-official/nlp/modeling/layers/pack_optimization.py,sha256=C2prsYZMSkL8FBjz6Syc_Tu4JgzppaeIHyGDDoWzs8c,10289
+official/nlp/modeling/layers/pack_optimization.py,sha256=7bQS9k5Pd9X08KyI6-Px1t8tWYG719Bgaxqqs6IXSDo,10760
 official/nlp/modeling/layers/pack_optimization_test.py,sha256=dpsyZAI_PNq9C5HkOkCk70hWaSbT0UThSclwQeYDQqU,2795
 official/nlp/modeling/layers/per_dim_scale_attention.py,sha256=1xECNMAB91lz7eVl6FevwRrHXaHW3-FCpjXTO8F3S4M,3416
 official/nlp/modeling/layers/per_dim_scale_attention_test.py,sha256=_JbPV0ALqFSCWYBvmuemeN4ist0AnNPbQLgwVsRvavU,1761
@@ -347,8 +347,8 @@ official/nlp/modeling/layers/reuse_attention.py,sha256=qvAC-Dr2uPbpQWOvaf0RVN7t6
 official/nlp/modeling/layers/reuse_attention_test.py,sha256=rKr-dl05DqQesYdvYaCYYahIp0ObP4Xgi4Lno4jsl3Y,14329
 official/nlp/modeling/layers/reuse_transformer.py,sha256=S0IxI8LzjAnZ5L3MDy32oanI3oGQhxQjdkC3ff-zlmc,15697
 official/nlp/modeling/layers/reuse_transformer_test.py,sha256=GXuJWfNrqsOwxAi0xSyuziD3kreVWGPCr0LHmxxe0Mk,17201
-official/nlp/modeling/layers/rezero_transformer.py,sha256=EONgqHNi1OUHXWjzDYaYpetdhvxXxeAPqkXUBEMzGq4,12539
-official/nlp/modeling/layers/rezero_transformer_test.py,sha256=in5ZOcWOQt3MnTHjIVzL9H8e_TH7_XmKzgS2CRQAGHA,5761
+official/nlp/modeling/layers/rezero_transformer.py,sha256=SE6iDIlguTxEBdKR79XGOZaqHJ79UY5VZuolgQXQz8g,14249
+official/nlp/modeling/layers/rezero_transformer_test.py,sha256=-Ib-PpZvWgP5aiH3EhyY69OxXLldxCMsnT8oAhojXzI,8620
 official/nlp/modeling/layers/routing.py,sha256=hV6RHVBU0lEgwx180Q78mDncuXTcyWPyaVBqatWCtQw,4469
 official/nlp/modeling/layers/routing_test.py,sha256=ViRCnFWPdwM4Kam0k8aDZbyoJqeqyIiQKEmlhNE7LgI,2226
 official/nlp/modeling/layers/self_attention_mask.py,sha256=7avqkfChwnuZU-qqAED0x1gwwmWSMUszZVAIch8NF_Y,2173
@@ -363,8 +363,8 @@ official/nlp/modeling/layers/tn_expand_condense_test.py,sha256=J52mXzoiuaXfR61kh
 official/nlp/modeling/layers/tn_transformer_expand_condense.py,sha256=gbGJOrgxJd1SyMGB6ME04FSxuZfHqsi94Xxt23l7368,11032
 official/nlp/modeling/layers/tn_transformer_test.py,sha256=Fh-EDRoAkhO7ccD3w3FsJHC51MnZySv8jBlHYnvKZMc,8893
 official/nlp/modeling/layers/transformer.py,sha256=yofIEOjZpcvDmHbcjBmkZrl5iSe6pLtMsetNbXmxDnY,20087
-official/nlp/modeling/layers/transformer_encoder_block.py,sha256=mb2vezq6tOmcnfbwE2Sq3_CvbS7jk4cDvKJfpF-Ob9Q,20237
-official/nlp/modeling/layers/transformer_encoder_block_test.py,sha256=zBcOxKR5aFc0DvoOvdDSoXKZiBPjapuUcd6OUZQhSGA,28304
+official/nlp/modeling/layers/transformer_encoder_block.py,sha256=9EuAsedY35eIFc4z-22QQ4c47NHrEe8-8uzjtPfgNTM,21977
+official/nlp/modeling/layers/transformer_encoder_block_test.py,sha256=chs8-M69Gx_Zcp7Pi7sNKjpWgyuSHDw_fNrRh6URPLc,30686
 official/nlp/modeling/layers/transformer_scaffold.py,sha256=m8TF4geBkm8-VJQiTpzMI6FSJZry6oa2vPO3FXCCClE,15704
 official/nlp/modeling/layers/transformer_scaffold_test.py,sha256=pqUGldhmAKROrd4eoCWmHNtKOdCO6PH_-EigcYnvIpE,19920
 official/nlp/modeling/layers/transformer_test.py,sha256=kC_9NcLbJnBbuTaE_7BW60EF8xG_QUoICj0t0gS7O4Q,5522
@@ -1212,9 +1212,9 @@ tensorflow_models/tensorflow_models_test.py,sha256=nc6A9K53OGqF25xN5St8EiWvdVbda
 tensorflow_models/nlp/__init__.py,sha256=4tA5Pf4qaFwT-fIFOpX7x7FHJpnyJT-5UgOeFYTyMlc,807
 tensorflow_models/uplift/__init__.py,sha256=mqfa55gweOdpKoaQyid4A_4u7xw__FcQeSIF0k_pYmI,999
 tensorflow_models/vision/__init__.py,sha256=zBorY_v5xva1uI-qxhZO3Qh-Dii-Suq6wEYh6hKHDfc,833
-tf_models_nightly-2.17.0.dev20240610.dist-info/AUTHORS,sha256=1dG3fXVu9jlo7bul8xuix5F5vOnczMk7_yWn4y70uw0,337
-tf_models_nightly-2.17.0.dev20240610.dist-info/LICENSE,sha256=WxeBS_DejPZQabxtfMOM_xn8qoZNJDQjrT7z2wG1I4U,11512
-tf_models_nightly-2.17.0.dev20240610.dist-info/METADATA,sha256=fzC4StzZ-4A96PlGaXFxIGweI3hNqRYUg11L4cdWz6w,1432
-tf_models_nightly-2.17.0.dev20240610.dist-info/WHEEL,sha256=kGT74LWyRUZrL4VgLh6_g12IeVl_9u9ZVhadrgXZUEY,110
-tf_models_nightly-2.17.0.dev20240610.dist-info/top_level.txt,sha256=gum2FfO5R4cvjl2-QtP-S1aNmsvIZaFFT6VFzU0f4-g,33
-tf_models_nightly-2.17.0.dev20240610.dist-info/RECORD,,
+tf_models_nightly-2.17.0.dev20240611.dist-info/AUTHORS,sha256=1dG3fXVu9jlo7bul8xuix5F5vOnczMk7_yWn4y70uw0,337
+tf_models_nightly-2.17.0.dev20240611.dist-info/LICENSE,sha256=WxeBS_DejPZQabxtfMOM_xn8qoZNJDQjrT7z2wG1I4U,11512
+tf_models_nightly-2.17.0.dev20240611.dist-info/METADATA,sha256=kUzmYfy2YSnQ6ew3TJFN7nJqzj_0eQZm6Tes46AVY5c,1432
+tf_models_nightly-2.17.0.dev20240611.dist-info/WHEEL,sha256=kGT74LWyRUZrL4VgLh6_g12IeVl_9u9ZVhadrgXZUEY,110
+tf_models_nightly-2.17.0.dev20240611.dist-info/top_level.txt,sha256=gum2FfO5R4cvjl2-QtP-S1aNmsvIZaFFT6VFzU0f4-g,33
+tf_models_nightly-2.17.0.dev20240611.dist-info/RECORD,,