tf-models-nightly 2.17.0.dev20240610__py2.py3-none-any.whl → 2.17.0.dev20240612__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- official/nlp/modeling/layers/__init__.py +2 -0
- official/nlp/modeling/layers/pack_optimization.py +8 -0
- official/nlp/modeling/layers/rezero_transformer.py +69 -20
- official/nlp/modeling/layers/rezero_transformer_test.py +63 -0
- official/nlp/modeling/layers/transformer_encoder_block.py +47 -7
- official/nlp/modeling/layers/transformer_encoder_block_test.py +78 -0
- {tf_models_nightly-2.17.0.dev20240610.dist-info → tf_models_nightly-2.17.0.dev20240612.dist-info}/METADATA +1 -1
- {tf_models_nightly-2.17.0.dev20240610.dist-info → tf_models_nightly-2.17.0.dev20240612.dist-info}/RECORD +12 -12
- {tf_models_nightly-2.17.0.dev20240610.dist-info → tf_models_nightly-2.17.0.dev20240612.dist-info}/AUTHORS +0 -0
- {tf_models_nightly-2.17.0.dev20240610.dist-info → tf_models_nightly-2.17.0.dev20240612.dist-info}/LICENSE +0 -0
- {tf_models_nightly-2.17.0.dev20240610.dist-info → tf_models_nightly-2.17.0.dev20240612.dist-info}/WHEEL +0 -0
- {tf_models_nightly-2.17.0.dev20240610.dist-info → tf_models_nightly-2.17.0.dev20240612.dist-info}/top_level.txt +0 -0
official/nlp/modeling/layers/__init__.py

@@ -23,6 +23,7 @@ from official.nlp.modeling.layers.attention import *
 from official.nlp.modeling.layers.bigbird_attention import BigBirdAttention
 from official.nlp.modeling.layers.bigbird_attention import BigBirdMasks
 from official.nlp.modeling.layers.block_diag_feedforward import BlockDiagFeedforward
+from official.nlp.modeling.layers.block_sparse_attention import MultiHeadAttention as BlockSparseAttention
 from official.nlp.modeling.layers.cls_head import *
 from official.nlp.modeling.layers.factorized_embedding import FactorizedEmbedding
 from official.nlp.modeling.layers.gated_feedforward import GatedFeedforward
@@ -44,6 +45,7 @@ from official.nlp.modeling.layers.moe import FeedForwardExperts
 from official.nlp.modeling.layers.moe import MoeLayer
 from official.nlp.modeling.layers.moe import MoeLayerWithBackbone
 from official.nlp.modeling.layers.multi_channel_attention import *
+from official.nlp.modeling.layers.multi_query_attention import MultiHeadAttention as MultiQueryAttention
 from official.nlp.modeling.layers.on_device_embedding import OnDeviceEmbedding
 from official.nlp.modeling.layers.pack_optimization import PackBertEmbeddings
 from official.nlp.modeling.layers.pack_optimization import StridedReZeroTransformer
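
With these re-exports, the two specialized attention layers become importable directly from official.nlp.modeling.layers. A minimal usage sketch, assuming only the constructor arguments exercised elsewhere in this diff (anything beyond that is an assumption):

from official.nlp.modeling import layers

# Block sparse attention: attention is restricted to fixed-size source/target blocks.
block_sparse = layers.BlockSparseAttention(
    num_heads=8, key_dim=64, src_block_size=4, tgt_block_size=4)

# Multi-query / grouped-query attention: fewer key-value heads than query heads.
multi_query = layers.MultiQueryAttention(
    num_heads=8, key_dim=64, num_kv_heads=1)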

official/nlp/modeling/layers/pack_optimization.py

@@ -72,6 +72,10 @@ class StridedTransformerEncoderBlock(
     if self._output_range is not None:
       raise ValueError('StridedTransformerEncoderBlock does not '
                        'support `output_range` argument.')
+    # TODO(b/337888023): Support block sparse attention with strided inputs.
+    if self._src_block_size is not None:
+      raise ValueError('StridedTransformerEncoderBlock does not '
+                       'support block sparse attention.')
 
   def call(self, inputs, stride: tf.Tensor):
     if isinstance(inputs, (list, tuple)):
@@ -137,6 +141,10 @@ class StridedReZeroTransformer(rezero_transformer.ReZeroTransformer):
     if self._output_range is not None:
       raise ValueError(f'{self.__class__} does not '
                        'support `output_range` argument.')
+    # TODO(b/337888023): Support block sparse attention with strided inputs.
+    if self._src_block_size is not None:
+      raise ValueError(f'{self.__class__} does not '
+                       'support block sparse attention.')
 
   def call(self, inputs, stride: tf.Tensor):
     if isinstance(inputs, (list, tuple)):
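
Both strided packing variants explicitly reject the new block sparse path for now (see the TODO referencing b/337888023). A hedged sketch of what the guard implies for callers; the constructor arguments mirror TransformerEncoderBlock, and whether the check fires at construction or at build time is not visible in this excerpt:

from official.nlp.modeling.layers import pack_optimization

strided = pack_optimization.StridedTransformerEncoderBlock(
    num_attention_heads=8,
    inner_dim=2048,
    inner_activation='relu',
    src_block_size=4,
    tgt_block_size=4,
)
# Expected outcome once the guard runs:
# ValueError: StridedTransformerEncoderBlock does not support block sparse attention.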

official/nlp/modeling/layers/rezero_transformer.py

@@ -21,6 +21,8 @@ import gin
 import tensorflow as tf, tf_keras
 
 from official.modeling import tf_utils
+from official.nlp.modeling.layers import block_sparse_attention
+from official.nlp.modeling.layers import multi_query_attention
 from official.nlp.modeling.layers import util
 
 
@@ -53,6 +55,12 @@ class ReZeroTransformer(tf_keras.layers.Layer):
     bias_constraint: Constraint for dense layer kernels.
     use_layer_norm: If add layer_norm on top of the ReZero.
     share_rezero: If attention layer and FFN layer share the same alpha.
+    num_kv_heads: Number of key-value heads for multi-query attention. Refer to
+      `multi_query_attention.MultiHeadAttention` for more details.
+    src_block_size: Source block size. Refer to
+      `block_sparse_attention.MultiHeadAttention` for more details.
+    tgt_block_size: Target block size. Refer to
+      `block_sparse_attention.MultiHeadAttention` for more details.
   """
 
   def __init__(self,
@@ -71,6 +79,9 @@ class ReZeroTransformer(tf_keras.layers.Layer):
                bias_constraint=None,
                use_layer_norm=False,
                share_rezero=True,
+               num_kv_heads=None,
+               src_block_size=None,
+               tgt_block_size=None,
                **kwargs):
     # attention_dropout will override attention_dropout_rate.
     # This is to unify the input params with TransformerEncoderBlock.
@@ -101,6 +112,14 @@ class ReZeroTransformer(tf_keras.layers.Layer):
     self._bias_constraint = tf_keras.constraints.get(bias_constraint)
     self._use_layer_norm = use_layer_norm
     self._share_rezero = share_rezero
+    self._num_kv_heads = num_kv_heads
+    self._src_block_size = src_block_size
+    self._tgt_block_size = tgt_block_size
+    if self._num_kv_heads is not None and self._src_block_size is not None:
+      raise ValueError(
+          "Block sparse attention does not support Multi-query attention."
+          " Specify only one of them."
+      )
 
   def build(self, input_shape):
     if isinstance(input_shape, tf.TensorShape):
@@ -109,53 +128,77 @@ class ReZeroTransformer(tf_keras.layers.Layer):
       input_tensor_shape = tf.TensorShape(input_shape[0])
     else:
       raise ValueError(
-          "The type of input shape argument is not supported, got: %s"
-          type(input_shape)
+          "The type of input shape argument is not supported, got: %s"
+          % type(input_shape)
+      )
 
     if len(input_tensor_shape.as_list()) != 3:
-      raise ValueError(
-
+      raise ValueError(
+          "TransformerLayer expects a three-dimensional input of "
+          "shape [batch, sequence, width]."
+      )
     batch_size, sequence_length, hidden_size = input_tensor_shape
 
     if len(input_shape) == 2:
       mask_tensor_shape = tf.TensorShape(input_shape[1])
       expected_mask_tensor_shape = tf.TensorShape(
-          [batch_size, sequence_length, sequence_length]
+          [batch_size, sequence_length, sequence_length]
+      )
       if not expected_mask_tensor_shape.is_compatible_with(mask_tensor_shape):
-        raise ValueError(
-
-
-
-
+        raise ValueError(
+            "When passing a mask tensor to TransformerLayer, the "
+            "mask tensor must be of shape [batch, "
+            "sequence_length, sequence_length] (here %s). Got a "
+            "mask tensor of shape %s."
+            % (expected_mask_tensor_shape, mask_tensor_shape)
+        )
     if hidden_size % self._num_heads != 0:
       raise ValueError(
           "The input size (%d) is not a multiple of the number of attention "
-          "heads (%d)" % (hidden_size, self._num_heads)
+          "heads (%d)" % (hidden_size, self._num_heads)
+      )
     self._attention_head_size = int(hidden_size // self._num_heads)
     common_kwargs = dict(
         kernel_regularizer=self._kernel_regularizer,
         bias_regularizer=self._bias_regularizer,
         activity_regularizer=self._activity_regularizer,
         kernel_constraint=self._kernel_constraint,
-        bias_constraint=self._bias_constraint
-
+        bias_constraint=self._bias_constraint,
+    )
+    attention_kwargs = dict(
         num_heads=self._num_heads,
         key_dim=self._attention_head_size,
         dropout=self._attention_dropout_rate,
         name="self_attention",
         kernel_initializer=tf_utils.clone_initializer(self._kernel_initializer),
         bias_initializer=tf_utils.clone_initializer(self._bias_initializer),
-
+    )
+    if self._src_block_size is not None:
+      attention_kwargs.update(
+          src_block_size=self._src_block_size,
+          tgt_block_size=self._tgt_block_size,
+          name="block_sparse_attention",
+      )
+      attention_fn = block_sparse_attention.MultiHeadAttention
+    elif self._num_kv_heads is not None:
+      attention_kwargs.update(
+          num_kv_heads=self._num_kv_heads,
+          name="multi_query_attention",
+      )
+      attention_fn = multi_query_attention.MultiHeadAttention
+    else:
+      attention_fn = tf_keras.layers.MultiHeadAttention
+    self._attention_layer = attention_fn(**attention_kwargs, **common_kwargs)
     self._attention_dropout = tf_keras.layers.Dropout(rate=self._dropout_rate)
     if self._use_layer_norm:
       # Use float32 in layernorm for numeric stability.
       # It is probably safe in mixed_float16, but we haven't validated this yet.
-      self._attention_layer_norm = (
-
-
-
-
-
+      self._attention_layer_norm = tf_keras.layers.LayerNormalization(
+          name="self_attention_layer_norm",
+          axis=-1,
+          epsilon=1e-12,
+          dtype=tf.float32,
+      )
     self._intermediate_dense = tf_keras.layers.EinsumDense(
         "abc,cd->abd",
         output_shape=(None, self._inner_dim),
@@ -221,6 +264,12 @@ class ReZeroTransformer(tf_keras.layers.Layer):
             self._use_layer_norm,
         "share_rezero":
            self._share_rezero,
+        "num_kv_heads":
+            self._num_kv_heads,
+        "src_block_size":
+            self._src_block_size,
+        "tgt_block_size":
+            self._tgt_block_size,
         "kernel_initializer":
            tf_keras.initializers.serialize(self._kernel_initializer),
         "bias_initializer":
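
Taken together, the rezero_transformer.py changes let ReZeroTransformer pick its attention implementation from the new constructor arguments: block_sparse_attention.MultiHeadAttention when src_block_size is set, multi_query_attention.MultiHeadAttention when num_kv_heads is set, and tf_keras.layers.MultiHeadAttention otherwise; setting both raises a ValueError, and all three values round-trip through get_config. A usage sketch modeled on the new tests further down in this diff (the hyperparameter values are illustrative, not required):

from official.nlp.modeling.layers import rezero_transformer

# Grouped-query attention inside a ReZero block; the new tests use kv-head
# counts that evenly divide num_attention_heads.
gqa_layer = rezero_transformer.ReZeroTransformer(
    num_attention_heads=10,
    intermediate_size=2048,
    intermediate_activation='relu',
    num_kv_heads=5,
)

# Block sparse attention variant; the new tests pair sequence length 21 with
# 3x3 blocks so the sequence splits evenly into blocks.
sparse_layer = rezero_transformer.ReZeroTransformer(
    num_attention_heads=10,
    intermediate_size=2048,
    intermediate_activation='relu',
    src_block_size=3,
    tgt_block_size=3,
)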

official/nlp/modeling/layers/rezero_transformer_test.py

@@ -141,6 +141,69 @@ class TransformerWithReZeroLayerTest(tf.test.TestCase, parameterized.TestCase):
     output = test_layer(inputs)
     self.assertEqual(output.shape, q_tensor.shape)
 
+  @parameterized.named_parameters(('_mqa', 1),
+                                  ('_gqa', 5))
+  def test_rezero_with_kv_heads(self, num_kv_heads):
+    tf_keras.mixed_precision.set_global_policy('mixed_float16')
+    test_layer = rezero_transformer.ReZeroTransformer(
+        num_attention_heads=10,
+        intermediate_size=2048,
+        intermediate_activation='relu',
+        num_kv_heads=num_kv_heads,
+    )
+    sequence_length = 21
+    width = 80
+    # Create a 3-dimensional input (the first dimension is implicit).
+    data_tensor = tf_keras.Input(shape=(sequence_length, width))
+    # Create a 2-dimensional input (the first dimension is implicit).
+    mask_tensor = tf_keras.Input(shape=(sequence_length, sequence_length))
+    output_tensor = test_layer([data_tensor, mask_tensor])
+
+    # Create a model from the test layer.
+    model = tf_keras.Model([data_tensor, mask_tensor], output_tensor)
+
+    # Invoke the model on test data. We can't validate the output data itself
+    # (the NN is too complex) but this will rule out structural runtime errors.
+    batch_size = 6
+    input_data = (10 * np.random.random_sample(
+        (batch_size, sequence_length, width)))
+    # The attention mask should be of shape (batch, from_seq_len, to_seq_len),
+    # which here is (batch, sequence_length, sequence_length)
+    mask_data = np.random.randint(
+        2, size=(batch_size, sequence_length, sequence_length))
+    _ = model.predict([input_data, mask_data])
+
+  def test_rezero_with_block_sparse_attention(self):
+    tf_keras.mixed_precision.set_global_policy('mixed_float16')
+    test_layer = rezero_transformer.ReZeroTransformer(
+        num_attention_heads=10,
+        intermediate_size=2048,
+        intermediate_activation='relu',
+        src_block_size=3,
+        tgt_block_size=3,
+    )
+    sequence_length = 21
+    width = 80
+    # Create a 3-dimensional input (the first dimension is implicit).
+    data_tensor = tf_keras.Input(shape=(sequence_length, width))
+    # Create a 2-dimensional input (the first dimension is implicit).
+    mask_tensor = tf_keras.Input(shape=(sequence_length, sequence_length))
+    output_tensor = test_layer([data_tensor, mask_tensor])
+
+    # Create a model from the test layer.
+    model = tf_keras.Model([data_tensor, mask_tensor], output_tensor)
+
+    # Invoke the model on test data. We can't validate the output data itself
+    # (the NN is too complex) but this will rule out structural runtime errors.
+    batch_size = 6
+    input_data = (10 * np.random.random_sample(
+        (batch_size, sequence_length, width)))
+    # The attention mask should be of shape (batch, from_seq_len, to_seq_len),
+    # which here is (batch, sequence_length, sequence_length)
+    mask_data = np.random.randint(
+        2, size=(batch_size, sequence_length, sequence_length))
+    _ = model.predict([input_data, mask_data])
+
 
 if __name__ == '__main__':
   tf.test.main()

official/nlp/modeling/layers/transformer_encoder_block.py

@@ -18,6 +18,8 @@ from absl import logging
 import tensorflow as tf, tf_keras
 
 from official.modeling import tf_utils
+from official.nlp.modeling.layers import block_sparse_attention
+from official.nlp.modeling.layers import multi_query_attention
 from official.nlp.modeling.layers import util
 
 
@@ -107,6 +109,9 @@ class TransformerEncoderBlock(tf_keras.layers.Layer):
                output_last_dim=None,
                diff_q_kv_att_layer_norm=False,
                return_attention_scores=False,
+               num_kv_heads=None,
+               src_block_size=None,
+               tgt_block_size=None,
                **kwargs):
     """Initializes `TransformerEncoderBlock`.
 
@@ -174,6 +179,12 @@ class TransformerEncoderBlock(tf_keras.layers.Layer):
       return_attention_scores: If `True`, the output of this layer will be a
        tuple and additionally contain the attention scores in the shape of
        `[batch_size, num_attention_heads, seq_dim, seq_dim]`.
+      num_kv_heads: Number of key-value heads for multi-query attention. Refer
+        to `multi_query_attention.MultiHeadAttention` for more details.
+      src_block_size: Source block size. Refer to
+        `block_sparse_attention.MultiHeadAttention` for more details.
+      tgt_block_size: Target block size. Refer to
+        `block_sparse_attention.MultiHeadAttention` for more details.
       **kwargs: keyword arguments.
     """
     util.filter_kwargs(kwargs)
@@ -208,6 +219,14 @@ class TransformerEncoderBlock(tf_keras.layers.Layer):
     self._output_last_dim = output_last_dim
     self._diff_q_kv_att_layer_norm = diff_q_kv_att_layer_norm
     self._return_attention_scores = return_attention_scores
+    self._num_kv_heads = num_kv_heads
+    self._src_block_size = src_block_size
+    self._tgt_block_size = tgt_block_size
+    if self._num_kv_heads is not None and self._src_block_size is not None:
+      raise ValueError(
+          "Block sparse attention does not support Multi-query attention."
+          " Specify only one of them."
+      )
     if attention_initializer:
       self._attention_initializer = tf_keras.initializers.get(
           attention_initializer)
@@ -244,12 +263,7 @@ class TransformerEncoderBlock(tf_keras.layers.Layer):
     else:
       last_output_shape = self._output_last_dim
 
-
-        bias_regularizer=self._bias_regularizer,
-        activity_regularizer=self._activity_regularizer,
-        kernel_constraint=self._kernel_constraint,
-        bias_constraint=self._bias_constraint)
-    self._attention_layer = tf_keras.layers.MultiHeadAttention(
+    attention_layer_kwargs = dict(
         num_heads=self._num_heads,
         key_dim=self._key_dim,
         value_dim=self._value_dim,
@@ -260,7 +274,30 @@ class TransformerEncoderBlock(tf_keras.layers.Layer):
         attention_axes=self._attention_axes,
         output_shape=self._output_last_dim,
         name="self_attention",
-
+    )
+    common_kwargs = dict(
+        bias_regularizer=self._bias_regularizer,
+        activity_regularizer=self._activity_regularizer,
+        kernel_constraint=self._kernel_constraint,
+        bias_constraint=self._bias_constraint,
+    )
+    if self._src_block_size is not None:
+      attention_layer_kwargs.update(
+          src_block_size=self._src_block_size,
+          tgt_block_size=self._tgt_block_size,
+          name="block_sparse_attention",
+      )
+      attention_fn = block_sparse_attention.MultiHeadAttention
+    elif self._num_kv_heads is not None:
+      attention_layer_kwargs.update(
+          num_kv_heads=self._num_kv_heads,
+          name="multi_query_attention",
+      )
+      attention_fn = multi_query_attention.MultiHeadAttention
+    else:
+      attention_fn = tf_keras.layers.MultiHeadAttention
+    self._attention_layer = attention_fn(
+        **attention_layer_kwargs, **common_kwargs
     )
     self._attention_dropout = tf_keras.layers.Dropout(
         rate=self._attention_dropout_rate
@@ -373,6 +410,9 @@ class TransformerEncoderBlock(tf_keras.layers.Layer):
         "value_dim": self._value_dim,
         "output_last_dim": self._output_last_dim,
         "diff_q_kv_att_layer_norm": self._diff_q_kv_att_layer_norm,
+        "num_kv_heads": self._num_kv_heads,
+        "src_block_size": self._src_block_size,
+        "tgt_block_size": self._tgt_block_size,
     }
     base_config = super().get_config()
     return dict(list(base_config.items()) + list(config.items()))
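
TransformerEncoderBlock gains the same three arguments, the same mutual-exclusion check, and the same attention_fn selection, and it now serializes num_kv_heads, src_block_size and tgt_block_size through get_config. A sketch mirroring the new tests below; the shape comment restates their attention-scores assertion rather than documenting a general API guarantee:

import tf_keras
from official.nlp.modeling.layers.transformer_encoder_block import TransformerEncoderBlock

block = TransformerEncoderBlock(
    num_attention_heads=8,
    inner_dim=2048,
    inner_activation='relu',
    return_attention_scores=True,
    src_block_size=7,
    tgt_block_size=7,
)
inputs = tf_keras.Input(shape=(21, 80))
outputs, scores = block(inputs)
# Per the new test, block sparse attention scores come back as
# [batch, num_heads, seq_len // src_block_size, src_block_size, tgt_block_size].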

official/nlp/modeling/layers/transformer_encoder_block_test.py

@@ -712,6 +712,84 @@ class TransformerArgumentTest(tf.test.TestCase, parameterized.TestCase):
     self.assertEqual(output_tensor.shape.as_list(),
                      expected_layer_output_shape)
 
+  @parameterized.named_parameters(
+      ('mqa', 1),
+      ('gqa', 4),
+  )
+  def test_attention_with_kv_heads(self, num_kv_heads):
+    num_attention_heads = 8
+    sequence_length = 21
+    width = 80
+
+    test_layer = TransformerEncoderBlock(
+        num_attention_heads=num_attention_heads,
+        inner_dim=2048,
+        inner_activation='relu',
+        return_attention_scores=True,
+        num_kv_heads=num_kv_heads,
+    )
+    # Create a 3-dimensional input (the first dimension is implicit).
+    data_tensor = tf_keras.Input(shape=(sequence_length, width))
+    output_tensor = test_layer(data_tensor)
+
+    expected_layer_output_shape = [None, sequence_length, width]
+    expected_attention_scores_shape = [
+        None,
+        num_attention_heads,
+        sequence_length,
+        sequence_length,
+    ]
+
+    self.assertIsInstance(output_tensor, tuple)
+    self.assertLen(output_tensor, 2)
+    # First is the standard output.
+    self.assertEqual(
+        output_tensor[0].shape.as_list(), expected_layer_output_shape
+    )
+    # Second is the attention scores.
+    self.assertEqual(
+        output_tensor[1].shape.as_list(), expected_attention_scores_shape
+    )
+
+  def test_block_sparse_attention(self):
+    num_attention_heads = 8
+    sequence_length = 21
+    width = 80
+    src_block_size = 7
+    tgt_block_size = 7
+
+    test_layer = TransformerEncoderBlock(
+        num_attention_heads=num_attention_heads,
+        inner_dim=2048,
+        inner_activation='relu',
+        return_attention_scores=True,
+        src_block_size=src_block_size,
+        tgt_block_size=tgt_block_size,
+    )
+    # Create a 3-dimensional input (the first dimension is implicit).
+    data_tensor = tf_keras.Input(shape=(sequence_length, width))
+    output_tensor = test_layer(data_tensor)
+
+    expected_layer_output_shape = [None, sequence_length, width]
+    expected_attention_scores_shape = [
+        None,
+        num_attention_heads,
+        sequence_length // src_block_size,
+        src_block_size,
+        tgt_block_size,
+    ]
+
+    self.assertIsInstance(output_tensor, tuple)
+    self.assertLen(output_tensor, 2)
+    # First is the standard output.
+    self.assertEqual(
+        output_tensor[0].shape.as_list(), expected_layer_output_shape
+    )
+    # Second is the attention scores.
+    self.assertEqual(
+        output_tensor[1].shape.as_list(), expected_attention_scores_shape
+    )
+
 
 if __name__ == '__main__':
   tf.test.main()

{tf_models_nightly-2.17.0.dev20240610.dist-info → tf_models_nightly-2.17.0.dev20240612.dist-info}/RECORD

@@ -298,7 +298,7 @@ official/nlp/metrics/__init__.py,sha256=7oiypy0N82PDw9aSdcJBLVoGTd_oRSUOdvuJhMv4
 official/nlp/metrics/bleu.py,sha256=XOTTbjC3B9lt8-MLvNX02tjA94wfsUVse6KJ5CWPzfk,6587
 official/nlp/metrics/bleu_test.py,sha256=0j4pZ1MSIcndvUNZa25oXCu4UFOE367KaL7oRNCzLCI,2508
 official/nlp/modeling/__init__.py,sha256=SQozaRl78tYS6xvGCfM3msABe2VL20x_mL2vIln1Sn0,1062
-official/nlp/modeling/layers/__init__.py,sha256=
+official/nlp/modeling/layers/__init__.py,sha256=vsVNp7WcO4o500l7Zq_-_BIqYbK4fKRMEEtCRJCSP2E,5076
 official/nlp/modeling/layers/attention.py,sha256=3-jG3m_L9Y41BY35c4uTFG_Ywlfk4SOwUEtmqfSoKkk,3906
 official/nlp/modeling/layers/attention_test.py,sha256=c7KezuYUze8PWAPuwYow8KTQNRyuuJgwICSsFTyJ2nQ,3536
 official/nlp/modeling/layers/bigbird_attention.py,sha256=dzutgRoQt2DFsYMpMILv_QF0O_FMDbiLQ3T-7c1Zpcs,21111
@@ -335,7 +335,7 @@ official/nlp/modeling/layers/multi_query_attention.py,sha256=fFPBa9IBVj_O5x5OfGu
 official/nlp/modeling/layers/multi_query_attention_test.py,sha256=3VFF2hz85YExWPwdbhYWaSrIaSOkC1x7axdGfXr0W90,8512
 official/nlp/modeling/layers/on_device_embedding.py,sha256=FgsHyRXf5TWVTyo4OeKImmrTnn4uOPJgS3AGKzKMWYY,4582
 official/nlp/modeling/layers/on_device_embedding_test.py,sha256=M-LUba4QXV37s9Cx7aH8LL3bz_YotC6qITmWRI7Fhjk,8589
-official/nlp/modeling/layers/pack_optimization.py,sha256=
+official/nlp/modeling/layers/pack_optimization.py,sha256=7bQS9k5Pd9X08KyI6-Px1t8tWYG719Bgaxqqs6IXSDo,10760
 official/nlp/modeling/layers/pack_optimization_test.py,sha256=dpsyZAI_PNq9C5HkOkCk70hWaSbT0UThSclwQeYDQqU,2795
 official/nlp/modeling/layers/per_dim_scale_attention.py,sha256=1xECNMAB91lz7eVl6FevwRrHXaHW3-FCpjXTO8F3S4M,3416
 official/nlp/modeling/layers/per_dim_scale_attention_test.py,sha256=_JbPV0ALqFSCWYBvmuemeN4ist0AnNPbQLgwVsRvavU,1761
@@ -347,8 +347,8 @@ official/nlp/modeling/layers/reuse_attention.py,sha256=qvAC-Dr2uPbpQWOvaf0RVN7t6
 official/nlp/modeling/layers/reuse_attention_test.py,sha256=rKr-dl05DqQesYdvYaCYYahIp0ObP4Xgi4Lno4jsl3Y,14329
 official/nlp/modeling/layers/reuse_transformer.py,sha256=S0IxI8LzjAnZ5L3MDy32oanI3oGQhxQjdkC3ff-zlmc,15697
 official/nlp/modeling/layers/reuse_transformer_test.py,sha256=GXuJWfNrqsOwxAi0xSyuziD3kreVWGPCr0LHmxxe0Mk,17201
-official/nlp/modeling/layers/rezero_transformer.py,sha256=
-official/nlp/modeling/layers/rezero_transformer_test.py,sha256
+official/nlp/modeling/layers/rezero_transformer.py,sha256=SE6iDIlguTxEBdKR79XGOZaqHJ79UY5VZuolgQXQz8g,14249
+official/nlp/modeling/layers/rezero_transformer_test.py,sha256=-Ib-PpZvWgP5aiH3EhyY69OxXLldxCMsnT8oAhojXzI,8620
 official/nlp/modeling/layers/routing.py,sha256=hV6RHVBU0lEgwx180Q78mDncuXTcyWPyaVBqatWCtQw,4469
 official/nlp/modeling/layers/routing_test.py,sha256=ViRCnFWPdwM4Kam0k8aDZbyoJqeqyIiQKEmlhNE7LgI,2226
 official/nlp/modeling/layers/self_attention_mask.py,sha256=7avqkfChwnuZU-qqAED0x1gwwmWSMUszZVAIch8NF_Y,2173
@@ -363,8 +363,8 @@ official/nlp/modeling/layers/tn_expand_condense_test.py,sha256=J52mXzoiuaXfR61kh
 official/nlp/modeling/layers/tn_transformer_expand_condense.py,sha256=gbGJOrgxJd1SyMGB6ME04FSxuZfHqsi94Xxt23l7368,11032
 official/nlp/modeling/layers/tn_transformer_test.py,sha256=Fh-EDRoAkhO7ccD3w3FsJHC51MnZySv8jBlHYnvKZMc,8893
 official/nlp/modeling/layers/transformer.py,sha256=yofIEOjZpcvDmHbcjBmkZrl5iSe6pLtMsetNbXmxDnY,20087
-official/nlp/modeling/layers/transformer_encoder_block.py,sha256=
-official/nlp/modeling/layers/transformer_encoder_block_test.py,sha256=
+official/nlp/modeling/layers/transformer_encoder_block.py,sha256=9EuAsedY35eIFc4z-22QQ4c47NHrEe8-8uzjtPfgNTM,21977
+official/nlp/modeling/layers/transformer_encoder_block_test.py,sha256=chs8-M69Gx_Zcp7Pi7sNKjpWgyuSHDw_fNrRh6URPLc,30686
 official/nlp/modeling/layers/transformer_scaffold.py,sha256=m8TF4geBkm8-VJQiTpzMI6FSJZry6oa2vPO3FXCCClE,15704
 official/nlp/modeling/layers/transformer_scaffold_test.py,sha256=pqUGldhmAKROrd4eoCWmHNtKOdCO6PH_-EigcYnvIpE,19920
 official/nlp/modeling/layers/transformer_test.py,sha256=kC_9NcLbJnBbuTaE_7BW60EF8xG_QUoICj0t0gS7O4Q,5522
@@ -1212,9 +1212,9 @@ tensorflow_models/tensorflow_models_test.py,sha256=nc6A9K53OGqF25xN5St8EiWvdVbda
 tensorflow_models/nlp/__init__.py,sha256=4tA5Pf4qaFwT-fIFOpX7x7FHJpnyJT-5UgOeFYTyMlc,807
 tensorflow_models/uplift/__init__.py,sha256=mqfa55gweOdpKoaQyid4A_4u7xw__FcQeSIF0k_pYmI,999
 tensorflow_models/vision/__init__.py,sha256=zBorY_v5xva1uI-qxhZO3Qh-Dii-Suq6wEYh6hKHDfc,833
-tf_models_nightly-2.17.0.
-tf_models_nightly-2.17.0.
-tf_models_nightly-2.17.0.
-tf_models_nightly-2.17.0.
-tf_models_nightly-2.17.0.
-tf_models_nightly-2.17.0.
+tf_models_nightly-2.17.0.dev20240612.dist-info/AUTHORS,sha256=1dG3fXVu9jlo7bul8xuix5F5vOnczMk7_yWn4y70uw0,337
+tf_models_nightly-2.17.0.dev20240612.dist-info/LICENSE,sha256=WxeBS_DejPZQabxtfMOM_xn8qoZNJDQjrT7z2wG1I4U,11512
+tf_models_nightly-2.17.0.dev20240612.dist-info/METADATA,sha256=L8f-eOf_Ygx6KUBQjrmxw7M6NOWla-lHBmqARj5U4vg,1432
+tf_models_nightly-2.17.0.dev20240612.dist-info/WHEEL,sha256=kGT74LWyRUZrL4VgLh6_g12IeVl_9u9ZVhadrgXZUEY,110
+tf_models_nightly-2.17.0.dev20240612.dist-info/top_level.txt,sha256=gum2FfO5R4cvjl2-QtP-S1aNmsvIZaFFT6VFzU0f4-g,33
+tf_models_nightly-2.17.0.dev20240612.dist-info/RECORD,,

AUTHORS, LICENSE, WHEEL, top_level.txt: File without changes (only the dist-info directory name moves from dev20240610 to dev20240612).