tf-models-nightly 2.11.0.dev20230321__py2.py3-none-any.whl → 2.11.0.dev20230323__py2.py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- official/modeling/hyperparams/base_config.py +20 -2
- official/modeling/hyperparams/base_config_test.py +29 -0
- official/projects/yt8m/configs/yt8m.py +1 -4
- official/projects/yt8m/modeling/nn_layers.py +167 -26
- official/projects/yt8m/modeling/yt8m_model.py +44 -182
- official/projects/yt8m/modeling/yt8m_model_utils.py +5 -6
- official/projects/yt8m/tasks/yt8m_task.py +42 -25
- official/vision/dataloaders/maskrcnn_input.py +12 -13
- official/vision/dataloaders/tfds_classification_decoders.py +1 -0
- official/vision/evaluation/instance_metrics.py +176 -225
- official/vision/ops/augment.py +45 -33
- official/vision/ops/augment_test.py +9 -0
- official/vision/ops/preprocess_ops.py +20 -6
- official/vision/serving/export_tflite_lib.py +20 -8
- {tf_models_nightly-2.11.0.dev20230321.dist-info → tf_models_nightly-2.11.0.dev20230323.dist-info}/METADATA +1 -1
- {tf_models_nightly-2.11.0.dev20230321.dist-info → tf_models_nightly-2.11.0.dev20230323.dist-info}/RECORD +20 -20
- {tf_models_nightly-2.11.0.dev20230321.dist-info → tf_models_nightly-2.11.0.dev20230323.dist-info}/AUTHORS +0 -0
- {tf_models_nightly-2.11.0.dev20230321.dist-info → tf_models_nightly-2.11.0.dev20230323.dist-info}/LICENSE +0 -0
- {tf_models_nightly-2.11.0.dev20230321.dist-info → tf_models_nightly-2.11.0.dev20230323.dist-info}/WHEEL +0 -0
- {tf_models_nightly-2.11.0.dev20230321.dist-info → tf_models_nightly-2.11.0.dev20230323.dist-info}/top_level.txt +0 -0
official/modeling/hyperparams/base_config.py (+20 -2):

```diff
@@ -95,6 +95,20 @@ class Config(params_dict.ParamsDict):
   def BUILDER(self):
     return self._BUILDER
 
+  @classmethod
+  def _get_annotations(cls):
+    """Returns valid annotations.
+
+    Note: this is similar to dataclasses.__annotations__ except it also includes
+    annotations from its parent classes.
+    """
+    all_annotations = typing.get_type_hints(cls)
+    # Removes Config class annotation from the value, e.g., default_params,
+    # restrictions, etc.
+    for k in Config.__annotations__:
+      del all_annotations[k]
+    return all_annotations
+
   @classmethod
   def _isvalidsequence(cls, v):
     """Check if the input values are valid sequences.
@@ -175,9 +189,10 @@ class Config(params_dict.ParamsDict):
     if not subconfig_type:
       subconfig_type = Config
 
-    if k in cls.__annotations__:
+    annotations = cls._get_annotations()
+    if k in annotations:
       # Directly Config subtype.
-      type_annotation = cls.__annotations__[k]
+      type_annotation = annotations[k]
       i = 0
       # Loop for striping the Optional annotation.
       traverse_in = True
@@ -326,6 +341,9 @@ class Config(params_dict.ParamsDict):
   @classmethod
   def from_args(cls, *args, **kwargs):
     """Builds a config from the given list of arguments."""
+    # Note we intend to keep `__annotations__` instead of `_get_annotations`.
+    # Assuming a parent class of (a, b) with the sub-class of (c, d), the
+    # sub-class will take (c, d) for args, rather than starting from (a, b).
     attributes = list(cls.__annotations__.keys())
    default_params = {a: p for a, p in zip(attributes, args)}
    default_params.update(kwargs)
```
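The heart of this change is the difference between `cls.__annotations__`, which only lists fields declared on the class itself, and `typing.get_type_hints(cls)`, which also walks the parent classes. A minimal standalone sketch (plain dataclasses, not part of the package) of the two behaviors:

```python
import dataclasses
import typing


@dataclasses.dataclass
class Parent:
  a: int = 0


@dataclasses.dataclass
class Child(Parent):
  b: str = 'x'


# Only the class's own fields: this is why `from_args` keeps using
# `__annotations__` -- positional args for Child bind to (b,), not (a, b).
print(Child.__annotations__)         # {'b': <class 'str'>}

# The full MRO view: this is what `override` needs so that a field inherited
# from a parent config can still be resolved on a subclass.
print(typing.get_type_hints(Child))  # {'a': <class 'int'>, 'b': <class 'str'>}
```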
official/modeling/hyperparams/base_config_test.py (+29 -0):

```diff
@@ -33,6 +33,7 @@ class DumpConfig2(base_config.Config):
   c: int = 2
   d: str = 'text'
   e: DumpConfig1 = DumpConfig1()
+  optional_e: Optional[DumpConfig1] = None
 
 
 @dataclasses.dataclass
@@ -348,6 +349,34 @@ class BaseConfigTest(parameterized.TestCase, tf.test.TestCase):
         ]),
         "['s', 1, 1.0, True, None, {}, [], (), {8: 9, (2,): (3, [4], {6: 7})}]")
 
+  def test_with_superclass_override(self):
+    config = DumpConfig2()
+    config.override({'optional_e': {'a': 2}})
+    self.assertEqual(
+        config.optional_e.as_dict(),
+        {
+            'a': 2,
+            'b': 'text',
+        },
+    )
+
+    # Previously, the following will fail. See b/274696969 for context.
+    config = DumpConfig3()
+    config.override({'optional_e': {'a': 2}})
+    self.assertEqual(
+        config.optional_e.as_dict(),
+        {
+            'a': 2,
+            'b': 'text',
+        },
+    )
+
+  def test_get_annotations_without_base_config_leak(self):
+    with self.assertRaisesRegex(
+        KeyError, "The key 'restrictions' does not exist"
+    ):
+      DumpConfig3().override({'restrictions': None})
+
   def test_with_restrictions(self):
     restrictions = ['e.a<c']
     config = DumpConfig2(restrictions=restrictions)
```
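For reference, a sketch of the fixtures these tests rely on; `DumpConfig3` is not shown in this diff, but the new tests only make sense if it is a subclass of `DumpConfig2` that *inherits* `optional_e` rather than declaring it itself (the b/274696969 failure mode), so its exact fields here are hypothetical:

```python
import dataclasses
from typing import Optional

from official.modeling.hyperparams import base_config


@dataclasses.dataclass
class DumpConfig1(base_config.Config):
  a: int = 1
  b: str = 'text'


@dataclasses.dataclass
class DumpConfig2(base_config.Config):
  c: int = 2
  d: str = 'text'
  e: DumpConfig1 = DumpConfig1()
  optional_e: Optional[DumpConfig1] = None


# Hypothetical shape: inherits `optional_e`. Before this fix, override() could
# not see the inherited annotation and failed to build the sub-config.
@dataclasses.dataclass
class DumpConfig3(DumpConfig2):
  f: int = 3
```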
official/projects/yt8m/configs/yt8m.py (+1 -4):

```diff
@@ -15,7 +15,6 @@
 """Video classification configuration definition."""
 import dataclasses
 from typing import Optional, Tuple
-from absl import flags
 
 from official.core import config_definitions as cfg
 from official.core import exp_factory
@@ -23,7 +22,6 @@ from official.modeling import hyperparams
 from official.modeling import optimization
 from official.vision.configs import common
 
-FLAGS = flags.FLAGS
 
 YT8M_TRAIN_EXAMPLES = 3888919
 YT8M_VAL_EXAMPLES = 1112356
@@ -105,7 +103,6 @@ def yt8m(is_training):
 class MoeModel(hyperparams.Config):
   """The model config."""
   num_mixtures: int = 5
-  l2_penalty: float = 1e-5
   use_input_context_gate: bool = False
   use_output_context_gate: bool = False
   vocab_as_last_dim: bool = False
@@ -121,7 +118,7 @@ class DbofModel(hyperparams.Config):
   use_context_gate_cluster_layer: bool = False
   context_gate_cluster_bottleneck_size: int = 0
   pooling_method: str = 'average'
-  yt8m_agg_classifier_model: str = 'MoeModel'
+  agg_classifier_model: str = 'MoeModel'
   agg_model: hyperparams.Config = MoeModel()
   norm_activation: common.NormActivation = common.NormActivation(
       activation='relu', use_sync_bn=False)
```
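A short sketch of what the rename means for users of this config (field values assumed): the aggregation head is looked up by class name in `nn_layers`, so the value must match a class defined there.

```python
from official.projects.yt8m.configs import yt8m as yt8m_cfg

# 'MoeModel' is the default; 'LogisticModel' is the other head defined in
# official/projects/yt8m/modeling/nn_layers.py.
params = yt8m_cfg.DbofModel(agg_classifier_model='LogisticModel')

# Note: `l2_penalty` was dropped from the MoeModel config above; the head's
# L2 penalty is now wired from the model's `l2_weight_decay` argument instead.
```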
official/projects/yt8m/modeling/nn_layers.py (+167 -26):

```diff
@@ -13,22 +13,156 @@
 # limitations under the License.
 
 """Contains model definitions."""
+
+import functools
 from typing import Any, Dict, Optional
 
 import tensorflow as tf
+
+from official.modeling import tf_utils
+from official.projects.yt8m.configs import yt8m as yt8m_cfg
 from official.projects.yt8m.modeling import yt8m_model_utils as utils
 
+
 layers = tf.keras.layers
 
 
-class LogisticModel():
-  """Logistic model with L2 regularization."""
+class Dbof(tf.keras.Model):
+  """A YT8M model class builder.
+
+  Creates a Deep Bag of Frames model.
+  The model projects the features for each frame into a higher dimensional
+  'clustering' space, pools across frames in that space, and then
+  uses a configurable video-level model to classify the now aggregated features.
+  The model will randomly sample either frames or sequences of frames during
+  training to speed up convergence.
+  """
+
+  def __init__(
+      self,
+      params: yt8m_cfg.DbofModel,
+      num_classes: int = 3862,
+      input_specs: layers.InputSpec = layers.InputSpec(
+          shape=[None, None, 1152]),
+      l2_weight_decay: Optional[float] = None,
+      **kwargs):
+    """YT8M initialization function.
+
+    Args:
+      params: model configuration parameters
+      num_classes: `int` number of classes in dataset.
+      input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
+        [batch_size x num_frames x num_features]
+      l2_weight_decay: An optional `float` of kernel regularizer weight decay.
+      **kwargs: keyword arguments to be passed.
+    """
+    self._self_setattr_tracking = False
+    self._num_classes = num_classes
+    self._input_specs = input_specs
+    self._params = params
+    self._l2_weight_decay = l2_weight_decay
+    self._act_fn = tf_utils.get_activation(params.norm_activation.activation)
+    self._norm = functools.partial(
+        layers.BatchNormalization,
+        momentum=params.norm_activation.norm_momentum,
+        epsilon=params.norm_activation.norm_epsilon,
+        synchronized=params.norm_activation.use_sync_bn)
+
+    # Divide weight decay by 2.0 to match the implementation of tf.nn.l2_loss.
+    # (https://www.tensorflow.org/api_docs/python/tf/keras/regularizers/l2)
+    # (https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss)
+    l2_regularizer = (
+        tf.keras.regularizers.l2(l2_weight_decay / 2.0)
+        if l2_weight_decay
+        else None
+    )
+
+    # [batch_size x num_frames x num_features]
+    feature_size = input_specs.shape[-1]
+    # shape 'excluding' batch_size
+    model_input = tf.keras.Input(shape=self._input_specs.shape[1:])
+    # normalize input features
+    input_data = tf.nn.l2_normalize(model_input, -1)
+    tf.summary.histogram("input_hist", input_data)
+
+    # configure model
+    if params.add_batch_norm:
+      input_data = self._norm(name="input_bn")(input_data)
+
+    # activation = reshaped input * cluster weights
+    if params.cluster_size > 0:
+      activation = layers.Dense(
+          params.cluster_size,
+          kernel_regularizer=l2_regularizer,
+          kernel_initializer=tf.random_normal_initializer(
+              stddev=1 / tf.sqrt(tf.cast(feature_size, tf.float32))))(
+                  input_data)
+
+    if params.add_batch_norm:
+      activation = self._norm(name="cluster_bn")(activation)
+    else:
+      cluster_biases = tf.Variable(
+          tf.random_normal_initializer(stddev=1 / tf.math.sqrt(feature_size))(
+              shape=[params.cluster_size]),
+          name="cluster_biases")
+      tf.summary.histogram("cluster_biases", cluster_biases)
+      activation += cluster_biases
+
+    activation = self._act_fn(activation)
+    tf.summary.histogram("cluster_output", activation)
 
-  def create_model(self, model_input, vocab_size, l2_penalty=1e-8, **kwargs):
+    if params.use_context_gate_cluster_layer:
+      pooling_method = None
+      norm_args = dict(name="context_gate_bn")
+      activation = utils.context_gate(
+          activation,
+          normalizer_fn=self._norm,
+          normalizer_params=norm_args,
+          pooling_method=pooling_method,
+          hidden_layer_size=params.context_gate_cluster_bottleneck_size,
+          kernel_regularizer=l2_regularizer)
+
+    activation = utils.frame_pooling(activation, params.pooling_method)
+
+    # activation = activation * hidden1_weights
+    activation = layers.Dense(
+        params.hidden_size,
+        kernel_regularizer=l2_regularizer,
+        kernel_initializer=tf.random_normal_initializer(
+            stddev=1 / tf.sqrt(tf.cast(params.cluster_size, tf.float32))))(
+                activation)
+
+    if params.add_batch_norm:
+      activation = self._norm(name="hidden1_bn")(activation)
+
+    else:
+      hidden1_biases = tf.Variable(
+          tf.random_normal_initializer(stddev=0.01)(shape=[params.hidden_size]),
+          name="hidden1_biases")
+
+      tf.summary.histogram("hidden1_biases", hidden1_biases)
+      activation += hidden1_biases
+
+    activation = self._act_fn(activation)
+    tf.summary.histogram("hidden1_output", activation)
+
+    super().__init__(inputs=model_input, outputs=activation, **kwargs)
+
+
+class LogisticModel(tf.keras.Model):
+  """Logistic prediction head model with L2 regularization."""
+
+  def __init__(
+      self,
+      input_specs: layers.InputSpec = layers.InputSpec(shape=[None, 128]),
+      vocab_size: int = 3862,
+      l2_penalty: float = 1e-8,
+      **kwargs,
+  ):
     """Creates a logistic model.
 
     Args:
-      model_input: 'batch' x 'num_features' matrix of input features.
+      input_specs: 'batch' x 'num_features' matrix of input features.
       vocab_size: The number of classes in the dataset.
       l2_penalty: L2 weight regularization ratio.
       **kwargs: extra key word args.
@@ -38,43 +172,44 @@ class LogisticModel():
       model in the 'predictions' key. The dimensions of the tensor are
       batch_size x num_classes.
     """
-    del kwargs  # Unused.
+    inputs = tf.keras.Input(shape=input_specs.shape[1:])
     output = layers.Dense(
         vocab_size,
         activation=tf.nn.sigmoid,
         kernel_regularizer=tf.keras.regularizers.l2(l2_penalty))(
-            model_input)
-    return {"predictions": output}
+            inputs)
+
+    super().__init__(inputs=inputs, outputs={"predictions": output}, **kwargs)
 
 
-class MoeModel():
+class MoeModel(tf.keras.Model):
   """A softmax over a mixture of logistic models (with L2 regularization)."""
 
-  def create_model(self,
-                   model_input,
-                   vocab_size,
-                   num_mixtures=2,
-                   use_input_context_gate=False,
-                   use_output_context_gate=False,
-                   normalizer_fn=None,
-                   normalizer_params=None,
-                   vocab_as_last_dim=False,
-                   l2_penalty=1e-5,
-                   **kwargs):
+  def __init__(
+      self,
+      input_specs: layers.InputSpec = layers.InputSpec(shape=[None, 128]),
+      vocab_size: int = 3862,
+      num_mixtures: int = 2,
+      use_input_context_gate: bool = False,
+      use_output_context_gate: bool = False,
+      normalizer_params: Optional[Dict[str, Any]] = None,
+      vocab_as_last_dim: bool = False,
+      l2_penalty: float = 1e-5,
+      **kwargs,
+  ):
     """Creates a Mixture of (Logistic) Experts model.
 
     The model consists of a per-class softmax distribution over a
     configurable number of logistic classifiers. One of the classifiers
     in the mixture is not trained, and always predicts 0.
     Args:
-      model_input: 'batch_size' x 'num_features' matrix of input features.
+      input_specs: 'batch_size' x 'num_features' matrix of input features.
      vocab_size: The number of classes in the dataset.
       num_mixtures: The number of mixtures (excluding a dummy 'expert' that
        always predicts the non-existence of an entity).
      use_input_context_gate: if True apply context gate layer to the input.
       use_output_context_gate: if True apply context gate layer to the output.
-      normalizer_fn: normalization op constructor (e.g. batch norm).
-      normalizer_params: parameters to the `normalizer_fn`.
+      normalizer_params: parameters of the batch normalization.
       vocab_as_last_dim: if True reshape `activations` and make `vocab_size` as
        the last dimension to avoid small `num_mixtures` as the last dimension.
        XLA pads up the dimensions of tensors: typically the last dimension will
@@ -88,11 +223,13 @@ class MoeModel():
       of the model in the 'predictions' key. The dimensions of the tensor
       are batch_size x num_classes.
     """
-    del kwargs  # Unused.
+    inputs = tf.keras.Input(shape=input_specs.shape[1:])
+    model_input = inputs
+
     if use_input_context_gate:
       model_input = utils.context_gate(
           model_input,
-          normalizer_fn=normalizer_fn,
+          normalizer_fn=layers.BatchNormalization,
           normalizer_params=normalizer_params,
       )
 
@@ -132,7 +269,11 @@ class MoeModel():
     if use_output_context_gate:
       final_probabilities = utils.context_gate(
          final_probabilities,
-          normalizer_fn=normalizer_fn,
+          normalizer_fn=layers.BatchNormalization,
           normalizer_params=normalizer_params,
       )
-    return {"predictions": final_probabilities}
+    super().__init__(
+        inputs=inputs,
+        outputs={"predictions": final_probabilities},
+        **kwargs,
+    )
```

(Several removed lines above, e.g. the old `create_model` signatures and the `del kwargs  # Unused.` bodies, were truncated in the diff viewer; they are reconstructed here from the surrounding context and the replacement lines.)
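Both classes are now functional `tf.keras.Model`s assembled entirely in `__init__`. A minimal usage sketch (batch and frame-count values assumed, not from the package):

```python
import tensorflow as tf

from official.projects.yt8m.configs import yt8m as yt8m_cfg
from official.projects.yt8m.modeling import nn_layers

params = yt8m_cfg.DbofModel()
backbone = nn_layers.Dbof(params=params, num_classes=3862)
head = nn_layers.MoeModel(
    input_specs=tf.keras.layers.InputSpec(shape=[None, params.hidden_size]))

frames = tf.random.normal([2, 30, 1152])  # batch x num_frames x num_features
pooled = backbone(frames)                 # batch x hidden_size
probs = head(pooled)["predictions"]       # batch x vocab_size
```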
official/projects/yt8m/modeling/yt8m_model.py (+44 -182):

```diff
@@ -12,17 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""YT8M model definition."""
+"""YT8M prediction model definition."""
 
 import functools
-from typing import Optional
+from typing import Any, Optional
 
+from absl import logging
 import tensorflow as tf
 
-from official.modeling import tf_utils
 from official.projects.yt8m.configs import yt8m as yt8m_cfg
 from official.projects.yt8m.modeling import nn_layers
-from official.projects.yt8m.modeling import yt8m_model_utils as utils
 
 
 layers = tf.keras.layers
@@ -45,202 +44,58 @@ class DbofModel(tf.keras.Model):
       num_classes: int = 3862,
       input_specs: layers.InputSpec = layers.InputSpec(
           shape=[None, None, 1152]),
-      activation: str = "relu",
-      use_sync_bn: bool = False,
-      norm_momentum: float = 0.99,
-      norm_epsilon: float = 0.001,
       l2_weight_decay: Optional[float] = None,
-      **kwargs):
-    """YT8M initialization function.
+      **kwargs,
+  ):
+    """YT8M Dbof model initialization function.
 
     Args:
       params: model configuration parameters
       num_classes: `int` number of classes in dataset.
       input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
         [batch_size x num_frames x num_features]
-      activation: A `str` of name of the activation function.
-      use_sync_bn: If True, use synchronized batch normalization.
-      norm_momentum: A `float` of normalization momentum for the moving average.
-      norm_epsilon: A `float` added to variance to avoid dividing by zero.
       l2_weight_decay: An optional `float` of kernel regularizer weight decay.
       **kwargs: keyword arguments to be passed.
     """
-    model_input, activation = self.get_dbof(
-        params=params,
-        num_classes=num_classes,
-        input_specs=input_specs,
-        activation=activation,
-        use_sync_bn=use_sync_bn,
-        norm_momentum=norm_momentum,
-        norm_epsilon=norm_epsilon,
-        l2_weight_decay=l2_weight_decay,
-        **kwargs,
-    )
-    output = self.get_aggregation(model_input=activation, **kwargs)
-    super().__init__(
-        inputs=model_input, outputs=output.get("predictions"), **kwargs)
-
-  def get_dbof(
-      self,
-      params: yt8m_cfg.DbofModel,
-      num_classes: int = 3862,
-      input_specs: layers.InputSpec = layers.InputSpec(
-          shape=[None, None, 1152]),
-      activation: str = "relu",
-      use_sync_bn: bool = False,
-      norm_momentum: float = 0.99,
-      norm_epsilon: float = 0.001,
-      l2_weight_decay: Optional[float] = None,
-      **kwargs):
-
-    del kwargs  # Unused and reserved for future extension.
-    self._self_setattr_tracking = False
+    super().__init__()
+    self._params = params
+    self._num_classes = num_classes
+    self._input_specs = input_specs
+    self._l2_weight_decay = l2_weight_decay
     self._config_dict = {
+        "params": params,
         "input_specs": input_specs,
         "num_classes": num_classes,
-        "params": params,
-        "use_sync_bn": use_sync_bn,
-        "activation": activation,
         "l2_weight_decay": l2_weight_decay,
-        "norm_momentum": norm_momentum,
-        "norm_epsilon": norm_epsilon,
     }
-    self._num_classes = num_classes
-    self._input_specs = input_specs
-    self._params = params
-    self._activation = activation
-    self._l2_weight_decay = l2_weight_decay
-    self._use_sync_bn = use_sync_bn
-    self._norm_momentum = norm_momentum
-    self._norm_epsilon = norm_epsilon
-    self._act_fn = tf_utils.get_activation(activation)
-    self._norm = functools.partial(
-        layers.BatchNormalization, synchronized=use_sync_bn)
-
-    # Divide weight decay by 2.0 to match the implementation of tf.nn.l2_loss.
-    # (https://www.tensorflow.org/api_docs/python/tf/keras/regularizers/l2)
-    # (https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss)
-    l2_regularizer = (
-        tf.keras.regularizers.l2(l2_weight_decay / 2.0)
-        if l2_weight_decay
-        else None
-    )
 
-    bn_axis = -1
-    # [batch_size x num_frames x num_features]
-    feature_size = input_specs.shape[-1]
-    # shape 'excluding' batch_size
-    model_input = tf.keras.Input(shape=self._input_specs.shape[1:])
-    # normalize input features
-    input_data = tf.nn.l2_normalize(model_input, -1)
-    tf.summary.histogram("input_hist", input_data)
-
-    # configure model
-    if params.add_batch_norm:
-      input_data = self._norm(
-          axis=bn_axis,
-          momentum=norm_momentum,
-          epsilon=norm_epsilon,
-          name="input_bn")(
-              input_data)
-
-    # activation = reshaped input * cluster weights
-    if params.cluster_size > 0:
-      activation = layers.Dense(
-          params.cluster_size,
-          kernel_regularizer=l2_regularizer,
-          kernel_initializer=tf.random_normal_initializer(
-              stddev=1 / tf.sqrt(tf.cast(feature_size, tf.float32))))(
-                  input_data)
-
-    if params.add_batch_norm:
-      activation = self._norm(
-          axis=bn_axis,
-          momentum=norm_momentum,
-          epsilon=norm_epsilon,
-          name="cluster_bn")(
-              activation)
-    else:
-      cluster_biases = tf.Variable(
-          tf.random_normal_initializer(stddev=1 / tf.math.sqrt(feature_size))(
-              shape=[params.cluster_size]),
-          name="cluster_biases")
-      tf.summary.histogram("cluster_biases", cluster_biases)
-      activation += cluster_biases
-
-    activation = self._act_fn(activation)
-    tf.summary.histogram("cluster_output", activation)
-
-    if params.use_context_gate_cluster_layer:
-      pooling_method = None
-      norm_args = dict(
-          axis=bn_axis,
-          momentum=norm_momentum,
-          epsilon=norm_epsilon,
-          name="context_gate_bn")
-      activation = utils.context_gate(
-          activation,
-          normalizer_fn=self._norm,
-          normalizer_params=norm_args,
-          pooling_method=pooling_method,
-          hidden_layer_size=params.context_gate_cluster_bottleneck_size,
-          kernel_regularizer=l2_regularizer)
-
-    activation = utils.frame_pooling(activation, params.pooling_method)
-
-    # activation = activation * hidden1_weights
-    activation = layers.Dense(
-        params.hidden_size,
-        kernel_regularizer=l2_regularizer,
-        kernel_initializer=tf.random_normal_initializer(
-            stddev=1 / tf.sqrt(tf.cast(params.cluster_size, tf.float32))))(
-                activation)
-
-    if params.add_batch_norm:
-      activation = self._norm(
-          axis=bn_axis,
-          momentum=norm_momentum,
-          epsilon=norm_epsilon,
-          name="hidden1_bn")(
-              activation)
-
-    else:
-      hidden1_biases = tf.Variable(
-          tf.random_normal_initializer(stddev=0.01)(shape=[params.hidden_size]),
-          name="hidden1_biases")
-
-      tf.summary.histogram("hidden1_biases", hidden1_biases)
-      activation += hidden1_biases
-
-    activation = self._act_fn(activation)
-    tf.summary.histogram("hidden1_output", activation)
-
-    return model_input, activation
-
-  def get_aggregation(self, model_input, **kwargs):
-    del kwargs  # Unused and reserved for future extension.
-    normalizer_fn = functools.partial(
-        layers.BatchNormalization, synchronized=self._use_sync_bn)
-    normalizer_params = dict(
-        axis=-1, momentum=self._norm_momentum, epsilon=self._norm_epsilon)
-    aggregated_model = getattr(
-        nn_layers, self._params.yt8m_agg_classifier_model)
-
-    output = aggregated_model().create_model(
-        model_input=model_input,
-        vocab_size=self._num_classes,
-        num_mixtures=self._params.agg_model.num_mixtures,
-        normalizer_fn=normalizer_fn,
-        normalizer_params=normalizer_params,
-        vocab_as_last_dim=self._params.agg_model.vocab_as_last_dim,
-        l2_penalty=self._params.agg_model.l2_penalty,
+    self.dbof_backbone = nn_layers.Dbof(
+        params,
+        num_classes,
+        input_specs,
+        l2_weight_decay,
+        **kwargs,
     )
-    return output
 
-
-
-
-
+    logging.info("Build DbofModel with %s.", params.agg_classifier_model)
+    if hasattr(nn_layers, params.agg_classifier_model):
+      aggregation_head = getattr(nn_layers, params.agg_classifier_model)
+      if params.agg_classifier_model == "MoeModel":
+        normalizer_params = dict(
+            synchronized=params.norm_activation.use_sync_bn,
+            momentum=params.norm_activation.norm_momentum,
+            epsilon=params.norm_activation.norm_epsilon,
+        )
+        aggregation_head = functools.partial(
+            aggregation_head, normalizer_params=normalizer_params)
+
+      if params.agg_model is not None:
+        kwargs.update(params.agg_model.as_dict())
+      self.head = aggregation_head(
+          input_specs=layers.InputSpec(shape=[None, params.hidden_size]),
+          vocab_size=num_classes,
+          l2_penalty=l2_weight_decay,
+          **kwargs)
 
   def get_config(self):
     return self._config_dict
@@ -248,3 +103,10 @@ class DbofModel(tf.keras.Model):
   @classmethod
   def from_config(cls, config):
     return cls(**config)
+
+  def call(
+      self, inputs: tf.Tensor, training: Any = None, mask: Any = None
+  ) -> tf.Tensor:
+    features = self.dbof_backbone(inputs)
+    outputs = self.head(features)
+    return outputs["predictions"]
```

(A handful of removed lines, e.g. the `get_dbof` call in `__init__` and a few setup lines, were truncated in the diff viewer and are reconstructed here from the kept removed lines and the matching code that moved into nn_layers.py.)
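After the refactor, `DbofModel` is a thin composition of the `Dbof` backbone and a head chosen by `params.agg_classifier_model`. A usage sketch (argument values assumed):

```python
import tensorflow as tf

from official.projects.yt8m.configs import yt8m as yt8m_cfg
from official.projects.yt8m.modeling.yt8m_model import DbofModel

model = DbofModel(params=yt8m_cfg.DbofModel(), num_classes=3862)
frames = tf.random.normal([2, 30, 1152])
# call() runs the backbone, then the head, and unwraps the predictions dict:
predictions = model(frames)  # batch x num_classes
```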
official/projects/yt8m/modeling/yt8m_model_utils.py (+5 -6):

```diff
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 """Contains a collection of util functions for model construction."""
+
 from typing import Any, Dict, Optional, Union
 
 import tensorflow as tf
@@ -177,8 +178,7 @@ def context_gate(
       kernel_initializer=kernel_initializer,
       bias_initializer=bias_initializer,
       kernel_regularizer=kernel_regularizer,
-  )(
-      context_features)
+  )(context_features)
   if normalizer_fn:
     gates_bottleneck = normalizer_fn(**normalizer_params)(gates_bottleneck)
   else:
@@ -191,14 +191,13 @@ def context_gate(
       kernel_initializer=kernel_initializer,
       bias_initializer=bias_initializer,
       kernel_regularizer=kernel_regularizer,
-  )(
-      gates_bottleneck)
+  )(gates_bottleneck)
   if normalizer_fn:
     gates = normalizer_fn(**normalizer_params)(gates)
 
   if additive_residual:
-    input_features += gates
+    input_features += tf.cast(gates, input_features.dtype)
   else:
-    input_features *= gates
+    input_features *= tf.cast(gates, input_features.dtype)
 
   return input_features
```
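The new `tf.cast` guards the gating update against a dtype mismatch, e.g. when `input_features` run in float16 under mixed precision while the gates come out of a float32 path. A standalone sketch (not from the package) of the failure the cast avoids:

```python
import tensorflow as tf

input_features = tf.random.normal([2, 8], dtype=tf.float16)
gates = tf.sigmoid(tf.random.normal([2, 8]))  # float32 by default

# input_features *= gates  # InvalidArgumentError: half vs. float mismatch
input_features *= tf.cast(gates, input_features.dtype)  # float16 * float16
```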