tf-models-nightly 2.11.0.dev20230320__py2.py3-none-any.whl → 2.11.0.dev20230322__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,7 +15,6 @@
 """Video classification configuration definition."""
 import dataclasses
 from typing import Optional, Tuple
-from absl import flags
 
 from official.core import config_definitions as cfg
 from official.core import exp_factory
@@ -23,7 +22,6 @@ from official.modeling import hyperparams
 from official.modeling import optimization
 from official.vision.configs import common
 
-FLAGS = flags.FLAGS
 
 YT8M_TRAIN_EXAMPLES = 3888919
 YT8M_VAL_EXAMPLES = 1112356
@@ -53,7 +51,7 @@ class DataConfig(cfg.DataConfig):
     temporal_stride: Not used. To be deprecated.
     max_frames: Maximum number of frames in an input example. It is used to
       crop the input in the temporal dimension.
-    num_frames: Number of frames in a single input example.
+    num_sample_frames: Number of frames to sample for each input example.
     num_classes: Number of classes to classify. Assuming it is a classification
       task.
     num_devices: Not used. To be deprecated.
@@ -77,7 +75,7 @@ class DataConfig(cfg.DataConfig):
   include_video_id: bool = False
   temporal_stride: int = 1
   max_frames: int = 300
-  num_frames: int = 300  # set smaller to allow random sample (Parser)
+  num_sample_frames: int = 300  # set smaller to allow random sample (Parser)
   num_classes: int = 3862
   num_devices: int = 1
   input_path: str = ''
@@ -90,7 +88,6 @@ def yt8m(is_training):
   """YT8M dataset configs."""
   # pylint: disable=unexpected-keyword-arg
   return DataConfig(
-      num_frames=30,
       temporal_stride=1,
      segment_labels=False,
      segment_size=5,
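
Note: the hunks above rename `num_frames` to `num_sample_frames` and drop the explicit `num_frames=30` override from the `yt8m()` factory. A minimal sketch (ours, not from the package) of setting the renamed field; the values are illustrative, not release defaults:

    # Hypothetical override of the DataConfig defined in the hunks above.
    from official.projects.yt8m.configs import yt8m as yt8m_cfg

    data = yt8m_cfg.DataConfig(
        num_sample_frames=30,  # renamed from `num_frames` in this release
        max_frames=300,        # temporal crop length, unchanged
    )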
@@ -106,7 +103,6 @@ def yt8m(is_training):
 class MoeModel(hyperparams.Config):
   """The model config."""
   num_mixtures: int = 5
-  l2_penalty: float = 1e-5
   use_input_context_gate: bool = False
   use_output_context_gate: bool = False
   vocab_as_last_dim: bool = False
@@ -122,7 +118,7 @@ class DbofModel(hyperparams.Config):
   use_context_gate_cluster_layer: bool = False
   context_gate_cluster_bottleneck_size: int = 0
   pooling_method: str = 'average'
-  yt8m_agg_classifier_model: str = 'MoeModel'
+  agg_classifier_model: str = 'MoeModel'
   agg_model: hyperparams.Config = MoeModel()
   norm_activation: common.NormActivation = common.NormActivation(
       activation='relu', use_sync_bn=False)
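
Note: per the model hunks below, the renamed `agg_classifier_model` value is resolved against the `nn_layers` module by name. A hedged sketch of that lookup:

    # Assumed illustration of the name-based head lookup used in DbofModel.
    import tensorflow as tf
    from official.projects.yt8m.modeling import nn_layers

    head_cls = getattr(nn_layers, "MoeModel")  # `agg_classifier_model` value
    assert issubclass(head_cls, tf.keras.Model)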
@@ -13,81 +13,223 @@
 # limitations under the License.
 
 """Contains model definitions."""
+
+import functools
 from typing import Any, Dict, Optional
 
 import tensorflow as tf
+
+from official.modeling import tf_utils
+from official.projects.yt8m.configs import yt8m as yt8m_cfg
 from official.projects.yt8m.modeling import yt8m_model_utils as utils
 
+
 layers = tf.keras.layers
 
 
-class LogisticModel():
-  """Logistic model with L2 regularization."""
+class Dbof(tf.keras.Model):
+  """A YT8M model class builder.
+
+  Creates a Deep Bag of Frames model.
+  The model projects the features for each frame into a higher dimensional
+  'clustering' space, pools across frames in that space, and then uses a
+  configurable video-level model to classify the aggregated features.
+  The model will randomly sample either frames or sequences of frames during
+  training to speed up convergence.
+  """
+
+  def __init__(
+      self,
+      params: yt8m_cfg.DbofModel,
+      num_classes: int = 3862,
+      input_specs: layers.InputSpec = layers.InputSpec(
+          shape=[None, None, 1152]),
+      l2_weight_decay: Optional[float] = None,
+      **kwargs):
+    """YT8M initialization function.
+
+    Args:
+      params: model configuration parameters.
+      num_classes: `int` number of classes in the dataset.
+      input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
+        [batch_size x num_frames x num_features]
+      l2_weight_decay: An optional `float` of kernel regularizer weight decay.
+      **kwargs: keyword arguments to be passed.
+    """
+    self._self_setattr_tracking = False
+    self._num_classes = num_classes
+    self._input_specs = input_specs
+    self._params = params
+    self._l2_weight_decay = l2_weight_decay
+    self._act_fn = tf_utils.get_activation(params.norm_activation.activation)
+    self._norm = functools.partial(
+        layers.BatchNormalization,
+        momentum=params.norm_activation.norm_momentum,
+        epsilon=params.norm_activation.norm_epsilon,
+        synchronized=params.norm_activation.use_sync_bn)
+
+    # Divide weight decay by 2.0 to match the implementation of tf.nn.l2_loss.
+    # (https://www.tensorflow.org/api_docs/python/tf/keras/regularizers/l2)
+    # (https://www.tensorflow.org/api_docs/python/tf/nn/l2_loss)
+    l2_regularizer = (
+        tf.keras.regularizers.l2(l2_weight_decay / 2.0)
+        if l2_weight_decay
+        else None
+    )
+
+    # [batch_size x num_frames x num_features]
+    feature_size = input_specs.shape[-1]
+    # shape 'excluding' batch_size
+    model_input = tf.keras.Input(shape=self._input_specs.shape[1:])
+    # normalize input features
+    input_data = tf.nn.l2_normalize(model_input, -1)
+    tf.summary.histogram("input_hist", input_data)
+
+    # configure model
+    if params.add_batch_norm:
+      input_data = self._norm(name="input_bn")(input_data)
+
+    # activation = reshaped input * cluster weights
+    if params.cluster_size > 0:
+      activation = layers.Dense(
+          params.cluster_size,
+          kernel_regularizer=l2_regularizer,
+          kernel_initializer=tf.random_normal_initializer(
+              stddev=1 / tf.sqrt(tf.cast(feature_size, tf.float32))))(
+                  input_data)
+
+    if params.add_batch_norm:
+      activation = self._norm(name="cluster_bn")(activation)
+    else:
+      cluster_biases = tf.Variable(
+          tf.random_normal_initializer(stddev=1 / tf.math.sqrt(feature_size))(
+              shape=[params.cluster_size]),
+          name="cluster_biases")
+      tf.summary.histogram("cluster_biases", cluster_biases)
+      activation += cluster_biases
+
+    activation = self._act_fn(activation)
+    tf.summary.histogram("cluster_output", activation)
 
-  def create_model(self, model_input, vocab_size, l2_penalty=1e-8):
+    if params.use_context_gate_cluster_layer:
+      pooling_method = None
+      norm_args = dict(name="context_gate_bn")
+      activation = utils.context_gate(
+          activation,
+          normalizer_fn=self._norm,
+          normalizer_params=norm_args,
+          pooling_method=pooling_method,
+          hidden_layer_size=params.context_gate_cluster_bottleneck_size,
+          kernel_regularizer=l2_regularizer)
+
+    activation = utils.frame_pooling(activation, params.pooling_method)
+
+    # activation = activation * hidden1_weights
+    activation = layers.Dense(
+        params.hidden_size,
+        kernel_regularizer=l2_regularizer,
+        kernel_initializer=tf.random_normal_initializer(
+            stddev=1 / tf.sqrt(tf.cast(params.cluster_size, tf.float32))))(
+                activation)
+
+    if params.add_batch_norm:
+      activation = self._norm(name="hidden1_bn")(activation)
+
+    else:
+      hidden1_biases = tf.Variable(
+          tf.random_normal_initializer(stddev=0.01)(shape=[params.hidden_size]),
+          name="hidden1_biases")
+
+      tf.summary.histogram("hidden1_biases", hidden1_biases)
+      activation += hidden1_biases
+
+    activation = self._act_fn(activation)
+    tf.summary.histogram("hidden1_output", activation)
+
+    super().__init__(inputs=model_input, outputs=activation, **kwargs)
+
+
+class LogisticModel(tf.keras.Model):
+  """Logistic prediction head model with L2 regularization."""
+
+  def __init__(
+      self,
+      input_specs: layers.InputSpec = layers.InputSpec(shape=[None, 128]),
+      vocab_size: int = 3862,
+      l2_penalty: float = 1e-8,
+      **kwargs,
+  ):
     """Creates a logistic model.
 
     Args:
-      model_input: 'batch' x 'num_features' matrix of input features.
+      input_specs: 'batch' x 'num_features' matrix of input features.
       vocab_size: The number of classes in the dataset.
       l2_penalty: L2 weight regularization ratio.
+      **kwargs: extra keyword args.
 
     Returns:
       A dictionary with a tensor containing the probability predictions of the
       model in the 'predictions' key. The dimensions of the tensor are
       batch_size x num_classes.
     """
+    inputs = tf.keras.Input(shape=input_specs.shape[1:])
     output = layers.Dense(
         vocab_size,
         activation=tf.nn.sigmoid,
         kernel_regularizer=tf.keras.regularizers.l2(l2_penalty))(
-            model_input)
-    return {"predictions": output}
+            inputs)
+
+    super().__init__(inputs=inputs, outputs={"predictions": output}, **kwargs)
 
 
-class MoeModel():
+class MoeModel(tf.keras.Model):
   """A softmax over a mixture of logistic models (with L2 regularization)."""
 
-  def create_model(self,
-                   model_input,
-                   vocab_size,
-                   num_mixtures: int = 2,
-                   use_input_context_gate: bool = False,
-                   use_output_context_gate: bool = False,
-                   normalizer_fn=None,
-                   normalizer_params: Optional[Dict[str, Any]] = None,
-                   vocab_as_last_dim: bool = False,
-                   l2_penalty: float = 1e-5):
+  def __init__(
+      self,
+      input_specs: layers.InputSpec = layers.InputSpec(shape=[None, 128]),
+      vocab_size: int = 3862,
+      num_mixtures: int = 2,
+      use_input_context_gate: bool = False,
+      use_output_context_gate: bool = False,
+      normalizer_params: Optional[Dict[str, Any]] = None,
+      vocab_as_last_dim: bool = False,
+      l2_penalty: float = 1e-5,
+      **kwargs,
+  ):
     """Creates a Mixture of (Logistic) Experts model.
 
     The model consists of a per-class softmax distribution over a
     configurable number of logistic classifiers. One of the classifiers
     in the mixture is not trained, and always predicts 0.
 
     Args:
-      model_input: 'batch_size' x 'num_features' matrix of input features.
+      input_specs: 'batch_size' x 'num_features' matrix of input features.
       vocab_size: The number of classes in the dataset.
       num_mixtures: The number of mixtures (excluding a dummy 'expert' that
        always predicts the non-existence of an entity).
       use_input_context_gate: if True, apply a context gate layer to the input.
       use_output_context_gate: if True, apply a context gate layer to the
        output.
-      normalizer_fn: normalization op constructor (e.g. batch norm).
-      normalizer_params: parameters to the `normalizer_fn`.
+      normalizer_params: parameters of the batch normalization.
       vocab_as_last_dim: if True, reshape `activations` and make `vocab_size`
        the last dimension to avoid a small `num_mixtures` as the last
        dimension. XLA pads up the dimensions of tensors: typically the last
        dimension will be padded to 128, and the second to last will be padded
        to 8.
       l2_penalty: How much to penalize the squared magnitudes of parameter
        values.
+      **kwargs: extra keyword args.
 
     Returns:
       A dictionary with a tensor containing the probability predictions
       of the model in the 'predictions' key. The dimensions of the tensor
       are batch_size x num_classes.
     """
+    inputs = tf.keras.Input(shape=input_specs.shape[1:])
+    model_input = inputs
+
     if use_input_context_gate:
       model_input = utils.context_gate(
           model_input,
-          normalizer_fn=normalizer_fn,
+          normalizer_fn=layers.BatchNormalization,
           normalizer_params=normalizer_params,
       )
 
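Note: the `Dbof` backbone above passes `l2_weight_decay / 2.0` to `tf.keras.regularizers.l2`, citing `tf.nn.l2_loss`. A quick numeric check (our sketch, not package code) of the equivalence: Keras `l2(l)` adds `l * sum(w**2)`, while `tf.nn.l2_loss(w)` is `sum(w**2) / 2`, so halving the decay reproduces `decay * tf.nn.l2_loss(w)`:

    import numpy as np
    import tensorflow as tf

    w = tf.constant(np.random.rand(4, 3), dtype=tf.float32)
    decay = 1e-4
    keras_penalty = tf.keras.regularizers.l2(decay / 2.0)(w)  # (decay/2)*sum(w^2)
    ref_penalty = decay * tf.nn.l2_loss(w)                    # decay*sum(w^2)/2
    assert np.isclose(keras_penalty.numpy(), ref_penalty.numpy())
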
@@ -127,7 +269,11 @@ class MoeModel():
     if use_output_context_gate:
       final_probabilities = utils.context_gate(
           final_probabilities,
-          normalizer_fn=normalizer_fn,
+          normalizer_fn=layers.BatchNormalization,
           normalizer_params=normalizer_params,
       )
-    return {"predictions": final_probabilities}
+    super().__init__(
+        inputs=inputs,
+        outputs={"predictions": final_probabilities},
+        **kwargs,
+    )
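
Note: with these hunks the heads become functional `tf.keras.Model`s whose graph is built in `__init__`, replacing the old `create_model` pattern. A hedged usage sketch using the default shapes from the signatures above:

    import tensorflow as tf
    from official.projects.yt8m.modeling import nn_layers

    head = nn_layers.MoeModel(
        input_specs=tf.keras.layers.InputSpec(shape=[None, 128]),
        vocab_size=3862,
        num_mixtures=2,
    )
    # The output dict mirrors the documented 'predictions' key.
    probs = head(tf.random.uniform([2, 128]))["predictions"]  # shape [2, 3862]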
@@ -12,15 +12,17 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""YT8M model definition."""
-from typing import Optional
+"""YT8M prediction model definition."""
 
+import functools
+from typing import Any, Optional
+
+from absl import logging
 import tensorflow as tf
 
-from official.modeling import tf_utils
 from official.projects.yt8m.configs import yt8m as yt8m_cfg
 from official.projects.yt8m.modeling import nn_layers
-from official.projects.yt8m.modeling import yt8m_model_utils as utils
+
 
 layers = tf.keras.layers
 
@@ -39,155 +41,61 @@ class DbofModel(tf.keras.Model):
   def __init__(
       self,
       params: yt8m_cfg.DbofModel,
-      num_frames: int = 30,
       num_classes: int = 3862,
       input_specs: layers.InputSpec = layers.InputSpec(
           shape=[None, None, 1152]),
-      kernel_regularizer: Optional[tf.keras.regularizers.Regularizer] = None,
-      activation: str = "relu",
-      use_sync_bn: bool = False,
-      norm_momentum: float = 0.99,
-      norm_epsilon: float = 0.001,
-      **kwargs):
-    """YT8M initialization function.
+      l2_weight_decay: Optional[float] = None,
+      **kwargs,
+  ):
+    """YT8M Dbof model initialization function.
 
     Args:
       params: model configuration parameters.
-      num_frames: `int` number of frames in a single input.
       num_classes: `int` number of classes in the dataset.
       input_specs: `tf.keras.layers.InputSpec` specs of the input tensor.
         [batch_size x num_frames x num_features]
-      kernel_regularizer: tf.keras.regularizers.Regularizer object. Default to
-        None.
-      activation: A `str` name of the activation function.
-      use_sync_bn: If True, use synchronized batch normalization.
-      norm_momentum: A `float` of normalization momentum for the moving average.
-      norm_epsilon: A `float` added to variance to avoid dividing by zero.
+      l2_weight_decay: An optional `float` of kernel regularizer weight decay.
       **kwargs: keyword arguments to be passed.
     """
-    del num_frames
-    self._self_setattr_tracking = False
+    super().__init__()
+    self._params = params
+    self._num_classes = num_classes
+    self._input_specs = input_specs
+    self._l2_weight_decay = l2_weight_decay
     self._config_dict = {
+        "params": params,
         "input_specs": input_specs,
         "num_classes": num_classes,
-        "params": params
+        "l2_weight_decay": l2_weight_decay,
     }
-    self._num_classes = num_classes
-    self._input_specs = input_specs
-    self._act_fn = tf_utils.get_activation(activation)
-    if use_sync_bn:
-      self._norm = layers.experimental.SyncBatchNormalization
-    else:
-      self._norm = layers.BatchNormalization
-
-    bn_axis = -1
-    # [batch_size x num_frames x num_features]
-    feature_size = input_specs.shape[-1]
-    # shape 'excluding' batch_size
-    model_input = tf.keras.Input(shape=self._input_specs.shape[1:])
-    # normalize input features
-    input_data = tf.nn.l2_normalize(model_input, -1)
-    tf.summary.histogram("input_hist", input_data)
-
-    # configure model
-    if params.add_batch_norm:
-      input_data = self._norm(
-          axis=bn_axis,
-          momentum=norm_momentum,
-          epsilon=norm_epsilon,
-          name="input_bn")(
-              input_data)
-
-    # activation = reshaped input * cluster weights
-    if params.cluster_size > 0:
-      activation = layers.Dense(
-          params.cluster_size,
-          kernel_regularizer=kernel_regularizer,
-          kernel_initializer=tf.random_normal_initializer(
-              stddev=1 / tf.sqrt(tf.cast(feature_size, tf.float32))))(
-                  input_data)
-
-    if params.add_batch_norm:
-      activation = self._norm(
-          axis=bn_axis,
-          momentum=norm_momentum,
-          epsilon=norm_epsilon,
-          name="cluster_bn")(
-              activation)
-    else:
-      cluster_biases = tf.Variable(
-          tf.random_normal_initializer(stddev=1 / tf.math.sqrt(feature_size))(
-              shape=[params.cluster_size]),
-          name="cluster_biases")
-      tf.summary.histogram("cluster_biases", cluster_biases)
-      activation += cluster_biases
-
-    activation = self._act_fn(activation)
-    tf.summary.histogram("cluster_output", activation)
-
-    if params.use_context_gate_cluster_layer:
-      pooling_method = None
-      norm_args = dict(
-          axis=bn_axis,
-          momentum=norm_momentum,
-          epsilon=norm_epsilon,
-          name="context_gate_bn")
-      activation = utils.context_gate(
-          activation,
-          normalizer_fn=self._norm,
-          normalizer_params=norm_args,
-          pooling_method=pooling_method,
-          hidden_layer_size=params.context_gate_cluster_bottleneck_size,
-          kernel_regularizer=kernel_regularizer)
-
-    activation = utils.frame_pooling(activation, params.pooling_method)
-
-    # activation = activation * hidden1_weights
-    activation = layers.Dense(
-        params.hidden_size,
-        kernel_regularizer=kernel_regularizer,
-        kernel_initializer=tf.random_normal_initializer(
-            stddev=1 / tf.sqrt(tf.cast(params.cluster_size, tf.float32))))(
-                activation)
-
-    if params.add_batch_norm:
-      activation = self._norm(
-          axis=bn_axis,
-          momentum=norm_momentum,
-          epsilon=norm_epsilon,
-          name="hidden1_bn")(
-              activation)
-
-    else:
-      hidden1_biases = tf.Variable(
-          tf.random_normal_initializer(stddev=0.01)(shape=[params.hidden_size]),
-          name="hidden1_biases")
-
-      tf.summary.histogram("hidden1_biases", hidden1_biases)
-      activation += hidden1_biases
-
-    activation = self._act_fn(activation)
-    tf.summary.histogram("hidden1_output", activation)
-
-    aggregated_model = getattr(nn_layers,
-                               params.yt8m_agg_classifier_model)
-    norm_args = dict(axis=bn_axis, momentum=norm_momentum, epsilon=norm_epsilon)
-    output = aggregated_model().create_model(
-        model_input=activation,
-        vocab_size=self._num_classes,
-        num_mixtures=params.agg_model.num_mixtures,
-        normalizer_fn=self._norm,
-        normalizer_params=norm_args,
-        vocab_as_last_dim=params.agg_model.vocab_as_last_dim,
-        l2_penalty=params.agg_model.l2_penalty)
-
-    super().__init__(
-        inputs=model_input, outputs=output.get("predictions"), **kwargs)
-
-  @property
-  def checkpoint_items(self):
-    """Returns a dictionary of items to be additionally checkpointed."""
-    return dict()
+
+    self.dbof_backbone = nn_layers.Dbof(
+        params,
+        num_classes,
+        input_specs,
+        l2_weight_decay,
+        **kwargs,
+    )
+
+    logging.info("Build DbofModel with %s.", params.agg_classifier_model)
+    if hasattr(nn_layers, params.agg_classifier_model):
+      aggregation_head = getattr(nn_layers, params.agg_classifier_model)
+      if params.agg_classifier_model == "MoeModel":
+        normalizer_params = dict(
+            synchronized=params.norm_activation.use_sync_bn,
+            momentum=params.norm_activation.norm_momentum,
+            epsilon=params.norm_activation.norm_epsilon,
+        )
+        aggregation_head = functools.partial(
+            aggregation_head, normalizer_params=normalizer_params)
+
+      if params.agg_model is not None:
+        kwargs.update(params.agg_model.as_dict())
+      self.head = aggregation_head(
+          input_specs=layers.InputSpec(shape=[None, params.hidden_size]),
+          vocab_size=num_classes,
+          l2_penalty=l2_weight_decay,
+          **kwargs)
 
   def get_config(self):
     return self._config_dict
@@ -195,3 +103,10 @@ class DbofModel(tf.keras.Model):
   @classmethod
   def from_config(cls, config):
     return cls(**config)
+
+  def call(
+      self, inputs: tf.Tensor, training: Any = None, mask: Any = None
+  ) -> tf.Tensor:
+    features = self.dbof_backbone(inputs)
+    outputs = self.head(features)
+    return outputs["predictions"]
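
Note: `DbofModel` is now a thin composition: `call()` runs the `nn_layers.Dbof` backbone, feeds the pooled features to the aggregation head, and returns only the head's 'predictions' tensor. A sketch mirroring the updated test below (module paths inferred from the imports in this diff; default config and shapes assumed):

    import numpy as np
    from official.projects.yt8m.configs import yt8m as yt8m_cfg
    from official.projects.yt8m.modeling import yt8m_model

    model = yt8m_model.DbofModel(params=yt8m_cfg.YT8MTask.model)
    inputs = np.random.rand(2, 24, 1152)  # batch x frames x (1024 + 128) features
    logits = model(inputs)                # head(backbone(x))["predictions"]
    assert logits.shape == (2, 3862)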
@@ -26,7 +26,7 @@ class YT8MNetworkTest(parameterized.TestCase, tf.test.TestCase):
   """Class for testing yt8m network."""
 
   # test_yt8m_network_creation arbitrary params
-  @parameterized.parameters((32, 1152))  # 1152 = 1024 + 128
+  @parameterized.parameters((32, 1152), (24, 1152))  # 1152 = 1024 + 128
   def test_yt8m_network_creation(self, num_frames, feature_dims):
     """Test for creation of a YT8M Model.
 
@@ -39,11 +39,10 @@ class YT8MNetworkTest(parameterized.TestCase, tf.test.TestCase):
     num_classes = 3862
     model = yt8m_model.DbofModel(
         params=yt8m_cfg.YT8MTask.model,
-        num_frames=num_frames,
         num_classes=num_classes,
         input_specs=input_specs)
 
-    # batch = 2 -> arbitrary value for test
+    # batch = 2 -> arbitrary value for test.
     inputs = np.random.rand(2, num_frames, feature_dims)
     logits = model(inputs)
     self.assertAllEqual([2, num_classes], logits.numpy().shape)
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 """Contains a collection of util functions for model construction."""
+
 from typing import Any, Dict, Optional, Union
 
 import tensorflow as tf
@@ -177,8 +178,7 @@ def context_gate(
       kernel_initializer=kernel_initializer,
       bias_initializer=bias_initializer,
       kernel_regularizer=kernel_regularizer,
-  )(
-      context_features)
+  )(context_features)
   if normalizer_fn:
     gates_bottleneck = normalizer_fn(**normalizer_params)(gates_bottleneck)
   else:
@@ -191,14 +191,13 @@ def context_gate(
       kernel_initializer=kernel_initializer,
       bias_initializer=bias_initializer,
       kernel_regularizer=kernel_regularizer,
-  )(
-      gates_bottleneck)
+  )(gates_bottleneck)
   if normalizer_fn:
     gates = normalizer_fn(**normalizer_params)(gates)
 
   if additive_residual:
-    input_features += gates
+    input_features += tf.cast(gates, input_features.dtype)
   else:
-    input_features *= gates
+    input_features *= tf.cast(gates, input_features.dtype)
 
   return input_features
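
Note: the added `tf.cast` calls guard the gating ops when `gates` and `input_features` carry different dtypes, e.g. under mixed precision where batch norm can emit float32 while the features are float16 (an assumed motivation; the diff itself does not state one). A minimal sketch of the failure mode:

    import tensorflow as tf

    input_features = tf.random.uniform([2, 8], dtype=tf.float16)
    gates = tf.random.uniform([2, 8], dtype=tf.float32)
    # input_features * gates would raise an error (dtype mismatch); casting
    # to the features' dtype, as the diff now does, works:
    gated = input_features * tf.cast(gates, input_features.dtype)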