tf-keras-nightly 2.17.0.dev2024050509__py3-none-any.whl → 2.19.0.dev2024101709__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tf_keras/__init__.py +1 -1
- tf_keras/src/__init__.py +1 -1
- tf_keras/src/callbacks.py +24 -7
- tf_keras/src/engine/base_layer.py +10 -4
- tf_keras/src/engine/base_layer_v1.py +10 -4
- tf_keras/src/engine/node.py +8 -3
- tf_keras/src/layers/activation/prelu.py +1 -1
- tf_keras/src/layers/attention/base_dense_attention.py +2 -1
- tf_keras/src/layers/convolutional/base_conv.py +1 -1
- tf_keras/src/layers/convolutional/base_depthwise_conv.py +3 -1
- tf_keras/src/layers/convolutional/base_separable_conv.py +3 -1
- tf_keras/src/layers/convolutional/conv1d_transpose.py +3 -1
- tf_keras/src/layers/convolutional/conv2d_transpose.py +3 -1
- tf_keras/src/layers/convolutional/conv3d_transpose.py +3 -1
- tf_keras/src/layers/core/dense.py +1 -1
- tf_keras/src/layers/core/embedding.py +1 -1
- tf_keras/src/layers/locally_connected/locally_connected1d.py +1 -1
- tf_keras/src/layers/locally_connected/locally_connected2d.py +1 -1
- tf_keras/src/layers/normalization/batch_normalization.py +1 -1
- tf_keras/src/layers/normalization/layer_normalization.py +1 -1
- tf_keras/src/layers/rnn/abstract_rnn_cell.py +1 -1
- tf_keras/src/layers/rnn/base_conv_lstm.py +0 -1
- tf_keras/src/layers/rnn/base_conv_rnn.py +3 -1
- tf_keras/src/layers/rnn/base_rnn.py +1 -1
- tf_keras/src/layers/rnn/base_wrapper.py +1 -1
- tf_keras/src/layers/rnn/bidirectional.py +2 -1
- tf_keras/src/layers/rnn/cell_wrappers.py +3 -3
- tf_keras/src/layers/rnn/cudnn_gru.py +6 -3
- tf_keras/src/layers/rnn/cudnn_lstm.py +6 -3
- tf_keras/src/layers/rnn/gru.py +35 -47
- tf_keras/src/layers/rnn/legacy_cell_wrappers.py +3 -3
- tf_keras/src/layers/rnn/legacy_cells.py +20 -25
- tf_keras/src/layers/rnn/lstm.py +35 -50
- tf_keras/src/layers/rnn/simple_rnn.py +0 -1
- tf_keras/src/layers/rnn/stacked_rnn_cells.py +1 -1
- tf_keras/src/layers/rnn/time_distributed.py +0 -1
- tf_keras/src/mixed_precision/autocast_variable.py +12 -6
- tf_keras/src/mixed_precision/test_util.py +6 -5
- tf_keras/src/optimizers/legacy/optimizer_v2.py +9 -2
- tf_keras/src/optimizers/optimizer.py +18 -9
- tf_keras/src/premade_models/linear.py +2 -1
- tf_keras/src/utils/data_utils.py +1 -1
- tf_keras/src/utils/steps_per_execution_tuning.py +1 -1
- tf_keras/src/utils/timeseries_dataset.py +13 -5
- {tf_keras_nightly-2.17.0.dev2024050509.dist-info → tf_keras_nightly-2.19.0.dev2024101709.dist-info}/METADATA +2 -2
- {tf_keras_nightly-2.17.0.dev2024050509.dist-info → tf_keras_nightly-2.19.0.dev2024101709.dist-info}/RECORD +48 -48
- {tf_keras_nightly-2.17.0.dev2024050509.dist-info → tf_keras_nightly-2.19.0.dev2024101709.dist-info}/WHEEL +1 -1
- {tf_keras_nightly-2.17.0.dev2024050509.dist-info → tf_keras_nightly-2.19.0.dev2024101709.dist-info}/top_level.txt +0 -0
@@ -246,11 +246,6 @@ class RNNCell(base_layer.Layer):
         """Integer or TensorShape: size of outputs produced by this cell."""
         raise NotImplementedError("Abstract method")
 
-    def build(self, _):
-        # This tells the parent Layer object that it's OK to call
-        # self.add_weight() inside the call() method.
-        pass
-
     def get_initial_state(self, inputs=None, batch_size=None, dtype=None):
         if inputs is not None:
             # Validate the given batch_size and dtype against inputs if
@@ -445,15 +440,15 @@ class BasicRNNCell(LayerRNNCell):
         return self._num_units
 
     @tf_utils.shape_type_conversion
-    def build(self, inputs_shape):
-        if inputs_shape[-1] is None:
+    def build(self, input_shape):
+        if input_shape[-1] is None:
             raise ValueError(
                 "Expected inputs.shape[-1] to be known, "
-                f"received shape: {inputs_shape}"
+                f"received shape: {input_shape}"
             )
         _check_supported_dtypes(self.dtype)
 
-        input_depth = inputs_shape[-1]
+        input_depth = input_shape[-1]
         self._kernel = self.add_weight(
             _WEIGHTS_VARIABLE_NAME,
             shape=[input_depth + self._num_units, self._num_units],
@@ -464,7 +459,7 @@ class BasicRNNCell(LayerRNNCell):
             initializer=tf.compat.v1.zeros_initializer(dtype=self.dtype),
         )
 
-        self.built = True
+        super().build(input_shape)
 
     def call(self, inputs, state):
         """Most basic RNN: output = new_state = act(W * input + U * state +
@@ -563,14 +558,14 @@ class GRUCell(LayerRNNCell):
         return self._num_units
 
     @tf_utils.shape_type_conversion
-    def build(self, inputs_shape):
-        if inputs_shape[-1] is None:
+    def build(self, input_shape):
+        if input_shape[-1] is None:
             raise ValueError(
                 "Expected inputs.shape[-1] to be known, "
-                f"received shape: {inputs_shape}"
+                f"received shape: {input_shape}"
             )
         _check_supported_dtypes(self.dtype)
-        input_depth = inputs_shape[-1]
+        input_depth = input_shape[-1]
         self._gate_kernel = self.add_weight(
             f"gates/{_WEIGHTS_VARIABLE_NAME}",
             shape=[input_depth + self._num_units, 2 * self._num_units],
@@ -600,7 +595,7 @@ class GRUCell(LayerRNNCell):
             ),
         )
 
-        self.built = True
+        super().build(input_shape)
 
     def call(self, inputs, state):
         """Gated recurrent unit (GRU) with nunits cells."""
@@ -774,14 +769,14 @@ class BasicLSTMCell(LayerRNNCell):
         return self._num_units
 
     @tf_utils.shape_type_conversion
-    def build(self, inputs_shape):
-        if inputs_shape[-1] is None:
+    def build(self, input_shape):
+        if input_shape[-1] is None:
             raise ValueError(
                 "Expected inputs.shape[-1] to be known, "
-                f"received shape: {inputs_shape}"
+                f"received shape: {input_shape}"
             )
         _check_supported_dtypes(self.dtype)
-        input_depth = inputs_shape[-1]
+        input_depth = input_shape[-1]
         h_depth = self._num_units
         self._kernel = self.add_weight(
             _WEIGHTS_VARIABLE_NAME,
@@ -793,7 +788,7 @@ class BasicLSTMCell(LayerRNNCell):
             initializer=tf.compat.v1.zeros_initializer(dtype=self.dtype),
         )
 
-        self.built = True
+        super().build(input_shape)
 
     def call(self, inputs, state):
         """Long short-term memory cell (LSTM).
@@ -1017,14 +1012,14 @@ class LSTMCell(LayerRNNCell):
         return self._output_size
 
     @tf_utils.shape_type_conversion
-    def build(self, inputs_shape):
-        if inputs_shape[-1] is None:
+    def build(self, input_shape):
+        if input_shape[-1] is None:
             raise ValueError(
                 "Expected inputs.shape[-1] to be known, "
-                f"received shape: {inputs_shape}"
+                f"received shape: {input_shape}"
             )
         _check_supported_dtypes(self.dtype)
-        input_depth = inputs_shape[-1]
+        input_depth = input_shape[-1]
         h_depth = self._num_units if self._num_proj is None else self._num_proj
         maybe_partitioner = (
             tf.compat.v1.fixed_size_partitioner(self._num_unit_shards)
@@ -1076,7 +1071,7 @@ class LSTMCell(LayerRNNCell):
             partitioner=maybe_proj_partitioner,
         )
 
-        self.built = True
+        super().build(input_shape)
 
     def call(self, inputs, state):
         """Run one step of LSTM.
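Note on the legacy-cell hunks above: every cell's `build()` now takes the standard `input_shape` argument and ends with `super().build(input_shape)` instead of setting `self.built = True` by hand. A minimal sketch of that pattern (hypothetical `MyCell`, not part of the package; assumes `tf_keras` is installed):

```python
import tensorflow as tf
import tf_keras as keras


class MyCell(keras.layers.Layer):
    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        # The last input dimension must be known to size the kernel.
        if input_shape[-1] is None:
            raise ValueError(
                f"Expected inputs.shape[-1] to be known, received shape: {input_shape}"
            )
        self.kernel = self.add_weight(
            name="kernel",
            shape=[input_shape[-1], self.units],
            initializer="glorot_uniform",
        )
        # Lets the base Layer mark the cell as built.
        super().build(input_shape)

    def call(self, inputs):
        return tf.matmul(inputs, self.kernel)
```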
tf_keras/src/layers/rnn/lstm.py
CHANGED
@@ -236,7 +236,6 @@ class LSTMCell(DropoutRNNCellMixin, base_layer.BaseRandomLayer):
             )
         else:
             self.bias = None
-        self.built = True
 
     def _compute_carry_and_output(self, x, h_tm1, c_tm1):
         """Computes carry and output using split kernels."""
@@ -1063,11 +1062,13 @@ def gpu_lstm(
             mask, time_major
         )
 
-    if not time_major and sequence_lengths is None:
-        inputs = tf.transpose(inputs, perm=(1, 0, 2))
-        seq_axis, batch_axis = (0, 1)
-    else:
-        seq_axis, batch_axis = (0, 1) if time_major else (1, 0)
+    seq_axis, batch_axis = (0, 1) if time_major else (1, 0)
+
+    if sequence_lengths is None:
+        max_sequence_length = tf.shape(inputs)[seq_axis]
+        batch_size = tf.shape(inputs)[batch_axis]
+        sequence_lengths = tf.fill([batch_size], max_sequence_length)
+
     # For init_h and init_c, cuDNN expects one more dim of num_layers before or
     # after batch dim for time major or batch major inputs respectively
     init_h = tf.expand_dims(init_h, axis=seq_axis)
@@ -1099,52 +1100,36 @@ def gpu_lstm(
         transpose_weights=True,
     )
 
-    if sequence_lengths is not None:
-
-
-
-
-
-            inputs
-
-
-
-
-                batch_axis=batch_axis,
-            )
-        outputs, h, c, _, _ = tf.raw_ops.CudnnRNNV3(
-            input=inputs,
-            input_h=init_h,
-            input_c=init_c,
-            params=params,
-            is_training=True,
-            rnn_mode="lstm",
-            sequence_lengths=sequence_lengths,
-            time_major=time_major,
+    if go_backwards:
+        # Three reversals are required. E.g.,
+        # normal input = [1, 2, 3, 0, 0] # where 0 need to be masked
+        # reversed_input_to_cudnn = [3, 2, 1, 0, 0]
+        # output_from_cudnn = [6, 5, 4, 0, 0]
+        # expected_output = [0, 0, 6, 5 ,4]
+        inputs = tf.reverse_sequence(
+            inputs,
+            sequence_lengths,
+            seq_axis=seq_axis,
+            batch_axis=batch_axis,
         )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            input=inputs,
-            input_h=init_h,
-            input_c=init_c,
-            params=params,
-            is_training=True,
-            rnn_mode="lstm",
+    outputs, h, c, _, _ = tf.raw_ops.CudnnRNNV3(
+        input=inputs,
+        input_h=init_h,
+        input_c=init_c,
+        params=params,
+        is_training=True,
+        rnn_mode="lstm",
+        sequence_lengths=sequence_lengths,
+        time_major=time_major,
+    )
+    if go_backwards:
+        outputs = tf.reverse_sequence(
+            outputs,
+            sequence_lengths,
+            seq_axis=seq_axis,
+            batch_axis=batch_axis,
         )
+        outputs = tf.reverse(outputs, axis=[seq_axis])
 
     last_output = outputs[-1]
     if not time_major and sequence_lengths is None and return_sequences:
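Note on the rewritten cuDNN path above: `sequence_lengths` is now always materialized (filled with the max timestep count when no mask is given), and for `go_backwards` only the valid prefix of each sequence is reversed before and after the `CudnnRNNV3` call. A small self-contained sketch of that reversal behaviour (toy tensors, not the library code):

```python
import tensorflow as tf

# Batch-major toy batch: one padded sequence per row (0 marks padding).
inputs = tf.constant([[1, 2, 3, 0, 0],
                      [4, 5, 0, 0, 0]])
seq_axis, batch_axis = 1, 0  # batch-major, i.e. the time_major=False case

# Without a mask, the new code fills the lengths with the max timestep count.
filled = tf.fill([tf.shape(inputs)[batch_axis]], tf.shape(inputs)[seq_axis])
print(filled.numpy())  # [5 5]

# With real lengths, only the valid prefix of each row is reversed;
# the trailing padding stays where it is.
lengths = tf.constant([3, 2])
reversed_inputs = tf.reverse_sequence(
    inputs, lengths, seq_axis=seq_axis, batch_axis=batch_axis
)
print(reversed_inputs.numpy())  # [[3 2 1 0 0]
                                #  [5 4 0 0 0]]
```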
tf_keras/src/layers/rnn/stacked_rnn_cells.py
CHANGED
@@ -166,6 +166,7 @@ class StackedRNNCells(base_layer.Layer):
 
     @tf_utils.shape_type_conversion
     def build(self, input_shape):
+        super().build(input_shape)
         if isinstance(input_shape, list):
             input_shape = input_shape[0]
 
@@ -195,7 +196,6 @@ class StackedRNNCells(base_layer.Layer):
             input_shape = tuple(
                 [batch_size] + tf.TensorShape(output_dim).as_list()
             )
-        self.built = True
 
     def get_config(self):
         cells = []
tf_keras/src/layers/rnn/time_distributed.py
CHANGED
@@ -135,7 +135,6 @@ class TimeDistributed(Wrapper):
         )
         child_input_shape = tf_utils.convert_shapes(child_input_shape)
         super().build(tuple(child_input_shape))
-        self.built = True
 
     def compute_output_shape(self, input_shape):
         input_shape = tf_utils.convert_shapes(input_shape, to_tuples=False)
tf_keras/src/mixed_precision/autocast_variable.py
CHANGED
@@ -124,20 +124,21 @@ class AutoCastVariable(tf.Variable, tf.__internal__.types.Tensor):
     def _should_cast(self):
         """Returns True if this variable should be casted when accessed."""
         autocast_dtype = getattr(_autocast_dtype, "dtype", None)
-        return autocast_dtype is not None and self.dtype != autocast_dtype
+        return autocast_dtype is not None and self.true_dtype != autocast_dtype
 
     @property
     def dtype(self):
-        """The dtype of the underlying variable, before any casts are done."""
-        return self._variable.dtype
+        """The dtype when the value is accessed, that is after casting."""
+        return self._cast_dtype
 
     @property
     def true_dtype(self):
-        """
+        """The dtype of the underlying variable, before any casts are done."""
         return self._variable.dtype
 
     @property
     def _cast_dtype(self):
+        """The dtype after casting."""
         dtype = getattr(_autocast_dtype, "dtype", None)
         return dtype or self._variable.dtype
 
@@ -202,7 +203,8 @@ class AutoCastVariable(tf.Variable, tf.__internal__.types.Tensor):
         if tf.executing_eagerly() and not self._in_graph_mode:
             repr_str = (
                 "<AutoCastVariable '{v.name}' shape={v.shape} "
-                "dtype={v.dtype.name} dtype_to_cast_to={v._cast_dtype.name}, "
+                "dtype={v.true_dtype.name} "
+                "dtype_to_cast_to={v._cast_dtype.name}, "
                 "numpy={np_repr}>"
             )
             return repr_str.format(
@@ -211,7 +213,8 @@ class AutoCastVariable(tf.Variable, tf.__internal__.types.Tensor):
         else:
             repr_str = (
                 "<AutoCastVariable '{v.name}' shape={v.shape} "
-                "dtype={v.dtype.name} dtype_to_cast_to={v._cast_dtype.name}>"
+                "dtype={v.true_dtype.name} "
+                "dtype_to_cast_to={v._cast_dtype.name}>"
             )
             return repr_str.format(v=self)
 
@@ -261,6 +264,9 @@ class AutoCastVariable(tf.Variable, tf.__internal__.types.Tensor):
     def _apply_assign_update(
         self, update_fn, value, use_locking=None, name=None, read_value=True
     ):
+        # In auto cast scope, we cast back to the actual variable dtype.
+        if self._should_cast():
+            value = tf.cast(value, self.true_dtype)
         # TODO(b/146181571): This logic can be simplified once
         # DistributedVariable.assign returns a DistributedVariable. Currently
         # for MirroredStrategy, it returns a Mirrored value.
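Note on the `AutoCastVariable` changes above: `dtype` now reports the cast dtype, `true_dtype` keeps the storage dtype, and values assigned inside an autocast scope are cast back to the storage dtype. A hedged sketch of how that surfaces in user code under a mixed precision policy (assumes `tf_keras` is installed; dtypes as described by the diff, not verified here):

```python
import tf_keras as keras

keras.mixed_precision.set_global_policy("mixed_float16")

layer = keras.layers.Dense(4)
layer.build((None, 8))

kernel = layer.kernel  # an AutoCastVariable under mixed_float16
print(kernel.true_dtype)  # float32: storage dtype of the underlying variable
# Outside an autocast scope no cast dtype is active, so dtype matches
# true_dtype; inside the layer's call() it would read as float16.
print(kernel.dtype)
```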
tf_keras/src/mixed_precision/test_util.py
CHANGED
@@ -171,14 +171,14 @@ class MultiplyLayer(AssertTypeLayer):
             activity_regularizer=self._activity_regularizer, **kwargs
         )
 
-    def build(self, _):
+    def build(self, input_shape):
         self.v = self.add_weight(
             self._var_name,
             (),
             initializer="ones",
             regularizer=self._regularizer,
         )
-        self.built = True
+        super().build(input_shape)
 
     def call(self, inputs):
         self.assert_input_types(inputs)
@@ -205,7 +205,7 @@ class MultiplyLayer(AssertTypeLayer):
 class MultiplyLayerWithoutAutoCast(MultiplyLayer):
     """Same as MultiplyLayer, but does not use AutoCastVariables."""
 
-    def build(self, _):
+    def build(self, input_shape):
         dtype = self.dtype
         if dtype in ("float16", "bfloat16"):
             dtype = "float32"
@@ -214,10 +214,11 @@ class MultiplyLayerWithoutAutoCast(MultiplyLayer):
             (),
             initializer="ones",
             dtype=dtype,
-            experimental_autocast=False,
+            autocast=False,
             regularizer=self._regularizer,
         )
-        self.built = True
+        # Call Layer.build() to skip MultiplyLayer.build() which we override.
+        base_layer.Layer.build(self, input_shape)
 
     def call(self, inputs):
         self.assert_input_types(inputs)
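Note on the test-layer change above: the weight is created with `autocast=False`, so it stays a plain variable rather than an AutoCastVariable, and `base_layer.Layer.build()` is called directly to skip the parent class's `build()`. A minimal sketch of opting a single weight out of autocasting (hypothetical `ScaleLayer`; assumes the `autocast` argument accepted by `add_weight` in this version of the package):

```python
import tensorflow as tf
import tf_keras as keras


class ScaleLayer(keras.layers.Layer):
    def build(self, input_shape):
        # autocast=False keeps the weight in its storage dtype even under a
        # mixed precision policy (no AutoCastVariable wrapping).
        self.scale = self.add_weight(
            name="scale",
            shape=(),
            initializer="ones",
            dtype="float32",
            autocast=False,
        )
        super().build(input_shape)

    def call(self, inputs):
        # Cast explicitly where needed, since the weight is never auto-cast.
        return inputs * tf.cast(self.scale, inputs.dtype)
```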
tf_keras/src/optimizers/legacy/optimizer_v2.py
CHANGED
@@ -1033,6 +1033,13 @@ class OptimizerV2(tf.__internal__.tracking.Trackable):
         slot_dict = self._slots.setdefault(var_key, {})
         weight = slot_dict.get(slot_name, None)
         if weight is None:
+            # Under a mixed precision policy, variables report their "cast"
+            # dtype. However, we want to use the original dtype for slots.
+            if hasattr(var, "true_dtype"):
+                dtype = var.true_dtype
+            else:
+                dtype = var.dtype
+
             if isinstance(initializer, str) or callable(initializer):
                 initializer = initializers.get(initializer)
                 if isinstance(
@@ -1043,7 +1050,7 @@ class OptimizerV2(tf.__internal__.tracking.Trackable):
                 else:
                     slot_shape = var.shape
                 initial_value = functools.partial(
-                    initializer, shape=slot_shape, dtype=var.dtype
+                    initializer, shape=slot_shape, dtype=dtype
                 )
             else:
                 initial_value = initializer
@@ -1064,7 +1071,7 @@ class OptimizerV2(tf.__internal__.tracking.Trackable):
             with strategy.extended.colocate_vars_with(var):
                 weight = tf.Variable(
                     name=f"{var._shared_name}/{slot_name}",
-                    dtype=var.dtype,
+                    dtype=dtype,
                     trainable=False,
                     initial_value=initial_value,
                 )
tf_keras/src/optimizers/optimizer.py
CHANGED
@@ -498,26 +498,28 @@ class _BaseOptimizer(tf.__internal__.tracking.AutoTrackable):
         Returns:
           An optimizer variable.
         """
+        # Under a mixed precision policy, variables report their "cast"
+        # dtype. However, we want to use the original dtype for slots.
+        if hasattr(model_variable, "true_dtype"):
+            dtype = model_variable.true_dtype
+        else:
+            dtype = model_variable.dtype
         if initial_value is None:
             if shape is None:
                 if model_variable.shape.rank is None:
                     # When the rank is None, we cannot get a concrete
                     # `model_variable.shape`, we use dynamic shape.
-                    initial_value = tf.zeros_like(
-                        model_variable, dtype=model_variable.dtype
-                    )
+                    initial_value = tf.zeros_like(model_variable, dtype=dtype)
                 else:
                     # We cannot always use `zeros_like`, because some cases
                     # the shape exists while values don't.
-                    initial_value = tf.zeros(
-                        model_variable.shape, dtype=model_variable.dtype
-                    )
+                    initial_value = tf.zeros(model_variable.shape, dtype=dtype)
             else:
-                initial_value = tf.zeros(shape, dtype=model_variable.dtype)
+                initial_value = tf.zeros(shape, dtype=dtype)
         variable = tf.Variable(
             initial_value=initial_value,
             name=f"{variable_name}/{model_variable._shared_name}",
-            dtype=model_variable.dtype,
+            dtype=dtype,
             trainable=False,
         )
         # If model_variable is a shard of a ShardedVariable, we should add a
@@ -1188,10 +1190,17 @@ class Optimizer(_BaseOptimizer):
                     self._mesh, rank=initial_value.shape.rank
                 ),
             )
+        # Under a mixed precision policy, variables report their "cast"
+        # dtype. However, we want to use the original dtype for optimizer
+        # variables.
+        if hasattr(model_variable, "true_dtype"):
+            dtype = model_variable.true_dtype
+        else:
+            dtype = model_variable.dtype
         variable = tf.experimental.dtensor.DVariable(
             initial_value=initial_value,
             name=f"{variable_name}/{model_variable._shared_name}",
-            dtype=model_variable.dtype,
+            dtype=dtype,
             trainable=False,
         )
         self._variables.append(variable)
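Note on the optimizer hunks above: both the legacy `OptimizerV2` and the new optimizer now resolve the dtype for optimizer-owned variables from `true_dtype` when the model variable is an AutoCastVariable, and fall back to `dtype` otherwise. A condensed sketch of that fallback (hypothetical `_slot_dtype` helper, not part of the package):

```python
import tensorflow as tf


def _slot_dtype(model_variable):
    # AutoCastVariables report their cast dtype via `.dtype` (e.g. float16
    # under mixed_float16); optimizer slots should use the storage dtype.
    if hasattr(model_variable, "true_dtype"):
        return model_variable.true_dtype
    return model_variable.dtype


v = tf.Variable(tf.zeros([3]))  # a plain variable: falls back to v.dtype
momentum = tf.Variable(
    tf.zeros(v.shape, dtype=_slot_dtype(v)), trainable=False
)
print(momentum.dtype)  # float32
```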
tf_keras/src/premade_models/linear.py
CHANGED
@@ -156,7 +156,8 @@ class LinearModel(training.Model):
             )
         else:
             self.bias = None
-        self.built = True
+        # Call Layer.build() to skip Model.build() which we override here.
+        base_layer.Layer.build(self, input_shape)
 
     def call(self, inputs):
         result = None
tf_keras/src/utils/data_utils.py
CHANGED
@@ -1108,7 +1108,7 @@ def pad_sequences(
         maxlen = np.max(lengths)
 
     is_dtype_str = np.issubdtype(dtype, np.str_) or np.issubdtype(
-        dtype, np.unicode_
+        dtype, np.str_
     )
     if isinstance(value, str) and dtype != object and not is_dtype_str:
         raise ValueError(
tf_keras/src/utils/timeseries_dataset.py
CHANGED
@@ -110,16 +110,24 @@ def timeseries_dataset_from_array(
     timesteps to predict the next timestep, you would use:
 
     ```python
-    input_data = data[:-10]
-    targets = data[10:]
+    data = tf.range(15)
+    sequence_length = 10
+    input_data = data[:]
+    targets = data[sequence_length:]
     dataset = tf.keras.utils.timeseries_dataset_from_array(
-        input_data, targets, sequence_length=10)
+        input_data, targets, sequence_length=sequence_length
+    )
     for batch in dataset:
       inputs, targets = batch
-      assert np.array_equal(inputs[0], data[:10])  # First sequence: steps [0-9]
+      # First sequence: steps [0-9]
+      assert np.array_equal(inputs[0], data[:sequence_length])
       # Corresponding target: step 10
-      assert np.array_equal(targets[0], data[10])
+      assert np.array_equal(targets[0], data[sequence_length])
       break
+    # To view the generated dataset
+    for batch in dataset.as_numpy_iterator():
+      input, label = batch
+      print(f"Input:{input}, target:{label}")
     ```
 
     Example 3: Temporal regression for many-to-many architectures.
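Note on the docstring change above: the example is now self-contained. A runnable version for reference, assuming `tf_keras` is installed (names follow the new docstring):

```python
import numpy as np
import tf_keras as keras

data = np.arange(15)
sequence_length = 10
input_data = data[:]
targets = data[sequence_length:]

dataset = keras.utils.timeseries_dataset_from_array(
    input_data, targets, sequence_length=sequence_length
)
for inputs, labels in dataset:
    # First window covers steps [0-9]; its target is step 10.
    assert np.array_equal(inputs[0], data[:sequence_length])
    assert np.array_equal(labels[0], data[sequence_length])
    break
```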
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: tf_keras-nightly
-Version: 2.17.0.dev2024050509
+Version: 2.19.0.dev2024101709
 Summary: Deep learning for humans.
 Home-page: https://keras.io/
 Download-URL: https://github.com/keras-team/tf-keras/tags
@@ -26,7 +26,7 @@ Classifier: Topic :: Software Development
 Classifier: Topic :: Software Development :: Libraries
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.9
-Requires-Dist: tf-nightly ~=2.17.0.dev
+Requires-Dist: tf-nightly ~=2.19.0.dev
 
 TF-Keras is a deep learning API written in Python,
 running on top of the machine learning platform TensorFlow.