PyPI - tf-keras-nightly - Versions diffs - 2.17.0.dev2024031909__py3-none-any.whl → 2.19.0.dev2025011410__py3-none-any.whl - Mend

tf-keras-nightly 2.17.0.dev2024031909__py3-none-any.whl → 2.19.0.dev2025011410__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (62)

tf_keras/__init__.py +1 -1
tf_keras/src/__init__.py +1 -1
tf_keras/src/backend.py +1 -1
tf_keras/src/callbacks.py +24 -7
tf_keras/src/datasets/boston_housing.py +14 -5
tf_keras/src/datasets/cifar10.py +9 -1
tf_keras/src/datasets/cifar100.py +7 -1
tf_keras/src/datasets/fashion_mnist.py +16 -4
tf_keras/src/datasets/imdb.py +8 -0
tf_keras/src/datasets/mnist.py +9 -3
tf_keras/src/datasets/reuters.py +8 -0
tf_keras/src/engine/base_layer.py +10 -4
tf_keras/src/engine/base_layer_v1.py +10 -4
tf_keras/src/engine/node.py +8 -3
tf_keras/src/layers/activation/prelu.py +1 -1
tf_keras/src/layers/attention/base_dense_attention.py +2 -1
tf_keras/src/layers/convolutional/base_conv.py +1 -1
tf_keras/src/layers/convolutional/base_depthwise_conv.py +3 -1
tf_keras/src/layers/convolutional/base_separable_conv.py +3 -1
tf_keras/src/layers/convolutional/conv1d_transpose.py +3 -1
tf_keras/src/layers/convolutional/conv2d_transpose.py +3 -1
tf_keras/src/layers/convolutional/conv3d_transpose.py +3 -1
tf_keras/src/layers/core/dense.py +1 -1
tf_keras/src/layers/core/embedding.py +1 -1
tf_keras/src/layers/locally_connected/locally_connected1d.py +1 -1
tf_keras/src/layers/locally_connected/locally_connected2d.py +1 -1
tf_keras/src/layers/normalization/batch_normalization.py +1 -1
tf_keras/src/layers/normalization/layer_normalization.py +1 -1
tf_keras/src/layers/normalization/unit_normalization.py +2 -1
tf_keras/src/layers/rnn/abstract_rnn_cell.py +1 -1
tf_keras/src/layers/rnn/base_conv_lstm.py +0 -1
tf_keras/src/layers/rnn/base_conv_rnn.py +3 -1
tf_keras/src/layers/rnn/base_rnn.py +1 -1
tf_keras/src/layers/rnn/base_wrapper.py +1 -1
tf_keras/src/layers/rnn/bidirectional.py +2 -1
tf_keras/src/layers/rnn/cell_wrappers.py +3 -3
tf_keras/src/layers/rnn/cudnn_gru.py +6 -3
tf_keras/src/layers/rnn/cudnn_lstm.py +6 -3
tf_keras/src/layers/rnn/gru.py +35 -47
tf_keras/src/layers/rnn/legacy_cell_wrappers.py +3 -3
tf_keras/src/layers/rnn/legacy_cells.py +20 -25
tf_keras/src/layers/rnn/lstm.py +35 -50
tf_keras/src/layers/rnn/simple_rnn.py +0 -1
tf_keras/src/layers/rnn/stacked_rnn_cells.py +1 -1
tf_keras/src/layers/rnn/time_distributed.py +0 -1
tf_keras/src/mixed_precision/autocast_variable.py +12 -6
tf_keras/src/mixed_precision/test_util.py +6 -5
tf_keras/src/optimizers/legacy/optimizer_v2.py +9 -2
tf_keras/src/optimizers/optimizer.py +18 -9
tf_keras/src/premade_models/linear.py +2 -1
tf_keras/src/saving/legacy/saved_model/json_utils.py +1 -1
tf_keras/src/saving/saving_api.py +165 -127
tf_keras/src/saving/saving_lib.py +1 -11
tf_keras/src/saving/serialization_lib.py +1 -10
tf_keras/src/utils/data_utils.py +1 -1
tf_keras/src/utils/steps_per_execution_tuning.py +1 -1
tf_keras/src/utils/tf_utils.py +2 -2
tf_keras/src/utils/timeseries_dataset.py +13 -5
{tf_keras_nightly-2.17.0.dev2024031909.dist-info → tf_keras_nightly-2.19.0.dev2025011410.dist-info}/METADATA +14 -3
{tf_keras_nightly-2.17.0.dev2024031909.dist-info → tf_keras_nightly-2.19.0.dev2025011410.dist-info}/RECORD +62 -62
{tf_keras_nightly-2.17.0.dev2024031909.dist-info → tf_keras_nightly-2.19.0.dev2025011410.dist-info}/WHEEL +1 -1
{tf_keras_nightly-2.17.0.dev2024031909.dist-info → tf_keras_nightly-2.19.0.dev2025011410.dist-info}/top_level.txt +0 -0

tf_keras/src/layers/locally_connected/locally_connected2d.py CHANGED Viewed

@@ -308,7 +308,7 @@ class LocallyConnected2D(Layer):
             self.input_spec = InputSpec(ndim=4, axes={1: input_filter})
         else:
             self.input_spec = InputSpec(ndim=4, axes={-1: input_filter})
-        self.built = True
+        super().build(input_shape)
     @tf_utils.shape_type_conversion
     def compute_output_shape(self, input_shape):

tf_keras/src/layers/normalization/batch_normalization.py CHANGED Viewed

@@ -542,7 +542,7 @@ class BatchNormalizationBase(Layer):
         finally:
             if partitioner:
                 self._scope.set_partitioner(partitioner)
-        self.built = True
+        super().build(input_shape)
     def call(self, inputs, training=None, mask=None):
         inputs = tf.cast(inputs, self.compute_dtype)

tf_keras/src/layers/normalization/layer_normalization.py CHANGED Viewed

@@ -249,7 +249,7 @@ class LayerNormalization(Layer):
             self.beta = None
         self._fused = self._fused_can_be_used(rank)
-        self.built = True
+        super().build(input_shape)
     def call(self, inputs):
         # TODO(b/229545225): Remove the RaggedTensor check.

tf_keras/src/layers/normalization/unit_normalization.py CHANGED Viewed

@@ -60,7 +60,8 @@ class UnitNormalization(base_layer.Layer):
         self.supports_masking = True
     def build(self, input_shape):
-        self.axis = tf_utils.validate_axis(self.axis, input_shape)
+        tf_utils.validate_axis(self.axis, input_shape)
+        super().build(input_shape)
     def call(self, inputs):
         inputs = tf.cast(inputs, self.compute_dtype)

tf_keras/src/layers/rnn/abstract_rnn_cell.py CHANGED Viewed

@@ -56,7 +56,7 @@ class AbstractRNNCell(base_layer.Layer):
               shape=(self.units, self.units),
               initializer='uniform',
               name='recurrent_kernel')
-          self.built = True
+          super().build(input_shape)
         def call(self, inputs, states):
           prev_output = states[0]

tf_keras/src/layers/rnn/base_conv_lstm.py CHANGED Viewed

@@ -218,7 +218,6 @@ class ConvLSTMCell(DropoutRNNCellMixin, base_layer.BaseRandomLayer):
             )
         else:
             self.bias = None
-        self.built = True
     def call(self, inputs, states, training=None):
         h_tm1 = states[0]  # previous memory state

tf_keras/src/layers/rnn/base_conv_rnn.py CHANGED Viewed

@@ -20,6 +20,7 @@ import tensorflow.compat.v2 as tf
 from tf_keras.src import backend
 from tf_keras.src.engine import base_layer
+from tf_keras.src.engine.base_layer import Layer
 from tf_keras.src.engine.input_spec import InputSpec
 from tf_keras.src.layers.rnn.base_rnn import RNN
 from tf_keras.src.utils import conv_utils
@@ -207,6 +208,8 @@ class ConvRNN(RNN):
     @tf_utils.shape_type_conversion
     def build(self, input_shape):
+        # Call Layer.build() to skip RNN.build() which we override here.
+        Layer.build(self, input_shape)
         # Note input_shape will be list of shapes of initial states and
         # constants if these are passed in __call__.
         if self._num_constants is not None:
@@ -263,7 +266,6 @@ class ConvRNN(RNN):
                 ]
         if self.stateful:
             self.reset_states()
-        self.built = True
     def get_initial_state(self, inputs):
         # (samples, timesteps, img_dims..., filters)

tf_keras/src/layers/rnn/base_rnn.py CHANGED Viewed

@@ -207,7 +207,7 @@ class RNN(base_layer.Layer):
                 shape=(self.units, self.units),
                 initializer='uniform',
                 name='recurrent_kernel')
-            self.built = True
+            super().build(input_shape)
         def call(self, inputs, states):
             prev_output = states[0]

tf_keras/src/layers/rnn/base_wrapper.py CHANGED Viewed

@@ -56,7 +56,7 @@ class Wrapper(Layer):
         if not self.layer.built:
             self.layer.build(input_shape)
             self.layer.built = True
-        self.built = True
+        super().build(input_shape)
     @property
     def activity_regularizer(self):

tf_keras/src/layers/rnn/bidirectional.py CHANGED Viewed

@@ -470,7 +470,8 @@ class Bidirectional(Wrapper):
             self.forward_layer.build(input_shape)
         with backend.name_scope(self.backward_layer.name):
             self.backward_layer.build(input_shape)
-        self.built = True
+        # Call Layer.build() to skip Wrapper.build() which we override here.
+        Layer.build(self, input_shape)
     def compute_mask(self, inputs, mask):
         if isinstance(mask, list):

tf_keras/src/layers/rnn/cell_wrappers.py CHANGED Viewed

@@ -102,10 +102,10 @@ class _RNNCellWrapper(AbstractRNNCell):
             inputs, state, cell_call_fn=self.cell.call, **kwargs
         )
-    def build(self, inputs_shape):
+    def build(self, input_shape):
         """Builds the wrapped cell."""
-        self.cell.build(inputs_shape)
-        self.built = True
+        self.cell.build(input_shape)
+        super().build(input_shape)
     @property
     def wrapped_cell(self):

tf_keras/src/layers/rnn/cudnn_gru.py CHANGED Viewed

@@ -144,8 +144,6 @@ class CuDNNGRU(_CuDNNRNN):
             constraint=self.bias_constraint,
         )
-        self.built = True
     def _process_batch(self, inputs, initial_state):
         if not self.time_major:
             inputs = tf.transpose(inputs, perm=(1, 0, 2))
@@ -172,6 +170,10 @@ class CuDNNGRU(_CuDNNRNN):
             shape=self._vector_shape,
         )
+        batch_dim = tf.shape(inputs)[1]
+        max_sequence_length = tf.shape(inputs)[0]
+        sequence_lengths = tf.fill([batch_dim], max_sequence_length)
         args = {
             "input": inputs,
             "input_h": input_h,
@@ -179,9 +181,10 @@ class CuDNNGRU(_CuDNNRNN):
             "params": params,
             "is_training": True,
             "rnn_mode": "gru",
+            "sequence_lengths": sequence_lengths,
         }
-        outputs, h, _, _, _ = tf.raw_ops.CudnnRNNV2(**args)
+        outputs, h, _, _, _ = tf.raw_ops.CudnnRNNV3(**args)
         if self.stateful or self.return_state:
             h = h[0]

tf_keras/src/layers/rnn/cudnn_lstm.py CHANGED Viewed

@@ -170,8 +170,6 @@ class CuDNNLSTM(_CuDNNRNN):
             constraint=self.bias_constraint,
         )
-        self.built = True
     def _process_batch(self, inputs, initial_state):
         if not self.time_major:
             inputs = tf.transpose(inputs, perm=(1, 0, 2))
@@ -204,15 +202,20 @@ class CuDNNLSTM(_CuDNNRNN):
             shape=self._vector_shape,
         )
+        batch_dim = tf.shape(inputs)[1]
+        max_sequence_length = tf.shape(inputs)[0]
+        sequence_lengths = tf.fill([batch_dim], max_sequence_length)
         args = {
             "input": inputs,
             "input_h": input_h,
             "input_c": input_c,
             "params": params,
             "is_training": True,
+            "sequence_lengths": sequence_lengths,
         }
-        outputs, h, c, _, _ = tf.raw_ops.CudnnRNNV2(**args)
+        outputs, h, c, _, _ = tf.raw_ops.CudnnRNNV3(**args)
         if self.stateful or self.return_state:
             h = h[0]

tf_keras/src/layers/rnn/gru.py CHANGED Viewed

@@ -222,7 +222,6 @@ class GRUCell(DropoutRNNCellMixin, base_layer.BaseRandomLayer):
             )
         else:
             self.bias = None
-        self.built = True
     def call(self, inputs, states, training=None):
         h_tm1 = (
@@ -1034,11 +1033,13 @@ def gpu_gru(
             mask, time_major
         )
-    if not time_major and sequence_lengths is None:
-        inputs = tf.transpose(inputs, perm=(1, 0, 2))
-        seq_axis, batch_axis = (0, 1)
-    else:
-        seq_axis, batch_axis = (0, 1) if time_major else (1, 0)
+    seq_axis, batch_axis = (0, 1) if time_major else (1, 0)
+    if sequence_lengths is None:
+        max_sequence_length = tf.shape(inputs)[seq_axis]
+        batch_size = tf.shape(inputs)[batch_axis]
+        sequence_lengths = tf.fill([batch_size], max_sequence_length)
     # For init_h, cuDNN expects one more dim of num_layers before or after batch
     # dim for time major or batch major inputs respectively
     init_h = tf.expand_dims(init_h, axis=seq_axis)
@@ -1069,49 +1070,36 @@ def gpu_gru(
         transpose_weights=True,
     )
-    if sequence_lengths is not None:
-        if go_backwards:
-            # Three reversals are required. E.g.,
-            # normal input = [1, 2, 3, 0, 0]  # where 0 need to be masked
-            # reversed_input_to_cudnn = [3, 2, 1, 0, 0]
-            # output_from_cudnn = [6, 5, 4, 0, 0]
-            # expected_output = [0, 0, 6, 5 ,4]
-            inputs = tf.reverse_sequence(
-                inputs,
-                sequence_lengths,
-                seq_axis=seq_axis,
-                batch_axis=batch_axis,
-            )
-        outputs, h, _, _, _ = tf.raw_ops.CudnnRNNV3(
-            input=inputs,
-            input_h=init_h,
-            input_c=0,
-            params=params,
-            is_training=True,
-            rnn_mode="gru",
-            sequence_lengths=sequence_lengths,
-            time_major=time_major,
+    if go_backwards:
+        # Three reversals are required. E.g.,
+        # normal input = [1, 2, 3, 0, 0]  # where 0 need to be masked
+        # reversed_input_to_cudnn = [3, 2, 1, 0, 0]
+        # output_from_cudnn = [6, 5, 4, 0, 0]
+        # expected_output = [0, 0, 6, 5 ,4]
+        inputs = tf.reverse_sequence(
+            inputs,
+            sequence_lengths,
+            seq_axis=seq_axis,
+            batch_axis=batch_axis,
         )
-        if go_backwards:
-            outputs = tf.reverse_sequence(
-                outputs,
-                sequence_lengths,
-                seq_axis=seq_axis,
-                batch_axis=batch_axis,
-            )
-            outputs = tf.reverse(outputs, axis=[seq_axis])
-    else:
-        if go_backwards:
-            # Reverse axis 0 since the input is already convert to time major.
-            inputs = tf.reverse(inputs, axis=[0])
-        outputs, h, _, _ = tf.raw_ops.CudnnRNN(
-            input=inputs,
-            input_h=init_h,
-            input_c=0,
-            params=params,
-            is_training=True,
-            rnn_mode="gru",
+    outputs, h, _, _, _ = tf.raw_ops.CudnnRNNV3(
+        input=inputs,
+        input_h=init_h,
+        input_c=0,
+        params=params,
+        is_training=True,
+        rnn_mode="gru",
+        sequence_lengths=sequence_lengths,
+        time_major=time_major,
+    )
+    if go_backwards:
+        outputs = tf.reverse_sequence(
+            outputs,
+            sequence_lengths,
+            seq_axis=seq_axis,
+            batch_axis=batch_axis,
         )
+        outputs = tf.reverse(outputs, axis=[seq_axis])
     last_output = outputs[-1]
     if not time_major and sequence_lengths is None and return_sequences:

tf_keras/src/layers/rnn/legacy_cell_wrappers.py CHANGED Viewed

@@ -368,9 +368,9 @@ class DropoutWrapper(_RNNCellWrapperV1):
     def wrapped_cell(self):
         return self.cell
-    def build(self, inputs_shape):
-        self.cell.build(inputs_shape)
-        self.built = True
+    def build(self, input_shape):
+        self.cell.build(input_shape)
+        super().build(input_shape)
     def _variational_recurrent_dropout_value(
         self, unused_index, value, noise, keep_prob

tf_keras/src/layers/rnn/legacy_cells.py CHANGED Viewed

@@ -246,11 +246,6 @@ class RNNCell(base_layer.Layer):
         """Integer or TensorShape: size of outputs produced by this cell."""
         raise NotImplementedError("Abstract method")
-    def build(self, _):
-        # This tells the parent Layer object that it's OK to call
-        # self.add_weight() inside the call() method.
-        pass
     def get_initial_state(self, inputs=None, batch_size=None, dtype=None):
         if inputs is not None:
             # Validate the given batch_size and dtype against inputs if
@@ -445,15 +440,15 @@ class BasicRNNCell(LayerRNNCell):
         return self._num_units
     @tf_utils.shape_type_conversion
-    def build(self, inputs_shape):
-        if inputs_shape[-1] is None:
+    def build(self, input_shape):
+        if input_shape[-1] is None:
             raise ValueError(
                 "Expected inputs.shape[-1] to be known, "
-                f"received shape: {inputs_shape}"
+                f"received shape: {input_shape}"
             )
         _check_supported_dtypes(self.dtype)
-        input_depth = inputs_shape[-1]
+        input_depth = input_shape[-1]
         self._kernel = self.add_weight(
             _WEIGHTS_VARIABLE_NAME,
             shape=[input_depth + self._num_units, self._num_units],
@@ -464,7 +459,7 @@ class BasicRNNCell(LayerRNNCell):
             initializer=tf.compat.v1.zeros_initializer(dtype=self.dtype),
         )
-        self.built = True
+        super().build(input_shape)
     def call(self, inputs, state):
         """Most basic RNN: output = new_state = act(W * input + U * state +
@@ -563,14 +558,14 @@ class GRUCell(LayerRNNCell):
         return self._num_units
     @tf_utils.shape_type_conversion
-    def build(self, inputs_shape):
-        if inputs_shape[-1] is None:
+    def build(self, input_shape):
+        if input_shape[-1] is None:
             raise ValueError(
                 "Expected inputs.shape[-1] to be known, "
-                f"received shape: {inputs_shape}"
+                f"received shape: {input_shape}"
             )
         _check_supported_dtypes(self.dtype)
-        input_depth = inputs_shape[-1]
+        input_depth = input_shape[-1]
         self._gate_kernel = self.add_weight(
             f"gates/{_WEIGHTS_VARIABLE_NAME}",
             shape=[input_depth + self._num_units, 2 * self._num_units],
@@ -600,7 +595,7 @@ class GRUCell(LayerRNNCell):
             ),
         )
-        self.built = True
+        super().build(input_shape)
     def call(self, inputs, state):
         """Gated recurrent unit (GRU) with nunits cells."""
@@ -774,14 +769,14 @@ class BasicLSTMCell(LayerRNNCell):
         return self._num_units
     @tf_utils.shape_type_conversion
-    def build(self, inputs_shape):
-        if inputs_shape[-1] is None:
+    def build(self, input_shape):
+        if input_shape[-1] is None:
             raise ValueError(
                 "Expected inputs.shape[-1] to be known, "
-                f"received shape: {inputs_shape}"
+                f"received shape: {input_shape}"
             )
         _check_supported_dtypes(self.dtype)
-        input_depth = inputs_shape[-1]
+        input_depth = input_shape[-1]
         h_depth = self._num_units
         self._kernel = self.add_weight(
             _WEIGHTS_VARIABLE_NAME,
@@ -793,7 +788,7 @@ class BasicLSTMCell(LayerRNNCell):
             initializer=tf.compat.v1.zeros_initializer(dtype=self.dtype),
         )
-        self.built = True
+        super().build(input_shape)
     def call(self, inputs, state):
         """Long short-term memory cell (LSTM).
@@ -1017,14 +1012,14 @@ class LSTMCell(LayerRNNCell):
         return self._output_size
     @tf_utils.shape_type_conversion
-    def build(self, inputs_shape):
-        if inputs_shape[-1] is None:
+    def build(self, input_shape):
+        if input_shape[-1] is None:
             raise ValueError(
                 "Expected inputs.shape[-1] to be known, "
-                f"received shape: {inputs_shape}"
+                f"received shape: {input_shape}"
             )
         _check_supported_dtypes(self.dtype)
-        input_depth = inputs_shape[-1]
+        input_depth = input_shape[-1]
         h_depth = self._num_units if self._num_proj is None else self._num_proj
         maybe_partitioner = (
             tf.compat.v1.fixed_size_partitioner(self._num_unit_shards)
@@ -1076,7 +1071,7 @@ class LSTMCell(LayerRNNCell):
                 partitioner=maybe_proj_partitioner,
             )
-        self.built = True
+        super().build(input_shape)
     def call(self, inputs, state):
         """Run one step of LSTM.

tf_keras/src/layers/rnn/lstm.py CHANGED Viewed

@@ -236,7 +236,6 @@ class LSTMCell(DropoutRNNCellMixin, base_layer.BaseRandomLayer):
             )
         else:
             self.bias = None
-        self.built = True
     def _compute_carry_and_output(self, x, h_tm1, c_tm1):
         """Computes carry and output using split kernels."""
@@ -1063,11 +1062,13 @@ def gpu_lstm(
             mask, time_major
         )
-    if not time_major and sequence_lengths is None:
-        inputs = tf.transpose(inputs, perm=(1, 0, 2))
-        seq_axis, batch_axis = (0, 1)
-    else:
-        seq_axis, batch_axis = (0, 1) if time_major else (1, 0)
+    seq_axis, batch_axis = (0, 1) if time_major else (1, 0)
+    if sequence_lengths is None:
+        max_sequence_length = tf.shape(inputs)[seq_axis]
+        batch_size = tf.shape(inputs)[batch_axis]
+        sequence_lengths = tf.fill([batch_size], max_sequence_length)
     # For init_h and init_c, cuDNN expects one more dim of num_layers before or
     # after batch dim for time major or batch major inputs respectively
     init_h = tf.expand_dims(init_h, axis=seq_axis)
@@ -1099,52 +1100,36 @@ def gpu_lstm(
         transpose_weights=True,
     )
-    if sequence_lengths is not None:
-        if go_backwards:
-            # Three reversals are required. E.g.,
-            # normal input = [1, 2, 3, 0, 0]  # where 0 need to be masked
-            # reversed_input_to_cudnn = [3, 2, 1, 0, 0]
-            # output_from_cudnn = [6, 5, 4, 0, 0]
-            # expected_output = [0, 0, 6, 5 ,4]
-            inputs = tf.reverse_sequence(
-                inputs,
-                sequence_lengths,
-                seq_axis=seq_axis,
-                batch_axis=batch_axis,
-            )
-        outputs, h, c, _, _ = tf.raw_ops.CudnnRNNV3(
-            input=inputs,
-            input_h=init_h,
-            input_c=init_c,
-            params=params,
-            is_training=True,
-            rnn_mode="lstm",
-            sequence_lengths=sequence_lengths,
-            time_major=time_major,
+    if go_backwards:
+        # Three reversals are required. E.g.,
+        # normal input = [1, 2, 3, 0, 0]  # where 0 need to be masked
+        # reversed_input_to_cudnn = [3, 2, 1, 0, 0]
+        # output_from_cudnn = [6, 5, 4, 0, 0]
+        # expected_output = [0, 0, 6, 5 ,4]
+        inputs = tf.reverse_sequence(
+            inputs,
+            sequence_lengths,
+            seq_axis=seq_axis,
+            batch_axis=batch_axis,
         )
-        if go_backwards:
-            outputs = tf.reverse_sequence(
-                outputs,
-                sequence_lengths,
-                seq_axis=seq_axis,
-                batch_axis=batch_axis,
-            )
-            outputs = tf.reverse(outputs, axis=[seq_axis])
-    else:
-        # # Fill the array with shape [batch] with value of max timesteps.
-        # sequence_length = array_ops.fill([array_ops.shape(inputs)[1]],
-        #                                  array_ops.shape(inputs)[0])
-        if go_backwards:
-            # Reverse axis 0 since the input is already convert to time major.
-            inputs = tf.reverse(inputs, axis=[0])
-        outputs, h, c, _ = tf.raw_ops.CudnnRNN(
-            input=inputs,
-            input_h=init_h,
-            input_c=init_c,
-            params=params,
-            is_training=True,
-            rnn_mode="lstm",
+    outputs, h, c, _, _ = tf.raw_ops.CudnnRNNV3(
+        input=inputs,
+        input_h=init_h,
+        input_c=init_c,
+        params=params,
+        is_training=True,
+        rnn_mode="lstm",
+        sequence_lengths=sequence_lengths,
+        time_major=time_major,
+    )
+    if go_backwards:
+        outputs = tf.reverse_sequence(
+            outputs,
+            sequence_lengths,
+            seq_axis=seq_axis,
+            batch_axis=batch_axis,
         )
+        outputs = tf.reverse(outputs, axis=[seq_axis])
     last_output = outputs[-1]
     if not time_major and sequence_lengths is None and return_sequences:

tf_keras/src/layers/rnn/simple_rnn.py CHANGED Viewed

@@ -189,7 +189,6 @@ class SimpleRNNCell(DropoutRNNCellMixin, base_layer.BaseRandomLayer):
             )
         else:
             self.bias = None
-        self.built = True
     def call(self, inputs, states, training=None):
         prev_output = states[0] if tf.nest.is_nested(states) else states

tf_keras/src/layers/rnn/stacked_rnn_cells.py CHANGED Viewed

@@ -166,6 +166,7 @@ class StackedRNNCells(base_layer.Layer):
     @tf_utils.shape_type_conversion
     def build(self, input_shape):
+        super().build(input_shape)
         if isinstance(input_shape, list):
             input_shape = input_shape[0]
@@ -195,7 +196,6 @@ class StackedRNNCells(base_layer.Layer):
                 input_shape = tuple(
                     [batch_size] + tf.TensorShape(output_dim).as_list()
                 )
-        self.built = True
     def get_config(self):
         cells = []

tf_keras/src/layers/rnn/time_distributed.py CHANGED Viewed

@@ -135,7 +135,6 @@ class TimeDistributed(Wrapper):
         )
         child_input_shape = tf_utils.convert_shapes(child_input_shape)
         super().build(tuple(child_input_shape))
-        self.built = True
     def compute_output_shape(self, input_shape):
         input_shape = tf_utils.convert_shapes(input_shape, to_tuples=False)

tf_keras/src/mixed_precision/autocast_variable.py CHANGED Viewed

@@ -124,20 +124,21 @@ class AutoCastVariable(tf.Variable, tf.__internal__.types.Tensor):
     def _should_cast(self):
         """Returns True if this variable should be casted when accessed."""
         autocast_dtype = getattr(_autocast_dtype, "dtype", None)
-        return autocast_dtype is not None and self.dtype != autocast_dtype
+        return autocast_dtype is not None and self.true_dtype != autocast_dtype
     @property
     def dtype(self):
-        """The dtype of the underlying variable, before any casts are done."""
-        return self._variable.dtype
+        """The dtype when the value is accessed, that is after casting."""
+        return self._cast_dtype
     @property
     def true_dtype(self):
-        """Deprecated alias of `dtype`."""
+        """The dtype of the underlying variable, before any casts are done."""
         return self._variable.dtype
     @property
     def _cast_dtype(self):
+        """The dtype after casting."""
         dtype = getattr(_autocast_dtype, "dtype", None)
         return dtype or self._variable.dtype
@@ -202,7 +203,8 @@ class AutoCastVariable(tf.Variable, tf.__internal__.types.Tensor):
         if tf.executing_eagerly() and not self._in_graph_mode:
             repr_str = (
                 "<AutoCastVariable '{v.name}' shape={v.shape} "
-                "dtype={v.dtype.name} dtype_to_cast_to={v._cast_dtype.name}, "
+                "dtype={v.true_dtype.name} "
+                "dtype_to_cast_to={v._cast_dtype.name}, "
                 "numpy={np_repr}>"
             )
             return repr_str.format(
@@ -211,7 +213,8 @@ class AutoCastVariable(tf.Variable, tf.__internal__.types.Tensor):
         else:
             repr_str = (
                 "<AutoCastVariable '{v.name}' shape={v.shape} "
-                "dtype={v.dtype.name} dtype_to_cast_to={v._cast_dtype.name}>"
+                "dtype={v.true_dtype.name} "
+                "dtype_to_cast_to={v._cast_dtype.name}>"
             )
             return repr_str.format(v=self)
@@ -261,6 +264,9 @@ class AutoCastVariable(tf.Variable, tf.__internal__.types.Tensor):
     def _apply_assign_update(
         self, update_fn, value, use_locking=None, name=None, read_value=True
     ):
+        # In auto cast scope, we cast back to the actual variable dtype.
+        if self._should_cast():
+            value = tf.cast(value, self.true_dtype)
         # TODO(b/146181571): This logic can be simplified once
         # DistributedVariable.assign returns a DistributedVariable. Currently
         # for MirroredStrategy, it returns a Mirrored value.

tf-keras-nightly 2.17.0.dev2024031909__py3-none-any.whl → 2.19.0.dev2025011410__py3-none-any.whl

tf-keras-nightly 2.17.0.dev2024031909__py3-none-any.whl → 2.19.0.dev2025011410__py3-none-any.whl