PyPI - tf-keras-nightly - Versions diffs - 2.20.0.dev2025051109__py3-none-any.whl → 2.21.0.dev2025123010__py3-none-any.whl - Mend

tf-keras-nightly 2.20.0.dev2025051109__py3-none-any.whl → 2.21.0.dev2025123010__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49) hide show

tf_keras/src/engine/base_layer_v1.py CHANGED Viewed

@@ -132,6 +132,7 @@ class Layer(base_layer.Layer):
         self, trainable=True, name=None, dtype=None, dynamic=False, **kwargs
     ):
         self._instrument_layer_creation()
+        self._called = False
         # These properties should be set by the user via keyword arguments.
         # note that 'dtype', 'input_shape' and 'batch_input_shape'
@@ -165,6 +166,8 @@ class Layer(base_layer.Layer):
         self._input_spec = None
         self.supports_masking = False
+        self._call_context_args = {"training"}
         self._init_set_name(name)
         self._activity_regularizer = regularizers.get(
             kwargs.pop("activity_regularizer", None)
@@ -705,6 +708,7 @@ class Layer(base_layer.Layer):
           RuntimeError: if `super().__init__()` was not called in the
             constructor.
         """
+        self._called = True
         self._assert_built_as_v1()
         if not hasattr(self, "_thread_local"):
@@ -803,7 +807,12 @@ class Layer(base_layer.Layer):
         if build_graph and base_layer_utils.needs_keras_history(inputs):
             base_layer_utils.create_keras_history(inputs)
-        with call_context.enter(self, inputs, build_graph, training_value):
+        with call_context.enter(
+            self,
+            inputs,
+            build_graph,
+            call_context_args={"training": training_value},
+        ):
             # Check input assumptions set after layer building, e.g. input
             # shape.
             if build_graph:
@@ -2177,8 +2186,8 @@ class Layer(base_layer.Layer):
                 else:
                     self._set_dtype_policy(policy.Policy(dtype))
             input_shapes = None
-            if all(hasattr(x, "shape") for x in input_list):
-                input_shapes = tf.nest.map_structure(lambda x: x.shape, inputs)
+            if any(hasattr(x, "shape") for x in input_list):
+                input_shapes = tf_utils.get_shapes(inputs)
             # Only call `build` if the user has manually overridden the build
             # method.
             if not hasattr(self.build, "_is_default"):

tf_keras/src/engine/data_adapter.py CHANGED Viewed

@@ -231,7 +231,7 @@ class TensorLikeDataAdapter(DataAdapter):
                 return True
             return False
-        return all(_is_tensor(v) for v in flat_inputs)
+        return all(_is_tensor(v) for v in flat_inputs if v is not None)
     def __init__(
         self,
@@ -259,7 +259,7 @@ class TensorLikeDataAdapter(DataAdapter):
         inputs = pack_x_y_sample_weight(x, y, sample_weights)
         num_samples = set(
-            int(i.shape[0]) for i in tf.nest.flatten(inputs)
+            int(i.shape[0]) for i in tf.nest.flatten(inputs) if i is not None
         ).pop()
         _check_data_cardinality(inputs)
@@ -386,7 +386,7 @@ class TensorLikeDataAdapter(DataAdapter):
         def grab_batch(i, data):
             return tf.nest.map_structure(
-                lambda d: tf.gather(d, i, axis=0), data
+                lambda d: tf.gather(d, i, axis=0) if d is not None else d, data
             )
         dataset = dataset.map(grab_batch, num_parallel_calls=tf.data.AUTOTUNE)
@@ -459,7 +459,7 @@ class GenericArrayLikeDataAdapter(TensorLikeDataAdapter):
         if not TensorLikeDataAdapter.can_handle(
             x, y
         ) and not CompositeTensorDataAdapter.can_handle(x, y):
-            return all(_is_array_like(v) for v in flat_inputs)
+            return all(_is_array_like(v) for v in flat_inputs if v is not None)
         else:
             return False
@@ -496,7 +496,7 @@ class GenericArrayLikeDataAdapter(TensorLikeDataAdapter):
             shape[0] = None
             return tuple(shape)
-        flat_dtypes = [inp.dtype for inp in flat_inputs]
+        flat_dtypes = [inp.dtype for inp in flat_inputs if inp is not None]
         contiguous = True
         if self._shuffle and self._shuffle != "batch":
             contiguous = False
@@ -509,15 +509,26 @@ class GenericArrayLikeDataAdapter(TensorLikeDataAdapter):
             # to a Tensor may force it into memory..
             def py_method(ind):
                 def slice_array(data):
+                    if data is None:
+                        return None
                     return training_utils.slice_arrays(
                         data, ind.numpy(), contiguous=contiguous
                     )
-                return [slice_array(inp) for inp in flat_inputs]
+                return [
+                    slice_array(inp) for inp in flat_inputs if inp is not None
+                ]
-            flat_out = tf.py_function(py_method, [indices], flat_dtypes)
-            for v, original_inp in zip(flat_out, flat_inputs):
-                v.set_shape(dynamic_shape_like(original_inp))
+            results = tf.py_function(py_method, [indices], flat_dtypes)
+            results_it = iter(results)
+            flat_out = []
+            for original_inp in flat_inputs:
+                if original_inp is None:
+                    flat_out.append(None)
+                else:
+                    v = next(results_it)
+                    v.set_shape(dynamic_shape_like(original_inp))
+                    flat_out.append(v)
             return tf.nest.pack_sequence_as(inputs, flat_out)
         dataset = indices_dataset.map(
@@ -608,8 +619,10 @@ class CompositeTensorDataAdapter(DataAdapter):
                 return True
             return _is_composite(v)
-        return any(_is_composite(v) for v in flat_inputs) and all(
-            _is_tensor_or_composite(v) for v in flat_inputs
+        return any(
+            _is_composite(v) for v in flat_inputs if v is not None
+        ) and all(
+            _is_tensor_or_composite(v) for v in flat_inputs if v is not None
         )
     def __init__(
@@ -1944,14 +1957,18 @@ def single_batch_iterator(
 def _check_data_cardinality(data):
-    num_samples = set(int(i.shape[0]) for i in tf.nest.flatten(data))
+    num_samples = set(
+        int(i.shape[0]) for i in tf.nest.flatten(data) if i is not None
+    )
     if len(num_samples) > 1:
         msg = "Data cardinality is ambiguous:\n"
         for label, single_data in zip(["x", "y", "sample_weight"], data):
             msg += "  {} sizes: {}\n".format(
                 label,
                 ", ".join(
-                    str(i.shape[0]) for i in tf.nest.flatten(single_data)
+                    str(i.shape[0])
+                    for i in tf.nest.flatten(single_data)
+                    if i is not None
                 ),
             )
         msg += "Make sure all arrays contain the same number of samples."

tf_keras/src/engine/functional.py CHANGED Viewed

@@ -351,25 +351,45 @@ class Functional(training_lib.Model):
         if isinstance(self._nested_inputs, dict):
             # Case where `_nested_inputs` is a plain dict of Inputs.
             names = sorted(self._nested_inputs.keys())
-            return [
-                input_spec.InputSpec(
-                    shape=shape_with_no_batch_size(self._nested_inputs[name]),
-                    allow_last_axis_squeeze=True,
-                    name=name,
+            specs = []
+            for name in names:
+                layer = self._nested_inputs[name]._keras_history.layer
+                optional = (
+                    layer.optional
+                    if isinstance(layer, input_layer_module.InputLayer)
+                    else False
                 )
-                for name in names
-            ]
+                specs.append(
+                    input_spec.InputSpec(
+                        shape=shape_with_no_batch_size(
+                            self._nested_inputs[name]
+                        ),
+                        allow_last_axis_squeeze=True,
+                        name=name,
+                        optional=optional,
+                    )
+                )
+            return specs
         else:
             # Single input, or list / tuple of inputs.
             # The data may be passed as a dict keyed by input name.
-            return [
-                input_spec.InputSpec(
-                    shape=shape_with_no_batch_size(x),
-                    allow_last_axis_squeeze=True,
-                    name=x._keras_history.layer.name,
+            specs = []
+            for x in self.inputs:
+                layer = x._keras_history.layer
+                optional = (
+                    layer.optional
+                    if isinstance(layer, input_layer_module.InputLayer)
+                    else False
                 )
-                for x in self.inputs
-            ]
+                specs.append(
+                    input_spec.InputSpec(
+                        shape=shape_with_no_batch_size(x),
+                        allow_last_axis_squeeze=True,
+                        name=x._keras_history.layer.name,
+                        optional=optional,
+                    )
+                )
+            return specs
     @input_spec.setter
     def input_spec(self, value):
@@ -644,7 +664,8 @@ class Functional(training_lib.Model):
         else:
             masks = self._flatten_to_reference_inputs(mask)
         for input_t, mask in zip(inputs, masks):
-            input_t._keras_mask = mask
+            if input_t is not None:
+                input_t._keras_mask = mask
         # Dictionary mapping reference tensors to computed tensors.
         tensor_dict = {}

tf_keras/src/engine/input_layer.py CHANGED Viewed

@@ -98,6 +98,8 @@ class InputLayer(base_layer.Layer):
             `tf.TypeSpec` represents the entire batch. When provided, all other
             args except name must be `None`.
         name: Optional name of the layer (string).
+        optional: Boolean, whether the input is optional or not. An optional
+            input can accept `None` values.
     """
     @traceback_utils.filter_traceback
@@ -111,6 +113,7 @@ class InputLayer(base_layer.Layer):
         name=None,
         ragged=None,
         type_spec=None,
+        optional=False,
         **kwargs,
     ):
         self._init_input_shape = input_shape
@@ -180,6 +183,7 @@ class InputLayer(base_layer.Layer):
         self.ragged = True if ragged else False
         self.batch_size = batch_size
         self.supports_masking = True
+        self.optional = optional
         if isinstance(input_shape, tf.TensorShape):
             input_shape = tuple(input_shape.as_list())
@@ -284,6 +288,7 @@ class InputLayer(base_layer.Layer):
                 "sparse": self.sparse,
                 "ragged": self.ragged,
                 "name": self.name,
+                "optional": self.optional,
             }
         return config
@@ -303,6 +308,7 @@ def Input(
     tensor=None,
     ragged=None,
     type_spec=None,
+    optional=False,
     **kwargs,
 ):
     """`Input()` is used to instantiate a TF-Keras tensor.
@@ -341,6 +347,8 @@ def Input(
             [this guide](https://www.tensorflow.org/guide/ragged_tensor).
         type_spec: A `tf.TypeSpec` object to create the input placeholder from.
             When provided, all other args except name must be None.
+        optional: Boolean, whether the input is optional or not. An optional
+            input can accept `None` values.
         **kwargs: deprecated arguments support. Supports `batch_shape` and
             `batch_input_shape`.
@@ -415,6 +423,7 @@ def Input(
         "ragged": ragged,
         "input_tensor": tensor,
         "type_spec": type_spec,
+        "optional": optional,
     }
     batch_input_shape = kwargs.pop(

tf_keras/src/engine/input_spec.py CHANGED Viewed

@@ -56,6 +56,8 @@ class InputSpec:
         as long as the last axis of the spec is 1.
       name: Expected key corresponding to this input when passing data as
         a dictionary.
+      optional: Boolean, whether the input is optional or not. An optional input
+        can accept `None` values.
     Example:
@@ -82,6 +84,7 @@ class InputSpec:
         axes=None,
         allow_last_axis_squeeze=False,
         name=None,
+        optional=False,
     ):
         self.dtype = tf.as_dtype(dtype).name if dtype is not None else None
         shape = tf.TensorShape(shape)
@@ -99,6 +102,7 @@ class InputSpec:
         self.min_ndim = min_ndim
         self.name = name
         self.allow_last_axis_squeeze = allow_last_axis_squeeze
+        self.optional = optional
         try:
             axes = axes or {}
             self.axes = {int(k): axes[k] for k in axes}
@@ -204,7 +208,11 @@ def assert_input_compatibility(input_spec, inputs, layer_name):
             inputs = list_inputs
     inputs = tf.nest.flatten(inputs)
-    for x in inputs:
+    for _, (x, spec) in enumerate(zip(inputs, input_spec)):
+        if spec is None:
+            continue
+        if x is None and spec.optional:
+            continue
         # Having a shape/dtype is the only commonality of the various
         # tensor-like objects that may be passed. The most common kind of
         # invalid type we are guarding for is a Layer instance (Functional API),
@@ -224,6 +232,8 @@ def assert_input_compatibility(input_spec, inputs, layer_name):
     for input_index, (x, spec) in enumerate(zip(inputs, input_spec)):
         if spec is None:
             continue
+        if x is None and spec.optional:
+            continue
         shape = tf.TensorShape(x.shape)
         if shape.rank is None:

tf_keras/src/layers/activation/softmax.py CHANGED Viewed

@@ -70,6 +70,8 @@ class Softmax(Layer):
     Args:
         axis: Integer, or list of Integers, axis along which the softmax
             normalization is applied.
+        robust_masking: Bool, if true will use a more robust implementation when
+            dealing with masks.
     Call arguments:
         inputs: The inputs, or logits to the softmax layer.
         mask: A boolean mask of the same shape as `inputs`. The mask
@@ -80,23 +82,34 @@ class Softmax(Layer):
         Softmaxed output with the same shape as `inputs`.
     """
-    def __init__(self, axis=-1, **kwargs):
+    def __init__(self, axis=-1, robust_masking=False, **kwargs):
         super().__init__(**kwargs)
         self.supports_masking = True
+        self.robust_masking = robust_masking
         self.axis = axis
     def call(self, inputs, mask=None):
         if mask is not None:
-            # Since mask is 1.0 for positions we want to keep and 0.0 for masked
-            # positions, this operation will create a tensor which is 0.0 for
-            # positions we want to attend and -1e.9 for masked positions.
-            adder = (1.0 - tf.cast(mask, inputs.dtype)) * (
-                _large_compatible_negative(inputs.dtype)
-            )
-            # Since we are adding it to the raw scores before the softmax, this
-            # is effectively the same as removing these entirely.
-            inputs += adder
+            if self.robust_masking:
+                # We keep the positions where the mask is True or > 0.5, and set
+                # the other (masked) positions to -1e9.
+                if mask.dtype is not tf.bool:
+                    mask = tf.greater(mask, tf.constant(0.5, dtype=mask.dtype))
+                inputs = tf.where(
+                    mask, inputs, _large_compatible_negative(inputs.dtype)
+                )
+            else:
+                # Since mask is 1.0 for positions we want to keep and 0.0 for
+                # masked positions, this operation will create a tensor which is
+                # 0.0 for positions we want to attend and -1e9 for masked
+                # positions.
+                adder = (1.0 - tf.cast(mask, inputs.dtype)) * (
+                    _large_compatible_negative(inputs.dtype)
+                )
+                # Since we are adding it to the raw scores before the softmax,
+                # this is effectively the same as removing these entirely.
+                inputs += adder
         if isinstance(self.axis, (tuple, list)):
             if len(self.axis) > 1:
                 return tf.exp(
@@ -109,6 +122,8 @@ class Softmax(Layer):
     def get_config(self):
         config = {"axis": self.axis}
+        if self.robust_masking:
+            config["robust_masking"] = True
         base_config = super().get_config()
         return dict(list(base_config.items()) + list(config.items()))

tf_keras/src/layers/attention/multi_head_attention.py CHANGED Viewed

@@ -198,6 +198,8 @@ class MultiHeadAttention(Layer):
         activity_regularizer: Regularizer for dense layer activity.
         kernel_constraint: Constraint for dense layer kernels.
         bias_constraint: Constraint for dense layer biases.
+        softmax_robust_masking: If true will use a more numerically robust
+            masking impl.
     Call arguments:
         query: Query `Tensor` of shape `(B, T, dim)`.
@@ -247,6 +249,7 @@ class MultiHeadAttention(Layer):
         activity_regularizer=None,
         kernel_constraint=None,
         bias_constraint=None,
+        softmax_robust_masking=False,
         **kwargs,
     ):
         super().__init__(**kwargs)
@@ -264,6 +267,7 @@ class MultiHeadAttention(Layer):
         self._activity_regularizer = regularizers.get(activity_regularizer)
         self._kernel_constraint = constraints.get(kernel_constraint)
         self._bias_constraint = constraints.get(bias_constraint)
+        self._softmax_robust_masking = softmax_robust_masking
         if attention_axes is not None and not isinstance(
             attention_axes, collections.abc.Sized
         ):
@@ -298,6 +302,7 @@ class MultiHeadAttention(Layer):
             "query_shape": self._query_shape,
             "key_shape": self._key_shape,
             "value_shape": self._value_shape,
+            "softmax_robust_masking": self._softmax_robust_masking,
         }
         base_config = super().get_config()
         return dict(list(base_config.items()) + list(config.items()))
@@ -476,7 +481,9 @@ class MultiHeadAttention(Layer):
             )
         )
         self._softmax = activation.Softmax(
-            axis=norm_axes, dtype=self._dtype_policy
+            axis=norm_axes,
+            robust_masking=self._softmax_robust_masking,
+            dtype=self._dtype_policy,
         )
         self._dropout_layer = regularization.Dropout(
             rate=self._dropout, dtype=self._dtype_policy

tf_keras/src/layers/core/tf_op_layer.py CHANGED Viewed

@@ -259,6 +259,10 @@ class TFOpLambda(Layer):
         self._call_spec.expects_training_arg = False
         self._call_spec.expects_mask_arg = False
+        # Clear the call-context arguments for the layer's call method.
+        # Otherwise, Keras ends up injecting context arguments into the op-call
+        # when the call method accepts kwargs.
+        self._call_spec._expected_context_args.clear()
     def _call_wrapper(self, *args, **kwargs):
         created_variables = []

tf_keras/src/layers/rnn/cell_wrappers.py CHANGED Viewed

@@ -52,9 +52,21 @@ class _RNNCellWrapper(AbstractRNNCell):
         super().__init__(*args, **kwargs)
         self.cell = cell
         cell_call_spec = tf_inspect.getfullargspec(cell.call)
+        accepts_kwargs = cell_call_spec.varkw is not None
         self._call_spec.expects_training_arg = (
             "training" in cell_call_spec.args
-        ) or (cell_call_spec.varkw is not None)
+        ) or accepts_kwargs
+        # Filter `_expected_context_args`. An argument is kept if:
+        # 1. It's an explicit argument in cell_call_spec.args OR
+        # 2. The cell accepts arbitrary keyword arguments (**kwargs),
+        #    meaning it could potentially handle the context argument.
+        self._call_spec._expected_context_args = {
+            arg
+            for arg in self._call_spec._expected_context_args
+            if (arg in cell_call_spec.args) or accepts_kwargs
+        }
     def _call_wrapped_cell(self, inputs, state, cell_call_fn, **kwargs):
         """Calls the wrapped cell and performs the wrapping logic.

tf_keras/src/metrics/confusion_metrics.py CHANGED Viewed

@@ -1471,9 +1471,10 @@ class AUC(base_metric.Metric):
                 # label_weights should be of length equal to the number of
                 # labels.
                 shapes.append((self.label_weights, ("L",)))
-                tf.debugging.assert_shapes(
-                    shapes, message="Number of labels is not consistent."
-                )
+            tf.debugging.assert_shapes(
+                shapes, message="Number of labels is not consistent."
+            )
         # Only forward label_weights to update_confusion_matrix_variables when
         # multi_label is False. Otherwise the averaging of individual label AUCs
@@ -1611,13 +1612,59 @@ class AUC(base_metric.Metric):
             )
             x = fp_rate
             y = recall
-        else:  # curve == 'PR'.
+        elif self.curve == metrics_utils.AUCCurve.PR:
             precision = tf.math.divide_no_nan(
                 self.true_positives,
                 tf.math.add(self.true_positives, self.false_positives),
             )
             x = recall
             y = precision
+        else:  # curve == 'PR_GAIN'.
+            # Due to the hyperbolic transform, this formula is less robust than
+            # ROC or PR values. In particular
+            # 1) Both measures diverge when there are no negative examples;
+            # 2) Both measures diverge when there are no true positives;
+            # 3) Recall gain becomes negative when the recall is lower than the
+            # label average (i.e. when more negative examples are classified
+            # positive than real positives).
+            #
+            # We ignore case 1 as it is easily communicated. For case 2 we set
+            # recall_gain to 0 and precision_gain to 1. For case 3 we set the
+            # recall_gain to 0. These fixes will result in an overestimation of
+            # the AUC for estimators that are anti-correlated with the label
+            # (at some thresholds).
+            #
+            # The scaling factor $\frac{P}{N}$ that is used to form both
+            # gain values.
+            scaling_factor = tf.math.divide_no_nan(
+                tf.math.add(self.true_positives, self.false_negatives),
+                tf.math.add(self.false_positives, self.true_negatives),
+            )
+            recall_gain = 1.0 - scaling_factor * tf.math.divide_no_nan(
+                self.false_negatives, self.true_positives
+            )
+            precision_gain = 1.0 - scaling_factor * tf.math.divide_no_nan(
+                self.false_positives, self.true_positives
+            )
+            # Handle case 2.
+            recall_gain = tf.where(
+                tf.equal(self.true_positives, 0.0),
+                tf.zeros_like(recall_gain),
+                recall_gain,
+            )
+            precision_gain = tf.where(
+                tf.equal(self.true_positives, 0.0),
+                tf.ones_like(precision_gain),
+                precision_gain,
+            )
+            # Handle case 3.
+            recall_gain = tf.math.maximum(
+                recall_gain, tf.zeros_like(recall_gain)
+            )
+            x = recall_gain
+            y = precision_gain
         # Find the rectangle heights based on `summation_method`.
         if (

tf_keras/src/models/sharpness_aware_minimization.py CHANGED Viewed

@@ -72,17 +72,27 @@ class SharpnessAwareMinimization(Model):
         if self.num_batch_splits is not None:
             x_split = tf.split(x, self.num_batch_splits)
             y_split = tf.split(y, self.num_batch_splits)
+            # Split the sample weight if it is provided.
+            if sample_weight is not None:
+                sample_weight_split = tf.split(
+                    sample_weight, self.num_batch_splits
+                )
+            else:
+                sample_weight_split = [None] * self.num_batch_splits
         else:
             x_split = [x]
             y_split = [y]
+            sample_weight_split = [sample_weight]
         gradients_all_batches = []
         pred_all_batches = []
-        for x_batch, y_batch in zip(x_split, y_split):
+        for x_batch, y_batch, sample_weight_batch in zip(
+            x_split, y_split, sample_weight_split
+        ):
             epsilon_w_cache = []
             with tf.GradientTape() as tape:
-                pred = self.model(x_batch)
-                loss = self.compiled_loss(y_batch, pred)
+                pred = self(x_batch)
+                loss = self.compiled_loss(y_batch, pred, sample_weight_batch)
             pred_all_batches.append(pred)
             trainable_variables = self.model.trainable_variables
             gradients = tape.gradient(loss, trainable_variables)
@@ -98,8 +108,8 @@ class SharpnessAwareMinimization(Model):
                 epsilon_w_cache.append(epsilon_w)
             with tf.GradientTape() as tape:
-                pred = self(x_batch)
-                loss = self.compiled_loss(y_batch, pred)
+                pred = self(x_batch, training=True)
+                loss = self.compiled_loss(y_batch, pred, sample_weight_batch)
             gradients = tape.gradient(loss, trainable_variables)
             if len(gradients_all_batches) == 0:
                 for gradient in gradients:
@@ -127,7 +137,7 @@ class SharpnessAwareMinimization(Model):
         self.compiled_metrics.update_state(y, pred, sample_weight)
         return {m.name: m.result() for m in self.metrics}
-    def call(self, inputs):
+    def call(self, inputs, **kwargs):
         """Forward pass of SAM.
         SAM delegates the forward pass call to the wrapped model.
@@ -138,7 +148,7 @@ class SharpnessAwareMinimization(Model):
         Returns:
           A Tensor, the outputs of the wrapped model for given `inputs`.
         """
-        return self.model(inputs)
+        return self.model(inputs, **kwargs)
     def get_config(self):
         config = super().get_config()

tf-keras-nightly 2.20.0.dev2025051109__py3-none-any.whl → 2.21.0.dev2025123010__py3-none-any.whl

tf-keras-nightly 2.20.0.dev2025051109__py3-none-any.whl → 2.21.0.dev2025123010__py3-none-any.whl