tf-keras-nightly 2.21.0.dev2025111410__py3-none-any.whl → 2.21.0.dev2025111610__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tf_keras/__init__.py +1 -1
- tf_keras/src/layers/activation/softmax.py +26 -11
- tf_keras/src/layers/attention/multi_head_attention.py +8 -1
- {tf_keras_nightly-2.21.0.dev2025111410.dist-info → tf_keras_nightly-2.21.0.dev2025111610.dist-info}/METADATA +1 -1
- {tf_keras_nightly-2.21.0.dev2025111410.dist-info → tf_keras_nightly-2.21.0.dev2025111610.dist-info}/RECORD +7 -7
- {tf_keras_nightly-2.21.0.dev2025111410.dist-info → tf_keras_nightly-2.21.0.dev2025111610.dist-info}/WHEEL +0 -0
- {tf_keras_nightly-2.21.0.dev2025111410.dist-info → tf_keras_nightly-2.21.0.dev2025111610.dist-info}/top_level.txt +0 -0
tf_keras/__init__.py
CHANGED

tf_keras/src/layers/activation/softmax.py
CHANGED
@@ -70,6 +70,8 @@ class Softmax(Layer):
     Args:
         axis: Integer, or list of Integers, axis along which the softmax
             normalization is applied.
+        robust_masking: Bool, if true will use a more robust implementation when
+            dealing with masks.
     Call arguments:
         inputs: The inputs, or logits to the softmax layer.
         mask: A boolean mask of the same shape as `inputs`. The mask
@@ -80,23 +82,34 @@ class Softmax(Layer):
         Softmaxed output with the same shape as `inputs`.
     """
 
-    def __init__(self, axis=-1, **kwargs):
+    def __init__(self, axis=-1, robust_masking=False, **kwargs):
         super().__init__(**kwargs)
         self.supports_masking = True
+        self.robust_masking = robust_masking
         self.axis = axis
 
     def call(self, inputs, mask=None):
         if mask is not None:
-            # Since mask is 1.0 for positions we want to keep and 0.0 for masked
-            # positions, this operation will create a tensor which is 0.0 for
-            # positions we want to attend and -1e.9 for masked positions.
-            adder = (1.0 - tf.cast(mask, inputs.dtype)) * (
-                _large_compatible_negative(inputs.dtype)
-            )
-
-            # Since we are adding it to the raw scores before the softmax, this
-            # is effectively the same as removing these entirely.
-            inputs += adder
+            if self.robust_masking:
+                # We keep the positions where the mask is True or > 0.5, and set
+                # the other (masked) positions to -1e.9.
+                if mask.dtype is not tf.bool:
+                    mask = tf.greater(mask, tf.constant(0.5, dtype=mask.dtype))
+                inputs = tf.where(
+                    mask, inputs, _large_compatible_negative(inputs.dtype)
+                )
+            else:
+                # Since mask is 1.0 for positions we want to keep and 0.0 for
+                # masked positions, this operation will create a tensor which is
+                # 0.0 for positions we want to attend and -1e.9 for masked
+                # positions.
+                adder = (1.0 - tf.cast(mask, inputs.dtype)) * (
+                    _large_compatible_negative(inputs.dtype)
+                )
+
+                # Since we are adding it to the raw scores before the softmax,
+                # this is effectively the same as removing these entirely.
+                inputs += adder
         if isinstance(self.axis, (tuple, list)):
             if len(self.axis) > 1:
                 return tf.exp(
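The two paths above differ when a masked logit is itself extreme: the additive -1e9 penalty can be cancelled by a large positive score (or overflow in float16), whereas tf.where overwrites the masked value unconditionally. A minimal standalone sketch of the distinction (not taken from the package; LARGE_NEGATIVE stands in for the module's _large_compatible_negative helper, assumed to return -1e9 for float32):

    import tensorflow as tf

    LARGE_NEGATIVE = -1e9  # stand-in for _large_compatible_negative(tf.float32)

    logits = tf.constant([1e9, 1.0, 2.0])  # extreme score at a masked position
    mask = tf.constant([False, True, True])

    # Additive path (robust_masking=False): the -1e9 penalty is cancelled by
    # the +1e9 logit, so the masked position leaks probability mass.
    adder = (1.0 - tf.cast(mask, logits.dtype)) * LARGE_NEGATIVE
    print(tf.nn.softmax(logits + adder).numpy())  # ~[0.09, 0.24, 0.67]

    # tf.where path (robust_masking=True): the logit is replaced outright, so
    # the masked position is suppressed regardless of its magnitude.
    masked = tf.where(mask, logits, LARGE_NEGATIVE)
    print(tf.nn.softmax(masked).numpy())  # ~[0.00, 0.27, 0.73]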
@@ -109,6 +122,8 @@ class Softmax(Layer):
 
     def get_config(self):
         config = {"axis": self.axis}
+        if self.robust_masking:
+            config["robust_masking"] = True
         base_config = super().get_config()
         return dict(list(base_config.items()) + list(config.items()))
 
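Note that get_config writes robust_masking only when it is set, presumably so that configs serialized with the default stay identical to those from earlier builds. A quick check, assuming this nightly is installed:

    from tf_keras.layers import Softmax

    assert "robust_masking" not in Softmax(axis=-1).get_config()
    assert Softmax(axis=-1, robust_masking=True).get_config()["robust_masking"]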
tf_keras/src/layers/attention/multi_head_attention.py
CHANGED

@@ -198,6 +198,8 @@ class MultiHeadAttention(Layer):
         activity_regularizer: Regularizer for dense layer activity.
         kernel_constraint: Constraint for dense layer kernels.
         bias_constraint: Constraint for dense layer kernels.
+        softmax_robust_masking: If true will use a more numerically robust
+            masking impl.
 
     Call arguments:
         query: Query `Tensor` of shape `(B, T, dim)`.
@@ -247,6 +249,7 @@ class MultiHeadAttention(Layer):
         activity_regularizer=None,
         kernel_constraint=None,
         bias_constraint=None,
+        softmax_robust_masking=False,
         **kwargs,
     ):
         super().__init__(**kwargs)
@@ -264,6 +267,7 @@ class MultiHeadAttention(Layer):
         self._activity_regularizer = regularizers.get(activity_regularizer)
         self._kernel_constraint = constraints.get(kernel_constraint)
         self._bias_constraint = constraints.get(bias_constraint)
+        self._softmax_robust_masking = softmax_robust_masking
         if attention_axes is not None and not isinstance(
             attention_axes, collections.abc.Sized
         ):
@@ -298,6 +302,7 @@ class MultiHeadAttention(Layer):
             "query_shape": self._query_shape,
             "key_shape": self._key_shape,
             "value_shape": self._value_shape,
+            "softmax_robust_masking": self._softmax_robust_masking,
         }
         base_config = super().get_config()
         return dict(list(base_config.items()) + list(config.items()))
@@ -476,7 +481,9 @@ class MultiHeadAttention(Layer):
             )
         )
         self._softmax = activation.Softmax(
-            axis=norm_axes,
+            axis=norm_axes,
+            robust_masking=self._softmax_robust_masking,
+            dtype=self._dtype_policy,
         )
         self._dropout_layer = regularization.Dropout(
             rate=self._dropout, dtype=self._dtype_policy
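Taken together, these hunks thread a single constructor flag through to the attention softmax (and the build step now also passes the layer's dtype policy to that sublayer explicitly). A usage sketch, assuming this nightly build is installed; shapes are illustrative:

    import numpy as np
    import tf_keras as keras

    query = np.random.rand(2, 8, 16).astype("float32")  # (batch, target, dim)
    value = np.random.rand(2, 4, 16).astype("float32")  # (batch, source, dim)
    attention_mask = np.ones((2, 8, 4), dtype=bool)     # (batch, target, source)

    mha = keras.layers.MultiHeadAttention(
        num_heads=2,
        key_dim=16,
        softmax_robust_masking=True,  # new flag; defaults to False
    )
    print(mha(query, value, attention_mask=attention_mask).shape)  # (2, 8, 16)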
{tf_keras_nightly-2.21.0.dev2025111410.dist-info → tf_keras_nightly-2.21.0.dev2025111610.dist-info}/RECORD
CHANGED

@@ -1,4 +1,4 @@
-tf_keras/__init__.py,sha256=
+tf_keras/__init__.py,sha256=FYSNmTtnHJHq-r-PH9N0iF73Ckbmclt0R91HoEZaCG0,911
 tf_keras/__internal__/__init__.py,sha256=OHQbeIC0QtRBI7dgXaJaVbH8F00x8dCI-DvEcIfyMsE,671
 tf_keras/__internal__/backend/__init__.py,sha256=LnMs2A6685gDG79fxqmdulIYlVE_3WmXlBTBo9ZWYcw,162
 tf_keras/__internal__/layers/__init__.py,sha256=F5SGMhOTPzm-PR44VrfinURHcVeQPIEdwnZlAkSTB3A,176
@@ -314,13 +314,13 @@ tf_keras/src/layers/activation/elu.py,sha256=n-WAE6NjC9mbqcV7Kxgpt8tTbvwCQIGsoCV
 tf_keras/src/layers/activation/leaky_relu.py,sha256=cJmpwgg4KEu--iK9gFuJT7uEGpDArB8q-XNBmJfC7_U,2618
 tf_keras/src/layers/activation/prelu.py,sha256=D2yhneQrYQP6aHSK8nvnMKa1hIeuPZO_XCB2Cu9Cl4Y,4440
 tf_keras/src/layers/activation/relu.py,sha256=JklQuReRiR3huAGr3QRtuGL0URpdspDFzBNjZgv0HDw,4281
-tf_keras/src/layers/activation/softmax.py,sha256=
+tf_keras/src/layers/activation/softmax.py,sha256=0g8uN5N8QDW8lj6nGabR-EBk58njbiNdhDzglv9rxXU,4861
 tf_keras/src/layers/activation/thresholded_relu.py,sha256=rQLn9cr-w6hVJET2mS7OIQ9diiUiqUrX4CysXKNYbmg,2503
 tf_keras/src/layers/attention/__init__.py,sha256=6HjPSyLhs_bf4erT65KyhSCHQF7WeWZe9YTH7iW6Nek,945
 tf_keras/src/layers/attention/additive_attention.py,sha256=jie0cAXJEjU4xXK_Ur1SrEL9RqDIIAPyaAkK8O71TEs,7485
 tf_keras/src/layers/attention/attention.py,sha256=TCnoOWAfh6i275TvudxyjosczBmL_zz9ByEUi-xXkAU,8682
 tf_keras/src/layers/attention/base_dense_attention.py,sha256=cEzBldjwQfuJfNZRimW5s-NqyENU2-lmqaNNxAGxhKw,10856
-tf_keras/src/layers/attention/multi_head_attention.py,sha256=
+tf_keras/src/layers/attention/multi_head_attention.py,sha256=FQX0YtXRy5kg8OlShA7cp2kfczzeWb9Oj3tbzkukLRw,30618
 tf_keras/src/layers/convolutional/__init__.py,sha256=U-4tja5JhSUva2G9uMmsZyZty2N2N9jT6EJRu5HAo-Y,3355
 tf_keras/src/layers/convolutional/base_conv.py,sha256=jvm4elEyIVSNfYZxh4inzQ1Q2CKS_f8VawvXMIJFSC4,17574
 tf_keras/src/layers/convolutional/base_depthwise_conv.py,sha256=SVgR2Y8dpeX4eDEF1e0UY0Mxh4A47eGHhJCQ1peGwNQ,9661
@@ -584,7 +584,7 @@ tf_keras/src/utils/legacy/__init__.py,sha256=EfMmeHYDzwvxNaktPhQbkTdcPSIGCqMhBND
 tf_keras/utils/__init__.py,sha256=b7_d-USe_EmLo02_P99Q1rUCzKBYayPCfiYFStP-0nw,2735
 tf_keras/utils/experimental/__init__.py,sha256=DzGogE2AosjxOVILQBT8PDDcqbWTc0wWnZRobCdpcec,97
 tf_keras/utils/legacy/__init__.py,sha256=7ujlDa5HeSRcth2NdqA0S1P2-VZF1kB3n68jye6Dj-8,189
-tf_keras_nightly-2.21.0.
-tf_keras_nightly-2.21.0.
-tf_keras_nightly-2.21.0.
-tf_keras_nightly-2.21.0.
+tf_keras_nightly-2.21.0.dev2025111610.dist-info/METADATA,sha256=SF87YdU9NoBNrX-R3R0DLg9bst7FP5Fz0jCx_RtbGas,1857
+tf_keras_nightly-2.21.0.dev2025111610.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+tf_keras_nightly-2.21.0.dev2025111610.dist-info/top_level.txt,sha256=LC8FK7zHDNKxB17C6lGKvrZ_fZZGJsRiBK23SfiDegY,9
+tf_keras_nightly-2.21.0.dev2025111610.dist-info/RECORD,,

{tf_keras_nightly-2.21.0.dev2025111410.dist-info → tf_keras_nightly-2.21.0.dev2025111610.dist-info}/WHEEL
File without changes

{tf_keras_nightly-2.21.0.dev2025111410.dist-info → tf_keras_nightly-2.21.0.dev2025111610.dist-info}/top_level.txt
File without changes