tf-models-nightly 2.20.0.dev20250807__py2.py3-none-any.whl → 2.20.0.dev20250808__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tf-models-nightly might be problematic. Click here for more details.
- official/nlp/modeling/layers/transformer_encoder_block.py +24 -5
- official/nlp/modeling/layers/transformer_encoder_block_test.py +53 -0
- {tf_models_nightly-2.20.0.dev20250807.dist-info → tf_models_nightly-2.20.0.dev20250808.dist-info}/METADATA +1 -1
- {tf_models_nightly-2.20.0.dev20250807.dist-info → tf_models_nightly-2.20.0.dev20250808.dist-info}/RECORD +8 -8
- {tf_models_nightly-2.20.0.dev20250807.dist-info → tf_models_nightly-2.20.0.dev20250808.dist-info}/AUTHORS +0 -0
- {tf_models_nightly-2.20.0.dev20250807.dist-info → tf_models_nightly-2.20.0.dev20250808.dist-info}/LICENSE +0 -0
- {tf_models_nightly-2.20.0.dev20250807.dist-info → tf_models_nightly-2.20.0.dev20250808.dist-info}/WHEEL +0 -0
- {tf_models_nightly-2.20.0.dev20250807.dist-info → tf_models_nightly-2.20.0.dev20250808.dist-info}/top_level.txt +0 -0
|
@@ -274,6 +274,8 @@ class TransformerEncoderBlock(tf_keras.layers.Layer):
|
|
|
274
274
|
input_tensor_shape = input_shape
|
|
275
275
|
elif isinstance(input_shape, (list, tuple)):
|
|
276
276
|
input_tensor_shape = tf.TensorShape(input_shape[0])
|
|
277
|
+
elif isinstance(input_shape, dict):
|
|
278
|
+
input_tensor_shape = tf.TensorShape(input_shape["input_tensor"])
|
|
277
279
|
else:
|
|
278
280
|
raise ValueError(
|
|
279
281
|
"The type of input shape argument is not supported, got: %s" %
|
|
@@ -546,11 +548,13 @@ class TransformerEncoderBlock(tf_keras.layers.Layer):
|
|
|
546
548
|
"""Transformer self-attention encoder block call.
|
|
547
549
|
|
|
548
550
|
Args:
|
|
549
|
-
inputs: a single tensor or a list of tensors. `input tensor` as the single
|
|
550
|
-
sequence of embeddings. [`input tensor`, `attention mask`] to have the
|
|
551
|
-
additional attention mask. [`query tensor`, `key value tensor`,
|
|
552
|
-
`attention mask`] to have separate input streams for the query, and
|
|
553
|
-
key/value to the multi-head attention.
|
|
551
|
+
inputs: a single tensor or a list of tensors, or a dictionary. `input
|
|
552
|
+
tensor` as the single sequence of embeddings. [`input tensor`,
|
|
553
|
+
`attention mask`] to have the additional attention mask. [`query
|
|
554
|
+
tensor`, `key value tensor`, `attention mask`] to have separate input
|
|
555
|
+
streams for the query, and key/value to the multi-head attention. If
|
|
556
|
+
dictionary is provided, it must contain the following keys:
|
|
557
|
+
`input_tensor`, `attention_mask`, `key_value_tensor`.
|
|
554
558
|
output_range: the sequence output range, [0, output_range) for slicing the
|
|
555
559
|
target sequence. `None` means the target sequence is not sliced. If you
|
|
556
560
|
would like to have no change to the model training, it is better to only
|
|
@@ -568,6 +572,21 @@ class TransformerEncoderBlock(tf_keras.layers.Layer):
|
|
|
568
572
|
else:
|
|
569
573
|
raise ValueError("Unexpected inputs to %s with length at %d" %
|
|
570
574
|
(self.__class__, len(inputs)))
|
|
575
|
+
elif isinstance(inputs, dict):
|
|
576
|
+
if not set(inputs.keys()).issubset(
|
|
577
|
+
set(["input_tensor", "key_value_tensor", "attention_mask"])
|
|
578
|
+
):
|
|
579
|
+
raise ValueError(
|
|
580
|
+
f"Unexpected keys in input dictionary to: {inputs.keys()}"
|
|
581
|
+
)
|
|
582
|
+
try:
|
|
583
|
+
input_tensor = inputs["input_tensor"]
|
|
584
|
+
except KeyError as e:
|
|
585
|
+
raise ValueError(
|
|
586
|
+
"Missing required key `input_tensor` in input dictionary."
|
|
587
|
+
) from e
|
|
588
|
+
key_value = inputs.get("key_value_tensor", None)
|
|
589
|
+
attention_mask = inputs.get("attention_mask", None)
|
|
571
590
|
else:
|
|
572
591
|
input_tensor, key_value, attention_mask = (inputs, None, None)
|
|
573
592
|
|
|
@@ -55,6 +55,25 @@ class TransformerEncoderBlockLayerTest(
|
|
|
55
55
|
# The default output of a transformer layer should be the same as the input.
|
|
56
56
|
self.assertEqual(data_tensor.shape.as_list(), output_tensor.shape.as_list())
|
|
57
57
|
|
|
58
|
+
def test_layer_creation_with_dict_inputs(self, transformer_cls):
|
|
59
|
+
test_layer = transformer_cls(
|
|
60
|
+
num_attention_heads=10, inner_dim=2048, inner_activation='relu'
|
|
61
|
+
)
|
|
62
|
+
sequence_length = 21
|
|
63
|
+
width = 80
|
|
64
|
+
# Create a 3-dimensional input (the first dimension is implicit).
|
|
65
|
+
data_tensor = tf_keras.Input(shape=(sequence_length, width))
|
|
66
|
+
# Create a 2-dimensional input (the first dimension is implicit).
|
|
67
|
+
mask_tensor = tf_keras.Input(shape=(sequence_length, sequence_length))
|
|
68
|
+
inputs = {
|
|
69
|
+
'input_tensor': data_tensor,
|
|
70
|
+
'key_value_tensor': data_tensor,
|
|
71
|
+
'attention_mask': mask_tensor,
|
|
72
|
+
}
|
|
73
|
+
output_tensor = test_layer(inputs)
|
|
74
|
+
# The default output of a transformer layer should be the same as the input.
|
|
75
|
+
self.assertEqual(data_tensor.shape.as_list(), output_tensor.shape.as_list())
|
|
76
|
+
|
|
58
77
|
def test_layer_invocation(self, transformer_cls):
|
|
59
78
|
test_layer = transformer_cls(
|
|
60
79
|
num_attention_heads=10, inner_dim=2048, inner_activation='relu')
|
|
@@ -88,6 +107,40 @@ class TransformerEncoderBlockLayerTest(
|
|
|
88
107
|
# Create a model from the test layer.
|
|
89
108
|
model = tf_keras.Model([data_tensor, mask_tensor], output_tensor)
|
|
90
109
|
|
|
110
|
+
# Invoke the model on test data. We can't validate the output data itself
|
|
111
|
+
# (the NN is too complex) but this will rule out structural runtime errors.
|
|
112
|
+
batch_size = 6
|
|
113
|
+
input_data = 10 * np.random.random_sample(
|
|
114
|
+
(batch_size, sequence_length, width)
|
|
115
|
+
)
|
|
116
|
+
# The attention mask should be of shape (batch, from_seq_len, to_seq_len),
|
|
117
|
+
# which here is (batch, sequence_length, sequence_length)
|
|
118
|
+
mask_data = np.random.randint(
|
|
119
|
+
2, size=(batch_size, sequence_length, sequence_length)
|
|
120
|
+
)
|
|
121
|
+
_ = model.predict([input_data, mask_data])
|
|
122
|
+
|
|
123
|
+
def test_layer_invocation_with_dict_inputs(self, transformer_cls):
|
|
124
|
+
test_layer = transformer_cls(
|
|
125
|
+
num_attention_heads=10, inner_dim=2048, inner_activation='relu'
|
|
126
|
+
)
|
|
127
|
+
sequence_length = 21
|
|
128
|
+
width = 80
|
|
129
|
+
# Create a 3-dimensional input (the first dimension is implicit).
|
|
130
|
+
data_tensor = tf_keras.Input(shape=(sequence_length, width))
|
|
131
|
+
# Create a 2-dimensional input (the first dimension is implicit).
|
|
132
|
+
mask_tensor = tf_keras.Input(shape=(sequence_length, sequence_length))
|
|
133
|
+
inputs = {
|
|
134
|
+
'input_tensor': data_tensor,
|
|
135
|
+
'key_value_tensor': data_tensor,
|
|
136
|
+
'attention_mask': mask_tensor,
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
output_tensor = test_layer(inputs)
|
|
140
|
+
|
|
141
|
+
# Create a model from the test layer.
|
|
142
|
+
model = tf_keras.Model([data_tensor, mask_tensor], output_tensor)
|
|
143
|
+
|
|
91
144
|
# Invoke the model on test data. We can't validate the output data itself
|
|
92
145
|
# (the NN is too complex) but this will rule out structural runtime errors.
|
|
93
146
|
batch_size = 6
|
|
@@ -363,8 +363,8 @@ official/nlp/modeling/layers/tn_expand_condense_test.py,sha256=QWq1dJqQUPe5n69K3
|
|
|
363
363
|
official/nlp/modeling/layers/tn_transformer_expand_condense.py,sha256=omzTkCBEk2TOkHEYDEBwve6WsOitX7IIJHzeKXdqDq0,11012
|
|
364
364
|
official/nlp/modeling/layers/tn_transformer_test.py,sha256=pSCONEZRI4J9_6QLTJ3g_ynUYLrRXsJ1c2YMSiOV_60,8893
|
|
365
365
|
official/nlp/modeling/layers/transformer.py,sha256=VjUO-gVj_PnavbT_vSrg5NDKMr0SRSiqSg5ktd42m5M,20087
|
|
366
|
-
official/nlp/modeling/layers/transformer_encoder_block.py,sha256=
|
|
367
|
-
official/nlp/modeling/layers/transformer_encoder_block_test.py,sha256=
|
|
366
|
+
official/nlp/modeling/layers/transformer_encoder_block.py,sha256=5GJgtK1mdTxMDYVWfUoBAI_GvjDL0zO9AWtKCovSZiU,28789
|
|
367
|
+
official/nlp/modeling/layers/transformer_encoder_block_test.py,sha256=7yBgv1UNmfOFre6txF_Rq93RLc1TJwnJ7-Dz4p55sy4,37602
|
|
368
368
|
official/nlp/modeling/layers/transformer_scaffold.py,sha256=qmzhCJvbbFVF9zDqnfO4Zs2JDXwKhK7iEBOhsU6-KpQ,15704
|
|
369
369
|
official/nlp/modeling/layers/transformer_scaffold_test.py,sha256=dRJwesTBKm-mF5mDHrHfVpVNnxa-Wx-fj_4ZHDPTpE0,19920
|
|
370
370
|
official/nlp/modeling/layers/transformer_test.py,sha256=-pk9cdz9UlMpCIkGRkCKsMmjdRGi0seySaaB_2dwmXw,5522
|
|
@@ -1248,9 +1248,9 @@ tensorflow_models/tensorflow_models_test.py,sha256=yiAneltAW3NHSj3fUSvHNBjfq0MGZ
|
|
|
1248
1248
|
tensorflow_models/nlp/__init__.py,sha256=8uQd4wI6Zc4IJMPjtQifMeWVbPFkTxqYh66wfivCOL4,807
|
|
1249
1249
|
tensorflow_models/uplift/__init__.py,sha256=NzaweFf4ZmhRb2l_fuV6bP-2N8oSO3xu6xJqVb1UmpY,999
|
|
1250
1250
|
tensorflow_models/vision/__init__.py,sha256=ks420Ooqzi0hU7HnQpM5rylLaE-YcJdJkBx_umVaXlE,833
|
|
1251
|
-
tf_models_nightly-2.20.0.dev20250807.dist-info/AUTHORS,sha256=1dG3fXVu9jlo7bul8xuix5F5vOnczMk7_yWn4y70uw0,337
|
|
1252
|
-
tf_models_nightly-2.20.0.dev20250807.dist-info/LICENSE,sha256=WxeBS_DejPZQabxtfMOM_xn8qoZNJDQjrT7z2wG1I4U,11512
|
|
1253
|
-
tf_models_nightly-2.20.0.
|
|
1254
|
-
tf_models_nightly-2.20.0.dev20250807.dist-info/WHEEL,sha256=kGT74LWyRUZrL4VgLh6_g12IeVl_9u9ZVhadrgXZUEY,110
|
|
1255
|
-
tf_models_nightly-2.20.0.dev20250807.dist-info/top_level.txt,sha256=gum2FfO5R4cvjl2-QtP-S1aNmsvIZaFFT6VFzU0f4-g,33
|
|
1256
|
-
tf_models_nightly-2.20.0.dev20250807.dist-info/RECORD,,
|
|
1251
|
+
tf_models_nightly-2.20.0.dev20250808.dist-info/AUTHORS,sha256=1dG3fXVu9jlo7bul8xuix5F5vOnczMk7_yWn4y70uw0,337
|
|
1252
|
+
tf_models_nightly-2.20.0.dev20250808.dist-info/LICENSE,sha256=WxeBS_DejPZQabxtfMOM_xn8qoZNJDQjrT7z2wG1I4U,11512
|
|
1253
|
+
tf_models_nightly-2.20.0.dev20250808.dist-info/METADATA,sha256=w4VBtg2nGIUWJ7FrIO7pKf_mUesG5m7vu1ZCyARsVes,1432
|
|
1254
|
+
tf_models_nightly-2.20.0.dev20250808.dist-info/WHEEL,sha256=kGT74LWyRUZrL4VgLh6_g12IeVl_9u9ZVhadrgXZUEY,110
|
|
1255
|
+
tf_models_nightly-2.20.0.dev20250808.dist-info/top_level.txt,sha256=gum2FfO5R4cvjl2-QtP-S1aNmsvIZaFFT6VFzU0f4-g,33
|
|
1256
|
+
tf_models_nightly-2.20.0.dev20250808.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|