tf-keras-nightly 2.17.0.dev2024050509__py3-none-any.whl → 2.19.0.dev2024101709__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tf_keras/__init__.py +1 -1
- tf_keras/src/__init__.py +1 -1
- tf_keras/src/callbacks.py +24 -7
- tf_keras/src/engine/base_layer.py +10 -4
- tf_keras/src/engine/base_layer_v1.py +10 -4
- tf_keras/src/engine/node.py +8 -3
- tf_keras/src/layers/activation/prelu.py +1 -1
- tf_keras/src/layers/attention/base_dense_attention.py +2 -1
- tf_keras/src/layers/convolutional/base_conv.py +1 -1
- tf_keras/src/layers/convolutional/base_depthwise_conv.py +3 -1
- tf_keras/src/layers/convolutional/base_separable_conv.py +3 -1
- tf_keras/src/layers/convolutional/conv1d_transpose.py +3 -1
- tf_keras/src/layers/convolutional/conv2d_transpose.py +3 -1
- tf_keras/src/layers/convolutional/conv3d_transpose.py +3 -1
- tf_keras/src/layers/core/dense.py +1 -1
- tf_keras/src/layers/core/embedding.py +1 -1
- tf_keras/src/layers/locally_connected/locally_connected1d.py +1 -1
- tf_keras/src/layers/locally_connected/locally_connected2d.py +1 -1
- tf_keras/src/layers/normalization/batch_normalization.py +1 -1
- tf_keras/src/layers/normalization/layer_normalization.py +1 -1
- tf_keras/src/layers/rnn/abstract_rnn_cell.py +1 -1
- tf_keras/src/layers/rnn/base_conv_lstm.py +0 -1
- tf_keras/src/layers/rnn/base_conv_rnn.py +3 -1
- tf_keras/src/layers/rnn/base_rnn.py +1 -1
- tf_keras/src/layers/rnn/base_wrapper.py +1 -1
- tf_keras/src/layers/rnn/bidirectional.py +2 -1
- tf_keras/src/layers/rnn/cell_wrappers.py +3 -3
- tf_keras/src/layers/rnn/cudnn_gru.py +6 -3
- tf_keras/src/layers/rnn/cudnn_lstm.py +6 -3
- tf_keras/src/layers/rnn/gru.py +35 -47
- tf_keras/src/layers/rnn/legacy_cell_wrappers.py +3 -3
- tf_keras/src/layers/rnn/legacy_cells.py +20 -25
- tf_keras/src/layers/rnn/lstm.py +35 -50
- tf_keras/src/layers/rnn/simple_rnn.py +0 -1
- tf_keras/src/layers/rnn/stacked_rnn_cells.py +1 -1
- tf_keras/src/layers/rnn/time_distributed.py +0 -1
- tf_keras/src/mixed_precision/autocast_variable.py +12 -6
- tf_keras/src/mixed_precision/test_util.py +6 -5
- tf_keras/src/optimizers/legacy/optimizer_v2.py +9 -2
- tf_keras/src/optimizers/optimizer.py +18 -9
- tf_keras/src/premade_models/linear.py +2 -1
- tf_keras/src/utils/data_utils.py +1 -1
- tf_keras/src/utils/steps_per_execution_tuning.py +1 -1
- tf_keras/src/utils/timeseries_dataset.py +13 -5
- {tf_keras_nightly-2.17.0.dev2024050509.dist-info → tf_keras_nightly-2.19.0.dev2024101709.dist-info}/METADATA +2 -2
- {tf_keras_nightly-2.17.0.dev2024050509.dist-info → tf_keras_nightly-2.19.0.dev2024101709.dist-info}/RECORD +48 -48
- {tf_keras_nightly-2.17.0.dev2024050509.dist-info → tf_keras_nightly-2.19.0.dev2024101709.dist-info}/WHEEL +1 -1
- {tf_keras_nightly-2.17.0.dev2024050509.dist-info → tf_keras_nightly-2.19.0.dev2024101709.dist-info}/top_level.txt +0 -0
@@ -246,11 +246,6 @@ class RNNCell(base_layer.Layer):
         """Integer or TensorShape: size of outputs produced by this cell."""
         raise NotImplementedError("Abstract method")
 
-    def build(self, _):
-        # This tells the parent Layer object that it's OK to call
-        # self.add_weight() inside the call() method.
-        pass
-
     def get_initial_state(self, inputs=None, batch_size=None, dtype=None):
         if inputs is not None:
             # Validate the given batch_size and dtype against inputs if
@@ -445,15 +440,15 @@ class BasicRNNCell(LayerRNNCell):
         return self._num_units
 
     @tf_utils.shape_type_conversion
-    def build(self, inputs_shape):
-        if inputs_shape[-1] is None:
+    def build(self, input_shape):
+        if input_shape[-1] is None:
             raise ValueError(
                 "Expected inputs.shape[-1] to be known, "
-                f"received shape: {inputs_shape}"
+                f"received shape: {input_shape}"
             )
         _check_supported_dtypes(self.dtype)
 
-        input_depth = inputs_shape[-1]
+        input_depth = input_shape[-1]
         self._kernel = self.add_weight(
             _WEIGHTS_VARIABLE_NAME,
             shape=[input_depth + self._num_units, self._num_units],
@@ -464,7 +459,7 @@ class BasicRNNCell(LayerRNNCell):
             initializer=tf.compat.v1.zeros_initializer(dtype=self.dtype),
         )
 
-        self.built = True
+        super().build(input_shape)
 
     def call(self, inputs, state):
         """Most basic RNN: output = new_state = act(W * input + U * state +
@@ -563,14 +558,14 @@ class GRUCell(LayerRNNCell):
         return self._num_units
 
     @tf_utils.shape_type_conversion
-    def build(self, inputs_shape):
-        if inputs_shape[-1] is None:
+    def build(self, input_shape):
+        if input_shape[-1] is None:
             raise ValueError(
                 "Expected inputs.shape[-1] to be known, "
-                f"received shape: {inputs_shape}"
+                f"received shape: {input_shape}"
             )
         _check_supported_dtypes(self.dtype)
-        input_depth = inputs_shape[-1]
+        input_depth = input_shape[-1]
         self._gate_kernel = self.add_weight(
             f"gates/{_WEIGHTS_VARIABLE_NAME}",
             shape=[input_depth + self._num_units, 2 * self._num_units],
@@ -600,7 +595,7 @@ class GRUCell(LayerRNNCell):
             ),
         )
 
-        self.built = True
+        super().build(input_shape)
 
     def call(self, inputs, state):
         """Gated recurrent unit (GRU) with nunits cells."""
@@ -774,14 +769,14 @@ class BasicLSTMCell(LayerRNNCell):
         return self._num_units
 
     @tf_utils.shape_type_conversion
-    def build(self, inputs_shape):
-        if inputs_shape[-1] is None:
+    def build(self, input_shape):
+        if input_shape[-1] is None:
             raise ValueError(
                 "Expected inputs.shape[-1] to be known, "
-                f"received shape: {inputs_shape}"
+                f"received shape: {input_shape}"
             )
         _check_supported_dtypes(self.dtype)
-        input_depth = inputs_shape[-1]
+        input_depth = input_shape[-1]
         h_depth = self._num_units
         self._kernel = self.add_weight(
             _WEIGHTS_VARIABLE_NAME,
@@ -793,7 +788,7 @@ class BasicLSTMCell(LayerRNNCell):
             initializer=tf.compat.v1.zeros_initializer(dtype=self.dtype),
         )
 
-        self.built = True
+        super().build(input_shape)
 
     def call(self, inputs, state):
         """Long short-term memory cell (LSTM).
@@ -1017,14 +1012,14 @@ class LSTMCell(LayerRNNCell):
         return self._output_size
 
     @tf_utils.shape_type_conversion
-    def build(self, inputs_shape):
-        if inputs_shape[-1] is None:
+    def build(self, input_shape):
+        if input_shape[-1] is None:
             raise ValueError(
                 "Expected inputs.shape[-1] to be known, "
-                f"received shape: {inputs_shape}"
+                f"received shape: {input_shape}"
             )
         _check_supported_dtypes(self.dtype)
-        input_depth = inputs_shape[-1]
+        input_depth = input_shape[-1]
         h_depth = self._num_units if self._num_proj is None else self._num_proj
         maybe_partitioner = (
             tf.compat.v1.fixed_size_partitioner(self._num_unit_shards)
@@ -1076,7 +1071,7 @@ class LSTMCell(LayerRNNCell):
             partitioner=maybe_proj_partitioner,
         )
 
-        self.built = True
+        super().build(input_shape)
 
     def call(self, inputs, state):
         """Run one step of LSTM.
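Note on the legacy-cell hunks above: every cell's `build()` now takes the standard `input_shape` argument and ends with `super().build(input_shape)` instead of setting `self.built = True` by hand. A minimal sketch of that pattern (hypothetical `MyCell`, not part of the package; assumes `tf_keras` is installed):

```python
import tensorflow as tf
import tf_keras as keras


class MyCell(keras.layers.Layer):
    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        # The last input dimension must be known to size the kernel.
        if input_shape[-1] is None:
            raise ValueError(
                f"Expected inputs.shape[-1] to be known, received shape: {input_shape}"
            )
        self.kernel = self.add_weight(
            name="kernel",
            shape=[input_shape[-1], self.units],
            initializer="glorot_uniform",
        )
        # Lets the base Layer mark the cell as built.
        super().build(input_shape)

    def call(self, inputs):
        return tf.matmul(inputs, self.kernel)
```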
tf_keras/src/layers/rnn/lstm.py
CHANGED
@@ -236,7 +236,6 @@ class LSTMCell(DropoutRNNCellMixin, base_layer.BaseRandomLayer):
             )
         else:
             self.bias = None
-        self.built = True
 
     def _compute_carry_and_output(self, x, h_tm1, c_tm1):
         """Computes carry and output using split kernels."""
@@ -1063,11 +1062,13 @@ def gpu_lstm(
             mask, time_major
         )
 
-    if not time_major and sequence_lengths is None:
-        inputs = tf.transpose(inputs, perm=(1, 0, 2))
-        seq_axis, batch_axis = (0, 1)
-    else:
-        seq_axis, batch_axis = (0, 1) if time_major else (1, 0)
+    seq_axis, batch_axis = (0, 1) if time_major else (1, 0)
+
+    if sequence_lengths is None:
+        max_sequence_length = tf.shape(inputs)[seq_axis]
+        batch_size = tf.shape(inputs)[batch_axis]
+        sequence_lengths = tf.fill([batch_size], max_sequence_length)
+
     # For init_h and init_c, cuDNN expects one more dim of num_layers before or
     # after batch dim for time major or batch major inputs respectively
     init_h = tf.expand_dims(init_h, axis=seq_axis)
@@ -1099,52 +1100,36 @@ def gpu_lstm(
         transpose_weights=True,
     )
 
-    if sequence_lengths is not None:
-
-
-
-
-
-            inputs
-
-
-
-
-                batch_axis=batch_axis,
-            )
-        outputs, h, c, _, _ = tf.raw_ops.CudnnRNNV3(
-            input=inputs,
-            input_h=init_h,
-            input_c=init_c,
-            params=params,
-            is_training=True,
-            rnn_mode="lstm",
-            sequence_lengths=sequence_lengths,
-            time_major=time_major,
+    if go_backwards:
+        # Three reversals are required. E.g.,
+        # normal input = [1, 2, 3, 0, 0] # where 0 need to be masked
+        # reversed_input_to_cudnn = [3, 2, 1, 0, 0]
+        # output_from_cudnn = [6, 5, 4, 0, 0]
+        # expected_output = [0, 0, 6, 5 ,4]
+        inputs = tf.reverse_sequence(
+            inputs,
+            sequence_lengths,
+            seq_axis=seq_axis,
+            batch_axis=batch_axis,
         )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            input=inputs,
-            input_h=init_h,
-            input_c=init_c,
-            params=params,
-            is_training=True,
-            rnn_mode="lstm",
+    outputs, h, c, _, _ = tf.raw_ops.CudnnRNNV3(
+        input=inputs,
+        input_h=init_h,
+        input_c=init_c,
+        params=params,
+        is_training=True,
+        rnn_mode="lstm",
+        sequence_lengths=sequence_lengths,
+        time_major=time_major,
+    )
+    if go_backwards:
+        outputs = tf.reverse_sequence(
+            outputs,
+            sequence_lengths,
+            seq_axis=seq_axis,
+            batch_axis=batch_axis,
         )
+        outputs = tf.reverse(outputs, axis=[seq_axis])
 
     last_output = outputs[-1]
     if not time_major and sequence_lengths is None and return_sequences:
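Note on the rewritten cuDNN path above: `sequence_lengths` is now always materialized (filled with the max timestep count when no mask is given), and for `go_backwards` only the valid prefix of each sequence is reversed before and after the `CudnnRNNV3` call. A small self-contained sketch of that reversal behaviour (toy tensors, not the library code):

```python
import tensorflow as tf

# Batch-major toy batch: one padded sequence per row (0 marks padding).
inputs = tf.constant([[1, 2, 3, 0, 0],
                      [4, 5, 0, 0, 0]])
seq_axis, batch_axis = 1, 0  # batch-major, i.e. the time_major=False case

# Without a mask, the new code fills the lengths with the max timestep count.
filled = tf.fill([tf.shape(inputs)[batch_axis]], tf.shape(inputs)[seq_axis])
print(filled.numpy())  # [5 5]

# With real lengths, only the valid prefix of each row is reversed;
# the trailing padding stays where it is.
lengths = tf.constant([3, 2])
reversed_inputs = tf.reverse_sequence(
    inputs, lengths, seq_axis=seq_axis, batch_axis=batch_axis
)
print(reversed_inputs.numpy())  # [[3 2 1 0 0]
                                #  [5 4 0 0 0]]
```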
tf_keras/src/layers/rnn/stacked_rnn_cells.py
CHANGED
@@ -166,6 +166,7 @@ class StackedRNNCells(base_layer.Layer):
 
     @tf_utils.shape_type_conversion
     def build(self, input_shape):
+        super().build(input_shape)
         if isinstance(input_shape, list):
             input_shape = input_shape[0]
 
@@ -195,7 +196,6 @@ class StackedRNNCells(base_layer.Layer):
             input_shape = tuple(
                 [batch_size] + tf.TensorShape(output_dim).as_list()
             )
-        self.built = True
 
     def get_config(self):
         cells = []
tf_keras/src/layers/rnn/time_distributed.py
CHANGED
@@ -135,7 +135,6 @@ class TimeDistributed(Wrapper):
         )
         child_input_shape = tf_utils.convert_shapes(child_input_shape)
         super().build(tuple(child_input_shape))
-        self.built = True
 
     def compute_output_shape(self, input_shape):
         input_shape = tf_utils.convert_shapes(input_shape, to_tuples=False)
tf_keras/src/mixed_precision/autocast_variable.py
CHANGED
@@ -124,20 +124,21 @@ class AutoCastVariable(tf.Variable, tf.__internal__.types.Tensor):
     def _should_cast(self):
         """Returns True if this variable should be casted when accessed."""
         autocast_dtype = getattr(_autocast_dtype, "dtype", None)
-        return autocast_dtype is not None and self.dtype != autocast_dtype
+        return autocast_dtype is not None and self.true_dtype != autocast_dtype
 
     @property
     def dtype(self):
-        """The dtype of the underlying variable, before any casts are done."""
-        return self._variable.dtype
+        """The dtype when the value is accessed, that is after casting."""
+        return self._cast_dtype
 
     @property
     def true_dtype(self):
-        """
+        """The dtype of the underlying variable, before any casts are done."""
         return self._variable.dtype
 
     @property
     def _cast_dtype(self):
+        """The dtype after casting."""
         dtype = getattr(_autocast_dtype, "dtype", None)
         return dtype or self._variable.dtype
 
@@ -202,7 +203,8 @@ class AutoCastVariable(tf.Variable, tf.__internal__.types.Tensor):
         if tf.executing_eagerly() and not self._in_graph_mode:
             repr_str = (
                 "<AutoCastVariable '{v.name}' shape={v.shape} "
-                "dtype={v.dtype.name} dtype_to_cast_to={v._cast_dtype.name}, "
+                "dtype={v.true_dtype.name} "
+                "dtype_to_cast_to={v._cast_dtype.name}, "
                 "numpy={np_repr}>"
             )
             return repr_str.format(
@@ -211,7 +213,8 @@ class AutoCastVariable(tf.Variable, tf.__internal__.types.Tensor):
         else:
             repr_str = (
                 "<AutoCastVariable '{v.name}' shape={v.shape} "
-                "dtype={v.dtype.name} dtype_to_cast_to={v._cast_dtype.name}>"
+                "dtype={v.true_dtype.name} "
+                "dtype_to_cast_to={v._cast_dtype.name}>"
             )
             return repr_str.format(v=self)
 
@@ -261,6 +264,9 @@ class AutoCastVariable(tf.Variable, tf.__internal__.types.Tensor):
     def _apply_assign_update(
         self, update_fn, value, use_locking=None, name=None, read_value=True
     ):
+        # In auto cast scope, we cast back to the actual variable dtype.
+        if self._should_cast():
+            value = tf.cast(value, self.true_dtype)
         # TODO(b/146181571): This logic can be simplified once
         # DistributedVariable.assign returns a DistributedVariable. Currently
         # for MirroredStrategy, it returns a Mirrored value.
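Note on the `AutoCastVariable` changes above: `dtype` now reports the cast dtype, `true_dtype` keeps the storage dtype, and values assigned inside an autocast scope are cast back to the storage dtype. A hedged sketch of how that surfaces in user code under a mixed precision policy (assumes `tf_keras` is installed; dtypes as described by the diff, not verified here):

```python
import tf_keras as keras

keras.mixed_precision.set_global_policy("mixed_float16")

layer = keras.layers.Dense(4)
layer.build((None, 8))

kernel = layer.kernel  # an AutoCastVariable under mixed_float16
print(kernel.true_dtype)  # float32: storage dtype of the underlying variable
# Outside an autocast scope no cast dtype is active, so dtype matches
# true_dtype; inside the layer's call() it would read as float16.
print(kernel.dtype)
```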
tf_keras/src/mixed_precision/test_util.py
CHANGED
@@ -171,14 +171,14 @@ class MultiplyLayer(AssertTypeLayer):
             activity_regularizer=self._activity_regularizer, **kwargs
         )
 
-    def build(self, _):
+    def build(self, input_shape):
         self.v = self.add_weight(
             self._var_name,
             (),
             initializer="ones",
             regularizer=self._regularizer,
         )
-        self.built = True
+        super().build(input_shape)
 
     def call(self, inputs):
         self.assert_input_types(inputs)
@@ -205,7 +205,7 @@ class MultiplyLayer(AssertTypeLayer):
 class MultiplyLayerWithoutAutoCast(MultiplyLayer):
     """Same as MultiplyLayer, but does not use AutoCastVariables."""
 
-    def build(self, _):
+    def build(self, input_shape):
         dtype = self.dtype
         if dtype in ("float16", "bfloat16"):
             dtype = "float32"
@@ -214,10 +214,11 @@ class MultiplyLayerWithoutAutoCast(MultiplyLayer):
             (),
             initializer="ones",
             dtype=dtype,
-            experimental_autocast=False,
+            autocast=False,
             regularizer=self._regularizer,
         )
-        self.built = True
+        # Call Layer.build() to skip MultiplyLayer.build() which we override.
+        base_layer.Layer.build(self, input_shape)
 
     def call(self, inputs):
         self.assert_input_types(inputs)
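Note on the test-layer change above: the weight is created with `autocast=False`, so it stays a plain variable rather than an AutoCastVariable, and `base_layer.Layer.build()` is called directly to skip the parent class's `build()`. A minimal sketch of opting a single weight out of autocasting (hypothetical `ScaleLayer`; assumes the `autocast` argument accepted by `add_weight` in this version of the package):

```python
import tensorflow as tf
import tf_keras as keras


class ScaleLayer(keras.layers.Layer):
    def build(self, input_shape):
        # autocast=False keeps the weight in its storage dtype even under a
        # mixed precision policy (no AutoCastVariable wrapping).
        self.scale = self.add_weight(
            name="scale",
            shape=(),
            initializer="ones",
            dtype="float32",
            autocast=False,
        )
        super().build(input_shape)

    def call(self, inputs):
        # Cast explicitly where needed, since the weight is never auto-cast.
        return inputs * tf.cast(self.scale, inputs.dtype)
```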
tf_keras/src/optimizers/legacy/optimizer_v2.py
CHANGED
@@ -1033,6 +1033,13 @@ class OptimizerV2(tf.__internal__.tracking.Trackable):
         slot_dict = self._slots.setdefault(var_key, {})
         weight = slot_dict.get(slot_name, None)
         if weight is None:
+            # Under a mixed precision policy, variables report their "cast"
+            # dtype. However, we want to use the original dtype for slots.
+            if hasattr(var, "true_dtype"):
+                dtype = var.true_dtype
+            else:
+                dtype = var.dtype
+
             if isinstance(initializer, str) or callable(initializer):
                 initializer = initializers.get(initializer)
                 if isinstance(
@@ -1043,7 +1050,7 @@ class OptimizerV2(tf.__internal__.tracking.Trackable):
                 else:
                     slot_shape = var.shape
                 initial_value = functools.partial(
-                    initializer, shape=slot_shape, dtype=var.dtype
+                    initializer, shape=slot_shape, dtype=dtype
                 )
             else:
                 initial_value = initializer
@@ -1064,7 +1071,7 @@ class OptimizerV2(tf.__internal__.tracking.Trackable):
             with strategy.extended.colocate_vars_with(var):
                 weight = tf.Variable(
                     name=f"{var._shared_name}/{slot_name}",
-                    dtype=var.dtype,
+                    dtype=dtype,
                     trainable=False,
                     initial_value=initial_value,
                 )
tf_keras/src/optimizers/optimizer.py
CHANGED
@@ -498,26 +498,28 @@ class _BaseOptimizer(tf.__internal__.tracking.AutoTrackable):
         Returns:
           An optimizer variable.
         """
+        # Under a mixed precision policy, variables report their "cast"
+        # dtype. However, we want to use the original dtype for slots.
+        if hasattr(model_variable, "true_dtype"):
+            dtype = model_variable.true_dtype
+        else:
+            dtype = model_variable.dtype
         if initial_value is None:
             if shape is None:
                 if model_variable.shape.rank is None:
                     # When the rank is None, we cannot get a concrete
                     # `model_variable.shape`, we use dynamic shape.
-                    initial_value = tf.zeros_like(
-                        model_variable, dtype=model_variable.dtype
-                    )
+                    initial_value = tf.zeros_like(model_variable, dtype=dtype)
                 else:
                     # We cannot always use `zeros_like`, because some cases
                     # the shape exists while values don't.
-                    initial_value = tf.zeros(
-                        model_variable.shape, dtype=model_variable.dtype
-                    )
+                    initial_value = tf.zeros(model_variable.shape, dtype=dtype)
             else:
-                initial_value = tf.zeros(shape, dtype=model_variable.dtype)
+                initial_value = tf.zeros(shape, dtype=dtype)
         variable = tf.Variable(
             initial_value=initial_value,
             name=f"{variable_name}/{model_variable._shared_name}",
-            dtype=model_variable.dtype,
+            dtype=dtype,
             trainable=False,
         )
         # If model_variable is a shard of a ShardedVariable, we should add a
@@ -1188,10 +1190,17 @@ class Optimizer(_BaseOptimizer):
                     self._mesh, rank=initial_value.shape.rank
                 ),
             )
+        # Under a mixed precision policy, variables report their "cast"
+        # dtype. However, we want to use the original dtype for optimizer
+        # variables.
+        if hasattr(model_variable, "true_dtype"):
+            dtype = model_variable.true_dtype
+        else:
+            dtype = model_variable.dtype
         variable = tf.experimental.dtensor.DVariable(
             initial_value=initial_value,
             name=f"{variable_name}/{model_variable._shared_name}",
-            dtype=model_variable.dtype,
+            dtype=dtype,
             trainable=False,
         )
         self._variables.append(variable)
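Note on the optimizer hunks above: both the legacy `OptimizerV2` and the new optimizer now resolve the dtype for optimizer-owned variables from `true_dtype` when the model variable is an AutoCastVariable, and fall back to `dtype` otherwise. A condensed sketch of that fallback (hypothetical `_slot_dtype` helper, not part of the package):

```python
import tensorflow as tf


def _slot_dtype(model_variable):
    # AutoCastVariables report their cast dtype via `.dtype` (e.g. float16
    # under mixed_float16); optimizer slots should use the storage dtype.
    if hasattr(model_variable, "true_dtype"):
        return model_variable.true_dtype
    return model_variable.dtype


v = tf.Variable(tf.zeros([3]))  # a plain variable: falls back to v.dtype
momentum = tf.Variable(
    tf.zeros(v.shape, dtype=_slot_dtype(v)), trainable=False
)
print(momentum.dtype)  # float32
```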
tf_keras/src/premade_models/linear.py
CHANGED
@@ -156,7 +156,8 @@ class LinearModel(training.Model):
             )
         else:
             self.bias = None
-        self.built = True
+        # Call Layer.build() to skip Model.build() which we override here.
+        base_layer.Layer.build(self, input_shape)
 
     def call(self, inputs):
         result = None
tf_keras/src/utils/data_utils.py
CHANGED
@@ -1108,7 +1108,7 @@ def pad_sequences(
         maxlen = np.max(lengths)
 
     is_dtype_str = np.issubdtype(dtype, np.str_) or np.issubdtype(
-        dtype, np.unicode_
+        dtype, np.str_
     )
     if isinstance(value, str) and dtype != object and not is_dtype_str:
         raise ValueError(
tf_keras/src/utils/timeseries_dataset.py
CHANGED
@@ -110,16 +110,24 @@ def timeseries_dataset_from_array(
     timesteps to predict the next timestep, you would use:
 
     ```python
-    input_data = data[:-10]
-    targets = data[10:]
+    data = tf.range(15)
+    sequence_length = 10
+    input_data = data[:]
+    targets = data[sequence_length:]
     dataset = tf.keras.utils.timeseries_dataset_from_array(
-        input_data, targets, sequence_length=10)
+        input_data, targets, sequence_length=sequence_length
+    )
     for batch in dataset:
       inputs, targets = batch
-      assert np.array_equal(inputs[0], data[:10])  # First sequence: steps [0-9]
+      # First sequence: steps [0-9]
+      assert np.array_equal(inputs[0], data[:sequence_length])
       # Corresponding target: step 10
-      assert np.array_equal(targets[0], data[10])
+      assert np.array_equal(targets[0], data[sequence_length])
       break
+    # To view the generated dataset
+    for batch in dataset.as_numpy_iterator():
+      input, label = batch
+      print(f"Input:{input}, target:{label}")
     ```
 
     Example 3: Temporal regression for many-to-many architectures.
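Note on the docstring change above: the example is now self-contained. A runnable version for reference, assuming `tf_keras` is installed (names follow the new docstring):

```python
import numpy as np
import tf_keras as keras

data = np.arange(15)
sequence_length = 10
input_data = data[:]
targets = data[sequence_length:]

dataset = keras.utils.timeseries_dataset_from_array(
    input_data, targets, sequence_length=sequence_length
)
for inputs, labels in dataset:
    # First window covers steps [0-9]; its target is step 10.
    assert np.array_equal(inputs[0], data[:sequence_length])
    assert np.array_equal(labels[0], data[sequence_length])
    break
```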
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: tf_keras-nightly
-Version: 2.17.0.dev2024050509
+Version: 2.19.0.dev2024101709
 Summary: Deep learning for humans.
 Home-page: https://keras.io/
 Download-URL: https://github.com/keras-team/tf-keras/tags
@@ -26,7 +26,7 @@ Classifier: Topic :: Software Development
 Classifier: Topic :: Software Development :: Libraries
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.9
-Requires-Dist: tf-nightly ~=2.17.0.dev
+Requires-Dist: tf-nightly ~=2.19.0.dev
 
 TF-Keras is a deep learning API written in Python,
 running on top of the machine learning platform TensorFlow.