tf-keras-nightly 2.19.0.dev2024121210__py3-none-any.whl → 2.21.0.dev2025123010__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. tf_keras/__init__.py +1 -1
  2. tf_keras/protobuf/projector_config_pb2.py +23 -12
  3. tf_keras/protobuf/saved_metadata_pb2.py +21 -10
  4. tf_keras/protobuf/versions_pb2.py +19 -8
  5. tf_keras/src/__init__.py +1 -1
  6. tf_keras/src/backend.py +1 -1
  7. tf_keras/src/datasets/boston_housing.py +14 -5
  8. tf_keras/src/datasets/cifar10.py +9 -1
  9. tf_keras/src/datasets/cifar100.py +7 -1
  10. tf_keras/src/datasets/fashion_mnist.py +16 -4
  11. tf_keras/src/datasets/imdb.py +8 -0
  12. tf_keras/src/datasets/mnist.py +9 -3
  13. tf_keras/src/datasets/reuters.py +8 -0
  14. tf_keras/src/engine/base_layer.py +235 -97
  15. tf_keras/src/engine/base_layer_utils.py +17 -5
  16. tf_keras/src/engine/base_layer_v1.py +12 -3
  17. tf_keras/src/engine/data_adapter.py +35 -19
  18. tf_keras/src/engine/functional.py +36 -15
  19. tf_keras/src/engine/input_layer.py +9 -0
  20. tf_keras/src/engine/input_spec.py +11 -1
  21. tf_keras/src/engine/sequential.py +29 -12
  22. tf_keras/src/layers/activation/softmax.py +26 -11
  23. tf_keras/src/layers/attention/multi_head_attention.py +8 -1
  24. tf_keras/src/layers/core/tf_op_layer.py +4 -0
  25. tf_keras/src/layers/normalization/spectral_normalization.py +29 -22
  26. tf_keras/src/layers/rnn/cell_wrappers.py +13 -1
  27. tf_keras/src/metrics/confusion_metrics.py +51 -4
  28. tf_keras/src/models/sharpness_aware_minimization.py +17 -7
  29. tf_keras/src/preprocessing/sequence.py +2 -2
  30. tf_keras/src/saving/legacy/saved_model/save_impl.py +28 -12
  31. tf_keras/src/saving/legacy/saving_utils.py +14 -2
  32. tf_keras/src/saving/saving_api.py +18 -5
  33. tf_keras/src/saving/saving_lib.py +1 -1
  34. tf_keras/src/utils/layer_utils.py +45 -3
  35. tf_keras/src/utils/metrics_utils.py +4 -1
  36. tf_keras/src/utils/tf_utils.py +2 -2
  37. {tf_keras_nightly-2.19.0.dev2024121210.dist-info → tf_keras_nightly-2.21.0.dev2025123010.dist-info}/METADATA +14 -3
  38. {tf_keras_nightly-2.19.0.dev2024121210.dist-info → tf_keras_nightly-2.21.0.dev2025123010.dist-info}/RECORD +40 -62
  39. {tf_keras_nightly-2.19.0.dev2024121210.dist-info → tf_keras_nightly-2.21.0.dev2025123010.dist-info}/WHEEL +1 -1
  40. tf_keras/src/layers/preprocessing/benchmarks/bucketized_column_dense_benchmark.py +0 -85
  41. tf_keras/src/layers/preprocessing/benchmarks/category_encoding_benchmark.py +0 -84
  42. tf_keras/src/layers/preprocessing/benchmarks/category_hash_dense_benchmark.py +0 -89
  43. tf_keras/src/layers/preprocessing/benchmarks/category_hash_varlen_benchmark.py +0 -89
  44. tf_keras/src/layers/preprocessing/benchmarks/category_vocab_file_dense_benchmark.py +0 -110
  45. tf_keras/src/layers/preprocessing/benchmarks/category_vocab_file_varlen_benchmark.py +0 -103
  46. tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_dense_benchmark.py +0 -87
  47. tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_indicator_dense_benchmark.py +0 -96
  48. tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_indicator_varlen_benchmark.py +0 -96
  49. tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_varlen_benchmark.py +0 -87
  50. tf_keras/src/layers/preprocessing/benchmarks/discretization_adapt_benchmark.py +0 -109
  51. tf_keras/src/layers/preprocessing/benchmarks/embedding_dense_benchmark.py +0 -86
  52. tf_keras/src/layers/preprocessing/benchmarks/embedding_varlen_benchmark.py +0 -89
  53. tf_keras/src/layers/preprocessing/benchmarks/hashed_crossing_benchmark.py +0 -90
  54. tf_keras/src/layers/preprocessing/benchmarks/hashing_benchmark.py +0 -105
  55. tf_keras/src/layers/preprocessing/benchmarks/image_preproc_benchmark.py +0 -159
  56. tf_keras/src/layers/preprocessing/benchmarks/index_lookup_adapt_benchmark.py +0 -135
  57. tf_keras/src/layers/preprocessing/benchmarks/index_lookup_forward_benchmark.py +0 -144
  58. tf_keras/src/layers/preprocessing/benchmarks/normalization_adapt_benchmark.py +0 -124
  59. tf_keras/src/layers/preprocessing/benchmarks/weighted_embedding_varlen_benchmark.py +0 -99
  60. tf_keras/src/saving/legacy/saved_model/create_test_saved_model.py +0 -37
  61. tf_keras/src/tests/keras_doctest.py +0 -159
  62. {tf_keras_nightly-2.19.0.dev2024121210.dist-info → tf_keras_nightly-2.21.0.dev2025123010.dist-info}/top_level.txt +0 -0
@@ -1,86 +0,0 @@
1
- # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ==============================================================================
15
- """Benchmark for KPL implementation of embedding column with dense inputs."""
16
-
17
- import tensorflow.compat.v2 as tf
18
-
19
- import tf_keras.src as keras
20
- from tf_keras.src.layers.preprocessing.benchmarks import (
21
- feature_column_benchmark as fc_bm,
22
- )
23
-
24
- # isort: off
25
- from tensorflow.python.eager.def_function import (
26
- function as tf_function,
27
- )
28
-
29
- NUM_REPEATS = 10
30
- BATCH_SIZES = [32, 256]
31
-
32
-
33
- ### KPL AND FC IMPLEMENTATION BENCHMARKS ###
34
- def embedding_varlen(batch_size, max_length):
35
- """Benchmark a variable-length embedding."""
36
- # Data and constants.
37
- embedding_size = 32768
38
- data = fc_bm.create_data(
39
- max_length, batch_size * NUM_REPEATS, embedding_size - 1, dtype=int
40
- )
41
-
42
- # TF-Keras implementation
43
- model = keras.Sequential()
44
- model.add(keras.Input(shape=(None,), name="data", dtype=tf.int64))
45
- model.add(keras.layers.Embedding(embedding_size, 256))
46
- model.add(keras.layers.Lambda(lambda x: tf.reduce_mean(x, axis=-1)))
47
-
48
- # FC implementation
49
- fc = tf.feature_column.embedding_column(
50
- tf.feature_column.categorical_column_with_identity(
51
- "data", num_buckets=embedding_size - 1
52
- ),
53
- dimension=256,
54
- )
55
-
56
- # Wrap the FC implementation in a tf.function for a fair comparison
57
- @tf_function()
58
- def fc_fn(tensors):
59
- fc.transform_feature(
60
- tf.__internal__.feature_column.FeatureTransformationCache(tensors),
61
- None,
62
- )
63
-
64
- # Benchmark runs
65
- keras_data = {"data": data.to_tensor(default_value=0)}
66
- k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
67
-
68
- fc_data = {"data": data.to_tensor(default_value=0)}
69
- fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
70
-
71
- return k_avg_time, fc_avg_time
72
-
73
-
74
- class BenchmarkLayer(fc_bm.LayerBenchmark):
75
- """Benchmark the layer forward pass."""
76
-
77
- def benchmark_layer(self):
78
- for batch in BATCH_SIZES:
79
- name = f"embedding|dense|batch_{batch}"
80
- k_time, f_time = embedding_varlen(batch_size=batch, max_length=256)
81
- self.report(name, k_time, f_time, NUM_REPEATS)
82
-
83
-
84
- if __name__ == "__main__":
85
- tf.test.main()
86
-
@@ -1,89 +0,0 @@
1
- # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ==============================================================================
15
- """Benchmark for KPL implementation of embedding column with varying-length
16
- inputs."""
17
-
18
- import tensorflow.compat.v2 as tf
19
-
20
- import tf_keras.src as keras
21
- from tf_keras.src.layers.preprocessing.benchmarks import (
22
- feature_column_benchmark as fc_bm,
23
- )
24
-
25
- # isort: off
26
- from tensorflow.python.eager.def_function import (
27
- function as tf_function,
28
- )
29
-
30
- NUM_REPEATS = 10
31
- BATCH_SIZES = [32, 256]
32
-
33
-
34
- ### KPL AND FC IMPLEMENTATION BENCHMARKS ###
35
- def embedding_varlen(batch_size, max_length):
36
- """Benchmark a variable-length embedding."""
37
- # Data and constants.
38
- embedding_size = 32768
39
- data = fc_bm.create_data(
40
- max_length, batch_size * NUM_REPEATS, embedding_size - 1, dtype=int
41
- )
42
-
43
- # TF-Keras implementation
44
- model = keras.Sequential()
45
- model.add(
46
- keras.Input(shape=(None,), ragged=True, name="data", dtype=tf.int64)
47
- )
48
- model.add(keras.layers.Embedding(embedding_size, 256))
49
- model.add(keras.layers.Lambda(lambda x: tf.reduce_mean(x, axis=-1)))
50
-
51
- # FC implementation
52
- fc = tf.feature_column.embedding_column(
53
- tf.feature_column.categorical_column_with_identity(
54
- "data", num_buckets=embedding_size - 1
55
- ),
56
- dimension=256,
57
- )
58
-
59
- # Wrap the FC implementation in a tf.function for a fair comparison
60
- @tf_function()
61
- def fc_fn(tensors):
62
- fc.transform_feature(
63
- tf.__internal__.feature_column.FeatureTransformationCache(tensors),
64
- None,
65
- )
66
-
67
- # Benchmark runs
68
- keras_data = {"data": data}
69
- k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
70
-
71
- fc_data = {"data": data.to_sparse()}
72
- fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
73
-
74
- return k_avg_time, fc_avg_time
75
-
76
-
77
- class BenchmarkLayer(fc_bm.LayerBenchmark):
78
- """Benchmark the layer forward pass."""
79
-
80
- def benchmark_layer(self):
81
- for batch in BATCH_SIZES:
82
- name = f"embedding|varlen|batch_{batch}"
83
- k_time, f_time = embedding_varlen(batch_size=batch, max_length=256)
84
- self.report(name, k_time, f_time, NUM_REPEATS)
85
-
86
-
87
- if __name__ == "__main__":
88
- tf.test.main()
89
-
@@ -1,90 +0,0 @@
1
- # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ==============================================================================
15
- """Benchmark for KPL implementation of categorical cross hash columns with dense
16
- inputs."""
17
-
18
-
19
- import tensorflow.compat.v2 as tf
20
-
21
- import tf_keras.src as keras
22
- from tf_keras.src.layers.preprocessing import hashed_crossing
23
- from tf_keras.src.layers.preprocessing.benchmarks import (
24
- feature_column_benchmark as fc_bm,
25
- )
26
-
27
- # isort: off
28
- from tensorflow.python.eager.def_function import (
29
- function as tf_function,
30
- )
31
-
32
- NUM_REPEATS = 10
33
- BATCH_SIZES = [32, 256]
34
-
35
-
36
- def embedding_varlen(batch_size):
37
- """Benchmark a variable-length embedding."""
38
- # Data and constants.
39
- num_buckets = 10000
40
- data_a = tf.random.uniform(
41
- shape=(batch_size * NUM_REPEATS, 1), maxval=32768, dtype=tf.int64
42
- )
43
- data_b = tf.strings.as_string(data_a)
44
-
45
- # TF-Keras implementation
46
- input_1 = keras.Input(shape=(1,), name="data_a", dtype=tf.int64)
47
- input_2 = keras.Input(shape=(1,), name="data_b", dtype=tf.string)
48
- outputs = hashed_crossing.HashedCrossing(num_buckets)([input_1, input_2])
49
- model = keras.Model([input_1, input_2], outputs)
50
-
51
- # FC implementation
52
- fc = tf.feature_column.crossed_column(["data_a", "data_b"], num_buckets)
53
-
54
- # Wrap the FC implementation in a tf.function for a fair comparison
55
- @tf_function()
56
- def fc_fn(tensors):
57
- fc.transform_feature(
58
- tf.__internal__.feature_column.FeatureTransformationCache(tensors),
59
- None,
60
- )
61
-
62
- # Benchmark runs
63
- keras_data = {
64
- "data_a": data_a,
65
- "data_b": data_b,
66
- }
67
- k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
68
-
69
- fc_data = {
70
- "data_a": data_a,
71
- "data_b": data_b,
72
- }
73
- fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
74
-
75
- return k_avg_time, fc_avg_time
76
-
77
-
78
- class BenchmarkLayer(fc_bm.LayerBenchmark):
79
- """Benchmark the layer forward pass."""
80
-
81
- def benchmark_layer(self):
82
- for batch in BATCH_SIZES:
83
- name = f"hashed_cross|dense|batch_{batch}"
84
- k_time, f_time = embedding_varlen(batch_size=batch)
85
- self.report(name, k_time, f_time, NUM_REPEATS)
86
-
87
-
88
- if __name__ == "__main__":
89
- tf.test.main()
90
-
@@ -1,105 +0,0 @@
1
- # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ==============================================================================
15
- """Benchmark for TF-Keras hashing preprocessing layer."""
16
-
17
- import itertools
18
- import random
19
- import string
20
- import time
21
-
22
- import numpy as np
23
- import tensorflow.compat.v2 as tf
24
-
25
- import tf_keras.src as keras
26
- from tf_keras.src.layers.preprocessing import hashing
27
-
28
-
29
- # word_gen creates random sequences of ASCII letters (both lowercase and upper).
30
- # The number of unique strings is ~2,700.
31
- def word_gen():
32
- for _ in itertools.count(1):
33
- yield "".join(random.choice(string.ascii_letters) for i in range(2))
34
-
35
-
36
- class BenchmarkLayer(tf.test.Benchmark):
37
- """Benchmark the layer forward pass."""
38
-
39
- def run_dataset_implementation(self, batch_size):
40
- num_repeats = 5
41
- starts = []
42
- ends = []
43
- for _ in range(num_repeats):
44
- ds = tf.data.Dataset.from_generator(
45
- word_gen, tf.string, tf.TensorShape([])
46
- )
47
- ds = ds.shuffle(batch_size * 100)
48
- ds = ds.batch(batch_size)
49
- num_batches = 5
50
- ds = ds.take(num_batches)
51
- ds = ds.prefetch(num_batches)
52
- starts.append(time.time())
53
- # Benchmarked code begins here.
54
- for i in ds:
55
- _ = tf.strings.to_hash_bucket(i, num_buckets=2)
56
- # Benchmarked code ends here.
57
- ends.append(time.time())
58
-
59
- avg_time = np.mean(np.array(ends) - np.array(starts)) / num_batches
60
- return avg_time
61
-
62
- def bm_layer_implementation(self, batch_size):
63
- input_1 = keras.Input(shape=(None,), dtype=tf.string, name="word")
64
- layer = hashing.Hashing(num_bins=2)
65
- _ = layer(input_1)
66
-
67
- num_repeats = 5
68
- starts = []
69
- ends = []
70
- for _ in range(num_repeats):
71
- ds = tf.data.Dataset.from_generator(
72
- word_gen, tf.string, tf.TensorShape([])
73
- )
74
- ds = ds.shuffle(batch_size * 100)
75
- ds = ds.batch(batch_size)
76
- num_batches = 5
77
- ds = ds.take(num_batches)
78
- ds = ds.prefetch(num_batches)
79
- starts.append(time.time())
80
- # Benchmarked code begins here.
81
- for i in ds:
82
- _ = layer(i)
83
- # Benchmarked code ends here.
84
- ends.append(time.time())
85
-
86
- avg_time = np.mean(np.array(ends) - np.array(starts)) / num_batches
87
- name = f"hashing|batch_{batch_size}"
88
- baseline = self.run_dataset_implementation(batch_size)
89
- extras = {
90
- "dataset implementation baseline": baseline,
91
- "delta seconds": (baseline - avg_time),
92
- "delta percent": ((baseline - avg_time) / baseline) * 100,
93
- }
94
- self.report_benchmark(
95
- iters=num_repeats, wall_time=avg_time, extras=extras, name=name
96
- )
97
-
98
- def benchmark_vocab_size_by_batch(self):
99
- for batch in [32, 64, 256]:
100
- self.bm_layer_implementation(batch_size=batch)
101
-
102
-
103
- if __name__ == "__main__":
104
- tf.test.main()
105
-
@@ -1,159 +0,0 @@
1
- # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ==============================================================================
15
- """Benchmark for TF-Keras image preprocessing layer."""
16
-
17
- import functools
18
- import time
19
-
20
- import numpy as np
21
- import tensorflow.compat.v2 as tf
22
-
23
- import tf_keras.src as keras
24
- from tf_keras.src.layers.preprocessing import image_preprocessing
25
-
26
- LOWER = 0.2
27
- UPPER = 0.4
28
- BATCH_SIZE = 32
29
-
30
-
31
- def rotate(inputs):
32
- """rotate image."""
33
- inputs_shape = tf.shape(inputs)
34
- batch_size = inputs_shape[0]
35
- img_hd = tf.cast(inputs_shape[1], tf.float32)
36
- img_wd = tf.cast(inputs_shape[2], tf.float32)
37
- min_angle = LOWER * 2.0 * np.pi
38
- max_angle = UPPER * 2.0 * np.pi
39
- angles = tf.random.uniform(
40
- shape=[batch_size], minval=min_angle, maxval=max_angle
41
- )
42
- return image_preprocessing.transform(
43
- inputs, image_preprocessing.get_rotation_matrix(angles, img_hd, img_wd)
44
- )
45
-
46
-
47
- def zoom(inputs):
48
- """zoom image."""
49
- inputs_shape = tf.shape(inputs)
50
- batch_size = inputs_shape[0]
51
- img_hd = tf.cast(inputs_shape[1], tf.float32)
52
- img_wd = tf.cast(inputs_shape[2], tf.float32)
53
- height_zoom = tf.random.uniform(
54
- shape=[batch_size, 1], minval=1.0 + LOWER, maxval=1.0 + UPPER
55
- )
56
- width_zoom = tf.random.uniform(
57
- shape=[batch_size, 1], minval=1.0 + LOWER, maxval=1.0 + UPPER
58
- )
59
- zooms = tf.cast(
60
- tf.concat([width_zoom, height_zoom], axis=1), dtype=tf.float32
61
- )
62
- return image_preprocessing.transform(
63
- inputs, image_preprocessing.get_zoom_matrix(zooms, img_hd, img_wd)
64
- )
65
-
66
-
67
- def image_augmentation(inputs, batch_size):
68
- """image augmentation."""
69
- img = inputs
70
- img = tf.image.resize(img, size=[224, 224])
71
- img = tf.image.random_crop(img, size=[batch_size, 224, 224, 3])
72
- img = rotate(img)
73
- img = zoom(img)
74
- return img
75
-
76
-
77
- class BenchmarkLayer(tf.test.Benchmark):
78
- """Benchmark the layer forward pass."""
79
-
80
- def run_dataset_implementation(self, batch_size):
81
- num_repeats = 5
82
- starts = []
83
- ends = []
84
- for _ in range(num_repeats):
85
- ds = tf.data.Dataset.from_tensor_slices(
86
- np.random.random((batch_size, 256, 256, 3))
87
- )
88
- ds = ds.shuffle(batch_size * 100)
89
- ds = ds.batch(batch_size)
90
- ds = ds.prefetch(batch_size)
91
- img_augmentation = functools.partial(
92
- image_augmentation, batch_size=batch_size
93
- )
94
- ds = ds.map(img_augmentation, num_parallel_calls=8)
95
- starts.append(time.time())
96
- count = 0
97
- # Benchmarked code begins here.
98
- for i in ds:
99
- _ = i
100
- count += 1
101
- # Benchmarked code ends here.
102
- ends.append(time.time())
103
-
104
- avg_time = np.mean(np.array(ends) - np.array(starts)) / count
105
- return avg_time
106
-
107
- def bm_layer_implementation(self, batch_size):
108
- with tf.device("/gpu:0"):
109
- img = keras.Input(shape=(256, 256, 3), dtype=tf.float32)
110
- preprocessor = keras.Sequential(
111
- [
112
- image_preprocessing.Resizing(224, 224),
113
- image_preprocessing.RandomCrop(height=224, width=224),
114
- image_preprocessing.RandomRotation(factor=(0.2, 0.4)),
115
- image_preprocessing.RandomFlip(mode="horizontal"),
116
- image_preprocessing.RandomZoom(0.2, 0.2),
117
- ]
118
- )
119
- _ = preprocessor(img)
120
-
121
- num_repeats = 5
122
- starts = []
123
- ends = []
124
- for _ in range(num_repeats):
125
- ds = tf.data.Dataset.from_tensor_slices(
126
- np.random.random((batch_size, 256, 256, 3))
127
- )
128
- ds = ds.shuffle(batch_size * 100)
129
- ds = ds.batch(batch_size)
130
- ds = ds.prefetch(batch_size)
131
- starts.append(time.time())
132
- count = 0
133
- # Benchmarked code begins here.
134
- for i in ds:
135
- _ = preprocessor(i)
136
- count += 1
137
- # Benchmarked code ends here.
138
- ends.append(time.time())
139
-
140
- avg_time = np.mean(np.array(ends) - np.array(starts)) / count
141
- name = f"image_preprocessing|batch_{batch_size}"
142
- baseline = self.run_dataset_implementation(batch_size)
143
- extras = {
144
- "dataset implementation baseline": baseline,
145
- "delta seconds": (baseline - avg_time),
146
- "delta percent": ((baseline - avg_time) / baseline) * 100,
147
- }
148
- self.report_benchmark(
149
- iters=num_repeats, wall_time=avg_time, extras=extras, name=name
150
- )
151
-
152
- def benchmark_vocab_size_by_batch(self):
153
- for batch in [32, 64, 256]:
154
- self.bm_layer_implementation(batch_size=batch)
155
-
156
-
157
- if __name__ == "__main__":
158
- tf.test.main()
159
-
@@ -1,135 +0,0 @@
1
- # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ==============================================================================
15
- """Benchmark for TF-Keras text vectorization preprocessing layer's adapt method.
16
- """
17
-
18
- import collections
19
- import itertools
20
- import random
21
- import string
22
- import time
23
-
24
- import numpy as np
25
- import tensorflow.compat.v2 as tf
26
-
27
- import tf_keras.src as keras
28
- from tf_keras.src.layers.preprocessing import index_lookup
29
-
30
- tf.compat.v1.enable_v2_behavior()
31
-
32
-
33
- # word_gen creates random sequences of ASCII letters (both lowercase and upper).
34
- # The number of unique strings is ~2,700.
35
- def word_gen():
36
- for _ in itertools.count(1):
37
- yield "".join(random.choice(string.ascii_letters) for i in range(2))
38
-
39
-
40
- def get_top_k(dataset, k):
41
- """Python implementation of vocabulary building using a defaultdict."""
42
- counts = collections.defaultdict(int)
43
- for tensor in dataset:
44
- data = tensor.numpy()
45
- for element in data:
46
- counts[element] += 1
47
- sorted_vocab = [
48
- k
49
- for k, _ in sorted(
50
- counts.items(), key=lambda item: item[1], reverse=True
51
- )
52
- ]
53
- if len(sorted_vocab) > k:
54
- sorted_vocab = sorted_vocab[:k]
55
- return sorted_vocab
56
-
57
-
58
- class BenchmarkAdapt(tf.test.Benchmark):
59
- """Benchmark adapt."""
60
-
61
- def run_numpy_implementation(self, num_elements, batch_size, k):
62
- """Test the python implementation."""
63
- ds = tf.data.Dataset.from_generator(
64
- word_gen, tf.string, tf.TensorShape([])
65
- )
66
- batched_ds = ds.take(num_elements).batch(batch_size)
67
- input_t = keras.Input(shape=(), dtype=tf.string)
68
- layer = index_lookup.IndexLookup(
69
- max_tokens=k,
70
- num_oov_indices=0,
71
- mask_token=None,
72
- oov_token="OOV",
73
- dtype=tf.string,
74
- )
75
- _ = layer(input_t)
76
- num_repeats = 5
77
- starts = []
78
- ends = []
79
- for _ in range(num_repeats):
80
- starts.append(time.time())
81
- vocab = get_top_k(batched_ds, k)
82
- layer.set_vocabulary(vocab)
83
- ends.append(time.time())
84
- avg_time = np.mean(np.array(ends) - np.array(starts))
85
- return avg_time
86
-
87
- def bm_adapt_implementation(self, num_elements, batch_size, k):
88
- """Test the KPL adapt implementation."""
89
- ds = tf.data.Dataset.from_generator(
90
- word_gen, tf.string, tf.TensorShape([])
91
- )
92
- batched_ds = ds.take(num_elements).batch(batch_size)
93
- input_t = keras.Input(shape=(), dtype=tf.string)
94
- layer = index_lookup.IndexLookup(
95
- max_tokens=k,
96
- num_oov_indices=0,
97
- mask_token=None,
98
- oov_token="OOV",
99
- dtype=tf.string,
100
- )
101
- _ = layer(input_t)
102
- num_repeats = 5
103
- starts = []
104
- ends = []
105
- for _ in range(num_repeats):
106
- starts.append(time.time())
107
- layer.adapt(batched_ds)
108
- ends.append(time.time())
109
- avg_time = np.mean(np.array(ends) - np.array(starts))
110
- name = "index_lookup_adapt|%s_elements|vocab_size_%s|batch_%s" % (
111
- num_elements,
112
- k,
113
- batch_size,
114
- )
115
- baseline = self.run_numpy_implementation(num_elements, batch_size, k)
116
- extras = {
117
- "numpy implementation baseline": baseline,
118
- "delta seconds": (baseline - avg_time),
119
- "delta percent": ((baseline - avg_time) / baseline) * 100,
120
- }
121
- self.report_benchmark(
122
- iters=num_repeats, wall_time=avg_time, extras=extras, name=name
123
- )
124
-
125
- def benchmark_vocab_size_by_batch(self):
126
- for vocab_size in [100, 1000, 10000, 100000, 1000000]:
127
- for batch in [1, 16, 2048]:
128
- self.bm_adapt_implementation(
129
- vocab_size, batch, int(vocab_size / 10)
130
- )
131
-
132
-
133
- if __name__ == "__main__":
134
- tf.test.main()
135
-