tf-keras-nightly 2.19.0.dev2024121210-py3-none-any.whl → 2.21.0.dev2025123010-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in the public registry.
Files changed (62)
  1. tf_keras/__init__.py +1 -1
  2. tf_keras/protobuf/projector_config_pb2.py +23 -12
  3. tf_keras/protobuf/saved_metadata_pb2.py +21 -10
  4. tf_keras/protobuf/versions_pb2.py +19 -8
  5. tf_keras/src/__init__.py +1 -1
  6. tf_keras/src/backend.py +1 -1
  7. tf_keras/src/datasets/boston_housing.py +14 -5
  8. tf_keras/src/datasets/cifar10.py +9 -1
  9. tf_keras/src/datasets/cifar100.py +7 -1
  10. tf_keras/src/datasets/fashion_mnist.py +16 -4
  11. tf_keras/src/datasets/imdb.py +8 -0
  12. tf_keras/src/datasets/mnist.py +9 -3
  13. tf_keras/src/datasets/reuters.py +8 -0
  14. tf_keras/src/engine/base_layer.py +235 -97
  15. tf_keras/src/engine/base_layer_utils.py +17 -5
  16. tf_keras/src/engine/base_layer_v1.py +12 -3
  17. tf_keras/src/engine/data_adapter.py +35 -19
  18. tf_keras/src/engine/functional.py +36 -15
  19. tf_keras/src/engine/input_layer.py +9 -0
  20. tf_keras/src/engine/input_spec.py +11 -1
  21. tf_keras/src/engine/sequential.py +29 -12
  22. tf_keras/src/layers/activation/softmax.py +26 -11
  23. tf_keras/src/layers/attention/multi_head_attention.py +8 -1
  24. tf_keras/src/layers/core/tf_op_layer.py +4 -0
  25. tf_keras/src/layers/normalization/spectral_normalization.py +29 -22
  26. tf_keras/src/layers/rnn/cell_wrappers.py +13 -1
  27. tf_keras/src/metrics/confusion_metrics.py +51 -4
  28. tf_keras/src/models/sharpness_aware_minimization.py +17 -7
  29. tf_keras/src/preprocessing/sequence.py +2 -2
  30. tf_keras/src/saving/legacy/saved_model/save_impl.py +28 -12
  31. tf_keras/src/saving/legacy/saving_utils.py +14 -2
  32. tf_keras/src/saving/saving_api.py +18 -5
  33. tf_keras/src/saving/saving_lib.py +1 -1
  34. tf_keras/src/utils/layer_utils.py +45 -3
  35. tf_keras/src/utils/metrics_utils.py +4 -1
  36. tf_keras/src/utils/tf_utils.py +2 -2
  37. {tf_keras_nightly-2.19.0.dev2024121210.dist-info → tf_keras_nightly-2.21.0.dev2025123010.dist-info}/METADATA +14 -3
  38. {tf_keras_nightly-2.19.0.dev2024121210.dist-info → tf_keras_nightly-2.21.0.dev2025123010.dist-info}/RECORD +40 -62
  39. {tf_keras_nightly-2.19.0.dev2024121210.dist-info → tf_keras_nightly-2.21.0.dev2025123010.dist-info}/WHEEL +1 -1
  40. tf_keras/src/layers/preprocessing/benchmarks/bucketized_column_dense_benchmark.py +0 -85
  41. tf_keras/src/layers/preprocessing/benchmarks/category_encoding_benchmark.py +0 -84
  42. tf_keras/src/layers/preprocessing/benchmarks/category_hash_dense_benchmark.py +0 -89
  43. tf_keras/src/layers/preprocessing/benchmarks/category_hash_varlen_benchmark.py +0 -89
  44. tf_keras/src/layers/preprocessing/benchmarks/category_vocab_file_dense_benchmark.py +0 -110
  45. tf_keras/src/layers/preprocessing/benchmarks/category_vocab_file_varlen_benchmark.py +0 -103
  46. tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_dense_benchmark.py +0 -87
  47. tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_indicator_dense_benchmark.py +0 -96
  48. tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_indicator_varlen_benchmark.py +0 -96
  49. tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_varlen_benchmark.py +0 -87
  50. tf_keras/src/layers/preprocessing/benchmarks/discretization_adapt_benchmark.py +0 -109
  51. tf_keras/src/layers/preprocessing/benchmarks/embedding_dense_benchmark.py +0 -86
  52. tf_keras/src/layers/preprocessing/benchmarks/embedding_varlen_benchmark.py +0 -89
  53. tf_keras/src/layers/preprocessing/benchmarks/hashed_crossing_benchmark.py +0 -90
  54. tf_keras/src/layers/preprocessing/benchmarks/hashing_benchmark.py +0 -105
  55. tf_keras/src/layers/preprocessing/benchmarks/image_preproc_benchmark.py +0 -159
  56. tf_keras/src/layers/preprocessing/benchmarks/index_lookup_adapt_benchmark.py +0 -135
  57. tf_keras/src/layers/preprocessing/benchmarks/index_lookup_forward_benchmark.py +0 -144
  58. tf_keras/src/layers/preprocessing/benchmarks/normalization_adapt_benchmark.py +0 -124
  59. tf_keras/src/layers/preprocessing/benchmarks/weighted_embedding_varlen_benchmark.py +0 -99
  60. tf_keras/src/saving/legacy/saved_model/create_test_saved_model.py +0 -37
  61. tf_keras/src/tests/keras_doctest.py +0 -159
  62. {tf_keras_nightly-2.19.0.dev2024121210.dist-info → tf_keras_nightly-2.21.0.dev2025123010.dist-info}/top_level.txt +0 -0
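
The file-level summary above can be reproduced locally from the two wheels themselves. The following is a minimal sketch using only the Python standard library; it assumes both wheels have already been downloaded (for example with `pip download tf-keras-nightly==2.19.0.dev2024121210 --no-deps -d old/` and the same for the new version into `new/`). The `old/` and `new/` directories and the choice of `tf_keras/src/backend.py` as the sample module are illustrative assumptions, not part of the package; the wheel file names follow standard wheel naming, matching the dist-info entries listed above.

# Minimal sketch: compare the contents of the two wheels locally.
# Assumes both wheel files are present at the paths below.
import difflib
import zipfile

OLD = "old/tf_keras_nightly-2.19.0.dev2024121210-py3-none-any.whl"
NEW = "new/tf_keras_nightly-2.21.0.dev2025123010-py3-none-any.whl"

with zipfile.ZipFile(OLD) as old_whl, zipfile.ZipFile(NEW) as new_whl:
    old_names = set(old_whl.namelist())
    new_names = set(new_whl.namelist())

    # File-level summary: members present in only one of the two wheels
    # (e.g. the removed preprocessing benchmark modules).
    for name in sorted(old_names - new_names):
        print("removed:", name)
    for name in sorted(new_names - old_names):
        print("added:  ", name)

    # Line-level diff for a module that exists in both wheels.
    target = "tf_keras/src/backend.py"
    old_lines = old_whl.read(target).decode("utf-8").splitlines()
    new_lines = new_whl.read(target).decode("utf-8").splitlines()
    print("\n".join(difflib.unified_diff(
        old_lines, new_lines, fromfile=target, tofile=target, lineterm=""
    )))
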
tf_keras/src/layers/preprocessing/benchmarks/category_vocab_file_dense_benchmark.py
@@ -1,110 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Benchmark for KPL implementation of vocabulary columns from files with dense
-inputs."""
-
-import os
-
-import tensorflow.compat.v2 as tf
-
-import tf_keras.src as keras
-from tf_keras.src.layers.preprocessing import string_lookup
-from tf_keras.src.layers.preprocessing.benchmarks import (
-    feature_column_benchmark as fc_bm,
-)
-
-# isort: off
-from tensorflow.python.eager.def_function import (
-    function as tf_function,
-)
-
-NUM_REPEATS = 10
-BATCH_SIZES = [32, 256]
-
-
-class BenchmarkLayer(tf.test.TestCase, fc_bm.LayerBenchmark):
-    """Benchmark the layer forward pass."""
-
-    def _write_to_temp_file(self, file_name, vocab_list):
-        vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt")
-        with tf.io.gfile.GFile(vocab_path, "w") as writer:
-            for vocab in vocab_list:
-                writer.write(vocab + "\n")
-            writer.flush()
-            writer.close()
-        return vocab_path
-
-    def embedding_varlen(self, batch_size, max_length):
-        """Benchmark a variable-length embedding."""
-        # Data and constants.
-        vocab = fc_bm.create_vocabulary(32768)
-
-        path = self._write_to_temp_file("tmp", vocab)
-
-        data = fc_bm.create_string_data(
-            max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15
-        )
-
-        # TF-Keras implementation
-        model = keras.Sequential()
-        model.add(
-            keras.Input(shape=(max_length,), name="data", dtype=tf.string)
-        )
-        model.add(string_lookup.StringLookup(vocabulary=path, mask_token=None))
-
-        # FC implementation
-        fc = tf.feature_column.categorical_column_with_vocabulary_list(
-            key="data", vocabulary_list=vocab, num_oov_buckets=1
-        )
-
-        # Wrap the FC implementation in a tf.function for a fair comparison
-        @tf_function()
-        def fc_fn(tensors):
-            fc.transform_feature(
-                tf.__internal__.feature_column.FeatureTransformationCache(
-                    tensors
-                ),
-                None,
-            )
-
-        # Benchmark runs
-        keras_data = {
-            "data": data.to_tensor(
-                default_value="", shape=(batch_size, max_length)
-            )
-        }
-        k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
-
-        fc_data = {
-            "data": data.to_tensor(
-                default_value="", shape=(batch_size, max_length)
-            )
-        }
-        fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
-
-        return k_avg_time, fc_avg_time
-
-    def benchmark_layer(self):
-        for batch in BATCH_SIZES:
-            name = f"vocab_list|dense|batch_{batch}"
-            k_time, f_time = self.embedding_varlen(
-                batch_size=batch, max_length=256
-            )
-            self.report(name, k_time, f_time, NUM_REPEATS)
-
-
-if __name__ == "__main__":
-    tf.test.main()
-
tf_keras/src/layers/preprocessing/benchmarks/category_vocab_file_varlen_benchmark.py
@@ -1,103 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Benchmark for KPL implementation of vocabulary columns from files with
-varying-length inputs."""
-
-import os
-
-import tensorflow.compat.v2 as tf
-
-import tf_keras.src as keras
-from tf_keras.src.layers.preprocessing import string_lookup
-from tf_keras.src.layers.preprocessing.benchmarks import (
-    feature_column_benchmark as fc_bm,
-)
-
-# isort: off
-from tensorflow.python.eager.def_function import (
-    function as tf_function,
-)
-
-NUM_REPEATS = 10
-BATCH_SIZES = [32, 256]
-
-
-class BenchmarkLayer(tf.test.TestCase, fc_bm.LayerBenchmark):
-    """Benchmark the layer forward pass."""
-
-    def _write_to_temp_file(self, file_name, vocab_list):
-        vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt")
-        with tf.io.gfile.GFile(vocab_path, "w") as writer:
-            for vocab in vocab_list:
-                writer.write(vocab + "\n")
-            writer.flush()
-            writer.close()
-        return vocab_path
-
-    def embedding_varlen(self, batch_size, max_length):
-        """Benchmark a variable-length embedding."""
-        # Data and constants.
-        vocab = fc_bm.create_vocabulary(32768)
-        path = self._write_to_temp_file("tmp", vocab)
-
-        data = fc_bm.create_string_data(
-            max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15
-        )
-
-        # TF-Keras implementation
-        model = keras.Sequential()
-        model.add(
-            keras.Input(
-                shape=(max_length,), name="data", ragged=True, dtype=tf.string
-            )
-        )
-        model.add(string_lookup.StringLookup(vocabulary=path, mask_token=None))
-
-        # FC implementation
-        fc = tf.feature_column.sequence_categorical_column_with_vocabulary_list(
-            key="data", vocabulary_list=vocab, num_oov_buckets=1
-        )
-
-        # Wrap the FC implementation in a tf.function for a fair comparison
-        @tf_function()
-        def fc_fn(tensors):
-            fc.transform_feature(
-                tf.__internal__.feature_column.FeatureTransformationCache(
-                    tensors
-                ),
-                None,
-            )
-
-        # Benchmark runs
-        keras_data = {"data": data}
-        k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
-
-        fc_data = {"data": data.to_sparse()}
-        fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
-
-        return k_avg_time, fc_avg_time
-
-    def benchmark_layer(self):
-        for batch in BATCH_SIZES:
-            name = f"vocab_list|varlen|batch_{batch}"
-            k_time, f_time = self.embedding_varlen(
-                batch_size=batch, max_length=256
-            )
-            self.report(name, k_time, f_time, NUM_REPEATS)
-
-
-if __name__ == "__main__":
-    tf.test.main()
-
tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_dense_benchmark.py
@@ -1,87 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Benchmark for KPL implementation of vocabulary columns from lists with dense
-inputs."""
-
-import tensorflow.compat.v2 as tf
-
-import tf_keras.src as keras
-from tf_keras.src.layers.preprocessing import string_lookup
-from tf_keras.src.layers.preprocessing.benchmarks import (
-    feature_column_benchmark as fc_bm,
-)
-
-# isort: off
-from tensorflow.python.eager.def_function import (
-    function as tf_function,
-)
-
-NUM_REPEATS = 10
-BATCH_SIZES = [32, 256]
-
-
-def embedding_varlen(batch_size, max_length):
-    """Benchmark a variable-length embedding."""
-    # Data and constants.
-    vocab = fc_bm.create_vocabulary(32768)
-    data = fc_bm.create_string_data(
-        max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15
-    )
-
-    # TF-Keras implementation
-    model = keras.Sequential()
-    model.add(keras.Input(shape=(max_length,), name="data", dtype=tf.string))
-    model.add(string_lookup.StringLookup(vocabulary=vocab, mask_token=None))
-
-    # FC implementation
-    fc = tf.feature_column.categorical_column_with_vocabulary_list(
-        key="data", vocabulary_list=vocab, num_oov_buckets=1
-    )
-
-    # Wrap the FC implementation in a tf.function for a fair comparison
-    @tf_function()
-    def fc_fn(tensors):
-        fc.transform_feature(
-            tf.__internal__.feature_column.FeatureTransformationCache(tensors),
-            None,
-        )
-
-    # Benchmark runs
-    keras_data = {
-        "data": data.to_tensor(default_value="", shape=(batch_size, max_length))
-    }
-    k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
-
-    fc_data = {
-        "data": data.to_tensor(default_value="", shape=(batch_size, max_length))
-    }
-    fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
-
-    return k_avg_time, fc_avg_time
-
-
-class BenchmarkLayer(fc_bm.LayerBenchmark):
-    """Benchmark the layer forward pass."""
-
-    def benchmark_layer(self):
-        for batch in BATCH_SIZES:
-            name = f"vocab_list|dense|batch_{batch}"
-            k_time, f_time = embedding_varlen(batch_size=batch, max_length=256)
-            self.report(name, k_time, f_time, NUM_REPEATS)
-
-
-if __name__ == "__main__":
-    tf.test.main()
-
tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_indicator_dense_benchmark.py
@@ -1,96 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Benchmark for KPL implementation of vocabulary columns + indicator from lists
-with dense inputs."""
-
-import tensorflow.compat.v2 as tf
-
-import tf_keras.src as keras
-from tf_keras.src.layers.preprocessing import category_encoding
-from tf_keras.src.layers.preprocessing import string_lookup
-from tf_keras.src.layers.preprocessing.benchmarks import (
-    feature_column_benchmark as fc_bm,
-)
-
-# isort: off
-from tensorflow.python.eager.def_function import (
-    function as tf_function,
-)
-
-NUM_REPEATS = 10
-BATCH_SIZES = [32, 256]
-
-
-def embedding_varlen(batch_size, max_length):
-    """Benchmark a variable-length embedding."""
-    # Data and constants.
-    vocab_size = 32768
-    vocab = fc_bm.create_vocabulary(vocab_size)
-    data = fc_bm.create_string_data(
-        max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15
-    )
-
-    # TF-Keras implementation
-    model = keras.Sequential()
-    model.add(keras.Input(shape=(max_length,), name="data", dtype=tf.string))
-    model.add(string_lookup.StringLookup(vocabulary=vocab, mask_token=None))
-    model.add(
-        category_encoding.CategoryEncoding(
-            num_tokens=vocab_size + 1, output_mode="count"
-        )
-    )
-
-    # FC implementation
-    fc = tf.feature_column.indicator_column(
-        tf.feature_column.categorical_column_with_vocabulary_list(
-            key="data", vocabulary_list=vocab, num_oov_buckets=1
-        )
-    )
-
-    # Wrap the FC implementation in a tf.function for a fair comparison
-    @tf_function()
-    def fc_fn(tensors):
-        fc.transform_feature(
-            tf.__internal__.feature_column.FeatureTransformationCache(tensors),
-            None,
-        )
-
-    # Benchmark runs
-    keras_data = {
-        "data": data.to_tensor(default_value="", shape=(batch_size, max_length))
-    }
-    k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
-
-    fc_data = {
-        "data": data.to_tensor(default_value="", shape=(batch_size, max_length))
-    }
-    fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
-
-    return k_avg_time, fc_avg_time
-
-
-class BenchmarkLayer(fc_bm.LayerBenchmark):
-    """Benchmark the layer forward pass."""
-
-    def benchmark_layer(self):
-        for batch in BATCH_SIZES:
-            name = f"vocab_list_indicator|dense|batch_{batch}"
-            k_time, f_time = embedding_varlen(batch_size=batch, max_length=256)
-            self.report(name, k_time, f_time, NUM_REPEATS)
-
-
-if __name__ == "__main__":
-    tf.test.main()
-
tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_indicator_varlen_benchmark.py
@@ -1,96 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Benchmark for KPL implementation of vocabulary columns + indicator from lists
-with varying-length inputs."""
-
-import tensorflow.compat.v2 as tf
-
-import tf_keras.src as keras
-from tf_keras.src.layers.preprocessing import category_encoding
-from tf_keras.src.layers.preprocessing import string_lookup
-from tf_keras.src.layers.preprocessing.benchmarks import (
-    feature_column_benchmark as fc_bm,
-)
-
-# isort: off
-from tensorflow.python.eager.def_function import (
-    function as tf_function,
-)
-
-NUM_REPEATS = 10
-BATCH_SIZES = [32, 256]
-
-
-def embedding_varlen(batch_size, max_length):
-    """Benchmark a variable-length embedding."""
-    # Data and constants.
-    vocab_size = 32768
-    vocab = fc_bm.create_vocabulary(vocab_size)
-    data = fc_bm.create_string_data(
-        max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15
-    )
-
-    # TF-Keras implementation
-    model = keras.Sequential()
-    model.add(
-        keras.Input(
-            shape=(max_length,), name="data", ragged=True, dtype=tf.string
-        )
-    )
-    model.add(string_lookup.StringLookup(vocabulary=vocab, mask_token=None))
-    model.add(
-        category_encoding.CategoryEncoding(
-            num_tokens=vocab_size + 1, output_mode="count"
-        )
-    )
-
-    # FC implementation
-    fc = tf.feature_column.indicator_column(
-        tf.feature_column.sequence_categorical_column_with_vocabulary_list(
-            key="data", vocabulary_list=vocab, num_oov_buckets=1
-        )
-    )
-
-    # Wrap the FC implementation in a tf.function for a fair comparison
-    @tf_function()
-    def fc_fn(tensors):
-        fc.transform_feature(
-            tf.__internal__.feature_column.FeatureTransformationCache(tensors),
-            None,
-        )
-
-    # Benchmark runs
-    keras_data = {"data": data}
-    k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
-
-    fc_data = {"data": data.to_sparse()}
-    fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
-
-    return k_avg_time, fc_avg_time
-
-
-class BenchmarkLayer(fc_bm.LayerBenchmark):
-    """Benchmark the layer forward pass."""
-
-    def benchmark_layer(self):
-        for batch in BATCH_SIZES:
-            name = f"vocab_list_indicator|varlen|batch_{batch}"
-            k_time, f_time = embedding_varlen(batch_size=batch, max_length=256)
-            self.report(name, k_time, f_time, NUM_REPEATS)
-
-
-if __name__ == "__main__":
-    tf.test.main()
-
tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_varlen_benchmark.py
@@ -1,87 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Benchmark for KPL implementation of vocabulary columns from lists with
-varying-length inputs."""
-
-import tensorflow.compat.v2 as tf
-
-import tf_keras.src as keras
-from tf_keras.src.layers.preprocessing import string_lookup
-from tf_keras.src.layers.preprocessing.benchmarks import (
-    feature_column_benchmark as fc_bm,
-)
-
-# isort: off
-from tensorflow.python.eager.def_function import (
-    function as tf_function,
-)
-
-NUM_REPEATS = 10
-BATCH_SIZES = [32, 256]
-
-
-def embedding_varlen(batch_size, max_length):
-    """Benchmark a variable-length embedding."""
-    # Data and constants.
-    vocab = fc_bm.create_vocabulary(32768)
-    data = fc_bm.create_string_data(
-        max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15
-    )
-
-    # TF-Keras implementation
-    model = keras.Sequential()
-    model.add(
-        keras.Input(
-            shape=(max_length,), name="data", ragged=True, dtype=tf.string
-        )
-    )
-    model.add(string_lookup.StringLookup(vocabulary=vocab, mask_token=None))
-
-    # FC implementation
-    fc = tf.feature_column.sequence_categorical_column_with_vocabulary_list(
-        key="data", vocabulary_list=vocab, num_oov_buckets=1
-    )
-
-    # Wrap the FC implementation in a tf.function for a fair comparison
-    @tf_function()
-    def fc_fn(tensors):
-        fc.transform_feature(
-            tf.__internal__.feature_column.FeatureTransformationCache(tensors),
-            None,
-        )
-
-    # Benchmark runs
-    keras_data = {"data": data}
-    k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
-
-    fc_data = {"data": data.to_sparse()}
-    fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
-
-    return k_avg_time, fc_avg_time
-
-
-class BenchmarkLayer(fc_bm.LayerBenchmark):
-    """Benchmark the layer forward pass."""
-
-    def benchmark_layer(self):
-        for batch in BATCH_SIZES:
-            name = f"vocab_list|varlen|batch_{batch}"
-            k_time, f_time = embedding_varlen(batch_size=batch, max_length=256)
-            self.report(name, k_time, f_time, NUM_REPEATS)
-
-
-if __name__ == "__main__":
-    tf.test.main()
-
tf_keras/src/layers/preprocessing/benchmarks/discretization_adapt_benchmark.py
@@ -1,109 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Benchmark for TF-Keras discretization preprocessing layer's adapt method."""
-
-import time
-
-import numpy as np
-import tensorflow.compat.v2 as tf
-
-import tf_keras.src as keras
-from tf_keras.src.layers.preprocessing import discretization
-
-EPSILON = 0.1
-
-
-def reduce_fn(state, values, epsilon=EPSILON):
-    """tf.data.Dataset-friendly implementation of mean and variance."""
-
-    (state_,) = state
-    summary = discretization.summarize(values, epsilon)
-    if np.sum(state_[:, 0]) == 0:
-        return (summary,)
-    return (discretization.merge_summaries(state_, summary, epsilon),)
-
-
-class BenchmarkAdapt(tf.test.Benchmark):
-    """Benchmark adapt."""
-
-    def run_dataset_implementation(self, num_elements, batch_size):
-        input_t = keras.Input(shape=(1,))
-        layer = discretization.Discretization()
-        _ = layer(input_t)
-
-        num_repeats = 5
-        starts = []
-        ends = []
-        for _ in range(num_repeats):
-            ds = tf.data.Dataset.range(num_elements)
-            ds = ds.map(lambda x: tf.expand_dims(tf.cast(x, tf.float32), -1))
-            ds = ds.batch(batch_size)
-
-            starts.append(time.time())
-            # Benchmarked code begins here.
-            state = ds.reduce((np.zeros((1, 2)),), reduce_fn)
-
-            bins = discretization.get_bucket_boundaries(state, 100)
-            layer.set_weights([bins])
-            # Benchmarked code ends here.
-            ends.append(time.time())
-
-        avg_time = np.mean(np.array(ends) - np.array(starts))
-        return avg_time
-
-    def bm_adapt_implementation(self, num_elements, batch_size):
-        """Test the KPL adapt implementation."""
-        input_t = keras.Input(shape=(1,), dtype=tf.float32)
-        layer = discretization.Discretization()
-        _ = layer(input_t)
-
-        num_repeats = 5
-        starts = []
-        ends = []
-        for _ in range(num_repeats):
-            ds = tf.data.Dataset.range(num_elements)
-            ds = ds.map(lambda x: tf.expand_dims(tf.cast(x, tf.float32), -1))
-            ds = ds.batch(batch_size)
-
-            starts.append(time.time())
-            # Benchmarked code begins here.
-            layer.adapt(ds)
-            # Benchmarked code ends here.
-            ends.append(time.time())
-
-        avg_time = np.mean(np.array(ends) - np.array(starts))
-        name = "discretization_adapt|%s_elements|batch_%s" % (
-            num_elements,
-            batch_size,
-        )
-        baseline = self.run_dataset_implementation(num_elements, batch_size)
-        extras = {
-            "tf.data implementation baseline": baseline,
-            "delta seconds": (baseline - avg_time),
-            "delta percent": ((baseline - avg_time) / baseline) * 100,
-        }
-        self.report_benchmark(
-            iters=num_repeats, wall_time=avg_time, extras=extras, name=name
-        )
-
-    def benchmark_vocab_size_by_batch(self):
-        for vocab_size in [100, 1000, 10000, 100000, 1000000]:
-            for batch in [64 * 2048]:
-                self.bm_adapt_implementation(vocab_size, batch)
-
-
-if __name__ == "__main__":
-    tf.test.main()
-