tf-keras-nightly 2.20.0.dev2025062209__py3-none-any.whl → 2.20.0.dev2025082909__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. tf_keras/__init__.py +1 -1
  2. tf_keras/protobuf/projector_config_pb2.py +23 -12
  3. tf_keras/protobuf/saved_metadata_pb2.py +21 -10
  4. tf_keras/protobuf/versions_pb2.py +19 -8
  5. tf_keras/src/engine/base_layer.py +11 -0
  6. tf_keras/src/metrics/confusion_metrics.py +47 -1
  7. tf_keras/src/models/sharpness_aware_minimization.py +17 -7
  8. tf_keras/src/utils/metrics_utils.py +4 -1
  9. {tf_keras_nightly-2.20.0.dev2025062209.dist-info → tf_keras_nightly-2.20.0.dev2025082909.dist-info}/METADATA +1 -1
  10. {tf_keras_nightly-2.20.0.dev2025062209.dist-info → tf_keras_nightly-2.20.0.dev2025082909.dist-info}/RECORD +12 -34
  11. tf_keras/src/layers/preprocessing/benchmarks/bucketized_column_dense_benchmark.py +0 -85
  12. tf_keras/src/layers/preprocessing/benchmarks/category_encoding_benchmark.py +0 -84
  13. tf_keras/src/layers/preprocessing/benchmarks/category_hash_dense_benchmark.py +0 -89
  14. tf_keras/src/layers/preprocessing/benchmarks/category_hash_varlen_benchmark.py +0 -89
  15. tf_keras/src/layers/preprocessing/benchmarks/category_vocab_file_dense_benchmark.py +0 -110
  16. tf_keras/src/layers/preprocessing/benchmarks/category_vocab_file_varlen_benchmark.py +0 -103
  17. tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_dense_benchmark.py +0 -87
  18. tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_indicator_dense_benchmark.py +0 -96
  19. tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_indicator_varlen_benchmark.py +0 -96
  20. tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_varlen_benchmark.py +0 -87
  21. tf_keras/src/layers/preprocessing/benchmarks/discretization_adapt_benchmark.py +0 -109
  22. tf_keras/src/layers/preprocessing/benchmarks/embedding_dense_benchmark.py +0 -86
  23. tf_keras/src/layers/preprocessing/benchmarks/embedding_varlen_benchmark.py +0 -89
  24. tf_keras/src/layers/preprocessing/benchmarks/hashed_crossing_benchmark.py +0 -90
  25. tf_keras/src/layers/preprocessing/benchmarks/hashing_benchmark.py +0 -105
  26. tf_keras/src/layers/preprocessing/benchmarks/image_preproc_benchmark.py +0 -159
  27. tf_keras/src/layers/preprocessing/benchmarks/index_lookup_adapt_benchmark.py +0 -135
  28. tf_keras/src/layers/preprocessing/benchmarks/index_lookup_forward_benchmark.py +0 -144
  29. tf_keras/src/layers/preprocessing/benchmarks/normalization_adapt_benchmark.py +0 -124
  30. tf_keras/src/layers/preprocessing/benchmarks/weighted_embedding_varlen_benchmark.py +0 -99
  31. tf_keras/src/saving/legacy/saved_model/create_test_saved_model.py +0 -37
  32. tf_keras/src/tests/keras_doctest.py +0 -159
  33. {tf_keras_nightly-2.20.0.dev2025062209.dist-info → tf_keras_nightly-2.20.0.dev2025082909.dist-info}/WHEEL +0 -0
  34. {tf_keras_nightly-2.20.0.dev2025062209.dist-info → tf_keras_nightly-2.20.0.dev2025082909.dist-info}/top_level.txt +0 -0
@@ -1,89 +0,0 @@
1
- # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ==============================================================================
15
- """Benchmark for KPL implementation of categorical hash columns with dense
16
- inputs."""
17
-
18
- import tensorflow.compat.v2 as tf
19
-
20
- import tf_keras.src as keras
21
- from tf_keras.src.layers.preprocessing import hashing
22
- from tf_keras.src.layers.preprocessing.benchmarks import (
23
- feature_column_benchmark as fc_bm,
24
- )
25
-
26
- # isort: off
27
- from tensorflow.python.eager.def_function import (
28
- function as tf_function,
29
- )
30
-
31
- NUM_REPEATS = 10
32
- BATCH_SIZES = [32, 256]
33
-
34
-
35
- def embedding_varlen(batch_size, max_length):
36
- """Benchmark a variable-length embedding."""
37
- # Data and constants.
38
-
39
- num_buckets = 10000
40
- vocab = fc_bm.create_vocabulary(32768)
41
- data = fc_bm.create_string_data(
42
- max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.0
43
- )
44
-
45
- # TF-Keras implementation
46
- model = keras.Sequential()
47
- model.add(keras.Input(shape=(max_length,), name="data", dtype=tf.string))
48
- model.add(hashing.Hashing(num_buckets))
49
-
50
- # FC implementation
51
- fc = tf.feature_column.sequence_categorical_column_with_hash_bucket(
52
- "data", num_buckets
53
- )
54
-
55
- # Wrap the FC implementation in a tf.function for a fair comparison
56
- @tf_function()
57
- def fc_fn(tensors):
58
- fc.transform_feature(
59
- tf.__internal__.feature_column.FeatureTransformationCache(tensors),
60
- None,
61
- )
62
-
63
- # Benchmark runs
64
- keras_data = {
65
- "data": data.to_tensor(default_value="", shape=(batch_size, max_length))
66
- }
67
- k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
68
-
69
- fc_data = {
70
- "data": data.to_tensor(default_value="", shape=(batch_size, max_length))
71
- }
72
- fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
73
-
74
- return k_avg_time, fc_avg_time
75
-
76
-
77
- class BenchmarkLayer(fc_bm.LayerBenchmark):
78
- """Benchmark the layer forward pass."""
79
-
80
- def benchmark_layer(self):
81
- for batch in BATCH_SIZES:
82
- name = f"hash|dense|batch_{batch}"
83
- k_time, f_time = embedding_varlen(batch_size=batch, max_length=256)
84
- self.report(name, k_time, f_time, NUM_REPEATS)
85
-
86
-
87
- if __name__ == "__main__":
88
- tf.test.main()
89
-
@@ -1,89 +0,0 @@
1
- # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ==============================================================================
15
- """Benchmark for KPL implementation of categorical hash columns with
16
- varying-length inputs."""
17
-
18
- import tensorflow.compat.v2 as tf
19
-
20
- import tf_keras.src as keras
21
- from tf_keras.src.layers.preprocessing import hashing
22
- from tf_keras.src.layers.preprocessing.benchmarks import (
23
- feature_column_benchmark as fc_bm,
24
- )
25
-
26
- # isort: off
27
- from tensorflow.python.eager.def_function import (
28
- function as tf_function,
29
- )
30
-
31
- NUM_REPEATS = 10
32
- BATCH_SIZES = [32, 256]
33
-
34
-
35
- def embedding_varlen(batch_size, max_length):
36
- """Benchmark a variable-length embedding."""
37
- # Data and constants.
38
-
39
- num_buckets = 10000
40
- vocab = fc_bm.create_vocabulary(32768)
41
- data = fc_bm.create_string_data(
42
- max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.0
43
- )
44
-
45
- # TF-Keras implementation
46
- model = keras.Sequential()
47
- model.add(
48
- keras.Input(
49
- shape=(max_length,), name="data", ragged=True, dtype=tf.string
50
- )
51
- )
52
- model.add(hashing.Hashing(num_buckets))
53
-
54
- # FC implementation
55
- fc = tf.feature_column.categorical_column_with_hash_bucket(
56
- "data", num_buckets
57
- )
58
-
59
- # Wrap the FC implementation in a tf.function for a fair comparison
60
- @tf_function()
61
- def fc_fn(tensors):
62
- fc.transform_feature(
63
- tf.__internal__.feature_column.FeatureTransformationCache(tensors),
64
- None,
65
- )
66
-
67
- # Benchmark runs
68
- keras_data = {"data": data}
69
- k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
70
-
71
- fc_data = {"data": data.to_sparse()}
72
- fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
73
-
74
- return k_avg_time, fc_avg_time
75
-
76
-
77
- class BenchmarkLayer(fc_bm.LayerBenchmark):
78
- """Benchmark the layer forward pass."""
79
-
80
- def benchmark_layer(self):
81
- for batch in BATCH_SIZES:
82
- name = f"hash|varlen|batch_{batch}"
83
- k_time, f_time = embedding_varlen(batch_size=batch, max_length=256)
84
- self.report(name, k_time, f_time, NUM_REPEATS)
85
-
86
-
87
- if __name__ == "__main__":
88
- tf.test.main()
89
-
@@ -1,110 +0,0 @@
1
- # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ==============================================================================
15
- """Benchmark for KPL implementation of vocabulary columns from files with dense
16
- inputs."""
17
-
18
- import os
19
-
20
- import tensorflow.compat.v2 as tf
21
-
22
- import tf_keras.src as keras
23
- from tf_keras.src.layers.preprocessing import string_lookup
24
- from tf_keras.src.layers.preprocessing.benchmarks import (
25
- feature_column_benchmark as fc_bm,
26
- )
27
-
28
- # isort: off
29
- from tensorflow.python.eager.def_function import (
30
- function as tf_function,
31
- )
32
-
33
- NUM_REPEATS = 10
34
- BATCH_SIZES = [32, 256]
35
-
36
-
37
- class BenchmarkLayer(tf.test.TestCase, fc_bm.LayerBenchmark):
38
- """Benchmark the layer forward pass."""
39
-
40
- def _write_to_temp_file(self, file_name, vocab_list):
41
- vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt")
42
- with tf.io.gfile.GFile(vocab_path, "w") as writer:
43
- for vocab in vocab_list:
44
- writer.write(vocab + "\n")
45
- writer.flush()
46
- writer.close()
47
- return vocab_path
48
-
49
- def embedding_varlen(self, batch_size, max_length):
50
- """Benchmark a variable-length embedding."""
51
- # Data and constants.
52
- vocab = fc_bm.create_vocabulary(32768)
53
-
54
- path = self._write_to_temp_file("tmp", vocab)
55
-
56
- data = fc_bm.create_string_data(
57
- max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15
58
- )
59
-
60
- # TF-Keras implementation
61
- model = keras.Sequential()
62
- model.add(
63
- keras.Input(shape=(max_length,), name="data", dtype=tf.string)
64
- )
65
- model.add(string_lookup.StringLookup(vocabulary=path, mask_token=None))
66
-
67
- # FC implementation
68
- fc = tf.feature_column.categorical_column_with_vocabulary_list(
69
- key="data", vocabulary_list=vocab, num_oov_buckets=1
70
- )
71
-
72
- # Wrap the FC implementation in a tf.function for a fair comparison
73
- @tf_function()
74
- def fc_fn(tensors):
75
- fc.transform_feature(
76
- tf.__internal__.feature_column.FeatureTransformationCache(
77
- tensors
78
- ),
79
- None,
80
- )
81
-
82
- # Benchmark runs
83
- keras_data = {
84
- "data": data.to_tensor(
85
- default_value="", shape=(batch_size, max_length)
86
- )
87
- }
88
- k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
89
-
90
- fc_data = {
91
- "data": data.to_tensor(
92
- default_value="", shape=(batch_size, max_length)
93
- )
94
- }
95
- fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
96
-
97
- return k_avg_time, fc_avg_time
98
-
99
- def benchmark_layer(self):
100
- for batch in BATCH_SIZES:
101
- name = f"vocab_list|dense|batch_{batch}"
102
- k_time, f_time = self.embedding_varlen(
103
- batch_size=batch, max_length=256
104
- )
105
- self.report(name, k_time, f_time, NUM_REPEATS)
106
-
107
-
108
- if __name__ == "__main__":
109
- tf.test.main()
110
-
@@ -1,103 +0,0 @@
1
- # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ==============================================================================
15
- """Benchmark for KPL implementation of vocabulary columns from files with
16
- varying-length inputs."""
17
-
18
- import os
19
-
20
- import tensorflow.compat.v2 as tf
21
-
22
- import tf_keras.src as keras
23
- from tf_keras.src.layers.preprocessing import string_lookup
24
- from tf_keras.src.layers.preprocessing.benchmarks import (
25
- feature_column_benchmark as fc_bm,
26
- )
27
-
28
- # isort: off
29
- from tensorflow.python.eager.def_function import (
30
- function as tf_function,
31
- )
32
-
33
- NUM_REPEATS = 10
34
- BATCH_SIZES = [32, 256]
35
-
36
-
37
- class BenchmarkLayer(tf.test.TestCase, fc_bm.LayerBenchmark):
38
- """Benchmark the layer forward pass."""
39
-
40
- def _write_to_temp_file(self, file_name, vocab_list):
41
- vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt")
42
- with tf.io.gfile.GFile(vocab_path, "w") as writer:
43
- for vocab in vocab_list:
44
- writer.write(vocab + "\n")
45
- writer.flush()
46
- writer.close()
47
- return vocab_path
48
-
49
- def embedding_varlen(self, batch_size, max_length):
50
- """Benchmark a variable-length embedding."""
51
- # Data and constants.
52
- vocab = fc_bm.create_vocabulary(32768)
53
- path = self._write_to_temp_file("tmp", vocab)
54
-
55
- data = fc_bm.create_string_data(
56
- max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15
57
- )
58
-
59
- # TF-Keras implementation
60
- model = keras.Sequential()
61
- model.add(
62
- keras.Input(
63
- shape=(max_length,), name="data", ragged=True, dtype=tf.string
64
- )
65
- )
66
- model.add(string_lookup.StringLookup(vocabulary=path, mask_token=None))
67
-
68
- # FC implementation
69
- fc = tf.feature_column.sequence_categorical_column_with_vocabulary_list(
70
- key="data", vocabulary_list=vocab, num_oov_buckets=1
71
- )
72
-
73
- # Wrap the FC implementation in a tf.function for a fair comparison
74
- @tf_function()
75
- def fc_fn(tensors):
76
- fc.transform_feature(
77
- tf.__internal__.feature_column.FeatureTransformationCache(
78
- tensors
79
- ),
80
- None,
81
- )
82
-
83
- # Benchmark runs
84
- keras_data = {"data": data}
85
- k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
86
-
87
- fc_data = {"data": data.to_sparse()}
88
- fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
89
-
90
- return k_avg_time, fc_avg_time
91
-
92
- def benchmark_layer(self):
93
- for batch in BATCH_SIZES:
94
- name = f"vocab_list|varlen|batch_{batch}"
95
- k_time, f_time = self.embedding_varlen(
96
- batch_size=batch, max_length=256
97
- )
98
- self.report(name, k_time, f_time, NUM_REPEATS)
99
-
100
-
101
- if __name__ == "__main__":
102
- tf.test.main()
103
-
@@ -1,87 +0,0 @@
1
- # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ==============================================================================
15
- """Benchmark for KPL implementation of vocabulary columns from lists with dense
16
- inputs."""
17
-
18
- import tensorflow.compat.v2 as tf
19
-
20
- import tf_keras.src as keras
21
- from tf_keras.src.layers.preprocessing import string_lookup
22
- from tf_keras.src.layers.preprocessing.benchmarks import (
23
- feature_column_benchmark as fc_bm,
24
- )
25
-
26
- # isort: off
27
- from tensorflow.python.eager.def_function import (
28
- function as tf_function,
29
- )
30
-
31
- NUM_REPEATS = 10
32
- BATCH_SIZES = [32, 256]
33
-
34
-
35
- def embedding_varlen(batch_size, max_length):
36
- """Benchmark a variable-length embedding."""
37
- # Data and constants.
38
- vocab = fc_bm.create_vocabulary(32768)
39
- data = fc_bm.create_string_data(
40
- max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15
41
- )
42
-
43
- # TF-Keras implementation
44
- model = keras.Sequential()
45
- model.add(keras.Input(shape=(max_length,), name="data", dtype=tf.string))
46
- model.add(string_lookup.StringLookup(vocabulary=vocab, mask_token=None))
47
-
48
- # FC implementation
49
- fc = tf.feature_column.categorical_column_with_vocabulary_list(
50
- key="data", vocabulary_list=vocab, num_oov_buckets=1
51
- )
52
-
53
- # Wrap the FC implementation in a tf.function for a fair comparison
54
- @tf_function()
55
- def fc_fn(tensors):
56
- fc.transform_feature(
57
- tf.__internal__.feature_column.FeatureTransformationCache(tensors),
58
- None,
59
- )
60
-
61
- # Benchmark runs
62
- keras_data = {
63
- "data": data.to_tensor(default_value="", shape=(batch_size, max_length))
64
- }
65
- k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
66
-
67
- fc_data = {
68
- "data": data.to_tensor(default_value="", shape=(batch_size, max_length))
69
- }
70
- fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
71
-
72
- return k_avg_time, fc_avg_time
73
-
74
-
75
- class BenchmarkLayer(fc_bm.LayerBenchmark):
76
- """Benchmark the layer forward pass."""
77
-
78
- def benchmark_layer(self):
79
- for batch in BATCH_SIZES:
80
- name = f"vocab_list|dense|batch_{batch}"
81
- k_time, f_time = embedding_varlen(batch_size=batch, max_length=256)
82
- self.report(name, k_time, f_time, NUM_REPEATS)
83
-
84
-
85
- if __name__ == "__main__":
86
- tf.test.main()
87
-
@@ -1,96 +0,0 @@
1
- # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ==============================================================================
15
- """Benchmark for KPL implementation of vocabulary columns + indicator from lists
16
- with dense inputs."""
17
-
18
- import tensorflow.compat.v2 as tf
19
-
20
- import tf_keras.src as keras
21
- from tf_keras.src.layers.preprocessing import category_encoding
22
- from tf_keras.src.layers.preprocessing import string_lookup
23
- from tf_keras.src.layers.preprocessing.benchmarks import (
24
- feature_column_benchmark as fc_bm,
25
- )
26
-
27
- # isort: off
28
- from tensorflow.python.eager.def_function import (
29
- function as tf_function,
30
- )
31
-
32
- NUM_REPEATS = 10
33
- BATCH_SIZES = [32, 256]
34
-
35
-
36
- def embedding_varlen(batch_size, max_length):
37
- """Benchmark a variable-length embedding."""
38
- # Data and constants.
39
- vocab_size = 32768
40
- vocab = fc_bm.create_vocabulary(vocab_size)
41
- data = fc_bm.create_string_data(
42
- max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15
43
- )
44
-
45
- # TF-Keras implementation
46
- model = keras.Sequential()
47
- model.add(keras.Input(shape=(max_length,), name="data", dtype=tf.string))
48
- model.add(string_lookup.StringLookup(vocabulary=vocab, mask_token=None))
49
- model.add(
50
- category_encoding.CategoryEncoding(
51
- num_tokens=vocab_size + 1, output_mode="count"
52
- )
53
- )
54
-
55
- # FC implementation
56
- fc = tf.feature_column.indicator_column(
57
- tf.feature_column.categorical_column_with_vocabulary_list(
58
- key="data", vocabulary_list=vocab, num_oov_buckets=1
59
- )
60
- )
61
-
62
- # Wrap the FC implementation in a tf.function for a fair comparison
63
- @tf_function()
64
- def fc_fn(tensors):
65
- fc.transform_feature(
66
- tf.__internal__.feature_column.FeatureTransformationCache(tensors),
67
- None,
68
- )
69
-
70
- # Benchmark runs
71
- keras_data = {
72
- "data": data.to_tensor(default_value="", shape=(batch_size, max_length))
73
- }
74
- k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
75
-
76
- fc_data = {
77
- "data": data.to_tensor(default_value="", shape=(batch_size, max_length))
78
- }
79
- fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
80
-
81
- return k_avg_time, fc_avg_time
82
-
83
-
84
- class BenchmarkLayer(fc_bm.LayerBenchmark):
85
- """Benchmark the layer forward pass."""
86
-
87
- def benchmark_layer(self):
88
- for batch in BATCH_SIZES:
89
- name = f"vocab_list_indicator|dense|batch_{batch}"
90
- k_time, f_time = embedding_varlen(batch_size=batch, max_length=256)
91
- self.report(name, k_time, f_time, NUM_REPEATS)
92
-
93
-
94
- if __name__ == "__main__":
95
- tf.test.main()
96
-
@@ -1,96 +0,0 @@
1
- # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ==============================================================================
15
- """Benchmark for KPL implementation of vocabulary columns + indicator from lists
16
- with varying-length inputs."""
17
-
18
- import tensorflow.compat.v2 as tf
19
-
20
- import tf_keras.src as keras
21
- from tf_keras.src.layers.preprocessing import category_encoding
22
- from tf_keras.src.layers.preprocessing import string_lookup
23
- from tf_keras.src.layers.preprocessing.benchmarks import (
24
- feature_column_benchmark as fc_bm,
25
- )
26
-
27
- # isort: off
28
- from tensorflow.python.eager.def_function import (
29
- function as tf_function,
30
- )
31
-
32
- NUM_REPEATS = 10
33
- BATCH_SIZES = [32, 256]
34
-
35
-
36
- def embedding_varlen(batch_size, max_length):
37
- """Benchmark a variable-length embedding."""
38
- # Data and constants.
39
- vocab_size = 32768
40
- vocab = fc_bm.create_vocabulary(vocab_size)
41
- data = fc_bm.create_string_data(
42
- max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15
43
- )
44
-
45
- # TF-Keras implementation
46
- model = keras.Sequential()
47
- model.add(
48
- keras.Input(
49
- shape=(max_length,), name="data", ragged=True, dtype=tf.string
50
- )
51
- )
52
- model.add(string_lookup.StringLookup(vocabulary=vocab, mask_token=None))
53
- model.add(
54
- category_encoding.CategoryEncoding(
55
- num_tokens=vocab_size + 1, output_mode="count"
56
- )
57
- )
58
-
59
- # FC implementation
60
- fc = tf.feature_column.indicator_column(
61
- tf.feature_column.sequence_categorical_column_with_vocabulary_list(
62
- key="data", vocabulary_list=vocab, num_oov_buckets=1
63
- )
64
- )
65
-
66
- # Wrap the FC implementation in a tf.function for a fair comparison
67
- @tf_function()
68
- def fc_fn(tensors):
69
- fc.transform_feature(
70
- tf.__internal__.feature_column.FeatureTransformationCache(tensors),
71
- None,
72
- )
73
-
74
- # Benchmark runs
75
- keras_data = {"data": data}
76
- k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
77
-
78
- fc_data = {"data": data.to_sparse()}
79
- fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
80
-
81
- return k_avg_time, fc_avg_time
82
-
83
-
84
- class BenchmarkLayer(fc_bm.LayerBenchmark):
85
- """Benchmark the layer forward pass."""
86
-
87
- def benchmark_layer(self):
88
- for batch in BATCH_SIZES:
89
- name = f"vocab_list_indicator|varlen|batch_{batch}"
90
- k_time, f_time = embedding_varlen(batch_size=batch, max_length=256)
91
- self.report(name, k_time, f_time, NUM_REPEATS)
92
-
93
-
94
- if __name__ == "__main__":
95
- tf.test.main()
96
-