tf-keras-nightly 2.20.0.dev2025062109__py3-none-any.whl → 2.20.0.dev2025082818__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of tf-keras-nightly might be problematic. Click here for more details.

Files changed (33):
  1. tf_keras/__init__.py +1 -1
  2. tf_keras/protobuf/projector_config_pb2.py +23 -12
  3. tf_keras/protobuf/saved_metadata_pb2.py +21 -10
  4. tf_keras/protobuf/versions_pb2.py +19 -8
  5. tf_keras/src/metrics/confusion_metrics.py +47 -1
  6. tf_keras/src/models/sharpness_aware_minimization.py +17 -7
  7. tf_keras/src/utils/metrics_utils.py +4 -1
  8. {tf_keras_nightly-2.20.0.dev2025062109.dist-info → tf_keras_nightly-2.20.0.dev2025082818.dist-info}/METADATA +1 -1
  9. {tf_keras_nightly-2.20.0.dev2025062109.dist-info → tf_keras_nightly-2.20.0.dev2025082818.dist-info}/RECORD +11 -33
  10. tf_keras/src/layers/preprocessing/benchmarks/bucketized_column_dense_benchmark.py +0 -85
  11. tf_keras/src/layers/preprocessing/benchmarks/category_encoding_benchmark.py +0 -84
  12. tf_keras/src/layers/preprocessing/benchmarks/category_hash_dense_benchmark.py +0 -89
  13. tf_keras/src/layers/preprocessing/benchmarks/category_hash_varlen_benchmark.py +0 -89
  14. tf_keras/src/layers/preprocessing/benchmarks/category_vocab_file_dense_benchmark.py +0 -110
  15. tf_keras/src/layers/preprocessing/benchmarks/category_vocab_file_varlen_benchmark.py +0 -103
  16. tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_dense_benchmark.py +0 -87
  17. tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_indicator_dense_benchmark.py +0 -96
  18. tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_indicator_varlen_benchmark.py +0 -96
  19. tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_varlen_benchmark.py +0 -87
  20. tf_keras/src/layers/preprocessing/benchmarks/discretization_adapt_benchmark.py +0 -109
  21. tf_keras/src/layers/preprocessing/benchmarks/embedding_dense_benchmark.py +0 -86
  22. tf_keras/src/layers/preprocessing/benchmarks/embedding_varlen_benchmark.py +0 -89
  23. tf_keras/src/layers/preprocessing/benchmarks/hashed_crossing_benchmark.py +0 -90
  24. tf_keras/src/layers/preprocessing/benchmarks/hashing_benchmark.py +0 -105
  25. tf_keras/src/layers/preprocessing/benchmarks/image_preproc_benchmark.py +0 -159
  26. tf_keras/src/layers/preprocessing/benchmarks/index_lookup_adapt_benchmark.py +0 -135
  27. tf_keras/src/layers/preprocessing/benchmarks/index_lookup_forward_benchmark.py +0 -144
  28. tf_keras/src/layers/preprocessing/benchmarks/normalization_adapt_benchmark.py +0 -124
  29. tf_keras/src/layers/preprocessing/benchmarks/weighted_embedding_varlen_benchmark.py +0 -99
  30. tf_keras/src/saving/legacy/saved_model/create_test_saved_model.py +0 -37
  31. tf_keras/src/tests/keras_doctest.py +0 -159
  32. {tf_keras_nightly-2.20.0.dev2025062109.dist-info → tf_keras_nightly-2.20.0.dev2025082818.dist-info}/WHEEL +0 -0
  33. {tf_keras_nightly-2.20.0.dev2025062109.dist-info → tf_keras_nightly-2.20.0.dev2025082818.dist-info}/top_level.txt +0 -0
@@ -1,87 +0,0 @@
1
- # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ==============================================================================
15
- """Benchmark for KPL implementation of vocabulary columns from lists with
16
- varying-length inputs."""
17
-
18
- import tensorflow.compat.v2 as tf
19
-
20
- import tf_keras.src as keras
21
- from tf_keras.src.layers.preprocessing import string_lookup
22
- from tf_keras.src.layers.preprocessing.benchmarks import (
23
- feature_column_benchmark as fc_bm,
24
- )
25
-
26
- # isort: off
27
- from tensorflow.python.eager.def_function import (
28
- function as tf_function,
29
- )
30
-
31
- NUM_REPEATS = 10
32
- BATCH_SIZES = [32, 256]
33
-
34
-
35
- def embedding_varlen(batch_size, max_length):
36
- """Benchmark a variable-length embedding."""
37
- # Data and constants.
38
- vocab = fc_bm.create_vocabulary(32768)
39
- data = fc_bm.create_string_data(
40
- max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15
41
- )
42
-
43
- # TF-Keras implementation
44
- model = keras.Sequential()
45
- model.add(
46
- keras.Input(
47
- shape=(max_length,), name="data", ragged=True, dtype=tf.string
48
- )
49
- )
50
- model.add(string_lookup.StringLookup(vocabulary=vocab, mask_token=None))
51
-
52
- # FC implementation
53
- fc = tf.feature_column.sequence_categorical_column_with_vocabulary_list(
54
- key="data", vocabulary_list=vocab, num_oov_buckets=1
55
- )
56
-
57
- # Wrap the FC implementation in a tf.function for a fair comparison
58
- @tf_function()
59
- def fc_fn(tensors):
60
- fc.transform_feature(
61
- tf.__internal__.feature_column.FeatureTransformationCache(tensors),
62
- None,
63
- )
64
-
65
- # Benchmark runs
66
- keras_data = {"data": data}
67
- k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
68
-
69
- fc_data = {"data": data.to_sparse()}
70
- fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
71
-
72
- return k_avg_time, fc_avg_time
73
-
74
-
75
- class BenchmarkLayer(fc_bm.LayerBenchmark):
76
- """Benchmark the layer forward pass."""
77
-
78
- def benchmark_layer(self):
79
- for batch in BATCH_SIZES:
80
- name = f"vocab_list|varlen|batch_{batch}"
81
- k_time, f_time = embedding_varlen(batch_size=batch, max_length=256)
82
- self.report(name, k_time, f_time, NUM_REPEATS)
83
-
84
-
85
- if __name__ == "__main__":
86
- tf.test.main()
87
-
@@ -1,109 +0,0 @@
1
- # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ==============================================================================
15
- """Benchmark for TF-Keras discretization preprocessing layer's adapt method."""
16
-
17
- import time
18
-
19
- import numpy as np
20
- import tensorflow.compat.v2 as tf
21
-
22
- import tf_keras.src as keras
23
- from tf_keras.src.layers.preprocessing import discretization
24
-
25
- EPSILON = 0.1
26
-
27
-
28
- def reduce_fn(state, values, epsilon=EPSILON):
29
- """tf.data.Dataset-friendly implementation of mean and variance."""
30
-
31
- (state_,) = state
32
- summary = discretization.summarize(values, epsilon)
33
- if np.sum(state_[:, 0]) == 0:
34
- return (summary,)
35
- return (discretization.merge_summaries(state_, summary, epsilon),)
36
-
37
-
38
- class BenchmarkAdapt(tf.test.Benchmark):
39
- """Benchmark adapt."""
40
-
41
- def run_dataset_implementation(self, num_elements, batch_size):
42
- input_t = keras.Input(shape=(1,))
43
- layer = discretization.Discretization()
44
- _ = layer(input_t)
45
-
46
- num_repeats = 5
47
- starts = []
48
- ends = []
49
- for _ in range(num_repeats):
50
- ds = tf.data.Dataset.range(num_elements)
51
- ds = ds.map(lambda x: tf.expand_dims(tf.cast(x, tf.float32), -1))
52
- ds = ds.batch(batch_size)
53
-
54
- starts.append(time.time())
55
- # Benchmarked code begins here.
56
- state = ds.reduce((np.zeros((1, 2)),), reduce_fn)
57
-
58
- bins = discretization.get_bucket_boundaries(state, 100)
59
- layer.set_weights([bins])
60
- # Benchmarked code ends here.
61
- ends.append(time.time())
62
-
63
- avg_time = np.mean(np.array(ends) - np.array(starts))
64
- return avg_time
65
-
66
- def bm_adapt_implementation(self, num_elements, batch_size):
67
- """Test the KPL adapt implementation."""
68
- input_t = keras.Input(shape=(1,), dtype=tf.float32)
69
- layer = discretization.Discretization()
70
- _ = layer(input_t)
71
-
72
- num_repeats = 5
73
- starts = []
74
- ends = []
75
- for _ in range(num_repeats):
76
- ds = tf.data.Dataset.range(num_elements)
77
- ds = ds.map(lambda x: tf.expand_dims(tf.cast(x, tf.float32), -1))
78
- ds = ds.batch(batch_size)
79
-
80
- starts.append(time.time())
81
- # Benchmarked code begins here.
82
- layer.adapt(ds)
83
- # Benchmarked code ends here.
84
- ends.append(time.time())
85
-
86
- avg_time = np.mean(np.array(ends) - np.array(starts))
87
- name = "discretization_adapt|%s_elements|batch_%s" % (
88
- num_elements,
89
- batch_size,
90
- )
91
- baseline = self.run_dataset_implementation(num_elements, batch_size)
92
- extras = {
93
- "tf.data implementation baseline": baseline,
94
- "delta seconds": (baseline - avg_time),
95
- "delta percent": ((baseline - avg_time) / baseline) * 100,
96
- }
97
- self.report_benchmark(
98
- iters=num_repeats, wall_time=avg_time, extras=extras, name=name
99
- )
100
-
101
- def benchmark_vocab_size_by_batch(self):
102
- for vocab_size in [100, 1000, 10000, 100000, 1000000]:
103
- for batch in [64 * 2048]:
104
- self.bm_adapt_implementation(vocab_size, batch)
105
-
106
-
107
- if __name__ == "__main__":
108
- tf.test.main()
109
-
@@ -1,86 +0,0 @@
1
- # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ==============================================================================
15
- """Benchmark for KPL implementation of embedding column with dense inputs."""
16
-
17
- import tensorflow.compat.v2 as tf
18
-
19
- import tf_keras.src as keras
20
- from tf_keras.src.layers.preprocessing.benchmarks import (
21
- feature_column_benchmark as fc_bm,
22
- )
23
-
24
- # isort: off
25
- from tensorflow.python.eager.def_function import (
26
- function as tf_function,
27
- )
28
-
29
- NUM_REPEATS = 10
30
- BATCH_SIZES = [32, 256]
31
-
32
-
33
- ### KPL AND FC IMPLEMENTATION BENCHMARKS ###
34
- def embedding_varlen(batch_size, max_length):
35
- """Benchmark a variable-length embedding."""
36
- # Data and constants.
37
- embedding_size = 32768
38
- data = fc_bm.create_data(
39
- max_length, batch_size * NUM_REPEATS, embedding_size - 1, dtype=int
40
- )
41
-
42
- # TF-Keras implementation
43
- model = keras.Sequential()
44
- model.add(keras.Input(shape=(None,), name="data", dtype=tf.int64))
45
- model.add(keras.layers.Embedding(embedding_size, 256))
46
- model.add(keras.layers.Lambda(lambda x: tf.reduce_mean(x, axis=-1)))
47
-
48
- # FC implementation
49
- fc = tf.feature_column.embedding_column(
50
- tf.feature_column.categorical_column_with_identity(
51
- "data", num_buckets=embedding_size - 1
52
- ),
53
- dimension=256,
54
- )
55
-
56
- # Wrap the FC implementation in a tf.function for a fair comparison
57
- @tf_function()
58
- def fc_fn(tensors):
59
- fc.transform_feature(
60
- tf.__internal__.feature_column.FeatureTransformationCache(tensors),
61
- None,
62
- )
63
-
64
- # Benchmark runs
65
- keras_data = {"data": data.to_tensor(default_value=0)}
66
- k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
67
-
68
- fc_data = {"data": data.to_tensor(default_value=0)}
69
- fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
70
-
71
- return k_avg_time, fc_avg_time
72
-
73
-
74
- class BenchmarkLayer(fc_bm.LayerBenchmark):
75
- """Benchmark the layer forward pass."""
76
-
77
- def benchmark_layer(self):
78
- for batch in BATCH_SIZES:
79
- name = f"embedding|dense|batch_{batch}"
80
- k_time, f_time = embedding_varlen(batch_size=batch, max_length=256)
81
- self.report(name, k_time, f_time, NUM_REPEATS)
82
-
83
-
84
- if __name__ == "__main__":
85
- tf.test.main()
86
-
@@ -1,89 +0,0 @@
1
- # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ==============================================================================
15
- """Benchmark for KPL implementation of embedding column with varying-length
16
- inputs."""
17
-
18
- import tensorflow.compat.v2 as tf
19
-
20
- import tf_keras.src as keras
21
- from tf_keras.src.layers.preprocessing.benchmarks import (
22
- feature_column_benchmark as fc_bm,
23
- )
24
-
25
- # isort: off
26
- from tensorflow.python.eager.def_function import (
27
- function as tf_function,
28
- )
29
-
30
- NUM_REPEATS = 10
31
- BATCH_SIZES = [32, 256]
32
-
33
-
34
- ### KPL AND FC IMPLEMENTATION BENCHMARKS ###
35
- def embedding_varlen(batch_size, max_length):
36
- """Benchmark a variable-length embedding."""
37
- # Data and constants.
38
- embedding_size = 32768
39
- data = fc_bm.create_data(
40
- max_length, batch_size * NUM_REPEATS, embedding_size - 1, dtype=int
41
- )
42
-
43
- # TF-Keras implementation
44
- model = keras.Sequential()
45
- model.add(
46
- keras.Input(shape=(None,), ragged=True, name="data", dtype=tf.int64)
47
- )
48
- model.add(keras.layers.Embedding(embedding_size, 256))
49
- model.add(keras.layers.Lambda(lambda x: tf.reduce_mean(x, axis=-1)))
50
-
51
- # FC implementation
52
- fc = tf.feature_column.embedding_column(
53
- tf.feature_column.categorical_column_with_identity(
54
- "data", num_buckets=embedding_size - 1
55
- ),
56
- dimension=256,
57
- )
58
-
59
- # Wrap the FC implementation in a tf.function for a fair comparison
60
- @tf_function()
61
- def fc_fn(tensors):
62
- fc.transform_feature(
63
- tf.__internal__.feature_column.FeatureTransformationCache(tensors),
64
- None,
65
- )
66
-
67
- # Benchmark runs
68
- keras_data = {"data": data}
69
- k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
70
-
71
- fc_data = {"data": data.to_sparse()}
72
- fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
73
-
74
- return k_avg_time, fc_avg_time
75
-
76
-
77
- class BenchmarkLayer(fc_bm.LayerBenchmark):
78
- """Benchmark the layer forward pass."""
79
-
80
- def benchmark_layer(self):
81
- for batch in BATCH_SIZES:
82
- name = f"embedding|varlen|batch_{batch}"
83
- k_time, f_time = embedding_varlen(batch_size=batch, max_length=256)
84
- self.report(name, k_time, f_time, NUM_REPEATS)
85
-
86
-
87
- if __name__ == "__main__":
88
- tf.test.main()
89
-
@@ -1,90 +0,0 @@
1
- # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ==============================================================================
15
- """Benchmark for KPL implementation of categorical cross hash columns with dense
16
- inputs."""
17
-
18
-
19
- import tensorflow.compat.v2 as tf
20
-
21
- import tf_keras.src as keras
22
- from tf_keras.src.layers.preprocessing import hashed_crossing
23
- from tf_keras.src.layers.preprocessing.benchmarks import (
24
- feature_column_benchmark as fc_bm,
25
- )
26
-
27
- # isort: off
28
- from tensorflow.python.eager.def_function import (
29
- function as tf_function,
30
- )
31
-
32
- NUM_REPEATS = 10
33
- BATCH_SIZES = [32, 256]
34
-
35
-
36
- def embedding_varlen(batch_size):
37
- """Benchmark a variable-length embedding."""
38
- # Data and constants.
39
- num_buckets = 10000
40
- data_a = tf.random.uniform(
41
- shape=(batch_size * NUM_REPEATS, 1), maxval=32768, dtype=tf.int64
42
- )
43
- data_b = tf.strings.as_string(data_a)
44
-
45
- # TF-Keras implementation
46
- input_1 = keras.Input(shape=(1,), name="data_a", dtype=tf.int64)
47
- input_2 = keras.Input(shape=(1,), name="data_b", dtype=tf.string)
48
- outputs = hashed_crossing.HashedCrossing(num_buckets)([input_1, input_2])
49
- model = keras.Model([input_1, input_2], outputs)
50
-
51
- # FC implementation
52
- fc = tf.feature_column.crossed_column(["data_a", "data_b"], num_buckets)
53
-
54
- # Wrap the FC implementation in a tf.function for a fair comparison
55
- @tf_function()
56
- def fc_fn(tensors):
57
- fc.transform_feature(
58
- tf.__internal__.feature_column.FeatureTransformationCache(tensors),
59
- None,
60
- )
61
-
62
- # Benchmark runs
63
- keras_data = {
64
- "data_a": data_a,
65
- "data_b": data_b,
66
- }
67
- k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
68
-
69
- fc_data = {
70
- "data_a": data_a,
71
- "data_b": data_b,
72
- }
73
- fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
74
-
75
- return k_avg_time, fc_avg_time
76
-
77
-
78
- class BenchmarkLayer(fc_bm.LayerBenchmark):
79
- """Benchmark the layer forward pass."""
80
-
81
- def benchmark_layer(self):
82
- for batch in BATCH_SIZES:
83
- name = f"hashed_cross|dense|batch_{batch}"
84
- k_time, f_time = embedding_varlen(batch_size=batch)
85
- self.report(name, k_time, f_time, NUM_REPEATS)
86
-
87
-
88
- if __name__ == "__main__":
89
- tf.test.main()
90
-
@@ -1,105 +0,0 @@
1
- # Copyright 2020 The TensorFlow Authors. All Rights Reserved.
2
- #
3
- # Licensed under the Apache License, Version 2.0 (the "License");
4
- # you may not use this file except in compliance with the License.
5
- # You may obtain a copy of the License at
6
- #
7
- # http://www.apache.org/licenses/LICENSE-2.0
8
- #
9
- # Unless required by applicable law or agreed to in writing, software
10
- # distributed under the License is distributed on an "AS IS" BASIS,
11
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
- # See the License for the specific language governing permissions and
13
- # limitations under the License.
14
- # ==============================================================================
15
- """Benchmark for TF-Keras hashing preprocessing layer."""
16
-
17
- import itertools
18
- import random
19
- import string
20
- import time
21
-
22
- import numpy as np
23
- import tensorflow.compat.v2 as tf
24
-
25
- import tf_keras.src as keras
26
- from tf_keras.src.layers.preprocessing import hashing
27
-
28
-
29
- # word_gen creates random sequences of ASCII letters (both lowercase and upper).
30
- # The number of unique strings is ~2,700.
31
- def word_gen():
32
- for _ in itertools.count(1):
33
- yield "".join(random.choice(string.ascii_letters) for i in range(2))
34
-
35
-
36
- class BenchmarkLayer(tf.test.Benchmark):
37
- """Benchmark the layer forward pass."""
38
-
39
- def run_dataset_implementation(self, batch_size):
40
- num_repeats = 5
41
- starts = []
42
- ends = []
43
- for _ in range(num_repeats):
44
- ds = tf.data.Dataset.from_generator(
45
- word_gen, tf.string, tf.TensorShape([])
46
- )
47
- ds = ds.shuffle(batch_size * 100)
48
- ds = ds.batch(batch_size)
49
- num_batches = 5
50
- ds = ds.take(num_batches)
51
- ds = ds.prefetch(num_batches)
52
- starts.append(time.time())
53
- # Benchmarked code begins here.
54
- for i in ds:
55
- _ = tf.strings.to_hash_bucket(i, num_buckets=2)
56
- # Benchmarked code ends here.
57
- ends.append(time.time())
58
-
59
- avg_time = np.mean(np.array(ends) - np.array(starts)) / num_batches
60
- return avg_time
61
-
62
- def bm_layer_implementation(self, batch_size):
63
- input_1 = keras.Input(shape=(None,), dtype=tf.string, name="word")
64
- layer = hashing.Hashing(num_bins=2)
65
- _ = layer(input_1)
66
-
67
- num_repeats = 5
68
- starts = []
69
- ends = []
70
- for _ in range(num_repeats):
71
- ds = tf.data.Dataset.from_generator(
72
- word_gen, tf.string, tf.TensorShape([])
73
- )
74
- ds = ds.shuffle(batch_size * 100)
75
- ds = ds.batch(batch_size)
76
- num_batches = 5
77
- ds = ds.take(num_batches)
78
- ds = ds.prefetch(num_batches)
79
- starts.append(time.time())
80
- # Benchmarked code begins here.
81
- for i in ds:
82
- _ = layer(i)
83
- # Benchmarked code ends here.
84
- ends.append(time.time())
85
-
86
- avg_time = np.mean(np.array(ends) - np.array(starts)) / num_batches
87
- name = f"hashing|batch_{batch_size}"
88
- baseline = self.run_dataset_implementation(batch_size)
89
- extras = {
90
- "dataset implementation baseline": baseline,
91
- "delta seconds": (baseline - avg_time),
92
- "delta percent": ((baseline - avg_time) / baseline) * 100,
93
- }
94
- self.report_benchmark(
95
- iters=num_repeats, wall_time=avg_time, extras=extras, name=name
96
- )
97
-
98
- def benchmark_vocab_size_by_batch(self):
99
- for batch in [32, 64, 256]:
100
- self.bm_layer_implementation(batch_size=batch)
101
-
102
-
103
- if __name__ == "__main__":
104
- tf.test.main()
105
-