tf-keras-nightly 2.20.0.dev2025062109__py3-none-any.whl → 2.20.0.dev2025082818__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of tf-keras-nightly might be problematic. Click here for more details.
- tf_keras/__init__.py +1 -1
- tf_keras/protobuf/projector_config_pb2.py +23 -12
- tf_keras/protobuf/saved_metadata_pb2.py +21 -10
- tf_keras/protobuf/versions_pb2.py +19 -8
- tf_keras/src/metrics/confusion_metrics.py +47 -1
- tf_keras/src/models/sharpness_aware_minimization.py +17 -7
- tf_keras/src/utils/metrics_utils.py +4 -1
- {tf_keras_nightly-2.20.0.dev2025062109.dist-info → tf_keras_nightly-2.20.0.dev2025082818.dist-info}/METADATA +1 -1
- {tf_keras_nightly-2.20.0.dev2025062109.dist-info → tf_keras_nightly-2.20.0.dev2025082818.dist-info}/RECORD +11 -33
- tf_keras/src/layers/preprocessing/benchmarks/bucketized_column_dense_benchmark.py +0 -85
- tf_keras/src/layers/preprocessing/benchmarks/category_encoding_benchmark.py +0 -84
- tf_keras/src/layers/preprocessing/benchmarks/category_hash_dense_benchmark.py +0 -89
- tf_keras/src/layers/preprocessing/benchmarks/category_hash_varlen_benchmark.py +0 -89
- tf_keras/src/layers/preprocessing/benchmarks/category_vocab_file_dense_benchmark.py +0 -110
- tf_keras/src/layers/preprocessing/benchmarks/category_vocab_file_varlen_benchmark.py +0 -103
- tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_dense_benchmark.py +0 -87
- tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_indicator_dense_benchmark.py +0 -96
- tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_indicator_varlen_benchmark.py +0 -96
- tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_varlen_benchmark.py +0 -87
- tf_keras/src/layers/preprocessing/benchmarks/discretization_adapt_benchmark.py +0 -109
- tf_keras/src/layers/preprocessing/benchmarks/embedding_dense_benchmark.py +0 -86
- tf_keras/src/layers/preprocessing/benchmarks/embedding_varlen_benchmark.py +0 -89
- tf_keras/src/layers/preprocessing/benchmarks/hashed_crossing_benchmark.py +0 -90
- tf_keras/src/layers/preprocessing/benchmarks/hashing_benchmark.py +0 -105
- tf_keras/src/layers/preprocessing/benchmarks/image_preproc_benchmark.py +0 -159
- tf_keras/src/layers/preprocessing/benchmarks/index_lookup_adapt_benchmark.py +0 -135
- tf_keras/src/layers/preprocessing/benchmarks/index_lookup_forward_benchmark.py +0 -144
- tf_keras/src/layers/preprocessing/benchmarks/normalization_adapt_benchmark.py +0 -124
- tf_keras/src/layers/preprocessing/benchmarks/weighted_embedding_varlen_benchmark.py +0 -99
- tf_keras/src/saving/legacy/saved_model/create_test_saved_model.py +0 -37
- tf_keras/src/tests/keras_doctest.py +0 -159
- {tf_keras_nightly-2.20.0.dev2025062109.dist-info → tf_keras_nightly-2.20.0.dev2025082818.dist-info}/WHEEL +0 -0
- {tf_keras_nightly-2.20.0.dev2025062109.dist-info → tf_keras_nightly-2.20.0.dev2025082818.dist-info}/top_level.txt +0 -0
|
@@ -1,89 +0,0 @@
|
|
|
1
|
-
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
# ==============================================================================
|
|
15
|
-
"""Benchmark for KPL implementation of categorical hash columns with dense
|
|
16
|
-
inputs."""
|
|
17
|
-
|
|
18
|
-
import tensorflow.compat.v2 as tf
|
|
19
|
-
|
|
20
|
-
import tf_keras.src as keras
|
|
21
|
-
from tf_keras.src.layers.preprocessing import hashing
|
|
22
|
-
from tf_keras.src.layers.preprocessing.benchmarks import (
|
|
23
|
-
feature_column_benchmark as fc_bm,
|
|
24
|
-
)
|
|
25
|
-
|
|
26
|
-
# isort: off
|
|
27
|
-
from tensorflow.python.eager.def_function import (
|
|
28
|
-
function as tf_function,
|
|
29
|
-
)
|
|
30
|
-
|
|
31
|
-
NUM_REPEATS = 10
|
|
32
|
-
BATCH_SIZES = [32, 256]
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def embedding_varlen(batch_size, max_length):
|
|
36
|
-
"""Benchmark a variable-length embedding."""
|
|
37
|
-
# Data and constants.
|
|
38
|
-
|
|
39
|
-
num_buckets = 10000
|
|
40
|
-
vocab = fc_bm.create_vocabulary(32768)
|
|
41
|
-
data = fc_bm.create_string_data(
|
|
42
|
-
max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.0
|
|
43
|
-
)
|
|
44
|
-
|
|
45
|
-
# TF-Keras implementation
|
|
46
|
-
model = keras.Sequential()
|
|
47
|
-
model.add(keras.Input(shape=(max_length,), name="data", dtype=tf.string))
|
|
48
|
-
model.add(hashing.Hashing(num_buckets))
|
|
49
|
-
|
|
50
|
-
# FC implementation
|
|
51
|
-
fc = tf.feature_column.sequence_categorical_column_with_hash_bucket(
|
|
52
|
-
"data", num_buckets
|
|
53
|
-
)
|
|
54
|
-
|
|
55
|
-
# Wrap the FC implementation in a tf.function for a fair comparison
|
|
56
|
-
@tf_function()
|
|
57
|
-
def fc_fn(tensors):
|
|
58
|
-
fc.transform_feature(
|
|
59
|
-
tf.__internal__.feature_column.FeatureTransformationCache(tensors),
|
|
60
|
-
None,
|
|
61
|
-
)
|
|
62
|
-
|
|
63
|
-
# Benchmark runs
|
|
64
|
-
keras_data = {
|
|
65
|
-
"data": data.to_tensor(default_value="", shape=(batch_size, max_length))
|
|
66
|
-
}
|
|
67
|
-
k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
|
|
68
|
-
|
|
69
|
-
fc_data = {
|
|
70
|
-
"data": data.to_tensor(default_value="", shape=(batch_size, max_length))
|
|
71
|
-
}
|
|
72
|
-
fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
|
|
73
|
-
|
|
74
|
-
return k_avg_time, fc_avg_time
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
class BenchmarkLayer(fc_bm.LayerBenchmark):
|
|
78
|
-
"""Benchmark the layer forward pass."""
|
|
79
|
-
|
|
80
|
-
def benchmark_layer(self):
|
|
81
|
-
for batch in BATCH_SIZES:
|
|
82
|
-
name = f"hash|dense|batch_{batch}"
|
|
83
|
-
k_time, f_time = embedding_varlen(batch_size=batch, max_length=256)
|
|
84
|
-
self.report(name, k_time, f_time, NUM_REPEATS)
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
if __name__ == "__main__":
|
|
88
|
-
tf.test.main()
|
|
89
|
-
|
|
@@ -1,89 +0,0 @@
|
|
|
1
|
-
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
# ==============================================================================
|
|
15
|
-
"""Benchmark for KPL implementation of categorical hash columns with
|
|
16
|
-
varying-length inputs."""
|
|
17
|
-
|
|
18
|
-
import tensorflow.compat.v2 as tf
|
|
19
|
-
|
|
20
|
-
import tf_keras.src as keras
|
|
21
|
-
from tf_keras.src.layers.preprocessing import hashing
|
|
22
|
-
from tf_keras.src.layers.preprocessing.benchmarks import (
|
|
23
|
-
feature_column_benchmark as fc_bm,
|
|
24
|
-
)
|
|
25
|
-
|
|
26
|
-
# isort: off
|
|
27
|
-
from tensorflow.python.eager.def_function import (
|
|
28
|
-
function as tf_function,
|
|
29
|
-
)
|
|
30
|
-
|
|
31
|
-
NUM_REPEATS = 10
|
|
32
|
-
BATCH_SIZES = [32, 256]
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def embedding_varlen(batch_size, max_length):
|
|
36
|
-
"""Benchmark a variable-length embedding."""
|
|
37
|
-
# Data and constants.
|
|
38
|
-
|
|
39
|
-
num_buckets = 10000
|
|
40
|
-
vocab = fc_bm.create_vocabulary(32768)
|
|
41
|
-
data = fc_bm.create_string_data(
|
|
42
|
-
max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.0
|
|
43
|
-
)
|
|
44
|
-
|
|
45
|
-
# TF-Keras implementation
|
|
46
|
-
model = keras.Sequential()
|
|
47
|
-
model.add(
|
|
48
|
-
keras.Input(
|
|
49
|
-
shape=(max_length,), name="data", ragged=True, dtype=tf.string
|
|
50
|
-
)
|
|
51
|
-
)
|
|
52
|
-
model.add(hashing.Hashing(num_buckets))
|
|
53
|
-
|
|
54
|
-
# FC implementation
|
|
55
|
-
fc = tf.feature_column.categorical_column_with_hash_bucket(
|
|
56
|
-
"data", num_buckets
|
|
57
|
-
)
|
|
58
|
-
|
|
59
|
-
# Wrap the FC implementation in a tf.function for a fair comparison
|
|
60
|
-
@tf_function()
|
|
61
|
-
def fc_fn(tensors):
|
|
62
|
-
fc.transform_feature(
|
|
63
|
-
tf.__internal__.feature_column.FeatureTransformationCache(tensors),
|
|
64
|
-
None,
|
|
65
|
-
)
|
|
66
|
-
|
|
67
|
-
# Benchmark runs
|
|
68
|
-
keras_data = {"data": data}
|
|
69
|
-
k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
|
|
70
|
-
|
|
71
|
-
fc_data = {"data": data.to_sparse()}
|
|
72
|
-
fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
|
|
73
|
-
|
|
74
|
-
return k_avg_time, fc_avg_time
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
class BenchmarkLayer(fc_bm.LayerBenchmark):
|
|
78
|
-
"""Benchmark the layer forward pass."""
|
|
79
|
-
|
|
80
|
-
def benchmark_layer(self):
|
|
81
|
-
for batch in BATCH_SIZES:
|
|
82
|
-
name = f"hash|varlen|batch_{batch}"
|
|
83
|
-
k_time, f_time = embedding_varlen(batch_size=batch, max_length=256)
|
|
84
|
-
self.report(name, k_time, f_time, NUM_REPEATS)
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
if __name__ == "__main__":
|
|
88
|
-
tf.test.main()
|
|
89
|
-
|
|
@@ -1,110 +0,0 @@
|
|
|
1
|
-
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
# ==============================================================================
|
|
15
|
-
"""Benchmark for KPL implementation of vocabulary columns from files with dense
|
|
16
|
-
inputs."""
|
|
17
|
-
|
|
18
|
-
import os
|
|
19
|
-
|
|
20
|
-
import tensorflow.compat.v2 as tf
|
|
21
|
-
|
|
22
|
-
import tf_keras.src as keras
|
|
23
|
-
from tf_keras.src.layers.preprocessing import string_lookup
|
|
24
|
-
from tf_keras.src.layers.preprocessing.benchmarks import (
|
|
25
|
-
feature_column_benchmark as fc_bm,
|
|
26
|
-
)
|
|
27
|
-
|
|
28
|
-
# isort: off
|
|
29
|
-
from tensorflow.python.eager.def_function import (
|
|
30
|
-
function as tf_function,
|
|
31
|
-
)
|
|
32
|
-
|
|
33
|
-
NUM_REPEATS = 10
|
|
34
|
-
BATCH_SIZES = [32, 256]
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
class BenchmarkLayer(tf.test.TestCase, fc_bm.LayerBenchmark):
|
|
38
|
-
"""Benchmark the layer forward pass."""
|
|
39
|
-
|
|
40
|
-
def _write_to_temp_file(self, file_name, vocab_list):
|
|
41
|
-
vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt")
|
|
42
|
-
with tf.io.gfile.GFile(vocab_path, "w") as writer:
|
|
43
|
-
for vocab in vocab_list:
|
|
44
|
-
writer.write(vocab + "\n")
|
|
45
|
-
writer.flush()
|
|
46
|
-
writer.close()
|
|
47
|
-
return vocab_path
|
|
48
|
-
|
|
49
|
-
def embedding_varlen(self, batch_size, max_length):
|
|
50
|
-
"""Benchmark a variable-length embedding."""
|
|
51
|
-
# Data and constants.
|
|
52
|
-
vocab = fc_bm.create_vocabulary(32768)
|
|
53
|
-
|
|
54
|
-
path = self._write_to_temp_file("tmp", vocab)
|
|
55
|
-
|
|
56
|
-
data = fc_bm.create_string_data(
|
|
57
|
-
max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15
|
|
58
|
-
)
|
|
59
|
-
|
|
60
|
-
# TF-Keras implementation
|
|
61
|
-
model = keras.Sequential()
|
|
62
|
-
model.add(
|
|
63
|
-
keras.Input(shape=(max_length,), name="data", dtype=tf.string)
|
|
64
|
-
)
|
|
65
|
-
model.add(string_lookup.StringLookup(vocabulary=path, mask_token=None))
|
|
66
|
-
|
|
67
|
-
# FC implementation
|
|
68
|
-
fc = tf.feature_column.categorical_column_with_vocabulary_list(
|
|
69
|
-
key="data", vocabulary_list=vocab, num_oov_buckets=1
|
|
70
|
-
)
|
|
71
|
-
|
|
72
|
-
# Wrap the FC implementation in a tf.function for a fair comparison
|
|
73
|
-
@tf_function()
|
|
74
|
-
def fc_fn(tensors):
|
|
75
|
-
fc.transform_feature(
|
|
76
|
-
tf.__internal__.feature_column.FeatureTransformationCache(
|
|
77
|
-
tensors
|
|
78
|
-
),
|
|
79
|
-
None,
|
|
80
|
-
)
|
|
81
|
-
|
|
82
|
-
# Benchmark runs
|
|
83
|
-
keras_data = {
|
|
84
|
-
"data": data.to_tensor(
|
|
85
|
-
default_value="", shape=(batch_size, max_length)
|
|
86
|
-
)
|
|
87
|
-
}
|
|
88
|
-
k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
|
|
89
|
-
|
|
90
|
-
fc_data = {
|
|
91
|
-
"data": data.to_tensor(
|
|
92
|
-
default_value="", shape=(batch_size, max_length)
|
|
93
|
-
)
|
|
94
|
-
}
|
|
95
|
-
fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
|
|
96
|
-
|
|
97
|
-
return k_avg_time, fc_avg_time
|
|
98
|
-
|
|
99
|
-
def benchmark_layer(self):
|
|
100
|
-
for batch in BATCH_SIZES:
|
|
101
|
-
name = f"vocab_list|dense|batch_{batch}"
|
|
102
|
-
k_time, f_time = self.embedding_varlen(
|
|
103
|
-
batch_size=batch, max_length=256
|
|
104
|
-
)
|
|
105
|
-
self.report(name, k_time, f_time, NUM_REPEATS)
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
if __name__ == "__main__":
|
|
109
|
-
tf.test.main()
|
|
110
|
-
|
|
@@ -1,103 +0,0 @@
|
|
|
1
|
-
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
# ==============================================================================
|
|
15
|
-
"""Benchmark for KPL implementation of vocabulary columns from files with
|
|
16
|
-
varying-length inputs."""
|
|
17
|
-
|
|
18
|
-
import os
|
|
19
|
-
|
|
20
|
-
import tensorflow.compat.v2 as tf
|
|
21
|
-
|
|
22
|
-
import tf_keras.src as keras
|
|
23
|
-
from tf_keras.src.layers.preprocessing import string_lookup
|
|
24
|
-
from tf_keras.src.layers.preprocessing.benchmarks import (
|
|
25
|
-
feature_column_benchmark as fc_bm,
|
|
26
|
-
)
|
|
27
|
-
|
|
28
|
-
# isort: off
|
|
29
|
-
from tensorflow.python.eager.def_function import (
|
|
30
|
-
function as tf_function,
|
|
31
|
-
)
|
|
32
|
-
|
|
33
|
-
NUM_REPEATS = 10
|
|
34
|
-
BATCH_SIZES = [32, 256]
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
class BenchmarkLayer(tf.test.TestCase, fc_bm.LayerBenchmark):
|
|
38
|
-
"""Benchmark the layer forward pass."""
|
|
39
|
-
|
|
40
|
-
def _write_to_temp_file(self, file_name, vocab_list):
|
|
41
|
-
vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt")
|
|
42
|
-
with tf.io.gfile.GFile(vocab_path, "w") as writer:
|
|
43
|
-
for vocab in vocab_list:
|
|
44
|
-
writer.write(vocab + "\n")
|
|
45
|
-
writer.flush()
|
|
46
|
-
writer.close()
|
|
47
|
-
return vocab_path
|
|
48
|
-
|
|
49
|
-
def embedding_varlen(self, batch_size, max_length):
|
|
50
|
-
"""Benchmark a variable-length embedding."""
|
|
51
|
-
# Data and constants.
|
|
52
|
-
vocab = fc_bm.create_vocabulary(32768)
|
|
53
|
-
path = self._write_to_temp_file("tmp", vocab)
|
|
54
|
-
|
|
55
|
-
data = fc_bm.create_string_data(
|
|
56
|
-
max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15
|
|
57
|
-
)
|
|
58
|
-
|
|
59
|
-
# TF-Keras implementation
|
|
60
|
-
model = keras.Sequential()
|
|
61
|
-
model.add(
|
|
62
|
-
keras.Input(
|
|
63
|
-
shape=(max_length,), name="data", ragged=True, dtype=tf.string
|
|
64
|
-
)
|
|
65
|
-
)
|
|
66
|
-
model.add(string_lookup.StringLookup(vocabulary=path, mask_token=None))
|
|
67
|
-
|
|
68
|
-
# FC implementation
|
|
69
|
-
fc = tf.feature_column.sequence_categorical_column_with_vocabulary_list(
|
|
70
|
-
key="data", vocabulary_list=vocab, num_oov_buckets=1
|
|
71
|
-
)
|
|
72
|
-
|
|
73
|
-
# Wrap the FC implementation in a tf.function for a fair comparison
|
|
74
|
-
@tf_function()
|
|
75
|
-
def fc_fn(tensors):
|
|
76
|
-
fc.transform_feature(
|
|
77
|
-
tf.__internal__.feature_column.FeatureTransformationCache(
|
|
78
|
-
tensors
|
|
79
|
-
),
|
|
80
|
-
None,
|
|
81
|
-
)
|
|
82
|
-
|
|
83
|
-
# Benchmark runs
|
|
84
|
-
keras_data = {"data": data}
|
|
85
|
-
k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
|
|
86
|
-
|
|
87
|
-
fc_data = {"data": data.to_sparse()}
|
|
88
|
-
fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
|
|
89
|
-
|
|
90
|
-
return k_avg_time, fc_avg_time
|
|
91
|
-
|
|
92
|
-
def benchmark_layer(self):
|
|
93
|
-
for batch in BATCH_SIZES:
|
|
94
|
-
name = f"vocab_list|varlen|batch_{batch}"
|
|
95
|
-
k_time, f_time = self.embedding_varlen(
|
|
96
|
-
batch_size=batch, max_length=256
|
|
97
|
-
)
|
|
98
|
-
self.report(name, k_time, f_time, NUM_REPEATS)
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
if __name__ == "__main__":
|
|
102
|
-
tf.test.main()
|
|
103
|
-
|
|
@@ -1,87 +0,0 @@
|
|
|
1
|
-
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
# ==============================================================================
|
|
15
|
-
"""Benchmark for KPL implementation of vocabulary columns from lists with dense
|
|
16
|
-
inputs."""
|
|
17
|
-
|
|
18
|
-
import tensorflow.compat.v2 as tf
|
|
19
|
-
|
|
20
|
-
import tf_keras.src as keras
|
|
21
|
-
from tf_keras.src.layers.preprocessing import string_lookup
|
|
22
|
-
from tf_keras.src.layers.preprocessing.benchmarks import (
|
|
23
|
-
feature_column_benchmark as fc_bm,
|
|
24
|
-
)
|
|
25
|
-
|
|
26
|
-
# isort: off
|
|
27
|
-
from tensorflow.python.eager.def_function import (
|
|
28
|
-
function as tf_function,
|
|
29
|
-
)
|
|
30
|
-
|
|
31
|
-
NUM_REPEATS = 10
|
|
32
|
-
BATCH_SIZES = [32, 256]
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
def embedding_varlen(batch_size, max_length):
|
|
36
|
-
"""Benchmark a variable-length embedding."""
|
|
37
|
-
# Data and constants.
|
|
38
|
-
vocab = fc_bm.create_vocabulary(32768)
|
|
39
|
-
data = fc_bm.create_string_data(
|
|
40
|
-
max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15
|
|
41
|
-
)
|
|
42
|
-
|
|
43
|
-
# TF-Keras implementation
|
|
44
|
-
model = keras.Sequential()
|
|
45
|
-
model.add(keras.Input(shape=(max_length,), name="data", dtype=tf.string))
|
|
46
|
-
model.add(string_lookup.StringLookup(vocabulary=vocab, mask_token=None))
|
|
47
|
-
|
|
48
|
-
# FC implementation
|
|
49
|
-
fc = tf.feature_column.categorical_column_with_vocabulary_list(
|
|
50
|
-
key="data", vocabulary_list=vocab, num_oov_buckets=1
|
|
51
|
-
)
|
|
52
|
-
|
|
53
|
-
# Wrap the FC implementation in a tf.function for a fair comparison
|
|
54
|
-
@tf_function()
|
|
55
|
-
def fc_fn(tensors):
|
|
56
|
-
fc.transform_feature(
|
|
57
|
-
tf.__internal__.feature_column.FeatureTransformationCache(tensors),
|
|
58
|
-
None,
|
|
59
|
-
)
|
|
60
|
-
|
|
61
|
-
# Benchmark runs
|
|
62
|
-
keras_data = {
|
|
63
|
-
"data": data.to_tensor(default_value="", shape=(batch_size, max_length))
|
|
64
|
-
}
|
|
65
|
-
k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
|
|
66
|
-
|
|
67
|
-
fc_data = {
|
|
68
|
-
"data": data.to_tensor(default_value="", shape=(batch_size, max_length))
|
|
69
|
-
}
|
|
70
|
-
fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
|
|
71
|
-
|
|
72
|
-
return k_avg_time, fc_avg_time
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
class BenchmarkLayer(fc_bm.LayerBenchmark):
|
|
76
|
-
"""Benchmark the layer forward pass."""
|
|
77
|
-
|
|
78
|
-
def benchmark_layer(self):
|
|
79
|
-
for batch in BATCH_SIZES:
|
|
80
|
-
name = f"vocab_list|dense|batch_{batch}"
|
|
81
|
-
k_time, f_time = embedding_varlen(batch_size=batch, max_length=256)
|
|
82
|
-
self.report(name, k_time, f_time, NUM_REPEATS)
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
if __name__ == "__main__":
|
|
86
|
-
tf.test.main()
|
|
87
|
-
|
tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_indicator_dense_benchmark.py
DELETED
|
@@ -1,96 +0,0 @@
|
|
|
1
|
-
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
# ==============================================================================
|
|
15
|
-
"""Benchmark for KPL implementation of vocabulary columns + indicator from lists
|
|
16
|
-
with dense inputs."""
|
|
17
|
-
|
|
18
|
-
import tensorflow.compat.v2 as tf
|
|
19
|
-
|
|
20
|
-
import tf_keras.src as keras
|
|
21
|
-
from tf_keras.src.layers.preprocessing import category_encoding
|
|
22
|
-
from tf_keras.src.layers.preprocessing import string_lookup
|
|
23
|
-
from tf_keras.src.layers.preprocessing.benchmarks import (
|
|
24
|
-
feature_column_benchmark as fc_bm,
|
|
25
|
-
)
|
|
26
|
-
|
|
27
|
-
# isort: off
|
|
28
|
-
from tensorflow.python.eager.def_function import (
|
|
29
|
-
function as tf_function,
|
|
30
|
-
)
|
|
31
|
-
|
|
32
|
-
NUM_REPEATS = 10
|
|
33
|
-
BATCH_SIZES = [32, 256]
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
def embedding_varlen(batch_size, max_length):
|
|
37
|
-
"""Benchmark a variable-length embedding."""
|
|
38
|
-
# Data and constants.
|
|
39
|
-
vocab_size = 32768
|
|
40
|
-
vocab = fc_bm.create_vocabulary(vocab_size)
|
|
41
|
-
data = fc_bm.create_string_data(
|
|
42
|
-
max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15
|
|
43
|
-
)
|
|
44
|
-
|
|
45
|
-
# TF-Keras implementation
|
|
46
|
-
model = keras.Sequential()
|
|
47
|
-
model.add(keras.Input(shape=(max_length,), name="data", dtype=tf.string))
|
|
48
|
-
model.add(string_lookup.StringLookup(vocabulary=vocab, mask_token=None))
|
|
49
|
-
model.add(
|
|
50
|
-
category_encoding.CategoryEncoding(
|
|
51
|
-
num_tokens=vocab_size + 1, output_mode="count"
|
|
52
|
-
)
|
|
53
|
-
)
|
|
54
|
-
|
|
55
|
-
# FC implementation
|
|
56
|
-
fc = tf.feature_column.indicator_column(
|
|
57
|
-
tf.feature_column.categorical_column_with_vocabulary_list(
|
|
58
|
-
key="data", vocabulary_list=vocab, num_oov_buckets=1
|
|
59
|
-
)
|
|
60
|
-
)
|
|
61
|
-
|
|
62
|
-
# Wrap the FC implementation in a tf.function for a fair comparison
|
|
63
|
-
@tf_function()
|
|
64
|
-
def fc_fn(tensors):
|
|
65
|
-
fc.transform_feature(
|
|
66
|
-
tf.__internal__.feature_column.FeatureTransformationCache(tensors),
|
|
67
|
-
None,
|
|
68
|
-
)
|
|
69
|
-
|
|
70
|
-
# Benchmark runs
|
|
71
|
-
keras_data = {
|
|
72
|
-
"data": data.to_tensor(default_value="", shape=(batch_size, max_length))
|
|
73
|
-
}
|
|
74
|
-
k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
|
|
75
|
-
|
|
76
|
-
fc_data = {
|
|
77
|
-
"data": data.to_tensor(default_value="", shape=(batch_size, max_length))
|
|
78
|
-
}
|
|
79
|
-
fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
|
|
80
|
-
|
|
81
|
-
return k_avg_time, fc_avg_time
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
class BenchmarkLayer(fc_bm.LayerBenchmark):
|
|
85
|
-
"""Benchmark the layer forward pass."""
|
|
86
|
-
|
|
87
|
-
def benchmark_layer(self):
|
|
88
|
-
for batch in BATCH_SIZES:
|
|
89
|
-
name = f"vocab_list_indicator|dense|batch_{batch}"
|
|
90
|
-
k_time, f_time = embedding_varlen(batch_size=batch, max_length=256)
|
|
91
|
-
self.report(name, k_time, f_time, NUM_REPEATS)
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
if __name__ == "__main__":
|
|
95
|
-
tf.test.main()
|
|
96
|
-
|
tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_indicator_varlen_benchmark.py
DELETED
|
@@ -1,96 +0,0 @@
|
|
|
1
|
-
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
# ==============================================================================
|
|
15
|
-
"""Benchmark for KPL implementation of vocabulary columns + indicator from lists
|
|
16
|
-
with varying-length inputs."""
|
|
17
|
-
|
|
18
|
-
import tensorflow.compat.v2 as tf
|
|
19
|
-
|
|
20
|
-
import tf_keras.src as keras
|
|
21
|
-
from tf_keras.src.layers.preprocessing import category_encoding
|
|
22
|
-
from tf_keras.src.layers.preprocessing import string_lookup
|
|
23
|
-
from tf_keras.src.layers.preprocessing.benchmarks import (
|
|
24
|
-
feature_column_benchmark as fc_bm,
|
|
25
|
-
)
|
|
26
|
-
|
|
27
|
-
# isort: off
|
|
28
|
-
from tensorflow.python.eager.def_function import (
|
|
29
|
-
function as tf_function,
|
|
30
|
-
)
|
|
31
|
-
|
|
32
|
-
NUM_REPEATS = 10
|
|
33
|
-
BATCH_SIZES = [32, 256]
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
def embedding_varlen(batch_size, max_length):
|
|
37
|
-
"""Benchmark a variable-length embedding."""
|
|
38
|
-
# Data and constants.
|
|
39
|
-
vocab_size = 32768
|
|
40
|
-
vocab = fc_bm.create_vocabulary(vocab_size)
|
|
41
|
-
data = fc_bm.create_string_data(
|
|
42
|
-
max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15
|
|
43
|
-
)
|
|
44
|
-
|
|
45
|
-
# TF-Keras implementation
|
|
46
|
-
model = keras.Sequential()
|
|
47
|
-
model.add(
|
|
48
|
-
keras.Input(
|
|
49
|
-
shape=(max_length,), name="data", ragged=True, dtype=tf.string
|
|
50
|
-
)
|
|
51
|
-
)
|
|
52
|
-
model.add(string_lookup.StringLookup(vocabulary=vocab, mask_token=None))
|
|
53
|
-
model.add(
|
|
54
|
-
category_encoding.CategoryEncoding(
|
|
55
|
-
num_tokens=vocab_size + 1, output_mode="count"
|
|
56
|
-
)
|
|
57
|
-
)
|
|
58
|
-
|
|
59
|
-
# FC implementation
|
|
60
|
-
fc = tf.feature_column.indicator_column(
|
|
61
|
-
tf.feature_column.sequence_categorical_column_with_vocabulary_list(
|
|
62
|
-
key="data", vocabulary_list=vocab, num_oov_buckets=1
|
|
63
|
-
)
|
|
64
|
-
)
|
|
65
|
-
|
|
66
|
-
# Wrap the FC implementation in a tf.function for a fair comparison
|
|
67
|
-
@tf_function()
|
|
68
|
-
def fc_fn(tensors):
|
|
69
|
-
fc.transform_feature(
|
|
70
|
-
tf.__internal__.feature_column.FeatureTransformationCache(tensors),
|
|
71
|
-
None,
|
|
72
|
-
)
|
|
73
|
-
|
|
74
|
-
# Benchmark runs
|
|
75
|
-
keras_data = {"data": data}
|
|
76
|
-
k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
|
|
77
|
-
|
|
78
|
-
fc_data = {"data": data.to_sparse()}
|
|
79
|
-
fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
|
|
80
|
-
|
|
81
|
-
return k_avg_time, fc_avg_time
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
class BenchmarkLayer(fc_bm.LayerBenchmark):
|
|
85
|
-
"""Benchmark the layer forward pass."""
|
|
86
|
-
|
|
87
|
-
def benchmark_layer(self):
|
|
88
|
-
for batch in BATCH_SIZES:
|
|
89
|
-
name = f"vocab_list_indicator|varlen|batch_{batch}"
|
|
90
|
-
k_time, f_time = embedding_varlen(batch_size=batch, max_length=256)
|
|
91
|
-
self.report(name, k_time, f_time, NUM_REPEATS)
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
if __name__ == "__main__":
|
|
95
|
-
tf.test.main()
|
|
96
|
-
|