tf-keras-nightly 2.20.0.dev2025062209__py3-none-any.whl → 2.20.0.dev2025082909__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tf_keras/__init__.py +1 -1
- tf_keras/protobuf/projector_config_pb2.py +23 -12
- tf_keras/protobuf/saved_metadata_pb2.py +21 -10
- tf_keras/protobuf/versions_pb2.py +19 -8
- tf_keras/src/engine/base_layer.py +11 -0
- tf_keras/src/metrics/confusion_metrics.py +47 -1
- tf_keras/src/models/sharpness_aware_minimization.py +17 -7
- tf_keras/src/utils/metrics_utils.py +4 -1
- {tf_keras_nightly-2.20.0.dev2025062209.dist-info → tf_keras_nightly-2.20.0.dev2025082909.dist-info}/METADATA +1 -1
- {tf_keras_nightly-2.20.0.dev2025062209.dist-info → tf_keras_nightly-2.20.0.dev2025082909.dist-info}/RECORD +12 -34
- tf_keras/src/layers/preprocessing/benchmarks/bucketized_column_dense_benchmark.py +0 -85
- tf_keras/src/layers/preprocessing/benchmarks/category_encoding_benchmark.py +0 -84
- tf_keras/src/layers/preprocessing/benchmarks/category_hash_dense_benchmark.py +0 -89
- tf_keras/src/layers/preprocessing/benchmarks/category_hash_varlen_benchmark.py +0 -89
- tf_keras/src/layers/preprocessing/benchmarks/category_vocab_file_dense_benchmark.py +0 -110
- tf_keras/src/layers/preprocessing/benchmarks/category_vocab_file_varlen_benchmark.py +0 -103
- tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_dense_benchmark.py +0 -87
- tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_indicator_dense_benchmark.py +0 -96
- tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_indicator_varlen_benchmark.py +0 -96
- tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_varlen_benchmark.py +0 -87
- tf_keras/src/layers/preprocessing/benchmarks/discretization_adapt_benchmark.py +0 -109
- tf_keras/src/layers/preprocessing/benchmarks/embedding_dense_benchmark.py +0 -86
- tf_keras/src/layers/preprocessing/benchmarks/embedding_varlen_benchmark.py +0 -89
- tf_keras/src/layers/preprocessing/benchmarks/hashed_crossing_benchmark.py +0 -90
- tf_keras/src/layers/preprocessing/benchmarks/hashing_benchmark.py +0 -105
- tf_keras/src/layers/preprocessing/benchmarks/image_preproc_benchmark.py +0 -159
- tf_keras/src/layers/preprocessing/benchmarks/index_lookup_adapt_benchmark.py +0 -135
- tf_keras/src/layers/preprocessing/benchmarks/index_lookup_forward_benchmark.py +0 -144
- tf_keras/src/layers/preprocessing/benchmarks/normalization_adapt_benchmark.py +0 -124
- tf_keras/src/layers/preprocessing/benchmarks/weighted_embedding_varlen_benchmark.py +0 -99
- tf_keras/src/saving/legacy/saved_model/create_test_saved_model.py +0 -37
- tf_keras/src/tests/keras_doctest.py +0 -159
- {tf_keras_nightly-2.20.0.dev2025062209.dist-info → tf_keras_nightly-2.20.0.dev2025082909.dist-info}/WHEEL +0 -0
- {tf_keras_nightly-2.20.0.dev2025062209.dist-info → tf_keras_nightly-2.20.0.dev2025082909.dist-info}/top_level.txt +0 -0
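
Unified diffs for the removed files appear below. A comparison like this can also be reproduced locally from the two wheels; the following is a minimal sketch, assuming both .whl files have already been downloaded (e.g. via pip download tf-keras-nightly==<version> --no-deps). The OLD/NEW paths and the wheel_texts helper are illustrative, not part of the published packages.

# Minimal sketch: regenerate a file-level diff between the two wheels locally.
# Assumes both wheels were downloaded beforehand; OLD/NEW paths and the
# wheel_texts helper are illustrative, not part of the published packages.
import difflib
import zipfile

OLD = "tf_keras_nightly-2.20.0.dev2025062209-py3-none-any.whl"
NEW = "tf_keras_nightly-2.20.0.dev2025082909-py3-none-any.whl"


def wheel_texts(path):
    """Map each .py member of a wheel (a zip archive) to its decoded lines."""
    with zipfile.ZipFile(path) as zf:
        return {
            name: zf.read(name).decode("utf-8", errors="replace").splitlines()
            for name in zf.namelist()
            if name.endswith(".py")
        }


old, new = wheel_texts(OLD), wheel_texts(NEW)
for name in sorted(old.keys() | new.keys()):
    for line in difflib.unified_diff(
        old.get(name, []),
        new.get(name, []),
        fromfile=f"old/{name}",
        tofile=f"new/{name}",
        lineterm="",
    ):
        print(line)
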
--- tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_varlen_benchmark.py
+++ /dev/null
@@ -1,87 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Benchmark for KPL implementation of vocabulary columns from lists with
-varying-length inputs."""
-
-import tensorflow.compat.v2 as tf
-
-import tf_keras.src as keras
-from tf_keras.src.layers.preprocessing import string_lookup
-from tf_keras.src.layers.preprocessing.benchmarks import (
-    feature_column_benchmark as fc_bm,
-)
-
-# isort: off
-from tensorflow.python.eager.def_function import (
-    function as tf_function,
-)
-
-NUM_REPEATS = 10
-BATCH_SIZES = [32, 256]
-
-
-def embedding_varlen(batch_size, max_length):
-    """Benchmark a variable-length embedding."""
-    # Data and constants.
-    vocab = fc_bm.create_vocabulary(32768)
-    data = fc_bm.create_string_data(
-        max_length, batch_size * NUM_REPEATS, vocab, pct_oov=0.15
-    )
-
-    # TF-Keras implementation
-    model = keras.Sequential()
-    model.add(
-        keras.Input(
-            shape=(max_length,), name="data", ragged=True, dtype=tf.string
-        )
-    )
-    model.add(string_lookup.StringLookup(vocabulary=vocab, mask_token=None))
-
-    # FC implementation
-    fc = tf.feature_column.sequence_categorical_column_with_vocabulary_list(
-        key="data", vocabulary_list=vocab, num_oov_buckets=1
-    )
-
-    # Wrap the FC implementation in a tf.function for a fair comparison
-    @tf_function()
-    def fc_fn(tensors):
-        fc.transform_feature(
-            tf.__internal__.feature_column.FeatureTransformationCache(tensors),
-            None,
-        )
-
-    # Benchmark runs
-    keras_data = {"data": data}
-    k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
-
-    fc_data = {"data": data.to_sparse()}
-    fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
-
-    return k_avg_time, fc_avg_time
-
-
-class BenchmarkLayer(fc_bm.LayerBenchmark):
-    """Benchmark the layer forward pass."""
-
-    def benchmark_layer(self):
-        for batch in BATCH_SIZES:
-            name = f"vocab_list|varlen|batch_{batch}"
-            k_time, f_time = embedding_varlen(batch_size=batch, max_length=256)
-            self.report(name, k_time, f_time, NUM_REPEATS)
-
-
-if __name__ == "__main__":
-    tf.test.main()
-
--- tf_keras/src/layers/preprocessing/benchmarks/discretization_adapt_benchmark.py
+++ /dev/null
@@ -1,109 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Benchmark for TF-Keras discretization preprocessing layer's adapt method."""
-
-import time
-
-import numpy as np
-import tensorflow.compat.v2 as tf
-
-import tf_keras.src as keras
-from tf_keras.src.layers.preprocessing import discretization
-
-EPSILON = 0.1
-
-
-def reduce_fn(state, values, epsilon=EPSILON):
-    """tf.data.Dataset-friendly implementation of mean and variance."""
-
-    (state_,) = state
-    summary = discretization.summarize(values, epsilon)
-    if np.sum(state_[:, 0]) == 0:
-        return (summary,)
-    return (discretization.merge_summaries(state_, summary, epsilon),)
-
-
-class BenchmarkAdapt(tf.test.Benchmark):
-    """Benchmark adapt."""
-
-    def run_dataset_implementation(self, num_elements, batch_size):
-        input_t = keras.Input(shape=(1,))
-        layer = discretization.Discretization()
-        _ = layer(input_t)
-
-        num_repeats = 5
-        starts = []
-        ends = []
-        for _ in range(num_repeats):
-            ds = tf.data.Dataset.range(num_elements)
-            ds = ds.map(lambda x: tf.expand_dims(tf.cast(x, tf.float32), -1))
-            ds = ds.batch(batch_size)
-
-            starts.append(time.time())
-            # Benchmarked code begins here.
-            state = ds.reduce((np.zeros((1, 2)),), reduce_fn)
-
-            bins = discretization.get_bucket_boundaries(state, 100)
-            layer.set_weights([bins])
-            # Benchmarked code ends here.
-            ends.append(time.time())
-
-        avg_time = np.mean(np.array(ends) - np.array(starts))
-        return avg_time
-
-    def bm_adapt_implementation(self, num_elements, batch_size):
-        """Test the KPL adapt implementation."""
-        input_t = keras.Input(shape=(1,), dtype=tf.float32)
-        layer = discretization.Discretization()
-        _ = layer(input_t)
-
-        num_repeats = 5
-        starts = []
-        ends = []
-        for _ in range(num_repeats):
-            ds = tf.data.Dataset.range(num_elements)
-            ds = ds.map(lambda x: tf.expand_dims(tf.cast(x, tf.float32), -1))
-            ds = ds.batch(batch_size)
-
-            starts.append(time.time())
-            # Benchmarked code begins here.
-            layer.adapt(ds)
-            # Benchmarked code ends here.
-            ends.append(time.time())
-
-        avg_time = np.mean(np.array(ends) - np.array(starts))
-        name = "discretization_adapt|%s_elements|batch_%s" % (
-            num_elements,
-            batch_size,
-        )
-        baseline = self.run_dataset_implementation(num_elements, batch_size)
-        extras = {
-            "tf.data implementation baseline": baseline,
-            "delta seconds": (baseline - avg_time),
-            "delta percent": ((baseline - avg_time) / baseline) * 100,
-        }
-        self.report_benchmark(
-            iters=num_repeats, wall_time=avg_time, extras=extras, name=name
-        )
-
-    def benchmark_vocab_size_by_batch(self):
-        for vocab_size in [100, 1000, 10000, 100000, 1000000]:
-            for batch in [64 * 2048]:
-                self.bm_adapt_implementation(vocab_size, batch)
-
-
-if __name__ == "__main__":
-    tf.test.main()
-
--- tf_keras/src/layers/preprocessing/benchmarks/embedding_dense_benchmark.py
+++ /dev/null
@@ -1,86 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Benchmark for KPL implementation of embedding column with dense inputs."""
-
-import tensorflow.compat.v2 as tf
-
-import tf_keras.src as keras
-from tf_keras.src.layers.preprocessing.benchmarks import (
-    feature_column_benchmark as fc_bm,
-)
-
-# isort: off
-from tensorflow.python.eager.def_function import (
-    function as tf_function,
-)
-
-NUM_REPEATS = 10
-BATCH_SIZES = [32, 256]
-
-
-### KPL AND FC IMPLEMENTATION BENCHMARKS ###
-def embedding_varlen(batch_size, max_length):
-    """Benchmark a variable-length embedding."""
-    # Data and constants.
-    embedding_size = 32768
-    data = fc_bm.create_data(
-        max_length, batch_size * NUM_REPEATS, embedding_size - 1, dtype=int
-    )
-
-    # TF-Keras implementation
-    model = keras.Sequential()
-    model.add(keras.Input(shape=(None,), name="data", dtype=tf.int64))
-    model.add(keras.layers.Embedding(embedding_size, 256))
-    model.add(keras.layers.Lambda(lambda x: tf.reduce_mean(x, axis=-1)))
-
-    # FC implementation
-    fc = tf.feature_column.embedding_column(
-        tf.feature_column.categorical_column_with_identity(
-            "data", num_buckets=embedding_size - 1
-        ),
-        dimension=256,
-    )
-
-    # Wrap the FC implementation in a tf.function for a fair comparison
-    @tf_function()
-    def fc_fn(tensors):
-        fc.transform_feature(
-            tf.__internal__.feature_column.FeatureTransformationCache(tensors),
-            None,
-        )
-
-    # Benchmark runs
-    keras_data = {"data": data.to_tensor(default_value=0)}
-    k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
-
-    fc_data = {"data": data.to_tensor(default_value=0)}
-    fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
-
-    return k_avg_time, fc_avg_time
-
-
-class BenchmarkLayer(fc_bm.LayerBenchmark):
-    """Benchmark the layer forward pass."""
-
-    def benchmark_layer(self):
-        for batch in BATCH_SIZES:
-            name = f"embedding|dense|batch_{batch}"
-            k_time, f_time = embedding_varlen(batch_size=batch, max_length=256)
-            self.report(name, k_time, f_time, NUM_REPEATS)
-
-
-if __name__ == "__main__":
-    tf.test.main()
-
--- tf_keras/src/layers/preprocessing/benchmarks/embedding_varlen_benchmark.py
+++ /dev/null
@@ -1,89 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Benchmark for KPL implementation of embedding column with varying-length
-inputs."""
-
-import tensorflow.compat.v2 as tf
-
-import tf_keras.src as keras
-from tf_keras.src.layers.preprocessing.benchmarks import (
-    feature_column_benchmark as fc_bm,
-)
-
-# isort: off
-from tensorflow.python.eager.def_function import (
-    function as tf_function,
-)
-
-NUM_REPEATS = 10
-BATCH_SIZES = [32, 256]
-
-
-### KPL AND FC IMPLEMENTATION BENCHMARKS ###
-def embedding_varlen(batch_size, max_length):
-    """Benchmark a variable-length embedding."""
-    # Data and constants.
-    embedding_size = 32768
-    data = fc_bm.create_data(
-        max_length, batch_size * NUM_REPEATS, embedding_size - 1, dtype=int
-    )
-
-    # TF-Keras implementation
-    model = keras.Sequential()
-    model.add(
-        keras.Input(shape=(None,), ragged=True, name="data", dtype=tf.int64)
-    )
-    model.add(keras.layers.Embedding(embedding_size, 256))
-    model.add(keras.layers.Lambda(lambda x: tf.reduce_mean(x, axis=-1)))
-
-    # FC implementation
-    fc = tf.feature_column.embedding_column(
-        tf.feature_column.categorical_column_with_identity(
-            "data", num_buckets=embedding_size - 1
-        ),
-        dimension=256,
-    )
-
-    # Wrap the FC implementation in a tf.function for a fair comparison
-    @tf_function()
-    def fc_fn(tensors):
-        fc.transform_feature(
-            tf.__internal__.feature_column.FeatureTransformationCache(tensors),
-            None,
-        )
-
-    # Benchmark runs
-    keras_data = {"data": data}
-    k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
-
-    fc_data = {"data": data.to_sparse()}
-    fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
-
-    return k_avg_time, fc_avg_time
-
-
-class BenchmarkLayer(fc_bm.LayerBenchmark):
-    """Benchmark the layer forward pass."""
-
-    def benchmark_layer(self):
-        for batch in BATCH_SIZES:
-            name = f"embedding|varlen|batch_{batch}"
-            k_time, f_time = embedding_varlen(batch_size=batch, max_length=256)
-            self.report(name, k_time, f_time, NUM_REPEATS)
-
-
-if __name__ == "__main__":
-    tf.test.main()
-
--- tf_keras/src/layers/preprocessing/benchmarks/hashed_crossing_benchmark.py
+++ /dev/null
@@ -1,90 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Benchmark for KPL implementation of categorical cross hash columns with dense
-inputs."""
-
-
-import tensorflow.compat.v2 as tf
-
-import tf_keras.src as keras
-from tf_keras.src.layers.preprocessing import hashed_crossing
-from tf_keras.src.layers.preprocessing.benchmarks import (
-    feature_column_benchmark as fc_bm,
-)
-
-# isort: off
-from tensorflow.python.eager.def_function import (
-    function as tf_function,
-)
-
-NUM_REPEATS = 10
-BATCH_SIZES = [32, 256]
-
-
-def embedding_varlen(batch_size):
-    """Benchmark a variable-length embedding."""
-    # Data and constants.
-    num_buckets = 10000
-    data_a = tf.random.uniform(
-        shape=(batch_size * NUM_REPEATS, 1), maxval=32768, dtype=tf.int64
-    )
-    data_b = tf.strings.as_string(data_a)
-
-    # TF-Keras implementation
-    input_1 = keras.Input(shape=(1,), name="data_a", dtype=tf.int64)
-    input_2 = keras.Input(shape=(1,), name="data_b", dtype=tf.string)
-    outputs = hashed_crossing.HashedCrossing(num_buckets)([input_1, input_2])
-    model = keras.Model([input_1, input_2], outputs)
-
-    # FC implementation
-    fc = tf.feature_column.crossed_column(["data_a", "data_b"], num_buckets)
-
-    # Wrap the FC implementation in a tf.function for a fair comparison
-    @tf_function()
-    def fc_fn(tensors):
-        fc.transform_feature(
-            tf.__internal__.feature_column.FeatureTransformationCache(tensors),
-            None,
-        )
-
-    # Benchmark runs
-    keras_data = {
-        "data_a": data_a,
-        "data_b": data_b,
-    }
-    k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
-
-    fc_data = {
-        "data_a": data_a,
-        "data_b": data_b,
-    }
-    fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
-
-    return k_avg_time, fc_avg_time
-
-
-class BenchmarkLayer(fc_bm.LayerBenchmark):
-    """Benchmark the layer forward pass."""
-
-    def benchmark_layer(self):
-        for batch in BATCH_SIZES:
-            name = f"hashed_cross|dense|batch_{batch}"
-            k_time, f_time = embedding_varlen(batch_size=batch)
-            self.report(name, k_time, f_time, NUM_REPEATS)
-
-
-if __name__ == "__main__":
-    tf.test.main()
-
--- tf_keras/src/layers/preprocessing/benchmarks/hashing_benchmark.py
+++ /dev/null
@@ -1,105 +0,0 @@
-# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Benchmark for TF-Keras hashing preprocessing layer."""
-
-import itertools
-import random
-import string
-import time
-
-import numpy as np
-import tensorflow.compat.v2 as tf
-
-import tf_keras.src as keras
-from tf_keras.src.layers.preprocessing import hashing
-
-
-# word_gen creates random sequences of ASCII letters (both lowercase and upper).
-# The number of unique strings is ~2,700.
-def word_gen():
-    for _ in itertools.count(1):
-        yield "".join(random.choice(string.ascii_letters) for i in range(2))
-
-
-class BenchmarkLayer(tf.test.Benchmark):
-    """Benchmark the layer forward pass."""
-
-    def run_dataset_implementation(self, batch_size):
-        num_repeats = 5
-        starts = []
-        ends = []
-        for _ in range(num_repeats):
-            ds = tf.data.Dataset.from_generator(
-                word_gen, tf.string, tf.TensorShape([])
-            )
-            ds = ds.shuffle(batch_size * 100)
-            ds = ds.batch(batch_size)
-            num_batches = 5
-            ds = ds.take(num_batches)
-            ds = ds.prefetch(num_batches)
-            starts.append(time.time())
-            # Benchmarked code begins here.
-            for i in ds:
-                _ = tf.strings.to_hash_bucket(i, num_buckets=2)
-            # Benchmarked code ends here.
-            ends.append(time.time())
-
-        avg_time = np.mean(np.array(ends) - np.array(starts)) / num_batches
-        return avg_time
-
-    def bm_layer_implementation(self, batch_size):
-        input_1 = keras.Input(shape=(None,), dtype=tf.string, name="word")
-        layer = hashing.Hashing(num_bins=2)
-        _ = layer(input_1)
-
-        num_repeats = 5
-        starts = []
-        ends = []
-        for _ in range(num_repeats):
-            ds = tf.data.Dataset.from_generator(
-                word_gen, tf.string, tf.TensorShape([])
-            )
-            ds = ds.shuffle(batch_size * 100)
-            ds = ds.batch(batch_size)
-            num_batches = 5
-            ds = ds.take(num_batches)
-            ds = ds.prefetch(num_batches)
-            starts.append(time.time())
-            # Benchmarked code begins here.
-            for i in ds:
-                _ = layer(i)
-            # Benchmarked code ends here.
-            ends.append(time.time())
-
-        avg_time = np.mean(np.array(ends) - np.array(starts)) / num_batches
-        name = f"hashing|batch_{batch_size}"
-        baseline = self.run_dataset_implementation(batch_size)
-        extras = {
-            "dataset implementation baseline": baseline,
-            "delta seconds": (baseline - avg_time),
-            "delta percent": ((baseline - avg_time) / baseline) * 100,
-        }
-        self.report_benchmark(
-            iters=num_repeats, wall_time=avg_time, extras=extras, name=name
-        )
-
-    def benchmark_vocab_size_by_batch(self):
-        for batch in [32, 64, 256]:
-            self.bm_layer_implementation(batch_size=batch)
-
-
-if __name__ == "__main__":
-    tf.test.main()
-