tf-keras-nightly 2.20.0.dev2025062209__py3-none-any.whl → 2.20.0.dev2025082909__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tf_keras/__init__.py +1 -1
- tf_keras/protobuf/projector_config_pb2.py +23 -12
- tf_keras/protobuf/saved_metadata_pb2.py +21 -10
- tf_keras/protobuf/versions_pb2.py +19 -8
- tf_keras/src/engine/base_layer.py +11 -0
- tf_keras/src/metrics/confusion_metrics.py +47 -1
- tf_keras/src/models/sharpness_aware_minimization.py +17 -7
- tf_keras/src/utils/metrics_utils.py +4 -1
- {tf_keras_nightly-2.20.0.dev2025062209.dist-info → tf_keras_nightly-2.20.0.dev2025082909.dist-info}/METADATA +1 -1
- {tf_keras_nightly-2.20.0.dev2025062209.dist-info → tf_keras_nightly-2.20.0.dev2025082909.dist-info}/RECORD +12 -34
- tf_keras/src/layers/preprocessing/benchmarks/bucketized_column_dense_benchmark.py +0 -85
- tf_keras/src/layers/preprocessing/benchmarks/category_encoding_benchmark.py +0 -84
- tf_keras/src/layers/preprocessing/benchmarks/category_hash_dense_benchmark.py +0 -89
- tf_keras/src/layers/preprocessing/benchmarks/category_hash_varlen_benchmark.py +0 -89
- tf_keras/src/layers/preprocessing/benchmarks/category_vocab_file_dense_benchmark.py +0 -110
- tf_keras/src/layers/preprocessing/benchmarks/category_vocab_file_varlen_benchmark.py +0 -103
- tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_dense_benchmark.py +0 -87
- tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_indicator_dense_benchmark.py +0 -96
- tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_indicator_varlen_benchmark.py +0 -96
- tf_keras/src/layers/preprocessing/benchmarks/category_vocab_list_varlen_benchmark.py +0 -87
- tf_keras/src/layers/preprocessing/benchmarks/discretization_adapt_benchmark.py +0 -109
- tf_keras/src/layers/preprocessing/benchmarks/embedding_dense_benchmark.py +0 -86
- tf_keras/src/layers/preprocessing/benchmarks/embedding_varlen_benchmark.py +0 -89
- tf_keras/src/layers/preprocessing/benchmarks/hashed_crossing_benchmark.py +0 -90
- tf_keras/src/layers/preprocessing/benchmarks/hashing_benchmark.py +0 -105
- tf_keras/src/layers/preprocessing/benchmarks/image_preproc_benchmark.py +0 -159
- tf_keras/src/layers/preprocessing/benchmarks/index_lookup_adapt_benchmark.py +0 -135
- tf_keras/src/layers/preprocessing/benchmarks/index_lookup_forward_benchmark.py +0 -144
- tf_keras/src/layers/preprocessing/benchmarks/normalization_adapt_benchmark.py +0 -124
- tf_keras/src/layers/preprocessing/benchmarks/weighted_embedding_varlen_benchmark.py +0 -99
- tf_keras/src/saving/legacy/saved_model/create_test_saved_model.py +0 -37
- tf_keras/src/tests/keras_doctest.py +0 -159
- {tf_keras_nightly-2.20.0.dev2025062209.dist-info → tf_keras_nightly-2.20.0.dev2025082909.dist-info}/WHEEL +0 -0
- {tf_keras_nightly-2.20.0.dev2025062209.dist-info → tf_keras_nightly-2.20.0.dev2025082909.dist-info}/top_level.txt +0 -0
|
@@ -1,159 +0,0 @@
|
|
|
1
|
-
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
# ==============================================================================
|
|
15
|
-
"""Benchmark for TF-Keras image preprocessing layer."""
|
|
16
|
-
|
|
17
|
-
import functools
|
|
18
|
-
import time
|
|
19
|
-
|
|
20
|
-
import numpy as np
|
|
21
|
-
import tensorflow.compat.v2 as tf
|
|
22
|
-
|
|
23
|
-
import tf_keras.src as keras
|
|
24
|
-
from tf_keras.src.layers.preprocessing import image_preprocessing
|
|
25
|
-
|
|
26
|
-
LOWER = 0.2
|
|
27
|
-
UPPER = 0.4
|
|
28
|
-
BATCH_SIZE = 32
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
def rotate(inputs):
|
|
32
|
-
"""rotate image."""
|
|
33
|
-
inputs_shape = tf.shape(inputs)
|
|
34
|
-
batch_size = inputs_shape[0]
|
|
35
|
-
img_hd = tf.cast(inputs_shape[1], tf.float32)
|
|
36
|
-
img_wd = tf.cast(inputs_shape[2], tf.float32)
|
|
37
|
-
min_angle = LOWER * 2.0 * np.pi
|
|
38
|
-
max_angle = UPPER * 2.0 * np.pi
|
|
39
|
-
angles = tf.random.uniform(
|
|
40
|
-
shape=[batch_size], minval=min_angle, maxval=max_angle
|
|
41
|
-
)
|
|
42
|
-
return image_preprocessing.transform(
|
|
43
|
-
inputs, image_preprocessing.get_rotation_matrix(angles, img_hd, img_wd)
|
|
44
|
-
)
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
def zoom(inputs):
|
|
48
|
-
"""zoom image."""
|
|
49
|
-
inputs_shape = tf.shape(inputs)
|
|
50
|
-
batch_size = inputs_shape[0]
|
|
51
|
-
img_hd = tf.cast(inputs_shape[1], tf.float32)
|
|
52
|
-
img_wd = tf.cast(inputs_shape[2], tf.float32)
|
|
53
|
-
height_zoom = tf.random.uniform(
|
|
54
|
-
shape=[batch_size, 1], minval=1.0 + LOWER, maxval=1.0 + UPPER
|
|
55
|
-
)
|
|
56
|
-
width_zoom = tf.random.uniform(
|
|
57
|
-
shape=[batch_size, 1], minval=1.0 + LOWER, maxval=1.0 + UPPER
|
|
58
|
-
)
|
|
59
|
-
zooms = tf.cast(
|
|
60
|
-
tf.concat([width_zoom, height_zoom], axis=1), dtype=tf.float32
|
|
61
|
-
)
|
|
62
|
-
return image_preprocessing.transform(
|
|
63
|
-
inputs, image_preprocessing.get_zoom_matrix(zooms, img_hd, img_wd)
|
|
64
|
-
)
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
def image_augmentation(inputs, batch_size):
|
|
68
|
-
"""image augmentation."""
|
|
69
|
-
img = inputs
|
|
70
|
-
img = tf.image.resize(img, size=[224, 224])
|
|
71
|
-
img = tf.image.random_crop(img, size=[batch_size, 224, 224, 3])
|
|
72
|
-
img = rotate(img)
|
|
73
|
-
img = zoom(img)
|
|
74
|
-
return img
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
class BenchmarkLayer(tf.test.Benchmark):
|
|
78
|
-
"""Benchmark the layer forward pass."""
|
|
79
|
-
|
|
80
|
-
def run_dataset_implementation(self, batch_size):
|
|
81
|
-
num_repeats = 5
|
|
82
|
-
starts = []
|
|
83
|
-
ends = []
|
|
84
|
-
for _ in range(num_repeats):
|
|
85
|
-
ds = tf.data.Dataset.from_tensor_slices(
|
|
86
|
-
np.random.random((batch_size, 256, 256, 3))
|
|
87
|
-
)
|
|
88
|
-
ds = ds.shuffle(batch_size * 100)
|
|
89
|
-
ds = ds.batch(batch_size)
|
|
90
|
-
ds = ds.prefetch(batch_size)
|
|
91
|
-
img_augmentation = functools.partial(
|
|
92
|
-
image_augmentation, batch_size=batch_size
|
|
93
|
-
)
|
|
94
|
-
ds = ds.map(img_augmentation, num_parallel_calls=8)
|
|
95
|
-
starts.append(time.time())
|
|
96
|
-
count = 0
|
|
97
|
-
# Benchmarked code begins here.
|
|
98
|
-
for i in ds:
|
|
99
|
-
_ = i
|
|
100
|
-
count += 1
|
|
101
|
-
# Benchmarked code ends here.
|
|
102
|
-
ends.append(time.time())
|
|
103
|
-
|
|
104
|
-
avg_time = np.mean(np.array(ends) - np.array(starts)) / count
|
|
105
|
-
return avg_time
|
|
106
|
-
|
|
107
|
-
def bm_layer_implementation(self, batch_size):
|
|
108
|
-
with tf.device("/gpu:0"):
|
|
109
|
-
img = keras.Input(shape=(256, 256, 3), dtype=tf.float32)
|
|
110
|
-
preprocessor = keras.Sequential(
|
|
111
|
-
[
|
|
112
|
-
image_preprocessing.Resizing(224, 224),
|
|
113
|
-
image_preprocessing.RandomCrop(height=224, width=224),
|
|
114
|
-
image_preprocessing.RandomRotation(factor=(0.2, 0.4)),
|
|
115
|
-
image_preprocessing.RandomFlip(mode="horizontal"),
|
|
116
|
-
image_preprocessing.RandomZoom(0.2, 0.2),
|
|
117
|
-
]
|
|
118
|
-
)
|
|
119
|
-
_ = preprocessor(img)
|
|
120
|
-
|
|
121
|
-
num_repeats = 5
|
|
122
|
-
starts = []
|
|
123
|
-
ends = []
|
|
124
|
-
for _ in range(num_repeats):
|
|
125
|
-
ds = tf.data.Dataset.from_tensor_slices(
|
|
126
|
-
np.random.random((batch_size, 256, 256, 3))
|
|
127
|
-
)
|
|
128
|
-
ds = ds.shuffle(batch_size * 100)
|
|
129
|
-
ds = ds.batch(batch_size)
|
|
130
|
-
ds = ds.prefetch(batch_size)
|
|
131
|
-
starts.append(time.time())
|
|
132
|
-
count = 0
|
|
133
|
-
# Benchmarked code begins here.
|
|
134
|
-
for i in ds:
|
|
135
|
-
_ = preprocessor(i)
|
|
136
|
-
count += 1
|
|
137
|
-
# Benchmarked code ends here.
|
|
138
|
-
ends.append(time.time())
|
|
139
|
-
|
|
140
|
-
avg_time = np.mean(np.array(ends) - np.array(starts)) / count
|
|
141
|
-
name = f"image_preprocessing|batch_{batch_size}"
|
|
142
|
-
baseline = self.run_dataset_implementation(batch_size)
|
|
143
|
-
extras = {
|
|
144
|
-
"dataset implementation baseline": baseline,
|
|
145
|
-
"delta seconds": (baseline - avg_time),
|
|
146
|
-
"delta percent": ((baseline - avg_time) / baseline) * 100,
|
|
147
|
-
}
|
|
148
|
-
self.report_benchmark(
|
|
149
|
-
iters=num_repeats, wall_time=avg_time, extras=extras, name=name
|
|
150
|
-
)
|
|
151
|
-
|
|
152
|
-
def benchmark_vocab_size_by_batch(self):
|
|
153
|
-
for batch in [32, 64, 256]:
|
|
154
|
-
self.bm_layer_implementation(batch_size=batch)
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
if __name__ == "__main__":
|
|
158
|
-
tf.test.main()
|
|
159
|
-
|
|
@@ -1,135 +0,0 @@
|
|
|
1
|
-
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
# ==============================================================================
|
|
15
|
-
"""Benchmark for TF-Keras text vectorization preprocessing layer's adapt method.
|
|
16
|
-
"""
|
|
17
|
-
|
|
18
|
-
import collections
|
|
19
|
-
import itertools
|
|
20
|
-
import random
|
|
21
|
-
import string
|
|
22
|
-
import time
|
|
23
|
-
|
|
24
|
-
import numpy as np
|
|
25
|
-
import tensorflow.compat.v2 as tf
|
|
26
|
-
|
|
27
|
-
import tf_keras.src as keras
|
|
28
|
-
from tf_keras.src.layers.preprocessing import index_lookup
|
|
29
|
-
|
|
30
|
-
tf.compat.v1.enable_v2_behavior()
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
# word_gen creates random sequences of ASCII letters (both lowercase and upper).
|
|
34
|
-
# The number of unique strings is ~2,700.
|
|
35
|
-
def word_gen():
|
|
36
|
-
for _ in itertools.count(1):
|
|
37
|
-
yield "".join(random.choice(string.ascii_letters) for i in range(2))
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def get_top_k(dataset, k):
|
|
41
|
-
"""Python implementation of vocabulary building using a defaultdict."""
|
|
42
|
-
counts = collections.defaultdict(int)
|
|
43
|
-
for tensor in dataset:
|
|
44
|
-
data = tensor.numpy()
|
|
45
|
-
for element in data:
|
|
46
|
-
counts[element] += 1
|
|
47
|
-
sorted_vocab = [
|
|
48
|
-
k
|
|
49
|
-
for k, _ in sorted(
|
|
50
|
-
counts.items(), key=lambda item: item[1], reverse=True
|
|
51
|
-
)
|
|
52
|
-
]
|
|
53
|
-
if len(sorted_vocab) > k:
|
|
54
|
-
sorted_vocab = sorted_vocab[:k]
|
|
55
|
-
return sorted_vocab
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
class BenchmarkAdapt(tf.test.Benchmark):
|
|
59
|
-
"""Benchmark adapt."""
|
|
60
|
-
|
|
61
|
-
def run_numpy_implementation(self, num_elements, batch_size, k):
|
|
62
|
-
"""Test the python implementation."""
|
|
63
|
-
ds = tf.data.Dataset.from_generator(
|
|
64
|
-
word_gen, tf.string, tf.TensorShape([])
|
|
65
|
-
)
|
|
66
|
-
batched_ds = ds.take(num_elements).batch(batch_size)
|
|
67
|
-
input_t = keras.Input(shape=(), dtype=tf.string)
|
|
68
|
-
layer = index_lookup.IndexLookup(
|
|
69
|
-
max_tokens=k,
|
|
70
|
-
num_oov_indices=0,
|
|
71
|
-
mask_token=None,
|
|
72
|
-
oov_token="OOV",
|
|
73
|
-
dtype=tf.string,
|
|
74
|
-
)
|
|
75
|
-
_ = layer(input_t)
|
|
76
|
-
num_repeats = 5
|
|
77
|
-
starts = []
|
|
78
|
-
ends = []
|
|
79
|
-
for _ in range(num_repeats):
|
|
80
|
-
starts.append(time.time())
|
|
81
|
-
vocab = get_top_k(batched_ds, k)
|
|
82
|
-
layer.set_vocabulary(vocab)
|
|
83
|
-
ends.append(time.time())
|
|
84
|
-
avg_time = np.mean(np.array(ends) - np.array(starts))
|
|
85
|
-
return avg_time
|
|
86
|
-
|
|
87
|
-
def bm_adapt_implementation(self, num_elements, batch_size, k):
|
|
88
|
-
"""Test the KPL adapt implementation."""
|
|
89
|
-
ds = tf.data.Dataset.from_generator(
|
|
90
|
-
word_gen, tf.string, tf.TensorShape([])
|
|
91
|
-
)
|
|
92
|
-
batched_ds = ds.take(num_elements).batch(batch_size)
|
|
93
|
-
input_t = keras.Input(shape=(), dtype=tf.string)
|
|
94
|
-
layer = index_lookup.IndexLookup(
|
|
95
|
-
max_tokens=k,
|
|
96
|
-
num_oov_indices=0,
|
|
97
|
-
mask_token=None,
|
|
98
|
-
oov_token="OOV",
|
|
99
|
-
dtype=tf.string,
|
|
100
|
-
)
|
|
101
|
-
_ = layer(input_t)
|
|
102
|
-
num_repeats = 5
|
|
103
|
-
starts = []
|
|
104
|
-
ends = []
|
|
105
|
-
for _ in range(num_repeats):
|
|
106
|
-
starts.append(time.time())
|
|
107
|
-
layer.adapt(batched_ds)
|
|
108
|
-
ends.append(time.time())
|
|
109
|
-
avg_time = np.mean(np.array(ends) - np.array(starts))
|
|
110
|
-
name = "index_lookup_adapt|%s_elements|vocab_size_%s|batch_%s" % (
|
|
111
|
-
num_elements,
|
|
112
|
-
k,
|
|
113
|
-
batch_size,
|
|
114
|
-
)
|
|
115
|
-
baseline = self.run_numpy_implementation(num_elements, batch_size, k)
|
|
116
|
-
extras = {
|
|
117
|
-
"numpy implementation baseline": baseline,
|
|
118
|
-
"delta seconds": (baseline - avg_time),
|
|
119
|
-
"delta percent": ((baseline - avg_time) / baseline) * 100,
|
|
120
|
-
}
|
|
121
|
-
self.report_benchmark(
|
|
122
|
-
iters=num_repeats, wall_time=avg_time, extras=extras, name=name
|
|
123
|
-
)
|
|
124
|
-
|
|
125
|
-
def benchmark_vocab_size_by_batch(self):
|
|
126
|
-
for vocab_size in [100, 1000, 10000, 100000, 1000000]:
|
|
127
|
-
for batch in [1, 16, 2048]:
|
|
128
|
-
self.bm_adapt_implementation(
|
|
129
|
-
vocab_size, batch, int(vocab_size / 10)
|
|
130
|
-
)
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
if __name__ == "__main__":
|
|
134
|
-
tf.test.main()
|
|
135
|
-
|
|
@@ -1,144 +0,0 @@
|
|
|
1
|
-
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
# ==============================================================================
|
|
15
|
-
"""Benchmark for TF-Keras text vectorization preprocessing layer's adapt method.
|
|
16
|
-
"""
|
|
17
|
-
|
|
18
|
-
import os
|
|
19
|
-
import random
|
|
20
|
-
import string
|
|
21
|
-
import time
|
|
22
|
-
|
|
23
|
-
import numpy as np
|
|
24
|
-
import tensorflow.compat.v2 as tf
|
|
25
|
-
|
|
26
|
-
import tf_keras.src as keras
|
|
27
|
-
from tf_keras.src.layers.preprocessing import index_lookup
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
# word_gen creates random sequences of ASCII letters (both lowercase and upper).
|
|
31
|
-
# The number of unique strings is ~2,700.
|
|
32
|
-
def tensor_gen(batch, num_elements):
|
|
33
|
-
data = []
|
|
34
|
-
for _ in range(batch):
|
|
35
|
-
batch_element = []
|
|
36
|
-
for _ in range(num_elements - 1):
|
|
37
|
-
tok = "".join(random.choice(string.ascii_letters) for i in range(2))
|
|
38
|
-
batch_element.append(tok)
|
|
39
|
-
batch_element.append("") # Explicitly test the empty string.
|
|
40
|
-
data.append(batch_element)
|
|
41
|
-
return tf.constant(data)
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
def get_vocab():
|
|
45
|
-
vocab = list(
|
|
46
|
-
set([a + b for a in string.ascii_letters for b in string.ascii_letters])
|
|
47
|
-
)
|
|
48
|
-
vocab.sort()
|
|
49
|
-
return vocab
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
# This class uses TestCase for get_temp_dir().
|
|
53
|
-
class BenchmarkLookup(tf.test.Benchmark):
|
|
54
|
-
"""Benchmark the index lookup layer's forward pass."""
|
|
55
|
-
|
|
56
|
-
def _write_to_temp_file(self, file_name, vocab_list):
|
|
57
|
-
vocab_path = os.path.join(self.get_temp_dir(), file_name + ".txt")
|
|
58
|
-
with tf.io.gfile.GFile(vocab_path, "w") as writer:
|
|
59
|
-
for vocab in vocab_list:
|
|
60
|
-
writer.write(vocab + "\n")
|
|
61
|
-
writer.flush()
|
|
62
|
-
writer.close()
|
|
63
|
-
return vocab_path
|
|
64
|
-
|
|
65
|
-
def run_numpy_implementation(self, data, vocab):
|
|
66
|
-
"""Test the python implementation."""
|
|
67
|
-
input_t = keras.Input(shape=(), dtype=tf.string)
|
|
68
|
-
layer = index_lookup.IndexLookup(
|
|
69
|
-
vocabulary=vocab,
|
|
70
|
-
max_tokens=None,
|
|
71
|
-
num_oov_indices=1,
|
|
72
|
-
mask_token="",
|
|
73
|
-
oov_token="OOV",
|
|
74
|
-
dtype=tf.string,
|
|
75
|
-
)
|
|
76
|
-
out_t = layer(input_t)
|
|
77
|
-
model = keras.Model(input_t, out_t)
|
|
78
|
-
num_repeats = 5
|
|
79
|
-
starts = []
|
|
80
|
-
ends = []
|
|
81
|
-
_ = model(data)
|
|
82
|
-
for _ in range(num_repeats):
|
|
83
|
-
starts.append(time.time())
|
|
84
|
-
out = model(data)
|
|
85
|
-
ends.append(time.time())
|
|
86
|
-
avg_time = np.mean(np.array(ends) - np.array(starts))
|
|
87
|
-
return avg_time, out
|
|
88
|
-
|
|
89
|
-
def bm_adapt_implementation(self, num_elements, batch_size):
|
|
90
|
-
"""Test the KPL adapt implementation."""
|
|
91
|
-
vocab = get_vocab()
|
|
92
|
-
vocab_file = self._write_to_temp_file("vocab", vocab)
|
|
93
|
-
vocabulary_initializer = tf.lookup.TextFileInitializer(
|
|
94
|
-
filename=vocab_file,
|
|
95
|
-
key_dtype=tf.string,
|
|
96
|
-
key_index=tf.lookup.TextFileIndex.WHOLE_LINE,
|
|
97
|
-
value_dtype=tf.int64,
|
|
98
|
-
value_index=tf.lookup.TextFileIndex.LINE_NUMBER,
|
|
99
|
-
value_index_offset=2,
|
|
100
|
-
)
|
|
101
|
-
input_t = keras.Input(shape=(), dtype=tf.string)
|
|
102
|
-
layer = index_lookup.IndexLookup(
|
|
103
|
-
vocabulary=vocabulary_initializer,
|
|
104
|
-
max_tokens=None,
|
|
105
|
-
num_oov_indices=1,
|
|
106
|
-
mask_token="",
|
|
107
|
-
oov_token="OOV",
|
|
108
|
-
dtype=tf.string,
|
|
109
|
-
)
|
|
110
|
-
out_t = layer(input_t)
|
|
111
|
-
model = keras.Model(input_t, out_t)
|
|
112
|
-
num_repeats = 5
|
|
113
|
-
starts = []
|
|
114
|
-
ends = []
|
|
115
|
-
data = tensor_gen(batch_size, num_elements)
|
|
116
|
-
_ = model(data)
|
|
117
|
-
for _ in range(num_repeats):
|
|
118
|
-
starts.append(time.time())
|
|
119
|
-
_ = model(data)
|
|
120
|
-
ends.append(time.time())
|
|
121
|
-
avg_time = np.mean(np.array(ends) - np.array(starts))
|
|
122
|
-
baseline, _ = self.run_numpy_implementation(data, vocab)
|
|
123
|
-
extras = {
|
|
124
|
-
"numpy implementation baseline": baseline,
|
|
125
|
-
"delta seconds": (baseline - avg_time),
|
|
126
|
-
"delta percent": ((baseline - avg_time) / baseline) * 100,
|
|
127
|
-
}
|
|
128
|
-
name = "index_lookup_forward|%s_elements|batch_%s" % (
|
|
129
|
-
num_elements,
|
|
130
|
-
batch_size,
|
|
131
|
-
)
|
|
132
|
-
self.report_benchmark(
|
|
133
|
-
iters=num_repeats, wall_time=avg_time, extras=extras, name=name
|
|
134
|
-
)
|
|
135
|
-
|
|
136
|
-
def benchmark_vocab_size_by_batch(self):
|
|
137
|
-
for tensor_size in [100, 1000, 10000]:
|
|
138
|
-
for batch in [1, 16, 2048]:
|
|
139
|
-
self.bm_adapt_implementation(tensor_size, batch)
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
if __name__ == "__main__":
|
|
143
|
-
tf.test.main()
|
|
144
|
-
|
|
@@ -1,124 +0,0 @@
|
|
|
1
|
-
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
# ==============================================================================
|
|
15
|
-
"""Benchmark for TF-Keras text vectorization preprocessing layer's adapt method.
|
|
16
|
-
"""
|
|
17
|
-
|
|
18
|
-
import time
|
|
19
|
-
|
|
20
|
-
import numpy as np
|
|
21
|
-
import tensorflow.compat.v2 as tf
|
|
22
|
-
|
|
23
|
-
import tf_keras.src as keras
|
|
24
|
-
from tf_keras.src.layers.preprocessing import normalization
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
def reduce_fn(state, values):
|
|
28
|
-
"""tf.data.Dataset-friendly implementation of mean and variance."""
|
|
29
|
-
k, n, ex, ex2 = state
|
|
30
|
-
# If this is the first iteration, we pick the first value to be 'k',
|
|
31
|
-
# which helps with precision - we assume that k is close to an average
|
|
32
|
-
# value and calculate mean and variance with respect to that.
|
|
33
|
-
k = tf.cond(tf.equal(n, 0), lambda: values[0], lambda: k)
|
|
34
|
-
|
|
35
|
-
sum_v = tf.reduce_sum(values, axis=0)
|
|
36
|
-
sum_v2 = tf.reduce_sum(tf.square(values), axis=0)
|
|
37
|
-
ones = tf.ones_like(values, dtype=tf.int32)
|
|
38
|
-
batch_size = tf.reduce_sum(ones, axis=0)
|
|
39
|
-
batch_size_f = tf.cast(batch_size, tf.float32)
|
|
40
|
-
|
|
41
|
-
ex = 0 + sum_v - tf.multiply(batch_size_f, k)
|
|
42
|
-
ex2 = (
|
|
43
|
-
0
|
|
44
|
-
+ sum_v2
|
|
45
|
-
+ tf.multiply(
|
|
46
|
-
batch_size_f,
|
|
47
|
-
(tf.square(k) - tf.multiply(tf.multiply(2.0, k), sum_v)),
|
|
48
|
-
)
|
|
49
|
-
)
|
|
50
|
-
|
|
51
|
-
return (k, n + batch_size, ex, ex2)
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
class BenchmarkAdapt(tf.test.Benchmark):
|
|
55
|
-
"""Benchmark adapt."""
|
|
56
|
-
|
|
57
|
-
def run_dataset_implementation(self, num_elements, batch_size):
|
|
58
|
-
input_t = keras.Input(shape=(1,))
|
|
59
|
-
layer = normalization.Normalization()
|
|
60
|
-
_ = layer(input_t)
|
|
61
|
-
|
|
62
|
-
num_repeats = 5
|
|
63
|
-
starts = []
|
|
64
|
-
ends = []
|
|
65
|
-
for _ in range(num_repeats):
|
|
66
|
-
ds = tf.data.Dataset.range(num_elements)
|
|
67
|
-
ds = ds.map(lambda x: tf.expand_dims(tf.cast(x, tf.float32), -1))
|
|
68
|
-
ds = ds.batch(batch_size)
|
|
69
|
-
|
|
70
|
-
starts.append(time.time())
|
|
71
|
-
# Benchmarked code begins here.
|
|
72
|
-
k, n, ex, ex2 = ds.reduce((0.0, 0, 0.0, 0.0), reduce_fn)
|
|
73
|
-
mean = k.numpy() + ex.numpy() / n.numpy()
|
|
74
|
-
var = (ex2.numpy() - (ex.numpy() * ex.numpy()) / n.numpy()) / (
|
|
75
|
-
n.numpy() - 1
|
|
76
|
-
)
|
|
77
|
-
layer.set_weights([mean, var])
|
|
78
|
-
# Benchmarked code ends here.
|
|
79
|
-
ends.append(time.time())
|
|
80
|
-
|
|
81
|
-
avg_time = np.mean(np.array(ends) - np.array(starts))
|
|
82
|
-
return avg_time
|
|
83
|
-
|
|
84
|
-
def bm_adapt_implementation(self, num_elements, batch_size):
|
|
85
|
-
"""Test the KPL adapt implementation."""
|
|
86
|
-
input_t = keras.Input(shape=(1,), dtype=tf.float32)
|
|
87
|
-
layer = normalization.Normalization()
|
|
88
|
-
_ = layer(input_t)
|
|
89
|
-
|
|
90
|
-
num_repeats = 5
|
|
91
|
-
starts = []
|
|
92
|
-
ends = []
|
|
93
|
-
for _ in range(num_repeats):
|
|
94
|
-
ds = tf.data.Dataset.range(num_elements)
|
|
95
|
-
ds = ds.map(lambda x: tf.expand_dims(tf.cast(x, tf.float32), -1))
|
|
96
|
-
ds = ds.batch(batch_size)
|
|
97
|
-
|
|
98
|
-
starts.append(time.time())
|
|
99
|
-
# Benchmarked code begins here.
|
|
100
|
-
layer.adapt(ds)
|
|
101
|
-
# Benchmarked code ends here.
|
|
102
|
-
ends.append(time.time())
|
|
103
|
-
|
|
104
|
-
avg_time = np.mean(np.array(ends) - np.array(starts))
|
|
105
|
-
name = f"normalization_adapt|{num_elements}_elements|batch_{batch_size}"
|
|
106
|
-
baseline = self.run_dataset_implementation(num_elements, batch_size)
|
|
107
|
-
extras = {
|
|
108
|
-
"tf.data implementation baseline": baseline,
|
|
109
|
-
"delta seconds": (baseline - avg_time),
|
|
110
|
-
"delta percent": ((baseline - avg_time) / baseline) * 100,
|
|
111
|
-
}
|
|
112
|
-
self.report_benchmark(
|
|
113
|
-
iters=num_repeats, wall_time=avg_time, extras=extras, name=name
|
|
114
|
-
)
|
|
115
|
-
|
|
116
|
-
def benchmark_vocab_size_by_batch(self):
|
|
117
|
-
for vocab_size in [100, 1000, 10000, 100000, 1000000]:
|
|
118
|
-
for batch in [1, 16, 2048]:
|
|
119
|
-
self.bm_adapt_implementation(vocab_size, batch)
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
if __name__ == "__main__":
|
|
123
|
-
tf.test.main()
|
|
124
|
-
|
|
@@ -1,99 +0,0 @@
|
|
|
1
|
-
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
|
|
2
|
-
#
|
|
3
|
-
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
# you may not use this file except in compliance with the License.
|
|
5
|
-
# You may obtain a copy of the License at
|
|
6
|
-
#
|
|
7
|
-
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
-
#
|
|
9
|
-
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
-
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
-
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
-
# See the License for the specific language governing permissions and
|
|
13
|
-
# limitations under the License.
|
|
14
|
-
# ==============================================================================
|
|
15
|
-
"""Benchmark for KPL implementation of weighted embedding column with
|
|
16
|
-
varying-length inputs."""
|
|
17
|
-
|
|
18
|
-
import tensorflow.compat.v2 as tf
|
|
19
|
-
|
|
20
|
-
import tf_keras.src as keras
|
|
21
|
-
from tf_keras.src.layers.preprocessing.benchmarks import (
|
|
22
|
-
feature_column_benchmark as fc_bm,
|
|
23
|
-
)
|
|
24
|
-
|
|
25
|
-
# isort: off
|
|
26
|
-
from tensorflow.python.eager.def_function import (
|
|
27
|
-
function as tf_function,
|
|
28
|
-
)
|
|
29
|
-
|
|
30
|
-
NUM_REPEATS = 10
|
|
31
|
-
BATCH_SIZES = [32, 256]
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
### KPL AND FC IMPLEMENTATION BENCHMARKS ###
|
|
35
|
-
def embedding_varlen(batch_size, max_length):
|
|
36
|
-
"""Benchmark a variable-length embedding."""
|
|
37
|
-
# Data and constants.
|
|
38
|
-
embedding_size = 32768
|
|
39
|
-
data = fc_bm.create_data(
|
|
40
|
-
max_length, batch_size * NUM_REPEATS, embedding_size - 1, dtype=int
|
|
41
|
-
)
|
|
42
|
-
weight = tf.ones_like(data, dtype=tf.float32)
|
|
43
|
-
|
|
44
|
-
# TF-Keras implementation
|
|
45
|
-
data_input = keras.Input(
|
|
46
|
-
shape=(None,), ragged=True, name="data", dtype=tf.int64
|
|
47
|
-
)
|
|
48
|
-
weight_input = keras.Input(
|
|
49
|
-
shape=(None,), ragged=True, name="weight", dtype=tf.float32
|
|
50
|
-
)
|
|
51
|
-
embedded_data = keras.layers.Embedding(embedding_size, 256)(data_input)
|
|
52
|
-
weighted_embedding = tf.multiply(
|
|
53
|
-
embedded_data, tf.expand_dims(weight_input, -1)
|
|
54
|
-
)
|
|
55
|
-
reduced_embedding = tf.reduce_sum(weighted_embedding, axis=1)
|
|
56
|
-
model = keras.Model([data_input, weight_input], reduced_embedding)
|
|
57
|
-
|
|
58
|
-
# FC implementation
|
|
59
|
-
fc = tf.feature_column.embedding_column(
|
|
60
|
-
tf.feature_column.weighted_categorical_column(
|
|
61
|
-
tf.feature_column.categorical_column_with_identity(
|
|
62
|
-
"data", num_buckets=embedding_size - 1
|
|
63
|
-
),
|
|
64
|
-
weight_feature_key="weight",
|
|
65
|
-
),
|
|
66
|
-
dimension=256,
|
|
67
|
-
)
|
|
68
|
-
|
|
69
|
-
# Wrap the FC implementation in a tf.function for a fair comparison
|
|
70
|
-
@tf_function()
|
|
71
|
-
def fc_fn(tensors):
|
|
72
|
-
fc.transform_feature(
|
|
73
|
-
tf.__internal__.feature_column.FeatureTransformationCache(tensors),
|
|
74
|
-
None,
|
|
75
|
-
)
|
|
76
|
-
|
|
77
|
-
# Benchmark runs
|
|
78
|
-
keras_data = {"data": data, "weight": weight}
|
|
79
|
-
k_avg_time = fc_bm.run_keras(keras_data, model, batch_size, NUM_REPEATS)
|
|
80
|
-
|
|
81
|
-
fc_data = {"data": data.to_sparse(), "weight": weight.to_sparse()}
|
|
82
|
-
fc_avg_time = fc_bm.run_fc(fc_data, fc_fn, batch_size, NUM_REPEATS)
|
|
83
|
-
|
|
84
|
-
return k_avg_time, fc_avg_time
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
class BenchmarkLayer(fc_bm.LayerBenchmark):
|
|
88
|
-
"""Benchmark the layer forward pass."""
|
|
89
|
-
|
|
90
|
-
def benchmark_layer(self):
|
|
91
|
-
for batch in BATCH_SIZES:
|
|
92
|
-
name = f"weighted_embedding|varlen|batch_{batch}"
|
|
93
|
-
k_time, f_time = embedding_varlen(batch_size=batch, max_length=256)
|
|
94
|
-
self.report(name, k_time, f_time, NUM_REPEATS)
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
if __name__ == "__main__":
|
|
98
|
-
tf.test.main()
|
|
99
|
-
|
|
@@ -1,37 +0,0 @@
|
|
|
1
|
-
"""A binary that creates a serialized SavedModel from a keras model.
|
|
2
|
-
|
|
3
|
-
This is used in tests to ensure that model serialization is deterministic across
|
|
4
|
-
different processes.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import tensorflow.compat.v2 as tf
|
|
8
|
-
from absl import app
|
|
9
|
-
from absl import flags
|
|
10
|
-
|
|
11
|
-
from tf_keras.src import regularizers
|
|
12
|
-
from tf_keras.src.testing_infra import test_utils
|
|
13
|
-
|
|
14
|
-
flags.DEFINE_string("output_path", "", "The path to write the SavedModel at.")
|
|
15
|
-
|
|
16
|
-
FLAGS = flags.FLAGS
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
def main(_) -> None:
|
|
20
|
-
with test_utils.model_type_scope("functional"):
|
|
21
|
-
model = test_utils.get_small_mlp(1, 4, input_dim=3)
|
|
22
|
-
model.layers[-1].activity_regularizer = regularizers.get("l2")
|
|
23
|
-
model.activity_regularizer = regularizers.get("l2")
|
|
24
|
-
model.compile(loss="mse", optimizer="rmsprop")
|
|
25
|
-
|
|
26
|
-
def callable_loss():
|
|
27
|
-
return tf.reduce_sum(model.weights[0])
|
|
28
|
-
|
|
29
|
-
model.add_loss(callable_loss)
|
|
30
|
-
|
|
31
|
-
print(f"_____Writing saved model to: {FLAGS.output_path}")
|
|
32
|
-
model.save(FLAGS.output_path)
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
if __name__ == "__main__":
|
|
36
|
-
app.run(main)
|
|
37
|
-
|