tfds-nightly 4.9.9.dev202508080045__py3-none-any.whl → 4.9.9.dev202508090044__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tensorflow_datasets/conftest.py +18 -5
- tensorflow_datasets/core/constants.py +2 -1
- tensorflow_datasets/core/utils/file_utils_test.py +1 -9
- tensorflow_datasets/scripts/cli/build_test.py +70 -75
- tensorflow_datasets/scripts/cli/cli_utils.py +49 -8
- tensorflow_datasets/scripts/cli/conftest.py +4 -3
- tensorflow_datasets/scripts/cli/main.py +3 -21
- tensorflow_datasets/scripts/utils/flag_utils.py +2 -2
- {tfds_nightly-4.9.9.dev202508080045.dist-info → tfds_nightly-4.9.9.dev202508090044.dist-info}/METADATA +1 -1
- {tfds_nightly-4.9.9.dev202508080045.dist-info → tfds_nightly-4.9.9.dev202508090044.dist-info}/RECORD +15 -15
- {tfds_nightly-4.9.9.dev202508080045.dist-info → tfds_nightly-4.9.9.dev202508090044.dist-info}/WHEEL +0 -0
- {tfds_nightly-4.9.9.dev202508080045.dist-info → tfds_nightly-4.9.9.dev202508090044.dist-info}/entry_points.txt +0 -0
- {tfds_nightly-4.9.9.dev202508080045.dist-info → tfds_nightly-4.9.9.dev202508090044.dist-info}/licenses/AUTHORS +0 -0
- {tfds_nightly-4.9.9.dev202508080045.dist-info → tfds_nightly-4.9.9.dev202508090044.dist-info}/licenses/LICENSE +0 -0
- {tfds_nightly-4.9.9.dev202508080045.dist-info → tfds_nightly-4.9.9.dev202508090044.dist-info}/top_level.txt +0 -0
tensorflow_datasets/conftest.py
CHANGED
@@ -20,16 +20,21 @@ plugins (hooks and fixtures) common to all tests.
|
|
20
20
|
|
21
21
|
See: https://docs.pytest.org/en/latest/writing_plugins.html
|
22
22
|
"""
|
23
|
+
|
23
24
|
from __future__ import annotations
|
24
25
|
|
25
26
|
import builtins
|
26
27
|
import importlib
|
28
|
+
import os
|
29
|
+
import pathlib
|
27
30
|
import sys
|
28
31
|
import typing
|
29
32
|
from typing import Iterator, Type
|
30
33
|
|
34
|
+
from etils import epath
|
31
35
|
import pytest
|
32
36
|
from tensorflow_datasets import setup_teardown
|
37
|
+
from tensorflow_datasets.core import constants
|
33
38
|
|
34
39
|
if typing.TYPE_CHECKING:
|
35
40
|
from tensorflow_datasets import testing
|
@@ -52,11 +57,9 @@ def disable_community_datasets():
|
|
52
57
|
# visibility isn't automatically set.
|
53
58
|
from tensorflow_datasets.core import visibility # pylint: disable=g-import-not-at-top
|
54
59
|
|
55
|
-
visibility.set_availables(
|
56
|
-
|
57
|
-
|
58
|
-
]
|
59
|
-
)
|
60
|
+
visibility.set_availables([
|
61
|
+
visibility.DatasetType.TFDS_PUBLIC,
|
62
|
+
])
|
60
63
|
|
61
64
|
|
62
65
|
@pytest.fixture(scope='session', autouse=True)
|
@@ -144,3 +147,13 @@ def dummy_dataset(
|
|
144
147
|
return _make_dataset(tmp_path_factory, testing.DummyDataset)
|
145
148
|
|
146
149
|
|
150
|
+
@pytest.fixture(name='default_data_dir')
|
151
|
+
def mock_default_data_dir(
|
152
|
+
monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path
|
153
|
+
) -> epath.Path:
|
154
|
+
"""Sets the default data dir to a temp dir."""
|
155
|
+
default_data_dir = epath.Path(tmp_path) / 'default_data_dir'
|
156
|
+
monkeypatch.setattr(constants, 'DATA_DIR', os.fspath(default_data_dir))
|
157
|
+
return default_data_dir
|
158
|
+
|
159
|
+
|
@@ -18,6 +18,7 @@
|
|
18
18
|
# IMPORTANT: when changing values here, update docstrings.
|
19
19
|
|
20
20
|
import os
|
21
|
+
from typing import Final
|
21
22
|
|
22
23
|
# Directory in which datasets are declared within TFDS sources.
|
23
24
|
DATASETS_TFDS_SRC_DIR = 'datasets'
|
@@ -27,7 +28,7 @@ SRC_BASE_URL = 'https://github.com/tensorflow/datasets/tree/master/'
|
|
27
28
|
|
28
29
|
# Directory where to store processed datasets.
|
29
30
|
# If modifying this, should also update `scripts/cli/build.py` `--data_dir`
|
30
|
-
DATA_DIR = os.environ.get(
|
31
|
+
DATA_DIR: Final[str] = os.environ.get(
|
31
32
|
'TFDS_DATA_DIR',
|
32
33
|
os.path.join(os.path.expanduser('~'), 'tensorflow_datasets'),
|
33
34
|
)
|
@@ -62,16 +62,8 @@ def _assert_data_dir(
|
|
62
62
|
assert data_dir == expected_data_dir
|
63
63
|
|
64
64
|
|
65
|
-
@pytest.fixture(name='default_data_dir')
|
66
|
-
def mock_default_data_dir(monkeypatch, tmp_path):
|
67
|
-
"""Sets the default data dir to a temp dir."""
|
68
|
-
default_data_dir = tmp_path / 'default_data_dir'
|
69
|
-
monkeypatch.setattr(constants, 'DATA_DIR', default_data_dir)
|
70
|
-
return default_data_dir
|
71
|
-
|
72
|
-
|
73
65
|
@pytest.fixture(name='other_data_dir')
|
74
|
-
def mock_other_data_dir(default_data_dir):
|
66
|
+
def mock_other_data_dir(default_data_dir: epath.Path):
|
75
67
|
"""Adds another data dir to the registered data dirs."""
|
76
68
|
other_data_dir = default_data_dir.parent / 'other_data_dir'
|
77
69
|
file_utils.add_data_dir(other_data_dir)
|
@@ -13,14 +13,12 @@
|
|
13
13
|
# See the License for the specific language governing permissions and
|
14
14
|
# limitations under the License.
|
15
15
|
|
16
|
-
|
17
|
-
|
16
|
+
from collections.abc import Callable, Iterator
|
18
17
|
import contextlib
|
19
18
|
import dataclasses
|
19
|
+
import functools
|
20
20
|
import multiprocessing
|
21
21
|
import os
|
22
|
-
import pathlib
|
23
|
-
from typing import Dict, Iterator, List, Optional
|
24
22
|
from unittest import mock
|
25
23
|
|
26
24
|
from etils import epath
|
@@ -45,7 +43,7 @@ class DummyDatasetNoGenerate(tfds.testing.DummyDataset):
|
|
45
43
|
|
46
44
|
@utils.classproperty
|
47
45
|
@classmethod
|
48
|
-
def url_infos(cls) ->
|
46
|
+
def url_infos(cls) -> dict[str, download.checksums.UrlInfo] | None:
|
49
47
|
return {
|
50
48
|
'http://data.org/file1.zip': download.checksums.UrlInfo(
|
51
49
|
size=42,
|
@@ -55,33 +53,21 @@ class DummyDatasetNoGenerate(tfds.testing.DummyDataset):
|
|
55
53
|
}
|
56
54
|
|
57
55
|
|
58
|
-
@pytest.fixture(scope='function', autouse=True)
|
59
|
-
def mock_default_data_dir(tmp_path: pathlib.Path):
|
60
|
-
"""Changes the default `--data_dir` to tmp_path."""
|
61
|
-
tmp_path = tmp_path / 'datasets'
|
62
|
-
default_data_dir = os.environ.get('TFDS_DATA_DIR')
|
63
|
-
try:
|
64
|
-
os.environ['TFDS_DATA_DIR'] = os.fspath(tmp_path)
|
65
|
-
yield tmp_path
|
66
|
-
finally:
|
67
|
-
if default_data_dir:
|
68
|
-
os.environ['TFDS_DATA_DIR'] = default_data_dir
|
69
|
-
else:
|
70
|
-
del os.environ['TFDS_DATA_DIR']
|
71
|
-
|
72
|
-
|
73
56
|
@contextlib.contextmanager
|
74
57
|
def mock_cwd(path: epath.PathLike) -> Iterator[None]:
|
75
58
|
"""Mock the current directory."""
|
76
|
-
path =
|
59
|
+
path = epath.Path(path)
|
77
60
|
assert path.exists() and path.is_dir() # Check given path is valid cwd dir
|
78
|
-
with mock.patch('
|
61
|
+
with mock.patch.object(os, 'getcwd', return_value=os.fspath(path)):
|
79
62
|
yield
|
80
63
|
|
81
64
|
|
82
|
-
def _build(
|
65
|
+
def _build(
|
66
|
+
cmd_flags: str, data_dir: epath.Path, mock_download_and_prepare: bool = True
|
67
|
+
) -> list[str]:
|
83
68
|
"""Executes `tfds build {cmd_flags}` and returns the list of generated ds."""
|
84
69
|
# Execute the command
|
70
|
+
cmd_flags = f'--data_dir={data_dir} {cmd_flags}'
|
85
71
|
args = main._parse_flags(f'tfds build {cmd_flags}'.split())
|
86
72
|
|
87
73
|
original_dl_and_prepare = tfds.core.DatasetBuilder.download_and_prepare
|
@@ -101,9 +87,8 @@ def _build(cmd_flags: str, mock_download_and_prepare: bool = True) -> List[str]:
|
|
101
87
|
else:
|
102
88
|
return original_dl_and_prepare(self, *args, **kwargs)
|
103
89
|
|
104
|
-
with mock.patch(
|
105
|
-
|
106
|
-
_download_and_prepare,
|
90
|
+
with mock.patch.object(
|
91
|
+
tfds.core.DatasetBuilder, 'download_and_prepare', _download_and_prepare
|
107
92
|
):
|
108
93
|
main.main(args)
|
109
94
|
queue.put(None)
|
@@ -115,31 +100,39 @@ def _build(cmd_flags: str, mock_download_and_prepare: bool = True) -> List[str]:
|
|
115
100
|
return generated_ds_names
|
116
101
|
|
117
102
|
|
118
|
-
|
119
|
-
|
120
|
-
|
103
|
+
@pytest.fixture(name='build')
|
104
|
+
def mock_build(
|
105
|
+
default_data_dir: epath.Path,
|
106
|
+
) -> Callable[[str, bool], list[str]]:
|
107
|
+
"""Returns a function to execute `tfds build`."""
|
108
|
+
return functools.partial(_build, data_dir=default_data_dir)
|
109
|
+
|
110
|
+
|
111
|
+
def test_build_single(build):
|
112
|
+
assert build('mnist') == ['mnist']
|
113
|
+
assert build('mnist:3.0.1') == ['mnist']
|
121
114
|
# Keyword arguments also possible
|
122
|
-
assert
|
115
|
+
assert build('--datasets mnist') == ['mnist']
|
123
116
|
|
124
117
|
with pytest.raises(tfds.core.registered.DatasetNotFoundError):
|
125
|
-
|
118
|
+
build('unknown_dataset')
|
126
119
|
|
127
120
|
with pytest.raises(AssertionError, match='cannot be loaded at version 1.0.0'):
|
128
|
-
|
121
|
+
build('mnist:1.0.0') # Can only build the last version
|
129
122
|
|
130
123
|
with pytest.raises(ValueError, match='not have config'):
|
131
|
-
|
124
|
+
build('mnist --config_idx 0')
|
132
125
|
|
133
126
|
|
134
|
-
def test_build_multiple():
|
127
|
+
def test_build_multiple(build):
|
135
128
|
# Multiple datasets can be built in a single call
|
136
|
-
assert
|
129
|
+
assert build('mnist imagenet2012 cifar10') == [
|
137
130
|
'mnist',
|
138
131
|
'imagenet2012',
|
139
132
|
'cifar10',
|
140
133
|
]
|
141
134
|
# Keyword arguments also possible
|
142
|
-
assert
|
135
|
+
assert build('mnist --datasets imagenet2012 cifar10') == [
|
143
136
|
'mnist',
|
144
137
|
'imagenet2012',
|
145
138
|
'cifar10',
|
@@ -147,9 +140,9 @@ def test_build_multiple():
|
|
147
140
|
|
148
141
|
|
149
142
|
@pytest.mark.parametrize('num_processes', range(1, 4))
|
150
|
-
def test_build_parallel(num_processes):
|
143
|
+
def test_build_parallel(build, num_processes):
|
151
144
|
# Order is not guaranteed
|
152
|
-
assert set(
|
145
|
+
assert set(build(f'trivia_qa --num-processes={num_processes}')) == set([
|
153
146
|
'trivia_qa/rc',
|
154
147
|
'trivia_qa/rc.nocontext',
|
155
148
|
'trivia_qa/unfiltered',
|
@@ -157,9 +150,9 @@ def test_build_parallel(num_processes):
|
|
157
150
|
])
|
158
151
|
|
159
152
|
|
160
|
-
def test_build_dataset_configs():
|
153
|
+
def test_build_dataset_configs(build):
|
161
154
|
# By default, all configs are built
|
162
|
-
assert
|
155
|
+
assert build('trivia_qa') == [
|
163
156
|
'trivia_qa/rc',
|
164
157
|
'trivia_qa/rc.nocontext',
|
165
158
|
'trivia_qa/unfiltered',
|
@@ -169,47 +162,47 @@ def test_build_dataset_configs():
|
|
169
162
|
# If config is set, only the defined config is generated
|
170
163
|
|
171
164
|
# --config_idx
|
172
|
-
assert
|
165
|
+
assert build('trivia_qa --config_idx=0') == ['trivia_qa/rc']
|
173
166
|
|
174
167
|
# --config
|
175
|
-
assert
|
168
|
+
assert build('trivia_qa --config unfiltered.nocontext') == [
|
176
169
|
'trivia_qa/unfiltered.nocontext',
|
177
170
|
]
|
178
171
|
|
179
172
|
# --config Json
|
180
173
|
config_json = '{"name":"my_config","description":"abcd"}'
|
181
|
-
assert
|
174
|
+
assert build(f'imdb_reviews --config {config_json}') == [
|
182
175
|
'imdb_reviews/my_config',
|
183
176
|
]
|
184
177
|
|
185
178
|
# name/config
|
186
|
-
assert
|
179
|
+
assert build('trivia_qa/unfiltered.nocontext') == [
|
187
180
|
'trivia_qa/unfiltered.nocontext'
|
188
181
|
]
|
189
182
|
|
190
183
|
with pytest.raises(ValueError, match='Config should only be defined once'):
|
191
|
-
|
184
|
+
build('trivia_qa/unfiltered.nocontext --config_idx=0')
|
192
185
|
|
193
186
|
with pytest.raises(ValueError, match='greater than number of configs'):
|
194
|
-
|
187
|
+
build('trivia_qa --config_idx 100')
|
195
188
|
|
196
189
|
|
197
|
-
def test_exclude_datasets():
|
190
|
+
def test_exclude_datasets(build):
|
198
191
|
# Exclude all datasets except 2
|
199
192
|
all_ds = [b for b in tfds.list_builders() if b not in ('mnist', 'cifar10')]
|
200
193
|
all_ds_str = ','.join(all_ds)
|
201
194
|
|
202
|
-
assert
|
195
|
+
assert build(f'--exclude_datasets {all_ds_str}') == [
|
203
196
|
'cifar10',
|
204
197
|
'mnist',
|
205
198
|
]
|
206
199
|
|
207
200
|
with pytest.raises(ValueError, match="--exclude_datasets can't be used"):
|
208
|
-
|
201
|
+
build('mnist --exclude_datasets cifar10')
|
209
202
|
|
210
203
|
|
211
|
-
def test_build_overwrite(
|
212
|
-
data_dir =
|
204
|
+
def test_build_overwrite(build, default_data_dir: epath.Path):
|
205
|
+
data_dir = default_data_dir / 'mnist/3.0.1'
|
213
206
|
data_dir.mkdir(parents=True)
|
214
207
|
metadata_path = tfds.core.tfds_path(
|
215
208
|
'testing/test_data/dataset_info/mnist/3.0.1'
|
@@ -219,80 +212,82 @@ def test_build_overwrite(mock_default_data_dir: pathlib.Path): # pylint: disabl
|
|
219
212
|
data_dir.joinpath(f.name).write_text(f.read_text())
|
220
213
|
|
221
214
|
# By default, will skip generation if the data already exists
|
222
|
-
assert
|
215
|
+
assert build('mnist') == ['mnist'] # Called, but no-op
|
223
216
|
assert data_dir.exists()
|
224
217
|
|
225
|
-
assert
|
218
|
+
assert build('mnist --overwrite') == ['mnist']
|
226
219
|
assert not data_dir.exists() # Previous data-dir has been removed
|
227
220
|
|
228
221
|
|
229
|
-
def test_max_examples_per_split_0(
|
230
|
-
assert
|
222
|
+
def test_max_examples_per_split_0(build, default_data_dir: epath.Path):
|
223
|
+
assert build(
|
231
224
|
'dummy_dataset_no_generate --max_examples_per_split 0',
|
232
225
|
mock_download_and_prepare=False,
|
233
226
|
) == ['dummy_dataset_no_generate']
|
234
227
|
|
235
|
-
builder_path =
|
228
|
+
builder_path = default_data_dir / 'dummy_dataset_no_generate/1.0.0'
|
236
229
|
# Dataset has been generated
|
237
230
|
assert builder_path.exists()
|
238
231
|
# tf-records files have not been generated
|
239
|
-
assert sorted(builder_path.iterdir()) == [
|
240
|
-
|
241
|
-
|
232
|
+
assert sorted(p.name for p in builder_path.iterdir()) == [
|
233
|
+
'dataset_info.json',
|
234
|
+
'features.json',
|
242
235
|
]
|
243
236
|
|
244
237
|
|
245
|
-
def test_build_files():
|
238
|
+
def test_build_files(build):
|
246
239
|
# Make sure DummyDataset isn't registered by default
|
247
240
|
with pytest.raises(tfds.core.registered.DatasetNotFoundError):
|
248
|
-
|
241
|
+
build('dummy_dataset')
|
249
242
|
|
250
243
|
with pytest.raises(FileNotFoundError, match='Could not find .* script'):
|
251
|
-
|
244
|
+
build('')
|
252
245
|
|
253
246
|
# cd .../datasets/dummy_dataset && tfds build
|
254
247
|
with mock_cwd(_DUMMY_DATASET_PATH):
|
255
|
-
assert
|
248
|
+
assert build('') == ['dummy_dataset']
|
256
249
|
|
257
250
|
# cd .../datasets/dummy_dataset && tfds build dummy_dataset.py
|
258
251
|
with mock_cwd(_DUMMY_DATASET_PATH):
|
259
|
-
assert
|
252
|
+
assert build('dummy_dataset.py') == ['dummy_dataset']
|
260
253
|
|
261
254
|
# cd .../datasets/ && tfds build dummy_dataset
|
262
255
|
with mock_cwd(_DUMMY_DATASET_PATH.parent):
|
263
|
-
assert
|
256
|
+
assert build('dummy_dataset') == ['dummy_dataset']
|
264
257
|
|
265
258
|
# cd .../datasets/ && tfds build dummy_dataset --imports=xxx
|
266
259
|
# --imports is passed. so do not load dataset from file
|
267
260
|
with mock_cwd(_DUMMY_DATASET_PATH.parent):
|
268
261
|
with pytest.raises(tfds.core.registered.DatasetNotFoundError):
|
269
|
-
assert
|
262
|
+
assert build('dummy_dataset --imports=os')
|
270
263
|
|
271
264
|
# cd .../datasets/ && tfds build dummy_dataset/dummy_dataset
|
272
265
|
with mock_cwd(_DUMMY_DATASET_PATH.parent):
|
273
|
-
assert
|
266
|
+
assert build('dummy_dataset/dummy_dataset') == ['dummy_dataset']
|
274
267
|
|
275
268
|
|
276
269
|
# Somehow only with tf-nightly, `dummy_dataset` is already imported by
|
277
270
|
# community/load_test.py (with `skip_registration()`). Thus import here have
|
278
271
|
# no-effects.
|
279
272
|
@pytest.mark.skip(reason='Conflict with `load_test.py`')
|
280
|
-
def test_build_import():
|
273
|
+
def test_build_import(build):
|
281
274
|
# DummyDataset isn't registered by default
|
282
275
|
with pytest.raises(tfds.core.registered.DatasetNotFoundError):
|
283
|
-
|
276
|
+
build('dummy_dataset')
|
284
277
|
|
285
278
|
# --imports register the dataset
|
286
279
|
ds_module = 'tensorflow_datasets.testing.dummy_dataset.dummy_dataset'
|
287
|
-
assert
|
280
|
+
assert build(f'dummy_dataset --imports {ds_module}') == ['dummy_dataset']
|
288
281
|
|
289
282
|
|
290
|
-
def test_download_only():
|
291
|
-
with mock.patch(
|
292
|
-
|
283
|
+
def test_download_only(build):
|
284
|
+
with mock.patch.object(
|
285
|
+
download.DownloadManager, 'download', autospec=True
|
293
286
|
) as mock_download:
|
294
|
-
assert not
|
295
|
-
mock_download.assert_called_with(
|
287
|
+
assert not build('dummy_dataset_no_generate --download_only')
|
288
|
+
mock_download.assert_called_with(
|
289
|
+
mock.ANY, {'file0': 'http://data.org/file1.zip'}
|
290
|
+
)
|
296
291
|
|
297
292
|
|
298
293
|
@pytest.mark.parametrize(
|
@@ -16,18 +16,65 @@
|
|
16
16
|
"""Utility functions for TFDS CLI."""
|
17
17
|
|
18
18
|
import argparse
|
19
|
+
from collections.abc import Sequence
|
19
20
|
import dataclasses
|
20
21
|
import itertools
|
21
|
-
import os
|
22
22
|
import pathlib
|
23
23
|
|
24
24
|
from absl import logging
|
25
|
+
from absl.flags import argparse_flags
|
25
26
|
from etils import epath
|
27
|
+
import simple_parsing
|
28
|
+
from tensorflow_datasets.core import constants
|
26
29
|
from tensorflow_datasets.core import dataset_builder
|
27
30
|
from tensorflow_datasets.core import download
|
28
31
|
from tensorflow_datasets.core import file_adapters
|
29
32
|
from tensorflow_datasets.core import naming
|
30
33
|
from tensorflow_datasets.core.utils import file_utils
|
34
|
+
from tensorflow_datasets.scripts.utils import flag_utils
|
35
|
+
|
36
|
+
|
37
|
+
class ArgumentParser(
|
38
|
+
argparse_flags.ArgumentParser, simple_parsing.ArgumentParser
|
39
|
+
):
|
40
|
+
"""An `ArgumentParser` that handles both `simple_parsing` and `absl` flags.
|
41
|
+
|
42
|
+
This class is a workaround for the fact that `simple_parsing.ArgumentParser`
|
43
|
+
does not natively handle `absl.flags`. Without this, `absl` flags are not
|
44
|
+
correctly parsed, especially when they are mixed with positional arguments,
|
45
|
+
leading to errors.
|
46
|
+
|
47
|
+
The `absl.flags.argparse_flags.ArgumentParser` is designed to integrate `absl`
|
48
|
+
flags into an `argparse` setup. It does this by dynamically adding all
|
49
|
+
defined `absl` flags to the parser instance upon initialization.
|
50
|
+
|
51
|
+
By inheriting from both, we get the features of both:
|
52
|
+
- `simple_parsing.ArgumentParser`: Allows defining arguments from typed
|
53
|
+
dataclasses.
|
54
|
+
- `argparse_flags.ArgumentParser`: Adds support for `absl` flags.
|
55
|
+
|
56
|
+
The Method Resolution Order (MRO) is:
|
57
|
+
`ArgumentParser` -> `argparse_flags.ArgumentParser` ->
|
58
|
+
`simple_parsing.ArgumentParser` -> `argparse.ArgumentParser` -> `object`.
|
59
|
+
|
60
|
+
This order is important. `argparse_flags.ArgumentParser` is first so that it
|
61
|
+
can intercept arguments and handle `absl` flags before they are passed to
|
62
|
+
`simple_parsing.ArgumentParser`.
|
63
|
+
"""
|
64
|
+
|
65
|
+
def parse_known_args(
|
66
|
+
self,
|
67
|
+
args: Sequence[str] | None = None,
|
68
|
+
namespace: argparse.Namespace | None = None,
|
69
|
+
attempt_to_reorder: bool = False,
|
70
|
+
):
|
71
|
+
# `argparse_flags.ArgumentParser` does not support `attempt_to_reorder` that
|
72
|
+
# is used by `simple_parsing.ArgumentParser`. Since we don't need it, we can
|
73
|
+
# just ignore it.
|
74
|
+
del attempt_to_reorder
|
75
|
+
if args:
|
76
|
+
args = flag_utils.normalize_flags(args)
|
77
|
+
return super().parse_known_args(args, namespace)
|
31
78
|
|
32
79
|
|
33
80
|
@dataclasses.dataclass
|
@@ -119,13 +166,7 @@ def add_path_argument_group(parser: argparse.ArgumentParser):
|
|
119
166
|
path_group.add_argument(
|
120
167
|
'--data_dir',
|
121
168
|
type=epath.Path,
|
122
|
-
|
123
|
-
default=epath.Path(
|
124
|
-
os.environ.get(
|
125
|
-
'TFDS_DATA_DIR',
|
126
|
-
os.path.join(os.path.expanduser('~'), 'tensorflow_datasets'),
|
127
|
-
)
|
128
|
-
),
|
169
|
+
default=epath.Path(constants.DATA_DIR),
|
129
170
|
help=(
|
130
171
|
'Where to place datasets. Default to '
|
131
172
|
'`~/tensorflow_datasets/` or `TFDS_DATA_DIR` environement variable.'
|
@@ -15,10 +15,11 @@
|
|
15
15
|
|
16
16
|
"""CLI Fixtures."""
|
17
17
|
|
18
|
-
import argparse
|
19
18
|
from unittest import mock
|
20
19
|
|
21
20
|
import pytest
|
21
|
+
import simple_parsing
|
22
|
+
from tensorflow_datasets.scripts.cli import cli_utils
|
22
23
|
|
23
24
|
|
24
25
|
@pytest.fixture(scope='session', autouse=True)
|
@@ -29,7 +30,7 @@ def _mock_argparse_flags():
|
|
29
30
|
# another test):
|
30
31
|
# `flags.DEFINE_string('data_dir')` with `parser.add_argument('--data_dir')`
|
31
32
|
# We patch argparse_flags during test, so absl flags are ignored.
|
32
|
-
with mock.patch(
|
33
|
-
'
|
33
|
+
with mock.patch.object(
|
34
|
+
cli_utils, 'ArgumentParser', simple_parsing.ArgumentParser
|
34
35
|
):
|
35
36
|
yield
|
@@ -29,24 +29,21 @@ from absl import app
|
|
29
29
|
from absl import flags
|
30
30
|
from absl import logging
|
31
31
|
|
32
|
-
import simple_parsing
|
33
32
|
import tensorflow_datasets.public_api as tfds
|
34
33
|
|
35
34
|
# Import commands
|
36
35
|
from tensorflow_datasets.scripts.cli import build
|
36
|
+
from tensorflow_datasets.scripts.cli import cli_utils
|
37
37
|
from tensorflow_datasets.scripts.cli import convert_format
|
38
38
|
from tensorflow_datasets.scripts.cli import croissant
|
39
39
|
from tensorflow_datasets.scripts.cli import new
|
40
|
-
from tensorflow_datasets.scripts.utils import flag_utils
|
41
40
|
|
42
41
|
FLAGS = flags.FLAGS
|
43
42
|
|
44
43
|
|
45
44
|
def _parse_flags(argv: List[str]) -> argparse.Namespace:
|
46
45
|
"""Command lines flag parsing."""
|
47
|
-
|
48
|
-
|
49
|
-
parser = simple_parsing.ArgumentParser(
|
46
|
+
parser = cli_utils.ArgumentParser(
|
50
47
|
description='Tensorflow Datasets CLI tool',
|
51
48
|
allow_abbrev=False,
|
52
49
|
)
|
@@ -67,22 +64,7 @@ def _parse_flags(argv: List[str]) -> argparse.Namespace:
|
|
67
64
|
new.register_subparser(subparser)
|
68
65
|
convert_format.register_subparser(subparser)
|
69
66
|
croissant.register_subparser(subparser)
|
70
|
-
|
71
|
-
namespace, remaining_argv = parser.parse_known_args(argv[1:])
|
72
|
-
|
73
|
-
# Manually parse absl flags from the remaining arguments.
|
74
|
-
try:
|
75
|
-
# FLAGS requires the program name as the first argument.
|
76
|
-
positionals = FLAGS(argv[:1] + remaining_argv)
|
77
|
-
except flags.Error as e:
|
78
|
-
parser.error(str(e))
|
79
|
-
|
80
|
-
# There should be no positional arguments left, as they should have been
|
81
|
-
# handled by the sub-commands.
|
82
|
-
if len(positionals) > 1:
|
83
|
-
parser.error(f"unrecognized arguments: {' '.join(positionals[1:])}")
|
84
|
-
|
85
|
-
return namespace
|
67
|
+
return parser.parse_args(argv[1:])
|
86
68
|
|
87
69
|
|
88
70
|
def main(args: argparse.Namespace) -> None:
|
@@ -15,11 +15,11 @@
|
|
15
15
|
|
16
16
|
"""Utility for handling flags."""
|
17
17
|
|
18
|
+
from collections.abc import Sequence
|
18
19
|
import re
|
19
|
-
from typing import List
|
20
20
|
|
21
21
|
|
22
|
-
def normalize_flags(argv:
|
22
|
+
def normalize_flags(argv: Sequence[str]) -> list[str]:
|
23
23
|
"""Returns normalized explicit boolean flags for `absl.flags` compatibility.
|
24
24
|
|
25
25
|
Note: Boolean flags in `absl.flags` can be specified with --bool, --nobool,
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: tfds-nightly
|
3
|
-
Version: 4.9.9.
|
3
|
+
Version: 4.9.9.dev202508090044
|
4
4
|
Summary: tensorflow/datasets is a library of datasets ready to use with TensorFlow.
|
5
5
|
Home-page: https://github.com/tensorflow/datasets
|
6
6
|
Download-URL: https://github.com/tensorflow/datasets/tags
|
{tfds_nightly-4.9.9.dev202508080045.dist-info → tfds_nightly-4.9.9.dev202508090044.dist-info}/RECORD
RENAMED
@@ -1,6 +1,6 @@
|
|
1
1
|
tensorflow_datasets/__init__.py,sha256=uP6rUgdo-WeHzrZEHAsCREyxz9lO2IPCIVerl6Ea_JQ,3622
|
2
2
|
tensorflow_datasets/community-datasets.toml,sha256=xHUi5yCwLSAbn1nMwImRviZ03F1ZmdR2cyLJ2LDqhIU,629
|
3
|
-
tensorflow_datasets/conftest.py,sha256=
|
3
|
+
tensorflow_datasets/conftest.py,sha256=95ocbFU_SvVgWHI7NVkH1wXDrb-OGZrd_ihiVVkpk9g,5269
|
4
4
|
tensorflow_datasets/import_public_api_test.py,sha256=NPjteyVJL2ZDPEznMWBpP6hHbCaWiaZPxOcgnyr-HqY,852
|
5
5
|
tensorflow_datasets/import_test.py,sha256=6rtuyyIwF9QNPyu6c-s5t-aA0fKPy2R9ondIHJVOkhY,801
|
6
6
|
tensorflow_datasets/import_without_tf_test.py,sha256=znenZUnnltG7Jh2-PhhMEl_APadgiz1qPXi5P3Z85xo,3459
|
@@ -59,7 +59,7 @@ tensorflow_datasets/core/as_dataframe.py,sha256=3-2ScAo2G6wwYWbz_w3Crb4QyBwcuIYh
|
|
59
59
|
tensorflow_datasets/core/as_dataframe_test.py,sha256=cGgk3f9j87dDRA2EXedlYb11NpOLdew0dA_O0ZG-PLQ,2048
|
60
60
|
tensorflow_datasets/core/beam_utils.py,sha256=0X2lE9ILqLWZe5Idg58-G5XtgITXEAxqVodDtCDo9Ro,5109
|
61
61
|
tensorflow_datasets/core/beam_utils_test.py,sha256=5ZhntgG658uT5pF4kw_U5Epm8lu0tdg4cI-0viMItzg,2852
|
62
|
-
tensorflow_datasets/core/constants.py,sha256=
|
62
|
+
tensorflow_datasets/core/constants.py,sha256=5mWPx8lo4VFd-_HS07JquFc_4ajWvLkpbpU1_2izA0k,2806
|
63
63
|
tensorflow_datasets/core/dataset_builder.py,sha256=GMPEtJ3vcELXg5IvqgYHLjTxdomMzLNZV7rW8YBJg2E,80616
|
64
64
|
tensorflow_datasets/core/dataset_builder_beam_test.py,sha256=d7UsYNsAIY4FcANAERLcVMDcajIpAi0uMfrnQoe4yv4,8386
|
65
65
|
tensorflow_datasets/core/dataset_builder_notfdv_test.py,sha256=eIWlOZijQfopdze85EkbcPY1I8lFmEBnedcoUoOAnRQ,1346
|
@@ -253,7 +253,7 @@ tensorflow_datasets/core/utils/dtype_utils_test.py,sha256=-Qe2fQzDO5sjS36ZL-dY9w
|
|
253
253
|
tensorflow_datasets/core/utils/error_utils.py,sha256=lnquUa_VGRjn7-G_5x-PvWGgnnO6GAWsi9I7xeVuGxQ,3204
|
254
254
|
tensorflow_datasets/core/utils/error_utils_test.py,sha256=Brt8X12ZlWCR4x3WLnSvq4X76eyU1yH3t5STPfAkxUs,2060
|
255
255
|
tensorflow_datasets/core/utils/file_utils.py,sha256=vL-ulAVClrvkA71DvEvdGR2EdNmOR9Y9hNKUDwJ3Ll4,18534
|
256
|
-
tensorflow_datasets/core/utils/file_utils_test.py,sha256=
|
256
|
+
tensorflow_datasets/core/utils/file_utils_test.py,sha256=N-_jZlE73h2OqDxFmQJQ0cdb5a3FBilNjeP0iU3JMwQ,13438
|
257
257
|
tensorflow_datasets/core/utils/gcs_utils.py,sha256=8mBOgEepkah1Rw36F6DNIVhLzfXbR8iS8KMLQUM5sPk,5154
|
258
258
|
tensorflow_datasets/core/utils/gcs_utils_test.py,sha256=Ig8S37AvFG2g7kNjYxqgmqNKlLPeXt31XD7RY4UzsDg,2578
|
259
259
|
tensorflow_datasets/core/utils/huggingface_utils.py,sha256=NeYaUoO3vIFH8M0hZ8k4w7AchFZJIGsuV1XwKJVttfw,5325
|
@@ -1980,16 +1980,16 @@ tensorflow_datasets/scripts/cleanup/url_filename_recorder.py,sha256=iLcsT8UgbyNU
|
|
1980
1980
|
tensorflow_datasets/scripts/cleanup/url_status_checker.py,sha256=Tr3LtLnGhI8ElDAS-ejmuAU3rs1lmqmYlU4figoVQg0,1967
|
1981
1981
|
tensorflow_datasets/scripts/cli/__init__.py,sha256=Z8UWkv0wbzS4AzaLgSpYVGApYv5j57RWY0vN5Z553BQ,613
|
1982
1982
|
tensorflow_datasets/scripts/cli/build.py,sha256=jZp7CaP62D2Usi4l-o9oCUqTHhnigX15PNUr9pOd4Wo,14961
|
1983
|
-
tensorflow_datasets/scripts/cli/build_test.py,sha256=
|
1983
|
+
tensorflow_datasets/scripts/cli/build_test.py,sha256=xlFYScPSMcsUR27GQ-W5wdGdLdkXu_n0hM1rl20WWW8,10542
|
1984
1984
|
tensorflow_datasets/scripts/cli/builder_templates.py,sha256=99SvH3skigkc2Qg737BV2OzhXL_Rgu4az8eVHsxKCLk,7985
|
1985
1985
|
tensorflow_datasets/scripts/cli/builder_templates_test.py,sha256=HBNB-v2zlImKULPI8Webs9hXCkeFmWT29urxav-tDe8,2062
|
1986
|
-
tensorflow_datasets/scripts/cli/cli_utils.py,sha256=
|
1987
|
-
tensorflow_datasets/scripts/cli/conftest.py,sha256=
|
1986
|
+
tensorflow_datasets/scripts/cli/cli_utils.py,sha256=zE-jLQw0dn_98PHOTLX6pMoFqjSCBOD7lh5dytJcphE,14049
|
1987
|
+
tensorflow_datasets/scripts/cli/conftest.py,sha256=3PNh_BbR013G4HyLAZOleUXsQ9mICrD03NaKwdHFMXs,1291
|
1988
1988
|
tensorflow_datasets/scripts/cli/convert_format.py,sha256=02RDZQQCuXf_XFFpx0gmRVkYyJg534kY0fZwGKxtUL4,4197
|
1989
1989
|
tensorflow_datasets/scripts/cli/convert_format_utils.py,sha256=U_q5WVgMNrjBkOc166U4Y_eca5KOS3Xb3jSDjp4XdK4,29078
|
1990
1990
|
tensorflow_datasets/scripts/cli/convert_format_utils_test.py,sha256=9JGNu9TvUWzbuhe6DWwnO3V9Lia5S1Is64re-pceAWE,8823
|
1991
1991
|
tensorflow_datasets/scripts/cli/croissant.py,sha256=6jzmOXt_i7aeJHUVX7_zpRRMEXId_PzU24zUDdExRUs,6112
|
1992
|
-
tensorflow_datasets/scripts/cli/main.py,sha256=
|
1992
|
+
tensorflow_datasets/scripts/cli/main.py,sha256=qUKxjULaL2ilkPqh6vcsbtnKRJJhZCBn--ZkveFO2mA,3789
|
1993
1993
|
tensorflow_datasets/scripts/cli/main_test.py,sha256=3zNaS_2FmxxLoZOX05iJ2riuP4Qv8cx6bhAI56tV8YI,1067
|
1994
1994
|
tensorflow_datasets/scripts/cli/new.py,sha256=x_GQSEVva1XuMvFwL3rANjDxviwZviXKHCICY7P30Jc,7803
|
1995
1995
|
tensorflow_datasets/scripts/cli/new_test.py,sha256=USr9So-FPtg8UzaQPPacXn0E1ukDIoew9oYkOn45oik,2655
|
@@ -2022,7 +2022,7 @@ tensorflow_datasets/scripts/documentation/templates/community_namespace_toc.md,s
|
|
2022
2022
|
tensorflow_datasets/scripts/tools/__init__.py,sha256=rG-M1W3iDDSWXfKasKS3UhqroWicWvULibHch3yIuxc,623
|
2023
2023
|
tensorflow_datasets/scripts/tools/compute_split_info.py,sha256=BmLQdoBVaODYABwCmUK4i6LW6m72E6N8T7Vu8QEsgmg,2791
|
2024
2024
|
tensorflow_datasets/scripts/utils/__init__.py,sha256=rG-M1W3iDDSWXfKasKS3UhqroWicWvULibHch3yIuxc,623
|
2025
|
-
tensorflow_datasets/scripts/utils/flag_utils.py,sha256=
|
2025
|
+
tensorflow_datasets/scripts/utils/flag_utils.py,sha256=HJBbcfYJGcChSEUxFcfgIqYoTAq32K12hBeXGKn9qJU,1561
|
2026
2026
|
tensorflow_datasets/scripts/utils/flag_utils_test.py,sha256=wHCZ7n5ppzQPHlDMMu6MI6zblCmRSXx3HAAz6K63oqw,1259
|
2027
2027
|
tensorflow_datasets/structured/__init__.py,sha256=FaJdqKPZ6Ca4R1WybpoWLP6BL-frtAVA7Le5nU5bY0g,2468
|
2028
2028
|
tensorflow_datasets/structured/amazon_us_reviews.py,sha256=ZZS7Qn3huPzcu5DVhd1aDpUTYFAB3jvIKQoQt9I-7iM,915
|
@@ -2468,10 +2468,10 @@ tensorflow_datasets/vision_language/wit/wit_test.py,sha256=PXS8DMNW-MDrT2p5oy4Ic
|
|
2468
2468
|
tensorflow_datasets/vision_language/wit_kaggle/__init__.py,sha256=vGwSGeM8WE4Q-l0-eEE1sBojmk6YT0l1OO60AWa4Q40,719
|
2469
2469
|
tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle.py,sha256=q-vX_FBzIwsFxL4sY9vuyQ3UQD2PLM4yhUR4U6l-qao,16903
|
2470
2470
|
tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle_test.py,sha256=ZymHT1NkmD-pUnh3BmM3_g30c5afsWYnmqDD9dVyDSA,1778
|
2471
|
-
tfds_nightly-4.9.9.
|
2472
|
-
tfds_nightly-4.9.9.
|
2473
|
-
tfds_nightly-4.9.9.
|
2474
|
-
tfds_nightly-4.9.9.
|
2475
|
-
tfds_nightly-4.9.9.
|
2476
|
-
tfds_nightly-4.9.9.
|
2477
|
-
tfds_nightly-4.9.9.
|
2471
|
+
tfds_nightly-4.9.9.dev202508090044.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
|
2472
|
+
tfds_nightly-4.9.9.dev202508090044.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
|
2473
|
+
tfds_nightly-4.9.9.dev202508090044.dist-info/METADATA,sha256=zSR51MIaMyUvB8ZEeJ5C4RzIQ2BAjy_VKE5aO6IsHJc,11694
|
2474
|
+
tfds_nightly-4.9.9.dev202508090044.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
2475
|
+
tfds_nightly-4.9.9.dev202508090044.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
|
2476
|
+
tfds_nightly-4.9.9.dev202508090044.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
|
2477
|
+
tfds_nightly-4.9.9.dev202508090044.dist-info/RECORD,,
|
{tfds_nightly-4.9.9.dev202508080045.dist-info → tfds_nightly-4.9.9.dev202508090044.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|