tfds-nightly 4.9.9.dev202508080045__py3-none-any.whl → 4.9.9.dev202508100046__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -20,16 +20,21 @@ plugins (hooks and fixtures) common to all tests.

  See: https://docs.pytest.org/en/latest/writing_plugins.html
  """
+
  from __future__ import annotations

  import builtins
  import importlib
+ import os
+ import pathlib
  import sys
  import typing
  from typing import Iterator, Type

+ from etils import epath
  import pytest
  from tensorflow_datasets import setup_teardown
+ from tensorflow_datasets.core import constants

  if typing.TYPE_CHECKING:
    from tensorflow_datasets import testing
@@ -52,11 +57,9 @@ def disable_community_datasets():
    # visibility isn't automatically set.
    from tensorflow_datasets.core import visibility  # pylint: disable=g-import-not-at-top

-   visibility.set_availables(
-       [
-           visibility.DatasetType.TFDS_PUBLIC,
-       ]
-   )
+   visibility.set_availables([
+       visibility.DatasetType.TFDS_PUBLIC,
+   ])


  @pytest.fixture(scope='session', autouse=True)
@@ -144,3 +147,13 @@ def dummy_dataset(
    return _make_dataset(tmp_path_factory, testing.DummyDataset)


+ @pytest.fixture(name='default_data_dir')
+ def mock_default_data_dir(
+     monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path
+ ) -> epath.Path:
+   """Sets the default data dir to a temp dir."""
+   default_data_dir = epath.Path(tmp_path) / 'default_data_dir'
+   monkeypatch.setattr(constants, 'DATA_DIR', os.fspath(default_data_dir))
+   return default_data_dir
+
+
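The new top-level `default_data_dir` fixture above points `constants.DATA_DIR` at a per-test temporary directory through `monkeypatch`, so any test that falls back to the default data dir no longer touches `~/tensorflow_datasets`. A minimal usage sketch (the test below is hypothetical, not part of the package):

    def test_uses_tmp_default_dir(default_data_dir):
      # `constants.DATA_DIR` now resolves to <tmp_path>/default_data_dir.
      target = default_data_dir / 'my_dataset' / '1.0.0'
      target.mkdir(parents=True)
      assert target.exists()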
@@ -18,6 +18,7 @@
  # IMPORTANT: when changing values here, update docstrings.

  import os
+ from typing import Final

  # Directory in which datasets are declared within TFDS sources.
  DATASETS_TFDS_SRC_DIR = 'datasets'
@@ -27,7 +28,7 @@ SRC_BASE_URL = 'https://github.com/tensorflow/datasets/tree/master/'

  # Directory where to store processed datasets.
  # If modifying this, should also update `scripts/cli/build.py` `--data_dir`
- DATA_DIR = os.environ.get(
+ DATA_DIR: Final[str] = os.environ.get(
      'TFDS_DATA_DIR',
      os.path.join(os.path.expanduser('~'), 'tensorflow_datasets'),
  )
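The `Final[str]` annotation only constrains static analysis (PEP 591): `DATA_DIR` is still read once at import time from `TFDS_DATA_DIR`, and runtime code such as the `monkeypatch.setattr` in the conftest fixture above can still replace it. A standalone illustration of what the annotation buys (not TFDS code):

    from typing import Final

    DATA_DIR: Final[str] = '/tmp/tensorflow_datasets'
    DATA_DIR = '/elsewhere'  # rejected by a type checker: a Final name cannot be reassigned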
@@ -62,16 +62,8 @@ def _assert_data_dir(
    assert data_dir == expected_data_dir


- @pytest.fixture(name='default_data_dir')
- def mock_default_data_dir(monkeypatch, tmp_path):
-   """Sets the default data dir to a temp dir."""
-   default_data_dir = tmp_path / 'default_data_dir'
-   monkeypatch.setattr(constants, 'DATA_DIR', default_data_dir)
-   return default_data_dir
-
-
  @pytest.fixture(name='other_data_dir')
- def mock_other_data_dir(default_data_dir):
+ def mock_other_data_dir(default_data_dir: epath.Path):
    """Adds another data dir to the registered data dirs."""
    other_data_dir = default_data_dir.parent / 'other_data_dir'
    file_utils.add_data_dir(other_data_dir)
@@ -13,14 +13,12 @@
  # See the License for the specific language governing permissions and
  # limitations under the License.

- """Tests for tensorflow_datasets.scripts.cli.build."""
-
+ from collections.abc import Callable, Iterator
  import contextlib
  import dataclasses
+ import functools
  import multiprocessing
  import os
- import pathlib
- from typing import Dict, Iterator, List, Optional
  from unittest import mock

  from etils import epath
@@ -45,7 +43,7 @@ class DummyDatasetNoGenerate(tfds.testing.DummyDataset):

    @utils.classproperty
    @classmethod
-   def url_infos(cls) -> Optional[Dict[str, download.checksums.UrlInfo]]:
+   def url_infos(cls) -> dict[str, download.checksums.UrlInfo] | None:
      return {
          'http://data.org/file1.zip': download.checksums.UrlInfo(
              size=42,
@@ -55,33 +53,21 @@ class DummyDatasetNoGenerate(tfds.testing.DummyDataset):
      }


- @pytest.fixture(scope='function', autouse=True)
- def mock_default_data_dir(tmp_path: pathlib.Path):
-   """Changes the default `--data_dir` to tmp_path."""
-   tmp_path = tmp_path / 'datasets'
-   default_data_dir = os.environ.get('TFDS_DATA_DIR')
-   try:
-     os.environ['TFDS_DATA_DIR'] = os.fspath(tmp_path)
-     yield tmp_path
-   finally:
-     if default_data_dir:
-       os.environ['TFDS_DATA_DIR'] = default_data_dir
-     else:
-       del os.environ['TFDS_DATA_DIR']
-
-
  @contextlib.contextmanager
  def mock_cwd(path: epath.PathLike) -> Iterator[None]:
    """Mock the current directory."""
-   path = pathlib.Path(path)
+   path = epath.Path(path)
    assert path.exists() and path.is_dir()  # Check given path is valid cwd dir
-   with mock.patch('os.getcwd', return_value=os.fspath(path)):
+   with mock.patch.object(os, 'getcwd', return_value=os.fspath(path)):
      yield


- def _build(cmd_flags: str, mock_download_and_prepare: bool = True) -> List[str]:
+ def _build(
+     cmd_flags: str, data_dir: epath.Path, mock_download_and_prepare: bool = True
+ ) -> list[str]:
    """Executes `tfds build {cmd_flags}` and returns the list of generated ds."""
    # Execute the command
+   cmd_flags = f'--data_dir={data_dir} {cmd_flags}'
    args = main._parse_flags(f'tfds build {cmd_flags}'.split())

    original_dl_and_prepare = tfds.core.DatasetBuilder.download_and_prepare
@@ -101,9 +87,8 @@ def _build(cmd_flags: str, mock_download_and_prepare: bool = True) -> List[str]:
      else:
        return original_dl_and_prepare(self, *args, **kwargs)

-   with mock.patch(
-       'tensorflow_datasets.core.DatasetBuilder.download_and_prepare',
-       _download_and_prepare,
+   with mock.patch.object(
+       tfds.core.DatasetBuilder, 'download_and_prepare', _download_and_prepare
    ):
      main.main(args)
      queue.put(None)
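Throughout this test file, string-target `mock.patch(...)` calls are swapped for `mock.patch.object(...)`. Both patch the same attribute; the object form takes an already-imported owner instead of a dotted import path, so typos fail immediately and the patch survives module renames. A self-contained sketch of the equivalence (illustrative class, not from TFDS):

    from unittest import mock

    class Greeter:
      def greet(self):
        return 'hello'

    # Dotted-path form: the target is resolved from a string at patch time.
    with mock.patch(f'{__name__}.Greeter.greet', lambda self: 'patched'):
      assert Greeter().greet() == 'patched'

    # Object form: the owner is a real object held by the caller.
    with mock.patch.object(Greeter, 'greet', lambda self: 'patched'):
      assert Greeter().greet() == 'patched'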
@@ -115,31 +100,39 @@ def _build(cmd_flags: str, mock_download_and_prepare: bool = True) -> List[str]:
    return generated_ds_names


- def test_build_single():
-   assert _build('mnist') == ['mnist']
-   assert _build('mnist:3.0.1') == ['mnist']
+ @pytest.fixture(name='build')
+ def mock_build(
+     default_data_dir: epath.Path,
+ ) -> Callable[[str, bool], list[str]]:
+   """Returns a function to execute `tfds build`."""
+   return functools.partial(_build, data_dir=default_data_dir)
+
+
+ def test_build_single(build):
+   assert build('mnist') == ['mnist']
+   assert build('mnist:3.0.1') == ['mnist']
    # Keyword arguments also possible
-   assert _build('--datasets mnist') == ['mnist']
+   assert build('--datasets mnist') == ['mnist']

    with pytest.raises(tfds.core.registered.DatasetNotFoundError):
-     _build('unknown_dataset')
+     build('unknown_dataset')

    with pytest.raises(AssertionError, match='cannot be loaded at version 1.0.0'):
-     _build('mnist:1.0.0')  # Can only built the last version
+     build('mnist:1.0.0')  # Can only built the last version

    with pytest.raises(ValueError, match='not have config'):
-     _build('mnist --config_idx 0')
+     build('mnist --config_idx 0')


- def test_build_multiple():
+ def test_build_multiple(build):
    # Multiple datasets can be built in a single call
-   assert _build('mnist imagenet2012 cifar10') == [
+   assert build('mnist imagenet2012 cifar10') == [
        'mnist',
        'imagenet2012',
        'cifar10',
    ]
    # Keyword arguments also possible
-   assert _build('mnist --datasets imagenet2012 cifar10') == [
+   assert build('mnist --datasets imagenet2012 cifar10') == [
        'mnist',
        'imagenet2012',
        'cifar10',
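The `build` fixture introduced above uses `functools.partial` to bind the per-test `default_data_dir` into `_build`, so every call implicitly carries `--data_dir` without each test repeating it. Reduced to its essentials (stand-in `_build`, not the real one):

    import functools

    def _build(cmd_flags, data_dir, mock_download_and_prepare=True):
      return f'tfds build --data_dir={data_dir} {cmd_flags}'

    build = functools.partial(_build, data_dir='/tmp/tfds')
    assert build('mnist') == 'tfds build --data_dir=/tmp/tfds mnist'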
@@ -147,9 +140,9 @@


  @pytest.mark.parametrize('num_processes', range(1, 4))
- def test_build_parallel(num_processes):
+ def test_build_parallel(build, num_processes):
    # Order is not guaranteed
-   assert set(_build(f'trivia_qa --num-processes={num_processes}')) == set([
+   assert set(build(f'trivia_qa --num-processes={num_processes}')) == set([
        'trivia_qa/rc',
        'trivia_qa/rc.nocontext',
        'trivia_qa/unfiltered',
@@ -157,9 +150,9 @@ def test_build_parallel(num_processes):
    ])


- def test_build_dataset_configs():
+ def test_build_dataset_configs(build):
    # By default, all configs are build
-   assert _build('trivia_qa') == [
+   assert build('trivia_qa') == [
        'trivia_qa/rc',
        'trivia_qa/rc.nocontext',
        'trivia_qa/unfiltered',
@@ -169,47 +162,47 @@
    # If config is set, only the defined config is generated

    # --config_idx
-   assert _build('trivia_qa --config_idx=0') == ['trivia_qa/rc']
+   assert build('trivia_qa --config_idx=0') == ['trivia_qa/rc']

    # --config
-   assert _build('trivia_qa --config unfiltered.nocontext') == [
+   assert build('trivia_qa --config unfiltered.nocontext') == [
        'trivia_qa/unfiltered.nocontext',
    ]

    # --config Json
    config_json = '{"name":"my_config","description":"abcd"}'
-   assert _build(f'imdb_reviews --config {config_json}') == [
+   assert build(f'imdb_reviews --config {config_json}') == [
        'imdb_reviews/my_config',
    ]

    # name/config
-   assert _build('trivia_qa/unfiltered.nocontext') == [
+   assert build('trivia_qa/unfiltered.nocontext') == [
        'trivia_qa/unfiltered.nocontext'
    ]

    with pytest.raises(ValueError, match='Config should only be defined once'):
-     _build('trivia_qa/unfiltered.nocontext --config_idx=0')
+     build('trivia_qa/unfiltered.nocontext --config_idx=0')

    with pytest.raises(ValueError, match='greater than number of configs'):
-     _build('trivia_qa --config_idx 100')
+     build('trivia_qa --config_idx 100')


- def test_exclude_datasets():
+ def test_exclude_datasets(build):
    # Exclude all datasets except 2
    all_ds = [b for b in tfds.list_builders() if b not in ('mnist', 'cifar10')]
    all_ds_str = ','.join(all_ds)

-   assert _build(f'--exclude_datasets {all_ds_str}') == [
+   assert build(f'--exclude_datasets {all_ds_str}') == [
        'cifar10',
        'mnist',
    ]

    with pytest.raises(ValueError, match="--exclude_datasets can't be used"):
-     _build('mnist --exclude_datasets cifar10')
+     build('mnist --exclude_datasets cifar10')


- def test_build_overwrite(mock_default_data_dir: pathlib.Path):  # pylint: disable=redefined-outer-name
-   data_dir = mock_default_data_dir / 'mnist/3.0.1'
+ def test_build_overwrite(build, default_data_dir: epath.Path):
+   data_dir = default_data_dir / 'mnist/3.0.1'
    data_dir.mkdir(parents=True)
    metadata_path = tfds.core.tfds_path(
        'testing/test_data/dataset_info/mnist/3.0.1'
@@ -219,80 +212,82 @@ def test_build_overwrite(mock_default_data_dir: pathlib.Path): # pylint: disabl
      data_dir.joinpath(f.name).write_text(f.read_text())

    # By default, will skip generation if the data already exists
-   assert _build('mnist') == ['mnist']  # Called, but no-op
+   assert build('mnist') == ['mnist']  # Called, but no-op
    assert data_dir.exists()

-   assert _build('mnist --overwrite') == ['mnist']
+   assert build('mnist --overwrite') == ['mnist']
    assert not data_dir.exists()  # Previous data-dir has been removed


- def test_max_examples_per_split_0(mock_default_data_dir: pathlib.Path):  # pylint: disable=redefined-outer-name
-   assert _build(
+ def test_max_examples_per_split_0(build, default_data_dir: epath.Path):
+   assert build(
        'dummy_dataset_no_generate --max_examples_per_split 0',
        mock_download_and_prepare=False,
    ) == ['dummy_dataset_no_generate']

-   builder_path = mock_default_data_dir / 'dummy_dataset_no_generate/1.0.0'
+   builder_path = default_data_dir / 'dummy_dataset_no_generate/1.0.0'
    # Dataset has been generated
    assert builder_path.exists()
    # tf-records files have not been generated
-   assert sorted(builder_path.iterdir()) == [
-       builder_path / 'dataset_info.json',
-       builder_path / 'features.json',
+   assert sorted(p.name for p in builder_path.iterdir()) == [
+       'dataset_info.json',
+       'features.json',
    ]


- def test_build_files():
+ def test_build_files(build):
    # Make sure DummyDataset isn't registered by default
    with pytest.raises(tfds.core.registered.DatasetNotFoundError):
-     _build('dummy_dataset')
+     build('dummy_dataset')

    with pytest.raises(FileNotFoundError, match='Could not find .* script'):
-     _build('')
+     build('')

    # cd .../datasets/dummy_dataset && tfds build
    with mock_cwd(_DUMMY_DATASET_PATH):
-     assert _build('') == ['dummy_dataset']
+     assert build('') == ['dummy_dataset']

    # cd .../datasets/dummy_dataset && tfds build dummy_dataset.py
    with mock_cwd(_DUMMY_DATASET_PATH):
-     assert _build('dummy_dataset.py') == ['dummy_dataset']
+     assert build('dummy_dataset.py') == ['dummy_dataset']

    # cd .../datasets/ && tfds build dummy_dataset
    with mock_cwd(_DUMMY_DATASET_PATH.parent):
-     assert _build('dummy_dataset') == ['dummy_dataset']
+     assert build('dummy_dataset') == ['dummy_dataset']

    # cd .../datasets/ && tfds build dummy_dataset --imports=xxx
    # --imports is passed. so do not load dataset from file
    with mock_cwd(_DUMMY_DATASET_PATH.parent):
      with pytest.raises(tfds.core.registered.DatasetNotFoundError):
-       assert _build('dummy_dataset --imports=os')
+       assert build('dummy_dataset --imports=os')

    # cd .../datasets/ && tfds build dummy_dataset/dummy_dataset
    with mock_cwd(_DUMMY_DATASET_PATH.parent):
-     assert _build('dummy_dataset/dummy_dataset') == ['dummy_dataset']
+     assert build('dummy_dataset/dummy_dataset') == ['dummy_dataset']


  # Somehow only with tf-nightly, `dummy_dataset` is already imported by
  # community/load_test.py (with `skip_registration()`). Thus import here have
  # no-effects.
  @pytest.mark.skip(reason='Conflict with `load_test.py`')
- def test_build_import():
+ def test_build_import(build):
    # DummyDataset isn't registered by default
    with pytest.raises(tfds.core.registered.DatasetNotFoundError):
-     _build('dummy_dataset')
+     build('dummy_dataset')

    # --imports register the dataset
    ds_module = 'tensorflow_datasets.testing.dummy_dataset.dummy_dataset'
-   assert _build(f'dummy_dataset --imports {ds_module}') == ['dummy_dataset']
+   assert build(f'dummy_dataset --imports {ds_module}') == ['dummy_dataset']


- def test_download_only():
-   with mock.patch(
-       'tensorflow_datasets.download.DownloadManager.download'
+ def test_download_only(build):
+   with mock.patch.object(
+       download.DownloadManager, 'download', autospec=True
    ) as mock_download:
-     assert not _build('dummy_dataset_no_generate --download_only')
-     mock_download.assert_called_with({'file0': 'http://data.org/file1.zip'})
+     assert not build('dummy_dataset_no_generate --download_only')
+     mock_download.assert_called_with(
+         mock.ANY, {'file0': 'http://data.org/file1.zip'}
+     )


  @pytest.mark.parametrize(
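Note the `autospec=True` added in `test_download_only`: an autospecced method mock records the bound instance as the first call argument, which is why the assertion now leads with `mock.ANY`. A self-contained sketch of that behavior (toy class, not the real `DownloadManager`):

    from unittest import mock

    class Manager:
      def download(self, urls):
        return urls

    with mock.patch.object(Manager, 'download', autospec=True) as m:
      Manager().download({'file0': 'http://data.org/file1.zip'})

    # With autospec, the recorded call includes `self`, matched here by mock.ANY.
    m.assert_called_with(mock.ANY, {'file0': 'http://data.org/file1.zip'})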
@@ -16,18 +16,65 @@
  """Utility functions for TFDS CLI."""

  import argparse
+ from collections.abc import Sequence
  import dataclasses
  import itertools
- import os
  import pathlib

  from absl import logging
+ from absl.flags import argparse_flags
  from etils import epath
+ import simple_parsing
+ from tensorflow_datasets.core import constants
  from tensorflow_datasets.core import dataset_builder
  from tensorflow_datasets.core import download
  from tensorflow_datasets.core import file_adapters
  from tensorflow_datasets.core import naming
  from tensorflow_datasets.core.utils import file_utils
+ from tensorflow_datasets.scripts.utils import flag_utils
+
+
+ class ArgumentParser(
+     argparse_flags.ArgumentParser, simple_parsing.ArgumentParser
+ ):
+   """An `ArgumentParser` that handles both `simple_parsing` and `absl` flags.
+
+   This class is a workaround for the fact that `simple_parsing.ArgumentParser`
+   does not natively handle `absl.flags`. Without this, `absl` flags are not
+   correctly parsed, especially when they are mixed with positional arguments,
+   leading to errors.
+
+   The `absl.flags.argparse_flags.ArgumentParser` is designed to integrate `absl`
+   flags into an `argparse` setup. It does this by dynamically adding all
+   defined `absl` flags to the parser instance upon initialization.
+
+   By inheriting from both, we get the features of both:
+   - `simple_parsing.ArgumentParser`: Allows defining arguments from typed
+     dataclasses.
+   - `argparse_flags.ArgumentParser`: Adds support for `absl` flags.
+
+   The Method Resolution Order (MRO) is:
+   `ArgumentParser` -> `argparse_flags.ArgumentParser` ->
+   `simple_parsing.ArgumentParser` -> `argparse.ArgumentParser` -> `object`.
+
+   This order is important. `argparse_flags.ArgumentParser` is first so that it
+   can intercept arguments and handle `absl` flags before they are passed to
+   `simple_parsing.ArgumentParser`.
+   """
+
+   def parse_known_args(
+       self,
+       args: Sequence[str] | None = None,
+       namespace: argparse.Namespace | None = None,
+       attempt_to_reorder: bool = False,
+   ):
+     # `argparse_flags.ArgumentParser` does not support `attempt_to_reorder` that
+     # is used by `simple_parsing.ArgumentParser`. Since we don't need it, we can
+     # just ignore it.
+     del attempt_to_reorder
+     if args:
+       args = flag_utils.normalize_flags(args)
+     return super().parse_known_args(args, namespace)


  @dataclasses.dataclass
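With the combined parser in place, `main._parse_flags` (changed later in this diff) builds the whole CLI from this one class: `simple_parsing` contributes the dataclass-driven arguments, the `argparse_flags` base folds in `absl` flags, and the overridden `parse_known_args` runs `flag_utils.normalize_flags` over the arguments first. A rough usage sketch mirroring `main.py`; it assumes `build.register_subparser` follows the same pattern as the `new`/`croissant` registrations shown later in this diff:

    from tensorflow_datasets.scripts.cli import build, cli_utils

    parser = cli_utils.ArgumentParser(
        description='Tensorflow Datasets CLI tool', allow_abbrev=False
    )
    subparser = parser.add_subparsers(title='command')
    build.register_subparser(subparser)
    args = parser.parse_args(['build', 'mnist', '--data_dir=/tmp/tfds'])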
@@ -119,13 +166,7 @@ def add_path_argument_group(parser: argparse.ArgumentParser):
    path_group.add_argument(
        '--data_dir',
        type=epath.Path,
-       # Should match tfds.core.constant.DATA_DIR !!
-       default=epath.Path(
-           os.environ.get(
-               'TFDS_DATA_DIR',
-               os.path.join(os.path.expanduser('~'), 'tensorflow_datasets'),
-           )
-       ),
+       default=epath.Path(constants.DATA_DIR),
        help=(
            'Where to place datasets. Default to '
            '`~/tensorflow_datasets/` or `TFDS_DATA_DIR` environement variable.'
@@ -15,10 +15,11 @@

  """CLI Fixtures."""

- import argparse
  from unittest import mock

  import pytest
+ import simple_parsing
+ from tensorflow_datasets.scripts.cli import cli_utils


  @pytest.fixture(scope='session', autouse=True)
@@ -29,7 +30,7 @@ def _mock_argparse_flags():
    # another test):
    # `flags.DEFINE_string('data_dir')` with `parser.add_argument('--data_dir')`
    # We patch argparse_flags during test, so absl flags are ignored.
-   with mock.patch(
-       'absl.flags.argparse_flags.ArgumentParser', argparse.ArgumentParser
+   with mock.patch.object(
+       cli_utils, 'ArgumentParser', simple_parsing.ArgumentParser
    ):
      yield
@@ -29,24 +29,21 @@ from absl import app
  from absl import flags
  from absl import logging

- import simple_parsing
  import tensorflow_datasets.public_api as tfds

  # Import commands
  from tensorflow_datasets.scripts.cli import build
+ from tensorflow_datasets.scripts.cli import cli_utils
  from tensorflow_datasets.scripts.cli import convert_format
  from tensorflow_datasets.scripts.cli import croissant
  from tensorflow_datasets.scripts.cli import new
- from tensorflow_datasets.scripts.utils import flag_utils

  FLAGS = flags.FLAGS


  def _parse_flags(argv: List[str]) -> argparse.Namespace:
    """Command lines flag parsing."""
-   argv = flag_utils.normalize_flags(argv)  # See b/174043007 for context.
-
-   parser = simple_parsing.ArgumentParser(
+   parser = cli_utils.ArgumentParser(
        description='Tensorflow Datasets CLI tool',
        allow_abbrev=False,
    )
@@ -67,22 +64,7 @@ def _parse_flags(argv: List[str]) -> argparse.Namespace:
    new.register_subparser(subparser)
    convert_format.register_subparser(subparser)
    croissant.register_subparser(subparser)
-
-   namespace, remaining_argv = parser.parse_known_args(argv[1:])
-
-   # Manually parse absl flags from the remaining arguments.
-   try:
-     # FLAGS requires the program name as the first argument.
-     positionals = FLAGS(argv[:1] + remaining_argv)
-   except flags.Error as e:
-     parser.error(str(e))
-
-   # There should be no positional arguments left, as they should have been
-   # handled by the sub-commands.
-   if len(positionals) > 1:
-     parser.error(f"unrecognized arguments: {' '.join(positionals[1:])}")
-
-   return namespace
+   return parser.parse_args(argv[1:])


  def main(args: argparse.Namespace) -> None:
@@ -15,11 +15,11 @@

  """Utility for handling flags."""

+ from collections.abc import Sequence
  import re
- from typing import List


- def normalize_flags(argv: List[str]) -> List[str]:
+ def normalize_flags(argv: Sequence[str]) -> list[str]:
    """Returns normalized explicit bolean flags for `absl.flags` compatibility.

    Note: Boolean flags in `absl.flags` can be specified with --bool, --nobool,
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: tfds-nightly
- Version: 4.9.9.dev202508080045
+ Version: 4.9.9.dev202508100046
  Summary: tensorflow/datasets is a library of datasets ready to use with TensorFlow.
  Home-page: https://github.com/tensorflow/datasets
  Download-URL: https://github.com/tensorflow/datasets/tags
@@ -1,6 +1,6 @@
  tensorflow_datasets/__init__.py,sha256=uP6rUgdo-WeHzrZEHAsCREyxz9lO2IPCIVerl6Ea_JQ,3622
  tensorflow_datasets/community-datasets.toml,sha256=xHUi5yCwLSAbn1nMwImRviZ03F1ZmdR2cyLJ2LDqhIU,629
- tensorflow_datasets/conftest.py,sha256=FMsFSgifx-tVWv-smAejkKckdQAvnYl9nG9wmmi5G40,4831
+ tensorflow_datasets/conftest.py,sha256=95ocbFU_SvVgWHI7NVkH1wXDrb-OGZrd_ihiVVkpk9g,5269
  tensorflow_datasets/import_public_api_test.py,sha256=NPjteyVJL2ZDPEznMWBpP6hHbCaWiaZPxOcgnyr-HqY,852
  tensorflow_datasets/import_test.py,sha256=6rtuyyIwF9QNPyu6c-s5t-aA0fKPy2R9ondIHJVOkhY,801
  tensorflow_datasets/import_without_tf_test.py,sha256=znenZUnnltG7Jh2-PhhMEl_APadgiz1qPXi5P3Z85xo,3459
@@ -59,7 +59,7 @@ tensorflow_datasets/core/as_dataframe.py,sha256=3-2ScAo2G6wwYWbz_w3Crb4QyBwcuIYh
  tensorflow_datasets/core/as_dataframe_test.py,sha256=cGgk3f9j87dDRA2EXedlYb11NpOLdew0dA_O0ZG-PLQ,2048
  tensorflow_datasets/core/beam_utils.py,sha256=0X2lE9ILqLWZe5Idg58-G5XtgITXEAxqVodDtCDo9Ro,5109
  tensorflow_datasets/core/beam_utils_test.py,sha256=5ZhntgG658uT5pF4kw_U5Epm8lu0tdg4cI-0viMItzg,2852
- tensorflow_datasets/core/constants.py,sha256=eAzxhRpS1vx5LMedCmes2nfSU7BL831yi1J8JZkhtXY,2769
+ tensorflow_datasets/core/constants.py,sha256=5mWPx8lo4VFd-_HS07JquFc_4ajWvLkpbpU1_2izA0k,2806
  tensorflow_datasets/core/dataset_builder.py,sha256=GMPEtJ3vcELXg5IvqgYHLjTxdomMzLNZV7rW8YBJg2E,80616
  tensorflow_datasets/core/dataset_builder_beam_test.py,sha256=d7UsYNsAIY4FcANAERLcVMDcajIpAi0uMfrnQoe4yv4,8386
  tensorflow_datasets/core/dataset_builder_notfdv_test.py,sha256=eIWlOZijQfopdze85EkbcPY1I8lFmEBnedcoUoOAnRQ,1346
@@ -253,7 +253,7 @@ tensorflow_datasets/core/utils/dtype_utils_test.py,sha256=-Qe2fQzDO5sjS36ZL-dY9w
  tensorflow_datasets/core/utils/error_utils.py,sha256=lnquUa_VGRjn7-G_5x-PvWGgnnO6GAWsi9I7xeVuGxQ,3204
  tensorflow_datasets/core/utils/error_utils_test.py,sha256=Brt8X12ZlWCR4x3WLnSvq4X76eyU1yH3t5STPfAkxUs,2060
  tensorflow_datasets/core/utils/file_utils.py,sha256=vL-ulAVClrvkA71DvEvdGR2EdNmOR9Y9hNKUDwJ3Ll4,18534
- tensorflow_datasets/core/utils/file_utils_test.py,sha256=SCw_XFRhyxGCFEVjt9pOdupsoULPdi8iT38JBrnUuDM,13708
+ tensorflow_datasets/core/utils/file_utils_test.py,sha256=N-_jZlE73h2OqDxFmQJQ0cdb5a3FBilNjeP0iU3JMwQ,13438
  tensorflow_datasets/core/utils/gcs_utils.py,sha256=8mBOgEepkah1Rw36F6DNIVhLzfXbR8iS8KMLQUM5sPk,5154
  tensorflow_datasets/core/utils/gcs_utils_test.py,sha256=Ig8S37AvFG2g7kNjYxqgmqNKlLPeXt31XD7RY4UzsDg,2578
  tensorflow_datasets/core/utils/huggingface_utils.py,sha256=NeYaUoO3vIFH8M0hZ8k4w7AchFZJIGsuV1XwKJVttfw,5325
@@ -1980,16 +1980,16 @@ tensorflow_datasets/scripts/cleanup/url_filename_recorder.py,sha256=iLcsT8UgbyNU
  tensorflow_datasets/scripts/cleanup/url_status_checker.py,sha256=Tr3LtLnGhI8ElDAS-ejmuAU3rs1lmqmYlU4figoVQg0,1967
  tensorflow_datasets/scripts/cli/__init__.py,sha256=Z8UWkv0wbzS4AzaLgSpYVGApYv5j57RWY0vN5Z553BQ,613
  tensorflow_datasets/scripts/cli/build.py,sha256=jZp7CaP62D2Usi4l-o9oCUqTHhnigX15PNUr9pOd4Wo,14961
- tensorflow_datasets/scripts/cli/build_test.py,sha256=UoDsSPDMdMhhx7eoZZfCP6beXgxUibiyUUDbWNudV_A,10822
+ tensorflow_datasets/scripts/cli/build_test.py,sha256=xlFYScPSMcsUR27GQ-W5wdGdLdkXu_n0hM1rl20WWW8,10542
  tensorflow_datasets/scripts/cli/builder_templates.py,sha256=99SvH3skigkc2Qg737BV2OzhXL_Rgu4az8eVHsxKCLk,7985
  tensorflow_datasets/scripts/cli/builder_templates_test.py,sha256=HBNB-v2zlImKULPI8Webs9hXCkeFmWT29urxav-tDe8,2062
- tensorflow_datasets/scripts/cli/cli_utils.py,sha256=rMYMcQj1w46OTOeMyp3qf4y9v7ArOGh6u5NaCjBXal8,12313
- tensorflow_datasets/scripts/cli/conftest.py,sha256=cmvCCV-efT5ZXYPkCSGS1OxoKNPAfSsLcFTfYfe61S0,1233
+ tensorflow_datasets/scripts/cli/cli_utils.py,sha256=zE-jLQw0dn_98PHOTLX6pMoFqjSCBOD7lh5dytJcphE,14049
+ tensorflow_datasets/scripts/cli/conftest.py,sha256=3PNh_BbR013G4HyLAZOleUXsQ9mICrD03NaKwdHFMXs,1291
  tensorflow_datasets/scripts/cli/convert_format.py,sha256=02RDZQQCuXf_XFFpx0gmRVkYyJg534kY0fZwGKxtUL4,4197
  tensorflow_datasets/scripts/cli/convert_format_utils.py,sha256=U_q5WVgMNrjBkOc166U4Y_eca5KOS3Xb3jSDjp4XdK4,29078
  tensorflow_datasets/scripts/cli/convert_format_utils_test.py,sha256=9JGNu9TvUWzbuhe6DWwnO3V9Lia5S1Is64re-pceAWE,8823
  tensorflow_datasets/scripts/cli/croissant.py,sha256=6jzmOXt_i7aeJHUVX7_zpRRMEXId_PzU24zUDdExRUs,6112
- tensorflow_datasets/scripts/cli/main.py,sha256=FJJwyUtM1N9gNDsxGm850m5ejzzJ9mgESNW9Xz8E9_I,4383
+ tensorflow_datasets/scripts/cli/main.py,sha256=qUKxjULaL2ilkPqh6vcsbtnKRJJhZCBn--ZkveFO2mA,3789
  tensorflow_datasets/scripts/cli/main_test.py,sha256=3zNaS_2FmxxLoZOX05iJ2riuP4Qv8cx6bhAI56tV8YI,1067
  tensorflow_datasets/scripts/cli/new.py,sha256=x_GQSEVva1XuMvFwL3rANjDxviwZviXKHCICY7P30Jc,7803
  tensorflow_datasets/scripts/cli/new_test.py,sha256=USr9So-FPtg8UzaQPPacXn0E1ukDIoew9oYkOn45oik,2655
@@ -2022,7 +2022,7 @@ tensorflow_datasets/scripts/documentation/templates/community_namespace_toc.md,s
  tensorflow_datasets/scripts/tools/__init__.py,sha256=rG-M1W3iDDSWXfKasKS3UhqroWicWvULibHch3yIuxc,623
  tensorflow_datasets/scripts/tools/compute_split_info.py,sha256=BmLQdoBVaODYABwCmUK4i6LW6m72E6N8T7Vu8QEsgmg,2791
  tensorflow_datasets/scripts/utils/__init__.py,sha256=rG-M1W3iDDSWXfKasKS3UhqroWicWvULibHch3yIuxc,623
- tensorflow_datasets/scripts/utils/flag_utils.py,sha256=X8H8j33jgKMZsNZYvR_P1EVMMAhQuOVkcsph2vxex-Q,1544
+ tensorflow_datasets/scripts/utils/flag_utils.py,sha256=HJBbcfYJGcChSEUxFcfgIqYoTAq32K12hBeXGKn9qJU,1561
  tensorflow_datasets/scripts/utils/flag_utils_test.py,sha256=wHCZ7n5ppzQPHlDMMu6MI6zblCmRSXx3HAAz6K63oqw,1259
  tensorflow_datasets/structured/__init__.py,sha256=FaJdqKPZ6Ca4R1WybpoWLP6BL-frtAVA7Le5nU5bY0g,2468
  tensorflow_datasets/structured/amazon_us_reviews.py,sha256=ZZS7Qn3huPzcu5DVhd1aDpUTYFAB3jvIKQoQt9I-7iM,915
@@ -2468,10 +2468,10 @@ tensorflow_datasets/vision_language/wit/wit_test.py,sha256=PXS8DMNW-MDrT2p5oy4Ic
  tensorflow_datasets/vision_language/wit_kaggle/__init__.py,sha256=vGwSGeM8WE4Q-l0-eEE1sBojmk6YT0l1OO60AWa4Q40,719
  tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle.py,sha256=q-vX_FBzIwsFxL4sY9vuyQ3UQD2PLM4yhUR4U6l-qao,16903
  tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle_test.py,sha256=ZymHT1NkmD-pUnh3BmM3_g30c5afsWYnmqDD9dVyDSA,1778
- tfds_nightly-4.9.9.dev202508080045.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
- tfds_nightly-4.9.9.dev202508080045.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
- tfds_nightly-4.9.9.dev202508080045.dist-info/METADATA,sha256=Za3dYGEQJMn0dTM0uUOEd_2jik8ANJg6Lmtp94-hKQ0,11694
- tfds_nightly-4.9.9.dev202508080045.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- tfds_nightly-4.9.9.dev202508080045.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
- tfds_nightly-4.9.9.dev202508080045.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
- tfds_nightly-4.9.9.dev202508080045.dist-info/RECORD,,
+ tfds_nightly-4.9.9.dev202508100046.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
+ tfds_nightly-4.9.9.dev202508100046.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+ tfds_nightly-4.9.9.dev202508100046.dist-info/METADATA,sha256=--q_kSTpLi-zqwrRk3t5zotcnGbAYYuZsFNTT8DUO0c,11694
+ tfds_nightly-4.9.9.dev202508100046.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ tfds_nightly-4.9.9.dev202508100046.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
+ tfds_nightly-4.9.9.dev202508100046.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
+ tfds_nightly-4.9.9.dev202508100046.dist-info/RECORD,,