tfds-nightly 4.9.9.dev202508110045__py3-none-any.whl → 4.9.9.dev202508130045__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,8 +15,8 @@
 
 """`tfds build` command."""
 
-import argparse
 from collections.abc import Iterator
+import dataclasses
 import functools
 import importlib
 import itertools
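
The hunk above (and the next one) swaps `argparse` for `dataclasses` plus `simple_parsing`: flags become fields on frozen dataclasses that `simple_parsing` turns into CLI arguments. A minimal sketch of that pattern, using `simple_parsing`'s documented `add_arguments` API (the `ToyArgs` dataclass is invented for illustration):

import dataclasses
import simple_parsing

@dataclasses.dataclass(frozen=True, kw_only=True)
class ToyArgs:
  count: int = 1  # one dataclass field becomes one `--count` flag

parser = simple_parsing.ArgumentParser()
parser.add_arguments(ToyArgs, dest='args')
print(parser.parse_args(['--count', '3']).args)  # ToyArgs(count=3)
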
@@ -26,112 +26,100 @@ import os
 from typing import Any, Type
 
 from absl import logging
+import simple_parsing
 import tensorflow_datasets as tfds
 from tensorflow_datasets.scripts.cli import cli_utils
 
-# pylint: disable=logging-fstring-interpolation
 
-
-def register_subparser(parsers: argparse._SubParsersAction) -> None:  # pylint: disable=protected-access
-  """Add subparser for `build` command.
-
-  New flags should be added to `cli_utils` module.
-
-  Args:
-    parsers: The subparsers object to add the parser to.
+@dataclasses.dataclass(frozen=True, kw_only=True)
+class Args(cli_utils.Args):
+  """Commands for downloading and preparing datasets.
+
+  Attributes:
+    positional_datasets: Name(s) of the dataset(s) to build. Default to current
+      dir. See https://www.tensorflow.org/datasets/cli for accepted values.
+    datasets: Datasets can also be provided as keyword argument.
+    debug: Debug & tests options. Use --pdb to enter post-mortem debugging mode
+      if an exception is raised.
+    paths: Path options.
+    generation: Generation options.
+    publishing: Publishing options.
+    automation: Automation options.
   """
-  build_parser = parsers.add_parser(
-      'build', help='Commands for downloading and preparing datasets.'
-  )
-  build_parser.add_argument(
-      'datasets',  # Positional arguments
-      type=str,
+
+  positional_datasets: list[str] = simple_parsing.field(
+      positional=True,
       nargs='*',
-      help=(
-          'Name(s) of the dataset(s) to build. Default to current dir. '
-          'See https://www.tensorflow.org/datasets/cli for accepted values.'
-      ),
-  )
-  build_parser.add_argument(  # Also accept keyword arguments
-      '--datasets',
-      type=str,
-      nargs='+',
-      dest='datasets_keyword',
-      help='Datasets can also be provided as keyword argument.',
+      default_factory=list,
+      # Need to explicitly set metavar for command-line help.
+      metavar='datasets',
   )
+  datasets: list[str] = simple_parsing.field(nargs='*', default_factory=list)
 
-  cli_utils.add_debug_argument_group(build_parser)
-  cli_utils.add_path_argument_group(build_parser)
-  cli_utils.add_generation_argument_group(build_parser)
-  cli_utils.add_publish_argument_group(build_parser)
-
-  # **** Automation options ****
-  automation_group = build_parser.add_argument_group(
-      'Automation', description='Used by automated scripts.'
+  debug: cli_utils.DebugOptions = cli_utils.DebugOptions()
+  paths: cli_utils.PathOptions = simple_parsing.field(
+      default_factory=cli_utils.PathOptions
   )
-  automation_group.add_argument(
-      '--exclude_datasets',
-      type=str,
-      help=(
-          'If set, generate all datasets except the one defined here. '
-          'Comma separated list of datasets to exclude. '
-      ),
+  generation: cli_utils.GenerationOptions = simple_parsing.field(
+      default_factory=cli_utils.GenerationOptions
   )
-  automation_group.add_argument(
-      '--experimental_latest_version',
-      action='store_true',
-      help=(
-          'Build the latest Version(experiments=...) available rather than '
-          'default version.'
-      ),
+  publishing: cli_utils.PublishingOptions = simple_parsing.field(
+      default_factory=cli_utils.PublishingOptions
+  )
+  automation: cli_utils.AutomationOptions = simple_parsing.field(
+      default_factory=cli_utils.AutomationOptions
   )
 
-  build_parser.set_defaults(subparser_fn=_build_datasets)
-
-
-def _build_datasets(args: argparse.Namespace) -> None:
-  """Build the given datasets."""
-  # Eventually register additional datasets imports
-  if args.imports:
-    list(importlib.import_module(m) for m in args.imports.split(','))
+  def execute(self) -> None:
+    """Build the given datasets."""
+    # Eventually register additional datasets imports
+    if self.generation.imports:
+      list(
+          importlib.import_module(m) for m in self.generation.imports.split(',')
+      )
 
-  # Select datasets to generate
-  datasets = (args.datasets or []) + (args.datasets_keyword or [])
-  if args.exclude_datasets:  # Generate all datasets if `--exclude_datasets` set
-    if datasets:
-      raise ValueError("--exclude_datasets can't be used with `datasets`")
-    datasets = set(tfds.list_builders(with_community_datasets=False)) - set(
-        args.exclude_datasets.split(',')
+    # Select datasets to generate
+    datasets = self.positional_datasets + self.datasets
+    if (
+        self.automation.exclude_datasets
+    ):  # Generate all datasets if `--exclude_datasets` set
+      if datasets:
+        raise ValueError("--exclude_datasets can't be used with `datasets`")
+      datasets = set(tfds.list_builders(with_community_datasets=False)) - set(
+          self.automation.exclude_datasets.split(',')
+      )
+      datasets = sorted(datasets)  # `set` is not deterministic
+    else:
+      datasets = datasets or ['']  # Empty string for default
+
+    # Import builder classes
+    builders_cls_and_kwargs = [
+        _get_builder_cls_and_kwargs(
+            dataset, has_imports=bool(self.generation.imports)
+        )
+        for dataset in datasets
+    ]
+
+    # Parallelize datasets generation.
+    builders = itertools.chain(*(
+        _make_builders(self, builder_cls, builder_kwargs)
+        for (builder_cls, builder_kwargs) in builders_cls_and_kwargs
+    ))
+    process_builder_fn = functools.partial(
+        _download if self.generation.download_only else _download_and_prepare,
+        self,
    )
-    datasets = sorted(datasets)  # `set` is not deterministic
-  else:
-    datasets = datasets or ['']  # Empty string for default
-
-  # Import builder classes
-  builders_cls_and_kwargs = [
-      _get_builder_cls_and_kwargs(dataset, has_imports=bool(args.imports))
-      for dataset in datasets
-  ]
-
-  # Parallelize datasets generation.
-  builders = itertools.chain(*(
-      _make_builders(args, builder_cls, builder_kwargs)
-      for (builder_cls, builder_kwargs) in builders_cls_and_kwargs
-  ))
-  process_builder_fn = functools.partial(
-      _download if args.download_only else _download_and_prepare, args
-  )
 
-  if args.num_processes == 1:
-    for builder in builders:
-      process_builder_fn(builder)
-  else:
-    with multiprocessing.Pool(args.num_processes) as pool:
-      pool.map(process_builder_fn, builders)
+    if self.generation.num_processes == 1:
+      for builder in builders:
+        process_builder_fn(builder)
+    else:
+      with multiprocessing.Pool(self.generation.num_processes) as pool:
+        pool.map(process_builder_fn, builders)
 
 
 def _make_builders(
-    args: argparse.Namespace,
+    args: Args,
     builder_cls: Type[tfds.core.DatasetBuilder],
     builder_kwargs: dict[str, Any],
 ) -> Iterator[tfds.core.DatasetBuilder]:
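
The old `_build_datasets(args)` entry point becomes an `execute()` method, and the positional and keyword dataset spellings now merge by plain list concatenation instead of `(args.datasets or []) + (args.datasets_keyword or [])`. A hedged illustration of that merge (field values invented; the option groups fall back to their declared defaults):

from tensorflow_datasets.scripts.cli import build

# `tfds build mnist` fills positional_datasets; `tfds build --datasets mnist`
# fills datasets. `execute` simply concatenates the two lists.
args = build.Args(positional_datasets=['mnist'], datasets=['cifar10'])
assert args.positional_datasets + args.datasets == ['mnist', 'cifar10']
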
@@ -146,7 +134,7 @@ def _make_builders(
     Initialized dataset builders.
   """
   # Eventually overwrite version
-  if args.experimental_latest_version:
+  if args.automation.experimental_latest_version:
     if 'version' in builder_kwargs:
       raise ValueError(
           "Can't have both `--experimental_latest` and version set (`:1.0.0`)"
@@ -157,19 +145,19 @@ def _make_builders(
   builder_kwargs['config'] = _get_config_name(
       builder_cls=builder_cls,
       config_kwarg=builder_kwargs.get('config'),
-      config_name=args.config,
-      config_idx=args.config_idx,
+      config_name=args.generation.config,
+      config_idx=args.generation.config_idx,
   )
 
-  if args.file_format:
-    builder_kwargs['file_format'] = args.file_format
+  if args.generation.file_format:
+    builder_kwargs['file_format'] = args.generation.file_format
 
   make_builder = functools.partial(
       _make_builder,
       builder_cls,
-      overwrite=args.overwrite,
-      fail_if_exists=args.fail_if_exists,
-      data_dir=args.data_dir,
+      overwrite=args.debug.overwrite,
+      fail_if_exists=args.debug.fail_if_exists,
+      data_dir=args.paths.data_dir,
       **builder_kwargs,
   )
 
@@ -203,7 +191,7 @@ def _get_builder_cls_and_kwargs(
   if not has_imports:
     path = _search_script_path(ds_to_build)
     if path is not None:
-      logging.info(f'Loading dataset {ds_to_build} from path: {path}')
+      logging.info('Loading dataset %s from path: %s', ds_to_build, path)
       # Dynamically load user dataset script
       # When possible, load from the parent's parent, so module is named
       # "foo.foo_dataset_builder".
@@ -228,7 +216,9 @@ def _get_builder_cls_and_kwargs(
   name, builder_kwargs = tfds.core.naming.parse_builder_name_kwargs(ds_to_build)
   builder_cls = tfds.builder_cls(str(name))
   logging.info(
-      f'Loading dataset {ds_to_build} from imports: {builder_cls.__module__}'
+      'Loading dataset %s from imports: %s',
+      ds_to_build,
+      builder_cls.__module__,
   )
   return builder_cls, builder_kwargs
 
@@ -308,7 +298,7 @@ def _make_builder(
 
 
 def _download(
-    args: argparse.Namespace,
+    args: Args,
     builder: tfds.core.DatasetBuilder,
 ) -> None:
   """Downloads all files of the given builder."""
@@ -330,7 +320,7 @@ def _download(
   if builder.MAX_SIMULTANEOUS_DOWNLOADS is not None:
     max_simultaneous_downloads = builder.MAX_SIMULTANEOUS_DOWNLOADS
 
-  download_dir = args.download_dir or os.path.join(
+  download_dir = args.paths.download_dir or os.path.join(
       builder._data_dir_root, 'downloads'  # pylint: disable=protected-access
   )
   dl_manager = tfds.download.DownloadManager(
@@ -352,39 +342,39 @@
 
 
 def _download_and_prepare(
-    args: argparse.Namespace,
+    args: Args,
     builder: tfds.core.DatasetBuilder,
 ) -> None:
   """Generate a single builder."""
   cli_utils.download_and_prepare(
       builder=builder,
       download_config=_make_download_config(args, dataset_name=builder.name),
-      download_dir=args.download_dir,
-      publish_dir=args.publish_dir,
-      skip_if_published=args.skip_if_published,
-      overwrite=args.overwrite,
-      beam_pipeline_options=args.beam_pipeline_options,
-      nondeterministic_order=args.nondeterministic_order,
+      download_dir=args.paths.download_dir,
+      publish_dir=args.publishing.publish_dir,
+      skip_if_published=args.publishing.skip_if_published,
+      overwrite=args.debug.overwrite,
+      beam_pipeline_options=args.generation.beam_pipeline_options,
+      nondeterministic_order=args.generation.nondeterministic_order,
   )
 
 
 def _make_download_config(
-    args: argparse.Namespace,
+    args: Args,
     dataset_name: str,
 ) -> tfds.download.DownloadConfig:
   """Generate the download and prepare configuration."""
   # Load the download config
-  manual_dir = args.manual_dir
-  if args.add_name_to_manual_dir:
+  manual_dir = args.paths.manual_dir
+  if args.paths.add_name_to_manual_dir:
     manual_dir = manual_dir / dataset_name
 
   kwargs = {}
-  if args.max_shard_size_mb:
-    kwargs['max_shard_size'] = args.max_shard_size_mb << 20
-  if args.num_shards:
-    kwargs['num_shards'] = args.num_shards
-  if args.download_config:
-    kwargs.update(json.loads(args.download_config))
+  if args.generation.max_shard_size_mb:
+    kwargs['max_shard_size'] = args.generation.max_shard_size_mb << 20
+  if args.generation.num_shards:
+    kwargs['num_shards'] = args.generation.num_shards
+  if args.generation.download_config:
+    kwargs.update(json.loads(args.generation.download_config))
 
   if 'download_mode' in kwargs:
     kwargs['download_mode'] = tfds.download.GenerateMode(
@@ -392,15 +382,15 @@ def _make_download_config(
     )
   else:
     kwargs['download_mode'] = tfds.download.GenerateMode.REUSE_DATASET_IF_EXISTS
-  if args.update_metadata_only:
+  if args.generation.update_metadata_only:
     kwargs['download_mode'] = tfds.download.GenerateMode.UPDATE_DATASET_INFO
 
   return tfds.download.DownloadConfig(
-      extract_dir=args.extract_dir,
+      extract_dir=args.paths.extract_dir,
       manual_dir=manual_dir,
-      max_examples_per_split=args.max_examples_per_split,
-      register_checksums=args.register_checksums,
-      force_checksums_validation=args.force_checksums_validation,
+      max_examples_per_split=args.debug.max_examples_per_split,
+      register_checksums=args.generation.register_checksums,
+      force_checksums_validation=args.generation.force_checksums_validation,
       **kwargs,
   )
 
@@ -445,11 +435,10 @@ def _get_config_name(
     else:
       return config_name
   elif config_idx is not None:  # `--config_idx 123`
-    if config_idx > len(builder_cls.BUILDER_CONFIGS):
+    if config_idx >= len(builder_cls.BUILDER_CONFIGS):
       raise ValueError(
-          f'--config_idx {config_idx} greater than number '
-          f'of configs {len(builder_cls.BUILDER_CONFIGS)} for '
-          f'{builder_cls.name}.'
+          f'--config_idx {config_idx} greater than number of configs '
+          f'{len(builder_cls.BUILDER_CONFIGS)} for {builder_cls.name}.'
       )
     else:
       # Use `config.name` to avoid
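
The `>` to `>=` change above fixes an off-by-one: valid indices into `BUILDER_CONFIGS` run from 0 to `len - 1`, so an index equal to the length must also be rejected up front rather than failing later with an `IndexError`. A standalone illustration:

configs = ['a', 'b', 'c']  # len(configs) == 3, valid indices are 0..2
config_idx = 3
assert not (config_idx > len(configs))  # old guard lets 3 slip through
assert config_idx >= len(configs)       # new guard correctly rejects 3
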
@@ -19,6 +19,7 @@ import dataclasses
 import functools
 import multiprocessing
 import os
+import typing
 from unittest import mock
 
 from etils import epath
@@ -311,7 +312,8 @@ def test_download_only(build):
 )
 def test_make_download_config(args: str, download_config_kwargs):
   args = main._parse_flags(f'tfds build x {args}'.split())
-  actual = build_lib._make_download_config(args, dataset_name='x')
+  cmd_args = typing.cast(build_lib.Args, args.command)
+  actual = build_lib._make_download_config(cmd_args, dataset_name='x')
   # Ignore the beam runner
   actual = actual.replace(beam_runner=None)
   expected = tfds.download.DownloadConfig(**download_config_kwargs)
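
The cast reflects the new parsing shape: `main._parse_flags` now returns the top-level `Args` dataclass, with the chosen subcommand instance on its `.command` field (typed as a union, hence `typing.cast` to narrow it). A hedged restatement of what the test relies on (flag string invented):

args = main._parse_flags('tfds build mnist'.split())
cmd_args = typing.cast(build_lib.Args, args.command)  # narrows the union for the type checker
assert cmd_args.positional_datasets == ['mnist']
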
@@ -15,11 +15,13 @@
 
 """Utility functions for TFDS CLI."""
 
+import abc
 import argparse
-from collections.abc import Sequence
+from collections.abc import Callable, Sequence
 import dataclasses
 import itertools
 import pathlib
+from typing import TypeVar
 
 from absl import logging
 from absl.flags import argparse_flags
@@ -33,6 +35,8 @@ from tensorflow_datasets.core import naming
 from tensorflow_datasets.core.utils import file_utils
 from tensorflow_datasets.scripts.utils import flag_utils
 
+_DataclassT = TypeVar('_DataclassT')
+
 
 class ArgumentParser(
     argparse_flags.ArgumentParser, simple_parsing.ArgumentParser
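
The next hunk adds the two pieces every command module now builds on: the `make_flags_parser` factory and the abstract `Args` base with its `execute()` hook. A hedged usage sketch of that pair (the `_GreetArgs` dataclass is invented; only the helper and base class come from this file):

import dataclasses

@dataclasses.dataclass(frozen=True, kw_only=True)
class _GreetArgs(Args):
  name: str = 'world'

  def execute(self) -> None:
    print(f'hello {self.name}')

parse = make_flags_parser(_GreetArgs, description='greeter')
# argv[0] is the program name, which make_flags_parser strips.
parse(['prog', '--name', 'tfds']).execute()  # prints: hello tfds
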
@@ -77,6 +81,33 @@ class ArgumentParser(
     return super().parse_known_args(args, namespace)
 
 
+def make_flags_parser(
+    args_dataclass: type[_DataclassT], description: str
+) -> Callable[[list[str]], _DataclassT]:
+  """Returns a function that parses flags and returns the dataclass instance."""
+
+  def _parse_flags(argv: list[str]) -> _DataclassT:
+    """Command lines flag parsing."""
+    parser = ArgumentParser(
+        description=description,
+        allow_abbrev=False,
+    )
+    parser.add_arguments(args_dataclass, dest='args')
+    return parser.parse_args(argv[1:]).args
+
+  return _parse_flags
+
+
+@dataclasses.dataclass(frozen=True, kw_only=True)
+class Args(abc.ABC):
+  """CLI arguments for TFDS CLI commands."""
+
+  @abc.abstractmethod
+  def execute(self) -> None:
+    """Execute the CLI command."""
+    ...
+
+
 @dataclasses.dataclass
 class DatasetInfo:
   """Structure for common string used for formatting.
@@ -127,232 +158,137 @@ class DatasetInfo:
     self.ds_import = ds_import
 
 
-def add_debug_argument_group(parser: argparse.ArgumentParser):
-  """Adds debug argument group to the parser."""
-  debug_group = parser.add_argument_group(
-      'Debug & tests',
-      description=(
-          '--pdb Enter post-mortem debugging mode if an exception is raised.'
-      ),
-  )
-  debug_group.add_argument(
-      '--overwrite',
-      action='store_true',
-      help='Delete pre-existing dataset if it exists.',
-  )
-  debug_group.add_argument(
-      '--fail_if_exists',
-      action='store_true',
-      default=False,
-      help='Fails the program if there is a pre-existing dataset.',
-  )
-  debug_group.add_argument(
-      '--max_examples_per_split',
-      type=int,
-      nargs='?',
-      const=1,
-      help=(
-          'When set, only generate the first X examples (default to 1), rather'
-          ' than the full dataset.If set to 0, only execute the'
-          ' `_split_generators` (which download the original data), but skip'
-          ' `_generator_examples`'
-      ),
-  )
+@dataclasses.dataclass(frozen=True, kw_only=True)
+class DebugOptions:
+  """Debug & tests options.
 
+  Attributes:
+    overwrite: If True, delete pre-existing dataset if it exists.
+    fail_if_exists: If True, fails the program if there is a pre-existing
+      dataset.
+    max_examples_per_split: When set, only generate the first X examples
+      (default to 1), rather than the full dataset. If set to 0, only execute
+      the `_split_generators` (which download the original data), but skip
+      `_generator_examples`.
+  """
 
-def add_path_argument_group(parser: argparse.ArgumentParser):
-  """Adds path argument group to the parser."""
-  path_group = parser.add_argument_group('Paths')
-  path_group.add_argument(
-      '--data_dir',
-      type=epath.Path,
-      default=epath.Path(constants.DATA_DIR),
-      help=(
-          'Where to place datasets. Default to '
-          '`~/tensorflow_datasets/` or `TFDS_DATA_DIR` environement variable.'
-      ),
-  )
-  path_group.add_argument(
-      '--download_dir',
-      type=epath.Path,
-      help='Where to place downloads. Default to `<data_dir>/downloads/`.',
-  )
-  path_group.add_argument(
-      '--extract_dir',
-      type=epath.Path,
-      help='Where to extract files. Default to `<download_dir>/extracted/`.',
-  )
-  path_group.add_argument(
-      '--manual_dir',
-      type=epath.Path,
-      help=(
-          'Where to manually download data (required for some datasets). '
-          'Default to `<download_dir>/manual/`.'
-      ),
-  )
-  path_group.add_argument(
-      '--add_name_to_manual_dir',
-      action='store_true',
-      help=(
-          'If true, append the dataset name to the `manual_dir` (e.g. '
-          '`<download_dir>/manual/<dataset_name>/`. Useful to avoid collisions '
-          'if many datasets are generated.'
-      ),
+  overwrite: bool = simple_parsing.flag(default=False)
+  fail_if_exists: bool = simple_parsing.flag(default=False)
+  max_examples_per_split: int | None = simple_parsing.field(
+      default=None, nargs='?', const=1
   )
 
 
-def add_generation_argument_group(parser: argparse.ArgumentParser):
-  """Adds generation argument group to the parser."""
-  generation_group = parser.add_argument_group('Generation')
-  generation_group.add_argument(
-      '--download_only',
-      action='store_true',
-      help=(
-          'If True, download all files but do not prepare the dataset. Uses the'
-          ' checksum.tsv to find out what to download. Therefore, this does not'
-          ' work in combination with --register_checksums.'
-      ),
-  )
-  generation_group.add_argument(
-      '--config',
-      '-c',
-      type=str,
-      help=(
-          'Config name to build. Build all configs if not set. Can also be a'
-          ' json of the kwargs forwarded to the config `__init__` (for custom'
-          ' configs).'
-      ),
-  )
-  # We are forced to have 2 flags to avoid ambiguity when config name is
-  # a number (e.g. `voc/2017`)
-  generation_group.add_argument(
-      '--config_idx',
-      type=int,
-      help=(
-          'Config id to build (`builder_cls.BUILDER_CONFIGS[config_idx]`). '
-          'Mutually exclusive with `--config`.'
-      ),
-  )
-  generation_group.add_argument(
-      '--update_metadata_only',
-      action='store_true',
-      default=False,
-      help=(
-          'If True, existing dataset_info.json is updated with metadata defined'
-          ' in Builder class(es). Datasets must already have been prepared.'
-      ),
-  )
-  generation_group.add_argument(
-      '--download_config',
-      type=str,
-      help=(
-          'A json of the kwargs forwarded to the config `__init__` (for custom'
-          ' DownloadConfigs).'
-      ),
-  )
-  generation_group.add_argument(
-      '--imports',
-      '-i',
-      type=str,
-      help='Comma separated list of module to import to register datasets.',
-  )
-  generation_group.add_argument(
-      '--register_checksums',
-      action='store_true',
-      help='If True, store size and checksum of downloaded files.',
-  )
-  generation_group.add_argument(
-      '--force_checksums_validation',
-      action='store_true',
-      help='If True, raise an error if the checksums are not found.',
-  )
-  # For compatibility with absl.flags (which generates --foo and --nofoo).
-  generation_group.add_argument(
-      '--noforce_checksums_validation',
-      dest='force_checksums_validation',
-      action='store_false',
-      help='If specified, bypass the checks on the checksums.',
-  )
-  generation_group.add_argument(
-      '--beam_pipeline_options',
-      type=str,
-      # nargs='+',
-      help=(
-          'A (comma-separated) list of flags to pass to `PipelineOptions` when'
-          ' preparing with Apache Beam. (see:'
-          ' https://www.tensorflow.org/datasets/beam_datasets). Example:'
-          ' `--beam_pipeline_options=job_name=my-job,project=my-project`'
-      ),
-  )
-  format_values = [f.value for f in file_adapters.FileFormat]
-  generation_group.add_argument(
-      '--file_format',
-      type=str,
-      help=(
-          'File format to which generate the tf-examples. '
-          f'Available values: {format_values} (see `tfds.core.FileFormat`).'
-      ),
-  )
-  generation_group.add_argument(
-      '--max_shard_size_mb', type=int, help='The max shard size in megabytes.'
-  )
-  generation_group.add_argument(
-      '--num_shards', type=int, help='The number of shards to write to.'
-  )
-  generation_group.add_argument(
-      '--num-processes',
-      type=int,
-      default=1,
-      help='Number of parallel build processes.',
-  )
-  generation_group.add_argument(
-      '--nondeterministic_order',
-      action='store_true',
-      default=False,
-      help=(
-          'If True, it will not assure deterministic ordering when writing'
-          ' examples to disk. This might result in quicker dataset preparation.'
-      ),
-  )
-  # For compatibility with absl.flags (which generates --foo and --nofoo).
-  generation_group.add_argument(
-      '--nonondeterministic_order',
-      dest='nondeterministic_order',
-      action='store_false',
-      help=(
-          'If specified, it will assure deterministic ordering when writing'
-          ' examples to disk.'
-      ),
-  )
+@dataclasses.dataclass(frozen=True, kw_only=True)
+class PathOptions:
+  """Path options.
 
+  Attributes:
+    data_dir: Where to place datasets. Default to `~/tensorflow_datasets/` or
+      `TFDS_DATA_DIR` environement variable.
+    download_dir: Where to place downloads. Default to `<data_dir>/downloads/`.
+    extract_dir: Where to extract files. Default to `<download_dir>/extracted/`.
+    manual_dir: Where to manually download data (required for some datasets).
+      Default to `<download_dir>/manual/`.
+    add_name_to_manual_dir: If true, append the dataset name to the `manual_dir`
+      (e.g. `<download_dir>/manual/<dataset_name>/`). Useful to avoid collisions
+      if many datasets are generated.
+  """
 
-def add_publish_argument_group(parser: argparse.ArgumentParser):
-  """Adds publish argument group to the parser."""
-  publish_group = parser.add_argument_group(
-      'Publishing',
-      description='Options for publishing successfully created datasets.',
+  data_dir: epath.Path = simple_parsing.field(
+      default=epath.Path(constants.DATA_DIR)
   )
-  publish_group.add_argument(
-      '--publish_dir',
-      type=epath.Path,
+  download_dir: epath.Path | None = None
+  extract_dir: epath.Path | None = None
+  manual_dir: epath.Path | None = None
+  add_name_to_manual_dir: bool = simple_parsing.flag(default=False)
+
+
+@dataclasses.dataclass(frozen=True, kw_only=True)
+class GenerationOptions:
+  """Generation options.
+
+  Attributes:
+    download_only: If True, download all files but do not prepare the dataset.
+      Uses the checksum.tsv to find out what to download. Therefore, this does
+      not work in combination with --register_checksums.
+    config: Config name to build. Build all configs if not set. Can also be a
+      json of the kwargs forwarded to the config `__init__` (for custom
+      configs).
+    config_idx: Config id to build (`builder_cls.BUILDER_CONFIGS[config_idx]`).
+      Mutually exclusive with `--config`. We are forced to have 2 flags to avoid
+      ambiguity when `config` is a number (e.g. `voc/2017`).
+    update_metadata_only: If True, existing dataset_info.json is updated with
+      metadata defined in Builder class(es). Datasets must already have been
+      prepared.
+    download_config: A json of the kwargs forwarded to the config `__init__`
+      (for custom DownloadConfigs).
+    imports: Comma separated list of module to import to register datasets.
+    register_checksums: If True, store size and checksum of downloaded files.
+    force_checksums_validation: If True, raise an error if the checksums are not
+      found. Otherwise, bypass the checks on the checksums
+    beam_pipeline_options: A (comma-separated) list of flags to pass to
+      `PipelineOptions` when preparing with Apache Beam. (see:
+      https://www.tensorflow.org/datasets/beam_datasets). Example:
+      `--beam_pipeline_options=job_name=my-job,project=my-project`
+    file_format: File format to which generate the tf-examples.
+    max_shard_size_mb: The max shard size in megabytes.
+    num_shards: The number of shards to write to.
+    num_processes: Number of parallel build processes.
+    nondeterministic_order: If True, it will not assure deterministic ordering
+      when writing examples to disk. This might result in quicker dataset
+      preparation. Otherwise, it will assure deterministic ordering when writing
+      examples to disk
+  """
+
+  download_only: bool = simple_parsing.flag(default=False)
+  config: str | None = simple_parsing.field(default=None, alias='-c')
+  config_idx: int | None = None
+  update_metadata_only: bool = simple_parsing.flag(default=False)
+  download_config: str | None = None
+  imports: str | None = simple_parsing.field(default=None, alias='-i')
+  register_checksums: bool = simple_parsing.flag(default=False)
+  force_checksums_validation: bool = simple_parsing.flag(default=False)
+  beam_pipeline_options: str | None = None
+  file_format: str | None = simple_parsing.choice(
+      *(file_format.value for file_format in file_adapters.FileFormat),
       default=None,
-      required=False,
-      help=(
-          'Where to optionally publish the dataset after it has been '
-          'generated successfully. Should be the root data dir under which'
-          'datasets are stored. '
-          'If unspecified, dataset will not be published'
-      ),
-  )
-  publish_group.add_argument(
-      '--skip_if_published',
-      action='store_true',
-      default=False,
-      help=(
-          'If the dataset with the same version and config is already '
-          'published, then it will not be regenerated.'
-      ),
   )
+  max_shard_size_mb: int | None = None
+  num_shards: int | None = None
+  num_processes: int = simple_parsing.field(default=1, alias='num-processes')
+  nondeterministic_order: bool = simple_parsing.flag(default=False)
+
+
+@dataclasses.dataclass(frozen=True, kw_only=True)
+class PublishingOptions:
+  """Publishing options.
+
+  Attributes:
+    publish_dir: Where to optionally publish the dataset after it has been
+      generated successfully. Should be the root data dir under which datasets
+      are stored. If unspecified, dataset will not be published.
+    skip_if_published: If the dataset with the same version and config is
+      already published, then it will not be regenerated.
+  """
+
+  publish_dir: epath.Path | None = None
+  skip_if_published: bool = simple_parsing.flag(default=False)
+
+
+@dataclasses.dataclass(frozen=True, kw_only=True)
+class AutomationOptions:
+  """Automation options.
+
+  Attributes:
+    exclude_datasets: If set, generate all datasets except the one defined here.
+      Comma separated list of datasets to exclude.
+    experimental_latest_version: Build the latest Version(experiments=...)
+      available rather than default version.
+  """
+
+  exclude_datasets: str | None = None
+  experimental_latest_version: bool = simple_parsing.flag(default=False)
 
 
 def download_and_prepare(
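
Each former `add_*_argument_group` function is now a frozen dataclass, so call sites reach flags through their group, e.g. `args.generation.num_processes` instead of a flat `args.num_processes`. A small sketch of the resulting shape, using only the defaults declared above:

generation = GenerationOptions()
assert generation.num_processes == 1
assert generation.file_format is None  # values constrained via simple_parsing.choice
debug = DebugOptions()
assert debug.max_examples_per_split is None  # bare `--max_examples_per_split` means 1
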
@@ -25,19 +25,18 @@ tfds convert_format \
 ```
 """
 
-import argparse
 import dataclasses
-import typing
 
 from etils import epath
 import simple_parsing
 from tensorflow_datasets.core import file_adapters
+from tensorflow_datasets.scripts.cli import cli_utils
 from tensorflow_datasets.scripts.cli import convert_format_utils
 
 
 @dataclasses.dataclass(frozen=True, kw_only=True)
-class Args:
-  """CLI arguments for converting datasets from one file format to another.
+class Args(cli_utils.Args):
+  """Converts a dataset from one file format to another format.
 
   Attributes:
     root_data_dir: Root data dir that contains all datasets. All datasets and
@@ -94,14 +93,3 @@ class Args:
         num_workers=self.num_workers,
         fail_on_error=not self.only_log_errors,
     )
-
-
-def register_subparser(parsers: argparse._SubParsersAction) -> None:
-  """Add subparser for `convert_format` command."""
-  parser = parsers.add_parser(
-      'convert_format',
-      help='Converts a dataset from one file format to another format.',
-  )
-  parser = typing.cast(simple_parsing.ArgumentParser, parser)
-  parser.add_arguments(Args, dest='args')
-  parser.set_defaults(subparser_fn=lambda args: args.args.execute())
@@ -26,11 +26,9 @@ tfds build_croissant \
 ```
 """
 
-import argparse
 import dataclasses
 import functools
 import json
-import typing
 
 from etils import epath
 import mlcroissant as mlc
@@ -43,8 +41,8 @@ from tensorflow_datasets.scripts.cli import cli_utils
 
 
 @dataclasses.dataclass(frozen=True, kw_only=True)
-class CmdArgs(simple_parsing.helpers.FrozenSerializable):
-  """CLI arguments for preparing a Croissant dataset.
+class CmdArgs(simple_parsing.helpers.FrozenSerializable, cli_utils.Args):
+  """Prepares a Croissant dataset.
 
   Attributes:
     jsonld: Path to the JSONLD file.
@@ -122,18 +120,10 @@ class CmdArgs(simple_parsing.helpers.FrozenSerializable):
         self.overwrite_version or self.dataset.metadata.version or '1.0.0'
     )
 
-
-def register_subparser(parsers: argparse._SubParsersAction):
-  """Add subparser for `convert_format` command."""
-  parser = parsers.add_parser(
-      'build_croissant',
-      help='Prepares a croissant dataset',
-  )
-  parser = typing.cast(simple_parsing.ArgumentParser, parser)
-  parser.add_arguments(CmdArgs, dest='args')
-  parser.set_defaults(
-      subparser_fn=lambda args: prepare_croissant_builders(args.args)
-  )
+  def execute(self) -> None:
+    """Creates Croissant Builders and prepares them."""
+    for record_set_id in self.record_set_ids:
+      prepare_croissant_builder(args=self, record_set_id=record_set_id)
 
 
 def prepare_croissant_builder(
@@ -163,14 +153,3 @@ def prepare_croissant_builder(
       beam_pipeline_options=None,
   )
   return builder
-
-
-def prepare_croissant_builders(args: CmdArgs):
-  """Creates Croissant Builders and prepares them.
-
-  Args:
-    args: CLI arguments.
-  """
-  # Generate each config sequentially.
-  for record_set_id in args.record_set_ids:
-    prepare_croissant_builder(args=args, record_set_id=record_set_id)
@@ -21,13 +21,13 @@ TFDS CLI to help creates and build datasets (e.g. `tfds new my_dataset`,
 See: https://www.tensorflow.org/datasets/cli
 """
 
-import argparse
+import dataclasses
 import logging as python_logging
-from typing import List
 
 from absl import app
 from absl import flags
 from absl import logging
+import simple_parsing
 
 import tensorflow_datasets.public_api as tfds
 
@@ -41,33 +41,60 @@ from tensorflow_datasets.scripts.cli import new
 FLAGS = flags.FLAGS
 
 
-def _parse_flags(argv: List[str]) -> argparse.Namespace:
-  """Command lines flag parsing."""
-  parser = cli_utils.ArgumentParser(
-      description='Tensorflow Datasets CLI tool',
-      allow_abbrev=False,
-  )
-  parser.add_argument(
-      '--version',
-      action='version',
-      version='TensorFlow Datasets: ' + tfds.__version__,
-  )
-  parser.add_argument(
-      '--dry_run',
-      action='store_true',
-      help='If True, print the parsed arguments.',
+@dataclasses.dataclass(frozen=True, kw_only=True)
+class _DummyCommand:
+  """Dummy command to avoid `command is MISSING` error."""
+
+  pass
+
+
+version_field = simple_parsing.field(
+    action='version',
+    version='TensorFlow Datasets: ' + tfds.__version__,
+    help='The version of the TensorFlow Datasets package.',
+)
+
+
+@dataclasses.dataclass(frozen=True, kw_only=True)
+class Args(cli_utils.Args):
+  """Tensorflow Datasets CLI tool."""
+
+  version: str = version_field
+  """The version of the TensorFlow Datasets package."""
+
+  dry_run: bool = simple_parsing.flag(default=False)
+  """If True, print the parsed arguments and exit."""
+
+  command: build.Args | new.Args | convert_format.Args | croissant.CmdArgs = (
+      simple_parsing.subparsers(
+          {
+              'build': build.Args,
+              'new': new.Args,
+              'convert_format': convert_format.Args,
+              'build_croissant': croissant.CmdArgs,
+          },
+          default_factory=_DummyCommand,
+      )
   )
-  parser.set_defaults(subparser_fn=lambda _: parser.print_help())
-  # Register sub-commands
-  subparser = parser.add_subparsers(title='command')
-  build.register_subparser(subparser)
-  new.register_subparser(subparser)
-  convert_format.register_subparser(subparser)
-  croissant.register_subparser(subparser)
-  return parser.parse_args(argv[1:])
+  """The command to execute."""
+
+  def execute(self) -> None:
+    """Run the command."""
+    if self.dry_run:
+      print(self)
+    # When no command is given, print the help message.
+    elif isinstance(self.command, _DummyCommand):
+      _parse_flags(['', '--help'])
+    else:
+      self.command.execute()
+
+
+_parse_flags = cli_utils.make_flags_parser(
+    Args, description='Tensorflow Datasets CLI tool'
+)
 
 
-def main(args: argparse.Namespace) -> None:
+def main(args: Args) -> None:
 
   # From the CLI, all datasets are visible
   tfds.core.visibility.set_availables([
@@ -98,11 +125,7 @@ def main(args: argparse.Namespace) -> None:
     new_stream = tfds.core.utils.tqdm_utils.TqdmStream()
     python_handler.setStream(new_stream)
 
-  if args.dry_run:
-    print(args)
-  else:
-    # Launch the subcommand defined in the subparser (or default to print help)
-    args.subparser_fn(args)
+  args.execute()
 
 
 def launch_cli() -> None:
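
The per-module `register_subparser` functions are replaced by a single `simple_parsing.subparsers` field whose dict maps subcommand names to their `Args` dataclasses. A self-contained toy of that wiring (`Train` and `Eval` are invented stand-ins for the real subcommand dataclasses):

import dataclasses
import simple_parsing

@dataclasses.dataclass
class Train:
  lr: float = 1e-3

@dataclasses.dataclass
class Eval:
  split: str = 'test'

@dataclasses.dataclass
class Cli:
  # The parsed subcommand instance lands on `command`, mirroring main.Args.
  command: Train | Eval = simple_parsing.subparsers(
      {'train': Train, 'eval': Eval}, default_factory=Train
  )

parser = simple_parsing.ArgumentParser()
parser.add_arguments(Cli, dest='args')
print(parser.parse_args(['eval', '--split', 'dev']).args.command)  # Eval(split='dev')
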
@@ -15,13 +15,11 @@
 
 """`tfds new` command."""
 
-import argparse
 import dataclasses
 import os
 import pathlib
 import subprocess
 import textwrap
-import typing
 
 import simple_parsing
 from tensorflow_datasets.core import constants
@@ -33,8 +31,8 @@ from tensorflow_datasets.scripts.cli import cli_utils as utils
 
 
 @dataclasses.dataclass(frozen=True, kw_only=True)
-class Args:
-  """CLI arguments for creating a new dataset directory.
+class Args(utils.Args):
+  """Creates a new dataset directory from the template.
 
   Attributes:
     dataset_name: Name of the dataset to be created (in snake_case).
@@ -71,17 +69,6 @@ class Args:
   )
 
 
-def register_subparser(parsers: argparse._SubParsersAction) -> None:
-  """Add subparser for `new` command."""
-  parser = parsers.add_parser(
-      'new',
-      help='Creates a new dataset directory from the template.',
-  )
-  parser = typing.cast(simple_parsing.ArgumentParser, parser)
-  parser.add_arguments(Args, dest='args')
-  parser.set_defaults(subparser_fn=lambda args: args.args.execute())
-
-
 def create_dataset_files(
     dataset_name: str,
     dataset_dir: pathlib.Path,
@@ -15,13 +15,12 @@
 
 r"""Wrapper around `tfds build`."""
 
-import argparse
-from typing import List
+import typing
 
 from absl import app
 from absl import flags
 from absl import logging
-
+from tensorflow_datasets.scripts.cli import build
 from tensorflow_datasets.scripts.cli import main as main_cli
 
 module_import = flags.DEFINE_string('module_import', None, '`--imports` flag.')
@@ -33,7 +32,7 @@ builder_config_id = flags.DEFINE_integer(
 
 
 
-def _parse_flags(argv: List[str]) -> argparse.Namespace:
+def _parse_flags(argv: list[str]) -> main_cli.Args:
   """Command lines flag parsing."""
   return main_cli._parse_flags([argv[0], 'build'] + argv[1:])  # pylint: disable=protected-access
 
@@ -41,17 +40,18 @@ def _parse_flags(argv: List[str]) -> argparse.Namespace:
 _display_warning = True
 
 
-def main(args: argparse.Namespace) -> None:
+def main(args: main_cli.Args) -> None:
   if _display_warning:
     logging.warning(
        '***`tfds build` should be used instead of `download_and_prepare`.***'
    )
+  cmd_args = typing.cast(build.Args, args.command)
  if module_import.value:
-    args.imports = module_import.value
+    cmd_args.generation.imports = module_import.value
  if dataset.value:
-    args.datasets = [dataset.value]
+    cmd_args.datasets = [dataset.value]
  if builder_config_id.value is not None:
-    args.config_idx = builder_config_id.value
+    cmd_args.generation.config_idx = builder_config_id.value
  main_cli.main(args)
 
 
@@ -105,15 +105,19 @@ class DatasetBuilderTestCase(
     BUILDER_CONFIGS from the class will be tested.
   * DL_EXTRACT_RESULT: `dict[str, str]`, the returned result of mocked
     `download_and_extract` method. The values should be the path of files
-    present in the `fake_examples` directory, relative to that directory.
-    If not specified, path to `fake_examples` will always be returned.
+    present in the `fake_examples` (or `dummy_data`) directory, relative to
+    that directory.
+    If not specified, path to `fake_examples` (or `dummy_data`) will always be
+    returned.
   * DL_EXTRACT_ONLY_RESULT: `dict[str, str]`, the returned result of mocked
     `extract` method. The values should be the path of files present in the
-    `fake_examples` directory, relative to that directory. If not specified:
+    `fake_examples` (or `dummy_data`) directory, relative to that directory.
+    If not specified:
     will call DownloadManager `extract` method.
   * DL_DOWNLOAD_RESULT: `dict[str, str]`, the returned result of mocked
     `download_and_extract` method. The values should be the path of files
-    present in the `fake_examples` directory, relative to that directory.
+    present in the `fake_examples` (or `dummy_data`) directory, relative to
+    that directory.
     If not specified: will use DL_EXTRACT_RESULT (this is due to backwards
     compatibility and will be removed in the future).
   * EXAMPLE_DIR: `str`, the base directory in in which fake examples are
@@ -167,11 +171,9 @@ class DatasetBuilderTestCase(
         "Assign your DatasetBuilder class to %s.DATASET_CLASS." % name
     )
 
-    cls._available_cm = visibility.set_availables_tmp(
-        [
-            visibility.DatasetType.TFDS_PUBLIC,
-        ]
-    )
+    cls._available_cm = visibility.set_availables_tmp([
+        visibility.DatasetType.TFDS_PUBLIC,
+    ])
     cls._available_cm.__enter__()  # pylint: disable=protected-access
 
   @classmethod
@@ -398,9 +400,9 @@ class DatasetBuilderTestCase(
     err_msg = (
         "Did you forget to record checksums with `--register_checksums` ? See"
         " instructions at:"
-        " https://www.tensorflow.org/datasets/add_dataset#run_the_generation_codeIf"
-        " want to opt-out of checksums validation, please add `SKIP_CHECKSUMS ="
-        " True` to the `DatasetBuilderTestCase`.\n"
+        " https://www.tensorflow.org/datasets/add_dataset#run_the_generation_code"
+        " If you want to opt-out of checksums validation, please add "
+        " `SKIP_CHECKSUMS = True` to the `DatasetBuilderTestCase`.\n"
     )
     url_infos = self.dataset_class.url_infos
     filepath = self.dataset_class._checksums_path  # pylint: disable=protected-access
@@ -574,15 +576,13 @@ class DatasetBuilderTestCase(
 
     # If configs specified, ensure they are all valid
     if builder.builder_config and builder.builder_config.description:
-      err_msg = textwrap.dedent(
-          """\
+      err_msg = textwrap.dedent("""\
      The BuilderConfig description should be a one-line description of
      the config.
      It shouldn't be the same as `builder.info.description` to avoid
      redundancy. Both `config.description` and `builder.info.description`
      will be displayed in the catalog.
-      """
-      )
+      """)
      ratio = difflib.SequenceMatcher(
          None,
          builder.builder_config.description,
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tfds-nightly
-Version: 4.9.9.dev202508110045
+Version: 4.9.9.dev202508130045
 Summary: tensorflow/datasets is a library of datasets ready to use with TensorFlow.
 Home-page: https://github.com/tensorflow/datasets
 Download-URL: https://github.com/tensorflow/datasets/tags
@@ -1965,7 +1965,7 @@ tensorflow_datasets/robotics/rtx/__init__.py,sha256=T5AMbjr-iztrX4Q7k4QhiMNXLOAK
 tensorflow_datasets/robotics/rtx/rtx.py,sha256=8OEnc0_LNsgEJjaySoMwWDjzgiv4hzeobuploMM1cdo,50084
 tensorflow_datasets/scripts/__init__.py,sha256=Z8UWkv0wbzS4AzaLgSpYVGApYv5j57RWY0vN5Z553BQ,613
 tensorflow_datasets/scripts/convert_format.py,sha256=Kopn3YbNqH-euJaWFsd1nyo56-HDHgq8fDzRViXdx9A,3604
-tensorflow_datasets/scripts/download_and_prepare.py,sha256=Yd9Kg9u_WG6GgfXY6BFnyXz1utXd6clHbLZN9lnWrJc,1777
+tensorflow_datasets/scripts/download_and_prepare.py,sha256=LzbjSnFeo53r1D5oaRgTucHtJiabhBBYodmZsKBpt9s,1875
 tensorflow_datasets/scripts/freeze_dataset_versions.py,sha256=SKC7raxmREqaD5pUnSuy_NHdu9gxTlRxJIOoPoT3cuw,1244
 tensorflow_datasets/scripts/print_num_configs.py,sha256=an80znBHmkycQS4ZEHFQTi1fuFop56tDUx9hgguVcvw,971
 tensorflow_datasets/scripts/replace_fake_images.py,sha256=9L2m3zY0nntaOmsVlNWy6BRJEEytyrMuu5W0LXzLCpA,5223
@@ -1979,19 +1979,19 @@ tensorflow_datasets/scripts/cleanup/refactor_dataset_as_folder.py,sha256=VpEc2Us
 tensorflow_datasets/scripts/cleanup/url_filename_recorder.py,sha256=iLcsT8UgbyNUw00N7bVBC0zCqEuIQ2ndeCCcb4B-OEc,4490
 tensorflow_datasets/scripts/cleanup/url_status_checker.py,sha256=Tr3LtLnGhI8ElDAS-ejmuAU3rs1lmqmYlU4figoVQg0,1967
 tensorflow_datasets/scripts/cli/__init__.py,sha256=Z8UWkv0wbzS4AzaLgSpYVGApYv5j57RWY0vN5Z553BQ,613
-tensorflow_datasets/scripts/cli/build.py,sha256=jZp7CaP62D2Usi4l-o9oCUqTHhnigX15PNUr9pOd4Wo,14961
-tensorflow_datasets/scripts/cli/build_test.py,sha256=xlFYScPSMcsUR27GQ-W5wdGdLdkXu_n0hM1rl20WWW8,10542
+tensorflow_datasets/scripts/cli/build.py,sha256=_YetKh9ZZJfo3w6brP5sdzsdCKfVM4HnQLUyX4mbrX4,15002
+tensorflow_datasets/scripts/cli/build_test.py,sha256=K7ho7IRtAty1ZNPLj33Th_nZajYBkXRLA4u3dbElQmo,10615
 tensorflow_datasets/scripts/cli/builder_templates.py,sha256=99SvH3skigkc2Qg737BV2OzhXL_Rgu4az8eVHsxKCLk,7985
 tensorflow_datasets/scripts/cli/builder_templates_test.py,sha256=HBNB-v2zlImKULPI8Webs9hXCkeFmWT29urxav-tDe8,2062
-tensorflow_datasets/scripts/cli/cli_utils.py,sha256=zE-jLQw0dn_98PHOTLX6pMoFqjSCBOD7lh5dytJcphE,14049
+tensorflow_datasets/scripts/cli/cli_utils.py,sha256=sARBmqVP9W6FgTNTPcCN8rUpRqoOAd4WdMksBRnu1Tg,13307
 tensorflow_datasets/scripts/cli/conftest.py,sha256=3PNh_BbR013G4HyLAZOleUXsQ9mICrD03NaKwdHFMXs,1291
-tensorflow_datasets/scripts/cli/convert_format.py,sha256=02RDZQQCuXf_XFFpx0gmRVkYyJg534kY0fZwGKxtUL4,4197
+tensorflow_datasets/scripts/cli/convert_format.py,sha256=ZS7CmWJ-oZ0usO4TB8GKDj9TBJ5MyEO0I9QLRg7eQOw,3797
 tensorflow_datasets/scripts/cli/convert_format_utils.py,sha256=U_q5WVgMNrjBkOc166U4Y_eca5KOS3Xb3jSDjp4XdK4,29078
 tensorflow_datasets/scripts/cli/convert_format_utils_test.py,sha256=9JGNu9TvUWzbuhe6DWwnO3V9Lia5S1Is64re-pceAWE,8823
-tensorflow_datasets/scripts/cli/croissant.py,sha256=6jzmOXt_i7aeJHUVX7_zpRRMEXId_PzU24zUDdExRUs,6112
-tensorflow_datasets/scripts/cli/main.py,sha256=qUKxjULaL2ilkPqh6vcsbtnKRJJhZCBn--ZkveFO2mA,3789
+tensorflow_datasets/scripts/cli/croissant.py,sha256=0JFcSCc4nuk-jVnG_dFQkvTWiKuNZDx-OUTC4gjqRwA,5568
+tensorflow_datasets/scripts/cli/main.py,sha256=T4MRQGfNm-FLrp8aZoujQcHY6ctkmX2B6qkErFQUVpA,4238
 tensorflow_datasets/scripts/cli/main_test.py,sha256=3zNaS_2FmxxLoZOX05iJ2riuP4Qv8cx6bhAI56tV8YI,1067
-tensorflow_datasets/scripts/cli/new.py,sha256=x_GQSEVva1XuMvFwL3rANjDxviwZviXKHCICY7P30Jc,7803
+tensorflow_datasets/scripts/cli/new.py,sha256=fJok7iV0zauRKwV9n3FLVG57qfiVHYUXVBtqjEApNBY,7386
 tensorflow_datasets/scripts/cli/new_test.py,sha256=USr9So-FPtg8UzaQPPacXn0E1ukDIoew9oYkOn45oik,2655
 tensorflow_datasets/scripts/deployment/__init__.py,sha256=Z8UWkv0wbzS4AzaLgSpYVGApYv5j57RWY0vN5Z553BQ,613
 tensorflow_datasets/scripts/deployment/copy_dataset_info_files.py,sha256=uLuvwOWqvo1SOLAcxAOHIWBvfbyZQJ7nF79v8lTalKQ,2690
@@ -2122,7 +2122,7 @@ tensorflow_datasets/summarization/media_sum/media_sum.py,sha256=CIhR_cfQb1aEfu9B
 tensorflow_datasets/summarization/summscreen/__init__.py,sha256=ADxohrpUPJjug4r2kGCCJEWZzVD4s2S0smqLfjkc8YY,718
 tensorflow_datasets/summarization/summscreen/summscreen.py,sha256=DfwGr3vsRhOC62ODJ1Sp7-v219bPjJ93KK043YReV7I,884
 tensorflow_datasets/testing/__init__.py,sha256=aSwY_kciK-EZXp1D_JRkuuCJwtbFljGZ72c9YNB6yfE,6049
-tensorflow_datasets/testing/dataset_builder_testing.py,sha256=ziE2twrc1-LQExGp4g5Nbq9hlbFow3VdX8RTC83R6bM,25093
+tensorflow_datasets/testing/dataset_builder_testing.py,sha256=t95l1N8exM7G7qdPMHe1oOlF0E7KpptJBNivLXA3Tqo,25155
 tensorflow_datasets/testing/dataset_builder_testing_test.py,sha256=Nf7Ykg5bY5o9ZatQKrRJhr-qGTtNKle4aZph4rt72i4,1283
 tensorflow_datasets/testing/dataset_collection_builder_testing.py,sha256=tUv2l53rc9GEo4sWvM9OP9r-Ze54dcDakeLQBMS7yos,4825
 tensorflow_datasets/testing/dataset_collection_builder_testing_test.py,sha256=Dw5tACaDjVt9CZi0V84tMAh2JJexrRwWF1N3DID1Mbs,1155
@@ -2468,10 +2468,10 @@ tensorflow_datasets/vision_language/wit/wit_test.py,sha256=PXS8DMNW-MDrT2p5oy4Ic
 tensorflow_datasets/vision_language/wit_kaggle/__init__.py,sha256=vGwSGeM8WE4Q-l0-eEE1sBojmk6YT0l1OO60AWa4Q40,719
 tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle.py,sha256=q-vX_FBzIwsFxL4sY9vuyQ3UQD2PLM4yhUR4U6l-qao,16903
 tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle_test.py,sha256=ZymHT1NkmD-pUnh3BmM3_g30c5afsWYnmqDD9dVyDSA,1778
-tfds_nightly-4.9.9.dev202508110045.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
-tfds_nightly-4.9.9.dev202508110045.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
-tfds_nightly-4.9.9.dev202508110045.dist-info/METADATA,sha256=Xr0YCoYhfCImcmxnROqE4vp203B8XMiktUrh1hrarRw,11694
-tfds_nightly-4.9.9.dev202508110045.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-tfds_nightly-4.9.9.dev202508110045.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
-tfds_nightly-4.9.9.dev202508110045.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
-tfds_nightly-4.9.9.dev202508110045.dist-info/RECORD,,
+tfds_nightly-4.9.9.dev202508130045.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
+tfds_nightly-4.9.9.dev202508130045.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+tfds_nightly-4.9.9.dev202508130045.dist-info/METADATA,sha256=MRLubuygIcfrej-GxBNv-7IT4Nyueo9Uqa-rh7TrfOQ,11694
+tfds_nightly-4.9.9.dev202508130045.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+tfds_nightly-4.9.9.dev202508130045.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
+tfds_nightly-4.9.9.dev202508130045.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
+tfds_nightly-4.9.9.dev202508130045.dist-info/RECORD,,