tfds-nightly 4.9.9.dev202508060045__py3-none-any.whl → 4.9.9.dev202508080045__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -26,94 +26,74 @@ tfds convert_format \
26
26
  """
27
27
 
28
28
  import argparse
29
- from collections.abc import Sequence
29
+ import dataclasses
30
+ import typing
30
31
 
31
32
  from etils import epath
33
+ import simple_parsing
32
34
  from tensorflow_datasets.core import file_adapters
33
35
  from tensorflow_datasets.scripts.cli import convert_format_utils
34
36
 
35
37
 
36
- def add_parser_arguments(parser: argparse.ArgumentParser) -> None:
37
- """Add arguments for `convert_format` subparser."""
38
- parser.add_argument(
39
- '--root_data_dir',
40
- type=str,
41
- help=(
42
- 'Root data dir that contains all datasets. All datasets and all their'
43
- ' configs and versions that are in this folder will be converted.'
44
- ),
45
- required=False,
46
- )
47
- parser.add_argument(
48
- '--dataset_dir',
49
- type=str,
50
- help=(
51
- 'Path where the dataset to be converted is located. Converts all'
52
- ' configs and versions in this folder.'
53
- ),
54
- required=False,
55
- )
56
- parser.add_argument(
57
- '--dataset_version_dir',
58
- type=str,
59
- help=(
60
- 'Path where the dataset to be converted is located. Should include'
61
- ' config and version. Can also be a comma-separated list of paths. If'
62
- ' multiple paths are specified, `--out_dir` should not be specified,'
63
- ' since each dataset will be converted in the same directory as the'
64
- ' input dataset.'
65
- ),
66
- required=False,
67
- )
68
- parser.add_argument(
69
- '--out_file_format',
70
- type=str,
71
- choices=[file_format.value for file_format in file_adapters.FileFormat],
72
- help='File format to convert the dataset to.',
73
- required=True,
74
- )
75
- parser.add_argument(
76
- '--out_dir',
77
- type=str,
78
- help=(
79
- 'Path where the converted dataset will be stored. Should include the'
80
- ' config and version, e.g. `/data/dataset_name/config/1.2.3`. If not'
81
- ' specified, the converted shards will be stored in the same'
82
- ' directory as the input dataset.'
83
- ),
84
- default='',
85
- required=False,
86
- )
87
- parser.add_argument(
88
- '--overwrite',
89
- action='store_true',
90
- help='Whether to overwrite the output directory if it already exists.',
91
- )
92
- parser.add_argument(
93
- '--use_beam',
94
- action='store_true',
95
- help='Use beam to convert the dataset.',
96
- )
97
- parser.add_argument(
98
- '--num_workers',
99
- type=int,
100
- default=8,
101
- help=(
102
- 'Number of workers to use when not using Beam. If `--use_beam` is'
103
- ' set, this flag is ignored. If `--num_workers=1`, the conversion'
104
- ' will be done sequentially.'
105
- ),
38
+ @dataclasses.dataclass(frozen=True, kw_only=True)
39
+ class Args:
40
+ """CLI arguments for converting datasets from one file format to another.
41
+
42
+ Attributes:
43
+ root_data_dir: Root data dir that contains all datasets. All datasets and
44
+ all their configs and versions that are in this folder will be converted.
45
+ dataset_dir: Path where the dataset to be converted is located. Converts all
46
+ configs and versions in this folder.
47
+ dataset_version_dir: Path where the dataset to be converted is located.
48
+ Should include config and version. Can also be a comma-separated list of
49
+ paths. If multiple paths are specified, `--out_dir` should not be
50
+ specified, since each dataset will be converted in the same directory as
51
+ the input dataset.
52
+ out_file_format: File format to convert the dataset to.
53
+ out_dir: Path where the converted dataset will be stored. Datasets will be
54
+ stored with the same folder structure as the input folder. If `None`, the
55
+ converted shards will be stored in the same folder as the input datasets.
56
+ overwrite: Whether to overwrite the output directory if it already exists.
57
+ use_beam: Use beam to convert the dataset.
58
+ num_workers: Number of workers to use when not using Beam. If `--use_beam`
59
+ is set, this flag is ignored. If `--num_workers=1`, the conversion will be
60
+ done sequentially.
61
+ only_log_errors: If set, errors during the conversion will be logged as
62
+ errors and will not crash the conversion. If you are converting a large
63
+ number of datasets, you might want to set this flag to true.
64
+ """
65
+
66
+ root_data_dir: epath.Path | None = None
67
+ dataset_dir: epath.Path | None = None
68
+ dataset_version_dir: list[epath.Path] = simple_parsing.field(
69
+ default_factory=list,
70
+ type=lambda dataset_version_dirs_str: [
71
+ epath.Path(path) for path in dataset_version_dirs_str.split(',')
72
+ ],
73
+ nargs='?',
106
74
  )
107
- parser.add_argument(
108
- '--only_log_errors',
109
- action='store_true',
110
- default=False,
111
- help=(
112
- 'If set, errors during the conversion will be logged as errors and'
113
- ' will not crash the conversion. If you are converting a large number'
114
- ' of datasets, you might want to set this flag to true.'
115
- ),
75
+ out_file_format: str = simple_parsing.choice(
76
+ *(file_format.value for file_format in file_adapters.FileFormat),
116
77
  )
78
+ out_dir: epath.Path | None = None
79
+ overwrite: bool = False
80
+ use_beam: bool = False
81
+ num_workers: int = 8
82
+ only_log_errors: bool = False
83
+
84
+ def execute(self) -> None:
85
+ """Converts a dataset from one file format to another."""
86
+ convert_format_utils.convert_dataset(
87
+ out_dir=self.out_dir,
88
+ out_file_format=self.out_file_format,
89
+ dataset_dir=self.dataset_dir,
90
+ root_data_dir=self.root_data_dir,
91
+ dataset_version_dir=self.dataset_version_dir,
92
+ overwrite=self.overwrite,
93
+ use_beam=self.use_beam,
94
+ num_workers=self.num_workers,
95
+ fail_on_error=not self.only_log_errors,
96
+ )
117
97
 
118
98
 
119
99
  def register_subparser(parsers: argparse._SubParsersAction) -> None:
@@ -122,27 +102,6 @@ def register_subparser(parsers: argparse._SubParsersAction) -> None:
122
102
  'convert_format',
123
103
  help='Converts a dataset from one file format to another format.',
124
104
  )
125
- add_parser_arguments(parser)
126
-
127
- def _parse_dataset_version_dir(
128
- dataset_version_dir: str | None,
129
- ) -> Sequence[epath.Path] | None:
130
- if not dataset_version_dir:
131
- return None
132
- return [epath.Path(path) for path in dataset_version_dir.split(',')]
133
-
134
- parser.set_defaults(
135
- subparser_fn=lambda args: convert_format_utils.convert_dataset(
136
- out_dir=epath.Path(args.out_dir) if args.out_dir else None,
137
- out_file_format=args.out_file_format,
138
- dataset_dir=args.dataset_dir or None,
139
- root_data_dir=args.root_data_dir or None,
140
- dataset_version_dir=_parse_dataset_version_dir(
141
- args.dataset_version_dir
142
- ),
143
- overwrite=args.overwrite,
144
- use_beam=args.use_beam,
145
- num_workers=args.num_workers,
146
- fail_on_error=not args.only_log_errors,
147
- )
148
- )
105
+ parser = typing.cast(simple_parsing.ArgumentParser, parser)
106
+ parser.add_arguments(Args, dest='args')
107
+ parser.set_defaults(subparser_fn=lambda args: args.args.execute())
@@ -125,16 +125,11 @@ class CmdArgs(simple_parsing.helpers.FrozenSerializable):
125
125
 
126
126
  def register_subparser(parsers: argparse._SubParsersAction):
127
127
  """Add subparser for `convert_format` command."""
128
- orig_parser_class = parsers._parser_class # pylint: disable=protected-access
129
- try:
130
- parsers._parser_class = simple_parsing.ArgumentParser # pylint: disable=protected-access
131
- parser = parsers.add_parser(
132
- 'build_croissant',
133
- help='Prepares a croissant dataset',
134
- )
135
- parser = typing.cast(simple_parsing.ArgumentParser, parser)
136
- finally:
137
- parsers._parser_class = orig_parser_class # pylint: disable=protected-access
128
+ parser = parsers.add_parser(
129
+ 'build_croissant',
130
+ help='Prepares a croissant dataset',
131
+ )
132
+ parser = typing.cast(simple_parsing.ArgumentParser, parser)
138
133
  parser.add_arguments(CmdArgs, dest='args')
139
134
  parser.set_defaults(
140
135
  subparser_fn=lambda args: prepare_croissant_builders(args.args)
@@ -28,8 +28,8 @@ from typing import List
28
28
  from absl import app
29
29
  from absl import flags
30
30
  from absl import logging
31
- from absl.flags import argparse_flags
32
31
 
32
+ import simple_parsing
33
33
  import tensorflow_datasets.public_api as tfds
34
34
 
35
35
  # Import commands
@@ -46,7 +46,7 @@ def _parse_flags(argv: List[str]) -> argparse.Namespace:
46
46
  """Command lines flag parsing."""
47
47
  argv = flag_utils.normalize_flags(argv) # See b/174043007 for context.
48
48
 
49
- parser = argparse_flags.ArgumentParser(
49
+ parser = simple_parsing.ArgumentParser(
50
50
  description='Tensorflow Datasets CLI tool',
51
51
  allow_abbrev=False,
52
52
  )
@@ -67,7 +67,22 @@ def _parse_flags(argv: List[str]) -> argparse.Namespace:
67
67
  new.register_subparser(subparser)
68
68
  convert_format.register_subparser(subparser)
69
69
  croissant.register_subparser(subparser)
70
- return parser.parse_args(argv[1:])
70
+
71
+ namespace, remaining_argv = parser.parse_known_args(argv[1:])
72
+
73
+ # Manually parse absl flags from the remaining arguments.
74
+ try:
75
+ # FLAGS requires the program name as the first argument.
76
+ positionals = FLAGS(argv[:1] + remaining_argv)
77
+ except flags.Error as e:
78
+ parser.error(str(e))
79
+
80
+ # There should be no positional arguments left, as they should have been
81
+ # handled by the sub-commands.
82
+ if len(positionals) > 1:
83
+ parser.error(f"unrecognized arguments: {' '.join(positionals[1:])}")
84
+
85
+ return namespace
71
86
 
72
87
 
73
88
  def main(args: argparse.Namespace) -> None:
@@ -16,12 +16,14 @@
16
16
  """`tfds new` command."""
17
17
 
18
18
  import argparse
19
+ import dataclasses
19
20
  import os
20
21
  import pathlib
21
22
  import subprocess
22
23
  import textwrap
23
- from typing import Optional
24
+ import typing
24
25
 
26
+ import simple_parsing
25
27
  from tensorflow_datasets.core import constants
26
28
  from tensorflow_datasets.core import dataset_metadata
27
29
  from tensorflow_datasets.core import naming
@@ -30,60 +32,60 @@ from tensorflow_datasets.scripts.cli import builder_templates
30
32
  from tensorflow_datasets.scripts.cli import cli_utils as utils
31
33
 
32
34
 
33
- def register_subparser(parsers: argparse._SubParsersAction) -> None: # pylint: disable=protected-access
34
- """Add subparser for `new` command."""
35
- new_parser = parsers.add_parser(
36
- 'new', help='Creates a new dataset directory from the template.'
37
- )
38
- new_parser.add_argument(
39
- 'dataset_name', # Positional argument
40
- type=str,
41
- help='Name of the dataset to be created (in snake_case)',
35
+ @dataclasses.dataclass(frozen=True, kw_only=True)
36
+ class Args:
37
+ """CLI arguments for creating a new dataset directory.
38
+
39
+ Attributes:
40
+ dataset_name: Name of the dataset to be created (in snake_case).
41
+ data_format: Format of the input data, which is used to generate a
42
+ format-specific template.
43
+ dir: Path where the dataset directory will be created. Defaults to current
44
+ directory.
45
+ """
46
+
47
+ dataset_name: str = simple_parsing.field(
48
+ positional=True,
49
+ # Need to explicitly set metavar for command-line help.
50
+ metavar='dataset_name',
42
51
  )
43
- new_parser.add_argument(
44
- '--data_format', # Optional argument
45
- type=str,
52
+ data_format: str = simple_parsing.choice(
53
+ builder_templates.STANDARD,
54
+ builder_templates.CONLL,
55
+ builder_templates.CONLLU,
46
56
  default=builder_templates.STANDARD,
47
- choices=[
48
- builder_templates.STANDARD,
49
- builder_templates.CONLL,
50
- builder_templates.CONLLU,
51
- ],
52
- help=(
53
- 'Optional format of the input data, which is used to generate a '
54
- 'format-specific template.'
55
- ),
56
- )
57
- new_parser.add_argument(
58
- '--dir',
59
- type=pathlib.Path,
60
- default=pathlib.Path.cwd(),
61
- help=(
62
- 'Path where the dataset directory will be created. '
63
- 'Defaults to current directory.'
64
- ),
65
57
  )
66
- new_parser.set_defaults(subparser_fn=_create_dataset_files)
67
-
68
-
69
- def _create_dataset_files(args: argparse.Namespace) -> None:
70
- """Creates the dataset directory. Executed by `tfds new <name>`."""
71
- if not naming.is_valid_dataset_and_class_name(args.dataset_name):
72
- raise ValueError(
73
- 'Invalid dataset name. It should be a valid Python class name.'
58
+ dir: pathlib.Path = simple_parsing.field(default_factory=pathlib.Path.cwd)
59
+
60
+ def execute(self) -> None:
61
+ """Creates the dataset directory."""
62
+ if not naming.is_valid_dataset_and_class_name(self.dataset_name):
63
+ raise ValueError(
64
+ 'Invalid dataset name. It should be a valid Python class name.'
65
+ )
66
+
67
+ create_dataset_files(
68
+ dataset_name=self.dataset_name,
69
+ dataset_dir=self.dir,
70
+ data_format=self.data_format,
74
71
  )
75
72
 
76
- create_dataset_files(
77
- dataset_name=args.dataset_name,
78
- dataset_dir=args.dir,
79
- data_format=args.data_format,
73
+
74
+ def register_subparser(parsers: argparse._SubParsersAction) -> None:
75
+ """Add subparser for `new` command."""
76
+ parser = parsers.add_parser(
77
+ 'new',
78
+ help='Creates a new dataset directory from the template.',
80
79
  )
80
+ parser = typing.cast(simple_parsing.ArgumentParser, parser)
81
+ parser.add_arguments(Args, dest='args')
82
+ parser.set_defaults(subparser_fn=lambda args: args.args.execute())
81
83
 
82
84
 
83
85
  def create_dataset_files(
84
86
  dataset_name: str,
85
87
  dataset_dir: pathlib.Path,
86
- data_format: Optional[str] = None,
88
+ data_format: str | None = None,
87
89
  ) -> None:
88
90
  """Creates the dataset files."""
89
91
  # Creates the root directory
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tfds-nightly
3
- Version: 4.9.9.dev202508060045
3
+ Version: 4.9.9.dev202508080045
4
4
  Summary: tensorflow/datasets is a library of datasets ready to use with TensorFlow.
5
5
  Home-page: https://github.com/tensorflow/datasets
6
6
  Download-URL: https://github.com/tensorflow/datasets/tags
@@ -1985,13 +1985,13 @@ tensorflow_datasets/scripts/cli/builder_templates.py,sha256=99SvH3skigkc2Qg737BV
1985
1985
  tensorflow_datasets/scripts/cli/builder_templates_test.py,sha256=HBNB-v2zlImKULPI8Webs9hXCkeFmWT29urxav-tDe8,2062
1986
1986
  tensorflow_datasets/scripts/cli/cli_utils.py,sha256=rMYMcQj1w46OTOeMyp3qf4y9v7ArOGh6u5NaCjBXal8,12313
1987
1987
  tensorflow_datasets/scripts/cli/conftest.py,sha256=cmvCCV-efT5ZXYPkCSGS1OxoKNPAfSsLcFTfYfe61S0,1233
1988
- tensorflow_datasets/scripts/cli/convert_format.py,sha256=Cefg7Cd4Nmz8AlwEm68T286YEuoDHtRfQfdtq7XAZrA,4805
1988
+ tensorflow_datasets/scripts/cli/convert_format.py,sha256=02RDZQQCuXf_XFFpx0gmRVkYyJg534kY0fZwGKxtUL4,4197
1989
1989
  tensorflow_datasets/scripts/cli/convert_format_utils.py,sha256=U_q5WVgMNrjBkOc166U4Y_eca5KOS3Xb3jSDjp4XdK4,29078
1990
1990
  tensorflow_datasets/scripts/cli/convert_format_utils_test.py,sha256=9JGNu9TvUWzbuhe6DWwnO3V9Lia5S1Is64re-pceAWE,8823
1991
- tensorflow_datasets/scripts/cli/croissant.py,sha256=CMOe-8iEN7ZdRTiJ5w3iiJFYKsHiqzroAXmbfJimp1Y,6396
1992
- tensorflow_datasets/scripts/cli/main.py,sha256=cDlnkHU2-DIjFmMQo3gGPJ7pVwGY48ypqR1xm5E82TU,3910
1991
+ tensorflow_datasets/scripts/cli/croissant.py,sha256=6jzmOXt_i7aeJHUVX7_zpRRMEXId_PzU24zUDdExRUs,6112
1992
+ tensorflow_datasets/scripts/cli/main.py,sha256=FJJwyUtM1N9gNDsxGm850m5ejzzJ9mgESNW9Xz8E9_I,4383
1993
1993
  tensorflow_datasets/scripts/cli/main_test.py,sha256=3zNaS_2FmxxLoZOX05iJ2riuP4Qv8cx6bhAI56tV8YI,1067
1994
- tensorflow_datasets/scripts/cli/new.py,sha256=eTAEQ5QVj7DZwyymsxNW-5KNRJb-YYWIUGcW7yhC2Ew,7699
1994
+ tensorflow_datasets/scripts/cli/new.py,sha256=x_GQSEVva1XuMvFwL3rANjDxviwZviXKHCICY7P30Jc,7803
1995
1995
  tensorflow_datasets/scripts/cli/new_test.py,sha256=USr9So-FPtg8UzaQPPacXn0E1ukDIoew9oYkOn45oik,2655
1996
1996
  tensorflow_datasets/scripts/deployment/__init__.py,sha256=Z8UWkv0wbzS4AzaLgSpYVGApYv5j57RWY0vN5Z553BQ,613
1997
1997
  tensorflow_datasets/scripts/deployment/copy_dataset_info_files.py,sha256=uLuvwOWqvo1SOLAcxAOHIWBvfbyZQJ7nF79v8lTalKQ,2690
@@ -2468,10 +2468,10 @@ tensorflow_datasets/vision_language/wit/wit_test.py,sha256=PXS8DMNW-MDrT2p5oy4Ic
2468
2468
  tensorflow_datasets/vision_language/wit_kaggle/__init__.py,sha256=vGwSGeM8WE4Q-l0-eEE1sBojmk6YT0l1OO60AWa4Q40,719
2469
2469
  tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle.py,sha256=q-vX_FBzIwsFxL4sY9vuyQ3UQD2PLM4yhUR4U6l-qao,16903
2470
2470
  tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle_test.py,sha256=ZymHT1NkmD-pUnh3BmM3_g30c5afsWYnmqDD9dVyDSA,1778
2471
- tfds_nightly-4.9.9.dev202508060045.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
2472
- tfds_nightly-4.9.9.dev202508060045.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
2473
- tfds_nightly-4.9.9.dev202508060045.dist-info/METADATA,sha256=iFGEm6VS9FbPUGFhCr2P8ywlEUbvtBt7ha7Na3Emwxw,11694
2474
- tfds_nightly-4.9.9.dev202508060045.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2475
- tfds_nightly-4.9.9.dev202508060045.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
2476
- tfds_nightly-4.9.9.dev202508060045.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
2477
- tfds_nightly-4.9.9.dev202508060045.dist-info/RECORD,,
2471
+ tfds_nightly-4.9.9.dev202508080045.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
2472
+ tfds_nightly-4.9.9.dev202508080045.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
2473
+ tfds_nightly-4.9.9.dev202508080045.dist-info/METADATA,sha256=Za3dYGEQJMn0dTM0uUOEd_2jik8ANJg6Lmtp94-hKQ0,11694
2474
+ tfds_nightly-4.9.9.dev202508080045.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2475
+ tfds_nightly-4.9.9.dev202508080045.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
2476
+ tfds_nightly-4.9.9.dev202508080045.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
2477
+ tfds_nightly-4.9.9.dev202508080045.dist-info/RECORD,,