tfds-nightly 4.9.9.dev202508060045-py3-none-any.whl → 4.9.9.dev202508080045-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tensorflow_datasets/scripts/cli/convert_format.py +63 -104
- tensorflow_datasets/scripts/cli/croissant.py +5 -10
- tensorflow_datasets/scripts/cli/main.py +18 -3
- tensorflow_datasets/scripts/cli/new.py +46 -44
- {tfds_nightly-4.9.9.dev202508060045.dist-info → tfds_nightly-4.9.9.dev202508080045.dist-info}/METADATA +1 -1
- {tfds_nightly-4.9.9.dev202508060045.dist-info → tfds_nightly-4.9.9.dev202508080045.dist-info}/RECORD +11 -11
- {tfds_nightly-4.9.9.dev202508060045.dist-info → tfds_nightly-4.9.9.dev202508080045.dist-info}/WHEEL +0 -0
- {tfds_nightly-4.9.9.dev202508060045.dist-info → tfds_nightly-4.9.9.dev202508080045.dist-info}/entry_points.txt +0 -0
- {tfds_nightly-4.9.9.dev202508060045.dist-info → tfds_nightly-4.9.9.dev202508080045.dist-info}/licenses/AUTHORS +0 -0
- {tfds_nightly-4.9.9.dev202508060045.dist-info → tfds_nightly-4.9.9.dev202508080045.dist-info}/licenses/LICENSE +0 -0
- {tfds_nightly-4.9.9.dev202508060045.dist-info → tfds_nightly-4.9.9.dev202508080045.dist-info}/top_level.txt +0 -0
tensorflow_datasets/scripts/cli/convert_format.py CHANGED
@@ -26,94 +26,74 @@ tfds convert_format \
 """

 import argparse
-[old line 29 not captured in this diff view]
+import dataclasses
+import typing

 from etils import epath
+import simple_parsing
 from tensorflow_datasets.core import file_adapters
 from tensorflow_datasets.scripts.cli import convert_format_utils


-[old lines 36-71: argparse parser setup, not captured in this diff view]
-      help='File format to convert the dataset to.',
-      required=True,
-  )
-  parser.add_argument(
-      '--out_dir',
-      type=str,
-      help=(
-          'Path where the converted dataset will be stored. Should include the'
-          ' config and version, e.g. `/data/dataset_name/config/1.2.3`. If not'
-          ' specified, the converted shards will be stored in the same'
-          ' directory as the input dataset.'
-      ),
-      default='',
-      required=False,
-  )
-  parser.add_argument(
-      '--overwrite',
-      action='store_true',
-      help='Whether to overwrite the output directory if it already exists.',
-  )
-  parser.add_argument(
-      '--use_beam',
-      action='store_true',
-      help='Use beam to convert the dataset.',
-  )
-  parser.add_argument(
-      '--num_workers',
-      type=int,
-      default=8,
-      help=(
-          'Number of workers to use when not using Beam. If `--use_beam` is'
-          ' set, this flag is ignored. If `--num_workers=1`, the conversion'
-          ' will be done sequentially.'
-      ),
+@dataclasses.dataclass(frozen=True, kw_only=True)
+class Args:
+  """CLI arguments for converting datasets from one file format to another.
+
+  Attributes:
+    root_data_dir: Root data dir that contains all datasets. All datasets and
+      all their configs and versions that are in this folder will be converted.
+    dataset_dir: Path where the dataset to be converted is located. Converts
+      all configs and versions in this folder.
+    dataset_version_dir: Path where the dataset to be converted is located.
+      Should include config and version. Can also be a comma-separated list of
+      paths. If multiple paths are specified, `--out_dir` should not be
+      specified, since each dataset will be converted in the same directory as
+      the input dataset.
+    out_file_format: File format to convert the dataset to.
+    out_dir: Path where the converted dataset will be stored. Datasets will be
+      stored with the same folder structure as the input folder. If `None`, the
+      converted shards will be stored in the same folder as the input datasets.
+    overwrite: Whether to overwrite the output directory if it already exists.
+    use_beam: Use beam to convert the dataset.
+    num_workers: Number of workers to use when not using Beam. If `--use_beam`
+      is set, this flag is ignored. If `--num_workers=1`, the conversion will
+      be done sequentially.
+    only_log_errors: If set, errors during the conversion will be logged as
+      errors and will not crash the conversion. If you are converting a large
+      number of datasets, you might want to set this flag to true.
+  """
+
+  root_data_dir: epath.Path | None = None
+  dataset_dir: epath.Path | None = None
+  dataset_version_dir: list[epath.Path] = simple_parsing.field(
+      default_factory=list,
+      type=lambda dataset_version_dirs_str: [
+          epath.Path(path) for path in dataset_version_dirs_str.split(',')
+      ],
+      nargs='?',
   )
-[old lines 107-108: start of the `--only_log_errors` definition, not captured in this diff view]
-      action='store_true',
-      default=False,
-      help=(
-          'If set, errors during the conversion will be logged as errors and'
-          ' will not crash the conversion. If you are converting a large number'
-          ' of datasets, you might want to set this flag to true.'
-      ),
+  out_file_format: str = simple_parsing.choice(
+      *(file_format.value for file_format in file_adapters.FileFormat),
   )
+  out_dir: epath.Path | None = None
+  overwrite: bool = False
+  use_beam: bool = False
+  num_workers: int = 8
+  only_log_errors: bool = False
+
+  def execute(self) -> None:
+    """Converts a dataset from one file format to another."""
+    convert_format_utils.convert_dataset(
+        out_dir=self.out_dir,
+        out_file_format=self.out_file_format,
+        dataset_dir=self.dataset_dir,
+        root_data_dir=self.root_data_dir,
+        dataset_version_dir=self.dataset_version_dir,
+        overwrite=self.overwrite,
+        use_beam=self.use_beam,
+        num_workers=self.num_workers,
+        fail_on_error=not self.only_log_errors,
+    )


 def register_subparser(parsers: argparse._SubParsersAction) -> None:
@@ -122,27 +102,6 @@ def register_subparser(parsers: argparse._SubParsersAction) -> None:
       'convert_format',
       help='Converts a dataset from one file format to another format.',
   )
-
-
-  def _parse_dataset_version_dir(
-      dataset_version_dir: str | None,
-  ) -> Sequence[epath.Path] | None:
-    if not dataset_version_dir:
-      return None
-    return [epath.Path(path) for path in dataset_version_dir.split(',')]
-
-  parser.set_defaults(
-      subparser_fn=lambda args: convert_format_utils.convert_dataset(
-          out_dir=epath.Path(args.out_dir) if args.out_dir else None,
-          out_file_format=args.out_file_format,
-          dataset_dir=args.dataset_dir or None,
-          root_data_dir=args.root_data_dir or None,
-          dataset_version_dir=_parse_dataset_version_dir(
-              args.dataset_version_dir
-          ),
-          overwrite=args.overwrite,
-          use_beam=args.use_beam,
-          num_workers=args.num_workers,
-          fail_on_error=not args.only_log_errors,
-      )
-  )
+  parser = typing.cast(simple_parsing.ArgumentParser, parser)
+  parser.add_arguments(Args, dest='args')
+  parser.set_defaults(subparser_fn=lambda args: args.args.execute())
tensorflow_datasets/scripts/cli/croissant.py CHANGED
@@ -125,16 +125,11 @@ class CmdArgs(simple_parsing.helpers.FrozenSerializable):

 def register_subparser(parsers: argparse._SubParsersAction):
   """Add subparser for `convert_format` command."""
-[old lines 128-132 not captured in this diff view: the try/`_parser_class` patching and the `parsers.add_parser('build_croissant', ...)` call]
-      help='Prepares a croissant dataset',
-  )
-  parser = typing.cast(simple_parsing.ArgumentParser, parser)
-  finally:
-    parsers._parser_class = orig_parser_class  # pylint: disable=protected-access
+  parser = parsers.add_parser(
+      'build_croissant',
+      help='Prepares a croissant dataset',
+  )
+  parser = typing.cast(simple_parsing.ArgumentParser, parser)
   parser.add_arguments(CmdArgs, dest='args')
   parser.set_defaults(
       subparser_fn=lambda args: prepare_croissant_builders(args.args)
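Why the old try/finally patching of `parsers._parser_class` could be dropped: argparse's `add_subparsers()` defaults `parser_class` to the type of the parent parser, so when the root parser is a `simple_parsing.ArgumentParser`, `add_parser()` already returns one; the `typing.cast` exists only to inform the type checker. A small sketch under that assumption (names are illustrative):

import typing

import simple_parsing

root = simple_parsing.ArgumentParser(prog='tfds')
subparsers = root.add_subparsers(dest='command')
# add_parser() instantiates type(root) by default, i.e. the
# simple_parsing ArgumentParser subclass, not a plain argparse one.
parser = subparsers.add_parser('demo', help='demo command')
parser = typing.cast(simple_parsing.ArgumentParser, parser)  # type checker only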
tensorflow_datasets/scripts/cli/main.py CHANGED
@@ -28,8 +28,8 @@ from typing import List
 from absl import app
 from absl import flags
 from absl import logging
-from absl.flags import argparse_flags

+import simple_parsing
 import tensorflow_datasets.public_api as tfds

 # Import commands
@@ -46,7 +46,7 @@ def _parse_flags(argv: List[str]) -> argparse.Namespace:
   """Command lines flag parsing."""
   argv = flag_utils.normalize_flags(argv)  # See b/174043007 for context.

-  parser = argparse_flags.ArgumentParser(
+  parser = simple_parsing.ArgumentParser(
       description='Tensorflow Datasets CLI tool',
       allow_abbrev=False,
   )
@@ -67,7 +67,22 @@ def _parse_flags(argv: List[str]) -> argparse.Namespace:
   new.register_subparser(subparser)
   convert_format.register_subparser(subparser)
   croissant.register_subparser(subparser)
-  return parser.parse_args(argv[1:])
+
+  namespace, remaining_argv = parser.parse_known_args(argv[1:])
+
+  # Manually parse absl flags from the remaining arguments.
+  try:
+    # FLAGS requires the program name as the first argument.
+    positionals = FLAGS(argv[:1] + remaining_argv)
+  except flags.Error as e:
+    parser.error(str(e))
+
+  # There should be no positional arguments left, as they should have been
+  # handled by the sub-commands.
+  if len(positionals) > 1:
+    parser.error(f"unrecognized arguments: {' '.join(positionals[1:])}")
+
+  return namespace


 def main(args: argparse.Namespace) -> None:
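The new `_parse_flags` parses in two stages: the parser consumes the arguments it knows via `parse_known_args`, then absl's `FLAGS` object processes the leftovers, so absl flags such as `--alsologtostderr` keep working. A hedged, standalone sketch of the same hand-off (the `--num_workers` flag here is illustrative):

import argparse

from absl import flags

FLAGS = flags.FLAGS


def parse(argv: list[str]) -> argparse.Namespace:
  parser = argparse.ArgumentParser(allow_abbrev=False)
  parser.add_argument('--num_workers', type=int, default=8)
  namespace, remaining = parser.parse_known_args(argv[1:])
  try:
    # FLAGS(...) expects the program name first; it returns the arguments it
    # did not consume as flags, again starting with the program name.
    positionals = FLAGS(argv[:1] + remaining)
  except flags.Error as e:
    parser.error(str(e))  # never returns; raises SystemExit
  if len(positionals) > 1:
    parser.error(f"unrecognized arguments: {' '.join(positionals[1:])}")
  return namespace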
tensorflow_datasets/scripts/cli/new.py CHANGED
@@ -16,12 +16,14 @@
 """`tfds new` command."""

 import argparse
+import dataclasses
 import os
 import pathlib
 import subprocess
 import textwrap
-[old line 23 not captured in this diff view]
+import typing

+import simple_parsing
 from tensorflow_datasets.core import constants
 from tensorflow_datasets.core import dataset_metadata
 from tensorflow_datasets.core import naming
@@ -30,60 +32,60 @@ from tensorflow_datasets.scripts.cli import builder_templates
 from tensorflow_datasets.scripts.cli import cli_utils as utils


-[old lines 33-41: `register_subparser` and argparse setup, not captured in this diff view]
+@dataclasses.dataclass(frozen=True, kw_only=True)
+class Args:
+  """CLI arguments for creating a new dataset directory.
+
+  Attributes:
+    dataset_name: Name of the dataset to be created (in snake_case).
+    data_format: Format of the input data, which is used to generate a
+      format-specific template.
+    dir: Path where the dataset directory will be created. Defaults to current
+      directory.
+  """
+
+  dataset_name: str = simple_parsing.field(
+      positional=True,
+      # Need to explicitly set metavar for command-line help.
+      metavar='dataset_name',
   )
-[old lines 43-45 not captured in this diff view]
+  data_format: str = simple_parsing.choice(
+      builder_templates.STANDARD,
+      builder_templates.CONLL,
+      builder_templates.CONLLU,
       default=builder_templates.STANDARD,
-      choices=[
-          builder_templates.STANDARD,
-          builder_templates.CONLL,
-          builder_templates.CONLLU,
-      ],
-      help=(
-          'Optional format of the input data, which is used to generate a '
-          'format-specific template.'
-      ),
-  )
-  new_parser.add_argument(
-      '--dir',
-      type=pathlib.Path,
-      default=pathlib.Path.cwd(),
-      help=(
-          'Path where the dataset directory will be created. '
-          'Defaults to current directory.'
-      ),
   )
-[old lines 66-73 not captured in this diff view]
+  dir: pathlib.Path = simple_parsing.field(default_factory=pathlib.Path.cwd)
+
+  def execute(self) -> None:
+    """Creates the dataset directory."""
+    if not naming.is_valid_dataset_and_class_name(self.dataset_name):
+      raise ValueError(
+          'Invalid dataset name. It should be a valid Python class name.'
+      )
+
+    create_dataset_files(
+        dataset_name=self.dataset_name,
+        dataset_dir=self.dir,
+        data_format=self.data_format,
    )

-[old lines 76-79 not captured in this diff view]
+
+def register_subparser(parsers: argparse._SubParsersAction) -> None:
+  """Add subparser for `new` command."""
+  parser = parsers.add_parser(
+      'new',
+      help='Creates a new dataset directory from the template.',
   )
+  parser = typing.cast(simple_parsing.ArgumentParser, parser)
+  parser.add_arguments(Args, dest='args')
+  parser.set_defaults(subparser_fn=lambda args: args.args.execute())


 def create_dataset_files(
     dataset_name: str,
     dataset_dir: pathlib.Path,
-    data_format: [remainder of old line not captured in this diff view]
+    data_format: str | None = None,
 ) -> None:
   """Creates the dataset files."""
   # Creates the root directory
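Two field styles in the new `Args` are worth noting: `positional=True` turns a dataclass field into a positional CLI argument, and `default_factory` defers `pathlib.Path.cwd()` to parse time instead of import time. A minimal sketch (assuming `simple-parsing` and Python 3.10+; `DemoNewArgs` is illustrative, not TFDS code):

import dataclasses
import pathlib

import simple_parsing


@dataclasses.dataclass(frozen=True, kw_only=True)
class DemoNewArgs:
  # Positional on the command line; metavar keeps --help readable.
  dataset_name: str = simple_parsing.field(
      positional=True, metavar='dataset_name'
  )
  # Evaluated when the parser builds the dataclass, not at import time.
  dir: pathlib.Path = simple_parsing.field(default_factory=pathlib.Path.cwd)


parser = simple_parsing.ArgumentParser()
parser.add_arguments(DemoNewArgs, dest='args')
ns = parser.parse_args(['my_dataset'])
print(ns.args.dataset_name, ns.args.dir)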
{tfds_nightly-4.9.9.dev202508060045.dist-info → tfds_nightly-4.9.9.dev202508080045.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tfds-nightly
-Version: 4.9.9.dev202508060045
+Version: 4.9.9.dev202508080045
 Summary: tensorflow/datasets is a library of datasets ready to use with TensorFlow.
 Home-page: https://github.com/tensorflow/datasets
 Download-URL: https://github.com/tensorflow/datasets/tags
{tfds_nightly-4.9.9.dev202508060045.dist-info → tfds_nightly-4.9.9.dev202508080045.dist-info}/RECORD
RENAMED
@@ -1985,13 +1985,13 @@ tensorflow_datasets/scripts/cli/builder_templates.py,sha256=99SvH3skigkc2Qg737BV
 tensorflow_datasets/scripts/cli/builder_templates_test.py,sha256=HBNB-v2zlImKULPI8Webs9hXCkeFmWT29urxav-tDe8,2062
 tensorflow_datasets/scripts/cli/cli_utils.py,sha256=rMYMcQj1w46OTOeMyp3qf4y9v7ArOGh6u5NaCjBXal8,12313
 tensorflow_datasets/scripts/cli/conftest.py,sha256=cmvCCV-efT5ZXYPkCSGS1OxoKNPAfSsLcFTfYfe61S0,1233
-tensorflow_datasets/scripts/cli/convert_format.py,sha256=[old hash not captured in this diff view]
+tensorflow_datasets/scripts/cli/convert_format.py,sha256=02RDZQQCuXf_XFFpx0gmRVkYyJg534kY0fZwGKxtUL4,4197
 tensorflow_datasets/scripts/cli/convert_format_utils.py,sha256=U_q5WVgMNrjBkOc166U4Y_eca5KOS3Xb3jSDjp4XdK4,29078
 tensorflow_datasets/scripts/cli/convert_format_utils_test.py,sha256=9JGNu9TvUWzbuhe6DWwnO3V9Lia5S1Is64re-pceAWE,8823
-tensorflow_datasets/scripts/cli/croissant.py,sha256=[old hash not captured in this diff view]
-tensorflow_datasets/scripts/cli/main.py,sha256=[old hash not captured in this diff view]
+tensorflow_datasets/scripts/cli/croissant.py,sha256=6jzmOXt_i7aeJHUVX7_zpRRMEXId_PzU24zUDdExRUs,6112
+tensorflow_datasets/scripts/cli/main.py,sha256=FJJwyUtM1N9gNDsxGm850m5ejzzJ9mgESNW9Xz8E9_I,4383
 tensorflow_datasets/scripts/cli/main_test.py,sha256=3zNaS_2FmxxLoZOX05iJ2riuP4Qv8cx6bhAI56tV8YI,1067
-tensorflow_datasets/scripts/cli/new.py,sha256=[old hash not captured in this diff view]
+tensorflow_datasets/scripts/cli/new.py,sha256=x_GQSEVva1XuMvFwL3rANjDxviwZviXKHCICY7P30Jc,7803
 tensorflow_datasets/scripts/cli/new_test.py,sha256=USr9So-FPtg8UzaQPPacXn0E1ukDIoew9oYkOn45oik,2655
 tensorflow_datasets/scripts/deployment/__init__.py,sha256=Z8UWkv0wbzS4AzaLgSpYVGApYv5j57RWY0vN5Z553BQ,613
 tensorflow_datasets/scripts/deployment/copy_dataset_info_files.py,sha256=uLuvwOWqvo1SOLAcxAOHIWBvfbyZQJ7nF79v8lTalKQ,2690
@@ -2468,10 +2468,10 @@ tensorflow_datasets/vision_language/wit/wit_test.py,sha256=PXS8DMNW-MDrT2p5oy4Ic
 tensorflow_datasets/vision_language/wit_kaggle/__init__.py,sha256=vGwSGeM8WE4Q-l0-eEE1sBojmk6YT0l1OO60AWa4Q40,719
 tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle.py,sha256=q-vX_FBzIwsFxL4sY9vuyQ3UQD2PLM4yhUR4U6l-qao,16903
 tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle_test.py,sha256=ZymHT1NkmD-pUnh3BmM3_g30c5afsWYnmqDD9dVyDSA,1778
-[old lines 2471-2477: the seven tfds_nightly-4.9.9.dev202508060045.dist-info entries, truncated in this diff view]
+tfds_nightly-4.9.9.dev202508080045.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
+tfds_nightly-4.9.9.dev202508080045.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+tfds_nightly-4.9.9.dev202508080045.dist-info/METADATA,sha256=Za3dYGEQJMn0dTM0uUOEd_2jik8ANJg6Lmtp94-hKQ0,11694
+tfds_nightly-4.9.9.dev202508080045.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+tfds_nightly-4.9.9.dev202508080045.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
+tfds_nightly-4.9.9.dev202508080045.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
+tfds_nightly-4.9.9.dev202508080045.dist-info/RECORD,,
{tfds_nightly-4.9.9.dev202508060045.dist-info → tfds_nightly-4.9.9.dev202508080045.dist-info}/WHEEL
RENAMED (file without changes)
{tfds_nightly-4.9.9.dev202508060045.dist-info → tfds_nightly-4.9.9.dev202508080045.dist-info}/entry_points.txt
RENAMED (file without changes)
{tfds_nightly-4.9.9.dev202508060045.dist-info → tfds_nightly-4.9.9.dev202508080045.dist-info}/licenses/AUTHORS
RENAMED (file without changes)
{tfds_nightly-4.9.9.dev202508060045.dist-info → tfds_nightly-4.9.9.dev202508080045.dist-info}/licenses/LICENSE
RENAMED (file without changes)
{tfds_nightly-4.9.9.dev202508060045.dist-info → tfds_nightly-4.9.9.dev202508080045.dist-info}/top_level.txt
RENAMED (file without changes)