tfds-nightly 4.9.9.dev202507010046__py3-none-any.whl → 4.9.9.dev202507030044__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -666,7 +666,7 @@ class ShardedFileTemplate:
666
666
  `/path/dataset_name-split.fileformat@num_shards` or
667
667
  `/path/dataset_name-split@num_shards.fileformat` depending on the format.
668
668
  If `num_shards` is not given, then it returns
669
- `/path/dataset_name-split.fileformat*`.
669
+ `/path/dataset_name-split.fileformat-[0-9][0-9][0-9][0-9][0-9]-of-[0-9][0-9][0-9][0-9][0-9]`.
670
670
 
671
671
  Args:
672
672
  num_shards: optional specification of the number of shards.
@@ -681,7 +681,7 @@ class ShardedFileTemplate:
681
681
  elif use_at_notation:
682
682
  replacement = '@*'
683
683
  else:
684
- replacement = '*'
684
+ replacement = '-[0-9][0-9][0-9][0-9][0-9]-of-[0-9][0-9][0-9][0-9][0-9]'
685
685
  return _replace_shard_pattern(os.fspath(a_filepath), replacement)
686
686
 
687
687
  def glob_pattern(self, num_shards: int | None = None) -> str:
@@ -459,7 +459,7 @@ def test_sharded_file_template_shard_index():
459
459
  )
460
460
  assert (
461
461
  os.fspath(template.sharded_filepaths_pattern())
462
- == '/my/path/data/mnist-train.tfrecord*'
462
+ == '/my/path/data/mnist-train.tfrecord-[0-9][0-9][0-9][0-9][0-9]-of-[0-9][0-9][0-9][0-9][0-9]'
463
463
  )
464
464
  assert (
465
465
  os.fspath(template.sharded_filepaths_pattern(num_shards=100))
@@ -474,7 +474,10 @@ def test_glob_pattern():
474
474
  filetype_suffix='tfrecord',
475
475
  data_dir=epath.Path('/data'),
476
476
  )
477
- assert '/data/ds-train.tfrecord*' == template.glob_pattern()
477
+ assert (
478
+ '/data/ds-train.tfrecord-[0-9][0-9][0-9][0-9][0-9]-of-[0-9][0-9][0-9][0-9][0-9]'
479
+ == template.glob_pattern()
480
+ )
478
481
  assert '/data/ds-train.tfrecord-*-of-00042' == template.glob_pattern(
479
482
  num_shards=42
480
483
  )
@@ -816,8 +816,9 @@ class NoShuffleBeamWriter:
816
816
  logging.info("Finalizing writer for %s", self._filename_template.split)
817
817
  # We don't know the number of shards, the length of each shard, nor the
818
818
  # total size, so we compute them here.
819
- prefix = epath.Path(self._filename_template.filepath_prefix())
820
- shards = self._filename_template.data_dir.glob(f"{prefix.name}*")
819
+ shards = self._filename_template.data_dir.glob(
820
+ self._filename_template.glob_pattern()
821
+ )
821
822
 
822
823
  def _get_length_and_size(shard: epath.Path) -> tuple[epath.Path, int, int]:
823
824
  length = self._file_adapter.num_examples(shard)
@@ -592,39 +592,47 @@ class NoShuffleBeamWriterTest(parameterized.TestCase):
592
592
 
593
593
  with tempfile.TemporaryDirectory() as tmp_dir:
594
594
  tmp_dir = epath.Path(tmp_dir)
595
- filename_template = naming.ShardedFileTemplate(
596
- dataset_name='foo',
597
- split='train',
598
- filetype_suffix=file_format.file_suffix,
599
- data_dir=tmp_dir,
600
- )
601
- writer = writer_lib.NoShuffleBeamWriter(
602
- serializer=testing.DummySerializer('dummy specs'),
603
- filename_template=filename_template,
604
- file_format=file_format,
605
- )
595
+
596
+ def get_writer(split):
597
+ filename_template = naming.ShardedFileTemplate(
598
+ dataset_name='foo',
599
+ split=split,
600
+ filetype_suffix=file_format.file_suffix,
601
+ data_dir=tmp_dir,
602
+ )
603
+ return writer_lib.NoShuffleBeamWriter(
604
+ serializer=testing.DummySerializer('dummy specs'),
605
+ filename_template=filename_template,
606
+ file_format=file_format,
607
+ )
608
+
606
609
  to_write = [(i, str(i).encode('utf-8')) for i in range(10)]
607
610
  # Here we need to disable type check as `beam.Create` is not capable of
608
611
  # inferring the type of the PCollection elements.
609
612
  options = beam.options.pipeline_options.PipelineOptions(
610
613
  pipeline_type_check=False
611
614
  )
612
- with beam.Pipeline(options=options, runner=_get_runner()) as pipeline:
613
-
614
- @beam.ptransform_fn
615
- def _build_pcollection(pipeline):
616
- pcollection = pipeline | 'Start' >> beam.Create(to_write)
617
- return writer.write_from_pcollection(pcollection)
618
-
619
- _ = pipeline | 'test' >> _build_pcollection() # pylint: disable=no-value-for-parameter
620
- shard_lengths, total_size = writer.finalize()
621
- self.assertNotEmpty(shard_lengths)
622
- self.assertEqual(sum(shard_lengths), 10)
623
- self.assertGreater(total_size, 10)
615
+ writers = [get_writer(split) for split in ('train-b', 'train')]
616
+
617
+ for writer in writers:
618
+ with beam.Pipeline(options=options, runner=_get_runner()) as pipeline:
619
+
620
+ @beam.ptransform_fn
621
+ def _build_pcollection(pipeline, writer):
622
+ pcollection = pipeline | 'Start' >> beam.Create(to_write)
623
+ return writer.write_from_pcollection(pcollection)
624
+
625
+ _ = pipeline | 'test' >> _build_pcollection(writer)
626
+
624
627
  files = list(tmp_dir.iterdir())
625
- self.assertGreaterEqual(len(files), 1)
628
+ self.assertGreaterEqual(len(files), 2)
626
629
  for f in files:
627
630
  self.assertIn(file_format.file_suffix, f.name)
631
+ for writer in writers:
632
+ shard_lengths, total_size = writer.finalize()
633
+ self.assertNotEmpty(shard_lengths)
634
+ self.assertEqual(sum(shard_lengths), 10)
635
+ self.assertGreater(total_size, 10)
628
636
 
629
637
 
630
638
  class CustomExampleWriter(writer_lib.ExampleWriter):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tfds-nightly
3
- Version: 4.9.9.dev202507010046
3
+ Version: 4.9.9.dev202507030044
4
4
  Summary: tensorflow/datasets is a library of datasets ready to use with TensorFlow.
5
5
  Home-page: https://github.com/tensorflow/datasets
6
6
  Download-URL: https://github.com/tensorflow/datasets/tags
@@ -88,8 +88,8 @@ tensorflow_datasets/core/lazy_imports_lib.py,sha256=Q-c3qGEZJDqviEQUiro2iBpMw7KA
88
88
  tensorflow_datasets/core/lazy_imports_lib_test.py,sha256=cbdamDUJIY5YORm6coyCMIralgsL_gCUfa2Dzdj6ZPY,1695
89
89
  tensorflow_datasets/core/load.py,sha256=1FQVnKwn8OVS_IgDbs9XN7aIVxQnyfrS0pI2X9dh77M,37765
90
90
  tensorflow_datasets/core/load_test.py,sha256=EEa8GuSIrEbn0RcGrWS3hmmatKBqBA3QOQWpQ1WjVgA,6490
91
- tensorflow_datasets/core/naming.py,sha256=IYz4U9_2lLqpeyJzHZCZ0mbL6hLxnBugsB-IhKduYBU,25506
92
- tensorflow_datasets/core/naming_test.py,sha256=cC6Cf3Urhpwf1Wtgt85zar8KsFU9VrZgV0ZqPCp5PE4,29986
91
+ tensorflow_datasets/core/naming.py,sha256=B_P77QDA4lkG2FUl4PrzZR0U6qqae_fLxruGBw3ZSVc,25614
92
+ tensorflow_datasets/core/naming_test.py,sha256=SwydgLjf2Mouow1yVZlc73sb8rp4522NhkTSEmg31vo,30112
93
93
  tensorflow_datasets/core/read_only_builder.py,sha256=R0QIqckUjl74G7oBj1uCRm_g9e0omstDMTbbwC25B88,22146
94
94
  tensorflow_datasets/core/read_only_builder_test.py,sha256=Nw2KQCHBdTW7210Um2K3SzfqAOJB1v1r2yJkzdFehWA,24174
95
95
  tensorflow_datasets/core/reader.py,sha256=s65FNOUDyAhd4OgHOSvE5lr4rnlUnOILjlVcRS6Qbhw,17345
@@ -112,8 +112,8 @@ tensorflow_datasets/core/units_test.py,sha256=rGR0rsP9M0BVCqv2OA1GZRH5csq8_gPYhI
112
112
  tensorflow_datasets/core/valid_tags.txt,sha256=HLn8CV1ORQZaAhLr-U-5MsYFrYBVHDgs4bKEu2nzlVw,20100
113
113
  tensorflow_datasets/core/visibility.py,sha256=43jHRRdg2xHRpAA2mUD1Yz-vOs5EVhx3xhB2RoIJBg8,3498
114
114
  tensorflow_datasets/core/visibility_test.py,sha256=h_UwIBfLgIkMSSSPoQmT0mNUUOH8jAdebA_DdWNSxdg,1350
115
- tensorflow_datasets/core/writer.py,sha256=JyVr7Zs5IYp-kHXp8LBs03enuMgD7-T5DN6uq0LbF4s,28909
116
- tensorflow_datasets/core/writer_test.py,sha256=CLx1tE2QM0sUkFz3hpIP8tZQcTpPDY9VklzuCEa4qCE,22531
115
+ tensorflow_datasets/core/writer.py,sha256=T41xcagE1IhFqKNtoHR467SXqbOw7PrQR2nm7nXn5Yc,28877
116
+ tensorflow_datasets/core/writer_test.py,sha256=j-lvS96jFmvBF0bd0mVR4EGBbxeFW7ucxxFXtC40wTo,22702
117
117
  tensorflow_datasets/core/community/__init__.py,sha256=bAU6d62u2i14gRw3xgAzkQS8kRcuRnJWqEVn_r0RXRs,1206
118
118
  tensorflow_datasets/core/community/cache.py,sha256=-dx3iEsgktu8OR42a64CFX64HtaXMHjXAfnYlc0H5BM,2130
119
119
  tensorflow_datasets/core/community/config.py,sha256=SiIgegGmxQjoM_8HmKFLdib-loTxpQpEwXXKQbTLJI0,4451
@@ -2461,10 +2461,10 @@ tensorflow_datasets/vision_language/wit/wit_test.py,sha256=PXS8DMNW-MDrT2p5oy4Ic
2461
2461
  tensorflow_datasets/vision_language/wit_kaggle/__init__.py,sha256=vGwSGeM8WE4Q-l0-eEE1sBojmk6YT0l1OO60AWa4Q40,719
2462
2462
  tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle.py,sha256=q-vX_FBzIwsFxL4sY9vuyQ3UQD2PLM4yhUR4U6l-qao,16903
2463
2463
  tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle_test.py,sha256=ZymHT1NkmD-pUnh3BmM3_g30c5afsWYnmqDD9dVyDSA,1778
2464
- tfds_nightly-4.9.9.dev202507010046.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
2465
- tfds_nightly-4.9.9.dev202507010046.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
2466
- tfds_nightly-4.9.9.dev202507010046.dist-info/METADATA,sha256=ldOcz84AQsdQXpXII94T_z57amwrq8Je-zIvnGynfX4,11963
2467
- tfds_nightly-4.9.9.dev202507010046.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2468
- tfds_nightly-4.9.9.dev202507010046.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
2469
- tfds_nightly-4.9.9.dev202507010046.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
2470
- tfds_nightly-4.9.9.dev202507010046.dist-info/RECORD,,
2464
+ tfds_nightly-4.9.9.dev202507030044.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
2465
+ tfds_nightly-4.9.9.dev202507030044.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
2466
+ tfds_nightly-4.9.9.dev202507030044.dist-info/METADATA,sha256=mBYq6qCzA_MbWBx9ui0LQFkk3GFTUH0nWBZnNOafRxY,11963
2467
+ tfds_nightly-4.9.9.dev202507030044.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
2468
+ tfds_nightly-4.9.9.dev202507030044.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
2469
+ tfds_nightly-4.9.9.dev202507030044.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
2470
+ tfds_nightly-4.9.9.dev202507030044.dist-info/RECORD,,