tfds-nightly 4.9.9.dev202507010046__py3-none-any.whl → 4.9.9.dev202507030044__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tensorflow_datasets/core/naming.py +2 -2
- tensorflow_datasets/core/naming_test.py +5 -2
- tensorflow_datasets/core/writer.py +3 -2
- tensorflow_datasets/core/writer_test.py +32 -24
- {tfds_nightly-4.9.9.dev202507010046.dist-info → tfds_nightly-4.9.9.dev202507030044.dist-info}/METADATA +1 -1
- {tfds_nightly-4.9.9.dev202507010046.dist-info → tfds_nightly-4.9.9.dev202507030044.dist-info}/RECORD +11 -11
- {tfds_nightly-4.9.9.dev202507010046.dist-info → tfds_nightly-4.9.9.dev202507030044.dist-info}/WHEEL +0 -0
- {tfds_nightly-4.9.9.dev202507010046.dist-info → tfds_nightly-4.9.9.dev202507030044.dist-info}/entry_points.txt +0 -0
- {tfds_nightly-4.9.9.dev202507010046.dist-info → tfds_nightly-4.9.9.dev202507030044.dist-info}/licenses/AUTHORS +0 -0
- {tfds_nightly-4.9.9.dev202507010046.dist-info → tfds_nightly-4.9.9.dev202507030044.dist-info}/licenses/LICENSE +0 -0
- {tfds_nightly-4.9.9.dev202507010046.dist-info → tfds_nightly-4.9.9.dev202507030044.dist-info}/top_level.txt +0 -0
@@ -666,7 +666,7 @@ class ShardedFileTemplate:
|
|
666
666
|
`/path/dataset_name-split.fileformat@num_shards` or
|
667
667
|
`/path/dataset_name-split@num_shards.fileformat` depending on the format.
|
668
668
|
If `num_shards` is not given, then it returns
|
669
|
-
`/path/dataset_name-split.fileformat*`.
|
669
|
+
`/path/dataset_name-split.fileformat-[0-9][0-9][0-9][0-9][0-9]-of-[0-9][0-9][0-9][0-9][0-9]`.
|
670
670
|
|
671
671
|
Args:
|
672
672
|
num_shards: optional specification of the number of shards.
|
@@ -681,7 +681,7 @@ class ShardedFileTemplate:
|
|
681
681
|
elif use_at_notation:
|
682
682
|
replacement = '@*'
|
683
683
|
else:
|
684
|
-
replacement = '*'
|
684
|
+
replacement = '-[0-9][0-9][0-9][0-9][0-9]-of-[0-9][0-9][0-9][0-9][0-9]'
|
685
685
|
return _replace_shard_pattern(os.fspath(a_filepath), replacement)
|
686
686
|
|
687
687
|
def glob_pattern(self, num_shards: int | None = None) -> str:
|
@@ -459,7 +459,7 @@ def test_sharded_file_template_shard_index():
|
|
459
459
|
)
|
460
460
|
assert (
|
461
461
|
os.fspath(template.sharded_filepaths_pattern())
|
462
|
-
== '/my/path/data/mnist-train.tfrecord*'
|
462
|
+
== '/my/path/data/mnist-train.tfrecord-[0-9][0-9][0-9][0-9][0-9]-of-[0-9][0-9][0-9][0-9][0-9]'
|
463
463
|
)
|
464
464
|
assert (
|
465
465
|
os.fspath(template.sharded_filepaths_pattern(num_shards=100))
|
@@ -474,7 +474,10 @@ def test_glob_pattern():
|
|
474
474
|
filetype_suffix='tfrecord',
|
475
475
|
data_dir=epath.Path('/data'),
|
476
476
|
)
|
477
|
-
assert '/data/ds-train.tfrecord*' == template.glob_pattern()
|
477
|
+
assert (
|
478
|
+
'/data/ds-train.tfrecord-[0-9][0-9][0-9][0-9][0-9]-of-[0-9][0-9][0-9][0-9][0-9]'
|
479
|
+
== template.glob_pattern()
|
480
|
+
)
|
478
481
|
assert '/data/ds-train.tfrecord-*-of-00042' == template.glob_pattern(
|
479
482
|
num_shards=42
|
480
483
|
)
|
@@ -816,8 +816,9 @@ class NoShuffleBeamWriter:
|
|
816
816
|
logging.info("Finalizing writer for %s", self._filename_template.split)
|
817
817
|
# We don't know the number of shards, the length of each shard, nor the
|
818
818
|
# total size, so we compute them here.
|
819
|
-
|
820
|
-
|
819
|
+
shards = self._filename_template.data_dir.glob(
|
820
|
+
self._filename_template.glob_pattern()
|
821
|
+
)
|
821
822
|
|
822
823
|
def _get_length_and_size(shard: epath.Path) -> tuple[epath.Path, int, int]:
|
823
824
|
length = self._file_adapter.num_examples(shard)
|
@@ -592,39 +592,47 @@ class NoShuffleBeamWriterTest(parameterized.TestCase):
|
|
592
592
|
|
593
593
|
with tempfile.TemporaryDirectory() as tmp_dir:
|
594
594
|
tmp_dir = epath.Path(tmp_dir)
|
595
|
-
|
596
|
-
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
595
|
+
|
596
|
+
def get_writer(split):
|
597
|
+
filename_template = naming.ShardedFileTemplate(
|
598
|
+
dataset_name='foo',
|
599
|
+
split=split,
|
600
|
+
filetype_suffix=file_format.file_suffix,
|
601
|
+
data_dir=tmp_dir,
|
602
|
+
)
|
603
|
+
return writer_lib.NoShuffleBeamWriter(
|
604
|
+
serializer=testing.DummySerializer('dummy specs'),
|
605
|
+
filename_template=filename_template,
|
606
|
+
file_format=file_format,
|
607
|
+
)
|
608
|
+
|
606
609
|
to_write = [(i, str(i).encode('utf-8')) for i in range(10)]
|
607
610
|
# Here we need to disable type check as `beam.Create` is not capable of
|
608
611
|
# inferring the type of the PCollection elements.
|
609
612
|
options = beam.options.pipeline_options.PipelineOptions(
|
610
613
|
pipeline_type_check=False
|
611
614
|
)
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
615
|
+
writers = [get_writer(split) for split in ('train-b', 'train')]
|
616
|
+
|
617
|
+
for writer in writers:
|
618
|
+
with beam.Pipeline(options=options, runner=_get_runner()) as pipeline:
|
619
|
+
|
620
|
+
@beam.ptransform_fn
|
621
|
+
def _build_pcollection(pipeline, writer):
|
622
|
+
pcollection = pipeline | 'Start' >> beam.Create(to_write)
|
623
|
+
return writer.write_from_pcollection(pcollection)
|
624
|
+
|
625
|
+
_ = pipeline | 'test' >> _build_pcollection(writer)
|
626
|
+
|
624
627
|
files = list(tmp_dir.iterdir())
|
625
|
-
self.assertGreaterEqual(len(files),
|
628
|
+
self.assertGreaterEqual(len(files), 2)
|
626
629
|
for f in files:
|
627
630
|
self.assertIn(file_format.file_suffix, f.name)
|
631
|
+
for writer in writers:
|
632
|
+
shard_lengths, total_size = writer.finalize()
|
633
|
+
self.assertNotEmpty(shard_lengths)
|
634
|
+
self.assertEqual(sum(shard_lengths), 10)
|
635
|
+
self.assertGreater(total_size, 10)
|
628
636
|
|
629
637
|
|
630
638
|
class CustomExampleWriter(writer_lib.ExampleWriter):
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: tfds-nightly
|
3
|
-
Version: 4.9.9.dev202507010046
|
3
|
+
Version: 4.9.9.dev202507030044
|
4
4
|
Summary: tensorflow/datasets is a library of datasets ready to use with TensorFlow.
|
5
5
|
Home-page: https://github.com/tensorflow/datasets
|
6
6
|
Download-URL: https://github.com/tensorflow/datasets/tags
|
{tfds_nightly-4.9.9.dev202507010046.dist-info → tfds_nightly-4.9.9.dev202507030044.dist-info}/RECORD
RENAMED
@@ -88,8 +88,8 @@ tensorflow_datasets/core/lazy_imports_lib.py,sha256=Q-c3qGEZJDqviEQUiro2iBpMw7KA
|
|
88
88
|
tensorflow_datasets/core/lazy_imports_lib_test.py,sha256=cbdamDUJIY5YORm6coyCMIralgsL_gCUfa2Dzdj6ZPY,1695
|
89
89
|
tensorflow_datasets/core/load.py,sha256=1FQVnKwn8OVS_IgDbs9XN7aIVxQnyfrS0pI2X9dh77M,37765
|
90
90
|
tensorflow_datasets/core/load_test.py,sha256=EEa8GuSIrEbn0RcGrWS3hmmatKBqBA3QOQWpQ1WjVgA,6490
|
91
|
-
tensorflow_datasets/core/naming.py,sha256=
|
92
|
-
tensorflow_datasets/core/naming_test.py,sha256=
|
91
|
+
tensorflow_datasets/core/naming.py,sha256=B_P77QDA4lkG2FUl4PrzZR0U6qqae_fLxruGBw3ZSVc,25614
|
92
|
+
tensorflow_datasets/core/naming_test.py,sha256=SwydgLjf2Mouow1yVZlc73sb8rp4522NhkTSEmg31vo,30112
|
93
93
|
tensorflow_datasets/core/read_only_builder.py,sha256=R0QIqckUjl74G7oBj1uCRm_g9e0omstDMTbbwC25B88,22146
|
94
94
|
tensorflow_datasets/core/read_only_builder_test.py,sha256=Nw2KQCHBdTW7210Um2K3SzfqAOJB1v1r2yJkzdFehWA,24174
|
95
95
|
tensorflow_datasets/core/reader.py,sha256=s65FNOUDyAhd4OgHOSvE5lr4rnlUnOILjlVcRS6Qbhw,17345
|
@@ -112,8 +112,8 @@ tensorflow_datasets/core/units_test.py,sha256=rGR0rsP9M0BVCqv2OA1GZRH5csq8_gPYhI
|
|
112
112
|
tensorflow_datasets/core/valid_tags.txt,sha256=HLn8CV1ORQZaAhLr-U-5MsYFrYBVHDgs4bKEu2nzlVw,20100
|
113
113
|
tensorflow_datasets/core/visibility.py,sha256=43jHRRdg2xHRpAA2mUD1Yz-vOs5EVhx3xhB2RoIJBg8,3498
|
114
114
|
tensorflow_datasets/core/visibility_test.py,sha256=h_UwIBfLgIkMSSSPoQmT0mNUUOH8jAdebA_DdWNSxdg,1350
|
115
|
-
tensorflow_datasets/core/writer.py,sha256=
|
116
|
-
tensorflow_datasets/core/writer_test.py,sha256=
|
115
|
+
tensorflow_datasets/core/writer.py,sha256=T41xcagE1IhFqKNtoHR467SXqbOw7PrQR2nm7nXn5Yc,28877
|
116
|
+
tensorflow_datasets/core/writer_test.py,sha256=j-lvS96jFmvBF0bd0mVR4EGBbxeFW7ucxxFXtC40wTo,22702
|
117
117
|
tensorflow_datasets/core/community/__init__.py,sha256=bAU6d62u2i14gRw3xgAzkQS8kRcuRnJWqEVn_r0RXRs,1206
|
118
118
|
tensorflow_datasets/core/community/cache.py,sha256=-dx3iEsgktu8OR42a64CFX64HtaXMHjXAfnYlc0H5BM,2130
|
119
119
|
tensorflow_datasets/core/community/config.py,sha256=SiIgegGmxQjoM_8HmKFLdib-loTxpQpEwXXKQbTLJI0,4451
|
@@ -2461,10 +2461,10 @@ tensorflow_datasets/vision_language/wit/wit_test.py,sha256=PXS8DMNW-MDrT2p5oy4Ic
|
|
2461
2461
|
tensorflow_datasets/vision_language/wit_kaggle/__init__.py,sha256=vGwSGeM8WE4Q-l0-eEE1sBojmk6YT0l1OO60AWa4Q40,719
|
2462
2462
|
tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle.py,sha256=q-vX_FBzIwsFxL4sY9vuyQ3UQD2PLM4yhUR4U6l-qao,16903
|
2463
2463
|
tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle_test.py,sha256=ZymHT1NkmD-pUnh3BmM3_g30c5afsWYnmqDD9dVyDSA,1778
|
2464
|
-
tfds_nightly-4.9.9.
|
2465
|
-
tfds_nightly-4.9.9.
|
2466
|
-
tfds_nightly-4.9.9.
|
2467
|
-
tfds_nightly-4.9.9.
|
2468
|
-
tfds_nightly-4.9.9.
|
2469
|
-
tfds_nightly-4.9.9.
|
2470
|
-
tfds_nightly-4.9.9.
|
2464
|
+
tfds_nightly-4.9.9.dev202507030044.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
|
2465
|
+
tfds_nightly-4.9.9.dev202507030044.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
|
2466
|
+
tfds_nightly-4.9.9.dev202507030044.dist-info/METADATA,sha256=mBYq6qCzA_MbWBx9ui0LQFkk3GFTUH0nWBZnNOafRxY,11963
|
2467
|
+
tfds_nightly-4.9.9.dev202507030044.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
2468
|
+
tfds_nightly-4.9.9.dev202507030044.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
|
2469
|
+
tfds_nightly-4.9.9.dev202507030044.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
|
2470
|
+
tfds_nightly-4.9.9.dev202507030044.dist-info/RECORD,,
|
{tfds_nightly-4.9.9.dev202507010046.dist-info → tfds_nightly-4.9.9.dev202507030044.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|