tfds-nightly 4.9.8.dev202504250044__py3-none-any.whl → 4.9.8.dev202504260044__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tensorflow_datasets/core/splits.py +20 -6
- tensorflow_datasets/core/splits_test.py +37 -0
- {tfds_nightly-4.9.8.dev202504250044.dist-info → tfds_nightly-4.9.8.dev202504260044.dist-info}/METADATA +1 -1
- {tfds_nightly-4.9.8.dev202504250044.dist-info → tfds_nightly-4.9.8.dev202504260044.dist-info}/RECORD +9 -9
- {tfds_nightly-4.9.8.dev202504250044.dist-info → tfds_nightly-4.9.8.dev202504260044.dist-info}/WHEEL +0 -0
- {tfds_nightly-4.9.8.dev202504250044.dist-info → tfds_nightly-4.9.8.dev202504260044.dist-info}/entry_points.txt +0 -0
- {tfds_nightly-4.9.8.dev202504250044.dist-info → tfds_nightly-4.9.8.dev202504260044.dist-info}/licenses/AUTHORS +0 -0
- {tfds_nightly-4.9.8.dev202504250044.dist-info → tfds_nightly-4.9.8.dev202504260044.dist-info}/licenses/LICENSE +0 -0
- {tfds_nightly-4.9.8.dev202504250044.dist-info → tfds_nightly-4.9.8.dev202504260044.dist-info}/top_level.txt +0 -0
@@ -282,9 +282,11 @@ class MultiSplitInfo(SplitInfo):
|
|
282
282
|
This should only be used to read data and not when producing data.
|
283
283
|
"""
|
284
284
|
|
285
|
-
split_infos: list[SplitInfo] = dataclasses.field(
|
285
|
+
split_infos: list[SplitInfo | SubSplitInfo] = dataclasses.field(
|
286
|
+
default_factory=list
|
287
|
+
)
|
286
288
|
|
287
|
-
def __init__(self, name: str, split_infos: list[SplitInfo]):
|
289
|
+
def __init__(self, name: str, split_infos: list[SplitInfo | SubSplitInfo]):
|
288
290
|
if not split_infos:
|
289
291
|
raise ValueError('Need to pass a non-empty list of SplitInfos')
|
290
292
|
object.__setattr__(self, 'split_infos', split_infos)
|
@@ -315,6 +317,16 @@ class MultiSplitInfo(SplitInfo):
|
|
315
317
|
f'split_infos={self.split_infos!r})'
|
316
318
|
)
|
317
319
|
|
320
|
+
@property
|
321
|
+
def examples_in_shards(self) -> list[int]:
|
322
|
+
result = []
|
323
|
+
for split_info in self.split_infos:
|
324
|
+
if isinstance(split_info, (SubSplitInfo, MultiSplitInfo)):
|
325
|
+
result.extend(split_info.examples_in_shards)
|
326
|
+
else:
|
327
|
+
result.extend(split_info.shard_lengths)
|
328
|
+
return result
|
329
|
+
|
318
330
|
@property
|
319
331
|
def file_instructions(self) -> list[shard_utils.FileInstruction]:
|
320
332
|
result = []
|
@@ -361,6 +373,10 @@ class SubSplitInfo:
|
|
361
373
|
def shard_lengths(self) -> list[int]:
|
362
374
|
return [f.take for f in self.file_instructions]
|
363
375
|
|
376
|
+
@property
|
377
|
+
def examples_in_shards(self) -> list[int]:
|
378
|
+
return [f.examples_in_shard for f in self.file_instructions]
|
379
|
+
|
364
380
|
@property
|
365
381
|
def num_examples(self) -> int:
|
366
382
|
"""Returns the number of example in the subsplit."""
|
@@ -526,7 +542,7 @@ def _make_absolute_instructions(
|
|
526
542
|
|
527
543
|
def _file_instructions_for_split(
|
528
544
|
instruction: _AbsoluteInstruction,
|
529
|
-
split_info: SplitInfo,
|
545
|
+
split_info: SplitInfo | SubSplitInfo,
|
530
546
|
) -> list[shard_utils.FileInstruction]:
|
531
547
|
"""Returns the file instructions from the given instruction applied to the given split info."""
|
532
548
|
if not split_info.num_examples:
|
@@ -537,9 +553,7 @@ def _file_instructions_for_split(
|
|
537
553
|
return []
|
538
554
|
to = split_info.num_examples if instruction.to is None else instruction.to
|
539
555
|
if isinstance(split_info, (SubSplitInfo, MultiSplitInfo)):
|
540
|
-
examples_in_shards = [
|
541
|
-
f.examples_in_shard for f in split_info.file_instructions
|
542
|
-
]
|
556
|
+
examples_in_shards = split_info.examples_in_shards
|
543
557
|
else:
|
544
558
|
examples_in_shards = None
|
545
559
|
return shard_utils.get_file_instructions(
|
@@ -255,6 +255,43 @@ class SplitDictTest(testing.TestCase):
|
|
255
255
|
self.assertEqual(file_instruction.take, 2)
|
256
256
|
self.assertEqual(file_instruction.examples_in_shard, 10)
|
257
257
|
|
258
|
+
def test_multi_split_empty_shard(self):
|
259
|
+
split_info = splits.MultiSplitInfo(
|
260
|
+
name='train',
|
261
|
+
split_infos=[
|
262
|
+
splits.SplitInfo(
|
263
|
+
name='train',
|
264
|
+
shard_lengths=[5, 0, 5],
|
265
|
+
num_bytes=0,
|
266
|
+
filename_template=_filename_template(
|
267
|
+
split='train', data_dir='/abc'
|
268
|
+
),
|
269
|
+
),
|
270
|
+
],
|
271
|
+
)
|
272
|
+
split_dict = splits.SplitDict([split_info])
|
273
|
+
sub_split = split_dict['train[:90%]']
|
274
|
+
self.assertEqual(sub_split.name, 'train[:90%]')
|
275
|
+
self.assertEqual(sub_split.num_examples, 9)
|
276
|
+
self.assertEqual(sub_split.shard_lengths, [5, 4])
|
277
|
+
self.assertEqual(
|
278
|
+
sub_split.file_instructions,
|
279
|
+
[
|
280
|
+
shard_utils.FileInstruction(
|
281
|
+
filename='/abc/ds_name-train.tfrecord-00000-of-00003',
|
282
|
+
skip=0,
|
283
|
+
take=5,
|
284
|
+
examples_in_shard=5,
|
285
|
+
),
|
286
|
+
shard_utils.FileInstruction(
|
287
|
+
filename='/abc/ds_name-train.tfrecord-00002-of-00003',
|
288
|
+
skip=0,
|
289
|
+
take=4,
|
290
|
+
examples_in_shard=5,
|
291
|
+
),
|
292
|
+
],
|
293
|
+
)
|
294
|
+
|
258
295
|
|
259
296
|
class SplitsTest(testing.TestCase):
|
260
297
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: tfds-nightly
|
3
|
-
Version: 4.9.8.dev202504250044
|
3
|
+
Version: 4.9.8.dev202504260044
|
4
4
|
Summary: tensorflow/datasets is a library of datasets ready to use with TensorFlow.
|
5
5
|
Home-page: https://github.com/tensorflow/datasets
|
6
6
|
Download-URL: https://github.com/tensorflow/datasets/tags
|
{tfds_nightly-4.9.8.dev202504250044.dist-info → tfds_nightly-4.9.8.dev202504260044.dist-info}/RECORD
RENAMED
@@ -102,8 +102,8 @@ tensorflow_datasets/core/shuffle.py,sha256=xMOaKql03o6sxcJp_KFG60N2tua2xE2xibQue
|
|
102
102
|
tensorflow_datasets/core/shuffle_test.py,sha256=-xkd3kx5Ci2gGyKbMwdzGwhjFBv6LTKX1FPPn9eanGI,7123
|
103
103
|
tensorflow_datasets/core/split_builder.py,sha256=HHOX3KatqnSL5EW-6vXzyDDKG4FOEyjGXM_MRuLNxHE,22190
|
104
104
|
tensorflow_datasets/core/split_builder_test.py,sha256=0HsFONj8A09voGS_7zCAb9nd1PdTqT1u4MXR4aWAoS4,3223
|
105
|
-
tensorflow_datasets/core/splits.py,sha256=
|
106
|
-
tensorflow_datasets/core/splits_test.py,sha256=
|
105
|
+
tensorflow_datasets/core/splits.py,sha256=K5M8fJeleCcJiHa_TsiNI0AQcMimkSSb0D73VOOfai0,29356
|
106
|
+
tensorflow_datasets/core/splits_test.py,sha256=OD-ZNnpkPV8WL43IUywXdoAM2QuMV8htsqChw3Nqq9Q,24987
|
107
107
|
tensorflow_datasets/core/subsplits_utils.py,sha256=ZxzeEVbUVlADSTigQbf4k9V4rdjjoRvAuBVXl1-BNrg,6127
|
108
108
|
tensorflow_datasets/core/subsplits_utils_test.py,sha256=iC6f4Bfpnon_NL7TGeCwp3OW2P3jTIPIpWbCqxmuhX4,5159
|
109
109
|
tensorflow_datasets/core/tf_compat.py,sha256=GY-LBj1V9LLNl2LtAh4dLcATisW_8VYTWHgTR31MXvU,1820
|
@@ -2460,10 +2460,10 @@ tensorflow_datasets/vision_language/wit/wit_test.py,sha256=DJ687VN9hAp6SLXnr_P12
|
|
2460
2460
|
tensorflow_datasets/vision_language/wit_kaggle/__init__.py,sha256=38tJQ73VHz8vOJn-AyZh2we2YJucbSRIgmgcrsC6bQM,719
|
2461
2461
|
tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle.py,sha256=k-7YD1SGr5bASfdR2_09rrqz-8cpWdIcBWWEXhCvzuk,16903
|
2462
2462
|
tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle_test.py,sha256=vLlluBW77ASNVC4ix7t8idkSUBI6q1-B7zmRV_ICCQM,1778
|
2463
|
-
tfds_nightly-4.9.8.
|
2464
|
-
tfds_nightly-4.9.8.
|
2465
|
-
tfds_nightly-4.9.8.
|
2466
|
-
tfds_nightly-4.9.8.
|
2467
|
-
tfds_nightly-4.9.8.
|
2468
|
-
tfds_nightly-4.9.8.
|
2469
|
-
tfds_nightly-4.9.8.
|
2463
|
+
tfds_nightly-4.9.8.dev202504260044.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
|
2464
|
+
tfds_nightly-4.9.8.dev202504260044.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
|
2465
|
+
tfds_nightly-4.9.8.dev202504260044.dist-info/METADATA,sha256=wkSM6wKcHi1BPt11eiYbzbK-9GyCKuOIyE-xt9ZXivY,11879
|
2466
|
+
tfds_nightly-4.9.8.dev202504260044.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
|
2467
|
+
tfds_nightly-4.9.8.dev202504260044.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
|
2468
|
+
tfds_nightly-4.9.8.dev202504260044.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
|
2469
|
+
tfds_nightly-4.9.8.dev202504260044.dist-info/RECORD,,
|
{tfds_nightly-4.9.8.dev202504250044.dist-info → tfds_nightly-4.9.8.dev202504260044.dist-info}/WHEEL
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|