tfds-nightly 4.9.8.dev202504250044__py3-none-any.whl → 4.9.8.dev202504270045__py3-none-any.whl

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
@@ -282,9 +282,11 @@ class MultiSplitInfo(SplitInfo):
282
282
  This should only be used to read data and not when producing data.
283
283
  """
284
284
 
285
- split_infos: list[SplitInfo] = dataclasses.field(default_factory=list)
285
+ split_infos: list[SplitInfo | SubSplitInfo] = dataclasses.field(
286
+ default_factory=list
287
+ )
286
288
 
287
- def __init__(self, name: str, split_infos: list[SplitInfo]):
289
+ def __init__(self, name: str, split_infos: list[SplitInfo | SubSplitInfo]):
288
290
  if not split_infos:
289
291
  raise ValueError('Need to pass a non-empty list of SplitInfos')
290
292
  object.__setattr__(self, 'split_infos', split_infos)
@@ -315,6 +317,16 @@ class MultiSplitInfo(SplitInfo):
315
317
  f'split_infos={self.split_infos!r})'
316
318
  )
317
319
 
320
+ @property
321
+ def examples_in_shards(self) -> list[int]:
322
+ result = []
323
+ for split_info in self.split_infos:
324
+ if isinstance(split_info, (SubSplitInfo, MultiSplitInfo)):
325
+ result.extend(split_info.examples_in_shards)
326
+ else:
327
+ result.extend(split_info.shard_lengths)
328
+ return result
329
+
318
330
  @property
319
331
  def file_instructions(self) -> list[shard_utils.FileInstruction]:
320
332
  result = []
@@ -361,6 +373,10 @@ class SubSplitInfo:
361
373
  def shard_lengths(self) -> list[int]:
362
374
  return [f.take for f in self.file_instructions]
363
375
 
376
+ @property
377
+ def examples_in_shards(self) -> list[int]:
378
+ return [f.examples_in_shard for f in self.file_instructions]
379
+
364
380
  @property
365
381
  def num_examples(self) -> int:
366
382
  """Returns the number of example in the subsplit."""
@@ -526,7 +542,7 @@ def _make_absolute_instructions(
526
542
 
527
543
  def _file_instructions_for_split(
528
544
  instruction: _AbsoluteInstruction,
529
- split_info: SplitInfo,
545
+ split_info: SplitInfo | SubSplitInfo,
530
546
  ) -> list[shard_utils.FileInstruction]:
531
547
  """Returns the file instructions from the given instruction applied to the given split info."""
532
548
  if not split_info.num_examples:
@@ -537,9 +553,7 @@ def _file_instructions_for_split(
537
553
  return []
538
554
  to = split_info.num_examples if instruction.to is None else instruction.to
539
555
  if isinstance(split_info, (SubSplitInfo, MultiSplitInfo)):
540
- examples_in_shards = [
541
- f.examples_in_shard for f in split_info.file_instructions
542
- ]
556
+ examples_in_shards = split_info.examples_in_shards
543
557
  else:
544
558
  examples_in_shards = None
545
559
  return shard_utils.get_file_instructions(
@@ -255,6 +255,43 @@ class SplitDictTest(testing.TestCase):
255
255
  self.assertEqual(file_instruction.take, 2)
256
256
  self.assertEqual(file_instruction.examples_in_shard, 10)
257
257
 
258
+ def test_multi_split_empty_shard(self):
259
+ split_info = splits.MultiSplitInfo(
260
+ name='train',
261
+ split_infos=[
262
+ splits.SplitInfo(
263
+ name='train',
264
+ shard_lengths=[5, 0, 5],
265
+ num_bytes=0,
266
+ filename_template=_filename_template(
267
+ split='train', data_dir='/abc'
268
+ ),
269
+ ),
270
+ ],
271
+ )
272
+ split_dict = splits.SplitDict([split_info])
273
+ sub_split = split_dict['train[:90%]']
274
+ self.assertEqual(sub_split.name, 'train[:90%]')
275
+ self.assertEqual(sub_split.num_examples, 9)
276
+ self.assertEqual(sub_split.shard_lengths, [5, 4])
277
+ self.assertEqual(
278
+ sub_split.file_instructions,
279
+ [
280
+ shard_utils.FileInstruction(
281
+ filename='/abc/ds_name-train.tfrecord-00000-of-00003',
282
+ skip=0,
283
+ take=5,
284
+ examples_in_shard=5,
285
+ ),
286
+ shard_utils.FileInstruction(
287
+ filename='/abc/ds_name-train.tfrecord-00002-of-00003',
288
+ skip=0,
289
+ take=4,
290
+ examples_in_shard=5,
291
+ ),
292
+ ],
293
+ )
294
+
258
295
 
259
296
  class SplitsTest(testing.TestCase):
260
297
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: tfds-nightly
3
- Version: 4.9.8.dev202504250044
3
+ Version: 4.9.8.dev202504270045
4
4
  Summary: tensorflow/datasets is a library of datasets ready to use with TensorFlow.
5
5
  Home-page: https://github.com/tensorflow/datasets
6
6
  Download-URL: https://github.com/tensorflow/datasets/tags
@@ -102,8 +102,8 @@ tensorflow_datasets/core/shuffle.py,sha256=xMOaKql03o6sxcJp_KFG60N2tua2xE2xibQue
102
102
  tensorflow_datasets/core/shuffle_test.py,sha256=-xkd3kx5Ci2gGyKbMwdzGwhjFBv6LTKX1FPPn9eanGI,7123
103
103
  tensorflow_datasets/core/split_builder.py,sha256=HHOX3KatqnSL5EW-6vXzyDDKG4FOEyjGXM_MRuLNxHE,22190
104
104
  tensorflow_datasets/core/split_builder_test.py,sha256=0HsFONj8A09voGS_7zCAb9nd1PdTqT1u4MXR4aWAoS4,3223
105
- tensorflow_datasets/core/splits.py,sha256=LKCmJSMQxRKTo3IoOBB3dGaoOxxUpogXVyilGTrRcOU,28912
106
- tensorflow_datasets/core/splits_test.py,sha256=WtbAKIs7eOsGmLC3KxZ6DcSNSP4AE2KNBgP_nanK6JY,23820
105
+ tensorflow_datasets/core/splits.py,sha256=K5M8fJeleCcJiHa_TsiNI0AQcMimkSSb0D73VOOfai0,29356
106
+ tensorflow_datasets/core/splits_test.py,sha256=OD-ZNnpkPV8WL43IUywXdoAM2QuMV8htsqChw3Nqq9Q,24987
107
107
  tensorflow_datasets/core/subsplits_utils.py,sha256=ZxzeEVbUVlADSTigQbf4k9V4rdjjoRvAuBVXl1-BNrg,6127
108
108
  tensorflow_datasets/core/subsplits_utils_test.py,sha256=iC6f4Bfpnon_NL7TGeCwp3OW2P3jTIPIpWbCqxmuhX4,5159
109
109
  tensorflow_datasets/core/tf_compat.py,sha256=GY-LBj1V9LLNl2LtAh4dLcATisW_8VYTWHgTR31MXvU,1820
@@ -2460,10 +2460,10 @@ tensorflow_datasets/vision_language/wit/wit_test.py,sha256=DJ687VN9hAp6SLXnr_P12
2460
2460
  tensorflow_datasets/vision_language/wit_kaggle/__init__.py,sha256=38tJQ73VHz8vOJn-AyZh2we2YJucbSRIgmgcrsC6bQM,719
2461
2461
  tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle.py,sha256=k-7YD1SGr5bASfdR2_09rrqz-8cpWdIcBWWEXhCvzuk,16903
2462
2462
  tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle_test.py,sha256=vLlluBW77ASNVC4ix7t8idkSUBI6q1-B7zmRV_ICCQM,1778
2463
- tfds_nightly-4.9.8.dev202504250044.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
2464
- tfds_nightly-4.9.8.dev202504250044.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
2465
- tfds_nightly-4.9.8.dev202504250044.dist-info/METADATA,sha256=z0sHgmHnCUDPiFpIf9hdFefU9FdtlhyrD-0ksU7lurI,11879
2466
- tfds_nightly-4.9.8.dev202504250044.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
2467
- tfds_nightly-4.9.8.dev202504250044.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
2468
- tfds_nightly-4.9.8.dev202504250044.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
2469
- tfds_nightly-4.9.8.dev202504250044.dist-info/RECORD,,
2463
+ tfds_nightly-4.9.8.dev202504270045.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
2464
+ tfds_nightly-4.9.8.dev202504270045.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
2465
+ tfds_nightly-4.9.8.dev202504270045.dist-info/METADATA,sha256=zhywWcA5B-E_n0qBfUkiFbrRdksbC8Caca5mwbDsPr4,11879
2466
+ tfds_nightly-4.9.8.dev202504270045.dist-info/WHEEL,sha256=SmOxYU7pzNKBqASvQJ7DjX3XGUF92lrGhMb3R6_iiqI,91
2467
+ tfds_nightly-4.9.8.dev202504270045.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
2468
+ tfds_nightly-4.9.8.dev202504270045.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
2469
+ tfds_nightly-4.9.8.dev202504270045.dist-info/RECORD,,