tfds-nightly 4.9.9.dev202506040044__py3-none-any.whl → 4.9.9.dev202506050044__py3-none-any.whl

This diff compares two publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in their public registry.
@@ -49,3 +49,28 @@ CHECKSUMS_FILENAME = 'checksums.tsv'
 # Filepath for mapping between TFDS datasets and PapersWithCode entries.
 PWC_FILENAME = 'tfds_to_pwc_links.json'
 PWC_LINKS_PATH = f'scripts/documentation/{PWC_FILENAME}'
+
+# Retry parameters. Delays are in seconds.
+TFDS_RETRY_TRIES = int(os.environ.get('TFDS_RETRY_TRIES', 3))
+TFDS_RETRY_INITIAL_DELAY = int(os.environ.get('TFDS_RETRY_INITIAL_DELAY', 1))
+# How much to multiply the delay by for each subsequent try
+TFDS_RETRY_DELAY_MULTIPLIER = int(
+    os.environ.get('TFDS_RETRY_DELAY_MULTIPLIER', 2)
+)
+# Random noise to add to the delay (random pick between 0 and noise).
+TFDS_RETRY_NOISE = float(os.environ.get('TFDS_RETRY_NOISE', 0.5))
+# If the error message contains any of these substrings, retry.
+TFDS_RETRY_MSG_SUBSTRINGS = os.environ.get(
+    'TFDS_RETRY_MSG_SUBSTRINGS',
+    (
+        'deadline_exceeded,'
+        '408 Request Timeout,'
+        '429 Too Many Requests,'
+        '500 Internal Server Error,'
+        '502 Bad Gateway,'
+        '503 Service Unavailable,'
+        '504 Gateway Timeout,'
+        '509 Bandwidth Limit Exceeded,'
+        '599 Gateway Error'
+    ),
+).split(',')
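
Because constants.py reads these variables once at module import, retry behavior can be tuned per process without code changes. A minimal sketch of how that could look (the variable names come from the hunk above; the values are illustrative):

    import os

    # Must be set before tensorflow_datasets is imported, since constants.py
    # reads the environment only once, at import time.
    os.environ['TFDS_RETRY_TRIES'] = '5'             # up to 5 attempts
    os.environ['TFDS_RETRY_INITIAL_DELAY'] = '2'     # first backoff ~2 s
    os.environ['TFDS_RETRY_DELAY_MULTIPLIER'] = '3'  # then ~6 s, ~18 s, ...

    import tensorflow_datasets as tfds  # import only after the env is set

Note that TFDS_RETRY_MSG_SUBSTRINGS is split on commas, so a custom value is a single comma-separated string rather than a list.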
@@ -64,6 +64,7 @@ with epy.lazy_imports():
   from tensorflow_datasets.core.utils import file_utils
   from tensorflow_datasets.core.utils import gcs_utils
   from tensorflow_datasets.core.utils import read_config as read_config_lib
+  from tensorflow_datasets.core.utils import retry
   from tensorflow_datasets.core.utils import type_utils
   # pylint: enable=g-import-not-at-top
 
@@ -290,7 +291,8 @@ class DatasetBuilder(registered.RegisteredDataset):
     # Compute the base directory (for download) and dataset/version directory.
     self._data_dir_root, self._data_dir = self._build_data_dir(data_dir)
     # If the dataset info is available, use it.
-    if dataset_info.dataset_info_path(self.data_path).exists():
+    dataset_info_path = dataset_info.dataset_info_path(self.data_path)
+    if retry.retry(dataset_info_path.exists):
       self.info.read_from_directory(self._data_dir)
     else:  # Use the code version (do not restore data)
       self.info.initialize_from_bucket()
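
The remaining dataset_builder.py hunks apply the same mechanical transformation: the callable is passed to retry.retry uncalled, together with any arguments, instead of being invoked directly. A sketch of the pattern (names as they appear in the surrounding hunks, not itself part of the diff):

    # Before: a transient filesystem/GCS error propagates immediately.
    exists = data_path.exists()

    # After: retry.retry invokes the callable and re-invokes it, with
    # backoff, when the error message looks transient.
    exists = retry.retry(data_path.exists)

    # Positional and keyword arguments are forwarded unchanged:
    text = retry.retry(filename.read_text, encoding='utf-8')
    files = retry.retry(data_dir.glob, pattern)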
@@ -466,8 +468,8 @@ class DatasetBuilder(registered.RegisteredDataset):
         # zipfile.Path does not have `.parts`. Additionally, `os.fspath`
         # will extract the file, so use `str`.
         "tensorflow_datasets" in str(new_path)
-        and legacy_path.exists()
-        and not new_path.exists()
+        and retry.retry(legacy_path.exists)
+        and not retry.retry(new_path.exists)
     ):
       return legacy_path
     else:
@@ -484,7 +486,7 @@ class DatasetBuilder(registered.RegisteredDataset):
     # Search for the url_info file.
     checksums_path = cls._checksums_path
     # If url_info file is found, load the urls
-    if checksums_path and checksums_path.exists():
+    if checksums_path and retry.retry(checksums_path.exists):
       return download.checksums.load_url_infos(checksums_path)
     else:
       return None
@@ -624,7 +626,7 @@ class DatasetBuilder(registered.RegisteredDataset):
 
     download_config = download_config or download.DownloadConfig()
     data_path = self.data_path
-    data_exists = data_path.exists()
+    data_exists = retry.retry(data_path.exists)
 
     # Saving nondeterministic_order in the DatasetInfo for documentation.
    if download_config.nondeterministic_order:
@@ -640,7 +642,7 @@ class DatasetBuilder(registered.RegisteredDataset):
           "Deleting pre-existing dataset %s (%s)", self.name, self.data_dir
       )
       data_path.rmtree()  # Delete pre-existing data.
-      data_exists = data_path.exists()
+      data_exists = retry.retry(data_path.exists)
     else:
       logging.info("Reusing dataset %s (%s)", self.name, self.data_dir)
       return
@@ -805,7 +807,7 @@ class DatasetBuilder(registered.RegisteredDataset):
   def _update_dataset_info(self) -> None:
     """Updates the `dataset_info.json` file in the dataset dir."""
     info_file = self.data_path / constants.DATASET_INFO_FILENAME
-    if not info_file.exists():
+    if not retry.retry(info_file.exists):
       raise AssertionError(f"To update {info_file}, it must already exist.")
     new_info = self.info
     new_info.read_from_directory(self.data_path)
@@ -1020,7 +1022,7 @@ class DatasetBuilder(registered.RegisteredDataset):
     self.assert_is_not_blocked()
 
     # pylint: enable=line-too-long
-    if not self.data_path.exists():
+    if not retry.retry(self.data_path.exists):
       raise AssertionError(
           "Dataset %s: could not find data in %s. Please make sure to call "
          "dataset_builder.download_and_prepare(), or pass download=True to "
@@ -1817,7 +1819,7 @@ class GeneratorBasedBuilder(FileReaderBuilder):
     """Returns the text in the given file and records the lineage."""
     filename = epath.Path(filename)
     self.info.add_file_data_source_access(filename)
-    return filename.read_text(encoding=encoding)
+    return retry.retry(filename.read_text, encoding=encoding)
 
   def read_tfrecord_as_dataset(
       self,
@@ -2057,9 +2059,9 @@ def _save_default_config_name(
 def load_default_config_name(builder_dir: epath.Path) -> str | None:
   """Load `builder_cls` metadata (common to all builder configs)."""
   config_path = builder_dir / ".config" / constants.METADATA_FILENAME
-  if not config_path.exists():
+  if not retry.retry(config_path.exists):
     return None
-  data = json.loads(config_path.read_text())
+  data = json.loads(retry.retry(config_path.read_text))
   return data.get("default_config_name")
 
 
@@ -61,6 +61,7 @@ with epy.lazy_imports():
   # pylint: disable=g-import-not-at-top
   from tensorflow_datasets.core.utils import file_utils
   from tensorflow_datasets.core.utils import gcs_utils
+  from tensorflow_datasets.core.utils import retry
 
   from google.protobuf import json_format
   # pylint: enable=g-import-not-at-top
@@ -1123,7 +1124,7 @@ def read_from_json(path: epath.PathLike) -> dataset_info_pb2.DatasetInfo:
     DatasetInfoFileError: If the dataset info file cannot be read.
   """
   try:
-    json_str = epath.Path(path).read_text()
+    json_str = retry.retry(epath.Path(path).read_text)
   except OSError as e:
     raise DatasetInfoFileError(
         f"Could not read dataset info from {path}"
@@ -37,6 +37,7 @@ from tensorflow_datasets.core.proto import feature_pb2
 from tensorflow_datasets.core.utils import dtype_utils
 from tensorflow_datasets.core.utils import np_utils
 from tensorflow_datasets.core.utils import py_utils
+from tensorflow_datasets.core.utils import retry
 from tensorflow_datasets.core.utils import tf_utils
 from tensorflow_datasets.core.utils import type_utils
 from tensorflow_datasets.core.utils.lazy_imports_utils import tensorflow as tf
@@ -658,7 +659,7 @@ class FeatureConnector(object, metaclass=abc.ABCMeta):
     Returns:
       The reconstructed feature instance.
     """
-    content = json.loads(make_config_path(root_dir).read_text())
+    content = json.loads(retry.retry(make_config_path(root_dir).read_text))
     feature = FeatureConnector.from_json(content)
     feature.load_metadata(root_dir, feature_name=None)
     return feature
@@ -36,6 +36,7 @@ from tensorflow_datasets.core import naming
 from tensorflow_datasets.core import proto as proto_lib
 from tensorflow_datasets.core import units
 from tensorflow_datasets.core import utils
+from tensorflow_datasets.core.utils import retry
 from tensorflow_datasets.core.utils import shard_utils
 
 from tensorflow_metadata.proto.v0 import statistics_pb2
@@ -149,7 +150,7 @@ class SplitInfo:
         pattern = filename_template.glob_pattern(num_shards=self.num_shards)
       else:
         pattern = filename_template.sharded_filepaths_pattern(num_shards=None)
-      return list(data_dir.glob(pattern))
+      return list(retry.retry(data_dir.glob, pattern))
     else:
       raise ValueError(f'Filename template for split {self.name} is empty.')
 
@@ -0,0 +1,54 @@
+# coding=utf-8
+# Copyright 2025 The TensorFlow Datasets Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""To add retry logic to operations susceptible to transient failures."""
+
+import random
+import time
+from typing import Callable, ParamSpec, TypeVar
+
+from absl import logging
+from tensorflow_datasets.core import constants
+
+
+P = ParamSpec("P")
+T = TypeVar("T")
+
+
+def retry(func: Callable[P, T], *args: P.args, **kwargs: P.kwargs) -> T:
+  """Calls `func(*args, **kwargs)`, retrying on transient errors."""
+  # We purposely don't use flags, as this code might be run before flags are
+  # parsed.
+  tries = constants.TFDS_RETRY_TRIES
+  delay = constants.TFDS_RETRY_INITIAL_DELAY
+  multiplier = constants.TFDS_RETRY_DELAY_MULTIPLIER
+  noise = constants.TFDS_RETRY_NOISE
+  msg_substrings = constants.TFDS_RETRY_MSG_SUBSTRINGS
+  for trial in range(1, tries + 1):
+    try:
+      return func(*args, **kwargs)
+    except BaseException as err:  # pylint: disable=broad-except
+      if trial >= tries:
+        raise err
+      msg = str(err)
+      for msg_substring in msg_substrings:
+        if msg_substring in msg:
+          break
+      else:
+        raise err
+      delay = delay + random.uniform(0, noise)
+      logging.warning("%s, retrying in %s seconds...", msg, delay)
+      time.sleep(delay)
+      delay *= multiplier
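
With the defaults (3 tries, 1 s initial delay, 2x multiplier, 0.5 s noise), a call that keeps failing transiently sleeps roughly 1 to 1.5 s after the first attempt and roughly 2 to 3.5 s after the second (the random noise is added before the multiplication, so it compounds), then re-raises on the third. A minimal usage sketch, assuming a hypothetical GCS path:

    from etils import epath
    from tensorflow_datasets.core.utils import retry

    path = epath.Path('gs://my-bucket/mnist/3.0.1/dataset_info.json')

    # retry.retry(func, *args, **kwargs) calls func(*args, **kwargs) and
    # retries only when str(err) contains one of TFDS_RETRY_MSG_SUBSTRINGS.
    if retry.retry(path.exists):
      info_json = retry.retry(path.read_text)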
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: tfds-nightly
-Version: 4.9.9.dev202506040044
+Version: 4.9.9.dev202506050044
 Summary: tensorflow/datasets is a library of datasets ready to use with TensorFlow.
 Home-page: https://github.com/tensorflow/datasets
 Download-URL: https://github.com/tensorflow/datasets/tags
@@ -59,15 +59,15 @@ tensorflow_datasets/core/as_dataframe.py,sha256=3-2ScAo2G6wwYWbz_w3Crb4QyBwcuIYh
 tensorflow_datasets/core/as_dataframe_test.py,sha256=cGgk3f9j87dDRA2EXedlYb11NpOLdew0dA_O0ZG-PLQ,2048
 tensorflow_datasets/core/beam_utils.py,sha256=0X2lE9ILqLWZe5Idg58-G5XtgITXEAxqVodDtCDo9Ro,5109
 tensorflow_datasets/core/beam_utils_test.py,sha256=5ZhntgG658uT5pF4kw_U5Epm8lu0tdg4cI-0viMItzg,2852
-tensorflow_datasets/core/constants.py,sha256=O6uQ-jNiup0PUq-7itcLDgkszNbZb31hkvuX_POPx9c,1841
-tensorflow_datasets/core/dataset_builder.py,sha256=eqBUVv9jNboGGU3flEcT9iATOA_k-QqdRUkEGEu7DIk,80387
+tensorflow_datasets/core/constants.py,sha256=eAzxhRpS1vx5LMedCmes2nfSU7BL831yi1J8JZkhtXY,2769
+tensorflow_datasets/core/dataset_builder.py,sha256=4l3ArfMSUs0llpo0e59LMg6qZIuoIoQhz8EOK5ThYxk,80603
 tensorflow_datasets/core/dataset_builder_beam_test.py,sha256=d7UsYNsAIY4FcANAERLcVMDcajIpAi0uMfrnQoe4yv4,8386
 tensorflow_datasets/core/dataset_builder_notfdv_test.py,sha256=eIWlOZijQfopdze85EkbcPY1I8lFmEBnedcoUoOAnRQ,1346
 tensorflow_datasets/core/dataset_builder_read_test.py,sha256=QxodggixId7zmknhnC0hAYMcuLehWUODVPo7JvjYw9k,4571
 tensorflow_datasets/core/dataset_builder_test.py,sha256=Q2GlADkgd0hzHHPLdMalW1tyc00QPdPNzaLxR5oU4HQ,49257
 tensorflow_datasets/core/dataset_collection_builder.py,sha256=9tvIWFL4gLFaDFnU_ioKaGGzPQshmACS64vagjTVzkE,7224
 tensorflow_datasets/core/dataset_collection_builder_test.py,sha256=m6H9Kxb-RHUiBEZXd6kyheoixlogJ3OwzixHM2jP6WA,3664
-tensorflow_datasets/core/dataset_info.py,sha256=fUBghpY4DtKWlhVA7OOVvbuQKhaWPhHZ2jXUJMPMQm8,52563
+tensorflow_datasets/core/dataset_info.py,sha256=NHerzNwv9L_ZUE1vvd9O9pS06Si7rCXGYaiTiUF1PJY,52625
 tensorflow_datasets/core/dataset_info_test.py,sha256=T9wnE8XMf1Cl_5IX6fx55AHp7q40mTTQG9H_dyBlTLM,31522
 tensorflow_datasets/core/dataset_metadata.py,sha256=ffmBbfrwsiBqMfwrbzm2EcpmmzmEhIUJNuTkz3oZW3U,3091
 tensorflow_datasets/core/dataset_metadata_test.py,sha256=gbY2Yv1Q8_euuZ0DGaZu3g-0KteqoYoTwrBgyf-uFbc,1822
@@ -102,7 +102,7 @@ tensorflow_datasets/core/shuffle.py,sha256=nBS0pysbTRP77LY6wnxwJmAyKc3aS1lxksaFO
 tensorflow_datasets/core/shuffle_test.py,sha256=Hx2v9QXt3Ul8Q6_VsWfsKgtNHFQwGtx7U_1AYufXkHg,7123
 tensorflow_datasets/core/split_builder.py,sha256=cpz-YowMhmiZZVp7eQPNrh23KvE0-Ef5gme1kCkDjD0,22190
 tensorflow_datasets/core/split_builder_test.py,sha256=kBUVUnQQB_c82AhgjhK3hoYfiAqLt7tDFTzsvZRGQCw,3223
-tensorflow_datasets/core/splits.py,sha256=Bgrh-ncQs0COz2z8fa1bIOnzgCUvsPpPpXQXveCBTCc,29356
+tensorflow_datasets/core/splits.py,sha256=O3jK4Dalp4tEPeZ9AHbkpW1UkJ6uv5m4YRu2x_ZZTJ4,29418
 tensorflow_datasets/core/splits_test.py,sha256=KrM82r0YsJRTGfpYUCkBxiGDC7BjZFcTvJ-Hbo6HwF0,24987
 tensorflow_datasets/core/subsplits_utils.py,sha256=BPHVPAvHlqt4d3HUr4J2Znn8G63pXLPQ29TBi484MOE,6127
 tensorflow_datasets/core/subsplits_utils_test.py,sha256=TIRLtfaf2n38pByhpqYTXEEvs8hrWe2eXk9RFdBMrFQ,5159
@@ -191,7 +191,7 @@ tensorflow_datasets/core/features/class_label_feature.py,sha256=d8fbBDCWs6UjnGgk
 tensorflow_datasets/core/features/class_label_feature_test.py,sha256=RuUmDTbf9VLTKKQRCjyutLM54EIoNEkVe-lQkvdC3yw,6215
 tensorflow_datasets/core/features/dataset_feature.py,sha256=R-TyDL6-yspS1bWInWBiDYNWXeUm9VAXqPGQz8zUMPA,7403
 tensorflow_datasets/core/features/dataset_feature_test.py,sha256=np40I8R9zHfwvB2njk0r4pm0QdOFN5FmMcLjqqP9cvQ,19725
-tensorflow_datasets/core/features/feature.py,sha256=2EO3ypXS_-urY4Gf_Tf8H_kOpRBUQTqYdSewTbO-03c,40184
+tensorflow_datasets/core/features/feature.py,sha256=Ih8T_4xY9tPtap3fHeD7Y8ADFjyUM1eJ5pCQVD5GGCE,40244
 tensorflow_datasets/core/features/feature_test.py,sha256=FBrRjxzsgdn7o7YEvv5Kd09pBPu9-VyOyXJDL2d8_So,6712
 tensorflow_datasets/core/features/features_dict.py,sha256=qQfmpfPDhKfmUh_38DGkFtn8ANvze9ziNfZwoqqWFwk,10071
 tensorflow_datasets/core/features/features_test.py,sha256=i3k_r8BJIC4oJ1dxSRIJP4LJU3Y0279EOBXmY4S0iGI,14041
@@ -269,6 +269,7 @@ tensorflow_datasets/core/utils/py_utils_test.py,sha256=gGrcypIyFI7Ej0nWREfEqkrhE
 tensorflow_datasets/core/utils/read_config.py,sha256=tRx4ab2I1phU-eSfin_CUNtO7YsCA8n9dqNAX-PzyaE,6601
 tensorflow_datasets/core/utils/resource_utils.py,sha256=T-gvhO9JdUh-_hx08WSbwjUXtWGj78902hVur-kirRM,1949
 tensorflow_datasets/core/utils/resource_utils_test.py,sha256=AT-uwz_739qUSPtWlijl39Ks2RhVOuse4FJD9gsY__c,956
+tensorflow_datasets/core/utils/retry.py,sha256=tjw2MHPWQ-uk3ldKu1Z7skUkT-X6GULbrhpfOPC6O2Q,1812
 tensorflow_datasets/core/utils/shard_utils.py,sha256=BgR4S2SNDKiz8SlOdfdWm0FeRcmCRHOIM4v9FHSZH0g,9208
 tensorflow_datasets/core/utils/shard_utils_test.py,sha256=DJBDVEwy72JRAWBC9yT4U1nYuqwYqgfeiYfgcKuqWFA,8129
 tensorflow_datasets/core/utils/tf_utils.py,sha256=NUPVvsL7bFfhOYRKFdoZdZZ5vx5PUWWYUc3FTXJO1x4,8450
@@ -2460,10 +2461,10 @@ tensorflow_datasets/vision_language/wit/wit_test.py,sha256=PXS8DMNW-MDrT2p5oy4Ic
 tensorflow_datasets/vision_language/wit_kaggle/__init__.py,sha256=vGwSGeM8WE4Q-l0-eEE1sBojmk6YT0l1OO60AWa4Q40,719
 tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle.py,sha256=q-vX_FBzIwsFxL4sY9vuyQ3UQD2PLM4yhUR4U6l-qao,16903
 tensorflow_datasets/vision_language/wit_kaggle/wit_kaggle_test.py,sha256=ZymHT1NkmD-pUnh3BmM3_g30c5afsWYnmqDD9dVyDSA,1778
-tfds_nightly-4.9.9.dev202506040044.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
-tfds_nightly-4.9.9.dev202506040044.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
-tfds_nightly-4.9.9.dev202506040044.dist-info/METADATA,sha256=u2UDO83W52k1gG7xoqEX7jhLRUyYaXxn0IkTcoo8BLU,11963
-tfds_nightly-4.9.9.dev202506040044.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-tfds_nightly-4.9.9.dev202506040044.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
-tfds_nightly-4.9.9.dev202506040044.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
-tfds_nightly-4.9.9.dev202506040044.dist-info/RECORD,,
+tfds_nightly-4.9.9.dev202506050044.dist-info/licenses/AUTHORS,sha256=nvBG4WwfgjuOu1oZkuQKw9kg7X6rve679ObS-YDDmXg,309
+tfds_nightly-4.9.9.dev202506050044.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+tfds_nightly-4.9.9.dev202506050044.dist-info/METADATA,sha256=g1lyp8ePiwknC6QILmY08oWra_aLQc_642wtGymD_Jc,11963
+tfds_nightly-4.9.9.dev202506050044.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+tfds_nightly-4.9.9.dev202506050044.dist-info/entry_points.txt,sha256=eHEL7nF5y1uCY2FgkuYIdE062epJXlAQTSdq89px4p4,73
+tfds_nightly-4.9.9.dev202506050044.dist-info/top_level.txt,sha256=bAevmk9209s_oxVZVlN6hSDIVS423qrMQvmcWSvW4do,20
+tfds_nightly-4.9.9.dev202506050044.dist-info/RECORD,,