toil 7.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (197)
  1. toil/__init__.py +124 -86
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +137 -77
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
  5. toil/batchSystems/awsBatch.py +237 -128
  6. toil/batchSystems/cleanup_support.py +22 -16
  7. toil/batchSystems/contained_executor.py +30 -26
  8. toil/batchSystems/gridengine.py +85 -49
  9. toil/batchSystems/htcondor.py +164 -87
  10. toil/batchSystems/kubernetes.py +622 -386
  11. toil/batchSystems/local_support.py +17 -12
  12. toil/batchSystems/lsf.py +132 -79
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +288 -149
  16. toil/batchSystems/mesos/executor.py +77 -49
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +39 -29
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +293 -123
  21. toil/batchSystems/slurm.py +651 -155
  22. toil/batchSystems/torque.py +46 -32
  23. toil/bus.py +141 -73
  24. toil/common.py +784 -397
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1137 -534
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +62 -41
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +88 -57
  32. toil/fileStores/cachingFileStore.py +711 -247
  33. toil/fileStores/nonCachingFileStore.py +113 -75
  34. toil/job.py +1031 -349
  35. toil/jobStores/abstractJobStore.py +387 -243
  36. toil/jobStores/aws/jobStore.py +772 -412
  37. toil/jobStores/aws/utils.py +161 -109
  38. toil/jobStores/conftest.py +1 -0
  39. toil/jobStores/fileJobStore.py +289 -151
  40. toil/jobStores/googleJobStore.py +137 -70
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +614 -269
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +55 -28
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +204 -58
  49. toil/lib/aws/utils.py +290 -213
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +83 -49
  53. toil/lib/docker.py +131 -103
  54. toil/lib/dockstore.py +379 -0
  55. toil/lib/ec2.py +322 -209
  56. toil/lib/ec2nodes.py +174 -105
  57. toil/lib/encryption/_dummy.py +5 -3
  58. toil/lib/encryption/_nacl.py +10 -6
  59. toil/lib/encryption/conftest.py +1 -0
  60. toil/lib/exceptions.py +26 -7
  61. toil/lib/expando.py +4 -2
  62. toil/lib/ftp_utils.py +217 -0
  63. toil/lib/generatedEC2Lists.py +127 -19
  64. toil/lib/history.py +1271 -0
  65. toil/lib/history_submission.py +681 -0
  66. toil/lib/humanize.py +6 -2
  67. toil/lib/io.py +121 -12
  68. toil/lib/iterables.py +4 -2
  69. toil/lib/memoize.py +12 -8
  70. toil/lib/misc.py +83 -18
  71. toil/lib/objects.py +2 -2
  72. toil/lib/resources.py +19 -7
  73. toil/lib/retry.py +125 -87
  74. toil/lib/threading.py +282 -80
  75. toil/lib/throttle.py +15 -14
  76. toil/lib/trs.py +390 -0
  77. toil/lib/web.py +38 -0
  78. toil/options/common.py +850 -402
  79. toil/options/cwl.py +185 -90
  80. toil/options/runner.py +50 -0
  81. toil/options/wdl.py +70 -19
  82. toil/provisioners/__init__.py +111 -46
  83. toil/provisioners/abstractProvisioner.py +322 -157
  84. toil/provisioners/aws/__init__.py +62 -30
  85. toil/provisioners/aws/awsProvisioner.py +980 -627
  86. toil/provisioners/clusterScaler.py +541 -279
  87. toil/provisioners/gceProvisioner.py +283 -180
  88. toil/provisioners/node.py +147 -79
  89. toil/realtimeLogger.py +34 -22
  90. toil/resource.py +137 -75
  91. toil/server/app.py +127 -61
  92. toil/server/celery_app.py +3 -1
  93. toil/server/cli/wes_cwl_runner.py +84 -55
  94. toil/server/utils.py +56 -31
  95. toil/server/wes/abstract_backend.py +64 -26
  96. toil/server/wes/amazon_wes_utils.py +21 -15
  97. toil/server/wes/tasks.py +121 -63
  98. toil/server/wes/toil_backend.py +142 -107
  99. toil/server/wsgi_app.py +4 -3
  100. toil/serviceManager.py +58 -22
  101. toil/statsAndLogging.py +183 -65
  102. toil/test/__init__.py +263 -179
  103. toil/test/batchSystems/batchSystemTest.py +438 -195
  104. toil/test/batchSystems/batch_system_plugin_test.py +18 -7
  105. toil/test/batchSystems/test_gridengine.py +173 -0
  106. toil/test/batchSystems/test_lsf_helper.py +67 -58
  107. toil/test/batchSystems/test_slurm.py +265 -49
  108. toil/test/cactus/test_cactus_integration.py +20 -22
  109. toil/test/cwl/conftest.py +39 -0
  110. toil/test/cwl/cwlTest.py +375 -72
  111. toil/test/cwl/measure_default_memory.cwl +12 -0
  112. toil/test/cwl/not_run_required_input.cwl +29 -0
  113. toil/test/cwl/optional-file.cwl +18 -0
  114. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  115. toil/test/docs/scriptsTest.py +60 -34
  116. toil/test/jobStores/jobStoreTest.py +412 -235
  117. toil/test/lib/aws/test_iam.py +116 -48
  118. toil/test/lib/aws/test_s3.py +16 -9
  119. toil/test/lib/aws/test_utils.py +5 -6
  120. toil/test/lib/dockerTest.py +118 -141
  121. toil/test/lib/test_conversions.py +113 -115
  122. toil/test/lib/test_ec2.py +57 -49
  123. toil/test/lib/test_history.py +212 -0
  124. toil/test/lib/test_misc.py +12 -5
  125. toil/test/lib/test_trs.py +161 -0
  126. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  127. toil/test/mesos/helloWorld.py +7 -6
  128. toil/test/mesos/stress.py +25 -20
  129. toil/test/options/options.py +7 -2
  130. toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
  131. toil/test/provisioners/clusterScalerTest.py +440 -250
  132. toil/test/provisioners/clusterTest.py +81 -42
  133. toil/test/provisioners/gceProvisionerTest.py +174 -100
  134. toil/test/provisioners/provisionerTest.py +25 -13
  135. toil/test/provisioners/restartScript.py +5 -4
  136. toil/test/server/serverTest.py +188 -141
  137. toil/test/sort/restart_sort.py +137 -68
  138. toil/test/sort/sort.py +134 -66
  139. toil/test/sort/sortTest.py +91 -49
  140. toil/test/src/autoDeploymentTest.py +140 -100
  141. toil/test/src/busTest.py +20 -18
  142. toil/test/src/checkpointTest.py +8 -2
  143. toil/test/src/deferredFunctionTest.py +49 -35
  144. toil/test/src/dockerCheckTest.py +33 -26
  145. toil/test/src/environmentTest.py +20 -10
  146. toil/test/src/fileStoreTest.py +538 -271
  147. toil/test/src/helloWorldTest.py +7 -4
  148. toil/test/src/importExportFileTest.py +61 -31
  149. toil/test/src/jobDescriptionTest.py +32 -17
  150. toil/test/src/jobEncapsulationTest.py +2 -0
  151. toil/test/src/jobFileStoreTest.py +74 -50
  152. toil/test/src/jobServiceTest.py +187 -73
  153. toil/test/src/jobTest.py +120 -70
  154. toil/test/src/miscTests.py +19 -18
  155. toil/test/src/promisedRequirementTest.py +82 -36
  156. toil/test/src/promisesTest.py +7 -6
  157. toil/test/src/realtimeLoggerTest.py +6 -6
  158. toil/test/src/regularLogTest.py +71 -37
  159. toil/test/src/resourceTest.py +80 -49
  160. toil/test/src/restartDAGTest.py +36 -22
  161. toil/test/src/resumabilityTest.py +9 -2
  162. toil/test/src/retainTempDirTest.py +45 -14
  163. toil/test/src/systemTest.py +12 -8
  164. toil/test/src/threadingTest.py +44 -25
  165. toil/test/src/toilContextManagerTest.py +10 -7
  166. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  167. toil/test/src/workerTest.py +33 -16
  168. toil/test/utils/toilDebugTest.py +70 -58
  169. toil/test/utils/toilKillTest.py +4 -5
  170. toil/test/utils/utilsTest.py +239 -102
  171. toil/test/wdl/wdltoil_test.py +789 -148
  172. toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
  173. toil/toilState.py +52 -26
  174. toil/utils/toilConfig.py +13 -4
  175. toil/utils/toilDebugFile.py +44 -27
  176. toil/utils/toilDebugJob.py +85 -25
  177. toil/utils/toilDestroyCluster.py +11 -6
  178. toil/utils/toilKill.py +8 -3
  179. toil/utils/toilLaunchCluster.py +251 -145
  180. toil/utils/toilMain.py +37 -16
  181. toil/utils/toilRsyncCluster.py +27 -14
  182. toil/utils/toilSshCluster.py +45 -22
  183. toil/utils/toilStats.py +75 -36
  184. toil/utils/toilStatus.py +226 -119
  185. toil/utils/toilUpdateEC2Instances.py +3 -1
  186. toil/version.py +6 -6
  187. toil/wdl/utils.py +5 -5
  188. toil/wdl/wdltoil.py +3528 -1053
  189. toil/worker.py +370 -149
  190. toil-8.1.0b1.dist-info/METADATA +178 -0
  191. toil-8.1.0b1.dist-info/RECORD +259 -0
  192. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/WHEEL +1 -1
  193. toil-7.0.0.dist-info/METADATA +0 -158
  194. toil-7.0.0.dist-info/RECORD +0 -244
  195. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/LICENSE +0 -0
  196. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/entry_points.txt +0 -0
  197. {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/top_level.txt +0 -0
toil/jobStores/abstractJobStore.py +387 -243

@@ -16,45 +16,39 @@ import os
  import pickle
  import re
  import shutil
- import sys
  from abc import ABC, ABCMeta, abstractmethod
+ from collections.abc import Iterator, ValuesView
  from contextlib import closing, contextmanager
  from datetime import timedelta
  from http.client import BadStatusLine
- from typing import (IO,
-                     TYPE_CHECKING,
-                     Any,
-                     Callable,
-                     ContextManager,
-                     Dict,
-                     Iterable,
-                     Iterator,
-                     List,
-                     Optional,
-                     Set,
-                     Tuple,
-                     Union,
-                     ValuesView,
-                     cast,
-                     overload)
-
- if sys.version_info >= (3, 8):
-     from typing import Literal
- else:
-     from typing_extensions import Literal
-
+ from typing import (
+     IO,
+     TYPE_CHECKING,
+     Any,
+     Callable,
+     ContextManager,
+     Literal,
+     Optional,
+     Union,
+     cast,
+     overload,
+ )
  from urllib.error import HTTPError
  from urllib.parse import ParseResult, urlparse
- from urllib.request import urlopen
+ from urllib.request import urlopen, Request
  from uuid import uuid4

  from toil.common import Config, getNodeID, safeUnpickleFromStream
  from toil.fileStores import FileID
- from toil.job import (CheckpointJobDescription,
-                       JobDescription,
-                       JobException,
-                       ServiceJobDescription)
+ from toil.job import (
+     CheckpointJobDescription,
+     JobDescription,
+     JobException,
+     ServiceJobDescription,
+ )
+ from toil.lib.ftp_utils import FtpFsAccess
  from toil.lib.compatibility import deprecated
+ from toil.lib.exceptions import UnimplementedURLException
  from toil.lib.io import WriteWatchingStream
  from toil.lib.memoize import memoize
  from toil.lib.retry import ErrorCondition, retry
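The import rewrite above drops the old sys.version_info guard around Literal and retires the deprecated typing aliases (Dict, List, Set, Tuple, Iterable, Iterator, ValuesView) in favor of built-in generics and collections.abc, which assumes Python 3.9 or newer. A minimal sketch (not part of the diff; the function names are illustrative) of the style the new imports imply:

    from collections.abc import Iterator
    from typing import Literal, Optional

    Mode = Literal["import", "export"]

    def chunks(data: bytes, size: int = 4096) -> Iterator[bytes]:
        # list[...] and dict[...] now come straight from builtins on 3.9+
        for start in range(0, len(data), size):
            yield data[start : start + size]

    def describe(mode: Mode, name: Optional[str] = None) -> dict[str, str]:
        return {"mode": mode, "name": name or "unnamed"}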
@@ -67,18 +61,22 @@ logger = logging.getLogger(__name__)
  try:
      from botocore.exceptions import ProxyConnectionError
  except ImportError:
+
      class ProxyConnectionError(BaseException):  # type: ignore
          """Dummy class."""

+
  class LocatorException(Exception):
      """
      Base exception class for all locator exceptions.
      For example, job store/aws bucket exceptions where they already exist
      """
-     def __init__(self, error_msg: str, locator: str, prefix: Optional[str]=None):
+
+     def __init__(self, error_msg: str, locator: str, prefix: Optional[str] = None):
          full_locator = locator if prefix is None else f"{prefix}:{locator}"
          super().__init__(error_msg % full_locator)

+
  class InvalidImportExportUrlException(Exception):
      def __init__(self, url: ParseResult) -> None:
          """
@@ -86,24 +84,10 @@ class InvalidImportExportUrlException(Exception):
          """
          super().__init__("The URL '%s' is invalid." % url.geturl())

- class UnimplementedURLException(RuntimeError):
-     def __init__(self, url: ParseResult, operation: str) -> None:
-         """
-         Make a new exception to report that a URL scheme is not implemented, or
-         that the implementation can't be loaded because its dependencies are
-         not installed.
-
-         :param url: The given URL
-         :param operation: Whether we are trying to 'import' or 'export'
-         """
-         super().__init__(
-             f"No available job store implementation can {operation} the URL "
-             f"'{url.geturl()}'. Ensure Toil has been installed "
-             f"with the appropriate extras."
-         )

  class NoSuchJobException(Exception):
      """Indicates that the specified job does not exist."""
+
      def __init__(self, jobStoreID: FileID):
          """
          :param str jobStoreID: the jobStoreID that was mistakenly assumed to exist
@@ -113,17 +97,21 @@ class NoSuchJobException(Exception):

  class ConcurrentFileModificationException(Exception):
      """Indicates that the file was attempted to be modified by multiple processes at once."""
+
      def __init__(self, jobStoreFileID: FileID):
          """
          :param jobStoreFileID: the ID of the file that was modified by multiple workers
              or processes concurrently
          """
-         super().__init__('Concurrent update to file %s detected.' % jobStoreFileID)
+         super().__init__("Concurrent update to file %s detected." % jobStoreFileID)


  class NoSuchFileException(Exception):
      """Indicates that the specified file does not exist."""
-     def __init__(self, jobStoreFileID: FileID, customName: Optional[str] = None, *extra: Any):
+
+     def __init__(
+         self, jobStoreFileID: FileID, customName: Optional[str] = None, *extra: Any
+     ):
          """
          :param jobStoreFileID: the ID of the file that was mistakenly assumed to exist
          :param customName: optionally, an alternate name for the nonexistent file
@@ -146,22 +134,31 @@ class NoSuchFileException(Exception):

  class NoSuchJobStoreException(LocatorException):
      """Indicates that the specified job store does not exist."""
+
      def __init__(self, locator: str, prefix: str):
          """
          :param str locator: The location of the job store
          """
-         super().__init__("The job store '%s' does not exist, so there is nothing to restart.", locator, prefix)
+         super().__init__(
+             "The job store '%s' does not exist, so there is nothing to restart.",
+             locator,
+             prefix,
+         )


  class JobStoreExistsException(LocatorException):
      """Indicates that the specified job store already exists."""
+
      def __init__(self, locator: str, prefix: str):
          """
          :param str locator: The location of the job store
          """
          super().__init__(
              "The job store '%s' already exists. Use --restart to resume the workflow, or remove "
-             "the job store with 'toil clean' to start the workflow from scratch.", locator, prefix)
+             "the job store with 'toil clean' to start the workflow from scratch.",
+             locator,
+             prefix,
+         )


  class AbstractJobStore(ABC):
@@ -213,7 +210,7 @@ class AbstractJobStore(ABC):
          self.__config = config
          self.write_config()

-     @deprecated(new_function_name='write_config')
+     @deprecated(new_function_name="write_config")
      def writeConfig(self) -> None:
          return self.write_config()

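The @deprecated(new_function_name=...) decorator above keeps the old camelCase API (writeConfig) alive while steering callers to the snake_case replacement (write_config), a pattern repeated throughout this file. An illustrative sketch of how such an alias decorator can work; toil's real implementation lives in toil.lib.compatibility and may differ:

    import functools
    import warnings

    def deprecated(new_function_name: str):
        # Hypothetical stand-in for toil.lib.compatibility.deprecated.
        def decorator(func):
            @functools.wraps(func)
            def wrapper(*args, **kwargs):
                warnings.warn(
                    f"{func.__name__}() is deprecated; use {new_function_name}() instead",
                    DeprecationWarning,
                    stacklevel=2,
                )
                return func(*args, **kwargs)
            return wrapper
        return decorator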
@@ -222,7 +219,9 @@
          Persists the value of the :attr:`AbstractJobStore.config` attribute to the
          job store, so that it can be retrieved later by other instances of this class.
          """
-         with self.write_shared_file_stream('config.pickle', encrypted=False) as fileHandle:
+         with self.write_shared_file_stream(
+             "config.pickle", encrypted=False
+         ) as fileHandle:
              pickle.dump(self.__config, fileHandle, pickle.HIGHEST_PROTOCOL)

      def resume(self) -> None:
@@ -232,7 +231,7 @@

          :raises NoSuchJobStoreException: if the physical storage for this job store doesn't exist
          """
-         with self.read_shared_file_stream('config.pickle') as fileHandle:
+         with self.read_shared_file_stream("config.pickle") as fileHandle:
              config = safeUnpickleFromStream(fileHandle)
              assert config.workflowID is not None
              self.__config = config
@@ -250,9 +249,9 @@
          """
          return self.__locator

-     rootJobStoreIDFileName = 'rootJobStoreID'
+     rootJobStoreIDFileName = "rootJobStoreID"

-     @deprecated(new_function_name='set_root_job')
+     @deprecated(new_function_name="set_root_job")
      def setRootJob(self, rootJobStoreID: FileID) -> None:
          """Set the root job of the workflow backed by this job store."""
          return self.set_root_job(rootJobStoreID)
@@ -264,9 +263,9 @@
          :param job_id: The ID of the job to set as root
          """
          with self.write_shared_file_stream(self.rootJobStoreIDFileName) as f:
-             f.write(job_id.encode('utf-8'))
+             f.write(job_id.encode("utf-8"))

-     @deprecated(new_function_name='load_root_job')
+     @deprecated(new_function_name="load_root_job")
      def loadRootJob(self) -> JobDescription:
          return self.load_root_job()

@@ -281,16 +280,18 @@
          """
          try:
              with self.read_shared_file_stream(self.rootJobStoreIDFileName) as f:
-                 rootJobStoreID = f.read().decode('utf-8')
+                 rootJobStoreID = f.read().decode("utf-8")
          except NoSuchFileException:
-             raise JobException('No job has been set as the root in this job store')
+             raise JobException("No job has been set as the root in this job store")
          if not self.job_exists(rootJobStoreID):
-             raise JobException("The root job '%s' doesn't exist. Either the Toil workflow "
-                                "is finished or has never been started" % rootJobStoreID)
+             raise JobException(
+                 "The root job '%s' doesn't exist. Either the Toil workflow "
+                 "is finished or has never been started" % rootJobStoreID
+             )
          return self.load_job(rootJobStoreID)

      # FIXME: This is only used in tests, why do we have it?
-     @deprecated(new_function_name='create_root_job')
+     @deprecated(new_function_name="create_root_job")
      def createRootJob(self, desc: JobDescription) -> JobDescription:
          return self.create_root_job(desc)

@@ -307,7 +308,7 @@
          self.set_root_job(job_description.jobStoreID)
          return job_description

-     @deprecated(new_function_name='get_root_job_return_value')
+     @deprecated(new_function_name="get_root_job_return_value")
      def getRootJobReturnValue(self) -> Any:
          return self.get_root_job_return_value()

@@ -318,12 +319,12 @@
          Raises an exception if the root job hasn't fulfilled its promise yet.
          """
          # Parse out the return value from the root job
-         with self.read_shared_file_stream('rootJobReturnValue') as fH:
+         with self.read_shared_file_stream("rootJobReturnValue") as fH:
              return safeUnpickleFromStream(fH)

      @staticmethod
      @memoize
-     def _get_job_store_classes() -> List['AbstractJobStore']:
+     def _get_job_store_classes() -> list["AbstractJobStore"]:
          """
          A list of concrete AbstractJobStore implementations whose dependencies are installed.

@@ -333,23 +334,30 @@
              "toil.jobStores.fileJobStore.FileJobStore",
              "toil.jobStores.googleJobStore.GoogleJobStore",
              "toil.jobStores.aws.jobStore.AWSJobStore",
-             "toil.jobStores.abstractJobStore.JobStoreSupport")
+             "toil.jobStores.abstractJobStore.JobStoreSupport",
+         )
          jobStoreClasses = []
          for className in jobStoreClassNames:
-             moduleName, className = className.rsplit('.', 1)
+             moduleName, className = className.rsplit(".", 1)
              from importlib import import_module
+
              try:
                  module = import_module(moduleName)
              except (ImportError, ProxyConnectionError):
-                 logger.debug("Unable to import '%s' as is expected if the corresponding extra was "
-                              "omitted at installation time.", moduleName)
+                 logger.debug(
+                     "Unable to import '%s' as is expected if the corresponding extra was "
+                     "omitted at installation time.",
+                     moduleName,
+                 )
              else:
                  jobStoreClass = getattr(module, className)
                  jobStoreClasses.append(jobStoreClass)
          return jobStoreClasses

      @classmethod
-     def _findJobStoreForUrl(cls, url: ParseResult, export: bool = False) -> 'AbstractJobStore':
+     def _findJobStoreForUrl(
+         cls, url: ParseResult, export: bool = False
+     ) -> "AbstractJobStore":
          """
          Returns the AbstractJobStore subclass that supports the given URL.

@@ -368,46 +376,58 @@
      # returns a file ID. Explain this to MyPy.

      @overload
-     def importFile(self,
-                    srcUrl: str,
-                    sharedFileName: str,
-                    hardlink: bool = False,
-                    symlink: bool = True) -> None: ...
+     def importFile(
+         self,
+         srcUrl: str,
+         sharedFileName: str,
+         hardlink: bool = False,
+         symlink: bool = True,
+     ) -> None: ...

      @overload
-     def importFile(self,
-                    srcUrl: str,
-                    sharedFileName: None = None,
-                    hardlink: bool = False,
-                    symlink: bool = True) -> FileID: ...
-
-     @deprecated(new_function_name='import_file')
-     def importFile(self,
-                    srcUrl: str,
-                    sharedFileName: Optional[str] = None,
-                    hardlink: bool = False,
-                    symlink: bool = True) -> Optional[FileID]:
+     def importFile(
+         self,
+         srcUrl: str,
+         sharedFileName: None = None,
+         hardlink: bool = False,
+         symlink: bool = True,
+     ) -> FileID: ...
+
+     @deprecated(new_function_name="import_file")
+     def importFile(
+         self,
+         srcUrl: str,
+         sharedFileName: Optional[str] = None,
+         hardlink: bool = False,
+         symlink: bool = True,
+     ) -> Optional[FileID]:
          return self.import_file(srcUrl, sharedFileName, hardlink, symlink)

      @overload
-     def import_file(self,
-                     src_uri: str,
-                     shared_file_name: str,
-                     hardlink: bool = False,
-                     symlink: bool = True) -> None: ...
+     def import_file(
+         self,
+         src_uri: str,
+         shared_file_name: str,
+         hardlink: bool = False,
+         symlink: bool = True,
+     ) -> None: ...

      @overload
-     def import_file(self,
-                     src_uri: str,
-                     shared_file_name: None = None,
-                     hardlink: bool = False,
-                     symlink: bool = True) -> FileID: ...
-
-     def import_file(self,
-                     src_uri: str,
-                     shared_file_name: Optional[str] = None,
-                     hardlink: bool = False,
-                     symlink: bool = True) -> Optional[FileID]:
+     def import_file(
+         self,
+         src_uri: str,
+         shared_file_name: None = None,
+         hardlink: bool = False,
+         symlink: bool = True,
+     ) -> FileID: ...
+
+     def import_file(
+         self,
+         src_uri: str,
+         shared_file_name: Optional[str] = None,
+         hardlink: bool = False,
+         symlink: bool = True,
+     ) -> Optional[FileID]:
          """
          Imports the file at the given URL into job store. The ID of the newly imported file is
          returned. If the name of a shared file name is provided, the file will be imported as
@@ -445,18 +465,23 @@
          # subclasses of AbstractJobStore.
          parseResult = urlparse(src_uri)
          otherCls = self._findJobStoreForUrl(parseResult)
-         return self._import_file(otherCls,
-                                  parseResult,
-                                  shared_file_name=shared_file_name,
-                                  hardlink=hardlink,
-                                  symlink=symlink)
-
-     def _import_file(self,
-                      otherCls: 'AbstractJobStore',
-                      uri: ParseResult,
-                      shared_file_name: Optional[str] = None,
-                      hardlink: bool = False,
-                      symlink: bool = True) -> Optional[FileID]:
+         logger.info("Importing input %s...", src_uri)
+         return self._import_file(
+             otherCls,
+             parseResult,
+             shared_file_name=shared_file_name,
+             hardlink=hardlink,
+             symlink=symlink,
+         )
+
+     def _import_file(
+         self,
+         otherCls: "AbstractJobStore",
+         uri: ParseResult,
+         shared_file_name: Optional[str] = None,
+         hardlink: bool = False,
+         symlink: bool = True,
+     ) -> Optional[FileID]:
          """
          Import the file at the given URL using the given job store class to retrieve that file.
          See also :meth:`.importFile`. This method applies a generic approach to importing: it
@@ -486,7 +511,7 @@
                  otherCls._read_from_url(uri, writable)
                  return None

-     @deprecated(new_function_name='export_file')
+     @deprecated(new_function_name="export_file")
      def exportFile(self, jobStoreFileID: FileID, dstUrl: str) -> None:
          return self.export_file(jobStoreFileID, dstUrl)

@@ -505,13 +530,17 @@
          :param str file_id: The id of the file in the job store that should be exported.

          :param str dst_uri: URL that points to a file or object in the storage mechanism of a
-             supported URL scheme e.g. a blob in an AWS s3 bucket.
+             supported URL scheme e.g. a blob in an AWS s3 bucket. May also be a local path.
          """
+         from toil.common import Toil
+         dst_uri = Toil.normalize_uri(dst_uri)
          parseResult = urlparse(dst_uri)
          otherCls = self._findJobStoreForUrl(parseResult, export=True)
          self._export_file(otherCls, file_id, parseResult)

-     def _export_file(self, otherCls: 'AbstractJobStore', jobStoreFileID: FileID, url: ParseResult) -> None:
+     def _export_file(
+         self, otherCls: "AbstractJobStore", jobStoreFileID: FileID, url: ParseResult
+     ) -> None:
          """
          Refer to exportFile docstring for information about this method.

@@ -526,7 +555,9 @@
          """
          self._default_export_file(otherCls, jobStoreFileID, url)

-     def _default_export_file(self, otherCls: 'AbstractJobStore', jobStoreFileID: FileID, url: ParseResult) -> None:
+     def _default_export_file(
+         self, otherCls: "AbstractJobStore", jobStoreFileID: FileID, url: ParseResult
+     ) -> None:
          """
          Refer to exportFile docstring for information about this method.

@@ -541,7 +572,7 @@
          """
          executable = False
          with self.read_file_stream(jobStoreFileID) as readable:
-             if getattr(jobStoreFileID, 'executable', False):
+             if getattr(jobStoreFileID, "executable", False):
                  executable = jobStoreFileID.executable
              otherCls._write_to_url(readable, url, executable)

@@ -550,6 +581,8 @@
          """
          Return True if the file at the given URI exists, and False otherwise.

+         May raise an error if file existence cannot be determined.
+
          :param src_uri: URL that points to a file or object in the storage
              mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
          """
@@ -580,7 +613,7 @@
          return otherCls._get_is_directory(parseResult)

      @classmethod
-     def list_url(cls, src_uri: str) -> List[str]:
+     def list_url(cls, src_uri: str) -> list[str]:
          """
          List the directory at the given URL. Returned path components can be
          joined with '/' onto the passed URL to form new URLs. Those that end in
@@ -605,7 +638,7 @@
          return otherCls._list_url(parseResult)

      @classmethod
-     def read_from_url(cls, src_uri: str, writable: IO[bytes]) -> Tuple[int, bool]:
+     def read_from_url(cls, src_uri: str, writable: IO[bytes]) -> tuple[int, bool]:
          """
          Read the given URL and write its content into the given writable stream.

@@ -636,6 +669,8 @@
      def _url_exists(cls, url: ParseResult) -> bool:
          """
          Return True if the item at the given URL exists, and Flase otherwise.
+
+         May raise an error if file existence cannot be determined.
          """
          raise NotImplementedError(f"No implementation for {url}")

@@ -663,7 +698,7 @@

      @classmethod
      @abstractmethod
-     def _read_from_url(cls, url: ParseResult, writable: IO[bytes]) -> Tuple[int, bool]:
+     def _read_from_url(cls, url: ParseResult, writable: IO[bytes]) -> tuple[int, bool]:
          """
          Reads the contents of the object at the specified location and writes it to the given
          writable stream.
@@ -683,7 +718,7 @@

      @classmethod
      @abstractmethod
-     def _list_url(cls, url: ParseResult) -> List[str]:
+     def _list_url(cls, url: ParseResult) -> list[str]:
          """
          List the contents of the given URL, which may or may not end in '/'

@@ -715,7 +750,12 @@

      @classmethod
      @abstractmethod
-     def _write_to_url(cls, readable: Union[IO[bytes], IO[str]], url: ParseResult, executable: bool = False) -> None:
+     def _write_to_url(
+         cls,
+         readable: Union[IO[bytes], IO[str]],
+         url: ParseResult,
+         executable: bool = False,
+     ) -> None:
          """
          Reads the contents of the given readable stream and writes it to the object at the
          specified location. Raises FileNotFoundError if the URL doesn't exist..
@@ -762,11 +802,11 @@
          """
          raise NotImplementedError()

-     @deprecated(new_function_name='get_env')
-     def getEnv(self) -> Dict[str, str]:
+     @deprecated(new_function_name="get_env")
+     def getEnv(self) -> dict[str, str]:
          return self.get_env()

-     def get_env(self) -> Dict[str, str]:
+     def get_env(self) -> dict[str, str]:
          """
          Returns a dictionary of environment variables that this job store requires to be set in
          order to function properly on a worker.
@@ -777,7 +817,7 @@

      # Cleanup functions
      def clean(
-         self, jobCache: Optional[Dict[Union[str, "TemporaryID"], JobDescription]] = None
+         self, jobCache: Optional[dict[Union[str, "TemporaryID"], JobDescription]] = None
      ) -> JobDescription:
          """
          Function to cleanup the state of a job store after a restart.
@@ -805,7 +845,9 @@
              return self.load_job(jobId)

          def haveJob(jobId: str) -> bool:
-             assert len(jobId) > 1, f"Job ID {jobId} too short; is a string being used as a list?"
+             assert (
+                 len(jobId) > 1
+             ), f"Job ID {jobId} too short; is a string being used as a list?"
              if jobCache is not None:
                  if jobId in jobCache:
                      return True
@@ -825,13 +867,15 @@
                  jobCache[str(jobDescription.jobStoreID)] = jobDescription
              self.update_job(jobDescription)

-         def getJobDescriptions() -> Union[ValuesView[JobDescription], Iterator[JobDescription]]:
+         def getJobDescriptions() -> (
+             Union[ValuesView[JobDescription], Iterator[JobDescription]]
+         ):
              if jobCache is not None:
                  return jobCache.values()
              else:
                  return self.jobs()

-         def get_jobs_reachable_from_root() -> Set[str]:
+         def get_jobs_reachable_from_root() -> set[str]:
              """
              Traverse the job graph from the root job and return a flattened set of all active jobstore IDs.

@@ -841,8 +885,7 @@
              # Iterate from the root JobDescription and collate all jobs
              # that are reachable from it.
              root_job_description = self.load_root_job()
-             reachable_from_root: Set[str] = set()
-
+             reachable_from_root: set[str] = set()

              for merged_in in root_job_description.get_chain():
                  # Add the job itself and any other jobs that chained with it.
@@ -854,7 +897,6 @@
                      if haveJob(service_job_store_id):
                          reachable_from_root.add(service_job_store_id)

-
              # Unprocessed means it might have successor jobs we need to add.
              unprocessed_job_descriptions = [root_job_description]

@@ -867,15 +909,24 @@
                          # exploring them, since we took their successors.
                          reachable_from_root.add(merged_in.job_store_id)
                      for successor_job_store_id in job_description.allSuccessors():
-                         if successor_job_store_id not in reachable_from_root and haveJob(successor_job_store_id):
-                             successor_job_description = getJobDescription(successor_job_store_id)
+                         if (
+                             successor_job_store_id not in reachable_from_root
+                             and haveJob(successor_job_store_id)
+                         ):
+                             successor_job_description = getJobDescription(
+                                 successor_job_store_id
+                             )

                              # Add all of the successor's linked service jobs as well.
-                             for service_job_store_id in successor_job_description.services:
+                             for (
+                                 service_job_store_id
+                             ) in successor_job_description.services:
                                  if haveJob(service_job_store_id):
                                      reachable_from_root.add(service_job_store_id)

-                             new_job_descriptions_to_process.append(successor_job_description)
+                             new_job_descriptions_to_process.append(
+                                 successor_job_description
+                             )
                  unprocessed_job_descriptions = new_job_descriptions_to_process

              logger.debug(f"{len(reachable_from_root)} jobs reachable from root.")
@@ -885,22 +936,32 @@

          # Cleanup jobs that are not reachable from the root, and therefore orphaned
          # TODO: Avoid reiterating reachable_from_root (which may be very large)
-         unreachable = [x for x in getJobDescriptions() if x.jobStoreID not in reachable_from_root]
+         unreachable = [
+             x for x in getJobDescriptions() if x.jobStoreID not in reachable_from_root
+         ]
          for jobDescription in unreachable:
              # clean up any associated files before deletion
              for fileID in jobDescription.filesToDelete:
                  # Delete any files that should already be deleted
-                 logger.warning(f"Deleting file '{fileID}'. It is marked for deletion but has not yet been removed.")
+                 logger.warning(
+                     f"Deleting file '{fileID}'. It is marked for deletion but has not yet been removed."
+                 )
                  self.delete_file(fileID)
              # Delete the job from us and the cache
              deleteJob(str(jobDescription.jobStoreID))

-         jobDescriptionsReachableFromRoot = {id: getJobDescription(id) for id in reachable_from_root}
+         jobDescriptionsReachableFromRoot = {
+             id: getJobDescription(id) for id in reachable_from_root
+         }

          # Clean up any checkpoint jobs -- delete any successors it
          # may have launched, and restore the job to a pristine state
          jobsDeletedByCheckpoints = set()
-         for jobDescription in [desc for desc in jobDescriptionsReachableFromRoot.values() if isinstance(desc, CheckpointJobDescription)]:
+         for jobDescription in [
+             desc
+             for desc in jobDescriptionsReachableFromRoot.values()
+             if isinstance(desc, CheckpointJobDescription)
+         ]:
              if jobDescription.jobStoreID in jobsDeletedByCheckpoints:
                  # This is a checkpoint that was nested within an
                  # earlier checkpoint, so it and all its successors are
@@ -926,8 +987,10 @@
              if len(jobDescription.filesToDelete) != 0:
                  # Delete any files that should already be deleted
                  for fileID in jobDescription.filesToDelete:
-                     logger.critical("Removing file in job store: %s that was "
-                                     "marked for deletion but not previously removed" % fileID)
+                     logger.critical(
+                         "Removing file in job store: %s that was "
+                         "marked for deletion but not previously removed" % fileID
+                     )
                      self.delete_file(fileID)
                  jobDescription.filesToDelete = []
                  changed[0] = True
@@ -940,6 +1003,7 @@

              def stackSizeFn() -> int:
                  return len(list(jobDescription.allSuccessors()))
+
              startStackSize = stackSizeFn()
              # Remove deleted jobs
              jobDescription.filterSuccessors(haveJob)
@@ -965,16 +1029,25 @@
              assert isinstance(serviceJobDescription, ServiceJobDescription)

              if flag == 1:
-                 logger.debug("Recreating a start service flag for job: %s, flag: %s",
-                              jobStoreID, newFlag)
+                 logger.debug(
+                     "Recreating a start service flag for job: %s, flag: %s",
+                     jobStoreID,
+                     newFlag,
+                 )
                  serviceJobDescription.startJobStoreID = newFlag
              elif flag == 2:
-                 logger.debug("Recreating a terminate service flag for job: %s, flag: %s",
-                              jobStoreID, newFlag)
+                 logger.debug(
+                     "Recreating a terminate service flag for job: %s, flag: %s",
+                     jobStoreID,
+                     newFlag,
+                 )
                  serviceJobDescription.terminateJobStoreID = newFlag
              else:
-                 logger.debug("Recreating a error service flag for job: %s, flag: %s",
-                              jobStoreID, newFlag)
+                 logger.debug(
+                     "Recreating a error service flag for job: %s, flag: %s",
+                     jobStoreID,
+                     newFlag,
+                 )
                  assert flag == 3
                  serviceJobDescription.errorJobStoreID = newFlag

@@ -987,6 +1060,7 @@

              def servicesSizeFn() -> int:
                  return len(jobDescription.services)
+
              startServicesSize = servicesSizeFn()

              def replaceFlagsIfNeeded(serviceJobDescription: JobDescription) -> None:
@@ -1047,12 +1121,14 @@

          # Remove any crufty stats/logging files from the previous run
          logger.debug("Discarding old statistics and logs...")
+
          # We have to manually discard the stream to avoid getting
          # stuck on a blocking write from the job store.
          def discardStream(stream: Union[IO[bytes], IO[str]]) -> None:
              """Read the stream 4K at a time until EOF, discarding all input."""
              while len(stream.read(4096)) != 0:
                  pass
+
          self.read_logs(discardStream)

          logger.debug("Job store is clean")
@@ -1064,7 +1140,7 @@
      # existence of jobs
      ##########################################

-     @deprecated(new_function_name='assign_job_id')
+     @deprecated(new_function_name="assign_job_id")
      def assignID(self, jobDescription: JobDescription) -> None:
          return self.assign_job_id(jobDescription)

@@ -1088,7 +1164,7 @@
          """
          yield

-     @deprecated(new_function_name='create_job')
+     @deprecated(new_function_name="create_job")
      def create(self, jobDescription: JobDescription) -> JobDescription:
          return self.create_job(jobDescription)

@@ -1104,7 +1180,7 @@
          """
          raise NotImplementedError()

-     @deprecated(new_function_name='job_exists')
+     @deprecated(new_function_name="job_exists")
      def exists(self, jobStoreID: str) -> bool:
          return self.job_exists(jobStoreID)

@@ -1120,7 +1196,7 @@
      # One year should be sufficient to finish any pipeline ;-)
      publicUrlExpiration = timedelta(days=365)

-     @deprecated(new_function_name='get_public_url')
+     @deprecated(new_function_name="get_public_url")
      def getPublicUrl(self, fileName: str) -> str:
          return self.get_public_url(fileName)

@@ -1139,7 +1215,7 @@
          """
          raise NotImplementedError()

-     @deprecated(new_function_name='get_shared_public_url')
+     @deprecated(new_function_name="get_shared_public_url")
      def getSharedPublicUrl(self, sharedFileName: str) -> str:
          return self.get_shared_public_url(sharedFileName)

@@ -1161,7 +1237,7 @@
          """
          raise NotImplementedError()

-     @deprecated(new_function_name='load_job')
+     @deprecated(new_function_name="load_job")
      def load(self, jobStoreID: str) -> JobDescription:
          return self.load_job(jobStoreID)

@@ -1181,7 +1257,7 @@
          """
          raise NotImplementedError()

-     @deprecated(new_function_name='update_job')
+     @deprecated(new_function_name="update_job")
      def update(self, jobDescription: JobDescription) -> None:
          return self.update_job(jobDescription)

@@ -1196,7 +1272,7 @@
          """
          raise NotImplementedError()

-     @deprecated(new_function_name='delete_job')
+     @deprecated(new_function_name="delete_job")
      def delete(self, jobStoreID: str) -> None:
          return self.delete_job(jobStoreID)

@@ -1233,12 +1309,19 @@
      # associated with a given job.
      ##########################################

-     @deprecated(new_function_name='write_file')
-     def writeFile(self, localFilePath: str, jobStoreID: Optional[str] = None, cleanup: bool = False) -> str:
+     @deprecated(new_function_name="write_file")
+     def writeFile(
+         self,
+         localFilePath: str,
+         jobStoreID: Optional[str] = None,
+         cleanup: bool = False,
+     ) -> str:
          return self.write_file(localFilePath, jobStoreID, cleanup)

      @abstractmethod
-     def write_file(self, local_path: str, job_id: Optional[str] = None, cleanup: bool = False) -> str:
+     def write_file(
+         self, local_path: str, job_id: Optional[str] = None, cleanup: bool = False
+     ) -> str:
          """
          Takes a file (as a path) and places it in this job store. Returns an ID that can be used
          to retrieve the file at a later time. The file is written in a atomic manner. It will
@@ -1269,19 +1352,27 @@
          """
          raise NotImplementedError()

-     @deprecated(new_function_name='write_file_stream')
-     def writeFileStream(self, jobStoreID: Optional[str] = None, cleanup: bool = False, basename: Optional[str] = None,
-                         encoding: Optional[str] = None, errors: Optional[str] = None) -> ContextManager[Tuple[IO[bytes], str]]:
+     @deprecated(new_function_name="write_file_stream")
+     def writeFileStream(
+         self,
+         jobStoreID: Optional[str] = None,
+         cleanup: bool = False,
+         basename: Optional[str] = None,
+         encoding: Optional[str] = None,
+         errors: Optional[str] = None,
+     ) -> ContextManager[tuple[IO[bytes], str]]:
          return self.write_file_stream(jobStoreID, cleanup, basename, encoding, errors)

      @abstractmethod
      @contextmanager
-     def write_file_stream(self,
-                           job_id: Optional[str] = None,
-                           cleanup: bool = False,
-                           basename: Optional[str] = None,
-                           encoding: Optional[str] = None,
-                           errors: Optional[str] = None) -> Iterator[Tuple[IO[bytes], str]]:
+     def write_file_stream(
+         self,
+         job_id: Optional[str] = None,
+         cleanup: bool = False,
+         basename: Optional[str] = None,
+         encoding: Optional[str] = None,
+         errors: Optional[str] = None,
+     ) -> Iterator[tuple[IO[bytes], str]]:
          """
          Similar to writeFile, but returns a context manager yielding a tuple of
          1) a file handle which can be written to and 2) the ID of the resulting
@@ -1320,18 +1411,22 @@
          """
          raise NotImplementedError()

-     @deprecated(new_function_name='get_empty_file_store_id')
-     def getEmptyFileStoreID(self,
-                             jobStoreID: Optional[str] = None,
-                             cleanup: bool = False,
-                             basename: Optional[str] = None) -> str:
+     @deprecated(new_function_name="get_empty_file_store_id")
+     def getEmptyFileStoreID(
+         self,
+         jobStoreID: Optional[str] = None,
+         cleanup: bool = False,
+         basename: Optional[str] = None,
+     ) -> str:
          return self.get_empty_file_store_id(jobStoreID, cleanup, basename)

      @abstractmethod
-     def get_empty_file_store_id(self,
-                                 job_id: Optional[str] = None,
-                                 cleanup: bool = False,
-                                 basename: Optional[str] = None) -> str:
+     def get_empty_file_store_id(
+         self,
+         job_id: Optional[str] = None,
+         cleanup: bool = False,
+         basename: Optional[str] = None,
+     ) -> str:
          """
          Creates an empty file in the job store and returns its ID.
          Call to fileExists(getEmptyFileStoreID(jobStoreID)) will return True.
@@ -1353,8 +1448,10 @@
          """
          raise NotImplementedError()

-     @deprecated(new_function_name='read_file')
-     def readFile(self, jobStoreFileID: str, localFilePath: str, symlink: bool = False) -> None:
+     @deprecated(new_function_name="read_file")
+     def readFile(
+         self, jobStoreFileID: str, localFilePath: str, symlink: bool = False
+     ) -> None:
          return self.read_file(jobStoreFileID, localFilePath, symlink)

      @abstractmethod
@@ -1382,7 +1479,7 @@
          """
          raise NotImplementedError()

-     @deprecated(new_function_name='read_file_stream')
+     @deprecated(new_function_name="read_file_stream")
      def readFileStream(
          self,
          jobStoreFileID: str,
@@ -1397,14 +1494,12 @@
          file_id: Union[FileID, str],
          encoding: Literal[None] = None,
          errors: Optional[str] = None,
-     ) -> ContextManager[IO[bytes]]:
-         ...
+     ) -> ContextManager[IO[bytes]]: ...

      @overload
      def read_file_stream(
          self, file_id: Union[FileID, str], encoding: str, errors: Optional[str] = None
-     ) -> ContextManager[IO[str]]:
-         ...
+     ) -> ContextManager[IO[str]]: ...

      @abstractmethod
      def read_file_stream(
@@ -1430,7 +1525,7 @@
          """
          raise NotImplementedError()

-     @deprecated(new_function_name='delete_file')
+     @deprecated(new_function_name="delete_file")
      def deleteFile(self, jobStoreFileID: str) -> None:
          return self.delete_file(jobStoreFileID)

@@ -1444,7 +1539,7 @@
          """
          raise NotImplementedError()

-     @deprecated(new_function_name='file_exists')
+     @deprecated(new_function_name="file_exists")
      def fileExists(self, jobStoreFileID: str) -> bool:
          """Determine whether a file exists in this job store."""
          return self.file_exists(jobStoreFileID)
@@ -1458,7 +1553,7 @@
          """
          raise NotImplementedError()

-     @deprecated(new_function_name='get_file_size')
+     @deprecated(new_function_name="get_file_size")
      def getFileSize(self, jobStoreFileID: str) -> int:
          """Get the size of the given file in bytes."""
          return self.get_file_size(jobStoreFileID)
@@ -1478,7 +1573,7 @@
          """
          raise NotImplementedError()

-     @deprecated(new_function_name='update_file')
+     @deprecated(new_function_name="update_file")
      def updateFile(self, jobStoreFileID: str, localFilePath: str) -> None:
          """Replaces the existing version of a file in the job store."""
          return self.update_file(jobStoreFileID, localFilePath)
@@ -1499,19 +1594,20 @@
          """
          raise NotImplementedError()

-     @deprecated(new_function_name='update_file_stream')
-     def updateFileStream(self,
-                          jobStoreFileID: str,
-                          encoding: Optional[str] = None,
-                          errors: Optional[str] = None) -> ContextManager[IO[Any]]:
+     @deprecated(new_function_name="update_file_stream")
+     def updateFileStream(
+         self,
+         jobStoreFileID: str,
+         encoding: Optional[str] = None,
+         errors: Optional[str] = None,
+     ) -> ContextManager[IO[Any]]:
          return self.update_file_stream(jobStoreFileID, encoding, errors)

      @abstractmethod
      @contextmanager
-     def update_file_stream(self,
-                            file_id: str,
-                            encoding: Optional[str] = None,
-                            errors: Optional[str] = None) -> Iterator[IO[Any]]:
+     def update_file_stream(
+         self, file_id: str, encoding: Optional[str] = None, errors: Optional[str] = None
+     ) -> Iterator[IO[Any]]:
          """
          Replaces the existing version of a file in the job store. Similar to writeFile, but
          returns a context manager yielding a file handle which can be written to. The
@@ -1537,20 +1633,29 @@
      # with specific jobs.
      ##########################################

-     sharedFileNameRegex = re.compile(r'^[a-zA-Z0-9._-]+$')
+     sharedFileNameRegex = re.compile(r"^[a-zA-Z0-9._-]+$")

-     @deprecated(new_function_name='write_shared_file_stream')
-     def writeSharedFileStream(self, sharedFileName: str, isProtected: Optional[bool] = None, encoding: Optional[str] = None,
-                               errors: Optional[str] = None) -> ContextManager[IO[bytes]]:
-         return self.write_shared_file_stream(sharedFileName, isProtected, encoding, errors)
+     @deprecated(new_function_name="write_shared_file_stream")
+     def writeSharedFileStream(
+         self,
+         sharedFileName: str,
+         isProtected: Optional[bool] = None,
+         encoding: Optional[str] = None,
+         errors: Optional[str] = None,
+     ) -> ContextManager[IO[bytes]]:
+         return self.write_shared_file_stream(
+             sharedFileName, isProtected, encoding, errors
+         )

      @abstractmethod
      @contextmanager
-     def write_shared_file_stream(self,
-                                  shared_file_name: str,
-                                  encrypted: Optional[bool] = None,
-                                  encoding: Optional[str] = None,
-                                  errors: Optional[str] = None) -> Iterator[IO[bytes]]:
+     def write_shared_file_stream(
+         self,
+         shared_file_name: str,
+         encrypted: Optional[bool] = None,
+         encoding: Optional[str] = None,
+         errors: Optional[str] = None,
+     ) -> Iterator[IO[bytes]]:
          """
          Returns a context manager yielding a writable file handle to the global file referenced
          by the given name. File will be created in an atomic manner.
@@ -1575,19 +1680,23 @@
          """
          raise NotImplementedError()

-     @deprecated(new_function_name='read_shared_file_stream')
-     def readSharedFileStream(self,
-                              sharedFileName: str,
-                              encoding: Optional[str] = None,
-                              errors: Optional[str] = None) -> ContextManager[IO[bytes]]:
+     @deprecated(new_function_name="read_shared_file_stream")
+     def readSharedFileStream(
+         self,
+         sharedFileName: str,
+         encoding: Optional[str] = None,
+         errors: Optional[str] = None,
+     ) -> ContextManager[IO[bytes]]:
          return self.read_shared_file_stream(sharedFileName, encoding, errors)

      @abstractmethod
      @contextmanager
-     def read_shared_file_stream(self,
-                                 shared_file_name: str,
-                                 encoding: Optional[str] = None,
-                                 errors: Optional[str] = None) -> Iterator[IO[bytes]]:
+     def read_shared_file_stream(
+         self,
+         shared_file_name: str,
+         encoding: Optional[str] = None,
+         errors: Optional[str] = None,
+     ) -> Iterator[IO[bytes]]:
          """
          Returns a context manager yielding a readable file handle to the global file referenced
          by the given name.
@@ -1606,7 +1715,7 @@
          """
          raise NotImplementedError()

-     @deprecated(new_function_name='write_logs')
+     @deprecated(new_function_name="write_logs")
      def writeStatsAndLogging(self, statsAndLoggingString: str) -> None:
          return self.write_logs(statsAndLoggingString)

@@ -1622,8 +1731,10 @@
          """
          raise NotImplementedError()

-     @deprecated(new_function_name='read_logs')
-     def readStatsAndLogging(self, callback: Callable[..., Any], readAll: bool = False) -> int:
+     @deprecated(new_function_name="read_logs")
+     def readStatsAndLogging(
+         self, callback: Callable[..., Any], readAll: bool = False
+     ) -> int:
          return self.read_logs(callback, readAll)

      @abstractmethod
@@ -1658,8 +1769,8 @@
          this method. Other methods will rely on always having the most current
          pid available. So far there is no reason to store any old pids.
          """
-         with self.write_shared_file_stream('pid.log') as f:
-             f.write(str(os.getpid()).encode('utf-8'))
+         with self.write_shared_file_stream("pid.log") as f:
+             f.write(str(os.getpid()).encode("utf-8"))

      def read_leader_pid(self) -> int:
          """
@@ -1667,7 +1778,7 @@

          :raise NoSuchFileException: If the PID file doesn't exist.
          """
-         with self.read_shared_file_stream('pid.log') as f:
+         with self.read_shared_file_stream("pid.log") as f:
              return int(f.read().strip())

      def write_leader_node_id(self) -> None:
@@ -1676,7 +1787,7 @@
          by the leader.
          """
          with self.write_shared_file_stream("leader_node_id.log") as f:
-             f.write(getNodeID().encode('utf-8'))
+             f.write(getNodeID().encode("utf-8"))

      def read_leader_node_id(self) -> str:
          """
@@ -1685,7 +1796,7 @@

          :raise NoSuchFileException: If the node ID file doesn't exist.
          """
          with self.read_shared_file_stream("leader_node_id.log") as f:
-             return f.read().decode('utf-8').strip()
+             return f.read().decode("utf-8").strip()

      def write_kill_flag(self, kill: bool = False) -> None:
          """
@@ -1698,7 +1809,7 @@
          workers are expected to be cleaned up by the leader.
          """
          with self.write_shared_file_stream("_toil_kill_flag") as f:
-             f.write(("YES" if kill else "NO").encode('utf-8'))
+             f.write(("YES" if kill else "NO").encode("utf-8"))

      def read_kill_flag(self) -> bool:
          """
@@ -1739,25 +1850,40 @@
          if not cls._validateSharedFileName(sharedFileName):
              raise ValueError("Not a valid shared file name: '%s'." % sharedFileName)

+
  class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
      """
      A mostly fake JobStore to access URLs not really associated with real job
      stores.
      """

+     @classmethod
+     def _setup_ftp(cls) -> FtpFsAccess:
+         # FTP connections are not reused. Ideally, a thread should watch any reused FTP connections
+         # and close them when necessary
+         return FtpFsAccess()
+
      @classmethod
      def _supports_url(cls, url: ParseResult, export: bool = False) -> bool:
-         return url.scheme.lower() in ('http', 'https', 'ftp') and not export
+         return url.scheme.lower() in ("http", "https", "ftp") and not export

      @classmethod
      def _url_exists(cls, url: ParseResult) -> bool:
+         # Deal with FTP first to support user/password auth
+         if url.scheme.lower() == "ftp":
+             ftp = cls._setup_ftp()
+             return ftp.exists(url.geturl())
+
          try:
-             # TODO: Figure out how to HEAD instead of this.
-             with cls._open_url(url):
+             with closing(urlopen(Request(url.geturl(), method="HEAD"))):
                  return True
-         except:
-             pass
-         return False
+         except HTTPError as e:
+             if e.code in (404, 410):
+                 return False
+             else:
+                 raise
+         # Any other errors we should pass through because something really went
+         # wrong (e.g. server is broken today but file may usually exist)

      @classmethod
      @retry(
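The rewritten _url_exists above replaces a full GET through _open_url (guarded by a bare except that swallowed every failure) with an HTTP HEAD request, treating 404 and 410 as a definite "does not exist" and propagating all other errors. The same pattern as a standalone sketch, using only the standard library:

    from contextlib import closing
    from urllib.error import HTTPError
    from urllib.request import Request, urlopen

    def url_exists(url: str) -> bool:
        try:
            with closing(urlopen(Request(url, method="HEAD"))):
                return True
        except HTTPError as e:
            if e.code in (404, 410):  # the resource is known to be absent
                return False
            raise  # any other status means we could not tell, so fail loudly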
@@ -1767,17 +1893,19 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
          ]
      )
      def _get_size(cls, url: ParseResult) -> Optional[int]:
-         if url.scheme.lower() == 'ftp':
-             return None
-         with closing(urlopen(url.geturl())) as readable:
-             # just read the header for content length
-             size = readable.info().get('content-length')
-             return int(size) if size is not None else None
+         if url.scheme.lower() == "ftp":
+             ftp = cls._setup_ftp()
+             return ftp.size(url.geturl())
+
+         # just read the header for content length
+         resp = urlopen(Request(url.geturl(), method="HEAD"))
+         size = resp.info().get("content-length")
+         return int(size) if size is not None else None

      @classmethod
      def _read_from_url(
          cls, url: ParseResult, writable: Union[IO[bytes], IO[str]]
-     ) -> Tuple[int, bool]:
+     ) -> tuple[int, bool]:
          # We can't actually retry after we start writing.
          # TODO: Implement retry with byte range requests
          with cls._open_url(url) as readable:
@@ -1786,8 +1914,10 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
              # nested function can modify it without creating its own
              # local with the same name.
              size = [0]
+
              def count(l: int) -> None:
                  size[0] += l
+
              counter = WriteWatchingStream(writable)
              counter.onWrite(count)

@@ -1799,18 +1929,32 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
      @retry(
          errors=[
              BadStatusLine,
-             ErrorCondition(error=HTTPError, error_codes=[408, 500, 503]),
+             ErrorCondition(error=HTTPError, error_codes=[408, 429, 500, 502, 503]),
          ]
      )
      def _open_url(cls, url: ParseResult) -> IO[bytes]:
+         # Deal with FTP first so we support user/password auth
+         if url.scheme.lower() == "ftp":
+             ftp = cls._setup_ftp()
+             # we open in read mode as write mode is not supported
+             return ftp.open(url.geturl(), mode="r")
+
          try:
              return cast(IO[bytes], closing(urlopen(url.geturl())))
          except HTTPError as e:
-             if e.code == 404:
+             if e.code in (404, 410):
                  # Translate into a FileNotFoundError for detecting
-                 # un-importable files
+                 # known nonexistent files
                  raise FileNotFoundError(str(url)) from e
              else:
+                 # Other codes indicate a real problem with the server; we don't
+                 # want to e.g. run a workflow without an optional input that
+                 # the user specified a path to just because the server was
+                 # busy.
+
+                 # Sometimes we expect to see this when polling existence for
+                 # inputs at guessed paths, so don't complain *too* loudly here.
+                 logger.debug("Unusual status %d for URL %s", e.code, str(url))
                  raise

      @classmethod
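The change above widens the set of retried HTTP statuses from 408/500/503 to also cover 429 (rate limiting) and 502 (bad gateway). A usage sketch of toil's retry decorator with the same ErrorCondition configuration; fetch() is an illustrative function, not part of toil:

    from http.client import BadStatusLine
    from urllib.error import HTTPError
    from urllib.request import urlopen

    from toil.lib.retry import ErrorCondition, retry

    @retry(
        errors=[
            BadStatusLine,
            ErrorCondition(error=HTTPError, error_codes=[408, 429, 500, 502, 503]),
        ]
    )
    def fetch(url: str) -> bytes:
        # Transient server-side failures are retried; e.g. a 404 surfaces immediately.
        with urlopen(url) as resp:
            return resp.read()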
@@ -1819,6 +1963,6 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
          return False

      @classmethod
-     def _list_url(cls, url: ParseResult) -> List[str]:
+     def _list_url(cls, url: ParseResult) -> list[str]:
          # TODO: Implement HTTP index parsing and FTP directory listing
          raise NotImplementedError("HTTP and FTP URLs cannot yet be listed")