toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193)
  1. toil/__init__.py +122 -315
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +173 -89
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
  5. toil/batchSystems/awsBatch.py +244 -135
  6. toil/batchSystems/cleanup_support.py +26 -16
  7. toil/batchSystems/contained_executor.py +31 -28
  8. toil/batchSystems/gridengine.py +86 -50
  9. toil/batchSystems/htcondor.py +166 -89
  10. toil/batchSystems/kubernetes.py +632 -382
  11. toil/batchSystems/local_support.py +20 -15
  12. toil/batchSystems/lsf.py +134 -81
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +290 -151
  16. toil/batchSystems/mesos/executor.py +79 -50
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +46 -28
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +296 -125
  21. toil/batchSystems/slurm.py +603 -138
  22. toil/batchSystems/torque.py +47 -33
  23. toil/bus.py +186 -76
  24. toil/common.py +664 -368
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1136 -483
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +63 -42
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +140 -60
  32. toil/fileStores/cachingFileStore.py +717 -269
  33. toil/fileStores/nonCachingFileStore.py +116 -87
  34. toil/job.py +1225 -368
  35. toil/jobStores/abstractJobStore.py +416 -266
  36. toil/jobStores/aws/jobStore.py +863 -477
  37. toil/jobStores/aws/utils.py +201 -120
  38. toil/jobStores/conftest.py +3 -2
  39. toil/jobStores/fileJobStore.py +292 -154
  40. toil/jobStores/googleJobStore.py +140 -74
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +668 -272
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +74 -31
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +214 -39
  49. toil/lib/aws/utils.py +287 -231
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +104 -47
  53. toil/lib/docker.py +131 -103
  54. toil/lib/ec2.py +361 -199
  55. toil/lib/ec2nodes.py +174 -106
  56. toil/lib/encryption/_dummy.py +5 -3
  57. toil/lib/encryption/_nacl.py +10 -6
  58. toil/lib/encryption/conftest.py +1 -0
  59. toil/lib/exceptions.py +26 -7
  60. toil/lib/expando.py +5 -3
  61. toil/lib/ftp_utils.py +217 -0
  62. toil/lib/generatedEC2Lists.py +127 -19
  63. toil/lib/humanize.py +6 -2
  64. toil/lib/integration.py +341 -0
  65. toil/lib/io.py +141 -15
  66. toil/lib/iterables.py +4 -2
  67. toil/lib/memoize.py +12 -8
  68. toil/lib/misc.py +66 -21
  69. toil/lib/objects.py +2 -2
  70. toil/lib/resources.py +68 -15
  71. toil/lib/retry.py +126 -81
  72. toil/lib/threading.py +299 -82
  73. toil/lib/throttle.py +16 -15
  74. toil/options/common.py +843 -409
  75. toil/options/cwl.py +175 -90
  76. toil/options/runner.py +50 -0
  77. toil/options/wdl.py +73 -17
  78. toil/provisioners/__init__.py +117 -46
  79. toil/provisioners/abstractProvisioner.py +332 -157
  80. toil/provisioners/aws/__init__.py +70 -33
  81. toil/provisioners/aws/awsProvisioner.py +1145 -715
  82. toil/provisioners/clusterScaler.py +541 -279
  83. toil/provisioners/gceProvisioner.py +282 -179
  84. toil/provisioners/node.py +155 -79
  85. toil/realtimeLogger.py +34 -22
  86. toil/resource.py +137 -75
  87. toil/server/app.py +128 -62
  88. toil/server/celery_app.py +3 -1
  89. toil/server/cli/wes_cwl_runner.py +82 -53
  90. toil/server/utils.py +54 -28
  91. toil/server/wes/abstract_backend.py +64 -26
  92. toil/server/wes/amazon_wes_utils.py +21 -15
  93. toil/server/wes/tasks.py +121 -63
  94. toil/server/wes/toil_backend.py +142 -107
  95. toil/server/wsgi_app.py +4 -3
  96. toil/serviceManager.py +58 -22
  97. toil/statsAndLogging.py +224 -70
  98. toil/test/__init__.py +282 -183
  99. toil/test/batchSystems/batchSystemTest.py +460 -210
  100. toil/test/batchSystems/batch_system_plugin_test.py +90 -0
  101. toil/test/batchSystems/test_gridengine.py +173 -0
  102. toil/test/batchSystems/test_lsf_helper.py +67 -58
  103. toil/test/batchSystems/test_slurm.py +110 -49
  104. toil/test/cactus/__init__.py +0 -0
  105. toil/test/cactus/test_cactus_integration.py +56 -0
  106. toil/test/cwl/cwlTest.py +496 -287
  107. toil/test/cwl/measure_default_memory.cwl +12 -0
  108. toil/test/cwl/not_run_required_input.cwl +29 -0
  109. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  110. toil/test/cwl/seqtk_seq.cwl +1 -1
  111. toil/test/docs/scriptsTest.py +69 -46
  112. toil/test/jobStores/jobStoreTest.py +427 -264
  113. toil/test/lib/aws/test_iam.py +118 -50
  114. toil/test/lib/aws/test_s3.py +16 -9
  115. toil/test/lib/aws/test_utils.py +5 -6
  116. toil/test/lib/dockerTest.py +118 -141
  117. toil/test/lib/test_conversions.py +113 -115
  118. toil/test/lib/test_ec2.py +58 -50
  119. toil/test/lib/test_integration.py +104 -0
  120. toil/test/lib/test_misc.py +12 -5
  121. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  122. toil/test/mesos/helloWorld.py +7 -6
  123. toil/test/mesos/stress.py +25 -20
  124. toil/test/options/__init__.py +13 -0
  125. toil/test/options/options.py +42 -0
  126. toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
  127. toil/test/provisioners/clusterScalerTest.py +440 -250
  128. toil/test/provisioners/clusterTest.py +166 -44
  129. toil/test/provisioners/gceProvisionerTest.py +174 -100
  130. toil/test/provisioners/provisionerTest.py +25 -13
  131. toil/test/provisioners/restartScript.py +5 -4
  132. toil/test/server/serverTest.py +188 -141
  133. toil/test/sort/restart_sort.py +137 -68
  134. toil/test/sort/sort.py +134 -66
  135. toil/test/sort/sortTest.py +91 -49
  136. toil/test/src/autoDeploymentTest.py +141 -101
  137. toil/test/src/busTest.py +20 -18
  138. toil/test/src/checkpointTest.py +8 -2
  139. toil/test/src/deferredFunctionTest.py +49 -35
  140. toil/test/src/dockerCheckTest.py +32 -24
  141. toil/test/src/environmentTest.py +135 -0
  142. toil/test/src/fileStoreTest.py +539 -272
  143. toil/test/src/helloWorldTest.py +7 -4
  144. toil/test/src/importExportFileTest.py +61 -31
  145. toil/test/src/jobDescriptionTest.py +46 -21
  146. toil/test/src/jobEncapsulationTest.py +2 -0
  147. toil/test/src/jobFileStoreTest.py +74 -50
  148. toil/test/src/jobServiceTest.py +187 -73
  149. toil/test/src/jobTest.py +121 -71
  150. toil/test/src/miscTests.py +19 -18
  151. toil/test/src/promisedRequirementTest.py +82 -36
  152. toil/test/src/promisesTest.py +7 -6
  153. toil/test/src/realtimeLoggerTest.py +10 -6
  154. toil/test/src/regularLogTest.py +71 -37
  155. toil/test/src/resourceTest.py +80 -49
  156. toil/test/src/restartDAGTest.py +36 -22
  157. toil/test/src/resumabilityTest.py +9 -2
  158. toil/test/src/retainTempDirTest.py +45 -14
  159. toil/test/src/systemTest.py +12 -8
  160. toil/test/src/threadingTest.py +44 -25
  161. toil/test/src/toilContextManagerTest.py +10 -7
  162. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  163. toil/test/src/workerTest.py +73 -23
  164. toil/test/utils/toilDebugTest.py +103 -33
  165. toil/test/utils/toilKillTest.py +4 -5
  166. toil/test/utils/utilsTest.py +245 -106
  167. toil/test/wdl/wdltoil_test.py +818 -149
  168. toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
  169. toil/toilState.py +120 -35
  170. toil/utils/toilConfig.py +13 -4
  171. toil/utils/toilDebugFile.py +44 -27
  172. toil/utils/toilDebugJob.py +214 -27
  173. toil/utils/toilDestroyCluster.py +11 -6
  174. toil/utils/toilKill.py +8 -3
  175. toil/utils/toilLaunchCluster.py +256 -140
  176. toil/utils/toilMain.py +37 -16
  177. toil/utils/toilRsyncCluster.py +32 -14
  178. toil/utils/toilSshCluster.py +49 -22
  179. toil/utils/toilStats.py +356 -273
  180. toil/utils/toilStatus.py +292 -139
  181. toil/utils/toilUpdateEC2Instances.py +3 -1
  182. toil/version.py +12 -12
  183. toil/wdl/utils.py +5 -5
  184. toil/wdl/wdltoil.py +3913 -1033
  185. toil/worker.py +367 -184
  186. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
  187. toil-8.0.0.dist-info/METADATA +173 -0
  188. toil-8.0.0.dist-info/RECORD +253 -0
  189. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
  190. toil-6.1.0a1.dist-info/METADATA +0 -125
  191. toil-6.1.0a1.dist-info/RECORD +0 -237
  192. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
  193. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
@@ -16,45 +16,39 @@ import os
 import pickle
 import re
 import shutil
-import sys
 from abc import ABC, ABCMeta, abstractmethod
+from collections.abc import Iterator, ValuesView
 from contextlib import closing, contextmanager
 from datetime import timedelta
 from http.client import BadStatusLine
-from typing import (IO,
-                    TYPE_CHECKING,
-                    Any,
-                    Callable,
-                    ContextManager,
-                    Dict,
-                    Iterable,
-                    Iterator,
-                    List,
-                    Optional,
-                    Set,
-                    Tuple,
-                    Union,
-                    ValuesView,
-                    cast,
-                    overload)
-
-if sys.version_info >= (3, 8):
-    from typing import Literal
-else:
-    from typing_extensions import Literal
-
+from typing import (
+    IO,
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    ContextManager,
+    Literal,
+    Optional,
+    Union,
+    cast,
+    overload,
+)
 from urllib.error import HTTPError
 from urllib.parse import ParseResult, urlparse
-from urllib.request import urlopen
+from urllib.request import urlopen, Request
 from uuid import uuid4
 
 from toil.common import Config, getNodeID, safeUnpickleFromStream
 from toil.fileStores import FileID
-from toil.job import (CheckpointJobDescription,
-                      JobDescription,
-                      JobException,
-                      ServiceJobDescription)
+from toil.job import (
+    CheckpointJobDescription,
+    JobDescription,
+    JobException,
+    ServiceJobDescription,
+)
+from toil.lib.ftp_utils import FtpFsAccess
 from toil.lib.compatibility import deprecated
+from toil.lib.exceptions import UnimplementedURLException
 from toil.lib.io import WriteWatchingStream
 from toil.lib.memoize import memoize
 from toil.lib.retry import ErrorCondition, retry
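The removed `typing_extensions` fallback and the `List`/`Dict`/`Tuple`/`Set` aliases go away together: `Literal` has lived in `typing` since Python 3.8, and the builtin-generic annotations (`list[str]`, `dict[str, str]`, `tuple[int, bool]`) used throughout the rest of this diff are evaluated at import time, implying a Python 3.9+ floor (PEP 585). A minimal illustration of the equivalence (the names here are illustrative, not from toil):

```python
from typing import Literal, Optional

Operation = Literal["import", "export"]

# PEP 585: builtin containers are subscriptable, replacing typing.List etc.
def pick_stores(op: Operation, names: list[str]) -> Optional[dict[str, int]]:
    return {name: i for i, name in enumerate(names)} if op == "import" else None
```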
@@ -67,10 +61,22 @@ logger = logging.getLogger(__name__)
 try:
     from botocore.exceptions import ProxyConnectionError
 except ImportError:
+
     class ProxyConnectionError(BaseException):  # type: ignore
         """Dummy class."""
 
 
+class LocatorException(Exception):
+    """
+    Base exception class for all locator exceptions.
+    For example, job store/aws bucket exceptions where they already exist
+    """
+
+    def __init__(self, error_msg: str, locator: str, prefix: Optional[str] = None):
+        full_locator = locator if prefix is None else f"{prefix}:{locator}"
+        super().__init__(error_msg % full_locator)
+
+
 class InvalidImportExportUrlException(Exception):
     def __init__(self, url: ParseResult) -> None:
         """
@@ -78,24 +84,10 @@ class InvalidImportExportUrlException(Exception):
         """
         super().__init__("The URL '%s' is invalid." % url.geturl())
 
-class UnimplementedURLException(RuntimeError):
-    def __init__(self, url: ParseResult, operation: str) -> None:
-        """
-        Make a new exception to report that a URL scheme is not implemented, or
-        that the implementation can't be loaded because its dependencies are
-        not installed.
-
-        :param url: The given URL
-        :param operation: Whether we are trying to 'import' or 'export'
-        """
-        super().__init__(
-            f"No available job store implementation can {operation} the URL "
-            f"'{url.geturl()}'. Ensure Toil has been installed "
-            f"with the appropriate extras."
-        )
 
 class NoSuchJobException(Exception):
     """Indicates that the specified job does not exist."""
+
     def __init__(self, jobStoreID: FileID):
         """
         :param str jobStoreID: the jobStoreID that was mistakenly assumed to exist
@@ -105,17 +97,21 @@ class NoSuchJobException(Exception):
 
 class ConcurrentFileModificationException(Exception):
     """Indicates that the file was attempted to be modified by multiple processes at once."""
+
     def __init__(self, jobStoreFileID: FileID):
         """
         :param jobStoreFileID: the ID of the file that was modified by multiple workers
             or processes concurrently
         """
-        super().__init__('Concurrent update to file %s detected.' % jobStoreFileID)
+        super().__init__("Concurrent update to file %s detected." % jobStoreFileID)
 
 
 class NoSuchFileException(Exception):
     """Indicates that the specified file does not exist."""
-    def __init__(self, jobStoreFileID: FileID, customName: Optional[str] = None, *extra: Any):
+
+    def __init__(
+        self, jobStoreFileID: FileID, customName: Optional[str] = None, *extra: Any
+    ):
         """
         :param jobStoreFileID: the ID of the file that was mistakenly assumed to exist
         :param customName: optionally, an alternate name for the nonexistent file
@@ -136,24 +132,33 @@ class NoSuchFileException(Exception):
         super().__init__(message)
 
 
-class NoSuchJobStoreException(Exception):
+class NoSuchJobStoreException(LocatorException):
     """Indicates that the specified job store does not exist."""
-    def __init__(self, locator: str):
+
+    def __init__(self, locator: str, prefix: str):
         """
         :param str locator: The location of the job store
         """
-        super().__init__("The job store '%s' does not exist, so there is nothing to restart." % locator)
+        super().__init__(
+            "The job store '%s' does not exist, so there is nothing to restart.",
+            locator,
+            prefix,
+        )
 
 
-class JobStoreExistsException(Exception):
+class JobStoreExistsException(LocatorException):
     """Indicates that the specified job store already exists."""
-    def __init__(self, locator: str):
+
+    def __init__(self, locator: str, prefix: str):
         """
         :param str locator: The location of the job store
         """
         super().__init__(
             "The job store '%s' already exists. Use --restart to resume the workflow, or remove "
-            "the job store with 'toil clean' to start the workflow from scratch." % locator)
+            "the job store with 'toil clean' to start the workflow from scratch.",
+            locator,
+            prefix,
+        )
 
 
 class AbstractJobStore(ABC):
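The two job-store exceptions above now delegate message formatting to the new `LocatorException` base class, which splices an optional scheme prefix onto the locator before applying the `%s` template. A minimal standalone sketch of that composition (mirroring the added code rather than importing toil):

```python
from typing import Optional

class LocatorException(Exception):
    """Formats a '%s'-style template with a possibly prefixed locator."""

    def __init__(self, error_msg: str, locator: str, prefix: Optional[str] = None):
        full_locator = locator if prefix is None else f"{prefix}:{locator}"
        super().__init__(error_msg % full_locator)

class NoSuchJobStoreException(LocatorException):
    def __init__(self, locator: str, prefix: str):
        super().__init__(
            "The job store '%s' does not exist, so there is nothing to restart.",
            locator,
            prefix,
        )

print(NoSuchJobStoreException("/tmp/store", "file"))
# The job store 'file:/tmp/store' does not exist, so there is nothing to restart.
```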
@@ -205,7 +210,7 @@ class AbstractJobStore(ABC):
         self.__config = config
         self.write_config()
 
-    @deprecated(new_function_name='write_config')
+    @deprecated(new_function_name="write_config")
     def writeConfig(self) -> None:
         return self.write_config()
 
@@ -214,7 +219,9 @@
         Persists the value of the :attr:`AbstractJobStore.config` attribute to the
         job store, so that it can be retrieved later by other instances of this class.
         """
-        with self.write_shared_file_stream('config.pickle', encrypted=False) as fileHandle:
+        with self.write_shared_file_stream(
+            "config.pickle", encrypted=False
+        ) as fileHandle:
             pickle.dump(self.__config, fileHandle, pickle.HIGHEST_PROTOCOL)
 
     def resume(self) -> None:
@@ -224,7 +231,7 @@
 
         :raises NoSuchJobStoreException: if the physical storage for this job store doesn't exist
         """
-        with self.read_shared_file_stream('config.pickle') as fileHandle:
+        with self.read_shared_file_stream("config.pickle") as fileHandle:
             config = safeUnpickleFromStream(fileHandle)
             assert config.workflowID is not None
             self.__config = config
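Together, `write_config` and `resume` round-trip the workflow `Config` through the shared file `config.pickle`. A hedged sketch of that round trip, with `io.BytesIO` standing in for the job store's shared-file streams and a plain dict standing in for `toil.common.Config` (the real `resume()` reads back through `safeUnpickleFromStream`, not bare `pickle.load`):

```python
import io
import pickle

config = {"workflowID": "1f2e3d"}  # stand-in for a toil.common.Config object

# write_config(): pickle the config into the shared 'config.pickle' stream
stream = io.BytesIO()
pickle.dump(config, stream, pickle.HIGHEST_PROTOCOL)

# resume(): read the stream back and check the workflow was initialized
stream.seek(0)
restored = pickle.load(stream)
assert restored["workflowID"] is not None
```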
@@ -242,9 +249,9 @@
         """
         return self.__locator
 
-    rootJobStoreIDFileName = 'rootJobStoreID'
+    rootJobStoreIDFileName = "rootJobStoreID"
 
-    @deprecated(new_function_name='set_root_job')
+    @deprecated(new_function_name="set_root_job")
     def setRootJob(self, rootJobStoreID: FileID) -> None:
         """Set the root job of the workflow backed by this job store."""
         return self.set_root_job(rootJobStoreID)
@@ -256,9 +263,9 @@
         :param job_id: The ID of the job to set as root
         """
         with self.write_shared_file_stream(self.rootJobStoreIDFileName) as f:
-            f.write(job_id.encode('utf-8'))
+            f.write(job_id.encode("utf-8"))
 
-    @deprecated(new_function_name='load_root_job')
+    @deprecated(new_function_name="load_root_job")
     def loadRootJob(self) -> JobDescription:
         return self.load_root_job()
 
@@ -273,16 +280,18 @@
         """
         try:
             with self.read_shared_file_stream(self.rootJobStoreIDFileName) as f:
-                rootJobStoreID = f.read().decode('utf-8')
+                rootJobStoreID = f.read().decode("utf-8")
         except NoSuchFileException:
-            raise JobException('No job has been set as the root in this job store')
+            raise JobException("No job has been set as the root in this job store")
         if not self.job_exists(rootJobStoreID):
-            raise JobException("The root job '%s' doesn't exist. Either the Toil workflow "
-                               "is finished or has never been started" % rootJobStoreID)
+            raise JobException(
+                "The root job '%s' doesn't exist. Either the Toil workflow "
+                "is finished or has never been started" % rootJobStoreID
+            )
         return self.load_job(rootJobStoreID)
 
     # FIXME: This is only used in tests, why do we have it?
-    @deprecated(new_function_name='create_root_job')
+    @deprecated(new_function_name="create_root_job")
     def createRootJob(self, desc: JobDescription) -> JobDescription:
         return self.create_root_job(desc)
 
@@ -299,7 +308,7 @@
         self.set_root_job(job_description.jobStoreID)
         return job_description
 
-    @deprecated(new_function_name='get_root_job_return_value')
+    @deprecated(new_function_name="get_root_job_return_value")
     def getRootJobReturnValue(self) -> Any:
         return self.get_root_job_return_value()
 
@@ -310,12 +319,12 @@
         Raises an exception if the root job hasn't fulfilled its promise yet.
         """
         # Parse out the return value from the root job
-        with self.read_shared_file_stream('rootJobReturnValue') as fH:
+        with self.read_shared_file_stream("rootJobReturnValue") as fH:
             return safeUnpickleFromStream(fH)
 
     @staticmethod
     @memoize
-    def _get_job_store_classes() -> List['AbstractJobStore']:
+    def _get_job_store_classes() -> list["AbstractJobStore"]:
         """
         A list of concrete AbstractJobStore implementations whose dependencies are installed.
 
@@ -325,23 +334,30 @@
             "toil.jobStores.fileJobStore.FileJobStore",
             "toil.jobStores.googleJobStore.GoogleJobStore",
             "toil.jobStores.aws.jobStore.AWSJobStore",
-            "toil.jobStores.abstractJobStore.JobStoreSupport")
+            "toil.jobStores.abstractJobStore.JobStoreSupport",
+        )
         jobStoreClasses = []
         for className in jobStoreClassNames:
-            moduleName, className = className.rsplit('.', 1)
+            moduleName, className = className.rsplit(".", 1)
             from importlib import import_module
+
             try:
                 module = import_module(moduleName)
             except (ImportError, ProxyConnectionError):
-                logger.debug("Unable to import '%s' as is expected if the corresponding extra was "
-                             "omitted at installation time.", moduleName)
+                logger.debug(
+                    "Unable to import '%s' as is expected if the corresponding extra was "
+                    "omitted at installation time.",
+                    moduleName,
+                )
             else:
                 jobStoreClass = getattr(module, className)
                 jobStoreClasses.append(jobStoreClass)
         return jobStoreClasses
 
     @classmethod
-    def _findJobStoreForUrl(cls, url: ParseResult, export: bool = False) -> 'AbstractJobStore':
+    def _findJobStoreForUrl(
+        cls, url: ParseResult, export: bool = False
+    ) -> "AbstractJobStore":
         """
         Returns the AbstractJobStore subclass that supports the given URL.
 
@@ -360,46 +376,58 @@
     # returns a file ID. Explain this to MyPy.
 
     @overload
-    def importFile(self,
-                   srcUrl: str,
-                   sharedFileName: str,
-                   hardlink: bool = False,
-                   symlink: bool = True) -> None: ...
+    def importFile(
+        self,
+        srcUrl: str,
+        sharedFileName: str,
+        hardlink: bool = False,
+        symlink: bool = True,
+    ) -> None: ...
 
     @overload
-    def importFile(self,
-                   srcUrl: str,
-                   sharedFileName: None = None,
-                   hardlink: bool = False,
-                   symlink: bool = True) -> FileID: ...
-
-    @deprecated(new_function_name='import_file')
-    def importFile(self,
-                   srcUrl: str,
-                   sharedFileName: Optional[str] = None,
-                   hardlink: bool = False,
-                   symlink: bool = True) -> Optional[FileID]:
+    def importFile(
+        self,
+        srcUrl: str,
+        sharedFileName: None = None,
+        hardlink: bool = False,
+        symlink: bool = True,
+    ) -> FileID: ...
+
+    @deprecated(new_function_name="import_file")
+    def importFile(
+        self,
+        srcUrl: str,
+        sharedFileName: Optional[str] = None,
+        hardlink: bool = False,
+        symlink: bool = True,
+    ) -> Optional[FileID]:
         return self.import_file(srcUrl, sharedFileName, hardlink, symlink)
 
     @overload
-    def import_file(self,
-                    src_uri: str,
-                    shared_file_name: str,
-                    hardlink: bool = False,
-                    symlink: bool = True) -> None: ...
+    def import_file(
+        self,
+        src_uri: str,
+        shared_file_name: str,
+        hardlink: bool = False,
+        symlink: bool = True,
+    ) -> None: ...
 
     @overload
-    def import_file(self,
-                    src_uri: str,
-                    shared_file_name: None = None,
-                    hardlink: bool = False,
-                    symlink: bool = True) -> FileID: ...
-
-    def import_file(self,
-                    src_uri: str,
-                    shared_file_name: Optional[str] = None,
-                    hardlink: bool = False,
-                    symlink: bool = True) -> Optional[FileID]:
+    def import_file(
+        self,
+        src_uri: str,
+        shared_file_name: None = None,
+        hardlink: bool = False,
+        symlink: bool = True,
+    ) -> FileID: ...
+
+    def import_file(
+        self,
+        src_uri: str,
+        shared_file_name: Optional[str] = None,
+        hardlink: bool = False,
+        symlink: bool = True,
+    ) -> Optional[FileID]:
         """
         Imports the file at the given URL into job store. The ID of the newly imported file is
         returned. If the name of a shared file name is provided, the file will be imported as
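The `@overload` pairs above encode the calling convention: passing a shared file name stores the import under that name and returns `None`, while omitting it returns the generated `FileID`. A standalone sketch of the same pattern (a toy function, not the toil implementation):

```python
from typing import Optional, overload

@overload
def import_file(src_uri: str, shared_file_name: str) -> None: ...
@overload
def import_file(src_uri: str, shared_file_name: None = None) -> str: ...

def import_file(src_uri: str, shared_file_name: Optional[str] = None) -> Optional[str]:
    if shared_file_name is not None:
        return None  # stored under the caller-chosen shared name
    return "files/no-job/file-abc123"  # placeholder for a generated file ID

assert import_file("s3://bucket/input.txt") is not None
assert import_file("s3://bucket/ref.txt", "reference.txt") is None
```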
@@ -437,18 +465,23 @@
         # subclasses of AbstractJobStore.
         parseResult = urlparse(src_uri)
         otherCls = self._findJobStoreForUrl(parseResult)
-        return self._import_file(otherCls,
-                                 parseResult,
-                                 shared_file_name=shared_file_name,
-                                 hardlink=hardlink,
-                                 symlink=symlink)
-
-    def _import_file(self,
-                     otherCls: 'AbstractJobStore',
-                     uri: ParseResult,
-                     shared_file_name: Optional[str] = None,
-                     hardlink: bool = False,
-                     symlink: bool = True) -> Optional[FileID]:
+        logger.info("Importing input %s...", src_uri)
+        return self._import_file(
+            otherCls,
+            parseResult,
+            shared_file_name=shared_file_name,
+            hardlink=hardlink,
+            symlink=symlink,
+        )
+
+    def _import_file(
+        self,
+        otherCls: "AbstractJobStore",
+        uri: ParseResult,
+        shared_file_name: Optional[str] = None,
+        hardlink: bool = False,
+        symlink: bool = True,
+    ) -> Optional[FileID]:
         """
         Import the file at the given URL using the given job store class to retrieve that file.
         See also :meth:`.importFile`. This method applies a generic approach to importing: it
@@ -478,7 +511,7 @@
             otherCls._read_from_url(uri, writable)
             return None
 
-    @deprecated(new_function_name='export_file')
+    @deprecated(new_function_name="export_file")
     def exportFile(self, jobStoreFileID: FileID, dstUrl: str) -> None:
         return self.export_file(jobStoreFileID, dstUrl)
 
@@ -497,13 +530,17 @@
         :param str file_id: The id of the file in the job store that should be exported.
 
         :param str dst_uri: URL that points to a file or object in the storage mechanism of a
-               supported URL scheme e.g. a blob in an AWS s3 bucket.
+               supported URL scheme e.g. a blob in an AWS s3 bucket. May also be a local path.
         """
+        from toil.common import Toil
+        dst_uri = Toil.normalize_uri(dst_uri)
         parseResult = urlparse(dst_uri)
         otherCls = self._findJobStoreForUrl(parseResult, export=True)
         self._export_file(otherCls, file_id, parseResult)
 
-    def _export_file(self, otherCls: 'AbstractJobStore', jobStoreFileID: FileID, url: ParseResult) -> None:
+    def _export_file(
+        self, otherCls: "AbstractJobStore", jobStoreFileID: FileID, url: ParseResult
+    ) -> None:
         """
         Refer to exportFile docstring for information about this method.
 
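The new `Toil.normalize_uri` call above is what lets `dst_uri` be a bare local path: anything without a scheme is turned into a `file:` URL before the scheme lookup runs. A hedged sketch of the local-path case on POSIX (this mimics the effect; `Toil.normalize_uri` is the real helper and handles more cases):

```python
import os
from urllib.parse import urlparse

def normalize_uri(uri: str) -> str:
    # Anything without a scheme is treated as a local path.
    if urlparse(uri).scheme == "":
        return "file://" + os.path.abspath(uri)
    return uri

assert normalize_uri("results/out.txt").startswith("file:///")
assert normalize_uri("s3://bucket/out.txt") == "s3://bucket/out.txt"
```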
@@ -518,7 +555,9 @@
         """
         self._default_export_file(otherCls, jobStoreFileID, url)
 
-    def _default_export_file(self, otherCls: 'AbstractJobStore', jobStoreFileID: FileID, url: ParseResult) -> None:
+    def _default_export_file(
+        self, otherCls: "AbstractJobStore", jobStoreFileID: FileID, url: ParseResult
+    ) -> None:
         """
         Refer to exportFile docstring for information about this method.
 
@@ -533,7 +572,7 @@
         """
         executable = False
         with self.read_file_stream(jobStoreFileID) as readable:
-            if getattr(jobStoreFileID, 'executable', False):
+            if getattr(jobStoreFileID, "executable", False):
                 executable = jobStoreFileID.executable
             otherCls._write_to_url(readable, url, executable)
 
@@ -542,6 +581,8 @@
         """
         Return True if the file at the given URI exists, and False otherwise.
 
+        May raise an error if file existence cannot be determined.
+
         :param src_uri: URL that points to a file or object in the storage
             mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
         """
@@ -572,7 +613,7 @@
         return otherCls._get_is_directory(parseResult)
 
     @classmethod
-    def list_url(cls, src_uri: str) -> List[str]:
+    def list_url(cls, src_uri: str) -> list[str]:
         """
         List the directory at the given URL. Returned path components can be
         joined with '/' onto the passed URL to form new URLs. Those that end in
@@ -597,7 +638,7 @@
         return otherCls._list_url(parseResult)
 
     @classmethod
-    def read_from_url(cls, src_uri: str, writable: IO[bytes]) -> Tuple[int, bool]:
+    def read_from_url(cls, src_uri: str, writable: IO[bytes]) -> tuple[int, bool]:
         """
         Read the given URL and write its content into the given writable stream.
 
@@ -628,6 +669,8 @@
     def _url_exists(cls, url: ParseResult) -> bool:
         """
         Return True if the item at the given URL exists, and Flase otherwise.
+
+        May raise an error if file existence cannot be determined.
         """
         raise NotImplementedError(f"No implementation for {url}")
 
@@ -655,7 +698,7 @@
 
     @classmethod
     @abstractmethod
-    def _read_from_url(cls, url: ParseResult, writable: IO[bytes]) -> Tuple[int, bool]:
+    def _read_from_url(cls, url: ParseResult, writable: IO[bytes]) -> tuple[int, bool]:
         """
         Reads the contents of the object at the specified location and writes it to the given
         writable stream.
@@ -675,7 +718,7 @@
 
     @classmethod
     @abstractmethod
-    def _list_url(cls, url: ParseResult) -> List[str]:
+    def _list_url(cls, url: ParseResult) -> list[str]:
         """
         List the contents of the given URL, which may or may not end in '/'
 
@@ -707,7 +750,12 @@
 
     @classmethod
     @abstractmethod
-    def _write_to_url(cls, readable: Union[IO[bytes], IO[str]], url: ParseResult, executable: bool = False) -> None:
+    def _write_to_url(
+        cls,
+        readable: Union[IO[bytes], IO[str]],
+        url: ParseResult,
+        executable: bool = False,
+    ) -> None:
         """
         Reads the contents of the given readable stream and writes it to the object at the
         specified location. Raises FileNotFoundError if the URL doesn't exist..
@@ -754,11 +802,11 @@
         """
         raise NotImplementedError()
 
-    @deprecated(new_function_name='get_env')
-    def getEnv(self) -> Dict[str, str]:
+    @deprecated(new_function_name="get_env")
+    def getEnv(self) -> dict[str, str]:
         return self.get_env()
 
-    def get_env(self) -> Dict[str, str]:
+    def get_env(self) -> dict[str, str]:
         """
         Returns a dictionary of environment variables that this job store requires to be set in
         order to function properly on a worker.
@@ -769,7 +817,7 @@
 
     # Cleanup functions
     def clean(
-        self, jobCache: Optional[Dict[Union[str, "TemporaryID"], JobDescription]] = None
+        self, jobCache: Optional[dict[Union[str, "TemporaryID"], JobDescription]] = None
     ) -> JobDescription:
         """
         Function to cleanup the state of a job store after a restart.
@@ -797,7 +845,9 @@
             return self.load_job(jobId)
 
         def haveJob(jobId: str) -> bool:
-            assert len(jobId) > 1, f"Job ID {jobId} too short; is a string being used as a list?"
+            assert (
+                len(jobId) > 1
+            ), f"Job ID {jobId} too short; is a string being used as a list?"
             if jobCache is not None:
                 if jobId in jobCache:
                     return True
@@ -817,13 +867,15 @@
                 jobCache[str(jobDescription.jobStoreID)] = jobDescription
             self.update_job(jobDescription)
 
-        def getJobDescriptions() -> Union[ValuesView[JobDescription], Iterator[JobDescription]]:
+        def getJobDescriptions() -> (
+            Union[ValuesView[JobDescription], Iterator[JobDescription]]
+        ):
             if jobCache is not None:
                 return jobCache.values()
             else:
                 return self.jobs()
 
-        def get_jobs_reachable_from_root() -> Set[str]:
+        def get_jobs_reachable_from_root() -> set[str]:
             """
             Traverse the job graph from the root job and return a flattened set of all active jobstore IDs.
 
@@ -833,18 +885,17 @@
             # Iterate from the root JobDescription and collate all jobs
             # that are reachable from it.
             root_job_description = self.load_root_job()
-            reachable_from_root: Set[str] = set()
+            reachable_from_root: set[str] = set()
 
-            # Add first root job outside of the loop below.
-            reachable_from_root.add(str(root_job_description.jobStoreID))
-            # add all of root's linked service jobs as well
-            for service_jobstore_id in root_job_description.services:
-                if haveJob(service_jobstore_id):
-                    reachable_from_root.add(service_jobstore_id)
-            for merged_jobstore_id in root_job_description.merged_jobs:
+            for merged_in in root_job_description.get_chain():
+                # Add the job itself and any other jobs that chained with it.
                 # Keep merged-in jobs around themselves, but don't bother
                 # exploring them, since we took their successors.
-                reachable_from_root.add(merged_jobstore_id)
+                reachable_from_root.add(merged_in.job_store_id)
+            # add all of root's linked service jobs as well
+            for service_job_store_id in root_job_description.services:
+                if haveJob(service_job_store_id):
+                    reachable_from_root.add(service_job_store_id)
 
             # Unprocessed means it might have successor jobs we need to add.
             unprocessed_job_descriptions = [root_job_description]
@@ -852,24 +903,30 @@
             while unprocessed_job_descriptions:
                 new_job_descriptions_to_process = []  # Reset.
                 for job_description in unprocessed_job_descriptions:
-                    for successor_jobstore_id in job_description.allSuccessors():
-                        if successor_jobstore_id not in reachable_from_root and haveJob(successor_jobstore_id):
-                            successor_job_description = getJobDescription(successor_jobstore_id)
-
-                            # Add each successor job.
-                            reachable_from_root.add(
-                                str(successor_job_description.jobStoreID)
-                            )
-                            # Add all of the successor's linked service jobs as well.
-                            for service_jobstore_id in successor_job_description.services:
-                                if haveJob(service_jobstore_id):
-                                    reachable_from_root.add(service_jobstore_id)
-
-                            new_job_descriptions_to_process.append(successor_job_description)
-                    for merged_jobstore_id in job_description.merged_jobs:
+                    for merged_in in job_description.get_chain():
+                        # Add the job and anything chained with it.
                         # Keep merged-in jobs around themselves, but don't bother
                        # exploring them, since we took their successors.
-                        reachable_from_root.add(merged_jobstore_id)
+                        reachable_from_root.add(merged_in.job_store_id)
+                    for successor_job_store_id in job_description.allSuccessors():
+                        if (
+                            successor_job_store_id not in reachable_from_root
+                            and haveJob(successor_job_store_id)
+                        ):
+                            successor_job_description = getJobDescription(
+                                successor_job_store_id
+                            )
+
+                            # Add all of the successor's linked service jobs as well.
+                            for (
+                                service_job_store_id
+                            ) in successor_job_description.services:
+                                if haveJob(service_job_store_id):
+                                    reachable_from_root.add(service_job_store_id)
+
+                            new_job_descriptions_to_process.append(
+                                successor_job_description
+                            )
                 unprocessed_job_descriptions = new_job_descriptions_to_process
 
             logger.debug(f"{len(reachable_from_root)} jobs reachable from root.")
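The traversal above replaces the old `merged_jobs` attribute with `JobDescription.get_chain()`: every visited description contributes itself plus all jobs chained into it, its live service jobs, and its unseen successors to the next frontier. A minimal standalone sketch of the same breadth-first walk, with toy stand-ins for `JobDescription` (in real toil, `get_chain()` yields records whose `job_store_id` names each chained job):

```python
from dataclasses import dataclass, field

@dataclass
class ChainLink:
    job_store_id: str

@dataclass
class Desc:
    job_store_id: str
    chained: list[ChainLink] = field(default_factory=list)
    services: list[str] = field(default_factory=list)
    successors: list[str] = field(default_factory=list)

    def get_chain(self) -> list[ChainLink]:
        # The job itself plus everything merged into it by chaining.
        return [ChainLink(self.job_store_id)] + self.chained

def reachable_from(root: Desc, have_job, load_job) -> set[str]:
    reachable: set[str] = set()
    for sid in root.services:                # root's live service jobs
        if have_job(sid):
            reachable.add(sid)
    frontier = [root]
    while frontier:
        next_frontier = []
        for desc in frontier:
            for link in desc.get_chain():    # the job plus chained-in jobs
                reachable.add(link.job_store_id)
            for succ_id in desc.successors:  # queue unseen live successors
                if succ_id not in reachable and have_job(succ_id):
                    succ = load_job(succ_id)
                    for sid in succ.services:
                        if have_job(sid):
                            reachable.add(sid)
                    next_frontier.append(succ)
        frontier = next_frontier
    return reachable

b = Desc("b")
a = Desc("a", successors=["b"])
jobs = {"a": a, "b": b}
assert reachable_from(a, jobs.__contains__, jobs.__getitem__) == {"a", "b"}
```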
@@ -879,22 +936,32 @@
 
         # Cleanup jobs that are not reachable from the root, and therefore orphaned
         # TODO: Avoid reiterating reachable_from_root (which may be very large)
-        unreachable = [x for x in getJobDescriptions() if x.jobStoreID not in reachable_from_root]
+        unreachable = [
+            x for x in getJobDescriptions() if x.jobStoreID not in reachable_from_root
+        ]
         for jobDescription in unreachable:
             # clean up any associated files before deletion
             for fileID in jobDescription.filesToDelete:
                 # Delete any files that should already be deleted
-                logger.warning(f"Deleting file '{fileID}'. It is marked for deletion but has not yet been removed.")
+                logger.warning(
+                    f"Deleting file '{fileID}'. It is marked for deletion but has not yet been removed."
+                )
                 self.delete_file(fileID)
             # Delete the job from us and the cache
             deleteJob(str(jobDescription.jobStoreID))
 
-        jobDescriptionsReachableFromRoot = {id: getJobDescription(id) for id in reachable_from_root}
+        jobDescriptionsReachableFromRoot = {
+            id: getJobDescription(id) for id in reachable_from_root
+        }
 
         # Clean up any checkpoint jobs -- delete any successors it
         # may have launched, and restore the job to a pristine state
         jobsDeletedByCheckpoints = set()
-        for jobDescription in [desc for desc in jobDescriptionsReachableFromRoot.values() if isinstance(desc, CheckpointJobDescription)]:
+        for jobDescription in [
+            desc
+            for desc in jobDescriptionsReachableFromRoot.values()
+            if isinstance(desc, CheckpointJobDescription)
+        ]:
             if jobDescription.jobStoreID in jobsDeletedByCheckpoints:
                 # This is a checkpoint that was nested within an
                 # earlier checkpoint, so it and all its successors are
@@ -920,20 +987,23 @@
             if len(jobDescription.filesToDelete) != 0:
                 # Delete any files that should already be deleted
                 for fileID in jobDescription.filesToDelete:
-                    logger.critical("Removing file in job store: %s that was "
-                                    "marked for deletion but not previously removed" % fileID)
+                    logger.critical(
+                        "Removing file in job store: %s that was "
+                        "marked for deletion but not previously removed" % fileID
+                    )
                     self.delete_file(fileID)
                 jobDescription.filesToDelete = []
                 changed[0] = True
 
-            # For a job whose command is already executed, remove jobs from the
+            # For a job whose body has already executed, remove jobs from the
             # stack that are already deleted. This cleans up the case that the
             # jobDescription had successors to run, but had not been updated to
             # reflect this.
-            if jobDescription.command is None:
+            if not jobDescription.has_body():
 
                 def stackSizeFn() -> int:
                     return len(list(jobDescription.allSuccessors()))
+
                 startStackSize = stackSizeFn()
                 # Remove deleted jobs
                 jobDescription.filterSuccessors(haveJob)
@@ -959,16 +1029,25 @@
                 assert isinstance(serviceJobDescription, ServiceJobDescription)
 
                 if flag == 1:
-                    logger.debug("Recreating a start service flag for job: %s, flag: %s",
-                                 jobStoreID, newFlag)
+                    logger.debug(
+                        "Recreating a start service flag for job: %s, flag: %s",
+                        jobStoreID,
+                        newFlag,
+                    )
                     serviceJobDescription.startJobStoreID = newFlag
                 elif flag == 2:
-                    logger.debug("Recreating a terminate service flag for job: %s, flag: %s",
-                                 jobStoreID, newFlag)
+                    logger.debug(
+                        "Recreating a terminate service flag for job: %s, flag: %s",
+                        jobStoreID,
+                        newFlag,
+                    )
                     serviceJobDescription.terminateJobStoreID = newFlag
                 else:
-                    logger.debug("Recreating a error service flag for job: %s, flag: %s",
-                                 jobStoreID, newFlag)
+                    logger.debug(
+                        "Recreating a error service flag for job: %s, flag: %s",
+                        jobStoreID,
+                        newFlag,
+                    )
                     assert flag == 3
                     serviceJobDescription.errorJobStoreID = newFlag
 
@@ -981,6 +1060,7 @@
 
             def servicesSizeFn() -> int:
                 return len(jobDescription.services)
+
            startServicesSize = servicesSizeFn()
 
            def replaceFlagsIfNeeded(serviceJobDescription: JobDescription) -> None:
@@ -1041,12 +1121,14 @@
 
         # Remove any crufty stats/logging files from the previous run
         logger.debug("Discarding old statistics and logs...")
+
         # We have to manually discard the stream to avoid getting
         # stuck on a blocking write from the job store.
         def discardStream(stream: Union[IO[bytes], IO[str]]) -> None:
             """Read the stream 4K at a time until EOF, discarding all input."""
             while len(stream.read(4096)) != 0:
                 pass
+
         self.read_logs(discardStream)
 
         logger.debug("Job store is clean")
@@ -1058,7 +1140,7 @@
     # existence of jobs
     ##########################################
 
-    @deprecated(new_function_name='assign_job_id')
+    @deprecated(new_function_name="assign_job_id")
     def assignID(self, jobDescription: JobDescription) -> None:
         return self.assign_job_id(jobDescription)
 
@@ -1082,7 +1164,7 @@
         """
         yield
 
-    @deprecated(new_function_name='create_job')
+    @deprecated(new_function_name="create_job")
     def create(self, jobDescription: JobDescription) -> JobDescription:
         return self.create_job(jobDescription)
 
@@ -1098,7 +1180,7 @@
         """
         raise NotImplementedError()
 
-    @deprecated(new_function_name='job_exists')
+    @deprecated(new_function_name="job_exists")
     def exists(self, jobStoreID: str) -> bool:
         return self.job_exists(jobStoreID)
 
@@ -1114,7 +1196,7 @@
     # One year should be sufficient to finish any pipeline ;-)
     publicUrlExpiration = timedelta(days=365)
 
-    @deprecated(new_function_name='get_public_url')
+    @deprecated(new_function_name="get_public_url")
     def getPublicUrl(self, fileName: str) -> str:
         return self.get_public_url(fileName)
 
@@ -1133,7 +1215,7 @@
         """
         raise NotImplementedError()
 
-    @deprecated(new_function_name='get_shared_public_url')
+    @deprecated(new_function_name="get_shared_public_url")
     def getSharedPublicUrl(self, sharedFileName: str) -> str:
         return self.get_shared_public_url(sharedFileName)
 
@@ -1155,7 +1237,7 @@
         """
         raise NotImplementedError()
 
-    @deprecated(new_function_name='load_job')
+    @deprecated(new_function_name="load_job")
     def load(self, jobStoreID: str) -> JobDescription:
         return self.load_job(jobStoreID)
 
@@ -1175,7 +1257,7 @@
         """
         raise NotImplementedError()
 
-    @deprecated(new_function_name='update_job')
+    @deprecated(new_function_name="update_job")
     def update(self, jobDescription: JobDescription) -> None:
         return self.update_job(jobDescription)
 
@@ -1190,7 +1272,7 @@
         """
         raise NotImplementedError()
 
-    @deprecated(new_function_name='delete_job')
+    @deprecated(new_function_name="delete_job")
     def delete(self, jobStoreID: str) -> None:
         return self.delete_job(jobStoreID)
 
@@ -1227,12 +1309,19 @@
     # associated with a given job.
     ##########################################
 
-    @deprecated(new_function_name='write_file')
-    def writeFile(self, localFilePath: str, jobStoreID: Optional[str] = None, cleanup: bool = False) -> str:
+    @deprecated(new_function_name="write_file")
+    def writeFile(
+        self,
+        localFilePath: str,
+        jobStoreID: Optional[str] = None,
+        cleanup: bool = False,
+    ) -> str:
         return self.write_file(localFilePath, jobStoreID, cleanup)
 
     @abstractmethod
-    def write_file(self, local_path: str, job_id: Optional[str] = None, cleanup: bool = False) -> str:
+    def write_file(
+        self, local_path: str, job_id: Optional[str] = None, cleanup: bool = False
+    ) -> str:
         """
         Takes a file (as a path) and places it in this job store. Returns an ID that can be used
         to retrieve the file at a later time. The file is written in a atomic manner. It will
@@ -1263,19 +1352,27 @@
         """
         raise NotImplementedError()
 
-    @deprecated(new_function_name='write_file_stream')
-    def writeFileStream(self, jobStoreID: Optional[str] = None, cleanup: bool = False, basename: Optional[str] = None,
-                        encoding: Optional[str] = None, errors: Optional[str] = None) -> ContextManager[Tuple[IO[bytes], str]]:
+    @deprecated(new_function_name="write_file_stream")
+    def writeFileStream(
+        self,
+        jobStoreID: Optional[str] = None,
+        cleanup: bool = False,
+        basename: Optional[str] = None,
+        encoding: Optional[str] = None,
+        errors: Optional[str] = None,
+    ) -> ContextManager[tuple[IO[bytes], str]]:
         return self.write_file_stream(jobStoreID, cleanup, basename, encoding, errors)
 
     @abstractmethod
     @contextmanager
-    def write_file_stream(self,
-                          job_id: Optional[str] = None,
-                          cleanup: bool = False,
-                          basename: Optional[str] = None,
-                          encoding: Optional[str] = None,
-                          errors: Optional[str] = None) -> Iterator[Tuple[IO[bytes], str]]:
+    def write_file_stream(
+        self,
+        job_id: Optional[str] = None,
+        cleanup: bool = False,
+        basename: Optional[str] = None,
+        encoding: Optional[str] = None,
+        errors: Optional[str] = None,
+    ) -> Iterator[tuple[IO[bytes], str]]:
         """
         Similar to writeFile, but returns a context manager yielding a tuple of
         1) a file handle which can be written to and 2) the ID of the resulting
@@ -1314,18 +1411,22 @@
         """
         raise NotImplementedError()
 
-    @deprecated(new_function_name='get_empty_file_store_id')
-    def getEmptyFileStoreID(self,
-                            jobStoreID: Optional[str] = None,
-                            cleanup: bool = False,
-                            basename: Optional[str] = None) -> str:
+    @deprecated(new_function_name="get_empty_file_store_id")
+    def getEmptyFileStoreID(
+        self,
+        jobStoreID: Optional[str] = None,
+        cleanup: bool = False,
+        basename: Optional[str] = None,
+    ) -> str:
         return self.get_empty_file_store_id(jobStoreID, cleanup, basename)
 
     @abstractmethod
-    def get_empty_file_store_id(self,
-                                job_id: Optional[str] = None,
-                                cleanup: bool = False,
-                                basename: Optional[str] = None) -> str:
+    def get_empty_file_store_id(
+        self,
+        job_id: Optional[str] = None,
+        cleanup: bool = False,
+        basename: Optional[str] = None,
+    ) -> str:
         """
         Creates an empty file in the job store and returns its ID.
         Call to fileExists(getEmptyFileStoreID(jobStoreID)) will return True.
@@ -1347,8 +1448,10 @@
         """
         raise NotImplementedError()
 
-    @deprecated(new_function_name='read_file')
-    def readFile(self, jobStoreFileID: str, localFilePath: str, symlink: bool = False) -> None:
+    @deprecated(new_function_name="read_file")
+    def readFile(
+        self, jobStoreFileID: str, localFilePath: str, symlink: bool = False
+    ) -> None:
         return self.read_file(jobStoreFileID, localFilePath, symlink)
 
     @abstractmethod
@@ -1376,7 +1479,7 @@
         """
         raise NotImplementedError()
 
-    @deprecated(new_function_name='read_file_stream')
+    @deprecated(new_function_name="read_file_stream")
     def readFileStream(
         self,
         jobStoreFileID: str,
@@ -1391,14 +1494,12 @@
         file_id: Union[FileID, str],
         encoding: Literal[None] = None,
         errors: Optional[str] = None,
-    ) -> ContextManager[IO[bytes]]:
-        ...
+    ) -> ContextManager[IO[bytes]]: ...
 
     @overload
     def read_file_stream(
         self, file_id: Union[FileID, str], encoding: str, errors: Optional[str] = None
-    ) -> ContextManager[IO[str]]:
-        ...
+    ) -> ContextManager[IO[str]]: ...
 
     @abstractmethod
     def read_file_stream(
@@ -1424,7 +1525,7 @@
         """
         raise NotImplementedError()
 
-    @deprecated(new_function_name='delete_file')
+    @deprecated(new_function_name="delete_file")
     def deleteFile(self, jobStoreFileID: str) -> None:
         return self.delete_file(jobStoreFileID)
 
@@ -1438,7 +1539,7 @@
         """
         raise NotImplementedError()
 
-    @deprecated(new_function_name='file_exists')
+    @deprecated(new_function_name="file_exists")
     def fileExists(self, jobStoreFileID: str) -> bool:
         """Determine whether a file exists in this job store."""
         return self.file_exists(jobStoreFileID)
@@ -1452,7 +1553,7 @@
         """
         raise NotImplementedError()
 
-    @deprecated(new_function_name='get_file_size')
+    @deprecated(new_function_name="get_file_size")
     def getFileSize(self, jobStoreFileID: str) -> int:
         """Get the size of the given file in bytes."""
         return self.get_file_size(jobStoreFileID)
@@ -1472,7 +1573,7 @@
         """
         raise NotImplementedError()
 
-    @deprecated(new_function_name='update_file')
+    @deprecated(new_function_name="update_file")
     def updateFile(self, jobStoreFileID: str, localFilePath: str) -> None:
         """Replaces the existing version of a file in the job store."""
         return self.update_file(jobStoreFileID, localFilePath)
@@ -1493,19 +1594,20 @@
         """
         raise NotImplementedError()
 
-    @deprecated(new_function_name='update_file_stream')
-    def updateFileStream(self,
-                         jobStoreFileID: str,
-                         encoding: Optional[str] = None,
-                         errors: Optional[str] = None) -> ContextManager[IO[Any]]:
+    @deprecated(new_function_name="update_file_stream")
+    def updateFileStream(
+        self,
+        jobStoreFileID: str,
+        encoding: Optional[str] = None,
+        errors: Optional[str] = None,
+    ) -> ContextManager[IO[Any]]:
         return self.update_file_stream(jobStoreFileID, encoding, errors)
 
     @abstractmethod
     @contextmanager
-    def update_file_stream(self,
-                           file_id: str,
-                           encoding: Optional[str] = None,
-                           errors: Optional[str] = None) -> Iterator[IO[Any]]:
+    def update_file_stream(
+        self, file_id: str, encoding: Optional[str] = None, errors: Optional[str] = None
+    ) -> Iterator[IO[Any]]:
         """
         Replaces the existing version of a file in the job store. Similar to writeFile, but
         returns a context manager yielding a file handle which can be written to. The
@@ -1531,20 +1633,29 @@
     # with specific jobs.
     ##########################################
 
-    sharedFileNameRegex = re.compile(r'^[a-zA-Z0-9._-]+$')
+    sharedFileNameRegex = re.compile(r"^[a-zA-Z0-9._-]+$")
 
-    @deprecated(new_function_name='write_shared_file_stream')
-    def writeSharedFileStream(self, sharedFileName: str, isProtected: Optional[bool] = None, encoding: Optional[str] = None,
-                              errors: Optional[str] = None) -> ContextManager[IO[bytes]]:
-        return self.write_shared_file_stream(sharedFileName, isProtected, encoding, errors)
+    @deprecated(new_function_name="write_shared_file_stream")
+    def writeSharedFileStream(
+        self,
+        sharedFileName: str,
+        isProtected: Optional[bool] = None,
+        encoding: Optional[str] = None,
+        errors: Optional[str] = None,
+    ) -> ContextManager[IO[bytes]]:
+        return self.write_shared_file_stream(
+            sharedFileName, isProtected, encoding, errors
+        )
 
     @abstractmethod
     @contextmanager
-    def write_shared_file_stream(self,
-                                 shared_file_name: str,
-                                 encrypted: Optional[bool] = None,
-                                 encoding: Optional[str] = None,
-                                 errors: Optional[str] = None) -> Iterator[IO[bytes]]:
+    def write_shared_file_stream(
+        self,
+        shared_file_name: str,
+        encrypted: Optional[bool] = None,
+        encoding: Optional[str] = None,
+        errors: Optional[str] = None,
+    ) -> Iterator[IO[bytes]]:
         """
         Returns a context manager yielding a writable file handle to the global file referenced
         by the given name. File will be created in an atomic manner.
@@ -1569,19 +1680,23 @@
         """
         raise NotImplementedError()
 
-    @deprecated(new_function_name='read_shared_file_stream')
-    def readSharedFileStream(self,
-                             sharedFileName: str,
-                             encoding: Optional[str] = None,
-                             errors: Optional[str] = None) -> ContextManager[IO[bytes]]:
+    @deprecated(new_function_name="read_shared_file_stream")
+    def readSharedFileStream(
+        self,
+        sharedFileName: str,
+        encoding: Optional[str] = None,
+        errors: Optional[str] = None,
+    ) -> ContextManager[IO[bytes]]:
         return self.read_shared_file_stream(sharedFileName, encoding, errors)
 
     @abstractmethod
     @contextmanager
-    def read_shared_file_stream(self,
-                                shared_file_name: str,
-                                encoding: Optional[str] = None,
-                                errors: Optional[str] = None) -> Iterator[IO[bytes]]:
+    def read_shared_file_stream(
+        self,
+        shared_file_name: str,
+        encoding: Optional[str] = None,
+        errors: Optional[str] = None,
+    ) -> Iterator[IO[bytes]]:
         """
         Returns a context manager yielding a readable file handle to the global file referenced
         by the given name.
@@ -1600,7 +1715,7 @@
         """
         raise NotImplementedError()
 
-    @deprecated(new_function_name='write_logs')
+    @deprecated(new_function_name="write_logs")
     def writeStatsAndLogging(self, statsAndLoggingString: str) -> None:
         return self.write_logs(statsAndLoggingString)
 
@@ -1616,8 +1731,10 @@
         """
         raise NotImplementedError()
 
-    @deprecated(new_function_name='read_logs')
-    def readStatsAndLogging(self, callback: Callable[..., Any], readAll: bool = False) -> int:
+    @deprecated(new_function_name="read_logs")
+    def readStatsAndLogging(
+        self, callback: Callable[..., Any], readAll: bool = False
+    ) -> int:
         return self.read_logs(callback, readAll)
 
     @abstractmethod
@@ -1652,8 +1769,8 @@
         this method. Other methods will rely on always having the most current
         pid available. So far there is no reason to store any old pids.
         """
-        with self.write_shared_file_stream('pid.log') as f:
-            f.write(str(os.getpid()).encode('utf-8'))
+        with self.write_shared_file_stream("pid.log") as f:
+            f.write(str(os.getpid()).encode("utf-8"))
 
     def read_leader_pid(self) -> int:
         """
@@ -1661,7 +1778,7 @@
 
         :raise NoSuchFileException: If the PID file doesn't exist.
         """
-        with self.read_shared_file_stream('pid.log') as f:
+        with self.read_shared_file_stream("pid.log") as f:
            return int(f.read().strip())
 
     def write_leader_node_id(self) -> None:
@@ -1670,7 +1787,7 @@
        by the leader.
        """
        with self.write_shared_file_stream("leader_node_id.log") as f:
-            f.write(getNodeID().encode('utf-8'))
+            f.write(getNodeID().encode("utf-8"))
 
    def read_leader_node_id(self) -> str:
        """
@@ -1679,7 +1796,7 @@
        :raise NoSuchFileException: If the node ID file doesn't exist.
        """
        with self.read_shared_file_stream("leader_node_id.log") as f:
-            return f.read().decode('utf-8').strip()
+            return f.read().decode("utf-8").strip()
 
    def write_kill_flag(self, kill: bool = False) -> None:
        """
@@ -1692,7 +1809,7 @@
        workers are expected to be cleaned up by the leader.
        """
        with self.write_shared_file_stream("_toil_kill_flag") as f:
-            f.write(("YES" if kill else "NO").encode('utf-8'))
+            f.write(("YES" if kill else "NO").encode("utf-8"))
 
    def read_kill_flag(self) -> bool:
        """
@@ -1733,25 +1850,40 @@
     if not cls._validateSharedFileName(sharedFileName):
         raise ValueError("Not a valid shared file name: '%s'." % sharedFileName)
 
+
 class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
     """
     A mostly fake JobStore to access URLs not really associated with real job
     stores.
     """
 
+    @classmethod
+    def _setup_ftp(cls) -> FtpFsAccess:
+        # FTP connections are not reused. Ideally, a thread should watch any reused FTP connections
+        # and close them when necessary
+        return FtpFsAccess()
+
     @classmethod
     def _supports_url(cls, url: ParseResult, export: bool = False) -> bool:
-        return url.scheme.lower() in ('http', 'https', 'ftp') and not export
+        return url.scheme.lower() in ("http", "https", "ftp") and not export
 
     @classmethod
     def _url_exists(cls, url: ParseResult) -> bool:
+        # Deal with FTP first to support user/password auth
+        if url.scheme.lower() == "ftp":
+            ftp = cls._setup_ftp()
+            return ftp.exists(url.geturl())
+
         try:
-            # TODO: Figure out how to HEAD instead of this.
-            with cls._open_url(url):
+            with closing(urlopen(Request(url.geturl(), method="HEAD"))):
                 return True
-        except:
-            pass
-        return False
+        except HTTPError as e:
+            if e.code in (404, 410):
+                return False
+            else:
+                raise
+        # Any other errors we should pass through because something really went
+        # wrong (e.g. server is broken today but file may usually exist)
 
     @classmethod
     @retry(
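`_url_exists` now resolves the old TODO: instead of opening the URL (a full GET) it issues a HEAD request, treats 404 and 410 as a definite "no", and lets every other HTTP error propagate so a flaky server is not mistaken for a missing file. A standalone sketch of the same probe using only the stdlib (this is not the toil method itself, and the demo URLs are placeholders):

```python
from contextlib import closing
from urllib.error import HTTPError
from urllib.request import Request, urlopen

def http_url_exists(url: str) -> bool:
    """HEAD-probe a URL: 404/410 mean 'no'; other errors propagate."""
    try:
        with closing(urlopen(Request(url, method="HEAD"))):
            return True
    except HTTPError as e:
        if e.code in (404, 410):
            return False
        raise  # e.g. a 503 means "unknown", not "missing"

print(http_url_exists("https://example.com/"))  # expected: True
```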
@@ -1761,17 +1893,19 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
         ]
     )
     def _get_size(cls, url: ParseResult) -> Optional[int]:
-        if url.scheme.lower() == 'ftp':
-            return None
-        with closing(urlopen(url.geturl())) as readable:
-            # just read the header for content length
-            size = readable.info().get('content-length')
-            return int(size) if size is not None else None
+        if url.scheme.lower() == "ftp":
+            ftp = cls._setup_ftp()
+            return ftp.size(url.geturl())
+
+        # just read the header for content length
+        resp = urlopen(Request(url.geturl(), method="HEAD"))
+        size = resp.info().get("content-length")
+        return int(size) if size is not None else None
 
     @classmethod
     def _read_from_url(
         cls, url: ParseResult, writable: Union[IO[bytes], IO[str]]
-    ) -> Tuple[int, bool]:
+    ) -> tuple[int, bool]:
         # We can't actually retry after we start writing.
         # TODO: Implement retry with byte range requests
         with cls._open_url(url) as readable:
@@ -1780,8 +1914,10 @@
             # nested function can modify it without creating its own
             # local with the same name.
             size = [0]
+
             def count(l: int) -> None:
                 size[0] += l
+
             counter = WriteWatchingStream(writable)
             counter.onWrite(count)
 
@@ -1793,18 +1929,32 @@
     @retry(
         errors=[
             BadStatusLine,
-            ErrorCondition(error=HTTPError, error_codes=[408, 500, 503]),
+            ErrorCondition(error=HTTPError, error_codes=[408, 429, 500, 502, 503]),
         ]
     )
     def _open_url(cls, url: ParseResult) -> IO[bytes]:
+        # Deal with FTP first so we support user/password auth
+        if url.scheme.lower() == "ftp":
+            ftp = cls._setup_ftp()
+            # we open in read mode as write mode is not supported
+            return ftp.open(url.geturl(), mode="r")
+
         try:
             return cast(IO[bytes], closing(urlopen(url.geturl())))
         except HTTPError as e:
-            if e.code == 404:
+            if e.code in (404, 410):
                 # Translate into a FileNotFoundError for detecting
-                # un-importable files
+                # known nonexistent files
                 raise FileNotFoundError(str(url)) from e
             else:
+                # Other codes indicate a real problem with the server; we don't
+                # want to e.g. run a workflow without an optional input that
+                # the user specified a path to just because the server was
+                # busy.
+
+                # Sometimes we expect to see this when polling existence for
+                # inputs at guessed paths, so don't complain *too* loudly here.
+                logger.debug("Unusual status %d for URL %s", e.code, str(url))
                 raise
 
     @classmethod
@@ -1813,6 +1963,6 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
         return False
 
     @classmethod
-    def _list_url(cls, url: ParseResult) -> List[str]:
+    def _list_url(cls, url: ParseResult) -> list[str]:
         # TODO: Implement HTTP index parsing and FTP directory listing
         raise NotImplementedError("HTTP and FTP URLs cannot yet be listed")