toil-6.1.0a1-py3-none-any.whl → toil-8.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +122 -315
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +173 -89
- toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
- toil/batchSystems/awsBatch.py +244 -135
- toil/batchSystems/cleanup_support.py +26 -16
- toil/batchSystems/contained_executor.py +31 -28
- toil/batchSystems/gridengine.py +86 -50
- toil/batchSystems/htcondor.py +166 -89
- toil/batchSystems/kubernetes.py +632 -382
- toil/batchSystems/local_support.py +20 -15
- toil/batchSystems/lsf.py +134 -81
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +290 -151
- toil/batchSystems/mesos/executor.py +79 -50
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +46 -28
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +296 -125
- toil/batchSystems/slurm.py +603 -138
- toil/batchSystems/torque.py +47 -33
- toil/bus.py +186 -76
- toil/common.py +664 -368
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1136 -483
- toil/cwl/utils.py +17 -22
- toil/deferred.py +63 -42
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +140 -60
- toil/fileStores/cachingFileStore.py +717 -269
- toil/fileStores/nonCachingFileStore.py +116 -87
- toil/job.py +1225 -368
- toil/jobStores/abstractJobStore.py +416 -266
- toil/jobStores/aws/jobStore.py +863 -477
- toil/jobStores/aws/utils.py +201 -120
- toil/jobStores/conftest.py +3 -2
- toil/jobStores/fileJobStore.py +292 -154
- toil/jobStores/googleJobStore.py +140 -74
- toil/jobStores/utils.py +36 -15
- toil/leader.py +668 -272
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +74 -31
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +214 -39
- toil/lib/aws/utils.py +287 -231
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +104 -47
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +361 -199
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +5 -3
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +141 -15
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +66 -21
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +68 -15
- toil/lib/retry.py +126 -81
- toil/lib/threading.py +299 -82
- toil/lib/throttle.py +16 -15
- toil/options/common.py +843 -409
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +73 -17
- toil/provisioners/__init__.py +117 -46
- toil/provisioners/abstractProvisioner.py +332 -157
- toil/provisioners/aws/__init__.py +70 -33
- toil/provisioners/aws/awsProvisioner.py +1145 -715
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +155 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +128 -62
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +224 -70
- toil/test/__init__.py +282 -183
- toil/test/batchSystems/batchSystemTest.py +460 -210
- toil/test/batchSystems/batch_system_plugin_test.py +90 -0
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +110 -49
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +56 -0
- toil/test/cwl/cwlTest.py +496 -287
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +69 -46
- toil/test/jobStores/jobStoreTest.py +427 -264
- toil/test/lib/aws/test_iam.py +118 -50
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +58 -50
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +42 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +166 -44
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +141 -101
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +32 -24
- toil/test/src/environmentTest.py +135 -0
- toil/test/src/fileStoreTest.py +539 -272
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +46 -21
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +121 -71
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +10 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +73 -23
- toil/test/utils/toilDebugTest.py +103 -33
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +245 -106
- toil/test/wdl/wdltoil_test.py +818 -149
- toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
- toil/toilState.py +120 -35
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +214 -27
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +256 -140
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +32 -14
- toil/utils/toilSshCluster.py +49 -22
- toil/utils/toilStats.py +356 -273
- toil/utils/toilStatus.py +292 -139
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +12 -12
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3913 -1033
- toil/worker.py +367 -184
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-6.1.0a1.dist-info/METADATA +0 -125
- toil-6.1.0a1.dist-info/RECORD +0 -237
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
--- toil/jobStores/abstractJobStore.py (6.1.0a1)
+++ toil/jobStores/abstractJobStore.py (8.0.0)
@@ -16,45 +16,39 @@ import os
 import pickle
 import re
 import shutil
-import sys
 from abc import ABC, ABCMeta, abstractmethod
+from collections.abc import Iterator, ValuesView
 from contextlib import closing, contextmanager
 from datetime import timedelta
 from http.client import BadStatusLine
-from typing import (
-    IO,
-    TYPE_CHECKING,
-    Any,
-    Callable,
-    ContextManager,
-    Dict,
-    Iterator,
-    List,
-    Optional,
-    Set,
-    Tuple,
-    Union,
-    ValuesView,
-    cast,
-    overload)
-
-if sys.version_info >= (3, 8):
-    from typing import Literal
-else:
-    from typing_extensions import Literal
-
+from typing import (
+    IO,
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    ContextManager,
+    Literal,
+    Optional,
+    Union,
+    cast,
+    overload,
+)
 from urllib.error import HTTPError
 from urllib.parse import ParseResult, urlparse
-from urllib.request import urlopen
+from urllib.request import urlopen, Request
 from uuid import uuid4

 from toil.common import Config, getNodeID, safeUnpickleFromStream
 from toil.fileStores import FileID
-from toil.job import (CheckpointJobDescription,
-                      JobDescription,
-                      JobException,
-                      ServiceJobDescription)
+from toil.job import (
+    CheckpointJobDescription,
+    JobDescription,
+    JobException,
+    ServiceJobDescription,
+)
+from toil.lib.ftp_utils import FtpFsAccess
 from toil.lib.compatibility import deprecated
+from toil.lib.exceptions import UnimplementedURLException
 from toil.lib.io import WriteWatchingStream
 from toil.lib.memoize import memoize
 from toil.lib.retry import ErrorCondition, retry
@@ -67,10 +61,22 @@ logger = logging.getLogger(__name__)
 try:
     from botocore.exceptions import ProxyConnectionError
 except ImportError:
+
     class ProxyConnectionError(BaseException):  # type: ignore
         """Dummy class."""


+class LocatorException(Exception):
+    """
+    Base exception class for all locator exceptions.
+    For example, job store/aws bucket exceptions where they already exist
+    """
+
+    def __init__(self, error_msg: str, locator: str, prefix: Optional[str] = None):
+        full_locator = locator if prefix is None else f"{prefix}:{locator}"
+        super().__init__(error_msg % full_locator)
+
+
 class InvalidImportExportUrlException(Exception):
     def __init__(self, url: ParseResult) -> None:
         """
@@ -78,24 +84,10 @@ class InvalidImportExportUrlException(Exception):
         """
         super().__init__("The URL '%s' is invalid." % url.geturl())

-class UnimplementedURLException(RuntimeError):
-    def __init__(self, url: ParseResult, operation: str) -> None:
-        """
-        Make a new exception to report that a URL scheme is not implemented, or
-        that the implementation can't be loaded because its dependencies are
-        not installed.
-
-        :param url: The given URL
-        :param operation: Whether we are trying to 'import' or 'export'
-        """
-        super().__init__(
-            f"No available job store implementation can {operation} the URL "
-            f"'{url.geturl()}'. Ensure Toil has been installed "
-            f"with the appropriate extras."
-        )

 class NoSuchJobException(Exception):
     """Indicates that the specified job does not exist."""
+
     def __init__(self, jobStoreID: FileID):
         """
         :param str jobStoreID: the jobStoreID that was mistakenly assumed to exist
@@ -105,17 +97,21 @@ class NoSuchJobException(Exception):

 class ConcurrentFileModificationException(Exception):
     """Indicates that the file was attempted to be modified by multiple processes at once."""
+
     def __init__(self, jobStoreFileID: FileID):
         """
         :param jobStoreFileID: the ID of the file that was modified by multiple workers
                or processes concurrently
         """
-        super().__init__('Concurrent update to file %s detected.' % jobStoreFileID)
+        super().__init__("Concurrent update to file %s detected." % jobStoreFileID)


 class NoSuchFileException(Exception):
     """Indicates that the specified file does not exist."""
-    def __init__(self, jobStoreFileID: FileID, customName: Optional[str] = None, *extra: Any):
+
+    def __init__(
+        self, jobStoreFileID: FileID, customName: Optional[str] = None, *extra: Any
+    ):
         """
         :param jobStoreFileID: the ID of the file that was mistakenly assumed to exist
         :param customName: optionally, an alternate name for the nonexistent file
@@ -136,24 +132,33 @@ class NoSuchFileException(Exception):
         super().__init__(message)


-class NoSuchJobStoreException(Exception):
+class NoSuchJobStoreException(LocatorException):
     """Indicates that the specified job store does not exist."""
-    def __init__(self, locator: str):
+
+    def __init__(self, locator: str, prefix: str):
         """
         :param str locator: The location of the job store
         """
-        super().__init__("The job store '%s' does not exist, so there is nothing to restart." % locator)
+        super().__init__(
+            "The job store '%s' does not exist, so there is nothing to restart.",
+            locator,
+            prefix,
+        )


-class JobStoreExistsException(Exception):
+class JobStoreExistsException(LocatorException):
     """Indicates that the specified job store already exists."""
-    def __init__(self, locator: str):
+
+    def __init__(self, locator: str, prefix: str):
         """
         :param str locator: The location of the job store
         """
         super().__init__(
             "The job store '%s' already exists. Use --restart to resume the workflow, or remove "
-            "the job store with 'toil clean' to start the workflow from scratch." % locator)
+            "the job store with 'toil clean' to start the workflow from scratch.",
+            locator,
+            prefix,
+        )


 class AbstractJobStore(ABC):
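The new `LocatorException` introduced above centralizes the `%s`-style message formatting for job store locators: `NoSuchJobStoreException` and `JobStoreExistsException` now just hand it a format string, the locator, and the scheme prefix. A minimal sketch of how the pieces compose (the `file` prefix and `/tmp/store` locator are made-up illustration values):

```python
from typing import Optional


class LocatorException(Exception):
    """Format an error message around a possibly-prefixed locator."""

    def __init__(self, error_msg: str, locator: str, prefix: Optional[str] = None):
        # "file" + "/tmp/store" is reported as "file:/tmp/store".
        full_locator = locator if prefix is None else f"{prefix}:{locator}"
        super().__init__(error_msg % full_locator)


class NoSuchJobStoreException(LocatorException):
    """Indicates that the specified job store does not exist."""

    def __init__(self, locator: str, prefix: str):
        super().__init__(
            "The job store '%s' does not exist, so there is nothing to restart.",
            locator,
            prefix,
        )


try:
    raise NoSuchJobStoreException("/tmp/store", "file")
except LocatorException as e:
    print(e)  # The job store 'file:/tmp/store' does not exist, ...
```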
@@ -205,7 +210,7 @@ class AbstractJobStore(ABC):
         self.__config = config
         self.write_config()

-    @deprecated(new_function_name='write_config')
+    @deprecated(new_function_name="write_config")
     def writeConfig(self) -> None:
         return self.write_config()

@@ -214,7 +219,9 @@ class AbstractJobStore(ABC):
         Persists the value of the :attr:`AbstractJobStore.config` attribute to the
         job store, so that it can be retrieved later by other instances of this class.
         """
-        with self.write_shared_file_stream('config.pickle', encrypted=False) as fileHandle:
+        with self.write_shared_file_stream(
+            "config.pickle", encrypted=False
+        ) as fileHandle:
             pickle.dump(self.__config, fileHandle, pickle.HIGHEST_PROTOCOL)

     def resume(self) -> None:
@@ -224,7 +231,7 @@ class AbstractJobStore(ABC):

         :raises NoSuchJobStoreException: if the physical storage for this job store doesn't exist
         """
-        with self.read_shared_file_stream('config.pickle') as fileHandle:
+        with self.read_shared_file_stream("config.pickle") as fileHandle:
             config = safeUnpickleFromStream(fileHandle)
             assert config.workflowID is not None
             self.__config = config
@@ -242,9 +249,9 @@ class AbstractJobStore(ABC):
         """
         return self.__locator

-    rootJobStoreIDFileName = 'rootJobStoreID'
+    rootJobStoreIDFileName = "rootJobStoreID"

-    @deprecated(new_function_name='set_root_job')
+    @deprecated(new_function_name="set_root_job")
     def setRootJob(self, rootJobStoreID: FileID) -> None:
         """Set the root job of the workflow backed by this job store."""
         return self.set_root_job(rootJobStoreID)
@@ -256,9 +263,9 @@ class AbstractJobStore(ABC):
         :param job_id: The ID of the job to set as root
         """
         with self.write_shared_file_stream(self.rootJobStoreIDFileName) as f:
-            f.write(job_id.encode('utf-8'))
+            f.write(job_id.encode("utf-8"))

-    @deprecated(new_function_name='load_root_job')
+    @deprecated(new_function_name="load_root_job")
     def loadRootJob(self) -> JobDescription:
         return self.load_root_job()

@@ -273,16 +280,18 @@ class AbstractJobStore(ABC):
         """
         try:
             with self.read_shared_file_stream(self.rootJobStoreIDFileName) as f:
-                rootJobStoreID = f.read().decode('utf-8')
+                rootJobStoreID = f.read().decode("utf-8")
         except NoSuchFileException:
-            raise JobException('No job has been set as the root in this job store')
+            raise JobException("No job has been set as the root in this job store")
         if not self.job_exists(rootJobStoreID):
-            raise JobException("The root job '%s' doesn't exist. Either the Toil workflow "
-                               "is finished or has never been started" % rootJobStoreID)
+            raise JobException(
+                "The root job '%s' doesn't exist. Either the Toil workflow "
+                "is finished or has never been started" % rootJobStoreID
+            )
         return self.load_job(rootJobStoreID)

     # FIXME: This is only used in tests, why do we have it?
-    @deprecated(new_function_name='create_root_job')
+    @deprecated(new_function_name="create_root_job")
     def createRootJob(self, desc: JobDescription) -> JobDescription:
         return self.create_root_job(desc)

@@ -299,7 +308,7 @@ class AbstractJobStore(ABC):
         self.set_root_job(job_description.jobStoreID)
         return job_description

-    @deprecated(new_function_name='get_root_job_return_value')
+    @deprecated(new_function_name="get_root_job_return_value")
     def getRootJobReturnValue(self) -> Any:
         return self.get_root_job_return_value()

@@ -310,12 +319,12 @@ class AbstractJobStore(ABC):
         Raises an exception if the root job hasn't fulfilled its promise yet.
         """
         # Parse out the return value from the root job
-        with self.read_shared_file_stream('rootJobReturnValue') as fH:
+        with self.read_shared_file_stream("rootJobReturnValue") as fH:
             return safeUnpickleFromStream(fH)

     @staticmethod
     @memoize
-    def _get_job_store_classes() -> List['AbstractJobStore']:
+    def _get_job_store_classes() -> list["AbstractJobStore"]:
         """
         A list of concrete AbstractJobStore implementations whose dependencies are installed.

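`write_config` and `resume` are a matched pair: one pickles the workflow `Config` into the shared file `config.pickle` (explicitly unencrypted, so it can be read back before any encryption is configured) and the other unpickles it. The same round trip as a sketch against any object exposing the stream methods in this diff; the `job_store` parameter is a stand-in, and Toil itself uses `safeUnpickleFromStream` rather than a bare `pickle.load` on the read side:

```python
import pickle


def save_config(job_store, config) -> None:
    # encrypted=False mirrors write_config(): the config must stay readable
    # before encryption keys are known.
    with job_store.write_shared_file_stream("config.pickle", encrypted=False) as f:
        pickle.dump(config, f, pickle.HIGHEST_PROTOCOL)


def load_config(job_store):
    with job_store.read_shared_file_stream("config.pickle") as f:
        return pickle.load(f)
```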
@@ -325,23 +334,30 @@ class AbstractJobStore(ABC):
             "toil.jobStores.fileJobStore.FileJobStore",
             "toil.jobStores.googleJobStore.GoogleJobStore",
             "toil.jobStores.aws.jobStore.AWSJobStore",
-            "toil.jobStores.abstractJobStore.JobStoreSupport")
+            "toil.jobStores.abstractJobStore.JobStoreSupport",
+        )
         jobStoreClasses = []
         for className in jobStoreClassNames:
-            moduleName, className = className.rsplit('.', 1)
+            moduleName, className = className.rsplit(".", 1)
             from importlib import import_module
+
             try:
                 module = import_module(moduleName)
             except (ImportError, ProxyConnectionError):
-                logger.debug("Unable to import '%s' as is expected if the corresponding extra was "
-                             "omitted at installation time.", moduleName)
+                logger.debug(
+                    "Unable to import '%s' as is expected if the corresponding extra was "
+                    "omitted at installation time.",
+                    moduleName,
+                )
             else:
                 jobStoreClass = getattr(module, className)
                 jobStoreClasses.append(jobStoreClass)
         return jobStoreClasses

     @classmethod
-    def _findJobStoreForUrl(cls, url: ParseResult, export: bool = False) -> 'AbstractJobStore':
+    def _findJobStoreForUrl(
+        cls, url: ParseResult, export: bool = False
+    ) -> "AbstractJobStore":
         """
         Returns the AbstractJobStore subclass that supports the given URL.

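`_get_job_store_classes` tolerates missing optional extras (AWS, Google) by importing each implementation lazily and skipping any whose import fails. The same pattern in isolation, with a made-up plugin list standing in for `jobStoreClassNames`:

```python
import logging
from importlib import import_module

logger = logging.getLogger(__name__)

# Hypothetical list; the real one names Toil's job store implementations.
class_names = [
    "json.JSONDecoder",          # stdlib, always importable
    "missing_extra.FancyStore",  # simulates an extra that was not installed
]

available = []
for name in class_names:
    module_name, class_name = name.rsplit(".", 1)
    try:
        module = import_module(module_name)
    except ImportError:
        logger.debug("Unable to import '%s'; its extra is probably missing.", module_name)
    else:
        available.append(getattr(module, class_name))

print(available)  # only json.JSONDecoder survives
```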
@@ -360,46 +376,58 @@ class AbstractJobStore(ABC):
     # returns a file ID. Explain this to MyPy.

     @overload
-    def importFile(self,
-                   srcUrl: str,
-                   sharedFileName: str,
-                   hardlink: bool = False,
-                   symlink: bool = True) -> None: ...
+    def importFile(
+        self,
+        srcUrl: str,
+        sharedFileName: str,
+        hardlink: bool = False,
+        symlink: bool = True,
+    ) -> None: ...

     @overload
-    def importFile(self,
-                   srcUrl: str,
-                   sharedFileName: None = None,
-                   hardlink: bool = False,
-                   symlink: bool = True) -> FileID: ...
-
-    @deprecated(new_function_name='import_file')
-    def importFile(self,
-                   srcUrl: str,
-                   sharedFileName: Optional[str] = None,
-                   hardlink: bool = False,
-                   symlink: bool = True) -> Optional[FileID]:
+    def importFile(
+        self,
+        srcUrl: str,
+        sharedFileName: None = None,
+        hardlink: bool = False,
+        symlink: bool = True,
+    ) -> FileID: ...
+
+    @deprecated(new_function_name="import_file")
+    def importFile(
+        self,
+        srcUrl: str,
+        sharedFileName: Optional[str] = None,
+        hardlink: bool = False,
+        symlink: bool = True,
+    ) -> Optional[FileID]:
         return self.import_file(srcUrl, sharedFileName, hardlink, symlink)

     @overload
-    def import_file(self,
-                    src_uri: str,
-                    shared_file_name: str,
-                    hardlink: bool = False,
-                    symlink: bool = True) -> None: ...
+    def import_file(
+        self,
+        src_uri: str,
+        shared_file_name: str,
+        hardlink: bool = False,
+        symlink: bool = True,
+    ) -> None: ...

     @overload
-    def import_file(self,
-                    src_uri: str,
-                    shared_file_name: None = None,
-                    hardlink: bool = False,
-                    symlink: bool = True) -> FileID: ...
-
-    def import_file(self,
-                    src_uri: str,
-                    shared_file_name: Optional[str] = None,
-                    hardlink: bool = False,
-                    symlink: bool = True) -> Optional[FileID]:
+    def import_file(
+        self,
+        src_uri: str,
+        shared_file_name: None = None,
+        hardlink: bool = False,
+        symlink: bool = True,
+    ) -> FileID: ...
+
+    def import_file(
+        self,
+        src_uri: str,
+        shared_file_name: Optional[str] = None,
+        hardlink: bool = False,
+        symlink: bool = True,
+    ) -> Optional[FileID]:
         """
         Imports the file at the given URL into job store. The ID of the newly imported file is
         returned. If the name of a shared file name is provided, the file will be imported as
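These stacked `@overload` declarations encode a rule the type checker can enforce: calling `import_file` with a `shared_file_name` stores the file under that name and returns nothing, while calling it without one returns a file ID. A stripped-down sketch of the same trick, using plain `str` in place of `FileID`:

```python
from typing import Optional, overload


@overload
def import_file(src_uri: str, shared_file_name: str) -> None: ...
@overload
def import_file(src_uri: str, shared_file_name: None = None) -> str: ...


def import_file(src_uri: str, shared_file_name: Optional[str] = None) -> Optional[str]:
    # One runtime implementation backs both declared signatures.
    if shared_file_name is not None:
        return None  # stored under the shared name, no ID to hand back
    return f"file-id-for-{src_uri}"  # placeholder for a real FileID


file_id = import_file("https://example.com/data.txt")  # mypy sees: str
import_file("https://example.com/data.txt", "inputs")  # mypy sees: None
```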
@@ -437,18 +465,23 @@ class AbstractJobStore(ABC):
         # subclasses of AbstractJobStore.
         parseResult = urlparse(src_uri)
         otherCls = self._findJobStoreForUrl(parseResult)
-        return self._import_file(otherCls,
-                                 parseResult,
-                                 shared_file_name=shared_file_name,
-                                 hardlink=hardlink,
-                                 symlink=symlink)
-
-    def _import_file(self,
-                     otherCls: 'AbstractJobStore',
-                     uri: ParseResult,
-                     shared_file_name: Optional[str] = None,
-                     hardlink: bool = False,
-                     symlink: bool = True) -> Optional[FileID]:
+        logger.info("Importing input %s...", src_uri)
+        return self._import_file(
+            otherCls,
+            parseResult,
+            shared_file_name=shared_file_name,
+            hardlink=hardlink,
+            symlink=symlink,
+        )
+
+    def _import_file(
+        self,
+        otherCls: "AbstractJobStore",
+        uri: ParseResult,
+        shared_file_name: Optional[str] = None,
+        hardlink: bool = False,
+        symlink: bool = True,
+    ) -> Optional[FileID]:
         """
         Import the file at the given URL using the given job store class to retrieve that file.
         See also :meth:`.importFile`. This method applies a generic approach to importing: it
@@ -478,7 +511,7 @@ class AbstractJobStore(ABC):
             otherCls._read_from_url(uri, writable)
             return None

-    @deprecated(new_function_name='export_file')
+    @deprecated(new_function_name="export_file")
     def exportFile(self, jobStoreFileID: FileID, dstUrl: str) -> None:
         return self.export_file(jobStoreFileID, dstUrl)

@@ -497,13 +530,17 @@ class AbstractJobStore(ABC):
         :param str file_id: The id of the file in the job store that should be exported.

         :param str dst_uri: URL that points to a file or object in the storage mechanism of a
-               supported URL scheme e.g. a blob in an AWS s3 bucket.
+               supported URL scheme e.g. a blob in an AWS s3 bucket. May also be a local path.
         """
+        from toil.common import Toil
+
+        dst_uri = Toil.normalize_uri(dst_uri)
         parseResult = urlparse(dst_uri)
         otherCls = self._findJobStoreForUrl(parseResult, export=True)
         self._export_file(otherCls, file_id, parseResult)

-    def _export_file(self, otherCls: 'AbstractJobStore', jobStoreFileID: FileID, url: ParseResult) -> None:
+    def _export_file(
+        self, otherCls: "AbstractJobStore", jobStoreFileID: FileID, url: ParseResult
+    ) -> None:
         """
         Refer to exportFile docstring for information about this method.

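`export_file` now runs the destination through `Toil.normalize_uri` before choosing a job store implementation for it, which is what makes the docstring's new "May also be a local path" claim work: bare paths become `file:` URLs. A sketch of the assumed normalization contract:

```python
from urllib.parse import urlparse


def normalize_uri(uri: str) -> str:
    # Assumed behavior of Toil.normalize_uri: leave real URLs alone and
    # promote plain filesystem paths to file: URLs.
    if urlparse(uri).scheme == "":
        return "file://" + uri
    return uri


print(normalize_uri("/tmp/out.txt"))         # file:///tmp/out.txt
print(normalize_uri("s3://bucket/out.txt"))  # s3://bucket/out.txt, unchanged
```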
@@ -518,7 +555,9 @@ class AbstractJobStore(ABC):
         """
         self._default_export_file(otherCls, jobStoreFileID, url)

-    def _default_export_file(self, otherCls: 'AbstractJobStore', jobStoreFileID: FileID, url: ParseResult) -> None:
+    def _default_export_file(
+        self, otherCls: "AbstractJobStore", jobStoreFileID: FileID, url: ParseResult
+    ) -> None:
         """
         Refer to exportFile docstring for information about this method.

@@ -533,7 +572,7 @@ class AbstractJobStore(ABC):
         """
         executable = False
         with self.read_file_stream(jobStoreFileID) as readable:
-            if getattr(jobStoreFileID, 'executable', False):
+            if getattr(jobStoreFileID, "executable", False):
                 executable = jobStoreFileID.executable
             otherCls._write_to_url(readable, url, executable)

@@ -542,6 +581,8 @@ class AbstractJobStore(ABC):
         """
         Return True if the file at the given URI exists, and False otherwise.

+        May raise an error if file existence cannot be determined.
+
         :param src_uri: URL that points to a file or object in the storage
                mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
         """
@@ -572,7 +613,7 @@ class AbstractJobStore(ABC):
         return otherCls._get_is_directory(parseResult)

     @classmethod
-    def list_url(cls, src_uri: str) -> List[str]:
+    def list_url(cls, src_uri: str) -> list[str]:
         """
         List the directory at the given URL. Returned path components can be
         joined with '/' onto the passed URL to form new URLs. Those that end in
@@ -597,7 +638,7 @@ class AbstractJobStore(ABC):
         return otherCls._list_url(parseResult)

     @classmethod
-    def read_from_url(cls, src_uri: str, writable: IO[bytes]) -> Tuple[int, bool]:
+    def read_from_url(cls, src_uri: str, writable: IO[bytes]) -> tuple[int, bool]:
         """
         Read the given URL and write its content into the given writable stream.

@@ -628,6 +669,8 @@ class AbstractJobStore(ABC):
     def _url_exists(cls, url: ParseResult) -> bool:
         """
         Return True if the item at the given URL exists, and Flase otherwise.
+
+        May raise an error if file existence cannot be determined.
         """
         raise NotImplementedError(f"No implementation for {url}")

@@ -655,7 +698,7 @@ class AbstractJobStore(ABC):

     @classmethod
     @abstractmethod
-    def _read_from_url(cls, url: ParseResult, writable: IO[bytes]) -> Tuple[int, bool]:
+    def _read_from_url(cls, url: ParseResult, writable: IO[bytes]) -> tuple[int, bool]:
         """
         Reads the contents of the object at the specified location and writes it to the given
         writable stream.
@@ -675,7 +718,7 @@ class AbstractJobStore(ABC):

     @classmethod
     @abstractmethod
-    def _list_url(cls, url: ParseResult) -> List[str]:
+    def _list_url(cls, url: ParseResult) -> list[str]:
         """
         List the contents of the given URL, which may or may not end in '/'

@@ -707,7 +750,12 @@ class AbstractJobStore(ABC):

     @classmethod
     @abstractmethod
-    def _write_to_url(cls, readable: Union[IO[bytes], IO[str]], url: ParseResult, executable: bool = False) -> None:
+    def _write_to_url(
+        cls,
+        readable: Union[IO[bytes], IO[str]],
+        url: ParseResult,
+        executable: bool = False,
+    ) -> None:
         """
         Reads the contents of the given readable stream and writes it to the object at the
         specified location. Raises FileNotFoundError if the URL doesn't exist..
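The `_url_exists` / `_list_url` / `_read_from_url` / `_write_to_url` classmethods are the per-scheme URL interface: the public classmethods parse the URL, pick the implementation whose `_supports_url` accepts it, and delegate. A toy read-side implementation for a made-up `mem:` scheme, showing the shapes these hooks return:

```python
from io import BytesIO
from typing import IO, Tuple
from urllib.parse import ParseResult, urlparse

FAKE_STORE = {"mem://bucket/hello.txt": b"hello world\n"}  # invented backing data


class MemUrlAccess:
    @classmethod
    def _supports_url(cls, url: ParseResult, export: bool = False) -> bool:
        return url.scheme.lower() == "mem"

    @classmethod
    def _url_exists(cls, url: ParseResult) -> bool:
        return url.geturl() in FAKE_STORE

    @classmethod
    def _read_from_url(cls, url: ParseResult, writable: IO[bytes]) -> Tuple[int, bool]:
        data = FAKE_STORE[url.geturl()]
        writable.write(data)
        return len(data), False  # (bytes copied, is-executable)


url = urlparse("mem://bucket/hello.txt")
buf = BytesIO()
print(MemUrlAccess._url_exists(url), MemUrlAccess._read_from_url(url, buf))  # True (12, False)
```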
@@ -754,11 +802,11 @@ class AbstractJobStore(ABC):
         """
         raise NotImplementedError()

-    @deprecated(new_function_name='get_env')
-    def getEnv(self) -> Dict[str, str]:
+    @deprecated(new_function_name="get_env")
+    def getEnv(self) -> dict[str, str]:
         return self.get_env()

-    def get_env(self) -> Dict[str, str]:
+    def get_env(self) -> dict[str, str]:
         """
         Returns a dictionary of environment variables that this job store requires to be set in
         order to function properly on a worker.
@@ -769,7 +817,7 @@ class AbstractJobStore(ABC):

     # Cleanup functions
     def clean(
-        self, jobCache: Optional[Dict[Union[str, "TemporaryID"], JobDescription]] = None
+        self, jobCache: Optional[dict[Union[str, "TemporaryID"], JobDescription]] = None
     ) -> JobDescription:
         """
         Function to cleanup the state of a job store after a restart.
@@ -797,7 +845,9 @@ class AbstractJobStore(ABC):
             return self.load_job(jobId)

         def haveJob(jobId: str) -> bool:
-            assert len(jobId) > 1, f"Job ID {jobId} too short; is a string being used as a list?"
+            assert (
+                len(jobId) > 1
+            ), f"Job ID {jobId} too short; is a string being used as a list?"
             if jobCache is not None:
                 if jobId in jobCache:
                     return True
@@ -817,13 +867,15 @@ class AbstractJobStore(ABC):
                 jobCache[str(jobDescription.jobStoreID)] = jobDescription
             self.update_job(jobDescription)

-        def getJobDescriptions() -> Union[ValuesView[JobDescription], Iterator[JobDescription]]:
+        def getJobDescriptions() -> (
+            Union[ValuesView[JobDescription], Iterator[JobDescription]]
+        ):
             if jobCache is not None:
                 return jobCache.values()
             else:
                 return self.jobs()

-        def get_jobs_reachable_from_root() -> Set[str]:
+        def get_jobs_reachable_from_root() -> set[str]:
             """
             Traverse the job graph from the root job and return a flattened set of all active jobstore IDs.

@@ -833,18 +885,17 @@ class AbstractJobStore(ABC):
             # Iterate from the root JobDescription and collate all jobs
             # that are reachable from it.
             root_job_description = self.load_root_job()
-            reachable_from_root: Set[str] = set()
+            reachable_from_root: set[str] = set()

-            reachable_from_root.add(str(root_job_description.jobStoreID))
-
-            # add all of root's linked service jobs as well
-            for service_jobstore_id in root_job_description.services:
-                if haveJob(service_jobstore_id):
-                    reachable_from_root.add(service_jobstore_id)
-            for merged_jobstore_id in root_job_description.merged_jobs:
+            for merged_in in root_job_description.get_chain():
+                # Add the job itself and any other jobs that chained with it.
                 # Keep merged-in jobs around themselves, but don't bother
                 # exploring them, since we took their successors.
-                reachable_from_root.add(merged_jobstore_id)
+                reachable_from_root.add(merged_in.job_store_id)
+            # add all of root's linked service jobs as well
+            for service_job_store_id in root_job_description.services:
+                if haveJob(service_job_store_id):
+                    reachable_from_root.add(service_job_store_id)

             # Unprocessed means it might have successor jobs we need to add.
             unprocessed_job_descriptions = [root_job_description]
@@ -852,24 +903,30 @@ class AbstractJobStore(ABC):
             while unprocessed_job_descriptions:
                 new_job_descriptions_to_process = []  # Reset.
                 for job_description in unprocessed_job_descriptions:
-                    for successor_jobstore_id in job_description.allSuccessors():
-                        if successor_jobstore_id not in reachable_from_root and haveJob(successor_jobstore_id):
-                            successor_job_description = getJobDescription(successor_jobstore_id)
-
-                            # Add each successor job.
-                            reachable_from_root.add(
-                                str(successor_job_description.jobStoreID)
-                            )
-                            # Add all of the successor's linked service jobs as well.
-                            for service_jobstore_id in successor_job_description.services:
-                                if haveJob(service_jobstore_id):
-                                    reachable_from_root.add(service_jobstore_id)
-
-                            new_job_descriptions_to_process.append(successor_job_description)
-                    for merged_jobstore_id in job_description.merged_jobs:
+                    for merged_in in job_description.get_chain():
+                        # Add the job and anything chained with it.
                         # Keep merged-in jobs around themselves, but don't bother
                         # exploring them, since we took their successors.
-                        reachable_from_root.add(merged_jobstore_id)
+                        reachable_from_root.add(merged_in.job_store_id)
+                    for successor_job_store_id in job_description.allSuccessors():
+                        if (
+                            successor_job_store_id not in reachable_from_root
+                            and haveJob(successor_job_store_id)
+                        ):
+                            successor_job_description = getJobDescription(
+                                successor_job_store_id
+                            )
+
+                            # Add all of the successor's linked service jobs as well.
+                            for (
+                                service_job_store_id
+                            ) in successor_job_description.services:
+                                if haveJob(service_job_store_id):
+                                    reachable_from_root.add(service_job_store_id)
+
+                            new_job_descriptions_to_process.append(
+                                successor_job_description
+                            )
                 unprocessed_job_descriptions = new_job_descriptions_to_process

             logger.debug(f"{len(reachable_from_root)} jobs reachable from root.")
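The traversal rewritten above changed shape with the `merged_jobs` to `get_chain()` switch: each visited `JobDescription` first contributes every job merged into it by chaining, then its service jobs, and only unvisited successors get queued for expansion. The same breadth-first sweep over a toy graph; `Desc` is a stand-in, not Toil's `JobDescription` API:

```python
from collections import deque


class Desc:  # minimal stand-in for a JobDescription
    def __init__(self, job_id, successors=(), services=()):
        self.job_id, self.successors, self.services = job_id, successors, services


JOBS = {
    "root": Desc("root", successors=("a", "b")),
    "a": Desc("a", services=("svc",)),
    "b": Desc("b"),
    "svc": Desc("svc"),
}


def reachable_from(root_id: str) -> set:
    reachable = {root_id}
    queue = deque([JOBS[root_id]])
    while queue:
        desc = queue.popleft()
        reachable.update(desc.services)  # keep linked service jobs alive
        for succ in desc.successors:
            if succ not in reachable:    # each job is expanded only once
                reachable.add(succ)
                queue.append(JOBS[succ])
    return reachable


print(reachable_from("root"))  # {'root', 'a', 'b', 'svc'}
```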
@@ -879,22 +936,32 @@ class AbstractJobStore(ABC):

         # Cleanup jobs that are not reachable from the root, and therefore orphaned
         # TODO: Avoid reiterating reachable_from_root (which may be very large)
-        unreachable = [x for x in getJobDescriptions() if x.jobStoreID not in reachable_from_root]
+        unreachable = [
+            x for x in getJobDescriptions() if x.jobStoreID not in reachable_from_root
+        ]
         for jobDescription in unreachable:
             # clean up any associated files before deletion
             for fileID in jobDescription.filesToDelete:
                 # Delete any files that should already be deleted
-                logger.warning(f"Deleting file '{fileID}'. It is marked for deletion but has not yet been removed.")
+                logger.warning(
+                    f"Deleting file '{fileID}'. It is marked for deletion but has not yet been removed."
+                )
                 self.delete_file(fileID)
             # Delete the job from us and the cache
             deleteJob(str(jobDescription.jobStoreID))

-        jobDescriptionsReachableFromRoot = {id: getJobDescription(id) for id in reachable_from_root}
+        jobDescriptionsReachableFromRoot = {
+            id: getJobDescription(id) for id in reachable_from_root
+        }

         # Clean up any checkpoint jobs -- delete any successors it
         # may have launched, and restore the job to a pristine state
         jobsDeletedByCheckpoints = set()
-        for jobDescription in [desc for desc in jobDescriptionsReachableFromRoot.values() if isinstance(desc, CheckpointJobDescription)]:
+        for jobDescription in [
+            desc
+            for desc in jobDescriptionsReachableFromRoot.values()
+            if isinstance(desc, CheckpointJobDescription)
+        ]:
             if jobDescription.jobStoreID in jobsDeletedByCheckpoints:
                 # This is a checkpoint that was nested within an
                 # earlier checkpoint, so it and all its successors are
@@ -920,20 +987,23 @@ class AbstractJobStore(ABC):
             if len(jobDescription.filesToDelete) != 0:
                 # Delete any files that should already be deleted
                 for fileID in jobDescription.filesToDelete:
-                    logger.critical("Removing file in job store: %s that was "
-                                    "marked for deletion but not previously removed" % fileID)
+                    logger.critical(
+                        "Removing file in job store: %s that was "
+                        "marked for deletion but not previously removed" % fileID
+                    )
                     self.delete_file(fileID)
                 jobDescription.filesToDelete = []
                 changed[0] = True

-            # For a job whose command is already executed, remove jobs from the
+            # For a job whose body has already executed, remove jobs from the
             # stack that are already deleted. This cleans up the case that the
             # jobDescription had successors to run, but had not been updated to
             # reflect this.
-            if jobDescription.command is None:
+            if not jobDescription.has_body():

                 def stackSizeFn() -> int:
                     return len(list(jobDescription.allSuccessors()))
+
                 startStackSize = stackSizeFn()
                 # Remove deleted jobs
                 jobDescription.filterSuccessors(haveJob)
@@ -959,16 +1029,25 @@ class AbstractJobStore(ABC):
                 assert isinstance(serviceJobDescription, ServiceJobDescription)

                 if flag == 1:
-                    logger.debug("Recreating a start service flag for job: %s, flag: %s",
-                                 jobStoreID, newFlag)
+                    logger.debug(
+                        "Recreating a start service flag for job: %s, flag: %s",
+                        jobStoreID,
+                        newFlag,
+                    )
                     serviceJobDescription.startJobStoreID = newFlag
                 elif flag == 2:
-                    logger.debug("Recreating a terminate service flag for job: %s, flag: %s",
-                                 jobStoreID, newFlag)
+                    logger.debug(
+                        "Recreating a terminate service flag for job: %s, flag: %s",
+                        jobStoreID,
+                        newFlag,
+                    )
                     serviceJobDescription.terminateJobStoreID = newFlag
                 else:
-                    logger.debug("Recreating a error service flag for job: %s, flag: %s",
-                                 jobStoreID, newFlag)
+                    logger.debug(
+                        "Recreating a error service flag for job: %s, flag: %s",
+                        jobStoreID,
+                        newFlag,
+                    )
                     assert flag == 3
                     serviceJobDescription.errorJobStoreID = newFlag

@@ -981,6 +1060,7 @@ class AbstractJobStore(ABC):

             def servicesSizeFn() -> int:
                 return len(jobDescription.services)
+
             startServicesSize = servicesSizeFn()

             def replaceFlagsIfNeeded(serviceJobDescription: JobDescription) -> None:
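The cleanup loop leans on a "prune, then compare sizes" idiom: `stackSizeFn`/`servicesSizeFn` are sampled before and after `filterSuccessors(haveJob)` or flag replacement, and a mutable `changed[0]` records whether the description needs rewriting. The same idea in miniature, with a hypothetical successor list:

```python
successors = ["job-1", "job-2", "job-3"]
existing = {"job-1", "job-3"}  # pretend job-2 was already deleted

before = len(successors)
successors = [s for s in successors if s in existing]  # filterSuccessors analogue
changed = len(successors) != before

print(successors, changed)  # ['job-1', 'job-3'] True
```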
@@ -1041,12 +1121,14 @@ class AbstractJobStore(ABC):

         # Remove any crufty stats/logging files from the previous run
         logger.debug("Discarding old statistics and logs...")
+
         # We have to manually discard the stream to avoid getting
         # stuck on a blocking write from the job store.
         def discardStream(stream: Union[IO[bytes], IO[str]]) -> None:
             """Read the stream 4K at a time until EOF, discarding all input."""
             while len(stream.read(4096)) != 0:
                 pass
+
         self.read_logs(discardStream)

         logger.debug("Job store is clean")
@@ -1058,7 +1140,7 @@ class AbstractJobStore(ABC):
     # existence of jobs
     ##########################################

-    @deprecated(new_function_name='assign_job_id')
+    @deprecated(new_function_name="assign_job_id")
     def assignID(self, jobDescription: JobDescription) -> None:
         return self.assign_job_id(jobDescription)

@@ -1082,7 +1164,7 @@ class AbstractJobStore(ABC):
         """
         yield

-    @deprecated(new_function_name='create_job')
+    @deprecated(new_function_name="create_job")
     def create(self, jobDescription: JobDescription) -> JobDescription:
         return self.create_job(jobDescription)

@@ -1098,7 +1180,7 @@ class AbstractJobStore(ABC):
         """
         raise NotImplementedError()

-    @deprecated(new_function_name='job_exists')
+    @deprecated(new_function_name="job_exists")
     def exists(self, jobStoreID: str) -> bool:
         return self.job_exists(jobStoreID)

@@ -1114,7 +1196,7 @@ class AbstractJobStore(ABC):
     # One year should be sufficient to finish any pipeline ;-)
     publicUrlExpiration = timedelta(days=365)

-    @deprecated(new_function_name='get_public_url')
+    @deprecated(new_function_name="get_public_url")
     def getPublicUrl(self, fileName: str) -> str:
         return self.get_public_url(fileName)

@@ -1133,7 +1215,7 @@ class AbstractJobStore(ABC):
         """
         raise NotImplementedError()

-    @deprecated(new_function_name='get_shared_public_url')
+    @deprecated(new_function_name="get_shared_public_url")
     def getSharedPublicUrl(self, sharedFileName: str) -> str:
         return self.get_shared_public_url(sharedFileName)

@@ -1155,7 +1237,7 @@ class AbstractJobStore(ABC):
         """
         raise NotImplementedError()

-    @deprecated(new_function_name='load_job')
+    @deprecated(new_function_name="load_job")
     def load(self, jobStoreID: str) -> JobDescription:
         return self.load_job(jobStoreID)

@@ -1175,7 +1257,7 @@ class AbstractJobStore(ABC):
         """
         raise NotImplementedError()

-    @deprecated(new_function_name='update_job')
+    @deprecated(new_function_name="update_job")
     def update(self, jobDescription: JobDescription) -> None:
         return self.update_job(jobDescription)

@@ -1190,7 +1272,7 @@ class AbstractJobStore(ABC):
         """
         raise NotImplementedError()

-    @deprecated(new_function_name='delete_job')
+    @deprecated(new_function_name="delete_job")
     def delete(self, jobStoreID: str) -> None:
         return self.delete_job(jobStoreID)

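Every camelCase method in this stretch survives only as a shim tagged `@deprecated(new_function_name=...)` that forwards to its snake_case replacement. Toil's real decorator lives in `toil.lib.compatibility`; a sketch with the same contract:

```python
import functools
import warnings


def deprecated(new_function_name: str):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            warnings.warn(
                f"{func.__name__} is deprecated; use {new_function_name} instead",
                DeprecationWarning,
                stacklevel=2,
            )
            return func(*args, **kwargs)
        return wrapper
    return decorator


class Store:
    def delete_job(self, job_id: str) -> None:
        print("deleting", job_id)

    @deprecated(new_function_name="delete_job")
    def delete(self, jobStoreID: str) -> None:
        return self.delete_job(jobStoreID)


Store().delete("job-42")  # warns, then delegates
```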
@@ -1227,12 +1309,19 @@ class AbstractJobStore(ABC):
     # associated with a given job.
     ##########################################

-    @deprecated(new_function_name='write_file')
-    def writeFile(self, localFilePath: str, jobStoreID: Optional[str] = None, cleanup: bool = False) -> str:
+    @deprecated(new_function_name="write_file")
+    def writeFile(
+        self,
+        localFilePath: str,
+        jobStoreID: Optional[str] = None,
+        cleanup: bool = False,
+    ) -> str:
         return self.write_file(localFilePath, jobStoreID, cleanup)

     @abstractmethod
-    def write_file(self, local_path: str, job_id: Optional[str] = None, cleanup: bool = False) -> str:
+    def write_file(
+        self, local_path: str, job_id: Optional[str] = None, cleanup: bool = False
+    ) -> str:
         """
         Takes a file (as a path) and places it in this job store. Returns an ID that can be used
         to retrieve the file at a later time. The file is written in a atomic manner.  It will
@@ -1263,19 +1352,27 @@ class AbstractJobStore(ABC):
         """
         raise NotImplementedError()

-    @deprecated(new_function_name='write_file_stream')
-    def writeFileStream(self, jobStoreID: Optional[str] = None, cleanup: bool = False, basename: Optional[str] = None,
-                        encoding: Optional[str] = None, errors: Optional[str] = None) -> ContextManager[Tuple[IO[bytes], str]]:
+    @deprecated(new_function_name="write_file_stream")
+    def writeFileStream(
+        self,
+        jobStoreID: Optional[str] = None,
+        cleanup: bool = False,
+        basename: Optional[str] = None,
+        encoding: Optional[str] = None,
+        errors: Optional[str] = None,
+    ) -> ContextManager[tuple[IO[bytes], str]]:
         return self.write_file_stream(jobStoreID, cleanup, basename, encoding, errors)

     @abstractmethod
     @contextmanager
-    def write_file_stream(self,
-                          job_id: Optional[str] = None,
-                          cleanup: bool = False,
-                          basename: Optional[str] = None,
-                          encoding: Optional[str] = None,
-                          errors: Optional[str] = None) -> Iterator[Tuple[IO[bytes], str]]:
+    def write_file_stream(
+        self,
+        job_id: Optional[str] = None,
+        cleanup: bool = False,
+        basename: Optional[str] = None,
+        encoding: Optional[str] = None,
+        errors: Optional[str] = None,
+    ) -> Iterator[tuple[IO[bytes], str]]:
         """
         Similar to writeFile, but returns a context manager yielding a tuple of
         1) a file handle which can be written to and 2) the ID of the resulting
@@ -1314,18 +1411,22 @@ class AbstractJobStore(ABC):
         """
         raise NotImplementedError()

-    @deprecated(new_function_name='get_empty_file_store_id')
-    def getEmptyFileStoreID(self,
-                            jobStoreID: Optional[str] = None,
-                            cleanup: bool = False,
-                            basename: Optional[str] = None) -> str:
+    @deprecated(new_function_name="get_empty_file_store_id")
+    def getEmptyFileStoreID(
+        self,
+        jobStoreID: Optional[str] = None,
+        cleanup: bool = False,
+        basename: Optional[str] = None,
+    ) -> str:
         return self.get_empty_file_store_id(jobStoreID, cleanup, basename)

     @abstractmethod
-    def get_empty_file_store_id(self,
-                                job_id: Optional[str] = None,
-                                cleanup: bool = False,
-                                basename: Optional[str] = None) -> str:
+    def get_empty_file_store_id(
+        self,
+        job_id: Optional[str] = None,
+        cleanup: bool = False,
+        basename: Optional[str] = None,
+    ) -> str:
         """
         Creates an empty file in the job store and returns its ID.
         Call to fileExists(getEmptyFileStoreID(jobStoreID)) will return True.
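`write_file_stream` is the streaming counterpart of `write_file`: its context manager yields both a writable handle and the ID the data will be stored under, so the caller can record the ID before the (atomic) write completes. A hedged usage sketch against any concrete job store:

```python
def store_greeting(job_store) -> str:
    # The yielded tuple is (handle, file_id); the write becomes visible
    # atomically when the context manager exits.
    with job_store.write_file_stream(cleanup=True, basename="greeting.txt") as (
        handle,
        file_id,
    ):
        handle.write(b"hello from a worker\n")
    return file_id
```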
@@ -1347,8 +1448,10 @@ class AbstractJobStore(ABC):
         """
         raise NotImplementedError()

-    @deprecated(new_function_name='read_file')
-    def readFile(self, jobStoreFileID: str, localFilePath: str, symlink: bool = False) -> None:
+    @deprecated(new_function_name="read_file")
+    def readFile(
+        self, jobStoreFileID: str, localFilePath: str, symlink: bool = False
+    ) -> None:
         return self.read_file(jobStoreFileID, localFilePath, symlink)

     @abstractmethod
@@ -1376,7 +1479,7 @@ class AbstractJobStore(ABC):
         """
         raise NotImplementedError()

-    @deprecated(new_function_name='read_file_stream')
+    @deprecated(new_function_name="read_file_stream")
     def readFileStream(
         self,
         jobStoreFileID: str,
@@ -1391,14 +1494,12 @@ class AbstractJobStore(ABC):
         file_id: Union[FileID, str],
         encoding: Literal[None] = None,
         errors: Optional[str] = None,
-    ) -> ContextManager[IO[bytes]]:
-        ...
+    ) -> ContextManager[IO[bytes]]: ...

     @overload
     def read_file_stream(
         self, file_id: Union[FileID, str], encoding: str, errors: Optional[str] = None
-    ) -> ContextManager[IO[str]]:
-        ...
+    ) -> ContextManager[IO[str]]: ...

     @abstractmethod
     def read_file_stream(
@@ -1424,7 +1525,7 @@ class AbstractJobStore(ABC):
         """
         raise NotImplementedError()

-    @deprecated(new_function_name='delete_file')
+    @deprecated(new_function_name="delete_file")
     def deleteFile(self, jobStoreFileID: str) -> None:
         return self.delete_file(jobStoreFileID)

@@ -1438,7 +1539,7 @@ class AbstractJobStore(ABC):
         """
         raise NotImplementedError()

-    @deprecated(new_function_name='file_exists')
+    @deprecated(new_function_name="file_exists")
     def fileExists(self, jobStoreFileID: str) -> bool:
         """Determine whether a file exists in this job store."""
         return self.file_exists(jobStoreFileID)
@@ -1452,7 +1553,7 @@ class AbstractJobStore(ABC):
         """
         raise NotImplementedError()

-    @deprecated(new_function_name='get_file_size')
+    @deprecated(new_function_name="get_file_size")
     def getFileSize(self, jobStoreFileID: str) -> int:
         """Get the size of the given file in bytes."""
         return self.get_file_size(jobStoreFileID)
@@ -1472,7 +1573,7 @@ class AbstractJobStore(ABC):
         """
         raise NotImplementedError()

-    @deprecated(new_function_name='update_file')
+    @deprecated(new_function_name="update_file")
     def updateFile(self, jobStoreFileID: str, localFilePath: str) -> None:
         """Replaces the existing version of a file in the job store."""
         return self.update_file(jobStoreFileID, localFilePath)
@@ -1493,19 +1594,20 @@ class AbstractJobStore(ABC):
         """
         raise NotImplementedError()

-    @deprecated(new_function_name='update_file_stream')
-    def updateFileStream(self,
-                         jobStoreFileID: str,
-                         encoding: Optional[str] = None,
-                         errors: Optional[str] = None) -> ContextManager[IO[Any]]:
+    @deprecated(new_function_name="update_file_stream")
+    def updateFileStream(
+        self,
+        jobStoreFileID: str,
+        encoding: Optional[str] = None,
+        errors: Optional[str] = None,
+    ) -> ContextManager[IO[Any]]:
         return self.update_file_stream(jobStoreFileID, encoding, errors)

     @abstractmethod
     @contextmanager
-    def update_file_stream(self,
-                           file_id: str,
-                           encoding: Optional[str] = None,
-                           errors: Optional[str] = None) -> Iterator[IO[Any]]:
+    def update_file_stream(
+        self, file_id: str, encoding: Optional[str] = None, errors: Optional[str] = None
+    ) -> Iterator[IO[Any]]:
         """
         Replaces the existing version of a file in the job store. Similar to writeFile, but
         returns a context manager yielding a file handle which can be written to. The
@@ -1531,20 +1633,29 @@ class AbstractJobStore(ABC):
     # with specific jobs.
     ##########################################

-    sharedFileNameRegex = re.compile(r'^[a-zA-Z0-9._-]+$')
+    sharedFileNameRegex = re.compile(r"^[a-zA-Z0-9._-]+$")

-    @deprecated(new_function_name='write_shared_file_stream')
-    def writeSharedFileStream(self, sharedFileName: str, isProtected: Optional[bool] = None,
-                              encoding: Optional[str] = None, errors: Optional[str] = None) -> ContextManager[IO[bytes]]:
-        return self.write_shared_file_stream(sharedFileName, isProtected, encoding, errors)
+    @deprecated(new_function_name="write_shared_file_stream")
+    def writeSharedFileStream(
+        self,
+        sharedFileName: str,
+        isProtected: Optional[bool] = None,
+        encoding: Optional[str] = None,
+        errors: Optional[str] = None,
+    ) -> ContextManager[IO[bytes]]:
+        return self.write_shared_file_stream(
+            sharedFileName, isProtected, encoding, errors
+        )

     @abstractmethod
     @contextmanager
-    def write_shared_file_stream(self,
-                                 shared_file_name: str,
-                                 encrypted: Optional[bool] = None,
-                                 encoding: Optional[str] = None,
-                                 errors: Optional[str] = None) -> Iterator[IO[bytes]]:
+    def write_shared_file_stream(
+        self,
+        shared_file_name: str,
+        encrypted: Optional[bool] = None,
+        encoding: Optional[str] = None,
+        errors: Optional[str] = None,
+    ) -> Iterator[IO[bytes]]:
         """
         Returns a context manager yielding a writable file handle to the global file referenced
         by the given name. File will be created in an atomic manner.
@@ -1569,19 +1680,23 @@ class AbstractJobStore(ABC):
         """
         raise NotImplementedError()

-    @deprecated(new_function_name='read_shared_file_stream')
-    def readSharedFileStream(self,
-                             sharedFileName: str,
-                             encoding: Optional[str] = None,
-                             errors: Optional[str] = None) -> ContextManager[IO[bytes]]:
+    @deprecated(new_function_name="read_shared_file_stream")
+    def readSharedFileStream(
+        self,
+        sharedFileName: str,
+        encoding: Optional[str] = None,
+        errors: Optional[str] = None,
+    ) -> ContextManager[IO[bytes]]:
         return self.read_shared_file_stream(sharedFileName, encoding, errors)

     @abstractmethod
     @contextmanager
-    def read_shared_file_stream(self,
-                                shared_file_name: str,
-                                encoding: Optional[str] = None,
-                                errors: Optional[str] = None) -> Iterator[IO[bytes]]:
+    def read_shared_file_stream(
+        self,
+        shared_file_name: str,
+        encoding: Optional[str] = None,
+        errors: Optional[str] = None,
+    ) -> Iterator[IO[bytes]]:
         """
         Returns a context manager yielding a readable file handle to the global file referenced
         by the given name.
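Shared file names are checked against `sharedFileNameRegex` before use, so only dotted, dashed alphanumeric names like `config.pickle` or `pid.log` pass; anything with a path separator is rejected. The check in isolation:

```python
import re

sharedFileNameRegex = re.compile(r"^[a-zA-Z0-9._-]+$")

for name in ("config.pickle", "pid.log", "../escape", "dir/file"):
    print(name, bool(sharedFileNameRegex.match(name)))
# config.pickle True / pid.log True / ../escape False / dir/file False
```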
@@ -1600,7 +1715,7 @@ class AbstractJobStore(ABC):
         """
         raise NotImplementedError()

-    @deprecated(new_function_name='write_logs')
+    @deprecated(new_function_name="write_logs")
     def writeStatsAndLogging(self, statsAndLoggingString: str) -> None:
         return self.write_logs(statsAndLoggingString)

@@ -1616,8 +1731,10 @@ class AbstractJobStore(ABC):
         """
         raise NotImplementedError()

-    @deprecated(new_function_name='read_logs')
-    def readStatsAndLogging(self, callback: Callable[..., Any], readAll: bool = False) -> int:
+    @deprecated(new_function_name="read_logs")
+    def readStatsAndLogging(
+        self, callback: Callable[..., Any], readAll: bool = False
+    ) -> int:
         return self.read_logs(callback, readAll)

     @abstractmethod
@@ -1652,8 +1769,8 @@ class AbstractJobStore(ABC):
         this method. Other methods will rely on always having the most current
         pid available. So far there is no reason to store any old pids.
         """
-        with self.write_shared_file_stream('pid.log') as f:
-            f.write(str(os.getpid()).encode('utf-8'))
+        with self.write_shared_file_stream("pid.log") as f:
+            f.write(str(os.getpid()).encode("utf-8"))

     def read_leader_pid(self) -> int:
         """
@@ -1661,7 +1778,7 @@ class AbstractJobStore(ABC):

         :raise NoSuchFileException: If the PID file doesn't exist.
         """
-        with self.read_shared_file_stream('pid.log') as f:
+        with self.read_shared_file_stream("pid.log") as f:
             return int(f.read().strip())

     def write_leader_node_id(self) -> None:
@@ -1670,7 +1787,7 @@ class AbstractJobStore(ABC):
         by the leader.
         """
         with self.write_shared_file_stream("leader_node_id.log") as f:
-            f.write(getNodeID().encode('utf-8'))
+            f.write(getNodeID().encode("utf-8"))

     def read_leader_node_id(self) -> str:
         """
@@ -1679,7 +1796,7 @@ class AbstractJobStore(ABC):
         :raise NoSuchFileException: If the node ID file doesn't exist.
         """
         with self.read_shared_file_stream("leader_node_id.log") as f:
-            return f.read().decode('utf-8').strip()
+            return f.read().decode("utf-8").strip()

     def write_kill_flag(self, kill: bool = False) -> None:
         """
@@ -1692,7 +1809,7 @@ class AbstractJobStore(ABC):
         workers are expected to be cleaned up by the leader.
         """
         with self.write_shared_file_stream("_toil_kill_flag") as f:
-            f.write(("YES" if kill else "NO").encode('utf-8'))
+            f.write(("YES" if kill else "NO").encode("utf-8"))

     def read_kill_flag(self) -> bool:
         """
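The PID, node-ID, and kill-flag helpers all funnel small pieces of leader state through shared files. `read_kill_flag`, which follows this hunk in the full file, reads back the `YES`/`NO` written here; its exact logic is assumed in the sketch below:

```python
def write_kill_flag(job_store, kill: bool = False) -> None:
    with job_store.write_shared_file_stream("_toil_kill_flag") as f:
        f.write(("YES" if kill else "NO").encode("utf-8"))


def read_kill_flag(job_store) -> bool:
    # Assumed inverse of the writer: anything other than "NO" means stop.
    with job_store.read_shared_file_stream("_toil_kill_flag") as f:
        return f.read().decode("utf-8") != "NO"
```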
@@ -1733,25 +1850,40 @@ class AbstractJobStore(ABC):
         if not cls._validateSharedFileName(sharedFileName):
             raise ValueError("Not a valid shared file name: '%s'." % sharedFileName)

+
 class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
     """
     A mostly fake JobStore to access URLs not really associated with real job
     stores.
     """

+    @classmethod
+    def _setup_ftp(cls) -> FtpFsAccess:
+        # FTP connections are not reused. Ideally, a thread should watch any reused FTP connections
+        # and close them when necessary
+        return FtpFsAccess()
+
     @classmethod
     def _supports_url(cls, url: ParseResult, export: bool = False) -> bool:
-        return url.scheme.lower() in ('http', 'https', 'ftp') and not export
+        return url.scheme.lower() in ("http", "https", "ftp") and not export

     @classmethod
     def _url_exists(cls, url: ParseResult) -> bool:
+        # Deal with FTP first to support user/password auth
+        if url.scheme.lower() == "ftp":
+            ftp = cls._setup_ftp()
+            return ftp.exists(url.geturl())
+
         try:
-
-            with cls._open_url(url):
+            with closing(urlopen(Request(url.geturl(), method="HEAD"))):
                 return True
-        except:
-            return False
-
+        except HTTPError as e:
+            if e.code in (404, 410):
+                return False
+            else:
+                raise
+        # Any other errors we should pass through because something really went
+        # wrong (e.g. server is broken today but file may usually exist)

     @classmethod
     @retry(
@@ -1761,17 +1893,19 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
         ]
     )
     def _get_size(cls, url: ParseResult) -> Optional[int]:
-        if url.scheme.lower() == 'ftp':
-
-
-
-
-
+        if url.scheme.lower() == "ftp":
+            ftp = cls._setup_ftp()
+            return ftp.size(url.geturl())
+
+        # just read the header for content length
+        resp = urlopen(Request(url.geturl(), method="HEAD"))
+        size = resp.info().get("content-length")
+        return int(size) if size is not None else None

     @classmethod
     def _read_from_url(
         cls, url: ParseResult, writable: Union[IO[bytes], IO[str]]
-    ) -> Tuple[int, bool]:
+    ) -> tuple[int, bool]:
         # We can't actually retry after we start writing.
         # TODO: Implement retry with byte range requests
         with cls._open_url(url) as readable:
@@ -1780,8 +1914,10 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
             # nested function can modify it without creating its own
             # local with the same name.
             size = [0]
+
            def count(l: int) -> None:
                 size[0] += l
+
             counter = WriteWatchingStream(writable)
             counter.onWrite(count)

@@ -1793,18 +1929,32 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
     @retry(
         errors=[
             BadStatusLine,
-            ErrorCondition(error=HTTPError, error_codes=[408, 500, 503]),
+            ErrorCondition(error=HTTPError, error_codes=[408, 429, 500, 502, 503]),
         ]
     )
     def _open_url(cls, url: ParseResult) -> IO[bytes]:
+        # Deal with FTP first so we support user/password auth
+        if url.scheme.lower() == "ftp":
+            ftp = cls._setup_ftp()
+            # we open in read mode as write mode is not supported
+            return ftp.open(url.geturl(), mode="r")
+
         try:
             return cast(IO[bytes], closing(urlopen(url.geturl())))
         except HTTPError as e:
-            if e.code == 404:
+            if e.code in (404, 410):
                 # Translate into a FileNotFoundError for detecting
-                # nonexistent files
+                # known nonexistent files
                 raise FileNotFoundError(str(url)) from e
             else:
+                # Other codes indicate a real problem with the server; we don't
+                # want to e.g. run a workflow without an optional input that
+                # the user specified a path to just because the server was
+                # busy.
+
+                # Sometimes we expect to see this when polling existence for
+                # inputs at guessed paths, so don't complain *too* loudly here.
+                logger.debug("Unusual status %d for URL %s", e.code, str(url))
                 raise

     @classmethod
@@ -1813,6 +1963,6 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
         return False

     @classmethod
-    def _list_url(cls, url: ParseResult) -> List[str]:
+    def _list_url(cls, url: ParseResult) -> list[str]:
         # TODO: Implement HTTP index parsing and FTP directory listing
         raise NotImplementedError("HTTP and FTP URLs cannot yet be listed")