toil 7.0.0__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +121 -83
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +137 -77
- toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
- toil/batchSystems/awsBatch.py +237 -128
- toil/batchSystems/cleanup_support.py +22 -16
- toil/batchSystems/contained_executor.py +30 -26
- toil/batchSystems/gridengine.py +85 -49
- toil/batchSystems/htcondor.py +164 -87
- toil/batchSystems/kubernetes.py +622 -386
- toil/batchSystems/local_support.py +17 -12
- toil/batchSystems/lsf.py +132 -79
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +288 -149
- toil/batchSystems/mesos/executor.py +77 -49
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +38 -29
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +293 -123
- toil/batchSystems/slurm.py +489 -137
- toil/batchSystems/torque.py +46 -32
- toil/bus.py +141 -73
- toil/common.py +630 -359
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1114 -532
- toil/cwl/utils.py +17 -22
- toil/deferred.py +62 -41
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +88 -57
- toil/fileStores/cachingFileStore.py +711 -247
- toil/fileStores/nonCachingFileStore.py +113 -75
- toil/job.py +988 -315
- toil/jobStores/abstractJobStore.py +387 -243
- toil/jobStores/aws/jobStore.py +727 -403
- toil/jobStores/aws/utils.py +161 -109
- toil/jobStores/conftest.py +1 -0
- toil/jobStores/fileJobStore.py +289 -151
- toil/jobStores/googleJobStore.py +137 -70
- toil/jobStores/utils.py +36 -15
- toil/leader.py +614 -269
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +55 -28
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +193 -58
- toil/lib/aws/utils.py +238 -218
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +83 -49
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +322 -209
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +4 -2
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +99 -11
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +65 -18
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +19 -7
- toil/lib/retry.py +115 -77
- toil/lib/threading.py +282 -80
- toil/lib/throttle.py +15 -14
- toil/options/common.py +834 -401
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +70 -19
- toil/provisioners/__init__.py +111 -46
- toil/provisioners/abstractProvisioner.py +322 -157
- toil/provisioners/aws/__init__.py +62 -30
- toil/provisioners/aws/awsProvisioner.py +980 -627
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +147 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +127 -61
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +148 -64
- toil/test/__init__.py +263 -179
- toil/test/batchSystems/batchSystemTest.py +438 -195
- toil/test/batchSystems/batch_system_plugin_test.py +18 -7
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +93 -47
- toil/test/cactus/test_cactus_integration.py +20 -22
- toil/test/cwl/cwlTest.py +271 -71
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/docs/scriptsTest.py +60 -34
- toil/test/jobStores/jobStoreTest.py +412 -235
- toil/test/lib/aws/test_iam.py +116 -48
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +57 -49
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/options.py +7 -2
- toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +81 -42
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +140 -100
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +33 -26
- toil/test/src/environmentTest.py +20 -10
- toil/test/src/fileStoreTest.py +538 -271
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +32 -17
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +120 -70
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +6 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +33 -16
- toil/test/utils/toilDebugTest.py +70 -58
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +239 -102
- toil/test/wdl/wdltoil_test.py +789 -148
- toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
- toil/toilState.py +52 -26
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +85 -25
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +251 -145
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +27 -14
- toil/utils/toilSshCluster.py +45 -22
- toil/utils/toilStats.py +75 -36
- toil/utils/toilStatus.py +226 -119
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +11 -11
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3513 -1052
- toil/worker.py +269 -128
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-7.0.0.dist-info/METADATA +0 -158
- toil-7.0.0.dist-info/RECORD +0 -244
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/LICENSE +0 -0
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
|
@@ -16,45 +16,39 @@ import os
|
|
|
16
16
|
import pickle
|
|
17
17
|
import re
|
|
18
18
|
import shutil
|
|
19
|
-
import sys
|
|
20
19
|
from abc import ABC, ABCMeta, abstractmethod
|
|
20
|
+
from collections.abc import Iterator, ValuesView
|
|
21
21
|
from contextlib import closing, contextmanager
|
|
22
22
|
from datetime import timedelta
|
|
23
23
|
from http.client import BadStatusLine
|
|
24
|
-
from typing import (
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
Union,
|
|
37
|
-
ValuesView,
|
|
38
|
-
cast,
|
|
39
|
-
overload)
|
|
40
|
-
|
|
41
|
-
if sys.version_info >= (3, 8):
|
|
42
|
-
from typing import Literal
|
|
43
|
-
else:
|
|
44
|
-
from typing_extensions import Literal
|
|
45
|
-
|
|
24
|
+
from typing import (
|
|
25
|
+
IO,
|
|
26
|
+
TYPE_CHECKING,
|
|
27
|
+
Any,
|
|
28
|
+
Callable,
|
|
29
|
+
ContextManager,
|
|
30
|
+
Literal,
|
|
31
|
+
Optional,
|
|
32
|
+
Union,
|
|
33
|
+
cast,
|
|
34
|
+
overload,
|
|
35
|
+
)
|
|
46
36
|
from urllib.error import HTTPError
|
|
47
37
|
from urllib.parse import ParseResult, urlparse
|
|
48
|
-
from urllib.request import urlopen
|
|
38
|
+
from urllib.request import urlopen, Request
|
|
49
39
|
from uuid import uuid4
|
|
50
40
|
|
|
51
41
|
from toil.common import Config, getNodeID, safeUnpickleFromStream
|
|
52
42
|
from toil.fileStores import FileID
|
|
53
|
-
from toil.job import (
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
43
|
+
from toil.job import (
|
|
44
|
+
CheckpointJobDescription,
|
|
45
|
+
JobDescription,
|
|
46
|
+
JobException,
|
|
47
|
+
ServiceJobDescription,
|
|
48
|
+
)
|
|
49
|
+
from toil.lib.ftp_utils import FtpFsAccess
|
|
57
50
|
from toil.lib.compatibility import deprecated
|
|
51
|
+
from toil.lib.exceptions import UnimplementedURLException
|
|
58
52
|
from toil.lib.io import WriteWatchingStream
|
|
59
53
|
from toil.lib.memoize import memoize
|
|
60
54
|
from toil.lib.retry import ErrorCondition, retry
|
|
@@ -67,18 +61,22 @@ logger = logging.getLogger(__name__)
|
|
|
67
61
|
try:
|
|
68
62
|
from botocore.exceptions import ProxyConnectionError
|
|
69
63
|
except ImportError:
|
|
64
|
+
|
|
70
65
|
class ProxyConnectionError(BaseException): # type: ignore
|
|
71
66
|
"""Dummy class."""
|
|
72
67
|
|
|
68
|
+
|
|
73
69
|
class LocatorException(Exception):
|
|
74
70
|
"""
|
|
75
71
|
Base exception class for all locator exceptions.
|
|
76
72
|
For example, job store/aws bucket exceptions where they already exist
|
|
77
73
|
"""
|
|
78
|
-
|
|
74
|
+
|
|
75
|
+
def __init__(self, error_msg: str, locator: str, prefix: Optional[str] = None):
|
|
79
76
|
full_locator = locator if prefix is None else f"{prefix}:{locator}"
|
|
80
77
|
super().__init__(error_msg % full_locator)
|
|
81
78
|
|
|
79
|
+
|
|
82
80
|
class InvalidImportExportUrlException(Exception):
|
|
83
81
|
def __init__(self, url: ParseResult) -> None:
|
|
84
82
|
"""
|
|
@@ -86,24 +84,10 @@ class InvalidImportExportUrlException(Exception):
|
|
|
86
84
|
"""
|
|
87
85
|
super().__init__("The URL '%s' is invalid." % url.geturl())
|
|
88
86
|
|
|
89
|
-
class UnimplementedURLException(RuntimeError):
|
|
90
|
-
def __init__(self, url: ParseResult, operation: str) -> None:
|
|
91
|
-
"""
|
|
92
|
-
Make a new exception to report that a URL scheme is not implemented, or
|
|
93
|
-
that the implementation can't be loaded because its dependencies are
|
|
94
|
-
not installed.
|
|
95
|
-
|
|
96
|
-
:param url: The given URL
|
|
97
|
-
:param operation: Whether we are trying to 'import' or 'export'
|
|
98
|
-
"""
|
|
99
|
-
super().__init__(
|
|
100
|
-
f"No available job store implementation can {operation} the URL "
|
|
101
|
-
f"'{url.geturl()}'. Ensure Toil has been installed "
|
|
102
|
-
f"with the appropriate extras."
|
|
103
|
-
)
|
|
104
87
|
|
|
105
88
|
class NoSuchJobException(Exception):
|
|
106
89
|
"""Indicates that the specified job does not exist."""
|
|
90
|
+
|
|
107
91
|
def __init__(self, jobStoreID: FileID):
|
|
108
92
|
"""
|
|
109
93
|
:param str jobStoreID: the jobStoreID that was mistakenly assumed to exist
|
|
@@ -113,17 +97,21 @@ class NoSuchJobException(Exception):
|
|
|
113
97
|
|
|
114
98
|
class ConcurrentFileModificationException(Exception):
|
|
115
99
|
"""Indicates that the file was attempted to be modified by multiple processes at once."""
|
|
100
|
+
|
|
116
101
|
def __init__(self, jobStoreFileID: FileID):
|
|
117
102
|
"""
|
|
118
103
|
:param jobStoreFileID: the ID of the file that was modified by multiple workers
|
|
119
104
|
or processes concurrently
|
|
120
105
|
"""
|
|
121
|
-
super().__init__(
|
|
106
|
+
super().__init__("Concurrent update to file %s detected." % jobStoreFileID)
|
|
122
107
|
|
|
123
108
|
|
|
124
109
|
class NoSuchFileException(Exception):
|
|
125
110
|
"""Indicates that the specified file does not exist."""
|
|
126
|
-
|
|
111
|
+
|
|
112
|
+
def __init__(
|
|
113
|
+
self, jobStoreFileID: FileID, customName: Optional[str] = None, *extra: Any
|
|
114
|
+
):
|
|
127
115
|
"""
|
|
128
116
|
:param jobStoreFileID: the ID of the file that was mistakenly assumed to exist
|
|
129
117
|
:param customName: optionally, an alternate name for the nonexistent file
|
|
@@ -146,22 +134,31 @@ class NoSuchFileException(Exception):
|
|
|
146
134
|
|
|
147
135
|
class NoSuchJobStoreException(LocatorException):
|
|
148
136
|
"""Indicates that the specified job store does not exist."""
|
|
137
|
+
|
|
149
138
|
def __init__(self, locator: str, prefix: str):
|
|
150
139
|
"""
|
|
151
140
|
:param str locator: The location of the job store
|
|
152
141
|
"""
|
|
153
|
-
super().__init__(
|
|
142
|
+
super().__init__(
|
|
143
|
+
"The job store '%s' does not exist, so there is nothing to restart.",
|
|
144
|
+
locator,
|
|
145
|
+
prefix,
|
|
146
|
+
)
|
|
154
147
|
|
|
155
148
|
|
|
156
149
|
class JobStoreExistsException(LocatorException):
|
|
157
150
|
"""Indicates that the specified job store already exists."""
|
|
151
|
+
|
|
158
152
|
def __init__(self, locator: str, prefix: str):
|
|
159
153
|
"""
|
|
160
154
|
:param str locator: The location of the job store
|
|
161
155
|
"""
|
|
162
156
|
super().__init__(
|
|
163
157
|
"The job store '%s' already exists. Use --restart to resume the workflow, or remove "
|
|
164
|
-
"the job store with 'toil clean' to start the workflow from scratch.",
|
|
158
|
+
"the job store with 'toil clean' to start the workflow from scratch.",
|
|
159
|
+
locator,
|
|
160
|
+
prefix,
|
|
161
|
+
)
|
|
165
162
|
|
|
166
163
|
|
|
167
164
|
class AbstractJobStore(ABC):
|
|
@@ -213,7 +210,7 @@ class AbstractJobStore(ABC):
|
|
|
213
210
|
self.__config = config
|
|
214
211
|
self.write_config()
|
|
215
212
|
|
|
216
|
-
@deprecated(new_function_name=
|
|
213
|
+
@deprecated(new_function_name="write_config")
|
|
217
214
|
def writeConfig(self) -> None:
|
|
218
215
|
return self.write_config()
|
|
219
216
|
|
|
@@ -222,7 +219,9 @@ class AbstractJobStore(ABC):
|
|
|
222
219
|
Persists the value of the :attr:`AbstractJobStore.config` attribute to the
|
|
223
220
|
job store, so that it can be retrieved later by other instances of this class.
|
|
224
221
|
"""
|
|
225
|
-
with self.write_shared_file_stream(
|
|
222
|
+
with self.write_shared_file_stream(
|
|
223
|
+
"config.pickle", encrypted=False
|
|
224
|
+
) as fileHandle:
|
|
226
225
|
pickle.dump(self.__config, fileHandle, pickle.HIGHEST_PROTOCOL)
|
|
227
226
|
|
|
228
227
|
def resume(self) -> None:
|
|
@@ -232,7 +231,7 @@ class AbstractJobStore(ABC):
|
|
|
232
231
|
|
|
233
232
|
:raises NoSuchJobStoreException: if the physical storage for this job store doesn't exist
|
|
234
233
|
"""
|
|
235
|
-
with self.read_shared_file_stream(
|
|
234
|
+
with self.read_shared_file_stream("config.pickle") as fileHandle:
|
|
236
235
|
config = safeUnpickleFromStream(fileHandle)
|
|
237
236
|
assert config.workflowID is not None
|
|
238
237
|
self.__config = config
|
|
@@ -250,9 +249,9 @@ class AbstractJobStore(ABC):
|
|
|
250
249
|
"""
|
|
251
250
|
return self.__locator
|
|
252
251
|
|
|
253
|
-
rootJobStoreIDFileName =
|
|
252
|
+
rootJobStoreIDFileName = "rootJobStoreID"
|
|
254
253
|
|
|
255
|
-
@deprecated(new_function_name=
|
|
254
|
+
@deprecated(new_function_name="set_root_job")
|
|
256
255
|
def setRootJob(self, rootJobStoreID: FileID) -> None:
|
|
257
256
|
"""Set the root job of the workflow backed by this job store."""
|
|
258
257
|
return self.set_root_job(rootJobStoreID)
|
|
@@ -264,9 +263,9 @@ class AbstractJobStore(ABC):
|
|
|
264
263
|
:param job_id: The ID of the job to set as root
|
|
265
264
|
"""
|
|
266
265
|
with self.write_shared_file_stream(self.rootJobStoreIDFileName) as f:
|
|
267
|
-
f.write(job_id.encode(
|
|
266
|
+
f.write(job_id.encode("utf-8"))
|
|
268
267
|
|
|
269
|
-
@deprecated(new_function_name=
|
|
268
|
+
@deprecated(new_function_name="load_root_job")
|
|
270
269
|
def loadRootJob(self) -> JobDescription:
|
|
271
270
|
return self.load_root_job()
|
|
272
271
|
|
|
@@ -281,16 +280,18 @@ class AbstractJobStore(ABC):
|
|
|
281
280
|
"""
|
|
282
281
|
try:
|
|
283
282
|
with self.read_shared_file_stream(self.rootJobStoreIDFileName) as f:
|
|
284
|
-
rootJobStoreID = f.read().decode(
|
|
283
|
+
rootJobStoreID = f.read().decode("utf-8")
|
|
285
284
|
except NoSuchFileException:
|
|
286
|
-
raise JobException(
|
|
285
|
+
raise JobException("No job has been set as the root in this job store")
|
|
287
286
|
if not self.job_exists(rootJobStoreID):
|
|
288
|
-
raise JobException(
|
|
289
|
-
|
|
287
|
+
raise JobException(
|
|
288
|
+
"The root job '%s' doesn't exist. Either the Toil workflow "
|
|
289
|
+
"is finished or has never been started" % rootJobStoreID
|
|
290
|
+
)
|
|
290
291
|
return self.load_job(rootJobStoreID)
|
|
291
292
|
|
|
292
293
|
# FIXME: This is only used in tests, why do we have it?
|
|
293
|
-
@deprecated(new_function_name=
|
|
294
|
+
@deprecated(new_function_name="create_root_job")
|
|
294
295
|
def createRootJob(self, desc: JobDescription) -> JobDescription:
|
|
295
296
|
return self.create_root_job(desc)
|
|
296
297
|
|
|
@@ -307,7 +308,7 @@ class AbstractJobStore(ABC):
|
|
|
307
308
|
self.set_root_job(job_description.jobStoreID)
|
|
308
309
|
return job_description
|
|
309
310
|
|
|
310
|
-
@deprecated(new_function_name=
|
|
311
|
+
@deprecated(new_function_name="get_root_job_return_value")
|
|
311
312
|
def getRootJobReturnValue(self) -> Any:
|
|
312
313
|
return self.get_root_job_return_value()
|
|
313
314
|
|
|
@@ -318,12 +319,12 @@ class AbstractJobStore(ABC):
|
|
|
318
319
|
Raises an exception if the root job hasn't fulfilled its promise yet.
|
|
319
320
|
"""
|
|
320
321
|
# Parse out the return value from the root job
|
|
321
|
-
with self.read_shared_file_stream(
|
|
322
|
+
with self.read_shared_file_stream("rootJobReturnValue") as fH:
|
|
322
323
|
return safeUnpickleFromStream(fH)
|
|
323
324
|
|
|
324
325
|
@staticmethod
|
|
325
326
|
@memoize
|
|
326
|
-
def _get_job_store_classes() ->
|
|
327
|
+
def _get_job_store_classes() -> list["AbstractJobStore"]:
|
|
327
328
|
"""
|
|
328
329
|
A list of concrete AbstractJobStore implementations whose dependencies are installed.
|
|
329
330
|
|
|
@@ -333,23 +334,30 @@ class AbstractJobStore(ABC):
|
|
|
333
334
|
"toil.jobStores.fileJobStore.FileJobStore",
|
|
334
335
|
"toil.jobStores.googleJobStore.GoogleJobStore",
|
|
335
336
|
"toil.jobStores.aws.jobStore.AWSJobStore",
|
|
336
|
-
"toil.jobStores.abstractJobStore.JobStoreSupport"
|
|
337
|
+
"toil.jobStores.abstractJobStore.JobStoreSupport",
|
|
338
|
+
)
|
|
337
339
|
jobStoreClasses = []
|
|
338
340
|
for className in jobStoreClassNames:
|
|
339
|
-
moduleName, className = className.rsplit(
|
|
341
|
+
moduleName, className = className.rsplit(".", 1)
|
|
340
342
|
from importlib import import_module
|
|
343
|
+
|
|
341
344
|
try:
|
|
342
345
|
module = import_module(moduleName)
|
|
343
346
|
except (ImportError, ProxyConnectionError):
|
|
344
|
-
logger.debug(
|
|
345
|
-
|
|
347
|
+
logger.debug(
|
|
348
|
+
"Unable to import '%s' as is expected if the corresponding extra was "
|
|
349
|
+
"omitted at installation time.",
|
|
350
|
+
moduleName,
|
|
351
|
+
)
|
|
346
352
|
else:
|
|
347
353
|
jobStoreClass = getattr(module, className)
|
|
348
354
|
jobStoreClasses.append(jobStoreClass)
|
|
349
355
|
return jobStoreClasses
|
|
350
356
|
|
|
351
357
|
@classmethod
|
|
352
|
-
def _findJobStoreForUrl(
|
|
358
|
+
def _findJobStoreForUrl(
|
|
359
|
+
cls, url: ParseResult, export: bool = False
|
|
360
|
+
) -> "AbstractJobStore":
|
|
353
361
|
"""
|
|
354
362
|
Returns the AbstractJobStore subclass that supports the given URL.
|
|
355
363
|
|
|
@@ -368,46 +376,58 @@ class AbstractJobStore(ABC):
|
|
|
368
376
|
# returns a file ID. Explain this to MyPy.
|
|
369
377
|
|
|
370
378
|
@overload
|
|
371
|
-
def importFile(
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
379
|
+
def importFile(
|
|
380
|
+
self,
|
|
381
|
+
srcUrl: str,
|
|
382
|
+
sharedFileName: str,
|
|
383
|
+
hardlink: bool = False,
|
|
384
|
+
symlink: bool = True,
|
|
385
|
+
) -> None: ...
|
|
376
386
|
|
|
377
387
|
@overload
|
|
378
|
-
def importFile(
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
388
|
+
def importFile(
|
|
389
|
+
self,
|
|
390
|
+
srcUrl: str,
|
|
391
|
+
sharedFileName: None = None,
|
|
392
|
+
hardlink: bool = False,
|
|
393
|
+
symlink: bool = True,
|
|
394
|
+
) -> FileID: ...
|
|
395
|
+
|
|
396
|
+
@deprecated(new_function_name="import_file")
|
|
397
|
+
def importFile(
|
|
398
|
+
self,
|
|
399
|
+
srcUrl: str,
|
|
400
|
+
sharedFileName: Optional[str] = None,
|
|
401
|
+
hardlink: bool = False,
|
|
402
|
+
symlink: bool = True,
|
|
403
|
+
) -> Optional[FileID]:
|
|
390
404
|
return self.import_file(srcUrl, sharedFileName, hardlink, symlink)
|
|
391
405
|
|
|
392
406
|
@overload
|
|
393
|
-
def import_file(
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
407
|
+
def import_file(
|
|
408
|
+
self,
|
|
409
|
+
src_uri: str,
|
|
410
|
+
shared_file_name: str,
|
|
411
|
+
hardlink: bool = False,
|
|
412
|
+
symlink: bool = True,
|
|
413
|
+
) -> None: ...
|
|
398
414
|
|
|
399
415
|
@overload
|
|
400
|
-
def import_file(
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
416
|
+
def import_file(
|
|
417
|
+
self,
|
|
418
|
+
src_uri: str,
|
|
419
|
+
shared_file_name: None = None,
|
|
420
|
+
hardlink: bool = False,
|
|
421
|
+
symlink: bool = True,
|
|
422
|
+
) -> FileID: ...
|
|
423
|
+
|
|
424
|
+
def import_file(
|
|
425
|
+
self,
|
|
426
|
+
src_uri: str,
|
|
427
|
+
shared_file_name: Optional[str] = None,
|
|
428
|
+
hardlink: bool = False,
|
|
429
|
+
symlink: bool = True,
|
|
430
|
+
) -> Optional[FileID]:
|
|
411
431
|
"""
|
|
412
432
|
Imports the file at the given URL into job store. The ID of the newly imported file is
|
|
413
433
|
returned. If the name of a shared file name is provided, the file will be imported as
|
|
@@ -445,18 +465,23 @@ class AbstractJobStore(ABC):
|
|
|
445
465
|
# subclasses of AbstractJobStore.
|
|
446
466
|
parseResult = urlparse(src_uri)
|
|
447
467
|
otherCls = self._findJobStoreForUrl(parseResult)
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
468
|
+
logger.info("Importing input %s...", src_uri)
|
|
469
|
+
return self._import_file(
|
|
470
|
+
otherCls,
|
|
471
|
+
parseResult,
|
|
472
|
+
shared_file_name=shared_file_name,
|
|
473
|
+
hardlink=hardlink,
|
|
474
|
+
symlink=symlink,
|
|
475
|
+
)
|
|
476
|
+
|
|
477
|
+
def _import_file(
|
|
478
|
+
self,
|
|
479
|
+
otherCls: "AbstractJobStore",
|
|
480
|
+
uri: ParseResult,
|
|
481
|
+
shared_file_name: Optional[str] = None,
|
|
482
|
+
hardlink: bool = False,
|
|
483
|
+
symlink: bool = True,
|
|
484
|
+
) -> Optional[FileID]:
|
|
460
485
|
"""
|
|
461
486
|
Import the file at the given URL using the given job store class to retrieve that file.
|
|
462
487
|
See also :meth:`.importFile`. This method applies a generic approach to importing: it
|
|
@@ -486,7 +511,7 @@ class AbstractJobStore(ABC):
|
|
|
486
511
|
otherCls._read_from_url(uri, writable)
|
|
487
512
|
return None
|
|
488
513
|
|
|
489
|
-
@deprecated(new_function_name=
|
|
514
|
+
@deprecated(new_function_name="export_file")
|
|
490
515
|
def exportFile(self, jobStoreFileID: FileID, dstUrl: str) -> None:
|
|
491
516
|
return self.export_file(jobStoreFileID, dstUrl)
|
|
492
517
|
|
|
@@ -505,13 +530,17 @@ class AbstractJobStore(ABC):
|
|
|
505
530
|
:param str file_id: The id of the file in the job store that should be exported.
|
|
506
531
|
|
|
507
532
|
:param str dst_uri: URL that points to a file or object in the storage mechanism of a
|
|
508
|
-
supported URL scheme e.g. a blob in an AWS s3 bucket.
|
|
533
|
+
supported URL scheme e.g. a blob in an AWS s3 bucket. May also be a local path.
|
|
509
534
|
"""
|
|
535
|
+
from toil.common import Toil
|
|
536
|
+
dst_uri = Toil.normalize_uri(dst_uri)
|
|
510
537
|
parseResult = urlparse(dst_uri)
|
|
511
538
|
otherCls = self._findJobStoreForUrl(parseResult, export=True)
|
|
512
539
|
self._export_file(otherCls, file_id, parseResult)
|
|
513
540
|
|
|
514
|
-
def _export_file(
|
|
541
|
+
def _export_file(
|
|
542
|
+
self, otherCls: "AbstractJobStore", jobStoreFileID: FileID, url: ParseResult
|
|
543
|
+
) -> None:
|
|
515
544
|
"""
|
|
516
545
|
Refer to exportFile docstring for information about this method.
|
|
517
546
|
|
|
@@ -526,7 +555,9 @@ class AbstractJobStore(ABC):
|
|
|
526
555
|
"""
|
|
527
556
|
self._default_export_file(otherCls, jobStoreFileID, url)
|
|
528
557
|
|
|
529
|
-
def _default_export_file(
|
|
558
|
+
def _default_export_file(
|
|
559
|
+
self, otherCls: "AbstractJobStore", jobStoreFileID: FileID, url: ParseResult
|
|
560
|
+
) -> None:
|
|
530
561
|
"""
|
|
531
562
|
Refer to exportFile docstring for information about this method.
|
|
532
563
|
|
|
@@ -541,7 +572,7 @@ class AbstractJobStore(ABC):
|
|
|
541
572
|
"""
|
|
542
573
|
executable = False
|
|
543
574
|
with self.read_file_stream(jobStoreFileID) as readable:
|
|
544
|
-
if getattr(jobStoreFileID,
|
|
575
|
+
if getattr(jobStoreFileID, "executable", False):
|
|
545
576
|
executable = jobStoreFileID.executable
|
|
546
577
|
otherCls._write_to_url(readable, url, executable)
|
|
547
578
|
|
|
@@ -550,6 +581,8 @@ class AbstractJobStore(ABC):
|
|
|
550
581
|
"""
|
|
551
582
|
Return True if the file at the given URI exists, and False otherwise.
|
|
552
583
|
|
|
584
|
+
May raise an error if file existence cannot be determined.
|
|
585
|
+
|
|
553
586
|
:param src_uri: URL that points to a file or object in the storage
|
|
554
587
|
mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
|
|
555
588
|
"""
|
|
@@ -580,7 +613,7 @@ class AbstractJobStore(ABC):
|
|
|
580
613
|
return otherCls._get_is_directory(parseResult)
|
|
581
614
|
|
|
582
615
|
@classmethod
|
|
583
|
-
def list_url(cls, src_uri: str) ->
|
|
616
|
+
def list_url(cls, src_uri: str) -> list[str]:
|
|
584
617
|
"""
|
|
585
618
|
List the directory at the given URL. Returned path components can be
|
|
586
619
|
joined with '/' onto the passed URL to form new URLs. Those that end in
|
|
@@ -605,7 +638,7 @@ class AbstractJobStore(ABC):
|
|
|
605
638
|
return otherCls._list_url(parseResult)
|
|
606
639
|
|
|
607
640
|
@classmethod
|
|
608
|
-
def read_from_url(cls, src_uri: str, writable: IO[bytes]) ->
|
|
641
|
+
def read_from_url(cls, src_uri: str, writable: IO[bytes]) -> tuple[int, bool]:
|
|
609
642
|
"""
|
|
610
643
|
Read the given URL and write its content into the given writable stream.
|
|
611
644
|
|
|
@@ -636,6 +669,8 @@ class AbstractJobStore(ABC):
|
|
|
636
669
|
def _url_exists(cls, url: ParseResult) -> bool:
|
|
637
670
|
"""
|
|
638
671
|
Return True if the item at the given URL exists, and False otherwise.
|
|
672
|
+
|
|
673
|
+
May raise an error if file existence cannot be determined.
|
|
639
674
|
"""
|
|
640
675
|
raise NotImplementedError(f"No implementation for {url}")
|
|
641
676
|
|
|
@@ -663,7 +698,7 @@ class AbstractJobStore(ABC):
|
|
|
663
698
|
|
|
664
699
|
@classmethod
|
|
665
700
|
@abstractmethod
|
|
666
|
-
def _read_from_url(cls, url: ParseResult, writable: IO[bytes]) ->
|
|
701
|
+
def _read_from_url(cls, url: ParseResult, writable: IO[bytes]) -> tuple[int, bool]:
|
|
667
702
|
"""
|
|
668
703
|
Reads the contents of the object at the specified location and writes it to the given
|
|
669
704
|
writable stream.
|
|
@@ -683,7 +718,7 @@ class AbstractJobStore(ABC):
|
|
|
683
718
|
|
|
684
719
|
@classmethod
|
|
685
720
|
@abstractmethod
|
|
686
|
-
def _list_url(cls, url: ParseResult) ->
|
|
721
|
+
def _list_url(cls, url: ParseResult) -> list[str]:
|
|
687
722
|
"""
|
|
688
723
|
List the contents of the given URL, which may or may not end in '/'
|
|
689
724
|
|
|
@@ -715,7 +750,12 @@ class AbstractJobStore(ABC):
|
|
|
715
750
|
|
|
716
751
|
@classmethod
|
|
717
752
|
@abstractmethod
|
|
718
|
-
def _write_to_url(
|
|
753
|
+
def _write_to_url(
|
|
754
|
+
cls,
|
|
755
|
+
readable: Union[IO[bytes], IO[str]],
|
|
756
|
+
url: ParseResult,
|
|
757
|
+
executable: bool = False,
|
|
758
|
+
) -> None:
|
|
719
759
|
"""
|
|
720
760
|
Reads the contents of the given readable stream and writes it to the object at the
|
|
721
761
|
specified location. Raises FileNotFoundError if the URL doesn't exist.
|
|
@@ -762,11 +802,11 @@ class AbstractJobStore(ABC):
|
|
|
762
802
|
"""
|
|
763
803
|
raise NotImplementedError()
|
|
764
804
|
|
|
765
|
-
@deprecated(new_function_name=
|
|
766
|
-
def getEnv(self) ->
|
|
805
|
+
@deprecated(new_function_name="get_env")
|
|
806
|
+
def getEnv(self) -> dict[str, str]:
|
|
767
807
|
return self.get_env()
|
|
768
808
|
|
|
769
|
-
def get_env(self) ->
|
|
809
|
+
def get_env(self) -> dict[str, str]:
|
|
770
810
|
"""
|
|
771
811
|
Returns a dictionary of environment variables that this job store requires to be set in
|
|
772
812
|
order to function properly on a worker.
|
|
@@ -777,7 +817,7 @@ class AbstractJobStore(ABC):
|
|
|
777
817
|
|
|
778
818
|
# Cleanup functions
|
|
779
819
|
def clean(
|
|
780
|
-
self, jobCache: Optional[
|
|
820
|
+
self, jobCache: Optional[dict[Union[str, "TemporaryID"], JobDescription]] = None
|
|
781
821
|
) -> JobDescription:
|
|
782
822
|
"""
|
|
783
823
|
Function to cleanup the state of a job store after a restart.
|
|
@@ -805,7 +845,9 @@ class AbstractJobStore(ABC):
|
|
|
805
845
|
return self.load_job(jobId)
|
|
806
846
|
|
|
807
847
|
def haveJob(jobId: str) -> bool:
|
|
808
|
-
assert
|
|
848
|
+
assert (
|
|
849
|
+
len(jobId) > 1
|
|
850
|
+
), f"Job ID {jobId} too short; is a string being used as a list?"
|
|
809
851
|
if jobCache is not None:
|
|
810
852
|
if jobId in jobCache:
|
|
811
853
|
return True
|
|
@@ -825,13 +867,15 @@ class AbstractJobStore(ABC):
|
|
|
825
867
|
jobCache[str(jobDescription.jobStoreID)] = jobDescription
|
|
826
868
|
self.update_job(jobDescription)
|
|
827
869
|
|
|
828
|
-
def getJobDescriptions() ->
|
|
870
|
+
def getJobDescriptions() -> (
|
|
871
|
+
Union[ValuesView[JobDescription], Iterator[JobDescription]]
|
|
872
|
+
):
|
|
829
873
|
if jobCache is not None:
|
|
830
874
|
return jobCache.values()
|
|
831
875
|
else:
|
|
832
876
|
return self.jobs()
|
|
833
877
|
|
|
834
|
-
def get_jobs_reachable_from_root() ->
|
|
878
|
+
def get_jobs_reachable_from_root() -> set[str]:
|
|
835
879
|
"""
|
|
836
880
|
Traverse the job graph from the root job and return a flattened set of all active jobstore IDs.
|
|
837
881
|
|
|
@@ -841,8 +885,7 @@ class AbstractJobStore(ABC):
|
|
|
841
885
|
# Iterate from the root JobDescription and collate all jobs
|
|
842
886
|
# that are reachable from it.
|
|
843
887
|
root_job_description = self.load_root_job()
|
|
844
|
-
reachable_from_root:
|
|
845
|
-
|
|
888
|
+
reachable_from_root: set[str] = set()
|
|
846
889
|
|
|
847
890
|
for merged_in in root_job_description.get_chain():
|
|
848
891
|
# Add the job itself and any other jobs that chained with it.
|
|
@@ -854,7 +897,6 @@ class AbstractJobStore(ABC):
|
|
|
854
897
|
if haveJob(service_job_store_id):
|
|
855
898
|
reachable_from_root.add(service_job_store_id)
|
|
856
899
|
|
|
857
|
-
|
|
858
900
|
# Unprocessed means it might have successor jobs we need to add.
|
|
859
901
|
unprocessed_job_descriptions = [root_job_description]
|
|
860
902
|
|
|
@@ -867,15 +909,24 @@ class AbstractJobStore(ABC):
|
|
|
867
909
|
# exploring them, since we took their successors.
|
|
868
910
|
reachable_from_root.add(merged_in.job_store_id)
|
|
869
911
|
for successor_job_store_id in job_description.allSuccessors():
|
|
870
|
-
if
|
|
871
|
-
|
|
912
|
+
if (
|
|
913
|
+
successor_job_store_id not in reachable_from_root
|
|
914
|
+
and haveJob(successor_job_store_id)
|
|
915
|
+
):
|
|
916
|
+
successor_job_description = getJobDescription(
|
|
917
|
+
successor_job_store_id
|
|
918
|
+
)
|
|
872
919
|
|
|
873
920
|
# Add all of the successor's linked service jobs as well.
|
|
874
|
-
for
|
|
921
|
+
for (
|
|
922
|
+
service_job_store_id
|
|
923
|
+
) in successor_job_description.services:
|
|
875
924
|
if haveJob(service_job_store_id):
|
|
876
925
|
reachable_from_root.add(service_job_store_id)
|
|
877
926
|
|
|
878
|
-
new_job_descriptions_to_process.append(
|
|
927
|
+
new_job_descriptions_to_process.append(
|
|
928
|
+
successor_job_description
|
|
929
|
+
)
|
|
879
930
|
unprocessed_job_descriptions = new_job_descriptions_to_process
|
|
880
931
|
|
|
881
932
|
logger.debug(f"{len(reachable_from_root)} jobs reachable from root.")
|
|
@@ -885,22 +936,32 @@ class AbstractJobStore(ABC):
|
|
|
885
936
|
|
|
886
937
|
# Cleanup jobs that are not reachable from the root, and therefore orphaned
|
|
887
938
|
# TODO: Avoid reiterating reachable_from_root (which may be very large)
|
|
888
|
-
unreachable = [
|
|
939
|
+
unreachable = [
|
|
940
|
+
x for x in getJobDescriptions() if x.jobStoreID not in reachable_from_root
|
|
941
|
+
]
|
|
889
942
|
for jobDescription in unreachable:
|
|
890
943
|
# clean up any associated files before deletion
|
|
891
944
|
for fileID in jobDescription.filesToDelete:
|
|
892
945
|
# Delete any files that should already be deleted
|
|
893
|
-
logger.warning(
|
|
946
|
+
logger.warning(
|
|
947
|
+
f"Deleting file '{fileID}'. It is marked for deletion but has not yet been removed."
|
|
948
|
+
)
|
|
894
949
|
self.delete_file(fileID)
|
|
895
950
|
# Delete the job from us and the cache
|
|
896
951
|
deleteJob(str(jobDescription.jobStoreID))
|
|
897
952
|
|
|
898
|
-
jobDescriptionsReachableFromRoot = {
|
|
953
|
+
jobDescriptionsReachableFromRoot = {
|
|
954
|
+
id: getJobDescription(id) for id in reachable_from_root
|
|
955
|
+
}
|
|
899
956
|
|
|
900
957
|
# Clean up any checkpoint jobs -- delete any successors it
|
|
901
958
|
# may have launched, and restore the job to a pristine state
|
|
902
959
|
jobsDeletedByCheckpoints = set()
|
|
903
|
-
for jobDescription in [
|
|
960
|
+
for jobDescription in [
|
|
961
|
+
desc
|
|
962
|
+
for desc in jobDescriptionsReachableFromRoot.values()
|
|
963
|
+
if isinstance(desc, CheckpointJobDescription)
|
|
964
|
+
]:
|
|
904
965
|
if jobDescription.jobStoreID in jobsDeletedByCheckpoints:
|
|
905
966
|
# This is a checkpoint that was nested within an
|
|
906
967
|
# earlier checkpoint, so it and all its successors are
|
|
@@ -926,8 +987,10 @@ class AbstractJobStore(ABC):
|
|
|
926
987
|
if len(jobDescription.filesToDelete) != 0:
|
|
927
988
|
# Delete any files that should already be deleted
|
|
928
989
|
for fileID in jobDescription.filesToDelete:
|
|
929
|
-
logger.critical(
|
|
930
|
-
|
|
990
|
+
logger.critical(
|
|
991
|
+
"Removing file in job store: %s that was "
|
|
992
|
+
"marked for deletion but not previously removed" % fileID
|
|
993
|
+
)
|
|
931
994
|
self.delete_file(fileID)
|
|
932
995
|
jobDescription.filesToDelete = []
|
|
933
996
|
changed[0] = True
|
|
@@ -940,6 +1003,7 @@ class AbstractJobStore(ABC):
|
|
|
940
1003
|
|
|
941
1004
|
def stackSizeFn() -> int:
|
|
942
1005
|
return len(list(jobDescription.allSuccessors()))
|
|
1006
|
+
|
|
943
1007
|
startStackSize = stackSizeFn()
|
|
944
1008
|
# Remove deleted jobs
|
|
945
1009
|
jobDescription.filterSuccessors(haveJob)
|
|
@@ -965,16 +1029,25 @@ class AbstractJobStore(ABC):
|
|
|
965
1029
|
assert isinstance(serviceJobDescription, ServiceJobDescription)
|
|
966
1030
|
|
|
967
1031
|
if flag == 1:
|
|
968
|
-
logger.debug(
|
|
969
|
-
|
|
1032
|
+
logger.debug(
|
|
1033
|
+
"Recreating a start service flag for job: %s, flag: %s",
|
|
1034
|
+
jobStoreID,
|
|
1035
|
+
newFlag,
|
|
1036
|
+
)
|
|
970
1037
|
serviceJobDescription.startJobStoreID = newFlag
|
|
971
1038
|
elif flag == 2:
|
|
972
|
-
logger.debug(
|
|
973
|
-
|
|
1039
|
+
logger.debug(
|
|
1040
|
+
"Recreating a terminate service flag for job: %s, flag: %s",
|
|
1041
|
+
jobStoreID,
|
|
1042
|
+
newFlag,
|
|
1043
|
+
)
|
|
974
1044
|
serviceJobDescription.terminateJobStoreID = newFlag
|
|
975
1045
|
else:
|
|
976
|
-
logger.debug(
|
|
977
|
-
|
|
1046
|
+
logger.debug(
|
|
1047
|
+
"Recreating a error service flag for job: %s, flag: %s",
|
|
1048
|
+
jobStoreID,
|
|
1049
|
+
newFlag,
|
|
1050
|
+
)
|
|
978
1051
|
assert flag == 3
|
|
979
1052
|
serviceJobDescription.errorJobStoreID = newFlag
|
|
980
1053
|
|
|
@@ -987,6 +1060,7 @@ class AbstractJobStore(ABC):
|
|
|
987
1060
|
|
|
988
1061
|
def servicesSizeFn() -> int:
|
|
989
1062
|
return len(jobDescription.services)
|
|
1063
|
+
|
|
990
1064
|
startServicesSize = servicesSizeFn()
|
|
991
1065
|
|
|
992
1066
|
def replaceFlagsIfNeeded(serviceJobDescription: JobDescription) -> None:
|
|
@@ -1047,12 +1121,14 @@ class AbstractJobStore(ABC):
|
|
|
1047
1121
|
|
|
1048
1122
|
# Remove any crufty stats/logging files from the previous run
|
|
1049
1123
|
logger.debug("Discarding old statistics and logs...")
|
|
1124
|
+
|
|
1050
1125
|
# We have to manually discard the stream to avoid getting
|
|
1051
1126
|
# stuck on a blocking write from the job store.
|
|
1052
1127
|
def discardStream(stream: Union[IO[bytes], IO[str]]) -> None:
|
|
1053
1128
|
"""Read the stream 4K at a time until EOF, discarding all input."""
|
|
1054
1129
|
while len(stream.read(4096)) != 0:
|
|
1055
1130
|
pass
|
|
1131
|
+
|
|
1056
1132
|
self.read_logs(discardStream)
|
|
1057
1133
|
|
|
1058
1134
|
logger.debug("Job store is clean")
|
|
@@ -1064,7 +1140,7 @@ class AbstractJobStore(ABC):
|
|
|
1064
1140
|
# existence of jobs
|
|
1065
1141
|
##########################################
|
|
1066
1142
|
|
|
1067
|
-
@deprecated(new_function_name=
|
|
1143
|
+
@deprecated(new_function_name="assign_job_id")
|
|
1068
1144
|
def assignID(self, jobDescription: JobDescription) -> None:
|
|
1069
1145
|
return self.assign_job_id(jobDescription)
|
|
1070
1146
|
|
|
@@ -1088,7 +1164,7 @@ class AbstractJobStore(ABC):
|
|
|
1088
1164
|
"""
|
|
1089
1165
|
yield
|
|
1090
1166
|
|
|
1091
|
-
@deprecated(new_function_name=
|
|
1167
|
+
@deprecated(new_function_name="create_job")
|
|
1092
1168
|
def create(self, jobDescription: JobDescription) -> JobDescription:
|
|
1093
1169
|
return self.create_job(jobDescription)
|
|
1094
1170
|
|
|
@@ -1104,7 +1180,7 @@ class AbstractJobStore(ABC):
|
|
|
1104
1180
|
"""
|
|
1105
1181
|
raise NotImplementedError()
|
|
1106
1182
|
|
|
1107
|
-
@deprecated(new_function_name=
|
|
1183
|
+
@deprecated(new_function_name="job_exists")
|
|
1108
1184
|
def exists(self, jobStoreID: str) -> bool:
|
|
1109
1185
|
return self.job_exists(jobStoreID)
|
|
1110
1186
|
|
|
@@ -1120,7 +1196,7 @@ class AbstractJobStore(ABC):
|
|
|
1120
1196
|
# One year should be sufficient to finish any pipeline ;-)
|
|
1121
1197
|
publicUrlExpiration = timedelta(days=365)
|
|
1122
1198
|
|
|
1123
|
-
@deprecated(new_function_name=
|
|
1199
|
+
@deprecated(new_function_name="get_public_url")
|
|
1124
1200
|
def getPublicUrl(self, fileName: str) -> str:
|
|
1125
1201
|
return self.get_public_url(fileName)
|
|
1126
1202
|
|
|
@@ -1139,7 +1215,7 @@ class AbstractJobStore(ABC):
|
|
|
1139
1215
|
"""
|
|
1140
1216
|
raise NotImplementedError()
|
|
1141
1217
|
|
|
1142
|
-
@deprecated(new_function_name=
|
|
1218
|
+
@deprecated(new_function_name="get_shared_public_url")
|
|
1143
1219
|
def getSharedPublicUrl(self, sharedFileName: str) -> str:
|
|
1144
1220
|
return self.get_shared_public_url(sharedFileName)
|
|
1145
1221
|
|
|
@@ -1161,7 +1237,7 @@ class AbstractJobStore(ABC):
|
|
|
1161
1237
|
"""
|
|
1162
1238
|
raise NotImplementedError()
|
|
1163
1239
|
|
|
1164
|
-
@deprecated(new_function_name=
|
|
1240
|
+
@deprecated(new_function_name="load_job")
|
|
1165
1241
|
def load(self, jobStoreID: str) -> JobDescription:
|
|
1166
1242
|
return self.load_job(jobStoreID)
|
|
1167
1243
|
|
|
@@ -1181,7 +1257,7 @@ class AbstractJobStore(ABC):
|
|
|
1181
1257
|
"""
|
|
1182
1258
|
raise NotImplementedError()
|
|
1183
1259
|
|
|
1184
|
-
@deprecated(new_function_name=
|
|
1260
|
+
@deprecated(new_function_name="update_job")
|
|
1185
1261
|
def update(self, jobDescription: JobDescription) -> None:
|
|
1186
1262
|
return self.update_job(jobDescription)
|
|
1187
1263
|
|
|
@@ -1196,7 +1272,7 @@ class AbstractJobStore(ABC):
|
|
|
1196
1272
|
"""
|
|
1197
1273
|
raise NotImplementedError()
|
|
1198
1274
|
|
|
1199
|
-
@deprecated(new_function_name=
|
|
1275
|
+
@deprecated(new_function_name="delete_job")
|
|
1200
1276
|
def delete(self, jobStoreID: str) -> None:
|
|
1201
1277
|
return self.delete_job(jobStoreID)
|
|
1202
1278
|
|
|
@@ -1233,12 +1309,19 @@ class AbstractJobStore(ABC):
|
|
|
1233
1309
|
# associated with a given job.
|
|
1234
1310
|
##########################################
|
|
1235
1311
|
|
|
1236
|
-
@deprecated(new_function_name=
|
|
1237
|
-
def writeFile(
|
|
1312
|
+
@deprecated(new_function_name="write_file")
|
|
1313
|
+
def writeFile(
|
|
1314
|
+
self,
|
|
1315
|
+
localFilePath: str,
|
|
1316
|
+
jobStoreID: Optional[str] = None,
|
|
1317
|
+
cleanup: bool = False,
|
|
1318
|
+
) -> str:
|
|
1238
1319
|
return self.write_file(localFilePath, jobStoreID, cleanup)
|
|
1239
1320
|
|
|
1240
1321
|
@abstractmethod
|
|
1241
|
-
def write_file(
|
|
1322
|
+
def write_file(
|
|
1323
|
+
self, local_path: str, job_id: Optional[str] = None, cleanup: bool = False
|
|
1324
|
+
) -> str:
|
|
1242
1325
|
"""
|
|
1243
1326
|
Takes a file (as a path) and places it in this job store. Returns an ID that can be used
|
|
1244
1327
|
to retrieve the file at a later time. The file is written in a atomic manner. It will
|
|
@@ -1269,19 +1352,27 @@ class AbstractJobStore(ABC):
|
|
|
1269
1352
|
"""
|
|
1270
1353
|
raise NotImplementedError()
|
|
1271
1354
|
|
|
1272
|
-
@deprecated(new_function_name=
|
|
1273
|
-
def writeFileStream(
|
|
1274
|
-
|
|
1355
|
+
@deprecated(new_function_name="write_file_stream")
|
|
1356
|
+
def writeFileStream(
|
|
1357
|
+
self,
|
|
1358
|
+
jobStoreID: Optional[str] = None,
|
|
1359
|
+
cleanup: bool = False,
|
|
1360
|
+
basename: Optional[str] = None,
|
|
1361
|
+
encoding: Optional[str] = None,
|
|
1362
|
+
errors: Optional[str] = None,
|
|
1363
|
+
) -> ContextManager[tuple[IO[bytes], str]]:
|
|
1275
1364
|
return self.write_file_stream(jobStoreID, cleanup, basename, encoding, errors)
|
|
1276
1365
|
|
|
1277
1366
|
@abstractmethod
|
|
1278
1367
|
@contextmanager
|
|
1279
|
-
def write_file_stream(
|
|
1280
|
-
|
|
1281
|
-
|
|
1282
|
-
|
|
1283
|
-
|
|
1284
|
-
|
|
1368
|
+
def write_file_stream(
|
|
1369
|
+
self,
|
|
1370
|
+
job_id: Optional[str] = None,
|
|
1371
|
+
cleanup: bool = False,
|
|
1372
|
+
basename: Optional[str] = None,
|
|
1373
|
+
encoding: Optional[str] = None,
|
|
1374
|
+
errors: Optional[str] = None,
|
|
1375
|
+
) -> Iterator[tuple[IO[bytes], str]]:
|
|
1285
1376
|
"""
|
|
1286
1377
|
Similar to writeFile, but returns a context manager yielding a tuple of
|
|
1287
1378
|
1) a file handle which can be written to and 2) the ID of the resulting
|
|
@@ -1320,18 +1411,22 @@ class AbstractJobStore(ABC):
|
|
|
1320
1411
|
"""
|
|
1321
1412
|
raise NotImplementedError()
|
|
1322
1413
|
|
|
1323
|
-
@deprecated(new_function_name=
|
|
1324
|
-
def getEmptyFileStoreID(
|
|
1325
|
-
|
|
1326
|
-
|
|
1327
|
-
|
|
1414
|
+
@deprecated(new_function_name="get_empty_file_store_id")
|
|
1415
|
+
def getEmptyFileStoreID(
|
|
1416
|
+
self,
|
|
1417
|
+
jobStoreID: Optional[str] = None,
|
|
1418
|
+
cleanup: bool = False,
|
|
1419
|
+
basename: Optional[str] = None,
|
|
1420
|
+
) -> str:
|
|
1328
1421
|
return self.get_empty_file_store_id(jobStoreID, cleanup, basename)
|
|
1329
1422
|
|
|
1330
1423
|
@abstractmethod
|
|
1331
|
-
def get_empty_file_store_id(
|
|
1332
|
-
|
|
1333
|
-
|
|
1334
|
-
|
|
1424
|
+
def get_empty_file_store_id(
|
|
1425
|
+
self,
|
|
1426
|
+
job_id: Optional[str] = None,
|
|
1427
|
+
cleanup: bool = False,
|
|
1428
|
+
basename: Optional[str] = None,
|
|
1429
|
+
) -> str:
|
|
1335
1430
|
"""
|
|
1336
1431
|
Creates an empty file in the job store and returns its ID.
|
|
1337
1432
|
Call to fileExists(getEmptyFileStoreID(jobStoreID)) will return True.
|
|
@@ -1353,8 +1448,10 @@ class AbstractJobStore(ABC):
|
|
|
1353
1448
|
"""
|
|
1354
1449
|
raise NotImplementedError()
|
|
1355
1450
|
|
|
1356
|
-
@deprecated(new_function_name=
|
|
1357
|
-
def readFile(
|
|
1451
|
+
@deprecated(new_function_name="read_file")
|
|
1452
|
+
def readFile(
|
|
1453
|
+
self, jobStoreFileID: str, localFilePath: str, symlink: bool = False
|
|
1454
|
+
) -> None:
|
|
1358
1455
|
return self.read_file(jobStoreFileID, localFilePath, symlink)
|
|
1359
1456
|
|
|
1360
1457
|
@abstractmethod
|
|
@@ -1382,7 +1479,7 @@ class AbstractJobStore(ABC):
|
|
|
1382
1479
|
"""
|
|
1383
1480
|
raise NotImplementedError()
|
|
1384
1481
|
|
|
1385
|
-
@deprecated(new_function_name=
|
|
1482
|
+
@deprecated(new_function_name="read_file_stream")
|
|
1386
1483
|
def readFileStream(
|
|
1387
1484
|
self,
|
|
1388
1485
|
jobStoreFileID: str,
|
|
@@ -1397,14 +1494,12 @@ class AbstractJobStore(ABC):
|
|
|
1397
1494
|
file_id: Union[FileID, str],
|
|
1398
1495
|
encoding: Literal[None] = None,
|
|
1399
1496
|
errors: Optional[str] = None,
|
|
1400
|
-
) -> ContextManager[IO[bytes]]:
|
|
1401
|
-
...
|
|
1497
|
+
) -> ContextManager[IO[bytes]]: ...
|
|
1402
1498
|
|
|
1403
1499
|
@overload
|
|
1404
1500
|
def read_file_stream(
|
|
1405
1501
|
self, file_id: Union[FileID, str], encoding: str, errors: Optional[str] = None
|
|
1406
|
-
) -> ContextManager[IO[str]]:
|
|
1407
|
-
...
|
|
1502
|
+
) -> ContextManager[IO[str]]: ...
|
|
1408
1503
|
|
|
1409
1504
|
@abstractmethod
|
|
1410
1505
|
def read_file_stream(
|
|
@@ -1430,7 +1525,7 @@ class AbstractJobStore(ABC):
|
|
|
1430
1525
|
"""
|
|
1431
1526
|
raise NotImplementedError()
|
|
1432
1527
|
|
|
1433
|
-
@deprecated(new_function_name=
|
|
1528
|
+
@deprecated(new_function_name="delete_file")
|
|
1434
1529
|
def deleteFile(self, jobStoreFileID: str) -> None:
|
|
1435
1530
|
return self.delete_file(jobStoreFileID)
|
|
1436
1531
|
|
|
@@ -1444,7 +1539,7 @@ class AbstractJobStore(ABC):
|
|
|
1444
1539
|
"""
|
|
1445
1540
|
raise NotImplementedError()
|
|
1446
1541
|
|
|
1447
|
-
@deprecated(new_function_name=
|
|
1542
|
+
@deprecated(new_function_name="file_exists")
|
|
1448
1543
|
def fileExists(self, jobStoreFileID: str) -> bool:
|
|
1449
1544
|
"""Determine whether a file exists in this job store."""
|
|
1450
1545
|
return self.file_exists(jobStoreFileID)
|
|
@@ -1458,7 +1553,7 @@ class AbstractJobStore(ABC):
|
|
|
1458
1553
|
"""
|
|
1459
1554
|
raise NotImplementedError()
|
|
1460
1555
|
|
|
1461
|
-
@deprecated(new_function_name=
|
|
1556
|
+
@deprecated(new_function_name="get_file_size")
|
|
1462
1557
|
def getFileSize(self, jobStoreFileID: str) -> int:
|
|
1463
1558
|
"""Get the size of the given file in bytes."""
|
|
1464
1559
|
return self.get_file_size(jobStoreFileID)
|
|
@@ -1478,7 +1573,7 @@ class AbstractJobStore(ABC):
|
|
|
1478
1573
|
"""
|
|
1479
1574
|
raise NotImplementedError()
|
|
1480
1575
|
|
|
1481
|
-
@deprecated(new_function_name=
|
|
1576
|
+
@deprecated(new_function_name="update_file")
|
|
1482
1577
|
def updateFile(self, jobStoreFileID: str, localFilePath: str) -> None:
|
|
1483
1578
|
"""Replaces the existing version of a file in the job store."""
|
|
1484
1579
|
return self.update_file(jobStoreFileID, localFilePath)
|
|
@@ -1499,19 +1594,20 @@ class AbstractJobStore(ABC):
|
|
|
1499
1594
|
"""
|
|
1500
1595
|
raise NotImplementedError()
|
|
1501
1596
|
|
|
1502
|
-
@deprecated(new_function_name=
|
|
1503
|
-
def updateFileStream(
|
|
1504
|
-
|
|
1505
|
-
|
|
1506
|
-
|
|
1597
|
+
@deprecated(new_function_name="update_file_stream")
|
|
1598
|
+
def updateFileStream(
|
|
1599
|
+
self,
|
|
1600
|
+
jobStoreFileID: str,
|
|
1601
|
+
encoding: Optional[str] = None,
|
|
1602
|
+
errors: Optional[str] = None,
|
|
1603
|
+
) -> ContextManager[IO[Any]]:
|
|
1507
1604
|
return self.update_file_stream(jobStoreFileID, encoding, errors)
|
|
1508
1605
|
|
|
1509
1606
|
@abstractmethod
|
|
1510
1607
|
@contextmanager
|
|
1511
|
-
def update_file_stream(
|
|
1512
|
-
|
|
1513
|
-
|
|
1514
|
-
errors: Optional[str] = None) -> Iterator[IO[Any]]:
|
|
1608
|
+
def update_file_stream(
|
|
1609
|
+
self, file_id: str, encoding: Optional[str] = None, errors: Optional[str] = None
|
|
1610
|
+
) -> Iterator[IO[Any]]:
|
|
1515
1611
|
"""
|
|
1516
1612
|
Replaces the existing version of a file in the job store. Similar to writeFile, but
|
|
1517
1613
|
returns a context manager yielding a file handle which can be written to. The
|
|
@@ -1537,20 +1633,29 @@ class AbstractJobStore(ABC):
|
|
|
1537
1633
|
# with specific jobs.
|
|
1538
1634
|
##########################################
|
|
1539
1635
|
|
|
1540
|
-
sharedFileNameRegex = re.compile(r
|
|
1636
|
+
sharedFileNameRegex = re.compile(r"^[a-zA-Z0-9._-]+$")
|
|
1541
1637
|
|
|
1542
|
-
@deprecated(new_function_name=
|
|
1543
|
-
def writeSharedFileStream(
|
|
1544
|
-
|
|
1545
|
-
|
|
1638
|
+
@deprecated(new_function_name="write_shared_file_stream")
|
|
1639
|
+
def writeSharedFileStream(
|
|
1640
|
+
self,
|
|
1641
|
+
sharedFileName: str,
|
|
1642
|
+
isProtected: Optional[bool] = None,
|
|
1643
|
+
encoding: Optional[str] = None,
|
|
1644
|
+
errors: Optional[str] = None,
|
|
1645
|
+
) -> ContextManager[IO[bytes]]:
|
|
1646
|
+
return self.write_shared_file_stream(
|
|
1647
|
+
sharedFileName, isProtected, encoding, errors
|
|
1648
|
+
)
|
|
1546
1649
|
|
|
1547
1650
|
@abstractmethod
|
|
1548
1651
|
@contextmanager
|
|
1549
|
-
def write_shared_file_stream(
|
|
1550
|
-
|
|
1551
|
-
|
|
1552
|
-
|
|
1553
|
-
|
|
1652
|
+
def write_shared_file_stream(
|
|
1653
|
+
self,
|
|
1654
|
+
shared_file_name: str,
|
|
1655
|
+
encrypted: Optional[bool] = None,
|
|
1656
|
+
encoding: Optional[str] = None,
|
|
1657
|
+
errors: Optional[str] = None,
|
|
1658
|
+
) -> Iterator[IO[bytes]]:
|
|
1554
1659
|
"""
|
|
1555
1660
|
Returns a context manager yielding a writable file handle to the global file referenced
|
|
1556
1661
|
by the given name. File will be created in an atomic manner.
|
|
@@ -1575,19 +1680,23 @@ class AbstractJobStore(ABC):
|
|
|
1575
1680
|
"""
|
|
1576
1681
|
raise NotImplementedError()
|
|
1577
1682
|
|
|
1578
|
-
@deprecated(new_function_name=
|
|
1579
|
-
def readSharedFileStream(
|
|
1580
|
-
|
|
1581
|
-
|
|
1582
|
-
|
|
1683
|
+
@deprecated(new_function_name="read_shared_file_stream")
|
|
1684
|
+
def readSharedFileStream(
|
|
1685
|
+
self,
|
|
1686
|
+
sharedFileName: str,
|
|
1687
|
+
encoding: Optional[str] = None,
|
|
1688
|
+
errors: Optional[str] = None,
|
|
1689
|
+
) -> ContextManager[IO[bytes]]:
|
|
1583
1690
|
return self.read_shared_file_stream(sharedFileName, encoding, errors)
|
|
1584
1691
|
|
|
1585
1692
|
@abstractmethod
|
|
1586
1693
|
@contextmanager
|
|
1587
|
-
def read_shared_file_stream(
|
|
1588
|
-
|
|
1589
|
-
|
|
1590
|
-
|
|
1694
|
+
def read_shared_file_stream(
|
|
1695
|
+
self,
|
|
1696
|
+
shared_file_name: str,
|
|
1697
|
+
encoding: Optional[str] = None,
|
|
1698
|
+
errors: Optional[str] = None,
|
|
1699
|
+
) -> Iterator[IO[bytes]]:
|
|
1591
1700
|
"""
|
|
1592
1701
|
Returns a context manager yielding a readable file handle to the global file referenced
|
|
1593
1702
|
by the given name.
|
|
@@ -1606,7 +1715,7 @@ class AbstractJobStore(ABC):
|
|
|
1606
1715
|
"""
|
|
1607
1716
|
raise NotImplementedError()
|
|
1608
1717
|
|
|
1609
|
-
@deprecated(new_function_name=
|
|
1718
|
+
@deprecated(new_function_name="write_logs")
|
|
1610
1719
|
def writeStatsAndLogging(self, statsAndLoggingString: str) -> None:
|
|
1611
1720
|
return self.write_logs(statsAndLoggingString)
|
|
1612
1721
|
|
|
@@ -1622,8 +1731,10 @@ class AbstractJobStore(ABC):
|
|
|
1622
1731
|
"""
|
|
1623
1732
|
raise NotImplementedError()
|
|
1624
1733
|
|
|
1625
|
-
@deprecated(new_function_name=
|
|
1626
|
-
def readStatsAndLogging(
|
|
1734
|
+
@deprecated(new_function_name="read_logs")
|
|
1735
|
+
def readStatsAndLogging(
|
|
1736
|
+
self, callback: Callable[..., Any], readAll: bool = False
|
|
1737
|
+
) -> int:
|
|
1627
1738
|
return self.read_logs(callback, readAll)
|
|
1628
1739
|
|
|
1629
1740
|
@abstractmethod
|
|
@@ -1658,8 +1769,8 @@ class AbstractJobStore(ABC):
|
|
|
1658
1769
|
this method. Other methods will rely on always having the most current
|
|
1659
1770
|
pid available. So far there is no reason to store any old pids.
|
|
1660
1771
|
"""
|
|
1661
|
-
with self.write_shared_file_stream(
|
|
1662
|
-
f.write(str(os.getpid()).encode(
|
|
1772
|
+
with self.write_shared_file_stream("pid.log") as f:
|
|
1773
|
+
f.write(str(os.getpid()).encode("utf-8"))
|
|
1663
1774
|
|
|
1664
1775
|
def read_leader_pid(self) -> int:
|
|
1665
1776
|
"""
|
|
@@ -1667,7 +1778,7 @@ class AbstractJobStore(ABC):
|
|
|
1667
1778
|
|
|
1668
1779
|
:raise NoSuchFileException: If the PID file doesn't exist.
|
|
1669
1780
|
"""
|
|
1670
|
-
with self.read_shared_file_stream(
|
|
1781
|
+
with self.read_shared_file_stream("pid.log") as f:
|
|
1671
1782
|
return int(f.read().strip())
|
|
1672
1783
|
|
|
1673
1784
|
def write_leader_node_id(self) -> None:
|
|
@@ -1676,7 +1787,7 @@ class AbstractJobStore(ABC):
|
|
|
1676
1787
|
by the leader.
|
|
1677
1788
|
"""
|
|
1678
1789
|
with self.write_shared_file_stream("leader_node_id.log") as f:
|
|
1679
|
-
f.write(getNodeID().encode(
|
|
1790
|
+
f.write(getNodeID().encode("utf-8"))
|
|
1680
1791
|
|
|
1681
1792
|
def read_leader_node_id(self) -> str:
|
|
1682
1793
|
"""
|
|
@@ -1685,7 +1796,7 @@ class AbstractJobStore(ABC):
|
|
|
1685
1796
|
:raise NoSuchFileException: If the node ID file doesn't exist.
|
|
1686
1797
|
"""
|
|
1687
1798
|
with self.read_shared_file_stream("leader_node_id.log") as f:
|
|
1688
|
-
return f.read().decode(
|
|
1799
|
+
return f.read().decode("utf-8").strip()
|
|
1689
1800
|
|
|
1690
1801
|
def write_kill_flag(self, kill: bool = False) -> None:
|
|
1691
1802
|
"""
|
|
@@ -1698,7 +1809,7 @@ class AbstractJobStore(ABC):
|
|
|
1698
1809
|
workers are expected to be cleaned up by the leader.
|
|
1699
1810
|
"""
|
|
1700
1811
|
with self.write_shared_file_stream("_toil_kill_flag") as f:
|
|
1701
|
-
f.write(("YES" if kill else "NO").encode(
|
|
1812
|
+
f.write(("YES" if kill else "NO").encode("utf-8"))
|
|
1702
1813
|
|
|
1703
1814
|
def read_kill_flag(self) -> bool:
|
|
1704
1815
|
"""
|
|
@@ -1739,25 +1850,40 @@ class AbstractJobStore(ABC):
|
|
|
1739
1850
|
if not cls._validateSharedFileName(sharedFileName):
|
|
1740
1851
|
raise ValueError("Not a valid shared file name: '%s'." % sharedFileName)
|
|
1741
1852
|
|
|
1853
|
+
|
|
1742
1854
|
class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
|
|
1743
1855
|
"""
|
|
1744
1856
|
A mostly fake JobStore to access URLs not really associated with real job
|
|
1745
1857
|
stores.
|
|
1746
1858
|
"""
|
|
1747
1859
|
|
|
1860
|
+
@classmethod
|
|
1861
|
+
def _setup_ftp(cls) -> FtpFsAccess:
|
|
1862
|
+
# FTP connections are not reused. Ideally, a thread should watch any reused FTP connections
|
|
1863
|
+
# and close them when necessary
|
|
1864
|
+
return FtpFsAccess()
|
|
1865
|
+
|
|
1748
1866
|
@classmethod
|
|
1749
1867
|
def _supports_url(cls, url: ParseResult, export: bool = False) -> bool:
|
|
1750
|
-
return url.scheme.lower() in (
|
|
1868
|
+
return url.scheme.lower() in ("http", "https", "ftp") and not export
|
|
1751
1869
|
|
|
1752
1870
|
@classmethod
|
|
1753
1871
|
def _url_exists(cls, url: ParseResult) -> bool:
|
|
1872
|
+
# Deal with FTP first to support user/password auth
|
|
1873
|
+
if url.scheme.lower() == "ftp":
|
|
1874
|
+
ftp = cls._setup_ftp()
|
|
1875
|
+
return ftp.exists(url.geturl())
|
|
1876
|
+
|
|
1754
1877
|
try:
|
|
1755
|
-
|
|
1756
|
-
with cls._open_url(url):
|
|
1878
|
+
with closing(urlopen(Request(url.geturl(), method="HEAD"))):
|
|
1757
1879
|
return True
|
|
1758
|
-
except:
|
|
1759
|
-
|
|
1760
|
-
|
|
1880
|
+
except HTTPError as e:
|
|
1881
|
+
if e.code in (404, 410):
|
|
1882
|
+
return False
|
|
1883
|
+
else:
|
|
1884
|
+
raise
|
|
1885
|
+
# Any other errors we should pass through because something really went
|
|
1886
|
+
# wrong (e.g. server is broken today but file may usually exist)
|
|
1761
1887
|
|
|
1762
1888
|
@classmethod
|
|
1763
1889
|
@retry(
|
|
@@ -1767,17 +1893,19 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
|
|
|
1767
1893
|
]
|
|
1768
1894
|
)
|
|
1769
1895
|
def _get_size(cls, url: ParseResult) -> Optional[int]:
|
|
1770
|
-
if url.scheme.lower() ==
|
|
1771
|
-
|
|
1772
|
-
|
|
1773
|
-
|
|
1774
|
-
|
|
1775
|
-
|
|
1896
|
+
if url.scheme.lower() == "ftp":
|
|
1897
|
+
ftp = cls._setup_ftp()
|
|
1898
|
+
return ftp.size(url.geturl())
|
|
1899
|
+
|
|
1900
|
+
# just read the header for content length
|
|
1901
|
+
resp = urlopen(Request(url.geturl(), method="HEAD"))
|
|
1902
|
+
size = resp.info().get("content-length")
|
|
1903
|
+
return int(size) if size is not None else None
|
|
1776
1904
|
|
|
1777
1905
|
@classmethod
|
|
1778
1906
|
def _read_from_url(
|
|
1779
1907
|
cls, url: ParseResult, writable: Union[IO[bytes], IO[str]]
|
|
1780
|
-
) ->
|
|
1908
|
+
) -> tuple[int, bool]:
|
|
1781
1909
|
# We can't actually retry after we start writing.
|
|
1782
1910
|
# TODO: Implement retry with byte range requests
|
|
1783
1911
|
with cls._open_url(url) as readable:
|
|
@@ -1786,8 +1914,10 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
|
|
|
1786
1914
|
# nested function can modify it without creating its own
|
|
1787
1915
|
# local with the same name.
|
|
1788
1916
|
size = [0]
|
|
1917
|
+
|
|
1789
1918
|
def count(l: int) -> None:
|
|
1790
1919
|
size[0] += l
|
|
1920
|
+
|
|
1791
1921
|
counter = WriteWatchingStream(writable)
|
|
1792
1922
|
counter.onWrite(count)
|
|
1793
1923
|
|
|
@@ -1799,18 +1929,32 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
|
|
|
1799
1929
|
@retry(
|
|
1800
1930
|
errors=[
|
|
1801
1931
|
BadStatusLine,
|
|
1802
|
-
ErrorCondition(error=HTTPError, error_codes=[408, 500, 503]),
|
|
1932
|
+
ErrorCondition(error=HTTPError, error_codes=[408, 429, 500, 502, 503]),
|
|
1803
1933
|
]
|
|
1804
1934
|
)
|
|
1805
1935
|
def _open_url(cls, url: ParseResult) -> IO[bytes]:
|
|
1936
|
+
# Deal with FTP first so we support user/password auth
|
|
1937
|
+
if url.scheme.lower() == "ftp":
|
|
1938
|
+
ftp = cls._setup_ftp()
|
|
1939
|
+
# we open in read mode as write mode is not supported
|
|
1940
|
+
return ftp.open(url.geturl(), mode="r")
|
|
1941
|
+
|
|
1806
1942
|
try:
|
|
1807
1943
|
return cast(IO[bytes], closing(urlopen(url.geturl())))
|
|
1808
1944
|
except HTTPError as e:
|
|
1809
|
-
if e.code
|
|
1945
|
+
if e.code in (404, 410):
|
|
1810
1946
|
# Translate into a FileNotFoundError for detecting
|
|
1811
|
-
#
|
|
1947
|
+
# known nonexistent files
|
|
1812
1948
|
raise FileNotFoundError(str(url)) from e
|
|
1813
1949
|
else:
|
|
1950
|
+
# Other codes indicate a real problem with the server; we don't
|
|
1951
|
+
# want to e.g. run a workflow without an optional input that
|
|
1952
|
+
# the user specified a path to just because the server was
|
|
1953
|
+
# busy.
|
|
1954
|
+
|
|
1955
|
+
# Sometimes we expect to see this when polling existence for
|
|
1956
|
+
# inputs at guessed paths, so don't complain *too* loudly here.
|
|
1957
|
+
logger.debug("Unusual status %d for URL %s", e.code, str(url))
|
|
1814
1958
|
raise
|
|
1815
1959
|
|
|
1816
1960
|
@classmethod
|
|
@@ -1819,6 +1963,6 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
|
|
|
1819
1963
|
return False
|
|
1820
1964
|
|
|
1821
1965
|
@classmethod
|
|
1822
|
-
def _list_url(cls, url: ParseResult) ->
|
|
1966
|
+
def _list_url(cls, url: ParseResult) -> list[str]:
|
|
1823
1967
|
# TODO: Implement HTTP index parsing and FTP directory listing
|
|
1824
1968
|
raise NotImplementedError("HTTP and FTP URLs cannot yet be listed")
|