toil 7.0.0__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +121 -83
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +137 -77
- toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
- toil/batchSystems/awsBatch.py +237 -128
- toil/batchSystems/cleanup_support.py +22 -16
- toil/batchSystems/contained_executor.py +30 -26
- toil/batchSystems/gridengine.py +85 -49
- toil/batchSystems/htcondor.py +164 -87
- toil/batchSystems/kubernetes.py +622 -386
- toil/batchSystems/local_support.py +17 -12
- toil/batchSystems/lsf.py +132 -79
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +288 -149
- toil/batchSystems/mesos/executor.py +77 -49
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +38 -29
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +293 -123
- toil/batchSystems/slurm.py +489 -137
- toil/batchSystems/torque.py +46 -32
- toil/bus.py +141 -73
- toil/common.py +630 -359
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1114 -532
- toil/cwl/utils.py +17 -22
- toil/deferred.py +62 -41
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +88 -57
- toil/fileStores/cachingFileStore.py +711 -247
- toil/fileStores/nonCachingFileStore.py +113 -75
- toil/job.py +988 -315
- toil/jobStores/abstractJobStore.py +387 -243
- toil/jobStores/aws/jobStore.py +727 -403
- toil/jobStores/aws/utils.py +161 -109
- toil/jobStores/conftest.py +1 -0
- toil/jobStores/fileJobStore.py +289 -151
- toil/jobStores/googleJobStore.py +137 -70
- toil/jobStores/utils.py +36 -15
- toil/leader.py +614 -269
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +55 -28
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +193 -58
- toil/lib/aws/utils.py +238 -218
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +83 -49
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +322 -209
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +4 -2
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +99 -11
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +65 -18
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +19 -7
- toil/lib/retry.py +115 -77
- toil/lib/threading.py +282 -80
- toil/lib/throttle.py +15 -14
- toil/options/common.py +834 -401
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +70 -19
- toil/provisioners/__init__.py +111 -46
- toil/provisioners/abstractProvisioner.py +322 -157
- toil/provisioners/aws/__init__.py +62 -30
- toil/provisioners/aws/awsProvisioner.py +980 -627
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +147 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +127 -61
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +148 -64
- toil/test/__init__.py +263 -179
- toil/test/batchSystems/batchSystemTest.py +438 -195
- toil/test/batchSystems/batch_system_plugin_test.py +18 -7
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +93 -47
- toil/test/cactus/test_cactus_integration.py +20 -22
- toil/test/cwl/cwlTest.py +271 -71
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/docs/scriptsTest.py +60 -34
- toil/test/jobStores/jobStoreTest.py +412 -235
- toil/test/lib/aws/test_iam.py +116 -48
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +57 -49
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/options.py +7 -2
- toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +81 -42
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +140 -100
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +33 -26
- toil/test/src/environmentTest.py +20 -10
- toil/test/src/fileStoreTest.py +538 -271
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +32 -17
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +120 -70
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +6 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +33 -16
- toil/test/utils/toilDebugTest.py +70 -58
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +239 -102
- toil/test/wdl/wdltoil_test.py +789 -148
- toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
- toil/toilState.py +52 -26
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +85 -25
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +251 -145
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +27 -14
- toil/utils/toilSshCluster.py +45 -22
- toil/utils/toilStats.py +75 -36
- toil/utils/toilStatus.py +226 -119
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +11 -11
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3513 -1052
- toil/worker.py +269 -128
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-7.0.0.dist-info/METADATA +0 -158
- toil-7.0.0.dist-info/RECORD +0 -244
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/LICENSE +0 -0
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/job.py
CHANGED
@@ -11,6 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from __future__ import annotations
+
 import collections
 import copy
 import importlib
@@ -27,56 +29,59 @@ from abc import ABCMeta, abstractmethod
 from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser, Namespace
 from contextlib import contextmanager
 from io import BytesIO
-from typing import (
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Dict,
+    Iterator,
+    List,
+    Mapping,
+    NamedTuple,
+    Optional,
+    Sequence,
+    Tuple,
+    TypeVar,
+    Union,
+    cast,
+    overload,
+    TypedDict,
+    Literal,
+)
+from urllib.error import HTTPError
+from urllib.parse import urlsplit, unquote, urljoin
+
+from toil import memoize
 
+import dill
 from configargparse import ArgParser
 
-from toil.
-from toil.lib.compatibility import deprecated
-
-if sys.version_info >= (3, 8):
-    from typing import TypedDict
-else:
-    from typing_extensions import TypedDict
-
-import dill
-# TODO: When this gets into the standard library, get it from there and drop
-# typing-extensions dependency on Pythons that are new enough.
-from typing_extensions import NotRequired
+from toil.lib.io import is_remote_url
 
-if sys.version_info
-    from
+if sys.version_info < (3, 11):
+    from typing_extensions import NotRequired
 else:
-    from
+    from typing import NotRequired
 
+from toil.bus import Names
 from toil.common import Config, Toil, addOptions, safeUnpickleFromStream
 from toil.deferred import DeferredFunction
 from toil.fileStores import FileID
+from toil.lib.compatibility import deprecated
 from toil.lib.conversions import bytes2human, human2bytes
 from toil.lib.expando import Expando
 from toil.lib.resources import ResourceMonitor
 from toil.resource import ModuleDescriptor
 from toil.statsAndLogging import set_logging_from_options
 
+from toil.lib.exceptions import UnimplementedURLException
+
 if TYPE_CHECKING:
     from optparse import OptionParser
 
-    from toil.batchSystems.abstractBatchSystem import
+    from toil.batchSystems.abstractBatchSystem import (
+        BatchJobExitReason
+    )
     from toil.fileStores.abstractFileStore import AbstractFileStore
     from toil.jobStores.abstractJobStore import AbstractJobStore
 
@@ -122,24 +127,28 @@ class ConflictingPredecessorError(Exception):
             f'The given job: "{predecessor.description}" is already a predecessor of job: "{successor.description}".'
         )
 
+
 class DebugStoppingPointReached(BaseException):
     """
     Raised when a job reaches a point at which it has been instructed to stop for debugging.
     """
-
+
 
 class FilesDownloadedStoppingPointReached(DebugStoppingPointReached):
     """
     Raised when a job stops because it was asked to download its files, and the files are downloaded.
     """
 
-    def __init__(
+    def __init__(
+        self, message, host_and_job_paths: Optional[list[tuple[str, str]]] = None
+    ):
         super().__init__(message)
 
         # Save the host and user-code-visible paths of files, in case we're
         # using a container and they are different.
         self.host_and_job_paths = host_and_job_paths
 
+
 class TemporaryID:
     """
     Placeholder for a unregistered job ID used by a JobDescription.
@@ -161,7 +170,7 @@ class TemporaryID:
         return self.__repr__()
 
     def __repr__(self) -> str:
-        return f
+        return f"TemporaryID({self._value})"
 
     def __hash__(self) -> int:
         return hash(self._value)
@@ -172,6 +181,7 @@ class TemporaryID:
     def __ne__(self, other: Any) -> bool:
         return not isinstance(other, TemporaryID) or self._value != other._value
 
+
 class AcceleratorRequirement(TypedDict):
     """Requirement for one or more computational accelerators, like a GPU or FPGA."""
 
@@ -210,7 +220,10 @@ class AcceleratorRequirement(TypedDict):
 
     # TODO: support requesting any GPU with X amount of vram
 
-
+
+def parse_accelerator(
+    spec: Union[int, str, dict[str, Union[str, int]]]
+) -> AcceleratorRequirement:
     """
     Parse an AcceleratorRequirement specified by user code.
 
@@ -247,16 +260,16 @@ def parse_accelerator(spec: Union[int, str, Dict[str, Union[str, int]]]) -> Acce
     :raises ValueError: if it gets something it can't parse
     :raises TypeError: if it gets something it can't parse because it's the wrong type.
     """
-    KINDS = {
-    BRANDS = {
-    APIS = {
+    KINDS = {"gpu"}
+    BRANDS = {"nvidia", "amd"}
+    APIS = {"cuda", "rocm", "opencl"}
 
-    parsed: AcceleratorRequirement = {
+    parsed: AcceleratorRequirement = {"count": 1, "kind": "gpu"}
 
     if isinstance(spec, int):
-        parsed[
+        parsed["count"] = spec
     elif isinstance(spec, str):
-        parts = spec.split(
+        parts = spec.split(":")
 
         if len(parts) > 2:
             raise ValueError("Could not parse AcceleratorRequirement: " + spec)
@@ -265,7 +278,7 @@ def parse_accelerator(spec: Union[int, str, Dict[str, Union[str, int]]]) -> Acce
 
         try:
             # If they have : and then a count, or just a count, handle that.
-            parsed[
+            parsed["count"] = int(possible_count)
             if len(parts) > 1:
                 # Then we take whatever was before the colon as text
                 possible_description = parts[0]
@@ -275,73 +288,97 @@ def parse_accelerator(spec: Union[int, str, Dict[str, Union[str, int]]]) -> Acce
            # It doesn't end with a number
            if len(parts) == 2:
                # We should have a number though.
-                raise ValueError(
+                raise ValueError(
+                    "Could not parse AcceleratorRequirement count in: " + spec
+                )
            else:
                # Must be just the description
                possible_description = possible_count
 
        # Determine if we have a kind, brand, API, or (by default) model
        if possible_description in KINDS:
-            parsed[
+            parsed["kind"] = possible_description
        elif possible_description in BRANDS:
-            parsed[
+            parsed["brand"] = possible_description
        elif possible_description in APIS:
-            parsed[
+            parsed["api"] = possible_description
        else:
            if possible_description is not None:
-                parsed[
+                parsed["model"] = possible_description
    elif isinstance(spec, dict):
        # It's a dict, so merge with the defaults.
        parsed.update(spec)
        # TODO: make sure they didn't misspell keys or something
    else:
-        raise TypeError(
+        raise TypeError(
+            f"Cannot parse value of type {type(spec)} as an AcceleratorRequirement"
+        )
 
-    if parsed[
+    if parsed["kind"] == "gpu":
        # Use some smarts about what current GPUs are like to elaborate the
        # description.
 
-        if
+        if "brand" not in parsed and "model" in parsed:
            # Try to guess the brand from the model
            for brand in BRANDS:
-                if parsed[
+                if parsed["model"].startswith(brand):
                    # The model often starts with the brand
-                    parsed[
+                    parsed["brand"] = brand
                    break
 
-        if
+        if "brand" not in parsed and "api" in parsed:
            # Try to guess the brand from the API
-            if parsed[
+            if parsed["api"] == "cuda":
                # Only nvidia makes cuda cards
-                parsed[
-            elif parsed[
+                parsed["brand"] = "nvidia"
+            elif parsed["api"] == "rocm":
                # Only amd makes rocm cards
-                parsed[
+                parsed["brand"] = "amd"
 
    return parsed
 
-
+
+
+def accelerator_satisfies(
+    candidate: AcceleratorRequirement,
+    requirement: AcceleratorRequirement,
+    ignore: list[str] = [],
+) -> bool:
    """
    Test if candidate partially satisfies the given requirement.
 
    :returns: True if the given candidate at least partially satisfies the
        given requirement (i.e. check all fields other than count).
    """
-    for key in [
+    for key in ["kind", "brand", "api", "model"]:
        if key in ignore:
            # Skip this aspect.
            continue
        if key in requirement:
            if key not in candidate:
-                logger.debug(
+                logger.debug(
+                    "Candidate %s does not satisfy requirement %s because it does not have a %s",
+                    candidate,
+                    requirement,
+                    key,
+                )
                return False
            if candidate[key] != requirement[key]:
-                logger.debug(
+                logger.debug(
+                    "Candidate %s does not satisfy requirement %s because it does not have the correct %s",
+                    candidate,
+                    requirement,
+                    key,
+                )
                return False
    # If all these match or are more specific than required, we match!
    return True
 
-
+
+def accelerators_fully_satisfy(
+    candidates: Optional[list[AcceleratorRequirement]],
+    requirement: AcceleratorRequirement,
+    ignore: list[str] = [],
+) -> bool:
    """
    Determine if a set of accelerators satisfy a requirement.
 
@@ -352,21 +389,22 @@ def accelerators_fully_satisfy(candidates: Optional[List[AcceleratorRequirement]
        together (i.e. check all fields including count).
    """
 
-    count_remaining = requirement[
+    count_remaining = requirement["count"]
 
    if candidates:
        for candidate in candidates:
            if accelerator_satisfies(candidate, requirement, ignore=ignore):
-                if candidate[
+                if candidate["count"] > count_remaining:
                    # We found all the matching accelerators we need
                    count_remaining = 0
                    break
                else:
-                    count_remaining -= candidate[
+                    count_remaining -= candidate["count"]
 
    # If we have no count left we are fully satisfied
    return count_remaining == 0
 
+
class RequirementsDict(TypedDict):
    """
    Typed storage for requirements for a job.
@@ -377,22 +415,35 @@ class RequirementsDict(TypedDict):
    cores: NotRequired[Union[int, float]]
    memory: NotRequired[int]
    disk: NotRequired[int]
-    accelerators: NotRequired[
+    accelerators: NotRequired[list[AcceleratorRequirement]]
    preemptible: NotRequired[bool]
 
+
# These must be all the key names in RequirementsDict
REQUIREMENT_NAMES = ["disk", "memory", "cores", "accelerators", "preemptible"]
 
# This is the supertype of all value types in RequirementsDict
-ParsedRequirement = Union[int, float, bool,
+ParsedRequirement = Union[int, float, bool, list[AcceleratorRequirement]]
 
# We define some types for things we can parse into different kind of requirements
ParseableIndivisibleResource = Union[str, int]
ParseableDivisibleResource = Union[str, int, float]
ParseableFlag = Union[str, int, bool]
-ParseableAcceleratorRequirement = Union[
+ParseableAcceleratorRequirement = Union[
+    str,
+    int,
+    Mapping[str, Any],
+    AcceleratorRequirement,
+    Sequence[Union[str, int, Mapping[str, Any], AcceleratorRequirement]],
+]
+
+ParseableRequirement = Union[
+    ParseableIndivisibleResource,
+    ParseableDivisibleResource,
+    ParseableFlag,
+    ParseableAcceleratorRequirement,
+]
 
-ParseableRequirement = Union[ParseableIndivisibleResource, ParseableDivisibleResource, ParseableFlag, ParseableAcceleratorRequirement]
 
class Requirer:
    """
@@ -403,9 +454,7 @@ class Requirer:
 
    _requirementOverrides: RequirementsDict
 
-    def __init__(
-        self, requirements: Mapping[str, ParseableRequirement]
-    ) -> None:
+    def __init__(self, requirements: Mapping[str, ParseableRequirement]) -> None:
        """
        Parse and save the given requirements.
 
@@ -446,12 +495,11 @@ class Requirer:
            raise RuntimeError(f"Config assigned multiple times to {self}")
        self._config = config
 
-
-    def __getstate__(self) -> Dict[str, Any]:
+    def __getstate__(self) -> dict[str, Any]:
        """Return the dict to use as the instance's __dict__ when pickling."""
        # We want to exclude the config from pickling.
        state = self.__dict__.copy()
-        state[
+        state["_config"] = None
        return state
 
    def __copy__(self) -> "Requirer":
@@ -492,37 +540,29 @@ class Requirer:
    @overload
    @staticmethod
    def _parseResource(
-        name: Union[Literal["memory"], Literal["disks"]],
-
-
+        name: Union[Literal["memory"], Literal["disks"]],
+        value: ParseableIndivisibleResource,
+    ) -> int: ...
 
    @overload
    @staticmethod
    def _parseResource(
        name: Literal["cores"], value: ParseableDivisibleResource
-    ) -> Union[int, float]:
-        ...
+    ) -> Union[int, float]: ...
 
    @overload
    @staticmethod
    def _parseResource(
        name: Literal["accelerators"], value: ParseableAcceleratorRequirement
-    ) ->
-        ...
+    ) -> list[AcceleratorRequirement]: ...
 
    @overload
    @staticmethod
-    def _parseResource(
-        name: str, value: ParseableRequirement
-    ) -> ParsedRequirement:
-        ...
+    def _parseResource(name: str, value: ParseableRequirement) -> ParsedRequirement: ...
 
    @overload
    @staticmethod
-    def _parseResource(
-        name: str, value: None
-    ) -> None:
-        ...
+    def _parseResource(name: str, value: None) -> None: ...
 
    @staticmethod
    def _parseResource(
@@ -559,43 +599,53 @@ class Requirer:
            # Anything can be None.
            return value
 
-        if name in (
+        if name in ("memory", "disk", "cores"):
            # These should be numbers that accept things like "5G".
            if isinstance(value, (str, bytes)):
                value = human2bytes(value)
            if isinstance(value, int):
                return value
-            elif isinstance(value, float) and name ==
+            elif isinstance(value, float) and name == "cores":
                # But only cores can be fractional.
                return value
            else:
-                raise TypeError(
-
+                raise TypeError(
+                    f"The '{name}' requirement does not accept values that are of type {type(value)}"
+                )
+        elif name == "preemptible":
            if isinstance(value, str):
                if value.lower() == "true":
                    return True
                elif value.lower() == "false":
                    return False
                else:
-                    raise ValueError(
+                    raise ValueError(
+                        f"The '{name}' requirement, as a string, must be 'true' or 'false' but is {value}"
+                    )
            elif isinstance(value, int):
                if value == 1:
                    return True
                if value == 0:
                    return False
                else:
-                    raise ValueError(
+                    raise ValueError(
+                        f"The '{name}' requirement, as an int, must be 1 or 0 but is {value}"
+                    )
            elif isinstance(value, bool):
                return value
            else:
-                raise TypeError(
-
+                raise TypeError(
+                    f"The '{name}' requirement does not accept values that are of type {type(value)}"
+                )
+        elif name == "accelerators":
            # The type checking for this is delegated to the
            # AcceleratorRequirement class.
            if isinstance(value, list):
-                return [
+                return [
+                    parse_accelerator(v) for v in value
+                ]  # accelerators={'kind': 'gpu', 'brand': 'nvidia', 'count': 2}
            else:
-                return [parse_accelerator(value)]
+                return [parse_accelerator(value)]  # accelerators=1
        else:
            # Anything else we just pass along without opinons
            return cast(ParsedRequirement, value)
@@ -618,7 +668,10 @@ class Requirer:
            )
            return value
        elif self._config is not None:
-            values = [
+            values = [
+                getattr(self._config, "default_" + requirement, None),
+                getattr(self._config, "default" + requirement.capitalize(), None),
+            ]
            value = values[0] if values[0] is not None else values[1]
            if value is None:
                raise AttributeError(
@@ -679,10 +732,13 @@ class Requirer:
        self._requirementOverrides["preemptible"] = Requirer._parseResource(
            "preemptible", val
        )
+
    @property
-    def accelerators(self) ->
+    def accelerators(self) -> list[AcceleratorRequirement]:
        """Any accelerators, such as GPUs, that are needed."""
-        return cast(
+        return cast(
+            list[AcceleratorRequirement], self._fetchRequirement("accelerators")
+        )
 
    @accelerators.setter
    def accelerators(self, val: ParseableAcceleratorRequirement) -> None:
@@ -705,7 +761,7 @@ class Requirer:
        if isinstance(original_value, (int, float)):
            # This is something we actually can scale up and down
            new_value = original_value * factor
-            if requirement in (
+            if requirement in ("memory", "disk"):
                # Must round to an int
                new_value = math.ceil(new_value)
            setattr(scaled, requirement, new_value)
@@ -723,29 +779,32 @@ class Requirer:
            if isinstance(v, (int, float)) and v > 1000:
                # Make large numbers readable
                v = bytes2human(v)
-            parts.append(f
+            parts.append(f"{k}: {v}")
        if len(parts) == 0:
-            parts = [
-        return
+            parts = ["no requirements"]
+        return ", ".join(parts)
+
 
class JobBodyReference(NamedTuple):
    """
    Reference from a job description to its body.
    """
+
    file_store_id: str
    """File ID (or special shared file name for the root job) of the job's body."""
-    module_string: str
+    module_string: str
    """Stringified description of the module needed to load the body."""
 
+
class JobDescription(Requirer):
    """
    Stores all the information that the Toil Leader ever needs to know about a Job.
-
+
    This includes:
     * Resource requirements.
     * Which jobs are children or follow-ons or predecessors of this job.
     * A reference to the Job object in the job store.
-
+
    Can be obtained from an actual (i.e. executable) Job object, and can be
    used to obtain the Job object from the JobStore.
 
@@ -760,8 +819,9 @@ class JobDescription(Requirer):
        requirements: Mapping[str, Union[int, str, bool]],
        jobName: str,
        unitName: Optional[str] = "",
-        displayName: Optional[str] = "",
-        local: Optional[bool] = None
+        displayName: Optional[str] = "",
+        local: Optional[bool] = None,
+        files: Optional[set[FileID]] = None,
    ) -> None:
        """
        Create a new JobDescription.
@@ -784,6 +844,7 @@ class JobDescription(Requirer):
        :param local: If True, the job is meant to use minimal resources but is
            sensitive to execution latency, and so should be executed by the
            leader.
+        :param files: Set of FileID objects that the job plans to use.
        """
        # Set requirements
        super().__init__(requirements)
@@ -794,10 +855,11 @@ class JobDescription(Requirer):
        # Save names, making sure they are strings and not e.g. bytes or None.
        def makeString(x: Union[str, bytes, None]) -> str:
            if isinstance(x, bytes):
-                return x.decode(
+                return x.decode("utf-8", errors="replace")
            if x is None:
                return ""
            return x
+
        self.jobName = makeString(jobName)
        self.unitName = makeString(unitName)
        self.displayName = makeString(displayName)
@@ -844,7 +906,7 @@ class JobDescription(Requirer):
        # chained-in job with its original ID, and also this job's ID with its
        # original names, or is empty if no chaining has happened.
        # The first job in the chain comes first in the list.
-        self._merged_job_names:
+        self._merged_job_names: list[Names] = []
 
        # The number of direct predecessors of the job. Needs to be stored at
        # the JobDescription to support dynamically-created jobs with multiple
@@ -867,17 +929,17 @@ class JobDescription(Requirer):
 
        # The IDs of all child jobs of the described job.
        # Children which are done must be removed with filterSuccessors.
-        self.childIDs:
+        self.childIDs: set[str] = set()
 
        # The IDs of all follow-on jobs of the described job.
        # Follow-ons which are done must be removed with filterSuccessors.
-        self.followOnIDs:
+        self.followOnIDs: set[str] = set()
 
        # We keep our own children and follow-ons in a list of successor
        # phases, along with any successors adopted from jobs we have chained
        # from. When we finish our own children and follow-ons, we may have to
        # go back and finish successors for those jobs.
-        self.successor_phases:
+        self.successor_phases: list[set[str]] = [self.followOnIDs, self.childIDs]
 
        # Dict from ServiceHostJob ID to list of child ServiceHostJobs that start after it.
        # All services must have an entry, if only to an empty list.
@@ -893,13 +955,24 @@ class JobDescription(Requirer):
        # And we log who made the version (by PID)
        self._job_version_writer = 0
 
+        # Store FileIDs that the Job will want to use
+        # This currently does not serve much of a purpose except for debugging
+        # In the future, this can be used to improve job scheduling, see https://github.com/DataBiosphere/toil/issues/3071
+        self.files_to_use = files or set()
+
    def get_names(self) -> Names:
        """
        Get the names and ID of this job as a named tuple.
        """
-        return Names(
+        return Names(
+            self.jobName,
+            self.unitName,
+            self.displayName,
+            self.displayName,
+            str(self.jobStoreID),
+        )
 
-    def get_chain(self) ->
+    def get_chain(self) -> list[Names]:
        """
        Get all the jobs that executed in this job's chain, in order.
 
@@ -914,7 +987,7 @@ class JobDescription(Requirer):
        else:
            return list(self._merged_job_names)
 
-    def serviceHostIDsInBatches(self) -> Iterator[
+    def serviceHostIDsInBatches(self) -> Iterator[list[str]]:
        """
        Find all batches of service host job IDs that can be started at the same time.
 
@@ -955,14 +1028,13 @@ class JobDescription(Requirer):
        """
 
        for phase in self.successor_phases:
-
-            yield successor
+            yield from phase
 
-    def successors_by_phase(self) -> Iterator[
+    def successors_by_phase(self) -> Iterator[tuple[int, str]]:
        """
-        Get an iterator over all child/follow-on/chained inherited successor job IDs, along with their phase
+        Get an iterator over all child/follow-on/chained inherited successor job IDs, along with their phase number on the stack.
 
-        Phases
+        Phases execute higher numbers to lower numbers.
        """
 
        for i, phase in enumerate(self.successor_phases):
@@ -1003,7 +1075,7 @@ class JobDescription(Requirer):
        """
        self._body = None
 
-    def get_body(self) ->
+    def get_body(self) -> tuple[str, ModuleDescriptor]:
        """
        Get the information needed to load the job body.
 
@@ -1016,9 +1088,11 @@ class JobDescription(Requirer):
        if not self.has_body():
            raise RuntimeError(f"Cannot load the body of a job {self} without one")
 
-        return self._body.file_store_id, ModuleDescriptor.fromCommand(
+        return self._body.file_store_id, ModuleDescriptor.fromCommand(
+            self._body.module_string
+        )
 
-    def nextSuccessors(self) -> Optional[
+    def nextSuccessors(self) -> Optional[set[str]]:
        """
        Return the collection of job IDs for the successors of this job that are ready to run.
 
@@ -1101,7 +1175,9 @@ class JobDescription(Requirer):
        :returns: True if the job appears to be done, and all related child,
            follow-on, and service jobs appear to be finished and removed.
        """
-        return
+        return (
+            not self.has_body() and next(self.successorsAndServiceHosts(), None) is None
+        )
 
    def replace(self, other: "JobDescription") -> None:
        """
@@ -1120,11 +1196,15 @@ class JobDescription(Requirer):
        # TODO: We can't join the job graphs with Job._jobGraphsJoined, is that a problem?
 
        # Take all the successors other than this one
-        old_phases = [
+        old_phases = [
+            {i for i in p if i != self.jobStoreID} for p in other.successor_phases
+        ]
        # And drop empty phases
        old_phases = [p for p in old_phases if len(p) > 0]
        # And put in front of our existing phases
-        logger.debug(
+        logger.debug(
+            "%s is adopting successor phases from %s of: %s", self, other, old_phases
+        )
        self.successor_phases = old_phases + self.successor_phases
 
        # When deleting, we need to delete the files for our old ID, and also
@@ -1148,9 +1228,13 @@ class JobDescription(Requirer):
        self.jobStoreID = other.jobStoreID
 
        if len(other.filesToDelete) > 0:
-            raise RuntimeError(
+            raise RuntimeError(
+                "Trying to take on the ID of a job that is in the process of being committed!"
+            )
        if len(self.filesToDelete) > 0:
-            raise RuntimeError(
+            raise RuntimeError(
+                "Trying to take on the ID of anothe job while in the process of being committed!"
+            )
 
        self._job_version = other._job_version
        self._job_version_writer = os.getpid()
@@ -1160,7 +1244,9 @@ class JobDescription(Requirer):
        Make sure this JobDescription is not newer than a prospective new version of the JobDescription.
        """
        if other._job_version < self._job_version:
-            raise RuntimeError(
+            raise RuntimeError(
+                f"Cannot replace {self} from PID {self._job_version_writer} with older version {other} from PID {other._job_version_writer}"
+            )
 
    def is_updated_by(self, other: "JobDescription") -> bool:
        """
@@ -1177,7 +1263,7 @@ class JobDescription(Requirer):
                other._job_version_writer,
                self.jobStoreID,
                self,
-                self._job_version_writer
+                self._job_version_writer,
            )
            return False
 
@@ -1189,7 +1275,7 @@ class JobDescription(Requirer):
                other,
                other._job_version_writer,
                self,
-                self._job_version_writer
+                self._job_version_writer,
            )
            return False
 
@@ -1229,7 +1315,7 @@ class JobDescription(Requirer):
        """Test if the ServiceHostJob is a service of the described job."""
        return serviceID in self.serviceTree
 
-    def renameReferences(self, renames:
+    def renameReferences(self, renames: dict[TemporaryID, str]) -> None:
        """
        Apply the given dict of ID renames to all references to jobs.
 
@@ -1245,8 +1331,12 @@ class JobDescription(Requirer):
                # Replace each renamed item one at a time to preserve set identity
                phase.remove(item)
                phase.add(renames[item])
-        self.serviceTree = {
-
+        self.serviceTree = {
+            renames.get(parent, parent): [
+                renames.get(child, child) for child in children
+            ]
+            for parent, children in self.serviceTree.items()
+        }
 
    def addPredecessor(self) -> None:
        """Notify the JobDescription that a predecessor has been added to its Job."""
@@ -1264,7 +1354,11 @@ class JobDescription(Requirer):
        :param jobStore: The job store we are being placed into
        """
 
-    def setupJobAfterFailure(
+    def setupJobAfterFailure(
+        self,
+        exit_status: Optional[int] = None,
+        exit_reason: Optional["BatchJobExitReason"] = None,
+    ) -> None:
        """
        Configure job after a failure.
 
@@ -1287,30 +1381,49 @@ class JobDescription(Requirer):
        if self._config is None:
            raise RuntimeError("The job's config is not assigned.")
 
-        if
-
-
+        if (
+            self._config.enableUnlimitedPreemptibleRetries
+            and exit_reason == BatchJobExitReason.LOST
+        ):
+            logger.info(
+                "*Not* reducing try count (%s) of job %s with ID %s",
+                self.remainingTryCount,
+                self,
+                self.jobStoreID,
+            )
        else:
            self.remainingTryCount = max(0, self.remainingTryCount - 1)
-            logger.warning(
-
+            logger.warning(
+                "Due to failure we are reducing the remaining try count of job %s with ID %s to %s",
+                self,
+                self.jobStoreID,
+                self.remainingTryCount,
+            )
        # Set the default memory to be at least as large as the default, in
        # case this was a malloc failure (we do this because of the combined
        # batch system)
        if exit_reason == BatchJobExitReason.MEMLIMIT and self._config.doubleMem:
            self.memory = self.memory * 2
-            logger.warning(
-
+            logger.warning(
+                "We have doubled the memory of the failed job %s to %s bytes due to doubleMem flag",
+                self,
+                self.memory,
+            )
        if self.memory < self._config.defaultMemory:
            self.memory = self._config.defaultMemory
-            logger.warning(
-
+            logger.warning(
+                "We have increased the default memory of the failed job %s to %s bytes",
+                self,
+                self.memory,
+            )
 
        if self.disk < self._config.defaultDisk:
            self.disk = self._config.defaultDisk
-            logger.warning(
-
-
+            logger.warning(
+                "We have increased the disk of the failed job %s to the default of %s bytes",
+                self,
+                self.disk,
+            )
 
    def getLogFileHandle(self, jobStore):
        """
@@ -1360,12 +1473,12 @@ class JobDescription(Requirer):
        """Produce a useful logging string identifying this job."""
        printedName = "'" + self.jobName + "'"
        if self.unitName:
-            printedName +=
+            printedName += " " + self.unitName
 
        if self.jobStoreID is not None:
-            printedName +=
+            printedName += " " + str(self.jobStoreID)
 
-        printedName +=
+        printedName += " v" + str(self._job_version)
 
        return printedName
 
@@ -1374,7 +1487,7 @@ class JobDescription(Requirer):
    # a time, keyed by jobStoreID.
 
    def __repr__(self):
-        return f
+        return f"{self.__class__.__name__}( **{self.__dict__!r} )"
 
    def reserve_versions(self, count: int) -> None:
        """
@@ -1394,6 +1507,7 @@ class JobDescription(Requirer):
        self._job_version_writer = os.getpid()
        logger.debug("New job version: %s", self)
 
+
class ServiceJobDescription(JobDescription):
    """A description of a job that hosts a service."""
 
@@ -1464,7 +1578,7 @@ class CheckpointJobDescription(JobDescription):
            raise RuntimeError(f"Cannot restore an empty checkpoint for a job {self}")
        self._body = self.checkpoint
 
-    def restartCheckpoint(self, jobStore: "AbstractJobStore") ->
+    def restartCheckpoint(self, jobStore: "AbstractJobStore") -> list[str]:
        """
        Restart a checkpoint after the total failure of jobs in its subtree.
 
@@ -1475,24 +1589,30 @@ class CheckpointJobDescription(JobDescription):
        Returns a list with the IDs of any successors deleted.
        """
        if self.checkpoint is None:
-            raise RuntimeError(
+            raise RuntimeError(
+                "Cannot restart a checkpoint job. The checkpoint was never set."
+            )
        successorsDeleted = []
        all_successors = list(self.allSuccessors())
        if len(all_successors) > 0 or self.serviceTree or self.has_body():
            if self.has_body():
                if self._body != self.checkpoint:
-                    raise RuntimeError(
+                    raise RuntimeError(
+                        "The stored body reference and checkpoint are not the same."
+                    )
                logger.debug("Checkpoint job already has body set to run")
            else:
                self.restore_checkpoint()
 
-            jobStore.update_job(self)
+            jobStore.update_job(self)  # Update immediately to ensure that checkpoint
            # is made before deleting any remaining successors
 
            if len(all_successors) > 0 or self.serviceTree:
                # If the subtree of successors is not complete restart everything
-                logger.debug(
-
+                logger.debug(
+                    "Checkpoint job has unfinished successor jobs, deleting successors: %s, services: %s "
+                    % (all_successors, self.serviceTree.keys())
+                )
 
                # Delete everything on the stack, as these represent successors to clean
                # up as we restart the queue
@@ -1505,9 +1625,13 @@ class CheckpointJobDescription(JobDescription):
                    logger.debug("Job %s has already been deleted", otherJobID)
                if jobDesc.jobStoreID != self.jobStoreID:
                    # Delete everything under us except us.
-                    logger.debug(
+                    logger.debug(
+                        "Checkpoint is deleting old successor job: %s",
+                        jobDesc.jobStoreID,
+                    )
                    jobStore.delete_job(jobDesc.jobStoreID)
                    successorsDeleted.append(jobDesc.jobStoreID)
+
            recursiveDelete(self)
 
            # Cut links to the jobs we deleted.
@@ -1536,6 +1660,7 @@ class Job:
        displayName: Optional[str] = "",
        descriptionClass: Optional[type] = None,
        local: Optional[bool] = None,
+        files: Optional[set[FileID]] = None,
    ) -> None:
        """
        Job initializer.
@@ -1556,6 +1681,7 @@ class Job:
        :param displayName: Human-readable job type display name.
        :param descriptionClass: Override for the JobDescription class used to describe the job.
        :param local: if the job can be run on the leader.
+        :param files: Set of Files that the job will want to use.
 
        :type memory: int or string convertible by toil.lib.conversions.human2bytes to an int
        :type cores: float, int, or string convertible by toil.lib.conversions.human2bytes to an int
@@ -1571,14 +1697,20 @@ class Job:
            jobName = self.__class__.__name__
        displayName = displayName if displayName else jobName
 
-        #Some workflows use preemptable instead of preemptible
+        # Some workflows use preemptable instead of preemptible
        if preemptable and not preemptible:
-            logger.warning(
+            logger.warning(
+                "Preemptable as a keyword has been deprecated, please use preemptible."
+            )
            preemptible = preemptable
        # Build a requirements dict for the description
-        requirements = {
-
-
+        requirements = {
+            "memory": memory,
+            "cores": cores,
+            "disk": disk,
+            "accelerators": accelerators,
+            "preemptible": preemptible,
+        }
        if descriptionClass is None:
            if checkpoint:
                # Actually describe as a checkpoint job
@@ -1594,7 +1726,8 @@ class Job:
                jobName,
                unitName=unitName,
                displayName=displayName,
-                local=local
+                local=local,
+                files=files,
            )
 
        # Private class variables needed to actually execute a job, in the worker.
@@ -1617,7 +1750,9 @@ class Job:
        # Note that self.__module__ is not necessarily this module, i.e. job.py. It is the module
        # defining the class self is an instance of, which may be a subclass of Job that may be
        # defined in a different module.
-        self.userModule: ModuleDescriptor = ModuleDescriptor.forModule(
+        self.userModule: ModuleDescriptor = ModuleDescriptor.forModule(
+            self.__module__
+        ).globalize()
        # Maps index paths into composite return values to lists of IDs of files containing
        # promised values for those return value items. An index path is a tuple of indices that
        # traverses a nested data structure of lists, dicts, tuples or any other type supporting
@@ -1630,7 +1765,7 @@ class Job:
        self._tempDir = None
 
        # Holds flags set by set_debug_flag()
-        self._debug_flags:
+        self._debug_flags: set[str] = set()
 
    def __str__(self):
        """
@@ -1640,7 +1775,7 @@ class Job:
        if self.description is None:
            return repr(self)
        else:
-            return
+            return "Job(" + str(self.description) + ")"
 
    def check_initialized(self) -> None:
        """
@@ -1652,8 +1787,10 @@ class Job:
        If __init__() has not been called, raise an error.
        """
        if not hasattr(self, "_description"):
-            raise ValueError(
-
+            raise ValueError(
+                f"Job instance of type {type(self)} has not been initialized. super().__init__() may not "
+                f"have been called."
+            )
 
    @property
    def jobStoreID(self) -> Union[str, TemporaryID]:
@@ -1673,33 +1810,37 @@ class Job:
    def disk(self) -> int:
        """The maximum number of bytes of disk the job will require to run."""
        return self.description.disk
+
    @disk.setter
    def disk(self, val):
-
+        self.description.disk = val
 
    @property
    def memory(self):
        """The maximum number of bytes of memory the job will require to run."""
        return self.description.memory
+
    @memory.setter
    def memory(self, val):
-
+        self.description.memory = val
 
    @property
    def cores(self) -> Union[int, float]:
        """The number of CPU cores required."""
        return self.description.cores
+
    @cores.setter
    def cores(self, val):
-
+        self.description.cores = val
 
    @property
-    def accelerators(self) ->
+    def accelerators(self) -> list[AcceleratorRequirement]:
        """Any accelerators, such as GPUs, that are needed."""
        return self.description.accelerators
+
    @accelerators.setter
-    def accelerators(self, val:
-
+    def accelerators(self, val: list[ParseableAcceleratorRequirement]) -> None:
+        self.description.accelerators = val
 
    @property
    def preemptible(self) -> bool:
@@ -1709,15 +1850,30 @@ class Job:
    @deprecated(new_function_name="preemptible")
    def preemptable(self):
        return self.description.preemptible
+
    @preemptible.setter
    def preemptible(self, val):
-
+        self.description.preemptible = val
 
    @property
    def checkpoint(self) -> bool:
        """Determine if the job is a checkpoint job or not."""
        return isinstance(self._description, CheckpointJobDescription)
 
+    @property
+    def files_to_use(self) -> set[FileID]:
+        return self.description.files_to_use
+
+    @files_to_use.setter
+    def files_to_use(self, val: set[FileID]):
+        self.description.files_to_use = val
+
+    def add_to_files_to_use(self, val: FileID):
+        self.description.files_to_use.add(val)
+
+    def remove_from_files_to_use(self, val: FileID):
+        self.description.files_to_use.remove(val)
+
    def assignConfig(self, config: Config) -> None:
        """
        Assign the given config object.
@@ -1831,7 +1987,7 @@ class Job:
 
        return followOnJob
 
-    def hasPredecessor(self, job:
+    def hasPredecessor(self, job: "Job") -> bool:
        """Check if a given job is already a predecessor of this job."""
        return job in self._directPredecessors
 
@@ -1893,7 +2049,9 @@ class Job:
 
    def hasService(self, service: "Job.Service") -> bool:
        """Return True if the given Service is a service of this job, and False otherwise."""
-        return service.hostID is None or self._description.hasServiceHostJob(
+        return service.hostID is None or self._description.hasServiceHostJob(
+            service.hostID
+        )
 
    # Convenience functions for creating jobs
 
@@ -1941,7 +2099,9 @@ class Job:
        :return: The new child job that wraps fn.
        """
        if PromisedRequirement.convertPromises(kwargs):
-            return self.addChild(
+            return self.addChild(
+                PromisedRequirementJobFunctionWrappingJob.create(fn, *args, **kwargs)
+            )
        else:
            return self.addChild(JobFunctionWrappingJob(fn, *args, **kwargs))
 
@@ -1957,7 +2117,9 @@ class Job:
        :return: The new follow-on job that wraps fn.
        """
        if PromisedRequirement.convertPromises(kwargs):
-            return self.addFollowOn(
+            return self.addFollowOn(
+                PromisedRequirementJobFunctionWrappingJob.create(fn, *args, **kwargs)
+            )
        else:
            return self.addFollowOn(JobFunctionWrappingJob(fn, *args, **kwargs))
 
@@ -2059,8 +2221,12 @@ class Job:
            raise JobPromiseConstraintError(self)
        # TODO: can we guarantee self.jobStoreID is populated and so pass that here?
        with self._promiseJobStore.write_file_stream() as (fileHandle, jobStoreFileID):
-            promise = UnfulfilledPromiseSentinel(
-
+            promise = UnfulfilledPromiseSentinel(
+                str(self.description), jobStoreFileID, False
+            )
+            logger.debug(
+                "Issuing promise %s for result of %s", jobStoreFileID, self.description
+            )
            pickle.dump(promise, fileHandle, pickle.HIGHEST_PROTOCOL)
            self._rvs[path].append(jobStoreFileID)
        return self._promiseJobStore.config.jobStore, jobStoreFileID
@@ -2110,7 +2276,7 @@ class Job:
        self.checkJobGraphAcylic()
        self.checkNewCheckpointsAreLeafVertices()
 
-    def getRootJobs(self) ->
+    def getRootJobs(self) -> set["Job"]:
        """
        Return the set of root job objects that contain this job.
 
@@ -2142,8 +2308,9 @@ class Job:
        """
        rootJobs = self.getRootJobs()
        if len(rootJobs) != 1:
-            raise JobGraphDeadlockException(
-
+            raise JobGraphDeadlockException(
+                "Graph does not contain exactly one" " root job: %s" % rootJobs
+            )
 
    def checkJobGraphAcylic(self):
        """
@@ -2163,15 +2330,15 @@ class Job:
 
        Only deals with jobs created here, rather than loaded from the job store.
        """
-        #Get the root jobs
+        # Get the root jobs
        roots = self.getRootJobs()
        if len(roots) == 0:
            raise JobGraphDeadlockException("Graph contains no root jobs due to cycles")
 
-        #Get implied edges
+        # Get implied edges
        extraEdges = self._getImpliedEdges(roots)
 
-        #Check for directed cycles in the augmented graph
+        # Check for directed cycles in the augmented graph
        visited = set()
        for root in roots:
            root._checkJobGraphAcylicDFS([], visited, extraEdges)
@@ -2181,17 +2348,23 @@ class Job:
|
|
|
2181
2348
|
if self not in visited:
|
|
2182
2349
|
visited.add(self)
|
|
2183
2350
|
stack.append(self)
|
|
2184
|
-
for successor in [
|
|
2351
|
+
for successor in [
|
|
2352
|
+
self._registry[jID]
|
|
2353
|
+
for jID in self.description.allSuccessors()
|
|
2354
|
+
if jID in self._registry
|
|
2355
|
+
] + extraEdges[self]:
|
|
2185
2356
|
# Grab all the successors in the current registry (i.e. added form this node) and look at them.
|
|
2186
2357
|
successor._checkJobGraphAcylicDFS(stack, visited, extraEdges)
|
|
2187
2358
|
if stack.pop() != self:
|
|
2188
2359
|
raise RuntimeError("The stack ordering/elements was changed.")
|
|
2189
2360
|
if self in stack:
|
|
2190
2361
|
stack.append(self)
|
|
2191
|
-
raise JobGraphDeadlockException(
|
|
2362
|
+
raise JobGraphDeadlockException(
|
|
2363
|
+
"A cycle of job dependencies has been detected '%s'" % stack
|
|
2364
|
+
)
|
|
2192
2365
|
|
|
2193
2366
|
@staticmethod
|
|
2194
|
-
def _getImpliedEdges(roots) ->
|
|
2367
|
+
def _getImpliedEdges(roots) -> dict["Job", list["Job"]]:
|
|
2195
2368
|
"""
|
|
2196
2369
|
Gets the set of implied edges (between children and follow-ons of a common job).
|
|
2197
2370
|
|
|
@@ -2201,17 +2374,17 @@ class Job:
|
|
|
2201
2374
|
|
|
2202
2375
|
:returns: dict from Job object to list of Job objects that must be done before it can start.
|
|
2203
2376
|
"""
|
|
2204
|
-
#Get nodes (Job objects) in job graph
|
|
2377
|
+
# Get nodes (Job objects) in job graph
|
|
2205
2378
|
nodes = set()
|
|
2206
2379
|
for root in roots:
|
|
2207
2380
|
root._collectAllSuccessors(nodes)
|
|
2208
2381
|
|
|
2209
2382
|
##For each follow-on edge calculate the extra implied edges
|
|
2210
|
-
#Adjacency list of implied edges, i.e. map of jobs to lists of jobs
|
|
2211
|
-
#connected by an implied edge
|
|
2383
|
+
# Adjacency list of implied edges, i.e. map of jobs to lists of jobs
|
|
2384
|
+
# connected by an implied edge
|
|
2212
2385
|
extraEdges = {n: [] for n in nodes}
|
|
2213
2386
|
for job in nodes:
|
|
2214
|
-
|
|
2387
|
+
# Get all the nonempty successor phases
|
|
2215
2388
|
phases = [p for p in job.description.successor_phases if len(p) > 0]
|
|
2216
2389
|
for depth in range(1, len(phases)):
|
|
2217
2390
|
# Add edges from all jobs in the earlier/upper subtrees to all
|
|
@@ -2231,7 +2404,11 @@ class Job:
|
|
|
2231
2404
|
for inUpper in reacheable:
|
|
2232
2405
|
# Add extra edges to the roots of all the lower subtrees
|
|
2233
2406
|
# But skip anything in the lower subtree not in the current _registry (i.e. not created hear)
|
|
2234
|
-
extraEdges[inUpper] += [
|
|
2407
|
+
extraEdges[inUpper] += [
|
|
2408
|
+
job._registry[lowerID]
|
|
2409
|
+
for lowerID in lower
|
|
2410
|
+
if lowerID in job._registry
|
|
2411
|
+
]
|
|
2235
2412
|
|
|
2236
2413
|
return extraEdges
|
|
2237
2414
|
|
|
@@ -2251,17 +2428,21 @@ class Job:
|
|
|
2251
2428
|
:raises toil.job.JobGraphDeadlockException: if there exists a job being added to the graph for which \
|
|
2252
2429
|
checkpoint=True and which is not a leaf.
|
|
2253
2430
|
"""
|
|
2254
|
-
roots =
|
|
2431
|
+
roots = (
|
|
2432
|
+
self.getRootJobs()
|
|
2433
|
+
) # Roots jobs of component, these are preexisting jobs in the graph
|
|
2255
2434
|
|
|
2256
2435
|
# All jobs in the component of the job graph containing self
|
|
2257
2436
|
jobs = set()
|
|
2258
|
-
list(map(lambda x
|
|
2437
|
+
list(map(lambda x: x._collectAllSuccessors(jobs), roots))
|
|
2259
2438
|
|
|
2260
2439
|
# Check for each job for which checkpoint is true that it is a cut vertex or leaf
|
|
2261
2440
|
for y in [x for x in jobs if x.checkpoint]:
|
|
2262
|
-
if y not in roots:
|
|
2441
|
+
if y not in roots: # The roots are the prexisting jobs
|
|
2263
2442
|
if not Job._isLeafVertex(y):
|
|
2264
|
-
raise JobGraphDeadlockException(
|
|
2443
|
+
raise JobGraphDeadlockException(
|
|
2444
|
+
"New checkpoint job %s is not a leaf in the job graph" % y
|
|
2445
|
+
)
|
|
2265
2446
|
|
|
2266
2447
|
####################################################
|
|
2267
2448
|
# Deferred function system
|
|
@@ -2290,7 +2471,9 @@ class Job:
|
|
|
2290
2471
|
:param dict kwargs: The keyword arguments to the function
|
|
2291
2472
|
"""
|
|
2292
2473
|
if self._defer is None:
|
|
2293
|
-
raise Exception(
|
|
2474
|
+
raise Exception(
|
|
2475
|
+
"A deferred function may only be registered with a job while that job is running."
|
|
2476
|
+
)
|
|
2294
2477
|
self._defer(DeferredFunction.create(function, *args, **kwargs))
|
|
2295
2478
|
|
|
2296
2479
|
####################################################
|
|
@@ -2299,7 +2482,7 @@ class Job:
|
|
|
2299
2482
|
# and defining a service (Job.Service)
|
|
2300
2483
|
####################################################
|
|
2301
2484
|
|
|
2302
|
-
class Runner
|
|
2485
|
+
class Runner:
|
|
2303
2486
|
"""Used to setup and run Toil workflow."""
|
|
2304
2487
|
|
|
2305
2488
|
@staticmethod
|
|
@@ -2315,7 +2498,9 @@ class Job:
|
|
|
2315
2498
|
return parser
|
|
2316
2499
|
|
|
2317
2500
|
@staticmethod
|
|
2318
|
-
def getDefaultOptions(
|
|
2501
|
+
def getDefaultOptions(
|
|
2502
|
+
jobStore: Optional[str] = None, jobstore_as_flag: bool = False
|
|
2503
|
+
) -> Namespace:
|
|
2319
2504
|
"""
|
|
2320
2505
|
Get default options for a toil workflow.
|
|
2321
2506
|
|
|
@@ -2326,9 +2511,13 @@ class Job:
|
|
|
2326
2511
|
"""
|
|
2327
2512
|
# setting jobstore_as_flag to True allows the user to declare the jobstore in the config file instead
|
|
2328
2513
|
if not jobstore_as_flag and jobStore is None:
|
|
2329
|
-
raise RuntimeError(
|
|
2330
|
-
|
|
2331
|
-
|
|
2514
|
+
raise RuntimeError(
|
|
2515
|
+
"The jobstore argument cannot be missing if the jobstore_as_flag argument is set "
|
|
2516
|
+
"to False!"
|
|
2517
|
+
)
|
|
2518
|
+
parser = Job.Runner.getDefaultArgumentParser(
|
|
2519
|
+
jobstore_as_flag=jobstore_as_flag
|
|
2520
|
+
)
|
|
2332
2521
|
arguments = []
|
|
2333
2522
|
if jobstore_as_flag and jobStore is not None:
|
|
2334
2523
|
arguments = ["--jobstore", jobStore]
|
|
@@ -2337,7 +2526,10 @@ class Job:
|
|
|
2337
2526
|
return parser.parse_args(args=arguments)
|
|
2338
2527
|
|
|
2339
2528
|
@staticmethod
|
|
2340
|
-
def addToilOptions(
|
|
2529
|
+
def addToilOptions(
|
|
2530
|
+
parser: Union["OptionParser", ArgumentParser],
|
|
2531
|
+
jobstore_as_flag: bool = False,
|
|
2532
|
+
) -> None:
|
|
2341
2533
|
"""
|
|
2342
2534
|
Adds the default toil options to an :mod:`optparse` or :mod:`argparse`
|
|
2343
2535
|
parser object.
|
|
@@ -2377,19 +2569,29 @@ class Job:
|
|
|
2377
2569
|
Is not executed as a job; runs within a ServiceHostJob.
|
|
2378
2570
|
"""
|
|
2379
2571
|
|
|
2380
|
-
def __init__(
|
|
2572
|
+
def __init__(
|
|
2573
|
+
self,
|
|
2574
|
+
memory=None,
|
|
2575
|
+
cores=None,
|
|
2576
|
+
disk=None,
|
|
2577
|
+
accelerators=None,
|
|
2578
|
+
preemptible=None,
|
|
2579
|
+
unitName=None,
|
|
2580
|
+
):
|
|
2381
2581
|
"""
|
|
2382
2582
|
Memory, core and disk requirements are specified identically to as in \
|
|
2383
2583
|
:func:`toil.job.Job.__init__`.
|
|
2384
2584
|
"""
|
|
2385
2585
|
# Save the requirements in ourselves so they are visible on `self` to user code.
|
|
2386
|
-
super().__init__(
|
|
2387
|
-
|
|
2388
|
-
|
|
2389
|
-
|
|
2390
|
-
|
|
2391
|
-
|
|
2392
|
-
|
|
2586
|
+
super().__init__(
|
|
2587
|
+
{
|
|
2588
|
+
"memory": memory,
|
|
2589
|
+
"cores": cores,
|
|
2590
|
+
"disk": disk,
|
|
2591
|
+
"accelerators": accelerators,
|
|
2592
|
+
"preemptible": preemptible,
|
|
2593
|
+
}
|
|
2594
|
+
)
|
|
2393
2595
|
|
|
2394
2596
|
# And the unit name
|
|
2395
2597
|
self.unitName = unitName
|
|
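The `Job.Service.__init__` signature shown above takes the same requirement keywords as `Job` itself. For orientation, a hedged sketch of the documented subclass-and-attach pattern; the `DemoService` class, its resource figures, and the returned connection string are invented for illustration:

```python
# Hypothetical sketch of a user-defined Toil service, assuming the standard
# Job.Service lifecycle (start/check/stop) from the Toil documentation.
from toil.job import Job

class DemoService(Job.Service):
    def __init__(self):
        # Requirement keywords mirror the __init__ shown in the diff above.
        super().__init__(memory="512M", cores=0.5, disk="100M")

    def start(self, job):
        return "connection-string"  # delivered to dependants via a promise

    def check(self):
        return True                 # report that the service is still healthy

    def stop(self, job):
        pass                        # tear the service down

parent = Job()
connection_promise = parent.addService(DemoService())
```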
@@ -2467,15 +2669,19 @@ class Job:
 
         def filter_main(module_name, class_name):
             try:
-                if module_name ==
+                if module_name == "__main__":
                     return getattr(userModule, class_name)
                 else:
                     return getattr(importlib.import_module(module_name), class_name)
             except:
-                if module_name ==
-                    logger.debug(
+                if module_name == "__main__":
+                    logger.debug(
+                        "Failed getting %s from module %s.", class_name, userModule
+                    )
                 else:
-                    logger.debug(
+                    logger.debug(
+                        "Failed getting %s from module %s.", class_name, module_name
+                    )
                 raise
 
         class FilteredUnpickler(pickle.Unpickler):
@@ -2485,7 +2691,9 @@ class Job:
         unpickler = FilteredUnpickler(fileHandle)
 
         runnable = unpickler.load()
-        if requireInstanceOf is not None and not isinstance(
+        if requireInstanceOf is not None and not isinstance(
+            runnable, requireInstanceOf
+        ):
             raise RuntimeError(f"Did not find a {requireInstanceOf} when expected")
 
         return runnable
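`FilteredUnpickler` above intercepts class lookups while a job body is being unpickled. A generic, hedged sketch of that pattern in plain Python, independent of Toil's actual resolver (the `resolve_class` function here is a stand-in, not `filter_main`):

```python
# Generic sketch of the "filtered unpickler" pattern: override find_class so
# class lookups during unpickling go through a custom resolver.
import importlib
import io
import pickle

def resolve_class(module_name: str, class_name: str):
    # A real filter would special-case "__main__" and reject unexpected classes.
    return getattr(importlib.import_module(module_name), class_name)

class FilteredUnpickler(pickle.Unpickler):
    def find_class(self, module, name):
        return resolve_class(module, name)

def safe_load(data: bytes):
    return FilteredUnpickler(io.BytesIO(data)).load()
```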
@@ -2518,15 +2726,28 @@ class Job:
             # File may be gone if the job is a service being re-run and the accessing job is
             # already complete.
             if jobStore.file_exists(promiseFileStoreID):
-                logger.debug(
+                logger.debug(
+                    "Resolve promise %s from %s with a %s",
+                    promiseFileStoreID,
+                    self,
+                    type(promisedValue),
+                )
                 with jobStore.update_file_stream(promiseFileStoreID) as fileHandle:
                     try:
-                        pickle.dump(
+                        pickle.dump(
+                            promisedValue, fileHandle, pickle.HIGHEST_PROTOCOL
+                        )
                     except AttributeError:
-                        logger.exception(
+                        logger.exception(
+                            "Could not pickle promise result %s", promisedValue
+                        )
                         raise
             else:
-                logger.debug(
+                logger.debug(
+                    "Do not resolve promise %s from %s because it is no longer needed",
+                    promiseFileStoreID,
+                    self,
+                )
 
     # Functions associated with Job.checkJobGraphAcyclic to establish that the job graph does not
     # contain any cycles of dependencies:
@@ -2551,7 +2772,7 @@ class Job:
                 # We added this successor locally
                 todo.append(self._registry[successorID])
 
-    def getTopologicalOrderingOfJobs(self) ->
+    def getTopologicalOrderingOfJobs(self) -> list["Job"]:
         """
         :returns: a list of jobs such that for all pairs of indices i, j for which i < j, \
         the job at index i can be run before the job at index j.
@@ -2573,8 +2794,8 @@ class Job:
             job = todo[-1]
             todo.pop()
 
-            #Do not add the job to the ordering until all its predecessors have been
-            #added to the ordering
+            # Do not add the job to the ordering until all its predecessors have been
+            # added to the ordering
             outstandingPredecessor = False
             for predJob in job._directPredecessors:
                 if predJob.jobStoreID not in visited:
@@ -2599,7 +2820,7 @@ class Job:
     # Storing Jobs into the JobStore
     ####################################################
 
-    def _register(self, jobStore) ->
+    def _register(self, jobStore) -> list[tuple[TemporaryID, str]]:
         """
         If this job lacks a JobStore-assigned ID, assign this job an ID.
         Must be called for each job before it is saved to the JobStore for the first time.
@@ -2628,7 +2849,7 @@ class Job:
             # We already have an ID. No assignment or reference rewrite necessary.
             return []
 
-    def _renameReferences(self, renames:
+    def _renameReferences(self, renames: dict[TemporaryID, str]) -> None:
         """
         Apply the given dict of ID renames to all references to other jobs.
 
@@ -2664,8 +2885,8 @@ class Job:
 
         # Clear out old Cactus compatibility fields that don't need to be
         # preserved and shouldn't be serialized.
-        if hasattr(self,
-            delattr(self,
+        if hasattr(self, "_services"):
+            delattr(self, "_services")
 
         # Remember fields we will overwrite
         description = self._description
@@ -2683,7 +2904,9 @@ class Job:
             self._directPredecessors = set()
 
             # Save the body of the job
-            with jobStore.write_file_stream(
+            with jobStore.write_file_stream(
+                description.jobStoreID, cleanup=True
+            ) as (fileHandle, fileStoreID):
                 pickle.dump(self, fileHandle, pickle.HIGHEST_PROTOCOL)
         finally:
             # Restore important fields (before handling errors)
@@ -2709,7 +2932,12 @@ class Job:
         # Connect the body of the job to the JobDescription
         self._description.attach_body(fileStoreID, userScript)
 
-    def _saveJobGraph(
+    def _saveJobGraph(
+        self,
+        jobStore: "AbstractJobStore",
+        saveSelf: bool = False,
+        returnValues: bool = None,
+    ):
         """
         Save job data and new JobDescriptions to the given job store for this
         job and all descending jobs, including services.
@@ -2760,7 +2988,12 @@ class Job:
         # Set up to save last job first, so promises flow the right way
         ordering.reverse()
 
-        logger.debug(
+        logger.debug(
+            "Saving graph of %d jobs, %d non-service, %d new",
+            len(allJobs),
+            len(ordering),
+            len(fakeToReal),
+        )
 
         # Make sure we're the root
         if ordering[-1] != self:
@@ -2773,15 +3006,15 @@ class Job:
             if not isinstance(j, ServiceHostJob) and j.jobStoreID not in ordered_ids:
                 raise RuntimeError(f"{j} not found in ordering {ordering}")
 
-
-
         if not saveSelf:
             # Fulfil promises for return values (even if value is None)
             self._fulfillPromises(returnValues, jobStore)
 
         for job in ordering:
             logger.debug("Processing job %s", job.description)
-            for serviceBatch in reversed(
+            for serviceBatch in reversed(
+                list(job.description.serviceHostIDsInBatches())
+            ):
                 # For each batch of service host jobs in reverse order they start
                 for serviceID in serviceBatch:
                     logger.debug("Processing service %s", serviceID)
@@ -2819,7 +3052,8 @@ class Job:
         # All other job vertices in the graph are checked by checkNewCheckpointsAreLeafVertices
         if self.checkpoint and not Job._isLeafVertex(self):
             raise JobGraphDeadlockException(
-
+                "New checkpoint job %s is not a leaf in the job graph" % self
+            )
 
         # Save the root job and all descendants and services
         self._saveJobGraph(jobStore, saveSelf=True)
@@ -2845,19 +3079,19 @@ class Job:
         :param job_description: the JobDescription of the job to retrieve.
         :returns: The job referenced by the JobDescription.
         """
-
+
         file_store_id, user_module_descriptor = job_description.get_body()
-        logger.debug(
+        logger.debug("Loading user module %s.", user_module_descriptor)
         user_module = cls._loadUserModule(user_module_descriptor)
 
-        #Loads context manager using file stream
+        # Loads context manager using file stream
         if file_store_id == "firstJob":
             # This one is actually a shared file name and not a file ID.
             manager = job_store.read_shared_file_stream(file_store_id)
         else:
             manager = job_store.read_file_stream(file_store_id)
 
-        #Open and unpickle
+        # Open and unpickle
         with manager as file_handle:
 
             job = cls._unpickle(user_module, file_handle, requireInstanceOf=Job)
@@ -2869,7 +3103,6 @@ class Job:
 
         return job
 
-
     def _run(self, jobGraph=None, fileStore=None, **kwargs):
         """
         Function which worker calls to ultimately invoke
@@ -2934,7 +3167,9 @@ class Job:
             os.chdir(baseDir)
             # Finish up the stats
             if stats is not None:
-                totalCpuTime, totalMemoryUsage =
+                totalCpuTime, totalMemoryUsage = (
+                    ResourceMonitor.get_total_cpu_time_and_memory_usage()
+                )
                 stats.jobs.append(
                     Expando(
                         time=str(time.time() - startTime),
@@ -2942,7 +3177,7 @@ class Job:
                         class_name=self._jobName(),
                         memory=str(totalMemoryUsage),
                         requested_cores=str(self.cores),
-                        disk=str(fileStore.get_disk_usage())
+                        disk=str(fileStore.get_disk_usage()),
                     )
                 )
 
@@ -2987,13 +3222,12 @@ class Job:
         self._defer = None
         self._fileStore = None
 
-
         # Serialize the new Jobs defined by the run method to the jobStore
         self._saveJobGraph(jobStore, saveSelf=False, returnValues=returnValues)
 
         # Clear out the body, because the job is done.
         self.description.detach_body()
-
+
         # That and the new child/follow-on relationships will need to be
         # recorded later by an update() of the JobDescription.
 
@@ -3016,7 +3250,9 @@ class Job:
 
         return flag in self._debug_flags
 
-    def files_downloaded_hook(
+    def files_downloaded_hook(
+        self, host_and_job_paths: Optional[list[tuple[str, str]]] = None
+    ) -> None:
         """
         Function that subclasses can call when they have downloaded their input files.
 
@@ -3031,7 +3267,10 @@ class Job:
             # Stop the worker!
             logger.info("Job has downloaded its files. Stopping.")
             # Send off the path mapping for the debugging wrapper.
-            raise FilesDownloadedStoppingPointReached(
+            raise FilesDownloadedStoppingPointReached(
+                "Files downloaded", host_and_job_paths=host_and_job_paths
+            )
+
 
 class JobException(Exception):
     """General job exception."""
@@ -3045,6 +3284,7 @@ class JobGraphDeadlockException(JobException):
     An exception raised in the event that a workflow contains an unresolvable \
     dependency, such as a cycle. See :func:`toil.job.Job.checkJobGraphForDeadlocks`.
     """
+
     def __init__(self, string):
         super().__init__(string)
 
@@ -3053,6 +3293,7 @@ class FunctionWrappingJob(Job):
     """
     Job used to wrap a function. In its `run` method the wrapped function is called.
     """
+
     def __init__(self, userFunction, *args, **kwargs):
         """
         :param callable userFunction: The function to wrap. It will be called with ``*args`` and
@@ -3072,7 +3313,9 @@ class FunctionWrappingJob(Job):
         if argSpec.defaults is None:
             argDict = {}
         else:
-            argDict = dict(
+            argDict = dict(
+                list(zip(argSpec.args[-len(argSpec.defaults) :], argSpec.defaults))
+            )
 
         def resolve(key, default=None, dehumanize=False):
             try:
@@ -3090,36 +3333,48 @@ class FunctionWrappingJob(Job):
                     value = human2bytes(value)
                 return value
 
-        super().__init__(
-
-
-
-
-
-
+        super().__init__(
+            memory=resolve("memory", dehumanize=True),
+            cores=resolve("cores", dehumanize=True),
+            disk=resolve("disk", dehumanize=True),
+            accelerators=resolve("accelerators"),
+            preemptible=resolve("preemptible"),
+            checkpoint=resolve("checkpoint", default=False),
+            unitName=resolve("name", default=None),
+        )
 
-        self.userFunctionModule = ModuleDescriptor.forModule(
+        self.userFunctionModule = ModuleDescriptor.forModule(
+            userFunction.__module__
+        ).globalize()
         self.userFunctionName = str(userFunction.__name__)
         self.description.jobName = self.userFunctionName
         self._args = args
         self._kwargs = kwargs
 
     def _getUserFunction(self):
-        logger.debug(
-
-
+        logger.debug(
+            "Loading user function %s from module %s.",
+            self.userFunctionName,
+            self.userFunctionModule,
+        )
         userFunctionModule = self._loadUserModule(self.userFunctionModule)
         return getattr(userFunctionModule, self.userFunctionName)
 
-    def run(self,fileStore):
-        userFunction = self._getUserFunction(
+    def run(self, fileStore):
+        userFunction = self._getUserFunction()
         return userFunction(*self._args, **self._kwargs)
 
     def getUserScript(self):
         return self.userFunctionModule
 
     def _jobName(self):
-        return ".".join(
+        return ".".join(
+            (
+                self.__class__.__name__,
+                self.userFunctionModule.name,
+                self.userFunctionName,
+            )
+        )
 
 
 class JobFunctionWrappingJob(FunctionWrappingJob):
@@ -3165,10 +3420,20 @@ class PromisedRequirementFunctionWrappingJob(FunctionWrappingJob):
     Spawns child function using parent function parameters and fulfilled promised
     resource requirements.
     """
+
     def __init__(self, userFunction, *args, **kwargs):
         self._promisedKwargs = kwargs.copy()
         # Replace resource requirements in intermediate job with small values.
-        kwargs.update(
+        kwargs.update(
+            dict(
+                disk="1M",
+                memory="32M",
+                cores=0.1,
+                accelerators=[],
+                preemptible=True,
+                preemptable=True,
+            )
+        )
         super().__init__(userFunction, *args, **kwargs)
 
     @classmethod
@@ -3193,7 +3458,9 @@ class PromisedRequirementFunctionWrappingJob(FunctionWrappingJob):
         for requirement in REQUIREMENT_NAMES:
             try:
                 if isinstance(self._promisedKwargs[requirement], PromisedRequirement):
-                    self._promisedKwargs[requirement] = self._promisedKwargs[
+                    self._promisedKwargs[requirement] = self._promisedKwargs[
+                        requirement
+                    ].getValue()
             except KeyError:
                 pass
 
@@ -3207,7 +3474,9 @@ class PromisedRequirementJobFunctionWrappingJob(PromisedRequirementFunctionWrapp
     def run(self, fileStore):
         self.evaluatePromisedRequirements()
         userFunction = self._getUserFunction()
-        return self.addChildJobFn(
+        return self.addChildJobFn(
+            userFunction, *self._args, **self._promisedKwargs
+        ).rv()
 
 
 class EncapsulatedJob(Job):
@@ -3234,6 +3503,7 @@ class EncapsulatedJob(Job):
     is the return value of the root job, e.g. A().encapsulate().rv() and A().rv() will resolve to
     the same value after A or A.encapsulate() has been run.
     """
+
     def __init__(self, job, unitName=None):
         """
         :param toil.job.Job job: the job to encapsulate.
@@ -3253,7 +3523,12 @@ class EncapsulatedJob(Job):
             Job.addChild(self, job)
             # Use small resource requirements for dummy Job instance.
             # But not too small, or the job won't have enough resources to safely start up Toil.
-            self.encapsulatedFollowOn = Job(
+            self.encapsulatedFollowOn = Job(
+                disk="100M",
+                memory="512M",
+                cores=0.1,
+                unitName=None if unitName is None else unitName + "-followOn",
+            )
             Job.addFollowOn(self, self.encapsulatedFollowOn)
         else:
             # Unpickling on the worker, to be run as a no-op.
@@ -3265,17 +3540,25 @@ class EncapsulatedJob(Job):
 
     def addChild(self, childJob):
         if self.encapsulatedFollowOn is None:
-            raise RuntimeError(
+            raise RuntimeError(
+                "Children cannot be added to EncapsulatedJob while it is running"
+            )
         return Job.addChild(self.encapsulatedFollowOn, childJob)
 
     def addService(self, service, parentService=None):
         if self.encapsulatedFollowOn is None:
-            raise RuntimeError(
-
+            raise RuntimeError(
+                "Services cannot be added to EncapsulatedJob while it is running"
+            )
+        return Job.addService(
+            self.encapsulatedFollowOn, service, parentService=parentService
+        )
 
     def addFollowOn(self, followOnJob):
         if self.encapsulatedFollowOn is None:
-            raise RuntimeError(
+            raise RuntimeError(
+                "Follow-ons cannot be added to EncapsulatedJob while it is running"
+            )
         return Job.addFollowOn(self.encapsulatedFollowOn, followOnJob)
 
     def rv(self, *path) -> "Promise":
@@ -3318,6 +3601,7 @@ class ServiceHostJob(Job):
     """
     Job that runs a service. Used internally by Toil. Users should subclass Service instead of using this.
     """
+
     def __init__(self, service):
         """
         This constructor should not be called by a user.
@@ -3328,12 +3612,17 @@ class ServiceHostJob(Job):
 
         # Make sure the service hasn't been given a host already.
         if service.hostID is not None:
-            raise RuntimeError(
+            raise RuntimeError(
+                "Cannot set the host. The service has already been given a host."
+            )
 
         # Make ourselves with name info from the Service and a
         # ServiceJobDescription that has the service control flags.
-        super().__init__(
-
+        super().__init__(
+            **service.requirements,
+            unitName=service.unitName,
+            descriptionClass=ServiceJobDescription,
+        )
 
         # Make sure the service knows it has a host now
         service.hostID = self.jobStoreID
@@ -3371,13 +3660,19 @@ class ServiceHostJob(Job):
     # stuff onto us.
 
     def addChild(self, child):
-        raise RuntimeError(
+        raise RuntimeError(
+            "Service host jobs cannot have children, follow-ons, or services"
+        )
 
     def addFollowOn(self, followOn):
-        raise RuntimeError(
+        raise RuntimeError(
+            "Service host jobs cannot have children, follow-ons, or services"
+        )
 
     def addService(self, service, parentService=None):
-        raise RuntimeError(
+        raise RuntimeError(
+            "Service host jobs cannot have children, follow-ons, or services"
+        )
 
     def saveBody(self, jobStore):
         """
@@ -3386,7 +3681,9 @@ class ServiceHostJob(Job):
         # Save unpickled service
         service = self.service
         # Serialize service
-        self.pickledService = pickle.dumps(
+        self.pickledService = pickle.dumps(
+            self.service, protocol=pickle.HIGHEST_PROTOCOL
+        )
         # Clear real service until we have the module to load it back
         self.service = None
         # Save body as normal
@@ -3397,24 +3694,30 @@ class ServiceHostJob(Job):
 
     def run(self, fileStore):
         # Unpickle the service
-        logger.debug(
+        logger.debug("Loading service module %s.", self.serviceModule)
         userModule = self._loadUserModule(self.serviceModule)
-        service = self._unpickle(
+        service = self._unpickle(
+            userModule, BytesIO(self.pickledService), requireInstanceOf=Job.Service
+        )
         self.pickledService = None
         # Make sure it has the config, since it wasn't load()-ed via the JobStore
         service.assignConfig(fileStore.jobStore.config)
-        #Start the service
+        # Start the service
         startCredentials = service.start(self)
         try:
-            #The start credentials must be communicated to processes connecting to
-            #the service, to do this while the run method is running we
-            #cheat and set the return value promise within the run method
+            # The start credentials must be communicated to processes connecting to
+            # the service, to do this while the run method is running we
+            # cheat and set the return value promise within the run method
             self._fulfillPromises(startCredentials, fileStore.jobStore)
-            self._rvs =
-
+            self._rvs = (
+                {}
+            )  # Set this to avoid the return values being updated after the
+            # run method has completed!
 
-            #Now flag that the service is running jobs can connect to it
-            logger.debug(
+            # Now flag that the service is running jobs can connect to it
+            logger.debug(
+                "Removing the start jobStoreID to indicate that establishment of the service"
+            )
             if self.description.startJobStoreID is None:
                 raise RuntimeError("No start jobStoreID to remove.")
             if fileStore.jobStore.file_exists(self.description.startJobStoreID):
@@ -3422,23 +3725,33 @@ class ServiceHostJob(Job):
             if fileStore.jobStore.file_exists(self.description.startJobStoreID):
                 raise RuntimeError("The start jobStoreID is not a file.")
 
-            #Now block until we are told to stop, which is indicated by the removal
-            #of a file
+            # Now block until we are told to stop, which is indicated by the removal
+            # of a file
             if self.description.terminateJobStoreID is None:
                 raise RuntimeError("No terminate jobStoreID to use.")
             while True:
                 # Check for the terminate signal
-                if not fileStore.jobStore.file_exists(
-
-
-
+                if not fileStore.jobStore.file_exists(
+                    self.description.terminateJobStoreID
+                ):
+                    logger.debug(
+                        "Detected that the terminate jobStoreID has been removed so exiting"
+                    )
+                    if not fileStore.jobStore.file_exists(
+                        self.description.errorJobStoreID
+                    ):
+                        raise RuntimeError(
+                            "Detected the error jobStoreID has been removed so exiting with an error"
+                        )
                     break
 
                 # Check the service's status and exit if failed or complete
                 try:
                     if not service.check():
-                        logger.debug(
-
+                        logger.debug(
+                            "The service has finished okay, but we have not been told to terminate. "
+                            "Waiting for leader to tell us to come back."
+                        )
                         # TODO: Adjust leader so that it keys on something
                         # other than the services finishing (assumed to be
                        # after the children) to know when to run follow-on
@@ -3449,7 +3762,9 @@ class ServiceHostJob(Job):
                     logger.debug("Detected abnormal termination of the service")
                     raise
 
                time.sleep(
-
+                    fileStore.jobStore.config.servicePollingInterval
+                )  # Avoid excessive polling
 
             logger.debug("Service is done")
         finally:
@@ -3460,6 +3775,354 @@ class ServiceHostJob(Job):
         return self.serviceModule
 
 
+class FileMetadata(NamedTuple):
+    """
+    Metadata for a file.
+    source is the URL to grab the file from
+    parent_dir is parent directory of the source
+    size is the size of the file. Is none if the filesize cannot be retrieved.
+    """
+
+    source: str
+    parent_dir: str
+    size: Optional[int]
+
+
+def potential_absolute_uris(
+    uri: str,
+    path: list[str],
+    importer: Optional[str] = None,
+    execution_dir: Optional[str] = None,
+) -> Iterator[str]:
+    """
+    Get potential absolute URIs to check for an imported file.
+
+    Given a URI or bare path, yield in turn all the URIs, with schemes, where we
+    should actually try to find it, given that we want to search under/against
+    the given paths or URIs, the current directory, and the given importing WDL
+    document if any.
+    """
+
+    if uri == "":
+        # Empty URIs can't come from anywhere.
+        return
+
+    # We need to brute-force find this URI relative to:
+    #
+    # 1. Itself if a full URI.
+    #
+    # 2. Importer's URL, if importer is a URL and this is a
+    #    host-root-relative URL starting with / or scheme-relative
+    #    starting with //, or just plain relative.
+    #
+    # 3. Current directory, if a relative path.
+    #
+    # 4. All the prefixes in "path".
+    #
+    # If it can't be found anywhere, we ought to (probably) throw
+    # FileNotFoundError like the MiniWDL implementation does, with a
+    # correct errno.
+    #
+    # To do this, we have AbstractFileStore.read_from_url, which can read a
+    # URL into a binary-mode writable, or throw some kind of unspecified
+    # exception if the source doesn't exist or can't be fetched.
+
+    # This holds scheme-applied full URIs for all the places to search.
+    full_path_list = []
+
+    if importer is not None:
+        # Add the place the imported file came form, to search first.
+        full_path_list.append(Toil.normalize_uri(importer))
+
+    # Then the current directory. We need to make sure to include a filename component here or it will treat the current directory with no trailing / as a document and relative paths will look 1 level up.
+    # When importing on a worker, the cwd will be a tmpdir and will result in FileNotFoundError after os.path.abspath, so override with the execution dir
+    full_path_list.append(Toil.normalize_uri(execution_dir or ".") + "/.")
+
+    # Then the specified paths.
+    # TODO:
+    # https://github.com/chanzuckerberg/miniwdl/blob/e3e8ef74e80fbe59f137b0ad40b354957915c345/WDL/Tree.py#L1479-L1482
+    # seems backward actually and might do these first!
+    full_path_list += [Toil.normalize_uri(p) for p in path]
+
+    # This holds all the URIs we tried and failed with.
+    failures: set[str] = set()
+
+    for candidate_base in full_path_list:
+        # Try fetching based off each base URI
+        candidate_uri = urljoin(candidate_base, uri)
+        if candidate_uri in failures:
+            # Already tried this one, maybe we have an absolute uri input.
+            continue
+        logger.debug(
+            "Consider %s which is %s off of %s", candidate_uri, uri, candidate_base
+        )
+
+        # Try it
+        yield candidate_uri
+        # If we come back it didn't work
+        failures.add(candidate_uri)
+
+
+def get_file_sizes(
+    filenames: List[str],
+    file_source: AbstractJobStore,
+    search_paths: Optional[List[str]] = None,
+    include_remote_files: bool = True,
+    execution_dir: Optional[str] = None,
+) -> Dict[str, FileMetadata]:
+    """
+    Resolve relative-URI files in the given environment and turn them into absolute normalized URIs. Returns a dictionary of the *string values* from the WDL file values
+    to a tuple of the normalized URI, parent directory ID, and size of the file. The size of the file may be None, which means unknown size.
+
+    :param filenames: list of filenames to evaluate on
+    :param file_source: Context to search for files with
+    :param task_path: Dotted WDL name of the user-level code doing the
+        importing (probably the workflow name).
+    :param search_paths: If set, try resolving input location relative to the URLs or
+        directories in this list.
+    :param include_remote_files: If set, import files from remote locations. Else leave them as URI references.
+    """
+
+    @memoize
+    def get_filename_size(filename: str) -> FileMetadata:
+        tried = []
+        for candidate_uri in potential_absolute_uris(
+            filename,
+            search_paths if search_paths is not None else [],
+            execution_dir=execution_dir,
+        ):
+            tried.append(candidate_uri)
+            try:
+                if not include_remote_files and is_remote_url(candidate_uri):
+                    # Use remote URIs in place. But we need to find the one that exists.
+                    if not file_source.url_exists(candidate_uri):
+                        # Wasn't found there
+                        continue
+
+                # Now we know this exists, so pass it through
+                # Get filesizes
+                filesize = file_source.get_size(candidate_uri)
+            except UnimplementedURLException as e:
+                # We can't find anything that can even support this URL scheme.
+                # Report to the user, they are probably missing an extra.
+                logger.critical("Error: " + str(e))
+                raise
+            except HTTPError as e:
+                # Something went wrong looking for it there.
+                logger.warning(
+                    "Checked URL %s but got HTTP status %s", candidate_uri, e.code
+                )
+                if e.code == 405:
+                    # 405 Method not allowed, maybe HEAD requests are not supported
+                    filesize = None
+                else:
+                    # Try the next location.
+                    continue
+            except FileNotFoundError:
+                # Wasn't found there
+                continue
+            except Exception:
+                # Something went wrong besides the file not being found. Maybe
+                # we have no auth.
+                logger.error(
+                    "Something went wrong when testing for existence of %s",
+                    candidate_uri,
+                )
+                raise
+
+            # Work out what the basename for the file was
+            file_basename = os.path.basename(urlsplit(candidate_uri).path)
+
+            if file_basename == "":
+                # We can't have files with no basename because we need to
+                # download them at that basename later in WDL.
+                raise RuntimeError(
+                    f"File {candidate_uri} has no basename"
+                )
+
+            # Was actually found
+            if is_remote_url(candidate_uri):
+                # Might be a file URI or other URI.
+                # We need to make sure file URIs and local paths that point to
+                # the same place are treated the same.
+                parsed = urlsplit(candidate_uri)
+                if parsed.scheme == "file:":
+                    # This is a local file URI. Convert to a path for source directory tracking.
+                    parent_dir = os.path.dirname(unquote(parsed.path))
+                else:
+                    # This is some other URL. Get the URL to the parent directory and use that.
+                    parent_dir = urljoin(candidate_uri, ".")
+            else:
+                # Must be a local path
+                parent_dir = os.path.dirname(candidate_uri)
+
+            return cast(FileMetadata, (candidate_uri, parent_dir, filesize))
+        # Not found
+        raise RuntimeError(
+            f"Could not find {filename} at any of: {list(potential_absolute_uris(filename, search_paths if search_paths is not None else []))}"
+        )
+
+    return {k: get_filename_size(k) for k in filenames}
+
+
+class CombineImportsJob(Job):
+    """
+    Combine the outputs of multiple WorkerImportsJob into one promise
+    """
+
+    def __init__(self, d: Sequence[Promised[Dict[str, FileID]]], **kwargs):
+        """
+        :param d: Sequence of dictionaries to merge
+        """
+        self._d = d
+        super().__init__(**kwargs)
+
+    def run(self, file_store: AbstractFileStore) -> Promised[Dict[str, FileID]]:
+        """
+        Merge the dicts
+        """
+        d = unwrap_all(self._d)
+        return {k: v for item in d for k, v in item.items()}
+
+
+class WorkerImportJob(Job):
+    """
+    Job to do file imports on a worker instead of a leader. Assumes all local and cloud files are accessible.
+
+    For the CWL/WDL runners, this class is only used when runImportsOnWorkers is enabled.
+    """
+
+    def __init__(
+        self,
+        filenames: List[str],
+        local: bool = False,
+        **kwargs: Any
+    ):
+        """
+        Setup importing files on a worker.
+        :param filenames: List of file URIs to import
+        :param kwargs: args for the superclass
+        """
+        self.filenames = filenames
+        super().__init__(local=local, **kwargs)
+
+    @staticmethod
+    def import_files(
+        files: List[str], file_source: "AbstractJobStore"
+    ) -> Dict[str, FileID]:
+        """
+        Import a list of files into the jobstore. Returns a mapping of the filename to the associated FileIDs
+
+        When stream is true but the import is not streamable, the worker will run out of
+        disk space and run a new import job with enough disk space instead.
+        :param files: list of files to import
+        :param file_source: AbstractJobStore
+        :return: Dictionary mapping filenames to associated jobstore FileID
+        """
+        # todo: make the import ensure streaming is done instead of relying on running out of disk space
+        path_to_fileid = {}
+
+        @memoize
+        def import_filename(filename: str) -> Optional[FileID]:
+            return file_source.import_file(filename, symlink=True)
+
+        for file in files:
+            imported = import_filename(file)
+            if imported is not None:
+                path_to_fileid[file] = imported
+        return path_to_fileid
+
+    def run(self, file_store: AbstractFileStore) -> Promised[Dict[str, FileID]]:
+        """
+        Import the workflow inputs and then create and run the workflow.
+        :return: Promise of workflow outputs
+        """
+        return self.import_files(self.filenames, file_store.jobStore)
+
+
+class ImportsJob(Job):
+    """
+    Job to organize and delegate files to individual WorkerImportJobs.
+
+    For the CWL/WDL runners, this is only used when runImportsOnWorkers is enabled
+    """
+
+    def __init__(
+        self,
+        file_to_data: Dict[str, FileMetadata],
+        max_batch_size: ParseableIndivisibleResource,
+        import_worker_disk: ParseableIndivisibleResource,
+        **kwargs: Any,
+    ):
+        """
+        Job to take the inputs for a workflow and import them on a worker instead of a leader. Assumes all local and cloud files are accessible.
+
+        This class is only used when runImportsOnWorkers is enabled.
+
+        :param file_to_data: mapping of file source name to file metadata
+        :param max_batch_size: maximum cumulative file size of a batched import
+        """
+        super().__init__(local=True, **kwargs)
+        self._file_to_data = file_to_data
+        self._max_batch_size = max_batch_size
+        self._import_worker_disk = import_worker_disk
+
+    def run(
+        self, file_store: AbstractFileStore
+    ) -> Tuple[Promised[Dict[str, FileID]], Dict[str, FileMetadata]]:
+        """
+        Import the workflow inputs and then create and run the workflow.
+        :return: Tuple of a mapping from the candidate uri to the file id and a mapping of the source filenames to its metadata. The candidate uri is a field in the file metadata
+        """
+        max_batch_size = self._max_batch_size
+        file_to_data = self._file_to_data
+        # Run WDL imports on a worker instead
+
+        filenames = list(file_to_data.keys())
+
+        import_jobs = []
+
+        # This list will hold lists of batched filenames
+        file_batches = []
+
+        # List of filenames for each batch
+        per_batch_files = []
+        per_batch_size = 0
+        while len(filenames) > 0:
+            filename = filenames.pop(0)
+            # See if adding this to the queue will make the batch job too big
+            filesize = file_to_data[filename][2]
+            if per_batch_size + filesize >= max_batch_size:
+                # batch is too big now, store to schedule the batch
+                if len(per_batch_files) == 0:
+                    # schedule the individual file
+                    per_batch_files.append(filename)
+                file_batches.append(per_batch_files)
+                # reset batching calculation
+                per_batch_size = 0
+            else:
+                per_batch_size += filesize
+                per_batch_files.append(filename)
+
+        if per_batch_files:
+            file_batches.append(per_batch_files)
+
+        # Create batch import jobs for each group of files
+        for batch in file_batches:
+            candidate_uris = [file_to_data[filename][0] for filename in batch]
+            import_jobs.append(WorkerImportJob(candidate_uris, disk=self._import_worker_disk))
+
+        for job in import_jobs:
+            self.addChild(job)
+
+        combine_imports_job = CombineImportsJob([job.rv() for job in import_jobs])
+        for job in import_jobs:
+            job.addFollowOn(combine_imports_job)
+        self.addChild(combine_imports_job)
+
+        return combine_imports_job.rv(), file_to_data
+
+
 class Promise:
     """
     References a return value from a method as a *promise* before the method itself is run.
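The `ImportsJob.run` body above groups input files into batches by cumulative size before handing each batch to a `WorkerImportJob`. A simplified, hedged sketch of that greedy-batching idea as a standalone function; the field access and edge-case handling differ from the code in the diff, and the names here are illustrative:

```python
# Simplified sketch of size-based batching: greedily pack filenames into
# batches whose total size stays under a limit. `sizes` maps filename -> size
# in bytes; both the function and argument names are illustrative only.
from typing import Dict, List

def batch_by_size(sizes: Dict[str, int], max_batch_size: int) -> List[List[str]]:
    batches: List[List[str]] = []
    current: List[str] = []
    current_size = 0
    for filename, size in sizes.items():
        if current and current_size + size >= max_batch_size:
            batches.append(current)            # close the current batch
            current, current_size = [], 0
        current.append(filename)
        current_size += size
    if current:
        batches.append(current)
    return batches

# Example: two small files share a batch, the large one gets its own.
print(batch_by_size({"a.txt": 10, "b.txt": 20, "c.bin": 90}, max_batch_size=100))
```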
@@ -3520,7 +4183,9 @@ class Promise:
     def __new__(cls, *args) -> "Promise":
         """Instantiate this Promise."""
         if len(args) != 2:
-            raise RuntimeError(
+            raise RuntimeError(
+                "Cannot instantiate promise. Invalid number of arguments given (Expected 2)."
+            )
         if isinstance(args[0], Job):
             # Regular instantiation when promise is created, before it is being pickled
             return super().__new__(cls)
@@ -3541,6 +4206,7 @@ class Promise:
         value = safeUnpickleFromStream(fileHandle)
         return value
 
+
 # Machinery for type-safe-ish Toil Python workflows.
 #
 # TODO: Until we make Promise generic on the promised type, and work out how to
@@ -3548,12 +4214,13 @@ class Promise:
 # method returns, this won't actually be type-safe, because any Promise will be
 # a Promised[] for any type.
 
-T = TypeVar(
+T = TypeVar("T")
 # We have type shorthand for a promised value.
 # Uses a generic type alias, so you can have a Promised[T]. See <https://github.com/python/mypy/pull/2378>.
 
 Promised = Union[Promise, T]
 
+
 def unwrap(p: Promised[T]) -> T:
     """
     Function for ensuring you actually have a promised value, and not just a promise.
@@ -3562,9 +4229,10 @@ def unwrap(p: Promised[T]) -> T:
     The "unwrap" terminology is borrowed from Rust.
     """
     if isinstance(p, Promise):
-        raise TypeError(f
+        raise TypeError(f"Attempted to unwrap a value that is still a Promise: {p}")
     return p
 
+
 def unwrap_all(p: Sequence[Promised[T]]) -> Sequence[T]:
     """
     Function for ensuring you actually have a collection of promised values,
@@ -3574,9 +4242,12 @@ def unwrap_all(p: Sequence[Promised[T]]) -> Sequence[T]:
     """
     for i, item in enumerate(p):
         if isinstance(item, Promise):
-            raise TypeError(
+            raise TypeError(
+                f"Attempted to unwrap a value at index {i} that is still a Promise: {item}"
+            )
     return p
 
+
 class PromisedRequirement:
     """
     Class for dynamically allocating job function resource requirements.
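`unwrap` and `unwrap_all` guard against accidentally using a `Promise` object before it has been fulfilled. A minimal hedged usage sketch built on the documented `Job.rv()` pattern; the workflow wiring and function names are invented for illustration:

```python
# Hypothetical sketch: a child's return value is consumed by a follow-on as a
# promise and checked with unwrap at run time. Names are illustrative only.
from toil.job import Job, unwrap

def produce(job):
    return 42

def consume(job, promised_value):
    value = unwrap(promised_value)  # by run time the promise is resolved
    return value + 1

root = Job()
child = root.addChildJobFn(produce)
root.addFollowOnJobFn(consume, child.rv())
```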
@@ -3603,13 +4274,15 @@ class PromisedRequirement:
         :param args: variable length argument list
         :type args: int or .Promise
         """
-        if hasattr(valueOrCallable,
+        if hasattr(valueOrCallable, "__call__"):
             if len(args) == 0:
-                raise RuntimeError(
+                raise RuntimeError("Need parameters for PromisedRequirement function.")
             func = valueOrCallable
         else:
             if len(args) != 0:
-                raise RuntimeError(
+                raise RuntimeError(
+                    "Define a PromisedRequirement function to handle multiple arguments."
+                )
             func = lambda x: x
             args = [valueOrCallable]
 
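`PromisedRequirement` lets a resource requirement be computed from a promised value at schedule time. A hedged example of that documented pattern; the job functions and the 2 GiB figure are placeholders:

```python
# Hypothetical sketch: size a follow-on's disk requirement from a value
# promised by an earlier job. Numbers and names are illustrative only.
from toil.job import Job, PromisedRequirement

def measure(job):
    return 2 * 1024**3  # pretend we computed an input size of 2 GiB

def process(job):
    return "done"

root = Job()
sizer = root.addChildJobFn(measure)
root.addFollowOnJobFn(
    process,
    disk=PromisedRequirement(lambda size: size * 2, sizer.rv()),
)
```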
@@ -3622,7 +4295,7 @@ class PromisedRequirement:
         return func(*self._args)
 
     @staticmethod
-    def convertPromises(kwargs:
+    def convertPromises(kwargs: dict[str, Any]) -> bool:
         """
         Return True if reserved resource keyword is a Promise or PromisedRequirement instance.
 
@@ -3651,15 +4324,15 @@ class UnfulfilledPromiseSentinel:
         self.file_id = file_id
 
     @staticmethod
-    def __setstate__(stateDict:
+    def __setstate__(stateDict: dict[str, Any]) -> None:
         """
         Only called when unpickling.
 
         This won't be unpickled unless the promise wasn't resolved, so we throw
         an exception.
         """
-        jobName = stateDict[
-        file_id = stateDict[
+        jobName = stateDict["fulfillingJobName"]
+        file_id = stateDict["file_id"]
         raise RuntimeError(
             f"This job was passed promise {file_id} that wasn't yet resolved when it "
             f"ran. The job {jobName} that fulfills this promise hasn't yet "
|