toil 7.0.0__py3-none-any.whl → 8.1.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +124 -86
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +137 -77
- toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
- toil/batchSystems/awsBatch.py +237 -128
- toil/batchSystems/cleanup_support.py +22 -16
- toil/batchSystems/contained_executor.py +30 -26
- toil/batchSystems/gridengine.py +85 -49
- toil/batchSystems/htcondor.py +164 -87
- toil/batchSystems/kubernetes.py +622 -386
- toil/batchSystems/local_support.py +17 -12
- toil/batchSystems/lsf.py +132 -79
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +288 -149
- toil/batchSystems/mesos/executor.py +77 -49
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +39 -29
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +293 -123
- toil/batchSystems/slurm.py +651 -155
- toil/batchSystems/torque.py +46 -32
- toil/bus.py +141 -73
- toil/common.py +784 -397
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1137 -534
- toil/cwl/utils.py +17 -22
- toil/deferred.py +62 -41
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +88 -57
- toil/fileStores/cachingFileStore.py +711 -247
- toil/fileStores/nonCachingFileStore.py +113 -75
- toil/job.py +1031 -349
- toil/jobStores/abstractJobStore.py +387 -243
- toil/jobStores/aws/jobStore.py +772 -412
- toil/jobStores/aws/utils.py +161 -109
- toil/jobStores/conftest.py +1 -0
- toil/jobStores/fileJobStore.py +289 -151
- toil/jobStores/googleJobStore.py +137 -70
- toil/jobStores/utils.py +36 -15
- toil/leader.py +614 -269
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +55 -28
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +204 -58
- toil/lib/aws/utils.py +290 -213
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +83 -49
- toil/lib/docker.py +131 -103
- toil/lib/dockstore.py +379 -0
- toil/lib/ec2.py +322 -209
- toil/lib/ec2nodes.py +174 -105
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +4 -2
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/history.py +1271 -0
- toil/lib/history_submission.py +681 -0
- toil/lib/humanize.py +6 -2
- toil/lib/io.py +121 -12
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +83 -18
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +19 -7
- toil/lib/retry.py +125 -87
- toil/lib/threading.py +282 -80
- toil/lib/throttle.py +15 -14
- toil/lib/trs.py +390 -0
- toil/lib/web.py +38 -0
- toil/options/common.py +850 -402
- toil/options/cwl.py +185 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +70 -19
- toil/provisioners/__init__.py +111 -46
- toil/provisioners/abstractProvisioner.py +322 -157
- toil/provisioners/aws/__init__.py +62 -30
- toil/provisioners/aws/awsProvisioner.py +980 -627
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +283 -180
- toil/provisioners/node.py +147 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +127 -61
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +84 -55
- toil/server/utils.py +56 -31
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +183 -65
- toil/test/__init__.py +263 -179
- toil/test/batchSystems/batchSystemTest.py +438 -195
- toil/test/batchSystems/batch_system_plugin_test.py +18 -7
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +265 -49
- toil/test/cactus/test_cactus_integration.py +20 -22
- toil/test/cwl/conftest.py +39 -0
- toil/test/cwl/cwlTest.py +375 -72
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/optional-file.cwl +18 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/docs/scriptsTest.py +60 -34
- toil/test/jobStores/jobStoreTest.py +412 -235
- toil/test/lib/aws/test_iam.py +116 -48
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +57 -49
- toil/test/lib/test_history.py +212 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/lib/test_trs.py +161 -0
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/options.py +7 -2
- toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +81 -42
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +140 -100
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +33 -26
- toil/test/src/environmentTest.py +20 -10
- toil/test/src/fileStoreTest.py +538 -271
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +32 -17
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +120 -70
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +6 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +33 -16
- toil/test/utils/toilDebugTest.py +70 -58
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +239 -102
- toil/test/wdl/wdltoil_test.py +789 -148
- toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
- toil/toilState.py +52 -26
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +85 -25
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +251 -145
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +27 -14
- toil/utils/toilSshCluster.py +45 -22
- toil/utils/toilStats.py +75 -36
- toil/utils/toilStatus.py +226 -119
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +6 -6
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3528 -1053
- toil/worker.py +370 -149
- toil-8.1.0b1.dist-info/METADATA +178 -0
- toil-8.1.0b1.dist-info/RECORD +259 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/WHEEL +1 -1
- toil-7.0.0.dist-info/METADATA +0 -158
- toil-7.0.0.dist-info/RECORD +0 -244
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/LICENSE +0 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/entry_points.txt +0 -0
- {toil-7.0.0.dist-info → toil-8.1.0b1.dist-info}/top_level.txt +0 -0
toil/server/utils.py
CHANGED
|
@@ -16,24 +16,25 @@ import logging
|
|
|
16
16
|
import os
|
|
17
17
|
from abc import abstractmethod
|
|
18
18
|
from datetime import datetime
|
|
19
|
-
from typing import
|
|
19
|
+
from typing import Optional
|
|
20
20
|
from urllib.parse import urlparse
|
|
21
21
|
|
|
22
|
-
import requests
|
|
23
|
-
|
|
24
22
|
from toil.lib.io import AtomicFileCreate
|
|
25
23
|
from toil.lib.retry import retry
|
|
24
|
+
from toil.lib.web import web_session
|
|
26
25
|
|
|
27
26
|
try:
|
|
28
27
|
from toil.lib.aws import get_current_aws_region
|
|
29
28
|
from toil.lib.aws.session import client
|
|
30
29
|
from toil.lib.aws.utils import retry_s3
|
|
30
|
+
|
|
31
31
|
HAVE_S3 = True
|
|
32
32
|
except ImportError:
|
|
33
33
|
HAVE_S3 = False
|
|
34
34
|
|
|
35
35
|
logger = logging.getLogger(__name__)
|
|
36
36
|
|
|
37
|
+
|
|
37
38
|
def get_iso_time() -> str:
|
|
38
39
|
"""
|
|
39
40
|
Return the current time in ISO 8601 format.
|
|
@@ -55,23 +56,30 @@ def link_file(src: str, dest: str) -> None:
|
|
|
55
56
|
os.symlink(src, dest)
|
|
56
57
|
|
|
57
58
|
|
|
58
|
-
def download_file_from_internet(
|
|
59
|
+
def download_file_from_internet(
|
|
60
|
+
src: str, dest: str, content_type: Optional[str] = None
|
|
61
|
+
) -> None:
|
|
59
62
|
"""
|
|
60
63
|
Download a file from the Internet and write it to dest.
|
|
61
64
|
"""
|
|
62
|
-
response =
|
|
65
|
+
response = web_session.get(src)
|
|
63
66
|
|
|
64
67
|
if not response.ok:
|
|
65
68
|
raise RuntimeError("Request failed with a client error or a server error.")
|
|
66
69
|
|
|
67
|
-
if content_type and not response.headers.get("Content-Type", "").startswith(
|
|
70
|
+
if content_type and not response.headers.get("Content-Type", "").startswith(
|
|
71
|
+
content_type
|
|
72
|
+
):
|
|
68
73
|
val = response.headers.get("Content-Type")
|
|
69
74
|
raise RuntimeError(f"Expected content type to be '{content_type}'. Not {val}.")
|
|
70
75
|
|
|
71
76
|
with open(dest, "wb") as f:
|
|
72
77
|
f.write(response.content)
|
|
73
78
|
|
|
74
|
-
|
|
79
|
+
|
|
80
|
+
def download_file_from_s3(
|
|
81
|
+
src: str, dest: str, content_type: Optional[str] = None
|
|
82
|
+
) -> None:
|
|
75
83
|
"""
|
|
76
84
|
Download a file from Amazon S3 and write it to dest.
|
|
77
85
|
"""
|
|
@@ -81,10 +89,11 @@ def download_file_from_s3(src: str, dest: str, content_type: Optional[str] = Non
|
|
|
81
89
|
except ImportError:
|
|
82
90
|
raise RuntimeError("Cannot access S3 as AWS modules are not available")
|
|
83
91
|
|
|
84
|
-
with open(dest,
|
|
92
|
+
with open(dest, "wb") as out_stream:
|
|
85
93
|
obj = get_object_for_url(urlparse(src), existing=True)
|
|
86
94
|
obj.download_fileobj(out_stream)
|
|
87
95
|
|
|
96
|
+
|
|
88
97
|
def get_file_class(path: str) -> str:
|
|
89
98
|
"""
|
|
90
99
|
Return the type of the file as a human readable string.
|
|
@@ -97,6 +106,7 @@ def get_file_class(path: str) -> str:
|
|
|
97
106
|
return "Directory"
|
|
98
107
|
return "Unknown"
|
|
99
108
|
|
|
109
|
+
|
|
100
110
|
@retry(errors=[OSError, BlockingIOError])
|
|
101
111
|
def safe_read_file(file: str) -> Optional[str]:
|
|
102
112
|
"""
|
|
@@ -153,6 +163,7 @@ def safe_write_file(file: str, s: str) -> None:
|
|
|
153
163
|
with open(temp_name, "w") as file_obj:
|
|
154
164
|
file_obj.write(s)
|
|
155
165
|
|
|
166
|
+
|
|
156
167
|
class MemoryStateCache:
|
|
157
168
|
"""
|
|
158
169
|
An in-memory place to store workflow state.
|
|
@@ -164,7 +175,7 @@ class MemoryStateCache:
|
|
|
164
175
|
"""
|
|
165
176
|
|
|
166
177
|
super().__init__()
|
|
167
|
-
self._data:
|
|
178
|
+
self._data: dict[tuple[str, str], Optional[str]] = {}
|
|
168
179
|
|
|
169
180
|
def get(self, workflow_id: str, key: str) -> Optional[str]:
|
|
170
181
|
"""
|
|
@@ -185,6 +196,7 @@ class MemoryStateCache:
|
|
|
185
196
|
else:
|
|
186
197
|
self._data[(workflow_id, key)] = value
|
|
187
198
|
|
|
199
|
+
|
|
188
200
|
class AbstractStateStore:
|
|
189
201
|
"""
|
|
190
202
|
A place for the WES server to keep its state: the set of workflows that
|
|
@@ -250,6 +262,7 @@ class AbstractStateStore:
|
|
|
250
262
|
"""
|
|
251
263
|
self._cache.set(workflow_id, key, value)
|
|
252
264
|
|
|
265
|
+
|
|
253
266
|
class MemoryStateStore(MemoryStateCache, AbstractStateStore):
|
|
254
267
|
"""
|
|
255
268
|
An in-memory place to store workflow state, for testing.
|
|
@@ -261,6 +274,7 @@ class MemoryStateStore(MemoryStateCache, AbstractStateStore):
|
|
|
261
274
|
def __init__(self):
|
|
262
275
|
super().__init__()
|
|
263
276
|
|
|
277
|
+
|
|
264
278
|
class FileStateStore(AbstractStateStore):
|
|
265
279
|
"""
|
|
266
280
|
A place to store workflow state that uses a POSIX-compatible file system.
|
|
@@ -275,7 +289,7 @@ class FileStateStore(AbstractStateStore):
|
|
|
275
289
|
"""
|
|
276
290
|
super().__init__()
|
|
277
291
|
parse = urlparse(url)
|
|
278
|
-
if parse.scheme.lower() not in [
|
|
292
|
+
if parse.scheme.lower() not in ["file", ""]:
|
|
279
293
|
# We want to catch if we get the wrong argument.
|
|
280
294
|
raise RuntimeError(f"{url} doesn't look like a local path")
|
|
281
295
|
if not os.path.exists(parse.path):
|
|
@@ -309,7 +323,9 @@ class FileStateStore(AbstractStateStore):
|
|
|
309
323
|
# Set the value in the file
|
|
310
324
|
safe_write_file(file_path, value)
|
|
311
325
|
|
|
326
|
+
|
|
312
327
|
if HAVE_S3:
|
|
328
|
+
|
|
313
329
|
class S3StateStore(AbstractStateStore):
|
|
314
330
|
"""
|
|
315
331
|
A place to store workflow state that uses an S3-compatible object store.
|
|
@@ -327,7 +343,7 @@ if HAVE_S3:
|
|
|
327
343
|
|
|
328
344
|
parse = urlparse(url)
|
|
329
345
|
|
|
330
|
-
if parse.scheme.lower() !=
|
|
346
|
+
if parse.scheme.lower() != "s3":
|
|
331
347
|
# We want to catch if we get the wrong argument.
|
|
332
348
|
raise RuntimeError(f"{url} doesn't look like an S3 URL")
|
|
333
349
|
|
|
@@ -335,12 +351,14 @@ if HAVE_S3:
|
|
|
335
351
|
# urlparse keeps the leading '/', but here we want a path in the
|
|
336
352
|
# bucket without a leading '/'. We also need to support an empty
|
|
337
353
|
# path.
|
|
338
|
-
self._base_path =
|
|
339
|
-
|
|
354
|
+
self._base_path = (
|
|
355
|
+
parse.path[1:] if parse.path.startswith("/") else parse.path
|
|
356
|
+
)
|
|
357
|
+
self._client = client("s3", region_name=get_current_aws_region())
|
|
340
358
|
|
|
341
359
|
logger.debug("Connected to S3StateStore at %s", url)
|
|
342
360
|
|
|
343
|
-
def _get_bucket_and_path(self, workflow_id: str, key: str) ->
|
|
361
|
+
def _get_bucket_and_path(self, workflow_id: str, key: str) -> tuple[str, str]:
|
|
344
362
|
"""
|
|
345
363
|
Get the bucket and path in the bucket at which a key value belongs.
|
|
346
364
|
"""
|
|
@@ -354,13 +372,12 @@ if HAVE_S3:
|
|
|
354
372
|
bucket, path = self._get_bucket_and_path(workflow_id, key)
|
|
355
373
|
for attempt in retry_s3():
|
|
356
374
|
try:
|
|
357
|
-
logger.debug(
|
|
375
|
+
logger.debug("Fetch %s path %s", bucket, path)
|
|
358
376
|
response = self._client.get_object(Bucket=bucket, Key=path)
|
|
359
|
-
return response[
|
|
377
|
+
return response["Body"].read().decode("utf-8")
|
|
360
378
|
except self._client.exceptions.NoSuchKey:
|
|
361
379
|
return None
|
|
362
380
|
|
|
363
|
-
|
|
364
381
|
def set(self, workflow_id: str, key: str, value: Optional[str]) -> None:
|
|
365
382
|
"""
|
|
366
383
|
Set or clear a key value on S3.
|
|
@@ -369,18 +386,21 @@ if HAVE_S3:
|
|
|
369
386
|
for attempt in retry_s3():
|
|
370
387
|
if value is None:
|
|
371
388
|
# Get rid of it.
|
|
372
|
-
logger.debug(
|
|
389
|
+
logger.debug("Clear %s path %s", bucket, path)
|
|
373
390
|
self._client.delete_object(Bucket=bucket, Key=path)
|
|
374
391
|
return
|
|
375
392
|
else:
|
|
376
393
|
# Store it, clobbering anything there already.
|
|
377
|
-
logger.debug(
|
|
378
|
-
self._client.put_object(
|
|
379
|
-
|
|
394
|
+
logger.debug("Set %s path %s", bucket, path)
|
|
395
|
+
self._client.put_object(
|
|
396
|
+
Bucket=bucket, Key=path, Body=value.encode("utf-8")
|
|
397
|
+
)
|
|
380
398
|
return
|
|
381
399
|
|
|
400
|
+
|
|
382
401
|
# We want to memoize state stores so we can cache on them.
|
|
383
|
-
state_store_cache:
|
|
402
|
+
state_store_cache: dict[str, AbstractStateStore] = {}
|
|
403
|
+
|
|
384
404
|
|
|
385
405
|
def connect_to_state_store(url: str) -> AbstractStateStore:
|
|
386
406
|
"""
|
|
@@ -392,25 +412,30 @@ def connect_to_state_store(url: str) -> AbstractStateStore:
|
|
|
392
412
|
if url not in state_store_cache:
|
|
393
413
|
# We need to actually make the state store
|
|
394
414
|
parse = urlparse(url)
|
|
395
|
-
if parse.scheme.lower() ==
|
|
415
|
+
if parse.scheme.lower() == "s3":
|
|
396
416
|
# It's an S3 URL
|
|
397
417
|
if HAVE_S3:
|
|
398
418
|
# And we can use S3, so make the right implementation for S3.
|
|
399
419
|
state_store_cache[url] = S3StateStore(url)
|
|
400
420
|
else:
|
|
401
421
|
# We can't actually use S3, so complain.
|
|
402
|
-
raise RuntimeError(
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
422
|
+
raise RuntimeError(
|
|
423
|
+
f"Cannot connect to {url} because Toil AWS "
|
|
424
|
+
f"dependencies are not available. Did you "
|
|
425
|
+
f"install Toil with the [aws] extra?"
|
|
426
|
+
)
|
|
427
|
+
elif parse.scheme.lower() in ["file", ""]:
|
|
406
428
|
# It's a file URL or path
|
|
407
429
|
state_store_cache[url] = FileStateStore(url)
|
|
408
430
|
else:
|
|
409
|
-
raise RuntimeError(
|
|
410
|
-
|
|
431
|
+
raise RuntimeError(
|
|
432
|
+
f"Cannot connect to {url} because we do not "
|
|
433
|
+
f"implement its URL scheme"
|
|
434
|
+
)
|
|
411
435
|
|
|
412
436
|
return state_store_cache[url]
|
|
413
437
|
|
|
438
|
+
|
|
414
439
|
class WorkflowStateStore:
|
|
415
440
|
"""
|
|
416
441
|
Slice of a state store for the state of a particular workflow.
|
|
@@ -463,6 +488,7 @@ def connect_to_workflow_state_store(url: str, workflow_id: str) -> WorkflowState
|
|
|
463
488
|
|
|
464
489
|
return WorkflowStateStore(connect_to_state_store(url), workflow_id)
|
|
465
490
|
|
|
491
|
+
|
|
466
492
|
# When we see one of these terminal states, we stay there forever.
|
|
467
493
|
TERMINAL_STATES = {"COMPLETE", "EXECUTOR_ERROR", "SYSTEM_ERROR", "CANCELED"}
|
|
468
494
|
|
|
@@ -470,6 +496,7 @@ TERMINAL_STATES = {"COMPLETE", "EXECUTOR_ERROR", "SYSTEM_ERROR", "CANCELED"}
|
|
|
470
496
|
# workflow running task is gone and move it to CANCELED?
|
|
471
497
|
MAX_CANCELING_SECONDS = 30
|
|
472
498
|
|
|
499
|
+
|
|
473
500
|
class WorkflowStateMachine:
|
|
474
501
|
"""
|
|
475
502
|
Class for managing the WES workflow state machine.
|
|
@@ -628,5 +655,3 @@ class WorkflowStateMachine:
|
|
|
628
655
|
state = "UNKNOWN"
|
|
629
656
|
|
|
630
657
|
return state
|
|
631
|
-
|
|
632
|
-
|
|
@@ -4,7 +4,7 @@ import json
|
|
|
4
4
|
import logging
|
|
5
5
|
import os
|
|
6
6
|
from abc import abstractmethod
|
|
7
|
-
from typing import Any, Callable,
|
|
7
|
+
from typing import Any, Callable, Optional, Union
|
|
8
8
|
from urllib.parse import urldefrag
|
|
9
9
|
|
|
10
10
|
import connexion # type: ignore
|
|
@@ -16,18 +16,25 @@ logger = logging.getLogger(__name__)
|
|
|
16
16
|
|
|
17
17
|
# Define a type for WES task log entries in responses
|
|
18
18
|
# TODO: make this a typed dict with all the WES task log field names and their types.
|
|
19
|
-
TaskLog =
|
|
19
|
+
TaskLog = dict[str, Union[str, int, None]]
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
class VersionNotImplementedException(Exception):
|
|
23
23
|
"""
|
|
24
24
|
Raised when the requested workflow version is not implemented.
|
|
25
25
|
"""
|
|
26
|
-
|
|
27
|
-
|
|
26
|
+
|
|
27
|
+
def __init__(
|
|
28
|
+
self,
|
|
29
|
+
wf_type: str,
|
|
30
|
+
version: Optional[str] = None,
|
|
31
|
+
supported_versions: Optional[list[str]] = None,
|
|
32
|
+
) -> None:
|
|
28
33
|
if version:
|
|
29
|
-
message = (
|
|
30
|
-
|
|
34
|
+
message = (
|
|
35
|
+
"workflow_type '{}' requires 'workflow_type_version' to be one of '{}'. "
|
|
36
|
+
"Got '{}' instead.".format(wf_type, str(supported_versions), version)
|
|
37
|
+
)
|
|
31
38
|
else:
|
|
32
39
|
message = f"workflow_type '{wf_type}' is not supported."
|
|
33
40
|
|
|
@@ -38,6 +45,7 @@ class MalformedRequestException(Exception):
|
|
|
38
45
|
"""
|
|
39
46
|
Raised when the request is malformed.
|
|
40
47
|
"""
|
|
48
|
+
|
|
41
49
|
def __init__(self, message: str) -> None:
|
|
42
50
|
super().__init__(message)
|
|
43
51
|
|
|
@@ -46,6 +54,7 @@ class WorkflowNotFoundException(Exception):
|
|
|
46
54
|
"""
|
|
47
55
|
Raised when the requested run ID is not found.
|
|
48
56
|
"""
|
|
57
|
+
|
|
49
58
|
def __init__(self) -> None:
|
|
50
59
|
super().__init__("The requested workflow run wasn't found.")
|
|
51
60
|
|
|
@@ -54,6 +63,7 @@ class WorkflowConflictException(Exception):
|
|
|
54
63
|
"""
|
|
55
64
|
Raised when the requested workflow is not in the expected state.
|
|
56
65
|
"""
|
|
66
|
+
|
|
57
67
|
def __init__(self, run_id: str):
|
|
58
68
|
super().__init__(f"Workflow {run_id} exists when it shouldn't.")
|
|
59
69
|
|
|
@@ -62,6 +72,7 @@ class OperationForbidden(Exception):
|
|
|
62
72
|
"""
|
|
63
73
|
Raised when the request is forbidden.
|
|
64
74
|
"""
|
|
75
|
+
|
|
65
76
|
def __init__(self, message: str) -> None:
|
|
66
77
|
super().__init__(message)
|
|
67
78
|
|
|
@@ -70,6 +81,7 @@ class WorkflowExecutionException(Exception):
|
|
|
70
81
|
"""
|
|
71
82
|
Raised when an internal error occurred during the execution of the workflow.
|
|
72
83
|
"""
|
|
84
|
+
|
|
73
85
|
def __init__(self, message: str) -> None:
|
|
74
86
|
super().__init__(message)
|
|
75
87
|
|
|
@@ -81,8 +93,10 @@ def handle_errors(func: Callable[..., Any]) -> Callable[..., Any]:
|
|
|
81
93
|
GA4GH WES spec.
|
|
82
94
|
"""
|
|
83
95
|
|
|
84
|
-
def error(msg: Any, code: int = 500) ->
|
|
85
|
-
logger.warning(
|
|
96
|
+
def error(msg: Any, code: int = 500) -> tuple[dict[str, Any], int]:
|
|
97
|
+
logger.warning(
|
|
98
|
+
f"Exception raised when calling '{func.__name__}()':", exc_info=True
|
|
99
|
+
)
|
|
86
100
|
return {"msg": str(msg), "status_code": code}, code
|
|
87
101
|
|
|
88
102
|
@functools.wraps(func)
|
|
@@ -114,7 +128,7 @@ class WESBackend:
|
|
|
114
128
|
to handle user requests when they hit different endpoints.
|
|
115
129
|
"""
|
|
116
130
|
|
|
117
|
-
def __init__(self, options:
|
|
131
|
+
def __init__(self, options: list[str]):
|
|
118
132
|
"""
|
|
119
133
|
:param options: A list of default engine options to use when executing
|
|
120
134
|
a workflow. Example options:
|
|
@@ -135,7 +149,7 @@ class WESBackend:
|
|
|
135
149
|
return getattr(self, operation_id.split(".")[-1])
|
|
136
150
|
|
|
137
151
|
@abstractmethod
|
|
138
|
-
def get_service_info(self) ->
|
|
152
|
+
def get_service_info(self) -> dict[str, Any]:
|
|
139
153
|
"""
|
|
140
154
|
Get information about the Workflow Execution Service.
|
|
141
155
|
|
|
@@ -144,7 +158,9 @@ class WESBackend:
|
|
|
144
158
|
raise NotImplementedError
|
|
145
159
|
|
|
146
160
|
@abstractmethod
|
|
147
|
-
def list_runs(
|
|
161
|
+
def list_runs(
|
|
162
|
+
self, page_size: Optional[int] = None, page_token: Optional[str] = None
|
|
163
|
+
) -> dict[str, Any]:
|
|
148
164
|
"""
|
|
149
165
|
List the workflow runs.
|
|
150
166
|
|
|
@@ -153,7 +169,7 @@ class WESBackend:
|
|
|
153
169
|
raise NotImplementedError
|
|
154
170
|
|
|
155
171
|
@abstractmethod
|
|
156
|
-
def run_workflow(self) ->
|
|
172
|
+
def run_workflow(self) -> dict[str, str]:
|
|
157
173
|
"""
|
|
158
174
|
Run a workflow. This endpoint creates a new workflow run and returns
|
|
159
175
|
a `RunId` to monitor its progress.
|
|
@@ -163,7 +179,7 @@ class WESBackend:
|
|
|
163
179
|
raise NotImplementedError
|
|
164
180
|
|
|
165
181
|
@abstractmethod
|
|
166
|
-
def get_run_log(self, run_id: str) ->
|
|
182
|
+
def get_run_log(self, run_id: str) -> dict[str, Any]:
|
|
167
183
|
"""
|
|
168
184
|
Get detailed info about a workflow run.
|
|
169
185
|
|
|
@@ -172,7 +188,7 @@ class WESBackend:
|
|
|
172
188
|
raise NotImplementedError
|
|
173
189
|
|
|
174
190
|
@abstractmethod
|
|
175
|
-
def cancel_run(self, run_id: str) ->
|
|
191
|
+
def cancel_run(self, run_id: str) -> dict[str, str]:
|
|
176
192
|
"""
|
|
177
193
|
Cancel a running workflow.
|
|
178
194
|
|
|
@@ -181,7 +197,7 @@ class WESBackend:
|
|
|
181
197
|
raise NotImplementedError
|
|
182
198
|
|
|
183
199
|
@abstractmethod
|
|
184
|
-
def get_run_status(self, run_id: str) ->
|
|
200
|
+
def get_run_status(self, run_id: str) -> dict[str, str]:
|
|
185
201
|
"""
|
|
186
202
|
Get quick status info about a workflow run, returning a simple result
|
|
187
203
|
with the overall state of the workflow run.
|
|
@@ -199,9 +215,17 @@ class WESBackend:
|
|
|
199
215
|
|
|
200
216
|
@staticmethod
|
|
201
217
|
def secure_path(path: str) -> str:
|
|
202
|
-
return os.path.join(
|
|
203
|
-
|
|
204
|
-
|
|
218
|
+
return os.path.join(
|
|
219
|
+
*[
|
|
220
|
+
str(secure_filename(p))
|
|
221
|
+
for p in path.split("/")
|
|
222
|
+
if p not in ("", ".", "..")
|
|
223
|
+
]
|
|
224
|
+
)
|
|
225
|
+
|
|
226
|
+
def collect_attachments(
|
|
227
|
+
self, run_id: Optional[str], temp_dir: Optional[str]
|
|
228
|
+
) -> tuple[str, dict[str, Any]]:
|
|
205
229
|
"""
|
|
206
230
|
Collect attachments from the current request by staging uploaded files
|
|
207
231
|
to temp_dir, and return the temp_dir and parsed body of the request.
|
|
@@ -212,7 +236,7 @@ class WESBackend:
|
|
|
212
236
|
"""
|
|
213
237
|
if not temp_dir:
|
|
214
238
|
temp_dir = mkdtemp()
|
|
215
|
-
body:
|
|
239
|
+
body: dict[str, Any] = {}
|
|
216
240
|
has_attachments = False
|
|
217
241
|
for key, ls in connexion.request.files.lists():
|
|
218
242
|
try:
|
|
@@ -223,12 +247,20 @@ class WESBackend:
|
|
|
223
247
|
dest = os.path.join(temp_dir, self.secure_path(value.filename))
|
|
224
248
|
if not os.path.isdir(os.path.dirname(dest)):
|
|
225
249
|
os.makedirs(os.path.dirname(dest))
|
|
226
|
-
self.log_for_run(
|
|
250
|
+
self.log_for_run(
|
|
251
|
+
run_id, f"Staging attachment '{value.filename}' to '{dest}'"
|
|
252
|
+
)
|
|
227
253
|
value.save(dest)
|
|
228
254
|
has_attachments = True
|
|
229
|
-
body[key] =
|
|
230
|
-
|
|
231
|
-
|
|
255
|
+
body[key] = (
|
|
256
|
+
f"file://{temp_dir}" # Reference to temp working dir.
|
|
257
|
+
)
|
|
258
|
+
|
|
259
|
+
elif key in (
|
|
260
|
+
"workflow_params",
|
|
261
|
+
"tags",
|
|
262
|
+
"workflow_engine_parameters",
|
|
263
|
+
):
|
|
232
264
|
content = value.read()
|
|
233
265
|
body[key] = json.loads(content.decode("utf-8"))
|
|
234
266
|
else:
|
|
@@ -252,17 +284,23 @@ class WESBackend:
|
|
|
252
284
|
url, ref = urldefrag(body["workflow_url"])
|
|
253
285
|
if ":" not in url:
|
|
254
286
|
if not has_attachments:
|
|
255
|
-
raise MalformedRequestException(
|
|
287
|
+
raise MalformedRequestException(
|
|
288
|
+
"Relative 'workflow_url' but missing 'workflow_attachment'"
|
|
289
|
+
)
|
|
256
290
|
body["workflow_url"] = self.secure_path(url) # keep this relative
|
|
257
291
|
if ref:
|
|
258
292
|
# append "#ref" after the url
|
|
259
293
|
body["workflow_url"] += "#" + self.secure_path(ref)
|
|
260
|
-
self.log_for_run(
|
|
294
|
+
self.log_for_run(
|
|
295
|
+
run_id, "Using workflow_url '%s'" % body.get("workflow_url")
|
|
296
|
+
)
|
|
261
297
|
else:
|
|
262
298
|
raise MalformedRequestException("Missing 'workflow_url' in submission")
|
|
263
299
|
|
|
264
300
|
if "workflow_params" in body and not isinstance(body["workflow_params"], dict):
|
|
265
301
|
# They sent us something silly like "workflow_params": "5"
|
|
266
|
-
raise MalformedRequestException(
|
|
302
|
+
raise MalformedRequestException(
|
|
303
|
+
"Got a 'workflow_params' which does not decode to a JSON object"
|
|
304
|
+
)
|
|
267
305
|
|
|
268
306
|
return temp_dir, body
|
|
@@ -20,21 +20,15 @@
|
|
|
20
20
|
|
|
21
21
|
import json
|
|
22
22
|
import logging
|
|
23
|
-
import sys
|
|
24
23
|
import zipfile
|
|
25
24
|
from os import path
|
|
26
|
-
from typing import IO,
|
|
27
|
-
|
|
28
|
-
if sys.version_info >= (3, 8):
|
|
29
|
-
from typing import TypedDict
|
|
30
|
-
else:
|
|
31
|
-
from typing_extensions import TypedDict
|
|
32
|
-
|
|
25
|
+
from typing import IO, Optional, TypedDict, cast
|
|
33
26
|
from urllib.parse import ParseResult, urlparse
|
|
34
27
|
|
|
35
28
|
from toil.bus import JobStatus
|
|
36
|
-
from toil.server.wes.abstract_backend import
|
|
37
|
-
MalformedRequestException as InvalidRequestError
|
|
29
|
+
from toil.server.wes.abstract_backend import (
|
|
30
|
+
MalformedRequestException as InvalidRequestError,
|
|
31
|
+
)
|
|
38
32
|
from toil.server.wes.abstract_backend import TaskLog
|
|
39
33
|
|
|
40
34
|
logger = logging.getLogger(__name__)
|
|
@@ -53,20 +47,25 @@ Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
|
53
47
|
|
|
54
48
|
# The official spec we are working with here is: https://aws.github.io/amazon-genomics-cli/docs/concepts/workflows/#multi-file-workflows
|
|
55
49
|
|
|
50
|
+
|
|
56
51
|
class WorkflowPlan(TypedDict):
|
|
57
52
|
"""
|
|
58
53
|
These functions pass around dicts of a certain type, with `data` and `files` keys.
|
|
59
54
|
"""
|
|
55
|
+
|
|
60
56
|
data: "DataDict"
|
|
61
57
|
files: "FilesDict"
|
|
62
58
|
|
|
59
|
+
|
|
63
60
|
class DataDict(TypedDict, total=False):
|
|
64
61
|
"""
|
|
65
62
|
Under `data`, there can be:
|
|
66
63
|
* `workflowUrl` (required if no `workflowSource`): URL to main workflow code.
|
|
67
64
|
"""
|
|
65
|
+
|
|
68
66
|
workflowUrl: str
|
|
69
67
|
|
|
68
|
+
|
|
70
69
|
class FilesDict(TypedDict, total=False):
|
|
71
70
|
"""
|
|
72
71
|
Under `files`, there can be:
|
|
@@ -75,11 +74,13 @@ class FilesDict(TypedDict, total=False):
|
|
|
75
74
|
* `workflowOptions`: Open binary-mode file for a JSON of options sent along with the workflow.
|
|
76
75
|
* `workflowDependencies`: Open binary-mode file for the zip the workflow came in, if any.
|
|
77
76
|
"""
|
|
77
|
+
|
|
78
78
|
workflowSource: IO[bytes]
|
|
79
|
-
workflowInputFiles:
|
|
79
|
+
workflowInputFiles: list[IO[bytes]]
|
|
80
80
|
workflowOptions: IO[bytes]
|
|
81
81
|
workflowDependencies: IO[bytes]
|
|
82
82
|
|
|
83
|
+
|
|
83
84
|
def parse_workflow_zip_file(file: str, workflow_type: str) -> WorkflowPlan:
|
|
84
85
|
r"""
|
|
85
86
|
Processes a workflow zip bundle
|
|
@@ -163,9 +164,9 @@ def parse_workflow_manifest_file(manifest_file: str) -> WorkflowPlan:
|
|
|
163
164
|
:rtype: dict of `data` and `files`
|
|
164
165
|
|
|
165
166
|
MANIFEST.json is expected to be formatted like:
|
|
166
|
-
|
|
167
|
+
|
|
167
168
|
.. code-block:: json
|
|
168
|
-
|
|
169
|
+
|
|
169
170
|
{
|
|
170
171
|
"mainWorkflowURL": "relpath/to/workflow",
|
|
171
172
|
"inputFileURLs": [
|
|
@@ -235,7 +236,9 @@ def parse_workflow_manifest_file(manifest_file: str) -> WorkflowPlan:
|
|
|
235
236
|
return {"data": data, "files": files}
|
|
236
237
|
|
|
237
238
|
|
|
238
|
-
def workflow_manifest_url_to_path(
|
|
239
|
+
def workflow_manifest_url_to_path(
|
|
240
|
+
url: ParseResult, parent_dir: Optional[str] = None
|
|
241
|
+
) -> str:
|
|
239
242
|
"""
|
|
240
243
|
Interpret a possibly-relative parsed URL, relative to the given parent directory.
|
|
241
244
|
"""
|
|
@@ -244,6 +247,7 @@ def workflow_manifest_url_to_path(url: ParseResult, parent_dir: Optional[str] =
|
|
|
244
247
|
return path.join(parent_dir, relpath)
|
|
245
248
|
return relpath
|
|
246
249
|
|
|
250
|
+
|
|
247
251
|
# This one is all UCSC code
|
|
248
252
|
def task_filter(task: TaskLog, job_status: JobStatus) -> Optional[TaskLog]:
|
|
249
253
|
"""
|
|
@@ -264,6 +268,8 @@ def task_filter(task: TaskLog, job_status: JobStatus) -> Optional[TaskLog]:
|
|
|
264
268
|
|
|
265
269
|
modified_task = dict(task)
|
|
266
270
|
# Tack the batch ID onto the end of the name with the required separator
|
|
267
|
-
modified_task["name"] = "|".join(
|
|
271
|
+
modified_task["name"] = "|".join(
|
|
272
|
+
[cast(str, modified_task.get("name", "")), batch_id]
|
|
273
|
+
)
|
|
268
274
|
logger.info("Transformed task %s to %s", task, modified_task)
|
|
269
275
|
return modified_task
|