toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +122 -315
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +173 -89
- toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
- toil/batchSystems/awsBatch.py +244 -135
- toil/batchSystems/cleanup_support.py +26 -16
- toil/batchSystems/contained_executor.py +31 -28
- toil/batchSystems/gridengine.py +86 -50
- toil/batchSystems/htcondor.py +166 -89
- toil/batchSystems/kubernetes.py +632 -382
- toil/batchSystems/local_support.py +20 -15
- toil/batchSystems/lsf.py +134 -81
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +290 -151
- toil/batchSystems/mesos/executor.py +79 -50
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +46 -28
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +296 -125
- toil/batchSystems/slurm.py +603 -138
- toil/batchSystems/torque.py +47 -33
- toil/bus.py +186 -76
- toil/common.py +664 -368
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1136 -483
- toil/cwl/utils.py +17 -22
- toil/deferred.py +63 -42
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +140 -60
- toil/fileStores/cachingFileStore.py +717 -269
- toil/fileStores/nonCachingFileStore.py +116 -87
- toil/job.py +1225 -368
- toil/jobStores/abstractJobStore.py +416 -266
- toil/jobStores/aws/jobStore.py +863 -477
- toil/jobStores/aws/utils.py +201 -120
- toil/jobStores/conftest.py +3 -2
- toil/jobStores/fileJobStore.py +292 -154
- toil/jobStores/googleJobStore.py +140 -74
- toil/jobStores/utils.py +36 -15
- toil/leader.py +668 -272
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +74 -31
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +214 -39
- toil/lib/aws/utils.py +287 -231
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +104 -47
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +361 -199
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +5 -3
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +141 -15
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +66 -21
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +68 -15
- toil/lib/retry.py +126 -81
- toil/lib/threading.py +299 -82
- toil/lib/throttle.py +16 -15
- toil/options/common.py +843 -409
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +73 -17
- toil/provisioners/__init__.py +117 -46
- toil/provisioners/abstractProvisioner.py +332 -157
- toil/provisioners/aws/__init__.py +70 -33
- toil/provisioners/aws/awsProvisioner.py +1145 -715
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +155 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +128 -62
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +224 -70
- toil/test/__init__.py +282 -183
- toil/test/batchSystems/batchSystemTest.py +460 -210
- toil/test/batchSystems/batch_system_plugin_test.py +90 -0
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +110 -49
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +56 -0
- toil/test/cwl/cwlTest.py +496 -287
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +69 -46
- toil/test/jobStores/jobStoreTest.py +427 -264
- toil/test/lib/aws/test_iam.py +118 -50
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +58 -50
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +42 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +166 -44
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +141 -101
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +32 -24
- toil/test/src/environmentTest.py +135 -0
- toil/test/src/fileStoreTest.py +539 -272
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +46 -21
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +121 -71
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +10 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +73 -23
- toil/test/utils/toilDebugTest.py +103 -33
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +245 -106
- toil/test/wdl/wdltoil_test.py +818 -149
- toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
- toil/toilState.py +120 -35
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +214 -27
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +256 -140
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +32 -14
- toil/utils/toilSshCluster.py +49 -22
- toil/utils/toilStats.py +356 -273
- toil/utils/toilStatus.py +292 -139
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +12 -12
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3913 -1033
- toil/worker.py +367 -184
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-6.1.0a1.dist-info/METADATA +0 -125
- toil-6.1.0a1.dist-info/RECORD +0 -237
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/server/utils.py
CHANGED
|
@@ -16,7 +16,7 @@ import logging
|
|
|
16
16
|
import os
|
|
17
17
|
from abc import abstractmethod
|
|
18
18
|
from datetime import datetime
|
|
19
|
-
from typing import
|
|
19
|
+
from typing import Optional
|
|
20
20
|
from urllib.parse import urlparse
|
|
21
21
|
|
|
22
22
|
import requests
|
|
@@ -28,12 +28,14 @@ try:
|
|
|
28
28
|
from toil.lib.aws import get_current_aws_region
|
|
29
29
|
from toil.lib.aws.session import client
|
|
30
30
|
from toil.lib.aws.utils import retry_s3
|
|
31
|
+
|
|
31
32
|
HAVE_S3 = True
|
|
32
33
|
except ImportError:
|
|
33
34
|
HAVE_S3 = False
|
|
34
35
|
|
|
35
36
|
logger = logging.getLogger(__name__)
|
|
36
37
|
|
|
38
|
+
|
|
37
39
|
def get_iso_time() -> str:
|
|
38
40
|
"""
|
|
39
41
|
Return the current time in ISO 8601 format.
|
|
@@ -55,7 +57,9 @@ def link_file(src: str, dest: str) -> None:
|
|
|
55
57
|
os.symlink(src, dest)
|
|
56
58
|
|
|
57
59
|
|
|
58
|
-
def download_file_from_internet(
|
|
60
|
+
def download_file_from_internet(
|
|
61
|
+
src: str, dest: str, content_type: Optional[str] = None
|
|
62
|
+
) -> None:
|
|
59
63
|
"""
|
|
60
64
|
Download a file from the Internet and write it to dest.
|
|
61
65
|
"""
|
|
@@ -64,14 +68,19 @@ def download_file_from_internet(src: str, dest: str, content_type: Optional[str]
|
|
|
64
68
|
if not response.ok:
|
|
65
69
|
raise RuntimeError("Request failed with a client error or a server error.")
|
|
66
70
|
|
|
67
|
-
if content_type and not response.headers.get("Content-Type", "").startswith(
|
|
71
|
+
if content_type and not response.headers.get("Content-Type", "").startswith(
|
|
72
|
+
content_type
|
|
73
|
+
):
|
|
68
74
|
val = response.headers.get("Content-Type")
|
|
69
75
|
raise RuntimeError(f"Expected content type to be '{content_type}'. Not {val}.")
|
|
70
76
|
|
|
71
77
|
with open(dest, "wb") as f:
|
|
72
78
|
f.write(response.content)
|
|
73
79
|
|
|
74
|
-
|
|
80
|
+
|
|
81
|
+
def download_file_from_s3(
|
|
82
|
+
src: str, dest: str, content_type: Optional[str] = None
|
|
83
|
+
) -> None:
|
|
75
84
|
"""
|
|
76
85
|
Download a file from Amazon S3 and write it to dest.
|
|
77
86
|
"""
|
|
@@ -81,10 +90,11 @@ def download_file_from_s3(src: str, dest: str, content_type: Optional[str] = Non
|
|
|
81
90
|
except ImportError:
|
|
82
91
|
raise RuntimeError("Cannot access S3 as AWS modules are not available")
|
|
83
92
|
|
|
84
|
-
with open(dest,
|
|
93
|
+
with open(dest, "wb") as out_stream:
|
|
85
94
|
obj = get_object_for_url(urlparse(src), existing=True)
|
|
86
95
|
obj.download_fileobj(out_stream)
|
|
87
96
|
|
|
97
|
+
|
|
88
98
|
def get_file_class(path: str) -> str:
|
|
89
99
|
"""
|
|
90
100
|
Return the type of the file as a human readable string.
|
|
@@ -97,6 +107,7 @@ def get_file_class(path: str) -> str:
|
|
|
97
107
|
return "Directory"
|
|
98
108
|
return "Unknown"
|
|
99
109
|
|
|
110
|
+
|
|
100
111
|
@retry(errors=[OSError, BlockingIOError])
|
|
101
112
|
def safe_read_file(file: str) -> Optional[str]:
|
|
102
113
|
"""
|
|
@@ -153,6 +164,7 @@ def safe_write_file(file: str, s: str) -> None:
|
|
|
153
164
|
with open(temp_name, "w") as file_obj:
|
|
154
165
|
file_obj.write(s)
|
|
155
166
|
|
|
167
|
+
|
|
156
168
|
class MemoryStateCache:
|
|
157
169
|
"""
|
|
158
170
|
An in-memory place to store workflow state.
|
|
@@ -164,7 +176,7 @@ class MemoryStateCache:
|
|
|
164
176
|
"""
|
|
165
177
|
|
|
166
178
|
super().__init__()
|
|
167
|
-
self._data:
|
|
179
|
+
self._data: dict[tuple[str, str], Optional[str]] = {}
|
|
168
180
|
|
|
169
181
|
def get(self, workflow_id: str, key: str) -> Optional[str]:
|
|
170
182
|
"""
|
|
@@ -185,6 +197,7 @@ class MemoryStateCache:
|
|
|
185
197
|
else:
|
|
186
198
|
self._data[(workflow_id, key)] = value
|
|
187
199
|
|
|
200
|
+
|
|
188
201
|
class AbstractStateStore:
|
|
189
202
|
"""
|
|
190
203
|
A place for the WES server to keep its state: the set of workflows that
|
|
@@ -250,6 +263,7 @@ class AbstractStateStore:
|
|
|
250
263
|
"""
|
|
251
264
|
self._cache.set(workflow_id, key, value)
|
|
252
265
|
|
|
266
|
+
|
|
253
267
|
class MemoryStateStore(MemoryStateCache, AbstractStateStore):
|
|
254
268
|
"""
|
|
255
269
|
An in-memory place to store workflow state, for testing.
|
|
@@ -261,6 +275,7 @@ class MemoryStateStore(MemoryStateCache, AbstractStateStore):
|
|
|
261
275
|
def __init__(self):
|
|
262
276
|
super().__init__()
|
|
263
277
|
|
|
278
|
+
|
|
264
279
|
class FileStateStore(AbstractStateStore):
|
|
265
280
|
"""
|
|
266
281
|
A place to store workflow state that uses a POSIX-compatible file system.
|
|
@@ -275,7 +290,7 @@ class FileStateStore(AbstractStateStore):
|
|
|
275
290
|
"""
|
|
276
291
|
super().__init__()
|
|
277
292
|
parse = urlparse(url)
|
|
278
|
-
if parse.scheme.lower() not in [
|
|
293
|
+
if parse.scheme.lower() not in ["file", ""]:
|
|
279
294
|
# We want to catch if we get the wrong argument.
|
|
280
295
|
raise RuntimeError(f"{url} doesn't look like a local path")
|
|
281
296
|
if not os.path.exists(parse.path):
|
|
@@ -309,7 +324,9 @@ class FileStateStore(AbstractStateStore):
|
|
|
309
324
|
# Set the value in the file
|
|
310
325
|
safe_write_file(file_path, value)
|
|
311
326
|
|
|
327
|
+
|
|
312
328
|
if HAVE_S3:
|
|
329
|
+
|
|
313
330
|
class S3StateStore(AbstractStateStore):
|
|
314
331
|
"""
|
|
315
332
|
A place to store workflow state that uses an S3-compatible object store.
|
|
@@ -327,7 +344,7 @@ if HAVE_S3:
|
|
|
327
344
|
|
|
328
345
|
parse = urlparse(url)
|
|
329
346
|
|
|
330
|
-
if parse.scheme.lower() !=
|
|
347
|
+
if parse.scheme.lower() != "s3":
|
|
331
348
|
# We want to catch if we get the wrong argument.
|
|
332
349
|
raise RuntimeError(f"{url} doesn't look like an S3 URL")
|
|
333
350
|
|
|
@@ -335,12 +352,14 @@ if HAVE_S3:
|
|
|
335
352
|
# urlparse keeps the leading '/', but here we want a path in the
|
|
336
353
|
# bucket without a leading '/'. We also need to support an empty
|
|
337
354
|
# path.
|
|
338
|
-
self._base_path =
|
|
339
|
-
|
|
355
|
+
self._base_path = (
|
|
356
|
+
parse.path[1:] if parse.path.startswith("/") else parse.path
|
|
357
|
+
)
|
|
358
|
+
self._client = client("s3", region_name=get_current_aws_region())
|
|
340
359
|
|
|
341
360
|
logger.debug("Connected to S3StateStore at %s", url)
|
|
342
361
|
|
|
343
|
-
def _get_bucket_and_path(self, workflow_id: str, key: str) ->
|
|
362
|
+
def _get_bucket_and_path(self, workflow_id: str, key: str) -> tuple[str, str]:
|
|
344
363
|
"""
|
|
345
364
|
Get the bucket and path in the bucket at which a key value belongs.
|
|
346
365
|
"""
|
|
@@ -354,13 +373,12 @@ if HAVE_S3:
|
|
|
354
373
|
bucket, path = self._get_bucket_and_path(workflow_id, key)
|
|
355
374
|
for attempt in retry_s3():
|
|
356
375
|
try:
|
|
357
|
-
logger.debug(
|
|
376
|
+
logger.debug("Fetch %s path %s", bucket, path)
|
|
358
377
|
response = self._client.get_object(Bucket=bucket, Key=path)
|
|
359
|
-
return response[
|
|
378
|
+
return response["Body"].read().decode("utf-8")
|
|
360
379
|
except self._client.exceptions.NoSuchKey:
|
|
361
380
|
return None
|
|
362
381
|
|
|
363
|
-
|
|
364
382
|
def set(self, workflow_id: str, key: str, value: Optional[str]) -> None:
|
|
365
383
|
"""
|
|
366
384
|
Set or clear a key value on S3.
|
|
@@ -369,18 +387,21 @@ if HAVE_S3:
|
|
|
369
387
|
for attempt in retry_s3():
|
|
370
388
|
if value is None:
|
|
371
389
|
# Get rid of it.
|
|
372
|
-
logger.debug(
|
|
390
|
+
logger.debug("Clear %s path %s", bucket, path)
|
|
373
391
|
self._client.delete_object(Bucket=bucket, Key=path)
|
|
374
392
|
return
|
|
375
393
|
else:
|
|
376
394
|
# Store it, clobbering anything there already.
|
|
377
|
-
logger.debug(
|
|
378
|
-
self._client.put_object(
|
|
379
|
-
|
|
395
|
+
logger.debug("Set %s path %s", bucket, path)
|
|
396
|
+
self._client.put_object(
|
|
397
|
+
Bucket=bucket, Key=path, Body=value.encode("utf-8")
|
|
398
|
+
)
|
|
380
399
|
return
|
|
381
400
|
|
|
401
|
+
|
|
382
402
|
# We want to memoize state stores so we can cache on them.
|
|
383
|
-
state_store_cache:
|
|
403
|
+
state_store_cache: dict[str, AbstractStateStore] = {}
|
|
404
|
+
|
|
384
405
|
|
|
385
406
|
def connect_to_state_store(url: str) -> AbstractStateStore:
|
|
386
407
|
"""
|
|
@@ -392,25 +413,30 @@ def connect_to_state_store(url: str) -> AbstractStateStore:
|
|
|
392
413
|
if url not in state_store_cache:
|
|
393
414
|
# We need to actually make the state store
|
|
394
415
|
parse = urlparse(url)
|
|
395
|
-
if parse.scheme.lower() ==
|
|
416
|
+
if parse.scheme.lower() == "s3":
|
|
396
417
|
# It's an S3 URL
|
|
397
418
|
if HAVE_S3:
|
|
398
419
|
# And we can use S3, so make the right implementation for S3.
|
|
399
420
|
state_store_cache[url] = S3StateStore(url)
|
|
400
421
|
else:
|
|
401
422
|
# We can't actually use S3, so complain.
|
|
402
|
-
raise RuntimeError(
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
423
|
+
raise RuntimeError(
|
|
424
|
+
f"Cannot connect to {url} because Toil AWS "
|
|
425
|
+
f"dependencies are not available. Did you "
|
|
426
|
+
f"install Toil with the [aws] extra?"
|
|
427
|
+
)
|
|
428
|
+
elif parse.scheme.lower() in ["file", ""]:
|
|
406
429
|
# It's a file URL or path
|
|
407
430
|
state_store_cache[url] = FileStateStore(url)
|
|
408
431
|
else:
|
|
409
|
-
raise RuntimeError(
|
|
410
|
-
|
|
432
|
+
raise RuntimeError(
|
|
433
|
+
f"Cannot connect to {url} because we do not "
|
|
434
|
+
f"implement its URL scheme"
|
|
435
|
+
)
|
|
411
436
|
|
|
412
437
|
return state_store_cache[url]
|
|
413
438
|
|
|
439
|
+
|
|
414
440
|
class WorkflowStateStore:
|
|
415
441
|
"""
|
|
416
442
|
Slice of a state store for the state of a particular workflow.
|
|
@@ -463,6 +489,7 @@ def connect_to_workflow_state_store(url: str, workflow_id: str) -> WorkflowState
|
|
|
463
489
|
|
|
464
490
|
return WorkflowStateStore(connect_to_state_store(url), workflow_id)
|
|
465
491
|
|
|
492
|
+
|
|
466
493
|
# When we see one of these terminal states, we stay there forever.
|
|
467
494
|
TERMINAL_STATES = {"COMPLETE", "EXECUTOR_ERROR", "SYSTEM_ERROR", "CANCELED"}
|
|
468
495
|
|
|
@@ -470,6 +497,7 @@ TERMINAL_STATES = {"COMPLETE", "EXECUTOR_ERROR", "SYSTEM_ERROR", "CANCELED"}
|
|
|
470
497
|
# workflow running task is gone and move it to CANCELED?
|
|
471
498
|
MAX_CANCELING_SECONDS = 30
|
|
472
499
|
|
|
500
|
+
|
|
473
501
|
class WorkflowStateMachine:
|
|
474
502
|
"""
|
|
475
503
|
Class for managing the WES workflow state machine.
|
|
@@ -628,5 +656,3 @@ class WorkflowStateMachine:
|
|
|
628
656
|
state = "UNKNOWN"
|
|
629
657
|
|
|
630
658
|
return state
|
|
631
|
-
|
|
632
|
-
|
|
@@ -4,7 +4,7 @@ import json
|
|
|
4
4
|
import logging
|
|
5
5
|
import os
|
|
6
6
|
from abc import abstractmethod
|
|
7
|
-
from typing import Any, Callable,
|
|
7
|
+
from typing import Any, Callable, Optional, Union
|
|
8
8
|
from urllib.parse import urldefrag
|
|
9
9
|
|
|
10
10
|
import connexion # type: ignore
|
|
@@ -16,18 +16,25 @@ logger = logging.getLogger(__name__)
|
|
|
16
16
|
|
|
17
17
|
# Define a type for WES task log entries in responses
|
|
18
18
|
# TODO: make this a typed dict with all the WES task log field names and their types.
|
|
19
|
-
TaskLog =
|
|
19
|
+
TaskLog = dict[str, Union[str, int, None]]
|
|
20
20
|
|
|
21
21
|
|
|
22
22
|
class VersionNotImplementedException(Exception):
|
|
23
23
|
"""
|
|
24
24
|
Raised when the requested workflow version is not implemented.
|
|
25
25
|
"""
|
|
26
|
-
|
|
27
|
-
|
|
26
|
+
|
|
27
|
+
def __init__(
|
|
28
|
+
self,
|
|
29
|
+
wf_type: str,
|
|
30
|
+
version: Optional[str] = None,
|
|
31
|
+
supported_versions: Optional[list[str]] = None,
|
|
32
|
+
) -> None:
|
|
28
33
|
if version:
|
|
29
|
-
message = (
|
|
30
|
-
|
|
34
|
+
message = (
|
|
35
|
+
"workflow_type '{}' requires 'workflow_type_version' to be one of '{}'. "
|
|
36
|
+
"Got '{}' instead.".format(wf_type, str(supported_versions), version)
|
|
37
|
+
)
|
|
31
38
|
else:
|
|
32
39
|
message = f"workflow_type '{wf_type}' is not supported."
|
|
33
40
|
|
|
@@ -38,6 +45,7 @@ class MalformedRequestException(Exception):
|
|
|
38
45
|
"""
|
|
39
46
|
Raised when the request is malformed.
|
|
40
47
|
"""
|
|
48
|
+
|
|
41
49
|
def __init__(self, message: str) -> None:
|
|
42
50
|
super().__init__(message)
|
|
43
51
|
|
|
@@ -46,6 +54,7 @@ class WorkflowNotFoundException(Exception):
|
|
|
46
54
|
"""
|
|
47
55
|
Raised when the requested run ID is not found.
|
|
48
56
|
"""
|
|
57
|
+
|
|
49
58
|
def __init__(self) -> None:
|
|
50
59
|
super().__init__("The requested workflow run wasn't found.")
|
|
51
60
|
|
|
@@ -54,6 +63,7 @@ class WorkflowConflictException(Exception):
|
|
|
54
63
|
"""
|
|
55
64
|
Raised when the requested workflow is not in the expected state.
|
|
56
65
|
"""
|
|
66
|
+
|
|
57
67
|
def __init__(self, run_id: str):
|
|
58
68
|
super().__init__(f"Workflow {run_id} exists when it shouldn't.")
|
|
59
69
|
|
|
@@ -62,6 +72,7 @@ class OperationForbidden(Exception):
|
|
|
62
72
|
"""
|
|
63
73
|
Raised when the request is forbidden.
|
|
64
74
|
"""
|
|
75
|
+
|
|
65
76
|
def __init__(self, message: str) -> None:
|
|
66
77
|
super().__init__(message)
|
|
67
78
|
|
|
@@ -70,6 +81,7 @@ class WorkflowExecutionException(Exception):
|
|
|
70
81
|
"""
|
|
71
82
|
Raised when an internal error occurred during the execution of the workflow.
|
|
72
83
|
"""
|
|
84
|
+
|
|
73
85
|
def __init__(self, message: str) -> None:
|
|
74
86
|
super().__init__(message)
|
|
75
87
|
|
|
@@ -81,8 +93,10 @@ def handle_errors(func: Callable[..., Any]) -> Callable[..., Any]:
|
|
|
81
93
|
GA4GH WES spec.
|
|
82
94
|
"""
|
|
83
95
|
|
|
84
|
-
def error(msg: Any, code: int = 500) ->
|
|
85
|
-
logger.warning(
|
|
96
|
+
def error(msg: Any, code: int = 500) -> tuple[dict[str, Any], int]:
|
|
97
|
+
logger.warning(
|
|
98
|
+
f"Exception raised when calling '{func.__name__}()':", exc_info=True
|
|
99
|
+
)
|
|
86
100
|
return {"msg": str(msg), "status_code": code}, code
|
|
87
101
|
|
|
88
102
|
@functools.wraps(func)
|
|
@@ -114,7 +128,7 @@ class WESBackend:
|
|
|
114
128
|
to handle user requests when they hit different endpoints.
|
|
115
129
|
"""
|
|
116
130
|
|
|
117
|
-
def __init__(self, options:
|
|
131
|
+
def __init__(self, options: list[str]):
|
|
118
132
|
"""
|
|
119
133
|
:param options: A list of default engine options to use when executing
|
|
120
134
|
a workflow. Example options:
|
|
@@ -135,7 +149,7 @@ class WESBackend:
|
|
|
135
149
|
return getattr(self, operation_id.split(".")[-1])
|
|
136
150
|
|
|
137
151
|
@abstractmethod
|
|
138
|
-
def get_service_info(self) ->
|
|
152
|
+
def get_service_info(self) -> dict[str, Any]:
|
|
139
153
|
"""
|
|
140
154
|
Get information about the Workflow Execution Service.
|
|
141
155
|
|
|
@@ -144,7 +158,9 @@ class WESBackend:
|
|
|
144
158
|
raise NotImplementedError
|
|
145
159
|
|
|
146
160
|
@abstractmethod
|
|
147
|
-
def list_runs(
|
|
161
|
+
def list_runs(
|
|
162
|
+
self, page_size: Optional[int] = None, page_token: Optional[str] = None
|
|
163
|
+
) -> dict[str, Any]:
|
|
148
164
|
"""
|
|
149
165
|
List the workflow runs.
|
|
150
166
|
|
|
@@ -153,7 +169,7 @@ class WESBackend:
|
|
|
153
169
|
raise NotImplementedError
|
|
154
170
|
|
|
155
171
|
@abstractmethod
|
|
156
|
-
def run_workflow(self) ->
|
|
172
|
+
def run_workflow(self) -> dict[str, str]:
|
|
157
173
|
"""
|
|
158
174
|
Run a workflow. This endpoint creates a new workflow run and returns
|
|
159
175
|
a `RunId` to monitor its progress.
|
|
@@ -163,7 +179,7 @@ class WESBackend:
|
|
|
163
179
|
raise NotImplementedError
|
|
164
180
|
|
|
165
181
|
@abstractmethod
|
|
166
|
-
def get_run_log(self, run_id: str) ->
|
|
182
|
+
def get_run_log(self, run_id: str) -> dict[str, Any]:
|
|
167
183
|
"""
|
|
168
184
|
Get detailed info about a workflow run.
|
|
169
185
|
|
|
@@ -172,7 +188,7 @@ class WESBackend:
|
|
|
172
188
|
raise NotImplementedError
|
|
173
189
|
|
|
174
190
|
@abstractmethod
|
|
175
|
-
def cancel_run(self, run_id: str) ->
|
|
191
|
+
def cancel_run(self, run_id: str) -> dict[str, str]:
|
|
176
192
|
"""
|
|
177
193
|
Cancel a running workflow.
|
|
178
194
|
|
|
@@ -181,7 +197,7 @@ class WESBackend:
|
|
|
181
197
|
raise NotImplementedError
|
|
182
198
|
|
|
183
199
|
@abstractmethod
|
|
184
|
-
def get_run_status(self, run_id: str) ->
|
|
200
|
+
def get_run_status(self, run_id: str) -> dict[str, str]:
|
|
185
201
|
"""
|
|
186
202
|
Get quick status info about a workflow run, returning a simple result
|
|
187
203
|
with the overall state of the workflow run.
|
|
@@ -199,9 +215,17 @@ class WESBackend:
|
|
|
199
215
|
|
|
200
216
|
@staticmethod
|
|
201
217
|
def secure_path(path: str) -> str:
|
|
202
|
-
return os.path.join(
|
|
203
|
-
|
|
204
|
-
|
|
218
|
+
return os.path.join(
|
|
219
|
+
*[
|
|
220
|
+
str(secure_filename(p))
|
|
221
|
+
for p in path.split("/")
|
|
222
|
+
if p not in ("", ".", "..")
|
|
223
|
+
]
|
|
224
|
+
)
|
|
225
|
+
|
|
226
|
+
def collect_attachments(
|
|
227
|
+
self, run_id: Optional[str], temp_dir: Optional[str]
|
|
228
|
+
) -> tuple[str, dict[str, Any]]:
|
|
205
229
|
"""
|
|
206
230
|
Collect attachments from the current request by staging uploaded files
|
|
207
231
|
to temp_dir, and return the temp_dir and parsed body of the request.
|
|
@@ -212,7 +236,7 @@ class WESBackend:
|
|
|
212
236
|
"""
|
|
213
237
|
if not temp_dir:
|
|
214
238
|
temp_dir = mkdtemp()
|
|
215
|
-
body:
|
|
239
|
+
body: dict[str, Any] = {}
|
|
216
240
|
has_attachments = False
|
|
217
241
|
for key, ls in connexion.request.files.lists():
|
|
218
242
|
try:
|
|
@@ -223,12 +247,20 @@ class WESBackend:
|
|
|
223
247
|
dest = os.path.join(temp_dir, self.secure_path(value.filename))
|
|
224
248
|
if not os.path.isdir(os.path.dirname(dest)):
|
|
225
249
|
os.makedirs(os.path.dirname(dest))
|
|
226
|
-
self.log_for_run(
|
|
250
|
+
self.log_for_run(
|
|
251
|
+
run_id, f"Staging attachment '{value.filename}' to '{dest}'"
|
|
252
|
+
)
|
|
227
253
|
value.save(dest)
|
|
228
254
|
has_attachments = True
|
|
229
|
-
body[key] =
|
|
230
|
-
|
|
231
|
-
|
|
255
|
+
body[key] = (
|
|
256
|
+
f"file://{temp_dir}" # Reference to temp working dir.
|
|
257
|
+
)
|
|
258
|
+
|
|
259
|
+
elif key in (
|
|
260
|
+
"workflow_params",
|
|
261
|
+
"tags",
|
|
262
|
+
"workflow_engine_parameters",
|
|
263
|
+
):
|
|
232
264
|
content = value.read()
|
|
233
265
|
body[key] = json.loads(content.decode("utf-8"))
|
|
234
266
|
else:
|
|
@@ -252,17 +284,23 @@ class WESBackend:
|
|
|
252
284
|
url, ref = urldefrag(body["workflow_url"])
|
|
253
285
|
if ":" not in url:
|
|
254
286
|
if not has_attachments:
|
|
255
|
-
raise MalformedRequestException(
|
|
287
|
+
raise MalformedRequestException(
|
|
288
|
+
"Relative 'workflow_url' but missing 'workflow_attachment'"
|
|
289
|
+
)
|
|
256
290
|
body["workflow_url"] = self.secure_path(url) # keep this relative
|
|
257
291
|
if ref:
|
|
258
292
|
# append "#ref" after the url
|
|
259
293
|
body["workflow_url"] += "#" + self.secure_path(ref)
|
|
260
|
-
self.log_for_run(
|
|
294
|
+
self.log_for_run(
|
|
295
|
+
run_id, "Using workflow_url '%s'" % body.get("workflow_url")
|
|
296
|
+
)
|
|
261
297
|
else:
|
|
262
298
|
raise MalformedRequestException("Missing 'workflow_url' in submission")
|
|
263
299
|
|
|
264
300
|
if "workflow_params" in body and not isinstance(body["workflow_params"], dict):
|
|
265
301
|
# They sent us something silly like "workflow_params": "5"
|
|
266
|
-
raise MalformedRequestException(
|
|
302
|
+
raise MalformedRequestException(
|
|
303
|
+
"Got a 'workflow_params' which does not decode to a JSON object"
|
|
304
|
+
)
|
|
267
305
|
|
|
268
306
|
return temp_dir, body
|
|
@@ -20,21 +20,15 @@
|
|
|
20
20
|
|
|
21
21
|
import json
|
|
22
22
|
import logging
|
|
23
|
-
import sys
|
|
24
23
|
import zipfile
|
|
25
24
|
from os import path
|
|
26
|
-
from typing import IO,
|
|
27
|
-
|
|
28
|
-
if sys.version_info >= (3, 8):
|
|
29
|
-
from typing import TypedDict
|
|
30
|
-
else:
|
|
31
|
-
from typing_extensions import TypedDict
|
|
32
|
-
|
|
25
|
+
from typing import IO, Optional, TypedDict, cast
|
|
33
26
|
from urllib.parse import ParseResult, urlparse
|
|
34
27
|
|
|
35
28
|
from toil.bus import JobStatus
|
|
36
|
-
from toil.server.wes.abstract_backend import
|
|
37
|
-
MalformedRequestException as InvalidRequestError
|
|
29
|
+
from toil.server.wes.abstract_backend import (
|
|
30
|
+
MalformedRequestException as InvalidRequestError,
|
|
31
|
+
)
|
|
38
32
|
from toil.server.wes.abstract_backend import TaskLog
|
|
39
33
|
|
|
40
34
|
logger = logging.getLogger(__name__)
|
|
@@ -53,20 +47,25 @@ Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
|
53
47
|
|
|
54
48
|
# The official spec we are working with here is: https://aws.github.io/amazon-genomics-cli/docs/concepts/workflows/#multi-file-workflows
|
|
55
49
|
|
|
50
|
+
|
|
56
51
|
class WorkflowPlan(TypedDict):
|
|
57
52
|
"""
|
|
58
53
|
These functions pass around dicts of a certain type, with `data` and `files` keys.
|
|
59
54
|
"""
|
|
55
|
+
|
|
60
56
|
data: "DataDict"
|
|
61
57
|
files: "FilesDict"
|
|
62
58
|
|
|
59
|
+
|
|
63
60
|
class DataDict(TypedDict, total=False):
|
|
64
61
|
"""
|
|
65
62
|
Under `data`, there can be:
|
|
66
63
|
* `workflowUrl` (required if no `workflowSource`): URL to main workflow code.
|
|
67
64
|
"""
|
|
65
|
+
|
|
68
66
|
workflowUrl: str
|
|
69
67
|
|
|
68
|
+
|
|
70
69
|
class FilesDict(TypedDict, total=False):
|
|
71
70
|
"""
|
|
72
71
|
Under `files`, there can be:
|
|
@@ -75,11 +74,13 @@ class FilesDict(TypedDict, total=False):
|
|
|
75
74
|
* `workflowOptions`: Open binary-mode file for a JSON of options sent along with the workflow.
|
|
76
75
|
* `workflowDependencies`: Open binary-mode file for the zip the workflow came in, if any.
|
|
77
76
|
"""
|
|
77
|
+
|
|
78
78
|
workflowSource: IO[bytes]
|
|
79
|
-
workflowInputFiles:
|
|
79
|
+
workflowInputFiles: list[IO[bytes]]
|
|
80
80
|
workflowOptions: IO[bytes]
|
|
81
81
|
workflowDependencies: IO[bytes]
|
|
82
82
|
|
|
83
|
+
|
|
83
84
|
def parse_workflow_zip_file(file: str, workflow_type: str) -> WorkflowPlan:
|
|
84
85
|
r"""
|
|
85
86
|
Processes a workflow zip bundle
|
|
@@ -163,9 +164,9 @@ def parse_workflow_manifest_file(manifest_file: str) -> WorkflowPlan:
|
|
|
163
164
|
:rtype: dict of `data` and `files`
|
|
164
165
|
|
|
165
166
|
MANIFEST.json is expected to be formatted like:
|
|
166
|
-
|
|
167
|
+
|
|
167
168
|
.. code-block:: json
|
|
168
|
-
|
|
169
|
+
|
|
169
170
|
{
|
|
170
171
|
"mainWorkflowURL": "relpath/to/workflow",
|
|
171
172
|
"inputFileURLs": [
|
|
@@ -235,7 +236,9 @@ def parse_workflow_manifest_file(manifest_file: str) -> WorkflowPlan:
|
|
|
235
236
|
return {"data": data, "files": files}
|
|
236
237
|
|
|
237
238
|
|
|
238
|
-
def workflow_manifest_url_to_path(
|
|
239
|
+
def workflow_manifest_url_to_path(
|
|
240
|
+
url: ParseResult, parent_dir: Optional[str] = None
|
|
241
|
+
) -> str:
|
|
239
242
|
"""
|
|
240
243
|
Interpret a possibly-relative parsed URL, relative to the given parent directory.
|
|
241
244
|
"""
|
|
@@ -244,6 +247,7 @@ def workflow_manifest_url_to_path(url: ParseResult, parent_dir: Optional[str] =
|
|
|
244
247
|
return path.join(parent_dir, relpath)
|
|
245
248
|
return relpath
|
|
246
249
|
|
|
250
|
+
|
|
247
251
|
# This one is all UCSC code
|
|
248
252
|
def task_filter(task: TaskLog, job_status: JobStatus) -> Optional[TaskLog]:
|
|
249
253
|
"""
|
|
@@ -264,6 +268,8 @@ def task_filter(task: TaskLog, job_status: JobStatus) -> Optional[TaskLog]:
|
|
|
264
268
|
|
|
265
269
|
modified_task = dict(task)
|
|
266
270
|
# Tack the batch ID onto the end of the name with the required separator
|
|
267
|
-
modified_task["name"] = "|".join(
|
|
271
|
+
modified_task["name"] = "|".join(
|
|
272
|
+
[cast(str, modified_task.get("name", "")), batch_id]
|
|
273
|
+
)
|
|
268
274
|
logger.info("Transformed task %s to %s", task, modified_task)
|
|
269
275
|
return modified_task
|