toil 6.0.0__py3-none-any.whl → 6.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/batchSystems/abstractBatchSystem.py +19 -4
- toil/batchSystems/abstractGridEngineBatchSystem.py +22 -22
- toil/batchSystems/cleanup_support.py +7 -3
- toil/batchSystems/lsf.py +7 -7
- toil/batchSystems/slurm.py +85 -14
- toil/bus.py +38 -0
- toil/common.py +20 -18
- toil/cwl/cwltoil.py +81 -63
- toil/exceptions.py +1 -1
- toil/fileStores/abstractFileStore.py +53 -4
- toil/fileStores/cachingFileStore.py +4 -20
- toil/fileStores/nonCachingFileStore.py +5 -14
- toil/job.py +46 -30
- toil/jobStores/abstractJobStore.py +21 -23
- toil/jobStores/aws/utils.py +5 -4
- toil/jobStores/fileJobStore.py +1 -1
- toil/leader.py +17 -14
- toil/lib/conversions.py +19 -0
- toil/lib/generatedEC2Lists.py +8 -8
- toil/lib/io.py +28 -2
- toil/lib/resources.py +8 -1
- toil/lib/threading.py +27 -12
- toil/options/common.py +5 -7
- toil/options/wdl.py +5 -0
- toil/provisioners/abstractProvisioner.py +8 -0
- toil/statsAndLogging.py +36 -8
- toil/test/batchSystems/test_slurm.py +21 -6
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +58 -0
- toil/test/cwl/cwlTest.py +243 -151
- toil/test/docs/scriptsTest.py +2 -2
- toil/test/jobStores/jobStoreTest.py +7 -5
- toil/test/lib/test_ec2.py +1 -1
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +37 -0
- toil/test/provisioners/clusterTest.py +9 -8
- toil/test/utils/toilDebugTest.py +1 -1
- toil/test/utils/utilsTest.py +3 -3
- toil/test/wdl/wdltoil_test.py +91 -16
- toil/utils/toilDebugFile.py +1 -1
- toil/utils/toilStats.py +309 -266
- toil/utils/toilStatus.py +1 -1
- toil/version.py +9 -9
- toil/wdl/wdltoil.py +341 -189
- toil/worker.py +31 -16
- {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/METADATA +6 -7
- {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/RECORD +51 -47
- {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/LICENSE +0 -0
- {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/WHEEL +0 -0
- {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/entry_points.txt +0 -0
- {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/top_level.txt +0 -0
toil/wdl/wdltoil.py
CHANGED
|
@@ -110,7 +110,7 @@ F = TypeVar('F', bound=Callable[..., Any])
|
|
|
110
110
|
def report_wdl_errors(task: str, exit: bool = False, log: Callable[[str], None] = logger.critical) -> Callable[[F], F]:
|
|
111
111
|
"""
|
|
112
112
|
Create a decorator to report WDL errors with the given task message.
|
|
113
|
-
|
|
113
|
+
|
|
114
114
|
Decorator can then be applied to a function, and if a WDL error happens it
|
|
115
115
|
will say that it could not {task}.
|
|
116
116
|
"""
|
|
@@ -551,6 +551,18 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
551
551
|
on the local host.
|
|
552
552
|
"""
|
|
553
553
|
|
|
554
|
+
return self.devirtualze_to(filename, self._file_store.localTempDir, self._file_store, self._execution_dir)
|
|
555
|
+
|
|
556
|
+
@staticmethod
|
|
557
|
+
def devirtualze_to(filename: str, dest_dir: str, file_source: Union[AbstractFileStore, Toil], execution_dir: Optional[str]) -> str:
|
|
558
|
+
"""
|
|
559
|
+
Download or export a WDL virtualized filename/URL to the given directory.
|
|
560
|
+
|
|
561
|
+
Makes sure sibling files stay siblings and files with the same name don't clobber each other. Called from within this class for tasks, and statically at the end of the workflow for outputs.
|
|
562
|
+
|
|
563
|
+
Returns the local path to the file.
|
|
564
|
+
"""
|
|
565
|
+
|
|
554
566
|
# TODO: Support people doing path operations (join, split, get parent directory) on the virtualized filenames.
|
|
555
567
|
# TODO: For task inputs, we are supposed to make sure to put things in the same directory if they came from the same directory. See <https://github.com/openwdl/wdl/blob/main/versions/1.0/SPEC.md#task-input-localization>
|
|
556
568
|
if is_url(filename):
|
|
@@ -564,8 +576,8 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
564
576
|
# Use UUID as folder name rather than a new temp folder to reduce internal clutter.
|
|
565
577
|
# Put the UUID in the destination path in order for tasks to
|
|
566
578
|
# see where to put files depending on their parents.
|
|
567
|
-
dir_path = os.path.join(
|
|
568
|
-
|
|
579
|
+
dir_path = os.path.join(dest_dir, parent_id)
|
|
580
|
+
|
|
569
581
|
else:
|
|
570
582
|
# Parse the URL and extract the basename
|
|
571
583
|
file_basename = os.path.basename(urlsplit(filename).path)
|
|
@@ -574,8 +586,8 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
574
586
|
# in, not relative to the thing.
|
|
575
587
|
parent_url = urljoin(filename, ".")
|
|
576
588
|
# Turn it into a string we can make a directory for
|
|
577
|
-
dir_path = os.path.join(
|
|
578
|
-
|
|
589
|
+
dir_path = os.path.join(dest_dir, quote(parent_url, safe=''))
|
|
590
|
+
|
|
579
591
|
if not os.path.exists(dir_path):
|
|
580
592
|
# Make sure the chosen directory exists
|
|
581
593
|
os.mkdir(dir_path)
|
|
@@ -584,7 +596,13 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
584
596
|
|
|
585
597
|
if filename.startswith(TOIL_URI_SCHEME):
|
|
586
598
|
# Get a local path to the file
|
|
587
|
-
|
|
599
|
+
if isinstance(file_source, AbstractFileStore):
|
|
600
|
+
# Read from the file store
|
|
601
|
+
result = file_source.readGlobalFile(file_id, dest_path)
|
|
602
|
+
elif isinstance(file_source, Toil):
|
|
603
|
+
# Read from the Toil context
|
|
604
|
+
file_source.export_file(file_id, dest_path)
|
|
605
|
+
result = dest_path
|
|
588
606
|
else:
|
|
589
607
|
# Download to a local file with the right name and execute bit.
|
|
590
608
|
# Open it exclusively
|
|
@@ -600,8 +618,8 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
|
|
|
600
618
|
# This is a local file
|
|
601
619
|
# To support relative paths, join the execution dir and filename
|
|
602
620
|
# if filename is already an abs path, join() will do nothing
|
|
603
|
-
if
|
|
604
|
-
result = os.path.join(
|
|
621
|
+
if execution_dir is not None:
|
|
622
|
+
result = os.path.join(execution_dir, filename)
|
|
605
623
|
else:
|
|
606
624
|
result = filename
|
|
607
625
|
|
|
@@ -712,10 +730,14 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
|
|
|
712
730
|
# WDL.StdLib.TaskOutputs next.
|
|
713
731
|
super().__init__(file_store)
|
|
714
732
|
|
|
715
|
-
# Remember task
|
|
733
|
+
# Remember task output files
|
|
716
734
|
self._stdout_path = stdout_path
|
|
717
735
|
self._stderr_path = stderr_path
|
|
718
736
|
|
|
737
|
+
# Remember that the WDL code has not referenced them yet.
|
|
738
|
+
self._stdout_used = False
|
|
739
|
+
self._stderr_used = False
|
|
740
|
+
|
|
719
741
|
# Remember current directory
|
|
720
742
|
self._current_directory_override = current_directory_override
|
|
721
743
|
|
|
@@ -741,14 +763,28 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
|
|
|
741
763
|
"""
|
|
742
764
|
Get the standard output of the command that ran, as a WDL File, outside the container.
|
|
743
765
|
"""
|
|
766
|
+
self._stdout_used = True
|
|
744
767
|
return WDL.Value.File(self._stdout_path)
|
|
745
768
|
|
|
769
|
+
def stdout_used(self) -> bool:
|
|
770
|
+
"""
|
|
771
|
+
Return True if the standard output was read by the WDL.
|
|
772
|
+
"""
|
|
773
|
+
return self._stdout_used
|
|
774
|
+
|
|
746
775
|
def _stderr(self) -> WDL.Value.File:
|
|
747
776
|
"""
|
|
748
777
|
Get the standard error of the command that ran, as a WDL File, outside the container.
|
|
749
778
|
"""
|
|
779
|
+
self._stderr_used = True
|
|
750
780
|
return WDL.Value.File(self._stderr_path)
|
|
751
781
|
|
|
782
|
+
def stderr_used(self) -> bool:
|
|
783
|
+
"""
|
|
784
|
+
Return True if the standard error was read by the WDL.
|
|
785
|
+
"""
|
|
786
|
+
return self._stderr_used
|
|
787
|
+
|
|
752
788
|
def _glob(self, pattern: WDL.Value.String) -> WDL.Value.Array:
|
|
753
789
|
"""
|
|
754
790
|
Get a WDL Array of WDL Files left behind by the job that ran, matching the given glob pattern, outside the container.
|
|
@@ -1009,7 +1045,7 @@ def import_files(environment: WDLBindings, toil: Toil, path: Optional[List[str]]
|
|
|
1009
1045
|
# we have no auth.
|
|
1010
1046
|
logger.error("Something went wrong importing %s", candidate_uri)
|
|
1011
1047
|
raise
|
|
1012
|
-
|
|
1048
|
+
|
|
1013
1049
|
if imported is None:
|
|
1014
1050
|
# Wasn't found there
|
|
1015
1051
|
continue
|
|
@@ -1022,7 +1058,7 @@ def import_files(environment: WDLBindings, toil: Toil, path: Optional[List[str]]
|
|
|
1022
1058
|
# We can't have files with no basename because we need to
|
|
1023
1059
|
# download them at that basename later.
|
|
1024
1060
|
raise RuntimeError(f"File {candidate_uri} has no basename and so cannot be a WDL File")
|
|
1025
|
-
|
|
1061
|
+
|
|
1026
1062
|
# Was actually found
|
|
1027
1063
|
if is_url(candidate_uri):
|
|
1028
1064
|
# Might be a file URI or other URI.
|
|
@@ -1184,9 +1220,11 @@ class WDLBaseJob(Job):
|
|
|
1184
1220
|
null values for things not defined in a section. Post-processing operations
|
|
1185
1221
|
can be added onto any job before it is saved, and will be applied as long
|
|
1186
1222
|
as the job's run method calls postprocess().
|
|
1223
|
+
|
|
1224
|
+
Also responsible for remembering the Toil WDL configuration keys and values.
|
|
1187
1225
|
"""
|
|
1188
1226
|
|
|
1189
|
-
def __init__(self,
|
|
1227
|
+
def __init__(self, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
|
|
1190
1228
|
"""
|
|
1191
1229
|
Make a WDL-related job.
|
|
1192
1230
|
|
|
@@ -1212,8 +1250,10 @@ class WDLBaseJob(Job):
|
|
|
1212
1250
|
# may have coalesced postprocessing steps deferred by several levels of
|
|
1213
1251
|
# jobs returning other jobs' promised RVs.
|
|
1214
1252
|
self._postprocessing_steps: List[Tuple[str, Union[str, Promised[WDLBindings]]]] = []
|
|
1253
|
+
|
|
1254
|
+
self._wdl_options = wdl_options if wdl_options is not None else {}
|
|
1215
1255
|
|
|
1216
|
-
self.
|
|
1256
|
+
assert self._wdl_options.get("container") is not None
|
|
1217
1257
|
|
|
1218
1258
|
# TODO: We're not allowed by MyPy to override a method and widen the return
|
|
1219
1259
|
# type, so this has to be Any.
|
|
@@ -1307,65 +1347,44 @@ class WDLBaseJob(Job):
|
|
|
1307
1347
|
|
|
1308
1348
|
logger.debug("Assigned postprocessing steps from %s to %s", self, other)
|
|
1309
1349
|
|
|
1310
|
-
|
|
1311
|
-
class WDLTaskJob(WDLBaseJob):
|
|
1350
|
+
class WDLTaskWrapperJob(WDLBaseJob):
|
|
1312
1351
|
"""
|
|
1313
|
-
Job that
|
|
1352
|
+
Job that determines the resources needed to run a WDL job.
|
|
1314
1353
|
|
|
1315
1354
|
Responsible for evaluating the input declarations for unspecified inputs,
|
|
1316
|
-
evaluating the runtime section,
|
|
1317
|
-
|
|
1355
|
+
evaluating the runtime section, and scheduling or chaining to the real WDL
|
|
1356
|
+
job.
|
|
1318
1357
|
|
|
1319
1358
|
All bindings are in terms of task-internal names.
|
|
1320
1359
|
"""
|
|
1321
1360
|
|
|
1322
|
-
def __init__(self, task: WDL.Tree.Task, prev_node_results: Sequence[Promised[WDLBindings]], task_id: List[str], namespace: str, **kwargs: Any) -> None:
|
|
1361
|
+
def __init__(self, task: WDL.Tree.Task, prev_node_results: Sequence[Promised[WDLBindings]], task_id: List[str], namespace: str, task_path: str, **kwargs: Any) -> None:
|
|
1323
1362
|
"""
|
|
1324
|
-
Make a new job to run a task.
|
|
1363
|
+
Make a new job to determine resources and run a task.
|
|
1325
1364
|
|
|
1326
1365
|
:param namespace: The namespace that the task's *contents* exist in.
|
|
1327
1366
|
The caller has alredy added the task's own name.
|
|
1367
|
+
|
|
1368
|
+
:param task_path: Like the namespace, but including subscript numbers
|
|
1369
|
+
for scatters.
|
|
1328
1370
|
"""
|
|
1371
|
+
super().__init__(unitName=task_path + ".inputs", displayName=namespace + ".inputs", local=True, **kwargs)
|
|
1329
1372
|
|
|
1330
|
-
|
|
1331
|
-
# TODO: Instead of re-scheduling with more resources, add a local
|
|
1332
|
-
# "wrapper" job like CWL uses to determine the actual requirements.
|
|
1333
|
-
super().__init__(unitName=namespace, displayName=namespace, local=False, **kwargs)
|
|
1334
|
-
|
|
1335
|
-
logger.info("Preparing to run task %s as %s", task.name, namespace)
|
|
1373
|
+
logger.info("Preparing to run task code for %s as %s", task.name, namespace)
|
|
1336
1374
|
|
|
1337
1375
|
self._task = task
|
|
1338
1376
|
self._prev_node_results = prev_node_results
|
|
1339
1377
|
self._task_id = task_id
|
|
1340
1378
|
self._namespace = namespace
|
|
1379
|
+
self._task_path = task_path
|
|
1341
1380
|
|
|
1342
|
-
|
|
1343
|
-
"""
|
|
1344
|
-
Determie if --fakeroot is likely to work for Singularity.
|
|
1345
|
-
"""
|
|
1346
|
-
|
|
1347
|
-
# We need to have an entry for our user in /etc/subuid to grant us a range of UIDs to use, for fakeroot to work.
|
|
1348
|
-
try:
|
|
1349
|
-
subuid_file = open('/etc/subuid')
|
|
1350
|
-
except OSError as e:
|
|
1351
|
-
logger.warning('Cannot open /etc/subuid due to %s; assuming no subuids available', e)
|
|
1352
|
-
return False
|
|
1353
|
-
username = get_user_name()
|
|
1354
|
-
for line in subuid_file:
|
|
1355
|
-
if line.split(':')[0].strip() == username:
|
|
1356
|
-
# We have a line assigning subuids
|
|
1357
|
-
return True
|
|
1358
|
-
# If there is no line, we have no subuids
|
|
1359
|
-
logger.warning('No subuids are assigned to %s; cannot fake root.', username)
|
|
1360
|
-
return False
|
|
1361
|
-
|
|
1362
|
-
@report_wdl_errors("run task")
|
|
1381
|
+
@report_wdl_errors("evaluate task code")
|
|
1363
1382
|
def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
|
|
1364
1383
|
"""
|
|
1365
|
-
|
|
1384
|
+
Evaluate inputs and runtime and schedule the task.
|
|
1366
1385
|
"""
|
|
1367
1386
|
super().run(file_store)
|
|
1368
|
-
logger.info("
|
|
1387
|
+
logger.info("Evaluating inputs and runtime for task %s (%s) called as %s", self._task.name, self._task_id, self._namespace)
|
|
1369
1388
|
|
|
1370
1389
|
# Combine the bindings we get from previous jobs.
|
|
1371
1390
|
# For a task we are only passed the inside-the-task namespace.
|
|
@@ -1375,19 +1394,20 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1375
1394
|
standard_library = ToilWDLStdLibBase(file_store)
|
|
1376
1395
|
|
|
1377
1396
|
if self._task.inputs:
|
|
1378
|
-
logger.debug("Evaluating task
|
|
1397
|
+
logger.debug("Evaluating task code")
|
|
1379
1398
|
for input_decl in self._task.inputs:
|
|
1380
1399
|
# Evaluate all the inputs that aren't pre-set
|
|
1381
1400
|
bindings = bindings.bind(input_decl.name, evaluate_defaultable_decl(input_decl, bindings, standard_library))
|
|
1382
1401
|
for postinput_decl in self._task.postinputs:
|
|
1383
|
-
# Evaluate all the postinput decls
|
|
1402
|
+
# Evaluate all the postinput decls.
|
|
1403
|
+
# We need these in order to evaluate the runtime.
|
|
1404
|
+
# TODO: What if they wanted resources from the runtime?
|
|
1384
1405
|
bindings = bindings.bind(postinput_decl.name, evaluate_defaultable_decl(postinput_decl, bindings, standard_library))
|
|
1385
1406
|
|
|
1386
1407
|
# Evaluate the runtime section
|
|
1387
1408
|
runtime_bindings = evaluate_call_inputs(self._task, self._task.runtime, bindings, standard_library)
|
|
1388
1409
|
|
|
1389
|
-
# Fill these in with not-None if
|
|
1390
|
-
# TODO: Can this break out into a function somehow?
|
|
1410
|
+
# Fill these in with not-None if the workflow asks for each resource.
|
|
1391
1411
|
runtime_memory: Optional[int] = None
|
|
1392
1412
|
runtime_cores: Optional[float] = None
|
|
1393
1413
|
runtime_disk: Optional[int] = None
|
|
@@ -1395,21 +1415,14 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1395
1415
|
|
|
1396
1416
|
if runtime_bindings.has_binding('cpu'):
|
|
1397
1417
|
cpu_spec: int = runtime_bindings.resolve('cpu').value
|
|
1398
|
-
|
|
1399
|
-
# We need to get more cores
|
|
1400
|
-
runtime_cores = float(cpu_spec)
|
|
1401
|
-
logger.info('Need to reschedule to get %s cores; have %s', runtime_cores, self.cores)
|
|
1418
|
+
runtime_cores = float(cpu_spec)
|
|
1402
1419
|
|
|
1403
1420
|
if runtime_bindings.has_binding('memory'):
|
|
1404
1421
|
# Get the memory requirement and convert to bytes
|
|
1405
1422
|
memory_spec: Union[int, str] = runtime_bindings.resolve('memory').value
|
|
1406
1423
|
if isinstance(memory_spec, str):
|
|
1407
1424
|
memory_spec = human2bytes(memory_spec)
|
|
1408
|
-
|
|
1409
|
-
if memory_spec > self.memory:
|
|
1410
|
-
# We need to go get more memory
|
|
1411
|
-
runtime_memory = memory_spec
|
|
1412
|
-
logger.info('Need to reschedule to get %s memory; have %s', runtime_memory, self.memory)
|
|
1425
|
+
runtime_memory = memory_spec
|
|
1413
1426
|
|
|
1414
1427
|
if runtime_bindings.has_binding('disks'):
|
|
1415
1428
|
# Miniwdl doesn't have this, but we need to be able to parse things like:
|
|
@@ -1445,9 +1458,7 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1445
1458
|
if spec_parts[2] == 'LOCAL':
|
|
1446
1459
|
logger.warning('Not rounding LOCAL disk to the nearest 375 GB; workflow execution will differ from Cromwell!')
|
|
1447
1460
|
total_bytes: float = convert_units(total_gb, 'GB')
|
|
1448
|
-
|
|
1449
|
-
runtime_disk = int(total_bytes)
|
|
1450
|
-
logger.info('Need to reschedule to get %s disk, have %s', runtime_disk, self.disk)
|
|
1461
|
+
runtime_disk = int(total_bytes)
|
|
1451
1462
|
|
|
1452
1463
|
if runtime_bindings.has_binding('gpuType') or runtime_bindings.has_binding('gpuCount') or runtime_bindings.has_binding('nvidiaDriverVersion'):
|
|
1453
1464
|
# We want to have GPUs
|
|
@@ -1467,69 +1478,145 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1467
1478
|
accelerator_spec['brand'] = gpu_brand
|
|
1468
1479
|
|
|
1469
1480
|
accelerator_requirement = parse_accelerator(accelerator_spec)
|
|
1470
|
-
|
|
1471
|
-
|
|
1472
|
-
|
|
1473
|
-
|
|
1474
|
-
|
|
1475
|
-
|
|
1476
|
-
|
|
1477
|
-
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
|
|
1481
|
-
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
|
|
1487
|
-
|
|
1488
|
-
|
|
1489
|
-
|
|
1490
|
-
|
|
1491
|
-
|
|
1492
|
-
|
|
1493
|
-
|
|
1494
|
-
|
|
1495
|
-
|
|
1496
|
-
|
|
1497
|
-
|
|
1498
|
-
|
|
1499
|
-
# And return its result.
|
|
1500
|
-
return rescheduled.rv()
|
|
1501
|
-
|
|
1502
|
-
# If we get here we have all the resources we need, so run the task
|
|
1503
|
-
|
|
1504
|
-
if shutil.which('singularity'):
|
|
1481
|
+
runtime_accelerators = [accelerator_requirement]
|
|
1482
|
+
|
|
1483
|
+
# Schedule to get resources. Pass along the bindings from evaluating all the inputs and decls, and the runtime, with files virtualized.
|
|
1484
|
+
run_job = WDLTaskJob(self._task, virtualize_files(bindings, standard_library), virtualize_files(runtime_bindings, standard_library), self._task_id, self._namespace, self._task_path, cores=runtime_cores or self.cores, memory=runtime_memory or self.memory, disk=runtime_disk or self.disk, accelerators=runtime_accelerators or self.accelerators, wdl_options=self._wdl_options)
|
|
1485
|
+
# Run that as a child
|
|
1486
|
+
self.addChild(run_job)
|
|
1487
|
+
|
|
1488
|
+
# Give it our postprocessing steps
|
|
1489
|
+
self.defer_postprocessing(run_job)
|
|
1490
|
+
|
|
1491
|
+
# And return its result.
|
|
1492
|
+
return run_job.rv()
|
|
1493
|
+
|
|
1494
|
+
|
|
1495
|
+
|
|
1496
|
+
class WDLTaskJob(WDLBaseJob):
|
|
1497
|
+
"""
|
|
1498
|
+
Job that runs a WDL task.
|
|
1499
|
+
|
|
1500
|
+
Responsible for re-evaluating input declarations for unspecified inputs,
|
|
1501
|
+
evaluating the runtime section, re-scheduling if resources are not
|
|
1502
|
+
available, running any command, and evaluating the outputs.
|
|
1503
|
+
|
|
1504
|
+
All bindings are in terms of task-internal names.
|
|
1505
|
+
"""
|
|
1506
|
+
|
|
1507
|
+
def __init__(self, task: WDL.Tree.Task, task_internal_bindings: Promised[WDLBindings], runtime_bindings: Promised[WDLBindings], task_id: List[str], namespace: str, task_path: str, **kwargs: Any) -> None:
|
|
1508
|
+
"""
|
|
1509
|
+
Make a new job to run a task.
|
|
1505
1510
|
|
|
1511
|
+
:param namespace: The namespace that the task's *contents* exist in.
|
|
1512
|
+
The caller has alredy added the task's own name.
|
|
1513
|
+
|
|
1514
|
+
:param task_path: Like the namespace, but including subscript numbers
|
|
1515
|
+
for scatters.
|
|
1516
|
+
"""
|
|
1517
|
+
|
|
1518
|
+
# This job should not be local because it represents a real workflow task.
|
|
1519
|
+
# TODO: Instead of re-scheduling with more resources, add a local
|
|
1520
|
+
# "wrapper" job like CWL uses to determine the actual requirements.
|
|
1521
|
+
super().__init__(unitName=task_path + ".command", displayName=namespace + ".command", local=False, **kwargs)
|
|
1522
|
+
|
|
1523
|
+
logger.info("Preparing to run task %s as %s", task.name, namespace)
|
|
1524
|
+
|
|
1525
|
+
self._task = task
|
|
1526
|
+
self._task_internal_bindings = task_internal_bindings
|
|
1527
|
+
self._runtime_bindings = runtime_bindings
|
|
1528
|
+
self._task_id = task_id
|
|
1529
|
+
self._namespace = namespace
|
|
1530
|
+
self._task_path = task_path
|
|
1531
|
+
|
|
1532
|
+
def can_fake_root(self) -> bool:
|
|
1533
|
+
"""
|
|
1534
|
+
Determine if --fakeroot is likely to work for Singularity.
|
|
1535
|
+
"""
|
|
1536
|
+
|
|
1537
|
+
# We need to have an entry for our user in /etc/subuid to grant us a range of UIDs to use, for fakeroot to work.
|
|
1538
|
+
try:
|
|
1539
|
+
subuid_file = open('/etc/subuid')
|
|
1540
|
+
except OSError as e:
|
|
1541
|
+
logger.warning('Cannot open /etc/subuid due to %s; assuming no subuids available', e)
|
|
1542
|
+
return False
|
|
1543
|
+
username = get_user_name()
|
|
1544
|
+
for line in subuid_file:
|
|
1545
|
+
if line.split(':')[0].strip() == username:
|
|
1546
|
+
# We have a line assigning subuids
|
|
1547
|
+
return True
|
|
1548
|
+
# If there is no line, we have no subuids
|
|
1549
|
+
logger.warning('No subuids are assigned to %s; cannot fake root.', username)
|
|
1550
|
+
return False
|
|
1551
|
+
|
|
1552
|
+
def can_mount_proc(self) -> bool:
|
|
1553
|
+
"""
|
|
1554
|
+
Determine if --containall will work for Singularity. On Kubernetes, this will result in operation not permitted
|
|
1555
|
+
See: https://github.com/apptainer/singularity/issues/5857
|
|
1556
|
+
|
|
1557
|
+
So if Kubernetes is detected, return False
|
|
1558
|
+
:return: bool
|
|
1559
|
+
"""
|
|
1560
|
+
return "KUBERNETES_SERVICE_HOST" not in os.environ
|
|
1561
|
+
|
|
1562
|
+
@report_wdl_errors("run task command")
|
|
1563
|
+
def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
|
|
1564
|
+
"""
|
|
1565
|
+
Actually run the task.
|
|
1566
|
+
"""
|
|
1567
|
+
super().run(file_store)
|
|
1568
|
+
logger.info("Running task command for %s (%s) called as %s", self._task.name, self._task_id, self._namespace)
|
|
1569
|
+
|
|
1570
|
+
# Set up the WDL standard library
|
|
1571
|
+
# UUID to use for virtualizing files
|
|
1572
|
+
standard_library = ToilWDLStdLibBase(file_store)
|
|
1573
|
+
|
|
1574
|
+
# Get the bindings from after the input section
|
|
1575
|
+
bindings = unwrap(self._task_internal_bindings)
|
|
1576
|
+
# And the bindings from evaluating the runtime section
|
|
1577
|
+
runtime_bindings = unwrap(self._runtime_bindings)
|
|
1578
|
+
|
|
1579
|
+
# We have all the resources we need, so run the task
|
|
1580
|
+
|
|
1581
|
+
if shutil.which('singularity') and self._wdl_options.get("container") in ["singularity", "auto"]:
|
|
1506
1582
|
# Prepare to use Singularity. We will need plenty of space to
|
|
1507
1583
|
# download images.
|
|
1508
|
-
|
|
1509
|
-
|
|
1510
|
-
|
|
1584
|
+
# Default the Singularity and MiniWDL cache directories. This sets the cache to the same place as
|
|
1585
|
+
# Singularity/MiniWDL's default cache directory
|
|
1586
|
+
# With launch-cluster, the singularity and miniwdl cache is set to /var/lib/toil in abstractProvisioner.py
|
|
1587
|
+
# A current limitation with the singularity/miniwdl cache is it cannot check for image updates if the
|
|
1588
|
+
# filename is the same
|
|
1589
|
+
singularity_cache = os.path.join(os.path.expanduser("~"), ".singularity")
|
|
1590
|
+
miniwdl_cache = os.path.join(os.path.expanduser("~"), ".cache/miniwdl")
|
|
1591
|
+
|
|
1592
|
+
# Cache Singularity's layers somewhere known to have space
|
|
1593
|
+
os.environ['SINGULARITY_CACHEDIR'] = os.environ.get("SINGULARITY_CACHEDIR", singularity_cache)
|
|
1594
|
+
|
|
1511
1595
|
# Make sure it exists.
|
|
1512
1596
|
os.makedirs(os.environ['SINGULARITY_CACHEDIR'], exist_ok=True)
|
|
1513
1597
|
|
|
1514
|
-
|
|
1515
|
-
|
|
1516
|
-
|
|
1517
|
-
|
|
1518
|
-
|
|
1598
|
+
# Cache Singularity images for the workflow on this machine.
|
|
1599
|
+
# Since MiniWDL does only within-process synchronization for pulls,
|
|
1600
|
+
# we also will need to pre-pull one image into here at a time.
|
|
1601
|
+
os.environ['MINIWDL__SINGULARITY__IMAGE_CACHE'] = os.environ.get("MINIWDL__SINGULARITY__IMAGE_CACHE", miniwdl_cache)
|
|
1602
|
+
|
|
1519
1603
|
# Make sure it exists.
|
|
1520
1604
|
os.makedirs(os.environ['MINIWDL__SINGULARITY__IMAGE_CACHE'], exist_ok=True)
|
|
1521
1605
|
|
|
1522
1606
|
# Run containers with Singularity
|
|
1523
1607
|
TaskContainerImplementation: Type[TaskContainer] = SingularityContainer
|
|
1524
|
-
|
|
1608
|
+
elif self._wdl_options.get("container") in ["docker", "auto"]:
|
|
1525
1609
|
# Run containers with Docker
|
|
1610
|
+
# TODO: Poll if it is available and don't just try and fail.
|
|
1526
1611
|
TaskContainerImplementation = SwarmContainer
|
|
1527
|
-
if
|
|
1612
|
+
if runtime_bindings.has_binding('gpuType') or runtime_bindings.has_binding('gpuCount') or runtime_bindings.has_binding('nvidiaDriverVersion'):
|
|
1528
1613
|
# Complain to the user that this is unlikely to work.
|
|
1529
|
-
logger.warning("Running job that
|
|
1530
|
-
"
|
|
1614
|
+
logger.warning("Running job that might need accelerators with Docker. "
|
|
1615
|
+
"Accelerator and GPU support "
|
|
1531
1616
|
"is not yet implemented in the MiniWDL Docker "
|
|
1532
1617
|
"containerization implementation.")
|
|
1618
|
+
else:
|
|
1619
|
+
raise RuntimeError(f"Could not find a working container engine to use; told to use {self._wdl_options.get('container')}")
|
|
1533
1620
|
|
|
1534
1621
|
# Set up the MiniWDL container running stuff
|
|
1535
1622
|
miniwdl_logger = logging.getLogger("MiniWDLContainers")
|
|
@@ -1597,6 +1684,10 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1597
1684
|
# We can't fake root so don't try.
|
|
1598
1685
|
command_line.remove('--fakeroot')
|
|
1599
1686
|
|
|
1687
|
+
# If on Kubernetes and proc cannot be mounted, get rid of --containall
|
|
1688
|
+
if '--containall' in command_line and not self.can_mount_proc():
|
|
1689
|
+
command_line.remove('--containall')
|
|
1690
|
+
|
|
1600
1691
|
extra_flags: Set[str] = set()
|
|
1601
1692
|
accelerators_needed: Optional[List[AcceleratorRequirement]] = self.accelerators
|
|
1602
1693
|
if accelerators_needed is not None:
|
|
@@ -1624,7 +1715,7 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1624
1715
|
task_container._run_invocation = patched_run_invocation # type: ignore
|
|
1625
1716
|
|
|
1626
1717
|
# Show the runtime info to the container
|
|
1627
|
-
task_container.process_runtime(miniwdl_logger, {binding.name: binding.value for binding in runtime_bindings})
|
|
1718
|
+
task_container.process_runtime(miniwdl_logger, {binding.name: binding.value for binding in devirtualize_files(runtime_bindings, standard_library)})
|
|
1628
1719
|
|
|
1629
1720
|
# Tell the container to take up all these files. It will assign
|
|
1630
1721
|
# them all new paths in task_container.input_path_map which we can
|
|
@@ -1638,12 +1729,43 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1638
1729
|
|
|
1639
1730
|
# Make a new standard library for evaluating the command specifically, which only deals with in-container paths and out-of-container paths.
|
|
1640
1731
|
command_library = ToilWDLStdLibTaskCommand(file_store, task_container)
|
|
1641
|
-
|
|
1642
|
-
|
|
1643
|
-
|
|
1732
|
+
|
|
1733
|
+
def hacky_dedent(text: str) -> str:
|
|
1734
|
+
"""
|
|
1735
|
+
Guess what result we would have gotten if we dedented the
|
|
1736
|
+
command before substituting placeholder expressions, given the
|
|
1737
|
+
command after substituting placeholder expressions. Workaround
|
|
1738
|
+
for mimicking MiniWDL making us also suffer from
|
|
1739
|
+
<https://github.com/chanzuckerberg/miniwdl/issues/674>.
|
|
1740
|
+
"""
|
|
1741
|
+
|
|
1742
|
+
# First just run MiniWDL's dedent
|
|
1743
|
+
# Work around wrong types from MiniWDL. See <https://github.com/chanzuckerberg/miniwdl/issues/665>
|
|
1744
|
+
dedent = cast(Callable[[str], Tuple[int, str]], strip_leading_whitespace)
|
|
1745
|
+
|
|
1746
|
+
text = dedent(text)[1]
|
|
1747
|
+
|
|
1748
|
+
# But this can still leave dedenting to do. Find the first
|
|
1749
|
+
# not-all-whitespace line and get its leading whitespace.
|
|
1750
|
+
to_strip: Optional[str] = None
|
|
1751
|
+
for line in text.split("\n"):
|
|
1752
|
+
if len(line.strip()) > 0:
|
|
1753
|
+
# This is the first not-all-whitespace line.
|
|
1754
|
+
# Drop the leading whitespace.
|
|
1755
|
+
rest = line.lstrip()
|
|
1756
|
+
# Grab the part that gets removed by lstrip
|
|
1757
|
+
to_strip = line[0:(len(line) - len(rest))]
|
|
1758
|
+
break
|
|
1759
|
+
if to_strip is None or len(to_strip) == 0:
|
|
1760
|
+
# Nothing to cut
|
|
1761
|
+
return text
|
|
1762
|
+
|
|
1763
|
+
# Cut to_strip off each line that it appears at the start of.
|
|
1764
|
+
return "\n".join((line.removeprefix(to_strip) for line in text.split("\n")))
|
|
1765
|
+
|
|
1644
1766
|
|
|
1645
1767
|
# Work out the command string, and unwrap it
|
|
1646
|
-
command_string: str =
|
|
1768
|
+
command_string: str = hacky_dedent(evaluate_named_expression(self._task, "command", WDL.Type.String(), self._task.command, contained_bindings, command_library).coerce(WDL.Type.String()).value)
|
|
1647
1769
|
|
|
1648
1770
|
# Grab the standard out and error paths. MyPy complains if we call
|
|
1649
1771
|
# them because in the current MiniWDL version they are untyped.
|
|
@@ -1668,12 +1790,37 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1668
1790
|
logger.info('Executing command in %s: %s', task_container, command_string)
|
|
1669
1791
|
try:
|
|
1670
1792
|
task_container.run(miniwdl_logger, command_string)
|
|
1671
|
-
|
|
1793
|
+
except Exception:
|
|
1672
1794
|
if os.path.exists(host_stderr_txt):
|
|
1673
|
-
|
|
1674
|
-
|
|
1675
|
-
|
|
1795
|
+
size = os.path.getsize(host_stderr_txt)
|
|
1796
|
+
logger.error('Failed task left standard error at %s of %d bytes', host_stderr_txt, size)
|
|
1797
|
+
if size > 0:
|
|
1798
|
+
# Send the whole error stream.
|
|
1799
|
+
file_store.log_user_stream(self._task_path + '.stderr', open(host_stderr_txt, 'rb'))
|
|
1800
|
+
if logger.isEnabledFor(logging.DEBUG):
|
|
1801
|
+
logger.debug("MiniWDL already logged standard error")
|
|
1802
|
+
else:
|
|
1803
|
+
# At debug level, MiniWDL itself logs command error lines.
|
|
1804
|
+
# But otherwise we just dump into StatsAndLogging;
|
|
1805
|
+
# we also want the messages in the job log that
|
|
1806
|
+
# gets printed at the end of the workflow. So log
|
|
1807
|
+
# the error log ourselves.
|
|
1808
|
+
logger.error("====TASK ERROR LOG====")
|
|
1809
|
+
for line in open(host_stderr_txt, 'r', errors="replace"):
|
|
1810
|
+
logger.error("> %s", line.rstrip('\n'))
|
|
1811
|
+
logger.error("====TASK ERROR LOG====")
|
|
1676
1812
|
|
|
1813
|
+
if os.path.exists(host_stdout_txt):
|
|
1814
|
+
size = os.path.getsize(host_stdout_txt)
|
|
1815
|
+
logger.info('Failed task left standard output at %s of %d bytes', host_stdout_txt, size)
|
|
1816
|
+
if size > 0:
|
|
1817
|
+
# Save the whole output stream.
|
|
1818
|
+
# TODO: We can't tell if this was supposed to be
|
|
1819
|
+
# captured. It might really be huge binary data.
|
|
1820
|
+
file_store.log_user_stream(self._task_path + '.stdout', open(host_stdout_txt, 'rb'))
|
|
1821
|
+
|
|
1822
|
+
# Keep crashing
|
|
1823
|
+
raise
|
|
1677
1824
|
else:
|
|
1678
1825
|
# We need to fake stdout and stderr, since nothing ran but the
|
|
1679
1826
|
# standard library lets you grab them. TODO: Can these be None?
|
|
@@ -1690,9 +1837,26 @@ class WDLTaskJob(WDLBaseJob):
|
|
|
1690
1837
|
outputs_library = ToilWDLStdLibTaskOutputs(file_store, host_stdout_txt, host_stderr_txt, current_directory_override=workdir_in_container)
|
|
1691
1838
|
output_bindings = evaluate_output_decls(self._task.outputs, bindings, outputs_library)
|
|
1692
1839
|
|
|
1840
|
+
# Now we know if the standard output and error were sent somewhere by
|
|
1841
|
+
# the workflow. If not, we should report them to the leader.
|
|
1842
|
+
|
|
1693
1843
|
# Drop any files from the output which don't actually exist
|
|
1694
1844
|
output_bindings = drop_missing_files(output_bindings, current_directory_override=workdir_in_container)
|
|
1695
1845
|
|
|
1846
|
+
if not outputs_library.stderr_used() and os.path.exists(host_stderr_txt):
|
|
1847
|
+
size = os.path.getsize(host_stderr_txt)
|
|
1848
|
+
logger.info('Unused standard error at %s of %d bytes', host_stderr_txt, size)
|
|
1849
|
+
if size > 0:
|
|
1850
|
+
# Save the whole error stream because the workflow didn't capture it.
|
|
1851
|
+
file_store.log_user_stream(self._task_path + '.stderr', open(host_stderr_txt, 'rb'))
|
|
1852
|
+
|
|
1853
|
+
if not outputs_library.stdout_used() and os.path.exists(host_stdout_txt):
|
|
1854
|
+
size = os.path.getsize(host_stdout_txt)
|
|
1855
|
+
logger.info('Unused standard output at %s of %d bytes', host_stdout_txt, size)
|
|
1856
|
+
if size > 0:
|
|
1857
|
+
# Save the whole output stream because the workflow didn't capture it.
|
|
1858
|
+
file_store.log_user_stream(self._task_path + '.stdout', open(host_stdout_txt, 'rb'))
|
|
1859
|
+
|
|
1696
1860
|
# TODO: Check the output bindings against the types of the decls so we
|
|
1697
1861
|
# can tell if we have a null in a value that is supposed to not be
|
|
1698
1862
|
# nullable. We can't just look at the types on the values themselves
|
|
@@ -1711,15 +1875,16 @@ class WDLWorkflowNodeJob(WDLBaseJob):
|
|
|
1711
1875
|
Job that evaluates a WDL workflow node.
|
|
1712
1876
|
"""
|
|
1713
1877
|
|
|
1714
|
-
def __init__(self, node: WDL.Tree.WorkflowNode, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str,
|
|
1878
|
+
def __init__(self, node: WDL.Tree.WorkflowNode, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, task_path: str, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
|
|
1715
1879
|
"""
|
|
1716
1880
|
Make a new job to run a workflow node to completion.
|
|
1717
1881
|
"""
|
|
1718
|
-
super().__init__(unitName=node.workflow_node_id, displayName=node.workflow_node_id,
|
|
1882
|
+
super().__init__(unitName=node.workflow_node_id, displayName=node.workflow_node_id, wdl_options=wdl_options or {}, **kwargs)
|
|
1719
1883
|
|
|
1720
1884
|
self._node = node
|
|
1721
1885
|
self._prev_node_results = prev_node_results
|
|
1722
1886
|
self._namespace = namespace
|
|
1887
|
+
self._task_path = task_path
|
|
1723
1888
|
|
|
1724
1889
|
if isinstance(self._node, WDL.Tree.Call):
|
|
1725
1890
|
logger.debug("Preparing job for call node %s", self._node.workflow_node_id)
|
|
@@ -1735,7 +1900,7 @@ class WDLWorkflowNodeJob(WDLBaseJob):
|
|
|
1735
1900
|
# Combine the bindings we get from previous jobs
|
|
1736
1901
|
incoming_bindings = combine_bindings(unwrap_all(self._prev_node_results))
|
|
1737
1902
|
# Set up the WDL standard library
|
|
1738
|
-
standard_library = ToilWDLStdLibBase(file_store, execution_dir=self.
|
|
1903
|
+
standard_library = ToilWDLStdLibBase(file_store, execution_dir=self._wdl_options.get("execution_dir"))
|
|
1739
1904
|
with monkeypatch_coerce(standard_library):
|
|
1740
1905
|
if isinstance(self._node, WDL.Tree.Decl):
|
|
1741
1906
|
# This is a variable assignment
|
|
@@ -1763,11 +1928,11 @@ class WDLWorkflowNodeJob(WDLBaseJob):
|
|
|
1763
1928
|
|
|
1764
1929
|
if isinstance(self._node.callee, WDL.Tree.Workflow):
|
|
1765
1930
|
# This is a call of a workflow
|
|
1766
|
-
subjob: WDLBaseJob = WDLWorkflowJob(self._node.callee, [input_bindings, passed_down_bindings], self._node.callee_id, f'{self._namespace}.{self._node.name}', self.
|
|
1931
|
+
subjob: WDLBaseJob = WDLWorkflowJob(self._node.callee, [input_bindings, passed_down_bindings], self._node.callee_id, f'{self._namespace}.{self._node.name}', f'{self._task_path}.{self._node.name}', wdl_options=self._wdl_options)
|
|
1767
1932
|
self.addChild(subjob)
|
|
1768
1933
|
elif isinstance(self._node.callee, WDL.Tree.Task):
|
|
1769
1934
|
# This is a call of a task
|
|
1770
|
-
subjob =
|
|
1935
|
+
subjob = WDLTaskWrapperJob(self._node.callee, [input_bindings, passed_down_bindings], self._node.callee_id, f'{self._namespace}.{self._node.name}', f'{self._task_path}.{self._node.name}', wdl_options=self._wdl_options)
|
|
1771
1936
|
self.addChild(subjob)
|
|
1772
1937
|
else:
|
|
1773
1938
|
raise WDL.Error.InvalidType(self._node, "Cannot call a " + str(type(self._node.callee)))
|
|
@@ -1778,14 +1943,14 @@ class WDLWorkflowNodeJob(WDLBaseJob):
|
|
|
1778
1943
|
self.defer_postprocessing(subjob)
|
|
1779
1944
|
return subjob.rv()
|
|
1780
1945
|
elif isinstance(self._node, WDL.Tree.Scatter):
|
|
1781
|
-
subjob = WDLScatterJob(self._node, [incoming_bindings], self._namespace, self.
|
|
1946
|
+
subjob = WDLScatterJob(self._node, [incoming_bindings], self._namespace, self._task_path, wdl_options=self._wdl_options)
|
|
1782
1947
|
self.addChild(subjob)
|
|
1783
1948
|
# Scatters don't really make a namespace, just kind of a scope?
|
|
1784
1949
|
# TODO: Let stuff leave scope!
|
|
1785
1950
|
self.defer_postprocessing(subjob)
|
|
1786
1951
|
return subjob.rv()
|
|
1787
1952
|
elif isinstance(self._node, WDL.Tree.Conditional):
|
|
1788
|
-
subjob = WDLConditionalJob(self._node, [incoming_bindings], self._namespace, self.
|
|
1953
|
+
subjob = WDLConditionalJob(self._node, [incoming_bindings], self._namespace, self._task_path, wdl_options=self._wdl_options)
|
|
1789
1954
|
self.addChild(subjob)
|
|
1790
1955
|
# Conditionals don't really make a namespace, just kind of a scope?
|
|
1791
1956
|
# TODO: Let stuff leave scope!
|
|
@@ -1801,11 +1966,11 @@ class WDLWorkflowNodeListJob(WDLBaseJob):
|
|
|
1801
1966
|
workflows or tasks or sections.
|
|
1802
1967
|
"""
|
|
1803
1968
|
|
|
1804
|
-
def __init__(self, nodes: List[WDL.Tree.WorkflowNode], prev_node_results: Sequence[Promised[WDLBindings]], namespace: str,
|
|
1969
|
+
def __init__(self, nodes: List[WDL.Tree.WorkflowNode], prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
|
|
1805
1970
|
"""
|
|
1806
1971
|
Make a new job to run a list of workflow nodes to completion.
|
|
1807
1972
|
"""
|
|
1808
|
-
super().__init__(unitName=nodes[0].workflow_node_id + '+', displayName=nodes[0].workflow_node_id + '+',
|
|
1973
|
+
super().__init__(unitName=nodes[0].workflow_node_id + '+', displayName=nodes[0].workflow_node_id + '+', wdl_options=wdl_options, **kwargs)
|
|
1809
1974
|
|
|
1810
1975
|
self._nodes = nodes
|
|
1811
1976
|
self._prev_node_results = prev_node_results
|
|
@@ -1825,7 +1990,7 @@ class WDLWorkflowNodeListJob(WDLBaseJob):
|
|
|
1825
1990
|
# Combine the bindings we get from previous jobs
|
|
1826
1991
|
current_bindings = combine_bindings(unwrap_all(self._prev_node_results))
|
|
1827
1992
|
# Set up the WDL standard library
|
|
1828
|
-
standard_library = ToilWDLStdLibBase(file_store, execution_dir=self.
|
|
1993
|
+
standard_library = ToilWDLStdLibBase(file_store, execution_dir=self._wdl_options.get("execution_dir"))
|
|
1829
1994
|
|
|
1830
1995
|
with monkeypatch_coerce(standard_library):
|
|
1831
1996
|
for node in self._nodes:
|
|
@@ -2005,13 +2170,14 @@ class WDLSectionJob(WDLBaseJob):
|
|
|
2005
2170
|
Job that can create more graph for a section of the workflow.
|
|
2006
2171
|
"""
|
|
2007
2172
|
|
|
2008
|
-
def __init__(self, namespace: str,
|
|
2173
|
+
def __init__(self, namespace: str, task_path: str, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
|
|
2009
2174
|
"""
|
|
2010
2175
|
Make a WDLSectionJob where the interior runs in the given namespace,
|
|
2011
2176
|
starting with the root workflow.
|
|
2012
2177
|
"""
|
|
2013
|
-
super().__init__(
|
|
2178
|
+
super().__init__(wdl_options=wdl_options, **kwargs)
|
|
2014
2179
|
self._namespace = namespace
|
|
2180
|
+
self._task_path = task_path
|
|
2015
2181
|
|
|
2016
2182
|
@staticmethod
|
|
2017
2183
|
def coalesce_nodes(order: List[str], section_graph: WDLWorkflowGraph) -> List[List[str]]:
|
|
@@ -2079,7 +2245,7 @@ class WDLSectionJob(WDLBaseJob):
|
|
|
2079
2245
|
|
|
2080
2246
|
|
|
2081
2247
|
|
|
2082
|
-
def create_subgraph(self, nodes: Sequence[WDL.Tree.WorkflowNode], gather_nodes: Sequence[WDL.Tree.Gather], environment: WDLBindings, local_environment: Optional[WDLBindings] = None) -> WDLBaseJob:
|
|
2248
|
+
def create_subgraph(self, nodes: Sequence[WDL.Tree.WorkflowNode], gather_nodes: Sequence[WDL.Tree.Gather], environment: WDLBindings, local_environment: Optional[WDLBindings] = None, subscript: Optional[int] = None) -> WDLBaseJob:
|
|
2083
2249
|
"""
|
|
2084
2250
|
Make a Toil job to evaluate a subgraph inside a workflow or workflow
|
|
2085
2251
|
section.
|
|
@@ -2095,8 +2261,16 @@ class WDLSectionJob(WDLBaseJob):
|
|
|
2095
2261
|
:param local_environment: Bindings in this environment will be
|
|
2096
2262
|
used to evaluate the subgraph but will go out of scope
|
|
2097
2263
|
at the end of the section.
|
|
2264
|
+
:param subscript: If the subgraph is being evaluated multiple times,
|
|
2265
|
+
this should be a disambiguating integer for logging.
|
|
2098
2266
|
"""
|
|
2099
2267
|
|
|
2268
|
+
# Work out what to call what we are working on
|
|
2269
|
+
task_path = self._task_path
|
|
2270
|
+
if subscript is not None:
|
|
2271
|
+
# We need to include a scatter loop number.
|
|
2272
|
+
task_path += f'.{subscript}'
|
|
2273
|
+
|
|
2100
2274
|
if local_environment is not None:
|
|
2101
2275
|
# Bring local environment into scope
|
|
2102
2276
|
environment = combine_bindings([environment, local_environment])
|
|
@@ -2156,10 +2330,10 @@ class WDLSectionJob(WDLBaseJob):
|
|
|
2156
2330
|
|
|
2157
2331
|
if len(node_ids) == 1:
|
|
2158
2332
|
# Make a one-node job
|
|
2159
|
-
job: WDLBaseJob = WDLWorkflowNodeJob(section_graph.get(node_ids[0]), rvs, self._namespace, self.
|
|
2333
|
+
job: WDLBaseJob = WDLWorkflowNodeJob(section_graph.get(node_ids[0]), rvs, self._namespace, task_path, wdl_options=self._wdl_options)
|
|
2160
2334
|
else:
|
|
2161
2335
|
# Make a multi-node job
|
|
2162
|
-
job = WDLWorkflowNodeListJob([section_graph.get(node_id) for node_id in node_ids], rvs, self._namespace, self.
|
|
2336
|
+
job = WDLWorkflowNodeListJob([section_graph.get(node_id) for node_id in node_ids], rvs, self._namespace, wdl_options=self._wdl_options)
|
|
2163
2337
|
for prev_job in prev_jobs:
|
|
2164
2338
|
# Connect up the happens-after relationships to make sure the
|
|
2165
2339
|
# return values are available.
|
|
@@ -2189,7 +2363,7 @@ class WDLSectionJob(WDLBaseJob):
|
|
|
2189
2363
|
leaf_rvs.append(environment)
|
|
2190
2364
|
# And to fill in bindings from code not executed in this instantiation
|
|
2191
2365
|
# with Null, and filter out stuff that should leave scope.
|
|
2192
|
-
sink = WDLCombineBindingsJob(leaf_rvs)
|
|
2366
|
+
sink = WDLCombineBindingsJob(leaf_rvs, wdl_options=self._wdl_options)
|
|
2193
2367
|
# It runs inside us
|
|
2194
2368
|
self.addChild(sink)
|
|
2195
2369
|
for leaf_job in toil_leaves.values():
|
|
@@ -2256,11 +2430,11 @@ class WDLScatterJob(WDLSectionJob):
|
|
|
2256
2430
|
instance of the body. If an instance of the body doesn't create a binding,
|
|
2257
2431
|
it gets a null value in the corresponding array.
|
|
2258
2432
|
"""
|
|
2259
|
-
def __init__(self, scatter: WDL.Tree.Scatter, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str,
|
|
2433
|
+
def __init__(self, scatter: WDL.Tree.Scatter, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, task_path: str, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
|
|
2260
2434
|
"""
|
|
2261
2435
|
Create a subtree that will run a WDL scatter. The scatter itself and the contents live in the given namespace.
|
|
2262
2436
|
"""
|
|
2263
|
-
super().__init__(namespace, **kwargs, unitName=scatter.workflow_node_id, displayName=scatter.workflow_node_id,
|
|
2437
|
+
super().__init__(namespace, task_path, **kwargs, unitName=scatter.workflow_node_id, displayName=scatter.workflow_node_id, wdl_options=wdl_options)
|
|
2264
2438
|
|
|
2265
2439
|
# Because we need to return the return value of the workflow, we need
|
|
2266
2440
|
# to return a Toil promise for the last/sink job in the workflow's
|
|
@@ -2297,7 +2471,7 @@ class WDLScatterJob(WDLSectionJob):
|
|
|
2297
2471
|
raise RuntimeError("The returned value from a scatter is not an Array type.")
|
|
2298
2472
|
|
|
2299
2473
|
scatter_jobs = []
|
|
2300
|
-
for item in scatter_value.value:
|
|
2474
|
+
for subscript, item in enumerate(scatter_value.value):
|
|
2301
2475
|
# Make an instantiation of our subgraph for each possible value of
|
|
2302
2476
|
# the variable. Make sure the variable is bound only for the
|
|
2303
2477
|
# duration of the body.
|
|
@@ -2306,7 +2480,7 @@ class WDLScatterJob(WDLSectionJob):
|
|
|
2306
2480
|
# TODO: We need to turn values() into a list because MyPy seems to
|
|
2307
2481
|
# think a dict_values isn't a Sequence. This is a waste of time to
|
|
2308
2482
|
# appease MyPy but probably better than a cast?
|
|
2309
|
-
scatter_jobs.append(self.create_subgraph(self._scatter.body, list(self._scatter.gathers.values()), bindings, local_bindings))
|
|
2483
|
+
scatter_jobs.append(self.create_subgraph(self._scatter.body, list(self._scatter.gathers.values()), bindings, local_bindings, subscript=subscript))
|
|
2310
2484
|
|
|
2311
2485
|
if len(scatter_jobs) == 0:
|
|
2312
2486
|
# No scattering is needed. We just need to bind all the names.
|
|
@@ -2326,7 +2500,7 @@ class WDLScatterJob(WDLSectionJob):
|
|
|
2326
2500
|
# of maybe-optional values. Each body execution will define names it
|
|
2327
2501
|
# doesn't make as nulls, so we don't have to worry about
|
|
2328
2502
|
# totally-missing names.
|
|
2329
|
-
gather_job = WDLArrayBindingsJob([j.rv() for j in scatter_jobs], bindings)
|
|
2503
|
+
gather_job = WDLArrayBindingsJob([j.rv() for j in scatter_jobs], bindings, wdl_options=self._wdl_options)
|
|
2330
2504
|
self.addChild(gather_job)
|
|
2331
2505
|
for j in scatter_jobs:
|
|
2332
2506
|
j.addFollowOn(gather_job)
|
|
@@ -2395,11 +2569,11 @@ class WDLConditionalJob(WDLSectionJob):
|
|
|
2395
2569
|
"""
|
|
2396
2570
|
Job that evaluates a conditional in a WDL workflow.
|
|
2397
2571
|
"""
|
|
2398
|
-
def __init__(self, conditional: WDL.Tree.Conditional, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str,
|
|
2572
|
+
def __init__(self, conditional: WDL.Tree.Conditional, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, task_path: str, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
|
|
2399
2573
|
"""
|
|
2400
2574
|
Create a subtree that will run a WDL conditional. The conditional itself and its contents live in the given namespace.
|
|
2401
2575
|
"""
|
|
2402
|
-
super().__init__(namespace, **kwargs, unitName=conditional.workflow_node_id, displayName=conditional.workflow_node_id,
|
|
2576
|
+
super().__init__(namespace, task_path, **kwargs, unitName=conditional.workflow_node_id, displayName=conditional.workflow_node_id, wdl_options=wdl_options)
|
|
2403
2577
|
|
|
2404
2578
|
# Once again we need to ship the whole body template to be instantiated
|
|
2405
2579
|
# into Toil jobs only if it will actually run.
|
|
@@ -2447,7 +2621,7 @@ class WDLWorkflowJob(WDLSectionJob):
|
|
|
2447
2621
|
Job that evaluates an entire WDL workflow.
|
|
2448
2622
|
"""
|
|
2449
2623
|
|
|
2450
|
-
def __init__(self, workflow: WDL.Tree.Workflow, prev_node_results: Sequence[Promised[WDLBindings]], workflow_id: List[str], namespace: str,
|
|
2624
|
+
def __init__(self, workflow: WDL.Tree.Workflow, prev_node_results: Sequence[Promised[WDLBindings]], workflow_id: List[str], namespace: str, task_path: str, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
|
|
2451
2625
|
"""
|
|
2452
2626
|
Create a subtree that will run a WDL workflow. The job returns the
|
|
2453
2627
|
return value of the workflow.
|
|
@@ -2455,7 +2629,7 @@ class WDLWorkflowJob(WDLSectionJob):
|
|
|
2455
2629
|
:param namespace: the namespace that the workflow's *contents* will be
|
|
2456
2630
|
in. Caller has already added the workflow's own name.
|
|
2457
2631
|
"""
|
|
2458
|
-
super().__init__(namespace,
|
|
2632
|
+
super().__init__(namespace, task_path, wdl_options=wdl_options, **kwargs)
|
|
2459
2633
|
|
|
2460
2634
|
# Because we need to return the return value of the workflow, we need
|
|
2461
2635
|
# to return a Toil promise for the last/sink job in the workflow's
|
|
@@ -2485,7 +2659,7 @@ class WDLWorkflowJob(WDLSectionJob):
|
|
|
2485
2659
|
# For a task we only see the inside-the-task namespace.
|
|
2486
2660
|
bindings = combine_bindings(unwrap_all(self._prev_node_results))
|
|
2487
2661
|
# Set up the WDL standard library
|
|
2488
|
-
standard_library = ToilWDLStdLibBase(file_store, execution_dir=self.
|
|
2662
|
+
standard_library = ToilWDLStdLibBase(file_store, execution_dir=self._wdl_options.get("execution_dir"))
|
|
2489
2663
|
|
|
2490
2664
|
if self._workflow.inputs:
|
|
2491
2665
|
with monkeypatch_coerce(standard_library):
|
|
@@ -2499,7 +2673,7 @@ class WDLWorkflowJob(WDLSectionJob):
|
|
|
2499
2673
|
if self._workflow.outputs != []: # Compare against empty list as None means there should be outputs
|
|
2500
2674
|
# Either the output section is declared and nonempty or it is not declared
|
|
2501
2675
|
# Add evaluating the outputs after the sink
|
|
2502
|
-
outputs_job = WDLOutputsJob(self._workflow, sink.rv(), self.
|
|
2676
|
+
outputs_job = WDLOutputsJob(self._workflow, sink.rv(), wdl_options=self._wdl_options)
|
|
2503
2677
|
sink.addFollowOn(outputs_job)
|
|
2504
2678
|
# Caller is responsible for making sure namespaces are applied
|
|
2505
2679
|
self.defer_postprocessing(outputs_job)
|
|
@@ -2514,11 +2688,11 @@ class WDLOutputsJob(WDLBaseJob):
|
|
|
2514
2688
|
|
|
2515
2689
|
Returns an environment with just the outputs bound, in no namespace.
|
|
2516
2690
|
"""
|
|
2517
|
-
def __init__(self, workflow: WDL.Tree.Workflow, bindings: Promised[WDLBindings],
|
|
2691
|
+
def __init__(self, workflow: WDL.Tree.Workflow, bindings: Promised[WDLBindings], wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any):
|
|
2518
2692
|
"""
|
|
2519
2693
|
Make a new WDLWorkflowOutputsJob for the given workflow, with the given set of bindings after its body runs.
|
|
2520
2694
|
"""
|
|
2521
|
-
super().__init__(
|
|
2695
|
+
super().__init__(wdl_options=wdl_options, **kwargs)
|
|
2522
2696
|
|
|
2523
2697
|
self._bindings = bindings
|
|
2524
2698
|
self._workflow = workflow
|
|
@@ -2548,7 +2722,7 @@ class WDLOutputsJob(WDLBaseJob):
|
|
|
2548
2722
|
else:
|
|
2549
2723
|
# Output section is declared and is nonempty, so evaluate normally
|
|
2550
2724
|
# Evaluate all the outputs in the normal, non-task-outputs library context
|
|
2551
|
-
standard_library = ToilWDLStdLibBase(file_store, execution_dir=self.
|
|
2725
|
+
standard_library = ToilWDLStdLibBase(file_store, execution_dir=self._wdl_options.get("execution_dir"))
|
|
2552
2726
|
# Combine the bindings from the previous job
|
|
2553
2727
|
output_bindings = evaluate_output_decls(self._workflow.outputs, unwrap(self._bindings), standard_library)
|
|
2554
2728
|
return self.postprocess(output_bindings)
|
|
@@ -2560,13 +2734,13 @@ class WDLRootJob(WDLSectionJob):
|
|
|
2560
2734
|
the workflow name; both forms are accepted.
|
|
2561
2735
|
"""
|
|
2562
2736
|
|
|
2563
|
-
def __init__(self, workflow: WDL.Tree.Workflow, inputs: WDLBindings,
|
|
2737
|
+
def __init__(self, workflow: WDL.Tree.Workflow, inputs: WDLBindings, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
|
|
2564
2738
|
"""
|
|
2565
2739
|
Create a subtree to run the workflow and namespace the outputs.
|
|
2566
2740
|
"""
|
|
2567
2741
|
|
|
2568
|
-
# The root workflow names the root namespace
|
|
2569
|
-
super().__init__(workflow.name,
|
|
2742
|
+
# The root workflow names the root namespace and task path.
|
|
2743
|
+
super().__init__(workflow.name, workflow.name, wdl_options=wdl_options, **kwargs)
|
|
2570
2744
|
|
|
2571
2745
|
self._workflow = workflow
|
|
2572
2746
|
self._inputs = inputs
|
|
@@ -2580,7 +2754,7 @@ class WDLRootJob(WDLSectionJob):
|
|
|
2580
2754
|
|
|
2581
2755
|
# Run the workflow. We rely on this to handle entering the input
|
|
2582
2756
|
# namespace if needed, or handling free-floating inputs.
|
|
2583
|
-
workflow_job = WDLWorkflowJob(self._workflow, [self._inputs], [self._workflow.name], self._namespace, self.
|
|
2757
|
+
workflow_job = WDLWorkflowJob(self._workflow, [self._inputs], [self._workflow.name], self._namespace, self._task_path, wdl_options=self._wdl_options)
|
|
2584
2758
|
workflow_job.then_namespace(self._namespace)
|
|
2585
2759
|
self.addChild(workflow_job)
|
|
2586
2760
|
self.defer_postprocessing(workflow_job)
|
|
@@ -2642,6 +2816,9 @@ def main() -> None:
|
|
|
2642
2816
|
# If we don't have a directory assigned, make one in the current directory.
|
|
2643
2817
|
output_directory: str = options.output_directory if options.output_directory else mkdtemp(prefix='wdl-out-', dir=os.getcwd())
|
|
2644
2818
|
|
|
2819
|
+
# Get the execution directory
|
|
2820
|
+
execution_dir = os.getcwd()
|
|
2821
|
+
|
|
2645
2822
|
with Toil(options) as toil:
|
|
2646
2823
|
if options.restart:
|
|
2647
2824
|
output_bindings = toil.restart()
|
|
@@ -2668,7 +2845,7 @@ def main() -> None:
|
|
|
2668
2845
|
raise WDL.Error.ValidationError(WDL.Error.SourcePosition(options.inputs_uri, inputs_abspath, e.lineno, e.colno, e.lineno, e.colno + 1), "Cannot parse input JSON: " + e.msg) from e
|
|
2669
2846
|
else:
|
|
2670
2847
|
inputs = {}
|
|
2671
|
-
|
|
2848
|
+
|
|
2672
2849
|
# Parse out the available and required inputs. Each key in the
|
|
2673
2850
|
# JSON ought to start with the workflow's name and then a .
|
|
2674
2851
|
# TODO: WDL's Bindings[] isn't variant in the right way, so we
|
|
@@ -2703,8 +2880,14 @@ def main() -> None:
|
|
|
2703
2880
|
# Get the execution directory
|
|
2704
2881
|
execution_dir = os.getcwd()
|
|
2705
2882
|
|
|
2883
|
+
# Configure workflow interpreter options
|
|
2884
|
+
wdl_options: Dict[str, str] = {}
|
|
2885
|
+
wdl_options["execution_dir"] = execution_dir
|
|
2886
|
+
wdl_options["container"] = options.container
|
|
2887
|
+
assert wdl_options.get("container") is not None
|
|
2888
|
+
|
|
2706
2889
|
# Run the workflow and get its outputs namespaced with the workflow name.
|
|
2707
|
-
root_job = WDLRootJob(document.workflow, input_bindings,
|
|
2890
|
+
root_job = WDLRootJob(document.workflow, input_bindings, wdl_options=wdl_options)
|
|
2708
2891
|
output_bindings = toil.start(root_job)
|
|
2709
2892
|
if not isinstance(output_bindings, WDL.Env.Bindings):
|
|
2710
2893
|
raise RuntimeError("The output of the WDL job is not a binding.")
|
|
@@ -2716,38 +2899,7 @@ def main() -> None:
|
|
|
2716
2899
|
'devirtualize' a file using the "toil" object instead of a filestore.
|
|
2717
2900
|
Returns its local path.
|
|
2718
2901
|
"""
|
|
2719
|
-
|
|
2720
|
-
if filename.startswith(TOIL_URI_SCHEME):
|
|
2721
|
-
# This is a reference to the Toil filestore.
|
|
2722
|
-
# Deserialize the FileID and required basename
|
|
2723
|
-
file_id, parent_id, file_basename = unpack_toil_uri(filename)
|
|
2724
|
-
else:
|
|
2725
|
-
# Parse the URL and extract the basename
|
|
2726
|
-
file_basename = os.path.basename(urlsplit(filename).path)
|
|
2727
|
-
|
|
2728
|
-
# Figure out where it should go.
|
|
2729
|
-
# If a UUID is included, it will be omitted
|
|
2730
|
-
# TODO: Deal with name collisions in the export directory
|
|
2731
|
-
dest_name = os.path.join(output_directory, file_basename)
|
|
2732
|
-
|
|
2733
|
-
if filename.startswith(TOIL_URI_SCHEME):
|
|
2734
|
-
# Export the file
|
|
2735
|
-
toil.export_file(file_id, dest_name)
|
|
2736
|
-
else:
|
|
2737
|
-
# Download to a local file with the right name and execute bit.
|
|
2738
|
-
# Open it exclusively
|
|
2739
|
-
with open(dest_name, 'xb') as dest_file:
|
|
2740
|
-
# And save to it
|
|
2741
|
-
size, executable = AbstractJobStore.read_from_url(filename, dest_file)
|
|
2742
|
-
if executable:
|
|
2743
|
-
# Set the execute bit in the file's permissions
|
|
2744
|
-
os.chmod(dest_name, os.stat(dest_name).st_mode | stat.S_IXUSR)
|
|
2745
|
-
|
|
2746
|
-
# And return where we put it
|
|
2747
|
-
return dest_name
|
|
2748
|
-
else:
|
|
2749
|
-
# We already had a path
|
|
2750
|
-
return filename
|
|
2902
|
+
return ToilWDLStdLibBase.devirtualze_to(filename, output_directory, toil, execution_dir)
|
|
2751
2903
|
|
|
2752
2904
|
# Make all the files local files
|
|
2753
2905
|
output_bindings = map_over_files_in_bindings(output_bindings, devirtualize_output)
|