toil 6.0.0__py3-none-any.whl → 6.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. toil/batchSystems/abstractBatchSystem.py +19 -4
  2. toil/batchSystems/abstractGridEngineBatchSystem.py +22 -22
  3. toil/batchSystems/cleanup_support.py +7 -3
  4. toil/batchSystems/lsf.py +7 -7
  5. toil/batchSystems/slurm.py +85 -14
  6. toil/bus.py +38 -0
  7. toil/common.py +20 -18
  8. toil/cwl/cwltoil.py +81 -63
  9. toil/exceptions.py +1 -1
  10. toil/fileStores/abstractFileStore.py +53 -4
  11. toil/fileStores/cachingFileStore.py +4 -20
  12. toil/fileStores/nonCachingFileStore.py +5 -14
  13. toil/job.py +46 -30
  14. toil/jobStores/abstractJobStore.py +21 -23
  15. toil/jobStores/aws/utils.py +5 -4
  16. toil/jobStores/fileJobStore.py +1 -1
  17. toil/leader.py +17 -14
  18. toil/lib/conversions.py +19 -0
  19. toil/lib/generatedEC2Lists.py +8 -8
  20. toil/lib/io.py +28 -2
  21. toil/lib/resources.py +8 -1
  22. toil/lib/threading.py +27 -12
  23. toil/options/common.py +5 -7
  24. toil/options/wdl.py +5 -0
  25. toil/provisioners/abstractProvisioner.py +8 -0
  26. toil/statsAndLogging.py +36 -8
  27. toil/test/batchSystems/test_slurm.py +21 -6
  28. toil/test/cactus/__init__.py +0 -0
  29. toil/test/cactus/test_cactus_integration.py +58 -0
  30. toil/test/cwl/cwlTest.py +243 -151
  31. toil/test/docs/scriptsTest.py +2 -2
  32. toil/test/jobStores/jobStoreTest.py +7 -5
  33. toil/test/lib/test_ec2.py +1 -1
  34. toil/test/options/__init__.py +13 -0
  35. toil/test/options/options.py +37 -0
  36. toil/test/provisioners/clusterTest.py +9 -8
  37. toil/test/utils/toilDebugTest.py +1 -1
  38. toil/test/utils/utilsTest.py +3 -3
  39. toil/test/wdl/wdltoil_test.py +91 -16
  40. toil/utils/toilDebugFile.py +1 -1
  41. toil/utils/toilStats.py +309 -266
  42. toil/utils/toilStatus.py +1 -1
  43. toil/version.py +9 -9
  44. toil/wdl/wdltoil.py +341 -189
  45. toil/worker.py +31 -16
  46. {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/METADATA +6 -7
  47. {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/RECORD +51 -47
  48. {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/LICENSE +0 -0
  49. {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/WHEEL +0 -0
  50. {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/entry_points.txt +0 -0
  51. {toil-6.0.0.dist-info → toil-6.1.0.dist-info}/top_level.txt +0 -0
toil/wdl/wdltoil.py CHANGED
@@ -110,7 +110,7 @@ F = TypeVar('F', bound=Callable[..., Any])
 def report_wdl_errors(task: str, exit: bool = False, log: Callable[[str], None] = logger.critical) -> Callable[[F], F]:
     """
     Create a decorator to report WDL errors with the given task message.
-
+
     Decorator can then be applied to a function, and if a WDL error happens it
     will say that it could not {task}.
     """
@@ -551,6 +551,18 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
         on the local host.
         """

+        return self.devirtualze_to(filename, self._file_store.localTempDir, self._file_store, self._execution_dir)
+
+    @staticmethod
+    def devirtualze_to(filename: str, dest_dir: str, file_source: Union[AbstractFileStore, Toil], execution_dir: Optional[str]) -> str:
+        """
+        Download or export a WDL virtualized filename/URL to the given directory.
+
+        Makes sure sibling files stay siblings and files with the same name don't clobber each other. Called from within this class for tasks, and statically at the end of the workflow for outputs.
+
+        Returns the local path to the file.
+        """
+
         # TODO: Support people doing path operations (join, split, get parent directory) on the virtualized filenames.
         # TODO: For task inputs, we are supposed to make sure to put things in the same directory if they came from the same directory. See <https://github.com/openwdl/wdl/blob/main/versions/1.0/SPEC.md#task-input-localization>
         if is_url(filename):
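A note on the refactor above: devirtualze_to is now a static method so the same localization logic can serve two callers, a running task (which holds an AbstractFileStore) and end-of-workflow output handling (which only has a Toil context). A minimal sketch of the two call shapes; the uri, file_store, and toil names here are illustrative stand-ins, not from this diff:

    # Inside a task: read the file through the job's file store.
    path = ToilWDLStdLibBase.devirtualze_to(uri, file_store.localTempDir, file_store, execution_dir=None)

    # At the end of the workflow: export through the Toil context instead.
    path = ToilWDLStdLibBase.devirtualze_to(uri, output_directory, toil, execution_dir=os.getcwd())

The isinstance checks in the hunks below select readGlobalFile() or export_file() accordingly.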
@@ -564,8 +576,8 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
             # Use UUID as folder name rather than a new temp folder to reduce internal clutter.
             # Put the UUID in the destination path in order for tasks to
             # see where to put files depending on their parents.
-            dir_path = os.path.join(self._file_store.localTempDir, parent_id)
-
+            dir_path = os.path.join(dest_dir, parent_id)
+
         else:
             # Parse the URL and extract the basename
             file_basename = os.path.basename(urlsplit(filename).path)
@@ -574,8 +586,8 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
             # in, not relative to the thing.
             parent_url = urljoin(filename, ".")
             # Turn it into a string we can make a directory for
-            dir_path = os.path.join(self._file_store.localTempDir, quote(parent_url, safe=''))
-
+            dir_path = os.path.join(dest_dir, quote(parent_url, safe=''))
+
         if not os.path.exists(dir_path):
             # Make sure the chosen directory exists
             os.mkdir(dir_path)
@@ -584,7 +596,13 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):

         if filename.startswith(TOIL_URI_SCHEME):
             # Get a local path to the file
-            result = self._file_store.readGlobalFile(file_id, dest_path)
+            if isinstance(file_source, AbstractFileStore):
+                # Read from the file store
+                result = file_source.readGlobalFile(file_id, dest_path)
+            elif isinstance(file_source, Toil):
+                # Read from the Toil context
+                file_source.export_file(file_id, dest_path)
+                result = dest_path
         else:
             # Download to a local file with the right name and execute bit.
             # Open it exclusively
@@ -600,8 +618,8 @@ class ToilWDLStdLibBase(WDL.StdLib.Base):
         # This is a local file
         # To support relative paths, join the execution dir and filename
         # if filename is already an abs path, join() will do nothing
-        if self._execution_dir is not None:
-            result = os.path.join(self._execution_dir, filename)
+        if execution_dir is not None:
+            result = os.path.join(execution_dir, filename)
         else:
             result = filename

@@ -712,10 +730,14 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
         # WDL.StdLib.TaskOutputs next.
         super().__init__(file_store)

-        # Remember task putput files
+        # Remember task output files
         self._stdout_path = stdout_path
         self._stderr_path = stderr_path

+        # Remember that the WDL code has not referenced them yet.
+        self._stdout_used = False
+        self._stderr_used = False
+
         # Remember current directory
         self._current_directory_override = current_directory_override

@@ -741,14 +763,28 @@ class ToilWDLStdLibTaskOutputs(ToilWDLStdLibBase, WDL.StdLib.TaskOutputs):
         """
         Get the standard output of the command that ran, as a WDL File, outside the container.
         """
+        self._stdout_used = True
         return WDL.Value.File(self._stdout_path)

+    def stdout_used(self) -> bool:
+        """
+        Return True if the standard output was read by the WDL.
+        """
+        return self._stdout_used
+
     def _stderr(self) -> WDL.Value.File:
         """
         Get the standard error of the command that ran, as a WDL File, outside the container.
         """
+        self._stderr_used = True
         return WDL.Value.File(self._stderr_path)

+    def stderr_used(self) -> bool:
+        """
+        Return True if the standard error was read by the WDL.
+        """
+        return self._stderr_used
+
     def _glob(self, pattern: WDL.Value.String) -> WDL.Value.Array:
         """
         Get a WDL Array of WDL Files left behind by the job that ran, matching the given glob pattern, outside the container.
@@ -1009,7 +1045,7 @@ def import_files(environment: WDLBindings, toil: Toil, path: Optional[List[str]]
                 # we have no auth.
                 logger.error("Something went wrong importing %s", candidate_uri)
                 raise
-
+
             if imported is None:
                 # Wasn't found there
                 continue
@@ -1022,7 +1058,7 @@ def import_files(environment: WDLBindings, toil: Toil, path: Optional[List[str]]
                 # We can't have files with no basename because we need to
                 # download them at that basename later.
                 raise RuntimeError(f"File {candidate_uri} has no basename and so cannot be a WDL File")
-
+
             # Was actually found
             if is_url(candidate_uri):
                 # Might be a file URI or other URI.
@@ -1184,9 +1220,11 @@ class WDLBaseJob(Job):
     null values for things not defined in a section. Post-processing operations
     can be added onto any job before it is saved, and will be applied as long
     as the job's run method calls postprocess().
+
+    Also responsible for remembering the Toil WDL configuration keys and values.
     """

-    def __init__(self, execution_dir: Optional[str] = None, **kwargs: Any) -> None:
+    def __init__(self, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
         """
         Make a WDL-related job.

@@ -1212,8 +1250,10 @@ class WDLBaseJob(Job):
         # may have coalesced postprocessing steps deferred by several levels of
         # jobs returning other jobs' promised RVs.
         self._postprocessing_steps: List[Tuple[str, Union[str, Promised[WDLBindings]]]] = []
+
+        self._wdl_options = wdl_options if wdl_options is not None else {}

-        self._execution_dir = execution_dir
+        assert self._wdl_options.get("container") is not None

     # TODO: We're not allowed by MyPy to override a method and widen the return
     # type, so this has to be Any.
@@ -1307,65 +1347,44 @@ class WDLBaseJob(Job):

         logger.debug("Assigned postprocessing steps from %s to %s", self, other)

-
-class WDLTaskJob(WDLBaseJob):
+class WDLTaskWrapperJob(WDLBaseJob):
     """
-    Job that runs a WDL task.
+    Job that determines the resources needed to run a WDL job.

     Responsible for evaluating the input declarations for unspecified inputs,
-    evaluating the runtime section, re-scheduling if resources are not
-    available, running any command, and evaluating the outputs.
+    evaluating the runtime section, and scheduling or chaining to the real WDL
+    job.

     All bindings are in terms of task-internal names.
     """

-    def __init__(self, task: WDL.Tree.Task, prev_node_results: Sequence[Promised[WDLBindings]], task_id: List[str], namespace: str, **kwargs: Any) -> None:
+    def __init__(self, task: WDL.Tree.Task, prev_node_results: Sequence[Promised[WDLBindings]], task_id: List[str], namespace: str, task_path: str, **kwargs: Any) -> None:
         """
-        Make a new job to run a task.
+        Make a new job to determine resources and run a task.

         :param namespace: The namespace that the task's *contents* exist in.
             The caller has alredy added the task's own name.
+
+        :param task_path: Like the namespace, but including subscript numbers
+            for scatters.
         """
+        super().__init__(unitName=task_path + ".inputs", displayName=namespace + ".inputs", local=True, **kwargs)

-        # This job should not be local because it represents a real workflow task.
-        # TODO: Instead of re-scheduling with more resources, add a local
-        # "wrapper" job like CWL uses to determine the actual requirements.
-        super().__init__(unitName=namespace, displayName=namespace, local=False, **kwargs)
-
-        logger.info("Preparing to run task %s as %s", task.name, namespace)
+        logger.info("Preparing to run task code for %s as %s", task.name, namespace)

         self._task = task
         self._prev_node_results = prev_node_results
         self._task_id = task_id
         self._namespace = namespace
+        self._task_path = task_path

-    def can_fake_root(self) -> bool:
-        """
-        Determie if --fakeroot is likely to work for Singularity.
-        """
-
-        # We need to have an entry for our user in /etc/subuid to grant us a range of UIDs to use, for fakeroot to work.
-        try:
-            subuid_file = open('/etc/subuid')
-        except OSError as e:
-            logger.warning('Cannot open /etc/subuid due to %s; assuming no subuids available', e)
-            return False
-        username = get_user_name()
-        for line in subuid_file:
-            if line.split(':')[0].strip() == username:
-                # We have a line assigning subuids
-                return True
-        # If there is no line, we have no subuids
-        logger.warning('No subuids are assigned to %s; cannot fake root.', username)
-        return False
-
-    @report_wdl_errors("run task")
+    @report_wdl_errors("evaluate task code")
     def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
         """
-        Actually run the task.
+        Evaluate inputs and runtime and schedule the task.
         """
         super().run(file_store)
-        logger.info("Running task %s (%s) called as %s", self._task.name, self._task_id, self._namespace)
+        logger.info("Evaluating inputs and runtime for task %s (%s) called as %s", self._task.name, self._task_id, self._namespace)

         # Combine the bindings we get from previous jobs.
         # For a task we are only passed the inside-the-task namespace.
@@ -1375,19 +1394,20 @@ class WDLTaskJob(WDLBaseJob):
         standard_library = ToilWDLStdLibBase(file_store)

         if self._task.inputs:
-            logger.debug("Evaluating task inputs")
+            logger.debug("Evaluating task code")
             for input_decl in self._task.inputs:
                 # Evaluate all the inputs that aren't pre-set
                 bindings = bindings.bind(input_decl.name, evaluate_defaultable_decl(input_decl, bindings, standard_library))
         for postinput_decl in self._task.postinputs:
-            # Evaluate all the postinput decls
+            # Evaluate all the postinput decls.
+            # We need these in order to evaluate the runtime.
+            # TODO: What if they wanted resources from the runtime?
            bindings = bindings.bind(postinput_decl.name, evaluate_defaultable_decl(postinput_decl, bindings, standard_library))

         # Evaluate the runtime section
         runtime_bindings = evaluate_call_inputs(self._task, self._task.runtime, bindings, standard_library)

-        # Fill these in with not-None if we need to bump up our resources from what we have available.
-        # TODO: Can this break out into a function somehow?
+        # Fill these in with not-None if the workflow asks for each resource.
         runtime_memory: Optional[int] = None
         runtime_cores: Optional[float] = None
         runtime_disk: Optional[int] = None
@@ -1395,21 +1415,14 @@ class WDLTaskJob(WDLBaseJob):

         if runtime_bindings.has_binding('cpu'):
             cpu_spec: int = runtime_bindings.resolve('cpu').value
-            if cpu_spec > self.cores:
-                # We need to get more cores
-                runtime_cores = float(cpu_spec)
-                logger.info('Need to reschedule to get %s cores; have %s', runtime_cores, self.cores)
+            runtime_cores = float(cpu_spec)

         if runtime_bindings.has_binding('memory'):
             # Get the memory requirement and convert to bytes
             memory_spec: Union[int, str] = runtime_bindings.resolve('memory').value
             if isinstance(memory_spec, str):
                 memory_spec = human2bytes(memory_spec)
-
-            if memory_spec > self.memory:
-                # We need to go get more memory
-                runtime_memory = memory_spec
-                logger.info('Need to reschedule to get %s memory; have %s', runtime_memory, self.memory)
+            runtime_memory = memory_spec

         if runtime_bindings.has_binding('disks'):
             # Miniwdl doesn't have this, but we need to be able to parse things like:
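For context on the memory handling above: human2bytes (from toil.lib.conversions) normalizes WDL memory strings to byte counts before they become job requirements. A quick sanity check, assuming the usual suffix conventions (binary GiB, decimal GB):

    from toil.lib.conversions import human2bytes

    # Assumed semantics: binary suffixes are powers of 1024, decimal of 1000.
    print(human2bytes("4 GiB"))  # expected: 4294967296
    print(human2bytes("8 GB"))   # expected: 8000000000

WDL runtime sections commonly spell memory either way, so both forms have to parse.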
@@ -1445,9 +1458,7 @@ class WDLTaskJob(WDLBaseJob):
                 if spec_parts[2] == 'LOCAL':
                     logger.warning('Not rounding LOCAL disk to the nearest 375 GB; workflow execution will differ from Cromwell!')
             total_bytes: float = convert_units(total_gb, 'GB')
-            if total_bytes > self.disk:
-                runtime_disk = int(total_bytes)
-                logger.info('Need to reschedule to get %s disk, have %s', runtime_disk, self.disk)
+            runtime_disk = int(total_bytes)

         if runtime_bindings.has_binding('gpuType') or runtime_bindings.has_binding('gpuCount') or runtime_bindings.has_binding('nvidiaDriverVersion'):
             # We want to have GPUs
@@ -1467,69 +1478,145 @@ class WDLTaskJob(WDLBaseJob):
             accelerator_spec['brand'] = gpu_brand

             accelerator_requirement = parse_accelerator(accelerator_spec)
-            if not accelerators_fully_satisfy(self.accelerators, accelerator_requirement, ignore=['model']):
-                # We don't meet the accelerator requirement.
-                # We are loose on the model here since, really, we *should*
-                # have either no accelerators or the accelerators we asked for.
-                # If the batch system is ignoring the model, we don't want to
-                # loop forever trying for the right model.
-                # TODO: Change models overall to a hint???
-                runtime_accelerators = [accelerator_requirement]
-                logger.info('Need to reschedule to get %s accelerators, have %s', runtime_accelerators, self.accelerators)
-
-        if runtime_cores or runtime_memory or runtime_disk or runtime_accelerators:
-            # We need to reschedule.
-            logger.info('Rescheduling %s with more resources', self)
-            # Make the new copy of this job with more resources.
-            # TODO: We don't pass along the input or runtime bindings, so they
-            # need to get re-evaluated. If we did pass them, we'd have to make
-            # sure to upload local files made by WDL code in the inputs/runtime
-            # sections and pass along that environment. Right now we just
-            # re-evaluate that whole section once we have the requested
-            # resources.
-            # TODO: What if the runtime section says we need a lot of disk to
-            # hold the large files that the inputs section is going to write???
-            rescheduled = WDLTaskJob(self._task, self._prev_node_results, self._task_id, self._namespace, cores=runtime_cores or self.cores, memory=runtime_memory or self.memory, disk=runtime_disk or self.disk, accelerators=runtime_accelerators or self.accelerators)
-            # Run that as a child
-            self.addChild(rescheduled)
-
-            # Give it our postprocessing steps
-            self.defer_postprocessing(rescheduled)
-
-            # And return its result.
-            return rescheduled.rv()
-
-        # If we get here we have all the resources we need, so run the task
-
-        if shutil.which('singularity'):
+            runtime_accelerators = [accelerator_requirement]
+
+        # Schedule to get resources. Pass along the bindings from evaluating all the inputs and decls, and the runtime, with files virtualized.
+        run_job = WDLTaskJob(self._task, virtualize_files(bindings, standard_library), virtualize_files(runtime_bindings, standard_library), self._task_id, self._namespace, self._task_path, cores=runtime_cores or self.cores, memory=runtime_memory or self.memory, disk=runtime_disk or self.disk, accelerators=runtime_accelerators or self.accelerators, wdl_options=self._wdl_options)
+        # Run that as a child
+        self.addChild(run_job)
+
+        # Give it our postprocessing steps
+        self.defer_postprocessing(run_job)
+
+        # And return its result.
+        return run_job.rv()
+
+
+
+class WDLTaskJob(WDLBaseJob):
+    """
+    Job that runs a WDL task.
+
+    Responsible for re-evaluating input declarations for unspecified inputs,
+    evaluating the runtime section, re-scheduling if resources are not
+    available, running any command, and evaluating the outputs.
+
+    All bindings are in terms of task-internal names.
+    """
+
+    def __init__(self, task: WDL.Tree.Task, task_internal_bindings: Promised[WDLBindings], runtime_bindings: Promised[WDLBindings], task_id: List[str], namespace: str, task_path: str, **kwargs: Any) -> None:
+        """
+        Make a new job to run a task.

+        :param namespace: The namespace that the task's *contents* exist in.
+            The caller has alredy added the task's own name.
+
+        :param task_path: Like the namespace, but including subscript numbers
+            for scatters.
+        """
+
+        # This job should not be local because it represents a real workflow task.
+        # TODO: Instead of re-scheduling with more resources, add a local
+        # "wrapper" job like CWL uses to determine the actual requirements.
+        super().__init__(unitName=task_path + ".command", displayName=namespace + ".command", local=False, **kwargs)
+
+        logger.info("Preparing to run task %s as %s", task.name, namespace)
+
+        self._task = task
+        self._task_internal_bindings = task_internal_bindings
+        self._runtime_bindings = runtime_bindings
+        self._task_id = task_id
+        self._namespace = namespace
+        self._task_path = task_path
+
+    def can_fake_root(self) -> bool:
+        """
+        Determine if --fakeroot is likely to work for Singularity.
+        """
+
+        # We need to have an entry for our user in /etc/subuid to grant us a range of UIDs to use, for fakeroot to work.
+        try:
+            subuid_file = open('/etc/subuid')
+        except OSError as e:
+            logger.warning('Cannot open /etc/subuid due to %s; assuming no subuids available', e)
+            return False
+        username = get_user_name()
+        for line in subuid_file:
+            if line.split(':')[0].strip() == username:
+                # We have a line assigning subuids
+                return True
+        # If there is no line, we have no subuids
+        logger.warning('No subuids are assigned to %s; cannot fake root.', username)
+        return False
+
+    def can_mount_proc(self) -> bool:
+        """
+        Determine if --containall will work for Singularity. On Kubernetes, this will result in operation not permitted
+        See: https://github.com/apptainer/singularity/issues/5857
+
+        So if Kubernetes is detected, return False
+        :return: bool
+        """
+        return "KUBERNETES_SERVICE_HOST" not in os.environ
+
+    @report_wdl_errors("run task command")
+    def run(self, file_store: AbstractFileStore) -> Promised[WDLBindings]:
+        """
+        Actually run the task.
+        """
+        super().run(file_store)
+        logger.info("Running task command for %s (%s) called as %s", self._task.name, self._task_id, self._namespace)
+
+        # Set up the WDL standard library
+        # UUID to use for virtualizing files
+        standard_library = ToilWDLStdLibBase(file_store)
+
+        # Get the bindings from after the input section
+        bindings = unwrap(self._task_internal_bindings)
+        # And the bindings from evaluating the runtime section
+        runtime_bindings = unwrap(self._runtime_bindings)
+
+        # We have all the resources we need, so run the task
+
+        if shutil.which('singularity') and self._wdl_options.get("container") in ["singularity", "auto"]:
             # Prepare to use Singularity. We will need plenty of space to
             # download images.
-            if 'SINGULARITY_CACHEDIR' not in os.environ:
-                # Cache Singularity's layers somehwere known to have space, not in home
-                os.environ['SINGULARITY_CACHEDIR'] = os.path.join(file_store.workflow_dir, 'singularity_cache')
+            # Default the Singularity and MiniWDL cache directories. This sets the cache to the same place as
+            # Singularity/MiniWDL's default cache directory
+            # With launch-cluster, the singularity and miniwdl cache is set to /var/lib/toil in abstractProvisioner.py
+            # A current limitation with the singularity/miniwdl cache is it cannot check for image updates if the
+            # filename is the same
+            singularity_cache = os.path.join(os.path.expanduser("~"), ".singularity")
+            miniwdl_cache = os.path.join(os.path.expanduser("~"), ".cache/miniwdl")
+
+            # Cache Singularity's layers somewhere known to have space
+            os.environ['SINGULARITY_CACHEDIR'] = os.environ.get("SINGULARITY_CACHEDIR", singularity_cache)
+
             # Make sure it exists.
             os.makedirs(os.environ['SINGULARITY_CACHEDIR'], exist_ok=True)

-            if 'MINIWDL__SINGULARITY__IMAGE_CACHE' not in os.environ:
-                # Cache Singularity images for the workflow on this machine.
-                # Since MiniWDL does only within-process synchronization for pulls,
-                # we also will need to pre-pull one image into here at a time.
-                os.environ['MINIWDL__SINGULARITY__IMAGE_CACHE'] = os.path.join(file_store.workflow_dir, 'miniwdl_sif_cache')
+            # Cache Singularity images for the workflow on this machine.
+            # Since MiniWDL does only within-process synchronization for pulls,
+            # we also will need to pre-pull one image into here at a time.
+            os.environ['MINIWDL__SINGULARITY__IMAGE_CACHE'] = os.environ.get("MINIWDL__SINGULARITY__IMAGE_CACHE", miniwdl_cache)
+
             # Make sure it exists.
             os.makedirs(os.environ['MINIWDL__SINGULARITY__IMAGE_CACHE'], exist_ok=True)

             # Run containers with Singularity
             TaskContainerImplementation: Type[TaskContainer] = SingularityContainer
-        else:
+        elif self._wdl_options.get("container") in ["docker", "auto"]:
             # Run containers with Docker
+            # TODO: Poll if it is available and don't just try and fail.
             TaskContainerImplementation = SwarmContainer
-            if runtime_accelerators:
+            if runtime_bindings.has_binding('gpuType') or runtime_bindings.has_binding('gpuCount') or runtime_bindings.has_binding('nvidiaDriverVersion'):
                 # Complain to the user that this is unlikely to work.
-                logger.warning("Running job that needs accelerators with Docker, because "
-                               "Singularity is not available. Accelerator and GPU support "
+                logger.warning("Running job that might need accelerators with Docker. "
+                               "Accelerator and GPU support "
                                "is not yet implemented in the MiniWDL Docker "
                                "containerization implementation.")
+        else:
+            raise RuntimeError(f"Could not find a working container engine to use; told to use {self._wdl_options.get('container')}")

         # Set up the MiniWDL container running stuff
         miniwdl_logger = logging.getLogger("MiniWDLContainers")
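Two environment probes in the new WDLTaskJob are worth a note. can_fake_root scans /etc/subuid for a subordinate-UID grant, which Singularity's --fakeroot requires; a typical granting entry (illustrative, not from this diff) looks like:

    # /etc/subuid format is user:first_subordinate_uid:count
    alice:100000:65536

can_mount_proc keys off KUBERNETES_SERVICE_HOST, which Kubernetes injects into every pod's environment, so --containall is dropped exactly where mounting /proc would fail.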
@@ -1597,6 +1684,10 @@ class WDLTaskJob(WDLBaseJob):
                 # We can't fake root so don't try.
                 command_line.remove('--fakeroot')

+            # If on Kubernetes and proc cannot be mounted, get rid of --containall
+            if '--containall' in command_line and not self.can_mount_proc():
+                command_line.remove('--containall')
+
             extra_flags: Set[str] = set()
             accelerators_needed: Optional[List[AcceleratorRequirement]] = self.accelerators
             if accelerators_needed is not None:
@@ -1624,7 +1715,7 @@ class WDLTaskJob(WDLBaseJob):
             task_container._run_invocation = patched_run_invocation # type: ignore

         # Show the runtime info to the container
-        task_container.process_runtime(miniwdl_logger, {binding.name: binding.value for binding in runtime_bindings})
+        task_container.process_runtime(miniwdl_logger, {binding.name: binding.value for binding in devirtualize_files(runtime_bindings, standard_library)})

         # Tell the container to take up all these files. It will assign
         # them all new paths in task_container.input_path_map which we can
@@ -1638,12 +1729,43 @@ class WDLTaskJob(WDLBaseJob):

         # Make a new standard library for evaluating the command specifically, which only deals with in-container paths and out-of-container paths.
         command_library = ToilWDLStdLibTaskCommand(file_store, task_container)
-
-        # Work around wrong types from MiniWDL. See <https://github.com/chanzuckerberg/miniwdl/issues/665>
-        dedent = cast(Callable[[str], Tuple[int, str]], strip_leading_whitespace)
+
+        def hacky_dedent(text: str) -> str:
+            """
+            Guess what result we would have gotten if we dedented the
+            command before substituting placeholder expressions, given the
+            command after substituting placeholder expressions. Workaround
+            for mimicking MiniWDL making us also suffer from
+            <https://github.com/chanzuckerberg/miniwdl/issues/674>.
+            """
+
+            # First just run MiniWDL's dedent
+            # Work around wrong types from MiniWDL. See <https://github.com/chanzuckerberg/miniwdl/issues/665>
+            dedent = cast(Callable[[str], Tuple[int, str]], strip_leading_whitespace)
+
+            text = dedent(text)[1]
+
+            # But this can still leave dedenting to do. Find the first
+            # not-all-whitespace line and get its leading whitespace.
+            to_strip: Optional[str] = None
+            for line in text.split("\n"):
+                if len(line.strip()) > 0:
+                    # This is the first not-all-whitespace line.
+                    # Drop the leading whitespace.
+                    rest = line.lstrip()
+                    # Grab the part that gets removed by lstrip
+                    to_strip = line[0:(len(line) - len(rest))]
+                    break
+            if to_strip is None or len(to_strip) == 0:
+                # Nothing to cut
+                return text
+
+            # Cut to_strip off each line that it appears at the start of.
+            return "\n".join((line.removeprefix(to_strip) for line in text.split("\n")))
+

         # Work out the command string, and unwrap it
-        command_string: str = dedent(evaluate_named_expression(self._task, "command", WDL.Type.String(), self._task.command, contained_bindings, command_library).coerce(WDL.Type.String()).value)[1]
+        command_string: str = hacky_dedent(evaluate_named_expression(self._task, "command", WDL.Type.String(), self._task.command, contained_bindings, command_library).coerce(WDL.Type.String()).value)

         # Grab the standard out and error paths. MyPy complains if we call
         # them because in the current MiniWDL version they are untyped.
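The extra dedent pass matters when a placeholder expands to a flush-left line: the common-leading-whitespace dedent then strips nothing, and the fallback removes the first real line's indent from every line that carries it. A hypothetical substituted command (not from the source) traced through hacky_dedent:

    text = "        echo start\nfrom-placeholder\n        echo done"
    # The common leading whitespace is "" because the middle line is
    # flush-left, so MiniWDL's dedent leaves the text alone; the fallback
    # then strips the first real line's 8-space indent where present:
    # "echo start\nfrom-placeholder\necho done"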
@@ -1668,12 +1790,37 @@ class WDLTaskJob(WDLBaseJob):
         logger.info('Executing command in %s: %s', task_container, command_string)
         try:
             task_container.run(miniwdl_logger, command_string)
-        finally:
+        except Exception:
             if os.path.exists(host_stderr_txt):
-                logger.info('Standard error at %s: %s', host_stderr_txt, open(host_stderr_txt).read())
-            if os.path.exists(host_stdout_txt):
-                logger.info('Standard output at %s: %s', host_stdout_txt, open(host_stdout_txt).read())
+                size = os.path.getsize(host_stderr_txt)
+                logger.error('Failed task left standard error at %s of %d bytes', host_stderr_txt, size)
+                if size > 0:
+                    # Send the whole error stream.
+                    file_store.log_user_stream(self._task_path + '.stderr', open(host_stderr_txt, 'rb'))
+                    if logger.isEnabledFor(logging.DEBUG):
+                        logger.debug("MiniWDL already logged standard error")
+                    else:
+                        # At debug level, MiniWDL itself logs command error lines.
+                        # But otherwise we just dump into StatsAndLogging;
+                        # we also want the messages in the job log that
+                        # gets printed at the end of the workflow. So log
+                        # the error log ourselves.
+                        logger.error("====TASK ERROR LOG====")
+                        for line in open(host_stderr_txt, 'r', errors="replace"):
+                            logger.error("> %s", line.rstrip('\n'))
+                        logger.error("====TASK ERROR LOG====")

+            if os.path.exists(host_stdout_txt):
+                size = os.path.getsize(host_stdout_txt)
+                logger.info('Failed task left standard output at %s of %d bytes', host_stdout_txt, size)
+                if size > 0:
+                    # Save the whole output stream.
+                    # TODO: We can't tell if this was supposed to be
+                    # captured. It might really be huge binary data.
+                    file_store.log_user_stream(self._task_path + '.stdout', open(host_stdout_txt, 'rb'))
+
+            # Keep crashing
+            raise
         else:
             # We need to fake stdout and stderr, since nothing ran but the
             # standard library lets you grab them. TODO: Can these be None?
@@ -1690,9 +1837,26 @@ class WDLTaskJob(WDLBaseJob):
         outputs_library = ToilWDLStdLibTaskOutputs(file_store, host_stdout_txt, host_stderr_txt, current_directory_override=workdir_in_container)
         output_bindings = evaluate_output_decls(self._task.outputs, bindings, outputs_library)

+        # Now we know if the standard output and error were sent somewhere by
+        # the workflow. If not, we should report them to the leader.
+
         # Drop any files from the output which don't actually exist
         output_bindings = drop_missing_files(output_bindings, current_directory_override=workdir_in_container)

+        if not outputs_library.stderr_used() and os.path.exists(host_stderr_txt):
+            size = os.path.getsize(host_stderr_txt)
+            logger.info('Unused standard error at %s of %d bytes', host_stderr_txt, size)
+            if size > 0:
+                # Save the whole error stream because the workflow didn't capture it.
+                file_store.log_user_stream(self._task_path + '.stderr', open(host_stderr_txt, 'rb'))
+
+        if not outputs_library.stdout_used() and os.path.exists(host_stdout_txt):
+            size = os.path.getsize(host_stdout_txt)
+            logger.info('Unused standard output at %s of %d bytes', host_stdout_txt, size)
+            if size > 0:
+                # Save the whole output stream because the workflow didn't capture it.
+                file_store.log_user_stream(self._task_path + '.stdout', open(host_stdout_txt, 'rb'))
+
         # TODO: Check the output bindings against the types of the decls so we
         # can tell if we have a null in a value that is supposed to not be
         # nullable. We can't just look at the types on the values themselves
@@ -1711,15 +1875,16 @@ class WDLWorkflowNodeJob(WDLBaseJob):
     Job that evaluates a WDL workflow node.
     """

-    def __init__(self, node: WDL.Tree.WorkflowNode, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, execution_dir: Optional[str] = None, **kwargs: Any) -> None:
+    def __init__(self, node: WDL.Tree.WorkflowNode, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, task_path: str, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
         """
         Make a new job to run a workflow node to completion.
         """
-        super().__init__(unitName=node.workflow_node_id, displayName=node.workflow_node_id, execution_dir=execution_dir, **kwargs)
+        super().__init__(unitName=node.workflow_node_id, displayName=node.workflow_node_id, wdl_options=wdl_options or {}, **kwargs)

         self._node = node
         self._prev_node_results = prev_node_results
         self._namespace = namespace
+        self._task_path = task_path

         if isinstance(self._node, WDL.Tree.Call):
             logger.debug("Preparing job for call node %s", self._node.workflow_node_id)
@@ -1735,7 +1900,7 @@ class WDLWorkflowNodeJob(WDLBaseJob):
         # Combine the bindings we get from previous jobs
         incoming_bindings = combine_bindings(unwrap_all(self._prev_node_results))
         # Set up the WDL standard library
-        standard_library = ToilWDLStdLibBase(file_store, execution_dir=self._execution_dir)
+        standard_library = ToilWDLStdLibBase(file_store, execution_dir=self._wdl_options.get("execution_dir"))
         with monkeypatch_coerce(standard_library):
             if isinstance(self._node, WDL.Tree.Decl):
                 # This is a variable assignment
@@ -1763,11 +1928,11 @@ class WDLWorkflowNodeJob(WDLBaseJob):

                 if isinstance(self._node.callee, WDL.Tree.Workflow):
                     # This is a call of a workflow
-                    subjob: WDLBaseJob = WDLWorkflowJob(self._node.callee, [input_bindings, passed_down_bindings], self._node.callee_id, f'{self._namespace}.{self._node.name}', self._execution_dir)
+                    subjob: WDLBaseJob = WDLWorkflowJob(self._node.callee, [input_bindings, passed_down_bindings], self._node.callee_id, f'{self._namespace}.{self._node.name}', f'{self._task_path}.{self._node.name}', wdl_options=self._wdl_options)
                     self.addChild(subjob)
                 elif isinstance(self._node.callee, WDL.Tree.Task):
                     # This is a call of a task
-                    subjob = WDLTaskJob(self._node.callee, [input_bindings, passed_down_bindings], self._node.callee_id, f'{self._namespace}.{self._node.name}')
+                    subjob = WDLTaskWrapperJob(self._node.callee, [input_bindings, passed_down_bindings], self._node.callee_id, f'{self._namespace}.{self._node.name}', f'{self._task_path}.{self._node.name}', wdl_options=self._wdl_options)
                     self.addChild(subjob)
                 else:
                     raise WDL.Error.InvalidType(self._node, "Cannot call a " + str(type(self._node.callee)))
@@ -1778,14 +1943,14 @@ class WDLWorkflowNodeJob(WDLBaseJob):
                 self.defer_postprocessing(subjob)
                 return subjob.rv()
             elif isinstance(self._node, WDL.Tree.Scatter):
-                subjob = WDLScatterJob(self._node, [incoming_bindings], self._namespace, self._execution_dir)
+                subjob = WDLScatterJob(self._node, [incoming_bindings], self._namespace, self._task_path, wdl_options=self._wdl_options)
                 self.addChild(subjob)
                 # Scatters don't really make a namespace, just kind of a scope?
                 # TODO: Let stuff leave scope!
                 self.defer_postprocessing(subjob)
                 return subjob.rv()
             elif isinstance(self._node, WDL.Tree.Conditional):
-                subjob = WDLConditionalJob(self._node, [incoming_bindings], self._namespace, self._execution_dir)
+                subjob = WDLConditionalJob(self._node, [incoming_bindings], self._namespace, self._task_path, wdl_options=self._wdl_options)
                 self.addChild(subjob)
                 # Conditionals don't really make a namespace, just kind of a scope?
                 # TODO: Let stuff leave scope!
@@ -1801,11 +1966,11 @@ class WDLWorkflowNodeListJob(WDLBaseJob):
     workflows or tasks or sections.
     """

-    def __init__(self, nodes: List[WDL.Tree.WorkflowNode], prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, execution_dir: Optional[str] = None, **kwargs: Any) -> None:
+    def __init__(self, nodes: List[WDL.Tree.WorkflowNode], prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
         """
         Make a new job to run a list of workflow nodes to completion.
         """
-        super().__init__(unitName=nodes[0].workflow_node_id + '+', displayName=nodes[0].workflow_node_id + '+', execution_dir=execution_dir, **kwargs)
+        super().__init__(unitName=nodes[0].workflow_node_id + '+', displayName=nodes[0].workflow_node_id + '+', wdl_options=wdl_options, **kwargs)

         self._nodes = nodes
         self._prev_node_results = prev_node_results
@@ -1825,7 +1990,7 @@ class WDLWorkflowNodeListJob(WDLBaseJob):
         # Combine the bindings we get from previous jobs
         current_bindings = combine_bindings(unwrap_all(self._prev_node_results))
         # Set up the WDL standard library
-        standard_library = ToilWDLStdLibBase(file_store, execution_dir=self._execution_dir)
+        standard_library = ToilWDLStdLibBase(file_store, execution_dir=self._wdl_options.get("execution_dir"))

         with monkeypatch_coerce(standard_library):
             for node in self._nodes:
@@ -2005,13 +2170,14 @@ class WDLSectionJob(WDLBaseJob):
     Job that can create more graph for a section of the wrokflow.
     """

-    def __init__(self, namespace: str, execution_dir: Optional[str] = None, **kwargs: Any) -> None:
+    def __init__(self, namespace: str, task_path: str, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
         """
         Make a WDLSectionJob where the interior runs in the given namespace,
         starting with the root workflow.
         """
-        super().__init__(execution_dir, **kwargs)
+        super().__init__(wdl_options=wdl_options, **kwargs)
         self._namespace = namespace
+        self._task_path = task_path

     @staticmethod
     def coalesce_nodes(order: List[str], section_graph: WDLWorkflowGraph) -> List[List[str]]:
@@ -2079,7 +2245,7 @@ class WDLSectionJob(WDLBaseJob):



-    def create_subgraph(self, nodes: Sequence[WDL.Tree.WorkflowNode], gather_nodes: Sequence[WDL.Tree.Gather], environment: WDLBindings, local_environment: Optional[WDLBindings] = None) -> WDLBaseJob:
+    def create_subgraph(self, nodes: Sequence[WDL.Tree.WorkflowNode], gather_nodes: Sequence[WDL.Tree.Gather], environment: WDLBindings, local_environment: Optional[WDLBindings] = None, subscript: Optional[int] = None) -> WDLBaseJob:
         """
         Make a Toil job to evaluate a subgraph inside a workflow or workflow
         section.
@@ -2095,8 +2261,16 @@ class WDLSectionJob(WDLBaseJob):
         :param local_environment: Bindings in this environment will be
             used to evaluate the subgraph but will go out of scope
             at the end of the section.
+        :param subscript: If the subgraph is being evaluated multiple times,
+            this should be a disambiguating integer for logging.
         """

+        # Work out what to call what we are working on
+        task_path = self._task_path
+        if subscript is not None:
+            # We need to include a scatter loop number.
+            task_path += f'.{subscript}'
+
         if local_environment is not None:
             # Bring local environment into scope
             environment = combine_bindings([environment, local_environment])
@@ -2156,10 +2330,10 @@ class WDLSectionJob(WDLBaseJob):

             if len(node_ids) == 1:
                 # Make a one-node job
-                job: WDLBaseJob = WDLWorkflowNodeJob(section_graph.get(node_ids[0]), rvs, self._namespace, self._execution_dir)
+                job: WDLBaseJob = WDLWorkflowNodeJob(section_graph.get(node_ids[0]), rvs, self._namespace, task_path, wdl_options=self._wdl_options)
             else:
                 # Make a multi-node job
-                job = WDLWorkflowNodeListJob([section_graph.get(node_id) for node_id in node_ids], rvs, self._namespace, self._execution_dir)
+                job = WDLWorkflowNodeListJob([section_graph.get(node_id) for node_id in node_ids], rvs, self._namespace, wdl_options=self._wdl_options)
             for prev_job in prev_jobs:
                 # Connect up the happens-after relationships to make sure the
                 # return values are available.
@@ -2189,7 +2363,7 @@ class WDLSectionJob(WDLBaseJob):
             leaf_rvs.append(environment)
             # And to fill in bindings from code not executed in this instantiation
             # with Null, and filter out stuff that should leave scope.
-            sink = WDLCombineBindingsJob(leaf_rvs)
+            sink = WDLCombineBindingsJob(leaf_rvs, wdl_options=self._wdl_options)
             # It runs inside us
             self.addChild(sink)
             for leaf_job in toil_leaves.values():
@@ -2256,11 +2430,11 @@ class WDLScatterJob(WDLSectionJob):
     instance of the body. If an instance of the body doesn't create a binding,
     it gets a null value in the corresponding array.
     """
-    def __init__(self, scatter: WDL.Tree.Scatter, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, execution_dir: Optional[str] = None, **kwargs: Any) -> None:
+    def __init__(self, scatter: WDL.Tree.Scatter, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, task_path: str, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
         """
         Create a subtree that will run a WDL scatter. The scatter itself and the contents live in the given namespace.
         """
-        super().__init__(namespace, **kwargs, unitName=scatter.workflow_node_id, displayName=scatter.workflow_node_id, execution_dir=execution_dir)
+        super().__init__(namespace, task_path, **kwargs, unitName=scatter.workflow_node_id, displayName=scatter.workflow_node_id, wdl_options=wdl_options)

         # Because we need to return the return value of the workflow, we need
         # to return a Toil promise for the last/sink job in the workflow's
@@ -2297,7 +2471,7 @@ class WDLScatterJob(WDLSectionJob):
             raise RuntimeError("The returned value from a scatter is not an Array type.")

         scatter_jobs = []
-        for item in scatter_value.value:
+        for subscript, item in enumerate(scatter_value.value):
             # Make an instantiation of our subgraph for each possible value of
             # the variable. Make sure the variable is bound only for the
             # duration of the body.
@@ -2306,7 +2480,7 @@ class WDLScatterJob(WDLSectionJob):
             # TODO: We need to turn values() into a list because MyPy seems to
             # think a dict_values isn't a Sequence. This is a waste of time to
             # appease MyPy but probably better than a cast?
-            scatter_jobs.append(self.create_subgraph(self._scatter.body, list(self._scatter.gathers.values()), bindings, local_bindings))
+            scatter_jobs.append(self.create_subgraph(self._scatter.body, list(self._scatter.gathers.values()), bindings, local_bindings, subscript=subscript))

         if len(scatter_jobs) == 0:
             # No scattering is needed. We just need to bind all the names.
@@ -2326,7 +2500,7 @@ class WDLScatterJob(WDLSectionJob):
         # of maybe-optional values. Each body execution will define names it
         # doesn't make as nulls, so we don't have to worry about
         # totally-missing names.
-        gather_job = WDLArrayBindingsJob([j.rv() for j in scatter_jobs], bindings)
+        gather_job = WDLArrayBindingsJob([j.rv() for j in scatter_jobs], bindings, wdl_options=self._wdl_options)
         self.addChild(gather_job)
         for j in scatter_jobs:
             j.addFollowOn(gather_job)
@@ -2395,11 +2569,11 @@ class WDLConditionalJob(WDLSectionJob):
     """
     Job that evaluates a conditional in a WDL workflow.
     """
-    def __init__(self, conditional: WDL.Tree.Conditional, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, execution_dir: Optional[str] = None, **kwargs: Any) -> None:
+    def __init__(self, conditional: WDL.Tree.Conditional, prev_node_results: Sequence[Promised[WDLBindings]], namespace: str, task_path: str, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
         """
         Create a subtree that will run a WDL conditional. The conditional itself and its contents live in the given namespace.
         """
-        super().__init__(namespace, **kwargs, unitName=conditional.workflow_node_id, displayName=conditional.workflow_node_id, execution_dir=execution_dir)
+        super().__init__(namespace, task_path, **kwargs, unitName=conditional.workflow_node_id, displayName=conditional.workflow_node_id, wdl_options=wdl_options)

         # Once again we need to ship the whole body template to be instantiated
         # into Toil jobs only if it will actually run.
@@ -2447,7 +2621,7 @@ class WDLWorkflowJob(WDLSectionJob):
     Job that evaluates an entire WDL workflow.
     """

-    def __init__(self, workflow: WDL.Tree.Workflow, prev_node_results: Sequence[Promised[WDLBindings]], workflow_id: List[str], namespace: str, execution_dir: Optional[str] = None, **kwargs: Any) -> None:
+    def __init__(self, workflow: WDL.Tree.Workflow, prev_node_results: Sequence[Promised[WDLBindings]], workflow_id: List[str], namespace: str, task_path: str, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
         """
         Create a subtree that will run a WDL workflow. The job returns the
         return value of the workflow.
@@ -2455,7 +2629,7 @@ class WDLWorkflowJob(WDLSectionJob):
         :param namespace: the namespace that the workflow's *contents* will be
             in. Caller has already added the workflow's own name.
         """
-        super().__init__(namespace, execution_dir, **kwargs)
+        super().__init__(namespace, task_path, wdl_options=wdl_options, **kwargs)

         # Because we need to return the return value of the workflow, we need
         # to return a Toil promise for the last/sink job in the workflow's
@@ -2485,7 +2659,7 @@ class WDLWorkflowJob(WDLSectionJob):
         # For a task we only see the insode-the-task namespace.
         bindings = combine_bindings(unwrap_all(self._prev_node_results))
         # Set up the WDL standard library
-        standard_library = ToilWDLStdLibBase(file_store, execution_dir=self._execution_dir)
+        standard_library = ToilWDLStdLibBase(file_store, execution_dir=self._wdl_options.get("execution_dir"))

         if self._workflow.inputs:
             with monkeypatch_coerce(standard_library):
@@ -2499,7 +2673,7 @@ class WDLWorkflowJob(WDLSectionJob):
         if self._workflow.outputs != []: # Compare against empty list as None means there should be outputs
             # Either the output section is declared and nonempty or it is not declared
             # Add evaluating the outputs after the sink
-            outputs_job = WDLOutputsJob(self._workflow, sink.rv(), self._execution_dir)
+            outputs_job = WDLOutputsJob(self._workflow, sink.rv(), wdl_options=self._wdl_options)
             sink.addFollowOn(outputs_job)
             # Caller is responsible for making sure namespaces are applied
             self.defer_postprocessing(outputs_job)
@@ -2514,11 +2688,11 @@ class WDLOutputsJob(WDLBaseJob):

     Returns an environment with just the outputs bound, in no namespace.
     """
-    def __init__(self, workflow: WDL.Tree.Workflow, bindings: Promised[WDLBindings], execution_dir: Optional[str] = None, **kwargs: Any):
+    def __init__(self, workflow: WDL.Tree.Workflow, bindings: Promised[WDLBindings], wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any):
         """
         Make a new WDLWorkflowOutputsJob for the given workflow, with the given set of bindings after its body runs.
         """
-        super().__init__(execution_dir, **kwargs)
+        super().__init__(wdl_options=wdl_options, **kwargs)

         self._bindings = bindings
         self._workflow = workflow
@@ -2548,7 +2722,7 @@ class WDLOutputsJob(WDLBaseJob):
         else:
             # Output section is declared and is nonempty, so evaluate normally
             # Evaluate all the outputs in the normal, non-task-outputs library context
-            standard_library = ToilWDLStdLibBase(file_store, execution_dir=self._execution_dir)
+            standard_library = ToilWDLStdLibBase(file_store, execution_dir=self._wdl_options.get("execution_dir"))
             # Combine the bindings from the previous job
             output_bindings = evaluate_output_decls(self._workflow.outputs, unwrap(self._bindings), standard_library)
         return self.postprocess(output_bindings)
@@ -2560,13 +2734,13 @@ class WDLRootJob(WDLSectionJob):
     the workflow name; both forms are accepted.
     """

-    def __init__(self, workflow: WDL.Tree.Workflow, inputs: WDLBindings, execution_dir: Optional[str] = None, **kwargs: Any) -> None:
+    def __init__(self, workflow: WDL.Tree.Workflow, inputs: WDLBindings, wdl_options: Optional[Dict[str, str]] = None, **kwargs: Any) -> None:
         """
         Create a subtree to run the workflow and namespace the outputs.
         """

-        # The root workflow names the root namespace
-        super().__init__(workflow.name, execution_dir, **kwargs)
+        # The root workflow names the root namespace and task path.
+        super().__init__(workflow.name, workflow.name, wdl_options=wdl_options, **kwargs)

         self._workflow = workflow
         self._inputs = inputs
@@ -2580,7 +2754,7 @@ class WDLRootJob(WDLSectionJob):

         # Run the workflow. We rely in this to handle entering the input
         # namespace if needed, or handling free-floating inputs.
-        workflow_job = WDLWorkflowJob(self._workflow, [self._inputs], [self._workflow.name], self._namespace, self._execution_dir)
+        workflow_job = WDLWorkflowJob(self._workflow, [self._inputs], [self._workflow.name], self._namespace, self._task_path, wdl_options=self._wdl_options)
         workflow_job.then_namespace(self._namespace)
         self.addChild(workflow_job)
         self.defer_postprocessing(workflow_job)
@@ -2642,6 +2816,9 @@ def main() -> None:
     # If we don't have a directory assigned, make one in the current directory.
     output_directory: str = options.output_directory if options.output_directory else mkdtemp(prefix='wdl-out-', dir=os.getcwd())

+    # Get the execution directory
+    execution_dir = os.getcwd()
+
     with Toil(options) as toil:
         if options.restart:
             output_bindings = toil.restart()
@@ -2668,7 +2845,7 @@ def main() -> None:
                     raise WDL.Error.ValidationError(WDL.Error.SourcePosition(options.inputs_uri, inputs_abspath, e.lineno, e.colno, e.lineno, e.colno + 1), "Cannot parse input JSON: " + e.msg) from e
             else:
                 inputs = {}
-
+
             # Parse out the available and required inputs. Each key in the
             # JSON ought to start with the workflow's name and then a .
             # TODO: WDL's Bindings[] isn't variant in the right way, so we
@@ -2703,8 +2880,14 @@ def main() -> None:
             # Get the execution directory
             execution_dir = os.getcwd()

+            # Configure workflow interpreter options
+            wdl_options: Dict[str, str] = {}
+            wdl_options["execution_dir"] = execution_dir
+            wdl_options["container"] = options.container
+            assert wdl_options.get("container") is not None
+
             # Run the workflow and get its outputs namespaced with the workflow name.
-            root_job = WDLRootJob(document.workflow, input_bindings, execution_dir)
+            root_job = WDLRootJob(document.workflow, input_bindings, wdl_options=wdl_options)
             output_bindings = toil.start(root_job)
         if not isinstance(output_bindings, WDL.Env.Bindings):
             raise RuntimeError("The output of the WDL job is not a binding.")
@@ -2716,38 +2899,7 @@ def main() -> None:
             'devirtualize' a file using the "toil" object instead of a filestore.
             Returns its local path.
             """
-            if is_url(filename):
-                if filename.startswith(TOIL_URI_SCHEME):
-                    # This is a reference to the Toil filestore.
-                    # Deserialize the FileID and required basename
-                    file_id, parent_id, file_basename = unpack_toil_uri(filename)
-                else:
-                    # Parse the URL and extract the basename
-                    file_basename = os.path.basename(urlsplit(filename).path)
-
-                # Figure out where it should go.
-                # If a UUID is included, it will be omitted
-                # TODO: Deal with name collisions in the export directory
-                dest_name = os.path.join(output_directory, file_basename)
-
-                if filename.startswith(TOIL_URI_SCHEME):
-                    # Export the file
-                    toil.export_file(file_id, dest_name)
-                else:
-                    # Download to a local file with the right name and execute bit.
-                    # Open it exclusively
-                    with open(dest_name, 'xb') as dest_file:
-                        # And save to it
-                        size, executable = AbstractJobStore.read_from_url(filename, dest_file)
-                    if executable:
-                        # Set the execute bit in the file's permissions
-                        os.chmod(dest_name, os.stat(dest_name).st_mode | stat.S_IXUSR)
-
-                # And return where we put it
-                return dest_name
-            else:
-                # We already had a path
-                return filename
+            return ToilWDLStdLibBase.devirtualze_to(filename, output_directory, toil, execution_dir)

         # Make all the files local files
         output_bindings = map_over_files_in_bindings(output_bindings, devirtualize_output)
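Taken together, the wdl_options plumbing surfaces a runner-level container choice (see also toil/options/wdl.py, +5 lines, in the file list above). Assuming the option is exposed as a --container flag accepting auto, docker, or singularity, which is what the options.container reads above imply, forcing Singularity would look like:

    toil-wdl-runner workflow.wdl inputs.json --container singularity

With auto, the selection logic prefers Singularity when the binary is on PATH and falls back to Docker; naming an engine explicitly makes the new RuntimeError reachable when that engine is unavailable.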