toil 5.12.0__py3-none-any.whl → 6.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157)
  1. toil/__init__.py +18 -13
  2. toil/batchSystems/abstractBatchSystem.py +21 -10
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +2 -2
  4. toil/batchSystems/awsBatch.py +14 -14
  5. toil/batchSystems/contained_executor.py +3 -3
  6. toil/batchSystems/htcondor.py +0 -1
  7. toil/batchSystems/kubernetes.py +34 -31
  8. toil/batchSystems/local_support.py +3 -1
  9. toil/batchSystems/mesos/batchSystem.py +7 -7
  10. toil/batchSystems/options.py +32 -83
  11. toil/batchSystems/registry.py +104 -23
  12. toil/batchSystems/singleMachine.py +16 -13
  13. toil/batchSystems/slurm.py +3 -3
  14. toil/batchSystems/torque.py +0 -1
  15. toil/bus.py +6 -8
  16. toil/common.py +532 -743
  17. toil/cwl/__init__.py +28 -32
  18. toil/cwl/cwltoil.py +523 -520
  19. toil/cwl/utils.py +55 -10
  20. toil/fileStores/__init__.py +2 -2
  21. toil/fileStores/abstractFileStore.py +36 -11
  22. toil/fileStores/cachingFileStore.py +607 -530
  23. toil/fileStores/nonCachingFileStore.py +43 -10
  24. toil/job.py +140 -75
  25. toil/jobStores/abstractJobStore.py +147 -79
  26. toil/jobStores/aws/jobStore.py +23 -9
  27. toil/jobStores/aws/utils.py +1 -2
  28. toil/jobStores/fileJobStore.py +117 -19
  29. toil/jobStores/googleJobStore.py +16 -7
  30. toil/jobStores/utils.py +5 -6
  31. toil/leader.py +71 -43
  32. toil/lib/accelerators.py +10 -5
  33. toil/lib/aws/__init__.py +3 -14
  34. toil/lib/aws/ami.py +22 -9
  35. toil/lib/aws/iam.py +21 -13
  36. toil/lib/aws/session.py +2 -16
  37. toil/lib/aws/utils.py +4 -5
  38. toil/lib/compatibility.py +1 -1
  39. toil/lib/conversions.py +7 -3
  40. toil/lib/docker.py +22 -23
  41. toil/lib/ec2.py +10 -6
  42. toil/lib/ec2nodes.py +106 -100
  43. toil/lib/encryption/_nacl.py +2 -1
  44. toil/lib/generatedEC2Lists.py +325 -18
  45. toil/lib/io.py +21 -0
  46. toil/lib/misc.py +1 -1
  47. toil/lib/resources.py +1 -1
  48. toil/lib/threading.py +74 -26
  49. toil/options/common.py +738 -0
  50. toil/options/cwl.py +336 -0
  51. toil/options/wdl.py +32 -0
  52. toil/provisioners/abstractProvisioner.py +1 -4
  53. toil/provisioners/aws/__init__.py +3 -6
  54. toil/provisioners/aws/awsProvisioner.py +6 -0
  55. toil/provisioners/clusterScaler.py +3 -2
  56. toil/provisioners/gceProvisioner.py +2 -2
  57. toil/realtimeLogger.py +2 -1
  58. toil/resource.py +24 -18
  59. toil/server/app.py +2 -3
  60. toil/server/cli/wes_cwl_runner.py +4 -4
  61. toil/server/utils.py +1 -1
  62. toil/server/wes/abstract_backend.py +3 -2
  63. toil/server/wes/amazon_wes_utils.py +5 -4
  64. toil/server/wes/tasks.py +2 -3
  65. toil/server/wes/toil_backend.py +2 -10
  66. toil/server/wsgi_app.py +2 -0
  67. toil/serviceManager.py +12 -10
  68. toil/statsAndLogging.py +5 -1
  69. toil/test/__init__.py +29 -54
  70. toil/test/batchSystems/batchSystemTest.py +11 -111
  71. toil/test/batchSystems/test_slurm.py +3 -2
  72. toil/test/cwl/cwlTest.py +213 -90
  73. toil/test/cwl/glob_dir.cwl +15 -0
  74. toil/test/cwl/preemptible.cwl +21 -0
  75. toil/test/cwl/preemptible_expression.cwl +28 -0
  76. toil/test/cwl/revsort.cwl +1 -1
  77. toil/test/cwl/revsort2.cwl +1 -1
  78. toil/test/docs/scriptsTest.py +0 -1
  79. toil/test/jobStores/jobStoreTest.py +27 -16
  80. toil/test/lib/aws/test_iam.py +4 -14
  81. toil/test/lib/aws/test_utils.py +0 -3
  82. toil/test/lib/dockerTest.py +4 -4
  83. toil/test/lib/test_ec2.py +11 -16
  84. toil/test/mesos/helloWorld.py +4 -5
  85. toil/test/mesos/stress.py +1 -1
  86. toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
  87. toil/test/provisioners/clusterScalerTest.py +6 -4
  88. toil/test/provisioners/clusterTest.py +14 -3
  89. toil/test/provisioners/gceProvisionerTest.py +0 -6
  90. toil/test/provisioners/restartScript.py +3 -2
  91. toil/test/server/serverTest.py +1 -1
  92. toil/test/sort/restart_sort.py +2 -1
  93. toil/test/sort/sort.py +2 -1
  94. toil/test/sort/sortTest.py +2 -13
  95. toil/test/src/autoDeploymentTest.py +45 -45
  96. toil/test/src/busTest.py +5 -5
  97. toil/test/src/checkpointTest.py +2 -2
  98. toil/test/src/deferredFunctionTest.py +1 -1
  99. toil/test/src/fileStoreTest.py +32 -16
  100. toil/test/src/helloWorldTest.py +1 -1
  101. toil/test/src/importExportFileTest.py +1 -1
  102. toil/test/src/jobDescriptionTest.py +2 -1
  103. toil/test/src/jobServiceTest.py +1 -1
  104. toil/test/src/jobTest.py +18 -18
  105. toil/test/src/miscTests.py +5 -3
  106. toil/test/src/promisedRequirementTest.py +3 -3
  107. toil/test/src/realtimeLoggerTest.py +1 -1
  108. toil/test/src/resourceTest.py +2 -2
  109. toil/test/src/restartDAGTest.py +1 -1
  110. toil/test/src/resumabilityTest.py +36 -2
  111. toil/test/src/retainTempDirTest.py +1 -1
  112. toil/test/src/systemTest.py +2 -2
  113. toil/test/src/toilContextManagerTest.py +2 -2
  114. toil/test/src/userDefinedJobArgTypeTest.py +1 -1
  115. toil/test/utils/toilDebugTest.py +98 -32
  116. toil/test/utils/toilKillTest.py +2 -2
  117. toil/test/utils/utilsTest.py +20 -0
  118. toil/test/wdl/wdltoil_test.py +148 -45
  119. toil/toilState.py +7 -6
  120. toil/utils/toilClean.py +1 -1
  121. toil/utils/toilConfig.py +36 -0
  122. toil/utils/toilDebugFile.py +60 -33
  123. toil/utils/toilDebugJob.py +39 -12
  124. toil/utils/toilDestroyCluster.py +1 -1
  125. toil/utils/toilKill.py +1 -1
  126. toil/utils/toilLaunchCluster.py +13 -2
  127. toil/utils/toilMain.py +3 -2
  128. toil/utils/toilRsyncCluster.py +1 -1
  129. toil/utils/toilSshCluster.py +1 -1
  130. toil/utils/toilStats.py +240 -143
  131. toil/utils/toilStatus.py +1 -4
  132. toil/version.py +11 -11
  133. toil/wdl/utils.py +2 -122
  134. toil/wdl/wdltoil.py +999 -386
  135. toil/worker.py +25 -31
  136. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/METADATA +60 -53
  137. toil-6.1.0a1.dist-info/RECORD +237 -0
  138. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/WHEEL +1 -1
  139. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/entry_points.txt +0 -1
  140. toil/batchSystems/parasol.py +0 -379
  141. toil/batchSystems/tes.py +0 -459
  142. toil/test/batchSystems/parasolTestSupport.py +0 -117
  143. toil/test/wdl/builtinTest.py +0 -506
  144. toil/test/wdl/conftest.py +0 -23
  145. toil/test/wdl/toilwdlTest.py +0 -522
  146. toil/wdl/toilwdl.py +0 -141
  147. toil/wdl/versions/dev.py +0 -107
  148. toil/wdl/versions/draft2.py +0 -980
  149. toil/wdl/versions/v1.py +0 -794
  150. toil/wdl/wdl_analysis.py +0 -116
  151. toil/wdl/wdl_functions.py +0 -997
  152. toil/wdl/wdl_synthesis.py +0 -1011
  153. toil/wdl/wdl_types.py +0 -243
  154. toil-5.12.0.dist-info/RECORD +0 -244
  155. /toil/{wdl/versions → options}/__init__.py +0 -0
  156. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/LICENSE +0 -0
  157. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/top_level.txt +0 -0
toil/__init__.py CHANGED
@@ -20,6 +20,7 @@ import sys
20
20
  import time
21
21
  from datetime import datetime
22
22
  from typing import TYPE_CHECKING, Optional, Tuple
23
+
23
24
  import requests
24
25
  from pytz import timezone
25
26
 
@@ -106,7 +107,8 @@ def toilPackageDirPath() -> str:
106
107
  The return value is guaranteed to end in '/toil'.
107
108
  """
108
109
  result = os.path.dirname(os.path.realpath(__file__))
109
- assert result.endswith('/toil')
110
+ if not result.endswith('/toil'):
111
+ raise RuntimeError("The top-level toil package is not named Toil.")
110
112
  return result
111
113
 
112
114
 
@@ -132,7 +134,8 @@ def resolveEntryPoint(entryPoint: str) -> str:
132
134
  # opposed to being included via --system-site-packages). For clusters this means that
133
135
  # if Toil is installed in a virtualenv on the leader, it must be installed in
134
136
  # a virtualenv located at the same path on each worker as well.
135
- assert os.access(path, os.X_OK)
137
+ if not os.access(path, os.X_OK):
138
+ raise RuntimeError("Cannot access the Toil virtualenv. If installed in a virtualenv on a cluster, make sure that the virtualenv path is the same for the leader and workers.")
136
139
  return path
137
140
  # Otherwise, we aren't in a virtualenv, or we're in a virtualenv but Toil
138
141
  # came in via --system-site-packages, or we think the virtualenv might not
@@ -238,7 +241,8 @@ def customInitCmd() -> str:
238
241
 
239
242
  def _check_custom_bash_cmd(cmd_str):
240
243
  """Ensure that the Bash command doesn't contain invalid characters."""
241
- assert not re.search(r'[\n\r\t]', cmd_str), f'"{cmd_str}" contains invalid characters (newline and/or tab).'
244
+ if re.search(r'[\n\r\t]', cmd_str):
245
+ raise RuntimeError(f'"{cmd_str}" contains invalid characters (newline and/or tab).')
242
246
 
243
247
 
244
248
  def lookupEnvVar(name: str, envName: str, defaultValue: str) -> str:
@@ -370,11 +374,10 @@ def requestCheckRegularDocker(origAppliance: str, registryName: str, imageName:
370
374
  separate check is done for docker.io images.
371
375
 
372
376
  :param origAppliance: The full url of the docker image originally
373
- specified by the user (or the default).
374
- e.g. ``quay.io/ucsc_cgl/toil:latest``
375
- :param registryName: The url of a docker image's registry. e.g. ``quay.io``
376
- :param imageName: The image, including path and excluding the tag. e.g. ``ucsc_cgl/toil``
377
- :param tag: The tag used at that docker image's registry. e.g. ``latest``
377
+ specified by the user (or the default). For example, ``quay.io/ucsc_cgl/toil:latest``.
378
+ :param registryName: The url of a docker image's registry. For example, ``quay.io``.
379
+ :param imageName: The image, including path and excluding the tag. For example, ``ucsc_cgl/toil``.
380
+ :param tag: The tag used at that docker image's registry. For example, ``latest``.
378
381
  :raises: ApplianceImageNotFound if no match is found.
379
382
  :return: Return True if match found.
380
383
  """
@@ -399,9 +402,9 @@ def requestCheckDockerIo(origAppliance: str, imageName: str, tag: str) -> bool:
399
402
  URL is based on the docker v2 schema. Requires that an access token be fetched first.
400
403
 
401
404
  :param origAppliance: The full url of the docker image originally
402
- specified by the user (or the default). e.g. "ubuntu:latest"
403
- :param imageName: The image, including path and excluding the tag. e.g. "ubuntu"
404
- :param tag: The tag used at that docker image's registry. e.g. "latest"
405
+ specified by the user (or the default). For example, ``ubuntu:latest``.
406
+ :param imageName: The image, including path and excluding the tag. For example, ``ubuntu``.
407
+ :param tag: The tag used at that docker image's registry. For example, ``latest``.
405
408
  :raises: ApplianceImageNotFound if no match is found.
406
409
  :return: Return True if match found.
407
410
  """
@@ -548,7 +551,8 @@ try:
548
551
  So if we ever want to refresh, Boto 3 wants to refresh too.
549
552
  """
550
553
  # This should only happen if we have expiring credentials, which we should only get from boto3
551
- assert (self._boto3_resolver is not None)
554
+ if self._boto3_resolver is None:
555
+ raise RuntimeError("The Boto3 resolver should not be None.")
552
556
 
553
557
  self._obtain_credentials_from_cache_or_boto3()
554
558
 
@@ -612,7 +616,8 @@ try:
612
616
  content = f.read()
613
617
  if content:
614
618
  record = content.split('\n')
615
- assert len(record) == 4
619
+ if len(record) != 4:
620
+ raise RuntimeError("Number of cached credentials is not 4.")
616
621
  self._access_key = record[0]
617
622
  self._secret_key = record[1]
618
623
  self._security_token = record[2]
@@ -15,19 +15,19 @@ import enum
15
15
  import logging
16
16
  import os
17
17
  import shutil
18
+ import time
18
19
  from abc import ABC, abstractmethod
19
20
  from argparse import ArgumentParser, _ArgumentGroup
20
21
  from contextlib import contextmanager
21
22
  from threading import Condition
22
- import time
23
23
  from typing import (Any,
24
24
  ContextManager,
25
25
  Dict,
26
- List,
27
- Set,
28
26
  Iterator,
27
+ List,
29
28
  NamedTuple,
30
29
  Optional,
30
+ Set,
31
31
  Union,
32
32
  cast)
33
33
 
@@ -37,6 +37,7 @@ from toil.common import Config, Toil, cacheDirName
37
37
  from toil.deferred import DeferredFunctionManager
38
38
  from toil.fileStores.abstractFileStore import AbstractFileStore
39
39
  from toil.job import JobDescription, ParsedRequirement, Requirer
40
+ from toil.lib.memoize import memoize
40
41
  from toil.resource import Resource
41
42
 
42
43
  logger = logging.getLogger(__name__)
@@ -106,6 +107,8 @@ class AbstractBatchSystem(ABC):
106
107
  @abstractmethod
107
108
  def supportsWorkerCleanup(cls) -> bool:
108
109
  """
110
+ Whether this batch system supports worker cleanup.
111
+
109
112
  Indicates whether this batch system invokes
110
113
  :meth:`BatchSystemSupport.workerCleanup` after the last job for a
111
114
  particular workflow invocation finishes. Note that the term *worker*
@@ -119,7 +122,9 @@ class AbstractBatchSystem(ABC):
119
122
 
120
123
  def setUserScript(self, userScript: Resource) -> None:
121
124
  """
122
- Set the user script for this workflow. This method must be called before the first job is
125
+ Set the user script for this workflow.
126
+
127
+ This method must be called before the first job is
123
128
  issued to this batch system, and only if :meth:`.supportsAutoDeployment` returns True,
124
129
  otherwise it will raise an exception.
125
130
 
@@ -134,7 +139,6 @@ class AbstractBatchSystem(ABC):
134
139
  bus, so that it can send informational messages about the jobs it is
135
140
  running to other Toil components.
136
141
  """
137
- pass
138
142
 
139
143
  @abstractmethod
140
144
  def issueBatchJob(self, jobDesc: JobDescription, job_environment: Optional[Dict[str, str]] = None) -> int:
@@ -263,7 +267,6 @@ class AbstractBatchSystem(ABC):
263
267
  setOption(option_name, parsing_function=None, check_function=None, default=None, env=None)
264
268
  returning nothing, used to update run configuration as a side effect.
265
269
  """
266
- pass
267
270
 
268
271
  def getWorkerContexts(self) -> List[ContextManager[Any]]:
269
272
  """
@@ -372,7 +375,7 @@ class BatchSystemSupport(AbstractBatchSystem):
372
375
  :param name: the environment variable to be set on the worker.
373
376
 
374
377
  :param value: if given, the environment variable given by name will be set to this value.
375
- if None, the variable's current value will be used as the value on the worker
378
+ If None, the variable's current value will be used as the value on the worker
376
379
 
377
380
  :raise RuntimeError: if value is None and the name cannot be found in the environment
378
381
  """
@@ -392,6 +395,7 @@ class BatchSystemSupport(AbstractBatchSystem):
392
395
  # We do in fact send messages to the message bus.
393
396
  self._outbox = message_bus.outbox()
394
397
 
398
+ @memoize
395
399
  def get_batch_logs_dir(self) -> str:
396
400
  """
397
401
  Get the directory where the backing batch system should save its logs.
@@ -404,6 +408,9 @@ class BatchSystemSupport(AbstractBatchSystem):
404
408
  """
405
409
  if self.config.batch_logs_dir:
406
410
  # Use what is specified
411
+ if not os.path.isdir(self.config.batch_logs_dir):
412
+ # But if it doesn't exist, make it exist
413
+ os.makedirs(self.config.batch_logs_dir, exist_ok=True)
407
414
  return self.config.batch_logs_dir
408
415
  # And if nothing is specified use the workDir.
409
416
  return Toil.getToilWorkDir(self.config.workDir)
@@ -442,7 +449,9 @@ class BatchSystemSupport(AbstractBatchSystem):
442
449
  @staticmethod
443
450
  def workerCleanup(info: WorkerCleanupInfo) -> None:
444
451
  """
445
- Cleans up the worker node on batch system shutdown. Also see :meth:`supportsWorkerCleanup`.
452
+ Cleans up the worker node on batch system shutdown.
453
+
454
+ Also see :meth:`supportsWorkerCleanup`.
446
455
 
447
456
  :param WorkerCleanupInfo info: A named tuple consisting of all the relevant information
448
457
  for cleaning up the worker.
@@ -498,8 +507,10 @@ class NodeInfo:
498
507
 
499
508
  class AbstractScalableBatchSystem(AbstractBatchSystem):
500
509
  """
501
- A batch system that supports a variable number of worker nodes. Used by :class:`toil.
502
- provisioners.clusterScaler.ClusterScaler` to scale the number of worker nodes in the cluster
510
+ A batch system that supports a variable number of worker nodes.
511
+
512
+ Used by :class:`toil.provisioners.clusterScaler.ClusterScaler`
513
+ to scale the number of worker nodes in the cluster
503
514
  up or down depending on overall load.
504
515
  """
505
516
 
@@ -17,13 +17,13 @@ from abc import ABCMeta, abstractmethod
17
17
  from datetime import datetime
18
18
  from queue import Empty, Queue
19
19
  from threading import Lock, Thread
20
- from typing import Any, Dict, List, Optional, Tuple, Union
20
+ from typing import Dict, List, Optional, Tuple, Union
21
21
 
22
22
  from toil.batchSystems.abstractBatchSystem import (BatchJobExitReason,
23
23
  UpdatedBatchJobInfo)
24
24
  from toil.batchSystems.cleanup_support import BatchSystemCleanupSupport
25
25
  from toil.bus import ExternalBatchIdMessage
26
- from toil.job import JobDescription, AcceleratorRequirement
26
+ from toil.job import AcceleratorRequirement
27
27
  from toil.lib.misc import CalledProcessErrorStderr
28
28
 
29
29
  logger = logging.getLogger(__name__)
@@ -34,25 +34,25 @@ import tempfile
34
34
  import time
35
35
  import uuid
36
36
  from argparse import ArgumentParser, _ArgumentGroup
37
- from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Union
37
+ from typing import Any, Dict, Iterator, List, Optional, Set, Union
38
38
 
39
39
  from boto.exception import BotoServerError
40
40
 
41
41
  from toil import applianceSelf
42
42
  from toil.batchSystems.abstractBatchSystem import (EXIT_STATUS_UNAVAILABLE_VALUE,
43
43
  BatchJobExitReason,
44
- UpdatedBatchJobInfo,
45
- InsufficientSystemResources)
46
- from toil.batchSystems.options import OptionSetter
44
+ InsufficientSystemResources,
45
+ UpdatedBatchJobInfo)
47
46
  from toil.batchSystems.cleanup_support import BatchSystemCleanupSupport
48
47
  from toil.batchSystems.contained_executor import pack_job
49
- from toil.bus import ExternalBatchIdMessage, MessageBus, MessageOutbox
48
+ from toil.batchSystems.options import OptionSetter
49
+ from toil.bus import ExternalBatchIdMessage
50
50
  from toil.common import Config, Toil
51
51
  from toil.job import JobDescription, Requirer
52
- from toil.lib.aws import get_current_aws_region, zone_to_region
52
+ from toil.lib.aws import get_current_aws_region
53
53
  from toil.lib.aws.session import establish_boto3_session
54
- from toil.lib.conversions import b_to_mib, mib_to_b
55
- from toil.lib.misc import slow_down, unix_now_ms, utc_now
54
+ from toil.lib.conversions import b_to_mib
55
+ from toil.lib.misc import slow_down, unix_now_ms
56
56
  from toil.lib.retry import retry
57
57
  from toil.resource import Resource
58
58
 
@@ -559,17 +559,17 @@ class AWSBatchBatchSystem(BatchSystemCleanupSupport):
559
559
 
560
560
  @classmethod
561
561
  def add_options(cls, parser: Union[ArgumentParser, _ArgumentGroup]) -> None:
562
- parser.add_argument("--awsBatchRegion", dest="aws_batch_region", default=None,
562
+ parser.add_argument("--awsBatchRegion", dest="aws_batch_region", default=None, env_var="TOIL_AWS_REGION",
563
563
  help="The AWS region containing the AWS Batch queue to submit to.")
564
- parser.add_argument("--awsBatchQueue", dest="aws_batch_queue", default=None,
564
+ parser.add_argument("--awsBatchQueue", dest="aws_batch_queue", default=None, env_var="TOIL_AWS_BATCH_QUEUE",
565
565
  help="The name or ARN of the AWS Batch queue to submit to.")
566
- parser.add_argument("--awsBatchJobRoleArn", dest="aws_batch_job_role_arn", default=None,
566
+ parser.add_argument("--awsBatchJobRoleArn", dest="aws_batch_job_role_arn", default=None, env_var="TOIL_AWS_BATCH_JOB_ROLE_ARN",
567
567
  help=("The ARN of an IAM role to run AWS Batch jobs as, so they "
568
568
  "can e.g. access a job store. Must be assumable by "
569
569
  "ecs-tasks.amazonaws.com."))
570
570
 
571
571
  @classmethod
572
572
  def setOptions(cls, setOption: OptionSetter) -> None:
573
- setOption("aws_batch_region", default=None)
574
- setOption("aws_batch_queue", default=None, env=["TOIL_AWS_BATCH_QUEUE"])
575
- setOption("aws_batch_job_role_arn", default=None, env=["TOIL_AWS_BATCH_JOB_ROLE_ARN"])
573
+ setOption("aws_batch_region")
574
+ setOption("aws_batch_queue")
575
+ setOption("aws_batch_job_role_arn")
@@ -14,7 +14,7 @@
14
14
  """
15
15
  Executor for running inside a container.
16
16
 
17
- Useful for Kubernetes and TES batch systems.
17
+ Useful for Kubernetes batch system and TES batch system plugin.
18
18
  """
19
19
  import base64
20
20
  import logging
@@ -39,10 +39,10 @@ def pack_job(job_desc: JobDescription, user_script: Optional[Resource] = None, e
39
39
  :param job_desc: Job description for the job to run.
40
40
  :param user_script: User script that will be loaded before the job is run.
41
41
  :param environment: Environment variable dict that will be applied before
42
- the job is run.
42
+ the job is run.
43
43
 
44
44
  :returns: Command to run the job, as an argument list that can be run
45
- inside the Toil appliance container.
45
+ inside the Toil appliance container.
46
46
  """
47
47
  # Make a job dict to send to the executor.
48
48
  # TODO: Factor out executor setup from here and Kubernetes and TES
@@ -24,7 +24,6 @@ import htcondor
24
24
 
25
25
  from toil.batchSystems.abstractGridEngineBatchSystem import \
26
26
  AbstractGridEngineBatchSystem
27
-
28
27
  from toil.job import AcceleratorRequirement
29
28
  from toil.lib.retry import retry
30
29
 
@@ -24,22 +24,22 @@ import datetime
24
24
  import logging
25
25
  import math
26
26
  import os
27
- from queue import Empty, Queue
28
27
  import string
29
28
  import sys
30
29
  import tempfile
31
- from threading import Event, Thread, Condition, RLock
32
30
  import time
33
31
  import uuid
34
32
  from argparse import ArgumentParser, _ArgumentGroup
33
+ from queue import Empty, Queue
34
+ from threading import Condition, Event, RLock, Thread
35
35
  from typing import (Any,
36
36
  Callable,
37
37
  Dict,
38
38
  Iterator,
39
39
  List,
40
- Set,
41
40
  Literal,
42
41
  Optional,
42
+ Set,
43
43
  Tuple,
44
44
  Type,
45
45
  TypeVar,
@@ -104,7 +104,8 @@ from toil.batchSystems.abstractBatchSystem import (EXIT_STATUS_UNAVAILABLE_VALUE
104
104
  from toil.batchSystems.cleanup_support import BatchSystemCleanupSupport
105
105
  from toil.batchSystems.contained_executor import pack_job
106
106
  from toil.batchSystems.options import OptionSetter
107
- from toil.common import Config, Toil, SYS_MAX_SIZE
107
+ from toil.common import Config, Toil
108
+ from toil.options.common import SYS_MAX_SIZE
108
109
  from toil.job import JobDescription, Requirer
109
110
  from toil.lib.conversions import human2bytes
110
111
  from toil.lib.misc import get_user_name, slow_down, utc_now
@@ -152,6 +153,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
152
153
  super().__init__(config, maxCores, maxMemory, maxDisk)
153
154
 
154
155
  # Re-type the config to make sure it has all the fields we need.
156
+ # This convinces MyPy we really do have this type.
155
157
  assert isinstance(config, KubernetesBatchSystem.KubernetesConfig)
156
158
 
157
159
  # Turn down log level for Kubernetes modules and dependencies.
@@ -167,26 +169,26 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
167
169
  self._apis: KubernetesBatchSystem._ApiStorageDict = {}
168
170
 
169
171
  # Get our namespace (and our Kubernetes credentials to make sure they exist)
170
- self.namespace = self._api('namespace')
172
+ self.namespace: str = self._api('namespace')
171
173
 
172
174
  # Decide if we are going to mount a Kubernetes host path as the Toil
173
175
  # work dir in the workers, for shared caching.
174
- self.host_path = config.kubernetes_host_path
176
+ self.host_path: Optional[str] = config.kubernetes_host_path
175
177
 
176
178
  # Get the service account name to use, if any.
177
- self.service_account = config.kubernetes_service_account
179
+ self.service_account: Optional[str] = config.kubernetes_service_account
178
180
 
179
181
  # Get how long we should wait for a pod that lands on a node to
180
182
  # actually start.
181
- self.pod_timeout = config.kubernetes_pod_timeout
183
+ self.pod_timeout: float = config.kubernetes_pod_timeout
182
184
 
183
185
  # Get the username to mark jobs with
184
- username = config.kubernetes_owner
186
+ username = config.kubernetes_owner or self.get_default_kubernetes_owner()
185
187
  # And a unique ID for the run
186
188
  self.unique_id = uuid.uuid4()
187
189
 
188
190
  # Create a prefix for jobs, starting with our username
189
- self.job_prefix = f'{username}-toil-{self.unique_id}-'
191
+ self.job_prefix: str = f'{username}-toil-{self.unique_id}-'
190
192
  # Instead of letting Kubernetes assign unique job names, we assign our
191
193
  # own based on a numerical job ID. This functionality is managed by the
192
194
  # BatchSystemLocalSupport.
@@ -199,17 +201,17 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
199
201
  # conformance tests. To work around this, we tag all our jobs with an
200
202
  # explicit TTL that is long enough that we're sure we can deal with all
201
203
  # the finished jobs before they expire.
202
- self.finished_job_ttl = 3600 # seconds
204
+ self.finished_job_ttl: int = 3600 # seconds
203
205
 
204
206
  # Here is where we will store the user script resource object if we get one.
205
207
  self.user_script: Optional[Resource] = None
206
208
 
207
209
  # Get the image to deploy from Toil's configuration
208
- self.docker_image = applianceSelf()
210
+ self.docker_image: str = applianceSelf()
209
211
 
210
212
  # Try and guess what Toil work dir the workers will use.
211
213
  # We need to be able to provision (possibly shared) space there.
212
- self.worker_work_dir = Toil.getToilWorkDir(config.workDir)
214
+ self.worker_work_dir: str = Toil.getToilWorkDir(config.workDir)
213
215
  if (config.workDir is None and
214
216
  os.getenv('TOIL_WORKDIR') is None and
215
217
  self.worker_work_dir == tempfile.gettempdir()):
@@ -226,17 +228,17 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
226
228
  self.environment['TMPDIR'] = '/var/tmp'
227
229
 
228
230
  # Get the name of the AWS secret, if any, to mount in containers.
229
- self.aws_secret_name = os.environ.get("TOIL_AWS_SECRET_NAME", None)
231
+ self.aws_secret_name: Optional[str] = os.environ.get("TOIL_AWS_SECRET_NAME", None)
230
232
 
231
233
  # Set this to True to enable the experimental wait-for-job-update code
232
- self.enable_watching = os.environ.get("KUBE_WATCH_ENABLED", False)
234
+ self.enable_watching: bool = os.environ.get("KUBE_WATCH_ENABLED", False)
233
235
 
234
236
  # This will be a label to select all our jobs.
235
- self.run_id = f'toil-{self.unique_id}'
237
+ self.run_id: str = f'toil-{self.unique_id}'
236
238
 
237
239
  # Keep track of available resources.
238
240
  maxMillicores = int(SYS_MAX_SIZE if self.maxCores == SYS_MAX_SIZE else self.maxCores * 1000)
239
- self.resource_sources = [
241
+ self.resource_sources: List[ResourcePool] = [
240
242
  # A pool representing available job slots
241
243
  ResourcePool(self.config.max_jobs, 'job slots'),
242
244
  # A pool representing available CPU in units of millicores (1 CPU
@@ -261,16 +263,16 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
261
263
  self._killed_queue_jobs: Set[int] = set()
262
264
 
263
265
  # We use this event to signal shutdown
264
- self._shutting_down = Event()
266
+ self._shutting_down: Event = Event()
265
267
 
266
268
  # A lock to protect critical regions when working with queued jobs.
267
- self._mutex = RLock()
269
+ self._mutex: RLock = RLock()
268
270
 
269
271
  # A condition set to true when there is more work to do. e.g.: new job
270
272
  # in the queue or any resource becomes available.
271
- self._work_available = Condition(lock=self._mutex)
273
+ self._work_available: Condition = Condition(lock=self._mutex)
272
274
 
273
- self.schedulingThread = Thread(target=self._scheduler, daemon=True)
275
+ self.schedulingThread: Thread = Thread(target=self._scheduler, daemon=True)
274
276
  self.schedulingThread.start()
275
277
 
276
278
  def _pretty_print(self, kubernetes_object: Any) -> str:
@@ -1864,24 +1866,25 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
1864
1866
 
1865
1867
  @classmethod
1866
1868
  def add_options(cls, parser: Union[ArgumentParser, _ArgumentGroup]) -> None:
1867
- parser.add_argument("--kubernetesHostPath", dest="kubernetes_host_path", default=None,
1869
+ parser.add_argument("--kubernetesHostPath", dest="kubernetes_host_path", default=None, env_var="TOIL_KUBERNETES_HOST_PATH",
1868
1870
  help="Path on Kubernetes hosts to use as shared inter-pod temp directory. "
1869
1871
  "(default: %(default)s)")
1870
- parser.add_argument("--kubernetesOwner", dest="kubernetes_owner", default=cls.get_default_kubernetes_owner(),
1871
- help="Username to mark Kubernetes jobs with. "
1872
- "(default: %(default)s)")
1873
- parser.add_argument("--kubernetesServiceAccount", dest="kubernetes_service_account", default=None,
1872
+ parser.add_argument("--kubernetesOwner", dest="kubernetes_owner", default=None, env_var="TOIL_KUBERNETES_OWNER",
1873
+ help=f"Username to mark Kubernetes jobs with. If the provided value is None, the value will "
1874
+ f"be generated at runtime. "
1875
+ f"(Generated default: {cls.get_default_kubernetes_owner()})")
1876
+ parser.add_argument("--kubernetesServiceAccount", dest="kubernetes_service_account", default=None, env_var="TOIL_KUBERNETES_SERVICE_ACCOUNT",
1874
1877
  help="Service account to run jobs as. "
1875
1878
  "(default: %(default)s)")
1876
- parser.add_argument("--kubernetesPodTimeout", dest="kubernetes_pod_timeout", default=120,
1879
+ parser.add_argument("--kubernetesPodTimeout", dest="kubernetes_pod_timeout", default=120, env_var="TOIL_KUBERNETES_POD_TIMEOUT", type=float,
1877
1880
  help="Seconds to wait for a scheduled Kubernetes pod to start running. "
1878
1881
  "(default: %(default)s)")
1879
1882
 
1880
1883
  OptionType = TypeVar('OptionType')
1881
1884
  @classmethod
1882
1885
  def setOptions(cls, setOption: OptionSetter) -> None:
1883
- setOption("kubernetes_host_path", default=None, env=['TOIL_KUBERNETES_HOST_PATH'])
1884
- setOption("kubernetes_owner", default=cls.get_default_kubernetes_owner(), env=['TOIL_KUBERNETES_OWNER'])
1885
- setOption("kubernetes_service_account", default=None, env=['TOIL_KUBERNETES_SERVICE_ACCOUNT'])
1886
- setOption("kubernetes_pod_timeout", default=120, env=['TOIL_KUBERNETES_POD_TIMEOUT'])
1886
+ setOption("kubernetes_host_path")
1887
+ setOption("kubernetes_owner")
1888
+ setOption("kubernetes_service_account",)
1889
+ setOption("kubernetes_pod_timeout")
1887
1890
 
@@ -19,6 +19,7 @@ from toil.batchSystems.abstractBatchSystem import (BatchSystemSupport,
19
19
  from toil.batchSystems.singleMachine import SingleMachineBatchSystem
20
20
  from toil.common import Config
21
21
  from toil.job import JobDescription
22
+ from toil.lib.threading import cpu_count
22
23
 
23
24
  logger = logging.getLogger(__name__)
24
25
 
@@ -28,8 +29,9 @@ class BatchSystemLocalSupport(BatchSystemSupport):
28
29
 
29
30
  def __init__(self, config: Config, maxCores: float, maxMemory: int, maxDisk: int) -> None:
30
31
  super().__init__(config, maxCores, maxMemory, maxDisk)
32
+ max_local_jobs = config.max_local_jobs if config.max_local_jobs is not None else cpu_count()
31
33
  self.localBatch: SingleMachineBatchSystem = SingleMachineBatchSystem(
32
- config, maxCores, maxMemory, maxDisk, max_jobs=config.max_local_jobs
34
+ config, maxCores, maxMemory, maxDisk, max_jobs=max_local_jobs
33
35
  )
34
36
 
35
37
  def handleLocalJob(self, jobDesc: JobDescription) -> Optional[int]:
@@ -18,11 +18,9 @@ import os
18
18
  import pickle
19
19
  import pwd
20
20
  import socket
21
- import sys
22
21
  import time
23
22
  import traceback
24
23
  from argparse import ArgumentParser, _ArgumentGroup
25
- from contextlib import contextmanager
26
24
  from queue import Empty, Queue
27
25
  from typing import Dict, Optional, Union
28
26
  from urllib.parse import quote_plus
@@ -93,7 +91,7 @@ class MesosBatchSystem(BatchSystemLocalSupport,
93
91
  self.jobQueues = JobQueue()
94
92
 
95
93
  # Address of the Mesos master in the form host:port where host can be an IP or a hostname
96
- self.mesos_endpoint = config.mesos_endpoint
94
+ self.mesos_endpoint = config.mesos_endpoint or self.get_default_mesos_endpoint()
97
95
  if config.mesos_role is not None:
98
96
  self.mesos_role = config.mesos_role
99
97
  self.mesos_name = config.mesos_name
@@ -846,8 +844,10 @@ class MesosBatchSystem(BatchSystemLocalSupport,
846
844
 
847
845
  @classmethod
848
846
  def add_options(cls, parser: Union[ArgumentParser, _ArgumentGroup]) -> None:
849
- parser.add_argument("--mesosEndpoint", "--mesosMaster", dest="mesos_endpoint", default=cls.get_default_mesos_endpoint(),
850
- help="The host and port of the Mesos master separated by colon. (default: %(default)s)")
847
+ parser.add_argument("--mesosEndpoint", "--mesosMaster", dest="mesos_endpoint", default=None,
848
+ help=f"The host and port of the Mesos master separated by colon. If the provided value "
849
+ f"is None, the value will be generated at runtime. "
850
+ f"(Generated default: {cls.get_default_mesos_endpoint})")
851
851
  parser.add_argument("--mesosFrameworkId", dest="mesos_framework_id",
852
852
  help="Use a specific Mesos framework ID.")
853
853
  parser.add_argument("--mesosRole", dest="mesos_role",
@@ -857,8 +857,8 @@ class MesosBatchSystem(BatchSystemLocalSupport,
857
857
 
858
858
  @classmethod
859
859
  def setOptions(cls, setOption: OptionSetter):
860
- setOption("mesos_endpoint", None, None, cls.get_default_mesos_endpoint(), old_names=["mesosMasterAddress"])
861
- setOption("mesos_name", None, None, "toil")
860
+ setOption("mesos_endpoint", old_names=["mesosMasterAddress"])
861
+ setOption("mesos_name")
862
862
  setOption("mesos_role")
863
863
  setOption("mesos_framework_id")
864
864