toil 5.12.0__py3-none-any.whl → 6.1.0a1__py3-none-any.whl
This diff shows the content changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
- toil/__init__.py +18 -13
- toil/batchSystems/abstractBatchSystem.py +21 -10
- toil/batchSystems/abstractGridEngineBatchSystem.py +2 -2
- toil/batchSystems/awsBatch.py +14 -14
- toil/batchSystems/contained_executor.py +3 -3
- toil/batchSystems/htcondor.py +0 -1
- toil/batchSystems/kubernetes.py +34 -31
- toil/batchSystems/local_support.py +3 -1
- toil/batchSystems/mesos/batchSystem.py +7 -7
- toil/batchSystems/options.py +32 -83
- toil/batchSystems/registry.py +104 -23
- toil/batchSystems/singleMachine.py +16 -13
- toil/batchSystems/slurm.py +3 -3
- toil/batchSystems/torque.py +0 -1
- toil/bus.py +6 -8
- toil/common.py +532 -743
- toil/cwl/__init__.py +28 -32
- toil/cwl/cwltoil.py +523 -520
- toil/cwl/utils.py +55 -10
- toil/fileStores/__init__.py +2 -2
- toil/fileStores/abstractFileStore.py +36 -11
- toil/fileStores/cachingFileStore.py +607 -530
- toil/fileStores/nonCachingFileStore.py +43 -10
- toil/job.py +140 -75
- toil/jobStores/abstractJobStore.py +147 -79
- toil/jobStores/aws/jobStore.py +23 -9
- toil/jobStores/aws/utils.py +1 -2
- toil/jobStores/fileJobStore.py +117 -19
- toil/jobStores/googleJobStore.py +16 -7
- toil/jobStores/utils.py +5 -6
- toil/leader.py +71 -43
- toil/lib/accelerators.py +10 -5
- toil/lib/aws/__init__.py +3 -14
- toil/lib/aws/ami.py +22 -9
- toil/lib/aws/iam.py +21 -13
- toil/lib/aws/session.py +2 -16
- toil/lib/aws/utils.py +4 -5
- toil/lib/compatibility.py +1 -1
- toil/lib/conversions.py +7 -3
- toil/lib/docker.py +22 -23
- toil/lib/ec2.py +10 -6
- toil/lib/ec2nodes.py +106 -100
- toil/lib/encryption/_nacl.py +2 -1
- toil/lib/generatedEC2Lists.py +325 -18
- toil/lib/io.py +21 -0
- toil/lib/misc.py +1 -1
- toil/lib/resources.py +1 -1
- toil/lib/threading.py +74 -26
- toil/options/common.py +738 -0
- toil/options/cwl.py +336 -0
- toil/options/wdl.py +32 -0
- toil/provisioners/abstractProvisioner.py +1 -4
- toil/provisioners/aws/__init__.py +3 -6
- toil/provisioners/aws/awsProvisioner.py +6 -0
- toil/provisioners/clusterScaler.py +3 -2
- toil/provisioners/gceProvisioner.py +2 -2
- toil/realtimeLogger.py +2 -1
- toil/resource.py +24 -18
- toil/server/app.py +2 -3
- toil/server/cli/wes_cwl_runner.py +4 -4
- toil/server/utils.py +1 -1
- toil/server/wes/abstract_backend.py +3 -2
- toil/server/wes/amazon_wes_utils.py +5 -4
- toil/server/wes/tasks.py +2 -3
- toil/server/wes/toil_backend.py +2 -10
- toil/server/wsgi_app.py +2 -0
- toil/serviceManager.py +12 -10
- toil/statsAndLogging.py +5 -1
- toil/test/__init__.py +29 -54
- toil/test/batchSystems/batchSystemTest.py +11 -111
- toil/test/batchSystems/test_slurm.py +3 -2
- toil/test/cwl/cwlTest.py +213 -90
- toil/test/cwl/glob_dir.cwl +15 -0
- toil/test/cwl/preemptible.cwl +21 -0
- toil/test/cwl/preemptible_expression.cwl +28 -0
- toil/test/cwl/revsort.cwl +1 -1
- toil/test/cwl/revsort2.cwl +1 -1
- toil/test/docs/scriptsTest.py +0 -1
- toil/test/jobStores/jobStoreTest.py +27 -16
- toil/test/lib/aws/test_iam.py +4 -14
- toil/test/lib/aws/test_utils.py +0 -3
- toil/test/lib/dockerTest.py +4 -4
- toil/test/lib/test_ec2.py +11 -16
- toil/test/mesos/helloWorld.py +4 -5
- toil/test/mesos/stress.py +1 -1
- toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
- toil/test/provisioners/clusterScalerTest.py +6 -4
- toil/test/provisioners/clusterTest.py +14 -3
- toil/test/provisioners/gceProvisionerTest.py +0 -6
- toil/test/provisioners/restartScript.py +3 -2
- toil/test/server/serverTest.py +1 -1
- toil/test/sort/restart_sort.py +2 -1
- toil/test/sort/sort.py +2 -1
- toil/test/sort/sortTest.py +2 -13
- toil/test/src/autoDeploymentTest.py +45 -45
- toil/test/src/busTest.py +5 -5
- toil/test/src/checkpointTest.py +2 -2
- toil/test/src/deferredFunctionTest.py +1 -1
- toil/test/src/fileStoreTest.py +32 -16
- toil/test/src/helloWorldTest.py +1 -1
- toil/test/src/importExportFileTest.py +1 -1
- toil/test/src/jobDescriptionTest.py +2 -1
- toil/test/src/jobServiceTest.py +1 -1
- toil/test/src/jobTest.py +18 -18
- toil/test/src/miscTests.py +5 -3
- toil/test/src/promisedRequirementTest.py +3 -3
- toil/test/src/realtimeLoggerTest.py +1 -1
- toil/test/src/resourceTest.py +2 -2
- toil/test/src/restartDAGTest.py +1 -1
- toil/test/src/resumabilityTest.py +36 -2
- toil/test/src/retainTempDirTest.py +1 -1
- toil/test/src/systemTest.py +2 -2
- toil/test/src/toilContextManagerTest.py +2 -2
- toil/test/src/userDefinedJobArgTypeTest.py +1 -1
- toil/test/utils/toilDebugTest.py +98 -32
- toil/test/utils/toilKillTest.py +2 -2
- toil/test/utils/utilsTest.py +20 -0
- toil/test/wdl/wdltoil_test.py +148 -45
- toil/toilState.py +7 -6
- toil/utils/toilClean.py +1 -1
- toil/utils/toilConfig.py +36 -0
- toil/utils/toilDebugFile.py +60 -33
- toil/utils/toilDebugJob.py +39 -12
- toil/utils/toilDestroyCluster.py +1 -1
- toil/utils/toilKill.py +1 -1
- toil/utils/toilLaunchCluster.py +13 -2
- toil/utils/toilMain.py +3 -2
- toil/utils/toilRsyncCluster.py +1 -1
- toil/utils/toilSshCluster.py +1 -1
- toil/utils/toilStats.py +240 -143
- toil/utils/toilStatus.py +1 -4
- toil/version.py +11 -11
- toil/wdl/utils.py +2 -122
- toil/wdl/wdltoil.py +999 -386
- toil/worker.py +25 -31
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/METADATA +60 -53
- toil-6.1.0a1.dist-info/RECORD +237 -0
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/WHEEL +1 -1
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/entry_points.txt +0 -1
- toil/batchSystems/parasol.py +0 -379
- toil/batchSystems/tes.py +0 -459
- toil/test/batchSystems/parasolTestSupport.py +0 -117
- toil/test/wdl/builtinTest.py +0 -506
- toil/test/wdl/conftest.py +0 -23
- toil/test/wdl/toilwdlTest.py +0 -522
- toil/wdl/toilwdl.py +0 -141
- toil/wdl/versions/dev.py +0 -107
- toil/wdl/versions/draft2.py +0 -980
- toil/wdl/versions/v1.py +0 -794
- toil/wdl/wdl_analysis.py +0 -116
- toil/wdl/wdl_functions.py +0 -997
- toil/wdl/wdl_synthesis.py +0 -1011
- toil/wdl/wdl_types.py +0 -243
- toil-5.12.0.dist-info/RECORD +0 -244
- /toil/{wdl/versions → options}/__init__.py +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/LICENSE +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/top_level.txt +0 -0
toil/__init__.py
CHANGED
@@ -20,6 +20,7 @@ import sys
 import time
 from datetime import datetime
 from typing import TYPE_CHECKING, Optional, Tuple
+
 import requests
 from pytz import timezone
 
@@ -106,7 +107,8 @@ def toilPackageDirPath() -> str:
     The return value is guaranteed to end in '/toil'.
     """
     result = os.path.dirname(os.path.realpath(__file__))
-
+    if not result.endswith('/toil'):
+        raise RuntimeError("The top-level toil package is not named Toil.")
     return result
 
 
@@ -132,7 +134,8 @@ def resolveEntryPoint(entryPoint: str) -> str:
            # opposed to being included via --system-site-packages). For clusters this means that
            # if Toil is installed in a virtualenv on the leader, it must be installed in
            # a virtualenv located at the same path on each worker as well.
-
+            if not os.access(path, os.X_OK):
+                raise RuntimeError("Cannot access the Toil virtualenv. If installed in a virtualenv on a cluster, make sure that the virtualenv path is the same for the leader and workers.")
            return path
    # Otherwise, we aren't in a virtualenv, or we're in a virtualenv but Toil
    # came in via --system-site-packages, or we think the virtualenv might not
@@ -238,7 +241,8 @@ def customInitCmd() -> str:
 
 def _check_custom_bash_cmd(cmd_str):
     """Ensure that the Bash command doesn't contain invalid characters."""
-
+    if re.search(r'[\n\r\t]', cmd_str):
+        raise RuntimeError(f'"{cmd_str}" contains invalid characters (newline and/or tab).')
 
 
 def lookupEnvVar(name: str, envName: str, defaultValue: str) -> str:
@@ -370,11 +374,10 @@ def requestCheckRegularDocker(origAppliance: str, registryName: str, imageName:
     separate check is done for docker.io images.
 
     :param origAppliance: The full url of the docker image originally
-
-
-    :param
-    :param
-    :param tag: The tag used at that docker image's registry. e.g. ``latest``
+        specified by the user (or the default). For example, ``quay.io/ucsc_cgl/toil:latest``.
+    :param registryName: The url of a docker image's registry. For example, ``quay.io``.
+    :param imageName: The image, including path and excluding the tag. For example, ``ucsc_cgl/toil``.
+    :param tag: The tag used at that docker image's registry. For example, ``latest``.
     :raises: ApplianceImageNotFound if no match is found.
     :return: Return True if match found.
     """
@@ -399,9 +402,9 @@ def requestCheckDockerIo(origAppliance: str, imageName: str, tag: str) -> bool:
     URL is based on the docker v2 schema. Requires that an access token be fetched first.
 
     :param origAppliance: The full url of the docker image originally
-
-    :param imageName: The image, including path and excluding the tag.
-    :param tag: The tag used at that docker image's registry.
+        specified by the user (or the default). For example, ``ubuntu:latest``.
+    :param imageName: The image, including path and excluding the tag. For example, ``ubuntu``.
+    :param tag: The tag used at that docker image's registry. For example, ``latest``.
     :raises: ApplianceImageNotFound if no match is found.
     :return: Return True if match found.
     """
@@ -548,7 +551,8 @@ try:
            So if we ever want to refresh, Boto 3 wants to refresh too.
            """
            # This should only happen if we have expiring credentials, which we should only get from boto3
-
+            if self._boto3_resolver is None:
+                raise RuntimeError("The Boto3 resolver should not be None.")
 
            self._obtain_credentials_from_cache_or_boto3()
 
@@ -612,7 +616,8 @@ try:
                    content = f.read()
                    if content:
                        record = content.split('\n')
-
+                        if len(record) != 4:
+                            raise RuntimeError("Number of cached credentials is not 4.")
                        self._access_key = record[0]
                        self._secret_key = record[1]
                        self._security_token = record[2]
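Every hunk above follows the same pattern: a removed one-line check (its text is not shown in this rendering) is replaced by an explicit `if`/`raise RuntimeError` with a descriptive message, which keeps working even under `python -O`, where bare `assert` statements are stripped. A minimal, self-contained sketch of the pattern; the function name and sample data are illustrative, not Toil's API:

    def check_credential_record(record: list) -> None:
        # Explicit check-and-raise: the failure carries a message and is not
        # removed by optimized bytecode, unlike an assert statement.
        if len(record) != 4:
            raise RuntimeError("Number of cached credentials is not 4.")

    check_credential_record(["key", "secret", "token", "expiry"])  # passes silently
    # check_credential_record(["key"])  # would raise RuntimeError with the message above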
toil/batchSystems/abstractBatchSystem.py
CHANGED
@@ -15,19 +15,19 @@ import enum
 import logging
 import os
 import shutil
+import time
 from abc import ABC, abstractmethod
 from argparse import ArgumentParser, _ArgumentGroup
 from contextlib import contextmanager
 from threading import Condition
-import time
 from typing import (Any,
                     ContextManager,
                     Dict,
-                    List,
-                    Set,
                     Iterator,
+                    List,
                     NamedTuple,
                     Optional,
+                    Set,
                     Union,
                     cast)
 
@@ -37,6 +37,7 @@ from toil.common import Config, Toil, cacheDirName
 from toil.deferred import DeferredFunctionManager
 from toil.fileStores.abstractFileStore import AbstractFileStore
 from toil.job import JobDescription, ParsedRequirement, Requirer
+from toil.lib.memoize import memoize
 from toil.resource import Resource
 
 logger = logging.getLogger(__name__)
@@ -106,6 +107,8 @@ class AbstractBatchSystem(ABC):
     @abstractmethod
     def supportsWorkerCleanup(cls) -> bool:
         """
+        Whether this batch system supports worker cleanup.
+
         Indicates whether this batch system invokes
         :meth:`BatchSystemSupport.workerCleanup` after the last job for a
         particular workflow invocation finishes. Note that the term *worker*
@@ -119,7 +122,9 @@ class AbstractBatchSystem(ABC):
 
     def setUserScript(self, userScript: Resource) -> None:
         """
-        Set the user script for this workflow.
+        Set the user script for this workflow.
+
+        This method must be called before the first job is
         issued to this batch system, and only if :meth:`.supportsAutoDeployment` returns True,
         otherwise it will raise an exception.
 
@@ -134,7 +139,6 @@ class AbstractBatchSystem(ABC):
         bus, so that it can send informational messages about the jobs it is
         running to other Toil components.
         """
-        pass
 
     @abstractmethod
     def issueBatchJob(self, jobDesc: JobDescription, job_environment: Optional[Dict[str, str]] = None) -> int:
@@ -263,7 +267,6 @@ class AbstractBatchSystem(ABC):
         setOption(option_name, parsing_function=None, check_function=None, default=None, env=None)
         returning nothing, used to update run configuration as a side effect.
         """
-        pass
 
     def getWorkerContexts(self) -> List[ContextManager[Any]]:
         """
@@ -372,7 +375,7 @@ class BatchSystemSupport(AbstractBatchSystem):
         :param name: the environment variable to be set on the worker.
 
         :param value: if given, the environment variable given by name will be set to this value.
-
+            If None, the variable's current value will be used as the value on the worker
 
         :raise RuntimeError: if value is None and the name cannot be found in the environment
         """
@@ -392,6 +395,7 @@ class BatchSystemSupport(AbstractBatchSystem):
         # We do in fact send messages to the message bus.
         self._outbox = message_bus.outbox()
 
+    @memoize
     def get_batch_logs_dir(self) -> str:
         """
         Get the directory where the backing batch system should save its logs.
@@ -404,6 +408,9 @@ class BatchSystemSupport(AbstractBatchSystem):
         """
         if self.config.batch_logs_dir:
             # Use what is specified
+            if not os.path.isdir(self.config.batch_logs_dir):
+                # But if it doesn't exist, make it exist
+                os.makedirs(self.config.batch_logs_dir, exist_ok=True)
             return self.config.batch_logs_dir
         # And if nothing is specified use the workDir.
         return Toil.getToilWorkDir(self.config.workDir)
@@ -442,7 +449,9 @@ class BatchSystemSupport(AbstractBatchSystem):
     @staticmethod
     def workerCleanup(info: WorkerCleanupInfo) -> None:
         """
-        Cleans up the worker node on batch system shutdown.
+        Cleans up the worker node on batch system shutdown.
+
+        Also see :meth:`supportsWorkerCleanup`.
 
         :param WorkerCleanupInfo info: A named tuple consisting of all the relevant information
             for cleaning up the worker.
@@ -498,8 +507,10 @@ class NodeInfo:
 
 class AbstractScalableBatchSystem(AbstractBatchSystem):
     """
-    A batch system that supports a variable number of worker nodes.
-
+    A batch system that supports a variable number of worker nodes.
+
+    Used by :class:`toil.provisioners.clusterScaler.ClusterScaler`
+    to scale the number of worker nodes in the cluster
     up or down depending on overall load.
     """
 
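The new `@memoize` on `get_batch_logs_dir`, together with the added `os.makedirs` call, presumably means the log directory is resolved and created once and then served from a cache. Toil's decorator lives in `toil.lib.memoize`; the sketch below uses `functools.lru_cache` as a rough stand-in and an illustrative path, not Toil's actual implementation:

    import os
    from functools import lru_cache

    @lru_cache(maxsize=None)  # rough stand-in for toil.lib.memoize.memoize
    def get_batch_logs_dir(batch_logs_dir: str) -> str:
        # With memoization, the directory is checked and created only on the
        # first call for a given path; later calls return the cached result.
        if not os.path.isdir(batch_logs_dir):
            os.makedirs(batch_logs_dir, exist_ok=True)
        return batch_logs_dir

    first = get_batch_logs_dir("/tmp/toil-batch-logs-example")
    second = get_batch_logs_dir("/tmp/toil-batch-logs-example")  # served from cache
    assert first == second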
toil/batchSystems/abstractGridEngineBatchSystem.py
CHANGED
@@ -17,13 +17,13 @@ from abc import ABCMeta, abstractmethod
 from datetime import datetime
 from queue import Empty, Queue
 from threading import Lock, Thread
-from typing import
+from typing import Dict, List, Optional, Tuple, Union
 
 from toil.batchSystems.abstractBatchSystem import (BatchJobExitReason,
                                                    UpdatedBatchJobInfo)
 from toil.batchSystems.cleanup_support import BatchSystemCleanupSupport
 from toil.bus import ExternalBatchIdMessage
-from toil.job import
+from toil.job import AcceleratorRequirement
 from toil.lib.misc import CalledProcessErrorStderr
 
 logger = logging.getLogger(__name__)
toil/batchSystems/awsBatch.py
CHANGED
@@ -34,25 +34,25 @@ import tempfile
 import time
 import uuid
 from argparse import ArgumentParser, _ArgumentGroup
-from typing import Any,
+from typing import Any, Dict, Iterator, List, Optional, Set, Union
 
 from boto.exception import BotoServerError
 
 from toil import applianceSelf
 from toil.batchSystems.abstractBatchSystem import (EXIT_STATUS_UNAVAILABLE_VALUE,
                                                    BatchJobExitReason,
-
-
-from toil.batchSystems.options import OptionSetter
+                                                   InsufficientSystemResources,
+                                                   UpdatedBatchJobInfo)
 from toil.batchSystems.cleanup_support import BatchSystemCleanupSupport
 from toil.batchSystems.contained_executor import pack_job
-from toil.
+from toil.batchSystems.options import OptionSetter
+from toil.bus import ExternalBatchIdMessage
 from toil.common import Config, Toil
 from toil.job import JobDescription, Requirer
-from toil.lib.aws import get_current_aws_region
+from toil.lib.aws import get_current_aws_region
 from toil.lib.aws.session import establish_boto3_session
-from toil.lib.conversions import b_to_mib
-from toil.lib.misc import slow_down, unix_now_ms
+from toil.lib.conversions import b_to_mib
+from toil.lib.misc import slow_down, unix_now_ms
 from toil.lib.retry import retry
 from toil.resource import Resource
 
@@ -559,17 +559,17 @@ class AWSBatchBatchSystem(BatchSystemCleanupSupport):
 
     @classmethod
     def add_options(cls, parser: Union[ArgumentParser, _ArgumentGroup]) -> None:
-        parser.add_argument("--awsBatchRegion", dest="aws_batch_region", default=None,
+        parser.add_argument("--awsBatchRegion", dest="aws_batch_region", default=None, env_var="TOIL_AWS_REGION",
                            help="The AWS region containing the AWS Batch queue to submit to.")
-        parser.add_argument("--awsBatchQueue", dest="aws_batch_queue", default=None,
+        parser.add_argument("--awsBatchQueue", dest="aws_batch_queue", default=None, env_var="TOIL_AWS_BATCH_QUEUE",
                            help="The name or ARN of the AWS Batch queue to submit to.")
-        parser.add_argument("--awsBatchJobRoleArn", dest="aws_batch_job_role_arn", default=None,
+        parser.add_argument("--awsBatchJobRoleArn", dest="aws_batch_job_role_arn", default=None, env_var="TOIL_AWS_BATCH_JOB_ROLE_ARN",
                            help=("The ARN of an IAM role to run AWS Batch jobs as, so they "
                                  "can e.g. access a job store. Must be assumable by "
                                  "ecs-tasks.amazonaws.com."))
 
     @classmethod
     def setOptions(cls, setOption: OptionSetter) -> None:
-        setOption("aws_batch_region"
-        setOption("aws_batch_queue"
-        setOption("aws_batch_job_role_arn"
+        setOption("aws_batch_region")
+        setOption("aws_batch_queue")
+        setOption("aws_batch_job_role_arn")
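The `env_var=` keyword added to these `parser.add_argument` calls is not standard-library `argparse`; it matches the `configargparse` API, which the new `toil/options/` package in the file list suggests Toil 6.x uses for its option parser. A standalone sketch of how such an option resolves, assuming a plain `configargparse` parser:

    import os
    import configargparse

    parser = configargparse.ArgParser()
    # Mirrors the --awsBatchRegion option above: the value can come from the
    # command line, from the TOIL_AWS_REGION environment variable, or the default.
    parser.add_argument("--awsBatchRegion", dest="aws_batch_region", default=None,
                        env_var="TOIL_AWS_REGION",
                        help="The AWS region containing the AWS Batch queue to submit to.")

    os.environ["TOIL_AWS_REGION"] = "us-west-2"
    options = parser.parse_args([])          # no flag given: falls back to the env var
    print(options.aws_batch_region)          # -> us-west-2
    options = parser.parse_args(["--awsBatchRegion", "eu-west-1"])  # the flag wins over the env var
    print(options.aws_batch_region)          # -> eu-west-1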
@@ -14,7 +14,7 @@
|
|
|
14
14
|
"""
|
|
15
15
|
Executor for running inside a container.
|
|
16
16
|
|
|
17
|
-
Useful for Kubernetes and TES batch
|
|
17
|
+
Useful for Kubernetes batch system and TES batch system plugin.
|
|
18
18
|
"""
|
|
19
19
|
import base64
|
|
20
20
|
import logging
|
|
@@ -39,10 +39,10 @@ def pack_job(job_desc: JobDescription, user_script: Optional[Resource] = None, e
|
|
|
39
39
|
:param job_desc: Job description for the job to run.
|
|
40
40
|
:param user_script: User script that will be loaded before the job is run.
|
|
41
41
|
:param environment: Environment variable dict that will be applied before
|
|
42
|
-
|
|
42
|
+
the job is run.
|
|
43
43
|
|
|
44
44
|
:returns: Command to run the job, as an argument list that can be run
|
|
45
|
-
|
|
45
|
+
inside the Toil appliance container.
|
|
46
46
|
"""
|
|
47
47
|
# Make a job dict to send to the executor.
|
|
48
48
|
# TODO: Factor out executor setup from here and Kubernetes and TES
|
toil/batchSystems/htcondor.py
CHANGED
toil/batchSystems/kubernetes.py
CHANGED
@@ -24,22 +24,22 @@ import datetime
 import logging
 import math
 import os
-from queue import Empty, Queue
 import string
 import sys
 import tempfile
-from threading import Event, Thread, Condition, RLock
 import time
 import uuid
 from argparse import ArgumentParser, _ArgumentGroup
+from queue import Empty, Queue
+from threading import Condition, Event, RLock, Thread
 from typing import (Any,
                     Callable,
                     Dict,
                     Iterator,
                     List,
-                    Set,
                     Literal,
                     Optional,
+                    Set,
                     Tuple,
                     Type,
                     TypeVar,
@@ -104,7 +104,8 @@ from toil.batchSystems.abstractBatchSystem import (EXIT_STATUS_UNAVAILABLE_VALUE
 from toil.batchSystems.cleanup_support import BatchSystemCleanupSupport
 from toil.batchSystems.contained_executor import pack_job
 from toil.batchSystems.options import OptionSetter
-from toil.common import Config, Toil
+from toil.common import Config, Toil
+from toil.options.common import SYS_MAX_SIZE
 from toil.job import JobDescription, Requirer
 from toil.lib.conversions import human2bytes
 from toil.lib.misc import get_user_name, slow_down, utc_now
@@ -152,6 +153,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
         super().__init__(config, maxCores, maxMemory, maxDisk)
 
         # Re-type the config to make sure it has all the fields we need.
+        # This convinces MyPy we really do have this type.
         assert isinstance(config, KubernetesBatchSystem.KubernetesConfig)
 
         # Turn down log level for Kubernetes modules and dependencies.
@@ -167,26 +169,26 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
         self._apis: KubernetesBatchSystem._ApiStorageDict = {}
 
         # Get our namespace (and our Kubernetes credentials to make sure they exist)
-        self.namespace = self._api('namespace')
+        self.namespace: str = self._api('namespace')
 
         # Decide if we are going to mount a Kubernetes host path as the Toil
         # work dir in the workers, for shared caching.
-        self.host_path = config.kubernetes_host_path
+        self.host_path: Optional[str] = config.kubernetes_host_path
 
         # Get the service account name to use, if any.
-        self.service_account = config.kubernetes_service_account
+        self.service_account: Optional[str] = config.kubernetes_service_account
 
         # Get how long we should wait for a pod that lands on a node to
         # actually start.
-        self.pod_timeout = config.kubernetes_pod_timeout
+        self.pod_timeout: float = config.kubernetes_pod_timeout
 
         # Get the username to mark jobs with
-        username = config.kubernetes_owner
+        username = config.kubernetes_owner or self.get_default_kubernetes_owner()
         # And a unique ID for the run
         self.unique_id = uuid.uuid4()
 
         # Create a prefix for jobs, starting with our username
-        self.job_prefix = f'{username}-toil-{self.unique_id}-'
+        self.job_prefix: str = f'{username}-toil-{self.unique_id}-'
         # Instead of letting Kubernetes assign unique job names, we assign our
         # own based on a numerical job ID. This functionality is managed by the
         # BatchSystemLocalSupport.
@@ -199,17 +201,17 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
         # conformance tests. To work around this, we tag all our jobs with an
         # explicit TTL that is long enough that we're sure we can deal with all
         # the finished jobs before they expire.
-        self.finished_job_ttl = 3600  # seconds
+        self.finished_job_ttl: int = 3600  # seconds
 
         # Here is where we will store the user script resource object if we get one.
         self.user_script: Optional[Resource] = None
 
         # Ge the image to deploy from Toil's configuration
-        self.docker_image = applianceSelf()
+        self.docker_image: str = applianceSelf()
 
         # Try and guess what Toil work dir the workers will use.
         # We need to be able to provision (possibly shared) space there.
-        self.worker_work_dir = Toil.getToilWorkDir(config.workDir)
+        self.worker_work_dir: str = Toil.getToilWorkDir(config.workDir)
         if (config.workDir is None and
             os.getenv('TOIL_WORKDIR') is None and
             self.worker_work_dir == tempfile.gettempdir()):
@@ -226,17 +228,17 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
             self.environment['TMPDIR'] = '/var/tmp'
 
         # Get the name of the AWS secret, if any, to mount in containers.
-        self.aws_secret_name = os.environ.get("TOIL_AWS_SECRET_NAME", None)
+        self.aws_secret_name: Optional[str] = os.environ.get("TOIL_AWS_SECRET_NAME", None)
 
         # Set this to True to enable the experimental wait-for-job-update code
-        self.enable_watching = os.environ.get("KUBE_WATCH_ENABLED", False)
+        self.enable_watching: bool = os.environ.get("KUBE_WATCH_ENABLED", False)
 
         # This will be a label to select all our jobs.
-        self.run_id = f'toil-{self.unique_id}'
+        self.run_id: str = f'toil-{self.unique_id}'
 
         # Keep track of available resources.
         maxMillicores = int(SYS_MAX_SIZE if self.maxCores == SYS_MAX_SIZE else self.maxCores * 1000)
-        self.resource_sources = [
+        self.resource_sources: List[ResourcePool] = [
            # A pool representing available job slots
            ResourcePool(self.config.max_jobs, 'job slots'),
            # A pool representing available CPU in units of millicores (1 CPU
@@ -261,16 +263,16 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
         self._killed_queue_jobs: Set[int] = set()
 
         # We use this event to signal shutdown
-        self._shutting_down = Event()
+        self._shutting_down: Event = Event()
 
         # A lock to protect critical regions when working with queued jobs.
-        self._mutex = RLock()
+        self._mutex: RLock = RLock()
 
         # A condition set to true when there is more work to do. e.g.: new job
         # in the queue or any resource becomes available.
-        self._work_available = Condition(lock=self._mutex)
+        self._work_available: Condition = Condition(lock=self._mutex)
 
-        self.schedulingThread = Thread(target=self._scheduler, daemon=True)
+        self.schedulingThread: Thread = Thread(target=self._scheduler, daemon=True)
         self.schedulingThread.start()
 
     def _pretty_print(self, kubernetes_object: Any) -> str:
@@ -1864,24 +1866,25 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
 
     @classmethod
     def add_options(cls, parser: Union[ArgumentParser, _ArgumentGroup]) -> None:
-        parser.add_argument("--kubernetesHostPath", dest="kubernetes_host_path", default=None,
+        parser.add_argument("--kubernetesHostPath", dest="kubernetes_host_path", default=None, env_var="TOIL_KUBERNETES_HOST_PATH",
                            help="Path on Kubernetes hosts to use as shared inter-pod temp directory. "
                                 "(default: %(default)s)")
-        parser.add_argument("--kubernetesOwner", dest="kubernetes_owner", default=
-                            help="Username to mark Kubernetes jobs with.
-                                 "
-
+        parser.add_argument("--kubernetesOwner", dest="kubernetes_owner", default=None, env_var="TOIL_KUBERNETES_OWNER",
+                            help=f"Username to mark Kubernetes jobs with. If the provided value is None, the value will "
+                                 f"be generated at runtime. "
+                                 f"(Generated default: {cls.get_default_kubernetes_owner()})")
+        parser.add_argument("--kubernetesServiceAccount", dest="kubernetes_service_account", default=None, env_var="TOIL_KUBERNETES_SERVICE_ACCOUNT",
                            help="Service account to run jobs as. "
                                 "(default: %(default)s)")
-        parser.add_argument("--kubernetesPodTimeout", dest="kubernetes_pod_timeout", default=120,
+        parser.add_argument("--kubernetesPodTimeout", dest="kubernetes_pod_timeout", default=120, env_var="TOIL_KUBERNETES_POD_TIMEOUT", type=float,
                            help="Seconds to wait for a scheduled Kubernetes pod to start running. "
                                 "(default: %(default)s)")
 
     OptionType = TypeVar('OptionType')
     @classmethod
     def setOptions(cls, setOption: OptionSetter) -> None:
-        setOption("kubernetes_host_path"
-        setOption("kubernetes_owner"
-        setOption("kubernetes_service_account",
-        setOption("kubernetes_pod_timeout"
+        setOption("kubernetes_host_path")
+        setOption("kubernetes_owner")
+        setOption("kubernetes_service_account",)
+        setOption("kubernetes_pod_timeout")
 
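Several options here move from a hard-coded default to `default=None` plus a value generated at runtime (for example `config.kubernetes_owner or self.get_default_kubernetes_owner()`), so the help text can only describe the generated default. A small sketch of that idea, with a hypothetical `get_default_owner()` helper rather than Toil's real one:

    import getpass
    from typing import Optional

    def get_default_owner() -> str:
        # Hypothetical stand-in for get_default_kubernetes_owner(): derive a
        # label-safe username at runtime instead of at option-definition time.
        return getpass.getuser().lower() or "toil"

    def resolve_owner(configured: Optional[str]) -> str:
        # None (the parser default) means "generate one now"; an explicit value wins.
        return configured or get_default_owner()

    print(resolve_owner(None))        # generated, e.g. the current login name
    print(resolve_owner("ci-robot"))  # explicit configuration is used as-is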
toil/batchSystems/local_support.py
CHANGED
@@ -19,6 +19,7 @@ from toil.batchSystems.abstractBatchSystem import (BatchSystemSupport,
 from toil.batchSystems.singleMachine import SingleMachineBatchSystem
 from toil.common import Config
 from toil.job import JobDescription
+from toil.lib.threading import cpu_count
 
 logger = logging.getLogger(__name__)
 
@@ -28,8 +29,9 @@ class BatchSystemLocalSupport(BatchSystemSupport):
 
     def __init__(self, config: Config, maxCores: float, maxMemory: int, maxDisk: int) -> None:
         super().__init__(config, maxCores, maxMemory, maxDisk)
+        max_local_jobs = config.max_local_jobs if config.max_local_jobs is not None else cpu_count()
         self.localBatch: SingleMachineBatchSystem = SingleMachineBatchSystem(
-            config, maxCores, maxMemory, maxDisk, max_jobs=
+            config, maxCores, maxMemory, maxDisk, max_jobs=max_local_jobs
         )
 
     def handleLocalJob(self, jobDesc: JobDescription) -> Optional[int]:
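Note the explicit `is not None` test for `max_local_jobs`: unlike the `or` fallback used for the Kubernetes owner above, it preserves a falsy but deliberate setting (such as an explicit 0) instead of silently replacing it with the CPU count. A short sketch, using `os.cpu_count()` in place of `toil.lib.threading.cpu_count`, which is assumed to behave similarly:

    import os
    from typing import Optional

    def resolve_max_local_jobs(configured: Optional[int]) -> int:
        # `if ... is not None` keeps falsy-but-valid values such as an explicit 0,
        # which `configured or os.cpu_count()` would silently override.
        return configured if configured is not None else (os.cpu_count() or 1)

    print(resolve_max_local_jobs(None))  # falls back to the detected CPU count
    print(resolve_max_local_jobs(0))     # an explicit 0 is preserved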
toil/batchSystems/mesos/batchSystem.py
CHANGED
@@ -18,11 +18,9 @@ import os
 import pickle
 import pwd
 import socket
-import sys
 import time
 import traceback
 from argparse import ArgumentParser, _ArgumentGroup
-from contextlib import contextmanager
 from queue import Empty, Queue
 from typing import Dict, Optional, Union
 from urllib.parse import quote_plus
@@ -93,7 +91,7 @@ class MesosBatchSystem(BatchSystemLocalSupport,
         self.jobQueues = JobQueue()
 
         # Address of the Mesos master in the form host:port where host can be an IP or a hostname
-        self.mesos_endpoint = config.mesos_endpoint
+        self.mesos_endpoint = config.mesos_endpoint or self.get_default_mesos_endpoint()
         if config.mesos_role is not None:
             self.mesos_role = config.mesos_role
         self.mesos_name = config.mesos_name
@@ -846,8 +844,10 @@ class MesosBatchSystem(BatchSystemLocalSupport,
 
     @classmethod
     def add_options(cls, parser: Union[ArgumentParser, _ArgumentGroup]) -> None:
-        parser.add_argument("--mesosEndpoint", "--mesosMaster", dest="mesos_endpoint", default=
-                            help="The host and port of the Mesos master separated by colon.
+        parser.add_argument("--mesosEndpoint", "--mesosMaster", dest="mesos_endpoint", default=None,
+                            help=f"The host and port of the Mesos master separated by colon. If the provided value "
+                                 f"is None, the value will be generated at runtime. "
+                                 f"(Generated default: {cls.get_default_mesos_endpoint})")
         parser.add_argument("--mesosFrameworkId", dest="mesos_framework_id",
                            help="Use a specific Mesos framework ID.")
         parser.add_argument("--mesosRole", dest="mesos_role",
@@ -857,8 +857,8 @@ class MesosBatchSystem(BatchSystemLocalSupport,
 
     @classmethod
     def setOptions(cls, setOption: OptionSetter):
-        setOption("mesos_endpoint",
-        setOption("mesos_name"
+        setOption("mesos_endpoint", old_names=["mesosMasterAddress"])
+        setOption("mesos_name")
         setOption("mesos_role")
         setOption("mesos_framework_id")
 
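The `old_names=["mesosMasterAddress"]` argument passed to `setOption` suggests the option plumbing can also accept a value stored under a legacy configuration name; that keyword does not appear in the `OptionSetter` docstring quoted earlier, so the sketch below is only a generic illustration of the idea, not Toil's implementation:

    from typing import Any, Dict, List, Optional

    def set_option(config: Dict[str, Any], options: Dict[str, Any],
                   name: str, old_names: Optional[List[str]] = None) -> None:
        # Generic illustration: prefer the current option name, but fall back to
        # any legacy names so old workflows and config files keep working.
        for candidate in [name] + (old_names or []):
            if options.get(candidate) is not None:
                config[name] = options[candidate]
                return

    cfg: Dict[str, Any] = {}
    set_option(cfg, {"mesosMasterAddress": "10.0.0.5:5050"},
               "mesos_endpoint", old_names=["mesosMasterAddress"])
    print(cfg)  # {'mesos_endpoint': '10.0.0.5:5050'}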