toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +122 -315
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +173 -89
- toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
- toil/batchSystems/awsBatch.py +244 -135
- toil/batchSystems/cleanup_support.py +26 -16
- toil/batchSystems/contained_executor.py +31 -28
- toil/batchSystems/gridengine.py +86 -50
- toil/batchSystems/htcondor.py +166 -89
- toil/batchSystems/kubernetes.py +632 -382
- toil/batchSystems/local_support.py +20 -15
- toil/batchSystems/lsf.py +134 -81
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +290 -151
- toil/batchSystems/mesos/executor.py +79 -50
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +46 -28
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +296 -125
- toil/batchSystems/slurm.py +603 -138
- toil/batchSystems/torque.py +47 -33
- toil/bus.py +186 -76
- toil/common.py +664 -368
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1136 -483
- toil/cwl/utils.py +17 -22
- toil/deferred.py +63 -42
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +140 -60
- toil/fileStores/cachingFileStore.py +717 -269
- toil/fileStores/nonCachingFileStore.py +116 -87
- toil/job.py +1225 -368
- toil/jobStores/abstractJobStore.py +416 -266
- toil/jobStores/aws/jobStore.py +863 -477
- toil/jobStores/aws/utils.py +201 -120
- toil/jobStores/conftest.py +3 -2
- toil/jobStores/fileJobStore.py +292 -154
- toil/jobStores/googleJobStore.py +140 -74
- toil/jobStores/utils.py +36 -15
- toil/leader.py +668 -272
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +74 -31
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +214 -39
- toil/lib/aws/utils.py +287 -231
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +104 -47
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +361 -199
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +5 -3
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +141 -15
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +66 -21
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +68 -15
- toil/lib/retry.py +126 -81
- toil/lib/threading.py +299 -82
- toil/lib/throttle.py +16 -15
- toil/options/common.py +843 -409
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +73 -17
- toil/provisioners/__init__.py +117 -46
- toil/provisioners/abstractProvisioner.py +332 -157
- toil/provisioners/aws/__init__.py +70 -33
- toil/provisioners/aws/awsProvisioner.py +1145 -715
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +155 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +128 -62
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +224 -70
- toil/test/__init__.py +282 -183
- toil/test/batchSystems/batchSystemTest.py +460 -210
- toil/test/batchSystems/batch_system_plugin_test.py +90 -0
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +110 -49
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +56 -0
- toil/test/cwl/cwlTest.py +496 -287
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/cwl/seqtk_seq.cwl +1 -1
- toil/test/docs/scriptsTest.py +69 -46
- toil/test/jobStores/jobStoreTest.py +427 -264
- toil/test/lib/aws/test_iam.py +118 -50
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +58 -50
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/__init__.py +13 -0
- toil/test/options/options.py +42 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +166 -44
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +141 -101
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +32 -24
- toil/test/src/environmentTest.py +135 -0
- toil/test/src/fileStoreTest.py +539 -272
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +46 -21
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +121 -71
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +10 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +73 -23
- toil/test/utils/toilDebugTest.py +103 -33
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +245 -106
- toil/test/wdl/wdltoil_test.py +818 -149
- toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
- toil/toilState.py +120 -35
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +214 -27
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +256 -140
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +32 -14
- toil/utils/toilSshCluster.py +49 -22
- toil/utils/toilStats.py +356 -273
- toil/utils/toilStatus.py +292 -139
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +12 -12
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3913 -1033
- toil/worker.py +367 -184
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-6.1.0a1.dist-info/METADATA +0 -125
- toil-6.1.0a1.dist-info/RECORD +0 -237
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/batchSystems/kubernetes.py
CHANGED
|
@@ -30,93 +30,90 @@ import tempfile
|
|
|
30
30
|
import time
|
|
31
31
|
import uuid
|
|
32
32
|
from argparse import ArgumentParser, _ArgumentGroup
|
|
33
|
+
from collections.abc import Iterator
|
|
33
34
|
from queue import Empty, Queue
|
|
34
35
|
from threading import Condition, Event, RLock, Thread
|
|
35
|
-
from typing import
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
Iterator,
|
|
39
|
-
List,
|
|
40
|
-
Literal,
|
|
41
|
-
Optional,
|
|
42
|
-
Set,
|
|
43
|
-
Tuple,
|
|
44
|
-
Type,
|
|
45
|
-
TypeVar,
|
|
46
|
-
Union,
|
|
47
|
-
cast,
|
|
48
|
-
overload)
|
|
36
|
+
from typing import Any, Callable, Literal, Optional, TypeVar, Union, cast, overload
|
|
37
|
+
|
|
38
|
+
from toil.lib.conversions import opt_strtobool
|
|
49
39
|
|
|
50
40
|
if sys.version_info < (3, 10):
|
|
51
41
|
from typing_extensions import ParamSpec
|
|
52
42
|
else:
|
|
53
43
|
from typing import ParamSpec
|
|
54
|
-
|
|
55
|
-
|
|
44
|
+
|
|
45
|
+
if sys.version_info < (3, 11):
|
|
46
|
+
from typing_extensions import NotRequired
|
|
56
47
|
else:
|
|
57
|
-
from
|
|
58
|
-
|
|
48
|
+
from typing import NotRequired
|
|
49
|
+
|
|
50
|
+
from typing import Protocol, TypedDict, runtime_checkable
|
|
51
|
+
|
|
59
52
|
import urllib3
|
|
60
53
|
import yaml
|
|
54
|
+
|
|
61
55
|
# The Right Way to use the Kubernetes module is to `import kubernetes` and then you get all your stuff as like ApiClient. But this doesn't work for the stubs: the stubs seem to only support importing things from the internal modules in `kubernetes` where they are actually defined. See for example <https://github.com/MaterializeInc/kubernetes-stubs/issues/9 and <https://github.com/MaterializeInc/kubernetes-stubs/issues/10>. So we just import all the things we use into our global namespace here.
|
|
62
|
-
from kubernetes.client import (
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
56
|
+
from kubernetes.client import (
|
|
57
|
+
BatchV1Api,
|
|
58
|
+
CoreV1Api,
|
|
59
|
+
CustomObjectsApi,
|
|
60
|
+
V1Affinity,
|
|
61
|
+
V1Container,
|
|
62
|
+
V1ContainerStatus,
|
|
63
|
+
V1EmptyDirVolumeSource,
|
|
64
|
+
V1HostPathVolumeSource,
|
|
65
|
+
V1Job,
|
|
66
|
+
V1JobCondition,
|
|
67
|
+
V1JobSpec,
|
|
68
|
+
V1NodeAffinity,
|
|
69
|
+
V1NodeSelector,
|
|
70
|
+
V1NodeSelectorRequirement,
|
|
71
|
+
V1NodeSelectorTerm,
|
|
72
|
+
V1ObjectMeta,
|
|
73
|
+
V1Pod,
|
|
74
|
+
V1PodSpec,
|
|
75
|
+
V1PodTemplateSpec,
|
|
76
|
+
V1PreferredSchedulingTerm,
|
|
77
|
+
V1ResourceRequirements,
|
|
78
|
+
V1SecretVolumeSource,
|
|
79
|
+
V1SecurityContext,
|
|
80
|
+
V1Toleration,
|
|
81
|
+
V1Volume,
|
|
82
|
+
V1VolumeMount,
|
|
83
|
+
)
|
|
87
84
|
from kubernetes.client.api_client import ApiClient
|
|
88
85
|
from kubernetes.client.exceptions import ApiException
|
|
89
86
|
from kubernetes.config.config_exception import ConfigException
|
|
90
87
|
from kubernetes.config.incluster_config import load_incluster_config
|
|
91
|
-
from kubernetes.config.kube_config import
|
|
92
|
-
|
|
88
|
+
from kubernetes.config.kube_config import list_kube_config_contexts, load_kube_config
|
|
89
|
+
|
|
93
90
|
# TODO: Watch API is not typed yet
|
|
94
91
|
from kubernetes.watch import Watch # type: ignore
|
|
95
|
-
# typing-extensions dependency on Pythons that are new enough.
|
|
96
|
-
from typing_extensions import NotRequired
|
|
97
92
|
|
|
98
93
|
from toil import applianceSelf
|
|
99
|
-
from toil.batchSystems.abstractBatchSystem import (
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
94
|
+
from toil.batchSystems.abstractBatchSystem import (
|
|
95
|
+
EXIT_STATUS_UNAVAILABLE_VALUE,
|
|
96
|
+
BatchJobExitReason,
|
|
97
|
+
InsufficientSystemResources,
|
|
98
|
+
ResourcePool,
|
|
99
|
+
UpdatedBatchJobInfo,
|
|
100
|
+
)
|
|
104
101
|
from toil.batchSystems.cleanup_support import BatchSystemCleanupSupport
|
|
105
102
|
from toil.batchSystems.contained_executor import pack_job
|
|
106
103
|
from toil.batchSystems.options import OptionSetter
|
|
107
104
|
from toil.common import Config, Toil
|
|
108
|
-
from toil.options.common import SYS_MAX_SIZE
|
|
109
105
|
from toil.job import JobDescription, Requirer
|
|
110
106
|
from toil.lib.conversions import human2bytes
|
|
111
107
|
from toil.lib.misc import get_user_name, slow_down, utc_now
|
|
112
108
|
from toil.lib.retry import ErrorCondition, retry
|
|
109
|
+
from toil.options.common import SYS_MAX_SIZE
|
|
113
110
|
from toil.resource import Resource
|
|
114
111
|
|
|
115
112
|
logger = logging.getLogger(__name__)
|
|
116
|
-
retryable_kubernetes_errors:
|
|
113
|
+
retryable_kubernetes_errors: list[Union[type[Exception], ErrorCondition]] = [
|
|
117
114
|
urllib3.exceptions.MaxRetryError,
|
|
118
115
|
urllib3.exceptions.ProtocolError,
|
|
119
|
-
ApiException
|
|
116
|
+
ApiException,
|
|
120
117
|
]
|
|
121
118
|
|
|
122
119
|
|
|
@@ -130,8 +127,10 @@ def is_retryable_kubernetes_error(e: Exception) -> bool:
|
|
|
130
127
|
return True
|
|
131
128
|
return False
|
|
132
129
|
|
|
130
|
+
|
|
133
131
|
# Represents a collection of label or taint keys and their sets of acceptable (or unacceptable) values.
|
|
134
|
-
KeyValuesList =
|
|
132
|
+
KeyValuesList = list[tuple[str, list[str]]]
|
|
133
|
+
|
|
135
134
|
|
|
136
135
|
class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
137
136
|
@classmethod
|
|
@@ -148,8 +147,9 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
148
147
|
core: NotRequired[CoreV1Api]
|
|
149
148
|
customObjects: NotRequired[CustomObjectsApi]
|
|
150
149
|
|
|
151
|
-
|
|
152
|
-
|
|
150
|
+
def __init__(
|
|
151
|
+
self, config: Config, maxCores: int, maxMemory: int, maxDisk: int
|
|
152
|
+
) -> None:
|
|
153
153
|
super().__init__(config, maxCores, maxMemory, maxDisk)
|
|
154
154
|
|
|
155
155
|
# Re-type the config to make sure it has all the fields we need.
|
|
@@ -160,8 +160,8 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
160
160
|
# Otherwise if we are at debug log level, we dump every
|
|
161
161
|
# request/response to Kubernetes, including tokens which we shouldn't
|
|
162
162
|
# reveal on CI.
|
|
163
|
-
logging.getLogger(
|
|
164
|
-
logging.getLogger(
|
|
163
|
+
logging.getLogger("kubernetes").setLevel(logging.ERROR)
|
|
164
|
+
logging.getLogger("requests_oauthlib").setLevel(logging.ERROR)
|
|
165
165
|
|
|
166
166
|
# This will hold the last time our Kubernetes credentials were refreshed
|
|
167
167
|
self.credential_time: Optional[datetime.datetime] = None
|
|
@@ -169,7 +169,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
169
169
|
self._apis: KubernetesBatchSystem._ApiStorageDict = {}
|
|
170
170
|
|
|
171
171
|
# Get our namespace (and our Kubernetes credentials to make sure they exist)
|
|
172
|
-
self.namespace: str = self._api(
|
|
172
|
+
self.namespace: str = self._api("namespace")
|
|
173
173
|
|
|
174
174
|
# Decide if we are going to mount a Kubernetes host path as the Toil
|
|
175
175
|
# work dir in the workers, for shared caching.
|
|
@@ -188,7 +188,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
188
188
|
self.unique_id = uuid.uuid4()
|
|
189
189
|
|
|
190
190
|
# Create a prefix for jobs, starting with our username
|
|
191
|
-
self.job_prefix: str = f
|
|
191
|
+
self.job_prefix: str = f"{username}-toil-{self.unique_id}-"
|
|
192
192
|
# Instead of letting Kubernetes assign unique job names, we assign our
|
|
193
193
|
# own based on a numerical job ID. This functionality is managed by the
|
|
194
194
|
# BatchSystemLocalSupport.
|
|
@@ -212,55 +212,61 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
212
212
|
# Try and guess what Toil work dir the workers will use.
|
|
213
213
|
# We need to be able to provision (possibly shared) space there.
|
|
214
214
|
self.worker_work_dir: str = Toil.getToilWorkDir(config.workDir)
|
|
215
|
-
if (
|
|
216
|
-
|
|
217
|
-
|
|
215
|
+
if (
|
|
216
|
+
config.workDir is None
|
|
217
|
+
and os.getenv("TOIL_WORKDIR") is None
|
|
218
|
+
and self.worker_work_dir == tempfile.gettempdir()
|
|
219
|
+
):
|
|
218
220
|
|
|
219
221
|
# We defaulted to the system temp directory. But we think the
|
|
220
222
|
# worker Dockerfiles will make them use /var/lib/toil instead.
|
|
221
223
|
# TODO: Keep this in sync with the Dockerfile.
|
|
222
|
-
self.worker_work_dir =
|
|
224
|
+
self.worker_work_dir = "/var/lib/toil"
|
|
223
225
|
|
|
224
226
|
# A Toil-managed Kubernetes cluster will have most of its temp space at
|
|
225
227
|
# /var/tmp, which is where really large temp files really belong
|
|
226
228
|
# according to https://systemd.io/TEMPORARY_DIRECTORIES/. So we will
|
|
227
229
|
# set the default temporary directory to there for all our jobs.
|
|
228
|
-
self.environment[
|
|
230
|
+
self.environment["TMPDIR"] = "/var/tmp"
|
|
229
231
|
|
|
230
232
|
# Get the name of the AWS secret, if any, to mount in containers.
|
|
231
|
-
self.aws_secret_name: Optional[str] = os.environ.get(
|
|
233
|
+
self.aws_secret_name: Optional[str] = os.environ.get(
|
|
234
|
+
"TOIL_AWS_SECRET_NAME", None
|
|
235
|
+
)
|
|
232
236
|
|
|
233
237
|
# Set this to True to enable the experimental wait-for-job-update code
|
|
234
238
|
self.enable_watching: bool = os.environ.get("KUBE_WATCH_ENABLED", False)
|
|
235
239
|
|
|
236
240
|
# This will be a label to select all our jobs.
|
|
237
|
-
self.run_id: str = f
|
|
241
|
+
self.run_id: str = f"toil-{self.unique_id}"
|
|
238
242
|
|
|
239
243
|
# Keep track of available resources.
|
|
240
|
-
maxMillicores = int(
|
|
241
|
-
|
|
244
|
+
maxMillicores = int(
|
|
245
|
+
SYS_MAX_SIZE if self.maxCores == SYS_MAX_SIZE else self.maxCores * 1000
|
|
246
|
+
)
|
|
247
|
+
self.resource_sources: list[ResourcePool] = [
|
|
242
248
|
# A pool representing available job slots
|
|
243
|
-
ResourcePool(self.config.max_jobs,
|
|
249
|
+
ResourcePool(self.config.max_jobs, "job slots"),
|
|
244
250
|
# A pool representing available CPU in units of millicores (1 CPU
|
|
245
251
|
# unit = 1000 millicores)
|
|
246
|
-
ResourcePool(maxMillicores,
|
|
252
|
+
ResourcePool(maxMillicores, "cores"),
|
|
247
253
|
# A pool representing available memory in bytes
|
|
248
|
-
ResourcePool(self.maxMemory,
|
|
254
|
+
ResourcePool(self.maxMemory, "memory"),
|
|
249
255
|
# A pool representing the available space in bytes
|
|
250
|
-
ResourcePool(self.maxDisk,
|
|
256
|
+
ResourcePool(self.maxDisk, "disk"),
|
|
251
257
|
]
|
|
252
258
|
|
|
253
259
|
# A set of job IDs that are queued (useful for getIssuedBatchJobIDs())
|
|
254
|
-
self._queued_job_ids:
|
|
260
|
+
self._queued_job_ids: set[int] = set()
|
|
255
261
|
|
|
256
262
|
# Keep track of the acquired resources for each job
|
|
257
|
-
self._acquired_resources:
|
|
263
|
+
self._acquired_resources: dict[str, list[int]] = {}
|
|
258
264
|
|
|
259
265
|
# Queue for jobs to be submitted to the Kubernetes cluster
|
|
260
|
-
self._jobs_queue: Queue[
|
|
266
|
+
self._jobs_queue: Queue[tuple[int, JobDescription, V1PodSpec]] = Queue()
|
|
261
267
|
|
|
262
268
|
# A set of job IDs that should be killed
|
|
263
|
-
self._killed_queue_jobs:
|
|
269
|
+
self._killed_queue_jobs: set[int] = set()
|
|
264
270
|
|
|
265
271
|
# We use this event to signal shutdown
|
|
266
272
|
self._shutting_down: Event = Event()
|
|
@@ -284,7 +290,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
284
290
|
"""
|
|
285
291
|
|
|
286
292
|
if not kubernetes_object:
|
|
287
|
-
return
|
|
293
|
+
return "None"
|
|
288
294
|
|
|
289
295
|
# We need a Kubernetes widget that knows how to translate
|
|
290
296
|
# its data structures to nice YAML-able dicts. See:
|
|
@@ -294,7 +300,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
294
300
|
# Convert to a dict
|
|
295
301
|
root_dict = api_client.sanitize_for_serialization(kubernetes_object)
|
|
296
302
|
|
|
297
|
-
def drop_boring(here:
|
|
303
|
+
def drop_boring(here: dict[str, Any]) -> None:
|
|
298
304
|
"""
|
|
299
305
|
Drop boring fields recursively.
|
|
300
306
|
"""
|
|
@@ -302,7 +308,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
302
308
|
for k, v in here.items():
|
|
303
309
|
if isinstance(v, dict):
|
|
304
310
|
drop_boring(v)
|
|
305
|
-
if k in [
|
|
311
|
+
if k in ["managedFields"]:
|
|
306
312
|
boring_keys.append(k)
|
|
307
313
|
for k in boring_keys:
|
|
308
314
|
del here[k]
|
|
@@ -312,33 +318,43 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
312
318
|
|
|
313
319
|
@overload
|
|
314
320
|
def _api(
|
|
315
|
-
self,
|
|
316
|
-
|
|
317
|
-
|
|
321
|
+
self,
|
|
322
|
+
kind: Literal["batch"],
|
|
323
|
+
max_age_seconds: float = 5 * 60,
|
|
324
|
+
errors: Optional[list[int]] = None,
|
|
325
|
+
) -> BatchV1Api: ...
|
|
318
326
|
|
|
319
327
|
@overload
|
|
320
328
|
def _api(
|
|
321
|
-
self,
|
|
322
|
-
|
|
323
|
-
|
|
329
|
+
self,
|
|
330
|
+
kind: Literal["core"],
|
|
331
|
+
max_age_seconds: float = 5 * 60,
|
|
332
|
+
errors: Optional[list[int]] = None,
|
|
333
|
+
) -> CoreV1Api: ...
|
|
324
334
|
|
|
325
335
|
@overload
|
|
326
336
|
def _api(
|
|
327
|
-
self,
|
|
328
|
-
|
|
329
|
-
|
|
337
|
+
self,
|
|
338
|
+
kind: Literal["customObjects"],
|
|
339
|
+
max_age_seconds: float = 5 * 60,
|
|
340
|
+
errors: Optional[list[int]] = None,
|
|
341
|
+
) -> CustomObjectsApi: ...
|
|
330
342
|
|
|
331
343
|
@overload
|
|
332
344
|
def _api(
|
|
333
|
-
self, kind: Literal[
|
|
334
|
-
) -> str:
|
|
335
|
-
...
|
|
345
|
+
self, kind: Literal["namespace"], max_age_seconds: float = 5 * 60
|
|
346
|
+
) -> str: ...
|
|
336
347
|
|
|
337
348
|
def _api(
|
|
338
349
|
self,
|
|
339
|
-
kind: Union[
|
|
350
|
+
kind: Union[
|
|
351
|
+
Literal["batch"],
|
|
352
|
+
Literal["core"],
|
|
353
|
+
Literal["customObjects"],
|
|
354
|
+
Literal["namespace"],
|
|
355
|
+
],
|
|
340
356
|
max_age_seconds: float = 5 * 60,
|
|
341
|
-
errors: Optional[
|
|
357
|
+
errors: Optional[list[int]] = None,
|
|
342
358
|
) -> Union[BatchV1Api, CoreV1Api, CustomObjectsApi, str]:
|
|
343
359
|
"""
|
|
344
360
|
The Kubernetes module isn't clever enough to renew its credentials when
|
|
@@ -371,44 +387,53 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
371
387
|
|
|
372
388
|
now = utc_now()
|
|
373
389
|
|
|
374
|
-
if
|
|
390
|
+
if (
|
|
391
|
+
self.credential_time is None
|
|
392
|
+
or (now - self.credential_time).total_seconds() > max_age_seconds
|
|
393
|
+
):
|
|
375
394
|
# Credentials need a refresh
|
|
376
395
|
try:
|
|
377
396
|
# Load ~/.kube/config or KUBECONFIG
|
|
378
397
|
load_kube_config()
|
|
379
398
|
# Worked. We're using kube config
|
|
380
|
-
config_source =
|
|
399
|
+
config_source = "kube"
|
|
381
400
|
except ConfigException:
|
|
382
401
|
# Didn't work. Try pod-based credentials in case we are in a pod.
|
|
383
402
|
try:
|
|
384
403
|
load_incluster_config()
|
|
385
404
|
# Worked. We're using in_cluster config
|
|
386
|
-
config_source =
|
|
405
|
+
config_source = "in_cluster"
|
|
387
406
|
except ConfigException:
|
|
388
|
-
raise RuntimeError(
|
|
407
|
+
raise RuntimeError(
|
|
408
|
+
"Could not load Kubernetes configuration from ~/.kube/config, $KUBECONFIG, or current pod."
|
|
409
|
+
)
|
|
389
410
|
|
|
390
411
|
# Now fill in the API objects with these credentials
|
|
391
|
-
self._apis[
|
|
392
|
-
self._apis[
|
|
393
|
-
self._apis[
|
|
412
|
+
self._apis["batch"] = BatchV1Api()
|
|
413
|
+
self._apis["core"] = CoreV1Api()
|
|
414
|
+
self._apis["customObjects"] = CustomObjectsApi()
|
|
394
415
|
|
|
395
416
|
# And save the time
|
|
396
417
|
self.credential_time = now
|
|
397
|
-
if kind ==
|
|
418
|
+
if kind == "namespace":
|
|
398
419
|
# We just need the namespace string
|
|
399
|
-
if config_source ==
|
|
420
|
+
if config_source == "in_cluster":
|
|
400
421
|
# Our namespace comes from a particular file.
|
|
401
|
-
with open(
|
|
422
|
+
with open(
|
|
423
|
+
"/var/run/secrets/kubernetes.io/serviceaccount/namespace"
|
|
424
|
+
) as fh:
|
|
402
425
|
return fh.read().strip()
|
|
403
426
|
else:
|
|
404
427
|
# Find all contexts and the active context.
|
|
405
428
|
# The active context gets us our namespace.
|
|
406
429
|
contexts, activeContext = list_kube_config_contexts()
|
|
407
430
|
if not contexts:
|
|
408
|
-
raise RuntimeError(
|
|
431
|
+
raise RuntimeError(
|
|
432
|
+
"No Kubernetes contexts available in ~/.kube/config or $KUBECONFIG"
|
|
433
|
+
)
|
|
409
434
|
|
|
410
435
|
# Identify the namespace to work in
|
|
411
|
-
namespace = activeContext.get(
|
|
436
|
+
namespace = activeContext.get("context", {}).get("namespace", "default")
|
|
412
437
|
assert isinstance(namespace, str)
|
|
413
438
|
return namespace
|
|
414
439
|
|
|
@@ -428,11 +453,13 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
428
453
|
ErrorCondition(
|
|
429
454
|
error=ApiException,
|
|
430
455
|
error_codes=errors,
|
|
431
|
-
retry_on_this_condition=False
|
|
456
|
+
retry_on_this_condition=False,
|
|
432
457
|
)
|
|
433
458
|
)
|
|
434
459
|
decorator = retry(errors=error_list)
|
|
435
|
-
wrapper = KubernetesBatchSystem.DecoratorWrapper(
|
|
460
|
+
wrapper = KubernetesBatchSystem.DecoratorWrapper(
|
|
461
|
+
api_object, decorator
|
|
462
|
+
)
|
|
436
463
|
return cast(Union[BatchV1Api, CoreV1Api, CustomObjectsApi], wrapper)
|
|
437
464
|
except KeyError:
|
|
438
465
|
raise RuntimeError(f"Unknown Kubernetes API type: {kind}")
|
|
@@ -443,7 +470,12 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
443
470
|
"""
|
|
444
471
|
|
|
445
472
|
P = ParamSpec("P")
|
|
446
|
-
|
|
473
|
+
|
|
474
|
+
def __init__(
|
|
475
|
+
self,
|
|
476
|
+
to_wrap: Any,
|
|
477
|
+
decorator: Callable[[Callable[P, Any]], Callable[P, Any]],
|
|
478
|
+
) -> None:
|
|
447
479
|
"""
|
|
448
480
|
Make a wrapper around the given object.
|
|
449
481
|
When methods on the object are called, they will be called through
|
|
@@ -467,16 +499,19 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
467
499
|
return attr
|
|
468
500
|
|
|
469
501
|
ItemT = TypeVar("ItemT")
|
|
502
|
+
|
|
470
503
|
class _ItemsHaver(Protocol[ItemT]):
|
|
471
504
|
"""
|
|
472
505
|
Anything that has a .items that is a list of something.
|
|
473
506
|
"""
|
|
507
|
+
|
|
474
508
|
# KubernetesBatchSystem isn't defined until the class executes, so any
|
|
475
509
|
# up-references to types from there that are in signatures (and not
|
|
476
510
|
# method code) need to be quoted
|
|
477
|
-
items:
|
|
511
|
+
items: list["KubernetesBatchSystem.ItemT"]
|
|
478
512
|
|
|
479
513
|
CovItemT = TypeVar("CovItemT", covariant=True)
|
|
514
|
+
|
|
480
515
|
class _WatchEvent(Protocol[CovItemT]):
|
|
481
516
|
"""
|
|
482
517
|
An event from a Kubernetes watch stream.
|
|
@@ -488,23 +523,26 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
488
523
|
# __getitem__ instead.
|
|
489
524
|
|
|
490
525
|
@overload
|
|
491
|
-
def __getitem__(self, name: Literal[
|
|
492
|
-
...
|
|
526
|
+
def __getitem__(self, name: Literal["type"]) -> str: ...
|
|
493
527
|
|
|
494
528
|
@overload
|
|
495
|
-
def __getitem__(
|
|
496
|
-
|
|
529
|
+
def __getitem__(
|
|
530
|
+
self, name: Literal["object"]
|
|
531
|
+
) -> "KubernetesBatchSystem.CovItemT": ...
|
|
497
532
|
|
|
498
533
|
@overload
|
|
499
|
-
def __getitem__(self, name: Literal[
|
|
500
|
-
...
|
|
534
|
+
def __getitem__(self, name: Literal["raw_object"]) -> dict[str, Any]: ...
|
|
501
535
|
|
|
502
|
-
def __getitem__(
|
|
503
|
-
|
|
536
|
+
def __getitem__(
|
|
537
|
+
self, name: Union[Literal["type"], Literal["object"], Literal["raw_object"]]
|
|
538
|
+
) -> Any: ...
|
|
504
539
|
|
|
505
540
|
P = ParamSpec("P")
|
|
506
541
|
R = TypeVar("R")
|
|
507
|
-
|
|
542
|
+
|
|
543
|
+
def _stream_until_error(
|
|
544
|
+
self, method: Callable[P, _ItemsHaver[R]], *args: P.args, **kwargs: P.kwargs
|
|
545
|
+
) -> Iterator[_WatchEvent[R]]:
|
|
508
546
|
"""
|
|
509
547
|
Kubernetes kubernetes.watch.Watch().stream() streams can fail and raise
|
|
510
548
|
errors. We don't want to have those errors fail the entire workflow, so
|
|
@@ -570,7 +608,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
570
608
|
|
|
571
609
|
# Loop through all jobs inside the queue and see if any of them
|
|
572
610
|
# could be launched.
|
|
573
|
-
jobs: Queue[
|
|
611
|
+
jobs: Queue[tuple[int, JobDescription, V1PodSpec]] = Queue()
|
|
574
612
|
while True:
|
|
575
613
|
try:
|
|
576
614
|
job = self._jobs_queue.get_nowait()
|
|
@@ -582,7 +620,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
582
620
|
logger.debug(f"Skipping killed job {job_id}")
|
|
583
621
|
continue
|
|
584
622
|
|
|
585
|
-
job_name = f
|
|
623
|
+
job_name = f"{self.job_prefix}{job_id}"
|
|
586
624
|
result = self._launch_job(job_name, job_desc, spec)
|
|
587
625
|
if result is False:
|
|
588
626
|
# Not enough resources to launch this job.
|
|
@@ -603,7 +641,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
603
641
|
logger.debug(f"Roughly {self._jobs_queue.qsize} jobs in the queue")
|
|
604
642
|
|
|
605
643
|
def setUserScript(self, userScript: Resource) -> None:
|
|
606
|
-
logger.info(f
|
|
644
|
+
logger.info(f"Setting user script for deployment: {userScript}")
|
|
607
645
|
self.user_script = userScript
|
|
608
646
|
|
|
609
647
|
# setEnv is provided by BatchSystemSupport, updates self.environment
|
|
@@ -655,18 +693,21 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
655
693
|
# Amazon just uses a label, while Google
|
|
656
694
|
# <https://cloud.google.com/kubernetes-engine/docs/how-to/preemptible-vms>
|
|
657
695
|
# uses a label and a taint.
|
|
658
|
-
PREEMPTIBLE_SCHEMES = {
|
|
659
|
-
|
|
660
|
-
|
|
696
|
+
PREEMPTIBLE_SCHEMES = {
|
|
697
|
+
"labels": [
|
|
698
|
+
("eks.amazonaws.com/capacityType", ["SPOT"]),
|
|
699
|
+
("cloud.google.com/gke-preemptible", ["true"]),
|
|
700
|
+
],
|
|
701
|
+
"taints": [("cloud.google.com/gke-preemptible", ["true"])],
|
|
702
|
+
}
|
|
661
703
|
|
|
662
704
|
if preemptible:
|
|
663
705
|
# We want to seek preemptible labels and tolerate preemptible taints.
|
|
664
|
-
self.desired_labels += PREEMPTIBLE_SCHEMES[
|
|
665
|
-
self.tolerated_taints += PREEMPTIBLE_SCHEMES[
|
|
706
|
+
self.desired_labels += PREEMPTIBLE_SCHEMES["labels"]
|
|
707
|
+
self.tolerated_taints += PREEMPTIBLE_SCHEMES["taints"]
|
|
666
708
|
else:
|
|
667
709
|
# We want to prohibit preemptible labels
|
|
668
|
-
self.prohibited_labels += PREEMPTIBLE_SCHEMES[
|
|
669
|
-
|
|
710
|
+
self.prohibited_labels += PREEMPTIBLE_SCHEMES["labels"]
|
|
670
711
|
|
|
671
712
|
def apply(self, pod_spec: V1PodSpec) -> None:
|
|
672
713
|
"""
|
|
@@ -677,29 +718,26 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
677
718
|
# Convert our collections to Kubernetes expressions.
|
|
678
719
|
|
|
679
720
|
# REQUIRE that ALL of these requirements be satisfied
|
|
680
|
-
required_selector_requirements:
|
|
721
|
+
required_selector_requirements: list[V1NodeSelectorRequirement] = []
|
|
681
722
|
# PREFER that EACH of these terms be satisfied
|
|
682
|
-
preferred_scheduling_terms:
|
|
723
|
+
preferred_scheduling_terms: list[V1PreferredSchedulingTerm] = []
|
|
683
724
|
# And this list of tolerations to apply
|
|
684
|
-
tolerations:
|
|
725
|
+
tolerations: list[V1Toleration] = []
|
|
685
726
|
|
|
686
727
|
for label, values in self.required_labels:
|
|
687
728
|
# Collect requirements for the required labels
|
|
688
|
-
has_label = V1NodeSelectorRequirement(
|
|
689
|
-
|
|
690
|
-
|
|
729
|
+
has_label = V1NodeSelectorRequirement(
|
|
730
|
+
key=label, operator="In", values=values
|
|
731
|
+
)
|
|
691
732
|
required_selector_requirements.append(has_label)
|
|
692
733
|
for label, values in self.desired_labels:
|
|
693
734
|
# Collect preferences for the preferred labels
|
|
694
|
-
has_label = V1NodeSelectorRequirement(
|
|
695
|
-
|
|
696
|
-
values=values)
|
|
697
|
-
term = V1NodeSelectorTerm(
|
|
698
|
-
match_expressions=[has_label]
|
|
735
|
+
has_label = V1NodeSelectorRequirement(
|
|
736
|
+
key=label, operator="In", values=values
|
|
699
737
|
)
|
|
738
|
+
term = V1NodeSelectorTerm(match_expressions=[has_label])
|
|
700
739
|
# Each becomes a separate preference, more is better.
|
|
701
|
-
preference = V1PreferredSchedulingTerm(weight=1,
|
|
702
|
-
preference=term)
|
|
740
|
+
preference = V1PreferredSchedulingTerm(weight=1, preference=term)
|
|
703
741
|
|
|
704
742
|
preferred_scheduling_terms.append(preference)
|
|
705
743
|
for label, values in self.prohibited_labels:
|
|
@@ -710,15 +748,14 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
710
748
|
# <https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#set-based-requirement>
|
|
711
749
|
# So we create a NotIn for each label and AND them
|
|
712
750
|
# all together.
|
|
713
|
-
not_labeled = V1NodeSelectorRequirement(
|
|
714
|
-
|
|
715
|
-
|
|
751
|
+
not_labeled = V1NodeSelectorRequirement(
|
|
752
|
+
key=label, operator="NotIn", values=values
|
|
753
|
+
)
|
|
716
754
|
required_selector_requirements.append(not_labeled)
|
|
717
755
|
for taint, values in self.tolerated_taints:
|
|
718
756
|
for value in values:
|
|
719
757
|
# Each toleration can tolerate one value
|
|
720
|
-
taint_ok = V1Toleration(key=taint,
|
|
721
|
-
value=value)
|
|
758
|
+
taint_ok = V1Toleration(key=taint, value=value)
|
|
722
759
|
tolerations.append(taint_ok)
|
|
723
760
|
|
|
724
761
|
# Now combine everything
|
|
@@ -732,16 +769,22 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
732
769
|
match_expressions=required_selector_requirements
|
|
733
770
|
)
|
|
734
771
|
# And a selector to hold the term
|
|
735
|
-
requirements_selector = V1NodeSelector(
|
|
772
|
+
requirements_selector = V1NodeSelector(
|
|
773
|
+
node_selector_terms=[requirements_term]
|
|
774
|
+
)
|
|
736
775
|
|
|
737
776
|
# Make an affinity that prefers the preferences and requires the requirements
|
|
738
777
|
node_affinity = V1NodeAffinity(
|
|
739
|
-
preferred_during_scheduling_ignored_during_execution=
|
|
740
|
-
|
|
778
|
+
preferred_during_scheduling_ignored_during_execution=(
|
|
779
|
+
preferred_scheduling_terms
|
|
780
|
+
if preferred_scheduling_terms
|
|
781
|
+
else None
|
|
782
|
+
),
|
|
783
|
+
required_during_scheduling_ignored_during_execution=requirements_selector,
|
|
741
784
|
)
|
|
742
785
|
|
|
743
786
|
# Apply the affinity
|
|
744
|
-
pod_spec.affinity = V1Affinity(node_affinity
|
|
787
|
+
pod_spec.affinity = V1Affinity(node_affinity=node_affinity)
|
|
745
788
|
|
|
746
789
|
if tolerations:
|
|
747
790
|
# Apply the tolerations
|
|
@@ -749,17 +792,22 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
749
792
|
|
|
750
793
|
def _check_accelerator_request(self, requirer: Requirer) -> None:
|
|
751
794
|
for accelerator in requirer.accelerators:
|
|
752
|
-
if accelerator[
|
|
795
|
+
if accelerator["kind"] != "gpu" and "model" not in accelerator:
|
|
753
796
|
# We can only provide GPUs or things with a model right now
|
|
754
|
-
raise InsufficientSystemResources(
|
|
755
|
-
|
|
756
|
-
|
|
757
|
-
|
|
797
|
+
raise InsufficientSystemResources(
|
|
798
|
+
requirer,
|
|
799
|
+
"accelerators",
|
|
800
|
+
details=[
|
|
801
|
+
f"The accelerator {accelerator} could not be provided.",
|
|
802
|
+
"The Toil Kubernetes batch system only knows how to request gpu accelerators or accelerators with a defined model.",
|
|
803
|
+
],
|
|
804
|
+
)
|
|
758
805
|
|
|
759
806
|
def _create_pod_spec(
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
807
|
+
self,
|
|
808
|
+
command: str,
|
|
809
|
+
job_desc: JobDescription,
|
|
810
|
+
job_environment: Optional[dict[str, str]] = None,
|
|
763
811
|
) -> V1PodSpec:
|
|
764
812
|
"""
|
|
765
813
|
Make the specification for a pod that can execute the given job.
|
|
@@ -770,7 +818,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
770
818
|
environment.update(job_environment)
|
|
771
819
|
|
|
772
820
|
# Make a command to run it in the executor
|
|
773
|
-
command_list = pack_job(
|
|
821
|
+
command_list = pack_job(command, self.user_script, environment=environment)
|
|
774
822
|
|
|
775
823
|
# The Kubernetes API makes sense only in terms of the YAML format. Objects
|
|
776
824
|
# represent sections of the YAML files. Except from our point of view, all
|
|
@@ -786,9 +834,11 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
786
834
|
# OOMing. We also want to provision some extra space so that when
|
|
787
835
|
# we test _isPodStuckOOM we never get True unless the job has
|
|
788
836
|
# exceeded job_desc.memory.
|
|
789
|
-
requirements_dict = {
|
|
790
|
-
|
|
791
|
-
|
|
837
|
+
requirements_dict = {
|
|
838
|
+
"cpu": job_desc.cores,
|
|
839
|
+
"memory": job_desc.memory + 1024 * 1024 * 512,
|
|
840
|
+
"ephemeral-storage": job_desc.disk + 1024 * 1024 * 512,
|
|
841
|
+
}
|
|
792
842
|
|
|
793
843
|
# Also start on the placement constraints
|
|
794
844
|
placement = KubernetesBatchSystem.Placement()
|
|
@@ -798,19 +848,21 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
798
848
|
# Add in requirements for accelerators (GPUs).
|
|
799
849
|
# See https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/
|
|
800
850
|
|
|
801
|
-
if accelerator[
|
|
851
|
+
if accelerator["kind"] == "gpu":
|
|
802
852
|
# We can't schedule GPUs without a brand, because the
|
|
803
853
|
# Kubernetes resources are <brand>.com/gpu. If no brand is
|
|
804
854
|
# specified, default to nvidia, which is very popular.
|
|
805
|
-
vendor = accelerator.get(
|
|
855
|
+
vendor = accelerator.get("brand", "nvidia")
|
|
806
856
|
key = f'{vendor}.com/{accelerator["kind"]}'
|
|
807
857
|
if key not in requirements_dict:
|
|
808
858
|
requirements_dict[key] = 0
|
|
809
|
-
requirements_dict[key] += accelerator[
|
|
859
|
+
requirements_dict[key] += accelerator["count"]
|
|
810
860
|
|
|
811
|
-
if
|
|
861
|
+
if "model" in accelerator:
|
|
812
862
|
# TODO: What if the cluster uses some other accelerator model labeling scheme?
|
|
813
|
-
placement.required_labels.append(
|
|
863
|
+
placement.required_labels.append(
|
|
864
|
+
("accelerator", [accelerator["model"]])
|
|
865
|
+
)
|
|
814
866
|
|
|
815
867
|
# TODO: Support AMD's labeling scheme: https://github.com/RadeonOpenCompute/k8s-device-plugin/tree/master/cmd/k8s-node-labeller
|
|
816
868
|
# That just has each trait of the accelerator as a separate label, but nothing that quite corresponds to a model.
|
|
@@ -822,14 +874,15 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
822
874
|
# the UCSC Kubernetes admins want it that way. For GPUs, Kubernetes
|
|
823
875
|
# requires them to be equal.
|
|
824
876
|
limits_dict = requests_dict
|
|
825
|
-
resources = V1ResourceRequirements(limits=limits_dict,
|
|
826
|
-
requests=requests_dict)
|
|
877
|
+
resources = V1ResourceRequirements(limits=limits_dict, requests=requests_dict)
|
|
827
878
|
|
|
828
879
|
# Collect volumes and mounts
|
|
829
880
|
volumes = []
|
|
830
881
|
mounts = []
|
|
831
882
|
|
|
832
|
-
def mount_host_path(
|
|
883
|
+
def mount_host_path(
|
|
884
|
+
volume_name: str, host_path: str, mount_path: str, create: bool = False
|
|
885
|
+
) -> None:
|
|
833
886
|
"""
|
|
834
887
|
Add a host path volume with the given name to mount the given path.
|
|
835
888
|
|
|
@@ -837,10 +890,9 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
837
890
|
not exist. Otherwise, when the directory does not exist, the
|
|
838
891
|
pod will wait for it to come into existence.
|
|
839
892
|
"""
|
|
840
|
-
volume_type =
|
|
893
|
+
volume_type = "DirectoryOrCreate" if create else "Directory"
|
|
841
894
|
volume_source = V1HostPathVolumeSource(path=host_path, type=volume_type)
|
|
842
|
-
volume = V1Volume(name=volume_name,
|
|
843
|
-
host_path=volume_source)
|
|
895
|
+
volume = V1Volume(name=volume_name, host_path=volume_source)
|
|
844
896
|
volumes.append(volume)
|
|
845
897
|
volume_mount = V1VolumeMount(mount_path=mount_path, name=volume_name)
|
|
846
898
|
mounts.append(volume_mount)
|
|
@@ -848,43 +900,63 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
848
900
|
if self.host_path is not None:
|
|
849
901
|
# Provision Toil WorkDir from a HostPath volume, to share with other pods.
|
|
850
902
|
# Create the directory if it doesn't exist already.
|
|
851
|
-
mount_host_path(
|
|
903
|
+
mount_host_path(
|
|
904
|
+
"workdir", self.host_path, self.worker_work_dir, create=True
|
|
905
|
+
)
|
|
852
906
|
# We also need to mount across /run/lock, where we will put
|
|
853
907
|
# per-node coordiantion info.
|
|
854
908
|
# Don't create this; it really should always exist.
|
|
855
|
-
mount_host_path(
|
|
909
|
+
mount_host_path("coordination", "/run/lock", "/run/lock")
|
|
856
910
|
else:
|
|
857
911
|
# Provision Toil WorkDir as an ephemeral volume
|
|
858
|
-
ephemeral_volume_name =
|
|
912
|
+
ephemeral_volume_name = "workdir"
|
|
859
913
|
ephemeral_volume_source = V1EmptyDirVolumeSource()
|
|
860
|
-
ephemeral_volume = V1Volume(
|
|
861
|
-
|
|
914
|
+
ephemeral_volume = V1Volume(
|
|
915
|
+
name=ephemeral_volume_name, empty_dir=ephemeral_volume_source
|
|
916
|
+
)
|
|
862
917
|
volumes.append(ephemeral_volume)
|
|
863
|
-
ephemeral_volume_mount = V1VolumeMount(
|
|
918
|
+
ephemeral_volume_mount = V1VolumeMount(
|
|
919
|
+
mount_path=self.worker_work_dir, name=ephemeral_volume_name
|
|
920
|
+
)
|
|
864
921
|
mounts.append(ephemeral_volume_mount)
|
|
865
922
|
# And don't share coordination directory
|
|
866
923
|
|
|
867
924
|
if self.aws_secret_name is not None:
|
|
868
925
|
# Also mount an AWS secret, if provided.
|
|
869
926
|
# TODO: make this generic somehow
|
|
870
|
-
secret_volume_name =
|
|
871
|
-
secret_volume_source = V1SecretVolumeSource(
|
|
872
|
-
|
|
873
|
-
|
|
927
|
+
secret_volume_name = "s3-credentials"
|
|
928
|
+
secret_volume_source = V1SecretVolumeSource(
|
|
929
|
+
secret_name=self.aws_secret_name
|
|
930
|
+
)
|
|
931
|
+
secret_volume = V1Volume(
|
|
932
|
+
name=secret_volume_name, secret=secret_volume_source
|
|
933
|
+
)
|
|
874
934
|
volumes.append(secret_volume)
|
|
875
|
-
secret_volume_mount = V1VolumeMount(
|
|
935
|
+
secret_volume_mount = V1VolumeMount(
|
|
936
|
+
mount_path="/root/.aws", name=secret_volume_name
|
|
937
|
+
)
|
|
876
938
|
mounts.append(secret_volume_mount)
|
|
877
939
|
|
|
878
940
|
# Make a container definition
|
|
879
|
-
container = V1Container(
|
|
880
|
-
|
|
881
|
-
|
|
882
|
-
|
|
883
|
-
|
|
941
|
+
container = V1Container(
|
|
942
|
+
command=command_list,
|
|
943
|
+
image=self.docker_image,
|
|
944
|
+
name="runner-container",
|
|
945
|
+
resources=resources,
|
|
946
|
+
volume_mounts=mounts,
|
|
947
|
+
)
|
|
948
|
+
|
|
949
|
+
# In case security context rules are not allowed to be set, we only apply
|
|
950
|
+
# a security context at all if we need to turn on privileged mode.
|
|
951
|
+
if self.config.kubernetes_privileged:
|
|
952
|
+
container.security_context = V1SecurityContext(
|
|
953
|
+
privileged=self.config.kubernetes_privileged
|
|
954
|
+
)
|
|
955
|
+
|
|
884
956
|
# Wrap the container in a spec
|
|
885
|
-
pod_spec = V1PodSpec(
|
|
886
|
-
|
|
887
|
-
|
|
957
|
+
pod_spec = V1PodSpec(
|
|
958
|
+
containers=[container], volumes=volumes, restart_policy="Never"
|
|
959
|
+
)
|
|
888
960
|
# Tell the spec where to land
|
|
889
961
|
placement.apply(pod_spec)
|
|
890
962
|
|
|
@@ -894,7 +966,9 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
894
966
|
|
|
895
967
|
return pod_spec
|
|
896
968
|
|
|
897
|
-
def _release_acquired_resources(
|
|
969
|
+
def _release_acquired_resources(
|
|
970
|
+
self, resources: list[int], notify: bool = False
|
|
971
|
+
) -> None:
|
|
898
972
|
"""
|
|
899
973
|
Release all resources acquired for a job.
|
|
900
974
|
|
|
@@ -913,10 +987,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
913
987
|
self._work_available.notify_all()
|
|
914
988
|
|
|
915
989
|
def _launch_job(
|
|
916
|
-
self,
|
|
917
|
-
job_name: str,
|
|
918
|
-
job_desc: JobDescription,
|
|
919
|
-
pod_spec: V1PodSpec
|
|
990
|
+
self, job_name: str, job_desc: JobDescription, pod_spec: V1PodSpec
|
|
920
991
|
) -> bool:
|
|
921
992
|
"""
|
|
922
993
|
Try to launch the given job to the Kubernetes cluster. Return False if
|
|
@@ -924,19 +995,26 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
924
995
|
"""
|
|
925
996
|
|
|
926
997
|
# Limit the amount of resources requested at a time.
|
|
927
|
-
resource_requests:
|
|
998
|
+
resource_requests: list[int] = [
|
|
999
|
+
1,
|
|
1000
|
+
int(job_desc.cores * 1000),
|
|
1001
|
+
job_desc.memory,
|
|
1002
|
+
job_desc.disk,
|
|
1003
|
+
]
|
|
928
1004
|
|
|
929
1005
|
acquired = []
|
|
930
1006
|
for source, request in zip(self.resource_sources, resource_requests):
|
|
931
1007
|
# For each kind of resource we want, go get it
|
|
932
|
-
assert
|
|
1008
|
+
assert isinstance(source, ResourcePool) and isinstance(request, int)
|
|
933
1009
|
if source.acquireNow(request):
|
|
934
1010
|
acquired.append(request)
|
|
935
1011
|
else:
|
|
936
1012
|
# We can't get everything
|
|
937
|
-
self._release_acquired_resources(
|
|
1013
|
+
self._release_acquired_resources(
|
|
1014
|
+
acquired,
|
|
938
1015
|
# Put it back quietly.
|
|
939
|
-
notify=False
|
|
1016
|
+
notify=False,
|
|
1017
|
+
)
|
|
940
1018
|
return False
|
|
941
1019
|
|
|
942
1020
|
self._acquired_resources[job_name] = acquired
|
|
@@ -945,9 +1023,11 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
945
1023
|
|
|
946
1024
|
# Make metadata to label the job/pod with info.
|
|
947
1025
|
# Don't let the cluster autoscaler evict any Toil jobs.
|
|
948
|
-
metadata = V1ObjectMeta(
|
|
949
|
-
|
|
950
|
-
|
|
1026
|
+
metadata = V1ObjectMeta(
|
|
1027
|
+
name=job_name,
|
|
1028
|
+
labels={"toil_run": self.run_id},
|
|
1029
|
+
annotations={"cluster-autoscaler.kubernetes.io/safe-to-evict": "false"},
|
|
1030
|
+
)
|
|
951
1031
|
|
|
952
1032
|
# Wrap the spec in a template
|
|
953
1033
|
template = V1PodTemplateSpec(spec=pod_spec, metadata=metadata)
|
|
@@ -955,18 +1035,21 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
955
1035
|
# Make another spec for the job, asking to run the template with no
|
|
956
1036
|
# backoff/retry. Specify our own TTL to avoid catching the notice
|
|
957
1037
|
# of over-zealous abandoned job cleanup scripts.
|
|
958
|
-
job_spec = V1JobSpec(
|
|
959
|
-
|
|
960
|
-
|
|
1038
|
+
job_spec = V1JobSpec(
|
|
1039
|
+
template=template,
|
|
1040
|
+
backoff_limit=0,
|
|
1041
|
+
ttl_seconds_after_finished=self.finished_job_ttl,
|
|
1042
|
+
)
|
|
961
1043
|
|
|
962
1044
|
# And make the actual job
|
|
963
|
-
job = V1Job(
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
kind="Job")
|
|
1045
|
+
job = V1Job(
|
|
1046
|
+
spec=job_spec, metadata=metadata, api_version="batch/v1", kind="Job"
|
|
1047
|
+
)
|
|
967
1048
|
|
|
968
1049
|
# Launch the job
|
|
969
|
-
launched = self._api(
|
|
1050
|
+
launched = self._api("batch", errors=[]).create_namespaced_job(
|
|
1051
|
+
self.namespace, job
|
|
1052
|
+
)
|
|
970
1053
|
|
|
971
1054
|
logger.debug(f"Launched job: {job_name}")
|
|
972
1055
|
|
|
@@ -974,10 +1057,11 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
974
1057
|
|
|
975
1058
|
def _delete_job(
|
|
976
1059
|
self,
|
|
977
|
-
job_name: str,
|
|
1060
|
+
job_name: str,
|
|
1061
|
+
*,
|
|
978
1062
|
propagation_policy: Literal["Foreground", "Background"] = "Foreground",
|
|
979
1063
|
gone_ok: bool = False,
|
|
980
|
-
resource_notify: bool = True
|
|
1064
|
+
resource_notify: bool = True,
|
|
981
1065
|
) -> None:
|
|
982
1066
|
"""
|
|
983
1067
|
Given the name of a kubernetes job, delete the job and release all
|
|
@@ -990,11 +1074,9 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
990
1074
|
the self._work_available condition.
|
|
991
1075
|
"""
|
|
992
1076
|
try:
|
|
993
|
-
logger.debug(f
|
|
994
|
-
self._api(
|
|
995
|
-
job_name,
|
|
996
|
-
self.namespace,
|
|
997
|
-
propagation_policy=propagation_policy
|
|
1077
|
+
logger.debug(f"Deleting Kubernetes job {job_name}")
|
|
1078
|
+
self._api("batch", errors=[404] if gone_ok else []).delete_namespaced_job(
|
|
1079
|
+
job_name, self.namespace, propagation_policy=propagation_policy
|
|
998
1080
|
)
|
|
999
1081
|
finally:
|
|
1000
1082
|
# We should always release the acquired resources.
|
|
@@ -1005,9 +1087,14 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
1005
1087
|
self._release_acquired_resources(resources, notify=resource_notify)
|
|
1006
1088
|
del self._acquired_resources[job_name]
|
|
1007
1089
|
|
|
1008
|
-
def issueBatchJob(
|
|
1090
|
+
def issueBatchJob(
|
|
1091
|
+
self,
|
|
1092
|
+
command: str,
|
|
1093
|
+
job_desc: JobDescription,
|
|
1094
|
+
job_environment: Optional[dict[str, str]] = None,
|
|
1095
|
+
) -> int:
|
|
1009
1096
|
# Try the job as local
|
|
1010
|
-
localID = self.handleLocalJob(job_desc)
|
|
1097
|
+
localID = self.handleLocalJob(command, job_desc)
|
|
1011
1098
|
if localID is not None:
|
|
1012
1099
|
# It is a local job
|
|
1013
1100
|
return localID
|
|
@@ -1018,7 +1105,9 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
1018
1105
|
self.check_resource_request(job_desc)
|
|
1019
1106
|
|
|
1020
1107
|
# Make a pod that describes running the job
|
|
1021
|
-
pod_spec = self._create_pod_spec(
|
|
1108
|
+
pod_spec = self._create_pod_spec(
|
|
1109
|
+
command, job_desc, job_environment=job_environment
|
|
1110
|
+
)
|
|
1022
1111
|
|
|
1023
1112
|
# Make a batch system scope job ID
|
|
1024
1113
|
job_id = self.getNextJobID()
|
|
@@ -1046,6 +1135,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
1046
1135
|
kwargs, so we can't just set unused ones to None. But we also don't
|
|
1047
1136
|
want to duplicate code for every combination of possible present keys.
|
|
1048
1137
|
"""
|
|
1138
|
+
|
|
1049
1139
|
_continue: NotRequired[str]
|
|
1050
1140
|
label_selector: NotRequired[str]
|
|
1051
1141
|
field_selector: NotRequired[str]
|
|
@@ -1075,30 +1165,30 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
1075
1165
|
token = None
|
|
1076
1166
|
|
|
1077
1167
|
while True:
|
|
1078
|
-
kwargs: KubernetesBatchSystem._ArgsDict = {
|
|
1168
|
+
kwargs: KubernetesBatchSystem._ArgsDict = {
|
|
1169
|
+
"label_selector": f"toil_run={self.run_id}"
|
|
1170
|
+
}
|
|
1079
1171
|
|
|
1080
1172
|
if onlySucceeded:
|
|
1081
|
-
kwargs[
|
|
1173
|
+
kwargs["field_selector"] = "status.successful==1"
|
|
1082
1174
|
|
|
1083
1175
|
if token is not None:
|
|
1084
|
-
kwargs[
|
|
1176
|
+
kwargs["_continue"] = token
|
|
1085
1177
|
|
|
1086
|
-
results = self._api(
|
|
1087
|
-
self.namespace,
|
|
1088
|
-
**kwargs
|
|
1178
|
+
results = self._api("batch", errors=[]).list_namespaced_job(
|
|
1179
|
+
self.namespace, **kwargs
|
|
1089
1180
|
)
|
|
1090
|
-
|
|
1181
|
+
|
|
1091
1182
|
# These jobs belong to us
|
|
1092
1183
|
yield from (j for j in results.items if not self._is_deleted(j))
|
|
1093
1184
|
|
|
1094
1185
|
# Remember the continuation token, if any
|
|
1095
|
-
token = getattr(results.metadata,
|
|
1186
|
+
token = getattr(results.metadata, "continue", None)
|
|
1096
1187
|
|
|
1097
1188
|
if token is None:
|
|
1098
1189
|
# There isn't one. We got everything.
|
|
1099
1190
|
break
|
|
1100
1191
|
|
|
1101
|
-
|
|
1102
1192
|
def _ourPodObject(self) -> Iterator[V1Pod]:
|
|
1103
1193
|
"""
|
|
1104
1194
|
Yield Kubernetes V1Pod objects that we are responsible for that the
|
|
@@ -1108,25 +1198,25 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
1108
1198
|
token = None
|
|
1109
1199
|
|
|
1110
1200
|
while True:
|
|
1111
|
-
kwargs: KubernetesBatchSystem._ArgsDict = {
|
|
1201
|
+
kwargs: KubernetesBatchSystem._ArgsDict = {
|
|
1202
|
+
"label_selector": f"toil_run={self.run_id}"
|
|
1203
|
+
}
|
|
1112
1204
|
|
|
1113
1205
|
if token is not None:
|
|
1114
|
-
kwargs[
|
|
1206
|
+
kwargs["_continue"] = token
|
|
1115
1207
|
|
|
1116
|
-
results = self._api(
|
|
1117
|
-
self.namespace,
|
|
1118
|
-
**kwargs
|
|
1208
|
+
results = self._api("core", errors=[]).list_namespaced_pod(
|
|
1209
|
+
self.namespace, **kwargs
|
|
1119
1210
|
)
|
|
1120
1211
|
|
|
1121
1212
|
yield from (j for j in results.items if not self._is_deleted(j))
|
|
1122
1213
|
# Remember the continuation token, if any
|
|
1123
|
-
token = getattr(results.metadata,
|
|
1214
|
+
token = getattr(results.metadata, "continue", None)
|
|
1124
1215
|
|
|
1125
1216
|
if token is None:
|
|
1126
1217
|
# There isn't one. We got everything.
|
|
1127
1218
|
break
|
|
1128
1219
|
|
|
1129
|
-
|
|
1130
1220
|
def _getPodForJob(self, jobObject: V1Job) -> Optional[V1Pod]:
|
|
1131
1221
|
"""
|
|
1132
1222
|
Get the pod that belongs to the given job, or None if the job's pod is
|
|
@@ -1140,22 +1230,26 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
1140
1230
|
"""
|
|
1141
1231
|
|
|
1142
1232
|
# Make sure the job has the fields we need
|
|
1143
|
-
assert
|
|
1233
|
+
assert jobObject.metadata is not None
|
|
1144
1234
|
|
|
1145
1235
|
token = None
|
|
1146
1236
|
|
|
1147
1237
|
while True:
|
|
1148
|
-
kwargs: KubernetesBatchSystem._ArgsDict = {
|
|
1238
|
+
kwargs: KubernetesBatchSystem._ArgsDict = {
|
|
1239
|
+
"label_selector": f"job-name={jobObject.metadata.name}"
|
|
1240
|
+
}
|
|
1149
1241
|
if token is not None:
|
|
1150
|
-
kwargs[
|
|
1151
|
-
results = self._api(
|
|
1242
|
+
kwargs["_continue"] = token
|
|
1243
|
+
results = self._api("core", errors=[]).list_namespaced_pod(
|
|
1244
|
+
self.namespace, **kwargs
|
|
1245
|
+
)
|
|
1152
1246
|
|
|
1153
1247
|
for pod in results.items:
|
|
1154
1248
|
# Return the first pod we find
|
|
1155
1249
|
return pod
|
|
1156
1250
|
|
|
1157
1251
|
# Remember the continuation token, if any
|
|
1158
|
-
token = getattr(results.metadata,
|
|
1252
|
+
token = getattr(results.metadata, "continue", None)
|
|
1159
1253
|
|
|
1160
1254
|
if token is None:
|
|
1161
1255
|
# There isn't one. We got everything.
|
|
@@ -1179,12 +1273,13 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
         assert podObject.metadata is not None
         assert podObject.metadata.name is not None

-        return self._api(
-            podObject.metadata.name,
-            namespace=self.namespace
+        return self._api("core", errors=[]).read_namespaced_pod_log(
+            podObject.metadata.name, namespace=self.namespace
         )

-    def _isPodStuckOOM(
+    def _isPodStuckOOM(
+        self, podObject: V1Pod, minFreeBytes: float = 1024 * 1024 * 2
+    ) -> bool:
         """
         Poll the current memory usage for the pod from the cluster.

@@ -1214,14 +1309,18 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
         assert podObject.metadata.name is not None

         # Compose a query to get just the pod we care about
-        query = f
+        query = f"metadata.name={podObject.metadata.name}"

         # Look for it, but manage our own exceptions
         try:
             # TODO: When the Kubernetes Python API actually wraps the metrics API, switch to that
-            response = self._api(
-
-
+            response = self._api("customObjects").list_namespaced_custom_object(
+                "metrics.k8s.io",
+                "v1beta1",
+                self.namespace,
+                "pods",
+                field_selector=query,
+            )
         except Exception as e:
             # We couldn't talk to the metrics service on this attempt. We don't
             # retry, but we also don't want to just ignore all errors. We only
@@ -1237,7 +1336,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
                 raise

         # Pull out the items
-        items = response.get(
+        items = response.get("items", [])

         if len(items) == 0:
             # If there's no statistics we can't say we're stuck OOM
@@ -1246,7 +1345,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
         # Assume the first result is the right one, because of the selector.
         # That means we don't need to bother with _continue.
         # Assume it has exactly one pod, because we made it.
-        containers = items[0].get(
+        containers = items[0].get("containers", [{}])

         if len(containers) == 0:
             # If there are no containers (because none have started yet?), we can't say we're stuck OOM
@@ -1255,26 +1354,37 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
         # Otherwise, assume it just has one container.
         # Grab the memory usage string, like 123Ki, and convert to bytes.
         # If anything is missing, assume 0 bytes used.
-        bytesUsed = human2bytes(containers[0].get(
+        bytesUsed = human2bytes(containers[0].get("usage", {}).get("memory", "0"))

         # Also get the limit out of the pod object's spec
         assert podObject.spec is not None
         assert len(podObject.spec.containers) > 0
         assert podObject.spec.containers[0].resources is not None
         assert podObject.spec.containers[0].resources.limits is not None
-        assert
-        bytesAllowed = human2bytes(
+        assert "memory" in podObject.spec.containers[0].resources.limits
+        bytesAllowed = human2bytes(
+            podObject.spec.containers[0].resources.limits["memory"]
+        )

         if bytesAllowed - bytesUsed < minFreeBytes:
             # This is too much!
-            logger.warning(
-
+            logger.warning(
+                "Pod %s has used %d of %d bytes of memory; reporting as stuck due to OOM.",
+                podObject.metadata.name,
+                bytesUsed,
+                bytesAllowed,
+            )

             return True
         else:
             return False

-    def _isPodStuckWaiting(
+    def _isPodStuckWaiting(
+        self,
+        pod_object: V1Pod,
+        reason: Optional[str] = None,
+        timeout: Optional[float] = None,
+    ) -> bool:
         """
         Return True if the pod looks to be in a waiting state, and false otherwise.

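Note: `_isPodStuckOOM()` above reads pod memory usage from the `metrics.k8s.io` API through `CustomObjectsApi`, since the Python client has no typed wrapper for the metrics API. A hedged standalone sketch of that query (the helper name is illustrative; `parse_quantity` is assumed to be available in recent `kubernetes` client releases, whereas Toil converts the quantity string with its own `human2bytes`):

```python
# Illustrative sketch only, not part of the toil package.
from kubernetes import client, config
from kubernetes.utils import parse_quantity  # assumed available in recent client versions


def pod_memory_usage_bytes(namespace: str, pod_name: str) -> int:
    config.load_kube_config()
    custom = client.CustomObjectsApi()
    response = custom.list_namespaced_custom_object(
        "metrics.k8s.io",
        "v1beta1",
        namespace,
        "pods",
        field_selector=f"metadata.name={pod_name}",
    )
    items = response.get("items", [])
    if not items:
        # No statistics yet; report zero usage rather than guessing.
        return 0
    containers = items[0].get("containers", [{}])
    # Usage strings look like "123456Ki"; convert the Kubernetes quantity to bytes.
    return int(parse_quantity(containers[0].get("usage", {}).get("memory", "0")))
```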
@@ -1298,7 +1408,9 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
             # Can't be stuck
             return False

-        waiting_info = getattr(
+        waiting_info = getattr(
+            getattr(container_statuses[0], "state", None), "waiting", None
+        )
         if waiting_info is None:
             # Pod is not waiting
             return False
@@ -1307,15 +1419,17 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
             # Pod fails reason filter
             return False

-        start_time = getattr(pod_object.status,
-        if timeout is not None and (
+        start_time = getattr(pod_object.status, "start_time", None)
+        if timeout is not None and (
+            start_time is None or (utc_now() - start_time).total_seconds() < timeout
+        ):
             # It hasn't been waiting too long, or we care but don't know how
             # long it has been waiting
             return False

         return True

-    def _is_deleted(self, kube_thing: Union[
+    def _is_deleted(self, kube_thing: Union["V1Job", "V1Pod"]) -> bool:
         """
         Determine if a job or pod is in the process od being deleted, and
         shouldn't count anymore.
@@ -1324,7 +1438,9 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
         # Kubernetes "Terminating" is the same as having the deletion_timestamp
         # set in the metadata of the object.

-        deletion_timestamp: Optional[datetime.datetime] = getattr(
+        deletion_timestamp: Optional[datetime.datetime] = getattr(
+            getattr(kube_thing, "metadata", None), "deletion_timestamp", None
+        )
         # If the deletion timestamp is set to anything, it is in the process of
         # being deleted. We will treat that as as good as gone.
         return deletion_timestamp is not None
@@ -1341,8 +1457,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):

         assert jobObject.metadata is not None
         assert jobObject.metadata.name is not None
-        return int(jobObject.metadata.name[len(self.job_prefix):])
-
+        return int(jobObject.metadata.name[len(self.job_prefix) :])

     def getUpdatedBatchJob(self, maxWait: float) -> Optional[UpdatedBatchJobInfo]:

@@ -1358,22 +1473,27 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
         # Otherwise we need to maybe wait.
         if self.enable_watching and maxWait >= 1:
             # We can try a watch. Watches can only work in whole seconds.
-            for event in self._stream_until_error(
-
-
-
+            for event in self._stream_until_error(
+                self._api("batch").list_namespaced_job,
+                self.namespace,
+                label_selector=f"toil_run={self.run_id}",
+                timeout_seconds=math.floor(maxWait),
+            ):
                 # Grab the metadata data, ID, the list of conditions of the current job, and the total pods
-                jobObject = event[
-
+                jobObject = event["object"]
+
                 if self._is_deleted(jobObject):
                     # Job is already deleted, so ignore it.
-                    logger.warning(
+                    logger.warning(
+                        "Kubernetes job %s is deleted; ignore its update",
+                        getattr(getattr(jobObject, "metadata", None), "name", None),
+                    )
                     continue
-
+
                 assert jobObject.metadata is not None
                 assert jobObject.metadata.name is not None
-
-                jobID = int(jobObject.metadata.name[len(self.job_prefix):])
+
+                jobID = int(jobObject.metadata.name[len(self.job_prefix) :])
                 if jobObject.status is None:
                     # Can't tell what is up with this job.
                     continue
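Note: `_stream_until_error()` used above is Toil's retrying wrapper around the client's watch machinery, streaming Job events filtered by the run's `toil_run` label. For reference, the plain-client equivalent of that event loop looks roughly like this (the function name, kubeconfig setup, and the `print` are illustrative):

```python
# Illustrative sketch only, not part of the toil package.
import math

from kubernetes import client, config, watch


def watch_run_jobs(namespace: str, run_id: str, max_wait: float) -> None:
    config.load_kube_config()
    batch = client.BatchV1Api()
    for event in watch.Watch().stream(
        batch.list_namespaced_job,
        namespace,
        label_selector=f"toil_run={run_id}",
        timeout_seconds=math.floor(max_wait),
    ):
        job = event["object"]  # a V1Job for ADDED/MODIFIED/DELETED events
        if job.metadata.deletion_timestamp is not None:
            # Already being torn down; skip it, like _is_deleted() above.
            continue
        print(event["type"], job.metadata.name, job.status and job.status.active)
```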
@@ -1383,7 +1503,10 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
                 failed_pods = jobObject.status.failed or 0
                 # Fetch out the condition object that has info about how the job is going.
                 condition: Optional[V1JobCondition] = None
-                if
+                if (
+                    jobObject.status.conditions is not None
+                    and len(jobObject.status.conditions) > 0
+                ):
                     condition = jobObject.status.conditions[0]

                 totalPods = active_pods + succeeded_pods + failed_pods
@@ -1393,14 +1516,25 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):

                 # Check if there are any active pods
                 if active_pods > 0:
-                    logger.info(
+                    logger.info(
+                        "%s has %d pods running" % jobObject.metadata.name, active_pods
+                    )
                     continue
                 elif succeeded_pods > 0 or failed_pods > 0:
                     # No more active pods in the current job ; must be finished
-                    logger.info(
-
+                    logger.info(
+                        "%s RESULTS -> Succeeded: %d Failed:%d Active:%d"
+                        % jobObject.metadata.name,
+                        succeeded_pods,
+                        failed_pods,
+                        active_pods,
+                    )
                     # Log out success/failure given a reason
-                    logger.info(
+                    logger.info(
+                        "%s REASON: %s",
+                        getattr(condition, "type", None),
+                        getattr(condition, "reason", None),
+                    )

                     # Log out reason of failure and pod exit code
                     if failed_pods > 0:
@@ -1410,22 +1544,40 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
                         if condition is not None:
                             logger.warning("Failed Job Message: %s", condition.message)
                         pod = self._getPodForJob(jobObject)
-                        statuses:
-
+                        statuses: list[V1ContainerStatus] = getattr(
+                            getattr(pod, "status", None), "container_statuses", []
+                        )
+                        if (
+                            len(statuses) > 0
+                            and statuses[0].state is not None
+                            and statuses[0].state.terminated is not None
+                        ):
                             exitCode = statuses[0].state.terminated.exit_code

                     raw_runtime = 0.0
-                    if
-
+                    if (
+                        jobObject.status.completion_time is not None
+                        and jobObject.status.start_time is not None
+                    ):
+                        raw_runtime = (
+                            jobObject.status.completion_time
+                            - jobObject.status.start_time
+                        ).total_seconds()
                     runtime = slow_down(raw_runtime)
-                    result = UpdatedBatchJobInfo(
+                    result = UpdatedBatchJobInfo(
+                        jobID=jobID,
+                        exitStatus=exitCode,
+                        wallTime=runtime,
+                        exitReason=exitReason,
+                    )

-                    if (exitReason == BatchJobExitReason.FAILED) or (
+                    if (exitReason == BatchJobExitReason.FAILED) or (
+                        succeeded_pods + failed_pods == totalPods
+                    ):
                         # Cleanup if job is all finished or there was a pod that failed
                         # TODO: use delete_job() to release acquired resources
                         self._delete_job(
-                            jobObject.metadata.name,
-                            propagation_policy='Foreground'
+                            jobObject.metadata.name, propagation_policy="Foreground"
                         )
                         # Make sure the job is deleted so we won't see it again.
                         self._waitForJobDeath(jobObject.metadata.name)
@@ -1433,12 +1585,19 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
                     continue
                 else:
                     # Job is not running/updating ; no active, successful, or failed pods yet
-                    logger.debug(
+                    logger.debug(
+                        "Job {} -> {}".format(
+                            jobObject.metadata.name, getattr(condition, "reason", None)
+                        )
+                    )
                     # Pod could be pending; don't say it's lost.
                     continue
         else:
             # Try polling instead
-            while
+            while (
+                result is None
+                and (datetime.datetime.now() - entry).total_seconds() < maxWait
+            ):
                 # We still have nothing and we haven't hit the timeout.

                 # Poll
@@ -1446,12 +1605,11 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):

                 if result is None:
                     # Still nothing. Wait a second, or some fraction of our max wait time.
-                    time.sleep(min(maxWait/2, 1.0))
+                    time.sleep(min(maxWait / 2, 1.0))

         # When we get here, either we found something or we ran out of time
         return result

-
     def _getUpdatedBatchJobImmediately(self) -> Optional[UpdatedBatchJobInfo]:
         """
         Return None if no updated (completed or failed) batch job is currently
@@ -1475,25 +1633,25 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
         # Find a job that is done, failed, or stuck
         jobObject = None
         # Put 'done', 'failed', or 'stuck' here
-        chosenFor =
+        chosenFor = ""

         for j in self._ourJobObject(onlySucceeded=True):
             # Look for succeeded jobs because that's the only filter Kubernetes has
             jobObject = j
-            chosenFor =
+            chosenFor = "done"

         if jobObject is None:
             for j in self._ourJobObject():
                 # If there aren't any succeeded jobs, scan all jobs
                 # See how many times each failed
-                failCount = getattr(j.status,
+                failCount = getattr(j.status, "failed", 0)
                 if failCount is None:
                     # Make sure it is an int
                     failCount = 0
                 if failCount > 0:
                     # Take the first failed one you find
                     jobObject = j
-                    chosenFor =
+                    chosenFor = "failed"
                     break

         if jobObject is None:
@@ -1506,23 +1664,30 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
                     continue

                 # Containers can get stuck in Waiting with reason ImagePullBackOff
-                if self._isPodStuckWaiting(pod, reason=
+                if self._isPodStuckWaiting(pod, reason="ImagePullBackoff"):
                     # Assume it will never finish, even if the registry comes back or whatever.
                     # We can get into this state when we send in a non-existent image.
                     # See https://github.com/kubernetes/kubernetes/issues/58384
                     jobObject = j
-                    chosenFor =
-                    logger.warning(
-
+                    chosenFor = "stuck"
+                    logger.warning(
+                        "Failing stuck job (ImagePullBackoff); did you try to run a non-existent Docker image?"
+                        " Check TOIL_APPLIANCE_SELF."
+                    )
                     break

                 # Containers can also get stuck in Waiting with reason
                 # ContainerCreating, if for example their mounts don't work.
-                if self._isPodStuckWaiting(
+                if self._isPodStuckWaiting(
+                    pod, reason="ContainerCreating", timeout=self.pod_timeout
+                ):
                     # Assume that it will never finish.
                     jobObject = j
-                    chosenFor =
-                    logger.warning(
+                    chosenFor = "stuck"
+                    logger.warning(
+                        "Failing stuck job (ContainerCreating longer than %s seconds); did you try to mount something impossible?",
+                        self.pod_timeout,
+                    )
                     break

                 # Pods can also get stuck nearly but not quite out of memory,
@@ -1532,7 +1697,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
                     # We found a job that probably should be OOM! Report it as stuck.
                     # Polling function takes care of the logging.
                     jobObject = j
-                    chosenFor =
+                    chosenFor = "stuck"
                     break

         if jobObject is None:
@@ -1540,25 +1705,30 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
             return None
         else:
             # We actually have something
-            logger.debug(
-
+            logger.debug(
+                "Identified stopped Kubernetes job %s as %s",
+                getattr(jobObject.metadata, "name", None),
+                chosenFor,
+            )

         # Otherwise we got something.

         # Work out what the job's ID was (whatever came after our name prefix)
         assert jobObject.metadata is not None
         assert jobObject.metadata.name is not None
-        jobID = int(jobObject.metadata.name[len(self.job_prefix):])
+        jobID = int(jobObject.metadata.name[len(self.job_prefix) :])

         # Grab the pod
         pod = self._getPodForJob(jobObject)

         if pod is not None:
-            if chosenFor ==
+            if chosenFor == "done" or chosenFor == "failed":
                 # The job actually finished or failed

                 # Get the statuses of the pod's containers
-                containerStatuses = getattr(
+                containerStatuses = getattr(
+                    getattr(pod, "status", None), "container_statuses", None
+                )

                 # Get when the pod started (reached the Kubelet) as a datetime
                 start_time = self._get_start_time(pod, jobObject)
@@ -1568,18 +1738,24 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
                     # This happens when a pod is "Scheduled". But how could a
                     # 'done' or 'failed' pod be merely "Scheduled"?
                     # Complain so we can find out.
-                    logger.warning(
-
+                    logger.warning(
+                        "Exit code and runtime unavailable; pod has no container statuses"
+                    )
+                    logger.warning("Pod: %s", str(pod))
                     exitCode = EXIT_STATUS_UNAVAILABLE_VALUE
                     # Say it stopped now and started when it was scheduled/submitted.
                     # We still need a strictly positive runtime.
                     runtime = slow_down((utc_now() - start_time).total_seconds())
                 else:
                     # Get the termination info from the pod's main (only) container
-                    terminatedInfo = getattr(
+                    terminatedInfo = getattr(
+                        getattr(containerStatuses[0], "state", None), "terminated", None
+                    )
                     if terminatedInfo is None:
-                        logger.warning(
-
+                        logger.warning(
+                            "Exit code and runtime unavailable; pod stopped without container terminating"
+                        )
+                        logger.warning("Pod: %s", str(pod))
                         exitCode = EXIT_STATUS_UNAVAILABLE_VALUE
                         # Say it stopped now and started when it was scheduled/submitted.
                         # We still need a strictly positive runtime.
@@ -1594,34 +1770,42 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
                         # created. And we need to look at the pod's end time
                         # because the job only gets a completion time if
                         # successful.
-                        runtime = slow_down(
-
+                        runtime = slow_down(
+                            (terminatedInfo.finished_at - start_time).total_seconds()
+                        )

-                if chosenFor ==
+                if chosenFor == "failed":
                     # Warn the user with the failed pod's log
                     # TODO: cut this down somehow?
-                    logger.warning(
+                    logger.warning(
+                        "Log from failed pod: %s", self._getLogForPod(pod)
+                    )

             else:
                 # The job has gotten stuck

-                assert chosenFor ==
+                assert chosenFor == "stuck"

                 # Synthesize an exit code
                 exitCode = EXIT_STATUS_UNAVAILABLE_VALUE
                 # Say it ran from when the job was submitted to when the pod got stuck
-                runtime = slow_down(
+                runtime = slow_down(
+                    (utc_now() - self._get_start_time(job=jobObject)).total_seconds()
+                )
         else:
             # The pod went away from under the job.
-            logging.warning(
+            logging.warning("Exit code and runtime unavailable; pod vanished")
            exitCode = EXIT_STATUS_UNAVAILABLE_VALUE
             # Say it ran from when the job was submitted to when the pod vanished
-            runtime = slow_down(
-
+            runtime = slow_down(
+                (utc_now() - self._get_start_time(job=jobObject)).total_seconds()
+            )

         try:
             # Delete the job and all dependents (pods), hoping to get a 404 if it's magically gone
-            self._delete_job(
+            self._delete_job(
+                jobObject.metadata.name, propagation_policy="Foreground", gone_ok=True
+            )

             # That just kicks off the deletion process. Foreground doesn't
             # actually block. See
@@ -1637,7 +1821,9 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
             # Otherwise everything is fine and the job is gone.

         # Return the one finished job we found
-        return UpdatedBatchJobInfo(
+        return UpdatedBatchJobInfo(
+            jobID=jobID, exitStatus=exitCode, wallTime=runtime, exitReason=None
+        )

     def _waitForJobDeath(self, jobName: str) -> None:
         """
@@ -1651,7 +1837,9 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
         while True:
             try:
                 # Look for the job
-                job_object = self._api(
+                job_object = self._api("batch", errors=[404]).read_namespaced_job(
+                    jobName, self.namespace
+                )
                 if self._is_deleted(job_object):
                     # The job looks deleted, so we can treat it as not being there.
                     return
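Note: `_waitForJobDeath()` above polls `read_namespaced_job` and relies on a 404 (or a set `deletion_timestamp`) to decide the job is really gone. A standalone sketch of that polling loop against the plain client (the function name and poll interval are assumptions):

```python
# Illustrative sketch only, not part of the toil package.
import time

from kubernetes import client, config
from kubernetes.client.rest import ApiException


def wait_for_job_death(namespace: str, job_name: str, poll_seconds: float = 1.0) -> None:
    config.load_kube_config()
    batch = client.BatchV1Api()
    while True:
        try:
            job = batch.read_namespaced_job(job_name, namespace)
        except ApiException as e:
            if e.status == 404:
                return  # the job is fully gone
            raise
        if job.metadata.deletion_timestamp is not None:
            return  # deletion is underway; treat it as gone
        time.sleep(poll_seconds)
```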
@@ -1676,59 +1864,80 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
         # Shutdown scheduling thread
         self._shutting_down.set()
         with self._work_available:
-            self._work_available.notify_all()
+            self._work_available.notify_all()  # Wake it up.

         self.schedulingThread.join()

         # Kill all of our jobs and clean up pods that are associated with those jobs
         try:
-            logger.debug(
-            self._api(
+            logger.debug("Deleting all Kubernetes jobs for toil_run=%s", self.run_id)
+            self._api("batch", errors=[404]).delete_collection_namespaced_job(
                 self.namespace,
                 label_selector=f"toil_run={self.run_id}",
-                propagation_policy=
+                propagation_policy="Background",
+            )
+            logger.debug(
+                "Killed jobs with delete_collection_namespaced_job; cleaned up"
             )
-            logger.debug('Killed jobs with delete_collection_namespaced_job; cleaned up')
             # TODO: should we release all resources? We're shutting down so would it matter?
         except ApiException as e:
             if e.status != 404:
                 # Anything other than a 404 is weird here.
-                logger.error(
+                logger.error(
+                    "Exception when calling BatchV1Api->delete_collection_namespaced_job: %s"
+                    % e
+                )

             # If batch delete fails, try to delete all remaining jobs individually.
-            logger.debug(
+            logger.debug(
+                "Deleting Kubernetes jobs individually for toil_run=%s", self.run_id
+            )
             for job_id in self._getIssuedNonLocalBatchJobIDs():
-                job_name = f
-                self._delete_job(
+                job_name = f"{self.job_prefix}{job_id}"
+                self._delete_job(
+                    job_name, propagation_policy="Background", resource_notify=False
+                )

         # Aggregate all pods and check if any pod has failed to cleanup or is orphaned.
         ourPods = self._ourPodObject()

         for pod in ourPods:
             try:
-                phase = getattr(pod.status,
-                if phase ==
-
-
-
+                phase = getattr(pod.status, "phase", None)
+                if phase == "Failed":
+                    logger.debug(
+                        "Failed pod encountered at shutdown:\n%s",
+                        self._pretty_print(pod),
+                    )
+                if phase == "Orphaned":
+                    logger.debug(
+                        "Orphaned pod encountered at shutdown:\n%s",
+                        self._pretty_print(pod),
+                    )
             except:
                 # Don't get mad if that doesn't work.
                 pass
             if pod.metadata is not None and pod.metadata.name is not None:
                 try:
-                    logger.debug(
-
+                    logger.debug(
+                        "Cleaning up pod at shutdown: %s", pod.metadata.name
+                    )
+                    response = self._api(
+                        "core", errors=[404]
+                    ).delete_namespaced_pod(
                         pod.metadata.name,
                         self.namespace,
-                        propagation_policy=
+                        propagation_policy="Background",
                     )
                 except ApiException as e:
                     if e.status != 404:
                         # Anything other than a 404 is weird here.
-                        logger.error(
-
+                        logger.error(
+                            "Exception when calling CoreV1Api->delete_namespaced_pod: %s"
+                            % e
+                        )

-    def _getIssuedNonLocalBatchJobIDs(self) ->
+    def _getIssuedNonLocalBatchJobIDs(self) -> list[int]:
         """
         Get the issued batch job IDs that are not for local jobs.
         """
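Note: the `shutdown()` hunk above bulk-deletes every job tagged with the run's `toil_run` label and falls back to per-job deletion if that fails. The core call, sketched against the plain client with error handling reduced to the 404 case the hunk tolerates (function name and kubeconfig setup are illustrative):

```python
# Illustrative sketch only, not part of the toil package.
from kubernetes import client, config
from kubernetes.client.rest import ApiException


def delete_run_jobs(namespace: str, run_id: str) -> None:
    config.load_kube_config()
    batch = client.BatchV1Api()
    try:
        batch.delete_collection_namespaced_job(
            namespace,
            label_selector=f"toil_run={run_id}",
            # Background propagation also removes the jobs' pods, asynchronously.
            propagation_policy="Background",
        )
    except ApiException as e:
        if e.status != 404:
            # Anything other than "already gone" is a real problem.
            raise
```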
@@ -1740,29 +1949,35 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
             jobIDs.append(self._getIDForOurJob(job))
         return jobIDs

-    def getIssuedBatchJobIDs(self) ->
+    def getIssuedBatchJobIDs(self) -> list[int]:
         # Make sure to send the local jobs and queued jobs also
         with self._mutex:
             queued_jobs = list(self._queued_job_ids)
-        return
+        return (
+            self._getIssuedNonLocalBatchJobIDs()
+            + list(self.getIssuedLocalJobIDs())
+            + queued_jobs
+        )

-    def _get_start_time(
+    def _get_start_time(
+        self, pod: Optional[V1Pod] = None, job: Optional[V1Job] = None
+    ) -> datetime.datetime:
         """
         Get an actual or estimated start time for a pod.
         """

         # Get when the pod started (reached the Kubelet) as a datetime
-        start_time = getattr(getattr(pod,
+        start_time = getattr(getattr(pod, "status", None), "start_time", None)
         if start_time is None:
             # If the pod never made it to the kubelet to get a
             # start_time, say it was when the job was submitted.
-            start_time = getattr(getattr(job,
+            start_time = getattr(getattr(job, "status", None), "start_time", None)
         if start_time is None:
             # If this is still unset, say it was just now.
             start_time = utc_now()
         return start_time

-    def getRunningBatchJobIDs(self) ->
+    def getRunningBatchJobIDs(self) -> dict[int, float]:
         # We need a dict from jobID (integer) to seconds it has been running
         secondsPerJob = dict()
         for job in self._ourJobObject():
@@ -1773,7 +1988,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
                 # Jobs whose pods are gone are not running
                 continue

-            if getattr(pod.status,
+            if getattr(pod.status, "phase", None) == "Running":
                 # The job's pod is running

                 # Estimate the runtime
@@ -1785,7 +2000,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
         secondsPerJob.update(self.getRunningLocalJobIDs())
         return secondsPerJob

-    def killBatchJobs(self, jobIDs:
+    def killBatchJobs(self, jobIDs: list[int]) -> None:

         # Kill all the ones that are local
         self.killLocalJobs(jobIDs)
@@ -1794,7 +2009,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):

         # First get the jobs we even issued non-locally
         issued_on_kubernetes = set(self._getIssuedNonLocalBatchJobIDs())
-        deleted_jobs:
+        deleted_jobs: list[str] = []

         for job_id in jobIDs:
             # For each job we are supposed to kill
@@ -1820,10 +2035,10 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):

             # Delete the requested job in the foreground.
             # This doesn't block, but it does delete expeditiously.
-            self._delete_job(job_name, propagation_policy=
+            self._delete_job(job_name, propagation_policy="Foreground")

             deleted_jobs.append(job_name)
-            logger.debug(
+            logger.debug("Killed job by request: %s", job_name)

         for job_name in deleted_jobs:
             # Now we need to wait for all the jobs we killed to be gone.
@@ -1833,7 +2048,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
             # the potential deadlock (if the user code needs exclusive access to
             # a resource) onto the user code, instead of always hanging
             # whenever we can't certify that a faulty node is no longer running
-            # the user code.
+            # the user code.
             self._waitForJobDeath(job_name)

     @classmethod
@@ -1844,9 +2059,11 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):

         # Make a Kubernetes-acceptable version of our username: not too long,
         # and all lowercase letters, numbers, or - or .
-        acceptable_chars = set(string.ascii_lowercase + string.digits +
+        acceptable_chars = set(string.ascii_lowercase + string.digits + "-.")

-        return
+        return "".join([c for c in get_user_name().lower() if c in acceptable_chars])[
+            :100
+        ]

     @runtime_checkable
     class KubernetesConfig(Protocol):
@@ -1858,33 +2075,66 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
         have to let the fact that this also has to be a Config just be manually
         enforced.
         """
+
         kubernetes_host_path: Optional[str]
         kubernetes_owner: str
         kubernetes_service_account: Optional[str]
         kubernetes_pod_timeout: float

-
     @classmethod
     def add_options(cls, parser: Union[ArgumentParser, _ArgumentGroup]) -> None:
-        parser.add_argument(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        parser.add_argument(
+            "--kubernetesHostPath",
+            dest="kubernetes_host_path",
+            default=None,
+            env_var="TOIL_KUBERNETES_HOST_PATH",
+            help="Path on Kubernetes hosts to use as shared inter-pod temp directory. "
+            "(default: %(default)s)",
+        )
+        parser.add_argument(
+            "--kubernetesOwner",
+            dest="kubernetes_owner",
+            default=None,
+            env_var="TOIL_KUBERNETES_OWNER",
+            help=f"Username to mark Kubernetes jobs with. If the provided value is None, the value will "
+            f"be generated at runtime. "
+            f"(Generated default: {cls.get_default_kubernetes_owner()})",
+        )
+        parser.add_argument(
+            "--kubernetesServiceAccount",
+            dest="kubernetes_service_account",
+            default=None,
+            env_var="TOIL_KUBERNETES_SERVICE_ACCOUNT",
+            help="Service account to run jobs as. " "(default: %(default)s)",
+        )
+        parser.add_argument(
+            "--kubernetesPodTimeout",
+            dest="kubernetes_pod_timeout",
+            default=120,
+            env_var="TOIL_KUBERNETES_POD_TIMEOUT",
+            type=float,
+            help="Seconds to wait for a scheduled Kubernetes pod to start running. "
+            "(default: %(default)s)",
+        )
+        parser.add_argument(
+            "--kubernetesPrivileged",
+            dest="kubernetes_privileged",
+            default=False,
+            env_var="TOIL_KUBERNETES_PRIVILEGED",
+            type=opt_strtobool,
+            help="Whether to ask worker pods to run in privileged mode. This should be used to access "
+            "privileged operations, such as FUSE. On Toil-managed clusters with --enableFuse, "
+            "this is set to True. (default: %(default)s)",
+        )
+
+    OptionType = TypeVar("OptionType")
+
     @classmethod
     def setOptions(cls, setOption: OptionSetter) -> None:
         setOption("kubernetes_host_path")
         setOption("kubernetes_owner")
-        setOption(
+        setOption(
+            "kubernetes_service_account",
+        )
         setOption("kubernetes_pod_timeout")
-
+        setOption("kubernetes_privileged")