toil 7.0.0__py3-none-any.whl → 8.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +121 -83
- toil/batchSystems/__init__.py +1 -0
- toil/batchSystems/abstractBatchSystem.py +137 -77
- toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
- toil/batchSystems/awsBatch.py +237 -128
- toil/batchSystems/cleanup_support.py +22 -16
- toil/batchSystems/contained_executor.py +30 -26
- toil/batchSystems/gridengine.py +85 -49
- toil/batchSystems/htcondor.py +164 -87
- toil/batchSystems/kubernetes.py +622 -386
- toil/batchSystems/local_support.py +17 -12
- toil/batchSystems/lsf.py +132 -79
- toil/batchSystems/lsfHelper.py +13 -11
- toil/batchSystems/mesos/__init__.py +41 -29
- toil/batchSystems/mesos/batchSystem.py +288 -149
- toil/batchSystems/mesos/executor.py +77 -49
- toil/batchSystems/mesos/test/__init__.py +31 -23
- toil/batchSystems/options.py +38 -29
- toil/batchSystems/registry.py +53 -19
- toil/batchSystems/singleMachine.py +293 -123
- toil/batchSystems/slurm.py +489 -137
- toil/batchSystems/torque.py +46 -32
- toil/bus.py +141 -73
- toil/common.py +630 -359
- toil/cwl/__init__.py +1 -1
- toil/cwl/cwltoil.py +1114 -532
- toil/cwl/utils.py +17 -22
- toil/deferred.py +62 -41
- toil/exceptions.py +5 -3
- toil/fileStores/__init__.py +5 -5
- toil/fileStores/abstractFileStore.py +88 -57
- toil/fileStores/cachingFileStore.py +711 -247
- toil/fileStores/nonCachingFileStore.py +113 -75
- toil/job.py +988 -315
- toil/jobStores/abstractJobStore.py +387 -243
- toil/jobStores/aws/jobStore.py +727 -403
- toil/jobStores/aws/utils.py +161 -109
- toil/jobStores/conftest.py +1 -0
- toil/jobStores/fileJobStore.py +289 -151
- toil/jobStores/googleJobStore.py +137 -70
- toil/jobStores/utils.py +36 -15
- toil/leader.py +614 -269
- toil/lib/accelerators.py +115 -18
- toil/lib/aws/__init__.py +55 -28
- toil/lib/aws/ami.py +122 -87
- toil/lib/aws/iam.py +284 -108
- toil/lib/aws/s3.py +31 -0
- toil/lib/aws/session.py +193 -58
- toil/lib/aws/utils.py +238 -218
- toil/lib/bioio.py +13 -5
- toil/lib/compatibility.py +11 -6
- toil/lib/conversions.py +83 -49
- toil/lib/docker.py +131 -103
- toil/lib/ec2.py +322 -209
- toil/lib/ec2nodes.py +174 -106
- toil/lib/encryption/_dummy.py +5 -3
- toil/lib/encryption/_nacl.py +10 -6
- toil/lib/encryption/conftest.py +1 -0
- toil/lib/exceptions.py +26 -7
- toil/lib/expando.py +4 -2
- toil/lib/ftp_utils.py +217 -0
- toil/lib/generatedEC2Lists.py +127 -19
- toil/lib/humanize.py +6 -2
- toil/lib/integration.py +341 -0
- toil/lib/io.py +99 -11
- toil/lib/iterables.py +4 -2
- toil/lib/memoize.py +12 -8
- toil/lib/misc.py +65 -18
- toil/lib/objects.py +2 -2
- toil/lib/resources.py +19 -7
- toil/lib/retry.py +115 -77
- toil/lib/threading.py +282 -80
- toil/lib/throttle.py +15 -14
- toil/options/common.py +834 -401
- toil/options/cwl.py +175 -90
- toil/options/runner.py +50 -0
- toil/options/wdl.py +70 -19
- toil/provisioners/__init__.py +111 -46
- toil/provisioners/abstractProvisioner.py +322 -157
- toil/provisioners/aws/__init__.py +62 -30
- toil/provisioners/aws/awsProvisioner.py +980 -627
- toil/provisioners/clusterScaler.py +541 -279
- toil/provisioners/gceProvisioner.py +282 -179
- toil/provisioners/node.py +147 -79
- toil/realtimeLogger.py +34 -22
- toil/resource.py +137 -75
- toil/server/app.py +127 -61
- toil/server/celery_app.py +3 -1
- toil/server/cli/wes_cwl_runner.py +82 -53
- toil/server/utils.py +54 -28
- toil/server/wes/abstract_backend.py +64 -26
- toil/server/wes/amazon_wes_utils.py +21 -15
- toil/server/wes/tasks.py +121 -63
- toil/server/wes/toil_backend.py +142 -107
- toil/server/wsgi_app.py +4 -3
- toil/serviceManager.py +58 -22
- toil/statsAndLogging.py +148 -64
- toil/test/__init__.py +263 -179
- toil/test/batchSystems/batchSystemTest.py +438 -195
- toil/test/batchSystems/batch_system_plugin_test.py +18 -7
- toil/test/batchSystems/test_gridengine.py +173 -0
- toil/test/batchSystems/test_lsf_helper.py +67 -58
- toil/test/batchSystems/test_slurm.py +93 -47
- toil/test/cactus/test_cactus_integration.py +20 -22
- toil/test/cwl/cwlTest.py +271 -71
- toil/test/cwl/measure_default_memory.cwl +12 -0
- toil/test/cwl/not_run_required_input.cwl +29 -0
- toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
- toil/test/docs/scriptsTest.py +60 -34
- toil/test/jobStores/jobStoreTest.py +412 -235
- toil/test/lib/aws/test_iam.py +116 -48
- toil/test/lib/aws/test_s3.py +16 -9
- toil/test/lib/aws/test_utils.py +5 -6
- toil/test/lib/dockerTest.py +118 -141
- toil/test/lib/test_conversions.py +113 -115
- toil/test/lib/test_ec2.py +57 -49
- toil/test/lib/test_integration.py +104 -0
- toil/test/lib/test_misc.py +12 -5
- toil/test/mesos/MesosDataStructuresTest.py +23 -10
- toil/test/mesos/helloWorld.py +7 -6
- toil/test/mesos/stress.py +25 -20
- toil/test/options/options.py +7 -2
- toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
- toil/test/provisioners/clusterScalerTest.py +440 -250
- toil/test/provisioners/clusterTest.py +81 -42
- toil/test/provisioners/gceProvisionerTest.py +174 -100
- toil/test/provisioners/provisionerTest.py +25 -13
- toil/test/provisioners/restartScript.py +5 -4
- toil/test/server/serverTest.py +188 -141
- toil/test/sort/restart_sort.py +137 -68
- toil/test/sort/sort.py +134 -66
- toil/test/sort/sortTest.py +91 -49
- toil/test/src/autoDeploymentTest.py +140 -100
- toil/test/src/busTest.py +20 -18
- toil/test/src/checkpointTest.py +8 -2
- toil/test/src/deferredFunctionTest.py +49 -35
- toil/test/src/dockerCheckTest.py +33 -26
- toil/test/src/environmentTest.py +20 -10
- toil/test/src/fileStoreTest.py +538 -271
- toil/test/src/helloWorldTest.py +7 -4
- toil/test/src/importExportFileTest.py +61 -31
- toil/test/src/jobDescriptionTest.py +32 -17
- toil/test/src/jobEncapsulationTest.py +2 -0
- toil/test/src/jobFileStoreTest.py +74 -50
- toil/test/src/jobServiceTest.py +187 -73
- toil/test/src/jobTest.py +120 -70
- toil/test/src/miscTests.py +19 -18
- toil/test/src/promisedRequirementTest.py +82 -36
- toil/test/src/promisesTest.py +7 -6
- toil/test/src/realtimeLoggerTest.py +6 -6
- toil/test/src/regularLogTest.py +71 -37
- toil/test/src/resourceTest.py +80 -49
- toil/test/src/restartDAGTest.py +36 -22
- toil/test/src/resumabilityTest.py +9 -2
- toil/test/src/retainTempDirTest.py +45 -14
- toil/test/src/systemTest.py +12 -8
- toil/test/src/threadingTest.py +44 -25
- toil/test/src/toilContextManagerTest.py +10 -7
- toil/test/src/userDefinedJobArgTypeTest.py +8 -5
- toil/test/src/workerTest.py +33 -16
- toil/test/utils/toilDebugTest.py +70 -58
- toil/test/utils/toilKillTest.py +4 -5
- toil/test/utils/utilsTest.py +239 -102
- toil/test/wdl/wdltoil_test.py +789 -148
- toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
- toil/toilState.py +52 -26
- toil/utils/toilConfig.py +13 -4
- toil/utils/toilDebugFile.py +44 -27
- toil/utils/toilDebugJob.py +85 -25
- toil/utils/toilDestroyCluster.py +11 -6
- toil/utils/toilKill.py +8 -3
- toil/utils/toilLaunchCluster.py +251 -145
- toil/utils/toilMain.py +37 -16
- toil/utils/toilRsyncCluster.py +27 -14
- toil/utils/toilSshCluster.py +45 -22
- toil/utils/toilStats.py +75 -36
- toil/utils/toilStatus.py +226 -119
- toil/utils/toilUpdateEC2Instances.py +3 -1
- toil/version.py +11 -11
- toil/wdl/utils.py +5 -5
- toil/wdl/wdltoil.py +3513 -1052
- toil/worker.py +269 -128
- toil-8.0.0.dist-info/METADATA +173 -0
- toil-8.0.0.dist-info/RECORD +253 -0
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
- toil-7.0.0.dist-info/METADATA +0 -158
- toil-7.0.0.dist-info/RECORD +0 -244
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/LICENSE +0 -0
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
- {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
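Much of the churn visible in the `toil/batchSystems/kubernetes.py` diff below is mechanical: the new side drops the deprecated `typing.Dict`/`List`/`Set`/`Tuple`/`Type` aliases in favor of the built-in generics and rewraps call sites in Black-style formatting. A minimal before/after sketch of that pattern (illustrative only; the function names here are made up and not taken from toil):

```python
from typing import Dict, List, Optional, Set


# toil 7.x style: container types imported from typing.
def summarize_old(job_ids: Set[int], names: List[str]) -> Dict[str, Optional[int]]:
    return {name: None for name in names}


# toil 8.0.0 style: PEP 585 built-in generics, fewer typing imports.
def summarize_new(job_ids: set[int], names: list[str]) -> dict[str, Optional[int]]:
    return {name: None for name in names}
```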
toil/batchSystems/kubernetes.py
CHANGED
|
@@ -30,22 +30,10 @@ import tempfile
|
|
|
30
30
|
import time
|
|
31
31
|
import uuid
|
|
32
32
|
from argparse import ArgumentParser, _ArgumentGroup
|
|
33
|
+
from collections.abc import Iterator
|
|
33
34
|
from queue import Empty, Queue
|
|
34
35
|
from threading import Condition, Event, RLock, Thread
|
|
35
|
-
from typing import
|
|
36
|
-
Callable,
|
|
37
|
-
Dict,
|
|
38
|
-
Iterator,
|
|
39
|
-
List,
|
|
40
|
-
Literal,
|
|
41
|
-
Optional,
|
|
42
|
-
Set,
|
|
43
|
-
Tuple,
|
|
44
|
-
Type,
|
|
45
|
-
TypeVar,
|
|
46
|
-
Union,
|
|
47
|
-
cast,
|
|
48
|
-
overload)
|
|
36
|
+
from typing import Any, Callable, Literal, Optional, TypeVar, Union, cast, overload
|
|
49
37
|
|
|
50
38
|
from toil.lib.conversions import opt_strtobool
|
|
51
39
|
|
|
@@ -53,72 +41,79 @@ if sys.version_info < (3, 10):
|
|
|
53
41
|
from typing_extensions import ParamSpec
|
|
54
42
|
else:
|
|
55
43
|
from typing import ParamSpec
|
|
56
|
-
|
|
57
|
-
|
|
44
|
+
|
|
45
|
+
if sys.version_info < (3, 11):
|
|
46
|
+
from typing_extensions import NotRequired
|
|
58
47
|
else:
|
|
59
|
-
from
|
|
60
|
-
|
|
48
|
+
from typing import NotRequired
|
|
49
|
+
|
|
50
|
+
from typing import Protocol, TypedDict, runtime_checkable
|
|
51
|
+
|
|
61
52
|
import urllib3
|
|
62
53
|
import yaml
|
|
54
|
+
|
|
63
55
|
# The Right Way to use the Kubernetes module is to `import kubernetes` and then you get all your stuff as like ApiClient. But this doesn't work for the stubs: the stubs seem to only support importing things from the internal modules in `kubernetes` where they are actually defined. See for example <https://github.com/MaterializeInc/kubernetes-stubs/issues/9 and <https://github.com/MaterializeInc/kubernetes-stubs/issues/10>. So we just import all the things we use into our global namespace here.
|
|
64
|
-
from kubernetes.client import (
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
56
|
+
from kubernetes.client import (
|
|
57
|
+
BatchV1Api,
|
|
58
|
+
CoreV1Api,
|
|
59
|
+
CustomObjectsApi,
|
|
60
|
+
V1Affinity,
|
|
61
|
+
V1Container,
|
|
62
|
+
V1ContainerStatus,
|
|
63
|
+
V1EmptyDirVolumeSource,
|
|
64
|
+
V1HostPathVolumeSource,
|
|
65
|
+
V1Job,
|
|
66
|
+
V1JobCondition,
|
|
67
|
+
V1JobSpec,
|
|
68
|
+
V1NodeAffinity,
|
|
69
|
+
V1NodeSelector,
|
|
70
|
+
V1NodeSelectorRequirement,
|
|
71
|
+
V1NodeSelectorTerm,
|
|
72
|
+
V1ObjectMeta,
|
|
73
|
+
V1Pod,
|
|
74
|
+
V1PodSpec,
|
|
75
|
+
V1PodTemplateSpec,
|
|
76
|
+
V1PreferredSchedulingTerm,
|
|
77
|
+
V1ResourceRequirements,
|
|
78
|
+
V1SecretVolumeSource,
|
|
79
|
+
V1SecurityContext,
|
|
80
|
+
V1Toleration,
|
|
81
|
+
V1Volume,
|
|
82
|
+
V1VolumeMount,
|
|
83
|
+
)
|
|
89
84
|
from kubernetes.client.api_client import ApiClient
|
|
90
85
|
from kubernetes.client.exceptions import ApiException
|
|
91
86
|
from kubernetes.config.config_exception import ConfigException
|
|
92
87
|
from kubernetes.config.incluster_config import load_incluster_config
|
|
93
|
-
from kubernetes.config.kube_config import
|
|
94
|
-
|
|
88
|
+
from kubernetes.config.kube_config import list_kube_config_contexts, load_kube_config
|
|
89
|
+
|
|
95
90
|
# TODO: Watch API is not typed yet
|
|
96
91
|
from kubernetes.watch import Watch # type: ignore
|
|
97
|
-
# typing-extensions dependency on Pythons that are new enough.
|
|
98
|
-
from typing_extensions import NotRequired
|
|
99
92
|
|
|
100
93
|
from toil import applianceSelf
|
|
101
|
-
from toil.batchSystems.abstractBatchSystem import (
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
94
|
+
from toil.batchSystems.abstractBatchSystem import (
|
|
95
|
+
EXIT_STATUS_UNAVAILABLE_VALUE,
|
|
96
|
+
BatchJobExitReason,
|
|
97
|
+
InsufficientSystemResources,
|
|
98
|
+
ResourcePool,
|
|
99
|
+
UpdatedBatchJobInfo,
|
|
100
|
+
)
|
|
106
101
|
from toil.batchSystems.cleanup_support import BatchSystemCleanupSupport
|
|
107
102
|
from toil.batchSystems.contained_executor import pack_job
|
|
108
103
|
from toil.batchSystems.options import OptionSetter
|
|
109
104
|
from toil.common import Config, Toil
|
|
110
|
-
from toil.options.common import SYS_MAX_SIZE
|
|
111
105
|
from toil.job import JobDescription, Requirer
|
|
112
106
|
from toil.lib.conversions import human2bytes
|
|
113
107
|
from toil.lib.misc import get_user_name, slow_down, utc_now
|
|
114
108
|
from toil.lib.retry import ErrorCondition, retry
|
|
109
|
+
from toil.options.common import SYS_MAX_SIZE
|
|
115
110
|
from toil.resource import Resource
|
|
116
111
|
|
|
117
112
|
logger = logging.getLogger(__name__)
|
|
118
|
-
retryable_kubernetes_errors:
|
|
113
|
+
retryable_kubernetes_errors: list[Union[type[Exception], ErrorCondition]] = [
|
|
119
114
|
urllib3.exceptions.MaxRetryError,
|
|
120
115
|
urllib3.exceptions.ProtocolError,
|
|
121
|
-
ApiException
|
|
116
|
+
ApiException,
|
|
122
117
|
]
|
|
123
118
|
|
|
124
119
|
|
|
@@ -132,8 +127,10 @@ def is_retryable_kubernetes_error(e: Exception) -> bool:
|
|
|
132
127
|
return True
|
|
133
128
|
return False
|
|
134
129
|
|
|
130
|
+
|
|
135
131
|
# Represents a collection of label or taint keys and their sets of acceptable (or unacceptable) values.
|
|
136
|
-
KeyValuesList =
|
|
132
|
+
KeyValuesList = list[tuple[str, list[str]]]
|
|
133
|
+
|
|
137
134
|
|
|
138
135
|
class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
139
136
|
@classmethod
|
|
@@ -150,8 +147,9 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
150
147
|
core: NotRequired[CoreV1Api]
|
|
151
148
|
customObjects: NotRequired[CustomObjectsApi]
|
|
152
149
|
|
|
153
|
-
|
|
154
|
-
|
|
150
|
+
def __init__(
|
|
151
|
+
self, config: Config, maxCores: int, maxMemory: int, maxDisk: int
|
|
152
|
+
) -> None:
|
|
155
153
|
super().__init__(config, maxCores, maxMemory, maxDisk)
|
|
156
154
|
|
|
157
155
|
# Re-type the config to make sure it has all the fields we need.
|
|
@@ -162,8 +160,8 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
162
160
|
# Otherwise if we are at debug log level, we dump every
|
|
163
161
|
# request/response to Kubernetes, including tokens which we shouldn't
|
|
164
162
|
# reveal on CI.
|
|
165
|
-
logging.getLogger(
|
|
166
|
-
logging.getLogger(
|
|
163
|
+
logging.getLogger("kubernetes").setLevel(logging.ERROR)
|
|
164
|
+
logging.getLogger("requests_oauthlib").setLevel(logging.ERROR)
|
|
167
165
|
|
|
168
166
|
# This will hold the last time our Kubernetes credentials were refreshed
|
|
169
167
|
self.credential_time: Optional[datetime.datetime] = None
|
|
@@ -171,7 +169,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
171
169
|
self._apis: KubernetesBatchSystem._ApiStorageDict = {}
|
|
172
170
|
|
|
173
171
|
# Get our namespace (and our Kubernetes credentials to make sure they exist)
|
|
174
|
-
self.namespace: str = self._api(
|
|
172
|
+
self.namespace: str = self._api("namespace")
|
|
175
173
|
|
|
176
174
|
# Decide if we are going to mount a Kubernetes host path as the Toil
|
|
177
175
|
# work dir in the workers, for shared caching.
|
|
@@ -190,7 +188,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
190
188
|
self.unique_id = uuid.uuid4()
|
|
191
189
|
|
|
192
190
|
# Create a prefix for jobs, starting with our username
|
|
193
|
-
self.job_prefix: str = f
|
|
191
|
+
self.job_prefix: str = f"{username}-toil-{self.unique_id}-"
|
|
194
192
|
# Instead of letting Kubernetes assign unique job names, we assign our
|
|
195
193
|
# own based on a numerical job ID. This functionality is managed by the
|
|
196
194
|
# BatchSystemLocalSupport.
|
|
@@ -214,55 +212,61 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
214
212
|
# Try and guess what Toil work dir the workers will use.
|
|
215
213
|
# We need to be able to provision (possibly shared) space there.
|
|
216
214
|
self.worker_work_dir: str = Toil.getToilWorkDir(config.workDir)
|
|
217
|
-
if (
|
|
218
|
-
|
|
219
|
-
|
|
215
|
+
if (
|
|
216
|
+
config.workDir is None
|
|
217
|
+
and os.getenv("TOIL_WORKDIR") is None
|
|
218
|
+
and self.worker_work_dir == tempfile.gettempdir()
|
|
219
|
+
):
|
|
220
220
|
|
|
221
221
|
# We defaulted to the system temp directory. But we think the
|
|
222
222
|
# worker Dockerfiles will make them use /var/lib/toil instead.
|
|
223
223
|
# TODO: Keep this in sync with the Dockerfile.
|
|
224
|
-
self.worker_work_dir =
|
|
224
|
+
self.worker_work_dir = "/var/lib/toil"
|
|
225
225
|
|
|
226
226
|
# A Toil-managed Kubernetes cluster will have most of its temp space at
|
|
227
227
|
# /var/tmp, which is where really large temp files really belong
|
|
228
228
|
# according to https://systemd.io/TEMPORARY_DIRECTORIES/. So we will
|
|
229
229
|
# set the default temporary directory to there for all our jobs.
|
|
230
|
-
self.environment[
|
|
230
|
+
self.environment["TMPDIR"] = "/var/tmp"
|
|
231
231
|
|
|
232
232
|
# Get the name of the AWS secret, if any, to mount in containers.
|
|
233
|
-
self.aws_secret_name: Optional[str] = os.environ.get(
|
|
233
|
+
self.aws_secret_name: Optional[str] = os.environ.get(
|
|
234
|
+
"TOIL_AWS_SECRET_NAME", None
|
|
235
|
+
)
|
|
234
236
|
|
|
235
237
|
# Set this to True to enable the experimental wait-for-job-update code
|
|
236
238
|
self.enable_watching: bool = os.environ.get("KUBE_WATCH_ENABLED", False)
|
|
237
239
|
|
|
238
240
|
# This will be a label to select all our jobs.
|
|
239
|
-
self.run_id: str = f
|
|
241
|
+
self.run_id: str = f"toil-{self.unique_id}"
|
|
240
242
|
|
|
241
243
|
# Keep track of available resources.
|
|
242
|
-
maxMillicores = int(
|
|
243
|
-
|
|
244
|
+
maxMillicores = int(
|
|
245
|
+
SYS_MAX_SIZE if self.maxCores == SYS_MAX_SIZE else self.maxCores * 1000
|
|
246
|
+
)
|
|
247
|
+
self.resource_sources: list[ResourcePool] = [
|
|
244
248
|
# A pool representing available job slots
|
|
245
|
-
ResourcePool(self.config.max_jobs,
|
|
249
|
+
ResourcePool(self.config.max_jobs, "job slots"),
|
|
246
250
|
# A pool representing available CPU in units of millicores (1 CPU
|
|
247
251
|
# unit = 1000 millicores)
|
|
248
|
-
ResourcePool(maxMillicores,
|
|
252
|
+
ResourcePool(maxMillicores, "cores"),
|
|
249
253
|
# A pool representing available memory in bytes
|
|
250
|
-
ResourcePool(self.maxMemory,
|
|
254
|
+
ResourcePool(self.maxMemory, "memory"),
|
|
251
255
|
# A pool representing the available space in bytes
|
|
252
|
-
ResourcePool(self.maxDisk,
|
|
256
|
+
ResourcePool(self.maxDisk, "disk"),
|
|
253
257
|
]
|
|
254
258
|
|
|
255
259
|
# A set of job IDs that are queued (useful for getIssuedBatchJobIDs())
|
|
256
|
-
self._queued_job_ids:
|
|
260
|
+
self._queued_job_ids: set[int] = set()
|
|
257
261
|
|
|
258
262
|
# Keep track of the acquired resources for each job
|
|
259
|
-
self._acquired_resources:
|
|
263
|
+
self._acquired_resources: dict[str, list[int]] = {}
|
|
260
264
|
|
|
261
265
|
# Queue for jobs to be submitted to the Kubernetes cluster
|
|
262
|
-
self._jobs_queue: Queue[
|
|
266
|
+
self._jobs_queue: Queue[tuple[int, JobDescription, V1PodSpec]] = Queue()
|
|
263
267
|
|
|
264
268
|
# A set of job IDs that should be killed
|
|
265
|
-
self._killed_queue_jobs:
|
|
269
|
+
self._killed_queue_jobs: set[int] = set()
|
|
266
270
|
|
|
267
271
|
# We use this event to signal shutdown
|
|
268
272
|
self._shutting_down: Event = Event()
|
|
@@ -286,7 +290,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
286
290
|
"""
|
|
287
291
|
|
|
288
292
|
if not kubernetes_object:
|
|
289
|
-
return
|
|
293
|
+
return "None"
|
|
290
294
|
|
|
291
295
|
# We need a Kubernetes widget that knows how to translate
|
|
292
296
|
# its data structures to nice YAML-able dicts. See:
|
|
@@ -296,7 +300,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
296
300
|
# Convert to a dict
|
|
297
301
|
root_dict = api_client.sanitize_for_serialization(kubernetes_object)
|
|
298
302
|
|
|
299
|
-
def drop_boring(here:
|
|
303
|
+
def drop_boring(here: dict[str, Any]) -> None:
|
|
300
304
|
"""
|
|
301
305
|
Drop boring fields recursively.
|
|
302
306
|
"""
|
|
@@ -304,7 +308,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
304
308
|
for k, v in here.items():
|
|
305
309
|
if isinstance(v, dict):
|
|
306
310
|
drop_boring(v)
|
|
307
|
-
if k in [
|
|
311
|
+
if k in ["managedFields"]:
|
|
308
312
|
boring_keys.append(k)
|
|
309
313
|
for k in boring_keys:
|
|
310
314
|
del here[k]
|
|
@@ -314,33 +318,43 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
314
318
|
|
|
315
319
|
@overload
|
|
316
320
|
def _api(
|
|
317
|
-
self,
|
|
318
|
-
|
|
319
|
-
|
|
321
|
+
self,
|
|
322
|
+
kind: Literal["batch"],
|
|
323
|
+
max_age_seconds: float = 5 * 60,
|
|
324
|
+
errors: Optional[list[int]] = None,
|
|
325
|
+
) -> BatchV1Api: ...
|
|
320
326
|
|
|
321
327
|
@overload
|
|
322
328
|
def _api(
|
|
323
|
-
self,
|
|
324
|
-
|
|
325
|
-
|
|
329
|
+
self,
|
|
330
|
+
kind: Literal["core"],
|
|
331
|
+
max_age_seconds: float = 5 * 60,
|
|
332
|
+
errors: Optional[list[int]] = None,
|
|
333
|
+
) -> CoreV1Api: ...
|
|
326
334
|
|
|
327
335
|
@overload
|
|
328
336
|
def _api(
|
|
329
|
-
self,
|
|
330
|
-
|
|
331
|
-
|
|
337
|
+
self,
|
|
338
|
+
kind: Literal["customObjects"],
|
|
339
|
+
max_age_seconds: float = 5 * 60,
|
|
340
|
+
errors: Optional[list[int]] = None,
|
|
341
|
+
) -> CustomObjectsApi: ...
|
|
332
342
|
|
|
333
343
|
@overload
|
|
334
344
|
def _api(
|
|
335
|
-
self, kind: Literal[
|
|
336
|
-
) -> str:
|
|
337
|
-
...
|
|
345
|
+
self, kind: Literal["namespace"], max_age_seconds: float = 5 * 60
|
|
346
|
+
) -> str: ...
|
|
338
347
|
|
|
339
348
|
def _api(
|
|
340
349
|
self,
|
|
341
|
-
kind: Union[
|
|
350
|
+
kind: Union[
|
|
351
|
+
Literal["batch"],
|
|
352
|
+
Literal["core"],
|
|
353
|
+
Literal["customObjects"],
|
|
354
|
+
Literal["namespace"],
|
|
355
|
+
],
|
|
342
356
|
max_age_seconds: float = 5 * 60,
|
|
343
|
-
errors: Optional[
|
|
357
|
+
errors: Optional[list[int]] = None,
|
|
344
358
|
) -> Union[BatchV1Api, CoreV1Api, CustomObjectsApi, str]:
|
|
345
359
|
"""
|
|
346
360
|
The Kubernetes module isn't clever enough to renew its credentials when
|
|
@@ -373,44 +387,53 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
373
387
|
|
|
374
388
|
now = utc_now()
|
|
375
389
|
|
|
376
|
-
if
|
|
390
|
+
if (
|
|
391
|
+
self.credential_time is None
|
|
392
|
+
or (now - self.credential_time).total_seconds() > max_age_seconds
|
|
393
|
+
):
|
|
377
394
|
# Credentials need a refresh
|
|
378
395
|
try:
|
|
379
396
|
# Load ~/.kube/config or KUBECONFIG
|
|
380
397
|
load_kube_config()
|
|
381
398
|
# Worked. We're using kube config
|
|
382
|
-
config_source =
|
|
399
|
+
config_source = "kube"
|
|
383
400
|
except ConfigException:
|
|
384
401
|
# Didn't work. Try pod-based credentials in case we are in a pod.
|
|
385
402
|
try:
|
|
386
403
|
load_incluster_config()
|
|
387
404
|
# Worked. We're using in_cluster config
|
|
388
|
-
config_source =
|
|
405
|
+
config_source = "in_cluster"
|
|
389
406
|
except ConfigException:
|
|
390
|
-
raise RuntimeError(
|
|
407
|
+
raise RuntimeError(
|
|
408
|
+
"Could not load Kubernetes configuration from ~/.kube/config, $KUBECONFIG, or current pod."
|
|
409
|
+
)
|
|
391
410
|
|
|
392
411
|
# Now fill in the API objects with these credentials
|
|
393
|
-
self._apis[
|
|
394
|
-
self._apis[
|
|
395
|
-
self._apis[
|
|
412
|
+
self._apis["batch"] = BatchV1Api()
|
|
413
|
+
self._apis["core"] = CoreV1Api()
|
|
414
|
+
self._apis["customObjects"] = CustomObjectsApi()
|
|
396
415
|
|
|
397
416
|
# And save the time
|
|
398
417
|
self.credential_time = now
|
|
399
|
-
if kind ==
|
|
418
|
+
if kind == "namespace":
|
|
400
419
|
# We just need the namespace string
|
|
401
|
-
if config_source ==
|
|
420
|
+
if config_source == "in_cluster":
|
|
402
421
|
# Our namespace comes from a particular file.
|
|
403
|
-
with open(
|
|
422
|
+
with open(
|
|
423
|
+
"/var/run/secrets/kubernetes.io/serviceaccount/namespace"
|
|
424
|
+
) as fh:
|
|
404
425
|
return fh.read().strip()
|
|
405
426
|
else:
|
|
406
427
|
# Find all contexts and the active context.
|
|
407
428
|
# The active context gets us our namespace.
|
|
408
429
|
contexts, activeContext = list_kube_config_contexts()
|
|
409
430
|
if not contexts:
|
|
410
|
-
raise RuntimeError(
|
|
431
|
+
raise RuntimeError(
|
|
432
|
+
"No Kubernetes contexts available in ~/.kube/config or $KUBECONFIG"
|
|
433
|
+
)
|
|
411
434
|
|
|
412
435
|
# Identify the namespace to work in
|
|
413
|
-
namespace = activeContext.get(
|
|
436
|
+
namespace = activeContext.get("context", {}).get("namespace", "default")
|
|
414
437
|
assert isinstance(namespace, str)
|
|
415
438
|
return namespace
|
|
416
439
|
|
|
@@ -430,11 +453,13 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
430
453
|
ErrorCondition(
|
|
431
454
|
error=ApiException,
|
|
432
455
|
error_codes=errors,
|
|
433
|
-
retry_on_this_condition=False
|
|
456
|
+
retry_on_this_condition=False,
|
|
434
457
|
)
|
|
435
458
|
)
|
|
436
459
|
decorator = retry(errors=error_list)
|
|
437
|
-
wrapper = KubernetesBatchSystem.DecoratorWrapper(
|
|
460
|
+
wrapper = KubernetesBatchSystem.DecoratorWrapper(
|
|
461
|
+
api_object, decorator
|
|
462
|
+
)
|
|
438
463
|
return cast(Union[BatchV1Api, CoreV1Api, CustomObjectsApi], wrapper)
|
|
439
464
|
except KeyError:
|
|
440
465
|
raise RuntimeError(f"Unknown Kubernetes API type: {kind}")
|
|
@@ -445,7 +470,12 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
445
470
|
"""
|
|
446
471
|
|
|
447
472
|
P = ParamSpec("P")
|
|
448
|
-
|
|
473
|
+
|
|
474
|
+
def __init__(
|
|
475
|
+
self,
|
|
476
|
+
to_wrap: Any,
|
|
477
|
+
decorator: Callable[[Callable[P, Any]], Callable[P, Any]],
|
|
478
|
+
) -> None:
|
|
449
479
|
"""
|
|
450
480
|
Make a wrapper around the given object.
|
|
451
481
|
When methods on the object are called, they will be called through
|
|
@@ -469,16 +499,19 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
469
499
|
return attr
|
|
470
500
|
|
|
471
501
|
ItemT = TypeVar("ItemT")
|
|
502
|
+
|
|
472
503
|
class _ItemsHaver(Protocol[ItemT]):
|
|
473
504
|
"""
|
|
474
505
|
Anything that has a .items that is a list of something.
|
|
475
506
|
"""
|
|
507
|
+
|
|
476
508
|
# KubernetesBatchSystem isn't defined until the class executes, so any
|
|
477
509
|
# up-references to types from there that are in signatures (and not
|
|
478
510
|
# method code) need to be quoted
|
|
479
|
-
items:
|
|
511
|
+
items: list["KubernetesBatchSystem.ItemT"]
|
|
480
512
|
|
|
481
513
|
CovItemT = TypeVar("CovItemT", covariant=True)
|
|
514
|
+
|
|
482
515
|
class _WatchEvent(Protocol[CovItemT]):
|
|
483
516
|
"""
|
|
484
517
|
An event from a Kubernetes watch stream.
|
|
@@ -490,23 +523,26 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
490
523
|
# __getitem__ instead.
|
|
491
524
|
|
|
492
525
|
@overload
|
|
493
|
-
def __getitem__(self, name: Literal[
|
|
494
|
-
...
|
|
526
|
+
def __getitem__(self, name: Literal["type"]) -> str: ...
|
|
495
527
|
|
|
496
528
|
@overload
|
|
497
|
-
def __getitem__(
|
|
498
|
-
|
|
529
|
+
def __getitem__(
|
|
530
|
+
self, name: Literal["object"]
|
|
531
|
+
) -> "KubernetesBatchSystem.CovItemT": ...
|
|
499
532
|
|
|
500
533
|
@overload
|
|
501
|
-
def __getitem__(self, name: Literal[
|
|
502
|
-
...
|
|
534
|
+
def __getitem__(self, name: Literal["raw_object"]) -> dict[str, Any]: ...
|
|
503
535
|
|
|
504
|
-
def __getitem__(
|
|
505
|
-
|
|
536
|
+
def __getitem__(
|
|
537
|
+
self, name: Union[Literal["type"], Literal["object"], Literal["raw_object"]]
|
|
538
|
+
) -> Any: ...
|
|
506
539
|
|
|
507
540
|
P = ParamSpec("P")
|
|
508
541
|
R = TypeVar("R")
|
|
509
|
-
|
|
542
|
+
|
|
543
|
+
def _stream_until_error(
|
|
544
|
+
self, method: Callable[P, _ItemsHaver[R]], *args: P.args, **kwargs: P.kwargs
|
|
545
|
+
) -> Iterator[_WatchEvent[R]]:
|
|
510
546
|
"""
|
|
511
547
|
Kubernetes kubernetes.watch.Watch().stream() streams can fail and raise
|
|
512
548
|
errors. We don't want to have those errors fail the entire workflow, so
|
|
@@ -572,7 +608,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
572
608
|
|
|
573
609
|
# Loop through all jobs inside the queue and see if any of them
|
|
574
610
|
# could be launched.
|
|
575
|
-
jobs: Queue[
|
|
611
|
+
jobs: Queue[tuple[int, JobDescription, V1PodSpec]] = Queue()
|
|
576
612
|
while True:
|
|
577
613
|
try:
|
|
578
614
|
job = self._jobs_queue.get_nowait()
|
|
@@ -584,7 +620,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
584
620
|
logger.debug(f"Skipping killed job {job_id}")
|
|
585
621
|
continue
|
|
586
622
|
|
|
587
|
-
job_name = f
|
|
623
|
+
job_name = f"{self.job_prefix}{job_id}"
|
|
588
624
|
result = self._launch_job(job_name, job_desc, spec)
|
|
589
625
|
if result is False:
|
|
590
626
|
# Not enough resources to launch this job.
|
|
@@ -605,7 +641,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
605
641
|
logger.debug(f"Roughly {self._jobs_queue.qsize} jobs in the queue")
|
|
606
642
|
|
|
607
643
|
def setUserScript(self, userScript: Resource) -> None:
|
|
608
|
-
logger.info(f
|
|
644
|
+
logger.info(f"Setting user script for deployment: {userScript}")
|
|
609
645
|
self.user_script = userScript
|
|
610
646
|
|
|
611
647
|
# setEnv is provided by BatchSystemSupport, updates self.environment
|
|
@@ -657,18 +693,21 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
657
693
|
# Amazon just uses a label, while Google
|
|
658
694
|
# <https://cloud.google.com/kubernetes-engine/docs/how-to/preemptible-vms>
|
|
659
695
|
# uses a label and a taint.
|
|
660
|
-
PREEMPTIBLE_SCHEMES = {
|
|
661
|
-
|
|
662
|
-
|
|
696
|
+
PREEMPTIBLE_SCHEMES = {
|
|
697
|
+
"labels": [
|
|
698
|
+
("eks.amazonaws.com/capacityType", ["SPOT"]),
|
|
699
|
+
("cloud.google.com/gke-preemptible", ["true"]),
|
|
700
|
+
],
|
|
701
|
+
"taints": [("cloud.google.com/gke-preemptible", ["true"])],
|
|
702
|
+
}
|
|
663
703
|
|
|
664
704
|
if preemptible:
|
|
665
705
|
# We want to seek preemptible labels and tolerate preemptible taints.
|
|
666
|
-
self.desired_labels += PREEMPTIBLE_SCHEMES[
|
|
667
|
-
self.tolerated_taints += PREEMPTIBLE_SCHEMES[
|
|
706
|
+
self.desired_labels += PREEMPTIBLE_SCHEMES["labels"]
|
|
707
|
+
self.tolerated_taints += PREEMPTIBLE_SCHEMES["taints"]
|
|
668
708
|
else:
|
|
669
709
|
# We want to prohibit preemptible labels
|
|
670
|
-
self.prohibited_labels += PREEMPTIBLE_SCHEMES[
|
|
671
|
-
|
|
710
|
+
self.prohibited_labels += PREEMPTIBLE_SCHEMES["labels"]
|
|
672
711
|
|
|
673
712
|
def apply(self, pod_spec: V1PodSpec) -> None:
|
|
674
713
|
"""
|
|
@@ -679,29 +718,26 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
679
718
|
# Convert our collections to Kubernetes expressions.
|
|
680
719
|
|
|
681
720
|
# REQUIRE that ALL of these requirements be satisfied
|
|
682
|
-
required_selector_requirements:
|
|
721
|
+
required_selector_requirements: list[V1NodeSelectorRequirement] = []
|
|
683
722
|
# PREFER that EACH of these terms be satisfied
|
|
684
|
-
preferred_scheduling_terms:
|
|
723
|
+
preferred_scheduling_terms: list[V1PreferredSchedulingTerm] = []
|
|
685
724
|
# And this list of tolerations to apply
|
|
686
|
-
tolerations:
|
|
725
|
+
tolerations: list[V1Toleration] = []
|
|
687
726
|
|
|
688
727
|
for label, values in self.required_labels:
|
|
689
728
|
# Collect requirements for the required labels
|
|
690
|
-
has_label = V1NodeSelectorRequirement(
|
|
691
|
-
|
|
692
|
-
|
|
729
|
+
has_label = V1NodeSelectorRequirement(
|
|
730
|
+
key=label, operator="In", values=values
|
|
731
|
+
)
|
|
693
732
|
required_selector_requirements.append(has_label)
|
|
694
733
|
for label, values in self.desired_labels:
|
|
695
734
|
# Collect preferences for the preferred labels
|
|
696
|
-
has_label = V1NodeSelectorRequirement(
|
|
697
|
-
|
|
698
|
-
values=values)
|
|
699
|
-
term = V1NodeSelectorTerm(
|
|
700
|
-
match_expressions=[has_label]
|
|
735
|
+
has_label = V1NodeSelectorRequirement(
|
|
736
|
+
key=label, operator="In", values=values
|
|
701
737
|
)
|
|
738
|
+
term = V1NodeSelectorTerm(match_expressions=[has_label])
|
|
702
739
|
# Each becomes a separate preference, more is better.
|
|
703
|
-
preference = V1PreferredSchedulingTerm(weight=1,
|
|
704
|
-
preference=term)
|
|
740
|
+
preference = V1PreferredSchedulingTerm(weight=1, preference=term)
|
|
705
741
|
|
|
706
742
|
preferred_scheduling_terms.append(preference)
|
|
707
743
|
for label, values in self.prohibited_labels:
|
|
@@ -712,15 +748,14 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
712
748
|
# <https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/#set-based-requirement>
|
|
713
749
|
# So we create a NotIn for each label and AND them
|
|
714
750
|
# all together.
|
|
715
|
-
not_labeled = V1NodeSelectorRequirement(
|
|
716
|
-
|
|
717
|
-
|
|
751
|
+
not_labeled = V1NodeSelectorRequirement(
|
|
752
|
+
key=label, operator="NotIn", values=values
|
|
753
|
+
)
|
|
718
754
|
required_selector_requirements.append(not_labeled)
|
|
719
755
|
for taint, values in self.tolerated_taints:
|
|
720
756
|
for value in values:
|
|
721
757
|
# Each toleration can tolerate one value
|
|
722
|
-
taint_ok = V1Toleration(key=taint,
|
|
723
|
-
value=value)
|
|
758
|
+
taint_ok = V1Toleration(key=taint, value=value)
|
|
724
759
|
tolerations.append(taint_ok)
|
|
725
760
|
|
|
726
761
|
# Now combine everything
|
|
@@ -734,16 +769,22 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
734
769
|
match_expressions=required_selector_requirements
|
|
735
770
|
)
|
|
736
771
|
# And a selector to hold the term
|
|
737
|
-
requirements_selector = V1NodeSelector(
|
|
772
|
+
requirements_selector = V1NodeSelector(
|
|
773
|
+
node_selector_terms=[requirements_term]
|
|
774
|
+
)
|
|
738
775
|
|
|
739
776
|
# Make an affinity that prefers the preferences and requires the requirements
|
|
740
777
|
node_affinity = V1NodeAffinity(
|
|
741
|
-
preferred_during_scheduling_ignored_during_execution=
|
|
742
|
-
|
|
778
|
+
preferred_during_scheduling_ignored_during_execution=(
|
|
779
|
+
preferred_scheduling_terms
|
|
780
|
+
if preferred_scheduling_terms
|
|
781
|
+
else None
|
|
782
|
+
),
|
|
783
|
+
required_during_scheduling_ignored_during_execution=requirements_selector,
|
|
743
784
|
)
|
|
744
785
|
|
|
745
786
|
# Apply the affinity
|
|
746
|
-
pod_spec.affinity = V1Affinity(node_affinity
|
|
787
|
+
pod_spec.affinity = V1Affinity(node_affinity=node_affinity)
|
|
747
788
|
|
|
748
789
|
if tolerations:
|
|
749
790
|
# Apply the tolerations
|
|
@@ -751,18 +792,22 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
751
792
|
|
|
752
793
|
def _check_accelerator_request(self, requirer: Requirer) -> None:
|
|
753
794
|
for accelerator in requirer.accelerators:
|
|
754
|
-
if accelerator[
|
|
795
|
+
if accelerator["kind"] != "gpu" and "model" not in accelerator:
|
|
755
796
|
# We can only provide GPUs or things with a model right now
|
|
756
|
-
raise InsufficientSystemResources(
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
797
|
+
raise InsufficientSystemResources(
|
|
798
|
+
requirer,
|
|
799
|
+
"accelerators",
|
|
800
|
+
details=[
|
|
801
|
+
f"The accelerator {accelerator} could not be provided.",
|
|
802
|
+
"The Toil Kubernetes batch system only knows how to request gpu accelerators or accelerators with a defined model.",
|
|
803
|
+
],
|
|
804
|
+
)
|
|
760
805
|
|
|
761
806
|
def _create_pod_spec(
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
807
|
+
self,
|
|
808
|
+
command: str,
|
|
809
|
+
job_desc: JobDescription,
|
|
810
|
+
job_environment: Optional[dict[str, str]] = None,
|
|
766
811
|
) -> V1PodSpec:
|
|
767
812
|
"""
|
|
768
813
|
Make the specification for a pod that can execute the given job.
|
|
@@ -789,9 +834,11 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
789
834
|
# OOMing. We also want to provision some extra space so that when
|
|
790
835
|
# we test _isPodStuckOOM we never get True unless the job has
|
|
791
836
|
# exceeded job_desc.memory.
|
|
792
|
-
requirements_dict = {
|
|
793
|
-
|
|
794
|
-
|
|
837
|
+
requirements_dict = {
|
|
838
|
+
"cpu": job_desc.cores,
|
|
839
|
+
"memory": job_desc.memory + 1024 * 1024 * 512,
|
|
840
|
+
"ephemeral-storage": job_desc.disk + 1024 * 1024 * 512,
|
|
841
|
+
}
|
|
795
842
|
|
|
796
843
|
# Also start on the placement constraints
|
|
797
844
|
placement = KubernetesBatchSystem.Placement()
|
|
@@ -801,19 +848,21 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
801
848
|
# Add in requirements for accelerators (GPUs).
|
|
802
849
|
# See https://kubernetes.io/docs/tasks/manage-gpus/scheduling-gpus/
|
|
803
850
|
|
|
804
|
-
if accelerator[
|
|
851
|
+
if accelerator["kind"] == "gpu":
|
|
805
852
|
# We can't schedule GPUs without a brand, because the
|
|
806
853
|
# Kubernetes resources are <brand>.com/gpu. If no brand is
|
|
807
854
|
# specified, default to nvidia, which is very popular.
|
|
808
|
-
vendor = accelerator.get(
|
|
855
|
+
vendor = accelerator.get("brand", "nvidia")
|
|
809
856
|
key = f'{vendor}.com/{accelerator["kind"]}'
|
|
810
857
|
if key not in requirements_dict:
|
|
811
858
|
requirements_dict[key] = 0
|
|
812
|
-
requirements_dict[key] += accelerator[
|
|
859
|
+
requirements_dict[key] += accelerator["count"]
|
|
813
860
|
|
|
814
|
-
if
|
|
861
|
+
if "model" in accelerator:
|
|
815
862
|
# TODO: What if the cluster uses some other accelerator model labeling scheme?
|
|
816
|
-
placement.required_labels.append(
|
|
863
|
+
placement.required_labels.append(
|
|
864
|
+
("accelerator", [accelerator["model"]])
|
|
865
|
+
)
|
|
817
866
|
|
|
818
867
|
# TODO: Support AMD's labeling scheme: https://github.com/RadeonOpenCompute/k8s-device-plugin/tree/master/cmd/k8s-node-labeller
|
|
819
868
|
# That just has each trait of the accelerator as a separate label, but nothing that quite corresponds to a model.
|
|
@@ -825,14 +874,15 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
825
874
|
# the UCSC Kubernetes admins want it that way. For GPUs, Kubernetes
|
|
826
875
|
# requires them to be equal.
|
|
827
876
|
limits_dict = requests_dict
|
|
828
|
-
resources = V1ResourceRequirements(limits=limits_dict,
|
|
829
|
-
requests=requests_dict)
|
|
877
|
+
resources = V1ResourceRequirements(limits=limits_dict, requests=requests_dict)
|
|
830
878
|
|
|
831
879
|
# Collect volumes and mounts
|
|
832
880
|
volumes = []
|
|
833
881
|
mounts = []
|
|
834
882
|
|
|
835
|
-
def mount_host_path(
|
|
883
|
+
def mount_host_path(
|
|
884
|
+
volume_name: str, host_path: str, mount_path: str, create: bool = False
|
|
885
|
+
) -> None:
|
|
836
886
|
"""
|
|
837
887
|
Add a host path volume with the given name to mount the given path.
|
|
838
888
|
|
|
@@ -840,10 +890,9 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
840
890
|
not exist. Otherwise, when the directory does not exist, the
|
|
841
891
|
pod will wait for it to come into existence.
|
|
842
892
|
"""
|
|
843
|
-
volume_type =
|
|
893
|
+
volume_type = "DirectoryOrCreate" if create else "Directory"
|
|
844
894
|
volume_source = V1HostPathVolumeSource(path=host_path, type=volume_type)
|
|
845
|
-
volume = V1Volume(name=volume_name,
|
|
846
|
-
host_path=volume_source)
|
|
895
|
+
volume = V1Volume(name=volume_name, host_path=volume_source)
|
|
847
896
|
volumes.append(volume)
|
|
848
897
|
volume_mount = V1VolumeMount(mount_path=mount_path, name=volume_name)
|
|
849
898
|
mounts.append(volume_mount)
|
|
@@ -851,49 +900,63 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
851
900
|
if self.host_path is not None:
|
|
852
901
|
# Provision Toil WorkDir from a HostPath volume, to share with other pods.
|
|
853
902
|
# Create the directory if it doesn't exist already.
|
|
854
|
-
mount_host_path(
|
|
903
|
+
mount_host_path(
|
|
904
|
+
"workdir", self.host_path, self.worker_work_dir, create=True
|
|
905
|
+
)
|
|
855
906
|
# We also need to mount across /run/lock, where we will put
|
|
856
907
|
# per-node coordiantion info.
|
|
857
908
|
# Don't create this; it really should always exist.
|
|
858
|
-
mount_host_path(
|
|
909
|
+
mount_host_path("coordination", "/run/lock", "/run/lock")
|
|
859
910
|
else:
|
|
860
911
|
# Provision Toil WorkDir as an ephemeral volume
|
|
861
|
-
ephemeral_volume_name =
|
|
912
|
+
ephemeral_volume_name = "workdir"
|
|
862
913
|
ephemeral_volume_source = V1EmptyDirVolumeSource()
|
|
863
|
-
ephemeral_volume = V1Volume(
|
|
864
|
-
|
|
914
|
+
ephemeral_volume = V1Volume(
|
|
915
|
+
name=ephemeral_volume_name, empty_dir=ephemeral_volume_source
|
|
916
|
+
)
|
|
865
917
|
volumes.append(ephemeral_volume)
|
|
866
|
-
ephemeral_volume_mount = V1VolumeMount(
|
|
918
|
+
ephemeral_volume_mount = V1VolumeMount(
|
|
919
|
+
mount_path=self.worker_work_dir, name=ephemeral_volume_name
|
|
920
|
+
)
|
|
867
921
|
mounts.append(ephemeral_volume_mount)
|
|
868
922
|
# And don't share coordination directory
|
|
869
923
|
|
|
870
924
|
if self.aws_secret_name is not None:
|
|
871
925
|
# Also mount an AWS secret, if provided.
|
|
872
926
|
# TODO: make this generic somehow
|
|
873
|
-
secret_volume_name =
|
|
874
|
-
secret_volume_source = V1SecretVolumeSource(
|
|
875
|
-
|
|
876
|
-
|
|
927
|
+
secret_volume_name = "s3-credentials"
|
|
928
|
+
secret_volume_source = V1SecretVolumeSource(
|
|
929
|
+
secret_name=self.aws_secret_name
|
|
930
|
+
)
|
|
931
|
+
secret_volume = V1Volume(
|
|
932
|
+
name=secret_volume_name, secret=secret_volume_source
|
|
933
|
+
)
|
|
877
934
|
volumes.append(secret_volume)
|
|
878
|
-
secret_volume_mount = V1VolumeMount(
|
|
935
|
+
secret_volume_mount = V1VolumeMount(
|
|
936
|
+
mount_path="/root/.aws", name=secret_volume_name
|
|
937
|
+
)
|
|
879
938
|
mounts.append(secret_volume_mount)
|
|
880
939
|
|
|
881
940
|
# Make a container definition
|
|
882
|
-
container = V1Container(
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
886
|
-
|
|
941
|
+
container = V1Container(
|
|
942
|
+
command=command_list,
|
|
943
|
+
image=self.docker_image,
|
|
944
|
+
name="runner-container",
|
|
945
|
+
resources=resources,
|
|
946
|
+
volume_mounts=mounts,
|
|
947
|
+
)
|
|
887
948
|
|
|
888
949
|
# In case security context rules are not allowed to be set, we only apply
|
|
889
950
|
# a security context at all if we need to turn on privileged mode.
|
|
890
951
|
if self.config.kubernetes_privileged:
|
|
891
|
-
container.security_context = V1SecurityContext(
|
|
952
|
+
container.security_context = V1SecurityContext(
|
|
953
|
+
privileged=self.config.kubernetes_privileged
|
|
954
|
+
)
|
|
892
955
|
|
|
893
956
|
# Wrap the container in a spec
|
|
894
|
-
pod_spec = V1PodSpec(
|
|
895
|
-
|
|
896
|
-
|
|
957
|
+
pod_spec = V1PodSpec(
|
|
958
|
+
containers=[container], volumes=volumes, restart_policy="Never"
|
|
959
|
+
)
|
|
897
960
|
# Tell the spec where to land
|
|
898
961
|
placement.apply(pod_spec)
|
|
899
962
|
|
|
@@ -903,7 +966,9 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
903
966
|
|
|
904
967
|
return pod_spec
|
|
905
968
|
|
|
906
|
-
def _release_acquired_resources(
|
|
969
|
+
def _release_acquired_resources(
|
|
970
|
+
self, resources: list[int], notify: bool = False
|
|
971
|
+
) -> None:
|
|
907
972
|
"""
|
|
908
973
|
Release all resources acquired for a job.
|
|
909
974
|
|
|
@@ -922,10 +987,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
922
987
|
self._work_available.notify_all()
|
|
923
988
|
|
|
924
989
|
def _launch_job(
|
|
925
|
-
self,
|
|
926
|
-
job_name: str,
|
|
927
|
-
job_desc: JobDescription,
|
|
928
|
-
pod_spec: V1PodSpec
|
|
990
|
+
self, job_name: str, job_desc: JobDescription, pod_spec: V1PodSpec
|
|
929
991
|
) -> bool:
|
|
930
992
|
"""
|
|
931
993
|
Try to launch the given job to the Kubernetes cluster. Return False if
|
|
@@ -933,19 +995,26 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
933
995
|
"""
|
|
934
996
|
|
|
935
997
|
# Limit the amount of resources requested at a time.
|
|
936
|
-
resource_requests:
|
|
998
|
+
resource_requests: list[int] = [
|
|
999
|
+
1,
|
|
1000
|
+
int(job_desc.cores * 1000),
|
|
1001
|
+
job_desc.memory,
|
|
1002
|
+
job_desc.disk,
|
|
1003
|
+
]
|
|
937
1004
|
|
|
938
1005
|
acquired = []
|
|
939
1006
|
for source, request in zip(self.resource_sources, resource_requests):
|
|
940
1007
|
# For each kind of resource we want, go get it
|
|
941
|
-
assert
|
|
1008
|
+
assert isinstance(source, ResourcePool) and isinstance(request, int)
|
|
942
1009
|
if source.acquireNow(request):
|
|
943
1010
|
acquired.append(request)
|
|
944
1011
|
else:
|
|
945
1012
|
# We can't get everything
|
|
946
|
-
self._release_acquired_resources(
|
|
1013
|
+
self._release_acquired_resources(
|
|
1014
|
+
acquired,
|
|
947
1015
|
# Put it back quietly.
|
|
948
|
-
notify=False
|
|
1016
|
+
notify=False,
|
|
1017
|
+
)
|
|
949
1018
|
return False
|
|
950
1019
|
|
|
951
1020
|
self._acquired_resources[job_name] = acquired
|
|
@@ -954,9 +1023,11 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
954
1023
|
|
|
955
1024
|
# Make metadata to label the job/pod with info.
|
|
956
1025
|
# Don't let the cluster autoscaler evict any Toil jobs.
|
|
957
|
-
metadata = V1ObjectMeta(
|
|
958
|
-
|
|
959
|
-
|
|
1026
|
+
metadata = V1ObjectMeta(
|
|
1027
|
+
name=job_name,
|
|
1028
|
+
labels={"toil_run": self.run_id},
|
|
1029
|
+
annotations={"cluster-autoscaler.kubernetes.io/safe-to-evict": "false"},
|
|
1030
|
+
)
|
|
960
1031
|
|
|
961
1032
|
# Wrap the spec in a template
|
|
962
1033
|
template = V1PodTemplateSpec(spec=pod_spec, metadata=metadata)
|
|
@@ -964,18 +1035,21 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
964
1035
|
# Make another spec for the job, asking to run the template with no
|
|
965
1036
|
# backoff/retry. Specify our own TTL to avoid catching the notice
|
|
966
1037
|
# of over-zealous abandoned job cleanup scripts.
|
|
967
|
-
job_spec = V1JobSpec(
|
|
968
|
-
|
|
969
|
-
|
|
1038
|
+
job_spec = V1JobSpec(
|
|
1039
|
+
template=template,
|
|
1040
|
+
backoff_limit=0,
|
|
1041
|
+
ttl_seconds_after_finished=self.finished_job_ttl,
|
|
1042
|
+
)
|
|
970
1043
|
|
|
971
1044
|
# And make the actual job
|
|
972
|
-
job = V1Job(
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
kind="Job")
|
|
1045
|
+
job = V1Job(
|
|
1046
|
+
spec=job_spec, metadata=metadata, api_version="batch/v1", kind="Job"
|
|
1047
|
+
)
|
|
976
1048
|
|
|
977
1049
|
# Launch the job
|
|
978
|
-
launched = self._api(
|
|
1050
|
+
launched = self._api("batch", errors=[]).create_namespaced_job(
|
|
1051
|
+
self.namespace, job
|
|
1052
|
+
)
|
|
979
1053
|
|
|
980
1054
|
logger.debug(f"Launched job: {job_name}")
|
|
981
1055
|
|
|
@@ -983,10 +1057,11 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
983
1057
|
|
|
984
1058
|
def _delete_job(
|
|
985
1059
|
self,
|
|
986
|
-
job_name: str,
|
|
1060
|
+
job_name: str,
|
|
1061
|
+
*,
|
|
987
1062
|
propagation_policy: Literal["Foreground", "Background"] = "Foreground",
|
|
988
1063
|
gone_ok: bool = False,
|
|
989
|
-
resource_notify: bool = True
|
|
1064
|
+
resource_notify: bool = True,
|
|
990
1065
|
) -> None:
|
|
991
1066
|
"""
|
|
992
1067
|
Given the name of a kubernetes job, delete the job and release all
|
|
@@ -999,11 +1074,9 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
999
1074
|
the self._work_available condition.
|
|
1000
1075
|
"""
|
|
1001
1076
|
try:
|
|
1002
|
-
logger.debug(f
|
|
1003
|
-
self._api(
|
|
1004
|
-
job_name,
|
|
1005
|
-
self.namespace,
|
|
1006
|
-
propagation_policy=propagation_policy
|
|
1077
|
+
logger.debug(f"Deleting Kubernetes job {job_name}")
|
|
1078
|
+
self._api("batch", errors=[404] if gone_ok else []).delete_namespaced_job(
|
|
1079
|
+
job_name, self.namespace, propagation_policy=propagation_policy
|
|
1007
1080
|
)
|
|
1008
1081
|
finally:
|
|
1009
1082
|
# We should always release the acquired resources.
|
|
@@ -1014,7 +1087,12 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
1014
1087
|
self._release_acquired_resources(resources, notify=resource_notify)
|
|
1015
1088
|
del self._acquired_resources[job_name]
|
|
1016
1089
|
|
|
1017
|
-
def issueBatchJob(
|
|
1090
|
+
def issueBatchJob(
|
|
1091
|
+
self,
|
|
1092
|
+
command: str,
|
|
1093
|
+
job_desc: JobDescription,
|
|
1094
|
+
job_environment: Optional[dict[str, str]] = None,
|
|
1095
|
+
) -> int:
|
|
1018
1096
|
# Try the job as local
|
|
1019
1097
|
localID = self.handleLocalJob(command, job_desc)
|
|
1020
1098
|
if localID is not None:
|
|
@@ -1027,7 +1105,9 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
1027
1105
|
self.check_resource_request(job_desc)
|
|
1028
1106
|
|
|
1029
1107
|
# Make a pod that describes running the job
|
|
1030
|
-
pod_spec = self._create_pod_spec(
|
|
1108
|
+
pod_spec = self._create_pod_spec(
|
|
1109
|
+
command, job_desc, job_environment=job_environment
|
|
1110
|
+
)
|
|
1031
1111
|
|
|
1032
1112
|
# Make a batch system scope job ID
|
|
1033
1113
|
job_id = self.getNextJobID()
|
|
@@ -1055,6 +1135,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
1055
1135
|
kwargs, so we can't just set unused ones to None. But we also don't
|
|
1056
1136
|
want to duplicate code for every combination of possible present keys.
|
|
1057
1137
|
"""
|
|
1138
|
+
|
|
1058
1139
|
_continue: NotRequired[str]
|
|
1059
1140
|
label_selector: NotRequired[str]
|
|
1060
1141
|
field_selector: NotRequired[str]
|
|
@@ -1084,30 +1165,30 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
1084
1165
|
token = None
|
|
1085
1166
|
|
|
1086
1167
|
while True:
|
|
1087
|
-
kwargs: KubernetesBatchSystem._ArgsDict = {
|
|
1168
|
+
kwargs: KubernetesBatchSystem._ArgsDict = {
|
|
1169
|
+
"label_selector": f"toil_run={self.run_id}"
|
|
1170
|
+
}
|
|
1088
1171
|
|
|
1089
1172
|
if onlySucceeded:
|
|
1090
|
-
kwargs[
|
|
1173
|
+
kwargs["field_selector"] = "status.successful==1"
|
|
1091
1174
|
|
|
1092
1175
|
if token is not None:
|
|
1093
|
-
kwargs[
|
|
1176
|
+
kwargs["_continue"] = token
|
|
1094
1177
|
|
|
1095
|
-
results = self._api(
|
|
1096
|
-
self.namespace,
|
|
1097
|
-
**kwargs
|
|
1178
|
+
results = self._api("batch", errors=[]).list_namespaced_job(
|
|
1179
|
+
self.namespace, **kwargs
|
|
1098
1180
|
)
|
|
1099
|
-
|
|
1181
|
+
|
|
1100
1182
|
# These jobs belong to us
|
|
1101
1183
|
yield from (j for j in results.items if not self._is_deleted(j))
|
|
1102
1184
|
|
|
1103
1185
|
# Remember the continuation token, if any
|
|
1104
|
-
token = getattr(results.metadata,
|
|
1186
|
+
token = getattr(results.metadata, "continue", None)
|
|
1105
1187
|
|
|
1106
1188
|
if token is None:
|
|
1107
1189
|
# There isn't one. We got everything.
|
|
1108
1190
|
break
|
|
1109
1191
|
|
|
1110
|
-
|
|
1111
1192
|
def _ourPodObject(self) -> Iterator[V1Pod]:
|
|
1112
1193
|
"""
|
|
1113
1194
|
Yield Kubernetes V1Pod objects that we are responsible for that the
|
|
@@ -1117,25 +1198,25 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
1117
1198
|
token = None
|
|
1118
1199
|
|
|
1119
1200
|
while True:
|
|
1120
|
-
kwargs: KubernetesBatchSystem._ArgsDict = {
|
|
1201
|
+
kwargs: KubernetesBatchSystem._ArgsDict = {
|
|
1202
|
+
"label_selector": f"toil_run={self.run_id}"
|
|
1203
|
+
}
|
|
1121
1204
|
|
|
1122
1205
|
if token is not None:
|
|
1123
|
-
kwargs[
|
|
1206
|
+
kwargs["_continue"] = token
|
|
1124
1207
|
|
|
1125
|
-
results = self._api(
|
|
1126
|
-
self.namespace,
|
|
1127
|
-
**kwargs
|
|
1208
|
+
results = self._api("core", errors=[]).list_namespaced_pod(
|
|
1209
|
+
self.namespace, **kwargs
|
|
1128
1210
|
)
|
|
1129
1211
|
|
|
1130
1212
|
yield from (j for j in results.items if not self._is_deleted(j))
|
|
1131
1213
|
# Remember the continuation token, if any
|
|
1132
|
-
token = getattr(results.metadata,
|
|
1214
|
+
token = getattr(results.metadata, "continue", None)
|
|
1133
1215
|
|
|
1134
1216
|
if token is None:
|
|
1135
1217
|
# There isn't one. We got everything.
|
|
1136
1218
|
break
|
|
1137
1219
|
|
|
1138
|
-
|
|
1139
1220
|
def _getPodForJob(self, jobObject: V1Job) -> Optional[V1Pod]:
|
|
1140
1221
|
"""
|
|
1141
1222
|
Get the pod that belongs to the given job, or None if the job's pod is
|
|
@@ -1149,22 +1230,26 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
1149
1230
|
"""
|
|
1150
1231
|
|
|
1151
1232
|
# Make sure the job has the fields we need
|
|
1152
|
-
assert
|
|
1233
|
+
assert jobObject.metadata is not None
|
|
1153
1234
|
|
|
1154
1235
|
token = None
|
|
1155
1236
|
|
|
1156
1237
|
while True:
|
|
1157
|
-
kwargs: KubernetesBatchSystem._ArgsDict = {
|
|
1238
|
+
kwargs: KubernetesBatchSystem._ArgsDict = {
|
|
1239
|
+
"label_selector": f"job-name={jobObject.metadata.name}"
|
|
1240
|
+
}
|
|
1158
1241
|
if token is not None:
|
|
1159
|
-
kwargs[
|
|
1160
|
-
results = self._api(
|
|
1242
|
+
kwargs["_continue"] = token
|
|
1243
|
+
results = self._api("core", errors=[]).list_namespaced_pod(
|
|
1244
|
+
self.namespace, **kwargs
|
|
1245
|
+
)
|
|
1161
1246
|
|
|
1162
1247
|
for pod in results.items:
|
|
1163
1248
|
# Return the first pod we find
|
|
1164
1249
|
return pod
|
|
1165
1250
|
|
|
1166
1251
|
# Remember the continuation token, if any
|
|
1167
|
-
token = getattr(results.metadata,
|
|
1252
|
+
token = getattr(results.metadata, "continue", None)
|
|
1168
1253
|
|
|
1169
1254
|
if token is None:
|
|
1170
1255
|
# There isn't one. We got everything.
|
|
@@ -1188,12 +1273,13 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
1188
1273
|
assert podObject.metadata is not None
|
|
1189
1274
|
assert podObject.metadata.name is not None
|
|
1190
1275
|
|
|
1191
|
-
return self._api(
|
|
1192
|
-
podObject.metadata.name,
|
|
1193
|
-
namespace=self.namespace
|
|
1276
|
+
return self._api("core", errors=[]).read_namespaced_pod_log(
|
|
1277
|
+
podObject.metadata.name, namespace=self.namespace
|
|
1194
1278
|
)
|
|
1195
1279
|
|
|
1196
|
-
def _isPodStuckOOM(
|
|
1280
|
+
def _isPodStuckOOM(
|
|
1281
|
+
self, podObject: V1Pod, minFreeBytes: float = 1024 * 1024 * 2
|
|
1282
|
+
) -> bool:
|
|
1197
1283
|
"""
|
|
1198
1284
|
Poll the current memory usage for the pod from the cluster.
|
|
1199
1285
|
|
|
@@ -1223,14 +1309,18 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
1223
1309
|
assert podObject.metadata.name is not None
|
|
1224
1310
|
|
|
1225
1311
|
# Compose a query to get just the pod we care about
|
|
1226
|
-
query = f
|
|
1312
|
+
query = f"metadata.name={podObject.metadata.name}"
|
|
1227
1313
|
|
|
1228
1314
|
# Look for it, but manage our own exceptions
|
|
1229
1315
|
try:
|
|
1230
1316
|
# TODO: When the Kubernetes Python API actually wraps the metrics API, switch to that
|
|
1231
|
-
response = self._api(
|
|
1232
|
-
|
|
1233
|
-
|
|
1317
|
+
response = self._api("customObjects").list_namespaced_custom_object(
|
|
1318
|
+
"metrics.k8s.io",
|
|
1319
|
+
"v1beta1",
|
|
1320
|
+
self.namespace,
|
|
1321
|
+
"pods",
|
|
1322
|
+
field_selector=query,
|
|
1323
|
+
)
|
|
1234
1324
|
except Exception as e:
|
|
1235
1325
|
# We couldn't talk to the metrics service on this attempt. We don't
|
|
1236
1326
|
# retry, but we also don't want to just ignore all errors. We only
|
|
@@ -1246,7 +1336,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
1246
1336
|
raise
|
|
1247
1337
|
|
|
1248
1338
|
# Pull out the items
|
|
1249
|
-
items = response.get(
|
|
1339
|
+
items = response.get("items", [])
|
|
1250
1340
|
|
|
1251
1341
|
if len(items) == 0:
|
|
1252
1342
|
# If there's no statistics we can't say we're stuck OOM
|
|
@@ -1255,7 +1345,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
|
|
|
1255
1345
|
# Assume the first result is the right one, because of the selector.
|
|
1256
1346
|
# That means we don't need to bother with _continue.
|
|
1257
1347
|
# Assume it has exactly one pod, because we made it.
|
|
1258
|
-
containers = items[0].get(
|
|
1348
|
+
containers = items[0].get("containers", [{}])
|
|
1259
1349
|
|
|
1260
1350
|
if len(containers) == 0:
|
|
1261
1351
|
# If there are no containers (because none have started yet?), we can't say we're stuck OOM
|
|
@@ -1264,26 +1354,37 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
         # Otherwise, assume it just has one container.
         # Grab the memory usage string, like 123Ki, and convert to bytes.
         # If anything is missing, assume 0 bytes used.
-        bytesUsed = human2bytes(containers[0].get(
+        bytesUsed = human2bytes(containers[0].get("usage", {}).get("memory", "0"))

         # Also get the limit out of the pod object's spec
         assert podObject.spec is not None
         assert len(podObject.spec.containers) > 0
         assert podObject.spec.containers[0].resources is not None
         assert podObject.spec.containers[0].resources.limits is not None
-        assert
-        bytesAllowed = human2bytes(
+        assert "memory" in podObject.spec.containers[0].resources.limits
+        bytesAllowed = human2bytes(
+            podObject.spec.containers[0].resources.limits["memory"]
+        )

         if bytesAllowed - bytesUsed < minFreeBytes:
             # This is too much!
-            logger.warning(
-
+            logger.warning(
+                "Pod %s has used %d of %d bytes of memory; reporting as stuck due to OOM.",
+                podObject.metadata.name,
+                bytesUsed,
+                bytesAllowed,
+            )

             return True
         else:
             return False

-    def _isPodStuckWaiting(
+    def _isPodStuckWaiting(
+        self,
+        pod_object: V1Pod,
+        reason: Optional[str] = None,
+        timeout: Optional[float] = None,
+    ) -> bool:
         """
         Return True if the pod looks to be in a waiting state, and false otherwise.

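The OOM check compares a metrics-server quantity like `123Ki` against the container's memory limit, leaving at least a couple of mebibytes of headroom. Toil's own `human2bytes` does the conversion; a simplified stand-in that only understands the binary suffixes Kubernetes quantities use might look like this (the helper names and suffix table are assumptions, not Toil code):

```python
_BINARY_SUFFIXES = {"Ki": 2**10, "Mi": 2**20, "Gi": 2**30, "Ti": 2**40}


def quantity_to_bytes(quantity: str) -> int:
    """Convert a Kubernetes binary quantity such as '123Ki' to bytes."""
    for suffix, factor in _BINARY_SUFFIXES.items():
        if quantity.endswith(suffix):
            return int(float(quantity[: -len(suffix)]) * factor)
    return int(float(quantity))  # plain byte count


def is_stuck_oom(used: str, limit: str, min_free_bytes: int = 2 * 1024 * 1024) -> bool:
    """Report True when the pod has less than min_free_bytes of headroom left."""
    return quantity_to_bytes(limit) - quantity_to_bytes(used) < min_free_bytes


# 1023Mi used of a 1Gi limit leaves exactly 1 MiB free, which is under the 2 MiB floor.
assert is_stuck_oom("1023Mi", "1Gi") is True
assert is_stuck_oom("512Mi", "1Gi") is False
```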
@@ -1307,7 +1408,9 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
             # Can't be stuck
             return False

-        waiting_info = getattr(
+        waiting_info = getattr(
+            getattr(container_statuses[0], "state", None), "waiting", None
+        )
         if waiting_info is None:
             # Pod is not waiting
             return False
@@ -1316,15 +1419,17 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
             # Pod fails reason filter
             return False

-        start_time = getattr(pod_object.status,
-        if timeout is not None and (
+        start_time = getattr(pod_object.status, "start_time", None)
+        if timeout is not None and (
+            start_time is None or (utc_now() - start_time).total_seconds() < timeout
+        ):
             # It hasn't been waiting too long, or we care but don't know how
             # long it has been waiting
             return False

         return True

-    def _is_deleted(self, kube_thing: Union[
+    def _is_deleted(self, kube_thing: Union["V1Job", "V1Pod"]) -> bool:
         """
         Determine if a job or pod is in the process od being deleted, and
         shouldn't count anymore.
@@ -1333,7 +1438,9 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
         # Kubernetes "Terminating" is the same as having the deletion_timestamp
         # set in the metadata of the object.

-        deletion_timestamp: Optional[datetime.datetime] = getattr(
+        deletion_timestamp: Optional[datetime.datetime] = getattr(
+            getattr(kube_thing, "metadata", None), "deletion_timestamp", None
+        )
         # If the deletion timestamp is set to anything, it is in the process of
         # being deleted. We will treat that as as good as gone.
         return deletion_timestamp is not None
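The `_is_deleted` helper above hinges on the fact that an object being deleted has `metadata.deletion_timestamp` set (what `kubectl` shows as "Terminating"). The equivalent check against the Python client models is tiny; a sketch with an illustrative function name:

```python
from typing import Union

from kubernetes.client import V1Job, V1Pod


def is_terminating(obj: Union[V1Job, V1Pod]) -> bool:
    """True if the job or pod has been marked for deletion."""
    metadata = getattr(obj, "metadata", None)
    return getattr(metadata, "deletion_timestamp", None) is not None
```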
@@ -1350,8 +1457,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):

         assert jobObject.metadata is not None
         assert jobObject.metadata.name is not None
-        return int(jobObject.metadata.name[len(self.job_prefix):])
-
+        return int(jobObject.metadata.name[len(self.job_prefix) :])

     def getUpdatedBatchJob(self, maxWait: float) -> Optional[UpdatedBatchJobInfo]:

@@ -1367,22 +1473,27 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
         # Otherwise we need to maybe wait.
         if self.enable_watching and maxWait >= 1:
             # We can try a watch. Watches can only work in whole seconds.
-            for event in self._stream_until_error(
-
-
-
+            for event in self._stream_until_error(
+                self._api("batch").list_namespaced_job,
+                self.namespace,
+                label_selector=f"toil_run={self.run_id}",
+                timeout_seconds=math.floor(maxWait),
+            ):
                 # Grab the metadata data, ID, the list of conditions of the current job, and the total pods
-                jobObject = event[
-
+                jobObject = event["object"]
+
                 if self._is_deleted(jobObject):
                     # Job is already deleted, so ignore it.
-                    logger.warning(
+                    logger.warning(
+                        "Kubernetes job %s is deleted; ignore its update",
+                        getattr(getattr(jobObject, "metadata", None), "name", None),
+                    )
                     continue
-
+
                 assert jobObject.metadata is not None
                 assert jobObject.metadata.name is not None
-
-                jobID = int(jobObject.metadata.name[len(self.job_prefix):])
+
+                jobID = int(jobObject.metadata.name[len(self.job_prefix) :])
                 if jobObject.status is None:
                     # Can't tell what is up with this job.
                     continue
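Toil's `_stream_until_error` wraps the client's watch machinery; the underlying pattern streams job events filtered by a label selector and bounded by a whole-second timeout. A standalone sketch of that pattern (not the Toil helper itself):

```python
import math

from kubernetes import client, config, watch


def watch_jobs(namespace: str, run_id: str, max_wait: float) -> None:
    """Print one line per job event seen within max_wait seconds."""
    config.load_kube_config()
    batch = client.BatchV1Api()

    w = watch.Watch()
    for event in w.stream(
        batch.list_namespaced_job,
        namespace,
        label_selector=f"toil_run={run_id}",
        timeout_seconds=math.floor(max_wait),
    ):
        job = event["object"]  # a V1Job
        print(event["type"], job.metadata.name, job.status.succeeded, job.status.failed)
```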
@@ -1392,7 +1503,10 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
                 failed_pods = jobObject.status.failed or 0
                 # Fetch out the condition object that has info about how the job is going.
                 condition: Optional[V1JobCondition] = None
-                if
+                if (
+                    jobObject.status.conditions is not None
+                    and len(jobObject.status.conditions) > 0
+                ):
                     condition = jobObject.status.conditions[0]

                 totalPods = active_pods + succeeded_pods + failed_pods
@@ -1402,14 +1516,25 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):

                 # Check if there are any active pods
                 if active_pods > 0:
-                    logger.info(
+                    logger.info(
+                        "%s has %d pods running" % jobObject.metadata.name, active_pods
+                    )
                     continue
                 elif succeeded_pods > 0 or failed_pods > 0:
                     # No more active pods in the current job ; must be finished
-                    logger.info(
-
+                    logger.info(
+                        "%s RESULTS -> Succeeded: %d Failed:%d Active:%d"
+                        % jobObject.metadata.name,
+                        succeeded_pods,
+                        failed_pods,
+                        active_pods,
+                    )
                     # Log out success/failure given a reason
-                    logger.info(
+                    logger.info(
+                        "%s REASON: %s",
+                        getattr(condition, "type", None),
+                        getattr(condition, "reason", None),
+                    )

                     # Log out reason of failure and pod exit code
                     if failed_pods > 0:
@@ -1419,22 +1544,40 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
                         if condition is not None:
                             logger.warning("Failed Job Message: %s", condition.message)
                         pod = self._getPodForJob(jobObject)
-                        statuses:
-
+                        statuses: list[V1ContainerStatus] = getattr(
+                            getattr(pod, "status", None), "container_statuses", []
+                        )
+                        if (
+                            len(statuses) > 0
+                            and statuses[0].state is not None
+                            and statuses[0].state.terminated is not None
+                        ):
                             exitCode = statuses[0].state.terminated.exit_code

                     raw_runtime = 0.0
-                    if
-
+                    if (
+                        jobObject.status.completion_time is not None
+                        and jobObject.status.start_time is not None
+                    ):
+                        raw_runtime = (
+                            jobObject.status.completion_time
+                            - jobObject.status.start_time
+                        ).total_seconds()
                     runtime = slow_down(raw_runtime)
-                    result = UpdatedBatchJobInfo(
+                    result = UpdatedBatchJobInfo(
+                        jobID=jobID,
+                        exitStatus=exitCode,
+                        wallTime=runtime,
+                        exitReason=exitReason,
+                    )

-                    if (exitReason == BatchJobExitReason.FAILED) or (
+                    if (exitReason == BatchJobExitReason.FAILED) or (
+                        succeeded_pods + failed_pods == totalPods
+                    ):
                         # Cleanup if job is all finished or there was a pod that failed
                         # TODO: use delete_job() to release acquired resources
                         self._delete_job(
-                            jobObject.metadata.name,
-                            propagation_policy='Foreground'
+                            jobObject.metadata.name, propagation_policy="Foreground"
                         )
                         # Make sure the job is deleted so we won't see it again.
                         self._waitForJobDeath(jobObject.metadata.name)
@@ -1442,12 +1585,19 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
                     continue
                 else:
                     # Job is not running/updating ; no active, successful, or failed pods yet
-                    logger.debug(
+                    logger.debug(
+                        "Job {} -> {}".format(
+                            jobObject.metadata.name, getattr(condition, "reason", None)
+                        )
+                    )
                     # Pod could be pending; don't say it's lost.
                     continue
         else:
             # Try polling instead
-            while
+            while (
+                result is None
+                and (datetime.datetime.now() - entry).total_seconds() < maxWait
+            ):
                 # We still have nothing and we haven't hit the timeout.

                 # Poll
@@ -1455,12 +1605,11 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):

                 if result is None:
                     # Still nothing. Wait a second, or some fraction of our max wait time.
-                    time.sleep(min(maxWait/2, 1.0))
+                    time.sleep(min(maxWait / 2, 1.0))

         # When we get here, either we found something or we ran out of time
         return result

-
     def _getUpdatedBatchJobImmediately(self) -> Optional[UpdatedBatchJobInfo]:
         """
         Return None if no updated (completed or failed) batch job is currently
@@ -1484,25 +1633,25 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
         # Find a job that is done, failed, or stuck
         jobObject = None
         # Put 'done', 'failed', or 'stuck' here
-        chosenFor =
+        chosenFor = ""

         for j in self._ourJobObject(onlySucceeded=True):
             # Look for succeeded jobs because that's the only filter Kubernetes has
             jobObject = j
-            chosenFor =
+            chosenFor = "done"

         if jobObject is None:
             for j in self._ourJobObject():
                 # If there aren't any succeeded jobs, scan all jobs
                 # See how many times each failed
-                failCount = getattr(j.status,
+                failCount = getattr(j.status, "failed", 0)
                 if failCount is None:
                     # Make sure it is an int
                     failCount = 0
                 if failCount > 0:
                     # Take the first failed one you find
                     jobObject = j
-                    chosenFor =
+                    chosenFor = "failed"
                     break

         if jobObject is None:
@@ -1515,23 +1664,30 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
                     continue

                 # Containers can get stuck in Waiting with reason ImagePullBackOff
-                if self._isPodStuckWaiting(pod, reason=
+                if self._isPodStuckWaiting(pod, reason="ImagePullBackoff"):
                     # Assume it will never finish, even if the registry comes back or whatever.
                     # We can get into this state when we send in a non-existent image.
                     # See https://github.com/kubernetes/kubernetes/issues/58384
                     jobObject = j
-                    chosenFor =
-                    logger.warning(
-
+                    chosenFor = "stuck"
+                    logger.warning(
+                        "Failing stuck job (ImagePullBackoff); did you try to run a non-existent Docker image?"
+                        " Check TOIL_APPLIANCE_SELF."
+                    )
                     break

                 # Containers can also get stuck in Waiting with reason
                 # ContainerCreating, if for example their mounts don't work.
-                if self._isPodStuckWaiting(
+                if self._isPodStuckWaiting(
+                    pod, reason="ContainerCreating", timeout=self.pod_timeout
+                ):
                     # Assume that it will never finish.
                     jobObject = j
-                    chosenFor =
-                    logger.warning(
+                    chosenFor = "stuck"
+                    logger.warning(
+                        "Failing stuck job (ContainerCreating longer than %s seconds); did you try to mount something impossible?",
+                        self.pod_timeout,
+                    )
                     break

                 # Pods can also get stuck nearly but not quite out of memory,
@@ -1541,7 +1697,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
                     # We found a job that probably should be OOM! Report it as stuck.
                     # Polling function takes care of the logging.
                     jobObject = j
-                    chosenFor =
+                    chosenFor = "stuck"
                     break

         if jobObject is None:
@@ -1549,25 +1705,30 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
             return None
         else:
             # We actually have something
-            logger.debug(
-
+            logger.debug(
+                "Identified stopped Kubernetes job %s as %s",
+                getattr(jobObject.metadata, "name", None),
+                chosenFor,
+            )

         # Otherwise we got something.

         # Work out what the job's ID was (whatever came after our name prefix)
         assert jobObject.metadata is not None
         assert jobObject.metadata.name is not None
-        jobID = int(jobObject.metadata.name[len(self.job_prefix):])
+        jobID = int(jobObject.metadata.name[len(self.job_prefix) :])

         # Grab the pod
         pod = self._getPodForJob(jobObject)

         if pod is not None:
-            if chosenFor ==
+            if chosenFor == "done" or chosenFor == "failed":
                 # The job actually finished or failed

                 # Get the statuses of the pod's containers
-                containerStatuses = getattr(
+                containerStatuses = getattr(
+                    getattr(pod, "status", None), "container_statuses", None
+                )

                 # Get when the pod started (reached the Kubelet) as a datetime
                 start_time = self._get_start_time(pod, jobObject)
@@ -1577,18 +1738,24 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
                     # This happens when a pod is "Scheduled". But how could a
                     # 'done' or 'failed' pod be merely "Scheduled"?
                     # Complain so we can find out.
-                    logger.warning(
-
+                    logger.warning(
+                        "Exit code and runtime unavailable; pod has no container statuses"
+                    )
+                    logger.warning("Pod: %s", str(pod))
                     exitCode = EXIT_STATUS_UNAVAILABLE_VALUE
                     # Say it stopped now and started when it was scheduled/submitted.
                     # We still need a strictly positive runtime.
                     runtime = slow_down((utc_now() - start_time).total_seconds())
                 else:
                     # Get the termination info from the pod's main (only) container
-                    terminatedInfo = getattr(
+                    terminatedInfo = getattr(
+                        getattr(containerStatuses[0], "state", None), "terminated", None
+                    )
                     if terminatedInfo is None:
-                        logger.warning(
-
+                        logger.warning(
+                            "Exit code and runtime unavailable; pod stopped without container terminating"
+                        )
+                        logger.warning("Pod: %s", str(pod))
                         exitCode = EXIT_STATUS_UNAVAILABLE_VALUE
                         # Say it stopped now and started when it was scheduled/submitted.
                         # We still need a strictly positive runtime.
@@ -1603,34 +1770,42 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
                         # created. And we need to look at the pod's end time
                         # because the job only gets a completion time if
                         # successful.
-                        runtime = slow_down(
-
+                        runtime = slow_down(
+                            (terminatedInfo.finished_at - start_time).total_seconds()
+                        )

-                if chosenFor ==
+                if chosenFor == "failed":
                     # Warn the user with the failed pod's log
                     # TODO: cut this down somehow?
-                    logger.warning(
+                    logger.warning(
+                        "Log from failed pod: %s", self._getLogForPod(pod)
+                    )

             else:
                 # The job has gotten stuck

-                assert chosenFor ==
+                assert chosenFor == "stuck"

                 # Synthesize an exit code
                 exitCode = EXIT_STATUS_UNAVAILABLE_VALUE
                 # Say it ran from when the job was submitted to when the pod got stuck
-                runtime = slow_down(
+                runtime = slow_down(
+                    (utc_now() - self._get_start_time(job=jobObject)).total_seconds()
+                )
         else:
             # The pod went away from under the job.
-            logging.warning(
+            logging.warning("Exit code and runtime unavailable; pod vanished")
             exitCode = EXIT_STATUS_UNAVAILABLE_VALUE
             # Say it ran from when the job was submitted to when the pod vanished
-            runtime = slow_down(
-
+            runtime = slow_down(
+                (utc_now() - self._get_start_time(job=jobObject)).total_seconds()
+            )

         try:
             # Delete the job and all dependents (pods), hoping to get a 404 if it's magically gone
-            self._delete_job(
+            self._delete_job(
+                jobObject.metadata.name, propagation_policy="Foreground", gone_ok=True
+            )

             # That just kicks off the deletion process. Foreground doesn't
             # actually block. See
@@ -1646,7 +1821,9 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
             # Otherwise everything is fine and the job is gone.

         # Return the one finished job we found
-        return UpdatedBatchJobInfo(
+        return UpdatedBatchJobInfo(
+            jobID=jobID, exitStatus=exitCode, wallTime=runtime, exitReason=None
+        )

     def _waitForJobDeath(self, jobName: str) -> None:
         """
@@ -1660,7 +1837,9 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
         while True:
             try:
                 # Look for the job
-                job_object = self._api(
+                job_object = self._api("batch", errors=[404]).read_namespaced_job(
+                    jobName, self.namespace
+                )
                 if self._is_deleted(job_object):
                     # The job looks deleted, so we can treat it as not being there.
                     return
@@ -1685,59 +1864,80 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
         # Shutdown scheduling thread
         self._shutting_down.set()
         with self._work_available:
-            self._work_available.notify_all()
+            self._work_available.notify_all()  # Wake it up.

         self.schedulingThread.join()

         # Kill all of our jobs and clean up pods that are associated with those jobs
         try:
-            logger.debug(
-            self._api(
+            logger.debug("Deleting all Kubernetes jobs for toil_run=%s", self.run_id)
+            self._api("batch", errors=[404]).delete_collection_namespaced_job(
                 self.namespace,
                 label_selector=f"toil_run={self.run_id}",
-                propagation_policy=
+                propagation_policy="Background",
+            )
+            logger.debug(
+                "Killed jobs with delete_collection_namespaced_job; cleaned up"
             )
-            logger.debug('Killed jobs with delete_collection_namespaced_job; cleaned up')
             # TODO: should we release all resources? We're shutting down so would it matter?
         except ApiException as e:
             if e.status != 404:
                 # Anything other than a 404 is weird here.
-                logger.error(
+                logger.error(
+                    "Exception when calling BatchV1Api->delete_collection_namespaced_job: %s"
+                    % e
+                )

             # If batch delete fails, try to delete all remaining jobs individually.
-            logger.debug(
+            logger.debug(
+                "Deleting Kubernetes jobs individually for toil_run=%s", self.run_id
+            )
             for job_id in self._getIssuedNonLocalBatchJobIDs():
-                job_name = f
-                self._delete_job(
+                job_name = f"{self.job_prefix}{job_id}"
+                self._delete_job(
+                    job_name, propagation_policy="Background", resource_notify=False
+                )

         # Aggregate all pods and check if any pod has failed to cleanup or is orphaned.
         ourPods = self._ourPodObject()

         for pod in ourPods:
             try:
-                phase = getattr(pod.status,
-                if phase ==
-
-
-
+                phase = getattr(pod.status, "phase", None)
+                if phase == "Failed":
+                    logger.debug(
+                        "Failed pod encountered at shutdown:\n%s",
+                        self._pretty_print(pod),
+                    )
+                if phase == "Orphaned":
+                    logger.debug(
+                        "Orphaned pod encountered at shutdown:\n%s",
+                        self._pretty_print(pod),
+                    )
             except:
                 # Don't get mad if that doesn't work.
                 pass
             if pod.metadata is not None and pod.metadata.name is not None:
                 try:
-                    logger.debug(
-
+                    logger.debug(
+                        "Cleaning up pod at shutdown: %s", pod.metadata.name
+                    )
+                    response = self._api(
+                        "core", errors=[404]
+                    ).delete_namespaced_pod(
                         pod.metadata.name,
                         self.namespace,
-                        propagation_policy=
+                        propagation_policy="Background",
                     )
                 except ApiException as e:
                     if e.status != 404:
                         # Anything other than a 404 is weird here.
-                        logger.error(
-
+                        logger.error(
+                            "Exception when calling CoreV1Api->delete_namespaced_pod: %s"
+                            % e
+                        )

-    def _getIssuedNonLocalBatchJobIDs(self) ->
+    def _getIssuedNonLocalBatchJobIDs(self) -> list[int]:
         """
         Get the issued batch job IDs that are not for local jobs.
         """
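The shutdown path deletes every job carrying the run's `toil_run` label in one call and tolerates a 404 if the collection is already gone. A standalone sketch of that cleanup step (illustrative function, not Toil's `shutdown`):

```python
from kubernetes import client, config
from kubernetes.client.rest import ApiException


def delete_run_jobs(namespace: str, run_id: str) -> None:
    """Delete all jobs labelled with this run, letting Kubernetes reap their pods."""
    config.load_kube_config()
    batch = client.BatchV1Api()
    try:
        batch.delete_collection_namespaced_job(
            namespace,
            label_selector=f"toil_run={run_id}",
            propagation_policy="Background",
        )
    except ApiException as e:
        if e.status != 404:
            raise  # anything other than "already gone" is a real problem
```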
@@ -1749,29 +1949,35 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
             jobIDs.append(self._getIDForOurJob(job))
         return jobIDs

-    def getIssuedBatchJobIDs(self) ->
+    def getIssuedBatchJobIDs(self) -> list[int]:
         # Make sure to send the local jobs and queued jobs also
         with self._mutex:
             queued_jobs = list(self._queued_job_ids)
-        return
+        return (
+            self._getIssuedNonLocalBatchJobIDs()
+            + list(self.getIssuedLocalJobIDs())
+            + queued_jobs
+        )

-    def _get_start_time(
+    def _get_start_time(
+        self, pod: Optional[V1Pod] = None, job: Optional[V1Job] = None
+    ) -> datetime.datetime:
         """
         Get an actual or estimated start time for a pod.
         """

         # Get when the pod started (reached the Kubelet) as a datetime
-        start_time = getattr(getattr(pod,
+        start_time = getattr(getattr(pod, "status", None), "start_time", None)
         if start_time is None:
             # If the pod never made it to the kubelet to get a
             # start_time, say it was when the job was submitted.
-            start_time = getattr(getattr(job,
+            start_time = getattr(getattr(job, "status", None), "start_time", None)
         if start_time is None:
             # If this is still unset, say it was just now.
             start_time = utc_now()
         return start_time

-    def getRunningBatchJobIDs(self) ->
+    def getRunningBatchJobIDs(self) -> dict[int, float]:
         # We need a dict from jobID (integer) to seconds it has been running
         secondsPerJob = dict()
         for job in self._ourJobObject():
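`_get_start_time` and `getRunningBatchJobIDs` estimate a job's runtime from the pod's `status.start_time`, falling back to the job's start time or "now". The arithmetic is a timezone-aware subtraction; a hypothetical helper mirroring that fallback order:

```python
import datetime
from typing import Optional

from kubernetes.client import V1Job, V1Pod


def seconds_running(pod: Optional[V1Pod], job: Optional[V1Job]) -> float:
    """Estimate how long a job has been running, in seconds."""
    now = datetime.datetime.now(datetime.timezone.utc)
    start = getattr(getattr(pod, "status", None), "start_time", None)
    if start is None:
        start = getattr(getattr(job, "status", None), "start_time", None)
    if start is None:
        start = now  # never scheduled yet; count it as zero seconds
    return (now - start).total_seconds()
```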
@@ -1782,7 +1988,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
                 # Jobs whose pods are gone are not running
                 continue

-            if getattr(pod.status,
+            if getattr(pod.status, "phase", None) == "Running":
                 # The job's pod is running

                 # Estimate the runtime
@@ -1794,7 +2000,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
         secondsPerJob.update(self.getRunningLocalJobIDs())
         return secondsPerJob

-    def killBatchJobs(self, jobIDs:
+    def killBatchJobs(self, jobIDs: list[int]) -> None:

         # Kill all the ones that are local
         self.killLocalJobs(jobIDs)
@@ -1803,7 +2009,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):

         # First get the jobs we even issued non-locally
         issued_on_kubernetes = set(self._getIssuedNonLocalBatchJobIDs())
-        deleted_jobs:
+        deleted_jobs: list[str] = []

         for job_id in jobIDs:
             # For each job we are supposed to kill
@@ -1829,10 +2035,10 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):

             # Delete the requested job in the foreground.
             # This doesn't block, but it does delete expeditiously.
-            self._delete_job(job_name, propagation_policy=
+            self._delete_job(job_name, propagation_policy="Foreground")

             deleted_jobs.append(job_name)
-            logger.debug(
+            logger.debug("Killed job by request: %s", job_name)

         for job_name in deleted_jobs:
             # Now we need to wait for all the jobs we killed to be gone.
@@ -1842,7 +2048,7 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
             # the potential deadlock (if the user code needs exclusive access to
             # a resource) onto the user code, instead of always hanging
             # whenever we can't certify that a faulty node is no longer running
-            # the user code.
+            # the user code.
             self._waitForJobDeath(job_name)

     @classmethod
@@ -1853,9 +2059,11 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):

         # Make a Kubernetes-acceptable version of our username: not too long,
         # and all lowercase letters, numbers, or - or .
-        acceptable_chars = set(string.ascii_lowercase + string.digits +
+        acceptable_chars = set(string.ascii_lowercase + string.digits + "-.")

-        return
+        return "".join([c for c in get_user_name().lower() if c in acceptable_chars])[
+            :100
+        ]

     @runtime_checkable
     class KubernetesConfig(Protocol):
@@ -1867,38 +2075,66 @@ class KubernetesBatchSystem(BatchSystemCleanupSupport):
         have to let the fact that this also has to be a Config just be manually
         enforced.
         """
+
         kubernetes_host_path: Optional[str]
         kubernetes_owner: str
         kubernetes_service_account: Optional[str]
         kubernetes_pod_timeout: float

-
     @classmethod
     def add_options(cls, parser: Union[ArgumentParser, _ArgumentGroup]) -> None:
-        parser.add_argument(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        parser.add_argument(
+            "--kubernetesHostPath",
+            dest="kubernetes_host_path",
+            default=None,
+            env_var="TOIL_KUBERNETES_HOST_PATH",
+            help="Path on Kubernetes hosts to use as shared inter-pod temp directory. "
+            "(default: %(default)s)",
+        )
+        parser.add_argument(
+            "--kubernetesOwner",
+            dest="kubernetes_owner",
+            default=None,
+            env_var="TOIL_KUBERNETES_OWNER",
+            help=f"Username to mark Kubernetes jobs with. If the provided value is None, the value will "
+            f"be generated at runtime. "
+            f"(Generated default: {cls.get_default_kubernetes_owner()})",
+        )
+        parser.add_argument(
+            "--kubernetesServiceAccount",
+            dest="kubernetes_service_account",
+            default=None,
+            env_var="TOIL_KUBERNETES_SERVICE_ACCOUNT",
+            help="Service account to run jobs as. " "(default: %(default)s)",
+        )
+        parser.add_argument(
+            "--kubernetesPodTimeout",
+            dest="kubernetes_pod_timeout",
+            default=120,
+            env_var="TOIL_KUBERNETES_POD_TIMEOUT",
+            type=float,
+            help="Seconds to wait for a scheduled Kubernetes pod to start running. "
+            "(default: %(default)s)",
+        )
+        parser.add_argument(
+            "--kubernetesPrivileged",
+            dest="kubernetes_privileged",
+            default=False,
+            env_var="TOIL_KUBERNETES_PRIVILEGED",
+            type=opt_strtobool,
+            help="Whether to ask worker pods to run in privileged mode. This should be used to access "
+            "privileged operations, such as FUSE. On Toil-managed clusters with --enableFuse, "
+            "this is set to True. (default: %(default)s)",
+        )
+
+    OptionType = TypeVar("OptionType")
+
     @classmethod
     def setOptions(cls, setOption: OptionSetter) -> None:
         setOption("kubernetes_host_path")
         setOption("kubernetes_owner")
-        setOption(
+        setOption(
+            "kubernetes_service_account",
+        )
         setOption("kubernetes_pod_timeout")
         setOption("kubernetes_privileged")
-
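The `add_options` hunk relies on an argument parser that accepts an `env_var=` keyword (Toil's option parser supports this; stock `argparse` does not). A rough equivalent with plain `argparse`, reading the same environment variables as defaults, could look like the sketch below; the option and variable names are taken from the diff, while the fallback logic and function name are assumptions:

```python
import argparse
import os


def add_kubernetes_options(parser: argparse.ArgumentParser) -> None:
    """Register a few Kubernetes batch-system options, defaulting from the environment."""
    parser.add_argument(
        "--kubernetesHostPath",
        dest="kubernetes_host_path",
        default=os.environ.get("TOIL_KUBERNETES_HOST_PATH"),
        help="Path on Kubernetes hosts to use as shared inter-pod temp directory.",
    )
    parser.add_argument(
        "--kubernetesOwner",
        dest="kubernetes_owner",
        default=os.environ.get("TOIL_KUBERNETES_OWNER"),
        help="Username to mark Kubernetes jobs with.",
    )
    parser.add_argument(
        "--kubernetesPodTimeout",
        dest="kubernetes_pod_timeout",
        type=float,
        default=float(os.environ.get("TOIL_KUBERNETES_POD_TIMEOUT", "120")),
        help="Seconds to wait for a scheduled Kubernetes pod to start running.",
    )


parser = argparse.ArgumentParser()
add_kubernetes_options(parser)
options = parser.parse_args(["--kubernetesPodTimeout", "300"])
assert options.kubernetes_pod_timeout == 300.0
```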