xpk 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xpk/__init__.py +15 -0
- xpk/api/__init__.py +15 -0
- xpk/api/storage_crd.yaml +52 -0
- xpk/commands/__init__.py +15 -0
- xpk/commands/batch.py +131 -0
- xpk/commands/cluster.py +808 -0
- xpk/commands/cluster_gcluster.py +269 -0
- xpk/commands/common.py +44 -0
- xpk/commands/config.py +29 -0
- xpk/commands/info.py +243 -0
- xpk/commands/inspector.py +357 -0
- xpk/commands/job.py +199 -0
- xpk/commands/kind.py +283 -0
- xpk/commands/kjob_common.py +44 -0
- xpk/commands/run.py +128 -0
- xpk/commands/shell.py +140 -0
- xpk/commands/storage.py +267 -0
- xpk/commands/version.py +27 -0
- xpk/commands/workload.py +889 -0
- xpk/core/__init__.py +15 -0
- xpk/core/blueprint/__init__.py +15 -0
- xpk/core/blueprint/blueprint_definitions.py +62 -0
- xpk/core/blueprint/blueprint_generator.py +708 -0
- xpk/core/capacity.py +185 -0
- xpk/core/cluster.py +564 -0
- xpk/core/cluster_private.py +200 -0
- xpk/core/commands.py +356 -0
- xpk/core/config.py +179 -0
- xpk/core/docker_container.py +225 -0
- xpk/core/docker_image.py +210 -0
- xpk/core/docker_manager.py +308 -0
- xpk/core/docker_resources.py +350 -0
- xpk/core/filestore.py +251 -0
- xpk/core/gcloud_context.py +196 -0
- xpk/core/gcluster_manager.py +176 -0
- xpk/core/gcsfuse.py +50 -0
- xpk/core/kjob.py +444 -0
- xpk/core/kueue.py +358 -0
- xpk/core/monitoring.py +134 -0
- xpk/core/nap.py +361 -0
- xpk/core/network.py +377 -0
- xpk/core/nodepool.py +581 -0
- xpk/core/pathways.py +377 -0
- xpk/core/ray.py +222 -0
- xpk/core/remote_state/__init__.py +15 -0
- xpk/core/remote_state/fuse_remote_state.py +99 -0
- xpk/core/remote_state/remote_state_client.py +38 -0
- xpk/core/resources.py +238 -0
- xpk/core/scheduling.py +253 -0
- xpk/core/storage.py +581 -0
- xpk/core/system_characteristics.py +1432 -0
- xpk/core/vertex.py +105 -0
- xpk/core/workload.py +341 -0
- xpk/core/workload_decorators/__init__.py +15 -0
- xpk/core/workload_decorators/rdma_decorator.py +129 -0
- xpk/core/workload_decorators/storage_decorator.py +52 -0
- xpk/core/workload_decorators/tcpxo_decorator.py +190 -0
- xpk/main.py +75 -0
- xpk/parser/__init__.py +15 -0
- xpk/parser/batch.py +43 -0
- xpk/parser/cluster.py +662 -0
- xpk/parser/common.py +259 -0
- xpk/parser/config.py +49 -0
- xpk/parser/core.py +135 -0
- xpk/parser/info.py +64 -0
- xpk/parser/inspector.py +65 -0
- xpk/parser/job.py +147 -0
- xpk/parser/kind.py +95 -0
- xpk/parser/run.py +47 -0
- xpk/parser/shell.py +59 -0
- xpk/parser/storage.py +316 -0
- xpk/parser/validators.py +39 -0
- xpk/parser/version.py +23 -0
- xpk/parser/workload.py +726 -0
- xpk/templates/__init__.py +15 -0
- xpk/templates/storage.yaml +13 -0
- xpk/utils/__init__.py +15 -0
- xpk/utils/console.py +55 -0
- xpk/utils/file.py +82 -0
- xpk/utils/gcs_utils.py +125 -0
- xpk/utils/kubectl.py +57 -0
- xpk/utils/network.py +168 -0
- xpk/utils/objects.py +88 -0
- xpk/utils/templates.py +28 -0
- xpk/utils/validation.py +80 -0
- xpk/utils/yaml.py +30 -0
- xpk-0.0.1.dist-info/LICENSE +202 -0
- xpk-0.0.1.dist-info/METADATA +1498 -0
- xpk-0.0.1.dist-info/RECORD +92 -0
- xpk-0.0.1.dist-info/WHEEL +5 -0
- xpk-0.0.1.dist-info/entry_points.txt +2 -0
- xpk-0.0.1.dist-info/top_level.txt +1 -0
xpk/core/kjob.py
ADDED
|
@@ -0,0 +1,444 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright 2024 Google LLC
|
|
3
|
+
|
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
you may not use this file except in compliance with the License.
|
|
6
|
+
You may obtain a copy of the License at
|
|
7
|
+
|
|
8
|
+
https://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
|
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
See the License for the specific language governing permissions and
|
|
14
|
+
limitations under the License.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from ..core.blueprint.blueprint_generator import get_subnetworks_for_a3mega, get_subnetworks_for_a3ultra
|
|
18
|
+
from ..core.capacity import H100_MEGA_DEVICE_TYPE, H200_DEVICE_TYPE
|
|
19
|
+
from argparse import Namespace
|
|
20
|
+
import yaml
|
|
21
|
+
from .workload_decorators.tcpxo_decorator import get_tcpxo_deamon_entry
|
|
22
|
+
from ..utils.console import xpk_print, xpk_exit
|
|
23
|
+
|
|
24
|
+
from ..utils import templates
|
|
25
|
+
from kubernetes import client as k8s_client
|
|
26
|
+
from kubernetes.client import ApiClient
|
|
27
|
+
from kubernetes.client.rest import ApiException
|
|
28
|
+
from .cluster import setup_k8s_env, XPK_SA, DEFAULT_NAMESPACE
|
|
29
|
+
from .storage import get_auto_mount_storages, get_auto_mount_gcsfuse_storages
|
|
30
|
+
from .commands import run_command_for_value, run_kubectl_apply, run_command_with_updates
|
|
31
|
+
from .config import XpkConfig, KJOB_SHELL_IMAGE, KJOB_SHELL_INTERACTIVE_COMMAND, KJOB_SHELL_WORKING_DIRECTORY, KJOB_BATCH_IMAGE, KJOB_BATCH_WORKING_DIRECTORY
|
|
32
|
+
from .resources import get_cluster_system_characteristics, SystemCharacteristics, AcceleratorType
|
|
33
|
+
from enum import Enum
|
|
34
|
+
|
|
35
|
+
from ..core.workload_decorators import tcpxo_decorator
|
|
36
|
+
|
|
37
|
+
from ..core.workload_decorators import rdma_decorator
|
|
38
|
+
|
|
39
|
+
# API group, version and resource plural used to address kjob VolumeBundle
# custom objects through the Kubernetes CustomObjectsApi.
KJOB_API_GROUP_NAME = "kjobctl.x-k8s.io"
KJOB_API_GROUP_VERSION = "v1alpha1"
KJOB_API_VOLUME_BUNDLE_PLURAL = "volumebundles"
# Template location handed to templates.load() when building a VolumeBundle.
VOLUME_BUNDLE_TEMPLATE_PATH = "/../templates/volume_bundle.yaml"
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
class AppProfileDefaults(Enum):
    """Default settings for the ApplicationProfile created by this module."""

    # Name given to the ApplicationProfile in create_app_profile_instance().
    NAME = "xpk-def-app-profile"
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
class JobTemplateDefaults(Enum):
    """Default settings for the JobTemplate created by this module."""

    # Name of the JobTemplate; also referenced by the ApplicationProfile.
    NAME = "xpk-def-batch"
    PARALLELISM = 1
    # Same value as PARALLELISM, so Enum registers COMPLETIONS as an alias of
    # that member; COMPLETIONS.value still evaluates to 1.
    COMPLETIONS = 1
    CONTAINER_NAME = "xpk-batch-container"
    # Fallback image when the config has no KJOB_BATCH_IMAGE value.
    IMAGE = "ubuntu:22.04"
    # Fallback when the config has no KJOB_BATCH_WORKING_DIRECTORY value.
    WORKING_DIRECTORY = "/"
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
class PodTemplateDefaults(Enum):
    """Default settings for the PodTemplate created by this module."""

    # Name of the PodTemplate; also referenced by the ApplicationProfile.
    NAME = "xpk-def-pod"
    CONTAINER_NAME = "xpk-interactive-container"
    # Fallback image when the config has no KJOB_SHELL_IMAGE value.
    IMAGE = "busybox:1.28"
    # Fallback when the config has no KJOB_SHELL_WORKING_DIRECTORY value.
    WORKING_DIRECTORY = "/"
    # Fallback when the config has no KJOB_SHELL_INTERACTIVE_COMMAND value.
    INTERACTIVE_COMMAND = "/bin/sh"
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
job_template_yaml = """
|
|
67
|
+
apiVersion: kjobctl.x-k8s.io/v1alpha1
|
|
68
|
+
kind: JobTemplate
|
|
69
|
+
metadata:
|
|
70
|
+
name: {name}
|
|
71
|
+
namespace: default
|
|
72
|
+
template:
|
|
73
|
+
spec:
|
|
74
|
+
parallelism: {parallelism}
|
|
75
|
+
completions: {completions}
|
|
76
|
+
completionMode: Indexed
|
|
77
|
+
template:
|
|
78
|
+
spec:
|
|
79
|
+
dnsPolicy: ClusterFirstWithHostNet
|
|
80
|
+
tolerations:
|
|
81
|
+
- operator: "Exists"
|
|
82
|
+
key: nvidia.com/gpu
|
|
83
|
+
containers:
|
|
84
|
+
- name: {container_name}
|
|
85
|
+
image: {image}
|
|
86
|
+
workingDir: {working_directory}
|
|
87
|
+
{resources}
|
|
88
|
+
{node_selector}
|
|
89
|
+
priorityClassName: {priority}
|
|
90
|
+
restartPolicy: OnFailure
|
|
91
|
+
serviceAccountName: {service_account}
|
|
92
|
+
"""
|
|
93
|
+
job_node_selector_template = """
|
|
94
|
+
nodeSelector:
|
|
95
|
+
cloud.google.com/gke-accelerator: {gpu_name}
|
|
96
|
+
"""
|
|
97
|
+
job_resources_template = """
|
|
98
|
+
resources:
|
|
99
|
+
limits:
|
|
100
|
+
nvidia.com/gpu: {gpu_per_node}
|
|
101
|
+
"""
|
|
102
|
+
|
|
103
|
+
app_profile_yaml = """
|
|
104
|
+
apiVersion: kjobctl.x-k8s.io/v1alpha1
|
|
105
|
+
kind: ApplicationProfile
|
|
106
|
+
metadata:
|
|
107
|
+
name: {name}
|
|
108
|
+
namespace: default
|
|
109
|
+
spec:
|
|
110
|
+
supportedModes:
|
|
111
|
+
- name: Slurm
|
|
112
|
+
template: {batch_template}
|
|
113
|
+
requiredFlags: []
|
|
114
|
+
- name: Interactive
|
|
115
|
+
template: {interactive_template}
|
|
116
|
+
volumeBundles: {volume_bundles}
|
|
117
|
+
"""
|
|
118
|
+
|
|
119
|
+
pod_template_yaml = """
|
|
120
|
+
apiVersion: v1
|
|
121
|
+
kind: PodTemplate
|
|
122
|
+
metadata:
|
|
123
|
+
name: {name}
|
|
124
|
+
namespace: default
|
|
125
|
+
template:
|
|
126
|
+
spec:
|
|
127
|
+
tolerations:
|
|
128
|
+
- effect: NoSchedule
|
|
129
|
+
key: components.gke.io/gke-managed-components
|
|
130
|
+
operator: Equal
|
|
131
|
+
value: "true"
|
|
132
|
+
containers:
|
|
133
|
+
- name: {container_name}
|
|
134
|
+
image: {image}
|
|
135
|
+
command: [{interactive_command}]
|
|
136
|
+
workingDir: {working_directory}
|
|
137
|
+
initContainers:
|
|
138
|
+
- name: init
|
|
139
|
+
image: {image}
|
|
140
|
+
command: ['/bin/mkdir', '-p', '{working_directory}']
|
|
141
|
+
serviceAccountName: {service_account}
|
|
142
|
+
"""
|
|
143
|
+
|
|
144
|
+
# Annotation in "key=value" form. It is not referenced by any function in this
# module; presumably consumed by callers that submit kjob workloads - verify.
Kueue_TAS_annotation = "kueue.x-k8s.io/podset-preferred-topology=cloud.google.com/gce-topology-host"

# Annotation in "key=value" form returned by both
# get_a3ultra_pod_template_annotations() and
# get_a3mega_pod_template_annotations().
default_interface_annotation = "networking.gke.io/default-interface=eth0"
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def get_a3ultra_pod_template_annotations(args: Namespace) -> tuple[str, str]:
    """Build the pod template annotations for an a3ultra cluster.

    Args:
      args: user provided arguments; only args.cluster is read here.

    Returns:
      Two "key=value" strings: the default interface annotation, followed by
      the interfaces entry produced by the rdma decorator for the cluster's
      a3ultra subnetworks, with the value wrapped in $'...'.
    """
    key, value = rdma_decorator.get_interfaces_entry(
        get_subnetworks_for_a3ultra(args.cluster)
    )
    interfaces = f"{key}=$'{value}'"
    return default_interface_annotation, interfaces
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def get_a3mega_pod_template_annotations(
    args: Namespace,
) -> tuple[str, str, str]:
    """Build the pod template annotations for an a3mega cluster.

    Args:
      args: user provided arguments; only args.cluster is read here.

    Returns:
      Three "key=value" strings, in this order: the tcpxo daemon entry, the
      interfaces entry for the cluster's a3mega subnetworks (both values
      wrapped in $'...'), and the default interface annotation.
    """
    subnetworks = get_subnetworks_for_a3mega(args.cluster)
    daemon_key, daemon_paths = get_tcpxo_deamon_entry()
    nic_key, nic_value = tcpxo_decorator.get_interfaces_entry(subnetworks)
    return (
        f"{daemon_key}=$'{daemon_paths}'",
        f"{nic_key}=$'{nic_value}'",
        default_interface_annotation,
    )
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def verify_kjob_installed(args: Namespace) -> int:
    """Check whether kjob is installed and tell the user how to get it if not.

    Runs `kubectl-kjob help` and interprets its return code.

    Args:
      args: user provided arguments.

    Returns:
      0 if kjob is installed, otherwise the non-zero return code of the
      probing command.
    """
    command = "kubectl-kjob help"
    task = "Verify kjob installation "
    return_code, _ = run_command_for_value(command, task, args)

    if return_code == 0:
        xpk_print("kjob found")
        return 0

    # Any non-zero code means the probe failed; no further branch is needed.
    xpk_print(
        " kjob not found. Please follow"
        " https://github.com/kubernetes-sigs/kjob/blob/main/docs/installation.md"
        " to install kjob."
    )
    return return_code
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def get_pod_template_interactive_command() -> str:
    """Return the interactive command to put into the PodTemplate.

    Returns:
      The value stored in the xpk config under KJOB_SHELL_INTERACTIVE_COMMAND
      when it is set and non-empty, otherwise
      PodTemplateDefaults.INTERACTIVE_COMMAND.
    """
    configured = XpkConfig().get(KJOB_SHELL_INTERACTIVE_COMMAND)
    if configured is not None and len(configured) != 0:
        return configured
    return PodTemplateDefaults.INTERACTIVE_COMMAND.value
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def create_app_profile_instance(
    args: Namespace, volume_bundles: list[str]
) -> int:
    """Apply the default ApplicationProfile manifest to the cluster.

    Args:
      args: user provided arguments.
      volume_bundles: names of the VolumeBundles to list in the profile; the
        list is formatted directly into the manifest text.

    Returns:
      The return code of run_kubectl_apply for the rendered manifest.
    """
    manifest = app_profile_yaml.format(
        name=AppProfileDefaults.NAME.value,
        batch_template=JobTemplateDefaults.NAME.value,
        interactive_template=PodTemplateDefaults.NAME.value,
        volume_bundles=volume_bundles,
    )
    return run_kubectl_apply(
        yml_string=manifest, task="Creating AppProfile", args=args
    )
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def decorate_job_template_with_gpu(yml_string: str, gpu_type: str) -> str:
    """Apply device-specific decoration to a JobTemplate manifest.

    Args:
      yml_string: JobTemplate manifest as YAML text; it must contain a
        top-level "template" key.
      gpu_type: device type used to pick the decorator.

    Returns:
      The manifest serialized back to YAML with key order preserved. Its
      "template" section is replaced by the result of the tcpxo decorator
      when gpu_type is H100_MEGA_DEVICE_TYPE, or of the rdma decorator when
      it is H200_DEVICE_TYPE; for any other device type the manifest only
      goes through a parse/dump round trip.
    """
    # Parse once and reuse the document instead of loading the text twice.
    job_template_dict = yaml.safe_load(yml_string)
    job_spec = job_template_dict["template"]
    if gpu_type == H100_MEGA_DEVICE_TYPE:
        job_spec = tcpxo_decorator.decorate_kjob_template(job_spec)
    if gpu_type == H200_DEVICE_TYPE:
        job_spec = rdma_decorator.decorate_kjob_template(job_spec)
    job_template_dict["template"] = job_spec
    return yaml.dump(job_template_dict, sort_keys=False)
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
def create_job_template_instance(
    args: Namespace,
    system: SystemCharacteristics | None,
    service_account: str,
) -> int:
    """Render the default JobTemplate manifest and apply it to the cluster.

    The image and working directory come from the xpk config
    (KJOB_BATCH_IMAGE, KJOB_BATCH_WORKING_DIRECTORY) and fall back to
    JobTemplateDefaults when unset or empty. When `system` describes a GPU
    accelerator, a GPU resource limit and a node selector are added and the
    manifest is passed through decorate_job_template_with_gpu.

    Args:
      args: user provided arguments; args.priority is used when present,
        otherwise "medium".
      system: characteristics of the cluster's system, or None.
      service_account: service account name written into the pod spec.

    Returns:
      The return code of run_kubectl_apply for the rendered manifest.
    """
    xpk_config = XpkConfig()

    image = xpk_config.get(KJOB_BATCH_IMAGE)
    if image is None or len(image) == 0:
        image = JobTemplateDefaults.IMAGE.value

    workdir = xpk_config.get(KJOB_BATCH_WORKING_DIRECTORY)
    if workdir is None or len(workdir) == 0:
        workdir = JobTemplateDefaults.WORKING_DIRECTORY.value

    is_gpu = (
        system is not None
        and system.accelerator_type == AcceleratorType["GPU"]
    )
    if is_gpu:
        resources = job_resources_template.format(
            gpu_per_node=system.chips_per_vm
        )
        node_selector = job_node_selector_template.format(
            gpu_name=system.gke_accelerator
        )
    else:
        resources = ""
        node_selector = ""

    manifest = job_template_yaml.format(
        name=JobTemplateDefaults.NAME.value,
        parallelism=JobTemplateDefaults.PARALLELISM.value,
        completions=JobTemplateDefaults.COMPLETIONS.value,
        container_name=JobTemplateDefaults.CONTAINER_NAME.value,
        image=image,
        working_directory=workdir,
        resources=resources,
        node_selector=node_selector,
        priority=getattr(args, "priority", "medium"),
        service_account=service_account,
    )
    if is_gpu:
        manifest = decorate_job_template_with_gpu(manifest, system.device_type)

    return run_kubectl_apply(manifest, task="Creating JobTemplate", args=args)
|
|
301
|
+
|
|
302
|
+
|
|
303
|
+
def create_pod_template_instance(args: Namespace, service_account: str) -> int:
    """Render the default PodTemplate manifest and apply it to the cluster.

    The image and working directory come from the xpk config
    (KJOB_SHELL_IMAGE, KJOB_SHELL_WORKING_DIRECTORY) and fall back to
    PodTemplateDefaults when unset or empty.

    Args:
      args: user provided arguments.
      service_account: service account name written into the pod spec.

    Returns:
      The return code of run_kubectl_apply for the rendered manifest.
    """
    xpk_config = XpkConfig()

    image = xpk_config.get(KJOB_SHELL_IMAGE)
    if image is None or len(image) == 0:
        image = PodTemplateDefaults.IMAGE.value

    workdir = xpk_config.get(KJOB_SHELL_WORKING_DIRECTORY)
    if workdir is None or len(workdir) == 0:
        workdir = PodTemplateDefaults.WORKING_DIRECTORY.value

    manifest = pod_template_yaml.format(
        name=PodTemplateDefaults.NAME.value,
        container_name=PodTemplateDefaults.CONTAINER_NAME.value,
        image=image,
        working_directory=workdir,
        interactive_command=get_pod_template_interactive_command(),
        service_account=service_account,
    )
    return run_kubectl_apply(
        yml_string=manifest, task="Creating PodTemplate", args=args
    )
|
|
331
|
+
|
|
332
|
+
|
|
333
|
+
def prepare_kjob(args: Namespace) -> int:
    """Create the default kjob resources on the cluster.

    Creates, in order, the JobTemplate, the PodTemplate and the
    ApplicationProfile, stopping at the first step that fails. XPK_SA is used
    as the service account when auto-mount storages exist; otherwise an empty
    service account name is passed. The names of the auto-mount storages are
    listed as the profile's volume bundles.

    Args:
      args: user provided arguments.

    Returns:
      0 on success, otherwise the non-zero return code of the failing step.
    """
    system = get_cluster_system_characteristics(args)

    k8s_api_client = setup_k8s_env(args)
    storages = get_auto_mount_storages(k8s_api_client)

    service_account = XPK_SA if len(storages) > 0 else ""

    # Compare with "!= 0" (as the other checks in this module do) so that a
    # negative return code is not mistaken for success.
    job_err_code = create_job_template_instance(args, system, service_account)
    if job_err_code != 0:
        return job_err_code

    pod_err_code = create_pod_template_instance(args, service_account)
    if pod_err_code != 0:
        return pod_err_code

    volume_bundles = [item.name for item in storages]

    return create_app_profile_instance(args, volume_bundles)
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
def apply_kjob_crds(args: Namespace) -> int:
    """Apply the kjob CRDs on the cluster.

    Pipes the output of `kubectl kjob printcrds` into a server-side
    `kubectl apply`.

    Args:
      args: user provided arguments.

    Returns:
      0 if the command succeeded, otherwise its non-zero return code.
    """
    task = "Create kjob CRDs on cluster"
    return_code = run_command_with_updates(
        "kubectl kjob printcrds | kubectl apply --server-side -f -", task, args
    )
    if return_code == 0:
        xpk_print("Creating kjob CRDs succeeded")
        return 0

    xpk_print(f"{task} returned ERROR {return_code}")
    return return_code
|
|
375
|
+
|
|
376
|
+
|
|
377
|
+
def create_volume_bundle_instance(
    k8s_api_client: ApiClient,
    name: str,
    manifest: list[dict],
    readonly: bool,
    mount_point: str,
) -> None:
    """Create a VolumeBundle custom object in the default namespace.

    Loads the VolumeBundle template, sets its name, and adds one volume plus
    one container volume mount for every PersistentVolumeClaim found in
    `manifest`. An already existing VolumeBundle (HTTP 409) is reported and
    skipped; any other API error is reported and ends the program through
    xpk_exit(1).

    Args:
      k8s_api_client: ApiClient used to talk to the Kubernetes API.
      name: name of the VolumeBundle to create.
      manifest: Kubernetes objects as dicts; only entries whose "kind" is
        "PersistentVolumeClaim" are used.
      readonly: value of the "readOnly" flag set on every claim.
      mount_point: mount path used for every container volume mount.
    """
    data = templates.load(VOLUME_BUNDLE_TEMPLATE_PATH)
    data["metadata"]["name"] = name

    claim_names = [
        obj["metadata"]["name"]
        for obj in manifest
        if obj["kind"] == "PersistentVolumeClaim"
    ]

    spec = data["spec"]
    spec["volumes"] = [
        {
            "name": claim,
            "persistentVolumeClaim": {
                "claimName": claim,
                "readOnly": readonly,
            },
        }
        for claim in claim_names
    ]
    spec["containerVolumeMounts"] = [
        {"name": claim, "mountPath": mount_point} for claim in claim_names
    ]
    data["spec"] = spec

    custom_objects_api = k8s_client.CustomObjectsApi(k8s_api_client)
    try:
        custom_objects_api.create_namespaced_custom_object(
            namespace=DEFAULT_NAMESPACE,
            group=KJOB_API_GROUP_NAME,
            version=KJOB_API_GROUP_VERSION,
            plural=KJOB_API_VOLUME_BUNDLE_PLURAL,
            body=data,
        )
        xpk_print(
            f"Created {KJOB_API_VOLUME_BUNDLE_PLURAL}.{KJOB_API_GROUP_NAME} object:"
            f" {data['metadata']['name']}"
        )
    except ApiException as e:
        if e.status != 409:
            xpk_print(f"Encountered error during VolumeBundle creation: {e}")
            xpk_exit(1)
        else:
            xpk_print(f"VolumeBundle: {name} already exists. Skipping its creation")
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
def get_gcsfuse_annotation(args: Namespace) -> str | None:
    """Return the gcsfuse annotation if auto-mount gcsfuse storages exist.

    Args:
      args: user provided arguments, used to set up the Kubernetes client.

    Returns:
      "gke-gcsfuse/volumes=true" when at least one auto-mount gcsfuse storage
      is found, otherwise None.
    """
    gcsfuse_storages = get_auto_mount_gcsfuse_storages(setup_k8s_env(args))
    if len(gcsfuse_storages) == 0:
        return None
    return "gke-gcsfuse/volumes=true"
|