xpk 0.6.0__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. xpk/api/__init__.py +15 -0
  2. xpk/api/storage_crd.yaml +52 -0
  3. xpk/commands/batch.py +27 -5
  4. xpk/commands/cluster.py +104 -80
  5. xpk/commands/cluster_gcluster.py +94 -10
  6. xpk/commands/common.py +44 -0
  7. xpk/commands/config.py +29 -0
  8. xpk/commands/info.py +8 -10
  9. xpk/commands/inspector.py +5 -11
  10. xpk/commands/job.py +9 -7
  11. xpk/commands/kind.py +34 -4
  12. xpk/commands/kjob_common.py +44 -0
  13. xpk/commands/run.py +128 -0
  14. xpk/commands/shell.py +27 -7
  15. xpk/commands/storage.py +280 -0
  16. xpk/commands/version.py +6 -18
  17. xpk/commands/workload.py +381 -184
  18. xpk/core/blueprint/blueprint_definitions.py +1 -0
  19. xpk/core/blueprint/blueprint_generator.py +132 -76
  20. xpk/core/capacity.py +185 -0
  21. xpk/core/cluster.py +564 -0
  22. xpk/core/cluster_private.py +6 -3
  23. xpk/core/commands.py +18 -14
  24. xpk/core/config.py +179 -0
  25. xpk/core/docker_container.py +225 -0
  26. xpk/core/docker_image.py +210 -0
  27. xpk/core/docker_resources.py +350 -0
  28. xpk/core/filestore.py +251 -0
  29. xpk/core/gcloud_context.py +196 -0
  30. xpk/core/gcluster_manager.py +20 -2
  31. xpk/core/gcsfuse.py +50 -0
  32. xpk/core/kjob.py +257 -18
  33. xpk/core/kueue.py +12 -6
  34. xpk/core/monitoring.py +134 -0
  35. xpk/core/nap.py +32 -20
  36. xpk/core/network.py +377 -0
  37. xpk/core/nodepool.py +581 -0
  38. xpk/core/pathways.py +124 -45
  39. xpk/core/remote_state/__init__.py +15 -0
  40. xpk/core/remote_state/fuse_remote_state.py +99 -0
  41. xpk/core/remote_state/remote_state_client.py +38 -0
  42. xpk/core/resources.py +238 -0
  43. xpk/core/scheduling.py +253 -0
  44. xpk/core/storage.py +581 -0
  45. xpk/core/system_characteristics.py +38 -1
  46. xpk/core/vertex.py +105 -0
  47. xpk/core/workload.py +209 -1
  48. xpk/core/workload_decorators/rdma_decorator.py +25 -5
  49. xpk/core/workload_decorators/storage_decorator.py +52 -0
  50. xpk/core/workload_decorators/tcpxo_decorator.py +70 -37
  51. xpk/main.py +3 -1
  52. xpk/parser/batch.py +10 -151
  53. xpk/parser/cluster.py +49 -8
  54. xpk/parser/common.py +189 -1
  55. xpk/parser/config.py +49 -0
  56. xpk/parser/core.py +27 -1
  57. xpk/parser/info.py +2 -1
  58. xpk/parser/inspector.py +3 -3
  59. xpk/parser/job.py +25 -4
  60. xpk/parser/kind.py +3 -2
  61. xpk/parser/run.py +47 -0
  62. xpk/parser/shell.py +10 -1
  63. xpk/parser/storage.py +326 -0
  64. xpk/parser/validators.py +3 -3
  65. xpk/parser/workload.py +118 -76
  66. xpk/templates/__init__.py +15 -0
  67. xpk/templates/storage.yaml +13 -0
  68. xpk/utils/gcs_utils.py +125 -0
  69. xpk/utils/kubectl.py +57 -0
  70. xpk/utils/objects.py +8 -5
  71. xpk/utils/templates.py +28 -0
  72. xpk/utils/validation.py +80 -0
  73. {xpk-0.6.0.dist-info → xpk-0.7.1.dist-info}/METADATA +169 -15
  74. xpk-0.7.1.dist-info/RECORD +92 -0
  75. {xpk-0.6.0.dist-info → xpk-0.7.1.dist-info}/WHEEL +1 -1
  76. xpk/core/core.py +0 -2824
  77. xpk-0.6.0.dist-info/RECORD +0 -57
  78. {xpk-0.6.0.dist-info → xpk-0.7.1.dist-info}/entry_points.txt +0 -0
  79. {xpk-0.6.0.dist-info → xpk-0.7.1.dist-info/licenses}/LICENSE +0 -0
  80. {xpk-0.6.0.dist-info → xpk-0.7.1.dist-info}/top_level.txt +0 -0
xpk/core/storage.py ADDED
@@ -0,0 +1,581 @@
1
+ """
2
+ Copyright 2024 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ import os
18
+ from argparse import Namespace
19
+ from dataclasses import dataclass
20
+ from typing import Any
21
+
22
+ import ruamel.yaml
23
+ from google.cloud import storage as gcp_storage
24
+ from kubernetes import client as k8s_client
25
+ from kubernetes import utils
26
+ from kubernetes.client import ApiClient
27
+ from kubernetes.client.models.v1_persistent_volume import V1PersistentVolume
28
+ from kubernetes.client.rest import ApiException
29
+ from kubernetes.utils import FailToCreateError
30
+ from tabulate import tabulate
31
+
32
+ from ..utils.console import xpk_exit, xpk_print
33
+ from ..utils.file import ensure_directory_exists
34
+ from ..utils import templates
35
+ from .cluster import XPK_SA
36
+
37
+ yaml = ruamel.yaml.YAML()  # shared ruamel instance; save_manifest dumps manifests through it
38
+
39
+ STORAGE_CRD_PATH = "/../api/storage_crd.yaml"  # appended to this module's directory in install_storage_crd
40
+ STORAGE_TEMPLATE_PATH = "/../templates/storage.yaml"  # handed to templates.load in create_storage_crds
41
+ XPK_API_GROUP_NAME = "xpk.x-k8s.io"  # group= argument of the CustomObjectsApi calls in this file
42
+ XPK_API_GROUP_VERSION = "v1"  # version= argument of the CustomObjectsApi calls in this file
43
+ STORAGE_CRD_KIND = "Storage"
44
+ STORAGE_CRD_PLURAL = "storages"  # plural= argument of the CustomObjectsApi calls in this file
45
+ STORAGE_CRD_NAME = f"{XPK_API_GROUP_NAME}.{STORAGE_CRD_PLURAL}"  # appears only in log messages in the visible code
46
+ GCS_FUSE_TYPE = "gcsfuse"  # compared against Storage.type
47
+ GCP_FILESTORE_TYPE = "gcpfilestore"  # not referenced in the visible part of this file
48
+ MANIFESTS_PATH = os.path.abspath("xpkclusters/storage-manifests")  # resolved against the working directory at import time
49
+ GCS_FUSE_ANNOTATION = 'gke-gcsfuse/volumes: "true"'  # not referenced in the visible part of this file
50
+
51
+
52
@dataclass
class Storage:
  """
  Represents a Storage custom resource in Kubernetes.

  Instances are built from the raw custom-object dictionary returned by the
  Kubernetes API, so the explicit `__init__` below is used instead of a
  dataclass-generated keyword constructor.

  Attributes:
    name: The name of the Storage resource.
    type: The type of storage (e.g., 'gcsfuse').
    auto_mount: Whether the storage should be automatically mounted to every workload.
    mount_point: The path on which a given storage should be mounted for a workload.
    readonly: Whether the storage is read-only.
    manifest: The path to a yaml file containing PersistentVolume and PersistentVolumeClaim for a given storage.
    pvc: The name of the PersistentVolumeClaim associated with the storage.
    pv: The name of the PersistentVolume associated with the storage.
    bucket: The CSI volume handle of the PersistentVolume, or an empty string
      when it could not be determined.
  """

  name: str
  type: str
  auto_mount: bool
  mount_point: str
  readonly: bool
  manifest: str
  pvc: str
  pv: str
  bucket: str

  def __init__(self, data: dict):
    """
    Initializes a Storage object from a dictionary.

    Missing `metadata`/`spec` sections or keys leave the matching attribute
    set to None.

    Args:
      data: A dictionary containing the Storage resource definition.
    """
    metadata: dict = data.get("metadata", {})
    self.name = metadata.get("name")
    spec: dict = data.get("spec", {})
    self.type = spec.get("type")
    self.auto_mount = spec.get("auto_mount")
    self.mount_point = spec.get("mount_point")
    self.readonly = spec.get("readonly")
    self.manifest = spec.get("manifest")
    self.pvc = spec.get("pvc")
    self.pv = spec.get("pv")
    # Must run last: the lookup reads self.pv.
    self.bucket = self._get_bucket()

  def fields_as_list(self) -> list[Any]:
    """
    Returns a list of fields for display purposes.

    Returns:
      The name, type, auto_mount, mount_point, readonly and manifest values,
      in that order. Values are returned as stored (booleans are not
      converted to strings).
    """
    return [
        self.name,
        self.type,
        self.auto_mount,
        self.mount_point,
        self.readonly,
        self.manifest,
    ]

  def _get_bucket(self) -> str:
    """
    Retrieves the bucket name from PersistentVolume definition associated with the storage.

    Returns:
      The CSI volume handle of the PersistentVolume. An empty string is
      returned when the storage has no PersistentVolume name, the
      PersistentVolume has no CSI source, or the API call fails.
    """
    if not self.pv:
      # Without a name the API client would reject the call outright.
      return ""

    client = k8s_client.CoreV1Api()
    try:
      pv: V1PersistentVolume = client.read_persistent_volume(self.pv)
    except ApiException as e:
      xpk_print(
          f"Exception when calling CoreV1Api->read_persistent_volume: {e}"
      )
      return ""

    # `csi` is optional on a PersistentVolume spec.
    csi = pv.spec.csi if pv.spec else None
    return csi.volume_handle if csi else ""

  def get_mount_options(self) -> list[str]:
    """
    Retrieves the mount options for the PersistentVolume.

    Returns:
      A list of mount options. An empty list is returned when the storage has
      no PersistentVolume name, the PersistentVolume declares no mount
      options, or the API call fails.
    """
    if not self.pv:
      return []

    client = k8s_client.CoreV1Api()
    try:
      pv: V1PersistentVolume = client.read_persistent_volume(self.pv)
    except ApiException as e:
      xpk_print(
          f"Exception when calling CoreV1Api->read_persistent_volume: {e}"
      )
      return []

    # `mount_options` is optional and comes back as None when unset.
    options = pv.spec.mount_options if pv.spec else None
    return options or []
148
+
149
+
150
def list_storages(k8s_api_client: ApiClient) -> list[Storage]:
  """
  Lists all Storage custom resources in the cluster.

  A 404 from the API is treated as "no storages" and yields an empty list;
  any other API error terminates via xpk_exit(1).

  Args:
    k8s_api_client: An ApiClient object for interacting with the Kubernetes API.

  Returns:
    One Storage object per item returned by the API.
  """
  custom_objects = k8s_client.CustomObjectsApi(k8s_api_client)
  try:
    response = custom_objects.list_cluster_custom_object(
        group=XPK_API_GROUP_NAME,
        version=XPK_API_GROUP_VERSION,
        plural=STORAGE_CRD_PLURAL,
    )
  except ApiException as e:
    xpk_print(f"Kubernetes API exception while listing Storages: {e}")
    if e.status == 404:
      xpk_print("Storages not found, skipping")
      return []
    # If it's a different error, then we should just exit.
    xpk_exit(1)

  return [Storage(item) for item in response["items"]]
180
+
181
+
182
def get_auto_mount_storages(k8s_api_client: ApiClient) -> list[Storage]:
  """
  Retrieves all Storage resources whose `auto_mount` field is exactly True.

  Args:
    k8s_api_client: An ApiClient object for interacting with the Kubernetes API.

  Returns:
    The subset of list_storages() results with `auto_mount` set to True.
  """
  # Identity check on purpose: truthy non-bool values do not qualify.
  return [
      candidate
      for candidate in list_storages(k8s_api_client)
      if candidate.auto_mount is True
  ]
197
+
198
+
199
def get_auto_mount_gcsfuse_storages(k8s_api_client: ApiClient) -> list[Storage]:
  """
  Retrieves the auto-mounted Storage resources whose type is GCS Fuse.

  Args:
    k8s_api_client: An ApiClient object for interacting with the Kubernetes API.

  Returns:
    The auto-mounted Storage objects whose `type` equals GCS_FUSE_TYPE.
  """
  return [
      candidate
      for candidate in get_auto_mount_storages(k8s_api_client)
      if candidate.type == GCS_FUSE_TYPE
  ]
211
+
212
+
213
def get_storages(
    k8s_api_client: ApiClient, requested_storages: list[str]
) -> list[Storage]:
  """
  Retrieves a list of Storage resources by their names.

  Every requested name must exist in the cluster; the first unknown name is
  reported together with the available names and the process exits via
  xpk_exit(1).

  Args:
    k8s_api_client: An ApiClient object for interacting with the Kubernetes API.
    requested_storages: Names of the Storage resources to retrieve.

  Returns:
    The Storage objects whose names were requested, in the order the cluster
    listing returned them.
  """
  available = list_storages(k8s_api_client)
  known_names = {entry.name for entry in available}

  for wanted in requested_storages:
    if wanted in known_names:
      continue
    xpk_print(
        f"Storage: {wanted} not found. Choose one of the available"
        f" storages: {list(known_names)}"
    )
    xpk_exit(1)

  return [entry for entry in available if entry.name in requested_storages]
241
+
242
+
243
def get_storages_to_mount(
    k8s_api_client: ApiClient, requested_storages: list[str]
) -> list[Storage]:
  """
  Retrieves the requested Storage resources plus every auto-mounted one.

  Args:
    k8s_api_client: An ApiClient object for interacting with the Kubernetes API.
    requested_storages: Names of the Storage resources to retrieve.

  Returns:
    The requested Storage objects followed by the auto-mounted storages that
    were not requested by name.
  """
  to_mount = get_storages(k8s_api_client, requested_storages)
  # Auto-mounted storages already requested by name are skipped so nothing
  # ends up in the list twice.
  to_mount.extend(
      extra
      for extra in get_auto_mount_storages(k8s_api_client)
      if extra.name not in requested_storages
  )
  return to_mount
263
+
264
+
265
def get_storage(k8s_api_client: ApiClient, name: str) -> Storage:
  """
  Retrieves a specific Storage custom resource by its name.

  An API error is printed and followed by xpk_exit(1).

  Args:
    k8s_api_client: An ApiClient object for interacting with the Kubernetes API.
    name: The name of the Storage resource to retrieve.

  Returns:
    A Storage object built from the retrieved custom resource.
  """
  custom_objects = k8s_client.CustomObjectsApi(k8s_api_client)
  try:
    return Storage(
        custom_objects.get_cluster_custom_object(
            name=name,
            group=XPK_API_GROUP_NAME,
            version=XPK_API_GROUP_VERSION,
            plural=STORAGE_CRD_PLURAL,
        )
    )
  except ApiException as e:
    xpk_print(f"Kubernetes API exception while getting Storage {name}: {e}")
    xpk_exit(1)
288
+
289
+
290
def install_storage_crd(k8s_api_client: ApiClient) -> None:
  """
  Installs the Storage custom resource definition (CRD) in the Kubernetes cluster.

  A creation failure that includes a 409 status is reported as "already
  exists" and tolerated; any other creation failure is printed and followed
  by xpk_exit(1).

  Args:
    k8s_api_client: An ApiClient object for interacting with the Kubernetes API.
  """
  xpk_print(f"Creating a new CRD: {STORAGE_CRD_NAME}")
  crd_file = f"{os.path.dirname(__file__)}{STORAGE_CRD_PATH}"
  try:
    utils.create_from_yaml(k8s_api_client, crd_file, verbose=True)
    xpk_print(f"Created a CRD: {STORAGE_CRD_NAME} successfully")
  except FailToCreateError as e:
    already_exists = any(
        failure.status == 409 for failure in e.api_exceptions
    )
    if already_exists:
      xpk_print(
          f"CRD: {STORAGE_CRD_NAME} already exists. Skipping its creation"
      )
    else:
      xpk_print(f"Encountered error during installing Storage CRD: {e}")
      xpk_exit(1)
315
+
316
+
317
+ def get_storage_volume_mounts_yaml(storages: list[Storage]) -> str:
318
+ """
319
+ Generates the YAML representation of the volumeMounts section for the given Storages.
320
+
321
+ This function creates the YAML snippet that defines how the storage volumes
322
+ should be mounted within a Pod's containers.
323
+
324
+ Args:
325
+ storages: Storage objects whose `pv`, `mount_point` and `readonly` values are interpolated into the snippet.
326
+
327
+ Returns:
328
+ A string with one `name`/`mountPath`/`readOnly` list item per Storage, concatenated in input order; an empty string when `storages` is empty.
329
+ """
330
+ yaml_str = ""
331
+ for storage in storages:
332
+ yaml_str += f"""- name: {storage.pv}
333
+ mountPath: {storage.mount_point}
334
+ readOnly: {storage.readonly}
335
+ """
336
+ return yaml_str
337
+
338
+
339
+ def get_storage_volumes_yaml(storages: list[Storage]) -> str:
340
+ """
341
+ Generates the YAML representation of the volumes section for the given Storages.
342
+
343
+ This function creates the YAML snippet that defines the volumes to be
344
+ mounted in a Pod, including the PersistentVolumeClaim associated with
345
+ each Storage.
346
+
347
+ Args:
348
+ storages: Storage objects whose `pv`, `pvc` and `readonly` values are interpolated into the snippet.
349
+
350
+ Returns:
351
+ A string with one `name`/`persistentVolumeClaim` list item per Storage, concatenated in input order; an empty string when `storages` is empty.
352
+ """
353
+ yaml_str = ""
354
+ for storage in storages:
355
+ yaml_str += f"""- name: {storage.pv}
356
+ persistentVolumeClaim:
357
+ claimName: {storage.pvc}
358
+ readOnly: {storage.readonly}
359
+ """
360
+ return yaml_str
361
+
362
+
363
+ def get_storage_volume_mounts_yaml_for_gpu(storages: list[Storage]) -> str:
364
+ """
365
+ Generates the YAML representation of the volumeMounts section for the given Storages.
366
+
367
+ This function creates the YAML snippet that defines how the storage volumes
368
+ should be mounted within a Pod's containers.
369
+
370
+ Args:
371
+ storages: Storage objects whose `pv`, `mount_point` and `readonly` values are interpolated into the snippet.
372
+
373
+ Returns:
374
+ A string with one `name`/`mountPath`/`readOnly` list item per Storage, concatenated in input order; an empty string when `storages` is empty. NOTE(review): the visible text matches get_storage_volume_mounts_yaml; presumably the two differ only in the literal's indentation - confirm.
375
+ """
376
+ yaml_str = ""
377
+ for storage in storages:
378
+ yaml_str += f"""- name: {storage.pv}
379
+ mountPath: {storage.mount_point}
380
+ readOnly: {storage.readonly}
381
+ """
382
+ return yaml_str
383
+
384
+
385
+ def get_storage_volumes_yaml_for_gpu(storages: list[Storage]) -> str:
386
+ """
387
+ Generates the YAML representation of the volumes section for the given Storages.
388
+
389
+ This function creates the YAML snippet that defines the volumes to be
390
+ mounted in a Pod, including the PersistentVolumeClaim associated with
391
+ each Storage.
392
+
393
+ Args:
394
+ storages: Storage objects whose `pv`, `pvc` and `readonly` values are interpolated into the snippet.
395
+
396
+ Returns:
397
+ A string with one `name`/`persistentVolumeClaim` list item per Storage, concatenated in input order; an empty string when `storages` is empty. NOTE(review): the visible text matches get_storage_volumes_yaml; presumably the two differ only in the literal's indentation - confirm.
398
+ """
399
+ yaml_str = ""
400
+ for storage in storages:
401
+ yaml_str += f"""- name: {storage.pv}
402
+ persistentVolumeClaim:
403
+ claimName: {storage.pvc}
404
+ readOnly: {storage.readonly}
405
+ """
406
+ return yaml_str
407
+
408
+
409
def get_storage_volumes_yaml_dict(storages: list[Storage]) -> list[dict]:
  """
  Builds the volumes section for the given Storages as plain dictionaries.

  Args:
    storages: A list of Storage objects.

  Returns:
    One dictionary per Storage, in input order, holding the volume name
    (the Storage's `pv`) and a `persistentVolumeClaim` entry with the claim
    name and read-only flag.
  """
  return [
      {
          "name": entry.pv,
          "persistentVolumeClaim": {
              "claimName": entry.pvc,
              "readOnly": entry.readonly,
          },
      }
      for entry in storages
  ]
420
+
421
+
422
def add_bucket_iam_members(args: Namespace, storages: list[Storage]) -> None:
  """
  Adds an IAM binding for the XPK service account to each GCS Fuse bucket.

  Storages of any other type are skipped. Read-only storages get the
  object-viewer role; all others get the object-user role.

  Args:
    args: An argparse Namespace object; `project_number` and `project` are
      read to build the member string.
    storages: A list of Storage objects.
  """
  gcs = gcp_storage.Client()

  for entry in storages:
    if entry.type != GCS_FUSE_TYPE:
      continue

    bucket = gcs.bucket(entry.bucket)
    policy = bucket.get_iam_policy(requested_policy_version=3)
    role = (
        "roles/storage.objectViewer"
        if entry.readonly
        else "roles/storage.objectUser"
    )

    member = (
        f"principal://iam.googleapis.com/projects/{args.project_number}/"
        f"locations/global/workloadIdentityPools/{args.project}.svc.id.goog/"
        f"subject/ns/default/sa/{XPK_SA}"
    )

    policy.bindings.append({"role": role, "members": {member}})
    bucket.set_iam_policy(policy)
    xpk_print(f"Added {member} with role {role} to {entry.bucket}.")
454
+
455
+
456
def print_storages_for_cluster(storages: list[Storage]) -> None:
  """
  Prints the given Storage resources as a human readable table.

  Args:
    storages: A list of Storage objects; each contributes one row built from
      its fields_as_list().
  """
  columns = [
      "NAME",
      "TYPE",
      "AUTO MOUNT",
      "MOUNT POINT",
      "READONLY",
      "MANIFEST",
  ]
  rows = [entry.fields_as_list() for entry in storages]
  print(tabulate(rows, headers=columns))
482
+
483
+
484
def save_manifest(args: Namespace, manifest: list[dict]):
  """
  Saves manifest to file in xpkclusters/storage-manifests.

  The file name is derived from the project, zone, cluster and storage name
  found on `args`; the directory is created if needed.

  Args:
    args: An argparse Namespace object containing arguments for creating the
      Storage resource.
    manifest: A list of some of: PersistentVolume, PersistentVolumeClaim and
      StorageClass definitions

  Returns:
    manifest_path: Manifest file path
  """
  ensure_directory_exists(MANIFESTS_PATH)
  file_name = (
      f"{args.project}-{args.zone}-{args.cluster}-{args.name}-manifest.yaml"
  )
  manifest_path = f"{MANIFESTS_PATH}/{file_name}"
  with open(manifest_path, "w", encoding="utf-8") as out:
    # Every manifest entry becomes its own YAML document in the file.
    yaml.dump_all(manifest, out)
  return manifest_path
502
+
503
+
504
def save_storage_crds(k8s_api_client: ApiClient, data: Any):
  """
  Saves a new Storage custom resource in the Kubernetes cluster.

  API errors are not handled here and propagate to the caller.

  Args:
    k8s_api_client: An ApiClient object for interacting with the Kubernetes API.
    data: The custom resource body to create; `data['metadata']['name']` is
      read for the confirmation message.
  """
  k8s_client.CustomObjectsApi(k8s_api_client).create_cluster_custom_object(
      group=XPK_API_GROUP_NAME,
      version=XPK_API_GROUP_VERSION,
      plural=STORAGE_CRD_PLURAL,
      body=data,
  )
  xpk_print(f"Created {STORAGE_CRD_KIND} object: {data['metadata']['name']}")
521
+
522
+
523
def fill_storage_template(
    template: dict, args: Namespace, manifest: list[dict], manifest_path: str
):
  """
  Populates storage.yaml template with data.

  The template is modified in place: its metadata name and its whole `spec`
  are overwritten.

  Args:
    template: A storage custom resource definition template
    args: An argparse Namespace object containing the arguments for creating
      the Storage resource.
    manifest: A list of some of: PersistentVolume, PersistentVolumeClaim and
      StorageClass definitions
    manifest_path: Path of the saved manifest file, stored in `spec.manifest`.
  """
  template["metadata"]["name"] = args.name
  spec = {
      "auto_mount": args.auto_mount,
      "cluster": args.cluster,
      "mount_point": args.mount_point,
      "readonly": args.readonly,
      "type": args.type,
      "manifest": manifest_path,
  }
  template["spec"] = spec

  # Only these two kinds contribute a name to the spec; others are ignored.
  spec_key_by_kind = {
      "PersistentVolume": "pv",
      "PersistentVolumeClaim": "pvc",
  }
  for resource in manifest:
    spec_key = spec_key_by_kind.get(resource["kind"])
    if spec_key is not None:
      spec[spec_key] = resource["metadata"]["name"]
551
+
552
+
553
def create_storage_crds(
    k8s_api_client: ApiClient, args: Namespace, manifest: list[dict]
) -> None:
  """
  Creates a new Storage custom resource in the Kubernetes cluster.

  The storage template is loaded, the manifest is written to disk, the
  template is filled from `args` and the manifest, and the resulting object
  is created in the cluster. A 409 response is reported as "already exists"
  and tolerated; any other API error is printed and followed by xpk_exit(1).

  Args:
    k8s_api_client: An ApiClient object for interacting with the Kubernetes API.
    args: An argparse Namespace object containing the arguments for creating
      the Storage resource.
    manifest: A list of some of: PersistentVolume, PersistentVolumeClaim and
      StorageClass definitions
  """
  try:
    storage_template = templates.load(STORAGE_TEMPLATE_PATH)

    saved_path = save_manifest(args, manifest)
    fill_storage_template(storage_template, args, manifest, saved_path)
    save_storage_crds(k8s_api_client, storage_template)
  except ApiException as e:
    if e.status != 409:
      xpk_print(f"Encountered error during storage creation: {e}")
      xpk_exit(1)
    else:
      xpk_print(f"Storage: {args.name} already exists. Skipping its creation")
@@ -99,7 +99,44 @@ IN MaxText/accelerator_to_spec_map.py !!!!! """
99
99
  # vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv
100
100
  UserFacingNameToSystemCharacteristics = {
101
101
  # GPU system characteristics
102
- # A100-40gb-$CHIPS
102
+ # l4-$CHIPSc
103
+ 'l4-1': SystemCharacteristics(
104
+ 'N/A',
105
+ 1,
106
+ 'nvidia-l4',
107
+ 'g2-standard-12',
108
+ 1,
109
+ AcceleratorType['GPU'],
110
+ 'l4-1',
111
+ ),
112
+ 'l4-2': SystemCharacteristics(
113
+ 'N/A',
114
+ 1,
115
+ 'nvidia-l4',
116
+ 'g2-standard-24',
117
+ 2,
118
+ AcceleratorType['GPU'],
119
+ 'l4-2',
120
+ ),
121
+ 'l4-4': SystemCharacteristics(
122
+ 'N/A',
123
+ 1,
124
+ 'nvidia-l4',
125
+ 'g2-standard-48',
126
+ 4,
127
+ AcceleratorType['GPU'],
128
+ 'l4-4',
129
+ ),
130
+ 'l4-8': SystemCharacteristics(
131
+ 'N/A',
132
+ 1,
133
+ 'nvidia-l4',
134
+ 'g2-standard-96',
135
+ 8,
136
+ AcceleratorType['GPU'],
137
+ 'l4-8',
138
+ ),
139
+ # A100-40gb-$CHIPSc
103
140
  'a100-40gb-1': SystemCharacteristics(
104
141
  'N/A',
105
142
  1,