xpk 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. xpk/api/__init__.py +15 -0
  2. xpk/api/storage_crd.yaml +52 -0
  3. xpk/commands/batch.py +27 -5
  4. xpk/commands/cluster.py +104 -80
  5. xpk/commands/cluster_gcluster.py +94 -10
  6. xpk/commands/common.py +44 -0
  7. xpk/commands/config.py +29 -0
  8. xpk/commands/info.py +8 -10
  9. xpk/commands/inspector.py +5 -11
  10. xpk/commands/job.py +9 -7
  11. xpk/commands/kind.py +34 -4
  12. xpk/commands/kjob_common.py +44 -0
  13. xpk/commands/run.py +128 -0
  14. xpk/commands/shell.py +27 -7
  15. xpk/commands/storage.py +267 -0
  16. xpk/commands/version.py +6 -18
  17. xpk/commands/workload.py +381 -184
  18. xpk/core/blueprint/blueprint_definitions.py +1 -0
  19. xpk/core/blueprint/blueprint_generator.py +132 -76
  20. xpk/core/capacity.py +185 -0
  21. xpk/core/cluster.py +564 -0
  22. xpk/core/cluster_private.py +6 -3
  23. xpk/core/commands.py +18 -14
  24. xpk/core/config.py +179 -0
  25. xpk/core/docker_container.py +225 -0
  26. xpk/core/docker_image.py +210 -0
  27. xpk/core/docker_resources.py +350 -0
  28. xpk/core/filestore.py +251 -0
  29. xpk/core/gcloud_context.py +196 -0
  30. xpk/core/gcluster_manager.py +20 -2
  31. xpk/core/gcsfuse.py +50 -0
  32. xpk/core/kjob.py +257 -18
  33. xpk/core/kueue.py +12 -6
  34. xpk/core/monitoring.py +134 -0
  35. xpk/core/nap.py +32 -20
  36. xpk/core/network.py +377 -0
  37. xpk/core/nodepool.py +581 -0
  38. xpk/core/pathways.py +124 -45
  39. xpk/core/remote_state/__init__.py +15 -0
  40. xpk/core/remote_state/fuse_remote_state.py +99 -0
  41. xpk/core/remote_state/remote_state_client.py +38 -0
  42. xpk/core/resources.py +238 -0
  43. xpk/core/scheduling.py +253 -0
  44. xpk/core/storage.py +581 -0
  45. xpk/core/system_characteristics.py +38 -1
  46. xpk/core/vertex.py +105 -0
  47. xpk/core/workload.py +209 -1
  48. xpk/core/workload_decorators/rdma_decorator.py +25 -5
  49. xpk/core/workload_decorators/storage_decorator.py +52 -0
  50. xpk/core/workload_decorators/tcpxo_decorator.py +70 -37
  51. xpk/main.py +3 -1
  52. xpk/parser/batch.py +10 -151
  53. xpk/parser/cluster.py +49 -8
  54. xpk/parser/common.py +189 -1
  55. xpk/parser/config.py +49 -0
  56. xpk/parser/core.py +27 -1
  57. xpk/parser/info.py +2 -1
  58. xpk/parser/inspector.py +3 -3
  59. xpk/parser/job.py +25 -4
  60. xpk/parser/kind.py +3 -2
  61. xpk/parser/run.py +47 -0
  62. xpk/parser/shell.py +10 -1
  63. xpk/parser/storage.py +316 -0
  64. xpk/parser/validators.py +3 -3
  65. xpk/parser/workload.py +118 -76
  66. xpk/templates/__init__.py +15 -0
  67. xpk/templates/storage.yaml +13 -0
  68. xpk/utils/gcs_utils.py +125 -0
  69. xpk/utils/kubectl.py +57 -0
  70. xpk/utils/objects.py +8 -5
  71. xpk/utils/templates.py +28 -0
  72. xpk/utils/validation.py +80 -0
  73. {xpk-0.6.0.dist-info → xpk-0.7.0.dist-info}/METADATA +165 -14
  74. xpk-0.7.0.dist-info/RECORD +92 -0
  75. {xpk-0.6.0.dist-info → xpk-0.7.0.dist-info}/WHEEL +1 -1
  76. xpk/core/core.py +0 -2824
  77. xpk-0.6.0.dist-info/RECORD +0 -57
  78. {xpk-0.6.0.dist-info → xpk-0.7.0.dist-info}/LICENSE +0 -0
  79. {xpk-0.6.0.dist-info → xpk-0.7.0.dist-info}/entry_points.txt +0 -0
  80. {xpk-0.6.0.dist-info → xpk-0.7.0.dist-info}/top_level.txt +0 -0
xpk/parser/workload.py CHANGED
@@ -20,9 +20,9 @@ from ..commands.workload import (
20
20
  workload_delete,
21
21
  workload_list,
22
22
  )
23
- from ..core.core import default_docker_image, default_script_dir
24
- from .validators import directory_path_type, workload_name_type
23
+ from ..core.docker_image import DEFAULT_DOCKER_IMAGE, DEFAULT_SCRIPT_DIR
25
24
  from .common import add_shared_arguments
25
+ from .validators import directory_path_type, name_type
26
26
 
27
27
 
28
28
  def set_workload_parsers(workload_parser):
@@ -67,11 +67,7 @@ def set_workload_parsers(workload_parser):
67
67
  'Arguments for configuring autoprovisioning.',
68
68
  )
69
69
  )
70
- workload_pathways_workload_arguments = workload_create_parser.add_argument_group(
71
- 'Pathways Image Arguments',
72
- 'If --use-pathways is provided, user wants to set up a'
73
- 'Pathways workload on xpk.',
74
- )
70
+
75
71
  workload_vertex_tensorboard_arguments = (
76
72
  workload_create_parser.add_argument_group(
77
73
  'Vertex Tensorboard Arguments',
@@ -114,6 +110,12 @@ def set_workload_parsers(workload_parser):
114
110
  ),
115
111
  )
116
112
 
113
+ workload_create_parser_optional_arguments.add_argument(
114
+ '--storage',
115
+ action='append',
116
+ default=[],
117
+ help='Names of storages the workload uses',
118
+ )
117
119
  workload_create_parser_optional_arguments.add_argument(
118
120
  '--num-nodes',
119
121
  type=int,
@@ -151,6 +153,15 @@ def set_workload_parsers(workload_parser):
151
153
  ),
152
154
  )
153
155
 
156
+ workload_create_parser_optional_arguments.add_argument(
157
+ '--use-pathways',
158
+ action='store_true',
159
+ help=(
160
+ 'Please use `xpk workload create-pathways` instead to'
161
+ ' create Pathways workloads.'
162
+ ),
163
+ )
164
+
154
165
  # Autoprovisioning workload arguments
155
166
  workload_create_autoprovisioning_arguments.add_argument(
156
167
  '--on-demand',
@@ -178,16 +189,6 @@ def set_workload_parsers(workload_parser):
178
189
  ),
179
190
  )
180
191
 
181
- # Pathways workload arguments
182
- workload_pathways_workload_arguments.add_argument(
183
- '--use-pathways',
184
- action='store_true',
185
- help=(
186
- 'DECRATING SOON!!! Please use `xpk workload create-pathways` instead.'
187
- ' Provide this argument to create Pathways workloads.'
188
- ),
189
- )
190
-
191
192
  # "workload create-pathways" command parser.
192
193
  workload_create_pathways_parser = workload_subcommands.add_parser(
193
194
  'create-pathways', help='Create a new job.'
@@ -230,6 +231,45 @@ def set_workload_parsers(workload_parser):
230
231
  help='The tpu type to use, v5litepod-16, etc.',
231
232
  )
232
233
 
234
+ ### "workload create-pathways" Optional arguments, specific to Pathways
235
+ workload_create_pathways_parser_optional_arguments.add_argument(
236
+ '--headless',
237
+ action='store_true',
238
+ help=(
239
+ 'Please provide this argument to create Pathways workloads in'
240
+ ' headless mode. This arg can only be used in `xpk workload'
241
+ ' create-pathways`.'
242
+ ),
243
+ )
244
+ workload_create_pathways_parser_optional_arguments.add_argument(
245
+ '--proxy-server-image',
246
+ type=str,
247
+ default=(
248
+ 'us-docker.pkg.dev/cloud-tpu-v2-images/pathways/proxy_server:latest'
249
+ ),
250
+ help=(
251
+ 'Please provide the proxy server image for Pathways. This arg can'
252
+ ' only be used in `xpk workload create-pathways`.'
253
+ ),
254
+ )
255
+ workload_create_pathways_parser_optional_arguments.add_argument(
256
+ '--server-image',
257
+ type=str,
258
+ default='us-docker.pkg.dev/cloud-tpu-v2-images/pathways/server:latest',
259
+ help=(
260
+ 'Please provide the server image for Pathways. This arg can only be'
261
+ ' used in `xpk workload create-pathways`.'
262
+ ),
263
+ )
264
+ workload_create_pathways_parser_optional_arguments.add_argument(
265
+ '--pathways-gcs-location',
266
+ type=str,
267
+ default='gs://cloud-pathways-staging/tmp',
268
+ help=(
269
+ 'Please provide the GCS location to store Pathways artifacts. This'
270
+ ' arg can only be used in `xpk workload create-pathways`.'
271
+ ),
272
+ )
233
273
  workload_create_pathways_parser_optional_arguments.add_argument(
234
274
  '--command',
235
275
  type=str,
@@ -243,6 +283,45 @@ def set_workload_parsers(workload_parser):
243
283
  ),
244
284
  required=False,
245
285
  )
286
+ workload_create_pathways_parser_optional_arguments.add_argument(
287
+ '--storage',
288
+ action='append',
289
+ default=[],
290
+ help='Names of storages the workload uses',
291
+ )
292
+
293
+ workload_create_pathways_parser_optional_arguments.add_argument(
294
+ '--custom-pathways-server-args',
295
+ type=str,
296
+ default=None,
297
+ help=(
298
+ 'Provide custom Pathways server args as follows -'
299
+ " --custom-pathways-server-args='--arg_1=xxx --arg2=yyy'"
300
+ ),
301
+ required=False,
302
+ )
303
+
304
+ workload_create_pathways_parser_optional_arguments.add_argument(
305
+ '--custom-pathways-proxy-server-args',
306
+ type=str,
307
+ default=None,
308
+ help=(
309
+ 'Provide custom Pathways proxy server args as follows -'
310
+ " --custom-pathways-proxy-server-args='--arg_1=xxx --arg2=yyy'"
311
+ ),
312
+ required=False,
313
+ )
314
+
315
+ workload_create_pathways_parser_optional_arguments.add_argument(
316
+ '--custom-pathways-worker-args',
317
+ type=str,
318
+ default=None,
319
+ help=(
320
+ 'Provide custom Pathways worker args as follows -'
321
+ " --custom-pathways-worker-args='--arg_1=xxx --arg2=yyy'"
322
+ ),
323
+ required=False,
324
+ )
246
325
 
247
326
  add_shared_workload_create_required_arguments([
248
327
  workload_create_parser_required_arguments,
@@ -293,7 +372,7 @@ def set_workload_parsers(workload_parser):
293
372
  ### "workload delete" Required arguments
294
373
  workload_delete_parser_required_arguments.add_argument(
295
374
  '--cluster',
296
- type=str,
375
+ type=name_type,
297
376
  default=None,
298
377
  help='The name of the cluster to delete the job on.',
299
378
  required=True,
@@ -301,7 +380,7 @@ def set_workload_parsers(workload_parser):
301
380
  ### "workload delete" Optional arguments
302
381
  workload_delete_parser_optional_arguments.add_argument(
303
382
  '--workload',
304
- type=workload_name_type,
383
+ type=name_type,
305
384
  default=None,
306
385
  help=(
307
386
  'The name of the workload to delete. If the workload is not'
@@ -352,7 +431,7 @@ def set_workload_parsers(workload_parser):
352
431
 
353
432
  workload_list_parser.add_argument(
354
433
  '--cluster',
355
- type=str,
434
+ type=name_type,
356
435
  default=None,
357
436
  help='The name of the cluster to list jobs on.',
358
437
  required=True,
@@ -428,14 +507,14 @@ def add_shared_workload_create_required_arguments(args_parsers):
428
507
  for custom_parser in args_parsers:
429
508
  custom_parser.add_argument(
430
509
  '--workload',
431
- type=workload_name_type,
510
+ type=name_type,
432
511
  default=None,
433
512
  help='The name of the workload to run.',
434
513
  required=True,
435
514
  )
436
515
  custom_parser.add_argument(
437
516
  '--cluster',
438
- type=str,
517
+ type=name_type,
439
518
  default=None,
440
519
  help='The name of the cluster to run the job on.',
441
520
  required=True,
@@ -503,6 +582,12 @@ def add_shared_workload_create_optional_arguments(args_parsers):
503
582
  ' event or deletion request.Defaults to 30 seconds.'
504
583
  ),
505
584
  )
585
+ custom_parser.add_argument(
586
+ '--remote-python-sidecar-image',
587
+ type=str,
588
+ default=None,
589
+ help='Remote Python sidecar server image.',
590
+ )
506
591
  custom_parser.add_argument(
507
592
  '--enable-debug-logs',
508
593
  action='store_true',
@@ -512,59 +597,16 @@ def add_shared_workload_create_optional_arguments(args_parsers):
512
597
  ),
513
598
  )
514
599
  custom_parser.add_argument(
515
- '--restart-on-user-code-failure',
516
- action='store_true',
517
- help=(
518
- 'Adding this argument will return user failures back to the jobset'
519
- ' manager allowing restarts on user code when --max-restarts is set'
520
- ' greater than 0. By default, this is not enabled, and workloads'
521
- ' will not restart from user code failures. This is enabled by'
522
- ' default on Pathways workloads.'
523
- ),
524
- )
525
- custom_parser.add_argument(
526
- '--headless',
527
- action='store_true',
528
- help=(
529
- 'Please provide this argument to create Pathways workloads in'
530
- ' headless mode. This arg can only be used in `xpk workload'
531
- ' create-pathways`(preferred) or `xpk workload create'
532
- ' --use-pathways.` (--use-pathways will be deprecated soon).'
533
- ),
534
- )
535
- custom_parser.add_argument(
536
- '--proxy-server-image',
600
+ '--restart-on-exit-codes',
537
601
  type=str,
538
- default=(
539
- 'us-docker.pkg.dev/cloud-tpu-v2-images/pathways/proxy_server:latest'
540
- ),
541
- help=(
542
- 'Please provide the proxy server image for Pathways. This arg can'
543
- ' only be used in `xpk workload create-pathways`(preferred) or `xpk'
544
- ' workload create --use-pathways.` (--use-pathways will be'
545
- ' deprecated soon).'
546
- ),
547
- )
548
- custom_parser.add_argument(
549
- '--server-image',
550
- type=str,
551
- default='us-docker.pkg.dev/cloud-tpu-v2-images/pathways/server:latest',
552
- help=(
553
- 'Please provide the server image for Pathways. This arg can only be'
554
- ' used in `xpk workload create-pathways`(preferred) or `xpk'
555
- ' workload create --use-pathways.` (--use-pathways will be'
556
- ' deprecated soon).'
557
- ),
558
- )
559
- custom_parser.add_argument(
560
- '--pathways-gcs-location',
561
- type=str,
562
- default='gs://cloud-pathways-staging/tmp',
602
+ default=None,
563
603
  help=(
564
- 'Please provide the GCS location to store Pathways artifacts. This'
565
- ' arg can only be used in `xpk workload create-pathways`(preferred)'
566
- ' or `xpk workload create --use-pathways.` (--use-pathways will be'
567
- ' deprecated soon).'
604
+ 'Adding this argument specifies additional user-defined exit codes'
605
+ ' that allow restarting the workload when --max-restarts is set to'
606
+ ' a value greater than 0. By default, workloads restart on exit'
607
+ ' codes 42 and 127-255. Any exit codes provided through this flag'
608
+ ' will be included alongside the default codes for restarting'
609
+ ' conditions.'
568
610
  ),
569
611
  )
570
612
  custom_parser.add_argument(
@@ -618,10 +660,10 @@ def add_shared_workload_base_docker_image_arguments(args_parsers):
618
660
  custom_parser.add_argument(
619
661
  '--base-docker-image',
620
662
  type=str,
621
- default=default_docker_image,
663
+ default=DEFAULT_DOCKER_IMAGE,
622
664
  help=(
623
665
  'The base docker-image to use, default'
624
- f' {default_docker_image}. If using a custom docker image it'
666
+ f' {DEFAULT_DOCKER_IMAGE}. If using a custom docker image it'
625
667
  ' is typically addressed as gcr.io/${PROJECT}/${NAME}:latest.'
626
668
  ' This docker image will be used as a base image by default and'
627
669
  ' the `--script-dir` by default will be added to the image.'
@@ -630,7 +672,7 @@ def add_shared_workload_base_docker_image_arguments(args_parsers):
630
672
  custom_parser.add_argument(
631
673
  '--script-dir',
632
674
  type=directory_path_type,
633
- default=default_script_dir,
675
+ default=DEFAULT_SCRIPT_DIR,
634
676
  help=(
635
677
  'The local location of the directory to copy to the docker image'
636
678
  ' and run the main command from. Defaults to current working'
@@ -0,0 +1,15 @@
1
+ """
2
+ Copyright 2023 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
@@ -0,0 +1,13 @@
1
+ apiVersion: xpk.x-k8s.io/v1
2
+ kind: Storage
3
+ metadata:
4
+ name: $NAME
5
+ spec:
6
+ auto_mount:
7
+ cluster:
8
+ manifest:
9
+ mount_point:
10
+ readonly:
11
+ type: $NAME
12
+ pvc:
13
+ pv:
xpk/utils/gcs_utils.py ADDED
@@ -0,0 +1,125 @@
1
+ """
2
+ Copyright 2025 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ from pathlib import Path
18
+
19
+ from google.cloud.storage import transfer_manager, Client
20
+ from .console import xpk_print
21
+
22
+
23
def upload_file_to_gcs(
    storage_client: Client, bucket_name: str, bucket_path: str, file: str
):
  """Upload a single local file to the blob `bucket_path` in `bucket_name`.

  Args:
    storage_client: authenticated google.cloud.storage client.
    bucket_name: name of the destination GCS bucket.
    bucket_path: full blob name the file is stored under.
    file: local filesystem path of the file to upload.
  """
  destination_blob = storage_client.bucket(bucket_name).blob(bucket_path)
  destination_blob.upload_from_filename(file)
29
+
30
+
31
def upload_directory_to_gcs(
    storage_client: Client,
    bucket_name: str,
    bucket_path: str,
    source_directory: str,
    workers: int = 8,
):
  """Upload every file in a directory, including all files in subdirectories.

  Each blob name is the file's path relative to `source_directory`, prefixed
  with `bucket_path`. For complete control of the blob name for each file
  (and other aspects of individual blob metadata), use
  transfer_manager.upload_many() instead.

  Args:
    storage_client: authenticated google.cloud.storage client.
    bucket_name: name of the destination GCS bucket.
    bucket_path: prefix prepended to every uploaded blob name.
    source_directory: local directory tree to upload.
    workers: number of concurrent upload workers.
  """
  xpk_print(f"Uploading directory {source_directory} to bucket {bucket_name}")
  bucket = storage_client.bucket(bucket_name)

  # Walk the whole tree and keep regular files only; the path relative to
  # `source_directory` (as a string) becomes the blob name.
  root = Path(source_directory)
  filenames = [
      str(path.relative_to(source_directory))
      for path in root.rglob("*")
      if path.is_file()
  ]

  xpk_print(f"Found {len(filenames)} files.")
  # Kick off the concurrent upload.
  outcomes = transfer_manager.upload_many_from_filenames(
      bucket=bucket,
      filenames=filenames,
      source_directory=source_directory,
      max_workers=workers,
      blob_name_prefix=bucket_path,
  )

  # upload_many_from_filenames returns, per input filename and in order,
  # either None (success) or the exception that was raised.
  for filename, outcome in zip(filenames, outcomes):
    if isinstance(outcome, Exception):
      xpk_print(f"Failed to upload {filename} due to exception: {outcome}")
    else:
      xpk_print(f"Uploaded {filename} to {bucket.name}.")
79
+
80
+
81
def check_file_exists(
    storage_client: Client, bucket_name: str, filename: str
) -> bool:
  """Return True iff blob `filename` exists in bucket `bucket_name`.

  Args:
    storage_client: authenticated google.cloud.storage client.
    bucket_name: name of the bucket to look in.
    filename: blob name to check for.

  Returns:
    True when the blob exists, False otherwise.

  NOTE(review): get_bucket() issues an extra metadata request and raises if
  the bucket itself is missing — bucket() would skip that round trip; kept
  as-is to preserve current behavior.
  """
  # Bug fix: the log message previously read "file (unknown)" because the
  # f-string never interpolated the filename being checked.
  xpk_print(f"Checking if file {filename} exists in bucket: {bucket_name}")
  bucket = storage_client.get_bucket(bucket_name)
  is_file: bool = bucket.blob(filename).exists()
  return is_file
88
+
89
+
90
def download_bucket_to_dir(
    storage_client: Client,
    bucket_name: str,
    bucket_path: str,
    destination_directory: str = "",
    workers: int = 8,
    max_results: int = 1000,
):
  """Download all of the blobs in a bucket, concurrently in a process pool.

  The filename of each blob once downloaded is derived from the blob name
  and the `destination_directory` parameter. For complete control of the
  filename of each blob, use transfer_manager.download_many() instead.

  Directories will be created automatically as needed, for instance to
  accommodate blob names that include slashes.

  Args:
    storage_client: authenticated google.cloud.storage client.
    bucket_name: name of the source GCS bucket.
    bucket_path: blob-name prefix to restrict the listing to.
    destination_directory: local directory blobs are written under.
    workers: number of concurrent download workers.
    max_results: cap on how many blobs are listed (and thus downloaded).
  """
  source_bucket = storage_client.bucket(bucket_name)

  # List at most `max_results` blob names under the prefix; names keep
  # their slashes, so subdirectories are recreated locally.
  blob_names = [
      blob.name
      for blob in source_bucket.list_blobs(
          max_results=max_results, prefix=bucket_path
      )
  ]

  outcomes = transfer_manager.download_many_to_path(
      source_bucket,
      blob_names,
      destination_directory=destination_directory,
      max_workers=workers,
  )

  # download_many_to_path reports, per blob and in order, either None
  # (success) or the exception that was raised.
  for blob_name, outcome in zip(blob_names, outcomes):
    if isinstance(outcome, Exception):
      xpk_print(f"Failed to download {blob_name} due to exception: {outcome}")
    else:
      xpk_print(
          f"Downloaded {blob_name} to {destination_directory + blob_name}."
      )
xpk/utils/kubectl.py ADDED
@@ -0,0 +1,57 @@
1
+ """
2
+ Copyright 2025 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ from kubernetes.client.exceptions import ApiException
18
+ from kubernetes.dynamic import DynamicClient
19
+
20
+ from .console import xpk_print
21
+
22
+
23
def apply_kubectl_manifest(client, manifest):
  """Apply a list of Kubernetes objects, mimicking `kubectl apply`.

  For every object in `manifest`: probe for an existing resource with the
  same kind/name/namespace and merge-patch it; when the probe 404s, create
  the object instead. Any other API error is logged and the object is
  skipped (best-effort, no exception propagates).

  Args:
    client: configured kubernetes.client ApiClient.
    manifest: iterable of manifest dicts (apiVersion/kind/metadata/...).
  """
  xpk_print('Applying manifest')
  dyn_client = DynamicClient(client)

  for obj in manifest:
    kind = obj['kind']
    name = obj['metadata']['name']
    # Objects without an explicit namespace land in 'default'.
    namespace = obj.get('metadata', {}).get('namespace', 'default')

    resource = dyn_client.resources.get(
        api_version=obj['apiVersion'], kind=kind
    )

    try:
      # Raises ApiException(status=404) when the object does not exist yet.
      resource.get(name=name, namespace=namespace)
      resource.patch(
          body=obj,
          namespace=namespace,
          name=name,
          content_type='application/merge-patch+json',
      )
      xpk_print(f"Updated {kind} '{name}' in namespace '{namespace}'")
    except ApiException as e:
      if e.status == 404:
        resource.create(body=obj, namespace=namespace)
        xpk_print(f"Applied {kind} '{name}' in namespace '{namespace}'")
      else:
        xpk_print(f'Error applying {kind}: {e}')
xpk/utils/objects.py CHANGED
@@ -31,7 +31,9 @@ def chunks(lst: list, n: int):
31
31
  return [lst[i : i + n] for i in range(0, len(lst), n)]
32
32
 
33
33
 
34
- def get_value_from_map(key: str, map_to_search: dict) -> tuple[int, str | None]:
34
+ def get_value_from_map(
35
+ key: str, map_to_search: dict, verbose: bool = True
36
+ ) -> tuple[int, str | None]:
35
37
  """Helper function to get value from a map if the key exists.
36
38
 
37
39
  Args:
@@ -47,10 +49,11 @@ def get_value_from_map(key: str, map_to_search: dict) -> tuple[int, str | None]:
47
49
  if value:
48
50
  return 0, value
49
51
  else:
50
- xpk_print(
51
- f'Unable to find key: {key} in map: {map_to_search}.'
52
- f'The map has the following keys: {map_to_search.keys()}'
53
- )
52
+ if verbose:
53
+ xpk_print(
54
+ f'Unable to find key: {key} in map: {map_to_search}.'
55
+ f'The map has the following keys: {map_to_search.keys()}'
56
+ )
54
57
  return 1, value
55
58
 
56
59
 
xpk/utils/templates.py ADDED
@@ -0,0 +1,28 @@
1
+ """
2
+ Copyright 2025 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ import os
18
+
19
+ import ruamel.yaml
20
+
21
+ yaml = ruamel.yaml.YAML()
22
+
23
+
24
+ def load(path: str) -> dict:
25
+ template_path = os.path.dirname(__file__) + path
26
+ with open(template_path, "r", encoding="utf-8") as file:
27
+ data: dict = yaml.load(file)
28
+ return data
@@ -0,0 +1,80 @@
1
+ """
2
+ Copyright 2025 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ from ..core.commands import run_command_for_value
18
+ from .console import xpk_exit, xpk_print
19
+ from ..commands.config import xpk_cfg
20
+ from ..core.config import DEPENDENCIES_KEY
21
+ from ..commands.version import get_xpk_version
22
+
23
+
24
# Every dependency-missing message points at the same prerequisites section
# of the README, so the shared tail is factored out once.
_PREREQUISITES_HINT = (
    ' Please follow'
    ' https://github.com/AI-Hypercomputer/xpk?tab=readme-ov-file#prerequisites'
    ' to install xpk prerequisites.'
)

# Maps each required CLI dependency to a cheap probe command and the message
# shown when that probe fails. `kjob` and `kueuectl` are kubectl plugins,
# hence the `kubectl <plugin>` probe commands.
validation_commands = {
    'kubectl': {
        'command': 'kubectl --help',
        'message': '`kubectl` not installed.' + _PREREQUISITES_HINT,
    },
    'kjob': {
        'command': 'kubectl kjob --help',
        'message': '`kjobctl` not installed.' + _PREREQUISITES_HINT,
    },
    'gcloud': {
        'command': 'gcloud version',
        # Bug fix: message previously read '`gcloud not installed' with an
        # unbalanced backtick.
        'message': '`gcloud` not installed.' + _PREREQUISITES_HINT,
    },
    'docker': {
        'command': 'docker version',
        'message': '`docker` not installed.' + _PREREQUISITES_HINT,
    },
    'kueuectl': {
        'command': 'kubectl kueue --help',
        'message': '`kueuectl` not installed.' + _PREREQUISITES_HINT,
    },
}
66
+
67
+
68
def validate_dependencies():
  """Verify required CLI tools are installed, at most once per xpk version.

  A successful run stamps the current xpk version into the config under
  DEPENDENCIES_KEY; validation is re-run only when that stamp is missing
  or differs from the running version. On the first failing probe, the
  corresponding help message is printed and the process exits with the
  probe's return code.
  """
  xpk_version = get_xpk_version()
  # None (never validated) also fails this comparison, forcing a first run.
  if xpk_cfg.get(DEPENDENCIES_KEY) != xpk_version:
    for name, check in validation_commands.items():
      code, _ = run_command_for_value(
          check['command'], f'Validate {name} installation.', None
      )
      if code != 0:
        xpk_print(check['message'])
        xpk_exit(code)
    xpk_cfg.set(DEPENDENCIES_KEY, get_xpk_version())