xpk-0.6.0-py3-none-any.whl → xpk-0.7.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xpk/api/__init__.py +15 -0
- xpk/api/storage_crd.yaml +52 -0
- xpk/commands/batch.py +27 -5
- xpk/commands/cluster.py +104 -80
- xpk/commands/cluster_gcluster.py +94 -10
- xpk/commands/common.py +44 -0
- xpk/commands/config.py +29 -0
- xpk/commands/info.py +8 -10
- xpk/commands/inspector.py +5 -11
- xpk/commands/job.py +9 -7
- xpk/commands/kind.py +34 -4
- xpk/commands/kjob_common.py +44 -0
- xpk/commands/run.py +128 -0
- xpk/commands/shell.py +27 -7
- xpk/commands/storage.py +280 -0
- xpk/commands/version.py +6 -18
- xpk/commands/workload.py +381 -184
- xpk/core/blueprint/blueprint_definitions.py +1 -0
- xpk/core/blueprint/blueprint_generator.py +132 -76
- xpk/core/capacity.py +185 -0
- xpk/core/cluster.py +564 -0
- xpk/core/cluster_private.py +6 -3
- xpk/core/commands.py +18 -14
- xpk/core/config.py +179 -0
- xpk/core/docker_container.py +225 -0
- xpk/core/docker_image.py +210 -0
- xpk/core/docker_resources.py +350 -0
- xpk/core/filestore.py +251 -0
- xpk/core/gcloud_context.py +196 -0
- xpk/core/gcluster_manager.py +20 -2
- xpk/core/gcsfuse.py +50 -0
- xpk/core/kjob.py +257 -18
- xpk/core/kueue.py +12 -6
- xpk/core/monitoring.py +134 -0
- xpk/core/nap.py +32 -20
- xpk/core/network.py +377 -0
- xpk/core/nodepool.py +581 -0
- xpk/core/pathways.py +124 -45
- xpk/core/remote_state/__init__.py +15 -0
- xpk/core/remote_state/fuse_remote_state.py +99 -0
- xpk/core/remote_state/remote_state_client.py +38 -0
- xpk/core/resources.py +238 -0
- xpk/core/scheduling.py +253 -0
- xpk/core/storage.py +581 -0
- xpk/core/system_characteristics.py +38 -1
- xpk/core/vertex.py +105 -0
- xpk/core/workload.py +209 -1
- xpk/core/workload_decorators/rdma_decorator.py +25 -5
- xpk/core/workload_decorators/storage_decorator.py +52 -0
- xpk/core/workload_decorators/tcpxo_decorator.py +70 -37
- xpk/main.py +3 -1
- xpk/parser/batch.py +10 -151
- xpk/parser/cluster.py +49 -8
- xpk/parser/common.py +189 -1
- xpk/parser/config.py +49 -0
- xpk/parser/core.py +27 -1
- xpk/parser/info.py +2 -1
- xpk/parser/inspector.py +3 -3
- xpk/parser/job.py +25 -4
- xpk/parser/kind.py +3 -2
- xpk/parser/run.py +47 -0
- xpk/parser/shell.py +10 -1
- xpk/parser/storage.py +326 -0
- xpk/parser/validators.py +3 -3
- xpk/parser/workload.py +118 -76
- xpk/templates/__init__.py +15 -0
- xpk/templates/storage.yaml +13 -0
- xpk/utils/gcs_utils.py +125 -0
- xpk/utils/kubectl.py +57 -0
- xpk/utils/objects.py +8 -5
- xpk/utils/templates.py +28 -0
- xpk/utils/validation.py +80 -0
- {xpk-0.6.0.dist-info → xpk-0.7.1.dist-info}/METADATA +169 -15
- xpk-0.7.1.dist-info/RECORD +92 -0
- {xpk-0.6.0.dist-info → xpk-0.7.1.dist-info}/WHEEL +1 -1
- xpk/core/core.py +0 -2824
- xpk-0.6.0.dist-info/RECORD +0 -57
- {xpk-0.6.0.dist-info → xpk-0.7.1.dist-info}/entry_points.txt +0 -0
- {xpk-0.6.0.dist-info → xpk-0.7.1.dist-info/licenses}/LICENSE +0 -0
- {xpk-0.6.0.dist-info → xpk-0.7.1.dist-info}/top_level.txt +0 -0
xpk/parser/workload.py
CHANGED
```diff
@@ -20,9 +20,9 @@ from ..commands.workload import (
     workload_delete,
     workload_list,
 )
-from ..core.
-from .validators import directory_path_type, workload_name_type
+from ..core.docker_image import DEFAULT_DOCKER_IMAGE, DEFAULT_SCRIPT_DIR
 from .common import add_shared_arguments
+from .validators import directory_path_type, name_type
 
 
 def set_workload_parsers(workload_parser):
@@ -67,11 +67,7 @@ def set_workload_parsers(workload_parser):
           'Arguments for configuring autoprovisioning.',
       )
   )
-
-          'Pathways Image Arguments',
-          'If --use-pathways is provided, user wants to set up a'
-          'Pathways workload on xpk.',
-      )
+
   workload_vertex_tensorboard_arguments = (
       workload_create_parser.add_argument_group(
           'Vertex Tensorboard Arguments',
@@ -114,6 +110,12 @@ def set_workload_parsers(workload_parser):
       ),
   )
 
+  workload_create_parser_optional_arguments.add_argument(
+      '--storage',
+      action='append',
+      default=[],
+      help='Names of storages the workload uses',
+  )
   workload_create_parser_optional_arguments.add_argument(
       '--num-nodes',
       type=int,
@@ -151,6 +153,15 @@ def set_workload_parsers(workload_parser):
       ),
   )
 
+  workload_create_parser_optional_arguments.add_argument(
+      '--use-pathways',
+      action='store_true',
+      help=(
+          'Please use `xpk workload create-pathways` instead to'
+          ' create Pathways workloads.'
+      ),
+  )
+
   # Autoprovisioning workload arguments
   workload_create_autoprovisioning_arguments.add_argument(
       '--on-demand',
@@ -178,16 +189,6 @@ def set_workload_parsers(workload_parser):
       ),
   )
 
-  # Pathways workload arguments
-  workload_pathways_workload_arguments.add_argument(
-      '--use-pathways',
-      action='store_true',
-      help=(
-          'DECRATING SOON!!! Please use `xpk workload create-pathways` instead.'
-          ' Provide this argument to create Pathways workloads.'
-      ),
-  )
-
   # "workload create-pathways" command parser.
   workload_create_pathways_parser = workload_subcommands.add_parser(
       'create-pathways', help='Create a new job.'
@@ -230,6 +231,45 @@ def set_workload_parsers(workload_parser):
       help='The tpu type to use, v5litepod-16, etc.',
   )
 
+  ### "workload create-pathways" Optional arguments, specific to Pathways
+  workload_create_pathways_parser_optional_arguments.add_argument(
+      '--headless',
+      action='store_true',
+      help=(
+          'Please provide this argument to create Pathways workloads in'
+          ' headless mode. This arg can only be used in `xpk workload'
+          ' create-pathways`.'
+      ),
+  )
+  workload_create_pathways_parser_optional_arguments.add_argument(
+      '--proxy-server-image',
+      type=str,
+      default=(
+          'us-docker.pkg.dev/cloud-tpu-v2-images/pathways/proxy_server:latest'
+      ),
+      help=(
+          'Please provide the proxy server image for Pathways. This arg can'
+          ' only be used in `xpk workload create-pathways`.'
+      ),
+  )
+  workload_create_pathways_parser_optional_arguments.add_argument(
+      '--server-image',
+      type=str,
+      default='us-docker.pkg.dev/cloud-tpu-v2-images/pathways/server:latest',
+      help=(
+          'Please provide the server image for Pathways. This arg can only be'
+          ' used in `xpk workload create-pathways`.'
+      ),
+  )
+  workload_create_pathways_parser_optional_arguments.add_argument(
+      '--pathways-gcs-location',
+      type=str,
+      default='gs://cloud-pathways-staging/tmp',
+      help=(
+          'Please provide the GCS location to store Pathways artifacts. This'
+          ' arg can only be used in `xpk workload create-pathways`.'
+      ),
+  )
   workload_create_pathways_parser_optional_arguments.add_argument(
       '--command',
       type=str,
@@ -243,6 +283,45 @@ def set_workload_parsers(workload_parser):
       ),
       required=False,
   )
+  workload_create_pathways_parser_optional_arguments.add_argument(
+      '--storage',
+      action='append',
+      default=[],
+      help='Names of storages the workload uses',
+  )
+
+  workload_create_pathways_parser_optional_arguments.add_argument(
+      '--custom-pathways-server-args',
+      type=str,
+      default=None,
+      help=(
+          'Provide custom Pathways server args as follows -'
+          " --custom-pathways-server-args='--arg_1=xxx --arg2=yyy'"
+      ),
+      required=False,
+  )
+
+  workload_create_pathways_parser_optional_arguments.add_argument(
+      '--custom-pathways-proxy-server-args',
+      type=str,
+      default=None,
+      help=(
+          'Provide custom Pathways proxy server args as follows -'
+          " --custom-pathways-proxy-server-args='--arg_1=xxx --arg2=yyy'"
+      ),
+      required=False,
+  )
+
+  workload_create_pathways_parser_optional_arguments.add_argument(
+      '--custom-pathways-worker-args',
+      type=str,
+      default=None,
+      help=(
+          'Provide custom Pathways worker args as follows -'
+          " --custom-pathways-worker-args='--arg_1=xxx --arg2=yyy'"
+      ),
+      required=False,
+  )
 
   add_shared_workload_create_required_arguments([
       workload_create_parser_required_arguments,
@@ -293,7 +372,7 @@ def set_workload_parsers(workload_parser):
   ### "workload delete" Required arguments
   workload_delete_parser_required_arguments.add_argument(
       '--cluster',
-      type=
+      type=name_type,
       default=None,
       help='The name of the cluster to delete the job on.',
       required=True,
@@ -301,7 +380,7 @@ def set_workload_parsers(workload_parser):
   ### "workload delete" Optional arguments
   workload_delete_parser_optional_arguments.add_argument(
       '--workload',
-      type=
+      type=name_type,
       default=None,
       help=(
           'The name of the workload to delete. If the workload is not'
@@ -352,7 +431,7 @@ def set_workload_parsers(workload_parser):
 
   workload_list_parser.add_argument(
       '--cluster',
-      type=
+      type=name_type,
       default=None,
       help='The name of the cluster to list jobs on.',
       required=True,
@@ -428,14 +507,14 @@ def add_shared_workload_create_required_arguments(args_parsers):
   for custom_parser in args_parsers:
     custom_parser.add_argument(
         '--workload',
-        type=
+        type=name_type,
         default=None,
         help='The name of the workload to run.',
         required=True,
     )
     custom_parser.add_argument(
         '--cluster',
-        type=
+        type=name_type,
         default=None,
         help='The name of the cluster to run the job on.',
         required=True,
@@ -503,6 +582,12 @@ def add_shared_workload_create_optional_arguments(args_parsers):
             ' event or deletion request.Defaults to 30 seconds.'
         ),
     )
+    custom_parser.add_argument(
+        '--remote-python-sidecar-image',
+        type=str,
+        default=None,
+        help='Remote Python sidecar server image.',
+    )
     custom_parser.add_argument(
         '--enable-debug-logs',
         action='store_true',
@@ -512,59 +597,16 @@ def add_shared_workload_create_optional_arguments(args_parsers):
         ),
     )
     custom_parser.add_argument(
-        '--restart-on-
-        action='store_true',
-        help=(
-            'Adding this argument will return user failures back to the jobset'
-            ' manager allowing restarts on user code when --max-restarts is set'
-            ' greater than 0. By default, this is not enabled, and workloads'
-            ' will not restart from user code failures. This is enabled by'
-            ' default on Pathways workloads.'
-        ),
-    )
-    custom_parser.add_argument(
-        '--headless',
-        action='store_true',
-        help=(
-            'Please provide this argument to create Pathways workloads in'
-            ' headless mode. This arg can only be used in `xpk workload'
-            ' create-pathways`(preferred) or `xpk workload create'
-            ' --use-pathways.` (--use-pathways will be deprecated soon).'
-        ),
-    )
-    custom_parser.add_argument(
-        '--proxy-server-image',
+        '--restart-on-exit-codes',
         type=str,
-        default=
-            'us-docker.pkg.dev/cloud-tpu-v2-images/pathways/proxy_server:latest'
-        ),
-        help=(
-            'Please provide the proxy server image for Pathways. This arg can'
-            ' only be used in `xpk workload create-pathways`(preferred) or `xpk'
-            ' workload create --use-pathways.` (--use-pathways will be'
-            ' deprecated soon).'
-        ),
-    )
-    custom_parser.add_argument(
-        '--server-image',
-        type=str,
-        default='us-docker.pkg.dev/cloud-tpu-v2-images/pathways/server:latest',
-        help=(
-            'Please provide the server image for Pathways. This arg can only be'
-            ' used in `xpk workload create-pathways`(preferred) or `xpk'
-            ' workload create --use-pathways.` (--use-pathways will be'
-            ' deprecated soon).'
-        ),
-    )
-    custom_parser.add_argument(
-        '--pathways-gcs-location',
-        type=str,
-        default='gs://cloud-pathways-staging/tmp',
+        default=None,
         help=(
-            '
-            '
-            '
-            '
+            'Adding this argument specifies additional user-defined exit codes'
+            ' that allow restarting the workload when --max-restarts is set to'
+            ' a value greater than 0. By default, workloads restart on exit'
+            ' codes 42 and 127-255. Any exit codes provided through this flag'
+            ' will be included alongside the default codes for restarting'
+            ' conditions.'
        ),
    )
    custom_parser.add_argument(
@@ -618,10 +660,10 @@ def add_shared_workload_base_docker_image_arguments(args_parsers):
    custom_parser.add_argument(
        '--base-docker-image',
        type=str,
-        default=
+        default=DEFAULT_DOCKER_IMAGE,
        help=(
            'The base docker-image to use, default'
-            f' {
+            f' {DEFAULT_DOCKER_IMAGE}. If using a custom docker image it'
            ' is typically addressed as gcr.io/${PROJECT}/${NAME}:latest.'
            ' This docker image will be used as a base image by default and'
            ' the `--script-dir` by default will be added to the image.'
@@ -630,7 +672,7 @@ def add_shared_workload_base_docker_image_arguments(args_parsers):
    custom_parser.add_argument(
        '--script-dir',
        type=directory_path_type,
-        default=
+        default=DEFAULT_SCRIPT_DIR,
        help=(
            'The local location of the directory to copy to the docker image'
            ' and run the main command from. Defaults to current working'
```
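Two details of the new flags are easy to miss in the hunks above: `--storage` is declared with `action='append'`, so it is repeated once per storage name rather than comma-separated, and `--restart-on-exit-codes` arrives as a plain string whose splitting is left to the caller. A minimal standalone sketch of that parsing behavior, assuming nothing beyond the argparse settings shown in the diff (the `parse_exit_codes` helper is hypothetical, not xpk code):

```python
import argparse

# Standalone sketch: the same argparse settings as in the diff, outside xpk.
parser = argparse.ArgumentParser(prog='xpk workload create')
parser.add_argument(
    '--storage',
    action='append',
    default=[],
    help='Names of storages the workload uses',
)
parser.add_argument('--restart-on-exit-codes', type=str, default=None)

args = parser.parse_args([
    '--storage', 'state-bucket',
    '--storage', 'dataset-bucket',
    '--restart-on-exit-codes', '3,9,17',
])
assert args.storage == ['state-bucket', 'dataset-bucket']  # append accumulates


def parse_exit_codes(raw: str | None) -> list[int]:
  """Hypothetical helper: split a '3,9,17'-style flag value into ints."""
  return [int(code) for code in raw.split(',')] if raw else []


# Per the flag's help text, these user codes would be merged with the
# defaults (42 and 127-255) when deciding whether to restart.
print(parse_exit_codes(args.restart_on_exit_codes))  # [3, 9, 17]
```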
xpk/templates/__init__.py
ADDED

```diff
@@ -0,0 +1,15 @@
+"""
+Copyright 2023 Google LLC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
```
xpk/utils/gcs_utils.py
ADDED
```diff
@@ -0,0 +1,125 @@
+"""
+Copyright 2025 Google LLC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from pathlib import Path
+
+from google.cloud.storage import transfer_manager, Client
+from .console import xpk_print
+
+
+def upload_file_to_gcs(
+    storage_client: Client, bucket_name: str, bucket_path: str, file: str
+):
+  bucket = storage_client.bucket(bucket_name)
+  blob = bucket.blob(bucket_path)
+  blob.upload_from_filename(file)
+
+
+def upload_directory_to_gcs(
+    storage_client: Client,
+    bucket_name: str,
+    bucket_path: str,
+    source_directory: str,
+    workers: int = 8,
+):
+  """Upload every file in a directory, including all files in subdirectories.
+
+  Each blob name is derived from the filename, not including the `directory`
+  parameter itself. For complete control of the blob name for each file (and
+  other aspects of individual blob metadata), use
+  transfer_manager.upload_many() instead.
+  """
+  xpk_print(f"Uploading directory {source_directory} to bucket {bucket_name}")
+  bucket = storage_client.bucket(bucket_name)
+
+  directory_as_path_obj = Path(source_directory)
+  paths = directory_as_path_obj.rglob("*")
+
+  # Filter so the list only includes files, not directories themselves.
+  file_paths = [path for path in paths if path.is_file()]
+
+  # These paths are relative to the current working directory. Next, make them
+  # relative to `directory`
+  relative_paths = [path.relative_to(source_directory) for path in file_paths]
+
+  # Finally, convert them all to strings.
+  string_paths = [str(path) for path in relative_paths]
+
+  xpk_print(f"Found {len(string_paths)} files.")
+  # Start the upload.
+  results = transfer_manager.upload_many_from_filenames(
+      bucket=bucket,
+      filenames=string_paths,
+      source_directory=source_directory,
+      max_workers=workers,
+      blob_name_prefix=bucket_path,
+  )
+
+  for name, result in zip(string_paths, results):
+    # The results list is either `None` or an exception for each filename in
+    # the input list, in order.
+
+    if isinstance(result, Exception):
+      xpk_print(f"Failed to upload {name} due to exception: {result}")
+    else:
+      xpk_print(f"Uploaded {name} to {bucket.name}.")
+
+
+def check_file_exists(
+    storage_client: Client, bucket_name: str, filename: str
+) -> bool:
+  xpk_print(f"Checking if file {filename} exists in bucket: {bucket_name}")
+  bucket = storage_client.get_bucket(bucket_name)
+  is_file: bool = bucket.blob(filename).exists()
+  return is_file
+
+
+def download_bucket_to_dir(
+    storage_client: Client,
+    bucket_name: str,
+    bucket_path: str,
+    destination_directory: str = "",
+    workers: int = 8,
+    max_results: int = 1000,
+):
+  """Download all of the blobs in a bucket, concurrently in a process pool.
+
+  The filename of each blob once downloaded is derived from the blob name and
+  the `destination_directory `parameter. For complete control of the filename
+  of each blob, use transfer_manager.download_many() instead.
+
+  Directories will be created automatically as needed, for instance to
+  accommodate blob names that include slashes.
+  """
+  bucket = storage_client.bucket(bucket_name)
+
+  blob_names = [
+      blob.name
+      for blob in bucket.list_blobs(max_results=max_results, prefix=bucket_path)
+  ]
+
+  results = transfer_manager.download_many_to_path(
+      bucket,
+      blob_names,
+      destination_directory=destination_directory,
+      max_workers=workers,
+  )
+
+  for name, result in zip(blob_names, results):
+    if isinstance(result, Exception):
+      xpk_print(f"Failed to download {name} due to exception: {result}")
+    else:
+      xpk_print(f"Downloaded {name} to {destination_directory + name}.")
```
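A hedged usage sketch for these helpers; the project ID, bucket name, and paths are placeholders, and it assumes `google-cloud-storage` is installed and application-default credentials are configured:

```python
from google.cloud.storage import Client

from xpk.utils.gcs_utils import check_file_exists, upload_directory_to_gcs

# Placeholder project; authentication comes from the environment
# (e.g. `gcloud auth application-default login`).
client = Client(project='my-project')

# Uploads every file under ./artifacts; `bucket_path` becomes the blob-name
# prefix, so ./artifacts/run.yaml lands at gs://my-bucket/xpk-staging/run.yaml.
upload_directory_to_gcs(
    client,
    bucket_name='my-bucket',
    bucket_path='xpk-staging/',
    source_directory='./artifacts',
)

if check_file_exists(client, 'my-bucket', 'xpk-staging/run.yaml'):
  print('artifact present')
```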
xpk/utils/kubectl.py
ADDED
```diff
@@ -0,0 +1,57 @@
+"""
+Copyright 2025 Google LLC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from kubernetes.client.exceptions import ApiException
+from kubernetes.dynamic import DynamicClient
+
+from .console import xpk_print
+
+
+def apply_kubectl_manifest(client, manifest):
+  xpk_print('Applying manifest')
+  dynamic_client = DynamicClient(client)
+
+  for obj in manifest:
+    api_version = obj['apiVersion']
+    kind = obj['kind']
+    namespace = obj.get('metadata', {}).get('namespace', 'default')
+
+    api_resource = dynamic_client.resources.get(
+        api_version=api_version, kind=kind
+    )
+
+    try:
+      api_resource.get(name=obj['metadata']['name'], namespace=namespace)
+      api_resource.patch(
+          body=obj,
+          namespace=namespace,
+          name=obj['metadata']['name'],
+          content_type='application/merge-patch+json',
+      )
+      xpk_print(
+          f"Updated {kind} '{obj['metadata']['name']}' in namespace"
+          f" '{namespace}'"
+      )
+
+    except ApiException as e:
+      if e.status == 404:
+        api_resource.create(body=obj, namespace=namespace)
+        xpk_print(
+            f"Applied {kind} '{obj['metadata']['name']}' in namespace"
+            f" '{namespace}'"
+        )
+      else:
+        xpk_print(f'Error applying {kind}: {e}')
```
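A sketch of how `apply_kubectl_manifest` might be invoked, assuming the `kubernetes` Python client and a reachable cluster; the ConfigMap manifest is purely illustrative:

```python
from kubernetes import client as k8s_client, config as k8s_config

from xpk.utils.kubectl import apply_kubectl_manifest

# Load credentials from the active kubeconfig context.
k8s_config.load_kube_config()
api_client = k8s_client.ApiClient()

# Illustrative manifest: a single ConfigMap. apply_kubectl_manifest patches
# the object if it already exists and creates it on a 404.
manifest = [{
    'apiVersion': 'v1',
    'kind': 'ConfigMap',
    'metadata': {'name': 'xpk-demo', 'namespace': 'default'},
    'data': {'hello': 'world'},
}]
apply_kubectl_manifest(api_client, manifest)
```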
xpk/utils/objects.py
CHANGED
```diff
@@ -31,7 +31,9 @@ def chunks(lst: list, n: int):
   return [lst[i : i + n] for i in range(0, len(lst), n)]
 
 
-def get_value_from_map(
+def get_value_from_map(
+    key: str, map_to_search: dict, verbose: bool = True
+) -> tuple[int, str | None]:
   """Helper function to get value from a map if the key exists.
 
   Args:
@@ -47,10 +49,11 @@ def get_value_from_map(key: str, map_to_search: dict) -> tuple[int, str | None]:
   if value:
     return 0, value
   else:
-
-
-
-
+    if verbose:
+      xpk_print(
+          f'Unable to find key: {key} in map: {map_to_search}.'
+          f'The map has the following keys: {map_to_search.keys()}'
+      )
     return 1, value
 
 
```
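The new `verbose` parameter only gates the diagnostic print on a miss; the return contract is unchanged. A behavior sketch, assuming the lookup still returns `None` for an absent key as the visible branch implies:

```python
from xpk.utils.objects import get_value_from_map

# A hit returns (0, value).
code, value = get_value_from_map('v5litepod-16', {'v5litepod-16': 16})
assert (code, value) == (0, 16)

# A miss returns (1, None); verbose=False suppresses the xpk_print
# diagnostic that would otherwise fire before returning.
code, value = get_value_from_map('missing', {'other': 1}, verbose=False)
assert (code, value) == (1, None)
```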
xpk/utils/templates.py
ADDED
```diff
@@ -0,0 +1,28 @@
+"""
+Copyright 2025 Google LLC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import os
+
+import ruamel.yaml
+
+yaml = ruamel.yaml.YAML()
+
+
+def load(path: str) -> dict:
+  template_path = os.path.dirname(__file__) + path
+  with open(template_path, "r", encoding="utf-8") as file:
+    data: dict = yaml.load(file)
+  return data
```
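Note that `load` joins `os.path.dirname(__file__)` and `path` by plain string concatenation rather than `os.path.join`, so callers must pass a path with a leading separator. A usage sketch; the relative location of `storage.yaml` is an assumption based on the `xpk/templates/storage.yaml` entry in the file list:

```python
from xpk.utils.templates import load

# The leading '/' matters: load() concatenates the directory of templates.py
# with this string directly. The '../templates' hop is assumed from the
# package layout (xpk/utils/templates.py vs. xpk/templates/storage.yaml).
storage_template: dict = load('/../templates/storage.yaml')
print(storage_template.get('kind'))
```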
xpk/utils/validation.py
ADDED
```diff
@@ -0,0 +1,80 @@
+"""
+Copyright 2025 Google LLC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from ..core.commands import run_command_for_value
+from .console import xpk_exit, xpk_print
+from ..commands.config import xpk_cfg
+from ..core.config import DEPENDENCIES_KEY
+from ..commands.version import get_xpk_version
+
+
+validation_commands = {
+    'kubectl': {
+        'command': 'kubectl --help',
+        'message': (
+            '`kubectl` not installed. Please follow'
+            ' https://github.com/AI-Hypercomputer/xpk?tab=readme-ov-file#prerequisites'
+            ' to install xpk prerequisites.'
+        ),
+    },
+    'kjob': {
+        'command': 'kubectl kjob --help',
+        'message': (
+            '`kjobctl` not installed. Please follow'
+            ' https://github.com/AI-Hypercomputer/xpk?tab=readme-ov-file#prerequisites'
+            ' to install xpk prerequisites.'
+        ),
+    },
+    'gcloud': {
+        'command': 'gcloud version',
+        'message': (
+            '`gcloud not installed. Please follow'
+            ' https://github.com/AI-Hypercomputer/xpk?tab=readme-ov-file#prerequisites'
+            ' to install xpk prerequisites.'
+        ),
+    },
+    'docker': {
+        'command': 'docker version',
+        'message': (
+            '`docker` not installed. Please follow'
+            ' https://github.com/AI-Hypercomputer/xpk?tab=readme-ov-file#prerequisites'
+            ' to install xpk prerequisites.'
+        ),
+    },
+    'kueuectl': {
+        'command': 'kubectl kueue --help',
+        'message': (
+            '`kueuectl` not installed. Please follow'
+            ' https://github.com/AI-Hypercomputer/xpk?tab=readme-ov-file#prerequisites'
+            ' to install xpk prerequisites.'
+        ),
+    },
+}
+
+
+def validate_dependencies():
+  deps_version = xpk_cfg.get(DEPENDENCIES_KEY)
+  xpk_version = get_xpk_version()
+  if deps_version is None or deps_version != xpk_version:
+    for name, check in validation_commands.items():
+      cmd, message = check['command'], check['message']
+      code, _ = run_command_for_value(
+          cmd, f'Validate {name} installation.', None
+      )
+      if code != 0:
+        xpk_print(message)
+        xpk_exit(code)
+    xpk_cfg.set(DEPENDENCIES_KEY, get_xpk_version())
```