xpk 0.6.0__py3-none-any.whl → 0.7.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. xpk/api/__init__.py +15 -0
  2. xpk/api/storage_crd.yaml +52 -0
  3. xpk/commands/batch.py +27 -5
  4. xpk/commands/cluster.py +104 -80
  5. xpk/commands/cluster_gcluster.py +94 -10
  6. xpk/commands/common.py +44 -0
  7. xpk/commands/config.py +29 -0
  8. xpk/commands/info.py +8 -10
  9. xpk/commands/inspector.py +5 -11
  10. xpk/commands/job.py +9 -7
  11. xpk/commands/kind.py +34 -4
  12. xpk/commands/kjob_common.py +44 -0
  13. xpk/commands/run.py +128 -0
  14. xpk/commands/shell.py +27 -7
  15. xpk/commands/storage.py +280 -0
  16. xpk/commands/version.py +6 -18
  17. xpk/commands/workload.py +381 -184
  18. xpk/core/blueprint/blueprint_definitions.py +1 -0
  19. xpk/core/blueprint/blueprint_generator.py +132 -76
  20. xpk/core/capacity.py +185 -0
  21. xpk/core/cluster.py +564 -0
  22. xpk/core/cluster_private.py +6 -3
  23. xpk/core/commands.py +18 -14
  24. xpk/core/config.py +179 -0
  25. xpk/core/docker_container.py +225 -0
  26. xpk/core/docker_image.py +210 -0
  27. xpk/core/docker_resources.py +350 -0
  28. xpk/core/filestore.py +251 -0
  29. xpk/core/gcloud_context.py +196 -0
  30. xpk/core/gcluster_manager.py +20 -2
  31. xpk/core/gcsfuse.py +50 -0
  32. xpk/core/kjob.py +257 -18
  33. xpk/core/kueue.py +12 -6
  34. xpk/core/monitoring.py +134 -0
  35. xpk/core/nap.py +32 -20
  36. xpk/core/network.py +377 -0
  37. xpk/core/nodepool.py +581 -0
  38. xpk/core/pathways.py +124 -45
  39. xpk/core/remote_state/__init__.py +15 -0
  40. xpk/core/remote_state/fuse_remote_state.py +99 -0
  41. xpk/core/remote_state/remote_state_client.py +38 -0
  42. xpk/core/resources.py +238 -0
  43. xpk/core/scheduling.py +253 -0
  44. xpk/core/storage.py +581 -0
  45. xpk/core/system_characteristics.py +38 -1
  46. xpk/core/vertex.py +105 -0
  47. xpk/core/workload.py +209 -1
  48. xpk/core/workload_decorators/rdma_decorator.py +25 -5
  49. xpk/core/workload_decorators/storage_decorator.py +52 -0
  50. xpk/core/workload_decorators/tcpxo_decorator.py +70 -37
  51. xpk/main.py +3 -1
  52. xpk/parser/batch.py +10 -151
  53. xpk/parser/cluster.py +49 -8
  54. xpk/parser/common.py +189 -1
  55. xpk/parser/config.py +49 -0
  56. xpk/parser/core.py +27 -1
  57. xpk/parser/info.py +2 -1
  58. xpk/parser/inspector.py +3 -3
  59. xpk/parser/job.py +25 -4
  60. xpk/parser/kind.py +3 -2
  61. xpk/parser/run.py +47 -0
  62. xpk/parser/shell.py +10 -1
  63. xpk/parser/storage.py +326 -0
  64. xpk/parser/validators.py +3 -3
  65. xpk/parser/workload.py +118 -76
  66. xpk/templates/__init__.py +15 -0
  67. xpk/templates/storage.yaml +13 -0
  68. xpk/utils/gcs_utils.py +125 -0
  69. xpk/utils/kubectl.py +57 -0
  70. xpk/utils/objects.py +8 -5
  71. xpk/utils/templates.py +28 -0
  72. xpk/utils/validation.py +80 -0
  73. {xpk-0.6.0.dist-info → xpk-0.7.1.dist-info}/METADATA +169 -15
  74. xpk-0.7.1.dist-info/RECORD +92 -0
  75. {xpk-0.6.0.dist-info → xpk-0.7.1.dist-info}/WHEEL +1 -1
  76. xpk/core/core.py +0 -2824
  77. xpk-0.6.0.dist-info/RECORD +0 -57
  78. {xpk-0.6.0.dist-info → xpk-0.7.1.dist-info}/entry_points.txt +0 -0
  79. {xpk-0.6.0.dist-info → xpk-0.7.1.dist-info/licenses}/LICENSE +0 -0
  80. {xpk-0.6.0.dist-info → xpk-0.7.1.dist-info}/top_level.txt +0 -0
xpk/commands/info.py CHANGED
@@ -14,19 +14,17 @@ See the License for the specific language governing permissions and
14
14
  limitations under the License.
15
15
  """
16
16
 
17
- from ..utils.console import xpk_exit, xpk_print
18
- from ..core.kueue import verify_kueuectl
19
- from .cluster import set_cluster_command
20
- from ..core.commands import (
21
- run_command_for_value,
22
- )
23
- from ..core.core import (
24
- add_zone_and_project,
25
- )
26
17
  import json
27
- from tabulate import tabulate
28
18
  from argparse import Namespace
29
19
 
20
+ from tabulate import tabulate
21
+
22
+ from ..core.commands import run_command_for_value
23
+ from ..core.gcloud_context import add_zone_and_project
24
+ from ..core.kueue import verify_kueuectl
25
+ from ..utils.console import xpk_exit, xpk_print
26
+ from .common import set_cluster_command
27
+
30
28
  table_fmt = 'plain'
31
29
 
32
30
 
xpk/commands/inspector.py CHANGED
@@ -14,17 +14,13 @@ See the License for the specific language governing permissions and
14
14
  limitations under the License.
15
15
  """
16
16
 
17
+ from ..core.cluster import get_cluster_credentials
17
18
  from ..core.commands import run_command_for_value
18
- from ..core.core import (
19
- CLUSTER_METADATA_CONFIGMAP,
20
- CLUSTER_RESOURCES_CONFIGMAP,
21
- add_zone_and_project,
22
- zone_to_region,
23
- )
19
+ from ..core.gcloud_context import add_zone_and_project, zone_to_region
24
20
  from ..core.kueue import CLUSTER_QUEUE_NAME, LOCAL_QUEUE_NAME
25
- from ..utils.file import append_tmp_file, write_tmp_file
21
+ from ..core.resources import CLUSTER_METADATA_CONFIGMAP, CLUSTER_RESOURCES_CONFIGMAP
26
22
  from ..utils.console import xpk_exit, xpk_print
27
- from .cluster import set_cluster_command
23
+ from ..utils.file import append_tmp_file, write_tmp_file
28
24
  from .workload import get_workload_list
29
25
 
30
26
 
@@ -125,9 +121,7 @@ def inspector(args) -> None:
125
121
  xpk_print(args)
126
122
 
127
123
  add_zone_and_project(args)
128
- set_cluster_command_code = set_cluster_command(args)
129
- if set_cluster_command_code != 0:
130
- xpk_exit(set_cluster_command_code)
124
+ get_cluster_credentials(args)
131
125
 
132
126
  inspector_file = write_tmp_file(
133
127
  '==================\nXPK inspector OUTPUT:\n==================\n'
xpk/commands/job.py CHANGED
@@ -14,16 +14,18 @@ See the License for the specific language governing permissions and
14
14
  limitations under the License.
15
15
  """
16
16
 
17
- from .cluster import set_cluster_command
18
- from .kind import set_local_cluster_command
19
- from ..core.commands import run_command_for_value, run_command_with_updates
20
- from ..utils.console import xpk_exit, xpk_print
21
- from ..core.kjob import AppProfileDefaults
22
- from ..core.core import add_zone_and_project
23
- from ruamel.yaml import YAML
24
17
  import re
25
18
  import sys
26
19
 
20
+ from ruamel.yaml import YAML
21
+
22
+ from ..core.commands import run_command_for_value, run_command_with_updates
23
+ from ..core.gcloud_context import add_zone_and_project
24
+ from ..core.kjob import AppProfileDefaults
25
+ from ..utils.console import xpk_exit, xpk_print
26
+ from .common import set_cluster_command
27
+ from .kind import set_local_cluster_command
28
+
27
29
 
28
30
  def job_info(args):
29
31
  """Run commands obtaining information about a job given by name.
xpk/commands/kind.py CHANGED
@@ -18,9 +18,7 @@ from ..core.commands import (
18
18
  run_command_for_value,
19
19
  run_command_with_updates,
20
20
  )
21
- from ..core.core import (
22
- set_jobset_on_cluster,
23
- )
21
+ from ..core.cluster import set_jobset_on_cluster, setup_k8s_env
24
22
  from ..core.kjob import (
25
23
  verify_kjob_installed,
26
24
  prepare_kjob,
@@ -28,6 +26,13 @@ from ..core.kjob import (
28
26
  )
29
27
  from ..core.kueue import (
30
28
  install_kueue_on_cluster,
29
+ install_kueue_crs,
30
+ wait_for_kueue_available,
31
+ )
32
+ from ..core.storage import install_storage_crd
33
+ from ..core.system_characteristics import (
34
+ SystemCharacteristics,
35
+ AcceleratorType,
31
36
  )
32
37
  from ..utils.console import (xpk_exit, xpk_print)
33
38
 
@@ -74,11 +79,36 @@ def cluster_create(args) -> None:
74
79
  if err_code > 0:
75
80
  xpk_exit(err_code)
76
81
 
77
- xpk_print('Preparing kjob')
82
+ args.kind_cluster = True
78
83
  err_code = prepare_kjob(args)
79
84
  if err_code > 0:
80
85
  xpk_exit(err_code)
81
86
 
87
+ k8s_client = setup_k8s_env(args)
88
+ install_storage_crd(k8s_client)
89
+
90
+ xpk_print('Wait for Kueue to be fully available')
91
+ wait_for_kueue_available_code = wait_for_kueue_available(args)
92
+ if wait_for_kueue_available_code != 0:
93
+ xpk_exit(wait_for_kueue_available_code)
94
+
95
+ args.num_slices = 1
96
+ args.enable_pathways = False
97
+ system = SystemCharacteristics(
98
+ 'N/A',
99
+ 1,
100
+ 'N/A',
101
+ 'N/A',
102
+ 1,
103
+ AcceleratorType['CPU'],
104
+ 'kind',
105
+ )
106
+
107
+ xpk_print('Install Kueue Custom Resources')
108
+ enable_kueue_credentials_code = install_kueue_crs(args, system, None)
109
+ if enable_kueue_credentials_code != 0:
110
+ xpk_exit(enable_kueue_credentials_code)
111
+
82
112
  xpk_print('Kind commands done! Resources are created.')
83
113
  xpk_exit(0)
84
114
 
xpk/commands/kjob_common.py ADDED
@@ -0,0 +1,44 @@
1
+ """
2
+ Copyright 2025 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ from ..core.kjob import get_a3mega_pod_template_annotations, get_a3ultra_pod_template_annotations
18
+ from ..core.capacity import H100_MEGA_DEVICE_TYPE, H200_DEVICE_TYPE
19
+ from ..core.cluster import get_gpu_type_from_cluster
20
+
21
+
22
+ def add_tcpxo_annotations(args, cmd: str) -> str:
23
+ tcpxo, interfaces, eth0 = get_a3mega_pod_template_annotations(args)
24
+ cmd += f" --pod-template-annotation {tcpxo} \\\n"
25
+ cmd += f" --pod-template-annotation {eth0} \\\n"
26
+ cmd += f" --pod-template-annotation {interfaces} "
27
+ return cmd
28
+
29
+
30
+ def add_rdma_annotations(args, cmd) -> str:
31
+ eth0, interfaces = get_a3ultra_pod_template_annotations(args)
32
+ cmd += f" --pod-template-annotation {eth0} \\\n"
33
+ cmd += f" --pod-template-annotation {interfaces} \\\n"
34
+ return cmd
35
+
36
+
37
+ def add_gpu_networking_annotations_to_command(args, cmd: str) -> str:
38
+ gpu_type = get_gpu_type_from_cluster(args)
39
+
40
+ if gpu_type == H100_MEGA_DEVICE_TYPE:
41
+ return add_tcpxo_annotations(args, cmd)
42
+ if gpu_type == H200_DEVICE_TYPE:
43
+ return add_rdma_annotations(args, cmd)
44
+ return cmd
xpk/commands/run.py ADDED
@@ -0,0 +1,128 @@
1
+ """
2
+ Copyright 2025 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ from argparse import Namespace
18
+
19
+ from ..core.cluster import create_xpk_k8s_service_account
20
+ from ..core.commands import run_command_with_full_controls
21
+ from ..core.gcloud_context import add_zone_and_project
22
+ from ..core.kueue import LOCAL_QUEUE_NAME
23
+ from ..utils.console import xpk_exit, xpk_print
24
+ from .common import set_cluster_command
25
+ from ..core.kjob import JobTemplateDefaults, AppProfileDefaults, prepare_kjob, Kueue_TAS_annotation, get_gcsfuse_annotation
26
+ from .kjob_common import add_gpu_networking_annotations_to_command
27
+ from .kind import set_local_cluster_command
28
+
29
+
30
+ def run(args: Namespace) -> None:
31
+ """Run task.
32
+ This function runs passed script in non-blocking manner.
33
+ Args:
34
+ args: user provided arguments for running the command.
35
+ Returns:
36
+ None
37
+ """
38
+ if not args.kind_cluster:
39
+ add_zone_and_project(args)
40
+ set_cluster_command_code = set_cluster_command(args)
41
+ else:
42
+ set_cluster_command_code = set_local_cluster_command(args)
43
+
44
+ if set_cluster_command_code != 0:
45
+ xpk_exit(set_cluster_command_code)
46
+
47
+ err_code = prepare_kjob(args)
48
+ if err_code > 0:
49
+ xpk_exit(err_code)
50
+ create_xpk_k8s_service_account()
51
+
52
+ submit_job(args)
53
+
54
+
55
+ def submit_job(args: Namespace) -> None:
56
+ cmd = (
57
+ 'kubectl kjob create slurm --profile'
58
+ f' {AppProfileDefaults.NAME.value} '
59
+ f' --localqueue {LOCAL_QUEUE_NAME} '
60
+ f" --pod-template-annotation '{Kueue_TAS_annotation}'"
61
+ f' --stream-container {JobTemplateDefaults.CONTAINER_NAME.value}'
62
+ f' --worker-container {JobTemplateDefaults.CONTAINER_NAME.value}'
63
+ ' --wait --rm --first-node-ip'
64
+ )
65
+ cmd = add_gpu_networking_annotations_to_command(args, cmd)
66
+
67
+ gcsfuse_annotation = get_gcsfuse_annotation(args)
68
+ if gcsfuse_annotation is not None:
69
+ cmd += f' --pod-template-annotation {gcsfuse_annotation}'
70
+
71
+ if args.timeout:
72
+ cmd += f' --wait-timeout {args.timeout}s'
73
+
74
+ if args.ignore_unknown_flags:
75
+ cmd += ' --ignore-unknown-flags'
76
+
77
+ cmd += f' -- {args.script} --partition {LOCAL_QUEUE_NAME}'
78
+
79
+ if args.array is not None:
80
+ cmd += f' --array {args.array}'
81
+
82
+ if args.cpus_per_task is not None:
83
+ cmd += f' --cpus-per-task {args.cpus_per_task}'
84
+
85
+ if args.gpus_per_task is not None:
86
+ cmd += f' --gpus-per-task {args.gpus_per_task}'
87
+
88
+ if args.mem is not None:
89
+ cmd += f' --mem {args.mem}'
90
+
91
+ if args.mem_per_task is not None:
92
+ cmd += f' --mem-per-task {args.mem_per_task}'
93
+
94
+ if args.mem_per_cpu is not None:
95
+ cmd += f' --mem-per-cpu {args.mem_per_cpu}'
96
+
97
+ if args.mem_per_gpu is not None:
98
+ cmd += f' --mem-per-gpu {args.mem_per_gpu}'
99
+
100
+ if args.nodes is not None:
101
+ cmd += f' --nodes {args.nodes}'
102
+
103
+ if args.ntasks is not None:
104
+ cmd += f' --ntasks {args.ntasks}'
105
+
106
+ if args.output is not None:
107
+ cmd += f' --output {args.output}'
108
+
109
+ if args.error is not None:
110
+ cmd += f' --error {args.error}'
111
+
112
+ if args.input is not None:
113
+ cmd += f' --input {args.input}'
114
+
115
+ if args.job_name is not None:
116
+ cmd += f' --job-name {args.job_name}'
117
+
118
+ if args.chdir is not None:
119
+ cmd += f' --chdir {args.chdir}'
120
+
121
+ if args.time is not None:
122
+ cmd += f' --time {args.time}'
123
+
124
+ return_code = run_command_with_full_controls(cmd, 'run task', args)
125
+
126
+ if return_code != 0:
127
+ xpk_print(f'Running task returned ERROR {return_code}')
128
+ xpk_exit(return_code)
xpk/commands/shell.py CHANGED
@@ -12,11 +12,16 @@ limitations under the License.
12
12
  """
13
13
 
14
14
  from ..core.commands import run_command_with_full_controls, run_command_for_value, run_command_with_updates
15
+ from ..core.cluster import get_cluster_credentials, add_zone_and_project, create_xpk_k8s_service_account
15
16
  from ..utils.console import xpk_exit, xpk_print
16
17
  from argparse import Namespace
17
18
 
18
- from ..core.kjob import AppProfileDefaults, PodTemplateDefaults
19
-
19
+ from ..core.kjob import (
20
+ AppProfileDefaults,
21
+ prepare_kjob,
22
+ get_pod_template_interactive_command,
23
+ get_gcsfuse_annotation,
24
+ )
20
25
 
21
26
  exit_instructions = 'To exit the shell input "exit".'
22
27
 
@@ -45,6 +50,10 @@ def shell(args: Namespace):
45
50
 
46
51
 
47
52
  def get_existing_shell_pod_name(args: Namespace) -> str | None:
53
+ if not args.kind_cluster:
54
+ add_zone_and_project(args)
55
+ get_cluster_credentials(args)
56
+
48
57
  return_code, shell_name = run_command_for_value(
49
58
  command=(
50
59
  'kubectl get pods --no-headers --field-selector status.phase=Running'
@@ -70,11 +79,22 @@ def get_existing_shell_pod_name(args: Namespace) -> str | None:
70
79
 
71
80
 
72
81
  def connect_to_new_interactive_shell(args: Namespace) -> int:
82
+ err_code = prepare_kjob(args)
83
+ if err_code > 0:
84
+ xpk_exit(err_code)
85
+ create_xpk_k8s_service_account()
86
+
87
+ cmd = (
88
+ 'kubectl-kjob create interactive --profile'
89
+ f' {AppProfileDefaults.NAME.value} --pod-running-timeout 180s'
90
+ )
91
+
92
+ gcsfuse_annotation = get_gcsfuse_annotation(args)
93
+ if gcsfuse_annotation is not None:
94
+ cmd += f' --pod-template-annotation {gcsfuse_annotation}'
95
+
73
96
  return run_command_with_full_controls(
74
- command=(
75
- 'kubectl-kjob create interactive --profile'
76
- f' {AppProfileDefaults.NAME.value} --pod-running-timeout 30s'
77
- ),
97
+ command=cmd,
78
98
  task='Creating new interactive shell and entering it',
79
99
  global_args=args,
80
100
  instructions=exit_instructions,
@@ -87,7 +107,7 @@ def connect_to_existing_interactive_shell(
87
107
  return run_command_with_full_controls(
88
108
  command=(
89
109
  f'kubectl exec --stdin --tty {pod_name} --'
90
- f' {PodTemplateDefaults.INTERACTIVE_COMMAND.value}'
110
+ f' {get_pod_template_interactive_command()}'
91
111
  ),
92
112
  task='Entering existing interactive shell',
93
113
  global_args=args,
xpk/commands/storage.py ADDED
@@ -0,0 +1,280 @@
1
+ """
2
+ Copyright 2024 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ from argparse import Namespace
18
+
19
+ import yaml
20
+ from kubernetes import client as k8s_client
21
+ from kubernetes.client import ApiClient
22
+ from kubernetes.client.rest import ApiException
23
+
24
+ from ..core import gcsfuse
25
+ from ..core.cluster import (
26
+ DEFAULT_NAMESPACE,
27
+ add_zone_and_project,
28
+ get_cluster_network,
29
+ setup_k8s_env,
30
+ update_cluster_with_gcpfilestore_driver_if_necessary,
31
+ update_cluster_with_gcsfuse_driver_if_necessary,
32
+ update_cluster_with_workload_identity_if_necessary,
33
+ )
34
+ from ..core.filestore import FilestoreClient, get_storage_class_name
35
+ from ..core.kjob import (
36
+ KJOB_API_GROUP_NAME,
37
+ KJOB_API_GROUP_VERSION,
38
+ KJOB_API_VOLUME_BUNDLE_PLURAL,
39
+ create_volume_bundle_instance,
40
+ )
41
+ from ..core.storage import (
42
+ GCP_FILESTORE_TYPE,
43
+ GCS_FUSE_TYPE,
44
+ STORAGE_CRD_PLURAL,
45
+ XPK_API_GROUP_NAME,
46
+ XPK_API_GROUP_VERSION,
47
+ Storage,
48
+ create_storage_crds,
49
+ get_storage,
50
+ list_storages,
51
+ print_storages_for_cluster,
52
+ )
53
+ from ..utils.console import get_user_input, xpk_exit, xpk_print
54
+ from ..utils.kubectl import apply_kubectl_manifest
55
+
56
+
57
+ def storage_create(args: Namespace) -> None:
58
+ add_zone_and_project(args)
59
+ if args.type == GCP_FILESTORE_TYPE:
60
+ if args.instance is None:
61
+ args.instance = args.name
62
+
63
+ filestore_client = FilestoreClient(args.zone, args.instance, args.project)
64
+ filestore_exists = filestore_client.check_instance_exists()
65
+ if filestore_exists:
66
+ xpk_print(f"Filestore instance {args.instance} already exists.")
67
+ xpk_exit(1)
68
+ filestore_network = get_cluster_network(args)
69
+ xpk_print(
70
+ f"Creating Filestore instance {args.instance} in network:"
71
+ f" {filestore_network}"
72
+ )
73
+ filestore_client.create_instance(
74
+ vol=args.vol, size=args.size, tier=args.tier, network=filestore_network
75
+ )
76
+ if args.manifest is not None:
77
+ with open(args.manifest, "r", encoding="utf-8") as f:
78
+ manifest = list(yaml.safe_load_all(f))
79
+ else:
80
+ manifest = filestore_client.manifest(
81
+ args.name, args.vol, args.access_mode, filestore_network
82
+ )
83
+
84
+ k8s_api_client = setup_k8s_env(args)
85
+ create_storage_crds(k8s_api_client, args, manifest)
86
+ create_volume_bundle_instance(
87
+ k8s_api_client, args.name, manifest, args.readonly, args.mount_point
88
+ )
89
+ return_code = update_cluster_with_workload_identity_if_necessary(args)
90
+ if return_code > 0:
91
+ xpk_exit(return_code)
92
+ return_code = update_cluster_with_gcpfilestore_driver_if_necessary(args)
93
+ if return_code > 0:
94
+ xpk_exit(return_code)
95
+ apply_kubectl_manifest(k8s_api_client, manifest)
96
+
97
+
98
+ def storage_delete(args: Namespace) -> None:
99
+ add_zone_and_project(args)
100
+ k8s_api_client = setup_k8s_env(args)
101
+ storages = list_storages(k8s_api_client)
102
+ filestore_client = FilestoreClient(args.zone, args.name, args.project)
103
+
104
+ if not filestore_client.check_instance_exists():
105
+ xpk_print(f"Filestore instance {args.name} does not exist.")
106
+ xpk_exit(1)
107
+
108
+ filestore_instance_name = filestore_client.get_instance_fullname()
109
+
110
+ children = [
111
+ storage
112
+ for storage in storages
113
+ if storage.bucket.startswith(filestore_instance_name)
114
+ ]
115
+
116
+ if children and not args.force:
117
+ detach = get_user_input(
118
+ "Deleting a filestore storage will destroy your filestore instance and"
119
+ " all its data in all volumes will be lost. Do you wish to delete the"
120
+ f" filestore instance {filestore_instance_name}?\n y (yes) / n (no):\n'"
121
+ )
122
+ if not detach:
123
+ xpk_print("Deleting storage canceled.")
124
+ xpk_exit(0)
125
+
126
+ for child in children:
127
+ delete_storage_resources(k8s_api_client, child)
128
+
129
+ filestore_client.delete_filestore_instance()
130
+
131
+
132
+ def storage_attach(args: Namespace) -> None:
133
+ add_zone_and_project(args)
134
+ if args.type == GCP_FILESTORE_TYPE:
135
+ if args.instance is None:
136
+ args.instance = args.name
137
+
138
+ filestore_client = FilestoreClient(args.zone, args.instance, args.project)
139
+
140
+ filestore_exists = filestore_client.check_instance_exists()
141
+ if not filestore_exists:
142
+ xpk_print(f"Filestore instance {args.instance} does not exists.")
143
+ xpk_exit(1)
144
+
145
+ if args.manifest is not None:
146
+ with open(args.manifest, "r", encoding="utf-8") as f:
147
+ manifest = list(yaml.safe_load_all(f))
148
+ else:
149
+ filestore_network = get_cluster_network(args)
150
+ manifest = filestore_client.manifest(
151
+ args.name, args.vol, args.access_mode, filestore_network
152
+ )
153
+
154
+ else: # args.type == GCS_FUSE_TYPE:
155
+ if args.manifest is None and args.size is None:
156
+ xpk_print("--size is required when attaching gcsfuse storage.")
157
+ xpk_exit(1)
158
+
159
+ if args.bucket is None:
160
+ args.bucket = args.name
161
+
162
+ if args.manifest is not None:
163
+ with open(args.manifest, "r", encoding="utf-8") as f:
164
+ manifest = list(yaml.safe_load_all(f))
165
+ else:
166
+ manifest = gcsfuse.manifest(
167
+ name=args.name, bucket=args.bucket, size=args.size
168
+ )
169
+
170
+ k8s_api_client = setup_k8s_env(args)
171
+ create_storage_crds(k8s_api_client, args, manifest)
172
+ create_volume_bundle_instance(
173
+ k8s_api_client, args.name, manifest, args.readonly, args.mount_point
174
+ )
175
+ return_code = update_cluster_with_workload_identity_if_necessary(args)
176
+ if return_code > 0:
177
+ xpk_exit(return_code)
178
+
179
+ # args.type can have only two values after parsing
180
+ return_code = (
181
+ update_cluster_with_gcsfuse_driver_if_necessary(args)
182
+ if args.type == GCS_FUSE_TYPE
183
+ else update_cluster_with_gcpfilestore_driver_if_necessary(args)
184
+ )
185
+ if return_code > 0:
186
+ xpk_exit(return_code)
187
+
188
+ apply_kubectl_manifest(k8s_api_client, manifest)
189
+
190
+
191
+ def storage_list(args: Namespace) -> None:
192
+ k8s_api_client = setup_k8s_env(args)
193
+ storages = list_storages(k8s_api_client)
194
+ print_storages_for_cluster(storages)
195
+
196
+
197
+ def storage_detach(args: Namespace) -> None:
198
+ k8s_api_client = setup_k8s_env(args)
199
+ storage = get_storage(k8s_api_client, args.name)
200
+ delete_storage_resources(k8s_api_client, storage)
201
+
202
+
203
+ def delete_resource(api_call, resource_name: str, resource_kind: str) -> None:
204
+ """
205
+ Deletes a Kubernetes resource and handles potential API exceptions.
206
+
207
+ Args:
208
+ api_call: The function to call for deleting the resource.
209
+ resource_name: The name of the resource to delete.
210
+ resource_type: The type of the resource (e.g., "Persistent Volume Claim").
211
+ """
212
+ xpk_print(f"Deleting {resource_kind}:{resource_name}")
213
+ try:
214
+ api_call(resource_name)
215
+ except ApiException as e:
216
+ if e.status == 404:
217
+ xpk_print(
218
+ f"{resource_kind}: {resource_name} not found. "
219
+ f"Might be already deleted. Error: {e}"
220
+ )
221
+ return
222
+ else:
223
+ xpk_print(f"Encountered error during {resource_kind} deletion: {e}")
224
+ xpk_exit(1)
225
+ xpk_print(f"Deleted {resource_kind}:{resource_name}")
226
+
227
+
228
+ def delete_storage_resources(k8s_api_client: ApiClient, storage: Storage):
229
+ """
230
+ Deletes storage PV, PVC, SC and custom resources (if they exist).
231
+
232
+ Args:
233
+ k8s_api_client: An ApiClient object for interacting with the Kubernetes API.
234
+ storage: Storage to delete
235
+ """
236
+ api_instance = k8s_client.CustomObjectsApi(k8s_api_client)
237
+ core_api = k8s_client.CoreV1Api()
238
+ storage_api = k8s_client.StorageV1Api()
239
+
240
+ delete_resource(
241
+ lambda name: core_api.delete_namespaced_persistent_volume_claim(
242
+ name, "default"
243
+ ),
244
+ storage.pvc,
245
+ "Persistent Volume Claim",
246
+ )
247
+
248
+ delete_resource(
249
+ core_api.delete_persistent_volume, storage.pv, "Persistent Volume"
250
+ )
251
+
252
+ if storage.type == GCP_FILESTORE_TYPE:
253
+ delete_resource(
254
+ storage_api.delete_storage_class,
255
+ get_storage_class_name(storage.name),
256
+ "Storage Class",
257
+ )
258
+
259
+ delete_resource(
260
+ lambda name: api_instance.delete_namespaced_custom_object(
261
+ namespace=DEFAULT_NAMESPACE,
262
+ name=name,
263
+ group=KJOB_API_GROUP_NAME,
264
+ version=KJOB_API_GROUP_VERSION,
265
+ plural=KJOB_API_VOLUME_BUNDLE_PLURAL,
266
+ ),
267
+ storage.name,
268
+ "VolumeBundle",
269
+ )
270
+
271
+ delete_resource(
272
+ lambda name: api_instance.delete_cluster_custom_object(
273
+ name=name,
274
+ group=XPK_API_GROUP_NAME,
275
+ version=XPK_API_GROUP_VERSION,
276
+ plural=STORAGE_CRD_PLURAL,
277
+ ),
278
+ storage.name,
279
+ "Storage",
280
+ )
xpk/commands/version.py CHANGED
@@ -14,26 +14,14 @@ See the License for the specific language governing permissions and
14
14
  limitations under the License.
15
15
  """
16
16
 
17
- from argparse import Namespace
18
- import os
17
+ from ..core.config import __version__
18
+ from ..utils.console import xpk_print
19
19
 
20
- from ..core.commands import run_command_for_value
21
20
 
22
- XPK_VERSION = 'v0.6.0'
21
+ def get_xpk_version() -> str:
22
+ return __version__
23
23
 
24
- from ..utils.console import xpk_exit, xpk_print
25
24
 
26
-
27
- def version(args: Namespace) -> None:
25
+ def version(args) -> None: # pylint: disable=unused-argument
28
26
  """Get version of xpk."""
29
- xpk_print('xpk_version:', XPK_VERSION)
30
- if os.path.exists(os.path.join(os.getcwd(), '.git')):
31
- code, xpk_version = run_command_for_value(
32
- 'git rev-parse HEAD',
33
- task='Get latest hash',
34
- global_args=args,
35
- quiet=True,
36
- )
37
- if code != 0:
38
- xpk_exit(code)
39
- xpk_print('git commit:', xpk_version.strip('\n'))
27
+ xpk_print('xpk_version:', __version__)