xpk 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xpk/__init__.py +15 -0
- xpk/api/__init__.py +15 -0
- xpk/api/storage_crd.yaml +52 -0
- xpk/commands/__init__.py +15 -0
- xpk/commands/batch.py +131 -0
- xpk/commands/cluster.py +808 -0
- xpk/commands/cluster_gcluster.py +269 -0
- xpk/commands/common.py +44 -0
- xpk/commands/config.py +29 -0
- xpk/commands/info.py +243 -0
- xpk/commands/inspector.py +357 -0
- xpk/commands/job.py +199 -0
- xpk/commands/kind.py +283 -0
- xpk/commands/kjob_common.py +44 -0
- xpk/commands/run.py +128 -0
- xpk/commands/shell.py +140 -0
- xpk/commands/storage.py +267 -0
- xpk/commands/version.py +27 -0
- xpk/commands/workload.py +889 -0
- xpk/core/__init__.py +15 -0
- xpk/core/blueprint/__init__.py +15 -0
- xpk/core/blueprint/blueprint_definitions.py +62 -0
- xpk/core/blueprint/blueprint_generator.py +708 -0
- xpk/core/capacity.py +185 -0
- xpk/core/cluster.py +564 -0
- xpk/core/cluster_private.py +200 -0
- xpk/core/commands.py +356 -0
- xpk/core/config.py +179 -0
- xpk/core/docker_container.py +225 -0
- xpk/core/docker_image.py +210 -0
- xpk/core/docker_manager.py +308 -0
- xpk/core/docker_resources.py +350 -0
- xpk/core/filestore.py +251 -0
- xpk/core/gcloud_context.py +196 -0
- xpk/core/gcluster_manager.py +176 -0
- xpk/core/gcsfuse.py +50 -0
- xpk/core/kjob.py +444 -0
- xpk/core/kueue.py +358 -0
- xpk/core/monitoring.py +134 -0
- xpk/core/nap.py +361 -0
- xpk/core/network.py +377 -0
- xpk/core/nodepool.py +581 -0
- xpk/core/pathways.py +377 -0
- xpk/core/ray.py +222 -0
- xpk/core/remote_state/__init__.py +15 -0
- xpk/core/remote_state/fuse_remote_state.py +99 -0
- xpk/core/remote_state/remote_state_client.py +38 -0
- xpk/core/resources.py +238 -0
- xpk/core/scheduling.py +253 -0
- xpk/core/storage.py +581 -0
- xpk/core/system_characteristics.py +1432 -0
- xpk/core/vertex.py +105 -0
- xpk/core/workload.py +341 -0
- xpk/core/workload_decorators/__init__.py +15 -0
- xpk/core/workload_decorators/rdma_decorator.py +129 -0
- xpk/core/workload_decorators/storage_decorator.py +52 -0
- xpk/core/workload_decorators/tcpxo_decorator.py +190 -0
- xpk/main.py +75 -0
- xpk/parser/__init__.py +15 -0
- xpk/parser/batch.py +43 -0
- xpk/parser/cluster.py +662 -0
- xpk/parser/common.py +259 -0
- xpk/parser/config.py +49 -0
- xpk/parser/core.py +135 -0
- xpk/parser/info.py +64 -0
- xpk/parser/inspector.py +65 -0
- xpk/parser/job.py +147 -0
- xpk/parser/kind.py +95 -0
- xpk/parser/run.py +47 -0
- xpk/parser/shell.py +59 -0
- xpk/parser/storage.py +316 -0
- xpk/parser/validators.py +39 -0
- xpk/parser/version.py +23 -0
- xpk/parser/workload.py +726 -0
- xpk/templates/__init__.py +15 -0
- xpk/templates/storage.yaml +13 -0
- xpk/utils/__init__.py +15 -0
- xpk/utils/console.py +55 -0
- xpk/utils/file.py +82 -0
- xpk/utils/gcs_utils.py +125 -0
- xpk/utils/kubectl.py +57 -0
- xpk/utils/network.py +168 -0
- xpk/utils/objects.py +88 -0
- xpk/utils/templates.py +28 -0
- xpk/utils/validation.py +80 -0
- xpk/utils/yaml.py +30 -0
- {xpk-0.5.0.dist-info → xpk-0.7.0.dist-info}/METADATA +456 -32
- xpk-0.7.0.dist-info/RECORD +92 -0
- {xpk-0.5.0.dist-info → xpk-0.7.0.dist-info}/WHEEL +1 -1
- xpk-0.7.0.dist-info/entry_points.txt +2 -0
- xpk-0.5.0.dist-info/RECORD +0 -7
- xpk-0.5.0.dist-info/entry_points.txt +0 -2
- xpk.py +0 -7282
- {xpk-0.5.0.dist-info → xpk-0.7.0.dist-info}/LICENSE +0 -0
- {xpk-0.5.0.dist-info → xpk-0.7.0.dist-info}/top_level.txt +0 -0
xpk/commands/storage.py
ADDED
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright 2024 Google LLC
|
|
3
|
+
|
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
you may not use this file except in compliance with the License.
|
|
6
|
+
You may obtain a copy of the License at
|
|
7
|
+
|
|
8
|
+
https://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
|
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
See the License for the specific language governing permissions and
|
|
14
|
+
limitations under the License.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from argparse import Namespace
|
|
18
|
+
|
|
19
|
+
from kubernetes import client as k8s_client
|
|
20
|
+
from kubernetes.client import ApiClient
|
|
21
|
+
from kubernetes.client.rest import ApiException
|
|
22
|
+
|
|
23
|
+
from ..core import gcsfuse
|
|
24
|
+
from ..core.cluster import (
|
|
25
|
+
DEFAULT_NAMESPACE,
|
|
26
|
+
add_zone_and_project,
|
|
27
|
+
get_cluster_network,
|
|
28
|
+
setup_k8s_env,
|
|
29
|
+
update_cluster_with_gcpfilestore_driver_if_necessary,
|
|
30
|
+
update_cluster_with_gcsfuse_driver_if_necessary,
|
|
31
|
+
update_cluster_with_workload_identity_if_necessary,
|
|
32
|
+
)
|
|
33
|
+
from ..core.filestore import FilestoreClient, get_storage_class_name
|
|
34
|
+
from ..core.kjob import (
|
|
35
|
+
KJOB_API_GROUP_NAME,
|
|
36
|
+
KJOB_API_GROUP_VERSION,
|
|
37
|
+
KJOB_API_VOLUME_BUNDLE_PLURAL,
|
|
38
|
+
create_volume_bundle_instance,
|
|
39
|
+
)
|
|
40
|
+
from ..core.storage import (
|
|
41
|
+
GCP_FILESTORE_TYPE,
|
|
42
|
+
GCS_FUSE_TYPE,
|
|
43
|
+
STORAGE_CRD_PLURAL,
|
|
44
|
+
XPK_API_GROUP_NAME,
|
|
45
|
+
XPK_API_GROUP_VERSION,
|
|
46
|
+
Storage,
|
|
47
|
+
create_storage_crds,
|
|
48
|
+
get_storage,
|
|
49
|
+
list_storages,
|
|
50
|
+
print_storages_for_cluster,
|
|
51
|
+
)
|
|
52
|
+
from ..utils.console import get_user_input, xpk_exit, xpk_print
|
|
53
|
+
from ..utils.kubectl import apply_kubectl_manifest
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def storage_create(args: Namespace) -> None:
  """Create a Filestore instance and register it as storage on the cluster.

  After resolving zone and project, a new Filestore instance is created in the
  cluster's network, the Storage custom resource and the kjob VolumeBundle are
  created from the instance manifest, the cluster is updated with workload
  identity and the Filestore driver where necessary, and finally the manifest
  is applied.

  Exits with code 1 if the Filestore instance already exists, or with the
  return code of a failing cluster update.

  Args:
    args: Parsed command-line arguments. Reads ``type``, ``instance``,
      ``name``, ``zone``, ``project``, ``vol``, ``size``, ``tier``,
      ``access_mode``, ``readonly`` and ``mount_point``; ``instance`` is
      defaulted to ``name`` when it is None.
  """
  add_zone_and_project(args)
  # NOTE(review): written on the assumption that the whole original body was
  # nested under the Filestore branch - confirm against the upstream file.
  if args.type != GCP_FILESTORE_TYPE:
    return

  if args.instance is None:
    # The instance name falls back to the storage name.
    args.instance = args.name

  fs_client = FilestoreClient(args.zone, args.instance, args.project)
  if fs_client.check_instance_exists():
    xpk_print(f"Filestore instance {args.instance} already exists.")
    xpk_exit(1)

  network = get_cluster_network(args)
  xpk_print(
      f"Creating Filestore instance {args.instance} in network: {network}"
  )
  fs_client.create_instance(
      vol=args.vol, size=args.size, tier=args.tier, network=network
  )
  manifest = fs_client.manifest(
      args.name, args.vol, args.access_mode, network
  )

  api_client = setup_k8s_env(args)
  create_storage_crds(api_client, args, manifest)
  create_volume_bundle_instance(
      api_client, args.name, manifest, args.readonly, args.mount_point
  )

  # Run the cluster updates in order; stop at the first non-zero return code.
  cluster_updates = (
      update_cluster_with_workload_identity_if_necessary,
      update_cluster_with_gcpfilestore_driver_if_necessary,
  )
  for update_cluster in cluster_updates:
    status = update_cluster(args)
    if status > 0:
      xpk_exit(status)

  apply_kubectl_manifest(api_client, manifest)
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def storage_delete(args: Namespace) -> None:
  """Delete a Filestore instance together with the storages that use it.

  Every storage whose ``bucket`` starts with the instance's full name is
  treated as a child of the instance. Its Kubernetes resources are removed
  before the instance itself is deleted.

  Exits with code 1 if the Filestore instance does not exist, and with code 0
  if the user declines the confirmation prompt.

  Args:
    args: Parsed command-line arguments. Reads ``zone``, ``name``,
      ``project`` and ``force``.
  """
  add_zone_and_project(args)
  k8s_api_client = setup_k8s_env(args)
  storages = list_storages(k8s_api_client)
  filestore_client = FilestoreClient(args.zone, args.name, args.project)

  if not filestore_client.check_instance_exists():
    xpk_print(f"Filestore instance {args.name} does not exist.")
    xpk_exit(1)

  filestore_instance_name = filestore_client.get_instance_fullname()

  # NOTE(review): this is a prefix match, so an instance whose full name is a
  # prefix of another instance's name would also match - confirm the format
  # of `storage.bucket`.
  children = [
      storage
      for storage in storages
      if storage.bucket.startswith(filestore_instance_name)
  ]

  # Confirmation is only requested when attached storages exist and --force
  # was not given.
  if children and not args.force:
    confirmed = get_user_input(
        "Deleting a filestore storage will destroy your filestore instance and"
        " all its data in all volumes will be lost. Do you wish to delete the"
        f" filestore instance {filestore_instance_name}?\n y (yes) / n (no):\n"
    )
    if not confirmed:
      xpk_print("Deleting storage canceled.")
      xpk_exit(0)

  for child in children:
    delete_storage_resources(k8s_api_client, child)

  filestore_client.delete_filestore_instance()
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def storage_attach(args: Namespace) -> None:
  """Attach an existing Filestore instance or GCS bucket to the cluster.

  A manifest is built for the requested storage type. The Storage custom
  resource and the kjob VolumeBundle are then created from it, the cluster is
  updated with workload identity and the matching driver where necessary, and
  the manifest is applied.

  Exits with code 1 if the Filestore instance does not exist or if ``--size``
  is missing for gcsfuse storage, and with the return code of a failing
  cluster update.

  Args:
    args: Parsed command-line arguments. Reads ``type``, ``name``, ``zone``,
      ``project``, ``readonly`` and ``mount_point``; for Filestore also
      ``instance``, ``vol`` and ``access_mode``; for gcsfuse also ``size``
      and ``bucket``. ``instance`` / ``bucket`` default to ``name`` when None.
  """
  add_zone_and_project(args)
  if args.type == GCP_FILESTORE_TYPE:
    if args.instance is None:
      # The instance name falls back to the storage name.
      args.instance = args.name

    filestore_client = FilestoreClient(args.zone, args.instance, args.project)

    filestore_exists = filestore_client.check_instance_exists()
    if not filestore_exists:
      xpk_print(f"Filestore instance {args.instance} does not exist.")
      xpk_exit(1)

    filestore_network = get_cluster_network(args)
    manifest = filestore_client.manifest(
        args.name, args.vol, args.access_mode, filestore_network
    )

  else:  # args.type == GCS_FUSE_TYPE:
    if args.size is None:
      xpk_print("--size is required when attaching gcsfuse storage.")
      xpk_exit(1)

    if args.bucket is None:
      # The bucket name falls back to the storage name.
      args.bucket = args.name

    manifest = gcsfuse.manifest(
        name=args.name, bucket=args.bucket, size=args.size
    )

  k8s_api_client = setup_k8s_env(args)
  create_storage_crds(k8s_api_client, args, manifest)
  create_volume_bundle_instance(
      k8s_api_client, args.name, manifest, args.readonly, args.mount_point
  )
  return_code = update_cluster_with_workload_identity_if_necessary(args)
  if return_code > 0:
    xpk_exit(return_code)

  # args.type can have only two values after parsing
  return_code = (
      update_cluster_with_gcsfuse_driver_if_necessary(args)
      if args.type == GCS_FUSE_TYPE
      else update_cluster_with_gcpfilestore_driver_if_necessary(args)
  )
  if return_code > 0:
    xpk_exit(return_code)

  apply_kubectl_manifest(k8s_api_client, manifest)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def storage_list(args: Namespace) -> None:
  """Print the storages found on the cluster selected by ``args``.

  Args:
    args: Parsed command-line arguments, passed to ``setup_k8s_env``.
  """
  api_client = setup_k8s_env(args)
  print_storages_for_cluster(list_storages(api_client))
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def storage_detach(args: Namespace) -> None:
  """Remove the Kubernetes resources of the storage named ``args.name``.

  Args:
    args: Parsed command-line arguments. Reads ``name`` and is passed to
      ``setup_k8s_env``.
  """
  api_client = setup_k8s_env(args)
  target = get_storage(api_client, args.name)
  delete_storage_resources(api_client, target)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def delete_resource(api_call, resource_name: str, resource_kind: str) -> None:
  """
  Deletes one Kubernetes resource, reporting progress and API errors.

  A 404 response is reported and treated as "nothing to do". Any other
  ApiException is reported and followed by xpk_exit(1).

  Args:
    api_call: Callable invoked with resource_name to perform the deletion.
    resource_name: The name of the resource to delete.
    resource_kind: Human-readable kind used in the printed messages
      (e.g., "Persistent Volume Claim").
  """
  xpk_print(f"Deleting {resource_kind}:{resource_name}")
  try:
    api_call(resource_name)
  except ApiException as err:
    if err.status != 404:
      xpk_print(f"Encountered error during {resource_kind} deletion: {err}")
      xpk_exit(1)
    else:
      # A missing resource is not an error; skip the success message.
      xpk_print(
          f"{resource_kind}: {resource_name} not found. "
          f"Might be already deleted. Error: {err}"
      )
      return
  xpk_print(f"Deleted {resource_kind}:{resource_name}")
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def delete_storage_resources(k8s_api_client: ApiClient, storage: Storage):
  """
  Deletes storage PV, PVC, SC and custom resources (if they exist).

  Resources are removed in this order: Persistent Volume Claim, Persistent
  Volume, Storage Class (Filestore storages only), kjob VolumeBundle and the
  Storage custom resource. Each deletion goes through delete_resource.

  Args:
    k8s_api_client: An ApiClient object for interacting with the Kubernetes API.
    storage: Storage to delete
  """
  # Build every API wrapper from the caller's client, so all deletions use the
  # same configuration instead of a mix of supplied and default clients.
  api_instance = k8s_client.CustomObjectsApi(k8s_api_client)
  core_api = k8s_client.CoreV1Api(k8s_api_client)
  storage_api = k8s_client.StorageV1Api(k8s_api_client)

  # NOTE(review): the PVC namespace is the literal "default" while the
  # VolumeBundle below uses DEFAULT_NAMESPACE - confirm they are meant to match.
  delete_resource(
      lambda name: core_api.delete_namespaced_persistent_volume_claim(
          name, "default"
      ),
      storage.pvc,
      "Persistent Volume Claim",
  )

  delete_resource(
      core_api.delete_persistent_volume, storage.pv, "Persistent Volume"
  )

  # Only Filestore storages have a Storage Class deleted here.
  if storage.type == GCP_FILESTORE_TYPE:
    delete_resource(
        storage_api.delete_storage_class,
        get_storage_class_name(storage.name),
        "Storage Class",
    )

  delete_resource(
      lambda name: api_instance.delete_namespaced_custom_object(
          namespace=DEFAULT_NAMESPACE,
          name=name,
          group=KJOB_API_GROUP_NAME,
          version=KJOB_API_GROUP_VERSION,
          plural=KJOB_API_VOLUME_BUNDLE_PLURAL,
      ),
      storage.name,
      "VolumeBundle",
  )

  delete_resource(
      lambda name: api_instance.delete_cluster_custom_object(
          name=name,
          group=XPK_API_GROUP_NAME,
          version=XPK_API_GROUP_VERSION,
          plural=STORAGE_CRD_PLURAL,
      ),
      storage.name,
      "Storage",
  )
|
xpk/commands/version.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright 2025 Google LLC
|
|
3
|
+
|
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
you may not use this file except in compliance with the License.
|
|
6
|
+
You may obtain a copy of the License at
|
|
7
|
+
|
|
8
|
+
https://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
|
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
See the License for the specific language governing permissions and
|
|
14
|
+
limitations under the License.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from ..core.config import __version__
|
|
18
|
+
from ..utils.console import xpk_print
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def get_xpk_version() -> str:
  """Return the xpk version string imported from the core config module."""
  return __version__
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def version(args) -> None:  # pylint: disable=unused-argument
  """Print the xpk version.

  Args:
    args: Parsed command-line arguments; not used by this command.
  """
  xpk_print('xpk_version:', get_xpk_version())
|