xpk 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. xpk/__init__.py +15 -0
  2. xpk/api/__init__.py +15 -0
  3. xpk/api/storage_crd.yaml +52 -0
  4. xpk/commands/__init__.py +15 -0
  5. xpk/commands/batch.py +131 -0
  6. xpk/commands/cluster.py +808 -0
  7. xpk/commands/cluster_gcluster.py +269 -0
  8. xpk/commands/common.py +44 -0
  9. xpk/commands/config.py +29 -0
  10. xpk/commands/info.py +243 -0
  11. xpk/commands/inspector.py +357 -0
  12. xpk/commands/job.py +199 -0
  13. xpk/commands/kind.py +283 -0
  14. xpk/commands/kjob_common.py +44 -0
  15. xpk/commands/run.py +128 -0
  16. xpk/commands/shell.py +140 -0
  17. xpk/commands/storage.py +267 -0
  18. xpk/commands/version.py +27 -0
  19. xpk/commands/workload.py +889 -0
  20. xpk/core/__init__.py +15 -0
  21. xpk/core/blueprint/__init__.py +15 -0
  22. xpk/core/blueprint/blueprint_definitions.py +62 -0
  23. xpk/core/blueprint/blueprint_generator.py +708 -0
  24. xpk/core/capacity.py +185 -0
  25. xpk/core/cluster.py +564 -0
  26. xpk/core/cluster_private.py +200 -0
  27. xpk/core/commands.py +356 -0
  28. xpk/core/config.py +179 -0
  29. xpk/core/docker_container.py +225 -0
  30. xpk/core/docker_image.py +210 -0
  31. xpk/core/docker_manager.py +308 -0
  32. xpk/core/docker_resources.py +350 -0
  33. xpk/core/filestore.py +251 -0
  34. xpk/core/gcloud_context.py +196 -0
  35. xpk/core/gcluster_manager.py +176 -0
  36. xpk/core/gcsfuse.py +50 -0
  37. xpk/core/kjob.py +444 -0
  38. xpk/core/kueue.py +358 -0
  39. xpk/core/monitoring.py +134 -0
  40. xpk/core/nap.py +361 -0
  41. xpk/core/network.py +377 -0
  42. xpk/core/nodepool.py +581 -0
  43. xpk/core/pathways.py +377 -0
  44. xpk/core/ray.py +222 -0
  45. xpk/core/remote_state/__init__.py +15 -0
  46. xpk/core/remote_state/fuse_remote_state.py +99 -0
  47. xpk/core/remote_state/remote_state_client.py +38 -0
  48. xpk/core/resources.py +238 -0
  49. xpk/core/scheduling.py +253 -0
  50. xpk/core/storage.py +581 -0
  51. xpk/core/system_characteristics.py +1432 -0
  52. xpk/core/vertex.py +105 -0
  53. xpk/core/workload.py +341 -0
  54. xpk/core/workload_decorators/__init__.py +15 -0
  55. xpk/core/workload_decorators/rdma_decorator.py +129 -0
  56. xpk/core/workload_decorators/storage_decorator.py +52 -0
  57. xpk/core/workload_decorators/tcpxo_decorator.py +190 -0
  58. xpk/main.py +75 -0
  59. xpk/parser/__init__.py +15 -0
  60. xpk/parser/batch.py +43 -0
  61. xpk/parser/cluster.py +662 -0
  62. xpk/parser/common.py +259 -0
  63. xpk/parser/config.py +49 -0
  64. xpk/parser/core.py +135 -0
  65. xpk/parser/info.py +64 -0
  66. xpk/parser/inspector.py +65 -0
  67. xpk/parser/job.py +147 -0
  68. xpk/parser/kind.py +95 -0
  69. xpk/parser/run.py +47 -0
  70. xpk/parser/shell.py +59 -0
  71. xpk/parser/storage.py +316 -0
  72. xpk/parser/validators.py +39 -0
  73. xpk/parser/version.py +23 -0
  74. xpk/parser/workload.py +726 -0
  75. xpk/templates/__init__.py +15 -0
  76. xpk/templates/storage.yaml +13 -0
  77. xpk/utils/__init__.py +15 -0
  78. xpk/utils/console.py +55 -0
  79. xpk/utils/file.py +82 -0
  80. xpk/utils/gcs_utils.py +125 -0
  81. xpk/utils/kubectl.py +57 -0
  82. xpk/utils/network.py +168 -0
  83. xpk/utils/objects.py +88 -0
  84. xpk/utils/templates.py +28 -0
  85. xpk/utils/validation.py +80 -0
  86. xpk/utils/yaml.py +30 -0
  87. {xpk-0.5.0.dist-info → xpk-0.7.0.dist-info}/METADATA +456 -32
  88. xpk-0.7.0.dist-info/RECORD +92 -0
  89. {xpk-0.5.0.dist-info → xpk-0.7.0.dist-info}/WHEEL +1 -1
  90. xpk-0.7.0.dist-info/entry_points.txt +2 -0
  91. xpk-0.5.0.dist-info/RECORD +0 -7
  92. xpk-0.5.0.dist-info/entry_points.txt +0 -2
  93. xpk.py +0 -7282
  94. {xpk-0.5.0.dist-info → xpk-0.7.0.dist-info}/LICENSE +0 -0
  95. {xpk-0.5.0.dist-info → xpk-0.7.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,267 @@
1
+ """
2
+ Copyright 2024 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ from argparse import Namespace
18
+
19
+ from kubernetes import client as k8s_client
20
+ from kubernetes.client import ApiClient
21
+ from kubernetes.client.rest import ApiException
22
+
23
+ from ..core import gcsfuse
24
+ from ..core.cluster import (
25
+ DEFAULT_NAMESPACE,
26
+ add_zone_and_project,
27
+ get_cluster_network,
28
+ setup_k8s_env,
29
+ update_cluster_with_gcpfilestore_driver_if_necessary,
30
+ update_cluster_with_gcsfuse_driver_if_necessary,
31
+ update_cluster_with_workload_identity_if_necessary,
32
+ )
33
+ from ..core.filestore import FilestoreClient, get_storage_class_name
34
+ from ..core.kjob import (
35
+ KJOB_API_GROUP_NAME,
36
+ KJOB_API_GROUP_VERSION,
37
+ KJOB_API_VOLUME_BUNDLE_PLURAL,
38
+ create_volume_bundle_instance,
39
+ )
40
+ from ..core.storage import (
41
+ GCP_FILESTORE_TYPE,
42
+ GCS_FUSE_TYPE,
43
+ STORAGE_CRD_PLURAL,
44
+ XPK_API_GROUP_NAME,
45
+ XPK_API_GROUP_VERSION,
46
+ Storage,
47
+ create_storage_crds,
48
+ get_storage,
49
+ list_storages,
50
+ print_storages_for_cluster,
51
+ )
52
+ from ..utils.console import get_user_input, xpk_exit, xpk_print
53
+ from ..utils.kubectl import apply_kubectl_manifest
54
+
55
+
56
def storage_create(args: Namespace) -> None:
  """Create a Filestore instance and register it as a storage in the cluster.

  The flow is: create the Filestore instance, build its manifest, create the
  Storage custom resource and the VolumeBundle, make sure workload identity
  and the Filestore driver are enabled on the cluster, then apply the
  manifest.

  Args:
    args: Parsed command-line arguments. This function reads `type`, `name`,
      `instance`, `zone`, `project`, `vol`, `size`, `tier`, `access_mode`,
      `readonly` and `mount_point`. `instance` defaults to `name` when unset.
  """
  add_zone_and_project(args)

  if args.type != GCP_FILESTORE_TYPE:
    # Only the Filestore branch builds a manifest. Without this guard any
    # other type reached create_storage_crds with `manifest` unbound and
    # crashed with a NameError instead of a readable message.
    xpk_print(
        f"Storage type {args.type} is not supported by storage create;"
        f" expected {GCP_FILESTORE_TYPE}."
    )
    xpk_exit(1)
    return

  if args.instance is None:
    args.instance = args.name

  filestore_client = FilestoreClient(args.zone, args.instance, args.project)
  if filestore_client.check_instance_exists():
    xpk_print(f"Filestore instance {args.instance} already exists.")
    xpk_exit(1)

  filestore_network = get_cluster_network(args)
  xpk_print(
      f"Creating Filestore instance {args.instance} in network:"
      f" {filestore_network}"
  )
  filestore_client.create_instance(
      vol=args.vol, size=args.size, tier=args.tier, network=filestore_network
  )
  manifest = filestore_client.manifest(
      args.name, args.vol, args.access_mode, filestore_network
  )

  k8s_api_client = setup_k8s_env(args)
  create_storage_crds(k8s_api_client, args, manifest)
  create_volume_bundle_instance(
      k8s_api_client, args.name, manifest, args.readonly, args.mount_point
  )

  # Both cluster updates report failure through a positive return code.
  return_code = update_cluster_with_workload_identity_if_necessary(args)
  if return_code > 0:
    xpk_exit(return_code)
  return_code = update_cluster_with_gcpfilestore_driver_if_necessary(args)
  if return_code > 0:
    xpk_exit(return_code)

  apply_kubectl_manifest(k8s_api_client, manifest)
91
+
92
+
93
def storage_delete(args: Namespace) -> None:
  """Delete a Filestore instance and every storage that is backed by it.

  Storages whose `bucket` starts with the instance's full name are treated as
  belonging to the instance; their Kubernetes resources are removed before the
  instance itself is deleted.

  Args:
    args: Parsed command-line arguments. This function reads `name`, `zone`,
      `project` and `force`.
  """
  add_zone_and_project(args)
  k8s_api_client = setup_k8s_env(args)
  storages = list_storages(k8s_api_client)
  filestore_client = FilestoreClient(args.zone, args.name, args.project)

  if not filestore_client.check_instance_exists():
    xpk_print(f"Filestore instance {args.name} does not exist.")
    xpk_exit(1)

  filestore_instance_name = filestore_client.get_instance_fullname()

  # NOTE(review): this is a plain prefix match, so an instance whose full name
  # is a prefix of another instance's name would also match - confirm the
  # format of `storage.bucket` rules that out.
  children = [
      storage
      for storage in storages
      if storage.bucket.startswith(filestore_instance_name)
  ]

  # Confirmation is requested only when attached storages exist and --force
  # was not given.
  if children and not args.force:
    # The prompt previously ended with a stray "'" that was shown to the user.
    detach = get_user_input(
        "Deleting a filestore storage will destroy your filestore instance and"
        " all its data in all volumes will be lost. Do you wish to delete the"
        f" filestore instance {filestore_instance_name}?\n y (yes) / n (no):\n"
    )
    if not detach:
      xpk_print("Deleting storage canceled.")
      xpk_exit(0)

  for child in children:
    delete_storage_resources(k8s_api_client, child)

  filestore_client.delete_filestore_instance()
125
+
126
+
127
def storage_attach(args: Namespace) -> None:
  """Attach an existing Filestore instance or GCS bucket to the cluster.

  Builds the manifest for the requested storage type, creates the Storage
  custom resource and the VolumeBundle, makes sure workload identity and the
  matching CSI driver are enabled on the cluster, then applies the manifest.

  Args:
    args: Parsed command-line arguments. This function reads `type`, `name`,
      `zone`, `project`, `readonly` and `mount_point`; for Filestore also
      `instance`, `vol` and `access_mode`; for GCS FUSE also `bucket` and
      `size`. `instance` / `bucket` default to `name` when unset.
  """
  add_zone_and_project(args)
  if args.type == GCP_FILESTORE_TYPE:
    if args.instance is None:
      args.instance = args.name

    filestore_client = FilestoreClient(args.zone, args.instance, args.project)

    filestore_exists = filestore_client.check_instance_exists()
    if not filestore_exists:
      # Message grammar fixed ("does not exists" -> "does not exist") so it
      # matches the wording used by storage_delete.
      xpk_print(f"Filestore instance {args.instance} does not exist.")
      xpk_exit(1)

    filestore_network = get_cluster_network(args)
    manifest = filestore_client.manifest(
        args.name, args.vol, args.access_mode, filestore_network
    )

  else:  # args.type == GCS_FUSE_TYPE:
    if args.size is None:
      xpk_print("--size is required when attaching gcsfuse storage.")
      xpk_exit(1)

    if args.bucket is None:
      args.bucket = args.name

    manifest = gcsfuse.manifest(
        name=args.name, bucket=args.bucket, size=args.size
    )

  k8s_api_client = setup_k8s_env(args)
  create_storage_crds(k8s_api_client, args, manifest)
  create_volume_bundle_instance(
      k8s_api_client, args.name, manifest, args.readonly, args.mount_point
  )
  return_code = update_cluster_with_workload_identity_if_necessary(args)
  if return_code > 0:
    xpk_exit(return_code)

  # args.type can have only two values after parsing
  return_code = (
      update_cluster_with_gcsfuse_driver_if_necessary(args)
      if args.type == GCS_FUSE_TYPE
      else update_cluster_with_gcpfilestore_driver_if_necessary(args)
  )
  if return_code > 0:
    xpk_exit(return_code)

  apply_kubectl_manifest(k8s_api_client, manifest)
176
+
177
+
178
def storage_list(args: Namespace) -> None:
  """Print every storage returned by list_storages for the target cluster.

  Args:
    args: Parsed command-line arguments, passed to setup_k8s_env.
  """
  print_storages_for_cluster(list_storages(setup_k8s_env(args)))
182
+
183
+
184
def storage_detach(args: Namespace) -> None:
  """Look up the storage named `args.name` and delete its cluster resources.

  Args:
    args: Parsed command-line arguments; `name` selects the storage.
  """
  api_client = setup_k8s_env(args)
  delete_storage_resources(api_client, get_storage(api_client, args.name))
188
+
189
+
190
def delete_resource(api_call, resource_name: str, resource_kind: str) -> None:
  """
  Deletes a Kubernetes resource and handles potential API exceptions.

  A 404 response is reported and treated as "already deleted"; any other
  ApiException is reported and followed by xpk_exit(1).

  Args:
    api_call: The function to call for deleting the resource. It is invoked
      with `resource_name` as its only argument.
    resource_name: The name of the resource to delete.
    resource_kind: The kind of the resource, used only in log messages
      (e.g., "Persistent Volume Claim").
  """
  xpk_print(f"Deleting {resource_kind}:{resource_name}")
  try:
    api_call(resource_name)
  except ApiException as e:
    if e.status == 404:
      xpk_print(
          f"{resource_kind}: {resource_name} not found. "
          f"Might be already deleted. Error: {e}"
      )
      return
    else:
      xpk_print(f"Encountered error during {resource_kind} deletion: {e}")
      xpk_exit(1)
  xpk_print(f"Deleted {resource_kind}:{resource_name}")
213
+
214
+
215
def delete_storage_resources(
    k8s_api_client: ApiClient, storage: Storage
) -> None:
  """
  Deletes storage PV, PVC, SC and custom resources (if they exist).

  Resources are removed in this order: Persistent Volume Claim, Persistent
  Volume, Storage Class (Filestore storages only), VolumeBundle, and finally
  the Storage custom resource itself.

  Args:
    k8s_api_client: An ApiClient object for interacting with the Kubernetes API.
    storage: Storage to delete
  """
  # Build every API wrapper from the caller's client. CoreV1Api and
  # StorageV1Api were previously created without it, which silently fell back
  # to the process-wide default configuration.
  api_instance = k8s_client.CustomObjectsApi(k8s_api_client)
  core_api = k8s_client.CoreV1Api(k8s_api_client)
  storage_api = k8s_client.StorageV1Api(k8s_api_client)

  delete_resource(
      lambda name: core_api.delete_namespaced_persistent_volume_claim(
          name, "default"
      ),
      storage.pvc,
      "Persistent Volume Claim",
  )

  delete_resource(
      core_api.delete_persistent_volume, storage.pv, "Persistent Volume"
  )

  # Only Filestore storages get a Storage Class deleted here.
  if storage.type == GCP_FILESTORE_TYPE:
    delete_resource(
        storage_api.delete_storage_class,
        get_storage_class_name(storage.name),
        "Storage Class",
    )

  delete_resource(
      lambda name: api_instance.delete_namespaced_custom_object(
          namespace=DEFAULT_NAMESPACE,
          name=name,
          group=KJOB_API_GROUP_NAME,
          version=KJOB_API_GROUP_VERSION,
          plural=KJOB_API_VOLUME_BUNDLE_PLURAL,
      ),
      storage.name,
      "VolumeBundle",
  )

  delete_resource(
      lambda name: api_instance.delete_cluster_custom_object(
          name=name,
          group=XPK_API_GROUP_NAME,
          version=XPK_API_GROUP_VERSION,
          plural=STORAGE_CRD_PLURAL,
      ),
      storage.name,
      "Storage",
  )
@@ -0,0 +1,27 @@
1
+ """
2
+ Copyright 2025 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ from ..core.config import __version__
18
+ from ..utils.console import xpk_print
19
+
20
+
21
+ def get_xpk_version() -> str:
22
+ return __version__
23
+
24
+
25
+ def version(args) -> None: # pylint: disable=unused-argument
26
+ """Get version of xpk."""
27
+ xpk_print('xpk_version:', __version__)