xpk-0.13.0-py3-none-any.whl → xpk-0.14.1-py3-none-any.whl
This diff covers publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.
- integration/__init__.py +15 -0
- integration/docker_manager_test.py +102 -0
- integration/gcluster_a3mega_test.py +204 -0
- integration/gcluster_a3ultra_test.py +176 -0
- integration/gcluster_a4_test.py +176 -0
- integration/gcluster_test.py +107 -0
- xpk/commands/batch.py +9 -2
- xpk/commands/cluster.py +143 -117
- xpk/commands/cluster_gcluster.py +81 -14
- xpk/commands/cluster_gcluster_test.py +177 -0
- xpk/commands/cluster_test.py +92 -0
- xpk/commands/common.py +14 -26
- xpk/commands/info.py +11 -9
- xpk/commands/inspector.py +21 -10
- xpk/commands/job.py +25 -9
- xpk/commands/kind.py +39 -40
- xpk/commands/kjob_common.py +4 -4
- xpk/commands/run.py +9 -2
- xpk/commands/shell.py +13 -10
- xpk/commands/storage.py +21 -0
- xpk/commands/version.py +0 -4
- xpk/commands/workload.py +84 -29
- xpk/commands/workload_test.py +81 -0
- xpk/core/blueprint/blueprint_generator.py +4 -40
- xpk/core/blueprint/blueprint_test.py +0 -6
- xpk/core/blueprint/testing/__init__.py +15 -0
- xpk/core/capacity.py +6 -5
- xpk/core/cluster.py +91 -194
- xpk/core/cluster_private.py +6 -11
- xpk/core/commands.py +11 -18
- xpk/core/config.py +1 -1
- xpk/core/docker_image.py +3 -4
- xpk/core/gcloud_context.py +26 -2
- xpk/core/gcloud_context_test.py +96 -0
- xpk/core/gcluster_manager.py +0 -3
- xpk/core/jobset.py +4 -7
- xpk/core/kjob.py +14 -27
- xpk/core/kueue_manager.py +423 -0
- xpk/core/kueue_manager_test.py +574 -0
- xpk/core/monitoring.py +1 -1
- xpk/core/nap.py +10 -15
- xpk/core/network.py +17 -18
- xpk/core/nodepool.py +66 -77
- xpk/core/nodepool_test.py +198 -1
- xpk/core/pathways.py +5 -5
- xpk/core/ray.py +10 -14
- xpk/core/resources.py +6 -11
- xpk/core/scheduling.py +19 -1
- xpk/core/scheduling_test.py +31 -0
- xpk/core/system_characteristics.py +350 -232
- xpk/core/system_characteristics_test.py +73 -0
- xpk/core/vertex.py +1 -1
- xpk/core/workload.py +7 -8
- xpk/main.py +2 -4
- xpk/parser/cluster.py +7 -0
- xpk/parser/cluster_test.py +66 -0
- xpk/parser/common.py +11 -0
- xpk/parser/workload.py +62 -25
- xpk/parser/workload_test.py +82 -0
- xpk/templates/cluster_preheat.yaml.j2 +31 -0
- xpk/templates/filestore-pv.yaml +17 -0
- xpk/templates/filestore-pvc.yaml +11 -0
- xpk/templates/filestore-sc.yaml +10 -0
- xpk/templates/fuse-pv.yaml +17 -0
- xpk/templates/fuse-pvc.yaml +13 -0
- xpk/templates/kueue_config.yaml.j2 +95 -0
- xpk/templates/kueue_gke_default_topology.yaml.j2 +10 -0
- xpk/templates/kueue_sub_slicing_topology.yaml.j2 +14 -0
- xpk/templates/mtc-cpc.yaml +15 -0
- xpk/templates/volume_bundle.yaml +7 -0
- xpk/utils/feature_flags.py +28 -0
- xpk/utils/kueue.py +20 -0
- xpk/utils/templates.py +15 -0
- xpk/utils/topology.py +46 -0
- xpk/utils/topology_test.py +63 -0
- xpk/utils/validation.py +79 -55
- xpk/utils/validation_test.py +37 -0
- {xpk-0.13.0.dist-info → xpk-0.14.1.dist-info}/METADATA +6 -1
- xpk-0.14.1.dist-info/RECORD +133 -0
- xpk-0.14.1.dist-info/top_level.txt +2 -0
- xpk/core/kueue.py +0 -561
- xpk-0.13.0.dist-info/RECORD +0 -101
- xpk-0.13.0.dist-info/top_level.txt +0 -1
- {xpk-0.13.0.dist-info → xpk-0.14.1.dist-info}/WHEEL +0 -0
- {xpk-0.13.0.dist-info → xpk-0.14.1.dist-info}/entry_points.txt +0 -0
- {xpk-0.13.0.dist-info → xpk-0.14.1.dist-info}/licenses/LICENSE +0 -0
xpk/commands/job.py
CHANGED

@@ -25,6 +25,7 @@ from ..core.cluster import get_cluster_credentials
 from ..core.gcloud_context import add_zone_and_project
 from ..core.kjob import AppProfileDefaults
 from ..utils.console import xpk_exit, xpk_print
+from ..utils.validation import validate_dependencies_list, SystemDependency, should_validate_dependencies
 from .kind import set_local_cluster_command


@@ -59,12 +60,16 @@ def job_info(args):
   Returns:
     None
   """
+  if should_validate_dependencies(args):
+    validate_dependencies_list([
+        SystemDependency.KUBECTL,
+        SystemDependency.KJOB,
+        SystemDependency.GCLOUD,
+    ])
   job_name = args.name

   desc_command = f'kubectl-kjob describe slurm {job_name}'
-  desc_code, desc_text = run_command_for_value(
-      desc_command, 'Getting job data', args
-  )
+  desc_code, desc_text = run_command_for_value(desc_command, 'Getting job data')
   if desc_code != 0:
     xpk_print(f'Data info request returned ERROR {desc_code}')
     xpk_exit(desc_code)
@@ -76,7 +81,6 @@ def job_info(args):
   job_code, job_text = run_command_for_value(
       job_command,
       'Getting job info',
-      args,
       dry_run_return_val=JOBS_DRY_RUN_YAML,
   )
   if job_code != 0:
@@ -87,7 +91,6 @@ def job_info(args):
   pods_code, pods_text = run_command_for_value(
       pods_command,
       'Getting pods list',
-      args,
       dry_run_return_val=PODS_DRY_RUN_RESULT,
   )
   if pods_code != 0:
@@ -171,6 +174,12 @@ def job_list(args) -> None:
   Returns:
     None
   """
+  if should_validate_dependencies(args):
+    validate_dependencies_list([
+        SystemDependency.KUBECTL,
+        SystemDependency.KJOB,
+        SystemDependency.GCLOUD,
+    ])
   if not args.kind_cluster:
     add_zone_and_project(args)
     get_cluster_credentials(args)
@@ -183,14 +192,14 @@ def job_list(args) -> None:

   xpk_print(msg, flush=True)

-  return_code = run_slurm_job_list_command(args)
+  return_code = run_slurm_job_list_command()
   xpk_exit(return_code)


-def run_slurm_job_list_command(args) -> int:
+def run_slurm_job_list_command() -> int:
   cmd = f'kubectl-kjob list slurm --profile {AppProfileDefaults.NAME.value}'

-  return_code = run_command_with_updates(cmd, 'list jobs', args)
+  return_code = run_command_with_updates(cmd, 'list jobs')
   if return_code != 0:
     xpk_print(f'Listing jobs returned ERROR {return_code}')
   return return_code
@@ -205,6 +214,13 @@ def job_cancel(args) -> None:
   Returns:
     None
   """
+  if should_validate_dependencies(args):
+    validate_dependencies_list([
+        SystemDependency.KUBECTL,
+        SystemDependency.KJOB,
+        SystemDependency.GCLOUD,
+    ])
+
   xpk_print(f'Starting job cancel for job: {args.name}', flush=True)
   if not args.kind_cluster:
     add_zone_and_project(args)
@@ -222,7 +238,7 @@ def run_slurm_job_delete_command(args) -> int:
   list_of_jobs = ' '.join(args.name)
   cmd = f'kubectl-kjob delete slurm {list_of_jobs}'

-  return_code = run_command_with_updates(cmd, 'delete job', args)
+  return_code = run_command_with_updates(cmd, 'delete job')
   if return_code != 0:
     xpk_print(f'Delete job request returned ERROR {return_code}')
   return return_code
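Every job_* entry point in this file now opens with the same dependency gate. A minimal standalone sketch of that pattern, assembled from the added lines above (the wrapper function name is hypothetical, and the helpers' exact failure behavior is assumed rather than read from their implementation):

from argparse import Namespace

from xpk.utils.validation import (
    SystemDependency,
    should_validate_dependencies,
    validate_dependencies_list,
)


def check_job_command_dependencies(args: Namespace) -> None:
  # Hypothetical wrapper: 0.14.1 inlines this block at the top of
  # job_info, job_list and job_cancel rather than factoring it out.
  if should_validate_dependencies(args):
    # Presumably reports or aborts when kubectl, kubectl-kjob or gcloud
    # is missing from the environment.
    validate_dependencies_list([
        SystemDependency.KUBECTL,
        SystemDependency.KJOB,
        SystemDependency.GCLOUD,
    ])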
xpk/commands/kind.py
CHANGED

@@ -14,6 +14,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """

+from ..core.kueue_manager import (KueueConfig, KueueManager)
 from ..core.commands import (
     run_command_for_value,
     run_command_with_updates,
@@ -24,17 +25,14 @@ from ..core.kjob import (
     prepare_kjob,
     apply_kjob_crds,
 )
-from ..core.kueue import (
-    install_kueue_on_cluster,
-    install_kueue_crs,
-    wait_for_kueue_available,
-)
+from ..core.scheduling import get_total_chips_requested_from_args
 from ..core.storage import install_storage_crd
 from ..core.system_characteristics import (
     SystemCharacteristics,
     AcceleratorType,
 )
 from ..utils.console import (xpk_exit, xpk_print)
+from ..utils.validation import validate_dependencies_list, SystemDependency, should_validate_dependencies


 def cluster_create(args) -> None:
@@ -46,6 +44,12 @@ def cluster_create(args) -> None:
   Returns:
     0 if successful and 1 otherwise.
   """
+  if should_validate_dependencies(args):
+    validate_dependencies_list([
+        SystemDependency.KUBECTL,
+        SystemDependency.KJOB,
+        SystemDependency.GCLOUD,
+    ])
   xpk_print(f'Starting cluster create for cluster {args.cluster}:', flush=True)

   create_cluster_command_code = create_cluster_if_necessary(args)
@@ -64,18 +68,13 @@ def cluster_create(args) -> None:
   if set_jobset_on_cluster_code != 0:
     xpk_exit(set_jobset_on_cluster_code)

-  xpk_print('Enabling Kueue on the cluster')
-  install_kueue_on_cluster_code = install_kueue_on_cluster(args)
-  if install_kueue_on_cluster_code != 0:
-    xpk_exit(install_kueue_on_cluster_code)
-
   xpk_print('Verifying kjob installation')
-  err_code = verify_kjob_installed(args)
+  err_code = verify_kjob_installed()
   if err_code > 0:
     xpk_exit(err_code)

   xpk_print('Applying kjob CDRs')
-  err_code = apply_kjob_crds(args)
+  err_code = apply_kjob_crds()
   if err_code > 0:
     xpk_exit(err_code)

@@ -87,11 +86,6 @@ def cluster_create(args) -> None:
   k8s_client = setup_k8s_env(args)
   install_storage_crd(k8s_client)

-  xpk_print('Wait for Kueue to be fully available')
-  wait_for_kueue_available_code = wait_for_kueue_available(args)
-  if wait_for_kueue_available_code != 0:
-    xpk_exit(wait_for_kueue_available_code)
-
   args.num_slices = 1
   args.enable_pathways = False
   system = SystemCharacteristics(
@@ -102,12 +96,23 @@ def cluster_create(args) -> None:
       1,
       AcceleratorType['CPU'],
       'kind',
+      supports_sub_slicing=False,
   )

-
-
-
-
+  kueue_manager = KueueManager()
+  kueue_manager.install_or_upgrade(
+      KueueConfig(
+          system,
+          total_chips=get_total_chips_requested_from_args(args, system),
+          autoprovisioning_enabled=False,
+          num_slices=args.num_slices,
+          memory_limit='',
+          cpu_limit=0,
+          is_pathways_cluster=False,
+          flex=False,
+          configure_sub_slicing=False,
+      ),
+  )

   xpk_print('Kind commands done! Resources are created.')
   xpk_exit(0)
@@ -122,6 +127,8 @@ def cluster_delete(args) -> None:
   Returns:
     0 if successful and 1 otherwise.
   """
+  if should_validate_dependencies(args):
+    validate_dependencies_list([SystemDependency.GCLOUD])
   xpk_print(f'Starting cluster delete for cluster: {args.cluster}', flush=True)

   run_kind_cluster_delete_command_code = run_kind_cluster_delete_command(args)
@@ -134,13 +141,12 @@ def cluster_delete(args) -> None:
 def cluster_list(args) -> None:
   """Function around cluster list.

-  Args:
-    args: user provided arguments for running the command.
-
   Returns:
     0 if successful and 1 otherwise.
   """
-  if run_kind_clusters_list_command(args):
+  if should_validate_dependencies(args):
+    validate_dependencies_list([SystemDependency.GCLOUD])
+  if run_kind_clusters_list_command():
     xpk_exit(1)
   xpk_exit(0)

@@ -154,7 +160,7 @@ def create_cluster_if_necessary(args) -> int:
   Returns:
     0 if successful and 1 otherwise.
   """
-  all_clusters, return_code = get_all_local_clusters_programmatic(args)
+  all_clusters, return_code = get_all_local_clusters_programmatic()
   if return_code > 0:
     xpk_print('Listing all clusters failed!')
     return 1
@@ -179,7 +185,7 @@ def run_kind_cluster_delete_command(args) -> int:
   if args.cluster:
     command += f' --name={args.cluster}'

-  return_code = run_command_with_updates(command, 'Cluster Delete', args)
+  return_code = run_command_with_updates(command, 'Cluster Delete')
   if return_code != 0:
     xpk_print(f'Cluster delete request returned ERROR {return_code}')
     return 1
@@ -187,17 +193,14 @@ def run_kind_cluster_delete_command(args) -> int:
   return 0


-def run_kind_clusters_list_command(args) -> int:
+def run_kind_clusters_list_command() -> int:
   """List Kind Clusters within the project and location.

-  Args:
-    args: user provided arguments for running the command.
-
   Returns:
     0 if successful and 1 otherwise.
   """
   command = 'kind get clusters'
-  return_code = run_command_with_updates(command, 'Cluster List', args)
+  return_code = run_command_with_updates(command, 'Cluster List')
   if return_code != 0:
     xpk_print(f'Cluster list request returned ERROR {return_code}')
     return 1
@@ -222,25 +225,22 @@ def run_kind_cluster_create_command(args) -> int:
   if args.k8s_version:
     command += f' --image=kindest/node:v{args.k8s_version}'

-  return_code = run_command_with_updates(command, 'Kind Cluster Create', args)
+  return_code = run_command_with_updates(command, 'Kind Cluster Create')
   if return_code != 0:
     xpk_print(f'GKE Cluster Create request returned ERROR {return_code}')
     return 1
   return 0


-def get_all_local_clusters_programmatic(args) -> tuple[list[str], int]:
+def get_all_local_clusters_programmatic() -> tuple[list[str], int]:
   """Gets all the local clusters.

-  Args:
-    args: user provided arguments for running the command.
-
   Returns:
     List of cluster names and 0 if successful and 1 otherwise.
   """
   command = 'kind get clusters'
   return_code, raw_cluster_output = run_command_for_value(
-      command, 'Find if Cluster Exists', args
+      command, 'Find if Cluster Exists'
   )
   if return_code != 0:
     xpk_print(f'Find if Cluster Exists returned ERROR {return_code}')
@@ -261,7 +261,7 @@ def set_local_cluster_command(args) -> int:
   if not args.cluster:
     command = 'kubectl config current-context'
     return_code, current_context = run_command_for_value(
-        command, 'get current-context', args
+        command, 'get current-context'
     )
     xpk_print(
         'No local cluster name specified. Using current-context'
@@ -276,7 +276,6 @@ def set_local_cluster_command(args) -> int:
     return_code = run_command_with_updates(
         command,
         task,
-        args,
     )
     if return_code != 0:
       xpk_print(f'{task} returned ERROR {return_code}')
xpk/commands/kjob_common.py
CHANGED

@@ -35,11 +35,11 @@ def add_gpu_networking_annotations_to_command(args, cmd: str) -> str:

   annotations: tuple
   if gpu_type == H100_MEGA_DEVICE_TYPE:
-    annotations = get_a3mega_pod_template_annotations(args)
+    annotations = get_a3mega_pod_template_annotations()
   elif gpu_type == H200_DEVICE_TYPE:
-    annotations = get_a3ultra_pod_template_annotations(args)
+    annotations = get_a3ultra_pod_template_annotations()
   elif gpu_type == B200_DEVICE_TYPE:
-    annotations = get_a4_pod_template_annotations(args)
+    annotations = get_a4_pod_template_annotations()
   else:
     annotations = tuple()

@@ -54,7 +54,7 @@ def add_gpu_networking_annotations_to_command(args, cmd: str) -> str:
 def add_TAS_annotations_to_command(args, cmd: str) -> str:
   system_characteristics = get_cluster_system_characteristics(args)
   capacity_type = get_cluster_capacity_type(args)
-  if is_TAS_possible(system_characteristics, capacity_type, args):
+  if is_TAS_possible(system_characteristics, capacity_type):
     cmd += f" --pod-template-annotation {Kueue_TAS_annotation}"

   return cmd
xpk/commands/run.py
CHANGED

@@ -28,8 +28,9 @@ from ..core.kjob import (
     get_storage_annotations,
     prepare_kjob,
 )
-from ..core.kueue import LOCAL_QUEUE_NAME
+from ..core.kueue_manager import LOCAL_QUEUE_NAME
 from ..utils.console import xpk_exit, xpk_print
+from ..utils.validation import validate_dependencies_list, SystemDependency, should_validate_dependencies
 from .kind import set_local_cluster_command
 from .kjob_common import add_gpu_networking_annotations_to_command, add_TAS_annotations_to_command

@@ -42,6 +43,12 @@ def run(args: Namespace) -> None:
   Returns:
     None
   """
+  if should_validate_dependencies(args):
+    validate_dependencies_list([
+        SystemDependency.KUBECTL,
+        SystemDependency.KJOB,
+        SystemDependency.GCLOUD,
+    ])
   if not args.kind_cluster:
     add_zone_and_project(args)
     get_cluster_credentials(args)
@@ -126,7 +133,7 @@ def submit_job(args: Namespace) -> None:
   if args.time is not None:
     cmd += f' --time {args.time}'

-  return_code = run_command_with_full_controls(cmd, 'run task', args)
+  return_code = run_command_with_full_controls(cmd, 'run task')

   if return_code != 0:
     xpk_print(f'Running task returned ERROR {return_code}')
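Two small API notes fall out of this hunk: LOCAL_QUEUE_NAME is now exported by xpk.core.kueue_manager (xpk/core/kueue.py is deleted in 0.14.1), and the command runners are called without the trailing args argument. A hedged sketch of the updated call shape; the helper name is hypothetical, and how the runner now picks up global flags is an assumption:

from xpk.core.commands import run_command_with_full_controls


def run_task(cmd: str) -> int:
  # Hypothetical wrapper: in 0.13.0 this call also took the parsed args
  # namespace as a trailing argument; 0.14.1 drops that parameter.
  return run_command_with_full_controls(cmd, 'run task')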
xpk/commands/shell.py
CHANGED

@@ -14,6 +14,7 @@ limitations under the License.
 from ..core.commands import run_command_with_full_controls, run_command_for_value, run_command_with_updates
 from ..core.cluster import get_cluster_credentials, add_zone_and_project, setup_k8s_service_accounts
 from ..utils.console import xpk_exit, xpk_print
+from ..utils.validation import validate_dependencies_list, SystemDependency, should_validate_dependencies
 from argparse import Namespace

 from ..core.kjob import (
@@ -33,14 +34,18 @@ def shell(args: Namespace):
   Returns:
     0 if successful and 1 otherwise.
   """
+  if should_validate_dependencies(args):
+    validate_dependencies_list([
+        SystemDependency.KUBECTL,
+        SystemDependency.KJOB,
+        SystemDependency.GCLOUD,
+    ])
   exisitng_shell_pod_name = get_existing_shell_pod_name(args)

   if exisitng_shell_pod_name is None:
     return_code = connect_to_new_interactive_shell(args)
   else:
-    return_code = connect_to_existing_interactive_shell(
-        exisitng_shell_pod_name, args
-    )
+    return_code = connect_to_existing_interactive_shell(exisitng_shell_pod_name)

   if return_code != 0:
     xpk_print(f'The command failed with code {return_code}.')
@@ -60,7 +65,6 @@ def get_existing_shell_pod_name(args: Namespace) -> str | None:
           ' -o custom-columns=":metadata.name"'
       ),
       task='Get existing interactive shell pod name.',
-      global_args=args,
   )
   if return_code != 0:
     xpk_print(
@@ -95,21 +99,17 @@ def connect_to_new_interactive_shell(args: Namespace) -> int:
   return run_command_with_full_controls(
       command=cmd,
       task='Creating new interactive shell and entering it',
-      global_args=args,
       instructions=exit_instructions,
   )


-def connect_to_existing_interactive_shell(
-    pod_name: str, args: Namespace
-) -> int:
+def connect_to_existing_interactive_shell(pod_name: str) -> int:
   return run_command_with_full_controls(
       command=(
           f'kubectl exec --stdin --tty {pod_name} --'
           f' {get_pod_template_interactive_command()}'
       ),
       task='Entering existing interactive shell',
-      global_args=args,
       instructions=exit_instructions,
   )

@@ -121,6 +121,10 @@ def shell_stop(args: Namespace):
   Returns:
     0 if successful and 1 otherwise.
   """
+  if should_validate_dependencies(args):
+    validate_dependencies_list(
+        [SystemDependency.KUBECTL, SystemDependency.GCLOUD]
+    )
   exisitng_shell_pod_name = get_existing_shell_pod_name(args)

   if exisitng_shell_pod_name is None:
@@ -130,7 +134,6 @@ def shell_stop(args: Namespace):
   return_code = run_command_with_updates(
       command=f'kubectl delete pod {exisitng_shell_pod_name}',
       task='Deleting the existing shell.',
-      global_args=args,
   )
   if return_code != 0:
     xpk_exit(return_code)
xpk/commands/storage.py
CHANGED

@@ -59,9 +59,14 @@ from ..core.storage import (
 from ..utils.console import get_user_input, xpk_exit, xpk_print
 from ..utils.kubectl import apply_kubectl_manifest
 from ..utils.execution_context import is_dry_run
+from ..utils.validation import validate_dependencies_list, SystemDependency, should_validate_dependencies


 def storage_create(args: Namespace) -> None:
+  if should_validate_dependencies(args):
+    validate_dependencies_list(
+        [SystemDependency.KUBECTL, SystemDependency.GCLOUD]
+    )
   add_zone_and_project(args)
   if args.type == GCP_FILESTORE_TYPE:
     if args.instance is None:
@@ -107,6 +112,10 @@ def storage_create(args: Namespace) -> None:


 def storage_delete(args: Namespace) -> None:
+  if should_validate_dependencies(args):
+    validate_dependencies_list(
+        [SystemDependency.KUBECTL, SystemDependency.GCLOUD]
+    )
   add_zone_and_project(args)
   k8s_api_client = setup_k8s_env(args)
   storages = list_storages(k8s_api_client)
@@ -141,6 +150,10 @@ def storage_delete(args: Namespace) -> None:


 def storage_attach(args: Namespace) -> None:
+  if should_validate_dependencies(args):
+    validate_dependencies_list(
+        [SystemDependency.KUBECTL, SystemDependency.GCLOUD]
+    )
   add_zone_and_project(args)
   manifest: list[dict] = [{}]
   if args.type == GCP_FILESTORE_TYPE:
@@ -244,6 +257,10 @@ def enable_csi_drivers_if_necessary(args: Namespace) -> None:


 def storage_list(args: Namespace) -> None:
+  if should_validate_dependencies(args):
+    validate_dependencies_list(
+        [SystemDependency.KUBECTL, SystemDependency.GCLOUD]
+    )
   storages = []
   if not is_dry_run():
     k8s_api_client = setup_k8s_env(args)
@@ -252,6 +269,10 @@ def storage_list(args: Namespace) -> None:


 def storage_detach(args: Namespace) -> None:
+  if should_validate_dependencies(args):
+    validate_dependencies_list(
+        [SystemDependency.KUBECTL, SystemDependency.GCLOUD]
+    )
   k8s_api_client = setup_k8s_env(args)
   storage = get_storage(k8s_api_client, args.name)
   delete_storage_resources(k8s_api_client, storage)
xpk/commands/version.py
CHANGED

@@ -18,10 +18,6 @@ from ..core.config import __version__
 from ..utils.console import xpk_print


-def get_xpk_version() -> str:
-  return __version__
-
-
 def version(args) -> None:  # pylint: disable=unused-argument
   """Get version of xpk."""
   xpk_print('xpk_version:', __version__)