xpk 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. xpk/__init__.py +15 -0
  2. xpk/api/__init__.py +15 -0
  3. xpk/api/storage_crd.yaml +52 -0
  4. xpk/commands/__init__.py +15 -0
  5. xpk/commands/batch.py +131 -0
  6. xpk/commands/cluster.py +808 -0
  7. xpk/commands/cluster_gcluster.py +269 -0
  8. xpk/commands/common.py +44 -0
  9. xpk/commands/config.py +29 -0
  10. xpk/commands/info.py +243 -0
  11. xpk/commands/inspector.py +357 -0
  12. xpk/commands/job.py +199 -0
  13. xpk/commands/kind.py +283 -0
  14. xpk/commands/kjob_common.py +44 -0
  15. xpk/commands/run.py +128 -0
  16. xpk/commands/shell.py +140 -0
  17. xpk/commands/storage.py +267 -0
  18. xpk/commands/version.py +27 -0
  19. xpk/commands/workload.py +889 -0
  20. xpk/core/__init__.py +15 -0
  21. xpk/core/blueprint/__init__.py +15 -0
  22. xpk/core/blueprint/blueprint_definitions.py +62 -0
  23. xpk/core/blueprint/blueprint_generator.py +708 -0
  24. xpk/core/capacity.py +185 -0
  25. xpk/core/cluster.py +564 -0
  26. xpk/core/cluster_private.py +200 -0
  27. xpk/core/commands.py +356 -0
  28. xpk/core/config.py +179 -0
  29. xpk/core/docker_container.py +225 -0
  30. xpk/core/docker_image.py +210 -0
  31. xpk/core/docker_manager.py +308 -0
  32. xpk/core/docker_resources.py +350 -0
  33. xpk/core/filestore.py +251 -0
  34. xpk/core/gcloud_context.py +196 -0
  35. xpk/core/gcluster_manager.py +176 -0
  36. xpk/core/gcsfuse.py +50 -0
  37. xpk/core/kjob.py +444 -0
  38. xpk/core/kueue.py +358 -0
  39. xpk/core/monitoring.py +134 -0
  40. xpk/core/nap.py +361 -0
  41. xpk/core/network.py +377 -0
  42. xpk/core/nodepool.py +581 -0
  43. xpk/core/pathways.py +377 -0
  44. xpk/core/ray.py +222 -0
  45. xpk/core/remote_state/__init__.py +15 -0
  46. xpk/core/remote_state/fuse_remote_state.py +99 -0
  47. xpk/core/remote_state/remote_state_client.py +38 -0
  48. xpk/core/resources.py +238 -0
  49. xpk/core/scheduling.py +253 -0
  50. xpk/core/storage.py +581 -0
  51. xpk/core/system_characteristics.py +1432 -0
  52. xpk/core/vertex.py +105 -0
  53. xpk/core/workload.py +341 -0
  54. xpk/core/workload_decorators/__init__.py +15 -0
  55. xpk/core/workload_decorators/rdma_decorator.py +129 -0
  56. xpk/core/workload_decorators/storage_decorator.py +52 -0
  57. xpk/core/workload_decorators/tcpxo_decorator.py +190 -0
  58. xpk/main.py +75 -0
  59. xpk/parser/__init__.py +15 -0
  60. xpk/parser/batch.py +43 -0
  61. xpk/parser/cluster.py +662 -0
  62. xpk/parser/common.py +259 -0
  63. xpk/parser/config.py +49 -0
  64. xpk/parser/core.py +135 -0
  65. xpk/parser/info.py +64 -0
  66. xpk/parser/inspector.py +65 -0
  67. xpk/parser/job.py +147 -0
  68. xpk/parser/kind.py +95 -0
  69. xpk/parser/run.py +47 -0
  70. xpk/parser/shell.py +59 -0
  71. xpk/parser/storage.py +316 -0
  72. xpk/parser/validators.py +39 -0
  73. xpk/parser/version.py +23 -0
  74. xpk/parser/workload.py +726 -0
  75. xpk/templates/__init__.py +15 -0
  76. xpk/templates/storage.yaml +13 -0
  77. xpk/utils/__init__.py +15 -0
  78. xpk/utils/console.py +55 -0
  79. xpk/utils/file.py +82 -0
  80. xpk/utils/gcs_utils.py +125 -0
  81. xpk/utils/kubectl.py +57 -0
  82. xpk/utils/network.py +168 -0
  83. xpk/utils/objects.py +88 -0
  84. xpk/utils/templates.py +28 -0
  85. xpk/utils/validation.py +80 -0
  86. xpk/utils/yaml.py +30 -0
  87. xpk-0.0.1.dist-info/LICENSE +202 -0
  88. xpk-0.0.1.dist-info/METADATA +1498 -0
  89. xpk-0.0.1.dist-info/RECORD +92 -0
  90. xpk-0.0.1.dist-info/WHEEL +5 -0
  91. xpk-0.0.1.dist-info/entry_points.txt +2 -0
  92. xpk-0.0.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,357 @@
1
+ """
2
+ Copyright 2024 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ from ..core.cluster import get_cluster_credentials
18
+ from ..core.commands import run_command_for_value
19
+ from ..core.gcloud_context import add_zone_and_project, zone_to_region
20
+ from ..core.kueue import CLUSTER_QUEUE_NAME, LOCAL_QUEUE_NAME
21
+ from ..core.resources import CLUSTER_METADATA_CONFIGMAP, CLUSTER_RESOURCES_CONFIGMAP
22
+ from ..utils.console import xpk_exit, xpk_print
23
+ from ..utils.file import append_tmp_file, write_tmp_file
24
+ from .workload import get_workload_list
25
+
26
+
27
def inspector_run_command_helper(
    args, command, command_description, file
) -> int:
  """Executes one diagnostic command and records its output for xpk inspector.

  On success the output is wrapped with a header (the command and its
  description) and a separator line, appended to `file`, and also printed
  when `args.print_to_terminal` is set. On failure the error is only printed
  and nothing is appended to `file`.

  Args:
    args: user provided arguments for running the command.
    command: the cli command to run.
    command_description: a brief description of the command run.
    file: file to add command output to.

  Returns:
    0 if successful and 1 otherwise.
  """
  exit_status, captured = run_command_for_value(
      command, f'{command_description}', args
  )
  if exit_status != 0:
    xpk_print(
        f'{command} returned ERROR {exit_status} with output: {captured}'
    )
    return 1

  header = f'Command: {command}\nCommand Description: {command_description}\n'
  separator = '========================================================'
  report = f'{header} \n{captured} \n{separator} \n'

  append_tmp_file(report, file)
  if args.print_to_terminal:
    xpk_print(report)
  return 0
59
+
60
+
61
def inspector_run_workload_list_helper(args, command_description, file) -> int:
  """Runs a workload list command for xpk inspector, and builds the output file.

  A failing workload list is printed and signalled through the return value
  rather than by terminating the process, so the caller can record the
  failure and keep collecting the remaining diagnostics. This mirrors
  inspector_run_command_helper.

  Args:
    args: user provided arguments for running the command. The workload list
      filters are read from it by get_workload_list.
    command_description: a brief description of the command run.
    file: file to add command output to.

  Returns:
    0 if successful and 1 otherwise.
  """
  return_code, command_output = get_workload_list(args)
  if return_code != 0:
    xpk_print(
        f'{command_description} returned ERROR {return_code} with output:'
        f' {command_output}'
    )
    return 1

  prefix = f'Command Description: {command_description}\n'
  postfix = '========================================================'
  inspector_command_output = f'{prefix} \n{command_output} \n{postfix} \n'

  append_tmp_file(inspector_command_output, file)
  if args.print_to_terminal:
    xpk_print(inspector_command_output)
  return 0
82
+
83
+
84
def inspector_output_link_helper(args, link, link_description, file) -> int:
  """Writes a described link into the xpk inspector output file.

  The entry is also printed when `args.print_to_terminal` is set.

  Args:
    args: user provided arguments for running the command.
    link: link to output.
    link_description: describes what the link is for.
    file: file to add the link entry to.

  Returns:
    Always 0; the int return keeps the helper signatures uniform.
  """
  entry = '\n'.join([
      f'Link Description: {link_description}',
      f'Link: {link}',
      '========================================================',
  ])
  append_tmp_file(entry, file)
  if args.print_to_terminal:
    xpk_print(entry)
  return 0
105
+
106
+
107
def _inspector_commands(args) -> list[tuple[str, str]]:
  """Builds the (command, description) pairs inspector runs on every call."""
  region = zone_to_region(args.zone)
  # Shared by the "all nodes" and "healthy node count" commands.
  node_status_columns = (
      "kubectl get node -o custom-columns='NODE_NAME:metadata.name,"
      ' READY_STATUS:.status.conditions[?(@.type=="Ready")].status,'
      " NODEPOOL:metadata.labels.cloud\\.google\\.com/gke-nodepool'"
  )
  return [
      ('gcloud version', 'Local Setup: gcloud version'),
      (
          (
              'gcloud config get project; gcloud config get compute/zone;'
              ' gcloud config get compute/region'
          ),
          'Local Setup: Project / Zone / Region',
      ),
      (
          (
              'gcloud beta container clusters list --project'
              f' {args.project} --region {region} | grep -e'
              f' NAME -e {args.cluster}'
          ),
          'GKE: Cluster Details',
      ),
      (
          (
              'kubectl get configmap'
              f' {args.cluster}-{CLUSTER_METADATA_CONFIGMAP} -o yaml'
          ),
          'GKE: Cluster Metadata ConfigMap Details',
      ),
      (
          (
              'kubectl get configmap'
              f' {args.cluster}-{CLUSTER_RESOURCES_CONFIGMAP} -o yaml'
          ),
          'GKE: Cluster Resources ConfigMap Details',
      ),
      (
          (
              f'gcloud beta container node-pools list --cluster {args.cluster} '
              f' --project={args.project} --region={region}'
          ),
          'GKE: Node pool Details',
      ),
      (node_status_columns, 'Kubectl: All Nodes'),
      (
          (
              'kubectl get node -o'
              " custom-columns=':metadata.labels.cloud\\.google\\.com/gke-nodepool'"
              ' | sort | uniq -c'
          ),
          'Kubectl: Number of Nodes per Node Pool',
      ),
      (
          (
              f'{node_status_columns} |'
              " grep -w True | awk {'print $3'} | sort | uniq -c"
          ),
          'Kubectl: Healthy Node Count Per Node Pool',
      ),
      (
          f'kubectl describe ClusterQueue {CLUSTER_QUEUE_NAME}',
          'Kueue: ClusterQueue Details',
      ),
      (
          f'kubectl describe LocalQueue {LOCAL_QUEUE_NAME}',
          'Kueue: LocalQueue Details',
      ),
      ('kubectl describe ResourceFlavor', 'Kueue: ResourceFlavor Details'),
      (
          (
              'kubectl describe Deployment kueue-controller-manager -n'
              ' kueue-system'
          ),
          'Kueue: Kueue Deployment Details',
      ),
      (
          (
              'kubectl describe Deployment jobset-controller-manager -n'
              ' jobset-system'
          ),
          'Jobset: Deployment Details',
      ),
      (
          (
              'kubectl logs deployment/kueue-controller-manager -n kueue-system'
              ' --tail=100 --prefix=True'
          ),
          'Kueue Manager Logs',
      ),
      (
          (
              'kubectl logs deployment/jobset-controller-manager -n'
              ' jobset-system --tail=100 --prefix=True'
          ),
          'Jobset Manager Logs',
      ),
  ]


def _inspector_links(args) -> list[tuple[str, str]]:
  """Builds the (description, link) Cloud Console pairs inspector outputs."""
  region = zone_to_region(args.zone)
  # pylint: disable=line-too-long
  links = [
      (
          'Cloud Console for the GKE Cluster',
          f'https://console.cloud.google.com/kubernetes/clusters/details/{region}/{args.cluster}/details?project={args.project}',
      ),
      (
          'Cloud Console for all workloads in GKE Cluster',
          f'https://console.cloud.google.com/kubernetes/workload/overview?project={args.project}&pageState=((gke%2F{region}%2F{args.cluster}))',
      ),
      (
          'Cloud Console for IAM Permissions',
          f'https://console.cloud.google.com/iam-admin/iam?project={args.project}',
      ),
      (
          'Cloud Console for Quotas',
          f'https://console.cloud.google.com/iam-admin/quotas?project={args.project}',
      ),
  ]
  # The workload specific link, when requested, is listed last.
  if args.workload:
    links.append((
        f'Cloud Console for the workload {args.workload}',
        f'https://console.cloud.google.com/kubernetes/service/{region}/{args.cluster}/default/{args.workload}/details?project={args.project}',
    ))
  # pylint: enable=line-too-long
  return links


def _inspect_command(args, command, description, file) -> int:
  """Runs one command through the inspector helper and reports a failure."""
  return_code = inspector_run_command_helper(args, command, description, file)
  if return_code != 0:
    xpk_print(
        f'inspector failed in command: {command} description:'
        f' {description} return code: {return_code}'
    )
  return return_code


def _inspect_workload_list(args, filter_by_status, filter_by_job, file) -> int:
  """Sets the workload list filters on args, runs the list, reports a failure."""
  # The filters are passed to the workload list through attributes on args.
  args.filter_by_job = filter_by_job
  args.filter_by_status = filter_by_status
  command_description = (
      f'xpk workload list --filter-by-status={args.filter_by_status}'
      f' --filter-by-job={args.filter_by_job} --project={args.project} --zone={args.zone}'
      f' --cluster={args.cluster}'
  )
  return_code = inspector_run_workload_list_helper(
      args, command_description, file
  )
  if return_code != 0:
    xpk_print(
        f'inspector failed in description: {command_description} return code:'
        f' {return_code}'
    )
  return return_code


def inspector(args) -> None:
  """Function around inspector which investigates failures in the kueue.

  Runs a fixed set of gcloud / kubectl diagnostics, several workload list
  views and, when `args.workload` is set, workload specific commands. The
  output and a set of Cloud Console links are written to a temporary file
  whose name is printed at the end. A failing step does not stop the run.

  Args:
    args: user provided arguments for running the command. The attributes
      `filter_by_job` and `filter_by_status` are overwritten while the
      workload list views are produced.

  Returns:
    None. The function ends by calling xpk_exit with 0 if every step
    succeeded, otherwise with the return code of the last failing step.
  """
  # Future improvements for inspector:
  # - List what is next in Queue.
  # - Split inspector into different subcommands to parse info easier.

  final_return_code = 0
  xpk_print(args)

  add_zone_and_project(args)
  get_cluster_credentials(args)

  inspector_file = write_tmp_file(
      '==================\nXPK inspector OUTPUT:\n==================\n'
  )

  for command, description in _inspector_commands(args):
    return_code = _inspect_command(args, command, description, inspector_file)
    if return_code != 0:
      final_return_code = return_code

  # Workload list views:
  for filter_by_status in ('EVERYTHING', 'QUEUED', 'RUNNING'):
    return_code = _inspect_workload_list(
        args, filter_by_status, None, inspector_file
    )
    if return_code != 0:
      final_return_code = return_code

  # If a workload argument is provided, list out workload specific details.
  if args.workload:
    xpk_print(args.workload)
    return_code = _inspect_workload_list(
        args, 'EVERYTHING', args.workload, inspector_file
    )
    if return_code != 0:
      final_return_code = return_code

    workload_commands = [
        (
            f'kubectl describe jobsets {args.workload}',
            f'Jobset config for {args.workload}',
        ),
        (
            f'kubectl describe workloads jobset-{args.workload}',
            f'Workload config for {args.workload}',
        ),
    ]
    for command, description in workload_commands:
      return_code = _inspect_command(
          args, command, description, inspector_file
      )
      if return_code != 0:
        final_return_code = return_code

  # Cloud Console Links:
  for description, link in _inspector_links(args):
    return_code = inspector_output_link_helper(
        args, link, description, inspector_file
    )
    if return_code != 0:
      final_return_code = return_code
      xpk_print(
          f'inspector failed in link: {link} description:'
          f' {description} return code: {return_code}'
      )

  # Summarize inspector:
  xpk_print(f'Find xpk inspector output file: {inspector_file.name}')

  if final_return_code != 0:
    xpk_print(
        'Something was unable to run in xpk inspector, please look through the'
        ' output as it may clue to the failure reason. Return Code:'
        f' {final_return_code}'
    )
  xpk_exit(final_return_code)
xpk/commands/job.py ADDED
@@ -0,0 +1,199 @@
1
+ """
2
+ Copyright 2024 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ import re
18
+ import sys
19
+
20
+ from ruamel.yaml import YAML
21
+
22
+ from ..core.commands import run_command_for_value, run_command_with_updates
23
+ from ..core.gcloud_context import add_zone_and_project
24
+ from ..core.kjob import AppProfileDefaults
25
+ from ..utils.console import xpk_exit, xpk_print
26
+ from .common import set_cluster_command
27
+ from .kind import set_local_cluster_command
28
+
29
+
30
def job_info(args):
  """Run commands obtaining information about a job given by name.

  Collects the kjob description, the job manifest and the pod list of the
  job named `args.name`, and prints a YAML summary to stdout. Any failing
  command is reported and ends the process through xpk_exit with its return
  code; a job that cannot be found in the listing exits with code 1.

  Args:
    args: user provided arguments for running the command.

  Returns:
    None
  """
  job_name = args.name

  desc_command = f'kubectl-kjob describe slurm {job_name}'
  desc_code, desc_text = run_command_for_value(
      desc_command, 'Getting job data', args
  )
  if desc_code != 0:
    xpk_print(f'Data info request returned ERROR {desc_code}')
    xpk_exit(desc_code)

  job_command = (
      'kubectl-kjob list slurm -o yaml --field-selector'
      f' metadata.name=={job_name}'
  )
  job_code, job_text = run_command_for_value(
      job_command, 'Getting job info', args
  )
  if job_code != 0:
    xpk_print(f'Job info request returned ERROR {job_code}')
    xpk_exit(job_code)

  pods_command = f'kubectl get pods -l=job-name={job_name} --no-headers'
  pods_code, pods_text = run_command_for_value(
      pods_command, 'Getting pods list', args
  )
  if pods_code != 0:
    xpk_print(f'Pods list request returned ERROR {pods_code}')
    xpk_exit(pods_code)

  yaml_parser = YAML(typ='safe')
  # An empty document or an empty `items` list means no job matched the
  # selector; report it instead of failing on the index lookup.
  job_items = (yaml_parser.load(job_text) or {}).get('items') or []
  if not job_items:
    xpk_print(f'Job {job_name} was not found.')
    xpk_exit(1)
  job_yaml = job_items[0]

  output = {
      'Job name': job_name,
      'Script name': get_script_name(job_yaml),
      'Profile': get_profile(job_yaml),
      'Labels': job_yaml.get('metadata', {}).get('labels', []),
      'Mounts': get_mounts(job_yaml),
      'Pods': get_pods(pods_text),
      'Entrypoint environment variables template': get_kjob_env_vars(desc_text),
  }

  # Block style output that keeps the key order of `output`.
  yaml_parser.default_flow_style = False
  yaml_parser.sort_base_mapping_type_on_output = False
  yaml_parser.dump(output, sys.stdout)
84
+
85
+
86
def get_profile(job_yaml: dict) -> str:
  """Returns the PROFILE env var value of the job's first container.

  Args:
    job_yaml: job manifest as a dictionary.

  Returns:
    The value of the first env entry named PROFILE, or an empty string when
    the job has no containers or no such entry.
  """
  pod_spec = job_yaml.get('spec', {}).get('template', {}).get('spec', {})
  first_container = next(iter(pod_spec.get('containers', [])), {})
  for env_var in first_container.get('env', []):
    if env_var['name'] == 'PROFILE':
      return env_var['value']
  return ''
96
+
97
+
98
def get_mounts(job_yaml: dict) -> list[dict]:
  """Returns the volume mounts of the job's first container.

  Args:
    job_yaml: job manifest as a dictionary.

  Returns:
    The `volumeMounts` list of the first container, or an empty list when
    the job has no containers or the container declares no mounts.
  """
  pod_spec = job_yaml.get('spec', {}).get('template', {}).get('spec', {})
  first_container = next(iter(pod_spec.get('containers', [])), {})
  return first_container.get('volumeMounts', [])
107
+
108
+
109
def get_kjob_env_vars(job_desc_text: str) -> list[str]:
  """Extracts the SLURM_* variable assignments from a job description.

  Args:
    job_desc_text: text searched for `SLURM_<NAME>=<value>` occurrences.

  Returns:
    One `SLURM_<NAME>=<value>` string per match, in order of appearance.
    Each match runs from `SLURM_` to the end of its line.
  """
  # A single capture group makes findall return plain strings, not tuples.
  return re.findall(r'(SLURM_[A-Z_]*=.*)', job_desc_text)
113
+
114
+
115
def get_pods(pods_text: str) -> list[dict[str, str]]:
  """Parses whitespace separated pod rows into name / status entries.

  Args:
    pods_text: text with one pod per line, where the first column is the pod
      name and the third column is its status.

  Returns:
    A list of `{'Name': ..., 'Status': ...}` dictionaries, one per row.
    Blank lines and rows with fewer than three columns are skipped, so empty
    input yields an empty list.
  """
  rows = (line.split() for line in pods_text.splitlines())
  return [{'Name': row[0], 'Status': row[2]} for row in rows if len(row) >= 3]
125
+
126
+
127
+ def get_script_name(job_yaml: dict) -> str | None:
128
+ return (
129
+ job_yaml.get('metadata', {})
130
+ .get('annotations', {})
131
+ .get('kjobctl.x-k8s.io/script', '')
132
+ )
133
+
134
+
135
def job_list(args) -> None:
  """Points kubectl at the target cluster and lists its slurm jobs.

  For a kind cluster the local cluster command is used; otherwise zone and
  project are resolved first. The function always ends through xpk_exit,
  either with the failing cluster command code or with the list result.

  Args:
    args: user provided arguments for running the command.

  Returns:
    None
  """
  if args.kind_cluster:
    cluster_setup_code = set_local_cluster_command(args)
    announcement = 'Listing jobs:'
  else:
    add_zone_and_project(args)
    cluster_setup_code = set_cluster_command(args)
    announcement = (
        f'Listing jobs for project {args.project} and zone {args.zone}:'
    )

  if cluster_setup_code != 0:
    xpk_exit(cluster_setup_code)
  xpk_print(announcement, flush=True)

  xpk_exit(run_slurm_job_list_command(args))
158
+
159
+
160
def run_slurm_job_list_command(args) -> int:
  """Lists slurm jobs of the default app profile through kubectl-kjob.

  Args:
    args: user provided arguments for running the command.

  Returns:
    The return code of the list command; a non-zero code is also printed.
  """
  list_command = (
      f'kubectl-kjob list slurm --profile {AppProfileDefaults.NAME.value}'
  )
  status = run_command_with_updates(list_command, 'list jobs', args)
  if status == 0:
    return status
  xpk_print(f'Listing jobs returned ERROR {status}')
  return status
167
+
168
+
169
def job_cancel(args) -> None:
  """Points kubectl at the target cluster and deletes the named slurm jobs.

  For a kind cluster the local cluster command is used; otherwise zone and
  project are resolved first. The function always ends through xpk_exit,
  either with the failing cluster command code or with the delete result.

  Args:
    args: user provided arguments for running the command.

  Returns:
    None
  """
  xpk_print(f'Starting job cancel for job: {args.name}', flush=True)

  if args.kind_cluster:
    cluster_setup_code = set_local_cluster_command(args)
  else:
    add_zone_and_project(args)
    cluster_setup_code = set_cluster_command(args)

  if cluster_setup_code != 0:
    xpk_exit(cluster_setup_code)

  xpk_exit(run_slurm_job_delete_command(args))
190
+
191
+
192
def run_slurm_job_delete_command(args) -> int:
  """Deletes the slurm jobs named in `args.name` through kubectl-kjob.

  Args:
    args: user provided arguments for running the command; `args.name` is
      joined with spaces to form the list of jobs to delete.

  Returns:
    The return code of the delete command; a non-zero code is also printed.
  """
  job_names = ' '.join(args.name)
  delete_command = f'kubectl-kjob delete slurm {job_names}'
  status = run_command_with_updates(delete_command, 'delete job', args)
  if status == 0:
    return status
  xpk_print(f'Delete job request returned ERROR {status}')
  return status