xpk 0.17.1__py3-none-any.whl → 0.17.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xpk/commands/cluster.py +0 -22
- xpk/commands/cluster_gcluster.py +1 -13
- xpk/commands/cluster_gcluster_test.py +0 -10
- xpk/commands/cluster_test.py +0 -4
- xpk/commands/kind.py +0 -21
- xpk/commands/storage.py +0 -25
- xpk/core/cluster.py +1 -3
- xpk/core/config.py +0 -15
- xpk/core/system_characteristics.py +1 -16
- xpk/core/workload_decorators/rdma_decorator.py +0 -15
- xpk/core/workload_decorators/tcpx_decorator.py +0 -8
- xpk/core/workload_decorators/tcpx_decorator_test.py +0 -78
- xpk/core/workload_decorators/tcpxo_decorator.py +0 -16
- xpk/parser/common.py +0 -151
- xpk/parser/core.py +0 -31
- xpk/utils/validation.py +0 -8
- {xpk-0.17.1.dist-info → xpk-0.17.3.dist-info}/METADATA +1 -1
- {xpk-0.17.1.dist-info → xpk-0.17.3.dist-info}/RECORD +22 -33
- xpk/commands/batch.py +0 -144
- xpk/commands/job.py +0 -244
- xpk/commands/kjob_common.py +0 -60
- xpk/commands/run.py +0 -140
- xpk/commands/shell.py +0 -142
- xpk/core/kjob.py +0 -473
- xpk/parser/batch.py +0 -43
- xpk/parser/job.py +0 -147
- xpk/parser/run.py +0 -47
- xpk/parser/shell.py +0 -59
- xpk/templates/volume_bundle.yaml +0 -7
- {xpk-0.17.1.dist-info → xpk-0.17.3.dist-info}/WHEEL +0 -0
- {xpk-0.17.1.dist-info → xpk-0.17.3.dist-info}/entry_points.txt +0 -0
- {xpk-0.17.1.dist-info → xpk-0.17.3.dist-info}/licenses/LICENSE +0 -0
- {xpk-0.17.1.dist-info → xpk-0.17.3.dist-info}/top_level.txt +0 -0
xpk/commands/run.py
DELETED
|
@@ -1,140 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Copyright 2025 Google LLC
|
|
3
|
-
|
|
4
|
-
Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
-
you may not use this file except in compliance with the License.
|
|
6
|
-
You may obtain a copy of the License at
|
|
7
|
-
|
|
8
|
-
https://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
-
|
|
10
|
-
Unless required by applicable law or agreed to in writing, software
|
|
11
|
-
distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
-
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
-
See the License for the specific language governing permissions and
|
|
14
|
-
limitations under the License.
|
|
15
|
-
"""
|
|
16
|
-
|
|
17
|
-
from argparse import Namespace
|
|
18
|
-
|
|
19
|
-
from ..core.cluster import (
|
|
20
|
-
setup_k8s_service_accounts,
|
|
21
|
-
get_cluster_credentials,
|
|
22
|
-
)
|
|
23
|
-
from ..core.commands import run_command_with_full_controls
|
|
24
|
-
from ..core.gcloud_context import add_zone_and_project
|
|
25
|
-
from ..core.kjob import (
|
|
26
|
-
AppProfileDefaults,
|
|
27
|
-
JobTemplateDefaults,
|
|
28
|
-
get_storage_annotations,
|
|
29
|
-
prepare_kjob,
|
|
30
|
-
)
|
|
31
|
-
from ..core.kueue_manager import LOCAL_QUEUE_NAME
|
|
32
|
-
from ..utils.console import xpk_exit, xpk_print
|
|
33
|
-
from ..utils.validation import validate_dependencies_list, SystemDependency, should_validate_dependencies
|
|
34
|
-
from .kind import set_local_cluster_command
|
|
35
|
-
from .kjob_common import add_gpu_networking_annotations_to_command, add_TAS_annotations_to_command
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def run(args: Namespace) -> None:
  """Prepare the target cluster and submit the user's script through kjob.

  The steps are, in order: optional validation of the required system
  dependencies, pointing the tooling at the cluster (local kind cluster or a
  cluster resolved from zone/project), preparing kjob, setting up the
  Kubernetes service accounts and finally delegating to `submit_job`.

  Args:
    args: user provided arguments for running the command.

  Returns:
    None. A non-zero code from selecting the local cluster, or a positive
    code from `prepare_kjob`, is forwarded to `xpk_exit`.
  """
  if should_validate_dependencies(args):
    required_tools = [
        SystemDependency.KUBECTL,
        SystemDependency.KJOB,
        SystemDependency.GCLOUD,
    ]
    validate_dependencies_list(required_tools)

  if args.kind_cluster:
    # Local kind cluster: no zone/project lookup is performed.
    local_cluster_code = set_local_cluster_command(args)
    if local_cluster_code != 0:
      xpk_exit(local_cluster_code)
  else:
    add_zone_and_project(args)
    get_cluster_credentials(args)

  kjob_code = prepare_kjob(args)
  if kjob_code > 0:
    xpk_exit(kjob_code)
  setup_k8s_service_accounts()

  submit_job(args)
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
def submit_job(args: Namespace) -> None:
  """Assemble the `kubectl kjob create slurm` command line and execute it.

  The command is built in three parts: the fixed kjob invocation, the kjob
  options derived from `args` (networking/TAS/storage annotations, wait
  timeout, unknown-flag handling), and, after the `--` separator, the script
  followed by every Slurm-style option the user actually supplied.

  Args:
    args: user provided arguments for running the command.

  Returns:
    None. When the command finishes with a non-zero code, the code is
    printed and forwarded to `xpk_exit`.
  """
  pieces = [
      'kubectl kjob create slurm --profile',
      f' {AppProfileDefaults.NAME.value} ',
      f' --localqueue {LOCAL_QUEUE_NAME} ',
      f' --stream-container {JobTemplateDefaults.CONTAINER_NAME.value}',
      f' --worker-container {JobTemplateDefaults.CONTAINER_NAME.value}',
      ' --wait --rm --first-node-ip',
  ]
  cmd = ''.join(pieces)
  cmd = add_gpu_networking_annotations_to_command(args, cmd)
  cmd = add_TAS_annotations_to_command(args, cmd)

  cmd += ''.join(
      f' --pod-template-annotation {annotation}'
      for annotation in get_storage_annotations(args)
  )

  if args.timeout:
    cmd += f' --wait-timeout {args.timeout}s'

  if args.ignore_unknown_flags:
    cmd += ' --ignore-unknown-flags'

  # Everything after `--` is the script plus the options forwarded with it.
  cmd += f' -- {args.script} --partition {LOCAL_QUEUE_NAME}'

  # (attribute on args, command-line flag), in the order they are emitted.
  forwarded_options = (
      ('array', '--array'),
      ('cpus_per_task', '--cpus-per-task'),
      ('gpus_per_task', '--gpus-per-task'),
      ('mem', '--mem'),
      ('mem_per_task', '--mem-per-task'),
      ('mem_per_cpu', '--mem-per-cpu'),
      ('mem_per_gpu', '--mem-per-gpu'),
      ('nodes', '--nodes'),
      ('ntasks', '--ntasks'),
      ('output', '--output'),
      ('error', '--error'),
      ('input', '--input'),
      ('job_name', '--job-name'),
      ('chdir', '--chdir'),
      ('time', '--time'),
  )
  for attribute, flag in forwarded_options:
    value = getattr(args, attribute)
    if value is not None:
      cmd += f' {flag} {value}'

  return_code = run_command_with_full_controls(cmd, 'run task')

  if return_code != 0:
    xpk_print(f'Running task returned ERROR {return_code}')
    xpk_exit(return_code)
|
xpk/commands/shell.py
DELETED
|
@@ -1,142 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Copyright 2024 Google LLC
|
|
3
|
-
Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
-
you may not use this file except in compliance with the License.
|
|
5
|
-
You may obtain a copy of the License at
|
|
6
|
-
https://www.apache.org/licenses/LICENSE-2.0
|
|
7
|
-
Unless required by applicable law or agreed to in writing, software
|
|
8
|
-
distributed under the License is distributed on an "AS IS" BASIS,
|
|
9
|
-
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
10
|
-
See the License for the specific language governing permissions and
|
|
11
|
-
limitations under the License.
|
|
12
|
-
"""
|
|
13
|
-
|
|
14
|
-
from ..core.commands import run_command_with_full_controls, run_command_for_value, run_command_with_updates
|
|
15
|
-
from ..core.cluster import get_cluster_credentials, add_zone_and_project, setup_k8s_service_accounts
|
|
16
|
-
from ..utils.console import xpk_exit, xpk_print
|
|
17
|
-
from ..utils.validation import validate_dependencies_list, SystemDependency, should_validate_dependencies
|
|
18
|
-
from argparse import Namespace
|
|
19
|
-
|
|
20
|
-
from ..core.kjob import (
|
|
21
|
-
AppProfileDefaults,
|
|
22
|
-
prepare_kjob,
|
|
23
|
-
get_pod_template_interactive_command,
|
|
24
|
-
get_storage_annotations,
|
|
25
|
-
)
|
|
26
|
-
|
|
27
|
-
# Hint passed as `instructions` whenever an interactive shell command is run.
exit_instructions = 'To exit the shell input "exit".'
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
def shell(args: Namespace):
  """Enter an interactive shell, reusing a running shell pod if one exists.

  Args:
    args: user provided arguments for running the command.

  Returns:
    Nothing is handed back to the caller; the outcome is reported through
    `xpk_exit`: the failing command's code on error, 0 otherwise.
  """
  if should_validate_dependencies(args):
    required_tools = [
        SystemDependency.KUBECTL,
        SystemDependency.KJOB,
        SystemDependency.GCLOUD,
    ]
    validate_dependencies_list(required_tools)

  running_pod = get_existing_shell_pod_name(args)

  return_code = (
      connect_to_new_interactive_shell(args)
      if running_pod is None
      else connect_to_existing_interactive_shell(running_pod)
  )

  if return_code != 0:
    xpk_print(f'The command failed with code {return_code}.')
    xpk_exit(return_code)

  xpk_exit(0)
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
def get_existing_shell_pod_name(args: Namespace) -> str | None:
  """Look up the name of an already running interactive shell pod.

  Lists the names of pods in the Running phase and keeps the first one whose
  name contains both the kjob app profile name and the word 'interactive'.

  Args:
    args: user provided arguments for running the command.

  Returns:
    The first matching pod name, or None when no pod matches. If listing
    the pods fails, the error code is printed and forwarded to `xpk_exit`.
  """
  if not args.kind_cluster:
    add_zone_and_project(args)
    get_cluster_credentials(args)

  list_pods_command = (
      'kubectl get pods --no-headers --field-selector status.phase=Running'
      ' -o custom-columns=":metadata.name"'
  )
  status, listing = run_command_for_value(
      command=list_pods_command,
      task='Get existing interactive shell pod name.',
  )
  if status != 0:
    xpk_print(
        f'Encounter an error with a code {status} when checking for'
        ' existing running shell.'
    )
    xpk_exit(status)

  profile_name = AppProfileDefaults.NAME.value
  matching_pods = (
      pod
      for pod in listing.strip().split('\n')
      if profile_name in pod and 'interactive' in pod
  )
  return next(matching_pods, None)
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
def connect_to_new_interactive_shell(args: Namespace) -> int:
  """Create a fresh kjob interactive shell and attach to it.

  Args:
    args: user provided arguments for running the command.

  Returns:
    The code returned by running the `kubectl-kjob create interactive`
    command. A positive code from `prepare_kjob` is forwarded to `xpk_exit`
    before any command is built.
  """
  kjob_code = prepare_kjob(args)
  if kjob_code > 0:
    xpk_exit(kjob_code)
  setup_k8s_service_accounts()

  base_command = (
      'kubectl-kjob create interactive --profile'
      f' {AppProfileDefaults.NAME.value} --pod-running-timeout 180s'
  )
  annotation_flags = ''.join(
      f' --pod-template-annotation {annotation}'
      for annotation in get_storage_annotations(args)
  )

  return run_command_with_full_controls(
      command=base_command + annotation_flags,
      task='Creating new interactive shell and entering it',
      instructions=exit_instructions,
  )
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
def connect_to_existing_interactive_shell(pod_name: str) -> int:
  """Attach to an interactive shell pod that is already running.

  Args:
    pod_name: name of the pod to `kubectl exec` into.

  Returns:
    The code returned by running the `kubectl exec` command.
  """
  exec_command = (
      f'kubectl exec --stdin --tty {pod_name} --'
      f' {get_pod_template_interactive_command()}'
  )
  return run_command_with_full_controls(
      command=exec_command,
      task='Entering existing interactive shell',
      instructions=exit_instructions,
  )
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
def shell_stop(args: Namespace):
  """Stop the running interactive shell by deleting its pod.

  Args:
    args: user provided arguments for running the command.

  Returns:
    Nothing is handed back to the caller; the outcome is reported through
    `xpk_exit`: 0 when no shell is running or the pod was deleted, the
    delete command's code otherwise.
  """
  if should_validate_dependencies(args):
    required_tools = [SystemDependency.KUBECTL, SystemDependency.GCLOUD]
    validate_dependencies_list(required_tools)

  running_pod = get_existing_shell_pod_name(args)

  if running_pod is None:
    xpk_print('There is no shell running to stop')
    xpk_exit(0)

  delete_code = run_command_with_updates(
      command=f'kubectl delete pod {running_pod}',
      task='Deleting the existing shell.',
  )
  if delete_code != 0:
    xpk_exit(delete_code)

  xpk_print('The shell was deleted successfully.')
  xpk_exit(0)
|