xpk 0.12.0__py3-none-any.whl → 0.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xpk/commands/batch.py +17 -10
- xpk/commands/cluster.py +137 -123
- xpk/commands/cluster_gcluster.py +77 -14
- xpk/commands/cluster_gcluster_test.py +177 -0
- xpk/commands/common.py +13 -27
- xpk/commands/info.py +11 -9
- xpk/commands/inspector.py +22 -11
- xpk/commands/job.py +53 -9
- xpk/commands/kind.py +38 -40
- xpk/commands/kjob_common.py +4 -4
- xpk/commands/run.py +9 -2
- xpk/commands/shell.py +13 -10
- xpk/commands/storage.py +26 -2
- xpk/commands/version.py +0 -4
- xpk/commands/workload.py +58 -30
- xpk/core/blueprint/blueprint_generator.py +4 -40
- xpk/core/blueprint/blueprint_test.py +0 -6
- xpk/core/capacity.py +6 -5
- xpk/core/cluster.py +96 -195
- xpk/core/cluster_private.py +9 -12
- xpk/core/commands.py +21 -25
- xpk/core/config.py +1 -1
- xpk/core/docker_image.py +17 -9
- xpk/core/docker_resources.py +9 -4
- xpk/core/gcloud_context.py +26 -2
- xpk/core/gcloud_context_test.py +96 -0
- xpk/core/gcluster_manager.py +0 -3
- xpk/core/jobset.py +5 -8
- xpk/core/kjob.py +19 -29
- xpk/core/kueue_manager.py +383 -0
- xpk/core/kueue_manager_test.py +542 -0
- xpk/core/monitoring.py +1 -1
- xpk/core/nap.py +11 -16
- xpk/core/network.py +18 -19
- xpk/core/nodepool.py +65 -71
- xpk/core/nodepool_test.py +198 -1
- xpk/core/pathways.py +9 -5
- xpk/core/ray.py +11 -15
- xpk/core/resources.py +15 -10
- xpk/core/scheduling.py +23 -1
- xpk/core/scheduling_test.py +31 -0
- xpk/core/system_characteristics.py +335 -229
- xpk/core/vertex.py +1 -1
- xpk/core/workload.py +7 -8
- xpk/main.py +3 -2
- xpk/parser/cluster.py +50 -0
- xpk/parser/cluster_test.py +66 -0
- xpk/parser/common.py +11 -0
- xpk/parser/workload.py +62 -25
- xpk/parser/workload_test.py +82 -0
- xpk/utils/execution_context.py +28 -0
- xpk/utils/feature_flags.py +28 -0
- xpk/utils/file.py +25 -10
- xpk/utils/kueue.py +20 -0
- xpk/utils/network.py +4 -0
- xpk/utils/templates.py +2 -0
- xpk/utils/topology.py +37 -0
- xpk/utils/topology_test.py +43 -0
- xpk/utils/validation.py +79 -55
- xpk/utils/validation_test.py +37 -0
- {xpk-0.12.0.dist-info → xpk-0.14.0.dist-info}/METADATA +6 -1
- xpk-0.14.0.dist-info/RECORD +112 -0
- xpk/core/kueue.py +0 -545
- xpk-0.12.0.dist-info/RECORD +0 -100
- {xpk-0.12.0.dist-info → xpk-0.14.0.dist-info}/WHEEL +0 -0
- {xpk-0.12.0.dist-info → xpk-0.14.0.dist-info}/entry_points.txt +0 -0
- {xpk-0.12.0.dist-info → xpk-0.14.0.dist-info}/licenses/LICENSE +0 -0
- {xpk-0.12.0.dist-info → xpk-0.14.0.dist-info}/top_level.txt +0 -0
xpk/commands/batch.py
CHANGED
|
@@ -29,8 +29,10 @@ from ..core.kjob import (
|
|
|
29
29
|
get_storage_annotations,
|
|
30
30
|
prepare_kjob,
|
|
31
31
|
)
|
|
32
|
-
from ..core.kueue import LOCAL_QUEUE_NAME
|
|
32
|
+
from ..core.kueue_manager import LOCAL_QUEUE_NAME
|
|
33
33
|
from ..utils.console import xpk_exit, xpk_print
|
|
34
|
+
from ..utils.execution_context import is_dry_run
|
|
35
|
+
from ..utils.validation import validate_dependencies_list, SystemDependency, should_validate_dependencies
|
|
34
36
|
from .kind import set_local_cluster_command
|
|
35
37
|
from .kjob_common import add_gpu_networking_annotations_to_command, add_TAS_annotations_to_command
|
|
36
38
|
|
|
@@ -43,6 +45,12 @@ def batch(args: Namespace) -> None:
|
|
|
43
45
|
Returns:
|
|
44
46
|
None
|
|
45
47
|
"""
|
|
48
|
+
if should_validate_dependencies(args):
|
|
49
|
+
validate_dependencies_list([
|
|
50
|
+
SystemDependency.KUBECTL,
|
|
51
|
+
SystemDependency.KJOB,
|
|
52
|
+
SystemDependency.GCLOUD,
|
|
53
|
+
])
|
|
46
54
|
if not args.kind_cluster:
|
|
47
55
|
add_zone_and_project(args)
|
|
48
56
|
get_cluster_credentials(args)
|
|
@@ -51,18 +59,16 @@ def batch(args: Namespace) -> None:
|
|
|
51
59
|
if set_cluster_command_code != 0:
|
|
52
60
|
xpk_exit(set_cluster_command_code)
|
|
53
61
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
62
|
+
if not is_dry_run():
|
|
63
|
+
err_code = prepare_kjob(args)
|
|
64
|
+
if err_code > 0:
|
|
65
|
+
xpk_exit(err_code)
|
|
66
|
+
setup_k8s_service_accounts()
|
|
58
67
|
|
|
59
68
|
submit_job(args)
|
|
60
69
|
|
|
61
70
|
|
|
62
71
|
def submit_job(args: Namespace) -> None:
|
|
63
|
-
|
|
64
|
-
setup_k8s_service_accounts()
|
|
65
|
-
|
|
66
72
|
cmd = (
|
|
67
73
|
'kubectl kjob create slurm'
|
|
68
74
|
f' --profile {AppProfileDefaults.NAME.value}'
|
|
@@ -73,7 +79,8 @@ def submit_job(args: Namespace) -> None:
|
|
|
73
79
|
cmd = add_gpu_networking_annotations_to_command(args, cmd)
|
|
74
80
|
cmd = add_TAS_annotations_to_command(args, cmd)
|
|
75
81
|
|
|
76
|
-
|
|
82
|
+
annotations = [] if is_dry_run() else get_storage_annotations(args)
|
|
83
|
+
for annotation in annotations:
|
|
77
84
|
cmd += f' --pod-template-annotation {annotation}'
|
|
78
85
|
|
|
79
86
|
if args.ignore_unknown_flags:
|
|
@@ -126,7 +133,7 @@ def submit_job(args: Namespace) -> None:
|
|
|
126
133
|
if args.time is not None:
|
|
127
134
|
cmd += f' --time {args.time}'
|
|
128
135
|
|
|
129
|
-
return_code, return_value = run_command_for_value(cmd, 'submit job', args)
|
|
136
|
+
return_code, return_value = run_command_for_value(cmd, 'submit job')
|
|
130
137
|
|
|
131
138
|
if return_code != 0:
|
|
132
139
|
xpk_print(f'Running batch job returned ERROR {return_code}')
|