xpk 1.1.1__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xpk/commands/cluster.py +15 -6
- xpk/commands/cluster_test.py +16 -1
- xpk/core/cluster.py +31 -1
- xpk/core/cluster_test.py +61 -4
- xpk/core/docker_container.py +3 -1
- xpk/core/docker_resources.py +5 -5
- xpk/core/kubectl_common.py +77 -0
- xpk/core/kubectl_common_test.py +174 -0
- xpk/core/kueue_manager.py +26 -26
- xpk/core/kueue_manager_test.py +52 -12
- xpk/core/nodepool.py +34 -0
- xpk/core/nodepool_test.py +104 -0
- {xpk-1.1.1.dist-info → xpk-1.2.0.dist-info}/METADATA +1 -1
- {xpk-1.1.1.dist-info → xpk-1.2.0.dist-info}/RECORD +18 -16
- {xpk-1.1.1.dist-info → xpk-1.2.0.dist-info}/WHEEL +1 -1
- {xpk-1.1.1.dist-info → xpk-1.2.0.dist-info}/entry_points.txt +0 -0
- {xpk-1.1.1.dist-info → xpk-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {xpk-1.1.1.dist-info → xpk-1.2.0.dist-info}/top_level.txt +0 -0
xpk/commands/cluster.py
CHANGED
@@ -369,7 +369,7 @@ def cluster_create(args) -> None:
 
   get_cluster_credentials(args)
 
-  update_coredns_command_code = update_coredns_if_necessary()
+  update_coredns_command_code = update_coredns_if_necessary(args)
   if update_coredns_command_code != 0:
     xpk_exit(update_coredns_command_code)
 
@@ -927,7 +927,7 @@ def scale_down_deployment(deployment_name: str, namespace: str = 'kube-system'):
   xpk_print(f'{deployment_name} has been scaled down.')
 
 
-def scale_up_coredns(replicas: int
+def scale_up_coredns(replicas: int, namespace: str = 'kube-system'):
   """Scales up the CoreDNS deployment to a specified number of replicas."""
   command_coredns_scale = (
       f'kubectl scale deployment coredns --replicas={replicas} -n {namespace}'
@@ -1008,7 +1008,14 @@ def cleanup_coredns_repo(coredns_repo_full_path: str):
     xpk_print(f'Error deleting directory {coredns_repo_full_path}: {e}')
 
 
-def update_coredns() -> int:
+def _get_coredns_replica_count(args) -> int:
+  # XPK large scale guide recommends 15 coreDNS replicas for clusters with 5000 VMs.
+  # Otherwise, limit the replica count to the desired number of default pool nodes.
+  default_pool_node_count: int = args.default_pool_cpu_num_nodes
+  return min(15, default_pool_node_count)
+
+
+def update_coredns(args) -> int:
   """Updates and deploys CoreDNS within a cluster.
 
   Returns:
@@ -1018,6 +1025,8 @@ def update_coredns() -> int:
   coredns_repo_dir_name = 'deployment'
   coredns_repo_full_path = os.path.join(coredns_repo_dir, coredns_repo_dir_name)
   coredns_k8s_path = os.path.join(coredns_repo_full_path, 'kubernetes')
+  coredns_replica_count = _get_coredns_replica_count(args)
+
   # 1. Install jq
   install_jq()
 
@@ -1034,7 +1043,7 @@ def update_coredns() -> int:
   scale_down_deployment('kube-dns')
 
   # 6. Scale up coredns and verify readiness
-  scale_up_coredns(
+  scale_up_coredns(coredns_replica_count)
   verify_coredns_readiness()
 
   xpk_print('The CoreDNS setup process has been completed.')
@@ -1074,7 +1083,7 @@ def coredns_deployment_exists(namespace: str = 'kube-system') -> bool:
     return False
 
 
-def update_coredns_if_necessary() -> int:
+def update_coredns_if_necessary(args) -> int:
   """Updates and deploys CoreDNS within the cluster if it's not already present.
 
   This function checks for the existence of the CoreDNS deployment.
@@ -1089,7 +1098,7 @@ def update_coredns_if_necessary() -> int:
     return 0
   else:
     xpk_print('CoreDNS deployment not found. Proceeding with CoreDNS setup.')
-    return update_coredns()
+    return update_coredns(args)
 
 
 def create_cluster_if_necessary(
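
The new _get_coredns_replica_count helper caps CoreDNS at 15 replicas (the count the XPK large-scale guide recommends for roughly 5000-VM clusters) and otherwise matches the default CPU pool's node count. A minimal standalone sketch of that rule, with the two sample values taken from the tests in cluster_test.py below:

    def coredns_replicas(default_pool_cpu_num_nodes: int) -> int:
        # Cap at 15; smaller clusters get one replica per default-pool node.
        return min(15, default_pool_cpu_num_nodes)

    assert coredns_replicas(7) == 7    # below the cap
    assert coredns_replicas(20) == 15  # capped at 15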
xpk/commands/cluster_test.py
CHANGED
@@ -22,7 +22,7 @@ from unittest.mock import MagicMock, patch
 import pytest
 
 from xpk.core.telemetry import MetricsCollector
-from xpk.commands.cluster import _install_kueue, _validate_cluster_create_args, run_gke_cluster_create_command, cluster_create, _log_cluster_create_telemetry
+from xpk.commands.cluster import _install_kueue, _validate_cluster_create_args, _get_coredns_replica_count, run_gke_cluster_create_command, cluster_create, _log_cluster_create_telemetry
 from xpk.core.capacity import CapacityType
 from xpk.core.system_characteristics import SystemCharacteristics, UserFacingNameToSystemCharacteristics
 from xpk.core.testing.commands_tester import CommandsTester
@@ -787,3 +787,18 @@ def test_validate_cluster_create_args_sets_correct_num_slices(
   _validate_cluster_create_args(args, SUPER_SLICING_SYSTEM)
 
   assert args.num_slices == expected
+
+
+def test_get_coredns_replica_count_lower_limit_is_number_of_nodes():
+  args = construct_args(
+      default_pool_cpu_num_nodes=7,
+  )
+
+  assert _get_coredns_replica_count(args) == 7
+
+
+def test_get_coredns_replica_count_upper_limit_is_15():
+  args = construct_args(
+      default_pool_cpu_num_nodes=20,
+  )
+  assert _get_coredns_replica_count(args) == 15
xpk/core/cluster.py
CHANGED
@@ -21,6 +21,8 @@ from kubernetes import client as k8s_client
 from kubernetes import config
 from kubernetes.client.exceptions import ApiException
 
+from .kubectl_common import PatchResources, patch_controller_manager_resources
+from ..utils.feature_flags import FeatureFlags
 from ..utils.console import xpk_exit, xpk_print
 from .capacity import H200_DEVICE_TYPE
 from .commands import (
@@ -33,6 +35,7 @@ from .gcloud_context import (
     get_cluster_location,
     zone_to_region,
 )
+from .nodepool import recreate_nodes_in_existing_node_pools
 from .resources import get_cluster_system_characteristics
 from .system_characteristics import INSTALLER_NCCL_TCPXO, SystemCharacteristics
 
@@ -72,7 +75,21 @@ def set_jobset_on_cluster(args) -> int:
         ' https://github.com/google/xpk/blob/main/README.md#troubleshooting for'
         ' instructions on how to fix these permissions.'
     )
-
+    return return_code
+
+  if FeatureFlags.SUPER_SLICING_ENABLED and args.super_slicing:
+    return patch_controller_manager_resources(
+        name='jobset-controller-manager',
+        namespace='jobset-system',
+        patch_resources=PatchResources(
+            cpu_request=4,
+            cpu_limit=4,
+            memory_request='16Gi',
+            memory_limit='16Gi',
+        ),
+    )
+
+  return 0
 
 
 def set_pathways_job_on_cluster(args) -> int:
@@ -605,6 +622,19 @@ def update_gke_cluster_with_lustre_driver_enabled(args) -> int:
   if return_code != 0:
     xpk_print(f'GKE Cluster Update request returned ERROR {return_code}')
     return 1
+
+  xpk_print(
+      'Recreating existing nodes (if any) to complete the Lustre CSI driver'
+      ' installation.'
+  )
+  return_code = recreate_nodes_in_existing_node_pools(args)
+  if return_code != 0:
+    xpk_print(
+        f'Node recreation failed with ERROR {return_code}. You must recreate'
+        ' the nodes manually in order to access Lustre storage from your'
+        ' workloads.'
+    )
+    return 1
   return 0
 
 
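
With super slicing enabled, set_jobset_on_cluster now raises the JobSet controller manager's resources through the new patch_controller_manager_resources helper (added in xpk/core/kubectl_common.py further down). A minimal sketch of the strategic-merge patch that call boils down to, built here by hand rather than by calling xpk:

    import json

    # Resource values for the jobset controller manager, as set in the diff above.
    resources = {
        "requests": {"cpu": "4", "memory": "16Gi"},
        "limits": {"cpu": "4", "memory": "16Gi"},
    }
    patch = {
        "spec": {
            "template": {
                "spec": {"containers": [{"name": "manager", "resources": resources}]}
            }
        }
    }
    # The helper shells out to kubectl with a strategic-merge patch of this shape.
    print(
        "kubectl patch deployment jobset-controller-manager -n jobset-system"
        f" --type='strategic' --patch='{json.dumps(patch)}'"
    )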
xpk/core/cluster_test.py
CHANGED
@@ -14,10 +14,12 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """
 
+from unittest.mock import MagicMock
 import pytest
 from .testing.commands_tester import CommandsTester
-from .cluster import get_cluster_credentials, update_gke_cluster_with_lustre_driver_enabled, update_cluster_with_lustre_driver_if_necessary
+from .cluster import get_cluster_credentials, update_gke_cluster_with_lustre_driver_enabled, update_cluster_with_lustre_driver_if_necessary, set_jobset_on_cluster
 from pytest_mock import MockerFixture
+from ..utils.feature_flags import FeatureFlags
 
 
 @pytest.fixture(autouse=True)
@@ -26,6 +28,9 @@ def commands_tester(mocker: MockerFixture) -> CommandsTester:
       mocker=mocker,
       run_command_for_value_path="xpk.core.cluster.run_command_for_value",
       run_command_with_updates_path="xpk.core.cluster.run_command_with_updates",
+      run_command_with_updates_retry_path=(
+          "xpk.core.cluster.run_command_with_updates_retry"
+      ),
   )
 
 
@@ -38,7 +43,17 @@ def mock_location(mocker: MockerFixture):
 
 @pytest.fixture(autouse=True)
 def command_args(mocker: MockerFixture):
-  return mocker.Mock(
+  return mocker.Mock(
+      cluster="cluster", project="project", zone="zone", super_slicing=False
+  )
+
+
+@pytest.fixture(autouse=True)
+def mock_patch_controller_manager_resources(mocker: MockerFixture) -> MagicMock:
+  return mocker.patch(
+      "xpk.core.cluster.patch_controller_manager_resources",
+      return_value=0,
+  )
 
 
 def test_get_cluster_credentials_returns_1_when_retrieval_commands_fail(
@@ -166,11 +181,14 @@ def test_update_cluster_with_lustre_driver_if_necessary_with_legacy_port_runs_co
 
 
 def test_update_gke_cluster_with_lustre_driver_enabled_default_port(
-    commands_tester: CommandsTester, command_args
+    commands_tester: CommandsTester, command_args, mocker: MockerFixture
 ):
   commands_tester.set_result_for_command(
      (0, ""), "gcloud container clusters update"
   )
+  mocker.patch(
+      "xpk.core.cluster.recreate_nodes_in_existing_node_pools", return_value=0
+  )
   command_args.enable_legacy_lustre_port = None
   update_gke_cluster_with_lustre_driver_enabled(command_args)
 
@@ -181,12 +199,30 @@ def test_update_gke_cluster_with_lustre_driver_enabled_default_port(
   ]
 
 
+def test_update_gke_cluster_with_lustre_driver_enabled_fails_if_node_recreation_failed(
+    commands_tester: CommandsTester, command_args, mocker: MockerFixture
+):
+  commands_tester.set_result_for_command(
+      (0, ""), "gcloud container clusters update"
+  )
+  mocker.patch(
+      "xpk.core.cluster.recreate_nodes_in_existing_node_pools", return_value=123
+  )
+  command_args.enable_legacy_lustre_port = None
+  return_code = update_gke_cluster_with_lustre_driver_enabled(command_args)
+
+  assert return_code != 0
+
+
 def test_update_gke_cluster_with_lustre_driver_enabled_legacy_port(
-    commands_tester: CommandsTester, command_args
+    commands_tester: CommandsTester, command_args, mocker: MockerFixture
 ):
   commands_tester.set_result_for_command(
       (0, ""), "gcloud container clusters update"
   )
+  mocker.patch(
+      "xpk.core.cluster.recreate_nodes_in_existing_node_pools", return_value=0
+  )
   command_args.enable_legacy_lustre_port = True
   update_gke_cluster_with_lustre_driver_enabled(command_args)
 
@@ -195,3 +231,24 @@ def test_update_gke_cluster_with_lustre_driver_enabled_legacy_port(
       "gcloud container clusters update cluster --project=project"
       " --location=us-central1 --quiet --enable-legacy-lustre-port"
   ]
+
+
+def test_set_jobset_on_cluster_not_setting_resources_by_default(
+    mock_patch_controller_manager_resources: MagicMock, command_args
+):
+  result = set_jobset_on_cluster(command_args)
+
+  assert result == 0
+  mock_patch_controller_manager_resources.assert_not_called()
+
+
+def test_set_jobset_on_cluster_super_slicing_resources(
+    mock_patch_controller_manager_resources: MagicMock, command_args
+):
+  FeatureFlags.SUPER_SLICING_ENABLED = True
+  command_args.super_slicing = True
+
+  result = set_jobset_on_cluster(command_args)
+
+  assert result == 0
+  mock_patch_controller_manager_resources.assert_called()
xpk/core/docker_container.py
CHANGED
@@ -181,7 +181,9 @@ def get_main_container(
           tpu_stacktrace_terminate_command=tpu_stacktrace_terminate_command,
           gpu_workload_terminate_command=gpu_workload_terminate_command,
           xpk_internal_commands=xpk_internal_commands,
-          resources=get_main_container_resources(
+          resources=get_main_container_resources(
+              args, system, resource_type, parallel_containers
+          ),
           volume_mounts=volume_mounts,
       )
   )
xpk/core/docker_resources.py
CHANGED
@@ -23,7 +23,10 @@ from ..utils.execution_context import is_dry_run
 
 
 def get_main_container_resources(
-    args,
+    args,
+    system: SystemCharacteristics,
+    resource_type: str,
+    parallel_containers: int,
 ) -> str:
   """Resources for the main container.
   Args:
@@ -53,10 +56,7 @@
     offset_vCPUs = int(system.chips_per_vm) * 0.95
     return f'{resource_type}: {offset_vCPUs}'
 
-  return (
-      f'{resource_type}:'
-      f' {int(system.chips_per_vm / system.parallel_containers)}'
-  )
+  return f'{resource_type}: {int(system.chips_per_vm / parallel_containers)}'
 
 
 def get_env_container(args, system: SystemCharacteristics) -> str:
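
The rewritten return path divides chips per VM by the now-explicit parallel_containers argument instead of reading it from the system characteristics. A worked example with placeholder values (the resource key and chip count are illustrative, not taken from xpk):

    chips_per_vm, parallel_containers = 8, 2
    resource_type = "nvidia.com/gpu"  # placeholder resource key
    print(f"{resource_type}: {int(chips_per_vm / parallel_containers)}")
    # -> nvidia.com/gpu: 4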
xpk/core/kubectl_common.py
ADDED
@@ -0,0 +1,77 @@
+"""
+Copyright 2026 Google LLC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from collections import defaultdict
+from dataclasses import dataclass
+import json
+from typing import Any
+
+from .commands import run_command_with_updates_retry
+
+
+@dataclass(frozen=True)
+class PatchResources:
+  cpu_request: int | None = None
+  cpu_limit: int | None = None
+  memory_request: str | None = None
+  memory_limit: str | None = None
+
+
+_EMPTY_PATCH_RESOURCES = PatchResources()
+
+
+def patch_controller_manager_resources(
+    name: str,
+    namespace: str,
+    replicas: int | None = None,
+    patch_resources: PatchResources | None = None,
+) -> int:
+  if replicas is None and patch_resources is None:
+    return 0
+
+  patch: dict[str, Any] = {"spec": {}}
+
+  if replicas is not None:
+    patch["spec"]["replicas"] = replicas
+
+  if patch_resources and patch_resources != _EMPTY_PATCH_RESOURCES:
+    resources: dict[str, dict[str, str]] = defaultdict(dict)
+    if patch_resources.cpu_request is not None:
+      resources["requests"]["cpu"] = str(patch_resources.cpu_request)
+    if patch_resources.cpu_limit is not None:
+      resources["limits"]["cpu"] = str(patch_resources.cpu_limit)
+    if patch_resources.memory_request is not None:
+      resources["requests"]["memory"] = patch_resources.memory_request
+    if patch_resources.memory_limit is not None:
+      resources["limits"]["memory"] = patch_resources.memory_limit
+    patch["spec"]["template"] = {
+        "spec": {
+            "containers": [{
+                "name": "manager",
+                "resources": resources,
+            }]
+        }
+    }
+
+  patch_str = json.dumps(patch)
+  patch_command = (
+      f"kubectl patch deployment {name} -n {namespace}"
+      f" --type='strategic' --patch='{patch_str}'"
+  )
+  return run_command_with_updates_retry(
+      patch_command,
+      "Updating Controller Manager resources",
+  )
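
A usage sketch for the new helper, assuming xpk 1.2.0 is installed and kubectl already points at the target cluster; the deployment name and values mirror the super-slicing settings elsewhere in this diff and are only one possible combination:

    from xpk.core.kubectl_common import (
        PatchResources,
        patch_controller_manager_resources,
    )

    # Scale the Kueue controller manager and raise its CPU request in one strategic patch.
    return_code = patch_controller_manager_resources(
        name="kueue-controller-manager",
        namespace="kueue-system",
        replicas=3,
        patch_resources=PatchResources(cpu_request=16),
    )
    if return_code != 0:
        print(f"kubectl patch failed with code {return_code}")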
xpk/core/kubectl_common_test.py
ADDED
@@ -0,0 +1,174 @@
+"""
+Copyright 2026 Google LLC
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    https://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import json
+import pytest
+from xpk.core.kubectl_common import PatchResources, patch_controller_manager_resources
+from xpk.core.testing.commands_tester import CommandsTester
+
+
+@pytest.fixture
+def commands_tester(mocker):
+  return CommandsTester(
+      mocker,
+      run_command_with_updates_retry_path=(
+          "xpk.core.kubectl_common.run_command_with_updates_retry"
+      ),
+  )
+
+
+def test_patch_controller_manager_resources_full(
+    commands_tester: CommandsTester,
+):
+  result = patch_controller_manager_resources(
+      name="name",
+      namespace="namespace",
+      replicas=7,
+      patch_resources=PatchResources(
+          cpu_request=1,
+          cpu_limit=2,
+          memory_request="10Gi",
+          memory_limit="20Gi",
+      ),
+  )
+
+  assert result == 0
+  commands_tester.assert_command_run(
+      "kubectl patch deployment", "name", "-n namespace"
+  )
+  expected_patch_dict = {
+      "spec": {
+          "replicas": 7,
+          "template": {
+              "spec": {
+                  "containers": [{
+                      "name": "manager",
+                      "resources": {
+                          "requests": {"cpu": "1", "memory": "10Gi"},
+                          "limits": {"cpu": "2", "memory": "20Gi"},
+                      },
+                  }]
+              }
+          },
+      }
+  }
+  commands_tester.assert_command_run(
+      "kubectl patch", json.dumps(expected_patch_dict)
+  )
+
+
+def test_patch_controller_manager_resources_only_replicas(
+    commands_tester: CommandsTester,
+):
+  result = patch_controller_manager_resources(
+      name="name",
+      namespace="namespace",
+      replicas=7,
+  )
+
+  assert result == 0
+  expected_patch_dict = {
+      "spec": {
+          "replicas": 7,
+      }
+  }
+  commands_tester.assert_command_run(
+      "kubectl patch", json.dumps(expected_patch_dict)
+  )
+
+
+def test_patch_controller_manager_resources_only_requests(
+    commands_tester: CommandsTester,
+):
+  result = patch_controller_manager_resources(
+      name="name",
+      namespace="namespace",
+      patch_resources=PatchResources(
+          cpu_request=1,
+          memory_request="10Gi",
+      ),
+  )
+
+  assert result == 0
+  commands_tester.assert_command_run(
+      "kubectl patch deployment", "name", "-n namespace"
+  )
+  expected_patch_dict = {
+      "spec": {
+          "template": {
+              "spec": {
+                  "containers": [{
+                      "name": "manager",
+                      "resources": {
+                          "requests": {"cpu": "1", "memory": "10Gi"},
+                      },
+                  }]
+              }
+          },
+      }
+  }
+  commands_tester.assert_command_run(
+      "kubectl patch", json.dumps(expected_patch_dict)
+  )
+
+
+def test_patch_controller_manager_resources_only_limits(
+    commands_tester: CommandsTester,
+):
+  result = patch_controller_manager_resources(
+      name="name",
+      namespace="namespace",
+      patch_resources=PatchResources(
+          cpu_limit=2,
+          memory_limit="20Gi",
+      ),
+  )
+
+  assert result == 0
+  commands_tester.assert_command_run(
+      "kubectl patch deployment", "name", "-n namespace"
+  )
+  expected_patch_dict = {
+      "spec": {
+          "template": {
+              "spec": {
+                  "containers": [{
+                      "name": "manager",
+                      "resources": {
+                          "limits": {"cpu": "2", "memory": "20Gi"},
+                      },
+                  }]
+              }
+          },
+      }
+  }
+  commands_tester.assert_command_run(
+      "kubectl patch", json.dumps(expected_patch_dict)
+  )
+
+
+def test_patch_controller_manager_resources_propagates_error(
+    commands_tester: CommandsTester,
+):
+  commands_tester.set_result_for_command((123, "kubectl patch"))
+
+  result = patch_controller_manager_resources(
+      name="name",
+      namespace="namespace",
+      replicas=7,
+  )
+
+  assert result == 123
xpk/core/kueue_manager.py
CHANGED
@@ -20,6 +20,7 @@ from typing import Optional, List, Dict, Any
 import json
 from jinja2 import Environment, FileSystemLoader
 
+from .kubectl_common import PatchResources, patch_controller_manager_resources
 from ..utils.topology import get_slice_topology_level, get_topology_product, is_topology_contained
 from ..utils.kueue import is_queued_cluster
 from kubernetes.utils import parse_quantity
@@ -304,7 +305,9 @@ class KueueManager:
     if return_code != 0:
       return return_code
 
-    return self.__update_kueue_resources_if_necessary(
+    return self.__update_kueue_resources_if_necessary(
+        configure_super_slicing=kueue_config.configure_super_slicing
+    )
 
   def __build_template_context(
       self,
@@ -452,8 +455,23 @@ class KueueManager:
     command = f"kubectl apply -f {tmp_file}"
     return run_command_with_updates(command, task)
 
-  def __update_kueue_resources_if_necessary(
+  def __update_kueue_resources_if_necessary(
+      self, configure_super_slicing: bool
+  ) -> int:
     """Patch memory size limit if necessary."""
+    if configure_super_slicing:
+      return patch_controller_manager_resources(
+          name="kueue-controller-manager",
+          namespace="kueue-system",
+          replicas=3,
+          patch_resources=PatchResources(
+              cpu_request=16,
+              cpu_limit=16,
+              memory_request="64Gi",
+              memory_limit="64Gi",
+          ),
+      )
+
     # Get total number of nodes
     cmd_total_node_num = "kubectl get node --no-headers | wc -l"
     return_code, out = run_command_for_value(
@@ -465,31 +483,13 @@
     new_memory_limit = (
         f"{max(math.ceil(int(out) * MEMORY_SIZE_PER_VM), MIN_MEMORY_LIMIT_SIZE)}Mi"
     )
-
-    "
-
-
-
-
-            "resources": {"limits": {"memory": new_memory_limit}},
-        }]
-      }
-    }
-  }
-}
-    patch_str = json.dumps(patch)
-    patch_command = (
-        "kubectl patch deployment kueue-controller-manager -n kueue-system"
-        f" --type='strategic' --patch='{patch_str}'"
-    )
-    task = "Updating Kueue Controller Manager resources"
-    return_code = run_command_with_updates_retry(
-        patch_command,
-        task,
+    return patch_controller_manager_resources(
+        name="kueue-controller-manager",
+        namespace="kueue-system",
+        patch_resources=PatchResources(
+            memory_limit=new_memory_limit,
+        ),
     )
-    if return_code != 0:
-      xpk_print(f"{task} returned ERROR {return_code}")
-    return return_code
 
   def __autocorrect_resource_limits(
       self, kueue_config: KueueConfig
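
Outside the super-slicing path, the Kueue controller's memory limit is still derived from the node count as max(ceil(nodes * MEMORY_SIZE_PER_VM), MIN_MEMORY_LIMIT_SIZE) in Mi. A standalone sketch of that arithmetic; the constants 1.2 and 4096 are inferred from the test comments in kueue_manager_test.py below, not read from the xpk source:

    import math

    MEMORY_SIZE_PER_VM = 1.2      # assumed, per "100 * 1.2 = 120" in the tests
    MIN_MEMORY_LIMIT_SIZE = 4096  # assumed floor, per the expected "4096Mi"

    def kueue_memory_limit(node_count: int) -> str:
        return f"{max(math.ceil(node_count * MEMORY_SIZE_PER_VM), MIN_MEMORY_LIMIT_SIZE)}Mi"

    assert kueue_memory_limit(100) == "4096Mi"   # small cluster hits the floor
    assert kueue_memory_limit(5000) == "6000Mi"  # large cluster scales with nodes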
xpk/core/kueue_manager_test.py
CHANGED
@@ -21,6 +21,7 @@ from pytest_mock import MockerFixture
 import yaml
 from unittest.mock import MagicMock, patch
 
+from xpk.core.kubectl_common import PatchResources
 from xpk.core.kueue_manager import KueueConfig, KueueManager, has_sub_slicing_enabled, has_super_slicing_enabled
 from xpk.core.system_characteristics import GpuConfig, DockerPlatform, AcceleratorType, SystemCharacteristics, UserFacingNameToSystemCharacteristics
 from xpk.core.testing.commands_tester import CommandsTester
@@ -86,6 +87,14 @@ def mock_commands(mocker: MockerFixture) -> CommandsTester:
   )
 
 
+@pytest.fixture(autouse=True)
+def mock_patch_controller_manager_resources(mocker: MockerFixture) -> MagicMock:
+  return mocker.patch(
+      "xpk.core.kueue_manager.patch_controller_manager_resources",
+      return_value=0,
+  )
+
+
 @pytest.fixture(autouse=True)
 @patch("jinja2.Environment", return_value=MagicMock())
 def kueue_manager(mock_env: MagicMock) -> KueueManager:
@@ -239,7 +248,9 @@ def test_installation_without_tolerations(
 
 
 def test_resource_update_for_small_cluster(
-    mock_commands: CommandsTester,
+    mock_commands: CommandsTester,
+    kueue_manager: KueueManager,
+    mock_patch_controller_manager_resources: MagicMock,
 ):
   """Test resource update logic for a small cluster."""
   set_installed_kueue_version(mock_commands, None)
@@ -248,17 +259,21 @@
   result = kueue_manager.install_or_upgrade(KUEUE_CONFIG)
 
   assert result == 0
+
   # 100 * 1.2 = 120, which is less than 4096. So it should be 4096.
-
-  "
-
-
-
+  mock_patch_controller_manager_resources.assert_called_with(
+      name="kueue-controller-manager",
+      namespace="kueue-system",
+      patch_resources=PatchResources(
+          memory_limit="4096Mi",
+      ),
   )
 
 
 def test_resource_update_for_large_cluster(
-    mock_commands: CommandsTester,
+    mock_commands: CommandsTester,
+    kueue_manager: KueueManager,
+    mock_patch_controller_manager_resources: MagicMock,
 ):
   """Test resource update logic for a large cluster."""
   set_installed_kueue_version(mock_commands, None)
@@ -268,11 +283,36 @@
 
   assert result == 0
   # 5000 * 1.2 = 6000, which is > 4096.
-
-  "
-
-
-
+  mock_patch_controller_manager_resources.assert_called_with(
+      name="kueue-controller-manager",
+      namespace="kueue-system",
+      patch_resources=PatchResources(
+          memory_limit="6000Mi",
+      ),
+  )
+
+
+def test_resource_update_for_super_slicing_cluster(
+    mock_commands: CommandsTester,
+    kueue_manager: KueueManager,
+    mock_patch_controller_manager_resources: MagicMock,
+):
+  set_installed_kueue_version(mock_commands, None)
+  kueue_config = dataclasses.replace(KUEUE_CONFIG, configure_super_slicing=True)
+
+  result = kueue_manager.install_or_upgrade(kueue_config)
+
+  assert result == 0
+  mock_patch_controller_manager_resources.assert_called_with(
+      name="kueue-controller-manager",
+      namespace="kueue-system",
+      replicas=3,
+      patch_resources=PatchResources(
+          cpu_request=16,
+          cpu_limit=16,
+          memory_request="64Gi",
+          memory_limit="64Gi",
+      ),
   )
 
 
xpk/core/nodepool.py
CHANGED
@@ -691,3 +691,37 @@ def _validate_reservation_count(
     )
     return 1
   return 0
+
+
+def recreate_nodes_in_existing_node_pools(args) -> int:
+  """Triggers a manual upgrade of nodepools to the same version to force recreation
+  of nodes.
+  """
+
+  existing_node_pool_names, return_code = get_all_nodepools_programmatic(args)
+  if return_code > 0:
+    xpk_print('Listing all node pools failed!')
+    return return_code
+
+  commands = [
+      (
+          f'gcloud container clusters upgrade {args.cluster}'
+          f' --project={args.project}'
+          f' --node-pool={node_pool_name}'
+          f' --location={get_cluster_location(args.project, args.cluster, args.zone)}'
+          ' --quiet'
+      )
+      for node_pool_name in existing_node_pool_names
+  ]
+  task_names = [
+      f'NodesRecreate-{node_pool_name}'
+      for node_pool_name in existing_node_pool_names
+  ]
+  for i, command in enumerate(commands):
+    xpk_print(f'To complete {task_names[i]} we are executing {command}')
+  maybe_failure = run_commands(
+      commands,
+      'Recreate nodes in nodepools',
+      task_names,
+  )
+  return maybe_failure.return_code if maybe_failure is not None else 0
xpk/core/nodepool_test.py
CHANGED
@@ -20,6 +20,7 @@ from xpk.core.nodepool import (
     ensure_resource_policy_exists,
     get_desired_node_pool_names,
     run_gke_node_pool_create_command,
+    recreate_nodes_in_existing_node_pools,
     _validate_reservation_count,
 )
 from xpk.core.system_characteristics import AcceleratorType, SystemCharacteristics, DockerPlatform, GpuConfig
@@ -568,3 +569,106 @@ def test_run_gke_node_pool_create_command_partial_reservations(
   commands_tester.assert_command_run(
       "gcloud", "node-pools create", "test-cluster-np-2", "--reservation=res2"
   )
+
+
+def test_recreate_nodes_in_existing_node_pools_upgrades_existing_nodepools(
+    mocker,
+    commands_tester: CommandsTester,
+):
+  mocker.patch(
+      "xpk.core.nodepool.get_all_nodepools_programmatic",
+      return_value=(["nodepool1", "nodepool2"], 0),
+  )
+  mocker.patch(
+      "xpk.core.nodepool.get_cluster_location", return_value="us-central1"
+  )
+  args = mocker.Mock(
+      cluster="test-cluster",
+      project="test-project",
+      zone="us-central1-a",
+  )
+
+  commands_tester.set_result_for_command(
+      (0, ""), "gcloud container clusters upgrade"
+  )
+
+  result = recreate_nodes_in_existing_node_pools(args)
+
+  assert result == 0
+  commands_tester.assert_command_run(
+      "gcloud",
+      "container clusters upgrade test-cluster",
+      "--project=test-project",
+      "--node-pool=nodepool1",
+      "--location=us-central1",
+      "--quiet",
+  )
+  commands_tester.assert_command_run(
+      "gcloud",
+      "container clusters upgrade test-cluster",
+      "--project=test-project",
+      "--node-pool=nodepool2",
+      "--location=us-central1",
+      "--quiet",
+  )
+
+
+def test_recreate_nodes_in_existing_node_pools_returns_error_code_if_listing_fails(
+    mocker,
+    commands_tester: CommandsTester,
+):
+  mocker.patch(
+      "xpk.core.nodepool.get_all_nodepools_programmatic",
+      return_value=([], 123),
+  )
+  args = mocker.Mock(
+      cluster="test-cluster",
+      project="test-project",
+      zone="us-central1-a",
+  )
+
+  result = recreate_nodes_in_existing_node_pools(args)
+
+  assert result == 123
+
+
+def test_recreate_nodes_in_existing_node_pools_returns_error_code_if_upgrade_fails(
+    mocker,
+    commands_tester: CommandsTester,
+):
+  mocker.patch(
+      "xpk.core.nodepool.get_all_nodepools_programmatic",
+      return_value=(["nodepool1", "nodepool2"], 0),
+  )
+  mocker.patch(
+      "xpk.core.nodepool.get_cluster_location", return_value="us-central1"
+  )
+  args = mocker.Mock(
+      cluster="test-cluster",
+      project="test-project",
+      zone="us-central1-a",
+  )
+
+  commands_tester.set_result_for_command(
+      (123, ""), "gcloud container clusters upgrade"
+  )
+
+  result = recreate_nodes_in_existing_node_pools(args)
+
+  assert result == 123
+  commands_tester.assert_command_run(
+      "gcloud",
+      "container clusters upgrade test-cluster",
+      "--project=test-project",
+      "--node-pool=nodepool1",
+      "--location=us-central1",
+      "--quiet",
+  )
+  commands_tester.assert_command_not_run(
+      "gcloud",
+      "container clusters upgrade test-cluster",
+      "--project=test-project",
+      "--node-pool=nodepool2",
+      "--location=us-central1",
+      "--quiet",
+  )
{xpk-1.1.1.dist-info → xpk-1.2.0.dist-info}/RECORD
CHANGED
@@ -13,10 +13,10 @@ xpk/blueprints/a4/config-map.yaml.tftpl,sha256=o6LeGIYUfFGyj3vj-8ztV5ildQ46QZVl7
 xpk/blueprints/a4/nccl-rdma-installer-a4.yaml,sha256=if3WOmNLVGTJIJHU76EWC1FyiIXDTRIXcwo4OsBxarQ,2113
 xpk/blueprints/a4/storage_crd.yaml,sha256=r4WFXnSJJ25EUF-t4Ljfbl-cJoSaiFiZkP8451eTub4,1260
 xpk/commands/__init__.py,sha256=YPwWBbgLAu7L-YlTVGB2r8ZV4TzypURMRBcehSHHlLY,561
-xpk/commands/cluster.py,sha256=
+xpk/commands/cluster.py,sha256=9Eqj0d77_4FZ4ztsYFiIvX514jp-XvOTkZIVMdvitOI,45715
 xpk/commands/cluster_gcluster.py,sha256=H6_pphIrIzDCHZg-ZH3o-xA2AgVQKSjE2HKbhIW6-Xo,13727
 xpk/commands/cluster_gcluster_test.py,sha256=zdxz5gAMu3HRVNsj7F-VYRf4TYSPMjuOG7DolQN2Pb4,6263
-xpk/commands/cluster_test.py,sha256=
+xpk/commands/cluster_test.py,sha256=osBhaZTdB2kgQw1aY3W_ybFOqyb0UZncvVLyZNuKRpU,23878
 xpk/commands/common.py,sha256=fsM4Sud3y1RU6a8JHi99l13O4raYvW2oPahCBzvMwh4,3884
 xpk/commands/common_test.py,sha256=BDYFtN-cVfpEpj6akZy4R2KrnP53AIV1Lh1FEImhXx0,6106
 xpk/commands/config.py,sha256=L_zRpQTxMcSh6rxOT8gG263V6YGqzVoz4UxdWywTFdA,850
@@ -32,30 +32,32 @@ xpk/commands/workload_test.py,sha256=m79x6YDYn-36BX0CttTtAMdt_O-WJY40FLTGa6KwKg8
 xpk/core/__init__.py,sha256=YPwWBbgLAu7L-YlTVGB2r8ZV4TzypURMRBcehSHHlLY,561
 xpk/core/capacity.py,sha256=MGiNOwBCwg8Ci-hsssbZYIJ2xXTm6Y5yKTO4J5ozqEk,11053
 xpk/core/capacity_test.py,sha256=04ecANSECL3APmFCjdtkw2Wz6JxWkRZwE_QHa2m1_68,6026
-xpk/core/cluster.py,sha256=
+xpk/core/cluster.py,sha256=JLHFlrcc3Ch3ggLv2EJrOdQOOTn9MB-0IF5fQLIRPD0,25159
 xpk/core/cluster_private.py,sha256=43BEO49MA-p1KfKuchxVZk6wwIlRCUU17j-6MMMeq4I,6868
-xpk/core/cluster_test.py,sha256=
+xpk/core/cluster_test.py,sha256=0ymagvKxyX909NjA5J23lutq7xyHXlIWwvGRFoVE0n8,8492
 xpk/core/commands.py,sha256=at73VJHdZ4rVA8uvW997tNrvnCjP9v6zaw96bU0kd74,10841
 xpk/core/config.py,sha256=U2JDXx-XBuqQpZJf2iUDoww5--E8ejZfgmIxKeGu-gU,4668
 xpk/core/config_test.py,sha256=POSuofK0LFbNNygDAo2fjtKY4NMrRjUFeGcpBh9JOS4,3569
-xpk/core/docker_container.py,sha256=
+xpk/core/docker_container.py,sha256=9kJpTEholW_d_GamjcqunCWT4XwrDyZs3fcvcPNCb8Y,8294
 xpk/core/docker_image.py,sha256=9vwqbb6Mc3C5ZEOph03WS-EWI5hxMYGGigqzIMkDTjE,6909
 xpk/core/docker_manager.py,sha256=vGPCWPDB507sxEsXvSD4IM-h5HqQzYLk7WSdCUmSDb4,10568
-xpk/core/docker_resources.py,sha256=
+xpk/core/docker_resources.py,sha256=bwHGNh_gOtprVOeoFC8NObgKGD9aDjNc2XBMS6syD2Q,12562
 xpk/core/filestore.py,sha256=mcuUzsAPARbnrBG4fIGsEoN8NmzjaQ6k0tvIwMtjO9k,8068
 xpk/core/gcloud_context.py,sha256=d1wQ76zp7QMdG5BxB3sJz4b4OF5Mc8OzmPd_m0xd-Ys,6810
 xpk/core/gcloud_context_test.py,sha256=M8rp6S1zaEcAI7u4Bt8ukWKzv82HH5h9oYVojBcKgHk,5987
 xpk/core/gcluster_manager.py,sha256=lyv_MvdnkByy9_PEBj_ugAEBwnCbFNiWTSrEFjrMlPc,6236
 xpk/core/gcsfuse.py,sha256=kg5pgxdTjgiqquuGjev9fXzJPb8oiWPTK6wzCddzheQ,2125
 xpk/core/jobset.py,sha256=PJ4Fd8TNNLuYKNOMehoMYRIUEXyc5jsbHctJGqfW_8Y,4037
-xpk/core/
-xpk/core/
+xpk/core/kubectl_common.py,sha256=mVuHJERdRXVEigU9Fxcmq7SHABkWDUmfYTJFvkHEbLs,2353
+xpk/core/kubectl_common_test.py,sha256=2PTTcr5d8pSifuki2L7uA8-UWOqcIpv6PJQ1hlFruJQ,4469
+xpk/core/kueue_manager.py,sha256=8VVOEvkh6Cif3s9ii0HfUR27HuxEJWUwnt4x4YM6ydc,20031
+xpk/core/kueue_manager_test.py,sha256=Xb-w_eF11doqkpfNPb5EYm9BneEo3dtOxHuH-jtfjAI,23001
 xpk/core/monitoring.py,sha256=__bzTq_DIDAK8yIaN4F3MJh-yjYw5X1OlxmRgYOpf1g,4332
 xpk/core/mtc.py,sha256=pO7p3l-EzLFdTE8MdwWV8i0Zu-7epGql_kPoksVofIU,6259
 xpk/core/nap.py,sha256=gBxXu8Png1-BlAHbxLWZgbSXeLMGVixufkQVMR0fmvk,12963
 xpk/core/network.py,sha256=Oulb7U69lWkpOKxOC1C7ekJDpC51TLwd7XdZA3NQ7E0,10505
-xpk/core/nodepool.py,sha256=
-xpk/core/nodepool_test.py,sha256=
+xpk/core/nodepool.py,sha256=dgE4l-HETNvnVj6WfCNWZkegzqrLiNg61RotGTMPDd0,26575
+xpk/core/nodepool_test.py,sha256=Zf1NW9Hoj0D517NxSsxcj6p8NxeEbFmRpdIACz9NPdM,20207
 xpk/core/pathways.py,sha256=9w_VrpLLjQSSdNd8HJLWWtIYzA0NpR7t70knRSVLK0w,11574
 xpk/core/pathways_test.py,sha256=UeuSo_g9BNI27to-wflQwc6dJFVSA5-kOK_cjmY5qgU,1809
 xpk/core/ray.py,sha256=JWhc_ToRHpF4_URGnuE_47FMgamaRsA4KVUMpqThWzw,6145
@@ -142,9 +144,9 @@ xpk/utils/validation.py,sha256=rE9LTkXJT7jIesodFb9pONL7ixhLqiQleyoaz7N39Dw,2765
 xpk/utils/validation_test.py,sha256=PEDSMUqZdt_Lx1FSR-LOTXKKtsJ47JH1fxugM0Gfz6Y,1168
 xpk/utils/versions.py,sha256=_Ep68W70a9605XjiaOOpBa9Is9jXlsoOiwL8v5Xt-WA,897
 xpk/utils/yaml.py,sha256=j8xuAJ9yAAwnQi6ozwZ-nMnDyDnc3xWkeBZMtSuP4RU,844
-xpk-1.
-xpk-1.
-xpk-1.
-xpk-1.
-xpk-1.
-xpk-1.
+xpk-1.2.0.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
+xpk-1.2.0.dist-info/METADATA,sha256=Jkgz_1jDxS-vJnbqKI_Kdd5LRH9mikljvnEFXTk8lWA,10013
+xpk-1.2.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+xpk-1.2.0.dist-info/entry_points.txt,sha256=mzEtiIesFkT1kmcTUVDA1o3uOhiniX6tIz2wmOlMu1M,38
+xpk-1.2.0.dist-info/top_level.txt,sha256=aDe4N0jicmuWExx_6w0TxWQJaEuPSs9BnLU-3aF1GLo,4
+xpk-1.2.0.dist-info/RECORD,,
{xpk-1.1.1.dist-info → xpk-1.2.0.dist-info}/entry_points.txt
File without changes
{xpk-1.1.1.dist-info → xpk-1.2.0.dist-info}/licenses/LICENSE
File without changes
{xpk-1.1.1.dist-info → xpk-1.2.0.dist-info}/top_level.txt
File without changes