xpk 0.17.3__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xpk/commands/cluster.py +4 -13
- xpk/commands/cluster_gcluster_test.py +2 -0
- xpk/commands/workload.py +10 -3
- xpk/commands/workload_test.py +1 -0
- xpk/core/cluster.py +10 -9
- xpk/core/config.py +5 -2
- xpk/core/kueue_manager_test.py +2 -0
- xpk/core/nodepool.py +6 -0
- xpk/core/nodepool_test.py +4 -0
- xpk/core/scheduling.py +28 -3
- xpk/core/scheduling_test.py +38 -1
- xpk/core/system_characteristics.py +38 -0
- xpk/core/system_characteristics_test.py +11 -0
- xpk/parser/common.py +0 -17
- xpk/parser/core.py +0 -8
- xpk/parser/storage.py +0 -11
- xpk/utils/feature_flags.py +1 -1
- {xpk-0.17.3.dist-info → xpk-1.0.0.dist-info}/METADATA +15 -4
- {xpk-0.17.3.dist-info → xpk-1.0.0.dist-info}/RECORD +23 -25
- xpk/commands/kind.py +0 -265
- xpk/parser/kind.py +0 -95
- {xpk-0.17.3.dist-info → xpk-1.0.0.dist-info}/WHEEL +0 -0
- {xpk-0.17.3.dist-info → xpk-1.0.0.dist-info}/entry_points.txt +0 -0
- {xpk-0.17.3.dist-info → xpk-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {xpk-0.17.3.dist-info → xpk-1.0.0.dist-info}/top_level.txt +0 -0
xpk/commands/cluster.py
CHANGED
|
@@ -1233,29 +1233,20 @@ def run_gke_cluster_create_command(
|
|
|
1233
1233
|
' --autoscaling-profile=optimize-utilization'
|
|
1234
1234
|
' --labels=gke_product_type=xpk'
|
|
1235
1235
|
f' --release-channel={release_channel.value.lower()}'
|
|
1236
|
+
' --enable-ip-alias'
|
|
1237
|
+
' --enable-dataplane-v2'
|
|
1238
|
+
' --enable-multi-networking'
|
|
1236
1239
|
)
|
|
1237
1240
|
|
|
1238
1241
|
if args.gke_version:
|
|
1239
1242
|
command += ' --no-enable-autoupgrade'
|
|
1240
1243
|
|
|
1241
|
-
enable_ip_alias = False
|
|
1242
|
-
|
|
1243
1244
|
if args.private or args.authorized_networks is not None:
|
|
1244
|
-
enable_ip_alias = True
|
|
1245
1245
|
command += ' --enable-master-authorized-networks --enable-private-nodes'
|
|
1246
1246
|
|
|
1247
|
-
if system.accelerator_type
|
|
1248
|
-
enable_ip_alias = True
|
|
1249
|
-
command += ' --enable-dataplane-v2 --enable-multi-networking'
|
|
1250
|
-
else:
|
|
1247
|
+
if system.accelerator_type != AcceleratorType.GPU:
|
|
1251
1248
|
command += ' --location-policy=BALANCED --scopes=storage-full,gke-default'
|
|
1252
1249
|
|
|
1253
|
-
if args.enable_pathways:
|
|
1254
|
-
enable_ip_alias = True
|
|
1255
|
-
|
|
1256
|
-
if enable_ip_alias:
|
|
1257
|
-
command += ' --enable-ip-alias'
|
|
1258
|
-
|
|
1259
1250
|
if args.enable_ray_cluster:
|
|
1260
1251
|
command += ' --addons RayOperator'
|
|
1261
1252
|
|
|
@@ -91,6 +91,7 @@ def test_install_kueue_standard(
|
|
|
91
91
|
device_type="h100-mega-80gb-8",
|
|
92
92
|
supports_sub_slicing=False,
|
|
93
93
|
supports_super_slicing=False,
|
|
94
|
+
supports_accelerator_network_profile=True,
|
|
94
95
|
docker_platform=DockerPlatform.ARM,
|
|
95
96
|
gpu_config=GpuConfig(requires_topology=True),
|
|
96
97
|
)
|
|
@@ -142,6 +143,7 @@ def test_install_kueue_with_autoprovisioning(
|
|
|
142
143
|
device_type="h100-mega-80gb-8",
|
|
143
144
|
supports_sub_slicing=False,
|
|
144
145
|
supports_super_slicing=False,
|
|
146
|
+
supports_accelerator_network_profile=True,
|
|
145
147
|
docker_platform=DockerPlatform.ARM,
|
|
146
148
|
gpu_config=GpuConfig(requires_topology=True),
|
|
147
149
|
)
|
xpk/commands/workload.py
CHANGED
|
@@ -57,6 +57,7 @@ from ..core.scheduling import (
|
|
|
57
57
|
WorkloadScheduling,
|
|
58
58
|
check_if_workload_can_schedule,
|
|
59
59
|
create_tpu_machine_type,
|
|
60
|
+
create_tpu_slice_topology_annotation,
|
|
60
61
|
create_tpu_topology,
|
|
61
62
|
get_cpu_affinity,
|
|
62
63
|
get_gpu_scheduler,
|
|
@@ -132,7 +133,7 @@ spec:
|
|
|
132
133
|
annotations:
|
|
133
134
|
{storage_annotations}
|
|
134
135
|
{sub_slicing_annotations}
|
|
135
|
-
{
|
|
136
|
+
{tpu_slice_topology_annotation}
|
|
136
137
|
spec:
|
|
137
138
|
schedulerName: {args.scheduler}
|
|
138
139
|
imagePullSecrets:
|
|
@@ -518,6 +519,8 @@ def workload_create(args) -> None:
|
|
|
518
519
|
workload_system, super_slicing=False
|
|
519
520
|
)
|
|
520
521
|
|
|
522
|
+
# TODO(b/466943057): Add ANP label for NAP (if not possible, use CCC)
|
|
523
|
+
|
|
521
524
|
# Create the workload file based on accelerator type or workload type.
|
|
522
525
|
if workload_system.accelerator_type == AcceleratorType.GPU:
|
|
523
526
|
container, debugging_dashboard_id = get_user_workload_container(
|
|
@@ -640,7 +643,11 @@ def workload_create(args) -> None:
|
|
|
640
643
|
else create_machine_label(workload_system)
|
|
641
644
|
)
|
|
642
645
|
node_selector_machine_label = machine_label if not use_super_slicing else ''
|
|
643
|
-
|
|
646
|
+
tpu_slice_topology_annotation = (
|
|
647
|
+
create_tpu_slice_topology_annotation(workload_system.topology)
|
|
648
|
+
if use_super_slicing
|
|
649
|
+
else ''
|
|
650
|
+
)
|
|
644
651
|
|
|
645
652
|
yml_string = WORKLOAD_CREATE_YAML.format(
|
|
646
653
|
args=args,
|
|
@@ -657,7 +664,7 @@ def workload_create(args) -> None:
|
|
|
657
664
|
),
|
|
658
665
|
placement_policy_label=placement_policy_label,
|
|
659
666
|
node_selector_machine_label=node_selector_machine_label,
|
|
660
|
-
|
|
667
|
+
tpu_slice_topology_annotation=tpu_slice_topology_annotation,
|
|
661
668
|
local_queue_name=LOCAL_QUEUE_NAME,
|
|
662
669
|
autoprovisioning_args=autoprovisioning_args,
|
|
663
670
|
volumes=get_volumes(args, workload_system),
|
xpk/commands/workload_test.py
CHANGED
xpk/core/cluster.py
CHANGED
|
@@ -391,14 +391,13 @@ def project_id_to_project_number(project_id: str) -> str:
|
|
|
391
391
|
|
|
392
392
|
|
|
393
393
|
def setup_k8s_env(args) -> k8s_client.ApiClient:
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
)
|
|
394
|
+
add_zone_and_project(args)
|
|
395
|
+
get_cluster_credentials(args)
|
|
396
|
+
args.project_number = (
|
|
397
|
+
project_id_to_project_number(args.project)
|
|
398
|
+
if not args.dry_run
|
|
399
|
+
else abs(hash(args.project) % (10**12)) # 12 digit hash
|
|
400
|
+
)
|
|
402
401
|
|
|
403
402
|
config.load_kube_config()
|
|
404
403
|
return k8s_client.ApiClient()
|
|
@@ -717,8 +716,10 @@ def get_cluster_credentials(args) -> int:
|
|
|
717
716
|
location=location,
|
|
718
717
|
dns_endpoint=True,
|
|
719
718
|
)
|
|
719
|
+
if return_code != 0:
|
|
720
|
+
return return_code
|
|
720
721
|
|
|
721
|
-
if
|
|
722
|
+
if not _are_credentials_valid():
|
|
722
723
|
xpk_print('Detected error. Retrying without --dns-endpoint flag...')
|
|
723
724
|
return_code = _get_credentials(
|
|
724
725
|
project=args.project,
|
xpk/core/config.py
CHANGED
|
@@ -19,6 +19,7 @@ import os
|
|
|
19
19
|
import ruamel.yaml
|
|
20
20
|
from abc import ABC, abstractmethod
|
|
21
21
|
from ..utils import file
|
|
22
|
+
from ..utils.execution_context import is_dry_run
|
|
22
23
|
from ..utils.console import xpk_print
|
|
23
24
|
from setuptools_scm import get_version as setuptools_get_version
|
|
24
25
|
from importlib.metadata import version, PackageNotFoundError
|
|
@@ -96,8 +97,7 @@ class FileSystemConfig(Config):
|
|
|
96
97
|
self._allowed_keys = DEFAULT_KEYS
|
|
97
98
|
|
|
98
99
|
def _open_configs(self) -> dict | None:
|
|
99
|
-
|
|
100
|
-
file.ensure_directory_exists(dir_path)
|
|
100
|
+
file.ensure_directory_exists(os.path.dirname(self._config))
|
|
101
101
|
|
|
102
102
|
if not os.path.exists(self._config):
|
|
103
103
|
return None
|
|
@@ -107,6 +107,9 @@ class FileSystemConfig(Config):
|
|
|
107
107
|
return config_yaml
|
|
108
108
|
|
|
109
109
|
def _save_configs(self, config_yaml: dict) -> None:
|
|
110
|
+
if is_dry_run():
|
|
111
|
+
return None
|
|
112
|
+
|
|
110
113
|
with open(self._config, encoding='utf-8', mode='w') as stream:
|
|
111
114
|
yaml.dump(config_yaml, stream)
|
|
112
115
|
|
xpk/core/kueue_manager_test.py
CHANGED
|
@@ -36,6 +36,7 @@ TPU_SYSTEM: SystemCharacteristics = SystemCharacteristics(
|
|
|
36
36
|
device_type="v5p-8",
|
|
37
37
|
supports_sub_slicing=False,
|
|
38
38
|
supports_super_slicing=False,
|
|
39
|
+
supports_accelerator_network_profile=False,
|
|
39
40
|
docker_platform=DockerPlatform.ARM,
|
|
40
41
|
)
|
|
41
42
|
|
|
@@ -411,6 +412,7 @@ def test_configure_generates_correct_manifest_with_gke_default_topology(
|
|
|
411
412
|
supports_sub_slicing=False,
|
|
412
413
|
supports_super_slicing=False,
|
|
413
414
|
docker_platform=DockerPlatform.ARM,
|
|
415
|
+
supports_accelerator_network_profile=True,
|
|
414
416
|
gpu_config=GpuConfig(requires_topology=True),
|
|
415
417
|
),
|
|
416
418
|
)
|
xpk/core/nodepool.py
CHANGED
|
@@ -289,6 +289,12 @@ def run_gke_node_pool_create_command(
|
|
|
289
289
|
f'{placement_args}'
|
|
290
290
|
' --enable-gvnic'
|
|
291
291
|
)
|
|
292
|
+
|
|
293
|
+
if system.supports_accelerator_network_profile:
|
|
294
|
+
command += (
|
|
295
|
+
' --accelerator-network-profile=auto'
|
|
296
|
+
' --node-labels=cloud.google.com/gke-networking-dra-driver=true'
|
|
297
|
+
)
|
|
292
298
|
if system.accelerator_type == AcceleratorType.TPU:
|
|
293
299
|
command += f' --node-version={gke_node_pool_version}'
|
|
294
300
|
if capacity_type == CapacityType.FLEX_START:
|
xpk/core/nodepool_test.py
CHANGED
|
@@ -251,6 +251,7 @@ def test_placement_policy_created_for_gpu_with_valid_topology(
|
|
|
251
251
|
device_type="h100-80gb-8",
|
|
252
252
|
supports_sub_slicing=False,
|
|
253
253
|
supports_super_slicing=False,
|
|
254
|
+
supports_accelerator_network_profile=True,
|
|
254
255
|
docker_platform=DockerPlatform.ARM,
|
|
255
256
|
gpu_config=GpuConfig(requires_topology=True),
|
|
256
257
|
)
|
|
@@ -284,6 +285,7 @@ def test_placement_policy_not_created_for_gpu_with_invalid_topology(
|
|
|
284
285
|
device_type="h100-80gb-8",
|
|
285
286
|
supports_sub_slicing=False,
|
|
286
287
|
supports_super_slicing=False,
|
|
288
|
+
supports_accelerator_network_profile=True,
|
|
287
289
|
docker_platform=DockerPlatform.ARM,
|
|
288
290
|
gpu_config=GpuConfig(requires_topology=True),
|
|
289
291
|
)
|
|
@@ -320,6 +322,7 @@ def test_placement_policy_created_for_tpu7x_with_valid_topology(
|
|
|
320
322
|
requires_workload_policy=True,
|
|
321
323
|
supports_sub_slicing=False,
|
|
322
324
|
supports_super_slicing=False,
|
|
325
|
+
supports_accelerator_network_profile=False,
|
|
323
326
|
docker_platform=DockerPlatform.ARM,
|
|
324
327
|
)
|
|
325
328
|
|
|
@@ -354,6 +357,7 @@ def test_placement_policy_not_created_for_non7x_tpu(
|
|
|
354
357
|
device_type="v6e-4",
|
|
355
358
|
supports_sub_slicing=True,
|
|
356
359
|
supports_super_slicing=False,
|
|
360
|
+
supports_accelerator_network_profile=True,
|
|
357
361
|
docker_platform=DockerPlatform.ARM,
|
|
358
362
|
)
|
|
359
363
|
|
xpk/core/scheduling.py
CHANGED
|
@@ -18,7 +18,7 @@ from enum import Enum
|
|
|
18
18
|
|
|
19
19
|
from .kueue_manager import get_installed_kueue_version, has_sub_slicing_enabled, has_super_slicing_enabled
|
|
20
20
|
from ..utils.feature_flags import FeatureFlags
|
|
21
|
-
from ..utils.topology import get_slice_topology_level
|
|
21
|
+
from ..utils.topology import get_slice_topology_level, parse_topology
|
|
22
22
|
from ..utils.console import xpk_print
|
|
23
23
|
from ..utils.topology import is_topology_valid
|
|
24
24
|
from ..utils.execution_context import is_dry_run
|
|
@@ -34,6 +34,7 @@ from packaging.version import Version
|
|
|
34
34
|
|
|
35
35
|
_SUB_SLICING_MINIMUM_KUEUE_VERSION = Version('0.13.0')
|
|
36
36
|
_SUPER_SLICING_MINIMUM_KUEUE_VERSION = Version('0.14.0')
|
|
37
|
+
_SUPER_SLICING_MAX_TOPOLOGY = (16, 24, 24)
|
|
37
38
|
|
|
38
39
|
|
|
39
40
|
class WorkloadScheduling(Enum):
|
|
@@ -115,7 +116,7 @@ def check_if_workload_can_schedule(
|
|
|
115
116
|
args,
|
|
116
117
|
workload_system,
|
|
117
118
|
max_vm_in_cluster=int(resources_config_map[cluster_system.device_type]),
|
|
118
|
-
):
|
|
119
|
+
) and _check_super_slicing_topology(workload_system):
|
|
119
120
|
return WorkloadScheduling.SUPER_SLICING_AVAILABLE
|
|
120
121
|
else:
|
|
121
122
|
return WorkloadScheduling.UNAVAILABLE
|
|
@@ -189,7 +190,6 @@ def _check_super_slicing_availability(
|
|
|
189
190
|
workload_system: SystemCharacteristics,
|
|
190
191
|
cluster_system: SystemCharacteristics,
|
|
191
192
|
) -> bool:
|
|
192
|
-
# TODO: b/465447813 - Add super-slicing workload topology validation.
|
|
193
193
|
if (
|
|
194
194
|
(not FeatureFlags.SUPER_SLICING_ENABLED)
|
|
195
195
|
or (workload_system.gke_accelerator != cluster_system.gke_accelerator)
|
|
@@ -212,6 +212,27 @@ def _check_super_slicing_availability(
|
|
|
212
212
|
)
|
|
213
213
|
|
|
214
214
|
|
|
215
|
+
def _check_super_slicing_topology(
|
|
216
|
+
workload_system: SystemCharacteristics,
|
|
217
|
+
) -> bool:
|
|
218
|
+
topology = parse_topology(workload_system.topology)
|
|
219
|
+
result = (
|
|
220
|
+
all(size % 4 == 0 and size >= 4 for size in topology)
|
|
221
|
+
and len(topology) == len(_SUPER_SLICING_MAX_TOPOLOGY)
|
|
222
|
+
and topology[0] <= topology[1] <= topology[2]
|
|
223
|
+
and all(a <= b for a, b in zip(topology, _SUPER_SLICING_MAX_TOPOLOGY))
|
|
224
|
+
)
|
|
225
|
+
|
|
226
|
+
if not result:
|
|
227
|
+
xpk_print(
|
|
228
|
+
'Error: Invalid super-slicing topology. It must adhere to the format of'
|
|
229
|
+
' 4i x 4j x 4k, where i <= j <= k, and i, j, k are integers, with a'
|
|
230
|
+
' maximum of 16x24x24.'
|
|
231
|
+
)
|
|
232
|
+
|
|
233
|
+
return result
|
|
234
|
+
|
|
235
|
+
|
|
215
236
|
def get_total_chips_requested_from_args(
|
|
216
237
|
args, system: SystemCharacteristics
|
|
217
238
|
) -> int:
|
|
@@ -342,6 +363,10 @@ def create_sub_slicing_annotations(sub_slicing_topology: str) -> list[str]:
|
|
|
342
363
|
]
|
|
343
364
|
|
|
344
365
|
|
|
366
|
+
def create_tpu_slice_topology_annotation(workload_topology: str) -> str:
|
|
367
|
+
return f'cloud.google.com/gke-tpu-slice-topology: {workload_topology}'
|
|
368
|
+
|
|
369
|
+
|
|
345
370
|
def create_placement_policy_label(
|
|
346
371
|
system: SystemCharacteristics, super_slicing: bool
|
|
347
372
|
) -> str:
|
xpk/core/scheduling_test.py
CHANGED
|
@@ -22,7 +22,7 @@ from pytest_mock import MockerFixture
|
|
|
22
22
|
from xpk.core.capacity import AUTOPROVISIONING_CONFIG_MAXIMUM_KEY, AUTOPROVISIONING_CONFIG_VALUE
|
|
23
23
|
from xpk.core.testing.commands_tester import CommandsTester
|
|
24
24
|
from xpk.utils.feature_flags import FeatureFlags
|
|
25
|
-
from .scheduling import WorkloadScheduling, check_if_workload_can_schedule, create_sub_slicing_annotations, create_placement_policy_label, get_placement_policy_name, is_placement_policy_supported
|
|
25
|
+
from .scheduling import WorkloadScheduling, check_if_workload_can_schedule, create_sub_slicing_annotations, create_placement_policy_label, create_tpu_slice_topology_annotation, get_placement_policy_name, is_placement_policy_supported
|
|
26
26
|
from .system_characteristics import SystemCharacteristics, AcceleratorType, DockerPlatform, get_system_characteristics_by_device_type
|
|
27
27
|
|
|
28
28
|
|
|
@@ -66,6 +66,7 @@ def test_create_placement_policy_label_returns_valid_label():
|
|
|
66
66
|
accelerator_type=AcceleratorType.TPU,
|
|
67
67
|
supports_sub_slicing=False,
|
|
68
68
|
supports_super_slicing=False,
|
|
69
|
+
supports_accelerator_network_profile=False,
|
|
69
70
|
docker_platform=DockerPlatform.ARM,
|
|
70
71
|
)
|
|
71
72
|
label = create_placement_policy_label(
|
|
@@ -89,6 +90,7 @@ def test_get_placement_policy_name_returns_valid_name():
|
|
|
89
90
|
accelerator_type=AcceleratorType.TPU,
|
|
90
91
|
supports_sub_slicing=False,
|
|
91
92
|
supports_super_slicing=False,
|
|
93
|
+
supports_accelerator_network_profile=False,
|
|
92
94
|
docker_platform=DockerPlatform.ARM,
|
|
93
95
|
)
|
|
94
96
|
name = get_placement_policy_name(system_characteristics, super_slicing=False)
|
|
@@ -107,6 +109,7 @@ def test_get_placement_policy_name_super_slicing_returns_valid_name():
|
|
|
107
109
|
accelerator_type=AcceleratorType.TPU,
|
|
108
110
|
supports_sub_slicing=False,
|
|
109
111
|
supports_super_slicing=False,
|
|
112
|
+
supports_accelerator_network_profile=False,
|
|
110
113
|
docker_platform=DockerPlatform.ARM,
|
|
111
114
|
)
|
|
112
115
|
name = get_placement_policy_name(system_characteristics, super_slicing=True)
|
|
@@ -125,6 +128,7 @@ def test_is_placement_policy_supported_returns_true_for_system_characteristics_s
|
|
|
125
128
|
accelerator_type=AcceleratorType.TPU,
|
|
126
129
|
supports_sub_slicing=False,
|
|
127
130
|
supports_super_slicing=False,
|
|
131
|
+
supports_accelerator_network_profile=False,
|
|
128
132
|
docker_platform=DockerPlatform.ARM,
|
|
129
133
|
)
|
|
130
134
|
assert is_placement_policy_supported(system_characteristics) is True
|
|
@@ -142,6 +146,7 @@ def test_is_placement_policy_supported_returns_false_for_system_characteristics_
|
|
|
142
146
|
accelerator_type=AcceleratorType.TPU,
|
|
143
147
|
supports_sub_slicing=False,
|
|
144
148
|
supports_super_slicing=False,
|
|
149
|
+
supports_accelerator_network_profile=False,
|
|
145
150
|
docker_platform=DockerPlatform.ARM,
|
|
146
151
|
)
|
|
147
152
|
assert is_placement_policy_supported(system_characteristics) is False
|
|
@@ -159,6 +164,7 @@ def test_is_placement_policy_supported_returns_false_for_system_characteristics_
|
|
|
159
164
|
accelerator_type=AcceleratorType.TPU,
|
|
160
165
|
supports_sub_slicing=False,
|
|
161
166
|
supports_super_slicing=False,
|
|
167
|
+
supports_accelerator_network_profile=False,
|
|
162
168
|
docker_platform=DockerPlatform.ARM,
|
|
163
169
|
)
|
|
164
170
|
assert is_placement_policy_supported(system_characteristics) is False
|
|
@@ -369,6 +375,28 @@ SUPER_SLICING_CASE = SchedulingTestCase(
|
|
|
369
375
|
),
|
|
370
376
|
WorkloadScheduling.UNAVAILABLE,
|
|
371
377
|
),
|
|
378
|
+
(
|
|
379
|
+
'Super-slicing, but workload topology is not divisible by four',
|
|
380
|
+
dataclasses.replace(
|
|
381
|
+
SUPER_SLICING_CASE,
|
|
382
|
+
workload_system=_get_system_characteristics_or_die(
|
|
383
|
+
'tpu7x-2x2x1'
|
|
384
|
+
),
|
|
385
|
+
),
|
|
386
|
+
WorkloadScheduling.UNAVAILABLE,
|
|
387
|
+
),
|
|
388
|
+
(
|
|
389
|
+
'Super-slicing, but workload topology is too big for super-slice',
|
|
390
|
+
dataclasses.replace(
|
|
391
|
+
SUPER_SLICING_CASE,
|
|
392
|
+
workload_system=_get_system_characteristics_or_die(
|
|
393
|
+
'tpu7x-4x4x32'
|
|
394
|
+
),
|
|
395
|
+
# 10 cubes, to make sure vms fit:
|
|
396
|
+
resources_config_map={'tpu7x-128': str(64 // 4 * 10)},
|
|
397
|
+
),
|
|
398
|
+
WorkloadScheduling.UNAVAILABLE,
|
|
399
|
+
),
|
|
372
400
|
(
|
|
373
401
|
(
|
|
374
402
|
'Super-slicing should be ignored when a given device is already'
|
|
@@ -426,3 +454,12 @@ def test_check_if_workload_can_schedule(
|
|
|
426
454
|
)
|
|
427
455
|
== expected
|
|
428
456
|
)
|
|
457
|
+
|
|
458
|
+
|
|
459
|
+
def test_create_tpu_slice_topology_annotation():
|
|
460
|
+
workload_system = _get_system_characteristics_or_die('tpu7x-4x4x8')
|
|
461
|
+
|
|
462
|
+
assert (
|
|
463
|
+
create_tpu_slice_topology_annotation(workload_system.topology)
|
|
464
|
+
== 'cloud.google.com/gke-tpu-slice-topology: 4x4x8'
|
|
465
|
+
)
|