xpk 0.17.2__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xpk/commands/cluster.py +4 -35
- xpk/commands/cluster_gcluster.py +1 -13
- xpk/commands/cluster_gcluster_test.py +2 -10
- xpk/commands/cluster_test.py +0 -4
- xpk/commands/workload.py +10 -3
- xpk/commands/workload_test.py +1 -0
- xpk/core/cluster.py +10 -9
- xpk/core/config.py +5 -17
- xpk/core/kueue_manager_test.py +2 -0
- xpk/core/nodepool.py +6 -0
- xpk/core/nodepool_test.py +4 -0
- xpk/core/scheduling.py +28 -3
- xpk/core/scheduling_test.py +38 -1
- xpk/core/system_characteristics.py +39 -16
- xpk/core/system_characteristics_test.py +11 -0
- xpk/core/workload_decorators/rdma_decorator.py +0 -15
- xpk/core/workload_decorators/tcpx_decorator.py +0 -8
- xpk/core/workload_decorators/tcpx_decorator_test.py +0 -78
- xpk/core/workload_decorators/tcpxo_decorator.py +0 -16
- xpk/parser/common.py +0 -17
- xpk/parser/core.py +0 -39
- xpk/parser/storage.py +0 -11
- xpk/utils/feature_flags.py +1 -1
- xpk/utils/validation.py +0 -8
- {xpk-0.17.2.dist-info → xpk-1.0.0.dist-info}/METADATA +15 -4
- {xpk-0.17.2.dist-info → xpk-1.0.0.dist-info}/RECORD +30 -41
- xpk/commands/batch.py +0 -144
- xpk/commands/job.py +0 -244
- xpk/commands/kind.py +0 -286
- xpk/commands/kjob_common.py +0 -60
- xpk/commands/run.py +0 -140
- xpk/commands/shell.py +0 -142
- xpk/parser/batch.py +0 -43
- xpk/parser/job.py +0 -147
- xpk/parser/kind.py +0 -95
- xpk/parser/run.py +0 -47
- xpk/parser/shell.py +0 -59
- {xpk-0.17.2.dist-info → xpk-1.0.0.dist-info}/WHEEL +0 -0
- {xpk-0.17.2.dist-info → xpk-1.0.0.dist-info}/entry_points.txt +0 -0
- {xpk-0.17.2.dist-info → xpk-1.0.0.dist-info}/licenses/LICENSE +0 -0
- {xpk-0.17.2.dist-info → xpk-1.0.0.dist-info}/top_level.txt +0 -0
xpk/commands/cluster.py
CHANGED
```diff
@@ -49,7 +49,6 @@ from ..core.gcloud_context import (
     zone_to_region,
 )
 from ..core.jobset import update_jobset_resources_if_necessary
-from ..core.kjob import apply_kjob_crds, prepare_kjob, verify_kjob_installed
 from ..core.kueue_manager import (KueueConfig, KueueManager)
 from ..core.nap import enable_autoprovisioning_on_cluster
 from ..core.network import (
@@ -98,7 +97,6 @@ def cluster_adapt(args) -> None:
   if should_validate_dependencies(args):
     validate_dependencies_list([
         SystemDependency.KUBECTL,
-        SystemDependency.KJOB,
         SystemDependency.GCLOUD,
     ])
   args.enable_pathways = False
@@ -188,7 +186,6 @@ def cluster_adapt(args) -> None:
   if install_kueue_code != 0:
     xpk_exit(install_kueue_code)
 
-  install_kjob(args)
   if system.accelerator_type == AcceleratorType.GPU:
     prepare_gpus(system)
 
@@ -308,7 +305,6 @@ def cluster_create(args) -> None:
   if should_validate_dependencies(args):
     validate_dependencies_list([
         SystemDependency.KUBECTL,
-        SystemDependency.KJOB,
         SystemDependency.GCLOUD,
     ])
 
@@ -455,8 +451,6 @@ def cluster_create(args) -> None:
   if install_kueue_code != 0:
     xpk_exit(install_kueue_code)
 
-  install_kjob(args)
-
   if system.accelerator_type == AcceleratorType.GPU:
     prepare_gpus(system)
 
@@ -1239,29 +1233,20 @@ def run_gke_cluster_create_command(
       ' --autoscaling-profile=optimize-utilization'
       ' --labels=gke_product_type=xpk'
       f' --release-channel={release_channel.value.lower()}'
+      ' --enable-ip-alias'
+      ' --enable-dataplane-v2'
+      ' --enable-multi-networking'
   )
 
   if args.gke_version:
     command += ' --no-enable-autoupgrade'
 
-  enable_ip_alias = False
-
   if args.private or args.authorized_networks is not None:
-    enable_ip_alias = True
     command += ' --enable-master-authorized-networks --enable-private-nodes'
 
-  if system.accelerator_type == AcceleratorType.GPU:
-    enable_ip_alias = True
-    command += ' --enable-dataplane-v2 --enable-multi-networking'
-  else:
+  if system.accelerator_type != AcceleratorType.GPU:
     command += ' --location-policy=BALANCED --scopes=storage-full,gke-default'
 
-  if args.enable_pathways:
-    enable_ip_alias = True
-
-  if enable_ip_alias:
-    command += ' --enable-ip-alias'
-
   if args.enable_ray_cluster:
     command += ' --addons RayOperator'
 
@@ -1343,22 +1328,6 @@ def install_storage_csis(args):
     xpk_exit(update_cluster_command_code)
 
 
-def install_kjob(args):
-  xpk_print('Verifying kjob installation')
-  err_code = verify_kjob_installed()
-  if err_code > 0:
-    xpk_exit(err_code)
-
-  xpk_print('Applying kjob CDRs')
-  err_code = apply_kjob_crds()
-  if err_code > 0:
-    xpk_exit(err_code)
-
-  err_code = prepare_kjob(args)
-  if err_code > 0:
-    xpk_exit(err_code)
-
-
 def _install_kueue(
     args,
     system: SystemCharacteristics,
```
xpk/commands/cluster_gcluster.py
CHANGED
```diff
@@ -38,7 +38,6 @@ from ..core.commands import run_command_for_value
 from ..core.docker_manager import DockerManager
 from ..core.gcloud_context import zone_to_region
 from ..core.gcluster_manager import GclusterManager
-from ..core.kjob import apply_kjob_crds, prepare_kjob
 from ..core.remote_state.fuse_remote_state import FuseStateClient
 from ..core.remote_state.remote_state_client import RemoteStateClient
 from ..utils.console import xpk_exit, xpk_print
@@ -112,18 +111,7 @@ def cluster_create(
   get_cluster_credentials(args)
 
   err_code = __install_kueue(args)
-  if err_code > 0:
-    xpk_exit(err_code)
-
-  err_code = apply_kjob_crds()
-  if err_code > 0:
-    xpk_exit(err_code)
-
-  err_code = prepare_kjob(args)
-  if err_code > 0:
-    xpk_exit(err_code)
-
-  xpk_exit(0)
+  xpk_exit(err_code)
 
 
 def __install_kueue(args) -> int:
```
xpk/commands/cluster_gcluster_test.py
CHANGED
```diff
@@ -46,8 +46,6 @@ def mock_cluster_create_deps(request):
   """Mocks dependencies for cluster_create."""
   with (
       patch("xpk.commands.cluster_gcluster.xpk_exit") as mock_exit,
-      patch("xpk.commands.cluster_gcluster.prepare_kjob") as mock_prep_kjob,
-      patch("xpk.commands.cluster_gcluster.apply_kjob_crds") as mock_apply_kjob,
       patch(
           "xpk.commands.cluster_gcluster.get_cluster_credentials"
       ) as mock_get_creds,
@@ -68,8 +66,6 @@ def mock_cluster_create_deps(request):
   ):
     yield {
         "xpk_exit": mock_exit,
-        "prepare_kjob": mock_prep_kjob,
-        "apply_kjob_crds": mock_apply_kjob,
         "get_cluster_credentials": mock_get_creds,
         "generate_blueprint": mock_gen_bp,
         "prepare_gcluster_manager": mock_prep_gcm,
@@ -85,9 +81,6 @@ def test_install_kueue_standard(
     mock_get_total_chips, mock_args, mock_cluster_create_deps
 ):
   """Tests __install_kueue for a standard installation."""
-  mock_cluster_create_deps["prepare_kjob"].return_value = 0
-  mock_cluster_create_deps["apply_kjob_crds"].return_value = 0
-
   mock_system = SystemCharacteristics(
       topology="N/A",
       vms_per_slice=1,
@@ -98,6 +91,7 @@ def test_install_kueue_standard(
       device_type="h100-mega-80gb-8",
       supports_sub_slicing=False,
       supports_super_slicing=False,
+      supports_accelerator_network_profile=True,
       docker_platform=DockerPlatform.ARM,
       gpu_config=GpuConfig(requires_topology=True),
   )
@@ -138,9 +132,6 @@ def test_install_kueue_with_autoprovisioning(
     mock_enable_autoprovisioning, mock_args, mock_cluster_create_deps
 ):
   """Tests __install_kueue with autoprovisioning enabled."""
-  mock_cluster_create_deps["prepare_kjob"].return_value = 0
-  mock_cluster_create_deps["apply_kjob_crds"].return_value = 0
-
   mock_args.enable_autoprovisioning = True
   mock_system = SystemCharacteristics(
       topology="N/A",
@@ -152,6 +143,7 @@ def test_install_kueue_with_autoprovisioning(
       device_type="h100-mega-80gb-8",
       supports_sub_slicing=False,
       supports_super_slicing=False,
+      supports_accelerator_network_profile=True,
       docker_platform=DockerPlatform.ARM,
       gpu_config=GpuConfig(requires_topology=True),
   )
```
xpk/commands/cluster_test.py
CHANGED
```diff
@@ -56,7 +56,6 @@ class _ClusterCreateMocks:
   create_cluster_configmaps: MagicMock
   set_jobset_on_cluster: MagicMock
   get_cluster_location: MagicMock
-  install_kjob: MagicMock
   xpk_exit: MagicMock
   update_jobset_resources_if_necessary: MagicMock
   _install_kueue: MagicMock
@@ -204,9 +203,6 @@ def cluster_create_mocks(mocker) -> _ClusterCreateMocks:
           'xpk.commands.cluster.get_cluster_location',
           return_value='us-central1',
       ),
-      install_kjob=mocker.patch(
-          'xpk.commands.cluster.install_kjob', return_value=0
-      ),
      xpk_exit=mocker.patch('xpk.commands.cluster.xpk_exit'),
      update_jobset_resources_if_necessary=mocker.patch(
          'xpk.commands.cluster.update_jobset_resources_if_necessary',
```
xpk/commands/workload.py
CHANGED
```diff
@@ -57,6 +57,7 @@ from ..core.scheduling import (
     WorkloadScheduling,
     check_if_workload_can_schedule,
     create_tpu_machine_type,
+    create_tpu_slice_topology_annotation,
     create_tpu_topology,
     get_cpu_affinity,
     get_gpu_scheduler,
@@ -132,7 +133,7 @@ spec:
       annotations:
         {storage_annotations}
         {sub_slicing_annotations}
-        {
+        {tpu_slice_topology_annotation}
      spec:
        schedulerName: {args.scheduler}
        imagePullSecrets:
@@ -518,6 +519,8 @@ def workload_create(args) -> None:
       workload_system, super_slicing=False
   )
 
+  # TODO(b/466943057): Add ANP label for NAP (if not possible, use CCC)
+
   # Create the workload file based on accelerator type or workload type.
   if workload_system.accelerator_type == AcceleratorType.GPU:
     container, debugging_dashboard_id = get_user_workload_container(
@@ -640,7 +643,11 @@ def workload_create(args) -> None:
       else create_machine_label(workload_system)
   )
   node_selector_machine_label = machine_label if not use_super_slicing else ''
-
+  tpu_slice_topology_annotation = (
+      create_tpu_slice_topology_annotation(workload_system.topology)
+      if use_super_slicing
+      else ''
+  )
 
   yml_string = WORKLOAD_CREATE_YAML.format(
       args=args,
@@ -657,7 +664,7 @@ def workload_create(args) -> None:
       ),
       placement_policy_label=placement_policy_label,
       node_selector_machine_label=node_selector_machine_label,
-
+      tpu_slice_topology_annotation=tpu_slice_topology_annotation,
       local_queue_name=LOCAL_QUEUE_NAME,
       autoprovisioning_args=autoprovisioning_args,
       volumes=get_volumes(args, workload_system),
```
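For context on the template change: `WORKLOAD_CREATE_YAML` is rendered with `str.format`, so the new `{tpu_slice_topology_annotation}` placeholder receives either the annotation line or an empty string. A rough sketch of the substitution, using an abridged template and a hypothetical storage annotation value:

```python
# Rough sketch; the template fragment is abridged from WORKLOAD_CREATE_YAML
# and the storage annotation value is hypothetical.
TEMPLATE = """metadata:
  annotations:
    {storage_annotations}
    {tpu_slice_topology_annotation}
"""

def create_tpu_slice_topology_annotation(workload_topology: str) -> str:
  return f'cloud.google.com/gke-tpu-slice-topology: {workload_topology}'

use_super_slicing = True
annotation = (
    create_tpu_slice_topology_annotation('4x4x8') if use_super_slicing else ''
)
print(TEMPLATE.format(
    storage_annotations='example.com/storage: "fast"',
    tpu_slice_topology_annotation=annotation,
))
```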
xpk/commands/workload_test.py
CHANGED
xpk/core/cluster.py
CHANGED
```diff
@@ -391,14 +391,13 @@ def project_id_to_project_number(project_id: str) -> str:
 
 
 def setup_k8s_env(args) -> k8s_client.ApiClient:
-
-
-
-
-
-
-
-  )
+  add_zone_and_project(args)
+  get_cluster_credentials(args)
+  args.project_number = (
+      project_id_to_project_number(args.project)
+      if not args.dry_run
+      else abs(hash(args.project) % (10**12))  # 12 digit hash
+  )
 
   config.load_kube_config()
   return k8s_client.ApiClient()
@@ -717,8 +716,10 @@ def get_cluster_credentials(args) -> int:
       location=location,
       dns_endpoint=True,
   )
+  if return_code != 0:
+    return return_code
 
-  if return_code != 0:
+  if not _are_credentials_valid():
     xpk_print('Detected error. Retrying without --dns-endpoint flag...')
     return_code = _get_credentials(
         project=args.project,
```
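One detail of the rewritten `setup_k8s_env` worth spelling out: in dry-run mode the project number is faked with `abs(hash(args.project) % (10**12))` instead of a Cloud API call. Since Python salts `str` hashes per process (see PYTHONHASHSEED), the value is stable within one invocation but generally differs between runs, which is fine for a dry run:

```python
# Standalone illustration of the dry-run fallback above. str hashes are
# randomized per process, so this is stable only within a single run.
project = 'my-gcp-project'  # placeholder project id
fake_project_number = abs(hash(project) % (10**12))  # at most 12 digits
print(fake_project_number)
```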
xpk/core/config.py
CHANGED
```diff
@@ -19,6 +19,7 @@ import os
 import ruamel.yaml
 from abc import ABC, abstractmethod
 from ..utils import file
+from ..utils.execution_context import is_dry_run
 from ..utils.console import xpk_print
 from setuptools_scm import get_version as setuptools_get_version
 from importlib.metadata import version, PackageNotFoundError
@@ -53,14 +54,6 @@ PROJECT_KEY = 'project-id'
 CLIENT_ID_KEY = 'client-id'
 SEND_TELEMETRY_KEY = 'send-telemetry'
 ZONE_KEY = 'zone'
-KJOB_BATCH_IMAGE = 'batch-image'
-KJOB_BATCH_WORKING_DIRECTORY = 'batch-working-directory'
-KJOB_SHELL_IMAGE = 'shell-image'
-KJOB_SHELL_INTERACTIVE_COMMAND = 'shell-interactive-command'
-KJOB_SHELL_WORKING_DIRECTORY = 'shell-working-directory'
-CONFIGS_KEY = 'configs'
-GKE_ENDPOINT_KEY = 'gke-endpoint'
-DEPENDENCIES_KEY = 'deps-verified-version'
 
 DEFAULT_KEYS = [
     CFG_BUCKET_KEY,
@@ -69,13 +62,6 @@ DEFAULT_KEYS = [
     CLIENT_ID_KEY,
     SEND_TELEMETRY_KEY,
     ZONE_KEY,
-    GKE_ENDPOINT_KEY,
-    DEPENDENCIES_KEY,
-    KJOB_BATCH_IMAGE,
-    KJOB_BATCH_WORKING_DIRECTORY,
-    KJOB_SHELL_IMAGE,
-    KJOB_SHELL_INTERACTIVE_COMMAND,
-    KJOB_SHELL_WORKING_DIRECTORY,
 ]
 VERTEX_TENSORBOARD_FEATURE_FLAG = XPK_CURRENT_VERSION >= '0.4.0'
 
@@ -111,8 +97,7 @@ class FileSystemConfig(Config):
     self._allowed_keys = DEFAULT_KEYS
 
   def _open_configs(self) -> dict | None:
-    dir_path = os.path.dirname(self._config)
-    file.ensure_directory_exists(dir_path)
+    file.ensure_directory_exists(os.path.dirname(self._config))
 
     if not os.path.exists(self._config):
       return None
@@ -122,6 +107,9 @@ class FileSystemConfig(Config):
     return config_yaml
 
   def _save_configs(self, config_yaml: dict) -> None:
+    if is_dry_run():
+      return None
+
     with open(self._config, encoding='utf-8', mode='w') as stream:
       yaml.dump(config_yaml, stream)
 
```
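The `_save_configs` change makes config writes a no-op under dry-run while reads keep working. A minimal sketch of the guard pattern, with `is_dry_run` stubbed in place of `xpk.utils.execution_context.is_dry_run` and JSON standing in for the ruamel.yaml dump:

```python
# Minimal sketch of the dry-run write guard; is_dry_run is a stub and
# json stands in for the ruamel.yaml serialization xpk actually uses.
import json

def is_dry_run() -> bool:
  return True  # stub; the real flag comes from the CLI arguments

def save_configs(path: str, config: dict) -> None:
  if is_dry_run():
    return  # suppress side effects, mirroring FileSystemConfig._save_configs
  with open(path, encoding='utf-8', mode='w') as stream:
    json.dump(config, stream)

save_configs('/tmp/xpk-config.json', {'zone': 'us-central1-a'})  # writes nothing
```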
xpk/core/kueue_manager_test.py
CHANGED
```diff
@@ -36,6 +36,7 @@ TPU_SYSTEM: SystemCharacteristics = SystemCharacteristics(
     device_type="v5p-8",
     supports_sub_slicing=False,
     supports_super_slicing=False,
+    supports_accelerator_network_profile=False,
     docker_platform=DockerPlatform.ARM,
 )
 
@@ -411,6 +412,7 @@ def test_configure_generates_correct_manifest_with_gke_default_topology(
         supports_sub_slicing=False,
         supports_super_slicing=False,
         docker_platform=DockerPlatform.ARM,
+        supports_accelerator_network_profile=True,
         gpu_config=GpuConfig(requires_topology=True),
     ),
 )
```
xpk/core/nodepool.py
CHANGED
```diff
@@ -289,6 +289,12 @@ def run_gke_node_pool_create_command(
       f'{placement_args}'
       ' --enable-gvnic'
   )
+
+  if system.supports_accelerator_network_profile:
+    command += (
+        ' --accelerator-network-profile=auto'
+        ' --node-labels=cloud.google.com/gke-networking-dra-driver=true'
+    )
   if system.accelerator_type == AcceleratorType.TPU:
     command += f' --node-version={gke_node_pool_version}'
     if capacity_type == CapacityType.FLEX_START:
```
xpk/core/nodepool_test.py
CHANGED
```diff
@@ -251,6 +251,7 @@ def test_placement_policy_created_for_gpu_with_valid_topology(
       device_type="h100-80gb-8",
       supports_sub_slicing=False,
       supports_super_slicing=False,
+      supports_accelerator_network_profile=True,
       docker_platform=DockerPlatform.ARM,
       gpu_config=GpuConfig(requires_topology=True),
   )
@@ -284,6 +285,7 @@ def test_placement_policy_not_created_for_gpu_with_invalid_topology(
       device_type="h100-80gb-8",
       supports_sub_slicing=False,
       supports_super_slicing=False,
+      supports_accelerator_network_profile=True,
       docker_platform=DockerPlatform.ARM,
       gpu_config=GpuConfig(requires_topology=True),
   )
@@ -320,6 +322,7 @@ def test_placement_policy_created_for_tpu7x_with_valid_topology(
       requires_workload_policy=True,
       supports_sub_slicing=False,
       supports_super_slicing=False,
+      supports_accelerator_network_profile=False,
       docker_platform=DockerPlatform.ARM,
   )
 
@@ -354,6 +357,7 @@ def test_placement_policy_not_created_for_non7x_tpu(
       device_type="v6e-4",
       supports_sub_slicing=True,
       supports_super_slicing=False,
+      supports_accelerator_network_profile=True,
       docker_platform=DockerPlatform.ARM,
   )
 
```
xpk/core/scheduling.py
CHANGED
```diff
@@ -18,7 +18,7 @@ from enum import Enum
 
 from .kueue_manager import get_installed_kueue_version, has_sub_slicing_enabled, has_super_slicing_enabled
 from ..utils.feature_flags import FeatureFlags
-from ..utils.topology import get_slice_topology_level
+from ..utils.topology import get_slice_topology_level, parse_topology
 from ..utils.console import xpk_print
 from ..utils.topology import is_topology_valid
 from ..utils.execution_context import is_dry_run
@@ -34,6 +34,7 @@ from packaging.version import Version
 
 _SUB_SLICING_MINIMUM_KUEUE_VERSION = Version('0.13.0')
 _SUPER_SLICING_MINIMUM_KUEUE_VERSION = Version('0.14.0')
+_SUPER_SLICING_MAX_TOPOLOGY = (16, 24, 24)
 
 
 class WorkloadScheduling(Enum):
@@ -115,7 +116,7 @@ def check_if_workload_can_schedule(
         args,
         workload_system,
         max_vm_in_cluster=int(resources_config_map[cluster_system.device_type]),
-    ):
+    ) and _check_super_slicing_topology(workload_system):
       return WorkloadScheduling.SUPER_SLICING_AVAILABLE
     else:
       return WorkloadScheduling.UNAVAILABLE
@@ -189,7 +190,6 @@ def _check_super_slicing_availability(
     workload_system: SystemCharacteristics,
     cluster_system: SystemCharacteristics,
 ) -> bool:
-  # TODO: b/465447813 - Add super-slicing workload topology validation.
   if (
       (not FeatureFlags.SUPER_SLICING_ENABLED)
       or (workload_system.gke_accelerator != cluster_system.gke_accelerator)
@@ -212,6 +212,27 @@ def _check_super_slicing_availability(
   )
 
 
+def _check_super_slicing_topology(
+    workload_system: SystemCharacteristics,
+) -> bool:
+  topology = parse_topology(workload_system.topology)
+  result = (
+      all(size % 4 == 0 and size >= 4 for size in topology)
+      and len(topology) == len(_SUPER_SLICING_MAX_TOPOLOGY)
+      and topology[0] <= topology[1] <= topology[2]
+      and all(a <= b for a, b in zip(topology, _SUPER_SLICING_MAX_TOPOLOGY))
+  )
+
+  if not result:
+    xpk_print(
+        'Error: Invalid super-slicing topology. It must adhere to the format of'
+        ' 4i x 4j x 4k, where i <= j <= k, and i, j, k are integers, with a'
+        ' maximum of 16x24x24.'
+    )
+
+  return result
+
+
 def get_total_chips_requested_from_args(
     args, system: SystemCharacteristics
 ) -> int:
@@ -342,6 +363,10 @@ def create_sub_slicing_annotations(sub_slicing_topology: str) -> list[str]:
   ]
 
 
+def create_tpu_slice_topology_annotation(workload_topology: str) -> str:
+  return f'cloud.google.com/gke-tpu-slice-topology: {workload_topology}'
+
+
 def create_placement_policy_label(
     system: SystemCharacteristics, super_slicing: bool
 ) -> str:
```
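The new `_check_super_slicing_topology` accepts only three-axis topologies whose sizes are multiples of four (at least 4), sorted ascending, and bounded by 16x24x24. A self-contained sketch of the same checks, assuming `parse_topology` splits an 'AxBxC' string into integers (the real helper lives in `xpk.utils.topology`):

```python
# Self-contained sketch of the validation above; parse_topology is assumed
# to split an 'AxBxC' string into ints (the real one is in xpk.utils.topology).
_SUPER_SLICING_MAX_TOPOLOGY = (16, 24, 24)

def parse_topology(topology: str) -> tuple[int, ...]:
  return tuple(int(size) for size in topology.split('x'))

def check_super_slicing_topology(topology_str: str) -> bool:
  topology = parse_topology(topology_str)
  return (
      all(size % 4 == 0 and size >= 4 for size in topology)
      and len(topology) == len(_SUPER_SLICING_MAX_TOPOLOGY)
      and topology[0] <= topology[1] <= topology[2]
      and all(a <= b for a, b in zip(topology, _SUPER_SLICING_MAX_TOPOLOGY))
  )

assert check_super_slicing_topology('4x4x8')       # valid: 4i x 4j x 4k, ascending
assert not check_super_slicing_topology('2x2x1')   # axes not divisible by four
assert not check_super_slicing_topology('4x4x32')  # 32 exceeds the 24 bound
```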
xpk/core/scheduling_test.py
CHANGED
```diff
@@ -22,7 +22,7 @@ from pytest_mock import MockerFixture
 from xpk.core.capacity import AUTOPROVISIONING_CONFIG_MAXIMUM_KEY, AUTOPROVISIONING_CONFIG_VALUE
 from xpk.core.testing.commands_tester import CommandsTester
 from xpk.utils.feature_flags import FeatureFlags
-from .scheduling import WorkloadScheduling, check_if_workload_can_schedule, create_sub_slicing_annotations, create_placement_policy_label, get_placement_policy_name, is_placement_policy_supported
+from .scheduling import WorkloadScheduling, check_if_workload_can_schedule, create_sub_slicing_annotations, create_placement_policy_label, create_tpu_slice_topology_annotation, get_placement_policy_name, is_placement_policy_supported
 from .system_characteristics import SystemCharacteristics, AcceleratorType, DockerPlatform, get_system_characteristics_by_device_type
 
 
@@ -66,6 +66,7 @@ def test_create_placement_policy_label_returns_valid_label():
       accelerator_type=AcceleratorType.TPU,
       supports_sub_slicing=False,
       supports_super_slicing=False,
+      supports_accelerator_network_profile=False,
       docker_platform=DockerPlatform.ARM,
   )
   label = create_placement_policy_label(
@@ -89,6 +90,7 @@ def test_get_placement_policy_name_returns_valid_name():
       accelerator_type=AcceleratorType.TPU,
       supports_sub_slicing=False,
       supports_super_slicing=False,
+      supports_accelerator_network_profile=False,
       docker_platform=DockerPlatform.ARM,
   )
   name = get_placement_policy_name(system_characteristics, super_slicing=False)
@@ -107,6 +109,7 @@ def test_get_placement_policy_name_super_slicing_returns_valid_name():
       accelerator_type=AcceleratorType.TPU,
       supports_sub_slicing=False,
       supports_super_slicing=False,
+      supports_accelerator_network_profile=False,
       docker_platform=DockerPlatform.ARM,
   )
   name = get_placement_policy_name(system_characteristics, super_slicing=True)
@@ -125,6 +128,7 @@ def test_is_placement_policy_supported_returns_true_for_system_characteristics_s
       accelerator_type=AcceleratorType.TPU,
       supports_sub_slicing=False,
       supports_super_slicing=False,
+      supports_accelerator_network_profile=False,
       docker_platform=DockerPlatform.ARM,
   )
   assert is_placement_policy_supported(system_characteristics) is True
@@ -142,6 +146,7 @@ def test_is_placement_policy_supported_returns_false_for_system_characteristics_
       accelerator_type=AcceleratorType.TPU,
       supports_sub_slicing=False,
       supports_super_slicing=False,
+      supports_accelerator_network_profile=False,
       docker_platform=DockerPlatform.ARM,
   )
   assert is_placement_policy_supported(system_characteristics) is False
@@ -159,6 +164,7 @@ def test_is_placement_policy_supported_returns_false_for_system_characteristics_
       accelerator_type=AcceleratorType.TPU,
       supports_sub_slicing=False,
       supports_super_slicing=False,
+      supports_accelerator_network_profile=False,
       docker_platform=DockerPlatform.ARM,
   )
   assert is_placement_policy_supported(system_characteristics) is False
@@ -369,6 +375,28 @@ SUPER_SLICING_CASE = SchedulingTestCase(
         ),
         WorkloadScheduling.UNAVAILABLE,
     ),
+    (
+        'Super-slicing, but workload topology is not divisible by four',
+        dataclasses.replace(
+            SUPER_SLICING_CASE,
+            workload_system=_get_system_characteristics_or_die(
+                'tpu7x-2x2x1'
+            ),
+        ),
+        WorkloadScheduling.UNAVAILABLE,
+    ),
+    (
+        'Super-slicing, but workload topology is too big for super-slice',
+        dataclasses.replace(
+            SUPER_SLICING_CASE,
+            workload_system=_get_system_characteristics_or_die(
+                'tpu7x-4x4x32'
+            ),
+            # 10 cubes, to make sure vms fit:
+            resources_config_map={'tpu7x-128': str(64 // 4 * 10)},
+        ),
+        WorkloadScheduling.UNAVAILABLE,
+    ),
     (
         (
             'Super-slicing should be ignored when a given device is already'
@@ -426,3 +454,12 @@ def test_check_if_workload_can_schedule(
         )
         == expected
     )
+
+
+def test_create_tpu_slice_topology_annotation():
+  workload_system = _get_system_characteristics_or_die('tpu7x-4x4x8')
+
+  assert (
+      create_tpu_slice_topology_annotation(workload_system.topology)
+      == 'cloud.google.com/gke-tpu-slice-topology: 4x4x8'
+  )
```