xpk 0.14.2__py3-none-any.whl → 0.14.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xpk/commands/cluster.py +57 -22
- xpk/commands/cluster_gcluster_test.py +2 -2
- xpk/commands/cluster_test.py +197 -25
- xpk/commands/inspector.py +20 -7
- xpk/commands/kind.py +1 -1
- xpk/commands/workload.py +42 -4
- xpk/commands/workload_test.py +88 -5
- xpk/core/blueprint/blueprint_definitions.py +16 -1
- xpk/core/blueprint/blueprint_generator.py +11 -11
- xpk/core/capacity.py +17 -0
- xpk/core/capacity_test.py +50 -0
- xpk/core/config.py +1 -1
- xpk/core/docker_container.py +4 -4
- xpk/core/docker_resources.py +11 -11
- xpk/core/kjob.py +3 -5
- xpk/core/kueue_manager.py +21 -10
- xpk/core/kueue_manager_test.py +379 -536
- xpk/core/nap.py +1 -1
- xpk/core/nodepool.py +9 -9
- xpk/core/nodepool_test.py +4 -4
- xpk/core/pathways.py +1 -1
- xpk/core/resources.py +1 -1
- xpk/core/scheduling.py +7 -13
- xpk/core/system_characteristics.py +42 -35
- xpk/core/system_characteristics_test.py +3 -3
- xpk/core/testing/__init__.py +15 -0
- xpk/core/testing/commands_tester.py +131 -0
- xpk/core/testing/commands_tester_test.py +129 -0
- xpk/core/updates.py +57 -0
- xpk/core/updates_test.py +80 -0
- xpk/main.py +7 -4
- xpk/parser/common.py +8 -0
- xpk/utils/execution_context.py +20 -2
- {xpk-0.14.2.dist-info → xpk-0.14.3.dist-info}/METADATA +1 -3
- {xpk-0.14.2.dist-info → xpk-0.14.3.dist-info}/RECORD +39 -33
- {xpk-0.14.2.dist-info → xpk-0.14.3.dist-info}/WHEEL +0 -0
- {xpk-0.14.2.dist-info → xpk-0.14.3.dist-info}/entry_points.txt +0 -0
- {xpk-0.14.2.dist-info → xpk-0.14.3.dist-info}/licenses/LICENSE +0 -0
- {xpk-0.14.2.dist-info → xpk-0.14.3.dist-info}/top_level.txt +0 -0
xpk/commands/cluster.py
CHANGED
@@ -17,7 +17,7 @@ limitations under the License.
 from tabulate import tabulate

 from ..utils.feature_flags import FeatureFlags
-from ..core.capacity import H100_DEVICE_TYPE, H200_DEVICE_TYPE, B200_DEVICE_TYPE
+from ..core.capacity import H100_DEVICE_TYPE, H200_DEVICE_TYPE, B200_DEVICE_TYPE, get_reservation_deployment_type
 from ..core.cluster import (
     get_all_clusters_programmatic,
     get_cluster_credentials,
@@ -60,7 +60,7 @@ from ..core.nodepool import (
 )
 from ..core.ray import install_ray_cluster
 from ..core.mtc import install_mtc_on_cluster
-from ..core.resources import create_cluster_configmaps
+from ..core.resources import AutoprovisioningConfig, create_cluster_configmaps
 from ..core.scheduling import get_total_chips_requested_from_args
 from ..core.storage import install_storage_crd
 from ..core.system_characteristics import (
@@ -110,7 +110,7 @@ def cluster_adapt(args) -> None:
   )
   add_zone_and_project(args)

-  if system.accelerator_type == AcceleratorType
+  if system.accelerator_type == AcceleratorType.GPU and not getattr(
       args, 'num_nodes'
   ):
     xpk_print(
@@ -180,10 +180,12 @@ def cluster_adapt(args) -> None:
   # if set_pathways_job_on_cluster_code != 0:
   # xpk_exit(set_pathways_job_on_cluster_code)

-
+  install_kueue_code = _install_kueue(args, system, autoprovisioning_config)
+  if install_kueue_code != 0:
+    xpk_exit(install_kueue_code)

   install_kjob(args)
-  if system.accelerator_type == AcceleratorType
+  if system.accelerator_type == AcceleratorType.GPU:
     prepare_gpus(system)

   if args.enable_ray_cluster:
@@ -204,6 +206,38 @@ def cluster_adapt(args) -> None:
 def _validate_cluster_create_args(args, system: SystemCharacteristics):
   if FeatureFlags.SUB_SLICING_ENABLED and args.sub_slicing:
     validate_sub_slicing_system(system)
+    _validate_sub_slicing_reservation(args)
+
+
+def _validate_sub_slicing_reservation(args):
+  if args.reservation is None:
+    xpk_print(
+        'Error: Validation failed: Sub-slicing cluster creation requires'
+        ' Cluster Director reservation to be specified.'
+    )
+    xpk_exit(1)
+
+  deployment_type = get_reservation_deployment_type(
+      reservation=args.reservation, project=args.project, zone=args.zone
+  )
+  if deployment_type != 'DENSE':
+    xpk_print(
+        'Error: Validation failed: The specified reservation'
+        f' "{args.reservation}" is not a Cluster Director reservation.'
+    )
+    xpk_print(
+        'Please provide a reservation created for Cluster Director to proceed.'
+    )
+    xpk_print('To list valid Cluster Director reservations, run:')
+    xpk_print(
+        ' gcloud compute reservations list --filter="deploymentType=DENSE"'
+    )
+    xpk_print(
+        'Refer to the documentation for more information on creating Cluster'
+        ' Director reservations:'
+        ' https://cloud.google.com/cluster-director/docs/reserve-capacity'
+    )
+    xpk_exit(1)


 def cluster_create(args) -> None:
@@ -346,11 +380,13 @@ def cluster_create(args) -> None:
   if set_pathways_job_on_cluster_code != 0:
     xpk_exit(set_pathways_job_on_cluster_code)

-
+  install_kueue_code = _install_kueue(args, system, autoprovisioning_config)
+  if install_kueue_code != 0:
+    xpk_exit(install_kueue_code)

   install_kjob(args)

-  if system.accelerator_type == AcceleratorType
+  if system.accelerator_type == AcceleratorType.GPU:
     prepare_gpus(system)

   if args.enable_ray_cluster:
@@ -1106,12 +1142,6 @@ def run_gke_cluster_create_command(
   # benefit from a larger initial `--num-nodes`. After the cluster is created,
   # the auto-scaler can reduce/increase the nodes based on the load.

-  # If the user passes in the gke version then we use that directly instead of the rapid release.
-  # This allows users to directly pass a specified gke version without release channel constraints.
-  rapid_release_cmd = ''
-  if args.gke_version is not None:
-    rapid_release_cmd = ' --release-channel rapid'
-
   command = (
       'gcloud beta container clusters create'
       f' {args.cluster} --project={args.project}'
@@ -1122,25 +1152,23 @@ def run_gke_cluster_create_command(
       ' --enable-autoscaling'
       ' --total-min-nodes 1 --total-max-nodes 1000'
       f' --num-nodes {args.default_pool_cpu_num_nodes}'
-      f' {args.custom_cluster_arguments}'
-      f' {rapid_release_cmd}'
       ' --enable-dns-access'
       ' --autoscaling-profile=optimize-utilization'
       ' --labels=gke_product_type=xpk'
   )

+  if args.gke_version or system.accelerator_type == AcceleratorType.GPU:
+    command += ' --no-enable-autoupgrade'
+
   enable_ip_alias = False

   if args.private or args.authorized_networks is not None:
     enable_ip_alias = True
     command += ' --enable-master-authorized-networks --enable-private-nodes'

-  if system.accelerator_type == AcceleratorType
+  if system.accelerator_type == AcceleratorType.GPU:
     enable_ip_alias = True
-    command +=
-        ' --enable-dataplane-v2'
-        ' --enable-multi-networking --no-enable-autoupgrade'
-    )
+    command += ' --enable-dataplane-v2 --enable-multi-networking'
   else:
     command += ' --location-policy=BALANCED --scopes=storage-full,gke-default'

@@ -1180,6 +1208,9 @@ def run_gke_cluster_create_command(
     addons_str = ','.join(addons)
     command += f' --addons={addons_str}'

+  if args.custom_cluster_arguments:
+    command += f' {args.custom_cluster_arguments}'
+
   return_code = run_command_with_updates(command, 'GKE Cluster Create')
   if return_code != 0:
     xpk_print(f'GKE Cluster Create request returned ERROR {return_code}')
@@ -1240,7 +1271,11 @@ def install_kjob(args):
     xpk_exit(err_code)


-def
+def _install_kueue(
+    args,
+    system: SystemCharacteristics,
+    autoprovisioning_config: AutoprovisioningConfig | None,
+) -> int:
   xpk_print('Enabling Kueue on the cluster')
   autoprovisioning_enabled = False
   if autoprovisioning_config:
@@ -1251,7 +1286,7 @@ def install_kueue(args, system: SystemCharacteristics, autoprovisioning_config):
   # Determine total chips based on user specified topology.
   total_chips = get_total_chips_requested_from_args(args, system)
   kueue_manager = KueueManager()
-  kueue_manager.install_or_upgrade(
+  return kueue_manager.install_or_upgrade(
      KueueConfig(
          system,
          total_chips=total_chips,
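The new Sub-slicing validation above calls `get_reservation_deployment_type`, which this release adds to `xpk/core/capacity.py` (+17 lines, not shown in this section). As a rough, hedged illustration of what such a lookup could do, assuming it simply reads the reservation's `deploymentType` field via gcloud; the function below is a sketch, not the actual xpk implementation:

```python
# Illustrative sketch only; the real helper lives in xpk/core/capacity.py and is
# not part of this diff. Assumes gcloud is installed and authenticated.
import subprocess


def get_reservation_deployment_type_sketch(
    reservation: str, project: str, zone: str
) -> str | None:
  """Returns the reservation's deploymentType (e.g. 'DENSE'), or None on error."""
  result = subprocess.run(
      [
          'gcloud', 'compute', 'reservations', 'describe', reservation,
          f'--project={project}', f'--zone={zone}',
          '--format=value(deploymentType)',
      ],
      capture_output=True, text=True, check=False,
  )
  if result.returncode != 0:
    return None
  return result.stdout.strip() or None
```

With a helper along these lines, `_validate_cluster_create_args` only lets `--sub-slicing` cluster creation proceed when the reservation reports `DENSE`, i.e. a Cluster Director reservation.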
xpk/commands/cluster_gcluster_test.py
CHANGED
@@ -93,7 +93,7 @@ def test_install_kueue_standard(
         gke_accelerator="nvidia-h100-mega-80gb",
         gce_machine_type="a3-megagpu-8g",
         chips_per_vm=8,
-        accelerator_type=AcceleratorType
+        accelerator_type=AcceleratorType.GPU,
         device_type="h100-mega-80gb-8",
         supports_sub_slicing=False,
     )
@@ -140,7 +140,7 @@ def test_install_kueue_with_autoprovisioning(
         gke_accelerator="nvidia-h100-mega-80gb",
         gce_machine_type="a3-megagpu-8g",
         chips_per_vm=8,
-        accelerator_type=AcceleratorType
+        accelerator_type=AcceleratorType.GPU,
         device_type="h100-mega-80gb-8",
         supports_sub_slicing=False,
     )
xpk/commands/cluster_test.py
CHANGED
@@ -16,77 +16,249 @@ limitations under the License.

 from argparse import Namespace
 from dataclasses import dataclass
-from
+from typing import Any
+from unittest.mock import MagicMock, patch
 import pytest

-from xpk.commands.cluster import _validate_cluster_create_args
+from xpk.commands.cluster import _install_kueue, _validate_cluster_create_args, run_gke_cluster_create_command
 from xpk.core.system_characteristics import SystemCharacteristics, UserFacingNameToSystemCharacteristics
+from xpk.core.testing.commands_tester import CommandsTester
 from xpk.utils.feature_flags import FeatureFlags


 @dataclass
 class _Mocks:
   common_print_mock: MagicMock
-
+  commands_print_mock: MagicMock
+  commands_get_reservation_deployment_type: MagicMock
+  commands_tester: CommandsTester


 @pytest.fixture
-def
+def mocks(mocker) -> _Mocks:
   common_print_mock = mocker.patch(
       'xpk.commands.common.xpk_print',
       return_value=None,
   )
-
-      'xpk.commands.
-
+  commands_print_mock = mocker.patch(
+      'xpk.commands.cluster.xpk_print', return_value=None
+  )
+  commands_get_reservation_deployment_type = mocker.patch(
+      'xpk.commands.cluster.get_reservation_deployment_type',
+      return_value='DENSE',
   )
   return _Mocks(
-      common_print_mock=common_print_mock,
+      common_print_mock=common_print_mock,
+      commands_get_reservation_deployment_type=commands_get_reservation_deployment_type,
+      commands_print_mock=commands_print_mock,
+      commands_tester=CommandsTester(
+          mocker,
+          run_command_with_updates_path=(
+              'xpk.commands.cluster.run_command_with_updates'
+          ),
+      ),
   )


-
-
-
+def construct_args(**kwargs: Any) -> Namespace:
+  args_dict = dict(
+      project='project',
+      zone='us-central1-a',
+      reservation='',
+      default_pool_cpu_machine_type='test-machine-type',
+      cluster='test-cluster',
+      default_pool_cpu_num_nodes='100',
+      sub_slicing=False,
+      gke_version='',
+      private=False,
+      authorized_networks=None,
+      enable_pathways=False,
+      enable_ray_cluster=False,
+      enable_workload_identity=False,
+      enable_gcsfuse_csi_driver=False,
+      enable_gcpfilestore_csi_driver=False,
+      enable_parallelstore_csi_driver=False,
+      enable_pd_csi_driver=False,
+      enable_lustre_csi_driver=False,
+      custom_cluster_arguments='',
+      num_slices=1,
+      num_nodes=1,
+      flex=False,
+      memory_limit='100Gi',
+      cpu_limit=100,
+      cluster_cpu_machine_type='',
+  )
+  args_dict.update(kwargs)
+  return Namespace(**args_dict)
+
+
+GPU_TEST_SYSTEM: SystemCharacteristics = UserFacingNameToSystemCharacteristics[
+    'l4-1'
+]
 SUB_SLICING_SYSTEM: SystemCharacteristics = (
     UserFacingNameToSystemCharacteristics['v6e-4x4']
 )
+TPU_TEST_SYSTEM: SystemCharacteristics = UserFacingNameToSystemCharacteristics[
+    'v6e-4x4'
+]


 def test_validate_cluster_create_args_for_correct_args_pass(
-
+    mocks: _Mocks,
 ):
   args = Namespace()

-  _validate_cluster_create_args(args,
+  _validate_cluster_create_args(args, GPU_TEST_SYSTEM)

-  assert
-  assert mock_common_print_and_exit.common_exit_mock.call_count == 0
+  assert mocks.common_print_mock.call_count == 0


 def test_validate_cluster_create_args_for_correct_sub_slicing_args_pass(
-
+    mocks: _Mocks,
 ):
   FeatureFlags.SUB_SLICING_ENABLED = True
-  args =
+  args = construct_args(
+      sub_slicing=True,
+      reservation='test-reservation',
+  )

   _validate_cluster_create_args(args, SUB_SLICING_SYSTEM)

-  assert
-  assert mock_common_print_and_exit.common_exit_mock.call_count == 0
+  assert mocks.common_print_mock.call_count == 0


 def test_validate_cluster_create_args_for_not_supported_system_throws(
-
+    mocks: _Mocks,
 ):
   FeatureFlags.SUB_SLICING_ENABLED = True
-  args =
+  args = construct_args(
+      sub_slicing=True,
+      reservation='test-reservation',
+  )

-
+  with pytest.raises(SystemExit):
+    _validate_cluster_create_args(args, GPU_TEST_SYSTEM)

-  assert
+  assert mocks.common_print_mock.call_count == 1
   assert (
-
+      mocks.common_print_mock.call_args[0][0]
       == 'Error: l4-1 does not support Sub-slicing.'
   )
-
+
+
+def test_validate_cluster_create_args_for_missing_reservation(
+    mocks: _Mocks,
+):
+  FeatureFlags.SUB_SLICING_ENABLED = True
+  args = construct_args(
+      sub_slicing=True,
+      reservation=None,
+  )
+
+  with pytest.raises(SystemExit):
+    _validate_cluster_create_args(args, SUB_SLICING_SYSTEM)
+
+  assert mocks.commands_print_mock.call_count == 1
+  assert (
+      'Validation failed: Sub-slicing cluster creation requires'
+      in mocks.commands_print_mock.call_args[0][0]
+  )
+
+
+def test_validate_cluster_create_args_for_invalid_reservation(
+    mocks: _Mocks,
+):
+  FeatureFlags.SUB_SLICING_ENABLED = True
+  args = construct_args(
+      sub_slicing=True,
+      reservation='test-reservation',
+  )
+  mocks.commands_get_reservation_deployment_type.return_value = 'SPARSE'
+
+  with pytest.raises(SystemExit):
+    _validate_cluster_create_args(args, SUB_SLICING_SYSTEM)
+
+  assert mocks.commands_print_mock.call_count == 5
+  assert (
+      'Refer to the documentation for more information on creating Cluster'
+      in mocks.commands_print_mock.call_args[0][0]
+  )
+
+
+@patch('xpk.commands.cluster.KueueManager.install_or_upgrade')
+def test_install_kueue_returns_kueue_installation_code(
+    mock_kueue_manager_install: MagicMock,
+):
+  mock_kueue_manager_install.return_value = 17
+
+  code = _install_kueue(
+      args=construct_args(),
+      system=GPU_TEST_SYSTEM,
+      autoprovisioning_config=None,
+  )
+
+  assert code == 17
+
+
+def test_run_gke_cluster_create_command_specifies_custom_cluster_arguments_last(
+    mocks: _Mocks,
+):
+  result = run_gke_cluster_create_command(
+      args=construct_args(
+          custom_cluster_arguments='--enable-autoscaling=False --foo=baz'
+      ),
+      gke_control_plane_version='1.2.3',
+      system=TPU_TEST_SYSTEM,
+  )
+
+  assert result == 0
+  mocks.commands_tester.assert_command_run(
+      'clusters create',
+      ' --enable-autoscaling',
+      ' --enable-autoscaling=False --foo=baz',
+  )
+
+
+def test_run_gke_cluster_create_command_without_gke_version_does_not_have_no_autoupgrade_flag(
+    mocks: _Mocks,
+):
+  result = run_gke_cluster_create_command(
+      args=construct_args(gke_version=''),
+      gke_control_plane_version='1.2.3',
+      system=TPU_TEST_SYSTEM,
+  )
+
+  assert result == 0
+  mocks.commands_tester.assert_command_not_run(
+      'clusters create', ' --no-enable-autoupgrade'
+  )
+
+
+def test_run_gke_cluster_create_command_with_gke_version_has_no_autoupgrade_flag(
+    mocks: _Mocks,
+):
+  result = run_gke_cluster_create_command(
+      args=construct_args(gke_version='1.2.3'),
+      gke_control_plane_version='1.2.3',
+      system=TPU_TEST_SYSTEM,
+  )
+
+  assert result == 0
+  mocks.commands_tester.assert_command_run(
+      'clusters create', ' --no-enable-autoupgrade'
+  )
+
+
+def test_run_gke_cluster_create_command_with_gpu_system_has_no_enable_autoupgrade(
+    mocks: _Mocks,
+):
+  result = run_gke_cluster_create_command(
+      args=construct_args(gke_version=''),
+      gke_control_plane_version='1.2.3',
+      system=GPU_TEST_SYSTEM,
+  )
+
+  assert result == 0
+  mocks.commands_tester.assert_command_run(
+      'clusters create', ' --no-enable-autoupgrade'
+  )
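These tests exercise `run_gke_cluster_create_command` through the new `CommandsTester` helper (`xpk/core/testing/commands_tester.py`, +131 lines, not shown in this section). A minimal sketch of what such a helper could look like, assuming it stubs `run_command_with_updates`, records every command string, and checks that fragments appear in order within a recorded command; the class name and details below are illustrative, not the actual xpk implementation:

```python
# Illustrative sketch; the real CommandsTester is constructed with a pytest-mock
# `mocker` and a run_command_with_updates_path, as shown in the diff above.
from unittest import mock


class CommandsTesterSketch:
  """Records commands sent to a patched run_command_with_updates."""

  def __init__(self, run_command_with_updates_path: str, return_value: int = 0):
    self.recorded_commands: list[str] = []

    def _record(command: str, *args, **kwargs) -> int:
      self.recorded_commands.append(command)
      return return_value

    self._patcher = mock.patch(run_command_with_updates_path, side_effect=_record)
    self._patcher.start()

  def stop(self) -> None:
    self._patcher.stop()

  def _matching(self, fragment: str) -> list[str]:
    return [c for c in self.recorded_commands if fragment in c]

  def assert_command_run(self, fragment: str, *parts_in_order: str) -> None:
    for command in self._matching(fragment):
      position = 0
      for part in parts_in_order:
        position = command.find(part, position)
        if position < 0:
          break
        position += len(part)
      else:
        return  # every part found, in order, within this command
    raise AssertionError(
        f'No command matching {fragment!r} contains {parts_in_order!r} in order'
    )

  def assert_command_not_run(self, fragment: str, part: str) -> None:
    assert not any(part in c for c in self._matching(fragment))
```

Ordered matching is what lets `test_run_gke_cluster_create_command_specifies_custom_cluster_arguments_last` verify that the user-supplied flags come after xpk's own flags, which is the apparent intent of moving `{args.custom_cluster_arguments}` to the end of the command in cluster.py.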
xpk/commands/inspector.py
CHANGED
@@ -23,6 +23,10 @@ from ..utils.console import xpk_exit, xpk_print
 from ..utils.file import append_tmp_file, write_tmp_file
 from ..utils.validation import validate_dependencies_list, SystemDependency, should_validate_dependencies
 from .workload import get_workload_list
+from ..core.kueue_manager import has_sub_slicing_enabled
+
+
+_SPACER = '========================================================'


 def inspector_run_command_helper(
@@ -40,7 +44,6 @@ def inspector_run_command_helper(
     0 if successful and 1 otherwise.
   """
   prefix = f'Command: {command}\nCommand Description: {command_description}\n'
-  postfix = '========================================================'
   return_code, command_output = run_command_for_value(
       command, f'{command_description}'
   )
@@ -51,7 +54,7 @@ def inspector_run_command_helper(
     )
     return 1

-  inspector_command_output = f'{prefix} \n{command_output} \n{
+  inspector_command_output = f'{prefix} \n{command_output} \n{_SPACER} \n'
   append_tmp_file(inspector_command_output, file)

   if args.print_to_terminal:
@@ -71,17 +74,27 @@ def inspector_run_workload_list_helper(args, command_description, file) -> int:
     0 if successful and 1 otherwise.
   """
   prefix = f'Command Description: {command_description}\n'
-  postfix = '========================================================'
   return_code, command_output = get_workload_list(args)
   if return_code != 0:
     xpk_exit(return_code)
-  inspector_command_output = f'{prefix} \n{command_output} \n{
+  inspector_command_output = f'{prefix} \n{command_output} \n{_SPACER} \n'
   append_tmp_file(inspector_command_output, file)
   if args.print_to_terminal:
     xpk_print(inspector_command_output)
   return 0


+def inspector_run_sub_slicing_helper(args, file: str):
+  return_code, result = has_sub_slicing_enabled()
+  if return_code != 0:
+    xpk_exit(return_code)
+  if result:
+    output = f'Sub-slicing topology set up.\n{_SPACER}'
+    append_tmp_file(output, file)
+    if args.print_to_terminal:
+      xpk_print(output)
+
+
 def inspector_output_link_helper(args, link, link_description, file) -> int:
   """Outputs a link for xpk inspector to the output file.

@@ -95,9 +108,7 @@ def inspector_output_link_helper(args, link, link_description, file) -> int:
     0 if successful and 1 otherwise.
   """
   inspector_link = (
-      f'Link Description: {link_description}\n'
-      f'Link: {link}\n'
-      '========================================================'
+      f'Link Description: {link_description}\nLink: {link}\n{_SPACER}\n'
   )
   append_tmp_file(inspector_link, file)
   if args.print_to_terminal:
@@ -308,6 +319,8 @@ def inspector(args) -> None:
         f' {command_description} return code: {return_code}'
     )

+  inspector_run_sub_slicing_helper(args, inspector_file)
+
   # Cloud Console Links:
   workload_links = []
   if args.workload:
xpk/commands/kind.py
CHANGED
xpk/commands/workload.py
CHANGED
@@ -27,6 +27,7 @@ from ..core.cluster import (
     setup_k8s_env,
 )
 from ..core.commands import run_command_with_updates, run_commands
+from ..core.kueue_manager import KueueManager, has_sub_slicing_enabled
 from ..core.config import (VERTEX_TENSORBOARD_FEATURE_FLAG, XPK_CURRENT_VERSION)
 from ..core.docker_container import (
     get_main_container_docker_image,
@@ -95,6 +96,7 @@ from ..core.workload_decorators import (
     tcpxo_decorator,
 )
 from ..utils.console import get_user_input, xpk_exit, xpk_print
+from packaging.version import Version
 from ..utils.file import write_tmp_file
 from ..utils.execution_context import is_dry_run
 from ..utils.validation import validate_dependencies_list, SystemDependency, should_validate_dependencies
@@ -283,6 +285,7 @@ PW_WORKLOAD_CREATE_YAML = """
 """

 SUB_SLICING_TOPOLOGIES = ['2x2', '2x4', '4x4', '4x8', '8x8', '8x16', '16x16']
+SUB_SLICING_MINIMUM_KUEUE_VERSION = Version('0.13.0')


 def workload_create_pathways(args) -> None:
@@ -340,6 +343,7 @@ def workload_create(args) -> None:
     xpk_exit(return_code)

   if FeatureFlags.SUB_SLICING_ENABLED and args.sub_slicing_topology is not None:
+    _validate_sub_slicing_availability()
     _validate_sub_slicing_topology(system, args.sub_slicing_topology)

   if not check_if_workload_can_schedule(args, system):
@@ -483,7 +487,7 @@ def workload_create(args) -> None:
       values: [{restart_on_exit_codes}]"""

   # Create the workload file based on accelerator type or workload type.
-  if system.accelerator_type == AcceleratorType
+  if system.accelerator_type == AcceleratorType.GPU:
     container, debugging_dashboard_id = get_user_workload_container(
         args, system
     )
@@ -566,7 +570,7 @@ def workload_create(args) -> None:
         container=container,
         vms_per_slice=(
             compute_vms_per_slice(args.sub_slicing_topology)
-            if system.accelerator_type == AcceleratorType
+            if system.accelerator_type == AcceleratorType.TPU
             and FeatureFlags.SUB_SLICING_ENABLED
             and args.sub_slicing_topology is not None
             else system.vms_per_slice
@@ -594,7 +598,7 @@ def workload_create(args) -> None:
         tpu_toleration="""
                 - operator: "Exists"
                   key: google.com/tpu
-        """ if system.accelerator_type == AcceleratorType
+        """ if system.accelerator_type == AcceleratorType.TPU else '',
         failure_policy_rules=failure_policy_rules,
         pod_failure_policy=pod_failure_policy,
     )
@@ -611,7 +615,7 @@ def workload_create(args) -> None:

   # Get GKE outlier dashboard for TPU
   outlier_dashboard_id = None
-  if system.accelerator_type == AcceleratorType
+  if system.accelerator_type == AcceleratorType.TPU:
     outlier_dashboard_id = get_gke_outlier_dashboard(args)

   # Outlier and debugging dashboards
@@ -678,6 +682,40 @@ def workload_create(args) -> None:
   xpk_exit(0)


+def _validate_sub_slicing_availability():
+  return_code, sub_slicing_enabled = has_sub_slicing_enabled()
+  if return_code != 0:
+    xpk_print(
+        'Error: Unable to validate sub-slicing support on a given cluster.'
+    )
+    xpk_exit(1)
+
+  if not sub_slicing_enabled:
+    xpk_print(
+        'Error: Cluster has not been not set up for Sub-slicing. Please enable'
+        ' --sub-slicing in "cluster create" command first.'
+    )
+    xpk_exit(1)
+
+  kueue_manager = KueueManager()
+  return_code, current_version = kueue_manager.get_installed_kueue_version()
+  if return_code != 0:
+    xpk_print(
+        'Error: Unable to validate sub-slicing support on a given cluster.'
+    )
+    xpk_exit(1)
+
+  if current_version < SUB_SLICING_MINIMUM_KUEUE_VERSION:
+    xpk_print(
+        "Error: Current Kueue version ({current_version}) doesn't support"
+        ' Sub-slicing. The minimal required version is'
+        ' v{SUB_SLICING_MINIMUM_KUEUE_VERSION}. Please either update Kueue'
+        ' manually, or run "cluster create --sub-slicing" on the existing'
+        ' cluster.'
+    )
+    xpk_exit(1)
+
+
 def _validate_sub_slicing_topology(
     system_characteristics: SystemCharacteristics, sub_slicing_topology: str
 ) -> None: