PyPI - xpk - Versions diffs - 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl - Mend

xpk-1.0.0-py3-none-any.whl → xpk-1.1.0-py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their public registry.

Files changed (57)

xpk/commands/cluster.py +29 -30
xpk/commands/cluster_gcluster.py +19 -14
xpk/commands/cluster_test.py +1 -21
xpk/commands/common.py +39 -6
xpk/commands/common_test.py +170 -0
xpk/commands/info.py +9 -5
xpk/commands/inspector.py +33 -4
xpk/commands/inspector_test.py +142 -0
xpk/commands/workload.py +22 -8
xpk/commands/workload_test.py +70 -3
xpk/core/blueprint/blueprint_generator.py +19 -8
xpk/core/blueprint/testing/data/a3_ultra.yaml +3 -1
xpk/core/blueprint/testing/data/a4.yaml +3 -1
xpk/core/capacity.py +37 -17
xpk/core/capacity_test.py +66 -1
xpk/core/cluster.py +10 -10
xpk/core/cluster_private.py +3 -3
xpk/core/cluster_test.py +29 -2
xpk/core/docker_container.py +31 -24
xpk/core/docker_manager.py +4 -4
xpk/core/docker_resources.py +4 -1
xpk/core/kueue_manager.py +6 -8
xpk/core/kueue_manager_test.py +4 -5
xpk/core/nap.py +14 -3
xpk/core/nodepool.py +46 -13
xpk/core/nodepool_test.py +143 -8
xpk/core/remote_state/fuse_remote_state.py +1 -1
xpk/core/scheduling.py +4 -1
xpk/core/scheduling_test.py +1 -1
xpk/core/system_characteristics.py +6 -0
xpk/core/telemetry.py +11 -1
xpk/core/telemetry_test.py +39 -0
xpk/core/testing/commands_tester.py +26 -0
xpk/core/testing/commands_tester_test.py +20 -1
xpk/core/workload_decorators/rdma_decorator.py +9 -0
xpk/parser/cluster.py +11 -1
xpk/parser/cluster_test.py +59 -1
xpk/parser/common.py +11 -0
xpk/parser/storage.py +3 -3
xpk/utils/console.py +1 -1
xpk/utils/feature_flags.py +7 -3
{xpk-1.0.0.dist-info → xpk-1.1.0.dist-info}/METADATA +37 -21
{xpk-1.0.0.dist-info → xpk-1.1.0.dist-info}/RECORD +47 -54
xpk-1.1.0.dist-info/top_level.txt +1 -0
integration/README.md +0 -19
integration/__init__.py +0 -15
integration/docker_manager_test.py +0 -102
integration/gcluster_a3mega_test.py +0 -215
integration/gcluster_a3ultra_test.py +0 -187
integration/gcluster_a4_test.py +0 -187
integration/gcluster_test.py +0 -107
xpk/utils/user_input.py +0 -48
xpk/utils/user_input_test.py +0 -92
xpk-1.0.0.dist-info/top_level.txt +0 -2
{xpk-1.0.0.dist-info → xpk-1.1.0.dist-info}/WHEEL +0 -0
{xpk-1.0.0.dist-info → xpk-1.1.0.dist-info}/entry_points.txt +0 -0
{xpk-1.0.0.dist-info → xpk-1.1.0.dist-info}/licenses/LICENSE +0 -0

xpk/commands/inspector_test.py ADDED Viewed

@@ -0,0 +1,142 @@
+"""
+Copyright 2025 Google LLC
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+     https://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+import pytest
+from unittest import mock
+from xpk.commands import inspector
+from xpk.core.testing.commands_tester import CommandsTester
+@pytest.fixture
+def args():
+  args = mock.Mock()
+  args.print_to_terminal = False
+  return args
+@pytest.fixture
+def commands_tester(mocker):
+  return CommandsTester(
+      mocker,
+      run_command_for_value_path="xpk.commands.inspector.run_command_for_value",
+  )
+@pytest.fixture
+def mock_has_super_slicing_enabled(mocker):
+  return mocker.patch("xpk.commands.inspector.has_super_slicing_enabled")
+@pytest.fixture
+def mock_append_tmp_file(mocker):
+  return mocker.patch("xpk.commands.inspector.append_tmp_file")
+@pytest.fixture
+def mock_xpk_print(mocker):
+  return mocker.patch("xpk.commands.inspector.xpk_print")
+def test_inspector_run_slice_controller_helper_no_super_slicing(
+    args: mock.Mock,
+    commands_tester: CommandsTester,
+    mock_has_super_slicing_enabled: mock.Mock,
+    mock_append_tmp_file: mock.Mock,
+):
+  mock_has_super_slicing_enabled.return_value = (0, False)
+  inspector.inspector_run_slice_controller_helper(args, "test_file")
+  commands_tester.assert_command_not_run(
+      "kubectl logs deployment slice-controller-controller-manager"
+  )
+  commands_tester.assert_command_not_run(
+      "kubectl describe deployment slice-controller-controller-manager"
+  )
+  mock_append_tmp_file.assert_not_called()
+def test_inspector_run_slice_controller_helper_with_super_slicing_success(
+    args: mock.Mock,
+    commands_tester: CommandsTester,
+    mock_has_super_slicing_enabled: mock.Mock,
+    mock_append_tmp_file: mock.Mock,
+):
+  commands_tester.set_result_for_command(
+      (0, "some logs"),
+      "kubectl",
+      "logs",
+      "deployment slice-controller-controller-manager",
+  )
+  commands_tester.set_result_for_command(
+      (0, "some details"),
+      "kubectl",
+      "describe",
+      "deployment slice-controller-controller-manager",
+  )
+  mock_has_super_slicing_enabled.return_value = (0, True)
+  inspector.inspector_run_slice_controller_helper(args, "test_file")
+  commands_tester.assert_command_run(
+      "kubectl logs deployment slice-controller-controller-manager"
+  )
+  commands_tester.assert_command_run(
+      "kubectl describe deployment slice-controller-controller-manager"
+  )
+  mock_append_tmp_file.assert_called()
+  call_args_list = mock_append_tmp_file.call_args_list
+  assert any(
+      "Super-slicing topology set up" in args[0] for args, _ in call_args_list
+  )
+  assert any("some logs" in args[0] for args, _ in call_args_list)
+  assert any("some details" in args[0] for args, _ in call_args_list)
+def test_inspector_run_slice_controller_helper_with_slice_controller_not_found(
+    args: mock.Mock,
+    commands_tester: CommandsTester,
+    mock_has_super_slicing_enabled: mock.Mock,
+    mock_append_tmp_file: mock.Mock,
+    mock_xpk_print: mock.Mock,
+):
+  commands_tester.set_result_for_command(
+      (1, "Error: Deployment not found"),
+      "kubectl",
+      "deployment slice-controller-controller-manager",
+  )
+  mock_has_super_slicing_enabled.return_value = (0, True)
+  inspector.inspector_run_slice_controller_helper(args, "test_file")
+  commands_tester.assert_command_run(
+      "kubectl describe deployment slice-controller-controller-manager"
+  )
+  commands_tester.assert_command_run(
+      "kubectl logs deployment slice-controller-controller-manager"
+  )
+  mock_append_tmp_file.assert_called()
+  call_args_list = mock_append_tmp_file.call_args_list
+  assert any(
+      "Super-slicing topology set up" in args[0] for args, _ in call_args_list
+  )
+  mock_xpk_print.assert_called()
+  call_args_list = mock_xpk_print.call_args_list
+  assert any(
+      "Error: Deployment not found" in args[0] for args, _ in call_args_list
+  )

xpk/commands/workload.py CHANGED Viewed

@@ -54,6 +54,7 @@ from ..core.resources import get_cluster_capacity_type, get_cluster_system_chara
 from ..core.resources import ConfigMapType, get_cluster_configmap
 from ..core.nodepool import ensure_resource_policy_exists
 from ..core.scheduling import (
+    ONE_TO_ONE_REPLICA_NODE_POOL_ASSIGNMENT_ANNOTATION,
     WorkloadScheduling,
     check_if_workload_can_schedule,
     create_tpu_machine_type,
@@ -99,7 +100,7 @@ from ..utils.file import write_tmp_file
 from ..utils.execution_context import is_dry_run
 from ..utils.validation import validate_dependencies_list, SystemDependency, should_validate_dependencies
 from . import cluster_gcluster
-from .common import is_TAS_possible
+from .common import is_GPU_TAS_possible
 from jinja2 import Environment, FileSystemLoader
 from ..utils.templates import get_templates_absolute_path
@@ -111,7 +112,7 @@ metadata:
     kueue.x-k8s.io/queue-name: {local_queue_name}  # Name of the LocalQueue
     xpk.google.com/workload: {args.workload}
   annotations:
-    alpha.jobset.sigs.k8s.io/exclusive-topology: cloud.google.com/gke-nodepool # 1:1 job replica to node pool assignment
+    {jobset_annotations}
 spec:
   ttlSecondsAfterFinished: {args.ttl_seconds_after_finished}
   failurePolicy:
@@ -490,13 +491,21 @@ def workload_create(args) -> None:
         - PodFailurePolicy"""
     restart_on_exit_codes_list = get_restart_exit_codes(args)
     restart_on_exit_codes = ','.join(map(str, restart_on_exit_codes_list))
-    pod_failure_policy = f"""
+    pod_failure_policy = """
           podFailurePolicy:
             rules:
+          """
+    docker_image = get_main_container_docker_image(args, workload_system)
+    for i in range(workload_system.parallel_containers):
+      docker_image_sufix = (
+          f'-{i + 1}' if workload_system.parallel_containers > 1 else ''
+      )
+      pod_failure_policy += f"""
             - action: FailJob
               onPodConditions: []
               onExitCodes:
-                containerName: {get_main_container_docker_image(args, workload_system)}
+                containerName: {docker_image}{docker_image_sufix}
                 operator: NotIn
                 values: [{restart_on_exit_codes}]"""
@@ -534,11 +543,10 @@ def workload_create(args) -> None:
     capacity_type = get_cluster_capacity_type(args)
     annotations = (
-        (
-            'kueue.x-k8s.io/podset-preferred-topology:'
-            ' "cloud.google.com/gce-topology-host"'
+        'kueue.x-k8s.io/podset-preferred-topology: "kubernetes.io/hostname"'
+        if is_GPU_TAS_possible(
+            cluster_system, capacity_type, args.cluster, args.zone, args.project
         )
-        if is_TAS_possible(cluster_system, capacity_type)
         else ''
     )
@@ -648,9 +656,15 @@ def workload_create(args) -> None:
         if use_super_slicing
         else ''
     )
+    jobset_annotations = (
+        ''
+        if use_super_slicing or use_sub_slicing
+        else ONE_TO_ONE_REPLICA_NODE_POOL_ASSIGNMENT_ANNOTATION
+    )
     yml_string = WORKLOAD_CREATE_YAML.format(
         args=args,
+        jobset_annotations=jobset_annotations,
         container=container,
         vms_per_slice=workload_system.vms_per_slice,
         affinity=get_cpu_affinity(workload_system.accelerator_type),

xpk/commands/workload_test.py CHANGED Viewed

@@ -23,6 +23,7 @@ from ..core.scheduling import WorkloadScheduling
 from ..core.system_characteristics import DockerPlatform, SystemCharacteristics, AcceleratorType, UserFacingNameToSystemCharacteristics, GpuConfig
 from .workload import workload_create
 from .cluster_test import construct_args
+from ..core.docker_container import get_user_workload_container as real_get_user_workload_container
 SYSTEM_CHARACTERISTICS = SystemCharacteristics(
@@ -58,7 +59,7 @@ class _WorkloadCreateMocks:
   validate_dependencies_list: MagicMock
   write_tmp_file: MagicMock
   get_cluster_capacity_type: MagicMock
-  is_TAS_possible: MagicMock
+  is_GPU_TAS_possible: MagicMock
   get_cluster_location: MagicMock
   xpk_exit: MagicMock
   run_command_with_updates: MagicMock
@@ -113,8 +114,8 @@ def workload_create_mocks(mocker) -> _WorkloadCreateMocks:
           'xpk.commands.workload.get_cluster_capacity_type',
           return_value='on-demand',
       ),
-      is_TAS_possible=mocker.patch(
-          'xpk.commands.workload.is_TAS_possible', return_value=False
+      is_GPU_TAS_possible=mocker.patch(
+          'xpk.commands.workload.is_GPU_TAS_possible', return_value=False
       ),
       get_cluster_location=mocker.patch(
           'xpk.commands.workload.get_cluster_location',
@@ -206,3 +207,69 @@ def test_workload_create_dry_run_with_output_file(mocker):
   written_content = mock_open.return_value.write.call_args[0][0]
   assert 'test-workload' in written_content
   assert 'cloud.google.com/gke-tpu-topology: 8x8' in written_content
+def test_workload_create_multi_container_for_tpu7x(
+    workload_create_mocks: _WorkloadCreateMocks,
+    mocker,
+):
+  """Tests that the generated YAML for a multi-container workload has correct pod failure policy and container structure."""
+  # Enable dry_run to prevent external calls like get_storages_to_mount -> gcloud
+  mocker.patch('xpk.utils.execution_context.dry_run', True)
+  # Mock dependencies required by get_user_workload_container -> get_main_container
+  mocker.patch(
+      'xpk.core.docker_container.setup_docker_image',
+      return_value=(0, 'dummy-image'),
+  )
+  mocker.patch(
+      'xpk.core.docker_container.get_gke_debugging_dashboard', return_value=None
+  )
+  # Use the real get_user_workload_container to test integration
+  workload_create_mocks.get_user_workload_container.side_effect = (
+      real_get_user_workload_container
+  )
+  args = construct_args(
+      workload='test-workload',
+      command='echo hello',
+      num_nodes=1,
+      tpu_type='tpu7x-2x2x2',
+      restart_on_exit_codes=None,
+      docker_name='test-docker',
+      deploy_stacktrace_sidecar=False,
+      enable_debug_logs=False,
+      scheduler='default-scheduler',
+  )
+  workload_create(args)
+  assert workload_create_mocks.write_tmp_file.called
+  yaml_content = workload_create_mocks.write_tmp_file.call_args[0][0]
+  jobset = yaml.safe_load(yaml_content)
+  # Verify Pod Failure Policy
+  pod_failure_rules = jobset['spec']['replicatedJobs'][0]['template']['spec'][
+      'podFailurePolicy'
+  ]['rules']
+  # Should have 2 rules for multi_container
+  assert len(pod_failure_rules) == 2
+  assert pod_failure_rules[0]['onExitCodes']['containerName'].endswith('-1')
+  assert pod_failure_rules[1]['onExitCodes']['containerName'].endswith('-2')
+  # Verify Containers
+  # Navigate to the containers list in the YAML
+  containers = jobset['spec']['replicatedJobs'][0]['template']['spec'][
+      'template'
+  ]['spec']['containers']
+  assert len(containers) == 2
+  assert containers[0]['name'].endswith('-1')
+  assert containers[1]['name'].endswith('-2')
+  assert containers[0]['image'] == 'dummy-image'
+  assert containers[1]['image'] == 'dummy-image'
+  # Check if resources are split correctly (4 chips / 2 containers = 2 chips)
+  assert containers[0]['resources']['limits']['google.com/tpu'] == 2
+  assert containers[1]['resources']['limits']['google.com/tpu'] == 2

xpk/core/blueprint/blueprint_generator.py CHANGED Viewed

@@ -24,6 +24,7 @@ from packaging.version import parse
 from ...utils.console import xpk_exit, xpk_print
 from ...utils.versions import ReleaseChannel
 from ...utils.file import ensure_directory_exists
+from ...utils.templates import get_templates_absolute_path
 from ..capacity import (
@@ -51,9 +52,9 @@ supported_device_types = {
     a4_device_type,
 }
 blueprint_dependencies_dir = {
-    a3mega_device_type: "src/xpk/blueprints/a3mega",
-    a3ultra_device_type: "src/xpk/blueprints/a3ultra",
-    a4_device_type: "src/xpk/blueprints/a4",
+    a3mega_device_type: get_templates_absolute_path("blueprints/a3mega"),
+    a3ultra_device_type: get_templates_absolute_path("blueprints/a3ultra"),
+    a4_device_type: get_templates_absolute_path("blueprints/a4"),
 }
 cluster_toolkit_url = "github.com/GoogleCloudPlatform/cluster-toolkit"
@@ -63,7 +64,7 @@ common_cluster_labels = {"gke_product_type": "xpk"}
 class BlueprintGeneratorOutput:
   """BlueprintGeneratorOutput is a class containing fields with output blueprint file path and path to blueprint dependencies.
-  Atributes:
+  Attributes:
   - blueprint_file (str) : path to generated blueprint file.
   - blueprint_dependencies (str) : path to directory containing blueprint dependencies.
   """
@@ -75,7 +76,7 @@ class BlueprintGeneratorOutput:
 class BlueprintGenerator:
   """BlueprintGenerator is a class for generating blueprints
-  Atributes:
+  Attributes:
   - storage_path (str) - path to directory where generated files and directories will be stored.
   """
@@ -239,10 +240,18 @@ class BlueprintGenerator:
     else:
       a3_megagpu_pool_0.update_settings({"static_node_count": num_nodes})
+    if capacity_type not in (CapacityType.SPOT, CapacityType.FLEX_START):
+      a3_megagpu_pool_0.update_settings(
+          {"placement_policy": {"type": "COMPACT"}}
+      )
     if release_channel == ReleaseChannel.RAPID:
       a3_megagpu_pool_0.set_setting("auto_upgrade", True)
-    set_placement_policy = capacity_type != CapacityType.SPOT
+    set_placement_policy = capacity_type not in (
+        CapacityType.SPOT,
+        CapacityType.FLEX_START,
+    )
     workload = DeploymentModule(
         id="workload_component_install",
         source="modules/management/kubectl-apply",
@@ -521,7 +530,7 @@ class BlueprintGenerator:
         settings={
             "release_channel": release_channel.value,
             "version_prefix": version_prefix,
-            "min_cluster_version": cluster_version,
+            "min_master_version": cluster_version,
             "prefix_with_deployment_name": False,
             "name_suffix": cluster_name,
             "system_node_pool_machine_type": system_node_pool_machine_type,
@@ -614,6 +623,7 @@ class BlueprintGenerator:
       gpu_pool.update_settings(self.get_dws_flex_start())
     else:
       gpu_pool.update_settings({"static_node_count": num_nodes})
+      gpu_pool.update_settings({"placement_policy": {"type": "COMPACT"}})
     if release_channel == ReleaseChannel.RAPID:
       gpu_pool.set_setting("auto_upgrade", True)
@@ -809,7 +819,7 @@ class BlueprintGenerator:
         settings={
             "release_channel": release_channel.value,
             "version_prefix": version_prefix,
-            "min_cluster_version": cluster_version,
+            "min_master_version": cluster_version,
             "system_node_pool_machine_type": system_node_pool_machine_type,
             "system_node_pool_node_count": {
                 "total_min_nodes": system_node_pool_min_node_count,
@@ -896,6 +906,7 @@ class BlueprintGenerator:
       gpu_pool.update_settings(self.get_dws_flex_start())
     else:
       gpu_pool.update_settings({"static_node_count": num_nodes})
+      gpu_pool.update_settings({"placement_policy": {"type": "COMPACT"}})
     if release_channel == ReleaseChannel.RAPID:
       gpu_pool.set_setting("auto_upgrade", True)

xpk/core/blueprint/testing/data/a3_ultra.yaml CHANGED Viewed

@@ -97,7 +97,7 @@ deployment_groups:
     settings:
       release_channel: RAPID
       version_prefix: '1.2'
-      min_cluster_version: 1.2.3
+      min_master_version: 1.2.3
       prefix_with_deployment_name: false
       name_suffix: gke-a3-ultra
       system_node_pool_machine_type: "e2-standard-16"
@@ -142,6 +142,8 @@ deployment_groups:
         specific_reservations:
         - name: test-reservation
       static_node_count: 2
+      placement_policy:
+        type: COMPACT
     outputs: [instructions]
   - !DeploymentModule

xpk/core/blueprint/testing/data/a4.yaml CHANGED Viewed

@@ -121,7 +121,7 @@ deployment_groups:
         network_tier=null}], ipv6_access_config=[], alias_ip_range=[]}], gke-a4-rdma-net.subnetwork_interfaces_gke))
       release_channel: RAPID
       version_prefix: '1.2'
-      min_cluster_version: 1.2.3
+      min_master_version: 1.2.3
     use:
     - gke-a4-net-0
   - !DeploymentModule
@@ -154,6 +154,8 @@ deployment_groups:
         network_ip=null, stack_type=null, access_config=[{nat_ip=null, public_ptr_domain_name=null,
         network_tier=null}], ipv6_access_config=[], alias_ip_range=[]}], gke-a4-rdma-net.subnetwork_interfaces_gke))
       static_node_count: 2
+      placement_policy:
+        type: COMPACT
   - !DeploymentModule
     id: workload-manager-install

xpk/core/capacity.py CHANGED Viewed

@@ -90,7 +90,7 @@ def get_capacity_type(args) -> tuple[CapacityType, int]:
     capacity_type = CapacityType.ON_DEMAND
     num_types += 1
   if args.reservation:
-    return_code = verify_reservation_exists(args)
+    return_code = verify_reservations_exist(args)
     if return_code > 0:
       return capacity_type, return_code
     capacity_type = CapacityType.RESERVATION
@@ -184,8 +184,22 @@ def get_reservation_deployment_type(
   return output.strip()
-def verify_reservation_exists(args) -> int:
-  """Verify the reservation exists.
+def get_reservations_list(args) -> list[str]:
+  """Get the list of reservations from args.
+  Args:
+    args: user provided arguments.
+  Returns:
+    List of strings of reservations.
+  """
+  if not args.reservation:
+    return []
+  return [r.strip() for r in args.reservation.split(',')]
+def verify_reservations_exist(args) -> int:
+  """Verify the reservations exist.
   Args:
     args: user provided arguments for running the command.
@@ -193,16 +207,20 @@ def verify_reservation_exists(args) -> int:
   Returns:
     0 if successful and 1 otherwise.
   """
-  reservation = parse_reservation(args.reservation, args.project)
-  command = (
-      f'gcloud beta compute reservations describe {reservation.name}'
-      f' --project={reservation.project} --zone={args.zone}'
-  )
-  return_code = run_command_with_updates(command, 'Describe reservation')
-  if return_code != 0:
-    xpk_print(f'Describe reservation returned ERROR {return_code}')
-    xpk_print('Please confirm that your reservation name is correct.')
-    return 1
+  for reservation_name in get_reservations_list(args):
+    reservation = parse_reservation(reservation_name, args.project)
+    command = (
+        f'gcloud beta compute reservations describe {reservation.name}'
+        f' --project={reservation.project} --zone={args.zone}'
+    )
+    return_code = run_command_with_updates(command, 'Describe reservation')
+    if return_code != 0:
+      xpk_print(f'Describe reservation returned ERROR {return_code}')
+      xpk_print(
+          f'Please confirm that your reservation name {reservation_name} is'
+          ' correct.'
+      )
+      return 1
   return 0
@@ -211,6 +229,7 @@ def get_capacity_arguments_from_capacity_type(
     capacity_type: CapacityType,
     max_nodes: int,
     accelerator_type: AcceleratorType,
+    reservation_name: str | None,
 ) -> tuple[str, int]:
   """Determine the Nodepool creation capacity arguments needed.
@@ -240,7 +259,7 @@ def get_capacity_arguments_from_capacity_type(
         capacity_args += ' --enable-queued-provisioning'
     case CapacityType.RESERVATION:
       capacity_args = (
-          f'--reservation-affinity=specific --reservation={args.reservation}'
+          f'--reservation-affinity=specific --reservation={reservation_name}'
       )
     case _:
       xpk_print(
@@ -252,13 +271,14 @@ def get_capacity_arguments_from_capacity_type(
 def get_capacity_node_selectors_from_capacity_type(
-    args, capacity_type: str
+    capacity_type: str, reservation_name: str | None
 ) -> tuple[str, int]:
   """Determine the node selectors for a workload to run on a specific capacity type.
   Args:
-    args: user provided arguments for running the command.
     capacity_type: The type of capacity the user configured.
+    reservation_name: The name of the reservation to use. Set to None if not
+      using reservations.
   Returns:
     Tuple with string with the node selectors to use and
@@ -275,7 +295,7 @@ def get_capacity_node_selectors_from_capacity_type(
     case CapacityType.SPOT.name:
       node_selector = 'cloud.google.com/gke-spot: "true"'
     case CapacityType.RESERVATION.name:
-      node_selector = f'cloud.google.com/reservation-name: {args.reservation}'
+      node_selector = f'cloud.google.com/reservation-name: {reservation_name}'
     case _:
       xpk_print(
           f'Unknown capacity type: {capacity_type}. Unable to determine the'

xpk/core/capacity_test.py CHANGED Viewed

@@ -16,7 +16,15 @@ limitations under the License.
 import pytest
 from unittest.mock import MagicMock, patch
-from .capacity import get_reservation_deployment_type, parse_reservation, Reservation
+from .capacity import (
+    get_reservation_deployment_type,
+    parse_reservation,
+    Reservation,
+    get_capacity_type,
+    CapacityType,
+    verify_reservations_exist,
+    get_reservations_list,
+)
 @patch('xpk.core.capacity.xpk_print')
@@ -133,3 +141,60 @@ def test_parse_reservation_fails_on_invalid_reservations(
     parse_reservation(reservation_path, 'cluster-project')
   assert 'Unable to parse reservation' in xpk_print.mock_calls[0].args[0]
+def test_get_capacity_type_multiple_reservations(mocker):
+  args = MagicMock()
+  args.on_demand = False
+  args.spot = False
+  args.flex = False
+  args.reservation = 'res1,res2'
+  args.project = 'test-project'
+  args.zone = 'us-central1-a'
+  mocker.patch('xpk.core.capacity.run_command_with_updates', return_value=0)
+  capacity_type, return_code = get_capacity_type(args)
+  assert capacity_type == CapacityType.RESERVATION
+  assert return_code == 0
+def test_verify_reservations_exist_multiple(mocker):
+  args = MagicMock()
+  args.reservation = 'res1,res2'
+  args.project = 'test-project'
+  args.zone = 'us-central1-a'
+  mock_run = mocker.patch(
+      'xpk.core.capacity.run_command_with_updates', return_value=0
+  )
+  return_code = verify_reservations_exist(args)
+  assert return_code == 0
+  assert mock_run.call_count == 2
+def test_get_reservations_list_with_single_reservation(mocker):
+  args = mocker.Mock(reservation='res1')
+  assert get_reservations_list(args) == ['res1']
+def test_get_reservations_list_with_multiple_reservations(mocker):
+  args = mocker.Mock(reservation='res1,res2')
+  assert get_reservations_list(args) == ['res1', 'res2']
+def test_get_reservations_list_with_whitespace(mocker):
+  args = mocker.Mock(reservation='res1, res2 ')
+  assert get_reservations_list(args) == ['res1', 'res2']
+def test_get_reservations_list_none(mocker):
+  args = mocker.Mock(reservation=None)
+  assert get_reservations_list(args) == []
+def test_get_reservations_list_empty(mocker):
+  args = mocker.Mock(reservation='')
+  assert get_reservations_list(args) == []

xpk/core/cluster.py CHANGED Viewed

@@ -158,7 +158,7 @@ def install_nri_on_cluster() -> int:
 def get_cluster_nodes_info() -> list[dict]:
-  """Get list of cluster's nodes descrition in yaml format
+  """Get list of cluster's nodes description in yaml format
   Returns:
     List of nodes info yaml objects.
@@ -393,11 +393,13 @@ def project_id_to_project_number(project_id: str) -> str:
 def setup_k8s_env(args) -> k8s_client.ApiClient:
   add_zone_and_project(args)
   get_cluster_credentials(args)
-  args.project_number = (
-      project_id_to_project_number(args.project)
-      if not args.dry_run
-      else abs(hash(args.project) % (10**12))  # 12 digit hash
-  )
+  # Use provided project number if available, otherwise fetch via API
+  if getattr(args, 'project_number', None):
+    xpk_print(f'Using provided project number: {args.project_number}')
+  elif args.dry_run:
+    args.project_number = abs(hash(args.project) % (10**12))  # 12 digit hash
+  else:
+    args.project_number = project_id_to_project_number(args.project)
   config.load_kube_config()
   return k8s_client.ApiClient()
@@ -716,10 +718,8 @@ def get_cluster_credentials(args) -> int:
       location=location,
       dns_endpoint=True,
   )
-  if return_code != 0:
-    return return_code
-  if not _are_credentials_valid():
+  if return_code != 0 or not _are_credentials_valid():
     xpk_print('Detected error. Retrying without --dns-endpoint flag...')
     return_code = _get_credentials(
         project=args.project,
@@ -751,6 +751,6 @@ def _get_credentials(
 def _are_credentials_valid() -> bool:
   kubectl_command = 'kubectl get pods'
   kubectl_return_code = run_command_with_updates(
-      kubectl_command, 'Test kubectl credentials'
+      kubectl_command, 'Test kubectl credentials', verbose=False
   )
   return kubectl_return_code == 0

xpk 1.0.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

xpk-1.0.0-py3-none-any.whl → xpk-1.1.0-py3-none-any.whl