PyPI - xpk - Versions diffs - 0.14.4__py3-none-any.whl → 0.16.0__py3-none-any.whl - Mend

xpk 0.14.4__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (91) hide show

integration/README.md +19 -0
integration/gcluster_a3mega_test.py +11 -0
integration/gcluster_a3ultra_test.py +11 -0
integration/gcluster_a4_test.py +11 -0
xpk/blueprints/a3mega/config-map.yaml.tftpl +15 -0
xpk/blueprints/a3mega/storage_crd.yaml +52 -0
xpk/blueprints/a3ultra/config-map.yaml.tftpl +15 -0
xpk/blueprints/a3ultra/mlgru-disable.yaml +59 -0
xpk/blueprints/a3ultra/nccl-installer.yaml +95 -0
xpk/blueprints/a3ultra/storage_crd.yaml +52 -0
xpk/blueprints/a4/config-map.yaml.tftpl +15 -0
xpk/blueprints/a4/nccl-rdma-installer-a4.yaml +66 -0
xpk/blueprints/a4/storage_crd.yaml +52 -0
xpk/commands/cluster.py +89 -32
xpk/commands/cluster_gcluster.py +25 -5
xpk/commands/cluster_gcluster_test.py +16 -3
xpk/commands/cluster_test.py +353 -7
xpk/commands/config.py +3 -5
xpk/commands/inspector.py +5 -3
xpk/commands/kind.py +3 -1
xpk/commands/managed_ml_diagnostics.py +249 -0
xpk/commands/managed_ml_diagnostics_test.py +146 -0
xpk/commands/storage.py +8 -10
xpk/commands/workload.py +143 -142
xpk/commands/workload_test.py +160 -118
xpk/core/blueprint/blueprint_generator.py +73 -33
xpk/core/blueprint/blueprint_test.py +9 -0
xpk/core/blueprint/testing/data/a3_mega.yaml +129 -0
xpk/core/blueprint/testing/data/a3_mega_spot.yaml +125 -0
xpk/core/blueprint/testing/data/a3_ultra.yaml +173 -0
xpk/core/blueprint/testing/data/a4.yaml +185 -0
xpk/core/capacity.py +48 -8
xpk/core/capacity_test.py +32 -1
xpk/core/cluster.py +55 -104
xpk/core/cluster_test.py +170 -0
xpk/core/commands.py +4 -10
xpk/core/config.py +88 -7
xpk/core/config_test.py +67 -11
xpk/core/docker_container.py +3 -1
xpk/core/docker_image.py +10 -6
xpk/core/docker_resources.py +1 -10
xpk/core/gcloud_context.py +18 -12
xpk/core/gcloud_context_test.py +111 -1
xpk/core/kjob.py +17 -19
xpk/core/kueue_manager.py +205 -51
xpk/core/kueue_manager_test.py +158 -4
xpk/core/nap.py +13 -14
xpk/core/nodepool.py +37 -43
xpk/core/nodepool_test.py +42 -19
xpk/core/pathways.py +23 -0
xpk/core/pathways_test.py +57 -0
xpk/core/resources.py +84 -27
xpk/core/scheduling.py +144 -133
xpk/core/scheduling_test.py +298 -6
xpk/core/system_characteristics.py +256 -19
xpk/core/system_characteristics_test.py +128 -5
xpk/core/telemetry.py +263 -0
xpk/core/telemetry_test.py +211 -0
xpk/core/vertex.py +4 -3
xpk/core/workload_decorators/tcpx_decorator.py +5 -1
xpk/main.py +33 -13
xpk/parser/cluster.py +40 -67
xpk/parser/cluster_test.py +83 -3
xpk/parser/common.py +84 -0
xpk/parser/storage.py +10 -0
xpk/parser/storage_test.py +47 -0
xpk/parser/workload.py +14 -29
xpk/parser/workload_test.py +3 -49
xpk/telemetry_uploader.py +29 -0
xpk/templates/arm_gpu_workload_crate.yaml.j2 +46 -0
xpk/templates/kueue_gke_default_topology.yaml.j2 +1 -1
xpk/templates/kueue_sub_slicing_topology.yaml.j2 +3 -8
xpk/utils/console.py +41 -10
xpk/utils/console_test.py +106 -0
xpk/utils/feature_flags.py +10 -1
xpk/utils/file.py +4 -1
xpk/utils/topology.py +4 -0
xpk/utils/user_agent.py +35 -0
xpk/utils/user_agent_test.py +44 -0
xpk/utils/user_input.py +48 -0
xpk/utils/user_input_test.py +92 -0
xpk/utils/validation.py +2 -13
xpk/utils/versions.py +31 -0
xpk-0.16.0.dist-info/METADATA +127 -0
xpk-0.16.0.dist-info/RECORD +168 -0
xpk-0.14.4.dist-info/METADATA +0 -1645
xpk-0.14.4.dist-info/RECORD +0 -139
{xpk-0.14.4.dist-info → xpk-0.16.0.dist-info}/WHEEL +0 -0
{xpk-0.14.4.dist-info → xpk-0.16.0.dist-info}/entry_points.txt +0 -0
{xpk-0.14.4.dist-info → xpk-0.16.0.dist-info}/licenses/LICENSE +0 -0
{xpk-0.14.4.dist-info → xpk-0.16.0.dist-info}/top_level.txt +0 -0

xpk/core/system_characteristics.py CHANGED Viewed

@@ -15,9 +15,29 @@ limitations under the License.
 """
 from dataclasses import dataclass
+import dataclasses
+from typing import Callable, Literal, Optional
+from ..core.workload_decorators import rdma_decorator, tcpxo_decorator, tcpx_decorator
 from ..utils.topology import get_topology_product
 from enum import Enum
+SUB_SLICING_TOPOLOGIES = ['2x4', '4x4', '4x8', '8x8', '8x16', '16x16']
+INSTALLER_NCCL_TCPX = 'https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/gpudirect-tcpx/nccl-tcpx-installer.yaml'
+INSTALLER_NCCL_TCPXO = 'https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/gpudirect-tcpxo/nccl-tcpxo-installer.yaml'
+INSTALLER_NCCL_RDMA = 'https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/gpudirect-rdma/nccl-rdma-installer.yaml'
+INSTALLER_NCCL_RDMA_A4X = 'https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/gpudirect-rdma/nccl-rdma-installer-a4x.yaml'
+class DockerPlatform(str, Enum):
+  AMD = 'linux/amd64'
+  ARM = 'linux/arm64'
+AMD_PLATFORM = DockerPlatform.AMD
+ARM_PLATFORM = DockerPlatform.ARM
 class AcceleratorType(Enum):
   TPU = 1
@@ -54,6 +74,45 @@ AcceleratorTypeToAcceleratorCharacteristics = {
 }
+@dataclass
+class GpuConfig:
+  """Contains GPU-specific configuration and requirements."""
+  requires_topology: bool
+  gpu_direct_name: Literal['fastrak', 'rdma', 'tcpx', 'tcpxo'] = 'fastrak'
+  kjob_decorator_fn: Optional[Callable[[dict], dict]] = None
+  """A function to decorate the kjob template for GPU-specific configurations.
+  Args:
+    job_manifest (dict): The kjob manifest as a dictionary.
+  Returns:
+    dict: The modified kjob manifest as a dictionary.
+  """
+  nccl_installer: Optional[str] = None
+  jobset_decorator_fn: Optional[Callable[[str, list[str]], str]] = None
+  """A function to decorate the jobset for GPU-specific configurations.
+  Args:
+    jobset_manifest_str (str): The JobSet manifest as a YAML string.
+    sub_networks (list[str], optional): A list of sub-network names, used by some decorators.
+  Returns:
+    str: The modified JobSet manifest as a YAML string.
+  """
+  def __repr__(self) -> str:
+    """Returns a string representation of the GpuConfig, omitting memory addresses for functions."""
+    parts = []
+    for f in dataclasses.fields(self):
+      value = getattr(self, f.name)
+      if f.name in ('kjob_decorator_fn', 'jobset_decorator_fn') and value:
+        parts.append(f'{f.name}=<function {value.__name__}>')
+      else:
+        parts.append(f'{f.name}={repr(value)}')
+    return f"GpuConfig({', '.join(parts)})"
 @dataclass
 class SystemCharacteristics:
   """Contains the defining characteristics of a specific accelerator system.
@@ -90,12 +149,28 @@ class SystemCharacteristics:
   accelerator_type: AcceleratorType
   device_type: str
   supports_sub_slicing: bool
+  docker_platform: DockerPlatform
   requires_workload_policy: bool = False
+  gpu_config: Optional[GpuConfig] = None
   def __post_init__(self):
     if self.accelerator_type == AcceleratorType.GPU:
       self.requires_workload_policy = True
+      if self.gpu_config is None:
+        raise ValueError(
+            f"Validation Error: System '{self.device_type}' is a GPU, "
+            "but 'gpu_config' was not provided."
+        )
+  @property
+  def gpu_requires_topology(self) -> bool:
+    """
+    Safely returns whether the GPU config requires topology,
+    defaulting to False if no GPU config exists.
+    """
+    return self.gpu_config.requires_topology if self.gpu_config else False
 def get_system_characteristics(
     args,
@@ -131,6 +206,33 @@ def get_system_characteristics_by_device_type(
     return None, 1
+def generate_tpu_topologies(
+    max_cubes: int, enforce_nondecreasing: bool = True
+) -> list[str]:
+  """Generates a list of unique TPU topologies formatted as strings "AxBxC".
+  The list will contain all triplets (A, B, C) such that:
+    - A, B and C are integers in range 4..256 (including 4 and 256)
+    - A, B and C are divisible by 4
+    - (A/4) * (B/4) * (C/4) <= max_cubes
+    - if enforce_nondecreasing: A <= B <= C
+  Additionally, the list will also contain the following triplets:
+    2x2x1, 2x2x2, 2x2x4, 2x4x4
+  Args:
+    max_cubes: maximum number of cubes supported by a TPU platform
+    enforce_nondecreasing: whether to enforce A <= B <= C or not
+  """
+  topologies = ['2x2x1', '2x2x2', '2x2x4', '2x4x4']
+  MAX = 256
+  for x in range(4, MAX + 1, 4):
+    for y in range(x if enforce_nondecreasing else 4, MAX + 1, 4):
+      for z in range(y if enforce_nondecreasing else 4, MAX + 1, 4):
+        if (x // 4) * (y // 4) * (z // 4) <= max_cubes:
+          topologies.append(f'{x}x{y}x{z}')
+  return topologies
 def get_tpu_system_characteristics_map(
     prefix: str,
     tensorcores_per_chip: int,
@@ -138,13 +240,18 @@ def get_tpu_system_characteristics_map(
     machine_type: str,
     supported_topologies: list[str],
     supports_sub_slicing: bool,
-    requires_workload_policy: bool = False,
+    docker_platform: DockerPlatform,
+    tpu_type_requires_workload_policy: bool = False,
+    default_topologies: set[str] | None = None,
 ) -> dict[str, SystemCharacteristics]:
   system_characteristics_map = {}
+  if default_topologies is None:
+    default_topologies = set()
   for topology in supported_topologies:
     chips_per_vm = compute_chips_per_vm(topology)
     vms_per_slice = compute_vms_per_slice(topology)
     num_tensorcores = compute_num_tensorcores(tensorcores_per_chip, topology)
+    device_type = f'{prefix}-{num_tensorcores}'
     system = SystemCharacteristics(
         topology=topology,
         vms_per_slice=vms_per_slice,
@@ -152,12 +259,18 @@ def get_tpu_system_characteristics_map(
         gce_machine_type=machine_type,
         chips_per_vm=chips_per_vm,
         accelerator_type=AcceleratorType.TPU,
-        device_type=f'{prefix}-{num_tensorcores}',
-        requires_workload_policy=requires_workload_policy,
+        device_type=device_type,
+        requires_workload_policy=tpu_type_requires_workload_policy
+        and vms_per_slice > 1,
         supports_sub_slicing=supports_sub_slicing,
+        docker_platform=docker_platform,
     )
     system_characteristics_map[f'{prefix}-{topology}'] = system
-    system_characteristics_map[f'{prefix}-{num_tensorcores}'] = system
+    if (
+        topology in default_topologies
+        or device_type not in system_characteristics_map
+    ):
+      system_characteristics_map[device_type] = system
   return system_characteristics_map
@@ -193,6 +306,8 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.GPU,
         device_type='l4-1',
         supports_sub_slicing=False,
+        gpu_config=GpuConfig(requires_topology=False),
+        docker_platform=AMD_PLATFORM,
     ),
     'l4-2': SystemCharacteristics(
         topology='N/A',
@@ -203,6 +318,8 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.GPU,
         device_type='l4-2',
         supports_sub_slicing=False,
+        gpu_config=GpuConfig(requires_topology=False),
+        docker_platform=AMD_PLATFORM,
     ),
     'l4-4': SystemCharacteristics(
         topology='N/A',
@@ -213,6 +330,8 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.GPU,
         device_type='l4-4',
         supports_sub_slicing=False,
+        gpu_config=GpuConfig(requires_topology=False),
+        docker_platform=AMD_PLATFORM,
     ),
     'l4-8': SystemCharacteristics(
         topology='N/A',
@@ -223,6 +342,8 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.GPU,
         device_type='l4-8',
         supports_sub_slicing=False,
+        gpu_config=GpuConfig(requires_topology=False),
+        docker_platform=AMD_PLATFORM,
     ),
     # A100-40gb-$CHIPSc
     'a100-40gb-1': SystemCharacteristics(
@@ -234,6 +355,8 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.GPU,
         device_type='a100-40gb-1',
         supports_sub_slicing=False,
+        gpu_config=GpuConfig(requires_topology=False),
+        docker_platform=AMD_PLATFORM,
     ),
     'a100-40gb-2': SystemCharacteristics(
         topology='N/A',
@@ -244,6 +367,8 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.GPU,
         device_type='a100-40gb-2',
         supports_sub_slicing=False,
+        gpu_config=GpuConfig(requires_topology=False),
+        docker_platform=AMD_PLATFORM,
     ),
     'a100-40gb-4': SystemCharacteristics(
         topology='N/A',
@@ -254,6 +379,8 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.GPU,
         device_type='a100-40gb-4',
         supports_sub_slicing=False,
+        gpu_config=GpuConfig(requires_topology=False),
+        docker_platform=AMD_PLATFORM,
     ),
     'a100-40gb-8': SystemCharacteristics(
         topology='N/A',
@@ -264,6 +391,8 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.GPU,
         device_type='a100-40gb-8',
         supports_sub_slicing=False,
+        gpu_config=GpuConfig(requires_topology=False),
+        docker_platform=AMD_PLATFORM,
     ),
     'gb200-4': SystemCharacteristics(
         topology='1x72',
@@ -274,6 +403,14 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.GPU,
         device_type='gb200-4',
         supports_sub_slicing=False,
+        gpu_config=GpuConfig(
+            requires_topology=True,
+            nccl_installer=INSTALLER_NCCL_RDMA_A4X,
+            kjob_decorator_fn=rdma_decorator.decorate_kjob_template,
+            jobset_decorator_fn=rdma_decorator.decorate_jobset,
+            gpu_direct_name='rdma',
+        ),
+        docker_platform=ARM_PLATFORM,
     ),
     'gb200-4-nolssd': SystemCharacteristics(
         topology='1x72',
@@ -284,6 +421,14 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.GPU,
         device_type='gb200-4',
         supports_sub_slicing=False,
+        gpu_config=GpuConfig(
+            requires_topology=True,
+            nccl_installer=INSTALLER_NCCL_RDMA_A4X,
+            kjob_decorator_fn=rdma_decorator.decorate_kjob_template,
+            jobset_decorator_fn=rdma_decorator.decorate_jobset,
+            gpu_direct_name='rdma',
+        ),
+        docker_platform=ARM_PLATFORM,
     ),
     'b200-8': SystemCharacteristics(
         topology='N/A',
@@ -294,6 +439,14 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.GPU,
         device_type='b200-8',
         supports_sub_slicing=False,
+        gpu_config=GpuConfig(
+            requires_topology=True,
+            nccl_installer=INSTALLER_NCCL_RDMA,
+            kjob_decorator_fn=rdma_decorator.decorate_kjob_template,
+            jobset_decorator_fn=rdma_decorator.decorate_jobset,
+            gpu_direct_name='rdma',
+        ),
+        docker_platform=AMD_PLATFORM,
     ),
     'h200-141gb-8': SystemCharacteristics(
         topology='N/A',
@@ -304,6 +457,14 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.GPU,
         device_type='h200-141gb-8',
         supports_sub_slicing=False,
+        gpu_config=GpuConfig(
+            requires_topology=True,
+            nccl_installer=INSTALLER_NCCL_RDMA,
+            kjob_decorator_fn=rdma_decorator.decorate_kjob_template,
+            jobset_decorator_fn=rdma_decorator.decorate_jobset,
+            gpu_direct_name='rdma',
+        ),
+        docker_platform=AMD_PLATFORM,
     ),
     # H100-80gb-$CHIPS
     'h100-80gb-8': SystemCharacteristics(
@@ -315,6 +476,14 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.GPU,
         device_type='h100-80gb-8',
         supports_sub_slicing=False,
+        gpu_config=GpuConfig(
+            requires_topology=True,
+            nccl_installer=INSTALLER_NCCL_TCPX,
+            kjob_decorator_fn=tcpx_decorator.decorate_kjob_template,
+            jobset_decorator_fn=tcpx_decorator.decorate_jobset,
+            gpu_direct_name='tcpx',
+        ),
+        docker_platform=AMD_PLATFORM,
     ),
     # H100-mega-80gb-$CHIPS
     'h100-mega-80gb-8': SystemCharacteristics(
@@ -326,6 +495,14 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.GPU,
         device_type='h100-mega-80gb-8',
         supports_sub_slicing=False,
+        gpu_config=GpuConfig(
+            requires_topology=True,
+            nccl_installer=INSTALLER_NCCL_TCPXO,
+            kjob_decorator_fn=tcpxo_decorator.decorate_kjob_template,
+            jobset_decorator_fn=tcpxo_decorator.decorate_jobset,
+            gpu_direct_name='tcpxo',
+        ),
+        docker_platform=AMD_PLATFORM,
     ),
     # TPU system characteristics
     **get_tpu_system_characteristics_map(
@@ -334,17 +511,20 @@ UserFacingNameToSystemCharacteristics = {
         gke_accelerator='tpu7x',
         machine_type='tpu7x-standard-1t',
         supported_topologies=['1x1x1'],
-        requires_workload_policy=True,
+        tpu_type_requires_workload_policy=True,
         supports_sub_slicing=False,
+        docker_platform=AMD_PLATFORM,
     ),
     **get_tpu_system_characteristics_map(
         prefix='tpu7x',
         tensorcores_per_chip=2,
         gke_accelerator='tpu7x',
         machine_type='tpu7x-standard-4t',
-        requires_workload_policy=True,
+        tpu_type_requires_workload_policy=True,
         supports_sub_slicing=False,
-        supported_topologies=[
+        docker_platform=AMD_PLATFORM,
+        supported_topologies=generate_tpu_topologies(max_cubes=144),
+        default_topologies=set([
             '12x12x12',
             '12x12x16',
             '12x12x20',
@@ -443,7 +623,7 @@ UserFacingNameToSystemCharacteristics = {
             '8x8x76',
             '8x8x8',
             '8x8x92',
-        ],
+        ]),
     ),
     **get_tpu_system_characteristics_map(
         prefix='v6e',
@@ -452,22 +632,27 @@ UserFacingNameToSystemCharacteristics = {
         machine_type='ct6e-standard-1t',
         supports_sub_slicing=False,
         supported_topologies=['1x1'],
+        docker_platform=AMD_PLATFORM,
     ),
     **get_tpu_system_characteristics_map(
         prefix='v6e',
         tensorcores_per_chip=1,
         gke_accelerator='tpu-v6e-slice',
         machine_type='ct6e-standard-4t',
-        supports_sub_slicing=True,
+        supports_sub_slicing=False,
         supported_topologies=[
             '2x2',
-            '2x4',
-            '4x4',
-            '4x8',
-            '8x8',
-            '8x16',
-            '16x16',
         ],
+        docker_platform=AMD_PLATFORM,
+    ),
+    **get_tpu_system_characteristics_map(
+        prefix='v6e',
+        tensorcores_per_chip=1,
+        gke_accelerator='tpu-v6e-slice',
+        machine_type='ct6e-standard-4t',
+        supports_sub_slicing=True,
+        supported_topologies=SUB_SLICING_TOPOLOGIES,
+        docker_platform=AMD_PLATFORM,
     ),
     **get_tpu_system_characteristics_map(
         prefix='v5p',
@@ -475,7 +660,9 @@ UserFacingNameToSystemCharacteristics = {
         gke_accelerator='tpu-v5p-slice',
         machine_type='ct5p-hightpu-4t',
         supports_sub_slicing=False,
-        supported_topologies=[
+        docker_platform=AMD_PLATFORM,
+        supported_topologies=generate_tpu_topologies(max_cubes=140),
+        default_topologies=set([
             '2x2x1',
             '2x2x2',
             '2x2x4',
@@ -572,13 +759,14 @@ UserFacingNameToSystemCharacteristics = {
             '16x16x24',
             '12x24x24',
             '16x20x28',
-        ],
+        ]),
     ),
     **get_tpu_system_characteristics_map(
         prefix='v5litepod',
         tensorcores_per_chip=1,
         gke_accelerator='tpu-v5-lite-podslice',
         machine_type='ct5lp-hightpu-4t',
+        docker_platform=AMD_PLATFORM,
         supports_sub_slicing=False,
         supported_topologies=['2x4', '4x4', '4x8', '8x8', '8x16', '16x16'],
     ),
@@ -587,8 +775,12 @@ UserFacingNameToSystemCharacteristics = {
         tensorcores_per_chip=2,
         gke_accelerator='tpu-v4-podslice',
         machine_type='ct4p-hightpu-4t',
+        docker_platform=AMD_PLATFORM,
         supports_sub_slicing=False,
-        supported_topologies=[
+        supported_topologies=generate_tpu_topologies(
+            max_cubes=64, enforce_nondecreasing=False
+        ),
+        default_topologies=set([
             '2x2x1',
             '2x2x2',
             '2x2x4',
@@ -600,7 +792,7 @@ UserFacingNameToSystemCharacteristics = {
             '8x8x12',
             '8x8x16',
             '8x16x16',
-        ],
+        ]),
     ),
     # CPU system characteristics.
     # Note that chips_per_vm is actually the number of vCPUs in that CPU.
@@ -615,6 +807,7 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.CPU,
         device_type='m1-megamem-96-1',
         supports_sub_slicing=False,
+        docker_platform=AMD_PLATFORM,
     ),
     # n2-standard-#vCPUs-#VMs
     'n2-standard-64-1': SystemCharacteristics(
@@ -626,6 +819,7 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.CPU,
         device_type='n2-standard-64-1',
         supports_sub_slicing=False,
+        docker_platform=AMD_PLATFORM,
     ),
     'n2-standard-32-1': SystemCharacteristics(
         topology='N/A',
@@ -636,6 +830,7 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.CPU,
         device_type='n2-standard-32-1',
         supports_sub_slicing=False,
+        docker_platform=AMD_PLATFORM,
     ),
     'n2-standard-32-2': SystemCharacteristics(
         topology='N/A',
@@ -646,6 +841,7 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.CPU,
         device_type='n2-standard-32-2',
         supports_sub_slicing=False,
+        docker_platform=AMD_PLATFORM,
     ),
     'n2-standard-32-4': SystemCharacteristics(
         topology='N/A',
@@ -656,6 +852,7 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.CPU,
         device_type='n2-standard-32-4',
         supports_sub_slicing=False,
+        docker_platform=AMD_PLATFORM,
     ),
     'n2-standard-32-8': SystemCharacteristics(
         topology='N/A',
@@ -666,6 +863,7 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.CPU,
         device_type='n2-standard-32-8',
         supports_sub_slicing=False,
+        docker_platform=AMD_PLATFORM,
     ),
     'n2-standard-32-16': SystemCharacteristics(
         topology='N/A',
@@ -676,6 +874,7 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.CPU,
         device_type='n2-standard-32-16',
         supports_sub_slicing=False,
+        docker_platform=AMD_PLATFORM,
     ),
     'n2-standard-32-32': SystemCharacteristics(
         topology='N/A',
@@ -686,6 +885,7 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.CPU,
         device_type='n2-standard-32-32',
         supports_sub_slicing=False,
+        docker_platform=AMD_PLATFORM,
     ),
     'n2-standard-32-64': SystemCharacteristics(
         topology='N/A',
@@ -696,6 +896,7 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.CPU,
         device_type='n2-standard-32-64',
         supports_sub_slicing=False,
+        docker_platform=AMD_PLATFORM,
     ),
     'n2-standard-32-128': SystemCharacteristics(
         topology='N/A',
@@ -706,6 +907,7 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.CPU,
         device_type='n2-standard-32-128',
         supports_sub_slicing=False,
+        docker_platform=AMD_PLATFORM,
     ),
     'n2-standard-32-256': SystemCharacteristics(
         topology='N/A',
@@ -716,6 +918,7 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.CPU,
         device_type='n2-standard-32-256',
         supports_sub_slicing=False,
+        docker_platform=AMD_PLATFORM,
     ),
     'n2-standard-32-512': SystemCharacteristics(
         topology='N/A',
@@ -726,6 +929,7 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.CPU,
         device_type='n2-standard-32-512',
         supports_sub_slicing=False,
+        docker_platform=AMD_PLATFORM,
     ),
     'n2-standard-32-1024': SystemCharacteristics(
         topology='N/A',
@@ -736,6 +940,7 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.CPU,
         device_type='n2-standard-32-1024',
         supports_sub_slicing=False,
+        docker_platform=AMD_PLATFORM,
     ),
     'n2-standard-32-2048': SystemCharacteristics(
         topology='N/A',
@@ -746,7 +951,39 @@ UserFacingNameToSystemCharacteristics = {
         accelerator_type=AcceleratorType.CPU,
         device_type='n2-standard-32-2048',
         supports_sub_slicing=False,
+        docker_platform=AMD_PLATFORM,
     ),
 }
 """ If you modify UserFacingNameToSystemCharacteristics you should also modify
 the corresponding Map in MaxText/accelerator_to_spec_map.py """
+def get_system_characteristics_keys_by_accelerator_type(
+    accelerators: list[AcceleratorType] | None = None,
+) -> list[str]:
+  """Returns UserFacingNameToSystemCharacteristics keys for given AcceleratorTypes."""
+  if accelerators is None:
+    accelerators = list(AcceleratorType)
+  return [
+      key
+      for key, value in UserFacingNameToSystemCharacteristics.items()
+      if value.accelerator_type in accelerators
+  ]
+def create_accelerator_label(system: SystemCharacteristics) -> str:
+  if system.accelerator_type == AcceleratorType.CPU:
+    return ''
+  return (
+      f'{AcceleratorTypeToAcceleratorCharacteristics[system.accelerator_type].accelerator_label}:'
+      f' {system.gke_accelerator}'
+  )
+def create_machine_label(system: SystemCharacteristics) -> str:
+  if system.accelerator_type == AcceleratorType.TPU:
+    return (
+        f'{AcceleratorTypeToAcceleratorCharacteristics[system.accelerator_type].machine_label}:'
+        f' {system.topology}'
+    )
+  return ''

xpk 0.14.4__py3-none-any.whl → 0.16.0__py3-none-any.whl

xpk 0.14.4__py3-none-any.whl → 0.16.0__py3-none-any.whl