xpk-0.9.0-py3-none-any.whl → xpk-0.10.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xpk/commands/batch.py +3 -3
- xpk/commands/cluster.py +22 -1
- xpk/commands/cluster_gcluster.py +27 -0
- xpk/commands/common.py +12 -5
- xpk/commands/kjob_common.py +4 -1
- xpk/commands/run.py +2 -2
- xpk/commands/shell.py +2 -2
- xpk/commands/storage.py +10 -3
- xpk/commands/workload.py +64 -27
- xpk/core/blueprint/blueprint_generator.py +108 -40
- xpk/core/capacity.py +66 -6
- xpk/core/cluster.py +165 -7
- xpk/core/config.py +1 -65
- xpk/core/docker_manager.py +1 -1
- xpk/core/docker_resources.py +145 -72
- xpk/core/jobset.py +143 -0
- xpk/core/kjob.py +2 -6
- xpk/core/kueue.py +165 -5
- xpk/core/nodepool.py +17 -4
- xpk/core/pathways.py +1 -2
- xpk/core/storage.py +1 -95
- xpk/core/system_characteristics.py +1 -1
- xpk/core/workload.py +0 -44
- xpk/core/workload_decorators/rdma_decorator.py +2 -0
- xpk/core/workload_decorators/tcpx_decorator.py +10 -4
- xpk/core/workload_decorators/tcpxo_decorator.py +7 -0
- xpk/parser/cluster.py +23 -7
- xpk/parser/storage.py +2 -2
- xpk/parser/workload.py +21 -3
- {xpk-0.9.0.dist-info → xpk-0.10.0.dist-info}/METADATA +45 -6
- {xpk-0.9.0.dist-info → xpk-0.10.0.dist-info}/RECORD +35 -34
- {xpk-0.9.0.dist-info → xpk-0.10.0.dist-info}/WHEEL +0 -0
- {xpk-0.9.0.dist-info → xpk-0.10.0.dist-info}/entry_points.txt +0 -0
- {xpk-0.9.0.dist-info → xpk-0.10.0.dist-info}/licenses/LICENSE +0 -0
- {xpk-0.9.0.dist-info → xpk-0.10.0.dist-info}/top_level.txt +0 -0
xpk/core/kueue.py
CHANGED

@@ -16,6 +16,7 @@ limitations under the License.
 
 from argparse import Namespace
 
+import math
 import packaging
 from packaging.version import Version
 
@@ -39,10 +40,12 @@ from .system_characteristics import (
     SystemCharacteristics,
 )
 
-KUEUE_VERSION = 'v0.
+KUEUE_VERSION = 'v0.12.2'
 CLUSTER_QUEUE_NAME = 'cluster-queue'
 LOCAL_QUEUE_NAME = 'multislice-queue'
 WAIT_FOR_KUEUE_TIMEOUT = '5m'
+MEMORY_SIZE_PER_VM = 1.2
+MIN_MEMORY_LIMIT_SIZE = 4096
 
 packaging.version.VERSION_PATTERN = r'^v\d+\.\d+\.\d+$'
 
@@ -69,6 +72,26 @@ spec:
 {machine_label}
 {topology_label}
 ---
+apiVersion: kueue.x-k8s.io/v1beta1
+kind: AdmissionCheck
+metadata:
+  name: dws-prov
+spec:
+  controllerName: kueue.x-k8s.io/provisioning-request
+  parameters:
+    apiGroup: kueue.x-k8s.io
+    kind: ProvisioningRequestConfig
+    name: dws-config
+---
+apiVersion: kueue.x-k8s.io/v1beta1
+kind: ProvisioningRequestConfig
+metadata:
+  name: dws-config
+spec:
+  provisioningClassName: queued-provisioning.gke.io
+  managedResources:
+  - {managed_resource}
+---
 {pw_resource_flavors}
 apiVersion: kueue.x-k8s.io/v1beta1
 kind: ClusterQueue
@@ -82,6 +105,7 @@ spec:
   resourceGroups:
 {covered_resources_config}
 {pw_resources_kueue}
+{admission_checks}
 ---
 apiVersion: kueue.x-k8s.io/v1beta1
 kind: LocalQueue
@@ -166,6 +190,99 @@ spec:
       command: [ "sleep", "inf" ]
 """
 
+kueue_controller_manager_yml = """
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  labels:
+    app.kubernetes.io/component: controller
+    app.kubernetes.io/name: kueue
+    control-plane: controller-manager
+  name: kueue-controller-manager
+  namespace: kueue-system
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      control-plane: controller-manager
+  template:
+    metadata:
+      annotations:
+        kubectl.kubernetes.io/default-container: manager
+      labels:
+        app.kubernetes.io/component: controller
+        app.kubernetes.io/name: kueue
+        control-plane: controller-manager
+    spec:
+      containers:
+      - args:
+        - --config=/controller_manager_config.yaml
+        - --zap-log-level=2
+        command:
+        - /manager
+        image: registry.k8s.io/kueue/kueue:v0.10.0
+        imagePullPolicy: Always
+        livenessProbe:
+          httpGet:
+            path: /healthz
+            port: 8081
+          initialDelaySeconds: 15
+          periodSeconds: 20
+        name: manager
+        ports:
+        - containerPort: 8082
+          name: visibility
+          protocol: TCP
+        - containerPort: 9443
+          name: webhook-server
+          protocol: TCP
+        readinessProbe:
+          httpGet:
+            path: /readyz
+            port: 8081
+          initialDelaySeconds: 5
+          periodSeconds: 10
+        resources:
+          limits:
+            cpu: 500m
+            memory: {memory_limit_size}
+          requests:
+            cpu: 500m
+            memory: 512Mi
+        securityContext:
+          allowPrivilegeEscalation: false
+        volumeMounts:
+        - mountPath: /tmp/k8s-webhook-server/serving-certs
+          name: cert
+          readOnly: true
+        - mountPath: /controller_manager_config.yaml
+          name: manager-config
+          subPath: controller_manager_config.yaml
+      - args:
+        - --secure-listen-address=0.0.0.0:8443
+        - --upstream=http://127.0.0.1:8080/
+        - --logtostderr=true
+        - --v=10
+        image: registry.k8s.io/kubebuilder/kube-rbac-proxy:v0.16.0
+        name: kube-rbac-proxy
+        ports:
+        - containerPort: 8443
+          name: https
+          protocol: TCP
+        securityContext:
+          runAsNonRoot: true
+      serviceAccountName: kueue-controller-manager
+      terminationGracePeriodSeconds: 10
+      volumes:
+      - name: cert
+        secret:
+          defaultMode: 420
+          secretName: kueue-webhook-server-cert
+      - configMap:
+          name: kueue-manager-config
+        name: manager-config
+"""
+
 
 def verify_kueuectl(args: Namespace) -> None:
   """Verify if kueuectl is installed.
@@ -282,6 +399,7 @@ def install_kueue_crs(
     args,
     system: SystemCharacteristics,
    autoprovisioning_config: AutoprovisioningConfig | None,
+    flex_with_tpu=False,
 ) -> int:
   """Install Kueue Custom Resources.
 
@@ -309,6 +427,13 @@ def install_kueue_crs(
   else:
     # Determine total chips based on user specified topology.
     total_chips = get_total_chips_requested_from_args(args, system)
+  if args.flex and flex_with_tpu is False:
+    admission_checks = """
+  admissionChecks:
+  - dws-prov
+  """
+  else:
+    admission_checks = ''
 
   covered_resources_config = get_kueue_covered_resources_config(
       cluster_hardware_name=cluster_hardware_name,
@@ -322,7 +447,9 @@ def install_kueue_crs(
       B200_DEVICE_TYPE,
   ]:
     topology_label = 'topologyName: "gke-default"'
-
+  res_type = AcceleratorTypeToAcceleratorCharacteristics[
+      system.accelerator_type
+  ].resource_type
   yml_string = cluster_set_crd_yaml.format(
       system=system,
       cluster_hardware_name=cluster_hardware_name,
@@ -334,11 +461,11 @@ def install_kueue_crs(
       ),
       topology_label=topology_label,
       covered_resources_config=covered_resources_config,
-      resource_type=AcceleratorTypeToAcceleratorCharacteristics[
-          system.accelerator_type
-      ].resource_type,
+      resource_type=res_type,
       pw_resource_flavors=add_pw_resource_flavors(args),
       pw_resources_kueue=add_pw_resources_to_kueue(args),
+      admission_checks=admission_checks,
+      managed_resource=res_type,
       cluster_queue_name=CLUSTER_QUEUE_NAME,
       local_queue_name=LOCAL_QUEUE_NAME,
   )
@@ -386,3 +513,36 @@ def get_kueue_covered_resources_config(
       total_chips=total_chips,
   )
   return config_string
+
+
+def update_kueue_resources_if_necessary(args):
+  """Update the kueue manifest to increase the resources for the kueue controller manager.
+
+  Args:
+    args: user provided arguments for running the command.
+
+  Returns:
+    0 if successful and 1 otherwise.
+  """
+  # Get total number of nodes
+  cmd_total_node_num = 'kubectl get node --no-headers | wc -l'
+  return_code, out = run_command_for_value(
+      cmd_total_node_num, 'Count total nodes', args
+  )
+  if return_code != 0:
+    xpk_exit(1)
+  # 1.2MiB per VM or 4GiB (whichever is greater).
+  new_memory_limit = (
+      f'{max(math.ceil(int(out) * MEMORY_SIZE_PER_VM), MIN_MEMORY_LIMIT_SIZE)}Mi'
+  )
+  yml_string = kueue_controller_manager_yml.format(
+      memory_limit_size=new_memory_limit,
+  )
+  tmp = write_tmp_file(yml_string)
+  command = f'kubectl apply -f {str(tmp.file.name)}'
+
+  task = 'Updating Kueue Controller Manager resources'
+  return_code = run_command_with_updates_retry(command, task, args)
+  if return_code != 0:
+    xpk_print(f'{task} returned ERROR {return_code}')
  return return_code
xpk/core/nodepool.py
CHANGED

@@ -77,8 +77,12 @@ def run_gke_node_pool_create_command(
   if return_code > 0:
     xpk_print('Listing all reservations failed!')
     return_code = 1
+  if system.accelerator_type == AcceleratorType['TPU']:
+    max_nodes = system.vms_per_slice
+  else:
+    max_nodes = 1000
   capacity_args, return_code = get_capacity_arguments_from_capacity_type(
-      args, capacity_type
+      args, capacity_type, max_nodes
   )
   if return_code > 0:
     xpk_print('Parsing capacity arguments failed!')
@@ -275,7 +279,10 @@ def run_gke_node_pool_create_command(
   )
   if system.accelerator_type == AcceleratorType['TPU']:
     command += f' --node-version={gke_node_pool_version}'
-
+    if capacity_type == CapacityType.FLEX_START:
+      command += ' --num-nodes=0'
+    else:
+      command += f' --num-nodes={system.vms_per_slice}'
     command += ' --placement-type=COMPACT --max-pods-per-node 15'
     command += (
         f' --scopes=storage-full,gke-default,{CLOUD_PLATFORM_AUTH_SCOPE_URL}'
@@ -284,7 +291,10 @@ def run_gke_node_pool_create_command(
     command += f' {args.custom_tpu_nodepool_arguments}'
   elif system.accelerator_type == AcceleratorType['GPU']:
     subnet_prefix = f'{args.cluster}-{zone_to_region(args.zone)}'
-
+    if capacity_type == CapacityType.FLEX_START:
+      command += ' --num-nodes=0'
+    else:
+      command += f' --num-nodes={args.num_nodes}'
     command += (
         ' --accelerator'
         f' type={system.gke_accelerator},count={str(system.chips_per_vm)},gpu-driver-version=latest'
@@ -298,7 +308,10 @@ def run_gke_node_pool_create_command(
     )
     command += ' --max-pods-per-node=32'
   elif system.accelerator_type == AcceleratorType['CPU']:
-
+    if capacity_type == CapacityType.FLEX_START:
+      command += ' --num-nodes=0'
+    else:
+      command += f' --num-nodes={system.vms_per_slice}'
     command += (
         f' --scopes=storage-full,gke-default,{CLOUD_PLATFORM_AUTH_SCOPE_URL}'
     )
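
The same branch repeats for TPU, GPU, and CPU pools: a DWS Flex Start pool must be created empty so GKE can scale it up only when a ProvisioningRequest is admitted. A condensed, self-contained sketch of the shared rule (the enum below is a stand-in for xpk's own CapacityType; the helper name is illustrative):

from enum import Enum


class CapacityType(Enum):  # stand-in for xpk's own enum
  FLEX_START = 'flex_start'
  ON_DEMAND = 'on_demand'


def num_nodes_flag(capacity_type: CapacityType, static_size: int) -> str:
  # Flex-start (DWS) pools start at zero nodes; GKE resizes them when
  # capacity is granted. All other capacity types get their static size.
  if capacity_type == CapacityType.FLEX_START:
    return ' --num-nodes=0'
  return f' --num-nodes={static_size}'
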
xpk/core/pathways.py
CHANGED

@@ -19,8 +19,7 @@ from ..core.docker_container import get_user_workload_container
 from ..core.gcloud_context import zone_to_region
 from ..core.nodepool import get_all_nodepools_programmatic
 from ..utils.console import xpk_exit, xpk_print
-from .
-from .system_characteristics import SystemCharacteristics
+from .system_characteristics import AcceleratorType, SystemCharacteristics
 
 
 def add_pw_resource_flavors(args):
xpk/core/storage.py
CHANGED

@@ -46,6 +46,7 @@ STORAGE_CRD_NAME = f"{XPK_API_GROUP_NAME}.{STORAGE_CRD_PLURAL}"
 GCS_FUSE_TYPE = "gcsfuse"
 GCP_FILESTORE_TYPE = "gcpfilestore"
 PARALLELSTORE_TYPE = "parallelstore"
+LUSTRE_TYPE = "lustre"
 GCE_PD_TYPE = "pd"
 MANIFESTS_PATH = os.path.abspath("xpkclusters/storage-manifests")
 GCS_FUSE_ANNOTATIONS = {
@@ -365,101 +366,6 @@ def get_storage_annotations(storages: list[Storage]) -> list[str]:
   return annotations
 
 
-def get_storage_volume_mounts_yaml(storages: list[Storage]) -> str:
-  """
-  Generates the YAML representation of the volumeMounts section for the given Storages.
-
-  This function creates the YAML snippet that defines how the storage volumes
-  should be mounted within a Pod's containers.
-
-  Args:
-    storages: A list of Storage objects.
-
-  Returns:
-    A string containing the YAML representation of the volumeMounts section.
-  """
-  yaml_str = ""
-  for storage in storages:
-    yaml_str += f"""- name: {storage.pv}
-  mountPath: {storage.mount_point}
-  readOnly: {storage.readonly}
-"""
-  return yaml_str
-
-
-def get_storage_volumes_yaml(storages: list[Storage]) -> str:
-  """
-  Generates the YAML representation of the volumes section for the given Storages.
-
-  This function creates the YAML snippet that defines the volumes to be
-  mounted in a Pod, including the PersistentVolumeClaim associated with
-  each Storage.
-
-  Args:
-    storages: A list of Storage objects.
-
-  Returns:
-    A string containing the YAML representation of the volumes section.
-  """
-  yaml_str = ""
-  for storage in storages:
-    yaml_str += f"""- name: {storage.pv}
-  persistentVolumeClaim:
-    claimName: {storage.pvc}
-  readOnly: {storage.readonly}
-"""
-  return yaml_str
-
-
-def get_storage_volume_mounts_for_gpu(
-    storages: list[Storage],
-) -> list[dict]:
-  """
-  Generates the YAML representation of the volumeMounts section for the given Storages.
-
-  This function creates the list of storage specifications that define how the storage volumes
-  should be mounted within a Pod's containers.
-
-  Args:
-    storages: A list of Storage objects.
-
-  Returns:
-    A list containing the dictionary representation of the volumeMounts section.
-  """
-  return [
-      {
-          "name": storage.pv,
-          "mountPath": storage.mount_point,
-          "readOnly": storage.readonly,
-      }
-      for storage in storages
-  ]
-
-
-def get_storage_volumes_yaml_for_gpu(storages: list[Storage]) -> str:
-  """
-  Generates the YAML representation of the volumes section for the given Storages.
-
-  This function creates the YAML snippet that defines the volumes to be
-  mounted in a Pod, including the PersistentVolumeClaim associated with
-  each Storage.
-
-  Args:
-    storages: A list of Storage objects.
-
-  Returns:
-    A string containing the YAML representation of the volumes section.
-  """
-  yaml_str = ""
-  for storage in storages:
-    yaml_str += f"""- name: {storage.pv}
-  persistentVolumeClaim:
-    claimName: {storage.pvc}
-  readOnly: {storage.readonly}
-"""
-  return yaml_str
-
-
 def get_storage_volumes_yaml_dict(storages: list[Storage]) -> list[dict]:
   vols = []
   for storage in storages:
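
The string-templating helpers removed here duplicated what the dict-based `get_storage_volumes_yaml_dict` path already provides. For orientation, a sketch of the dict shape involved (field names taken from the removed code; the helper name is illustrative, and readOnly is placed inside persistentVolumeClaim per the Kubernetes schema rather than at the volume level as the removed YAML had it):

def volumes_for(storages) -> list[dict]:
  # One PVC-backed volume per Storage, built from the same pv/pvc/readonly
  # fields the removed string-templating helpers rendered.
  return [
      {
          'name': s.pv,
          'persistentVolumeClaim': {'claimName': s.pvc, 'readOnly': s.readonly},
      }
      for s in storages
  ]
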
xpk/core/workload.py
CHANGED

@@ -14,18 +14,9 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """
 
-import yaml
-
-from ..utils import templates
 from ..utils.console import xpk_exit, xpk_print
-from .capacity import H100_DEVICE_TYPE, H100_MEGA_DEVICE_TYPE
 from .commands import run_command_for_value
 from .gcloud_context import zone_to_region
-from .storage import Storage, get_storage_volume_mounts_for_gpu
-from .system_characteristics import SystemCharacteristics
-
-RXDM_CONTAINER_A3HIGH_PATH = '/../templates/rxdm_container_a3high.yaml'
-RXDM_CONTAINER_A3MEGA_PATH = '/../templates/rxdm_container_a3mega.yaml'
 
 
 def workload_list_awk_command(filter_key) -> str:
@@ -249,38 +240,3 @@ def wait_for_job_completion(args) -> int:
     xpk_print('Your workload did not complete successfully')
     return 125
   return 0
-
-
-def add_gpu_rxdm_container(
-    jobset_manifest_str: str,
-    system: SystemCharacteristics,
-    all_storages: list[Storage],
-) -> str:
-  """Add gpu rxdm container to jobset manifest based on user provided arguments.
-
-  Args:
-    jobset_manifest_str: the JobSet manifest as a YAML string.
-    system: system characteristics.
-    all_storages: list of all storages.
-
-  Returns:
-    str: the modified JobSet manifest as a YAML string.
-  """
-  if system.device_type == H100_DEVICE_TYPE:
-    gpu_rxdm_container = templates.load(RXDM_CONTAINER_A3HIGH_PATH)
-  elif system.device_type == H100_MEGA_DEVICE_TYPE:
-    gpu_rxdm_container = templates.load(RXDM_CONTAINER_A3MEGA_PATH)
-  else:
-    return jobset_manifest_str
-
-  storage_volume_mounts = get_storage_volume_mounts_for_gpu(all_storages)
-  gpu_rxdm_container['volumeMounts'].extend(storage_volume_mounts)
-
-  manifest = yaml.safe_load(jobset_manifest_str)
-
-  for job in manifest['spec']['replicatedJobs']:
-    job['template']['spec']['template']['spec']['containers'].append(
-        gpu_rxdm_container
-    )
-
-  return yaml.dump(manifest, sort_keys=False)
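
The removed helper's pattern — load a sidecar container spec and append it to every replicated job's Pod template — presumably moves with the new xpk/core/jobset.py (+143) listed above. For reference, the core manipulation in isolation (a sketch of the removed logic, not of the new implementation):

import yaml


def append_sidecar(jobset_manifest_str: str, container: dict) -> str:
  # Append a sidecar container to each replicated job's Pod template,
  # as the removed add_gpu_rxdm_container did.
  manifest = yaml.safe_load(jobset_manifest_str)
  for job in manifest['spec']['replicatedJobs']:
    job['template']['spec']['template']['spec']['containers'].append(container)
  return yaml.dump(manifest, sort_keys=False)
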
xpk/core/workload_decorators/rdma_decorator.py
CHANGED

@@ -80,6 +80,8 @@ def add_annotations(job_manifest: dict, sub_networks: list[str]):
   """Adds or updates annotations in the Pod template."""
   annotations = job_manifest['spec']['template']['metadata']['annotations']
   interfaces_key, interfaces_value = get_interfaces_entry(sub_networks)
+  if annotations is None:
+    annotations = {}
   annotations.update({
       'networking.gke.io/default-interface': 'eth0',
       interfaces_key: interfaces_value,
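
A caveat on the guard above (an observation from the hunk alone, hedged since the rest of the function is not visible): rebinding the local name `annotations` to a fresh dict does not write that dict back into `job_manifest`, so when the manifest's annotations field is an explicit null, the subsequent `update()` mutates a dict the Pod template never sees. A write-back form avoids that:

metadata = job_manifest['spec']['template']['metadata']
if metadata.get('annotations') is None:
  metadata['annotations'] = {}  # attach the dict to the manifest first
annotations = metadata['annotations']
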
xpk/core/workload_decorators/tcpx_decorator.py
CHANGED

@@ -25,7 +25,7 @@ tcpx = 'v2.0.11'
 def decorate_kjob_template(job_manifest: dict) -> dict:
   add_volumes(job_manifest)
   add_tolerations(job_manifest)
-
+  add_tcpx_daemon_container(job_manifest)
   update_gpu_containers(job_manifest)
   return job_manifest
 
@@ -34,7 +34,7 @@ def decorate_job(job_manifest: dict) -> dict:
   add_annotations(job_manifest)
   add_volumes(job_manifest)
   add_tolerations(job_manifest)
-
+  add_tcpx_daemon_container(job_manifest)
   update_gpu_containers(job_manifest)
   return job_manifest
 
@@ -131,10 +131,13 @@ def add_volumes(job_manifest: dict):
   })
   volumes.append({'name': 'sys', 'hostPath': {'path': '/sys'}})
   volumes.append({'name': 'proc-sys', 'hostPath': {'path': '/proc/sys'}})
+  volumes.append(
+      {'name': 'dshm', 'emptyDir': {'medium': 'Memory', 'sizeLimit': '128Gi'}}
+  )
 
 
-def
-"""Adds the
+def add_tcpx_daemon_container(job_manifest):
+  """Adds the tcpx-daemon container to the Pod spec."""
   tcpxo_daemon_container = {
       'name': 'tcpx-daemon',
       'image': f'us-docker.pkg.dev/gce-ai-infra/gpudirect-tcpx/tcpgpudmarxd-dev:{tcpx}',
@@ -177,3 +180,6 @@ def update_gpu_containers(job_manifest):
     volumeMounts.append(
         {'name': 'libraries', 'mountPath': '/usr/local/nvidia/lib64'}
     )
+    container['volumeMounts'].append(
+        {'name': 'dshm', 'mountPath': '/dev/shm'}
+    )
xpk/core/workload_decorators/tcpxo_decorator.py
CHANGED

@@ -15,6 +15,7 @@ limitations under the License.
 """
 
 import yaml
+
 from ...utils.yaml import literal_string
 
 # Component version
@@ -141,6 +142,9 @@ def add_volumes(job_manifest):
       'name': 'aperture-devices',
       'hostPath': {'path': '/dev/aperture_devices'},
   })
+  volumes.append(
+      {'name': 'dshm', 'emptyDir': {'medium': 'Memory', 'sizeLimit': '128Gi'}}
+  )
 
 
 def add_tcpxo_daemon_container(job_manifest):
@@ -189,3 +193,6 @@ def update_gpu_containers(job_manifest):
     container['volumeMounts'].append(
         {'name': 'libraries', 'mountPath': '/usr/local/nvidia'}
     )
+    container['volumeMounts'].append(
+        {'name': 'dshm', 'mountPath': '/dev/shm'}
+    )
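
Both the tcpx and tcpxo decorators now mount a memory-backed emptyDir at /dev/shm in every GPU container — the usual workaround for the container runtime's small default /dev/shm (64Mi), which NCCL's shared-memory transport can exhaust. The pair of dicts involved, in isolation:

# Volume added to the Pod spec: RAM-backed, capped at 128Gi.
dshm_volume = {
    'name': 'dshm',
    'emptyDir': {'medium': 'Memory', 'sizeLimit': '128Gi'},
}
# Mount added to each GPU container.
dshm_mount = {'name': 'dshm', 'mountPath': '/dev/shm'}
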
xpk/parser/cluster.py
CHANGED

@@ -743,6 +743,11 @@ def add_driver_arguments(parser: ArgumentParser):
       action='store_true',
       help='Enable PersistentDisk CSI driver on the cluster.',
   )
+  parser.add_argument(
+      '--enable-lustre-csi-driver',
+      action='store_true',
+      help='Enable Lustre CSI driver on the cluster.',
+  )
 
 
 def add_shared_cluster_create_tensorboard_arguments(parser: ArgumentParser):
@@ -792,25 +797,36 @@ def add_shared_cluster_create_capacity_arguments(parser: ArgumentParser):
       '--on-demand',
       action='store_true',
       help=(
-          'Sets node pool creation to use on-demand resources.
-          '
+          'Sets node pool creation to use on-demand resources. See'
+          ' `--reservation`, `--flex` or `--spot` for other capacity'
+          ' types.'
       ),
   )
   parser.add_argument(
       '--reservation',
       type=str,
       help=(
-          'The reservation to be used for acquiring resources in the'
-          '
-          '
+          'The reservation to be used for acquiring resources in the cluster.'
+          ' This will attempt to find the provided reservation. See `--spot`,'
+          ' `--flex` or `--on-demand` for other capacity types.'
       ),
   )
   parser.add_argument(
       '--spot',
       action='store_true',
       help=(
-          'Sets node pool creation to use spot resources.'
-          '
+          'Sets node pool creation to use spot resources. See'
+          ' `--reservation`, `--flex` or `--on-demand` for other'
+          ' capacity types.'
+      ),
+  )
+  parser.add_argument(
+      '--flex',
+      action='store_true',
+      help=(
+          'Sets node pool creation to use DWS Flex Start resources. See'
+          ' `--reservation`, `--on-demand` or `--spot` for other capacity'
+          ' types.'
       ),
   )
 
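
With the new flag wired in, a DWS Flex Start cluster create might look roughly like this (illustrative invocation; cluster name and device shape are placeholders):

xpk cluster create \
  --cluster my-flex-cluster \
  --tpu-type=v5litepod-16 \
  --num-slices=1 \
  --flex
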
xpk/parser/storage.py
CHANGED

@@ -71,9 +71,9 @@ def add_storage_attach_parser(
       type=str,
       help=(
           'The type of storage. Currently supported types: "gcsfuse",'
-          ' "gcpfilestore", "parallelstore", "pd"'
+          ' "gcpfilestore", "parallelstore", "pd", "lustre"'
       ),
-      choices=['gcsfuse', 'gcpfilestore', 'parallelstore', 'pd'],
+      choices=['gcsfuse', 'gcpfilestore', 'parallelstore', 'pd', 'lustre'],
       required=True,
   )
   add_cluster_arguments(req_args, required=True)
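
Attaching a Lustre instance then follows the same shape as the other backends (illustrative; only the flags visible in this diff are shown, the remaining required attach flags are elided):

xpk storage attach my-lustre-storage \
  --type=lustre \
  --cluster=my-cluster \
  ...
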
xpk/parser/workload.py
CHANGED

@@ -208,15 +208,25 @@ def set_workload_parsers(workload_parser):
       help=(
           'Sets autoprovisioning to use reservation resources for the workload'
           ' request. This will attempt to find the provided reservation. See'
-          ' `--spot` or `--on-demand` for other capacity types.'
+          ' `--spot`, `--flex` or `--on-demand` for other capacity types.'
       ),
   )
   workload_create_autoprovisioning_arguments.add_argument(
       '--spot',
       action='store_true',
       help=(
-          'Sets autoprovisioning to use spot resources.'
-          '
+          'Sets autoprovisioning to use spot resources. See `--reservation`,'
+          ' `--flex` or `--on-demand` for other capacity types.'
+      ),
+  )
+
+  workload_create_autoprovisioning_arguments.add_argument(
+      '--flex',
+      action='store_true',
+      help=(
+          'Sets autoprovisioning to use flex-start resources. See'
+          ' `--reservation`, `--spot` or `--on-demand` for other capacity'
+          ' types.'
       ),
   )
 
@@ -728,6 +738,14 @@ def add_shared_workload_docker_image_arguments(args_parsers):
           ' directly by the xpk workload.'
       ),
   )
+  custom_parser.add_argument(
+      '--docker-image-pull-secret',
+      type=str,
+      help=(
+          'Name of the secret that will be used to pull image from'
+          ' private repository'
+      ),
+  )
 
 
 def add_shared_workload_create_tensorboard_arguments(args_parsers):
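
The new flag only references a secret; the secret itself must already exist in the cluster. A typical flow (illustrative names; the kubectl command is the standard way to create a docker-registry secret):

kubectl create secret docker-registry my-pull-secret \
  --docker-server=my-registry.example.com \
  --docker-username=my-user \
  --docker-password="$(cat password.txt)"

xpk workload create \
  --cluster=my-cluster \
  --workload=my-workload \
  --docker-image=my-registry.example.com/my-image:latest \
  --docker-image-pull-secret=my-pull-secret \
  --command="python3 train.py"
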
|