xpk 0.7.1__py3-none-any.whl → 0.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. xpk/commands/batch.py +19 -12
  2. xpk/commands/cluster.py +33 -16
  3. xpk/commands/cluster_gcluster.py +22 -5
  4. xpk/commands/info.py +2 -4
  5. xpk/commands/job.py +7 -8
  6. xpk/commands/kjob_common.py +23 -20
  7. xpk/commands/run.py +17 -11
  8. xpk/commands/shell.py +3 -4
  9. xpk/commands/storage.py +64 -19
  10. xpk/commands/workload.py +154 -319
  11. xpk/core/blueprint/blueprint_definitions.py +2 -0
  12. xpk/core/blueprint/blueprint_generator.py +322 -32
  13. xpk/core/capacity.py +1 -0
  14. xpk/core/cluster.py +75 -5
  15. xpk/core/config.py +3 -1
  16. xpk/core/docker_manager.py +1 -1
  17. xpk/core/docker_resources.py +9 -21
  18. xpk/core/filestore.py +11 -3
  19. xpk/core/gcsfuse.py +8 -5
  20. xpk/core/kjob.py +57 -18
  21. xpk/core/nap.py +4 -0
  22. xpk/core/network.py +11 -21
  23. xpk/core/nodepool.py +28 -26
  24. xpk/core/pathways.py +165 -210
  25. xpk/core/scheduling.py +36 -0
  26. xpk/core/storage.py +66 -12
  27. xpk/core/system_characteristics.py +9 -0
  28. xpk/core/workload.py +27 -82
  29. xpk/core/workload_decorators/rdma_decorator.py +3 -3
  30. xpk/core/workload_decorators/storage_decorator.py +8 -3
  31. xpk/core/workload_decorators/tcpxo_decorator.py +2 -2
  32. xpk/parser/cluster.py +15 -6
  33. xpk/parser/storage.py +14 -3
  34. xpk/parser/workload.py +59 -31
  35. {xpk-0.7.1.dist-info → xpk-0.8.0.dist-info}/METADATA +60 -4
  36. {xpk-0.7.1.dist-info → xpk-0.8.0.dist-info}/RECORD +40 -40
  37. {xpk-0.7.1.dist-info → xpk-0.8.0.dist-info}/WHEEL +1 -1
  38. {xpk-0.7.1.dist-info → xpk-0.8.0.dist-info}/entry_points.txt +0 -0
  39. {xpk-0.7.1.dist-info → xpk-0.8.0.dist-info}/licenses/LICENSE +0 -0
  40. {xpk-0.7.1.dist-info → xpk-0.8.0.dist-info}/top_level.txt +0 -0
@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """
 
-from .capacity import H100_DEVICE_TYPE, H100_MEGA_DEVICE_TYPE, H200_DEVICE_TYPE
+from .capacity import H100_DEVICE_TYPE, H100_MEGA_DEVICE_TYPE, H200_DEVICE_TYPE, B200_DEVICE_TYPE
 from .cluster import setup_k8s_env
 from .storage import GCS_FUSE_TYPE, GCP_FILESTORE_TYPE, Storage, get_storages_to_mount
 from .system_characteristics import AcceleratorType, SystemCharacteristics
@@ -64,22 +64,6 @@ def get_env_container(args, system: SystemCharacteristics) -> str:
     str:
       YAML with the env config for the main container, as a YAML string.
   """
-  pw_env_yaml = """
-                - name: XCLOUD_ENVIRONMENT
-                  value: GCP
-                - name: JAX_PLATFORMS
-                  value: proxy
-                - name: JAX_BACKEND_TARGET
-                  value: {proxy_address}
-                - name: JOBSET_NAME
-                  valueFrom:
-                    fieldRef:
-                      fieldPath: metadata.annotations['jobset.sigs.k8s.io/jobset-name']"""
-  if args.use_pathways:
-    return pw_env_yaml.format(
-        args=args, proxy_address=args.pathways_proxy_address
-    )
-
   gpu_env_yaml = """
                 - name: REPLICATED_JOB_NAME
                   valueFrom:
@@ -182,11 +166,14 @@ def get_volumes(args, system: SystemCharacteristics) -> str:
                 name: dshm-2
   """
 
-  if args.ramdisk_directory != '':
-    volumes += """
+  if hasattr(args, 'ramdisk_directory') and args.ramdisk_directory != '':
+    driver = 'phase1-checkpoint.csi.storage.gke.io'
+    if hasattr(args, 'mtc_enabled') and args.mtc_enabled:
+      driver = 'multitier-checkpoint.csi.storage.gke.io'
+    volumes += f"""
               - name: cache
                 csi:
-                  driver: phase1-checkpoint.csi.storage.gke.io"""
+                  driver: {driver}"""
 
   if (
       system.accelerator_type == AcceleratorType['TPU']
@@ -229,7 +216,7 @@ def get_volume_mounts(args, system: SystemCharacteristics) -> str:
                 name: dshm-2
   """
 
-  if args.ramdisk_directory != '':
+  if hasattr(args, 'ramdisk_directory') and args.ramdisk_directory != '':
     volume_mount_yaml += f"""
                 - mountPath: /{args.ramdisk_directory}
                   name: cache"""
@@ -262,6 +249,7 @@ def get_volume_mounts(args, system: SystemCharacteristics) -> str:
   elif (
       system.device_type == H100_MEGA_DEVICE_TYPE
      or system.device_type == H200_DEVICE_TYPE
+      or system.device_type == B200_DEVICE_TYPE
   ):
     volume_mount_yaml = ''
 
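Note: the ramdisk volume wiring above now selects the CSI driver at runtime. A minimal, self-contained sketch of that selection (the Namespace values are illustrative, not xpk defaults):

    from argparse import Namespace

    def pick_checkpoint_driver(args) -> str:
      # Mirrors the diff: the phase-1 checkpoint driver is the default,
      # and the multitier driver is chosen only when MTC is enabled.
      driver = 'phase1-checkpoint.csi.storage.gke.io'
      if hasattr(args, 'mtc_enabled') and args.mtc_enabled:
        driver = 'multitier-checkpoint.csi.storage.gke.io'
      return driver

    print(pick_checkpoint_driver(Namespace(mtc_enabled=True)))
    # -> multitier-checkpoint.csi.storage.gke.io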
xpk/core/filestore.py CHANGED
@@ -200,7 +200,9 @@ class FilestoreClient:
     ] = f"projects/{self.project}/global/networks/{network}"
     return data
 
-  def create_pv(self, name: str, vol: str, access_mode: str) -> dict:
+  def create_pv(
+      self, name: str, vol: str, access_mode: str, mount_options: str
+  ) -> dict:
     """Create a yaml representing filestore PersistentVolume."""
     data = templates.load(FS_PV_PATH)
     data["metadata"]["name"] = get_pv_name(name)
@@ -215,6 +217,7 @@ class FilestoreClient:
         0
     ].ip_addresses[0]
     data["spec"]["csi"]["volumeAttributes"]["volume"] = vol
+    data["spec"]["mountOptions"] = mount_options.split(",")
     return data
 
   def create_pvc(self, name: str, access_mode: str) -> dict:
@@ -230,10 +233,15 @@ class FilestoreClient:
     return data
 
   def manifest(
-      self, name: str, vol: str, access_mode: str, network: str
+      self,
+      name: str,
+      vol: str,
+      access_mode: str,
+      network: str,
+      mount_options: str,
   ) -> list[dict]:
     self.load_instance()
-    pv = self.create_pv(name, vol, access_mode)
+    pv = self.create_pv(name, vol, access_mode, mount_options)
     pvc = self.create_pvc(name, access_mode)
     sc = self.create_sc(name, network)
     return [pv, pvc, sc]
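Note: mount_options arrives as a single comma-separated string and is split straight into the PersistentVolume's spec.mountOptions list. A quick illustration with hypothetical NFS options (not xpk defaults):

    mount_options = "nconnect=16,hard,rsize=65536"
    spec = {"mountOptions": mount_options.split(",")}
    print(spec["mountOptions"])
    # -> ['nconnect=16', 'hard', 'rsize=65536']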
xpk/core/gcsfuse.py CHANGED
@@ -20,11 +20,12 @@ FUSE_PV_PATH = "/../templates/fuse-pv.yaml"
 FUSE_PVC_PATH = "/../templates/fuse-pvc.yaml"
 
 
-def create_pv(name: str, size: int, bucket: str) -> dict:
+def create_pv(name: str, size: int, bucket: str, mount_options: str) -> dict:
   data = templates.load(FUSE_PV_PATH)
   data["metadata"]["name"] = f"{name}-pv"
   data["spec"]["capacity"]["storage"] = f"{size}Gi"
   data["spec"]["csi"]["volumeHandle"] = bucket
+  data["spec"]["mountOptions"] = mount_options.split(",")
   return data
 
 
@@ -36,15 +37,17 @@ def create_pvc(name: str, size: int) -> dict:
   return data
 
 
-def manifest(name: str, bucket: str, size: int) -> list[dict]:
+def manifest(
+    name: str, bucket: str, size: int, mount_options: str
+) -> list[dict]:
   """Creates GCS FUSE manifest file.
 
   Args:
-    path (str): path to the file where the manifest will be created
     name (str): base name of the volumes
     bucket (str): name of the storage bucket
-    size (str): size of the storage
+    size (str): size of the storage (in GB)
+    mount_options (str): comma-separated list of mountOptions for PersistentVolume
   """
-  pv = create_pv(name, size, bucket)
+  pv = create_pv(name, size, bucket, mount_options)
   pvc = create_pvc(name, size)
   return [pv, pvc]
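Note: a hedged usage sketch of the updated manifest() signature; the bucket name and mount options below are examples, not values shipped with xpk, and templates.load() must be able to resolve the bundled YAML templates:

    from xpk.core import gcsfuse

    pv, pvc = gcsfuse.manifest(
        name="training-data",        # example volume base name
        bucket="my-example-bucket",  # hypothetical GCS bucket
        size=64,                     # rendered as "64Gi" by create_pv
        mount_options="implicit-dirs,only-dir=checkpoints",  # example gcsfuse options
    )
    print(pv["spec"]["mountOptions"])
    # -> ['implicit-dirs', 'only-dir=checkpoints']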
xpk/core/kjob.py CHANGED
@@ -14,27 +14,45 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """
 
-from ..core.blueprint.blueprint_generator import get_subnetworks_for_a3mega, get_subnetworks_for_a3ultra
-from ..core.capacity import H100_MEGA_DEVICE_TYPE, H200_DEVICE_TYPE
 from argparse import Namespace
-import yaml
-from .workload_decorators.tcpxo_decorator import get_tcpxo_deamon_entry
-from ..utils.console import xpk_print, xpk_exit
+from enum import Enum
 
-from ..utils import templates
+import yaml
 from kubernetes import client as k8s_client
 from kubernetes.client import ApiClient
 from kubernetes.client.rest import ApiException
-from .cluster import setup_k8s_env, XPK_SA, DEFAULT_NAMESPACE
-from .storage import get_auto_mount_storages, get_auto_mount_gcsfuse_storages
-from .commands import run_command_for_value, run_kubectl_apply, run_command_with_updates
-from .config import XpkConfig, KJOB_SHELL_IMAGE, KJOB_SHELL_INTERACTIVE_COMMAND, KJOB_SHELL_WORKING_DIRECTORY, KJOB_BATCH_IMAGE, KJOB_BATCH_WORKING_DIRECTORY
-from .resources import get_cluster_system_characteristics, SystemCharacteristics, AcceleratorType
-from enum import Enum
-
-from ..core.workload_decorators import tcpxo_decorator
 
-from ..core.workload_decorators import rdma_decorator
+from ..core.blueprint.blueprint_generator import (
+    get_subnetworks_for_a3mega,
+    get_subnetworks_for_a3ultra,
+    get_subnetworks_for_a4,
+)
+from ..core.capacity import H100_MEGA_DEVICE_TYPE, H200_DEVICE_TYPE
+from ..core.storage import GCS_FUSE_ANNOTATIONS, PARALLELSTORE_ANNOTATIONS
+from ..core.workload_decorators import rdma_decorator, tcpxo_decorator
+from ..utils import templates
+from ..utils.console import xpk_exit, xpk_print
+from .cluster import DEFAULT_NAMESPACE, XPK_SA, setup_k8s_env
+from .commands import (
+    run_command_for_value,
+    run_command_with_updates,
+    run_kubectl_apply,
+)
+from .config import (
+    KJOB_BATCH_IMAGE,
+    KJOB_BATCH_WORKING_DIRECTORY,
+    KJOB_SHELL_IMAGE,
+    KJOB_SHELL_INTERACTIVE_COMMAND,
+    KJOB_SHELL_WORKING_DIRECTORY,
+    XpkConfig,
+)
+from .resources import (
+    AcceleratorType,
+    SystemCharacteristics,
+    get_cluster_system_characteristics,
+)
+from .storage import get_auto_mount_gcsfuse_storages, get_auto_mount_storages, get_auto_mount_parallelstore_storages
+from .workload_decorators.tcpxo_decorator import get_tcpxo_deamon_entry
 
 KJOB_API_GROUP_NAME = "kjobctl.x-k8s.io"
 KJOB_API_GROUP_VERSION = "v1alpha1"
@@ -146,6 +164,18 @@ Kueue_TAS_annotation = "kueue.x-k8s.io/podset-preferred-topology=cloud.google.co
 default_interface_annotation = "networking.gke.io/default-interface=eth0"
 
 
+def get_a4_pod_template_annotations() -> tuple[str, str]:
+  sub_networks = get_subnetworks_for_a4()
+  interfaces_key, interfaces_value = rdma_decorator.get_interfaces_entry(
+      sub_networks
+  )
+
+  return (
+      default_interface_annotation,
+      f"{interfaces_key}=$'{interfaces_value}'",
+  )
+
+
 def get_a3ultra_pod_template_annotations(args: Namespace) -> tuple[str, str]:
   sub_networks = get_subnetworks_for_a3ultra(args.cluster)
   interfaces_key, interfaces_value = rdma_decorator.get_interfaces_entry(
@@ -436,9 +466,18 @@ def create_volume_bundle_instance(
     xpk_exit(1)
 
 
-def get_gcsfuse_annotation(args: Namespace) -> str | None:
+def get_storage_annotations(args: Namespace) -> list[str]:
+  annotations = []
   k8s_api_client = setup_k8s_env(args)
+
   gcsfuse_storages = get_auto_mount_gcsfuse_storages(k8s_api_client)
   if len(gcsfuse_storages) > 0:
-    return "gke-gcsfuse/volumes=true"
-  return None
+    for key, value in GCS_FUSE_ANNOTATIONS.items():
+      annotations.append(f"{key}={value}")
+
+  parallelstore_storages = get_auto_mount_parallelstore_storages(k8s_api_client)
+  if len(parallelstore_storages) > 0:
+    for key, value in PARALLELSTORE_ANNOTATIONS.items():
+      annotations.append(f"{key}={value}")
+
+  return annotations
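Note: where the old helper returned at most the single string "gke-gcsfuse/volumes=true", the new one flattens whole annotation dicts into "key=value" strings. A self-contained sketch of that flattening (the GCS FUSE entry is inferred from the removed return value; the Parallelstore entry is purely hypothetical, since the real dicts live in xpk.core.storage):

    GCS_FUSE_ANNOTATIONS = {"gke-gcsfuse/volumes": "true"}             # inferred
    PARALLELSTORE_ANNOTATIONS = {"gke-parallelstore/volumes": "true"}  # hypothetical

    def flatten(*annotation_dicts: dict) -> list[str]:
      # Same formatting as get_storage_annotations: one "key=value" per entry.
      return [f"{k}={v}" for d in annotation_dicts for k, v in d.items()]

    print(flatten(GCS_FUSE_ANNOTATIONS, PARALLELSTORE_ANNOTATIONS))
    # -> ['gke-gcsfuse/volumes=true', 'gke-parallelstore/volumes=true']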
xpk/core/nap.py CHANGED
@@ -255,6 +255,10 @@ def is_autoprovisioning_enabled(
     bool is true if autoprovisioning is enabled, false otherwise.
     int of 0 if successful and 1 otherwise.
   """
+  # Currently autoprovisioning is not enabled for Pathways workloads. b/360898087
+  if args.use_pathways:
+    return False, 0
+
   resources_configmap_name = f'{args.cluster}-{CLUSTER_RESOURCES_CONFIGMAP}'
   cluster_config_map = get_cluster_configmap(args, resources_configmap_name)
 
xpk/core/network.py CHANGED
@@ -16,10 +16,8 @@ limitations under the License.
 
 from ..utils.console import xpk_print
 from ..utils.file import write_tmp_file
-from .capacity import H100_DEVICE_TYPE
 from .commands import run_command_for_value, run_command_with_updates
 from .gcloud_context import zone_to_region
-from .system_characteristics import SystemCharacteristics
 
 # cluster_network_yaml: the config when creating the network for a3 cluster
 CLUSTER_NETWORK_YAML = """
@@ -175,16 +173,6 @@ def create_cluster_subnet(args, index) -> int:
   return 0
 
 
-def get_subnetworks_for_a3mega(cluster_name: str) -> list[str]:
-  return [f'{cluster_name}-gpunet-{i}-subnet' for i in range(8)]
-
-
-def get_subnetworks_for_a3ultra(cluster_name: str) -> list[str]:
-  return [f'{cluster_name}-sub-1'] + [
-      f'{cluster_name}-rdma-sub-{i}' for i in range(8)
-  ]
-
-
 def create_cluster_firewall_rule(args, index) -> int:
   """Create one GKE Cluster firewall rule.
 
@@ -247,20 +235,18 @@ def create_cluster_network_config(args) -> int:
   return 0
 
 
-def set_up_cluster_network_for_gpu(args, system: SystemCharacteristics) -> int:
-  """Set up GKE Cluster networks, subnets and firewall rules for A3/A3+.
-  Note: there are 4 NICs for GPU-GPU bw and 1 NIC for host in an A3 node,
-  and there are 8 NICs for GPU-GPU bw and 1 NIC for host in an A3+ node.
+def set_up_cluster_network_for_a3(args) -> int:
+  """Set up GKE Cluster networks, subnets and firewall rules for A3.
+  Note: there are 4 NICs for GPU-GPU bw and 1 NIC for host in an A3 node.
 
   Args:
     args: user provided arguments for running the command.
-    system: system characteristics.
 
   Returns:
     0 if successful and 1 otherwise.
   """
-  num_networks = 5 if system.device_type == H100_DEVICE_TYPE else 9
-  for i in range(1, num_networks):
+  num_networks = 4
+  for i in range(1, num_networks + 1):
    return_code = create_cluster_network(args, i)
    if return_code != 0:
      return 1
@@ -315,7 +301,10 @@ def get_all_networks_programmatic(args) -> tuple[list[str], int]:
   Returns:
     List of networks and 0 if successful and 1 otherwise.
   """
-  command = 'gcloud compute networks list --format="csv[no-heading](name)"'
+  command = (
+      'gcloud compute networks list --format="csv[no-heading](name)" '
+      f' --project={args.project}'
+  )
   return_code, raw_network_output = run_command_for_value(
       command, 'Get All Networks', args
   )
@@ -365,7 +354,8 @@ def get_all_firewall_rules_programmatic(args) -> tuple[list[str], int]:
     List of firewall rules and 0 if successful and 1 otherwise.
   """
   command = (
-      'gcloud compute firewall-rules list --format="csv[no-heading](name)"'
+      'gcloud compute firewall-rules list --format="csv[no-heading](name)" '
+      f' --project={args.project}'
   )
   return_code, raw_subnets_output = run_command_for_value(
       command, 'Get All Firewall Rules', args
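Note: the subnet-name helpers deleted above now live in blueprint_generator (see the kjob.py imports earlier in this diff). Replaying the removed list comprehensions for a sample cluster name shows the naming they produce:

    cluster = "demo"  # example cluster name
    a3mega_subnets = [f'{cluster}-gpunet-{i}-subnet' for i in range(8)]
    a3ultra_subnets = [f'{cluster}-sub-1'] + [
        f'{cluster}-rdma-sub-{i}' for i in range(8)
    ]
    print(a3mega_subnets[0])    # -> demo-gpunet-0-subnet
    print(a3ultra_subnets[:2])  # -> ['demo-sub-1', 'demo-rdma-sub-0']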
xpk/core/nodepool.py CHANGED
@@ -37,6 +37,8 @@ CLOUD_PLATFORM_AUTH_SCOPE_URL = (
     '"https://www.googleapis.com/auth/cloud-platform"'
 )
 
+OLDER_PATHWAYS_CPU_NP_TO_DELETE = ['cpu-rm-np', 'cpu-proxy-np', 'cpu-user-np']
+
 
 def run_gke_node_pool_create_command(
     args, system, gke_node_pool_version
@@ -122,7 +124,10 @@ def run_gke_node_pool_create_command(
       args, system, existing_node_pool_names, desired_node_pool_names
   )
   for node_pool_name in existing_node_pool_names:
-    if node_pool_name.find(f'{args.cluster}-np-') != 0:
+    if (
+        node_pool_name.find(f'{args.cluster}-np-') != 0
+        and node_pool_name not in OLDER_PATHWAYS_CPU_NP_TO_DELETE
+    ):
       continue
 
     if node_pool_name in node_pools_to_delete:
@@ -283,28 +288,15 @@ def run_gke_node_pool_create_command(
     command += (
         ' --accelerator'
        f' type={system.gke_accelerator},count={str(system.chips_per_vm)},gpu-driver-version=latest'
-        ' --no-enable-autoupgrade '
-        f' --scopes={CLOUD_PLATFORM_AUTH_SCOPE_URL} --additional-node-network'
-        f' network={args.cluster}-net-1,subnetwork={subnet_prefix}-sub-1'
-        ' --additional-node-network'
-        f' network={args.cluster}-net-2,subnetwork={subnet_prefix}-sub-2'
-        ' --additional-node-network'
-        f' network={args.cluster}-net-3,subnetwork={subnet_prefix}-sub-3'
-        ' --additional-node-network'
-        f' network={args.cluster}-net-4,subnetwork={subnet_prefix}-sub-4'
+        f' --no-enable-autoupgrade --scopes={CLOUD_PLATFORM_AUTH_SCOPE_URL}'
     )
     if device_type == H100_MEGA_DEVICE_TYPE:
-      command += (
-          ' --additional-node-network'
-          f' network={args.cluster}-net-5,subnetwork={subnet_prefix}-sub-5'
-          ' --additional-node-network'
-          f' network={args.cluster}-net-6,subnetwork={subnet_prefix}-sub-6'
-          ' --additional-node-network'
-          f' network={args.cluster}-net-7,subnetwork={subnet_prefix}-sub-7'
-          ' --additional-node-network'
-          f' network={args.cluster}-net-8,subnetwork={subnet_prefix}-sub-8'
-          ' --max-pods-per-node=32'
-      )
+      for i in range(1, 9):
+        command += (
+            ' --additional-node-network'
+            f' network={args.cluster}-net-{i},subnetwork={subnet_prefix}-sub-{i}'
+        )
+      command += ' --max-pods-per-node=32'
   elif system.accelerator_type == AcceleratorType['CPU']:
     command += f' --num-nodes={system.vms_per_slice}'
     command += (
@@ -318,7 +310,7 @@ def run_gke_node_pool_create_command(
     create_commands.append(command)
     create_task_names.append(task)
 
-  desired_pw_cpu_node_pools = ['cpu-user-np', 'cpu-rm-np', 'cpu-proxy-np']
+  desired_pw_cpu_node_pools = ['cpu-np']
   if args.enable_pathways:
     # Pathways needs CPU nodepools in addition to TPU nodepools
    for node_pool_name in desired_pw_cpu_node_pools:
@@ -368,11 +360,9 @@ def get_node_pools_to_delete(
   check_resource, is_requested_resource_in_cluster = check_cluster_resources(
       args, system
   )
-  for existing_node_pool_name in existing_node_pool_names:
-    # Deletion logic would leave behind any Pathways CPU nodepools.
-    if existing_node_pool_name.find(f'{args.cluster}-np-') != 0:
-      continue
+  xpk_print('Existing node pool names ', existing_node_pool_names)
 
+  for existing_node_pool_name in existing_node_pool_names:
    # Nodepools will be deleted in two scenarios:
    # Scenario 1: Cluster exists with 3 nodepools of 'x' device_type/gke_accelerator and now we are updating
    # the cluster to 2 nodepools of 'x' device_type/gke_accelerator. In this case, we will delete
@@ -380,6 +370,18 @@ def get_node_pools_to_delete(
    # Scenario 2: Cluster exists with 2 nodepools of 'x' device_type/gke_accelerator and now we are updating
    # the cluster to 2 nodepools of 'y' device_type/gke_accelerator. In this case, we will delete
    # '{args.cluster}-np-0' and '{args.cluster}-np-1' from the cluster.
+    # Scenario 3: Deletes older Pathways CPU nodepools named cpu-rm-np, cpu-proxy-np and cpu-user-np
+
+    if existing_node_pool_name in OLDER_PATHWAYS_CPU_NP_TO_DELETE:
+      node_pools_to_delete.append(existing_node_pool_name)
+      xpk_print(
+          'Upgrading Pathways version on the cluster. Deleting older pathways'
+          ' nodepool ',
+          existing_node_pool_name,
+      )
+
+    if existing_node_pool_name.find(f'{args.cluster}-np-') != 0:
+      continue
    if existing_node_pool_name not in desired_node_pool_names or (
        check_resource and not is_requested_resource_in_cluster
    ):
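Note: the loop introduced for H100-mega node pools emits the same eight flags as the unrolled version it replaces. A standalone sketch with placeholder names (stand-ins for args.cluster and subnet_prefix):

    cluster = "demo"        # stands in for args.cluster
    subnet_prefix = "demo"  # stands in for subnet_prefix
    command = ""
    for i in range(1, 9):
      command += (
          ' --additional-node-network'
          f' network={cluster}-net-{i},subnetwork={subnet_prefix}-sub-{i}'
      )
    command += ' --max-pods-per-node=32'
    # command now carries --additional-node-network for net-1 through net-8.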