xpk 0.17.1__py3-none-any.whl → 0.17.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
xpk/commands/cluster.py CHANGED
@@ -49,7 +49,6 @@ from ..core.gcloud_context import (
49
49
  zone_to_region,
50
50
  )
51
51
  from ..core.jobset import update_jobset_resources_if_necessary
52
- from ..core.kjob import apply_kjob_crds, prepare_kjob, verify_kjob_installed
53
52
  from ..core.kueue_manager import (KueueConfig, KueueManager)
54
53
  from ..core.nap import enable_autoprovisioning_on_cluster
55
54
  from ..core.network import (
@@ -98,7 +97,6 @@ def cluster_adapt(args) -> None:
98
97
  if should_validate_dependencies(args):
99
98
  validate_dependencies_list([
100
99
  SystemDependency.KUBECTL,
101
- SystemDependency.KJOB,
102
100
  SystemDependency.GCLOUD,
103
101
  ])
104
102
  args.enable_pathways = False
@@ -188,7 +186,6 @@ def cluster_adapt(args) -> None:
188
186
  if install_kueue_code != 0:
189
187
  xpk_exit(install_kueue_code)
190
188
 
191
- install_kjob(args)
192
189
  if system.accelerator_type == AcceleratorType.GPU:
193
190
  prepare_gpus(system)
194
191
 
@@ -308,7 +305,6 @@ def cluster_create(args) -> None:
308
305
  if should_validate_dependencies(args):
309
306
  validate_dependencies_list([
310
307
  SystemDependency.KUBECTL,
311
- SystemDependency.KJOB,
312
308
  SystemDependency.GCLOUD,
313
309
  ])
314
310
 
@@ -455,8 +451,6 @@ def cluster_create(args) -> None:
455
451
  if install_kueue_code != 0:
456
452
  xpk_exit(install_kueue_code)
457
453
 
458
- install_kjob(args)
459
-
460
454
  if system.accelerator_type == AcceleratorType.GPU:
461
455
  prepare_gpus(system)
462
456
 
@@ -1343,22 +1337,6 @@ def install_storage_csis(args):
1343
1337
  xpk_exit(update_cluster_command_code)
1344
1338
 
1345
1339
 
1346
- def install_kjob(args):
1347
- xpk_print('Verifying kjob installation')
1348
- err_code = verify_kjob_installed()
1349
- if err_code > 0:
1350
- xpk_exit(err_code)
1351
-
1352
- xpk_print('Applying kjob CDRs')
1353
- err_code = apply_kjob_crds()
1354
- if err_code > 0:
1355
- xpk_exit(err_code)
1356
-
1357
- err_code = prepare_kjob(args)
1358
- if err_code > 0:
1359
- xpk_exit(err_code)
1360
-
1361
-
1362
1340
  def _install_kueue(
1363
1341
  args,
1364
1342
  system: SystemCharacteristics,
@@ -38,7 +38,6 @@ from ..core.commands import run_command_for_value
38
38
  from ..core.docker_manager import DockerManager
39
39
  from ..core.gcloud_context import zone_to_region
40
40
  from ..core.gcluster_manager import GclusterManager
41
- from ..core.kjob import apply_kjob_crds, prepare_kjob
42
41
  from ..core.remote_state.fuse_remote_state import FuseStateClient
43
42
  from ..core.remote_state.remote_state_client import RemoteStateClient
44
43
  from ..utils.console import xpk_exit, xpk_print
@@ -112,18 +111,7 @@ def cluster_create(
112
111
  get_cluster_credentials(args)
113
112
 
114
113
  err_code = __install_kueue(args)
115
- if err_code > 0:
116
- xpk_exit(err_code)
117
-
118
- err_code = apply_kjob_crds()
119
- if err_code > 0:
120
- xpk_exit(err_code)
121
-
122
- err_code = prepare_kjob(args)
123
- if err_code > 0:
124
- xpk_exit(err_code)
125
-
126
- xpk_exit(0)
114
+ xpk_exit(err_code)
127
115
 
128
116
 
129
117
  def __install_kueue(args) -> int:
@@ -46,8 +46,6 @@ def mock_cluster_create_deps(request):
46
46
  """Mocks dependencies for cluster_create."""
47
47
  with (
48
48
  patch("xpk.commands.cluster_gcluster.xpk_exit") as mock_exit,
49
- patch("xpk.commands.cluster_gcluster.prepare_kjob") as mock_prep_kjob,
50
- patch("xpk.commands.cluster_gcluster.apply_kjob_crds") as mock_apply_kjob,
51
49
  patch(
52
50
  "xpk.commands.cluster_gcluster.get_cluster_credentials"
53
51
  ) as mock_get_creds,
@@ -68,8 +66,6 @@ def mock_cluster_create_deps(request):
68
66
  ):
69
67
  yield {
70
68
  "xpk_exit": mock_exit,
71
- "prepare_kjob": mock_prep_kjob,
72
- "apply_kjob_crds": mock_apply_kjob,
73
69
  "get_cluster_credentials": mock_get_creds,
74
70
  "generate_blueprint": mock_gen_bp,
75
71
  "prepare_gcluster_manager": mock_prep_gcm,
@@ -85,9 +81,6 @@ def test_install_kueue_standard(
85
81
  mock_get_total_chips, mock_args, mock_cluster_create_deps
86
82
  ):
87
83
  """Tests __install_kueue for a standard installation."""
88
- mock_cluster_create_deps["prepare_kjob"].return_value = 0
89
- mock_cluster_create_deps["apply_kjob_crds"].return_value = 0
90
-
91
84
  mock_system = SystemCharacteristics(
92
85
  topology="N/A",
93
86
  vms_per_slice=1,
@@ -138,9 +131,6 @@ def test_install_kueue_with_autoprovisioning(
138
131
  mock_enable_autoprovisioning, mock_args, mock_cluster_create_deps
139
132
  ):
140
133
  """Tests __install_kueue with autoprovisioning enabled."""
141
- mock_cluster_create_deps["prepare_kjob"].return_value = 0
142
- mock_cluster_create_deps["apply_kjob_crds"].return_value = 0
143
-
144
134
  mock_args.enable_autoprovisioning = True
145
135
  mock_system = SystemCharacteristics(
146
136
  topology="N/A",
@@ -56,7 +56,6 @@ class _ClusterCreateMocks:
56
56
  create_cluster_configmaps: MagicMock
57
57
  set_jobset_on_cluster: MagicMock
58
58
  get_cluster_location: MagicMock
59
- install_kjob: MagicMock
60
59
  xpk_exit: MagicMock
61
60
  update_jobset_resources_if_necessary: MagicMock
62
61
  _install_kueue: MagicMock
@@ -204,9 +203,6 @@ def cluster_create_mocks(mocker) -> _ClusterCreateMocks:
204
203
  'xpk.commands.cluster.get_cluster_location',
205
204
  return_value='us-central1',
206
205
  ),
207
- install_kjob=mocker.patch(
208
- 'xpk.commands.cluster.install_kjob', return_value=0
209
- ),
210
206
  xpk_exit=mocker.patch('xpk.commands.cluster.xpk_exit'),
211
207
  update_jobset_resources_if_necessary=mocker.patch(
212
208
  'xpk.commands.cluster.update_jobset_resources_if_necessary',
xpk/commands/kind.py CHANGED
@@ -20,11 +20,6 @@ from ..core.commands import (
20
20
  run_command_with_updates,
21
21
  )
22
22
  from ..core.cluster import set_jobset_on_cluster, setup_k8s_env
23
- from ..core.kjob import (
24
- verify_kjob_installed,
25
- prepare_kjob,
26
- apply_kjob_crds,
27
- )
28
23
  from ..core.scheduling import get_total_chips_requested_from_args
29
24
  from ..core.storage import install_storage_crd
30
25
  from ..core.system_characteristics import (
@@ -48,7 +43,6 @@ def cluster_create(args) -> None:
48
43
  if should_validate_dependencies(args):
49
44
  validate_dependencies_list([
50
45
  SystemDependency.KUBECTL,
51
- SystemDependency.KJOB,
52
46
  SystemDependency.GCLOUD,
53
47
  ])
54
48
  xpk_print(f'Starting cluster create for cluster {args.cluster}:', flush=True)
@@ -69,21 +63,6 @@ def cluster_create(args) -> None:
69
63
  if set_jobset_on_cluster_code != 0:
70
64
  xpk_exit(set_jobset_on_cluster_code)
71
65
 
72
- xpk_print('Verifying kjob installation')
73
- err_code = verify_kjob_installed()
74
- if err_code > 0:
75
- xpk_exit(err_code)
76
-
77
- xpk_print('Applying kjob CDRs')
78
- err_code = apply_kjob_crds()
79
- if err_code > 0:
80
- xpk_exit(err_code)
81
-
82
- args.kind_cluster = True
83
- err_code = prepare_kjob(args)
84
- if err_code > 0:
85
- xpk_exit(err_code)
86
-
87
66
  k8s_client = setup_k8s_env(args)
88
67
  install_storage_crd(k8s_client)
89
68
 
xpk/commands/storage.py CHANGED
@@ -23,7 +23,6 @@ from kubernetes.client.rest import ApiException
23
23
 
24
24
  from ..core import gcsfuse
25
25
  from ..core.cluster import (
26
- DEFAULT_NAMESPACE,
27
26
  add_zone_and_project,
28
27
  get_cluster_network,
29
28
  setup_k8s_env,
@@ -35,12 +34,6 @@ from ..core.cluster import (
35
34
  update_cluster_with_workload_identity_if_necessary,
36
35
  )
37
36
  from ..core.filestore import FilestoreClient, get_storage_class_name
38
- from ..core.kjob import (
39
- KJOB_API_GROUP_NAME,
40
- KJOB_API_GROUP_VERSION,
41
- KJOB_API_VOLUME_BUNDLE_PLURAL,
42
- create_volume_bundle_instance,
43
- )
44
37
  from ..core.storage import (
45
38
  GCP_FILESTORE_TYPE,
46
39
  GCS_FUSE_TYPE,
@@ -98,9 +91,6 @@ def storage_create(args: Namespace) -> None:
98
91
 
99
92
  k8s_api_client = setup_k8s_env(args)
100
93
  create_storage_crds(k8s_api_client, args, manifest)
101
- create_volume_bundle_instance(
102
- k8s_api_client, args.name, manifest, args.readonly, args.mount_point
103
- )
104
94
  # Not required for Filestore. Will be uncommented when adding GCSFuse create
105
95
  # return_code = update_cluster_with_workload_identity_if_necessary(args)
106
96
  # if return_code > 0:
@@ -214,9 +204,6 @@ def storage_attach(args: Namespace) -> None:
214
204
 
215
205
  k8s_api_client = setup_k8s_env(args)
216
206
  create_storage_crds(k8s_api_client, args, manifest)
217
- create_volume_bundle_instance(
218
- k8s_api_client, args.name, manifest, args.readonly, args.mount_point
219
- )
220
207
 
221
208
  enable_csi_drivers_if_necessary(args)
222
209
 
@@ -332,18 +319,6 @@ def delete_storage_resources(k8s_api_client: ApiClient, storage: Storage):
332
319
  "Storage Class",
333
320
  )
334
321
 
335
- delete_resource(
336
- lambda name: api_instance.delete_namespaced_custom_object(
337
- namespace=DEFAULT_NAMESPACE,
338
- name=name,
339
- group=KJOB_API_GROUP_NAME,
340
- version=KJOB_API_GROUP_VERSION,
341
- plural=KJOB_API_VOLUME_BUNDLE_PLURAL,
342
- ),
343
- storage.name,
344
- "VolumeBundle",
345
- )
346
-
347
322
  delete_resource(
348
323
  lambda name: api_instance.delete_cluster_custom_object(
349
324
  name=name,
xpk/core/cluster.py CHANGED
@@ -717,10 +717,8 @@ def get_cluster_credentials(args) -> int:
717
717
  location=location,
718
718
  dns_endpoint=True,
719
719
  )
720
- if return_code != 0:
721
- return return_code
722
720
 
723
- if not _are_credentials_valid():
721
+ if return_code != 0 or not _are_credentials_valid():
724
722
  xpk_print('Detected error. Retrying without --dns-endpoint flag...')
725
723
  return_code = _get_credentials(
726
724
  project=args.project,
xpk/core/config.py CHANGED
@@ -53,14 +53,6 @@ PROJECT_KEY = 'project-id'
53
53
  CLIENT_ID_KEY = 'client-id'
54
54
  SEND_TELEMETRY_KEY = 'send-telemetry'
55
55
  ZONE_KEY = 'zone'
56
- KJOB_BATCH_IMAGE = 'batch-image'
57
- KJOB_BATCH_WORKING_DIRECTORY = 'batch-working-directory'
58
- KJOB_SHELL_IMAGE = 'shell-image'
59
- KJOB_SHELL_INTERACTIVE_COMMAND = 'shell-interactive-command'
60
- KJOB_SHELL_WORKING_DIRECTORY = 'shell-working-directory'
61
- CONFIGS_KEY = 'configs'
62
- GKE_ENDPOINT_KEY = 'gke-endpoint'
63
- DEPENDENCIES_KEY = 'deps-verified-version'
64
56
 
65
57
  DEFAULT_KEYS = [
66
58
  CFG_BUCKET_KEY,
@@ -69,13 +61,6 @@ DEFAULT_KEYS = [
69
61
  CLIENT_ID_KEY,
70
62
  SEND_TELEMETRY_KEY,
71
63
  ZONE_KEY,
72
- GKE_ENDPOINT_KEY,
73
- DEPENDENCIES_KEY,
74
- KJOB_BATCH_IMAGE,
75
- KJOB_BATCH_WORKING_DIRECTORY,
76
- KJOB_SHELL_IMAGE,
77
- KJOB_SHELL_INTERACTIVE_COMMAND,
78
- KJOB_SHELL_WORKING_DIRECTORY,
79
64
  ]
80
65
  VERTEX_TENSORBOARD_FEATURE_FLAG = XPK_CURRENT_VERSION >= '0.4.0'
81
66
 
@@ -80,15 +80,6 @@ class GpuConfig:
80
80
 
81
81
  requires_topology: bool
82
82
  gpu_direct_name: Literal['fastrak', 'rdma', 'tcpx', 'tcpxo'] = 'fastrak'
83
- kjob_decorator_fn: Optional[Callable[[dict], dict]] = None
84
- """A function to decorate the kjob template for GPU-specific configurations.
85
-
86
- Args:
87
- job_manifest (dict): The kjob manifest as a dictionary.
88
-
89
- Returns:
90
- dict: The modified kjob manifest as a dictionary.
91
- """
92
83
  nccl_installer: Optional[str] = None
93
84
  jobset_decorator_fn: Optional[Callable[[str, list[str]], str]] = None
94
85
  """A function to decorate the jobset for GPU-specific configurations.
@@ -106,7 +97,7 @@ class GpuConfig:
106
97
  parts = []
107
98
  for f in dataclasses.fields(self):
108
99
  value = getattr(self, f.name)
109
- if f.name in ('kjob_decorator_fn', 'jobset_decorator_fn') and value:
100
+ if f.name in ('jobset_decorator_fn') and value:
110
101
  parts.append(f'{f.name}=<function {value.__name__}>')
111
102
  else:
112
103
  parts.append(f'{f.name}={repr(value)}')
@@ -420,7 +411,6 @@ UserFacingNameToSystemCharacteristics = {
420
411
  gpu_config=GpuConfig(
421
412
  requires_topology=True,
422
413
  nccl_installer=INSTALLER_NCCL_RDMA_A4X,
423
- kjob_decorator_fn=rdma_decorator.decorate_kjob_template,
424
414
  jobset_decorator_fn=rdma_decorator.decorate_jobset,
425
415
  gpu_direct_name='rdma',
426
416
  ),
@@ -439,7 +429,6 @@ UserFacingNameToSystemCharacteristics = {
439
429
  gpu_config=GpuConfig(
440
430
  requires_topology=True,
441
431
  nccl_installer=INSTALLER_NCCL_RDMA_A4X,
442
- kjob_decorator_fn=rdma_decorator.decorate_kjob_template,
443
432
  jobset_decorator_fn=rdma_decorator.decorate_jobset,
444
433
  gpu_direct_name='rdma',
445
434
  ),
@@ -458,7 +447,6 @@ UserFacingNameToSystemCharacteristics = {
458
447
  gpu_config=GpuConfig(
459
448
  requires_topology=True,
460
449
  nccl_installer=INSTALLER_NCCL_RDMA,
461
- kjob_decorator_fn=rdma_decorator.decorate_kjob_template,
462
450
  jobset_decorator_fn=rdma_decorator.decorate_jobset,
463
451
  gpu_direct_name='rdma',
464
452
  ),
@@ -477,7 +465,6 @@ UserFacingNameToSystemCharacteristics = {
477
465
  gpu_config=GpuConfig(
478
466
  requires_topology=True,
479
467
  nccl_installer=INSTALLER_NCCL_RDMA,
480
- kjob_decorator_fn=rdma_decorator.decorate_kjob_template,
481
468
  jobset_decorator_fn=rdma_decorator.decorate_jobset,
482
469
  gpu_direct_name='rdma',
483
470
  ),
@@ -497,7 +484,6 @@ UserFacingNameToSystemCharacteristics = {
497
484
  gpu_config=GpuConfig(
498
485
  requires_topology=True,
499
486
  nccl_installer=INSTALLER_NCCL_TCPX,
500
- kjob_decorator_fn=tcpx_decorator.decorate_kjob_template,
501
487
  jobset_decorator_fn=tcpx_decorator.decorate_jobset,
502
488
  gpu_direct_name='tcpx',
503
489
  ),
@@ -517,7 +503,6 @@ UserFacingNameToSystemCharacteristics = {
517
503
  gpu_config=GpuConfig(
518
504
  requires_topology=True,
519
505
  nccl_installer=INSTALLER_NCCL_TCPXO,
520
- kjob_decorator_fn=tcpxo_decorator.decorate_kjob_template,
521
506
  jobset_decorator_fn=tcpxo_decorator.decorate_jobset,
522
507
  gpu_direct_name='tcpxo',
523
508
  ),
@@ -18,21 +18,6 @@ import yaml
18
18
  from ...utils.yaml import literal_string
19
19
 
20
20
 
21
- def decorate_kjob_template(job_manifest: dict) -> dict:
22
- spec = (
23
- job_manifest.setdefault('spec', {})
24
- .setdefault('template', {})
25
- .setdefault('spec', {})
26
- )
27
- spec.setdefault('tolerations', [])
28
- spec.setdefault('volumes', [])
29
-
30
- add_volumes(job_manifest)
31
- add_tolerations(job_manifest)
32
- update_gpu_containers(job_manifest)
33
- return job_manifest
34
-
35
-
36
21
  def decorate_jobset(jobset_manifest_str: str, sub_networks: list[str]) -> str:
37
22
  """
38
23
  Decorates a JobSet manifest with the necessary components for rdma-daemon.
@@ -22,14 +22,6 @@ from ...utils.yaml import literal_string
22
22
  tcpx = 'v2.0.11'
23
23
 
24
24
 
25
- def decorate_kjob_template(job_manifest: dict) -> dict:
26
- add_volumes(job_manifest)
27
- add_tolerations(job_manifest)
28
- add_tcpx_daemon_container(job_manifest)
29
- update_gpu_containers(job_manifest)
30
- return job_manifest
31
-
32
-
33
25
  def decorate_job(job_manifest: dict) -> dict:
34
26
  add_annotations(job_manifest)
35
27
  add_volumes(job_manifest)
@@ -47,24 +47,6 @@ spec:
47
47
  image: my-sidecar-image
48
48
  """
49
49
 
50
- # Minimal kjob template for testing
51
- BASE_KJOB_TEMPLATE = {
52
- "spec": {
53
- "template": {
54
- "spec": {
55
- "containers": [
56
- {
57
- "name": "main-gpu-container",
58
- "image": "my-gpu-image",
59
- "resources": {"limits": {"nvidia.com/gpu": 8}},
60
- },
61
- {"name": "sidecar-container", "image": "my-sidecar-image"},
62
- ]
63
- }
64
- }
65
- }
66
- }
67
-
68
50
  # Minimal job manifest for testing
69
51
  BASE_JOB_MANIFEST = {
70
52
  "spec": {
@@ -205,63 +187,3 @@ def test_decorate_job():
205
187
  assert "devices.gke.io/container.tcpx-daemon" in annotations
206
188
  assert "networking.gke.io/default-interface" in annotations
207
189
  assert "networking.gke.io/interfaces" in annotations
208
-
209
-
210
- def test_decorate_kjob_template():
211
- """Tests decorate_kjob_template."""
212
- kjob_template = copy.deepcopy(BASE_KJOB_TEMPLATE)
213
-
214
- decorated_manifest = tcpx_decorator.decorate_kjob_template(kjob_template)
215
-
216
- pod_template_spec = decorated_manifest["spec"]["template"]["spec"]
217
-
218
- # Check annotations are NOT added
219
- assert "annotations" not in decorated_manifest["spec"]["template"].get(
220
- "metadata", {}
221
- )
222
-
223
- # Check tolerations
224
- tolerations = pod_template_spec["tolerations"]
225
- assert {
226
- "key": "user-workload",
227
- "operator": "Equal",
228
- "value": "true",
229
- "effect": "NoSchedule",
230
- } in tolerations
231
-
232
- # Check volumes
233
- volumes = pod_template_spec["volumes"]
234
- volume_names = {v["name"] for v in volumes}
235
- assert "libraries" in volume_names
236
- assert "sys" in volume_names
237
- assert "proc-sys" in volume_names
238
- assert "tcpx-socket" in volume_names
239
- assert "dshm" in volume_names
240
-
241
- # Check init container
242
- init_containers = pod_template_spec["initContainers"]
243
- assert len(init_containers) == 1
244
- tcpx_daemon = init_containers[0]
245
- assert tcpx_daemon["name"] == "tcpx-daemon"
246
- assert tcpx_daemon["image"].endswith(f":{tcpx_decorator.tcpx}")
247
-
248
- # Check GPU container update
249
- gpu_container = pod_template_spec["containers"][0]
250
- assert gpu_container["name"] == "main-gpu-container"
251
-
252
- # Check env
253
- env_vars = {e["name"]: e["value"] for e in gpu_container["env"]}
254
- assert env_vars["LD_LIBRARY_PATH"] == "/usr/local/nvidia/lib64"
255
-
256
- # Check volume mounts
257
- volume_mounts = {
258
- vm["name"]: vm["mountPath"] for vm in gpu_container["volumeMounts"]
259
- }
260
- assert volume_mounts["tcpx-socket"] == "/tmp"
261
- assert volume_mounts["libraries"] == "/usr/local/nvidia/lib64"
262
- assert volume_mounts["dshm"] == "/dev/shm"
263
-
264
- # Check non-GPU container is not updated
265
- sidecar_container = pod_template_spec["containers"][1]
266
- assert "env" not in sidecar_container
267
- assert "volumeMounts" not in sidecar_container
@@ -22,22 +22,6 @@ from ...utils.yaml import literal_string
22
22
  rxdm = 'v1.0.12'
23
23
 
24
24
 
25
- def decorate_kjob_template(job_manifest: dict) -> dict:
26
- spec = (
27
- job_manifest.setdefault('spec', {})
28
- .setdefault('template', {})
29
- .setdefault('spec', {})
30
- )
31
- spec.setdefault('tolerations', [])
32
- spec.setdefault('volumes', [])
33
-
34
- add_volumes(job_manifest)
35
- add_tolerations(job_manifest)
36
- add_tcpxo_daemon_container(job_manifest)
37
- update_gpu_containers(job_manifest)
38
- return job_manifest
39
-
40
-
41
25
  def decorate_job(job_manifest: dict, sub_networks: list[str]) -> dict:
42
26
  job_manifest.setdefault('spec', {}).setdefault('template', {}).setdefault(
43
27
  'metadata', {}
xpk/parser/common.py CHANGED
@@ -180,157 +180,6 @@ def add_global_arguments(custom_parser_or_group: ParserOrArgumentGroup):
180
180
  )
181
181
 
182
182
 
183
- def add_slurm_arguments(custom_parser_or_group: ParserOrArgumentGroup):
184
- """Add Slurm job arguments to the parser.
185
-
186
- Args:
187
- custom_parser_or_group: parser or argument group to add global arguments to.
188
- """
189
- custom_parser_or_group.add_argument(
190
- '--ignore-unknown-flags',
191
- type=bool,
192
- action=argparse.BooleanOptionalAction,
193
- default=False,
194
- help='Ignore all the unsupported flags in the bash script.',
195
- )
196
- custom_parser_or_group.add_argument(
197
- '-a',
198
- '--array',
199
- type=str,
200
- default=None,
201
- help=(
202
- 'Submit a job array, multiple jobs to be executed with identical'
203
- ' parameters. The indexes specification identifies what array index'
204
- ' values should be used. For example, "--array=0-15" or'
205
- ' "--array=0,6,16-32". Multiple values may be specified using a comma'
206
- ' separated list and/or a range of values with a "-" separator. For'
207
- ' example "--array=0-15%%4" will limit the number of simultaneously'
208
- ' running tasks from this job array to 4. The minimum index value is'
209
- ' 0. The maximum index value is 2147483647.'
210
- ),
211
- )
212
- custom_parser_or_group.add_argument(
213
- '-c',
214
- '--cpus-per-task',
215
- type=str,
216
- default=None,
217
- help='How much cpus a container inside a pod requires.',
218
- )
219
- custom_parser_or_group.add_argument(
220
- '--gpus-per-task',
221
- type=str,
222
- default=None,
223
- help='How much gpus a container inside a pod requires.',
224
- )
225
- custom_parser_or_group.add_argument(
226
- '--mem',
227
- type=str,
228
- default=None,
229
- help='How much memory a pod requires.',
230
- )
231
- custom_parser_or_group.add_argument(
232
- '--mem-per-task',
233
- type=str,
234
- default=None,
235
- help='How much memory a container requires.',
236
- )
237
- custom_parser_or_group.add_argument(
238
- '--mem-per-cpu',
239
- type=str,
240
- default=None,
241
- help=(
242
- 'How much memory a container requires, it multiplies the number '
243
- 'of requested cpus per task by mem-per-cpu.'
244
- ),
245
- )
246
- custom_parser_or_group.add_argument(
247
- '--mem-per-gpu',
248
- type=str,
249
- default=None,
250
- help=(
251
- 'How much memory a container requires, it multiplies the number '
252
- 'of requested gpus per task by mem-per-gpu.'
253
- ),
254
- )
255
- custom_parser_or_group.add_argument(
256
- '-N',
257
- '--nodes',
258
- type=int,
259
- default=None,
260
- help='Number of pods to be used at a time.',
261
- )
262
- custom_parser_or_group.add_argument(
263
- '-n',
264
- '--ntasks',
265
- type=int,
266
- default=None,
267
- help='Number of identical containers inside of a pod, usually 1.',
268
- )
269
- custom_parser_or_group.add_argument(
270
- '-o',
271
- '--output',
272
- type=str,
273
- default=None,
274
- help=(
275
- 'Where to redirect the standard output stream of a task. If not'
276
- ' passed it proceeds to stdout, and is available via kubectl logs.'
277
- ),
278
- )
279
- custom_parser_or_group.add_argument(
280
- '-e',
281
- '--error',
282
- type=str,
283
- default=None,
284
- help=(
285
- 'Where to redirect std error stream of a task. If not passed it'
286
- ' proceeds to stdout, and is available via kubectl logs.'
287
- ),
288
- )
289
- custom_parser_or_group.add_argument(
290
- '--input',
291
- type=str,
292
- default=None,
293
- help='What to pipe into the script.',
294
- )
295
- custom_parser_or_group.add_argument(
296
- '-J',
297
- '--job-name',
298
- type=str,
299
- default=None,
300
- help='What is the job name.',
301
- )
302
- custom_parser_or_group.add_argument(
303
- '-D',
304
- '--chdir',
305
- type=str,
306
- default=None,
307
- help='Change directory before executing the script.',
308
- )
309
- custom_parser_or_group.add_argument(
310
- '-t',
311
- '--time',
312
- type=str,
313
- default=None,
314
- help=(
315
- 'Set a limit on the total run time of the job. '
316
- 'A time limit of zero requests that no time limit be imposed. '
317
- 'Acceptable time formats include "minutes", "minutes:seconds", '
318
- '"hours:minutes:seconds", "days-hours", "days-hours:minutes" '
319
- 'and "days-hours:minutes:seconds".'
320
- ),
321
- )
322
- custom_parser_or_group.add_argument(
323
- '--priority',
324
- type=str,
325
- default='medium',
326
- choices=['very-low', 'low', 'medium', 'high', 'very-high'],
327
- help=(
328
- 'A priority, one of `very-low`, `low`, `medium`, `high` or'
329
- ' `very-high`. Defaults to `medium`.'
330
- ),
331
- )
332
-
333
-
334
183
  def add_tpu_type_argument(
335
184
  custom_parser_or_group: ParserOrArgumentGroup,
336
185
  required: bool = False,