xpk 0.17.2__py3-none-any.whl → 0.17.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
xpk/commands/cluster.py CHANGED
@@ -49,7 +49,6 @@ from ..core.gcloud_context import (
49
49
  zone_to_region,
50
50
  )
51
51
  from ..core.jobset import update_jobset_resources_if_necessary
52
- from ..core.kjob import apply_kjob_crds, prepare_kjob, verify_kjob_installed
53
52
  from ..core.kueue_manager import (KueueConfig, KueueManager)
54
53
  from ..core.nap import enable_autoprovisioning_on_cluster
55
54
  from ..core.network import (
@@ -98,7 +97,6 @@ def cluster_adapt(args) -> None:
98
97
  if should_validate_dependencies(args):
99
98
  validate_dependencies_list([
100
99
  SystemDependency.KUBECTL,
101
- SystemDependency.KJOB,
102
100
  SystemDependency.GCLOUD,
103
101
  ])
104
102
  args.enable_pathways = False
@@ -188,7 +186,6 @@ def cluster_adapt(args) -> None:
188
186
  if install_kueue_code != 0:
189
187
  xpk_exit(install_kueue_code)
190
188
 
191
- install_kjob(args)
192
189
  if system.accelerator_type == AcceleratorType.GPU:
193
190
  prepare_gpus(system)
194
191
 
@@ -308,7 +305,6 @@ def cluster_create(args) -> None:
308
305
  if should_validate_dependencies(args):
309
306
  validate_dependencies_list([
310
307
  SystemDependency.KUBECTL,
311
- SystemDependency.KJOB,
312
308
  SystemDependency.GCLOUD,
313
309
  ])
314
310
 
@@ -455,8 +451,6 @@ def cluster_create(args) -> None:
455
451
  if install_kueue_code != 0:
456
452
  xpk_exit(install_kueue_code)
457
453
 
458
- install_kjob(args)
459
-
460
454
  if system.accelerator_type == AcceleratorType.GPU:
461
455
  prepare_gpus(system)
462
456
 
@@ -1343,22 +1337,6 @@ def install_storage_csis(args):
1343
1337
  xpk_exit(update_cluster_command_code)
1344
1338
 
1345
1339
 
1346
- def install_kjob(args):
1347
- xpk_print('Verifying kjob installation')
1348
- err_code = verify_kjob_installed()
1349
- if err_code > 0:
1350
- xpk_exit(err_code)
1351
-
1352
- xpk_print('Applying kjob CDRs')
1353
- err_code = apply_kjob_crds()
1354
- if err_code > 0:
1355
- xpk_exit(err_code)
1356
-
1357
- err_code = prepare_kjob(args)
1358
- if err_code > 0:
1359
- xpk_exit(err_code)
1360
-
1361
-
1362
1340
  def _install_kueue(
1363
1341
  args,
1364
1342
  system: SystemCharacteristics,
@@ -38,7 +38,6 @@ from ..core.commands import run_command_for_value
38
38
  from ..core.docker_manager import DockerManager
39
39
  from ..core.gcloud_context import zone_to_region
40
40
  from ..core.gcluster_manager import GclusterManager
41
- from ..core.kjob import apply_kjob_crds, prepare_kjob
42
41
  from ..core.remote_state.fuse_remote_state import FuseStateClient
43
42
  from ..core.remote_state.remote_state_client import RemoteStateClient
44
43
  from ..utils.console import xpk_exit, xpk_print
@@ -112,18 +111,7 @@ def cluster_create(
112
111
  get_cluster_credentials(args)
113
112
 
114
113
  err_code = __install_kueue(args)
115
- if err_code > 0:
116
- xpk_exit(err_code)
117
-
118
- err_code = apply_kjob_crds()
119
- if err_code > 0:
120
- xpk_exit(err_code)
121
-
122
- err_code = prepare_kjob(args)
123
- if err_code > 0:
124
- xpk_exit(err_code)
125
-
126
- xpk_exit(0)
114
+ xpk_exit(err_code)
127
115
 
128
116
 
129
117
  def __install_kueue(args) -> int:
@@ -46,8 +46,6 @@ def mock_cluster_create_deps(request):
46
46
  """Mocks dependencies for cluster_create."""
47
47
  with (
48
48
  patch("xpk.commands.cluster_gcluster.xpk_exit") as mock_exit,
49
- patch("xpk.commands.cluster_gcluster.prepare_kjob") as mock_prep_kjob,
50
- patch("xpk.commands.cluster_gcluster.apply_kjob_crds") as mock_apply_kjob,
51
49
  patch(
52
50
  "xpk.commands.cluster_gcluster.get_cluster_credentials"
53
51
  ) as mock_get_creds,
@@ -68,8 +66,6 @@ def mock_cluster_create_deps(request):
68
66
  ):
69
67
  yield {
70
68
  "xpk_exit": mock_exit,
71
- "prepare_kjob": mock_prep_kjob,
72
- "apply_kjob_crds": mock_apply_kjob,
73
69
  "get_cluster_credentials": mock_get_creds,
74
70
  "generate_blueprint": mock_gen_bp,
75
71
  "prepare_gcluster_manager": mock_prep_gcm,
@@ -85,9 +81,6 @@ def test_install_kueue_standard(
85
81
  mock_get_total_chips, mock_args, mock_cluster_create_deps
86
82
  ):
87
83
  """Tests __install_kueue for a standard installation."""
88
- mock_cluster_create_deps["prepare_kjob"].return_value = 0
89
- mock_cluster_create_deps["apply_kjob_crds"].return_value = 0
90
-
91
84
  mock_system = SystemCharacteristics(
92
85
  topology="N/A",
93
86
  vms_per_slice=1,
@@ -138,9 +131,6 @@ def test_install_kueue_with_autoprovisioning(
138
131
  mock_enable_autoprovisioning, mock_args, mock_cluster_create_deps
139
132
  ):
140
133
  """Tests __install_kueue with autoprovisioning enabled."""
141
- mock_cluster_create_deps["prepare_kjob"].return_value = 0
142
- mock_cluster_create_deps["apply_kjob_crds"].return_value = 0
143
-
144
134
  mock_args.enable_autoprovisioning = True
145
135
  mock_system = SystemCharacteristics(
146
136
  topology="N/A",
@@ -56,7 +56,6 @@ class _ClusterCreateMocks:
56
56
  create_cluster_configmaps: MagicMock
57
57
  set_jobset_on_cluster: MagicMock
58
58
  get_cluster_location: MagicMock
59
- install_kjob: MagicMock
60
59
  xpk_exit: MagicMock
61
60
  update_jobset_resources_if_necessary: MagicMock
62
61
  _install_kueue: MagicMock
@@ -204,9 +203,6 @@ def cluster_create_mocks(mocker) -> _ClusterCreateMocks:
204
203
  'xpk.commands.cluster.get_cluster_location',
205
204
  return_value='us-central1',
206
205
  ),
207
- install_kjob=mocker.patch(
208
- 'xpk.commands.cluster.install_kjob', return_value=0
209
- ),
210
206
  xpk_exit=mocker.patch('xpk.commands.cluster.xpk_exit'),
211
207
  update_jobset_resources_if_necessary=mocker.patch(
212
208
  'xpk.commands.cluster.update_jobset_resources_if_necessary',
xpk/commands/kind.py CHANGED
@@ -20,11 +20,6 @@ from ..core.commands import (
20
20
  run_command_with_updates,
21
21
  )
22
22
  from ..core.cluster import set_jobset_on_cluster, setup_k8s_env
23
- from ..core.kjob import (
24
- verify_kjob_installed,
25
- prepare_kjob,
26
- apply_kjob_crds,
27
- )
28
23
  from ..core.scheduling import get_total_chips_requested_from_args
29
24
  from ..core.storage import install_storage_crd
30
25
  from ..core.system_characteristics import (
@@ -48,7 +43,6 @@ def cluster_create(args) -> None:
48
43
  if should_validate_dependencies(args):
49
44
  validate_dependencies_list([
50
45
  SystemDependency.KUBECTL,
51
- SystemDependency.KJOB,
52
46
  SystemDependency.GCLOUD,
53
47
  ])
54
48
  xpk_print(f'Starting cluster create for cluster {args.cluster}:', flush=True)
@@ -69,21 +63,6 @@ def cluster_create(args) -> None:
69
63
  if set_jobset_on_cluster_code != 0:
70
64
  xpk_exit(set_jobset_on_cluster_code)
71
65
 
72
- xpk_print('Verifying kjob installation')
73
- err_code = verify_kjob_installed()
74
- if err_code > 0:
75
- xpk_exit(err_code)
76
-
77
- xpk_print('Applying kjob CDRs')
78
- err_code = apply_kjob_crds()
79
- if err_code > 0:
80
- xpk_exit(err_code)
81
-
82
- args.kind_cluster = True
83
- err_code = prepare_kjob(args)
84
- if err_code > 0:
85
- xpk_exit(err_code)
86
-
87
66
  k8s_client = setup_k8s_env(args)
88
67
  install_storage_crd(k8s_client)
89
68
 
xpk/core/config.py CHANGED
@@ -53,14 +53,6 @@ PROJECT_KEY = 'project-id'
53
53
  CLIENT_ID_KEY = 'client-id'
54
54
  SEND_TELEMETRY_KEY = 'send-telemetry'
55
55
  ZONE_KEY = 'zone'
56
- KJOB_BATCH_IMAGE = 'batch-image'
57
- KJOB_BATCH_WORKING_DIRECTORY = 'batch-working-directory'
58
- KJOB_SHELL_IMAGE = 'shell-image'
59
- KJOB_SHELL_INTERACTIVE_COMMAND = 'shell-interactive-command'
60
- KJOB_SHELL_WORKING_DIRECTORY = 'shell-working-directory'
61
- CONFIGS_KEY = 'configs'
62
- GKE_ENDPOINT_KEY = 'gke-endpoint'
63
- DEPENDENCIES_KEY = 'deps-verified-version'
64
56
 
65
57
  DEFAULT_KEYS = [
66
58
  CFG_BUCKET_KEY,
@@ -69,13 +61,6 @@ DEFAULT_KEYS = [
69
61
  CLIENT_ID_KEY,
70
62
  SEND_TELEMETRY_KEY,
71
63
  ZONE_KEY,
72
- GKE_ENDPOINT_KEY,
73
- DEPENDENCIES_KEY,
74
- KJOB_BATCH_IMAGE,
75
- KJOB_BATCH_WORKING_DIRECTORY,
76
- KJOB_SHELL_IMAGE,
77
- KJOB_SHELL_INTERACTIVE_COMMAND,
78
- KJOB_SHELL_WORKING_DIRECTORY,
79
64
  ]
80
65
  VERTEX_TENSORBOARD_FEATURE_FLAG = XPK_CURRENT_VERSION >= '0.4.0'
81
66
 
@@ -80,15 +80,6 @@ class GpuConfig:
80
80
 
81
81
  requires_topology: bool
82
82
  gpu_direct_name: Literal['fastrak', 'rdma', 'tcpx', 'tcpxo'] = 'fastrak'
83
- kjob_decorator_fn: Optional[Callable[[dict], dict]] = None
84
- """A function to decorate the kjob template for GPU-specific configurations.
85
-
86
- Args:
87
- job_manifest (dict): The kjob manifest as a dictionary.
88
-
89
- Returns:
90
- dict: The modified kjob manifest as a dictionary.
91
- """
92
83
  nccl_installer: Optional[str] = None
93
84
  jobset_decorator_fn: Optional[Callable[[str, list[str]], str]] = None
94
85
  """A function to decorate the jobset for GPU-specific configurations.
@@ -106,7 +97,7 @@ class GpuConfig:
106
97
  parts = []
107
98
  for f in dataclasses.fields(self):
108
99
  value = getattr(self, f.name)
109
- if f.name in ('kjob_decorator_fn', 'jobset_decorator_fn') and value:
100
+ if f.name in ('jobset_decorator_fn') and value:
110
101
  parts.append(f'{f.name}=<function {value.__name__}>')
111
102
  else:
112
103
  parts.append(f'{f.name}={repr(value)}')
@@ -420,7 +411,6 @@ UserFacingNameToSystemCharacteristics = {
420
411
  gpu_config=GpuConfig(
421
412
  requires_topology=True,
422
413
  nccl_installer=INSTALLER_NCCL_RDMA_A4X,
423
- kjob_decorator_fn=rdma_decorator.decorate_kjob_template,
424
414
  jobset_decorator_fn=rdma_decorator.decorate_jobset,
425
415
  gpu_direct_name='rdma',
426
416
  ),
@@ -439,7 +429,6 @@ UserFacingNameToSystemCharacteristics = {
439
429
  gpu_config=GpuConfig(
440
430
  requires_topology=True,
441
431
  nccl_installer=INSTALLER_NCCL_RDMA_A4X,
442
- kjob_decorator_fn=rdma_decorator.decorate_kjob_template,
443
432
  jobset_decorator_fn=rdma_decorator.decorate_jobset,
444
433
  gpu_direct_name='rdma',
445
434
  ),
@@ -458,7 +447,6 @@ UserFacingNameToSystemCharacteristics = {
458
447
  gpu_config=GpuConfig(
459
448
  requires_topology=True,
460
449
  nccl_installer=INSTALLER_NCCL_RDMA,
461
- kjob_decorator_fn=rdma_decorator.decorate_kjob_template,
462
450
  jobset_decorator_fn=rdma_decorator.decorate_jobset,
463
451
  gpu_direct_name='rdma',
464
452
  ),
@@ -477,7 +465,6 @@ UserFacingNameToSystemCharacteristics = {
477
465
  gpu_config=GpuConfig(
478
466
  requires_topology=True,
479
467
  nccl_installer=INSTALLER_NCCL_RDMA,
480
- kjob_decorator_fn=rdma_decorator.decorate_kjob_template,
481
468
  jobset_decorator_fn=rdma_decorator.decorate_jobset,
482
469
  gpu_direct_name='rdma',
483
470
  ),
@@ -497,7 +484,6 @@ UserFacingNameToSystemCharacteristics = {
497
484
  gpu_config=GpuConfig(
498
485
  requires_topology=True,
499
486
  nccl_installer=INSTALLER_NCCL_TCPX,
500
- kjob_decorator_fn=tcpx_decorator.decorate_kjob_template,
501
487
  jobset_decorator_fn=tcpx_decorator.decorate_jobset,
502
488
  gpu_direct_name='tcpx',
503
489
  ),
@@ -517,7 +503,6 @@ UserFacingNameToSystemCharacteristics = {
517
503
  gpu_config=GpuConfig(
518
504
  requires_topology=True,
519
505
  nccl_installer=INSTALLER_NCCL_TCPXO,
520
- kjob_decorator_fn=tcpxo_decorator.decorate_kjob_template,
521
506
  jobset_decorator_fn=tcpxo_decorator.decorate_jobset,
522
507
  gpu_direct_name='tcpxo',
523
508
  ),
@@ -18,21 +18,6 @@ import yaml
18
18
  from ...utils.yaml import literal_string
19
19
 
20
20
 
21
- def decorate_kjob_template(job_manifest: dict) -> dict:
22
- spec = (
23
- job_manifest.setdefault('spec', {})
24
- .setdefault('template', {})
25
- .setdefault('spec', {})
26
- )
27
- spec.setdefault('tolerations', [])
28
- spec.setdefault('volumes', [])
29
-
30
- add_volumes(job_manifest)
31
- add_tolerations(job_manifest)
32
- update_gpu_containers(job_manifest)
33
- return job_manifest
34
-
35
-
36
21
  def decorate_jobset(jobset_manifest_str: str, sub_networks: list[str]) -> str:
37
22
  """
38
23
  Decorates a JobSet manifest with the necessary components for rdma-daemon.
@@ -22,14 +22,6 @@ from ...utils.yaml import literal_string
22
22
  tcpx = 'v2.0.11'
23
23
 
24
24
 
25
- def decorate_kjob_template(job_manifest: dict) -> dict:
26
- add_volumes(job_manifest)
27
- add_tolerations(job_manifest)
28
- add_tcpx_daemon_container(job_manifest)
29
- update_gpu_containers(job_manifest)
30
- return job_manifest
31
-
32
-
33
25
  def decorate_job(job_manifest: dict) -> dict:
34
26
  add_annotations(job_manifest)
35
27
  add_volumes(job_manifest)
@@ -47,24 +47,6 @@ spec:
47
47
  image: my-sidecar-image
48
48
  """
49
49
 
50
- # Minimal kjob template for testing
51
- BASE_KJOB_TEMPLATE = {
52
- "spec": {
53
- "template": {
54
- "spec": {
55
- "containers": [
56
- {
57
- "name": "main-gpu-container",
58
- "image": "my-gpu-image",
59
- "resources": {"limits": {"nvidia.com/gpu": 8}},
60
- },
61
- {"name": "sidecar-container", "image": "my-sidecar-image"},
62
- ]
63
- }
64
- }
65
- }
66
- }
67
-
68
50
  # Minimal job manifest for testing
69
51
  BASE_JOB_MANIFEST = {
70
52
  "spec": {
@@ -205,63 +187,3 @@ def test_decorate_job():
205
187
  assert "devices.gke.io/container.tcpx-daemon" in annotations
206
188
  assert "networking.gke.io/default-interface" in annotations
207
189
  assert "networking.gke.io/interfaces" in annotations
208
-
209
-
210
- def test_decorate_kjob_template():
211
- """Tests decorate_kjob_template."""
212
- kjob_template = copy.deepcopy(BASE_KJOB_TEMPLATE)
213
-
214
- decorated_manifest = tcpx_decorator.decorate_kjob_template(kjob_template)
215
-
216
- pod_template_spec = decorated_manifest["spec"]["template"]["spec"]
217
-
218
- # Check annotations are NOT added
219
- assert "annotations" not in decorated_manifest["spec"]["template"].get(
220
- "metadata", {}
221
- )
222
-
223
- # Check tolerations
224
- tolerations = pod_template_spec["tolerations"]
225
- assert {
226
- "key": "user-workload",
227
- "operator": "Equal",
228
- "value": "true",
229
- "effect": "NoSchedule",
230
- } in tolerations
231
-
232
- # Check volumes
233
- volumes = pod_template_spec["volumes"]
234
- volume_names = {v["name"] for v in volumes}
235
- assert "libraries" in volume_names
236
- assert "sys" in volume_names
237
- assert "proc-sys" in volume_names
238
- assert "tcpx-socket" in volume_names
239
- assert "dshm" in volume_names
240
-
241
- # Check init container
242
- init_containers = pod_template_spec["initContainers"]
243
- assert len(init_containers) == 1
244
- tcpx_daemon = init_containers[0]
245
- assert tcpx_daemon["name"] == "tcpx-daemon"
246
- assert tcpx_daemon["image"].endswith(f":{tcpx_decorator.tcpx}")
247
-
248
- # Check GPU container update
249
- gpu_container = pod_template_spec["containers"][0]
250
- assert gpu_container["name"] == "main-gpu-container"
251
-
252
- # Check env
253
- env_vars = {e["name"]: e["value"] for e in gpu_container["env"]}
254
- assert env_vars["LD_LIBRARY_PATH"] == "/usr/local/nvidia/lib64"
255
-
256
- # Check volume mounts
257
- volume_mounts = {
258
- vm["name"]: vm["mountPath"] for vm in gpu_container["volumeMounts"]
259
- }
260
- assert volume_mounts["tcpx-socket"] == "/tmp"
261
- assert volume_mounts["libraries"] == "/usr/local/nvidia/lib64"
262
- assert volume_mounts["dshm"] == "/dev/shm"
263
-
264
- # Check non-GPU container is not updated
265
- sidecar_container = pod_template_spec["containers"][1]
266
- assert "env" not in sidecar_container
267
- assert "volumeMounts" not in sidecar_container
@@ -22,22 +22,6 @@ from ...utils.yaml import literal_string
22
22
  rxdm = 'v1.0.12'
23
23
 
24
24
 
25
- def decorate_kjob_template(job_manifest: dict) -> dict:
26
- spec = (
27
- job_manifest.setdefault('spec', {})
28
- .setdefault('template', {})
29
- .setdefault('spec', {})
30
- )
31
- spec.setdefault('tolerations', [])
32
- spec.setdefault('volumes', [])
33
-
34
- add_volumes(job_manifest)
35
- add_tolerations(job_manifest)
36
- add_tcpxo_daemon_container(job_manifest)
37
- update_gpu_containers(job_manifest)
38
- return job_manifest
39
-
40
-
41
25
  def decorate_job(job_manifest: dict, sub_networks: list[str]) -> dict:
42
26
  job_manifest.setdefault('spec', {}).setdefault('template', {}).setdefault(
43
27
  'metadata', {}
xpk/parser/core.py CHANGED
@@ -23,13 +23,9 @@ from .cluster import set_cluster_parser
23
23
  from .inspector import set_inspector_parser
24
24
  from .storage import set_storage_parser
25
25
  from .workload import set_workload_parsers
26
- from .batch import set_batch_parser
27
- from .job import set_job_parser
28
26
  from .info import set_info_parser
29
27
  from .kind import set_kind_parser
30
- from .shell import set_shell_parser
31
28
  from .version import set_version_parser
32
- from .run import set_run_parser
33
29
 
34
30
 
35
31
  def set_parser(parser: argparse.ArgumentParser):
@@ -54,20 +50,10 @@ def set_parser(parser: argparse.ArgumentParser):
54
50
  "info",
55
51
  help="Commands around listing kueue clusterqueues and localqueues.",
56
52
  )
57
- batch_parser = xpk_subcommands.add_parser(
58
- "batch",
59
- help="commands around running batch job",
60
- )
61
- job_parser = xpk_subcommands.add_parser(
62
- "job", help="commands around listing, cancelling and investigating jobs"
63
- )
64
53
  kind_parser = xpk_subcommands.add_parser(
65
54
  "kind",
66
55
  help="commands around Kind cluster management",
67
56
  )
68
- shell_parser = xpk_subcommands.add_parser(
69
- "shell", help="Commands around configuring and using interactive shell."
70
- )
71
57
  version_parser = xpk_subcommands.add_parser(
72
58
  "version", help="Command to get xpk version"
73
59
  )
@@ -76,11 +62,6 @@ def set_parser(parser: argparse.ArgumentParser):
76
62
  "config", help="Commands to set and retrieve values from xpk config."
77
63
  )
78
64
 
79
- run_parser = xpk_subcommands.add_parser(
80
- "run",
81
- help="Command to run parallel jobs",
82
- )
83
-
84
65
  def default_subcommand_function(
85
66
  _args,
86
67
  ) -> int: # args is unused, so pylint: disable=invalid-name
@@ -96,14 +77,10 @@ def set_parser(parser: argparse.ArgumentParser):
96
77
  parser.print_help()
97
78
  cluster_parser.print_help()
98
79
  workload_parser.print_help()
99
- batch_parser.print_help()
100
80
  info_parser.print_help()
101
- job_parser.print_help()
102
- shell_parser.print_help()
103
81
  version_parser.print_help()
104
82
  kind_parser.print_help()
105
83
  config_parser.print_help()
106
- run_parser.print_help()
107
84
 
108
85
  storage_parser.print_help()
109
86
  return 0
@@ -111,25 +88,17 @@ def set_parser(parser: argparse.ArgumentParser):
111
88
  parser.set_defaults(func=default_subcommand_function)
112
89
  workload_parser.set_defaults(func=default_subcommand_function)
113
90
  cluster_parser.set_defaults(func=default_subcommand_function)
114
- batch_parser.set_defaults(func=default_subcommand_function)
115
91
  info_parser.set_defaults(func=default_subcommand_function)
116
- job_parser.set_defaults(func=default_subcommand_function)
117
92
  kind_parser.set_defaults(func=default_subcommand_function)
118
- shell_parser.set_defaults(func=default_subcommand_function)
119
93
  storage_parser.set_defaults(func=default_subcommand_function)
120
94
  version_parser.set_defaults(func=default_subcommand_function)
121
95
  config_parser.set_defaults(func=default_subcommand_function)
122
- run_parser.set_defaults(func=default_subcommand_function)
123
96
 
124
97
  set_workload_parsers(workload_parser=workload_parser)
125
98
  set_cluster_parser(cluster_parser=cluster_parser)
126
99
  set_inspector_parser(inspector_parser=inspector_parser)
127
- set_batch_parser(batch_parser=batch_parser)
128
100
  set_info_parser(info_parser=info_parser)
129
- set_job_parser(job_parser=job_parser)
130
101
  set_kind_parser(kind_parser=kind_parser)
131
- set_shell_parser(shell_parser=shell_parser)
132
102
  set_storage_parser(storage_parser=storage_parser)
133
103
  set_version_parser(version_parser=version_parser)
134
104
  set_config_parsers(config_parser=config_parser)
135
- set_run_parser(run_parser=run_parser)
xpk/utils/validation.py CHANGED
@@ -37,14 +37,6 @@ class SystemDependency(Enum):
37
37
  ' to install xpk prerequisites.'
38
38
  ),
39
39
  )
40
- KJOB = _SystemDependency(
41
- command='kubectl kjob --help',
42
- message=(
43
- '`kjobctl` not installed. Please follow'
44
- ' https://github.com/AI-Hypercomputer/xpk?tab=readme-ov-file#prerequisites'
45
- ' to install xpk prerequisites.'
46
- ),
47
- )
48
40
  GCLOUD = _SystemDependency(
49
41
  command='gcloud version',
50
42
  message=(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: xpk
3
- Version: 0.17.2
3
+ Version: 0.17.3
4
4
  Summary: xpk helps Cloud developers to orchestrate training jobs on accelerators on GKE.
5
5
  Author-email: XPK team <xpk-code-reviewers@google.com>
6
6
  License: Apache-2.0
@@ -20,22 +20,17 @@ xpk/blueprints/a4/config-map.yaml.tftpl,sha256=o6LeGIYUfFGyj3vj-8ztV5ildQ46QZVl7
20
20
  xpk/blueprints/a4/nccl-rdma-installer-a4.yaml,sha256=if3WOmNLVGTJIJHU76EWC1FyiIXDTRIXcwo4OsBxarQ,2113
21
21
  xpk/blueprints/a4/storage_crd.yaml,sha256=r4WFXnSJJ25EUF-t4Ljfbl-cJoSaiFiZkP8451eTub4,1260
22
22
  xpk/commands/__init__.py,sha256=YPwWBbgLAu7L-YlTVGB2r8ZV4TzypURMRBcehSHHlLY,561
23
- xpk/commands/batch.py,sha256=Cj1bDpzPMoPdhaKKrOJJLJ3JzRvJrCMn8huQoHHIZJI,4192
24
- xpk/commands/cluster.py,sha256=DtMiIYdYsciXldoWqAfxPIxl9Hc9kbYIj2LsdBER0PI,46172
25
- xpk/commands/cluster_gcluster.py,sha256=x26UqoT8RFX5T9ftQXPEL12HMnMFTi8lret16dnZCms,13970
26
- xpk/commands/cluster_gcluster_test.py,sha256=UcqTTkrQv-R753AtsQvinwgI2vqI6lMHPPEfHPS5e-4,6655
27
- xpk/commands/cluster_test.py,sha256=-7EjuOoGSZhdnLBNBNCMKM6laDYy02aPncbSfUYcrUs,24147
23
+ xpk/commands/cluster.py,sha256=5ebvHXe8Bi4haMd1RokJbjP0LemqXxC1EseqOLWIkGw,45659
24
+ xpk/commands/cluster_gcluster.py,sha256=Ig8jLjsiyFgw9U4BBEzDK2diA9m0STKQgz-uUTG_vYE,13731
25
+ xpk/commands/cluster_gcluster_test.py,sha256=s1wwkcdY4LTxmk_Tx5PKdh9pZmEoo1n8XlzybWalc0M,6165
26
+ xpk/commands/cluster_test.py,sha256=aMkwKrhoEuqElME16ztx5lwv4zT0z_xV0L3in1RaW6M,24017
28
27
  xpk/commands/common.py,sha256=p43sspD5RfYRj3Se_b-X0s0dbBs1PMI1qtySg6zZKKg,2706
29
28
  xpk/commands/config.py,sha256=L_zRpQTxMcSh6rxOT8gG263V6YGqzVoz4UxdWywTFdA,850
30
29
  xpk/commands/info.py,sha256=uhv5mPfgg9N-5JhQw4dT2jujL9ZC5kzGA18h9NFfm5A,7429
31
30
  xpk/commands/inspector.py,sha256=FPasKtGuEZKNXIQin4AG49clfD4b53NxXpWqBPZIIoE,12955
32
- xpk/commands/job.py,sha256=rPIfWvgm5mLz7K7YDLK721ZcUcg5OEmYVAPAtRtB5Ag,6718
33
- xpk/commands/kind.py,sha256=GNqsaoLInifFQ_ZGpbN_3xA8ExyeyOqBMdnoPV-PqYI,7813
34
- xpk/commands/kjob_common.py,sha256=bRaORiGVjPAdN0T3aRmbcQgXYe-EtjoVKePdWzQ5xU4,1928
31
+ xpk/commands/kind.py,sha256=ck4zaJh9kaNluEdBew9OVj4cZXjUYk_ElycqGLo6f7g,7355
35
32
  xpk/commands/managed_ml_diagnostics.py,sha256=87wmFbnYQY-kEpJfPo1Up53xM5P_P5wOlXczxHzxJjQ,6984
36
33
  xpk/commands/managed_ml_diagnostics_test.py,sha256=pQ1YUGMGRQFJYTS_1o9YyGUzYdLaBdA84LjbnncaeEo,3828
37
- xpk/commands/run.py,sha256=D0zgmnGeBLATphYhzQj29EScxrMmAKqPRhP6nfWuYcY,4085
38
- xpk/commands/shell.py,sha256=mRHMwm3Izzsue4bocekm82Rg_cPUaGMClSlvNzNXQ-o,4467
39
34
  xpk/commands/storage.py,sha256=cSTJN9Mjvdsvk_Nk43kVdQFhp89nxWbanDsTOGZCkpQ,10708
40
35
  xpk/commands/version.py,sha256=k30rdLP9clUM8eeSwRFhpfzSb1qwcQImTfuC59Ed6CA,771
41
36
  xpk/commands/workload.py,sha256=l99NRFLs7pXuaLdn5d-Pid-cZulKpB3FNus-HdNDtZw,31513
@@ -47,7 +42,7 @@ xpk/core/cluster.py,sha256=3nl77I_MgQpBZsZSzsiQ_7IyFRzfLrYNRUL1gsSNhKU,24036
47
42
  xpk/core/cluster_private.py,sha256=RLi0C7bV0NEUXl6QKQzvUT0weN9EdqPvjuuOQsNO0DY,6868
48
43
  xpk/core/cluster_test.py,sha256=J4Wk7E--ik_IsWWzL_iWGWbx99Ih03m-0bs-uU7gGDg,5853
49
44
  xpk/core/commands.py,sha256=at73VJHdZ4rVA8uvW997tNrvnCjP9v6zaw96bU0kd74,10841
50
- xpk/core/config.py,sha256=L3iPFvzFCpW8IEAvlbkuEHYBYXmRTC0BAaR7I_5_Peo,5146
45
+ xpk/core/config.py,sha256=7U8jI5oZcgV_UnOHSS3huUIlDmPNREM-ml0N1Y9IvGM,4612
51
46
  xpk/core/config_test.py,sha256=POSuofK0LFbNNygDAo2fjtKY4NMrRjUFeGcpBh9JOS4,3569
52
47
  xpk/core/docker_container.py,sha256=8hqWWNKtjf6dqCFRpfndTMGvN_NS6zhfBr7YuKfh7qo,7626
53
48
  xpk/core/docker_image.py,sha256=9vwqbb6Mc3C5ZEOph03WS-EWI5hxMYGGigqzIMkDTjE,6909
@@ -74,7 +69,7 @@ xpk/core/resources.py,sha256=dDsG_LOtcU17p1UKgOYyjdPxbMfqcb7pJ4SjfLDA6Os,9389
74
69
  xpk/core/scheduling.py,sha256=RMoei_HUs03rfrEC-HYk7ONzg9BRKwr59-KljCR2TMo,11560
75
70
  xpk/core/scheduling_test.py,sha256=iYnzXv_MjN743pa4zYAgRqb-6dB9nVPpLI7JP5S8M2I,14463
76
71
  xpk/core/storage.py,sha256=NILvVAcLNMLmp4wKx_TEKbMMF5X1oL-FrQV46PT0_ds,16902
77
- xpk/core/system_characteristics.py,sha256=Tam8wjUz77E6jAJib-r0GsTBmdjo9uaEkXmIdWuzGO8,32844
72
+ xpk/core/system_characteristics.py,sha256=ZQbTbjaeT3Q12kmobz14U878w3FWnXDCetiLZQlVAdY,32127
78
73
  xpk/core/system_characteristics_test.py,sha256=sREN8u8bC0ze_q9hY3v-ZxC7so-_Ox1mt_DkIbUgHJ4,7477
79
74
  xpk/core/telemetry.py,sha256=R7IONNl5heMoNcOurfT3I34XJrBEODKVY88ONiDGuqE,7512
80
75
  xpk/core/telemetry_test.py,sha256=ll-B1ut9X-por17fpQnNb6hKrfyoZanMWRPbvqWrXss,8261
@@ -99,25 +94,21 @@ xpk/core/testing/__init__.py,sha256=PkV8D9WOtlJHH5AIxsQaKeIBcmupT_Ol_bwJgN6G2I8,
99
94
  xpk/core/testing/commands_tester.py,sha256=mQOSFggESeTdzqG4srAPV9ezmoeT90r22K58yAty9sE,4445
100
95
  xpk/core/testing/commands_tester_test.py,sha256=NnLWh7TJ9rKtb-DtB-vwkxvCe5wNtvUJ0f6sOa87Ht4,4023
101
96
  xpk/core/workload_decorators/__init__.py,sha256=YPwWBbgLAu7L-YlTVGB2r8ZV4TzypURMRBcehSHHlLY,561
102
- xpk/core/workload_decorators/rdma_decorator.py,sha256=isbgPnjdu2AT_Da1nVUIRoGE_qZ7jMDOKCgZOLq5r2A,4006
97
+ xpk/core/workload_decorators/rdma_decorator.py,sha256=02HVA_jSyzlVtSQnQj7aPdK03h7v5YyioBqEen6pbj0,3636
103
98
  xpk/core/workload_decorators/storage_decorator.py,sha256=DDYQVO1OKTLhveDOA4V6b2RWr4n0fbwHdnoFFmW7iaQ,2000
104
- xpk/core/workload_decorators/tcpx_decorator.py,sha256=6yvofTv6_XmRfI-nESZjGYeLmGrza1rWxeJGET0TqXU,6182
105
- xpk/core/workload_decorators/tcpx_decorator_test.py,sha256=iTBS3X_-VwA2oveNDjscduLtll0VOJyFRCp4xmsjg7w,8515
106
- xpk/core/workload_decorators/tcpxo_decorator.py,sha256=_nLX7tbnxhnS-xv4Jijd1JOP76V4LpNCfW3Np404Cqw,6537
99
+ xpk/core/workload_decorators/tcpx_decorator.py,sha256=cLOntH2ekBcPeiPW0sU3TRozSCpcTxgxpzncrMbRj44,5962
100
+ xpk/core/workload_decorators/tcpx_decorator_test.py,sha256=BmTWsFoBeLb9xhQh3kpqSiarkYax4bj2wLeZ9GrQzag,6089
101
+ xpk/core/workload_decorators/tcpxo_decorator.py,sha256=5SgL-7aTHclN7rvCGvEOjZoUixBmyjfuhVIUBFmneug,6124
107
102
  xpk/parser/__init__.py,sha256=YPwWBbgLAu7L-YlTVGB2r8ZV4TzypURMRBcehSHHlLY,561
108
- xpk/parser/batch.py,sha256=mJU-Cp1yTLje59vD-B1IiBcUeD-ZmEsoeB4xhj9cflc,1406
109
103
  xpk/parser/cluster.py,sha256=U2T-Q4yS86PWeFLNfknYWDDzZfubCKqIhqasxKLmErI,31342
110
104
  xpk/parser/cluster_test.py,sha256=xzQEC3IeAMpwsbNbHLuaNKxR3iaZcm3z4m3i61G62d4,6581
111
105
  xpk/parser/common.py,sha256=sJYGjrn2YgFxelDCYB18s1R8Md8GpDcMQNoAezxDDIs,7257
112
106
  xpk/parser/common_test.py,sha256=_6Fm2pUF7h4K0G5qxGabXSYr4ng9ihOzlViE6oLQwQs,1557
113
107
  xpk/parser/config.py,sha256=-XnWx9aFsBW4Uzo_hpOMD2ZQ0bdZLvq1ksv83_5jqSM,1633
114
- xpk/parser/core.py,sha256=VRJerlS92ufoQbG1mZv7B04DAP4qGkBHa4pRXgcbAs0,4761
108
+ xpk/parser/core.py,sha256=P2Dx3AbTlDoWnCCrMhVdr3Fs5FEzYFlmSiugkun1GL0,3623
115
109
  xpk/parser/info.py,sha256=UJohxVVWdt9IgUXoPsrVae2DN1BjAVGWrSN2ajrB8RQ,1860
116
110
  xpk/parser/inspector.py,sha256=hAPAZ2k9iSJgC1mjnz3rMleInsAQ8PmkyyUKFyBmsgY,1997
117
- xpk/parser/job.py,sha256=5RdE70rucGfrsn65l7Ho6RmO06mag1S0AO-3saVuXyw,4328
118
111
  xpk/parser/kind.py,sha256=sgPCqNVrgmFLcOBEbhlaphwVXxMh_opP9ntCq4KPePE,2682
119
- xpk/parser/run.py,sha256=oi_ksSyJ8Ooffe2EgoV_ecpmXEmNGVotjpIQH-HjufE,1481
120
- xpk/parser/shell.py,sha256=VC8p-kz9XjJZW9DXZ-rnv41XnRDRpQRFywHpB5j7tfc,1970
121
112
  xpk/parser/storage.py,sha256=0V1d1htsjoa-SuxOX_vNxz2Lg4Nue9CBe_H0bNS2Hv0,10270
122
113
  xpk/parser/storage_test.py,sha256=i_F9cuQXHRvUy4RJwbfuuI8ZVpTpkkY96sZ1GZ4dLPw,1494
123
114
  xpk/parser/validators.py,sha256=-NBZelvfwZRzjz-YUCreD8EzMLHll8PZM-d-MVm2PG4,1192
@@ -156,13 +147,13 @@ xpk/utils/user_agent.py,sha256=1NMtixC1RIr_MwM5pJ0THQ0x1-fCQA92TFHjWAVZldw,1083
156
147
  xpk/utils/user_agent_test.py,sha256=lkv8LqzhlA1gXFVeBzoLwE1_iGnm8G9LzkkElMrIrx0,1774
157
148
  xpk/utils/user_input.py,sha256=kMdCcPWdkI31f1mJcMsNGda-xKyKxEerpSLpCqIWYPc,1503
158
149
  xpk/utils/user_input_test.py,sha256=xO34jkMoTAk5Cmw7yHTk-7YexzC2UZ6ajihV8lnlAyI,2666
159
- xpk/utils/validation.py,sha256=irL9579RbvwxiGn1t3zhhPo-0oHgdUPOSYsUuFqsDSM,3039
150
+ xpk/utils/validation.py,sha256=rE9LTkXJT7jIesodFb9pONL7ixhLqiQleyoaz7N39Dw,2765
160
151
  xpk/utils/validation_test.py,sha256=PEDSMUqZdt_Lx1FSR-LOTXKKtsJ47JH1fxugM0Gfz6Y,1168
161
152
  xpk/utils/versions.py,sha256=_Ep68W70a9605XjiaOOpBa9Is9jXlsoOiwL8v5Xt-WA,897
162
153
  xpk/utils/yaml.py,sha256=j8xuAJ9yAAwnQi6ozwZ-nMnDyDnc3xWkeBZMtSuP4RU,844
163
- xpk-0.17.2.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
164
- xpk-0.17.2.dist-info/METADATA,sha256=_G5EPL08DVbtGWPXVmHAg_HxH_-op5be3Fx1rWRJiwI,7930
165
- xpk-0.17.2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
166
- xpk-0.17.2.dist-info/entry_points.txt,sha256=mzEtiIesFkT1kmcTUVDA1o3uOhiniX6tIz2wmOlMu1M,38
167
- xpk-0.17.2.dist-info/top_level.txt,sha256=TQKZWgV7LSElvmunYT9V_627qOMoxq3qYzWAFzKudB8,16
168
- xpk-0.17.2.dist-info/RECORD,,
154
+ xpk-0.17.3.dist-info/licenses/LICENSE,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358
155
+ xpk-0.17.3.dist-info/METADATA,sha256=ONK-6JpzJboT0wF60svxA4SKJTSqeQ4KNfgSvZ_kkDY,7930
156
+ xpk-0.17.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
157
+ xpk-0.17.3.dist-info/entry_points.txt,sha256=mzEtiIesFkT1kmcTUVDA1o3uOhiniX6tIz2wmOlMu1M,38
158
+ xpk-0.17.3.dist-info/top_level.txt,sha256=TQKZWgV7LSElvmunYT9V_627qOMoxq3qYzWAFzKudB8,16
159
+ xpk-0.17.3.dist-info/RECORD,,