xpk 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xpk/api/__init__.py +15 -0
- xpk/api/storage_crd.yaml +52 -0
- xpk/commands/batch.py +27 -5
- xpk/commands/cluster.py +104 -80
- xpk/commands/cluster_gcluster.py +94 -10
- xpk/commands/common.py +44 -0
- xpk/commands/config.py +29 -0
- xpk/commands/info.py +8 -10
- xpk/commands/inspector.py +5 -11
- xpk/commands/job.py +9 -7
- xpk/commands/kind.py +34 -4
- xpk/commands/kjob_common.py +44 -0
- xpk/commands/run.py +128 -0
- xpk/commands/shell.py +27 -7
- xpk/commands/storage.py +267 -0
- xpk/commands/version.py +6 -18
- xpk/commands/workload.py +381 -184
- xpk/core/blueprint/blueprint_definitions.py +1 -0
- xpk/core/blueprint/blueprint_generator.py +132 -76
- xpk/core/capacity.py +185 -0
- xpk/core/cluster.py +564 -0
- xpk/core/cluster_private.py +6 -3
- xpk/core/commands.py +18 -14
- xpk/core/config.py +179 -0
- xpk/core/docker_container.py +225 -0
- xpk/core/docker_image.py +210 -0
- xpk/core/docker_resources.py +350 -0
- xpk/core/filestore.py +251 -0
- xpk/core/gcloud_context.py +196 -0
- xpk/core/gcluster_manager.py +20 -2
- xpk/core/gcsfuse.py +50 -0
- xpk/core/kjob.py +257 -18
- xpk/core/kueue.py +12 -6
- xpk/core/monitoring.py +134 -0
- xpk/core/nap.py +32 -20
- xpk/core/network.py +377 -0
- xpk/core/nodepool.py +581 -0
- xpk/core/pathways.py +124 -45
- xpk/core/remote_state/__init__.py +15 -0
- xpk/core/remote_state/fuse_remote_state.py +99 -0
- xpk/core/remote_state/remote_state_client.py +38 -0
- xpk/core/resources.py +238 -0
- xpk/core/scheduling.py +253 -0
- xpk/core/storage.py +581 -0
- xpk/core/system_characteristics.py +38 -1
- xpk/core/vertex.py +105 -0
- xpk/core/workload.py +209 -1
- xpk/core/workload_decorators/rdma_decorator.py +25 -5
- xpk/core/workload_decorators/storage_decorator.py +52 -0
- xpk/core/workload_decorators/tcpxo_decorator.py +70 -37
- xpk/main.py +3 -1
- xpk/parser/batch.py +10 -151
- xpk/parser/cluster.py +49 -8
- xpk/parser/common.py +189 -1
- xpk/parser/config.py +49 -0
- xpk/parser/core.py +27 -1
- xpk/parser/info.py +2 -1
- xpk/parser/inspector.py +3 -3
- xpk/parser/job.py +25 -4
- xpk/parser/kind.py +3 -2
- xpk/parser/run.py +47 -0
- xpk/parser/shell.py +10 -1
- xpk/parser/storage.py +316 -0
- xpk/parser/validators.py +3 -3
- xpk/parser/workload.py +118 -76
- xpk/templates/__init__.py +15 -0
- xpk/templates/storage.yaml +13 -0
- xpk/utils/gcs_utils.py +125 -0
- xpk/utils/kubectl.py +57 -0
- xpk/utils/objects.py +8 -5
- xpk/utils/templates.py +28 -0
- xpk/utils/validation.py +80 -0
- {xpk-0.6.0.dist-info → xpk-0.7.0.dist-info}/METADATA +165 -14
- xpk-0.7.0.dist-info/RECORD +92 -0
- {xpk-0.6.0.dist-info → xpk-0.7.0.dist-info}/WHEEL +1 -1
- xpk/core/core.py +0 -2824
- xpk-0.6.0.dist-info/RECORD +0 -57
- {xpk-0.6.0.dist-info → xpk-0.7.0.dist-info}/LICENSE +0 -0
- {xpk-0.6.0.dist-info → xpk-0.7.0.dist-info}/entry_points.txt +0 -0
- {xpk-0.6.0.dist-info → xpk-0.7.0.dist-info}/top_level.txt +0 -0
xpk/parser/batch.py
CHANGED
|
@@ -14,9 +14,12 @@ See the License for the specific language governing permissions and
|
|
|
14
14
|
limitations under the License.
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
|
-
import
|
|
18
|
-
|
|
19
|
-
|
|
17
|
+
from .common import (
|
|
18
|
+
add_shared_arguments,
|
|
19
|
+
add_slurm_arguments,
|
|
20
|
+
add_cluster_arguments,
|
|
21
|
+
add_kind_cluster_arguments,
|
|
22
|
+
)
|
|
20
23
|
from ..commands.batch import batch
|
|
21
24
|
|
|
22
25
|
|
|
@@ -32,153 +35,9 @@ def set_batch_parser(batch_parser):
|
|
|
32
35
|
batch_required_arguments.add_argument(
|
|
33
36
|
'script', help='script with batch task to run'
|
|
34
37
|
)
|
|
35
|
-
batch_optional_arguments.add_argument(
|
|
36
|
-
'--cluster',
|
|
37
|
-
type=str,
|
|
38
|
-
default=None,
|
|
39
|
-
help='Cluster to which command applies.',
|
|
40
|
-
)
|
|
41
|
-
batch_optional_arguments.add_argument(
|
|
42
|
-
'--kind-cluster',
|
|
43
|
-
type=bool,
|
|
44
|
-
action=argparse.BooleanOptionalAction,
|
|
45
|
-
default=False,
|
|
46
|
-
help='Apply command to a local test cluster.',
|
|
47
|
-
)
|
|
48
|
-
add_shared_arguments(batch_optional_arguments)
|
|
49
38
|
|
|
39
|
+
add_cluster_arguments(batch_optional_arguments)
|
|
40
|
+
add_kind_cluster_arguments(batch_optional_arguments)
|
|
41
|
+
add_shared_arguments(batch_optional_arguments)
|
|
42
|
+
add_slurm_arguments(batch_optional_arguments)
|
|
50
43
|
batch_parser.set_defaults(func=batch)
|
|
51
|
-
|
|
52
|
-
batch_optional_arguments.add_argument(
|
|
53
|
-
'--ignore-unknown-flags',
|
|
54
|
-
type=bool,
|
|
55
|
-
action=argparse.BooleanOptionalAction,
|
|
56
|
-
default=False,
|
|
57
|
-
help='Ignore all the unsupported flags in the bash script.',
|
|
58
|
-
)
|
|
59
|
-
batch_optional_arguments.add_argument(
|
|
60
|
-
'-a',
|
|
61
|
-
'--array',
|
|
62
|
-
type=str,
|
|
63
|
-
default=None,
|
|
64
|
-
help=(
|
|
65
|
-
'Submit a job array, multiple jobs to be executed with identical'
|
|
66
|
-
' parameters. The indexes specification identifies what array index'
|
|
67
|
-
' values should be used. For example, "--array=0-15" or'
|
|
68
|
-
' "--array=0,6,16-32". Multiple values may be specified using a comma'
|
|
69
|
-
' separated list and/or a range of values with a "-" separator. For'
|
|
70
|
-
' example "--array=0-15%%4" will limit the number of simultaneously'
|
|
71
|
-
' running tasks from this job array to 4. The minimum index value is'
|
|
72
|
-
' 0. The maximum index value is 2147483647.'
|
|
73
|
-
),
|
|
74
|
-
)
|
|
75
|
-
batch_optional_arguments.add_argument(
|
|
76
|
-
'-c',
|
|
77
|
-
'--cpus-per-task',
|
|
78
|
-
type=str,
|
|
79
|
-
default=None,
|
|
80
|
-
help='How much cpus a container inside a pod requires.',
|
|
81
|
-
)
|
|
82
|
-
batch_optional_arguments.add_argument(
|
|
83
|
-
'--gpus-per-task',
|
|
84
|
-
type=str,
|
|
85
|
-
default=None,
|
|
86
|
-
help='How much gpus a container inside a pod requires.',
|
|
87
|
-
)
|
|
88
|
-
batch_optional_arguments.add_argument(
|
|
89
|
-
'--mem',
|
|
90
|
-
type=str,
|
|
91
|
-
default=None,
|
|
92
|
-
help='How much memory a pod requires.',
|
|
93
|
-
)
|
|
94
|
-
batch_optional_arguments.add_argument(
|
|
95
|
-
'--mem-per-task',
|
|
96
|
-
type=str,
|
|
97
|
-
default=None,
|
|
98
|
-
help='How much memory a container requires.',
|
|
99
|
-
)
|
|
100
|
-
batch_optional_arguments.add_argument(
|
|
101
|
-
'--mem-per-cpu',
|
|
102
|
-
type=str,
|
|
103
|
-
default=None,
|
|
104
|
-
help=(
|
|
105
|
-
'How much memory a container requires, it multiplies the number '
|
|
106
|
-
'of requested cpus per task by mem-per-cpu.'
|
|
107
|
-
),
|
|
108
|
-
)
|
|
109
|
-
batch_optional_arguments.add_argument(
|
|
110
|
-
'--mem-per-gpu',
|
|
111
|
-
type=str,
|
|
112
|
-
default=None,
|
|
113
|
-
help=(
|
|
114
|
-
'How much memory a container requires, it multiplies the number '
|
|
115
|
-
'of requested gpus per task by mem-per-gpu.'
|
|
116
|
-
),
|
|
117
|
-
)
|
|
118
|
-
batch_optional_arguments.add_argument(
|
|
119
|
-
'-N',
|
|
120
|
-
'--nodes',
|
|
121
|
-
type=int,
|
|
122
|
-
default=None,
|
|
123
|
-
help='Number of pods to be used at a time.',
|
|
124
|
-
)
|
|
125
|
-
batch_optional_arguments.add_argument(
|
|
126
|
-
'-n',
|
|
127
|
-
'--ntasks',
|
|
128
|
-
type=int,
|
|
129
|
-
default=None,
|
|
130
|
-
help='Number of identical containers inside of a pod, usually 1.',
|
|
131
|
-
)
|
|
132
|
-
batch_optional_arguments.add_argument(
|
|
133
|
-
'-o',
|
|
134
|
-
'--output',
|
|
135
|
-
type=str,
|
|
136
|
-
default=None,
|
|
137
|
-
help=(
|
|
138
|
-
'Where to redirect the standard output stream of a task. If not'
|
|
139
|
-
' passed it proceeds to stdout, and is available via kubectl logs.'
|
|
140
|
-
),
|
|
141
|
-
)
|
|
142
|
-
batch_optional_arguments.add_argument(
|
|
143
|
-
'-e',
|
|
144
|
-
'--error',
|
|
145
|
-
type=str,
|
|
146
|
-
default=None,
|
|
147
|
-
help=(
|
|
148
|
-
'Where to redirect std error stream of a task. If not passed it'
|
|
149
|
-
' proceeds to stdout, and is available via kubectl logs.'
|
|
150
|
-
),
|
|
151
|
-
)
|
|
152
|
-
batch_optional_arguments.add_argument(
|
|
153
|
-
'--input',
|
|
154
|
-
type=str,
|
|
155
|
-
default=None,
|
|
156
|
-
help='What to pipe into the script.',
|
|
157
|
-
)
|
|
158
|
-
batch_optional_arguments.add_argument(
|
|
159
|
-
'-J',
|
|
160
|
-
'--job-name',
|
|
161
|
-
type=str,
|
|
162
|
-
default=None,
|
|
163
|
-
help='What is the job name.',
|
|
164
|
-
)
|
|
165
|
-
batch_optional_arguments.add_argument(
|
|
166
|
-
'-D',
|
|
167
|
-
'--chdir',
|
|
168
|
-
type=str,
|
|
169
|
-
default=None,
|
|
170
|
-
help='Change directory before executing the script.',
|
|
171
|
-
)
|
|
172
|
-
batch_optional_arguments.add_argument(
|
|
173
|
-
'-t',
|
|
174
|
-
'--time',
|
|
175
|
-
type=str,
|
|
176
|
-
default=None,
|
|
177
|
-
help=(
|
|
178
|
-
'Set a limit on the total run time of the job. '
|
|
179
|
-
'A time limit of zero requests that no time limit be imposed. '
|
|
180
|
-
'Acceptable time formats include "minutes", "minutes:seconds", '
|
|
181
|
-
'"hours:minutes:seconds", "days-hours", "days-hours:minutes" '
|
|
182
|
-
'and "days-hours:minutes:seconds".'
|
|
183
|
-
),
|
|
184
|
-
)
|
xpk/parser/cluster.py
CHANGED
|
@@ -23,8 +23,11 @@ from ..commands.cluster import (
|
|
|
23
23
|
cluster_describe,
|
|
24
24
|
cluster_list,
|
|
25
25
|
)
|
|
26
|
-
from ..core.
|
|
26
|
+
from ..core.vertex import DEFAULT_VERTEX_TENSORBOARD_NAME
|
|
27
27
|
from .common import add_shared_arguments
|
|
28
|
+
from .validators import name_type
|
|
29
|
+
from ..commands.config import xpk_cfg
|
|
30
|
+
from ..core.config import CFG_BUCKET_KEY
|
|
28
31
|
|
|
29
32
|
|
|
30
33
|
def set_cluster_parser(cluster_parser):
|
|
@@ -82,6 +85,13 @@ def set_cluster_parser(cluster_parser):
|
|
|
82
85
|
)
|
|
83
86
|
|
|
84
87
|
### Optional arguments specific to "cluster create"
|
|
88
|
+
cluster_create_optional_arguments.add_argument(
|
|
89
|
+
'--cluster-state-gcs-bucket',
|
|
90
|
+
type=str,
|
|
91
|
+
default=xpk_cfg.get(CFG_BUCKET_KEY),
|
|
92
|
+
help='The name of the bucket to store cluster state.',
|
|
93
|
+
required=False,
|
|
94
|
+
)
|
|
85
95
|
cluster_create_optional_arguments.add_argument(
|
|
86
96
|
'--num-nodes',
|
|
87
97
|
type=int,
|
|
@@ -93,11 +103,10 @@ def set_cluster_parser(cluster_parser):
|
|
|
93
103
|
'--enable-pathways',
|
|
94
104
|
action='store_true',
|
|
95
105
|
help=(
|
|
96
|
-
'
|
|
97
|
-
'
|
|
106
|
+
'Please use `xpk cluster create-pathways` instead to'
|
|
107
|
+
' enable cluster to accept Pathways workloads.'
|
|
98
108
|
),
|
|
99
109
|
)
|
|
100
|
-
|
|
101
110
|
### Autoprovisioning arguments specific to "cluster create"
|
|
102
111
|
cluster_create_autoprovisioning_arguments = (
|
|
103
112
|
cluster_create_parser.add_argument_group(
|
|
@@ -269,13 +278,20 @@ def set_cluster_parser(cluster_parser):
|
|
|
269
278
|
### Required arguments
|
|
270
279
|
cluster_delete_required_arguments.add_argument(
|
|
271
280
|
'--cluster',
|
|
272
|
-
type=
|
|
281
|
+
type=name_type,
|
|
273
282
|
default=None,
|
|
274
283
|
help='The name of the cluster to be deleted.',
|
|
275
284
|
required=True,
|
|
276
285
|
)
|
|
277
286
|
|
|
278
287
|
### Optional Arguments
|
|
288
|
+
cluster_delete_optional_arguments.add_argument(
|
|
289
|
+
'--cluster-state-gcs-bucket',
|
|
290
|
+
type=str,
|
|
291
|
+
default=xpk_cfg.get(CFG_BUCKET_KEY),
|
|
292
|
+
help='The name of the bucket to store cluster state.',
|
|
293
|
+
required=False,
|
|
294
|
+
)
|
|
279
295
|
add_shared_arguments(cluster_delete_optional_arguments)
|
|
280
296
|
cluster_delete_parser.set_defaults(func=cluster_delete)
|
|
281
297
|
cluster_delete_parser.add_argument(
|
|
@@ -326,7 +342,7 @@ def set_cluster_parser(cluster_parser):
|
|
|
326
342
|
### Required arguments
|
|
327
343
|
cluster_cacheimage_required_arguments.add_argument(
|
|
328
344
|
'--cluster',
|
|
329
|
-
type=
|
|
345
|
+
type=name_type,
|
|
330
346
|
default=None,
|
|
331
347
|
help='The name of the cluster to cache the image.',
|
|
332
348
|
required=True,
|
|
@@ -370,7 +386,7 @@ def set_cluster_parser(cluster_parser):
|
|
|
370
386
|
### Required arguments
|
|
371
387
|
cluster_describe_required_arguments.add_argument(
|
|
372
388
|
'--cluster',
|
|
373
|
-
type=
|
|
389
|
+
type=name_type,
|
|
374
390
|
default=None,
|
|
375
391
|
help='The name of the cluster to be describe.',
|
|
376
392
|
required=True,
|
|
@@ -402,7 +418,7 @@ def add_shared_cluster_create_required_arguments(args_parsers):
|
|
|
402
418
|
for custom_parser in args_parsers:
|
|
403
419
|
custom_parser.add_argument(
|
|
404
420
|
'--cluster',
|
|
405
|
-
type=
|
|
421
|
+
type=name_type,
|
|
406
422
|
default=None,
|
|
407
423
|
help=(
|
|
408
424
|
'The name of the cluster. Will be used as the prefix for internal'
|
|
@@ -548,6 +564,31 @@ def add_shared_cluster_create_optional_arguments(args_parsers):
|
|
|
548
564
|
' Example usage: --authorized-networks 1.2.3.0/24 1.2.4.5/32'
|
|
549
565
|
),
|
|
550
566
|
)
|
|
567
|
+
custom_parser.add_argument(
|
|
568
|
+
'--enable-workload-identity',
|
|
569
|
+
action='store_true',
|
|
570
|
+
help=(
|
|
571
|
+
'Enable Workload Identity Federation on the cluster and node-pools.'
|
|
572
|
+
),
|
|
573
|
+
)
|
|
574
|
+
custom_parser.add_argument(
|
|
575
|
+
'--enable-gcsfuse-csi-driver',
|
|
576
|
+
action='store_true',
|
|
577
|
+
help=(
|
|
578
|
+
'Enable GSCFuse driver on the cluster. This enables Workload'
|
|
579
|
+
' Identity Federation. When using A3 ultra/A3 mega Workload'
|
|
580
|
+
' Identity is enabled by default.'
|
|
581
|
+
),
|
|
582
|
+
)
|
|
583
|
+
|
|
584
|
+
custom_parser.add_argument(
|
|
585
|
+
'--enable-gcpfilestore-csi-driver',
|
|
586
|
+
action='store_true',
|
|
587
|
+
help=(
|
|
588
|
+
'Enable GCPFilestore driver on the cluster. This enables Workload'
|
|
589
|
+
' Identity Federation.'
|
|
590
|
+
),
|
|
591
|
+
)
|
|
551
592
|
|
|
552
593
|
|
|
553
594
|
def add_shared_cluster_create_tensorboard_arguments(args_parsers):
|
xpk/parser/common.py
CHANGED
|
@@ -17,7 +17,9 @@ limitations under the License.
|
|
|
17
17
|
import argparse
|
|
18
18
|
|
|
19
19
|
|
|
20
|
-
def add_shared_arguments(
|
|
20
|
+
def add_shared_arguments(
|
|
21
|
+
custom_parser: argparse.ArgumentParser, required=False
|
|
22
|
+
) -> None:
|
|
21
23
|
"""Add shared arguments to the parser.
|
|
22
24
|
|
|
23
25
|
Args:
|
|
@@ -28,6 +30,7 @@ def add_shared_arguments(custom_parser: argparse.ArgumentParser):
|
|
|
28
30
|
type=str,
|
|
29
31
|
default=None,
|
|
30
32
|
help='GCE project name, defaults to "gcloud config project."',
|
|
33
|
+
required=required,
|
|
31
34
|
)
|
|
32
35
|
custom_parser.add_argument(
|
|
33
36
|
'--zone',
|
|
@@ -38,6 +41,7 @@ def add_shared_arguments(custom_parser: argparse.ArgumentParser):
|
|
|
38
41
|
'compute/zone." Only one of --zone or --region is allowed in a '
|
|
39
42
|
'command.'
|
|
40
43
|
),
|
|
44
|
+
required=required,
|
|
41
45
|
)
|
|
42
46
|
custom_parser.add_argument(
|
|
43
47
|
'--dry-run',
|
|
@@ -49,6 +53,39 @@ def add_shared_arguments(custom_parser: argparse.ArgumentParser):
|
|
|
49
53
|
' but not run them. This is imperfect in cases where xpk might'
|
|
50
54
|
' branch based on the output of commands'
|
|
51
55
|
),
|
|
56
|
+
required=required,
|
|
57
|
+
)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def add_cluster_arguments(
|
|
61
|
+
custom_parser: argparse.ArgumentParser, required=False
|
|
62
|
+
) -> None:
|
|
63
|
+
"""Add cluster argument to the parser.
|
|
64
|
+
|
|
65
|
+
Args:
|
|
66
|
+
custom_parser: parser to add shared arguments to.
|
|
67
|
+
"""
|
|
68
|
+
custom_parser.add_argument(
|
|
69
|
+
'--cluster',
|
|
70
|
+
type=str,
|
|
71
|
+
default=None,
|
|
72
|
+
help='The name of the cluster.',
|
|
73
|
+
required=required,
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def add_kind_cluster_arguments(custom_parser: argparse.ArgumentParser) -> None:
|
|
78
|
+
"""Add kind cluster arguments to the parser.
|
|
79
|
+
|
|
80
|
+
Args:
|
|
81
|
+
custom_parser: parser to add shared arguments to.
|
|
82
|
+
"""
|
|
83
|
+
custom_parser.add_argument(
|
|
84
|
+
'--kind-cluster',
|
|
85
|
+
type=bool,
|
|
86
|
+
action=argparse.BooleanOptionalAction,
|
|
87
|
+
default=False,
|
|
88
|
+
help='Apply command to a local test cluster.',
|
|
52
89
|
)
|
|
53
90
|
|
|
54
91
|
|
|
@@ -69,3 +106,154 @@ def add_global_arguments(custom_parser: argparse.ArgumentParser):
|
|
|
69
106
|
' branch based on the output of commands'
|
|
70
107
|
),
|
|
71
108
|
)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def add_slurm_arguments(custom_parser: argparse.ArgumentParser):
|
|
112
|
+
"""Add Slurm job arguments to the parser.
|
|
113
|
+
|
|
114
|
+
Args:
|
|
115
|
+
custom_parser: parser to add global arguments to.
|
|
116
|
+
"""
|
|
117
|
+
custom_parser.add_argument(
|
|
118
|
+
'--ignore-unknown-flags',
|
|
119
|
+
type=bool,
|
|
120
|
+
action=argparse.BooleanOptionalAction,
|
|
121
|
+
default=False,
|
|
122
|
+
help='Ignore all the unsupported flags in the bash script.',
|
|
123
|
+
)
|
|
124
|
+
custom_parser.add_argument(
|
|
125
|
+
'-a',
|
|
126
|
+
'--array',
|
|
127
|
+
type=str,
|
|
128
|
+
default=None,
|
|
129
|
+
help=(
|
|
130
|
+
'Submit a job array, multiple jobs to be executed with identical'
|
|
131
|
+
' parameters. The indexes specification identifies what array index'
|
|
132
|
+
' values should be used. For example, "--array=0-15" or'
|
|
133
|
+
' "--array=0,6,16-32". Multiple values may be specified using a comma'
|
|
134
|
+
' separated list and/or a range of values with a "-" separator. For'
|
|
135
|
+
' example "--array=0-15%%4" will limit the number of simultaneously'
|
|
136
|
+
' running tasks from this job array to 4. The minimum index value is'
|
|
137
|
+
' 0. The maximum index value is 2147483647.'
|
|
138
|
+
),
|
|
139
|
+
)
|
|
140
|
+
custom_parser.add_argument(
|
|
141
|
+
'-c',
|
|
142
|
+
'--cpus-per-task',
|
|
143
|
+
type=str,
|
|
144
|
+
default=None,
|
|
145
|
+
help='How much cpus a container inside a pod requires.',
|
|
146
|
+
)
|
|
147
|
+
custom_parser.add_argument(
|
|
148
|
+
'--gpus-per-task',
|
|
149
|
+
type=str,
|
|
150
|
+
default=None,
|
|
151
|
+
help='How much gpus a container inside a pod requires.',
|
|
152
|
+
)
|
|
153
|
+
custom_parser.add_argument(
|
|
154
|
+
'--mem',
|
|
155
|
+
type=str,
|
|
156
|
+
default=None,
|
|
157
|
+
help='How much memory a pod requires.',
|
|
158
|
+
)
|
|
159
|
+
custom_parser.add_argument(
|
|
160
|
+
'--mem-per-task',
|
|
161
|
+
type=str,
|
|
162
|
+
default=None,
|
|
163
|
+
help='How much memory a container requires.',
|
|
164
|
+
)
|
|
165
|
+
custom_parser.add_argument(
|
|
166
|
+
'--mem-per-cpu',
|
|
167
|
+
type=str,
|
|
168
|
+
default=None,
|
|
169
|
+
help=(
|
|
170
|
+
'How much memory a container requires, it multiplies the number '
|
|
171
|
+
'of requested cpus per task by mem-per-cpu.'
|
|
172
|
+
),
|
|
173
|
+
)
|
|
174
|
+
custom_parser.add_argument(
|
|
175
|
+
'--mem-per-gpu',
|
|
176
|
+
type=str,
|
|
177
|
+
default=None,
|
|
178
|
+
help=(
|
|
179
|
+
'How much memory a container requires, it multiplies the number '
|
|
180
|
+
'of requested gpus per task by mem-per-gpu.'
|
|
181
|
+
),
|
|
182
|
+
)
|
|
183
|
+
custom_parser.add_argument(
|
|
184
|
+
'-N',
|
|
185
|
+
'--nodes',
|
|
186
|
+
type=int,
|
|
187
|
+
default=None,
|
|
188
|
+
help='Number of pods to be used at a time.',
|
|
189
|
+
)
|
|
190
|
+
custom_parser.add_argument(
|
|
191
|
+
'-n',
|
|
192
|
+
'--ntasks',
|
|
193
|
+
type=int,
|
|
194
|
+
default=None,
|
|
195
|
+
help='Number of identical containers inside of a pod, usually 1.',
|
|
196
|
+
)
|
|
197
|
+
custom_parser.add_argument(
|
|
198
|
+
'-o',
|
|
199
|
+
'--output',
|
|
200
|
+
type=str,
|
|
201
|
+
default=None,
|
|
202
|
+
help=(
|
|
203
|
+
'Where to redirect the standard output stream of a task. If not'
|
|
204
|
+
' passed it proceeds to stdout, and is available via kubectl logs.'
|
|
205
|
+
),
|
|
206
|
+
)
|
|
207
|
+
custom_parser.add_argument(
|
|
208
|
+
'-e',
|
|
209
|
+
'--error',
|
|
210
|
+
type=str,
|
|
211
|
+
default=None,
|
|
212
|
+
help=(
|
|
213
|
+
'Where to redirect std error stream of a task. If not passed it'
|
|
214
|
+
' proceeds to stdout, and is available via kubectl logs.'
|
|
215
|
+
),
|
|
216
|
+
)
|
|
217
|
+
custom_parser.add_argument(
|
|
218
|
+
'--input',
|
|
219
|
+
type=str,
|
|
220
|
+
default=None,
|
|
221
|
+
help='What to pipe into the script.',
|
|
222
|
+
)
|
|
223
|
+
custom_parser.add_argument(
|
|
224
|
+
'-J',
|
|
225
|
+
'--job-name',
|
|
226
|
+
type=str,
|
|
227
|
+
default=None,
|
|
228
|
+
help='What is the job name.',
|
|
229
|
+
)
|
|
230
|
+
custom_parser.add_argument(
|
|
231
|
+
'-D',
|
|
232
|
+
'--chdir',
|
|
233
|
+
type=str,
|
|
234
|
+
default=None,
|
|
235
|
+
help='Change directory before executing the script.',
|
|
236
|
+
)
|
|
237
|
+
custom_parser.add_argument(
|
|
238
|
+
'-t',
|
|
239
|
+
'--time',
|
|
240
|
+
type=str,
|
|
241
|
+
default=None,
|
|
242
|
+
help=(
|
|
243
|
+
'Set a limit on the total run time of the job. '
|
|
244
|
+
'A time limit of zero requests that no time limit be imposed. '
|
|
245
|
+
'Acceptable time formats include "minutes", "minutes:seconds", '
|
|
246
|
+
'"hours:minutes:seconds", "days-hours", "days-hours:minutes" '
|
|
247
|
+
'and "days-hours:minutes:seconds".'
|
|
248
|
+
),
|
|
249
|
+
)
|
|
250
|
+
custom_parser.add_argument(
|
|
251
|
+
'--priority',
|
|
252
|
+
type=str,
|
|
253
|
+
default='medium',
|
|
254
|
+
choices=['very-low', 'low', 'medium', 'high', 'very-high'],
|
|
255
|
+
help=(
|
|
256
|
+
'A priority, one of `very-low`, `low`, `medium`, `high` or'
|
|
257
|
+
' `very-high`. Defaults to `medium`.'
|
|
258
|
+
),
|
|
259
|
+
)
|
xpk/parser/config.py
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright 2024 Google LLC
|
|
3
|
+
|
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
you may not use this file except in compliance with the License.
|
|
6
|
+
You may obtain a copy of the License at
|
|
7
|
+
|
|
8
|
+
https://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
|
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
See the License for the specific language governing permissions and
|
|
14
|
+
limitations under the License.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from ..commands.config import get_config, set_config
|
|
18
|
+
from ..core.config import DEFAULT_KEYS
|
|
19
|
+
from .common import add_shared_arguments
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def set_config_parsers(config_parser):
|
|
23
|
+
add_shared_arguments(config_parser)
|
|
24
|
+
|
|
25
|
+
config_subcommands = config_parser.add_subparsers(
|
|
26
|
+
title='config subcommands', dest='xpk_config_subcommands'
|
|
27
|
+
)
|
|
28
|
+
config_set_parser = config_subcommands.add_parser(
|
|
29
|
+
'set', help='set config key'
|
|
30
|
+
)
|
|
31
|
+
config_get_parser = config_subcommands.add_parser(
|
|
32
|
+
'get', help='get config key'
|
|
33
|
+
)
|
|
34
|
+
config_set_parser.add_argument(
|
|
35
|
+
'set_config_args',
|
|
36
|
+
help=f"""Pair of (key, value) to be set in config. Allowed keys are: {DEFAULT_KEYS}.
|
|
37
|
+
Command usage: `xpk config set key value`""",
|
|
38
|
+
type=str,
|
|
39
|
+
nargs=2,
|
|
40
|
+
)
|
|
41
|
+
config_get_parser.add_argument(
|
|
42
|
+
'get_config_key',
|
|
43
|
+
help=f"""Get key value from config. Allowed keys are: {DEFAULT_KEYS} .
|
|
44
|
+
Command usage: `xpk config get key`""",
|
|
45
|
+
type=str,
|
|
46
|
+
nargs=1,
|
|
47
|
+
)
|
|
48
|
+
config_set_parser.set_defaults(func=set_config)
|
|
49
|
+
config_get_parser.set_defaults(func=get_config)
|
xpk/parser/core.py
CHANGED
|
@@ -16,9 +16,12 @@ limitations under the License.
|
|
|
16
16
|
|
|
17
17
|
import argparse
|
|
18
18
|
|
|
19
|
+
from .config import set_config_parsers
|
|
20
|
+
|
|
19
21
|
from ..utils.console import xpk_print
|
|
20
22
|
from .cluster import set_cluster_parser
|
|
21
23
|
from .inspector import set_inspector_parser
|
|
24
|
+
from .storage import set_storage_parser
|
|
22
25
|
from .workload import set_workload_parsers
|
|
23
26
|
from .batch import set_batch_parser
|
|
24
27
|
from .job import set_job_parser
|
|
@@ -26,6 +29,7 @@ from .info import set_info_parser
|
|
|
26
29
|
from .kind import set_kind_parser
|
|
27
30
|
from .shell import set_shell_parser
|
|
28
31
|
from .version import set_version_parser
|
|
32
|
+
from .run import set_run_parser
|
|
29
33
|
|
|
30
34
|
|
|
31
35
|
def set_parser(parser: argparse.ArgumentParser):
|
|
@@ -33,7 +37,10 @@ def set_parser(parser: argparse.ArgumentParser):
|
|
|
33
37
|
title="xpk subcommands", dest="xpk_subcommands", help="Top level commands"
|
|
34
38
|
)
|
|
35
39
|
workload_parser = xpk_subcommands.add_parser(
|
|
36
|
-
"workload", help="Commands around workload management
|
|
40
|
+
"workload", help="Commands around workload management"
|
|
41
|
+
)
|
|
42
|
+
storage_parser = xpk_subcommands.add_parser(
|
|
43
|
+
"storage", help="Commands around storage management"
|
|
37
44
|
)
|
|
38
45
|
cluster_parser = xpk_subcommands.add_parser(
|
|
39
46
|
"cluster",
|
|
@@ -65,6 +72,15 @@ def set_parser(parser: argparse.ArgumentParser):
|
|
|
65
72
|
"version", help="Command to get xpk version"
|
|
66
73
|
)
|
|
67
74
|
|
|
75
|
+
config_parser = xpk_subcommands.add_parser(
|
|
76
|
+
"config", help="Commands to set and retrieve values from xpk config."
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
run_parser = xpk_subcommands.add_parser(
|
|
80
|
+
"run",
|
|
81
|
+
help="Command to run parallel jobs",
|
|
82
|
+
)
|
|
83
|
+
|
|
68
84
|
def default_subcommand_function(
|
|
69
85
|
_args,
|
|
70
86
|
) -> int: # args is unused, so pylint: disable=invalid-name
|
|
@@ -86,7 +102,10 @@ def set_parser(parser: argparse.ArgumentParser):
|
|
|
86
102
|
shell_parser.print_help()
|
|
87
103
|
version_parser.print_help()
|
|
88
104
|
kind_parser.print_help()
|
|
105
|
+
config_parser.print_help()
|
|
106
|
+
run_parser.print_help()
|
|
89
107
|
|
|
108
|
+
storage_parser.print_help()
|
|
90
109
|
return 0
|
|
91
110
|
|
|
92
111
|
parser.set_defaults(func=default_subcommand_function)
|
|
@@ -97,7 +116,11 @@ def set_parser(parser: argparse.ArgumentParser):
|
|
|
97
116
|
job_parser.set_defaults(func=default_subcommand_function)
|
|
98
117
|
kind_parser.set_defaults(func=default_subcommand_function)
|
|
99
118
|
shell_parser.set_defaults(func=default_subcommand_function)
|
|
119
|
+
storage_parser.set_defaults(func=default_subcommand_function)
|
|
100
120
|
version_parser.set_defaults(func=default_subcommand_function)
|
|
121
|
+
config_parser.set_defaults(func=default_subcommand_function)
|
|
122
|
+
run_parser.set_defaults(func=default_subcommand_function)
|
|
123
|
+
|
|
101
124
|
set_workload_parsers(workload_parser=workload_parser)
|
|
102
125
|
set_cluster_parser(cluster_parser=cluster_parser)
|
|
103
126
|
set_inspector_parser(inspector_parser=inspector_parser)
|
|
@@ -106,4 +129,7 @@ def set_parser(parser: argparse.ArgumentParser):
|
|
|
106
129
|
set_job_parser(job_parser=job_parser)
|
|
107
130
|
set_kind_parser(kind_parser=kind_parser)
|
|
108
131
|
set_shell_parser(shell_parser=shell_parser)
|
|
132
|
+
set_storage_parser(storage_parser=storage_parser)
|
|
109
133
|
set_version_parser(version_parser=version_parser)
|
|
134
|
+
set_config_parsers(config_parser=config_parser)
|
|
135
|
+
set_run_parser(run_parser=run_parser)
|
xpk/parser/info.py
CHANGED
|
@@ -16,6 +16,7 @@ limitations under the License.
|
|
|
16
16
|
|
|
17
17
|
from ..commands.info import info
|
|
18
18
|
from .common import add_shared_arguments
|
|
19
|
+
from .validators import name_type
|
|
19
20
|
import argparse
|
|
20
21
|
|
|
21
22
|
|
|
@@ -29,7 +30,7 @@ def set_info_parser(info_parser: argparse.ArgumentParser) -> None:
|
|
|
29
30
|
|
|
30
31
|
info_required_arguments.add_argument(
|
|
31
32
|
'--cluster',
|
|
32
|
-
type=
|
|
33
|
+
type=name_type,
|
|
33
34
|
default=None,
|
|
34
35
|
help='Cluster to which command applies.',
|
|
35
36
|
required=True,
|
xpk/parser/inspector.py
CHANGED
|
@@ -15,7 +15,7 @@ limitations under the License.
|
|
|
15
15
|
"""
|
|
16
16
|
|
|
17
17
|
from ..commands.inspector import inspector
|
|
18
|
-
from .validators import
|
|
18
|
+
from .validators import name_type
|
|
19
19
|
from .common import add_shared_arguments
|
|
20
20
|
|
|
21
21
|
|
|
@@ -37,7 +37,7 @@ def set_inspector_parser(inspector_parser):
|
|
|
37
37
|
|
|
38
38
|
inspector_parser_required_arguments.add_argument(
|
|
39
39
|
'--cluster',
|
|
40
|
-
type=
|
|
40
|
+
type=name_type,
|
|
41
41
|
default=None,
|
|
42
42
|
help='The name of the cluster to investigate.',
|
|
43
43
|
required=True,
|
|
@@ -48,7 +48,7 @@ def set_inspector_parser(inspector_parser):
|
|
|
48
48
|
|
|
49
49
|
inspector_parser_optional_arguments.add_argument(
|
|
50
50
|
'--workload',
|
|
51
|
-
type=
|
|
51
|
+
type=name_type,
|
|
52
52
|
default=None,
|
|
53
53
|
help='The name of the workload to investigate.',
|
|
54
54
|
)
|