xpk 0.7.2__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (46)
  1. xpk/commands/batch.py +19 -13
  2. xpk/commands/cluster.py +240 -71
  3. xpk/commands/cluster_gcluster.py +22 -5
  4. xpk/commands/common.py +33 -1
  5. xpk/commands/info.py +2 -4
  6. xpk/commands/job.py +7 -8
  7. xpk/commands/kjob_common.py +30 -18
  8. xpk/commands/run.py +17 -12
  9. xpk/commands/shell.py +3 -4
  10. xpk/commands/storage.py +75 -19
  11. xpk/commands/workload.py +161 -324
  12. xpk/core/blueprint/blueprint_definitions.py +2 -0
  13. xpk/core/blueprint/blueprint_generator.py +335 -45
  14. xpk/core/capacity.py +1 -0
  15. xpk/core/cluster.py +193 -12
  16. xpk/core/config.py +3 -1
  17. xpk/core/docker_manager.py +1 -1
  18. xpk/core/docker_resources.py +9 -21
  19. xpk/core/filestore.py +5 -1
  20. xpk/core/gcsfuse.py +27 -6
  21. xpk/core/kjob.py +66 -20
  22. xpk/core/kueue.py +30 -0
  23. xpk/core/mtc.py +195 -0
  24. xpk/core/nap.py +4 -0
  25. xpk/core/network.py +34 -22
  26. xpk/core/nodepool.py +28 -26
  27. xpk/core/pathways.py +165 -210
  28. xpk/core/resources.py +21 -0
  29. xpk/core/scheduling.py +36 -0
  30. xpk/core/storage.py +66 -12
  31. xpk/core/system_characteristics.py +9 -0
  32. xpk/core/workload.py +28 -83
  33. xpk/core/workload_decorators/rdma_decorator.py +11 -15
  34. xpk/core/workload_decorators/storage_decorator.py +8 -3
  35. xpk/core/workload_decorators/tcpx_decorator.py +179 -0
  36. xpk/core/workload_decorators/tcpxo_decorator.py +17 -16
  37. xpk/parser/cluster.py +574 -381
  38. xpk/parser/storage.py +25 -5
  39. xpk/parser/workload.py +59 -31
  40. xpk/utils/kubectl.py +4 -1
  41. {xpk-0.7.2.dist-info → xpk-0.9.0.dist-info}/METADATA +192 -93
  42. {xpk-0.7.2.dist-info → xpk-0.9.0.dist-info}/RECORD +46 -44
  43. {xpk-0.7.2.dist-info → xpk-0.9.0.dist-info}/WHEEL +1 -1
  44. {xpk-0.7.2.dist-info → xpk-0.9.0.dist-info}/entry_points.txt +0 -0
  45. {xpk-0.7.2.dist-info → xpk-0.9.0.dist-info}/licenses/LICENSE +0 -0
  46. {xpk-0.7.2.dist-info → xpk-0.9.0.dist-info}/top_level.txt +0 -0
xpk/parser/storage.py CHANGED
@@ -70,10 +70,10 @@ def add_storage_attach_parser(
70
70
  '--type',
71
71
  type=str,
72
72
  help=(
73
- 'The type of storage. Currently supported types: ["gcsfuse",'
74
- ' "gcpfilestore"]'
73
+ 'The type of storage. Currently supported types: "gcsfuse",'
74
+ ' "gcpfilestore", "parallelstore", "pd"'
75
75
  ),
76
- choices=['gcsfuse', 'gcpfilestore'],
76
+ choices=['gcsfuse', 'gcpfilestore', 'parallelstore', 'pd'],
77
77
  required=True,
78
78
  )
79
79
  add_cluster_arguments(req_args, required=True)
@@ -114,6 +114,15 @@ def add_storage_attach_parser(
114
114
  ' is infered as a bucket name.'
115
115
  ),
116
116
  )
117
+ gcsfuse_args.add_argument(
118
+ '--prefetch-metadata',
119
+ action=argparse.BooleanOptionalAction,
120
+ default=True,
121
+ help=(
122
+ '(optional) Enables metadata pre-population when'
123
+ ' mounting the volume. True by default.'
124
+ ),
125
+ )
117
126
 
118
127
  gcpfilestore_args = storage_attach_parser.add_argument_group(
119
128
  'Filestore arguments',
@@ -146,13 +155,19 @@ def add_storage_attach_parser(
146
155
 
147
156
  opt_args = storage_attach_parser.add_argument_group(
148
157
  'Optional Arguments',
149
- 'Optional arguments for storage create.',
158
+ 'Optional arguments for storage attach.',
150
159
  )
151
160
  opt_args.add_argument(
152
161
  '--manifest',
153
162
  type=str,
154
163
  help='Path to manifest file containing volume definitions',
155
164
  )
165
+ opt_args.add_argument(
166
+ '--mount-options',
167
+ type=str,
168
+ help='Comma-separated list of mountOptions for PersistentVolume',
169
+ default='implicit-dirs',
170
+ )
156
171
  add_kind_cluster_arguments(opt_args)
157
172
 
158
173
 
@@ -184,7 +199,6 @@ def add_storage_create_parser(
184
199
  ),
185
200
  required=True,
186
201
  )
187
-
188
202
  req_args.add_argument(
189
203
  '--type',
190
204
  type=str,
@@ -248,6 +262,12 @@ def add_storage_create_parser(
248
262
  type=str,
249
263
  help='Path to manifest file containing volume definitions',
250
264
  )
265
+ opt_args.add_argument(
266
+ '--mount-options',
267
+ type=str,
268
+ help='Comma-separated list of mountOptions for PersistentVolume',
269
+ default='',
270
+ )
251
271
 
252
272
  add_kind_cluster_arguments(opt_args)
253
273
 
xpk/parser/workload.py CHANGED
@@ -134,6 +134,24 @@ def set_workload_parsers(workload_parser):
134
134
  ' to use `gke.io/topology-aware-auto`.'
135
135
  ),
136
136
  )
137
+ workload_create_parser_optional_arguments.add_argument(
138
+ '--ramdisk-directory',
139
+ type=str,
140
+ default='',
141
+ help=(
142
+ 'The directory of the locally mounted RAM disk. This is only to'
143
+ ' be used with the CSI driver provided by GKE.'
144
+ ),
145
+ )
146
+ workload_create_parser_optional_arguments.add_argument(
147
+ '--mtc-enabled',
148
+ action='store_true',
149
+ help=(
150
+ 'The workload can use multi-tier checkpointing controllers when the'
151
+ ' --ramdisk-directory argument is used with this additional'
152
+ ' argument.'
153
+ ),
154
+ )
137
155
  workload_create_parser_optional_arguments.add_argument(
138
156
  '--debug-dump-gcs',
139
157
  type=str,
@@ -161,6 +179,19 @@ def set_workload_parsers(workload_parser):
161
179
  ' create Pathways workloads.'
162
180
  ),
163
181
  )
182
+ workload_create_parser_optional_arguments.add_argument(
183
+ '--restart-on-exit-codes',
184
+ type=str,
185
+ default=None,
186
+ help=(
187
+ 'Adding this argument specifies additional user-defined exit codes'
188
+ ' that allow restarting the workload when --max-restarts is set to'
189
+ ' a value greater than 0. By default, workloads restart on exit'
190
+ ' codes 42 and 127-255. Any exit codes provided through this flag'
191
+ ' will be included alongside the default codes for restarting'
192
+ ' conditions.'
193
+ ),
194
+ )
164
195
 
165
196
  # Autoprovisioning workload arguments
166
197
  workload_create_autoprovisioning_arguments.add_argument(
@@ -244,9 +275,7 @@ def set_workload_parsers(workload_parser):
244
275
  workload_create_pathways_parser_optional_arguments.add_argument(
245
276
  '--proxy-server-image',
246
277
  type=str,
247
- default=(
248
- 'us-docker.pkg.dev/cloud-tpu-v2-images/pathways/proxy_server:latest'
249
- ),
278
+ default='',
250
279
  help=(
251
280
  'Please provide the proxy server image for Pathways. This arg can'
252
281
  ' only be used in `xpk workload create-pathways`.'
@@ -255,7 +284,7 @@ def set_workload_parsers(workload_parser):
255
284
  workload_create_pathways_parser_optional_arguments.add_argument(
256
285
  '--server-image',
257
286
  type=str,
258
- default='us-docker.pkg.dev/cloud-tpu-v2-images/pathways/server:latest',
287
+ default='',
259
288
  help=(
260
289
  'Please provide the server image for Pathways. This arg can only be'
261
290
  ' used in `xpk workload create-pathways`.'
@@ -293,7 +322,7 @@ def set_workload_parsers(workload_parser):
293
322
  workload_create_pathways_parser_optional_arguments.add_argument(
294
323
  '--custom-pathways-server-args',
295
324
  type=str,
296
- default=None,
325
+ default='',
297
326
  help=(
298
327
  'Provide custom Pathways server args as follows -'
299
328
  " --custom-pathways-server-args='--arg_1=xxx --arg2=yyy'"
@@ -304,7 +333,7 @@ def set_workload_parsers(workload_parser):
304
333
  workload_create_pathways_parser_optional_arguments.add_argument(
305
334
  '--custom-pathways-proxy-server-args',
306
335
  type=str,
307
- default=None,
336
+ default='',
308
337
  help=(
309
338
  'Provide custom Pathways proxy server args as follows -'
310
339
  " --custom-pathways-proxy-server-args='--arg_1=xxx --arg2=yyy'"
@@ -315,7 +344,7 @@ def set_workload_parsers(workload_parser):
315
344
  workload_create_pathways_parser_optional_arguments.add_argument(
316
345
  '--custom-pathways-worker-args',
317
346
  type=str,
318
- default=None,
347
+ default='',
319
348
  help=(
320
349
  'Provide custom Pathways worker args as follows -'
321
350
  " --custom-pathways-worker-args='--arg_1=xxx --arg2=yyy'"
@@ -323,6 +352,27 @@ def set_workload_parsers(workload_parser):
323
352
  required=False,
324
353
  )
325
354
 
355
+ workload_create_pathways_parser_optional_arguments.add_argument(
356
+ '--elastic-slices',
357
+ type=int,
358
+ default=0,
359
+ help=(
360
+ 'Enable elastic slices in Pathways and specify'
361
+ ' the number of slices the workload could lose.'
362
+ ),
363
+ required=False,
364
+ )
365
+ workload_create_pathways_parser_optional_arguments.add_argument(
366
+ '--max-slice-restarts',
367
+ type=int,
368
+ default=1,
369
+ help=(
370
+ 'Specify the maximum times the workers in a slice can be'
371
+ ' restarted. Used with --elastic-slices for Pathways workloads.'
372
+ ),
373
+ required=False,
374
+ )
375
+
326
376
  add_shared_workload_create_required_arguments([
327
377
  workload_create_parser_required_arguments,
328
378
  workload_create_pathways_parser_required_arguments,
@@ -583,9 +633,9 @@ def add_shared_workload_create_optional_arguments(args_parsers):
583
633
  ),
584
634
  )
585
635
  custom_parser.add_argument(
586
- '--remote-python-sidecar-image',
636
+ '--colocated-python-sidecar-image',
587
637
  type=str,
588
- default=None,
638
+ default='',
589
639
  help='Remote Python sidecar server image.',
590
640
  )
591
641
  custom_parser.add_argument(
@@ -596,28 +646,6 @@ def add_shared_workload_create_optional_arguments(args_parsers):
596
646
  ' the workload.'
597
647
  ),
598
648
  )
599
- custom_parser.add_argument(
600
- '--restart-on-exit-codes',
601
- type=str,
602
- default=None,
603
- help=(
604
- 'Adding this argument specifies additional user-defined exit codes'
605
- ' that allow restarting the workload when --max-restarts is set to'
606
- ' a value greater than 0. By default, workloads restart on exit'
607
- ' codes 42 and 127-255. Any exit codes provided through this flag'
608
- ' will be included alongside the default codes for restarting'
609
- ' conditions.'
610
- ),
611
- )
612
- custom_parser.add_argument(
613
- '--ramdisk-directory',
614
- type=str,
615
- default='',
616
- help=(
617
- 'The directory of the locally mounted RAM disk. This is only to'
618
- ' be used with the CSI driver provided by GKE.'
619
- ),
620
- )
621
649
 
622
650
 
623
651
  def add_shared_workload_create_env_arguments(args_parsers):
xpk/utils/kubectl.py CHANGED
@@ -20,10 +20,11 @@ from kubernetes.dynamic import DynamicClient
20
20
  from .console import xpk_print
21
21
 
22
22
 
23
- def apply_kubectl_manifest(client, manifest):
23
+ def apply_kubectl_manifest(client, manifest) -> int:
24
24
  xpk_print('Applying manifest')
25
25
  dynamic_client = DynamicClient(client)
26
26
 
27
+ status_code = 0
27
28
  for obj in manifest:
28
29
  api_version = obj['apiVersion']
29
30
  kind = obj['kind']
@@ -55,3 +56,5 @@ def apply_kubectl_manifest(client, manifest):
55
56
  )
56
57
  else:
57
58
  xpk_print(f'Error applying {kind}: {e}')
59
+ status_code = 1
60
+ return status_code