xpk 0.6.0__tar.gz → 0.7.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {xpk-0.6.0 → xpk-0.7.1}/PKG-INFO +169 -15
- xpk-0.6.0/src/xpk.egg-info/PKG-INFO → xpk-0.7.1/README.md +154 -32
- {xpk-0.6.0 → xpk-0.7.1}/pyproject.toml +15 -9
- xpk-0.7.1/src/xpk/api/storage_crd.yaml +52 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/commands/batch.py +27 -5
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/commands/cluster.py +104 -80
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/commands/cluster_gcluster.py +94 -10
- xpk-0.7.1/src/xpk/commands/common.py +44 -0
- xpk-0.7.1/src/xpk/commands/config.py +29 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/commands/info.py +8 -10
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/commands/inspector.py +5 -11
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/commands/job.py +9 -7
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/commands/kind.py +34 -4
- xpk-0.7.1/src/xpk/commands/kjob_common.py +44 -0
- xpk-0.7.1/src/xpk/commands/run.py +128 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/commands/shell.py +27 -7
- xpk-0.7.1/src/xpk/commands/storage.py +280 -0
- xpk-0.7.1/src/xpk/commands/version.py +27 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/commands/workload.py +381 -184
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/core/blueprint/blueprint_definitions.py +1 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/core/blueprint/blueprint_generator.py +132 -76
- xpk-0.7.1/src/xpk/core/capacity.py +185 -0
- xpk-0.7.1/src/xpk/core/cluster.py +564 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/core/cluster_private.py +6 -3
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/core/commands.py +18 -14
- xpk-0.7.1/src/xpk/core/config.py +179 -0
- xpk-0.7.1/src/xpk/core/docker_container.py +225 -0
- xpk-0.7.1/src/xpk/core/docker_image.py +210 -0
- xpk-0.7.1/src/xpk/core/docker_resources.py +350 -0
- xpk-0.7.1/src/xpk/core/filestore.py +251 -0
- xpk-0.7.1/src/xpk/core/gcloud_context.py +196 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/core/gcluster_manager.py +20 -2
- xpk-0.7.1/src/xpk/core/gcsfuse.py +50 -0
- xpk-0.7.1/src/xpk/core/kjob.py +444 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/core/kueue.py +12 -6
- xpk-0.7.1/src/xpk/core/monitoring.py +134 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/core/nap.py +32 -20
- xpk-0.7.1/src/xpk/core/network.py +377 -0
- xpk-0.7.1/src/xpk/core/nodepool.py +581 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/core/pathways.py +124 -45
- xpk-0.7.1/src/xpk/core/remote_state/__init__.py +15 -0
- xpk-0.7.1/src/xpk/core/remote_state/fuse_remote_state.py +99 -0
- xpk-0.7.1/src/xpk/core/remote_state/remote_state_client.py +38 -0
- xpk-0.7.1/src/xpk/core/resources.py +238 -0
- xpk-0.7.1/src/xpk/core/scheduling.py +253 -0
- xpk-0.7.1/src/xpk/core/storage.py +581 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/core/system_characteristics.py +38 -1
- xpk-0.7.1/src/xpk/core/vertex.py +105 -0
- xpk-0.7.1/src/xpk/core/workload.py +341 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/core/workload_decorators/rdma_decorator.py +25 -5
- xpk-0.7.1/src/xpk/core/workload_decorators/storage_decorator.py +52 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/core/workload_decorators/tcpxo_decorator.py +70 -37
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/main.py +3 -1
- xpk-0.7.1/src/xpk/parser/batch.py +43 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/parser/cluster.py +49 -8
- xpk-0.6.0/src/xpk/parser/batch.py → xpk-0.7.1/src/xpk/parser/common.py +107 -32
- xpk-0.7.1/src/xpk/parser/config.py +49 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/parser/core.py +27 -1
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/parser/info.py +2 -1
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/parser/inspector.py +3 -3
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/parser/job.py +25 -4
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/parser/kind.py +3 -2
- xpk-0.7.1/src/xpk/parser/run.py +47 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/parser/shell.py +10 -1
- xpk-0.7.1/src/xpk/parser/storage.py +326 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/parser/validators.py +3 -3
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/parser/workload.py +118 -76
- xpk-0.7.1/src/xpk/templates/__init__.py +15 -0
- xpk-0.7.1/src/xpk/templates/storage.yaml +13 -0
- xpk-0.7.1/src/xpk/utils/__init__.py +15 -0
- xpk-0.7.1/src/xpk/utils/gcs_utils.py +125 -0
- xpk-0.7.1/src/xpk/utils/kubectl.py +57 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/utils/objects.py +8 -5
- xpk-0.7.1/src/xpk/utils/templates.py +28 -0
- xpk-0.7.1/src/xpk/utils/validation.py +80 -0
- xpk-0.6.0/README.md → xpk-0.7.1/src/xpk.egg-info/PKG-INFO +186 -6
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk.egg-info/SOURCES.txt +36 -1
- xpk-0.7.1/src/xpk.egg-info/requires.txt +18 -0
- xpk-0.6.0/src/xpk/commands/version.py +0 -39
- xpk-0.6.0/src/xpk/core/core.py +0 -2824
- xpk-0.6.0/src/xpk/core/kjob.py +0 -205
- xpk-0.6.0/src/xpk/core/workload.py +0 -133
- xpk-0.6.0/src/xpk/parser/common.py +0 -71
- xpk-0.6.0/src/xpk.egg-info/requires.txt +0 -13
- {xpk-0.6.0 → xpk-0.7.1}/LICENSE +0 -0
- {xpk-0.6.0 → xpk-0.7.1}/setup.cfg +0 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/__init__.py +0 -0
- {xpk-0.6.0/src/xpk/commands → xpk-0.7.1/src/xpk/api}/__init__.py +0 -0
- {xpk-0.6.0/src/xpk/core → xpk-0.7.1/src/xpk/commands}/__init__.py +0 -0
- {xpk-0.6.0/src/xpk/core/blueprint → xpk-0.7.1/src/xpk/core}/__init__.py +0 -0
- {xpk-0.6.0/src/xpk/core/workload_decorators → xpk-0.7.1/src/xpk/core/blueprint}/__init__.py +0 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/core/docker_manager.py +0 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/core/ray.py +0 -0
- {xpk-0.6.0/src/xpk/parser → xpk-0.7.1/src/xpk/core/workload_decorators}/__init__.py +0 -0
- {xpk-0.6.0/src/xpk/utils → xpk-0.7.1/src/xpk/parser}/__init__.py +0 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/parser/version.py +0 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/utils/console.py +0 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/utils/file.py +0 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/utils/network.py +0 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk/utils/yaml.py +0 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk.egg-info/dependency_links.txt +0 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk.egg-info/entry_points.txt +0 -0
- {xpk-0.6.0 → xpk-0.7.1}/src/xpk.egg-info/top_level.txt +0 -0
{xpk-0.6.0 → xpk-0.7.1}/PKG-INFO
RENAMED
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: xpk
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.7.1
|
|
4
4
|
Summary: xpk helps Cloud developers to orchestrate training jobs on accelerators on GKE.
|
|
5
5
|
Author-email: XPK team <xpk-code-reviewers@google.com>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -11,18 +11,24 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
11
11
|
Requires-Python: >=3.10
|
|
12
12
|
Description-Content-Type: text/markdown
|
|
13
13
|
License-File: LICENSE
|
|
14
|
-
Requires-Dist: cloud-accelerator-diagnostics
|
|
15
|
-
Requires-Dist: tabulate
|
|
16
|
-
Requires-Dist: ruamel.yaml
|
|
17
|
-
Requires-Dist: pyyaml
|
|
18
|
-
Requires-Dist: docker
|
|
19
|
-
Requires-Dist:
|
|
14
|
+
Requires-Dist: cloud-accelerator-diagnostics==0.1.1
|
|
15
|
+
Requires-Dist: tabulate==0.9.0
|
|
16
|
+
Requires-Dist: ruamel.yaml==0.18.10
|
|
17
|
+
Requires-Dist: pyyaml==6.0.2
|
|
18
|
+
Requires-Dist: docker==7.1.0
|
|
19
|
+
Requires-Dist: kubernetes==31.0.0
|
|
20
|
+
Requires-Dist: google-cloud==0.34.0
|
|
21
|
+
Requires-Dist: google-api-core==2.24.1
|
|
22
|
+
Requires-Dist: packaging==24.2
|
|
23
|
+
Requires-Dist: google-cloud-filestore==1.12.0
|
|
24
|
+
Requires-Dist: google-cloud-storage==2.19.0
|
|
20
25
|
Provides-Extra: dev
|
|
21
26
|
Requires-Dist: pyink==24.3.0; extra == "dev"
|
|
22
27
|
Requires-Dist: pylint>=2.6.0; extra == "dev"
|
|
23
28
|
Requires-Dist: pre-commit; extra == "dev"
|
|
24
29
|
Requires-Dist: pytest; extra == "dev"
|
|
25
|
-
Requires-Dist: docker; extra == "dev"
|
|
30
|
+
Requires-Dist: docker==7.1.0; extra == "dev"
|
|
31
|
+
Dynamic: license-file
|
|
26
32
|
|
|
27
33
|
<!--
|
|
28
34
|
Copyright 2023 Google LLC
|
|
@@ -42,6 +48,8 @@ Requires-Dist: docker; extra == "dev"
|
|
|
42
48
|
|
|
43
49
|
[](https://github.com/google/xpk/actions/workflows/build_tests.yaml)
|
|
44
50
|
[](https://github.com/google/xpk/actions/workflows/nightly_tests.yaml)
|
|
51
|
+
[](https://github.com/AI-Hypercomputer/xpk/actions/workflows/build_tests.yaml)
|
|
52
|
+
[](https://github.com/AI-Hypercomputer/xpk/actions/workflows/nightly_tests.yaml)
|
|
45
53
|
|
|
46
54
|
# Overview
|
|
47
55
|
|
|
@@ -80,7 +88,11 @@ and the following GPU types:
|
|
|
80
88
|
and the following CPU types:
|
|
81
89
|
* n2-standard-32
|
|
82
90
|
|
|
83
|
-
|
|
91
|
+
xpk also supports Google Cloud Storage solutions:
|
|
92
|
+
* [Cloud Storage FUSE](#fuse)
|
|
93
|
+
* [Filestore](#filestore)
|
|
94
|
+
|
|
95
|
+
# Permissions needed on Cloud Console:
|
|
84
96
|
|
|
85
97
|
* Artifact Registry Writer
|
|
86
98
|
* Compute Admin
|
|
@@ -90,6 +102,7 @@ and the following CPU types:
|
|
|
90
102
|
* Service Account User
|
|
91
103
|
* Storage Admin
|
|
92
104
|
* Vertex AI Administrator
|
|
105
|
+
* Filestore Editor (This role is necessary if you want to run `storage create` command with `--type=gcpfilestore`)
|
|
93
106
|
|
|
94
107
|
# Prerequisites
|
|
95
108
|
|
|
@@ -111,17 +124,28 @@ Following tools must be installed:
|
|
|
111
124
|
# sudo may be required
|
|
112
125
|
apt-get -y install make
|
|
113
126
|
```
|
|
114
|
-
In addition, below dependencies
|
|
127
|
+
In addition, below dependencies can be installed either using provided links or using `make install` command, if xpk is downloaded via `git clone` command:
|
|
115
128
|
- kueuectl (install from [here](https://kueue.sigs.k8s.io/docs/reference/kubectl-kueue/installation/))
|
|
116
129
|
- kjob (installation instructions [here](https://github.com/kubernetes-sigs/kjob/blob/main/docs/installation.md))
|
|
117
130
|
|
|
118
131
|
# Installation
|
|
119
|
-
To install xpk,
|
|
132
|
+
To install xpk, install required tools mentioned in [prerequisites](#prerequisites). [Makefile](https://github.com/AI-Hypercomputer/xpk/blob/main/Makefile) provides a way to install all necessary tools. XPK can be installed via pip:
|
|
120
133
|
|
|
121
134
|
```shell
|
|
122
135
|
pip install xpk
|
|
123
136
|
```
|
|
124
137
|
|
|
138
|
+
If you see an error saying: `This environment is externally managed`, please use a virtual environment.
|
|
139
|
+
|
|
140
|
+
```shell
|
|
141
|
+
## One time step of creating the venv
|
|
142
|
+
VENV_DIR=~/venvp3
|
|
143
|
+
python3 -m venv $VENV_DIR
|
|
144
|
+
## Enter your venv.
|
|
145
|
+
source $VENV_DIR/bin/activate
|
|
146
|
+
## Clone the repository and installing dependencies.
|
|
147
|
+
pip install xpk
|
|
148
|
+
```
|
|
125
149
|
|
|
126
150
|
If you are running XPK by cloning GitHub repository, first run the
|
|
127
151
|
following commands to begin using XPK commands:
|
|
@@ -174,6 +198,8 @@ cleanup with a `Cluster Delete`.
|
|
|
174
198
|
If you have failures with workloads not running, use `xpk inspector` to investigate
|
|
175
199
|
more.
|
|
176
200
|
|
|
201
|
+
If you need your Workloads to have persistent storage, use `xpk storage` to find out more.
|
|
202
|
+
|
|
177
203
|
## Cluster Create
|
|
178
204
|
|
|
179
205
|
First set the project and zone through gcloud config or xpk arguments.
|
|
@@ -448,6 +474,103 @@ Currently, the below flags/arguments are supported for A3-Mega and A3-Ultra mach
|
|
|
448
474
|
* --on-demand (only A3-Mega)
|
|
449
475
|
|
|
450
476
|
|
|
477
|
+
## Storage
|
|
478
|
+
Currently XPK supports two types of storages: Cloud Storage FUSE and Google Cloud Filestore.
|
|
479
|
+
|
|
480
|
+
### FUSE
|
|
481
|
+
A FUSE adapter lets you mount and access Cloud Storage buckets as local file systems, so applications can read and write objects in your bucket using standard file system semantics.
|
|
482
|
+
|
|
483
|
+
To use the GCS FUSE with XPK you need to create a [Storage Bucket](https://console.cloud.google.com/storage/).
|
|
484
|
+
|
|
485
|
+
Once it's ready you can use `xpk storage attach` with `--type=gcsfuse` command to attach a FUSE storage instance to your cluster:
|
|
486
|
+
|
|
487
|
+
```shell
|
|
488
|
+
python3 xpk.py storage attach test-fuse-storage --type=gcsfuse \
|
|
489
|
+
--project=$PROJECT --cluster=$CLUSTER --zone=$ZONE \
|
|
490
|
+
--mount-point='/test-mount-point' --readonly=false \
|
|
491
|
+
--bucket=test-bucket --size=1 --auto-mount=false
|
|
492
|
+
```
|
|
493
|
+
|
|
494
|
+
Parameters:
|
|
495
|
+
|
|
496
|
+
- `--type` - type of the storage, currently xpk supports `gcsfuse` and `gcpfilestore` only.
|
|
497
|
+
- `--auto-mount` - if set to true all workloads will have this storage mounted by default.
|
|
498
|
+
- `--mount-point` - the path on which this storage should be mounted for a workload.
|
|
499
|
+
- `--readonly` - if set to true, workload can only read from storage.
|
|
500
|
+
- `--size` - size of the storage in Gb.
|
|
501
|
+
- `--bucket` - name of the storage bucket. If not set then the name of the storage is used as a bucket name.
|
|
502
|
+
- `--manifest` - path to the manifest file containing PersistentVolume and PersistentVolumeClaim definitions. If set, then values from manifest override the following parameters: `--size` and `--bucket`.
|
|
503
|
+
|
|
504
|
+
### Filestore
|
|
505
|
+
|
|
506
|
+
A Filestore adapter lets you mount and access [Filestore instances](https://cloud.google.com/filestore/) as local file systems, so applications can read and write objects in your volumes using standard file system semantics.
|
|
507
|
+
|
|
508
|
+
To create and attach a GCP Filestore instance to your cluster use `xpk storage create` command with `--type=gcpfilestore`:
|
|
509
|
+
|
|
510
|
+
```shell
|
|
511
|
+
python3 xpk.py storage create test-fs-storage --type=gcpfilestore \
|
|
512
|
+
--auto-mount=false --mount-point=/data-fs --readonly=false \
|
|
513
|
+
--size=1024 --tier=BASIC_HDD --access_mode=ReadWriteMany --vol=default \
|
|
514
|
+
--project=$PROJECT --cluster=$CLUSTER --zone=$ZONE
|
|
515
|
+
```
|
|
516
|
+
|
|
517
|
+
You can also attach an existing Filestore instance to your cluster using `xpk storage attach` command:
|
|
518
|
+
|
|
519
|
+
```shell
|
|
520
|
+
python3 xpk.py storage attach test-fs-storage --type=gcpfilestore \
|
|
521
|
+
--auto-mount=false --mount-point=/data-fs --readonly=false \
|
|
522
|
+
--size=1024 --tier=BASIC_HDD --access_mode=ReadWriteMany --vol=default \
|
|
523
|
+
--project=$PROJECT --cluster=$CLUSTER --zone=$ZONE
|
|
524
|
+
```
|
|
525
|
+
|
|
526
|
+
The command above is also useful when attaching multiple volumes from the same Filestore instance.
|
|
527
|
+
|
|
528
|
+
Commands `xpk storage create` and `xpk storage attach` with `--type=gcpfilestore` accept following arguments:
|
|
529
|
+
- `--type` - type of the storage.
|
|
530
|
+
- `--auto-mount` - if set to true all workloads will have this storage mounted by default.
|
|
531
|
+
- `--mount-point` - the path on which this storage should be mounted for a workload.
|
|
532
|
+
- `--readonly` - if set to true, workload can only read from storage.
|
|
533
|
+
- `--size` - size of the Filestore instance that will be created in Gb.
|
|
534
|
+
- `--tier` - tier of the Filestore instance that will be created. Possible options are: `[BASIC_HDD, BASIC_SSD, ZONAL, REGIONAL, ENTERPRISE]`
|
|
535
|
+
- `--access-mode` - access mode of the Filestore instance that will be created. Possible values are: `[ReadWriteOnce, ReadOnlyMany, ReadWriteMany]`
|
|
536
|
+
- `--vol` - file share name of the Filestore instance that will be created.
|
|
537
|
+
- `--instance` - the name of the Filestore instance. If not set then the name parameter is used as an instance name. Useful when connecting multiple volumes from the same Filestore instance.
|
|
538
|
+
- `--manifest` - path to the manifest file containing PersistentVolume, PersistentVolumeClaim and StorageClass definitions. If set, then values from manifest override the following parameters: `--access-mode`, `--size` and `--volume`.
|
|
539
|
+
|
|
540
|
+
### List attached storages
|
|
541
|
+
|
|
542
|
+
```shell
|
|
543
|
+
python3 xpk.py storage list \
|
|
544
|
+
--project=$PROJECT --cluster $CLUSTER --zone=$ZONE
|
|
545
|
+
```
|
|
546
|
+
|
|
547
|
+
### Running workloads with storage
|
|
548
|
+
|
|
549
|
+
If you specified `--auto-mount=true` when creating or attaching a storage, then all workloads deployed on the cluster will have the volume attached by default. Otherwise, in order to have the storage attached, you have to add `--storage` parameter to `workload create` command:
|
|
550
|
+
|
|
551
|
+
```shell
|
|
552
|
+
python3 xpk.py workload create \
|
|
553
|
+
--workload xpk-test-workload --command "echo goodbye" \
|
|
554
|
+
--project=$PROJECT --cluster=$CLUSTER --zone=$ZONE \
|
|
555
|
+
--tpu-type=v5litepod-16 --storage=test-storage
|
|
556
|
+
```
|
|
557
|
+
|
|
558
|
+
### Detaching storage
|
|
559
|
+
|
|
560
|
+
```shell
|
|
561
|
+
python3 xpk.py storage detach $STORAGE_NAME \
|
|
562
|
+
--project=$PROJECT --cluster=$CLUSTER --zone=$ZONE
|
|
563
|
+
```
|
|
564
|
+
|
|
565
|
+
### Deleting storage
|
|
566
|
+
|
|
567
|
+
XPK allows you to remove Filestore instances easily with `xpk storage delete` command. **Warning:** this deletes all data contained in the Filestore!
|
|
568
|
+
|
|
569
|
+
```shell
|
|
570
|
+
python3 xpk.py storage delete test-fs-instance \
|
|
571
|
+
--project=$PROJECT --cluster=$CLUSTER --zone=$ZONE
|
|
572
|
+
```
|
|
573
|
+
|
|
451
574
|
## Workload Create
|
|
452
575
|
* Workload Create (submit training job):
|
|
453
576
|
|
|
@@ -455,7 +578,7 @@ Currently, the below flags/arguments are supported for A3-Mega and A3-Ultra mach
|
|
|
455
578
|
python3 xpk.py workload create \
|
|
456
579
|
--workload xpk-test-workload --command "echo goodbye" \
|
|
457
580
|
--cluster xpk-test \
|
|
458
|
-
--tpu-type=v5litepod-16
|
|
581
|
+
--tpu-type=v5litepod-16 --project=$PROJECT
|
|
459
582
|
```
|
|
460
583
|
|
|
461
584
|
* Workload Create for Pathways:
|
|
@@ -528,6 +651,8 @@ To submit jobs on a cluster with A3 machines, run the below command. To create a
|
|
|
528
651
|
```
|
|
529
652
|
> The docker image flags/arguments introduced in [workloads section](#workload-create) can be used with A3 machines as well.
|
|
530
653
|
|
|
654
|
+
In order to run NCCL test on A3 Ultra machines check out [this guide](/examples/nccl/nccl.md).
|
|
655
|
+
|
|
531
656
|
### Workload Priority and Preemption
|
|
532
657
|
* Set the priority level of your workload with `--priority=LEVEL`
|
|
533
658
|
|
|
@@ -666,8 +791,6 @@ Check out [MaxText example](https://github.com/google/maxtext/pull/570) on how t
|
|
|
666
791
|
```
|
|
667
792
|
|
|
668
793
|
* Workload List supports waiting for the completion of a specific job. XPK will follow an existing job until it has finished or the `timeout`, if provided, has been reached and then list the job. If no `timeout` is specified, the default value is set to the max value, 1 week. You may also set `timeout=0` to poll the job once.
|
|
669
|
-
(Note: `restart-on-user-code-failure` must be set
|
|
670
|
-
when creating the workload otherwise the workload will always finish with `Completed` status.)
|
|
671
794
|
|
|
672
795
|
Wait for a job to complete.
|
|
673
796
|
|
|
@@ -759,6 +882,35 @@ Inspector output is saved to a file.
|
|
|
759
882
|
[XPK] Exiting XPK cleanly
|
|
760
883
|
```
|
|
761
884
|
|
|
885
|
+
## Run
|
|
886
|
+
* `xpk run` lets you execute scripts on a cluster with ease. It automates task execution, handles interruptions, and streams job output to your console.
|
|
887
|
+
|
|
888
|
+
```shell
|
|
889
|
+
python xpk.py run --kind-cluster -n 2 -t 0-2 examples/job.sh
|
|
890
|
+
```
|
|
891
|
+
|
|
892
|
+
* Example Output:
|
|
893
|
+
|
|
894
|
+
```shell
|
|
895
|
+
[XPK] Starting xpk
|
|
896
|
+
[XPK] Task: `get current-context` is implemented by `kubectl config current-context`, hiding output unless there is an error.
|
|
897
|
+
[XPK] No local cluster name specified. Using current-context `kind-kind`
|
|
898
|
+
[XPK] Task: `run task` is implemented by `kubectl kjob create slurm --profile xpk-def-app-profile --localqueue multislice-queue --wait --rm -- examples/job.sh --partition multislice-queue --ntasks 2 --time 0-2`. Streaming output and input live.
|
|
899
|
+
job.batch/xpk-def-app-profile-slurm-g4vr6 created
|
|
900
|
+
configmap/xpk-def-app-profile-slurm-g4vr6 created
|
|
901
|
+
service/xpk-def-app-profile-slurm-g4vr6 created
|
|
902
|
+
Starting log streaming for pod xpk-def-app-profile-slurm-g4vr6-1-4rmgk...
|
|
903
|
+
Now processing task ID: 3
|
|
904
|
+
Starting log streaming for pod xpk-def-app-profile-slurm-g4vr6-0-bg6dm...
|
|
905
|
+
Now processing task ID: 1
|
|
906
|
+
exit
|
|
907
|
+
exit
|
|
908
|
+
Now processing task ID: 2
|
|
909
|
+
exit
|
|
910
|
+
Job logs streaming finished.[XPK] Task: `run task` terminated with code `0`
|
|
911
|
+
[XPK] XPK Done.
|
|
912
|
+
```
|
|
913
|
+
|
|
762
914
|
## GPU usage
|
|
763
915
|
|
|
764
916
|
In order to use XPK for GPU, you can do so by using `device-type` flag.
|
|
@@ -1241,6 +1393,8 @@ gcloud beta compute reservations describe $RESERVATION --project=$PROJECT_ID --z
|
|
|
1241
1393
|
|
|
1242
1394
|
## 403 error on workload create when using `--base-docker-image` flag
|
|
1243
1395
|
You need authority to push to the registry from your local machine. Try running `gcloud auth configure-docker`.
|
|
1396
|
+
## `Kubernetes API exception` - 404 error
|
|
1397
|
+
If error of this kind appeared after updating xpk version it's possible that you need to rerun `cluster create` command in order to update resource definitions.
|
|
1244
1398
|
|
|
1245
1399
|
# TPU Workload Debugging
|
|
1246
1400
|
|
|
@@ -1,29 +1,3 @@
|
|
|
1
|
-
Metadata-Version: 2.2
|
|
2
|
-
Name: xpk
|
|
3
|
-
Version: 0.6.0
|
|
4
|
-
Summary: xpk helps Cloud developers to orchestrate training jobs on accelerators on GKE.
|
|
5
|
-
Author-email: XPK team <xpk-code-reviewers@google.com>
|
|
6
|
-
License: Apache-2.0
|
|
7
|
-
Project-URL: Homepage, https://github.com/google/xpk
|
|
8
|
-
Project-URL: Bug Tracker, https://github.com/google/xpk/issues
|
|
9
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
10
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
11
|
-
Requires-Python: >=3.10
|
|
12
|
-
Description-Content-Type: text/markdown
|
|
13
|
-
License-File: LICENSE
|
|
14
|
-
Requires-Dist: cloud-accelerator-diagnostics
|
|
15
|
-
Requires-Dist: tabulate
|
|
16
|
-
Requires-Dist: ruamel.yaml
|
|
17
|
-
Requires-Dist: pyyaml
|
|
18
|
-
Requires-Dist: docker
|
|
19
|
-
Requires-Dist: packaging
|
|
20
|
-
Provides-Extra: dev
|
|
21
|
-
Requires-Dist: pyink==24.3.0; extra == "dev"
|
|
22
|
-
Requires-Dist: pylint>=2.6.0; extra == "dev"
|
|
23
|
-
Requires-Dist: pre-commit; extra == "dev"
|
|
24
|
-
Requires-Dist: pytest; extra == "dev"
|
|
25
|
-
Requires-Dist: docker; extra == "dev"
|
|
26
|
-
|
|
27
1
|
<!--
|
|
28
2
|
Copyright 2023 Google LLC
|
|
29
3
|
|
|
@@ -42,6 +16,8 @@ Requires-Dist: docker; extra == "dev"
|
|
|
42
16
|
|
|
43
17
|
[](https://github.com/google/xpk/actions/workflows/build_tests.yaml)
|
|
44
18
|
[](https://github.com/google/xpk/actions/workflows/nightly_tests.yaml)
|
|
19
|
+
[](https://github.com/AI-Hypercomputer/xpk/actions/workflows/build_tests.yaml)
|
|
20
|
+
[](https://github.com/AI-Hypercomputer/xpk/actions/workflows/nightly_tests.yaml)
|
|
45
21
|
|
|
46
22
|
# Overview
|
|
47
23
|
|
|
@@ -80,7 +56,11 @@ and the following GPU types:
|
|
|
80
56
|
and the following CPU types:
|
|
81
57
|
* n2-standard-32
|
|
82
58
|
|
|
83
|
-
|
|
59
|
+
xpk also supports Google Cloud Storage solutions:
|
|
60
|
+
* [Cloud Storage FUSE](#fuse)
|
|
61
|
+
* [Filestore](#filestore)
|
|
62
|
+
|
|
63
|
+
# Permissions needed on Cloud Console:
|
|
84
64
|
|
|
85
65
|
* Artifact Registry Writer
|
|
86
66
|
* Compute Admin
|
|
@@ -90,6 +70,7 @@ and the following CPU types:
|
|
|
90
70
|
* Service Account User
|
|
91
71
|
* Storage Admin
|
|
92
72
|
* Vertex AI Administrator
|
|
73
|
+
* Filestore Editor (This role is necessary if you want to run `storage create` command with `--type=gcpfilestore`)
|
|
93
74
|
|
|
94
75
|
# Prerequisites
|
|
95
76
|
|
|
@@ -111,17 +92,28 @@ Following tools must be installed:
|
|
|
111
92
|
# sudo may be required
|
|
112
93
|
apt-get -y install make
|
|
113
94
|
```
|
|
114
|
-
In addition, below dependencies
|
|
95
|
+
In addition, below dependencies can be installed either using provided links or using `make install` command, if xpk is downloaded via `git clone` command:
|
|
115
96
|
- kueuectl (install from [here](https://kueue.sigs.k8s.io/docs/reference/kubectl-kueue/installation/))
|
|
116
97
|
- kjob (installation instructions [here](https://github.com/kubernetes-sigs/kjob/blob/main/docs/installation.md))
|
|
117
98
|
|
|
118
99
|
# Installation
|
|
119
|
-
To install xpk,
|
|
100
|
+
To install xpk, install required tools mentioned in [prerequisites](#prerequisites). [Makefile](https://github.com/AI-Hypercomputer/xpk/blob/main/Makefile) provides a way to install all necessary tools. XPK can be installed via pip:
|
|
120
101
|
|
|
121
102
|
```shell
|
|
122
103
|
pip install xpk
|
|
123
104
|
```
|
|
124
105
|
|
|
106
|
+
If you see an error saying: `This environment is externally managed`, please use a virtual environment.
|
|
107
|
+
|
|
108
|
+
```shell
|
|
109
|
+
## One time step of creating the venv
|
|
110
|
+
VENV_DIR=~/venvp3
|
|
111
|
+
python3 -m venv $VENV_DIR
|
|
112
|
+
## Enter your venv.
|
|
113
|
+
source $VENV_DIR/bin/activate
|
|
114
|
+
## Clone the repository and installing dependencies.
|
|
115
|
+
pip install xpk
|
|
116
|
+
```
|
|
125
117
|
|
|
126
118
|
If you are running XPK by cloning GitHub repository, first run the
|
|
127
119
|
following commands to begin using XPK commands:
|
|
@@ -174,6 +166,8 @@ cleanup with a `Cluster Delete`.
|
|
|
174
166
|
If you have failures with workloads not running, use `xpk inspector` to investigate
|
|
175
167
|
more.
|
|
176
168
|
|
|
169
|
+
If you need your Workloads to have persistent storage, use `xpk storage` to find out more.
|
|
170
|
+
|
|
177
171
|
## Cluster Create
|
|
178
172
|
|
|
179
173
|
First set the project and zone through gcloud config or xpk arguments.
|
|
@@ -448,6 +442,103 @@ Currently, the below flags/arguments are supported for A3-Mega and A3-Ultra mach
|
|
|
448
442
|
* --on-demand (only A3-Mega)
|
|
449
443
|
|
|
450
444
|
|
|
445
|
+
## Storage
|
|
446
|
+
Currently XPK supports two types of storages: Cloud Storage FUSE and Google Cloud Filestore.
|
|
447
|
+
|
|
448
|
+
### FUSE
|
|
449
|
+
A FUSE adapter lets you mount and access Cloud Storage buckets as local file systems, so applications can read and write objects in your bucket using standard file system semantics.
|
|
450
|
+
|
|
451
|
+
To use the GCS FUSE with XPK you need to create a [Storage Bucket](https://console.cloud.google.com/storage/).
|
|
452
|
+
|
|
453
|
+
Once it's ready you can use `xpk storage attach` with `--type=gcsfuse` command to attach a FUSE storage instance to your cluster:
|
|
454
|
+
|
|
455
|
+
```shell
|
|
456
|
+
python3 xpk.py storage attach test-fuse-storage --type=gcsfuse \
|
|
457
|
+
--project=$PROJECT --cluster=$CLUSTER --zone=$ZONE \
|
|
458
|
+
--mount-point='/test-mount-point' --readonly=false \
|
|
459
|
+
--bucket=test-bucket --size=1 --auto-mount=false
|
|
460
|
+
```
|
|
461
|
+
|
|
462
|
+
Parameters:
|
|
463
|
+
|
|
464
|
+
- `--type` - type of the storage, currently xpk supports `gcsfuse` and `gcpfilestore` only.
|
|
465
|
+
- `--auto-mount` - if set to true all workloads will have this storage mounted by default.
|
|
466
|
+
- `--mount-point` - the path on which this storage should be mounted for a workload.
|
|
467
|
+
- `--readonly` - if set to true, workload can only read from storage.
|
|
468
|
+
- `--size` - size of the storage in Gb.
|
|
469
|
+
- `--bucket` - name of the storage bucket. If not set then the name of the storage is used as a bucket name.
|
|
470
|
+
- `--manifest` - path to the manifest file containing PersistentVolume and PersistentVolumeClaim definitions. If set, then values from manifest override the following parameters: `--size` and `--bucket`.
|
|
471
|
+
|
|
472
|
+
### Filestore
|
|
473
|
+
|
|
474
|
+
A Filestore adapter lets you mount and access [Filestore instances](https://cloud.google.com/filestore/) as local file systems, so applications can read and write objects in your volumes using standard file system semantics.
|
|
475
|
+
|
|
476
|
+
To create and attach a GCP Filestore instance to your cluster use `xpk storage create` command with `--type=gcpfilestore`:
|
|
477
|
+
|
|
478
|
+
```shell
|
|
479
|
+
python3 xpk.py storage create test-fs-storage --type=gcpfilestore \
|
|
480
|
+
--auto-mount=false --mount-point=/data-fs --readonly=false \
|
|
481
|
+
--size=1024 --tier=BASIC_HDD --access_mode=ReadWriteMany --vol=default \
|
|
482
|
+
--project=$PROJECT --cluster=$CLUSTER --zone=$ZONE
|
|
483
|
+
```
|
|
484
|
+
|
|
485
|
+
You can also attach an existing Filestore instance to your cluster using `xpk storage attach` command:
|
|
486
|
+
|
|
487
|
+
```shell
|
|
488
|
+
python3 xpk.py storage attach test-fs-storage --type=gcpfilestore \
|
|
489
|
+
--auto-mount=false --mount-point=/data-fs --readonly=false \
|
|
490
|
+
--size=1024 --tier=BASIC_HDD --access_mode=ReadWriteMany --vol=default \
|
|
491
|
+
--project=$PROJECT --cluster=$CLUSTER --zone=$ZONE
|
|
492
|
+
```
|
|
493
|
+
|
|
494
|
+
The command above is also useful when attaching multiple volumes from the same Filestore instance.
|
|
495
|
+
|
|
496
|
+
Commands `xpk storage create` and `xpk storage attach` with `--type=gcpfilestore` accept following arguments:
|
|
497
|
+
- `--type` - type of the storage.
|
|
498
|
+
- `--auto-mount` - if set to true all workloads will have this storage mounted by default.
|
|
499
|
+
- `--mount-point` - the path on which this storage should be mounted for a workload.
|
|
500
|
+
- `--readonly` - if set to true, workload can only read from storage.
|
|
501
|
+
- `--size` - size of the Filestore instance that will be created in Gb.
|
|
502
|
+
- `--tier` - tier of the Filestore instance that will be created. Possible options are: `[BASIC_HDD, BASIC_SSD, ZONAL, REGIONAL, ENTERPRISE]`
|
|
503
|
+
- `--access-mode` - access mode of the Filestore instance that will be created. Possible values are: `[ReadWriteOnce, ReadOnlyMany, ReadWriteMany]`
|
|
504
|
+
- `--vol` - file share name of the Filestore instance that will be created.
|
|
505
|
+
- `--instance` - the name of the Filestore instance. If not set then the name parameter is used as an instance name. Useful when connecting multiple volumes from the same Filestore instance.
|
|
506
|
+
- `--manifest` - path to the manifest file containing PersistentVolume, PersistentVolumeClaim and StorageClass definitions. If set, then values from manifest override the following parameters: `--access-mode`, `--size` and `--volume`.
|
|
507
|
+
|
|
508
|
+
### List attached storages
|
|
509
|
+
|
|
510
|
+
```shell
|
|
511
|
+
python3 xpk.py storage list \
|
|
512
|
+
--project=$PROJECT --cluster $CLUSTER --zone=$ZONE
|
|
513
|
+
```
|
|
514
|
+
|
|
515
|
+
### Running workloads with storage
|
|
516
|
+
|
|
517
|
+
If you specified `--auto-mount=true` when creating or attaching a storage, then all workloads deployed on the cluster will have the volume attached by default. Otherwise, in order to have the storage attached, you have to add `--storage` parameter to `workload create` command:
|
|
518
|
+
|
|
519
|
+
```shell
|
|
520
|
+
python3 xpk.py workload create \
|
|
521
|
+
--workload xpk-test-workload --command "echo goodbye" \
|
|
522
|
+
--project=$PROJECT --cluster=$CLUSTER --zone=$ZONE \
|
|
523
|
+
--tpu-type=v5litepod-16 --storage=test-storage
|
|
524
|
+
```
|
|
525
|
+
|
|
526
|
+
### Detaching storage
|
|
527
|
+
|
|
528
|
+
```shell
|
|
529
|
+
python3 xpk.py storage detach $STORAGE_NAME \
|
|
530
|
+
--project=$PROJECT --cluster=$CLUSTER --zone=$ZONE
|
|
531
|
+
```
|
|
532
|
+
|
|
533
|
+
### Deleting storage
|
|
534
|
+
|
|
535
|
+
XPK allows you to remove Filestore instances easily with `xpk storage delete` command. **Warning:** this deletes all data contained in the Filestore!
|
|
536
|
+
|
|
537
|
+
```shell
|
|
538
|
+
python3 xpk.py storage delete test-fs-instance \
|
|
539
|
+
--project=$PROJECT --cluster=$CLUSTER --zone=$ZONE
|
|
540
|
+
```
|
|
541
|
+
|
|
451
542
|
## Workload Create
|
|
452
543
|
* Workload Create (submit training job):
|
|
453
544
|
|
|
@@ -455,7 +546,7 @@ Currently, the below flags/arguments are supported for A3-Mega and A3-Ultra mach
|
|
|
455
546
|
python3 xpk.py workload create \
|
|
456
547
|
--workload xpk-test-workload --command "echo goodbye" \
|
|
457
548
|
--cluster xpk-test \
|
|
458
|
-
--tpu-type=v5litepod-16
|
|
549
|
+
--tpu-type=v5litepod-16 --project=$PROJECT
|
|
459
550
|
```
|
|
460
551
|
|
|
461
552
|
* Workload Create for Pathways:
|
|
@@ -528,6 +619,8 @@ To submit jobs on a cluster with A3 machines, run the below command. To create a
|
|
|
528
619
|
```
|
|
529
620
|
> The docker image flags/arguments introduced in [workloads section](#workload-create) can be used with A3 machines as well.
|
|
530
621
|
|
|
622
|
+
In order to run NCCL test on A3 Ultra machines check out [this guide](/examples/nccl/nccl.md).
|
|
623
|
+
|
|
531
624
|
### Workload Priority and Preemption
|
|
532
625
|
* Set the priority level of your workload with `--priority=LEVEL`
|
|
533
626
|
|
|
@@ -666,8 +759,6 @@ Check out [MaxText example](https://github.com/google/maxtext/pull/570) on how t
|
|
|
666
759
|
```
|
|
667
760
|
|
|
668
761
|
* Workload List supports waiting for the completion of a specific job. XPK will follow an existing job until it has finished or the `timeout`, if provided, has been reached and then list the job. If no `timeout` is specified, the default value is set to the max value, 1 week. You may also set `timeout=0` to poll the job once.
|
|
669
|
-
(Note: `restart-on-user-code-failure` must be set
|
|
670
|
-
when creating the workload otherwise the workload will always finish with `Completed` status.)
|
|
671
762
|
|
|
672
763
|
Wait for a job to complete.
|
|
673
764
|
|
|
@@ -759,6 +850,35 @@ Inspector output is saved to a file.
|
|
|
759
850
|
[XPK] Exiting XPK cleanly
|
|
760
851
|
```
|
|
761
852
|
|
|
853
|
+
## Run
|
|
854
|
+
* `xpk run` lets you execute scripts on a cluster with ease. It automates task execution, handles interruptions, and streams job output to your console.
|
|
855
|
+
|
|
856
|
+
```shell
|
|
857
|
+
python xpk.py run --kind-cluster -n 2 -t 0-2 examples/job.sh
|
|
858
|
+
```
|
|
859
|
+
|
|
860
|
+
* Example Output:
|
|
861
|
+
|
|
862
|
+
```shell
|
|
863
|
+
[XPK] Starting xpk
|
|
864
|
+
[XPK] Task: `get current-context` is implemented by `kubectl config current-context`, hiding output unless there is an error.
|
|
865
|
+
[XPK] No local cluster name specified. Using current-context `kind-kind`
|
|
866
|
+
[XPK] Task: `run task` is implemented by `kubectl kjob create slurm --profile xpk-def-app-profile --localqueue multislice-queue --wait --rm -- examples/job.sh --partition multislice-queue --ntasks 2 --time 0-2`. Streaming output and input live.
|
|
867
|
+
job.batch/xpk-def-app-profile-slurm-g4vr6 created
|
|
868
|
+
configmap/xpk-def-app-profile-slurm-g4vr6 created
|
|
869
|
+
service/xpk-def-app-profile-slurm-g4vr6 created
|
|
870
|
+
Starting log streaming for pod xpk-def-app-profile-slurm-g4vr6-1-4rmgk...
|
|
871
|
+
Now processing task ID: 3
|
|
872
|
+
Starting log streaming for pod xpk-def-app-profile-slurm-g4vr6-0-bg6dm...
|
|
873
|
+
Now processing task ID: 1
|
|
874
|
+
exit
|
|
875
|
+
exit
|
|
876
|
+
Now processing task ID: 2
|
|
877
|
+
exit
|
|
878
|
+
Job logs streaming finished.[XPK] Task: `run task` terminated with code `0`
|
|
879
|
+
[XPK] XPK Done.
|
|
880
|
+
```
|
|
881
|
+
|
|
762
882
|
## GPU usage
|
|
763
883
|
|
|
764
884
|
In order to use XPK for GPU, you can do so by using `device-type` flag.
|
|
@@ -1241,6 +1361,8 @@ gcloud beta compute reservations describe $RESERVATION --project=$PROJECT_ID --z
|
|
|
1241
1361
|
|
|
1242
1362
|
## 403 error on workload create when using `--base-docker-image` flag
|
|
1243
1363
|
You need authority to push to the registry from your local machine. Try running `gcloud auth configure-docker`.
|
|
1364
|
+
## `Kubernetes API exception` - 404 error
|
|
1365
|
+
If error of this kind appeared after updating xpk version it's possible that you need to rerun `cluster create` command in order to update resource definitions.
|
|
1244
1366
|
|
|
1245
1367
|
# TPU Workload Debugging
|
|
1246
1368
|
|
|
@@ -30,12 +30,17 @@ keywords = []
|
|
|
30
30
|
|
|
31
31
|
# pip dependencies installed with `pip install -e .`
|
|
32
32
|
dependencies = [
|
|
33
|
-
"cloud-accelerator-diagnostics",
|
|
34
|
-
"tabulate",
|
|
35
|
-
"ruamel.yaml",
|
|
36
|
-
"pyyaml",
|
|
37
|
-
"docker",
|
|
38
|
-
"
|
|
33
|
+
"cloud-accelerator-diagnostics==0.1.1",
|
|
34
|
+
"tabulate==0.9.0",
|
|
35
|
+
"ruamel.yaml==0.18.10",
|
|
36
|
+
"pyyaml==6.0.2",
|
|
37
|
+
"docker==7.1.0",
|
|
38
|
+
"kubernetes==31.0.0",
|
|
39
|
+
"google-cloud==0.34.0",
|
|
40
|
+
"google-api-core==2.24.1",
|
|
41
|
+
"packaging==24.2",
|
|
42
|
+
"google-cloud-filestore==1.12.0",
|
|
43
|
+
"google-cloud-storage==2.19.0"
|
|
39
44
|
]
|
|
40
45
|
|
|
41
46
|
[project.urls]
|
|
@@ -57,15 +62,16 @@ dev = [
|
|
|
57
62
|
"pylint>=2.6.0",
|
|
58
63
|
"pre-commit",
|
|
59
64
|
"pytest",
|
|
60
|
-
"docker"
|
|
65
|
+
"docker==7.1.0"
|
|
61
66
|
]
|
|
62
67
|
|
|
63
68
|
[tool.setuptools.dynamic]
|
|
64
|
-
version = {attr = "xpk.core.
|
|
69
|
+
version = {attr = "xpk.core.config.__version__"}
|
|
65
70
|
|
|
66
71
|
[tool.setuptools]
|
|
67
|
-
packages = ["xpk", "xpk.parser", "xpk.core", "xpk.commands", "xpk.utils", "xpk.core.blueprint", "xpk.core.workload_decorators"]
|
|
72
|
+
packages = ["xpk", "xpk.parser", "xpk.core", "xpk.commands", "xpk.api", "xpk.templates", "xpk.utils", "xpk.core.blueprint", "xpk.core.remote_state", "xpk.core.workload_decorators"]
|
|
68
73
|
package-dir = {"" = "src"}
|
|
74
|
+
package-data = {"xpk.api" = ["storage_crd.yaml"], "xpk.templates" = ["storage.yaml"]}
|
|
69
75
|
|
|
70
76
|
[tool.pyink]
|
|
71
77
|
# Formatting configuration to follow Google style-guide.
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
apiVersion: apiextensions.k8s.io/v1
|
|
2
|
+
kind: CustomResourceDefinition
|
|
3
|
+
metadata:
|
|
4
|
+
name: storages.xpk.x-k8s.io
|
|
5
|
+
spec:
|
|
6
|
+
group: xpk.x-k8s.io
|
|
7
|
+
versions:
|
|
8
|
+
- name: v1
|
|
9
|
+
served: true
|
|
10
|
+
storage: true
|
|
11
|
+
schema:
|
|
12
|
+
openAPIV3Schema:
|
|
13
|
+
type: object
|
|
14
|
+
properties:
|
|
15
|
+
spec:
|
|
16
|
+
type: object
|
|
17
|
+
properties:
|
|
18
|
+
type:
|
|
19
|
+
type: string
|
|
20
|
+
cluster:
|
|
21
|
+
type: string
|
|
22
|
+
auto_mount:
|
|
23
|
+
type: boolean
|
|
24
|
+
mount_point:
|
|
25
|
+
type: string
|
|
26
|
+
readonly:
|
|
27
|
+
type: boolean
|
|
28
|
+
manifest:
|
|
29
|
+
type: string
|
|
30
|
+
pv:
|
|
31
|
+
type: string
|
|
32
|
+
pvc:
|
|
33
|
+
type: string
|
|
34
|
+
required:
|
|
35
|
+
- type
|
|
36
|
+
- cluster
|
|
37
|
+
- auto_mount
|
|
38
|
+
- mount_point
|
|
39
|
+
- readonly
|
|
40
|
+
- manifest
|
|
41
|
+
- pvc
|
|
42
|
+
- pv
|
|
43
|
+
x-kubernetes-validations:
|
|
44
|
+
- message: Value is immutable
|
|
45
|
+
rule: self == oldSelf
|
|
46
|
+
scope: Cluster
|
|
47
|
+
names:
|
|
48
|
+
plural: storages
|
|
49
|
+
singular: storage
|
|
50
|
+
kind: Storage
|
|
51
|
+
shortNames:
|
|
52
|
+
- stg
|