xpk 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xpk/__init__.py +15 -0
- xpk/api/__init__.py +15 -0
- xpk/api/storage_crd.yaml +52 -0
- xpk/commands/__init__.py +15 -0
- xpk/commands/batch.py +131 -0
- xpk/commands/cluster.py +808 -0
- xpk/commands/cluster_gcluster.py +269 -0
- xpk/commands/common.py +44 -0
- xpk/commands/config.py +29 -0
- xpk/commands/info.py +243 -0
- xpk/commands/inspector.py +357 -0
- xpk/commands/job.py +199 -0
- xpk/commands/kind.py +283 -0
- xpk/commands/kjob_common.py +44 -0
- xpk/commands/run.py +128 -0
- xpk/commands/shell.py +140 -0
- xpk/commands/storage.py +267 -0
- xpk/commands/version.py +27 -0
- xpk/commands/workload.py +889 -0
- xpk/core/__init__.py +15 -0
- xpk/core/blueprint/__init__.py +15 -0
- xpk/core/blueprint/blueprint_definitions.py +62 -0
- xpk/core/blueprint/blueprint_generator.py +708 -0
- xpk/core/capacity.py +185 -0
- xpk/core/cluster.py +564 -0
- xpk/core/cluster_private.py +200 -0
- xpk/core/commands.py +356 -0
- xpk/core/config.py +179 -0
- xpk/core/docker_container.py +225 -0
- xpk/core/docker_image.py +210 -0
- xpk/core/docker_manager.py +308 -0
- xpk/core/docker_resources.py +350 -0
- xpk/core/filestore.py +251 -0
- xpk/core/gcloud_context.py +196 -0
- xpk/core/gcluster_manager.py +176 -0
- xpk/core/gcsfuse.py +50 -0
- xpk/core/kjob.py +444 -0
- xpk/core/kueue.py +358 -0
- xpk/core/monitoring.py +134 -0
- xpk/core/nap.py +361 -0
- xpk/core/network.py +377 -0
- xpk/core/nodepool.py +581 -0
- xpk/core/pathways.py +377 -0
- xpk/core/ray.py +222 -0
- xpk/core/remote_state/__init__.py +15 -0
- xpk/core/remote_state/fuse_remote_state.py +99 -0
- xpk/core/remote_state/remote_state_client.py +38 -0
- xpk/core/resources.py +238 -0
- xpk/core/scheduling.py +253 -0
- xpk/core/storage.py +581 -0
- xpk/core/system_characteristics.py +1432 -0
- xpk/core/vertex.py +105 -0
- xpk/core/workload.py +341 -0
- xpk/core/workload_decorators/__init__.py +15 -0
- xpk/core/workload_decorators/rdma_decorator.py +129 -0
- xpk/core/workload_decorators/storage_decorator.py +52 -0
- xpk/core/workload_decorators/tcpxo_decorator.py +190 -0
- xpk/main.py +75 -0
- xpk/parser/__init__.py +15 -0
- xpk/parser/batch.py +43 -0
- xpk/parser/cluster.py +662 -0
- xpk/parser/common.py +259 -0
- xpk/parser/config.py +49 -0
- xpk/parser/core.py +135 -0
- xpk/parser/info.py +64 -0
- xpk/parser/inspector.py +65 -0
- xpk/parser/job.py +147 -0
- xpk/parser/kind.py +95 -0
- xpk/parser/run.py +47 -0
- xpk/parser/shell.py +59 -0
- xpk/parser/storage.py +316 -0
- xpk/parser/validators.py +39 -0
- xpk/parser/version.py +23 -0
- xpk/parser/workload.py +726 -0
- xpk/templates/__init__.py +15 -0
- xpk/templates/storage.yaml +13 -0
- xpk/utils/__init__.py +15 -0
- xpk/utils/console.py +55 -0
- xpk/utils/file.py +82 -0
- xpk/utils/gcs_utils.py +125 -0
- xpk/utils/kubectl.py +57 -0
- xpk/utils/network.py +168 -0
- xpk/utils/objects.py +88 -0
- xpk/utils/templates.py +28 -0
- xpk/utils/validation.py +80 -0
- xpk/utils/yaml.py +30 -0
- xpk-0.0.1.dist-info/LICENSE +202 -0
- xpk-0.0.1.dist-info/METADATA +1498 -0
- xpk-0.0.1.dist-info/RECORD +92 -0
- xpk-0.0.1.dist-info/WHEEL +5 -0
- xpk-0.0.1.dist-info/entry_points.txt +2 -0
- xpk-0.0.1.dist-info/top_level.txt +1 -0
xpk/core/filestore.py
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright 2025 Google LLC
|
|
3
|
+
|
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
you may not use this file except in compliance with the License.
|
|
6
|
+
You may obtain a copy of the License at
|
|
7
|
+
|
|
8
|
+
https://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
|
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
See the License for the specific language governing permissions and
|
|
14
|
+
limitations under the License.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from enum import Enum
|
|
18
|
+
|
|
19
|
+
from google.cloud import filestore_v1
|
|
20
|
+
from google.cloud.exceptions import GoogleCloudError
|
|
21
|
+
from google.cloud.filestore_v1.types import (
|
|
22
|
+
FileShareConfig,
|
|
23
|
+
Instance,
|
|
24
|
+
NetworkConfig,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
from ..utils import templates
|
|
28
|
+
from ..utils.console import xpk_exit, xpk_print
|
|
29
|
+
from .cluster import zone_to_region
|
|
30
|
+
|
|
31
|
+
# Template locations for the Filestore manifests. Each one is handed to
# ``templates.load`` by the matching ``FilestoreClient.create_*`` method.
FS_PV_PATH = "/../templates/filestore-pv.yaml"
FS_PVC_PATH = "/../templates/filestore-pvc.yaml"
FS_SC_PATH = "/../templates/filestore-sc.yaml"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class Availability(Enum):
    """Availability scope of a Filestore tier.

    ``FilestoreClient.create_instance`` places instances of ``ZONAL`` tiers in
    the client's zone and all other tiers in the client's region.
    """

    # Tier whose instances are located by zone.
    ZONAL = "Zonal"
    # Tier whose instances are located by region.
    REGIONAL = "Regional"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# Maps every supported Filestore tier name to its availability.
# ``FilestoreClient.create_instance`` looks the tier up here to decide whether
# the instance is created in the zone or in the region.
TIERS = {
    "BASIC_HDD": Availability.ZONAL,
    "BASIC_SSD": Availability.ZONAL,
    "ZONAL": Availability.ZONAL,
    "REGIONAL": Availability.REGIONAL,
    "ENTERPRISE": Availability.REGIONAL,
}
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def get_storage_class_name(storage_name: str) -> str:
    """Return the StorageClass name for ``storage_name`` (``<name>-sc``)."""
    return "{}-sc".format(storage_name)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def get_pv_name(storage_name: str) -> str:
    """Return the PersistentVolume name for ``storage_name`` (``<name>-pv``)."""
    return "{}-pv".format(storage_name)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def get_pvc_name(storage_name: str) -> str:
    """Return the PersistentVolumeClaim name for ``storage_name`` (``<name>-pvc``)."""
    return "{}-pvc".format(storage_name)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class FilestoreClient:
    """Client for looking up, creating and deleting one GCP Filestore instance.

    It also renders the Kubernetes manifests (StorageClass, PersistentVolume
    and PersistentVolumeClaim) that reference the instance.

    Attributes:
        zone: zone given by the caller; location used for zonal tiers.
        region: region derived from ``zone`` with ``zone_to_region``.
        name: instance id, the last segment of the instance's full name.
        project: GCP project id used to build resource names.
        instance: cached ``Instance``; ``None`` until it is loaded or created.
    """

    def __init__(
        self,
        zone: str,
        name: str,
        project: str,
    ) -> None:
        self.zone = zone
        self.region = zone_to_region(zone)
        self.name = name
        self.project = project
        self._client = filestore_v1.CloudFilestoreManagerClient()
        self.instance: Instance | None = None

    def get_instance(self) -> Instance | None:
        """Get existing Filestore instance.

        The zone is searched first, then the region.

        Returns:
            The instance whose full name matches this client, or ``None`` when
            neither location lists it.
        """
        for location in (self.zone, self.region):
            request = filestore_v1.ListInstancesRequest(
                parent=self.get_parent(location)
            )
            fullname = self.get_instance_fullname(location)
            try:
                # Consume the listing inside the ``try`` so that an API error
                # raised while iterating the result is reported like an error
                # raised by the call itself.
                for instance in self._client.list_instances(request):
                    if instance.name == fullname:
                        return instance  # pytype: disable=bad-return-type
            except GoogleCloudError as e:
                xpk_print(f"Exception while trying to list instances {e}")
                xpk_exit(1)
        return None

    def check_instance_exists(self) -> bool:
        """Check if Filestore instance exists."""
        return self.get_instance() is not None

    def load_instance(self) -> None:
        """Populate ``self.instance`` from the API unless it is already cached."""
        if self.instance is None:
            self.instance = self.get_instance()

    def _require_instance(self) -> Instance:
        """Return the cached instance, loading it first; exit when none exists."""
        self.load_instance()
        if self.instance is None:
            xpk_print(
                f"Filestore instance {self.name} was not found in zone"
                f" {self.zone} or region {self.region}."
            )
            xpk_exit(1)
        return self.instance  # pytype: disable=bad-return-type

    def get_instance_location(self) -> str:
        """Get Filestore instance's location.

        The location is the fourth ``/``-separated segment of the instance's
        full name (``projects/<p>/locations/<location>/instances/<name>``).
        """
        return str(self._require_instance().name.split("/")[3])

    def create_instance(
        self,
        vol: str,
        size: int,
        tier: str,
        connect_mode=None,
        reserved_ip_range=None,
        network: str = "default",
        description: str = "XPK created filestore instance",
        kms_key_name=None,
        source_backup=None,
        nfs_export_options=None,
        modes=None,
    ) -> None:
        """Create new Filestore instance and cache it in ``self.instance``.

        Args:
            vol: name of the instance's single file share.
            size: capacity of the file share, passed as ``capacity_gb``.
            tier: tier name; must be a key of ``TIERS``.
            connect_mode: forwarded to ``NetworkConfig``.
            reserved_ip_range: forwarded to ``NetworkConfig``.
            network: network name, forwarded to ``NetworkConfig``.
            description: instance description.
            kms_key_name: forwarded to ``Instance``.
            source_backup: forwarded to ``FileShareConfig``.
            nfs_export_options: forwarded to ``FileShareConfig``.
            modes: forwarded to ``NetworkConfig``.

        Raises:
            KeyError: if ``tier`` is not a key of ``TIERS``.
        """
        # Zonal tiers are created in the zone, every other tier in the region.
        location = (
            self.zone
            if TIERS[tier].value == Availability.ZONAL.value
            else self.region
        )

        file_shares = [
            FileShareConfig(
                name=vol,
                capacity_gb=size,
                source_backup=source_backup,
                nfs_export_options=nfs_export_options,
            )
        ]
        networks = [
            NetworkConfig(
                network=network,
                modes=modes,
                reserved_ip_range=reserved_ip_range,
                connect_mode=connect_mode,
            )
        ]
        request = filestore_v1.CreateInstanceRequest(
            parent=self.get_parent(location),
            instance_id=self.name,
            instance=Instance(
                description=description,
                tier=tier,
                kms_key_name=kms_key_name,
                file_shares=file_shares,
                networks=networks,
            ),
        )
        self.instance = None
        try:
            # The initial call can fail as well as the long-running operation,
            # so both are covered by the same handler.
            operation = self._client.create_instance(request=request)
            xpk_print("Waiting for filestore creation to complete...")
            self.instance = operation.result()
        except GoogleCloudError as e:
            xpk_print(f"Error while creating Filestore instance: {e}")
            xpk_exit(1)
        xpk_print(
            f"Filestore instance {self.get_instance_fullname(location)} created"
        )

    def delete_filestore_instance(self) -> None:
        """Delete the Filestore instance and wait for the deletion to finish."""
        name = self.get_instance_fullname()
        request = filestore_v1.DeleteInstanceRequest(name=name)

        try:
            # Cover the initial call and the wait with one handler.
            operation = self._client.delete_instance(request)
            xpk_print("Waiting for filestore deletion to complete...")
            operation.result()
        except GoogleCloudError as e:
            xpk_print(f"Error while deleting Filestore instance: {e}")
            xpk_exit(1)
        xpk_print(f"Filestore instance {name} deleted")

    def create_sc(self, name: str, network: str) -> dict:
        """Create a yaml representing filestore StorageClass."""
        instance = self._require_instance()
        data = templates.load(FS_SC_PATH)
        data["metadata"]["name"] = get_storage_class_name(name)
        data["parameters"]["tier"] = instance.tier.name
        data["parameters"][
            "network"
        ] = f"projects/{self.project}/global/networks/{network}"
        return data

    def create_pv(self, name: str, vol: str, access_mode: str) -> dict:
        """Create a yaml representing filestore PersistentVolume."""
        instance = self._require_instance()
        data = templates.load(FS_PV_PATH)
        data["metadata"]["name"] = get_pv_name(name)
        spec = data["spec"]
        spec["storageClassName"] = get_storage_class_name(name)
        # Capacity and IP come from the instance's first file share / network.
        spec["capacity"]["storage"] = instance.file_shares[0].capacity_gb
        spec["accessModes"] = [access_mode]
        volumeHandle = f"{self.get_instance_fullname()}/volumes/{vol}"
        spec["csi"]["volumeHandle"] = volumeHandle
        spec["csi"]["volumeAttributes"]["ip"] = instance.networks[
            0
        ].ip_addresses[0]
        spec["csi"]["volumeAttributes"]["volume"] = vol
        return data

    def create_pvc(self, name: str, access_mode: str) -> dict:
        """Create a yaml representing filestore PersistentVolumeClaim."""
        instance = self._require_instance()
        data = templates.load(FS_PVC_PATH)
        data["metadata"]["name"] = get_pvc_name(name)
        spec = data["spec"]
        spec["accessModes"] = [access_mode]
        spec["storageClassName"] = get_storage_class_name(name)
        spec["volumeName"] = get_pv_name(name)
        spec["resources"]["requests"]["storage"] = (
            instance.file_shares[0].capacity_gb
        )
        return data

    def manifest(
        self, name: str, vol: str, access_mode: str, network: str
    ) -> list[dict]:
        """Build the manifests for the instance.

        Returns:
            ``[pv, pvc, sc]``: the PersistentVolume, PersistentVolumeClaim and
            StorageClass dictionaries, in that order.
        """
        self.load_instance()
        pv = self.create_pv(name, vol, access_mode)
        pvc = self.create_pvc(name, access_mode)
        sc = self.create_sc(name, network)
        return [pv, pvc, sc]

    def get_parent(self, location: str | None = None) -> str:
        """Get the Filestore's parent's name.

        Args:
            location: zone or region; defaults to the instance's own location.
        """
        if location is None:
            location = self.get_instance_location()
        return f"projects/{self.project}/locations/{location}"

    def get_instance_fullname(self, location: str | None = None) -> str:
        """Get the Filestore's full name.

        Args:
            location: zone or region; defaults to the instance's own location.
        """
        if location is None:
            location = self.get_instance_location()
        return f"projects/{self.project}/locations/{location}/instances/{self.name}"
|
xpk/core/gcloud_context.py
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright 2025 Google LLC
|
|
3
|
+
|
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
you may not use this file except in compliance with the License.
|
|
6
|
+
You may obtain a copy of the License at
|
|
7
|
+
|
|
8
|
+
https://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
|
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
See the License for the specific language governing permissions and
|
|
14
|
+
limitations under the License.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import subprocess
|
|
18
|
+
import sys
|
|
19
|
+
from dataclasses import dataclass
|
|
20
|
+
|
|
21
|
+
from ..utils.console import xpk_print
|
|
22
|
+
from .commands import run_command_for_value
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def get_project():
    """Read the GCE project from `gcloud config get project`.

    ``subprocess.run`` is called with ``check=True``, so a failing gcloud
    invocation raises ``subprocess.CalledProcessError``.

    Returns:
      The project name. Exits through ``sys.exit`` with a hint when gcloud
      prints no project.
    """
    gcloud_result = subprocess.run(
        ['gcloud', 'config', 'get', 'project'], check=True, capture_output=True
    )
    # The project name lives on the last line of the output.
    project = gcloud_result.stdout.decode().strip().split('\n')[-1]
    if not project:
        sys.exit(
            'You must specify the project in the project flag or set it with'
            " 'gcloud config set project <project>'"
        )
    return project
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def get_zone():
    """Read the GCE zone from `gcloud config get compute/zone`.

    ``subprocess.run`` is called with ``check=True``, so a failing gcloud
    invocation raises ``subprocess.CalledProcessError``.

    Returns:
      The zone name. Exits through ``sys.exit`` with a hint when gcloud
      prints no zone.
    """
    gcloud_result = subprocess.run(
        ['gcloud', 'config', 'get', 'compute/zone'],
        check=True,
        capture_output=True,
    )
    # The zone name lives on the last line of the output.
    zone = gcloud_result.stdout.decode().strip().split('\n')[-1]
    if not zone:
        sys.exit(
            "You must specify the zone in the zone flag or set it with 'gcloud"
            " config set compute/zone <zone>'"
        )
    return zone
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def add_zone_and_project(args):
    """Fill ``args.project`` and ``args.zone`` from gcloud configs when unset.

    A falsy ``args.project`` is replaced by ``get_project()`` and a falsy
    ``args.zone`` by ``get_zone()``; the resulting pair is then printed.

    Args:
      args: user provided arguments for running the command.
    """
    project_missing = not args.project
    if project_missing:
        args.project = get_project()

    zone_missing = not args.zone
    if zone_missing:
        args.zone = get_zone()

    summary = f'Working on {args.project} and {args.zone}'
    xpk_print(summary)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def zone_to_region(zone) -> str:
    """Convert a zone name to its region name.

    The region is the first two ``-``-separated terms of the zone, so a value
    that is already a region is returned unchanged.

    Args:
      zone: zone name.

    Returns:
      The region name.

    Raises:
      IndexError: if ``zone`` contains no ``-``.
    """
    terms = zone.split('-')
    return f'{terms[0]}-{terms[1]}'  # pytype: disable=bad-return-type
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
@dataclass
class GkeServerConfig:
    """Holds the GKE versions reported by gcloud for the RAPID channel."""

    # Default version of the RAPID channel.
    default_rapid_gke_version: str
    # Versions accepted for the control plane.
    valid_versions: set[str]
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def get_gke_server_config(args) -> tuple[int, GkeServerConfig | None]:
    """Determine the GKE versions supported by gcloud currently.

    Two ``gcloud container get-server-config`` queries are run against the
    RAPID channel: one for the default version and one for the valid versions.

    Args:
      args: user provided arguments for running the command.

    Returns:
      Tuple of
      int: 0 if successful, otherwise the return code of the failing command.
      GkeServerConfig: stores valid gke version to use in node pool and cluster;
        None when a command failed.
    """
    rapid_channel_command = (
        'gcloud container get-server-config'
        f' --project={args.project} --region={zone_to_region(args.zone)}'
        ' --flatten="channels" --filter="channels.channel=RAPID"'
    )
    description_prefix = 'Determine server supported GKE versions for '
    # (field of the channel to print, human readable name of the query)
    queries = (
        ('defaultVersion', 'default rapid gke version'),
        ('validVersions', 'valid versions'),
    )

    command_outputs = []
    for channel_field, query_name in queries:
        command_description = description_prefix + query_name
        return_code, cmd_output = run_command_for_value(
            f'{rapid_channel_command} --format="value(channels.{channel_field})"',
            command_description,
            args,
            hide_error=True,
        )
        if return_code != 0:
            xpk_print(f'Unable to get server config for {command_description}.')
            return return_code, None
        command_outputs.append(cmd_output)

    default_output, valid_output = command_outputs
    # Versions are ';'-separated. Strip each entry so surrounding whitespace
    # (e.g. a trailing newline) cannot end up inside a version string, and
    # drop empty entries so an empty output yields an empty set.
    valid_versions = {
        version.strip() for version in valid_output.split(';') if version.strip()
    }
    return 0, GkeServerConfig(
        default_rapid_gke_version=default_output.strip(),
        valid_versions=valid_versions,
    )
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def get_gke_control_plane_version(
    args, gke_server_config: GkeServerConfig
) -> tuple[int, str | None]:
    """Determine gke control plane version for cluster creation.

    The user supplied ``args.gke_version`` wins when it is not None; otherwise
    the default RAPID version is used. The chosen version must be one of
    ``gke_server_config.valid_versions``.

    Args:
      args: user provided arguments for running the command.
      gke_server_config: holds valid gke versions and recommended default version.

    Returns:
      Tuple of
      int: 0 if successful and 1 otherwise.
      str: gke control plane version to use; None when the version is invalid.
    """
    master_gke_version = (
        args.gke_version
        if args.gke_version is not None
        else gke_server_config.default_rapid_gke_version
    )

    is_valid_version = master_gke_version in gke_server_config.valid_versions
    if is_valid_version:
        return 0, master_gke_version

    xpk_print(
        f'Planned GKE Version: {master_gke_version}\n Valid Versions:'
        f'\n{gke_server_config.valid_versions}\nRecommended / Default GKE'
        f' Version: {gke_server_config.default_rapid_gke_version}'
    )
    # The leading space keeps the two sentences from running together.
    xpk_print(
        f'Error: Planned GKE Version {master_gke_version} is not valid.'
        f' Checks failed: Is Version Valid: {is_valid_version}'
    )
    xpk_print(
        'Please select a gke version from the above list using --gke-version=x'
        ' argument or rely on the default gke version:'
        f' {gke_server_config.default_rapid_gke_version}'
    )
    return 1, None
|
xpk/core/gcluster_manager.py
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright 2024 Google LLC
|
|
3
|
+
|
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
you may not use this file except in compliance with the License.
|
|
6
|
+
You may obtain a copy of the License at
|
|
7
|
+
|
|
8
|
+
https://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
|
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
See the License for the specific language governing permissions and
|
|
14
|
+
limitations under the License.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from .docker_manager import CommandRunner
|
|
18
|
+
from ..utils.console import xpk_exit, xpk_print
|
|
19
|
+
from .remote_state.remote_state_client import RemoteStateClient
|
|
20
|
+
|
|
21
|
+
# Module-level settings for running Cluster Toolkit (gcluster).
# NOTE(review): only the three ``gcluster_*_command`` strings are referenced by
# the code visible in this module; the other values are presumably consumed by
# other modules - confirm against their callers.
xpk_gcloud_cfg_path = '~/gcloud/cfg'
xpk_deployment_dir = '/deployment'
# Base gcluster sub-commands; GclusterManager appends paths and flags to them.
gcluster_deploy_command = 'gcluster deploy'
gcluster_create_command = 'gcluster create'
gcluster_destroy_command = 'gcluster destroy'
blueprint_file_name = 'xpk_blueprint.yaml'
deployment_module = '/out/xpk-deployment'
a3_utils_dir_name = 'a3-mega-xpk'
config_map_repo_path = 'src/xpk/blueprints/a3-mega-xpk/config-map.yaml.tftpl'
kueue_config_repo_path = (
    'src/xpk/blueprints/a3-mega-xpk/kueue-xpk-configuration.yaml.tftpl'
)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class GclusterManager:
    """Manager is a class responsible for running cluster toolkit commands.

    Attributes:
      - gcluster_command_runner (CommandRunner) : instance of class implementing CommandRunner abstract methods.
      - remote_state_client (RemoteStateClient | None) : client used to upload and download state; None when no remote state is configured.
    Methods:
      - deploy : run a deployment process of cluster toolkit. This method will invoke gcluster create and then gcluster deploy commands.
      - destroy_deployment : run gcluster command to destroy existing deployment.
      - stage_files : upload a blueprint and its dependencies to the runner's working directory.
      - upload_state / download_state : synchronise state through remote_state_client.
    """

    def __init__(
        self,
        gcluster_command_runner: CommandRunner,
        remote_state_client: RemoteStateClient | None,
    ) -> None:
        self.gcluster_command_runner = gcluster_command_runner
        self.remote_state_client = remote_state_client

    def _run_create_deployment_cmd(
        self, blueprint_container_path: str, prefix: str = ''
    ):
        """Run `gcluster create` for the blueprint into the deployment path."""
        xpk_print('Creating deployment resources...')
        cluster_create_cmd = (
            f'{gcluster_create_command} -o {self._get_deployment_path(prefix)}'
            f' {blueprint_container_path} -w --force'
        )
        self.gcluster_command_runner.run_command(cluster_create_cmd)
        xpk_print('Creating deployment resources completed.')

    def _run_deploy_cmd(
        self,
        deployment_name: str,
        auto_approve: bool,
        dry_run: bool,
        prefix: str = '',
    ):
        """Run `gcluster deploy`; on dry run only print the command."""
        xpk_print('Deploying resources...')
        deploy_cmd = (
            f'{gcluster_deploy_command} {self._get_deployment_path(prefix)}/{deployment_name}'
        )
        if auto_approve is True:
            deploy_cmd += ' --auto-approve'
        if dry_run is True:
            # Same dry-run reporting as _run_destroy_command.
            xpk_print(f'executing command {deploy_cmd}')
            return
        self.gcluster_command_runner.run_command(deploy_cmd)
        xpk_print('Deployment completed.')

    def deploy(
        self,
        blueprint_path: str,
        deployment_name: str,
        prefix: str = '',
        auto_approve: bool = True,
        dry_run: bool = False,
    ) -> None:
        """Provision a new cluster using Cluster Toolkit.

        It will invoke gcluster create and then gcluster deploy commands.
        The files staged or created during running gcluster command will be
        managed by gcluster_command_runner in its working directory.

        Args:
          blueprint_path (str): path pointing to blueprint which will be deployed.
          deployment_name (str): name of the deployment.
          prefix (str, optional): sub-directory of `deployments` to use. Defaults to ''.
          auto_approve (bool, optional): If set to true deployment command will be auto approved. Currently only True is supported. Defaults to True.
          dry_run (bool, optional): If set to True gcluster will not deploy; gcluster create is still run. Defaults to False.
        Returns:
          None
        """
        xpk_print(f'Deploying blueprint from path {blueprint_path} ...')
        self._run_create_deployment_cmd(
            blueprint_container_path=blueprint_path, prefix=prefix
        )
        self._run_deploy_cmd(
            deployment_name=deployment_name,
            prefix=prefix,
            auto_approve=auto_approve,
            dry_run=dry_run,
        )
        xpk_print('Deploying blueprint completed!')

    def _run_destroy_command(
        self,
        deployment_name: str,
        prefix: str = '',
        auto_approve: bool = True,
        dry_run: bool = False,
    ):
        """Run `gcluster destroy`; on dry run only print the command."""
        destroy_cmd = (
            f'{gcluster_destroy_command} {self._get_deployment_path(prefix)}/{deployment_name}'
        )
        if auto_approve is True:
            destroy_cmd += ' --auto-approve'
        if dry_run is True:
            xpk_print(f'executing command {destroy_cmd}')
            return
        self.gcluster_command_runner.run_command(destroy_cmd)

    def _get_deployment_path(self, prefix: str = '') -> str:
        """Return `deployments`, or `deployments/<prefix>` for a non-empty prefix."""
        prefix = f'/{prefix}' if prefix != '' else ''
        return f'deployments{prefix}'

    def destroy_deployment(self, deployment_name: str, prefix: str = '') -> None:
        """Destroy deployment.

        Args:
          deployment_name (str): name of deployment to destroy.
          prefix (str, optional): sub-directory of `deployments` holding it. Defaults to ''.
        """
        xpk_print(f'Destroying {deployment_name} started...')
        self._run_destroy_command(deployment_name, prefix=prefix)
        xpk_print(f'Destroying {deployment_name} completed!')

    def stage_files(
        self, blueprint_file: str, blueprint_dependencies: str, prefix: str = ''
    ) -> str:
        """Uploads blueprint file and directory to gcluster working directory.

        Args:
          blueprint_file (str): blueprint file to upload.
          blueprint_dependencies (str): directory to upload; skipped when empty.
          prefix (str, optional): passed through to the runner's upload calls.

        Returns:
          Value returned by the runner for the uploaded blueprint file.
        """
        xpk_print(
            "Staging (sending) blueprint file to gcluster's working directory..."
        )
        staged_blueprint = self.gcluster_command_runner.upload_file_to_working_dir(
            blueprint_file, prefix
        )
        if len(blueprint_dependencies) > 0:
            self.gcluster_command_runner.upload_directory_to_working_dir(
                blueprint_dependencies, prefix
            )
        xpk_print('Staging blueprint completed!')
        xpk_print(f"File path in gcluster's working directory: {staged_blueprint}")
        return staged_blueprint

    def upload_state(self) -> None:
        """Upload state through the remote state client; exit if none is set."""
        xpk_print('Uploading state.')
        if self.remote_state_client is None:
            xpk_print('No remote state defined')
            xpk_exit(1)
        self.remote_state_client.upload_state()

    def download_state(self) -> None:
        """Download remote state when it exists; exit if no client is set."""
        if self.remote_state_client is None:
            xpk_print('No remote state defined')
            xpk_exit(1)

        if self.remote_state_client.check_remote_state_exists():
            self.remote_state_client.download_state()
            # Return here so the "not found" notice is printed only when the
            # remote state is actually missing.
            return
        xpk_print('Remote state not found.')
|
xpk/core/gcsfuse.py
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright 2024 Google LLC
|
|
3
|
+
|
|
4
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
5
|
+
you may not use this file except in compliance with the License.
|
|
6
|
+
You may obtain a copy of the License at
|
|
7
|
+
|
|
8
|
+
https://www.apache.org/licenses/LICENSE-2.0
|
|
9
|
+
|
|
10
|
+
Unless required by applicable law or agreed to in writing, software
|
|
11
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
12
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
13
|
+
See the License for the specific language governing permissions and
|
|
14
|
+
limitations under the License.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from ..utils import templates
|
|
18
|
+
|
|
19
|
+
# Template locations for the GCS FUSE manifests; passed to ``templates.load``
# by ``create_pv`` and ``create_pvc``.
FUSE_PV_PATH = "/../templates/fuse-pv.yaml"
FUSE_PVC_PATH = "/../templates/fuse-pvc.yaml"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def create_pv(name: str, size: int, bucket: str) -> dict:
    """Build the GCS FUSE PersistentVolume manifest from its template.

    Args:
        name (str): base name; the volume is named ``<name>-pv``
        size (int): capacity, written as ``<size>Gi``
        bucket (str): name of the storage bucket, used as the CSI volume handle

    Returns:
        The loaded template with the fields above filled in.
    """
    pv = templates.load(FUSE_PV_PATH)
    pv["metadata"]["name"] = f"{name}-pv"
    spec = pv["spec"]
    spec["capacity"]["storage"] = f"{size}Gi"
    spec["csi"]["volumeHandle"] = bucket
    return pv
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def create_pvc(name: str, size: int) -> dict:
    """Build the GCS FUSE PersistentVolumeClaim manifest from its template.

    Args:
        name (str): base name; the claim is named ``<name>-pvc`` and is bound
            to the volume ``<name>-pv``
        size (int): requested storage, written as ``<size>Gi``

    Returns:
        The loaded template with the fields above filled in.
    """
    pvc = templates.load(FUSE_PVC_PATH)
    pvc["metadata"]["name"] = f"{name}-pvc"
    spec = pvc["spec"]
    spec["resources"]["requests"]["storage"] = f"{size}Gi"
    spec["volumeName"] = f"{name}-pv"
    return pvc
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def manifest(name: str, bucket: str, size: int) -> list[dict]:
    """Creates the GCS FUSE manifests.

    Args:
        name (str): base name of the volumes
        bucket (str): name of the storage bucket
        size (int): size of the storage

    Returns:
        ``[pv, pvc]``: the PersistentVolume and PersistentVolumeClaim
        dictionaries, in that order.
    """
    return [create_pv(name, size, bucket), create_pvc(name, size)]
|