xpk 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. xpk/__init__.py +15 -0
  2. xpk/api/__init__.py +15 -0
  3. xpk/api/storage_crd.yaml +52 -0
  4. xpk/commands/__init__.py +15 -0
  5. xpk/commands/batch.py +131 -0
  6. xpk/commands/cluster.py +808 -0
  7. xpk/commands/cluster_gcluster.py +269 -0
  8. xpk/commands/common.py +44 -0
  9. xpk/commands/config.py +29 -0
  10. xpk/commands/info.py +243 -0
  11. xpk/commands/inspector.py +357 -0
  12. xpk/commands/job.py +199 -0
  13. xpk/commands/kind.py +283 -0
  14. xpk/commands/kjob_common.py +44 -0
  15. xpk/commands/run.py +128 -0
  16. xpk/commands/shell.py +140 -0
  17. xpk/commands/storage.py +267 -0
  18. xpk/commands/version.py +27 -0
  19. xpk/commands/workload.py +889 -0
  20. xpk/core/__init__.py +15 -0
  21. xpk/core/blueprint/__init__.py +15 -0
  22. xpk/core/blueprint/blueprint_definitions.py +62 -0
  23. xpk/core/blueprint/blueprint_generator.py +708 -0
  24. xpk/core/capacity.py +185 -0
  25. xpk/core/cluster.py +564 -0
  26. xpk/core/cluster_private.py +200 -0
  27. xpk/core/commands.py +356 -0
  28. xpk/core/config.py +179 -0
  29. xpk/core/docker_container.py +225 -0
  30. xpk/core/docker_image.py +210 -0
  31. xpk/core/docker_manager.py +308 -0
  32. xpk/core/docker_resources.py +350 -0
  33. xpk/core/filestore.py +251 -0
  34. xpk/core/gcloud_context.py +196 -0
  35. xpk/core/gcluster_manager.py +176 -0
  36. xpk/core/gcsfuse.py +50 -0
  37. xpk/core/kjob.py +444 -0
  38. xpk/core/kueue.py +358 -0
  39. xpk/core/monitoring.py +134 -0
  40. xpk/core/nap.py +361 -0
  41. xpk/core/network.py +377 -0
  42. xpk/core/nodepool.py +581 -0
  43. xpk/core/pathways.py +377 -0
  44. xpk/core/ray.py +222 -0
  45. xpk/core/remote_state/__init__.py +15 -0
  46. xpk/core/remote_state/fuse_remote_state.py +99 -0
  47. xpk/core/remote_state/remote_state_client.py +38 -0
  48. xpk/core/resources.py +238 -0
  49. xpk/core/scheduling.py +253 -0
  50. xpk/core/storage.py +581 -0
  51. xpk/core/system_characteristics.py +1432 -0
  52. xpk/core/vertex.py +105 -0
  53. xpk/core/workload.py +341 -0
  54. xpk/core/workload_decorators/__init__.py +15 -0
  55. xpk/core/workload_decorators/rdma_decorator.py +129 -0
  56. xpk/core/workload_decorators/storage_decorator.py +52 -0
  57. xpk/core/workload_decorators/tcpxo_decorator.py +190 -0
  58. xpk/main.py +75 -0
  59. xpk/parser/__init__.py +15 -0
  60. xpk/parser/batch.py +43 -0
  61. xpk/parser/cluster.py +662 -0
  62. xpk/parser/common.py +259 -0
  63. xpk/parser/config.py +49 -0
  64. xpk/parser/core.py +135 -0
  65. xpk/parser/info.py +64 -0
  66. xpk/parser/inspector.py +65 -0
  67. xpk/parser/job.py +147 -0
  68. xpk/parser/kind.py +95 -0
  69. xpk/parser/run.py +47 -0
  70. xpk/parser/shell.py +59 -0
  71. xpk/parser/storage.py +316 -0
  72. xpk/parser/validators.py +39 -0
  73. xpk/parser/version.py +23 -0
  74. xpk/parser/workload.py +726 -0
  75. xpk/templates/__init__.py +15 -0
  76. xpk/templates/storage.yaml +13 -0
  77. xpk/utils/__init__.py +15 -0
  78. xpk/utils/console.py +55 -0
  79. xpk/utils/file.py +82 -0
  80. xpk/utils/gcs_utils.py +125 -0
  81. xpk/utils/kubectl.py +57 -0
  82. xpk/utils/network.py +168 -0
  83. xpk/utils/objects.py +88 -0
  84. xpk/utils/templates.py +28 -0
  85. xpk/utils/validation.py +80 -0
  86. xpk/utils/yaml.py +30 -0
  87. xpk-0.0.1.dist-info/LICENSE +202 -0
  88. xpk-0.0.1.dist-info/METADATA +1498 -0
  89. xpk-0.0.1.dist-info/RECORD +92 -0
  90. xpk-0.0.1.dist-info/WHEEL +5 -0
  91. xpk-0.0.1.dist-info/entry_points.txt +2 -0
  92. xpk-0.0.1.dist-info/top_level.txt +1 -0
xpk/core/filestore.py ADDED
@@ -0,0 +1,251 @@
1
+ """
2
+ Copyright 2025 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ from enum import Enum
18
+
19
+ from google.cloud import filestore_v1
20
+ from google.cloud.exceptions import GoogleCloudError
21
+ from google.cloud.filestore_v1.types import (
22
+ FileShareConfig,
23
+ Instance,
24
+ NetworkConfig,
25
+ )
26
+
27
+ from ..utils import templates
28
+ from ..utils.console import xpk_exit, xpk_print
29
+ from .cluster import zone_to_region
30
+
31
# Template locations handed to templates.load() when FilestoreClient builds
# the PersistentVolume, PersistentVolumeClaim and StorageClass manifests.
# NOTE(review): the leading "/../" suggests these are appended to a base
# directory inside templates.load() - confirm in utils/templates.py.
FS_PV_PATH = "/../templates/filestore-pv.yaml"
FS_PVC_PATH = "/../templates/filestore-pvc.yaml"
FS_SC_PATH = "/../templates/filestore-sc.yaml"
34
+
35
+
36
class Availability(Enum):
    """Availability scope of a Filestore tier.

    The TIERS mapping assigns one of these members to every tier name, and
    FilestoreClient.create_instance uses it to choose between the zone and the
    region as the instance location.
    """

    ZONAL = "Zonal"
    REGIONAL = "Regional"
39
+
40
+
41
# Maps each supported Filestore tier name to its availability scope.
# FilestoreClient.create_instance looks the tier up here: ZONAL tiers are
# created under the client's zone, REGIONAL tiers under its region.
TIERS = {
    "BASIC_HDD": Availability.ZONAL,
    "BASIC_SSD": Availability.ZONAL,
    "ZONAL": Availability.ZONAL,
    "REGIONAL": Availability.REGIONAL,
    "ENTERPRISE": Availability.REGIONAL,
}
48
+
49
+
50
def get_storage_class_name(storage_name: str) -> str:
    """Return the StorageClass name derived from ``storage_name``."""
    suffix = "sc"
    return "{}-{}".format(storage_name, suffix)
52
+
53
+
54
def get_pv_name(storage_name: str) -> str:
    """Return the PersistentVolume name derived from ``storage_name``."""
    suffix = "pv"
    return "{}-{}".format(storage_name, suffix)
56
+
57
+
58
def get_pvc_name(storage_name: str) -> str:
    """Return the PersistentVolumeClaim name derived from ``storage_name``."""
    suffix = "pvc"
    return "{}-{}".format(storage_name, suffix)
60
+
61
+
62
class FilestoreClient:
    """FilestoreClient is a class for interacting with GCP filestore instances.

    Attributes:
      zone: zone given at construction; location used for zonal tiers.
      region: region derived from ``zone`` with ``zone_to_region``; location
        used for regional tiers.
      name: instance id (the short name, not the full resource path).
      project: project id used when building resource names.
      instance: cached Filestore ``Instance``; ``None`` until it has been
        loaded with ``load_instance`` or created with ``create_instance``.
    """

    def __init__(
        self,
        zone: str,
        name: str,
        project: str,
    ) -> None:
        self.zone = zone
        self.region = zone_to_region(zone)
        self.name = name
        self.project = project
        self._client = filestore_v1.CloudFilestoreManagerClient()
        self.instance: Instance | None = None

    def get_instance(self) -> Instance | None:
        """Get existing Filestore instance.

        Lists the instances under both the zonal and the regional parent and
        looks for the one whose full resource name matches this client. Prints
        the error and exits with code 1 when listing fails.

        Returns:
          The matching instance, or None when neither listing contains it.
        """
        zonal_request = filestore_v1.ListInstancesRequest(
            parent=self.get_parent(self.zone)
        )
        regional_request = filestore_v1.ListInstancesRequest(
            parent=self.get_parent(self.region)
        )
        try:
            zonal_instances = self._client.list_instances(zonal_request)
            regional_instances = self._client.list_instances(regional_request)
        except GoogleCloudError as e:
            xpk_print(f"Exception while trying to list instances {e}")
            xpk_exit(1)

        fullname_zonal = self.get_instance_fullname(self.zone)
        fullname_regional = self.get_instance_fullname(self.region)

        # The zonal listing is searched first, so a zonal match wins.
        for instance in zonal_instances:
            if instance.name == fullname_zonal:
                return instance  # pytype: disable=bad-return-type

        for instance in regional_instances:
            if instance.name == fullname_regional:
                return instance  # pytype: disable=bad-return-type

        return None

    def check_instance_exists(self) -> bool:
        """Check if Filestore instance exists"""
        return self.get_instance() is not None

    def load_instance(self) -> None:
        """Populate ``self.instance`` from the API unless it is already cached."""
        if self.instance is None:
            self.instance = self.get_instance()

    def _require_instance(self) -> Instance:
        """Return the cached instance, loading it first; exit if it is missing."""
        self.load_instance()
        if self.instance is None:
            # Without this guard callers fail later with an opaque
            # AttributeError on None.
            xpk_print(
                f"Filestore instance {self.name} was not found in {self.zone} or"
                f" {self.region}."
            )
            xpk_exit(1)
        return self.instance

    def get_instance_location(self) -> str:
        """Get Filestore instance's location.

        The location is the fourth path segment of the instance's resource name
        (``projects/<project>/locations/<location>/instances/<name>``). Exits
        with code 1 when the instance cannot be found.
        """
        instance = self._require_instance()
        return str(instance.name.split("/")[3])

    def create_instance(
        self,
        vol: str,
        size: int,
        tier: str,
        connect_mode=None,
        reserved_ip_range=None,
        network: str = "default",
        description: str = "XPK created filestore instance",
        kms_key_name=None,
        source_backup=None,
        nfs_export_options=None,
        modes=None,
    ) -> None:
        """Create new Filestore instance and cache it in ``self.instance``.

        Args:
          vol: name of the file share.
          size: capacity of the file share, passed as ``capacity_gb``.
          tier: tier name; must be a key of ``TIERS``.
          connect_mode: forwarded to ``NetworkConfig``.
          reserved_ip_range: forwarded to ``NetworkConfig``.
          network: network name forwarded to ``NetworkConfig``.
          description: instance description.
          kms_key_name: forwarded to ``Instance``.
          source_backup: forwarded to ``FileShareConfig``.
          nfs_export_options: forwarded to ``FileShareConfig``.
          modes: forwarded to ``NetworkConfig``.

        Raises:
          KeyError: if ``tier`` is not a key of ``TIERS``.
        """
        # Zonal tiers live under the zone, every other tier under the region.
        location = (
            self.zone if TIERS[tier] is Availability.ZONAL else self.region
        )

        file_shares = [
            FileShareConfig(
                name=vol,
                capacity_gb=size,
                source_backup=source_backup,
                nfs_export_options=nfs_export_options,
            )
        ]
        networks = [
            NetworkConfig(
                network=network,
                modes=modes,
                reserved_ip_range=reserved_ip_range,
                connect_mode=connect_mode,
            )
        ]
        request = filestore_v1.CreateInstanceRequest(
            parent=self.get_parent(location),
            instance_id=self.name,
            instance=Instance(
                description=description,
                tier=tier,
                kms_key_name=kms_key_name,
                file_shares=file_shares,
                networks=networks,
            ),
        )

        self.instance = None
        try:
            # The initial call can fail as well as the wait, so both are
            # covered by the same handler.
            operation = self._client.create_instance(request=request)
            xpk_print("Waiting for filestore creation to complete...")
            self.instance = operation.result()
        except GoogleCloudError as e:
            xpk_print(f"Error while creating Filestore instance: {e}")
            xpk_exit(1)
        xpk_print(
            f"Filestore instance {self.get_instance_fullname(location)} created"
        )

    def delete_filestore_instance(self):
        """Delete the Filestore instance and wait for the operation to finish.

        Prints the error and exits with code 1 when the deletion fails.
        """
        name = self.get_instance_fullname()
        request = filestore_v1.DeleteInstanceRequest(name=name)

        try:
            # The initial call can fail as well as the wait, so both are
            # covered by the same handler.
            operation = self._client.delete_instance(request)
            xpk_print("Waiting for filestore deletion to complete...")
            operation.result()
        except GoogleCloudError as e:
            xpk_print(f"Error while deleting Filestore instance: {e}")
            xpk_exit(1)
        xpk_print(f"Filestore instance {name} deleted")

    def create_sc(self, name: str, network: str) -> dict:
        """Create a yaml representing filestore StorageClass."""
        instance = self._require_instance()
        data = templates.load(FS_SC_PATH)
        data["metadata"]["name"] = get_storage_class_name(name)
        data["parameters"]["tier"] = instance.tier.name
        data["parameters"][
            "network"
        ] = f"projects/{self.project}/global/networks/{network}"
        return data

    def create_pv(self, name: str, vol: str, access_mode: str) -> dict:
        """Create a yaml representing filestore PersistentVolume."""
        instance = self._require_instance()
        data = templates.load(FS_PV_PATH)
        data["metadata"]["name"] = get_pv_name(name)
        spec = data["spec"]
        spec["storageClassName"] = get_storage_class_name(name)
        spec["capacity"]["storage"] = instance.file_shares[0].capacity_gb
        spec["accessModes"] = [access_mode]
        spec["csi"]["volumeHandle"] = (
            f"{self.get_instance_fullname()}/volumes/{vol}"
        )
        # Only the first network's first address is written to the manifest.
        spec["csi"]["volumeAttributes"]["ip"] = instance.networks[
            0
        ].ip_addresses[0]
        spec["csi"]["volumeAttributes"]["volume"] = vol
        return data

    def create_pvc(self, name: str, access_mode: str) -> dict:
        """Create a yaml representing filestore PersistentVolumeClaim."""
        instance = self._require_instance()
        data = templates.load(FS_PVC_PATH)
        data["metadata"]["name"] = get_pvc_name(name)
        spec = data["spec"]
        spec["accessModes"] = [access_mode]
        spec["storageClassName"] = get_storage_class_name(name)
        spec["volumeName"] = get_pv_name(name)
        spec["resources"]["requests"]["storage"] = (
            instance.file_shares[0].capacity_gb
        )
        return data

    def manifest(
        self, name: str, vol: str, access_mode: str, network: str
    ) -> list[dict]:
        """Build the PersistentVolume, PersistentVolumeClaim and StorageClass.

        Returns:
          The three manifests in the order [pv, pvc, sc].
        """
        # Fail early with a clear message when the instance does not exist.
        self._require_instance()
        pv = self.create_pv(name, vol, access_mode)
        pvc = self.create_pvc(name, access_mode)
        sc = self.create_sc(name, network)
        return [pv, pvc, sc]

    def get_parent(self, location: str | None = None) -> str:
        """Get the Filestore's parent's name.

        Args:
          location: zone or region; when None the location of the existing
            instance is looked up.
        """
        if location is None:
            location = self.get_instance_location()
        return f"projects/{self.project}/locations/{location}"

    def get_instance_fullname(self, location: str | None = None) -> str:
        """Get the Filestore's full name.

        Args:
          location: zone or region; when None the location of the existing
            instance is looked up.
        """
        if location is None:
            location = self.get_instance_location()
        return (
            f"projects/{self.project}/locations/{location}/instances/{self.name}"
        )
xpk/core/gcloud_context.py ADDED
@@ -0,0 +1,196 @@
1
+ """
2
+ Copyright 2025 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ import subprocess
18
+ import sys
19
+ from dataclasses import dataclass
20
+
21
+ from ..utils.console import xpk_print
22
+ from .commands import run_command_for_value
23
+
24
+
25
def get_project():
    """Get GCE project from `gcloud config get project`.

    Exits the process with an explanatory message when gcloud prints no
    project.

    Returns:
      The project name.
    """
    gcloud_result = subprocess.run(
        ['gcloud', 'config', 'get', 'project'], check=True, capture_output=True
    )
    stdout_text = gcloud_result.stdout.decode().strip()
    # The project name lives on the last line of the output.
    project = stdout_text.rpartition('\n')[2]
    if not project:
        sys.exit(
            'You must specify the project in the project flag or set it with'
            " 'gcloud config set project <project>'"
        )
    return project
43
+
44
+
45
def get_zone():
    """Get GCE zone from `gcloud config get compute/zone`.

    Exits the process with an explanatory message when gcloud prints no zone.

    Returns:
      The zone name.
    """
    gcloud_result = subprocess.run(
        ['gcloud', 'config', 'get', 'compute/zone'],
        check=True,
        capture_output=True,
    )
    stdout_text = gcloud_result.stdout.decode().strip()
    # The zone name lives on the last line of the output.
    zone = stdout_text.rpartition('\n')[2]
    if not zone:
        sys.exit(
            "You must specify the zone in the zone flag or set it with 'gcloud"
            " config set compute/zone <zone>'"
        )
    return zone
63
+
64
+
65
def add_zone_and_project(args):
    """Fill in ``args.project`` and ``args.zone`` from gcloud when unset.

    Values already present on ``args`` are kept; the resulting pair is printed.

    Args:
      args: user provided arguments for running the command.
    """
    # ``or`` short-circuits, so gcloud is only queried for a missing value.
    args.project = args.project or get_project()
    args.zone = args.zone or get_zone()
    xpk_print(f'Working on {args.project} and {args.zone}')
76
+
77
+
78
def zone_to_region(zone) -> str:
    """Convert a zone name to its region name.

    The region is the first two dash-separated terms of the zone.

    Args:
      zone: zone name.

    Returns:
      The region name.

    Raises:
      IndexError: if ``zone`` contains no dash.
    """
    # Only the first two terms matter, so stop splitting after them.
    terms = zone.split('-', 2)
    return f'{terms[0]}-{terms[1]}'  # pytype: disable=bad-return-type
89
+
90
+
91
@dataclass
class GkeServerConfig:
    """Stores the valid gke versions based on gcloud recommendations.

    Attributes:
      default_rapid_gke_version: default version gcloud reports for the RAPID
        channel.
      valid_versions: versions gcloud reports as valid for the RAPID channel.
    """

    default_rapid_gke_version: str
    valid_versions: set[str]
97
+
98
+
99
def get_gke_server_config(args) -> tuple[int, GkeServerConfig | None]:
    """Determine the GKE versions supported by gcloud currently.

    Runs `gcloud container get-server-config` twice against the RAPID channel:
    once for the default version and once for the list of valid versions.

    Args:
      args: user provided arguments for running the command.

    Returns:
      Tuple of
      int: 0 if successful, otherwise the failing command's return code.
      GkeServerConfig: stores valid gke version to use in node pool and
        cluster; None when a command failed.
    """
    rapid_channel_command = (
        'gcloud container get-server-config'
        f' --project={args.project} --region={zone_to_region(args.zone)}'
        ' --flatten="channels" --filter="channels.channel=RAPID"'
    )
    base_command_description = 'Determine server supported GKE versions for '

    # (field requested from gcloud, human readable description suffix)
    queries = [
        ('channels.defaultVersion', 'default rapid gke version'),
        ('channels.validVersions', 'valid versions'),
    ]
    command_outputs = []

    for field, description_suffix in queries:
        command_description = base_command_description + description_suffix
        return_code, cmd_output = run_command_for_value(
            f'{rapid_channel_command} --format="value({field})"',
            command_description,
            args,
            hide_error=True,
        )
        if return_code != 0:
            xpk_print(f'Unable to get server config for {command_description}.')
            return return_code, None
        command_outputs.append(cmd_output)

    default_version_output, valid_versions_output = command_outputs
    # Strip every entry: the raw output ends with a newline, which would
    # otherwise stay attached to the last version and break membership checks.
    valid_versions = {
        version.strip() for version in valid_versions_output.split(';')
    }
    valid_versions.discard('')

    return 0, GkeServerConfig(
        default_rapid_gke_version=default_version_output.strip(),
        valid_versions=valid_versions,
    )
154
+
155
+
156
def get_gke_control_plane_version(
    args, gke_server_config: GkeServerConfig
) -> tuple[int, str | None]:
    """Determine gke control plane version for cluster creation.

    Args:
      args: user provided arguments for running the command.
      gke_server_config: holds valid gke versions and recommended default
        version.

    Returns:
      Tuple of
      int: 0 if successful and 1 otherwise.
      str: gke control plane version to use; None when the version is invalid.
    """
    default_version = gke_server_config.default_rapid_gke_version

    # A user-provided gke version overrides the recommended default.
    master_gke_version = (
        args.gke_version if args.gke_version is not None else default_version
    )

    is_valid_version = master_gke_version in gke_server_config.valid_versions
    if is_valid_version:
        return 0, master_gke_version

    xpk_print(
        f'Planned GKE Version: {master_gke_version}\n Valid Versions:'
        f'\n{gke_server_config.valid_versions}\nRecommended / Default GKE'
        f' Version: {default_version}'
    )
    # The two fragments need a separating space so the sentences do not run
    # together in the printed message.
    xpk_print(
        f'Error: Planned GKE Version {master_gke_version} is not valid.'
        f' Checks failed: Is Version Valid: {is_valid_version}'
    )
    xpk_print(
        'Please select a gke version from the above list using --gke-version=x'
        ' argument or rely on the default gke version:'
        f' {default_version}'
    )
    return 1, None
xpk/core/gcluster_manager.py ADDED
@@ -0,0 +1,176 @@
1
+ """
2
+ Copyright 2024 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ from .docker_manager import CommandRunner
18
+ from ..utils.console import xpk_exit, xpk_print
19
+ from .remote_state.remote_state_client import RemoteStateClient
20
+
21
# Paths, names and command prefixes related to Cluster Toolkit (gcluster).
# NOTE(review): within this module only the three gcluster_*_command strings
# are referenced (by GclusterManager); the remaining values are presumably
# consumed by other modules - confirm before renaming or removing any of them.
xpk_gcloud_cfg_path = '~/gcloud/cfg'
xpk_deployment_dir = '/deployment'
# gcluster sub-commands that GclusterManager completes with a deployment path.
gcluster_deploy_command = 'gcluster deploy'
gcluster_create_command = 'gcluster create'
gcluster_destroy_command = 'gcluster destroy'
blueprint_file_name = 'xpk_blueprint.yaml'
deployment_module = '/out/xpk-deployment'
a3_utils_dir_name = 'a3-mega-xpk'
config_map_repo_path = 'src/xpk/blueprints/a3-mega-xpk/config-map.yaml.tftpl'
kueue_config_repo_path = (
    'src/xpk/blueprints/a3-mega-xpk/kueue-xpk-configuration.yaml.tftpl'
)
33
+
34
+
35
class GclusterManager:
    """Manager is a class responsible for running cluster toolkit commands.

    Attributes:
      - gcluster_command_runner (CommandRunner) : instance of class implementing
        CommandRunner abstract methods.
      - remote_state_client (RemoteStateClient | None) : client used to upload
        and download remote state; None when no remote state is configured.
    Methods:
      - deploy : run a deployment process of cluster toolkit. This method will
        invoke gcluster create and then gcluster deploy commands.
      - destroy_deployment : run gcluster command to destroy existing
        deployment.
    """

    def __init__(
        self,
        gcluster_command_runner: CommandRunner,
        remote_state_client: RemoteStateClient | None,
    ) -> None:
        self.gcluster_command_runner = gcluster_command_runner
        self.remote_state_client = remote_state_client

    def _run_create_deployment_cmd(
        self, blueprint_container_path: str, prefix: str = ''
    ) -> None:
        """Run `gcluster create` for the blueprint into the deployment path."""
        xpk_print('Creating deployment resources...')
        cluster_create_cmd = (
            f'{gcluster_create_command} -o {self._get_deployment_path(prefix)}'
            f' {blueprint_container_path} -w --force'
        )
        self.gcluster_command_runner.run_command(cluster_create_cmd)
        xpk_print('Creating deployment resources completed.')

    def _run_deploy_cmd(
        self,
        deployment_name: str,
        auto_approve: bool,
        dry_run: bool,
        prefix: str = '',
    ) -> None:
        """Run `gcluster deploy` for the deployment; a dry run executes nothing."""
        xpk_print('Deploying resources...')
        deployment_path = f'{self._get_deployment_path(prefix)}/{deployment_name}'
        deploy_cmd = f'{gcluster_deploy_command} {deployment_path}'
        if auto_approve is True:
            deploy_cmd += ' --auto-approve'
        if dry_run is True:
            return
        self.gcluster_command_runner.run_command(deploy_cmd)
        xpk_print('Deployment completed.')

    def deploy(
        self,
        blueprint_path: str,
        deployment_name: str,
        prefix: str = '',
        auto_approve: bool = True,
        dry_run: bool = False,
    ) -> None:
        """Provision a new cluster using Cluster Toolkit.

        Invokes gcluster create and then gcluster deploy commands. The files
        staged or created while running gcluster are managed by
        gcluster_command_runner in its working directory.

        Args:
          blueprint_path (str): path pointing to blueprint which will be
            deployed.
          deployment_name (str): name of the deployment.
          prefix (str, optional): sub-directory of the deployments directory to
            use. Defaults to '' (the deployments directory itself).
          auto_approve (bool, optional): If set to true deployment command will
            be auto approved. Currently only True is supported. Defaults to
            True.
          dry_run (bool, optional): If set to True gcluster will not deploy.
            Defaults to False.
        Returns:
          None
        """
        xpk_print(f'Deploying blueprint from path {blueprint_path} ...')
        self._run_create_deployment_cmd(
            blueprint_container_path=blueprint_path, prefix=prefix
        )
        self._run_deploy_cmd(
            deployment_name=deployment_name,
            prefix=prefix,
            auto_approve=auto_approve,
            dry_run=dry_run,
        )
        xpk_print('Deploying blueprint completed!')

    def _run_destroy_command(
        self,
        deployment_name: str,
        prefix: str = '',
        auto_approve: bool = True,
        dry_run: bool = False,
    ) -> None:
        """Run `gcluster destroy`; a dry run only prints the command."""
        deployment_path = f'{self._get_deployment_path(prefix)}/{deployment_name}'
        destroy_cmd = f'{gcluster_destroy_command} {deployment_path}'
        if auto_approve is True:
            destroy_cmd += ' --auto-approve'
        if dry_run is True:
            xpk_print(f'executing command {destroy_cmd}')
            return
        self.gcluster_command_runner.run_command(destroy_cmd)

    def _get_deployment_path(self, prefix: str = '') -> str:
        """Return 'deployments', or 'deployments/<prefix>' for a non-empty prefix."""
        if prefix == '':
            return 'deployments'
        return f'deployments/{prefix}'

    def destroy_deployment(self, deployment_name: str, prefix: str = '') -> None:
        """Destroy deployment.

        Args:
          deployment_name (str): name of deployment to destroy.
          prefix (str, optional): sub-directory of the deployments directory
            that holds the deployment. Defaults to ''.
        """
        xpk_print(f'Destroying {deployment_name} started...')
        self._run_destroy_command(deployment_name, prefix=prefix)
        xpk_print(f'Destroying {deployment_name} completed!')

    def stage_files(
        self, blueprint_file: str, blueprint_dependencies: str, prefix: str = ''
    ) -> str:
        """Uploads blueprint file and directory to gcluster working directory.

        Args:
          blueprint_file (str): blueprint file to upload.
          blueprint_dependencies (str): directory to upload alongside the
            blueprint; skipped when empty.
          prefix (str, optional): forwarded to the command runner's upload
            methods. Defaults to ''.

        Returns:
          The value returned by the runner for the uploaded blueprint file.
        """
        xpk_print(
            "Staging (sending) blueprint file to gcluster's working directory..."
        )
        staged_blueprint = self.gcluster_command_runner.upload_file_to_working_dir(
            blueprint_file, prefix
        )
        if len(blueprint_dependencies) > 0:
            self.gcluster_command_runner.upload_directory_to_working_dir(
                blueprint_dependencies, prefix
            )
        xpk_print('Staging blueprint completed!')
        xpk_print(f"File path in gcluster's working directory: {staged_blueprint}")
        return staged_blueprint

    def upload_state(self) -> None:
        """Upload state through the remote state client; exit if none is set."""
        xpk_print('Uploading state.')
        if self.remote_state_client is None:
            xpk_print('No remote state defined')
            xpk_exit(1)
        self.remote_state_client.upload_state()

    def download_state(self) -> None:
        """Download remote state when it exists; exit if no client is set."""
        if self.remote_state_client is None:
            xpk_print('No remote state defined')
            xpk_exit(1)

        if self.remote_state_client.check_remote_state_exists():
            self.remote_state_client.download_state()
        else:
            # Only report a missing state when nothing was downloaded.
            xpk_print('Remote state not found.')
xpk/core/gcsfuse.py ADDED
@@ -0,0 +1,50 @@
1
+ """
2
+ Copyright 2024 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ from ..utils import templates
18
+
19
# Template locations handed to templates.load() by create_pv and create_pvc.
# NOTE(review): the leading "/../" suggests these are appended to a base
# directory inside templates.load() - confirm in utils/templates.py.
FUSE_PV_PATH = "/../templates/fuse-pv.yaml"
FUSE_PVC_PATH = "/../templates/fuse-pvc.yaml"
21
+
22
+
23
def create_pv(name: str, size: int, bucket: str) -> dict:
    """Build the GCS FUSE PersistentVolume manifest from its template.

    Args:
      name (str): base name; the volume is called "<name>-pv"
      size (int): capacity, written as "<size>Gi"
      bucket (str): name of the storage bucket, used as the CSI volume handle
    """
    pv_manifest = templates.load(FUSE_PV_PATH)
    pv_manifest["metadata"]["name"] = "{}-pv".format(name)
    spec = pv_manifest["spec"]
    spec["capacity"]["storage"] = "{}Gi".format(size)
    spec["csi"]["volumeHandle"] = bucket
    return pv_manifest
29
+
30
+
31
def create_pvc(name: str, size: int) -> dict:
    """Build the GCS FUSE PersistentVolumeClaim manifest from its template.

    Args:
      name (str): base name; the claim is called "<name>-pvc" and is bound to
        the volume "<name>-pv"
      size (int): requested storage, written as "<size>Gi"
    """
    pvc_manifest = templates.load(FUSE_PVC_PATH)
    pvc_manifest["metadata"]["name"] = "{}-pvc".format(name)
    spec = pvc_manifest["spec"]
    spec["resources"]["requests"]["storage"] = "{}Gi".format(size)
    spec["volumeName"] = "{}-pv".format(name)
    return pvc_manifest
37
+
38
+
39
def manifest(name: str, bucket: str, size: int) -> list[dict]:
    """Creates the GCS FUSE manifests.

    Args:
      name (str): base name of the volumes
      bucket (str): name of the storage bucket
      size (int): size of the storage

    Returns:
      The PersistentVolume and PersistentVolumeClaim manifests, in that order.
    """
    return [create_pv(name, size, bucket), create_pvc(name, size)]