xpk 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. xpk/__init__.py +15 -0
  2. xpk/api/__init__.py +15 -0
  3. xpk/api/storage_crd.yaml +52 -0
  4. xpk/commands/__init__.py +15 -0
  5. xpk/commands/batch.py +131 -0
  6. xpk/commands/cluster.py +808 -0
  7. xpk/commands/cluster_gcluster.py +269 -0
  8. xpk/commands/common.py +44 -0
  9. xpk/commands/config.py +29 -0
  10. xpk/commands/info.py +243 -0
  11. xpk/commands/inspector.py +357 -0
  12. xpk/commands/job.py +199 -0
  13. xpk/commands/kind.py +283 -0
  14. xpk/commands/kjob_common.py +44 -0
  15. xpk/commands/run.py +128 -0
  16. xpk/commands/shell.py +140 -0
  17. xpk/commands/storage.py +267 -0
  18. xpk/commands/version.py +27 -0
  19. xpk/commands/workload.py +889 -0
  20. xpk/core/__init__.py +15 -0
  21. xpk/core/blueprint/__init__.py +15 -0
  22. xpk/core/blueprint/blueprint_definitions.py +62 -0
  23. xpk/core/blueprint/blueprint_generator.py +708 -0
  24. xpk/core/capacity.py +185 -0
  25. xpk/core/cluster.py +564 -0
  26. xpk/core/cluster_private.py +200 -0
  27. xpk/core/commands.py +356 -0
  28. xpk/core/config.py +179 -0
  29. xpk/core/docker_container.py +225 -0
  30. xpk/core/docker_image.py +210 -0
  31. xpk/core/docker_manager.py +308 -0
  32. xpk/core/docker_resources.py +350 -0
  33. xpk/core/filestore.py +251 -0
  34. xpk/core/gcloud_context.py +196 -0
  35. xpk/core/gcluster_manager.py +176 -0
  36. xpk/core/gcsfuse.py +50 -0
  37. xpk/core/kjob.py +444 -0
  38. xpk/core/kueue.py +358 -0
  39. xpk/core/monitoring.py +134 -0
  40. xpk/core/nap.py +361 -0
  41. xpk/core/network.py +377 -0
  42. xpk/core/nodepool.py +581 -0
  43. xpk/core/pathways.py +377 -0
  44. xpk/core/ray.py +222 -0
  45. xpk/core/remote_state/__init__.py +15 -0
  46. xpk/core/remote_state/fuse_remote_state.py +99 -0
  47. xpk/core/remote_state/remote_state_client.py +38 -0
  48. xpk/core/resources.py +238 -0
  49. xpk/core/scheduling.py +253 -0
  50. xpk/core/storage.py +581 -0
  51. xpk/core/system_characteristics.py +1432 -0
  52. xpk/core/vertex.py +105 -0
  53. xpk/core/workload.py +341 -0
  54. xpk/core/workload_decorators/__init__.py +15 -0
  55. xpk/core/workload_decorators/rdma_decorator.py +129 -0
  56. xpk/core/workload_decorators/storage_decorator.py +52 -0
  57. xpk/core/workload_decorators/tcpxo_decorator.py +190 -0
  58. xpk/main.py +75 -0
  59. xpk/parser/__init__.py +15 -0
  60. xpk/parser/batch.py +43 -0
  61. xpk/parser/cluster.py +662 -0
  62. xpk/parser/common.py +259 -0
  63. xpk/parser/config.py +49 -0
  64. xpk/parser/core.py +135 -0
  65. xpk/parser/info.py +64 -0
  66. xpk/parser/inspector.py +65 -0
  67. xpk/parser/job.py +147 -0
  68. xpk/parser/kind.py +95 -0
  69. xpk/parser/run.py +47 -0
  70. xpk/parser/shell.py +59 -0
  71. xpk/parser/storage.py +316 -0
  72. xpk/parser/validators.py +39 -0
  73. xpk/parser/version.py +23 -0
  74. xpk/parser/workload.py +726 -0
  75. xpk/templates/__init__.py +15 -0
  76. xpk/templates/storage.yaml +13 -0
  77. xpk/utils/__init__.py +15 -0
  78. xpk/utils/console.py +55 -0
  79. xpk/utils/file.py +82 -0
  80. xpk/utils/gcs_utils.py +125 -0
  81. xpk/utils/kubectl.py +57 -0
  82. xpk/utils/network.py +168 -0
  83. xpk/utils/objects.py +88 -0
  84. xpk/utils/templates.py +28 -0
  85. xpk/utils/validation.py +80 -0
  86. xpk/utils/yaml.py +30 -0
  87. xpk-0.0.1.dist-info/LICENSE +202 -0
  88. xpk-0.0.1.dist-info/METADATA +1498 -0
  89. xpk-0.0.1.dist-info/RECORD +92 -0
  90. xpk-0.0.1.dist-info/WHEEL +5 -0
  91. xpk-0.0.1.dist-info/entry_points.txt +2 -0
  92. xpk-0.0.1.dist-info/top_level.txt +1 -0
@@ -0,0 +1,269 @@
1
+ """
2
+ Copyright 2024 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ import os
18
+
19
+ from ..core.remote_state.remote_state_client import RemoteStateClient
20
+ from ..core.remote_state.fuse_remote_state import FuseStateClient
21
+ from ..core.blueprint.blueprint_generator import (
22
+ BlueprintGenerator,
23
+ BlueprintGeneratorOutput,
24
+ a3mega_device_type,
25
+ a3ultra_device_type,
26
+ supported_device_types,
27
+ )
28
+ from ..core.commands import run_command_for_value
29
+ from ..core.capacity import get_capacity_type
30
+ from ..core.docker_manager import DockerManager
31
+ from ..core.gcloud_context import zone_to_region
32
+ from ..core.gcluster_manager import GclusterManager
33
+ from ..utils.console import xpk_exit, xpk_print
34
+ from ..utils.file import ensure_directory_exists
35
+ from ..utils.network import all_IPs_cidr
36
+ from ..utils.objects import hash_string
37
+ from ..core.cluster import get_cluster_credentials
38
+ from ..core.kjob import apply_kjob_crds, prepare_kjob
39
+
40
# Locations used by the gcluster-based cluster commands. The two xpkclusters
# paths are made absolute relative to the current working directory when this
# module is imported; the gcloud path is expanded from the user's home.
blueprints_path = os.path.abspath(os.path.join('xpkclusters', 'blueprints'))
gcluster_working_dir = os.path.abspath(
    os.path.join('xpkclusters', 'gcluster-out')
)
gcloud_cfg_path = os.path.expanduser('~/.config/gcloud')
43
+
44
+
45
def cluster_create(args) -> None:
  """Create a cluster through Cluster Toolkit (gcluster) and set up kjob.

  Steps, in order: check gcloud credentials, prepare local directories,
  build the gcluster manager (with a remote state client when a state bucket
  is given), generate the blueprint, stage and deploy it, call
  gcm.upload_state() when a state bucket is given, fetch cluster credentials
  and finally apply the kjob CRDs and kjob configuration.

  Args:
    args: user provided arguments for running the command.

  Returns:
    None. The function finishes by calling xpk_exit: with 0 on success, with
    1 when no blueprint could be generated, or with the error code of the
    failing kjob step.
  """
  check_gcloud_authenticated()
  prepare_directories()
  region = zone_to_region(args.zone)

  # unique_name is built from a shortened hash, so a name collision is still
  # possible.
  unique_name = get_unique_name(args.project, region, args.cluster)
  # Blueprints and deployments are stored under a prefix directory to reduce
  # name collisions. Ex.: blueprints/{prefix}/cluster_name_hash
  prefix = get_prefix_path(args.project, region)

  use_remote_state = args.cluster_state_gcs_bucket is not None
  remote_state_client = None
  if use_remote_state:
    remote_state_client = FuseStateClient(
        bucket=args.cluster_state_gcs_bucket,
        state_directory=os.path.join(blueprints_path, prefix, unique_name),
        prefix=prefix,
        cluster=args.cluster,
        deployment_name=unique_name,
    )
  gcm = prepare_gcluster_manager(remote_state_client)

  bp = generate_blueprint(blueprint_name=unique_name, args=args, prefix=prefix)
  if bp is None:
    # generate_blueprint returns None when it has no generator for the
    # requested device type; fail with a message instead of crashing on
    # bp.blueprint_file below.
    xpk_print(
        f'Could not generate a blueprint for device type {args.device_type}.'
    )
    xpk_exit(1)

  # staging: sending the blueprint file(s) to gcluster's working directory
  bp_staged_path = gcm.stage_files(
      blueprint_file=bp.blueprint_file,
      blueprint_dependencies=bp.blueprint_dependencies,
      prefix=prefix,
  )
  gcm.deploy(
      blueprint_path=bp_staged_path,
      deployment_name=unique_name,
      prefix=prefix,
  )
  if use_remote_state:
    gcm.upload_state()

  get_cluster_credentials(args)

  err_code = apply_kjob_crds(args)
  if err_code > 0:
    xpk_exit(err_code)

  err_code = prepare_kjob(args)
  if err_code > 0:
    xpk_exit(err_code)

  xpk_exit(0)
100
+
101
+
102
def cluster_delete(args) -> None:
  """Delete a cluster that was created through Cluster Toolkit (gcluster).

  Rebuilds the blueprint file and dependency paths from the project, region
  and cluster name, calls gcm.download_state() first when a state bucket is
  given, stages the blueprint files and destroys the deployment.

  Args:
    args: user provided arguments for running the command.

  Returns:
    None. The function finishes by calling xpk_exit(0).
  """
  check_gcloud_authenticated()
  prepare_directories()
  region = zone_to_region(args.zone)

  # unique_name is built from a shortened hash, so a name collision is still
  # possible.
  unique_name = get_unique_name(args.project, region, args.cluster)
  # Blueprints and deployments are stored under a prefix directory to reduce
  # name collisions. Ex.: blueprints/{prefix}/cluster_name_hash
  prefix = get_prefix_path(args.project, region)
  # One path serves as the remote state directory, the blueprint dependency
  # directory and (with a .yaml suffix) the blueprint file.
  blueprint_dir = os.path.join(blueprints_path, prefix, unique_name)

  use_remote_state = args.cluster_state_gcs_bucket is not None
  remote_state_client = None
  if use_remote_state:
    remote_state_client = FuseStateClient(
        bucket=args.cluster_state_gcs_bucket,
        state_directory=blueprint_dir,
        prefix=prefix,
        cluster=args.cluster,
        deployment_name=unique_name,
    )
  gcm = prepare_gcluster_manager(remote_state_client)

  if use_remote_state:
    gcm.download_state()

  bp = BlueprintGeneratorOutput(
      blueprint_file=blueprint_dir + '.yaml',
      blueprint_dependencies=blueprint_dir,
  )

  gcm.stage_files(
      blueprint_file=bp.blueprint_file,
      blueprint_dependencies=bp.blueprint_dependencies,
      prefix=prefix,
  )
  gcm.destroy_deployment(deployment_name=unique_name, prefix=prefix)

  xpk_exit(0)
151
+
152
+
153
def created_by_gcluster(args) -> bool:
  """Report whether the blueprint generator finds a blueprint for the cluster.

  Args:
    args: user provided arguments; zone, project and cluster are read.

  Returns:
    The result of BlueprintGenerator.blueprint_exists for the cluster's
    unique name and prefix.
  """
  prepare_directories()
  cluster_region = zone_to_region(args.zone)
  deployment_name = get_unique_name(args.project, cluster_region, args.cluster)
  storage_prefix = get_prefix_path(args.project, cluster_region)
  generator = prepare_blueprint_generator()
  return generator.blueprint_exists(deployment_name, storage_prefix)
160
+
161
+
162
def get_unique_name(project_id, region, cluster_name):
  """Return '<cluster_name>-<hash>' for the given project, region and cluster.

  The hash comes from hash_string(..., length=5) applied to the lower-cased
  '<project_id>-<region>-<cluster_name>' string.
  """
  hash_input = f'{project_id}-{region}-{cluster_name}'.lower()
  suffix = hash_string(input_string=hash_input, length=5)
  return '{}-{}'.format(cluster_name, suffix)
167
+
168
+
169
def get_prefix_path(project_id, region):
  """Return the lower-cased '<project_id>-<region>' prefix."""
  return '{}-{}'.format(project_id, region).lower()
171
+
172
+
173
def prepare_directories() -> None:
  """Ensure the blueprints and gcluster working directories exist."""
  for directory in (blueprints_path, gcluster_working_dir):
    ensure_directory_exists(directory)
176
+
177
+
178
def check_gcloud_authenticated():
  """Exit with code 1 when the gcloud config directory does not exist.

  Only the presence of gcloud_cfg_path on disk is checked; nothing is
  validated inside it.
  """
  if os.path.exists(gcloud_cfg_path):
    return

  missing_dir_message = (
      'Failed to find gcloud credential directory.'
      f' {gcloud_cfg_path} {blueprints_path} {gcluster_working_dir}'
  )
  login_hint = (
      'Please authenticate to gcloud ("gcloud auth application-default'
      ' login") and then run your command.'
  )
  xpk_print(missing_dir_message)
  xpk_print(login_hint)
  xpk_exit(1)
189
+
190
+
191
def prepare_gcluster_manager(
    remote_state_client: RemoteStateClient | None,
) -> GclusterManager:
  """Build a GclusterManager that runs gcluster through an initialized DockerManager.

  Args:
    remote_state_client: client handed to the manager for remote state, or
      None when no remote state is used.

  Returns:
    The configured GclusterManager.
  """
  docker_manager = DockerManager(
      working_dir=gcluster_working_dir, gcloud_cfg_path=gcloud_cfg_path
  )
  docker_manager.initialize()
  manager = GclusterManager(
      gcluster_command_runner=docker_manager,
      remote_state_client=remote_state_client,
  )
  return manager
201
+
202
+
203
def prepare_blueprint_generator() -> BlueprintGenerator:
  """Return a BlueprintGenerator that uses blueprints_path as storage path."""
  generator = BlueprintGenerator(storage_path=blueprints_path)
  return generator
205
+
206
+
207
def validate_state_gcs_bucket(args):
  """Exit with the command's error code when describing the state bucket fails.

  Runs `gcloud storage buckets describe` on args.cluster_state_gcs_bucket.

  Args:
    args: user provided arguments for running the command.
  """
  bucket_uri = f'gs://{args.cluster_state_gcs_bucket}'
  describe_command = f'gcloud storage buckets describe {bucket_uri}'
  err_code, _ = run_command_for_value(
      describe_command,
      'Validate remote state bucket existence.',
      global_args=args,
  )
  if err_code == 0:
    return
  xpk_exit(err_code)
218
+
219
+
220
def generate_blueprint(
    blueprint_name, args, prefix=None
) -> BlueprintGeneratorOutput | None:
  """Generate the gcluster blueprint matching args.device_type.

  Exits through xpk_exit when the capacity type is invalid or, when a state
  bucket is given, when the bucket validation fails.

  Args:
    blueprint_name: name passed to the blueprint generator.
    args: user provided arguments for running the command.
    prefix: prefix passed to the blueprint generator.

  Returns:
    The generator output for an a3-mega or a3-ultra device type, or None
    when the device type has no blueprint generator here.
  """
  capacity_type, return_code = get_capacity_type(args)
  if return_code != 0:
    xpk_print('Capacity type is invalid.')
    xpk_exit(return_code)

  bpg = prepare_blueprint_generator()

  if args.cluster_state_gcs_bucket is not None:
    validate_state_gcs_bucket(args)

  if args.device_type not in supported_device_types:
    return None

  is_a3_mega = args.device_type == a3mega_device_type
  is_a3_ultra = args.device_type == a3ultra_device_type
  if not (is_a3_mega or is_a3_ultra):
    return None

  # Arguments shared by both generators; built only once a generator is known
  # to apply, so unsupported device types never touch these attributes.
  common_kwargs = {
      'blueprint_name': blueprint_name,
      'prefix': prefix,
      'cluster_name': args.cluster,
      'region': zone_to_region(args.zone),
      'project_id': args.project,
      'zone': args.zone,
      'auth_cidr': all_IPs_cidr,
      # Two nodes are requested when the user did not specify a count.
      'num_nodes': args.num_nodes if args.num_nodes is not None else 2,
      'reservation': args.reservation if args.reservation else None,
      'capacity_type': capacity_type,
      'system_node_pool_machine_type': args.default_pool_cpu_machine_type,
      'system_node_pool_min_node_count': args.default_pool_cpu_num_nodes,
      'gcs_bucket': args.cluster_state_gcs_bucket,
  }

  if is_a3_mega:
    return bpg.generate_a3_mega_blueprint(**common_kwargs)

  # Only the a3-ultra generator receives the Filestore CSI driver flag.
  return bpg.generate_a3_ultra_blueprint(
      enable_filestore_csi_driver=args.enable_gcpfilestore_csi_driver,
      **common_kwargs,
  )
xpk/commands/common.py ADDED
@@ -0,0 +1,44 @@
1
+ """
2
+ Copyright 2025 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ from ..core.commands import run_command_with_updates_retry
18
+ from ..core.gcloud_context import zone_to_region
19
+ from ..utils.console import xpk_print
20
+
21
+
22
def set_cluster_command(args) -> int:
  """Fetch cluster credentials and point the kubectl context at `default`.

  Chains three shell commands with `&&`: gcloud get-credentials for the
  cluster, `kubectl config view`, and setting the current context's
  namespace to default.

  Args:
    args: user provided arguments for running the command.

  Returns:
    The return code of the chained command (0 on success).
  """
  get_credentials = (
      f'gcloud container clusters get-credentials {args.cluster}'
      f' --region={zone_to_region(args.zone)}'
      f' --project={args.project}'
  )
  command = ' && '.join([
      get_credentials,
      'kubectl config view',
      'kubectl config set-context --current --namespace=default',
  ])
  task = f'get-credentials to cluster {args.cluster}'
  return_code = run_command_with_updates_retry(
      command, task, args, verbose=False
  )
  if return_code == 0:
    return return_code
  xpk_print(f'{task} returned ERROR {return_code}')
  return return_code
xpk/commands/config.py ADDED
@@ -0,0 +1,29 @@
1
+ """
2
+ Copyright 2025 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ from ..core.config import XpkConfig
18
+ from ..utils.console import xpk_print
19
+
20
# Module-level config object shared by the config commands in this file.
xpk_cfg = XpkConfig()


def set_config(args):
  """Pass the first two entries of args.set_config_args to xpk_cfg.set.

  Args:
    args: user provided arguments; set_config_args holds the two values
      (presumably key then value - confirm against XpkConfig.set).
  """
  config_args = args.set_config_args
  xpk_cfg.set(config_args[0], config_args[1])
25
+
26
+
27
def get_config(args):
  """Print what xpk_cfg.get returns for the first entry of args.get_config_key.

  Args:
    args: user provided arguments for running the command.
  """
  xpk_print(xpk_cfg.get(args.get_config_key[0]))
xpk/commands/info.py ADDED
@@ -0,0 +1,243 @@
1
+ """
2
+ Copyright 2024 Google LLC
3
+
4
+ Licensed under the Apache License, Version 2.0 (the "License");
5
+ you may not use this file except in compliance with the License.
6
+ You may obtain a copy of the License at
7
+
8
+ https://www.apache.org/licenses/LICENSE-2.0
9
+
10
+ Unless required by applicable law or agreed to in writing, software
11
+ distributed under the License is distributed on an "AS IS" BASIS,
12
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ See the License for the specific language governing permissions and
14
+ limitations under the License.
15
+ """
16
+
17
+ import json
18
+ from argparse import Namespace
19
+
20
+ from tabulate import tabulate
21
+
22
+ from ..core.commands import run_command_for_value
23
+ from ..core.gcloud_context import add_zone_and_project
24
+ from ..core.kueue import verify_kueuectl
25
+ from ..utils.console import xpk_exit, xpk_print
26
+ from .common import set_cluster_command
27
+
28
# Table format name passed to tabulate when printing queue usage.
table_fmt = 'plain'


def info(args: Namespace) -> None:
  """Print usage of local queues and cluster queues against their quotas.

  When neither args.localqueue nor args.clusterqueue is set, both kinds are
  shown. Cluster queues are always fetched because their nominal quotas are
  needed for either table.

  Args:
    args: user provided arguments for running the command.
  Returns:
    None
  """
  add_zone_and_project(args)
  credentials_code = set_cluster_command(args)
  if credentials_code != 0:
    xpk_exit(credentials_code)

  verify_kueuectl(args)
  show_local = bool(args.localqueue)
  show_cluster = bool(args.clusterqueue)
  if not (show_local or show_cluster):
    show_local = show_cluster = True

  local_queues = run_kueuectl_list_localqueue(args) if show_local else None
  cluster_queues = run_kueuectl_list_clusterqueue(args)
  quotas = get_nominal_quotas(cluster_queues)

  if show_local:
    print_formatted_lqs(local_queues, quotas)

  if show_cluster:
    print_formatted_cqs(cluster_queues, quotas)
61
+
62
+
63
def get_nominal_quotas(cqs: str) -> dict[str, dict[str, str]]:
  """Get nominal quotas from cluster queues.

  Walks every resource of every flavor in every resource group of each
  cluster queue and records its nominalQuota.

  Args:
    cqs: JSON string with an 'items' list of cluster queues.

  Returns:
    Dictionary of cluster queue resource quotas in the format
    {cq_name: {"flavorName:resourceName": quota}}.
  """
  try:
    cq_list = json.loads(cqs)['items']
  except (ValueError, KeyError, TypeError):
    # ValueError: invalid JSON. KeyError: no 'items' entry. TypeError: input
    # is not a string or the JSON document is not an object.
    xpk_print('Incorrect response from list clusterqueue')
    xpk_print(cqs)
    xpk_exit(1)

  quotas = {}
  for cq in cq_list:
    spec = cq['spec']
    cq_quotas = {}
    for resource_group in spec['resourceGroups']:
      for flavor in resource_group['flavors']:
        flavor_name = flavor['name']
        for resource in flavor['resources']:
          cq_quotas[f'{flavor_name}:{resource["name"]}'] = resource[
              'nominalQuota'
          ]
    quotas[cq['metadata']['name']] = cq_quotas
  return quotas
92
+
93
+
94
def print_formatted_cqs(cqs: str, nominalQuotas) -> None:
  """Print a usage table for cluster queues.

  Args:
    cqs: JSON string with an 'items' list of cluster queues.
    nominalQuotas: nominal quotas per cluster queue, in the format
      {cq_name: {"flavorName:resourceName": quota}}.
  """
  try:
    cq_list = json.loads(cqs)['items']
  except (ValueError, KeyError, TypeError):
    # ValueError: invalid JSON. KeyError: no 'items' entry. TypeError: input
    # is not a string or the JSON document is not an object.
    xpk_print('Incorrect response from list clusterqueue')
    xpk_print(cqs)
    xpk_exit(1)

  cq_usages = parse_queue_lists(cq_list, nominalQuotas)

  xpk_print(
      'Cluster Queues usage \n',
      tabulate(cq_usages, headers='keys', tablefmt=table_fmt),
  )
108
+
109
+
110
def print_formatted_lqs(lqs: str, nominalQuotas) -> None:
  """Print a usage table for local queues.

  Args:
    lqs: JSON string with an 'items' list of local queues.
    nominalQuotas: nominal quotas per cluster queue, in the format
      {cq_name: {"flavorName:resourceName": quota}}.
  """
  try:
    lq_list = json.loads(lqs)['items']
  except (ValueError, KeyError, TypeError):
    # ValueError: invalid JSON. KeyError: no 'items' entry. TypeError: input
    # is not a string or the JSON document is not an object.
    xpk_print('Incorrect response from list localqueue')
    xpk_print(lqs)
    xpk_exit(1)

  lq_usages = parse_queue_lists(lq_list, nominalQuotas)
  xpk_print(
      'Local Queues usage \n',
      tabulate(lq_usages, headers='keys', tablefmt=table_fmt),
  )
123
+
124
+
125
def parse_queue_lists(
    qs: list[dict],
    flavor_resource_quotas: dict,
    reservation_key: str = 'flavorsReservation',
) -> list[dict]:
  """Build one usage row per queue.

  Args:
    qs: queue entries (local or cluster queues) as parsed JSON dicts.
    flavor_resource_quotas: nominal quotas per cluster queue.
    reservation_key: name of the status field holding flavor reservations.

  Returns:
    A list of dicts, each with QUEUE, ADMITTED_WORKLOADS and
    PENDING_WORKLOADS followed by the entries from get_flavors_usage.
  """
  usage_rows = []
  for queue in qs:
    name = queue['metadata']['name']
    pending = queue['status']['pendingWorkloads']
    admitted = queue['status']['admittedWorkloads']
    flavor_usage = get_flavors_usage(
        queue, reservation_key, flavor_resource_quotas
    )
    usage_rows.append({
        'QUEUE': name,
        'ADMITTED_WORKLOADS': admitted,
        'PENDING_WORKLOADS': pending,
        **flavor_usage,
    })
  return usage_rows
145
+
146
+
147
def get_flavors_resources_reservations(
    cq_name: str, flavors_res: dict[str, list[dict]]
) -> dict[str, dict[str, str]]:
  """Get the reserved amount of every flavor resource.

  Flattens the per-flavor resource lists into "flavor:resource" keys.

  Args:
    cq_name: name of the ClusterQueue the flavors belong to.
    flavors_res: mapping of flavor name to that flavor's list of resource
      entries; each entry provides 'name' and 'total'.

  Returns:
    Dict with the single key cq_name, in the format
    {cq_name: {"flavor:resource": total}}.
  """
  return {
      cq_name: {
          f'{flavor_name}:{resource["name"]}': resource['total']
          for flavor_name, resources in flavors_res.items()
          for resource in resources
      }
  }
168
+
169
+
170
def get_flavors_usage(
    q_entry: dict, res_field: str, flavor_resource_quotas: dict
) -> dict[str, str]:
  """Compute "reservation/quota" strings for each flavor resource of a queue.

  Args:
    q_entry: single LocalQueue or ClusterQueue entry as a parsed JSON dict.
    res_field: name of the status field holding the flavor reservations.
    flavor_resource_quotas: nominal quotas per cluster queue, in the format
      {cq_name: {"flavorName:resourceName": quota}}.

  Returns:
    Dict keyed by "flavorName:resourceName" whose values are
    "flavorResourceReservation/flavorResourceQuota" strings. There is one
    entry per quota of the queue's cluster queue.
  """
  flavors_res = {
      flavor['name']: flavor['resources']
      for flavor in q_entry['status'][res_field]
  }

  # A ClusterQueue is looked up under its own name; any other kind is looked
  # up under the cluster queue named in its spec.
  if q_entry['kind'] == 'ClusterQueue':
    cq_name = q_entry['metadata']['name']
  else:
    cq_name = q_entry['spec']['clusterQueue']

  # The helper returns a dict with cq_name as its only key.
  cq_reservations = get_flavors_resources_reservations(cq_name, flavors_res)[
      cq_name
  ]
  cq_nominal_quotas = flavor_resource_quotas[cq_name]

  return {
      flavor_resource: f'{cq_reservations[flavor_resource]}/{quota}'
      for flavor_resource, quota in cq_nominal_quotas.items()
  }
205
+
206
+
207
def run_kueuectl_list_localqueue(args: Namespace) -> str:
  """List local queues with kueuectl and return the JSON output.

  A --namespace flag is appended when args.namespace is not the empty
  string. Exits with the command's return code on failure.

  Args:
    args: user provided arguments for running the command.

  Returns:
    kueuectl list localqueue output formatted as json string.
  """
  command_parts = ['kubectl kueue list localqueue -o json']
  if args.namespace != '':
    command_parts.append(f'--namespace {args.namespace}')
  command = ' '.join(command_parts)

  return_code, output = run_command_for_value(command, 'list localqueue', args)
  if return_code == 0:
    return output

  xpk_print(f'Cluster info request returned ERROR {return_code}')
  xpk_exit(return_code)
  return output
225
+
226
+
227
def run_kueuectl_list_clusterqueue(args: Namespace) -> str:
  """List cluster queues with kueuectl and return the JSON output.

  Exits with the command's return code on failure.

  Args:
    args: user provided arguments for running the command.

  Returns:
    kueuectl list clusterqueue output formatted as json string.
  """
  return_code, output = run_command_for_value(
      'kubectl kueue list clusterqueue -o json', 'list clusterqueue', args
  )
  if return_code == 0:
    return output

  xpk_print(f'Cluster info request returned ERROR {return_code}')
  xpk_exit(return_code)
  return output