vantage-cli 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vantage_cli/__init__.py +131 -0
- vantage_cli/apps/__init__.py +22 -0
- vantage_cli/apps/common.py +78 -0
- vantage_cli/apps/juju_localhost/__init__.py +17 -0
- vantage_cli/apps/juju_localhost/app.py +255 -0
- vantage_cli/apps/juju_localhost/bundle_yaml.py +143 -0
- vantage_cli/apps/microk8s/README.md +47 -0
- vantage_cli/apps/microk8s/__init__.py +3 -0
- vantage_cli/apps/microk8s/app.py +301 -0
- vantage_cli/apps/multipass_singlenode/__init__.py +12 -0
- vantage_cli/apps/multipass_singlenode/app.py +173 -0
- vantage_cli/apps/templates.py +178 -0
- vantage_cli/auth.py +429 -0
- vantage_cli/cache.py +143 -0
- vantage_cli/client.py +84 -0
- vantage_cli/command_base.py +63 -0
- vantage_cli/commands/__init__.py +1 -0
- vantage_cli/commands/clouds/__init__.py +20 -0
- vantage_cli/commands/clouds/add.py +81 -0
- vantage_cli/commands/clouds/delete.py +61 -0
- vantage_cli/commands/clouds/render.py +146 -0
- vantage_cli/commands/clouds/update.py +97 -0
- vantage_cli/commands/clusters/__init__.py +27 -0
- vantage_cli/commands/clusters/create.py +270 -0
- vantage_cli/commands/clusters/delete.py +101 -0
- vantage_cli/commands/clusters/get.py +30 -0
- vantage_cli/commands/clusters/list.py +84 -0
- vantage_cli/commands/clusters/render.py +233 -0
- vantage_cli/commands/clusters/schema.py +31 -0
- vantage_cli/commands/clusters/utils.py +248 -0
- vantage_cli/commands/profile/__init__.py +30 -0
- vantage_cli/commands/profile/crud.py +529 -0
- vantage_cli/commands/profile/render.py +55 -0
- vantage_cli/config.py +161 -0
- vantage_cli/constants.py +40 -0
- vantage_cli/exceptions.py +127 -0
- vantage_cli/format.py +39 -0
- vantage_cli/gql_client.py +655 -0
- vantage_cli/main.py +303 -0
- vantage_cli/render.py +56 -0
- vantage_cli/schemas.py +48 -0
- vantage_cli/time_loop.py +124 -0
- vantage_cli-0.1.1.dist-info/METADATA +30 -0
- vantage_cli-0.1.1.dist-info/RECORD +46 -0
- vantage_cli-0.1.1.dist-info/WHEEL +4 -0
- vantage_cli-0.1.1.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
This app deploys the slurm-operator to microk8s for a fully functional slurm on k8s on localhost.
|
|
2
|
+
|
|
3
|
+
For more on the slurm-operator see: https://github.com/SlinkyProject/slurm-operator
|
|
4
|
+
For more on microk8s see <https://microk8s.io>
|
|
5
|
+
|
|
6
|
+
### Install and Configure microk8s
|
|
7
|
+
```bash
|
|
8
|
+
sudo snap install microk8s --channel 1.29-strict/stable
|
|
9
|
+
|
|
10
|
+
sudo microk8s.enable hostpath-storage
|
|
11
|
+
sudo microk8s.enable dns
|
|
12
|
+
sudo microk8s.enable metallb:10.64.140.43-10.64.140.49
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
### Install the Slurm Operator
|
|
16
|
+
```bash
|
|
17
|
+
sudo microk8s.helm repo add jetstack https://charts.jetstack.io
|
|
18
|
+
sudo microk8s.helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
|
|
19
|
+
sudo microk8s.helm repo update
|
|
20
|
+
|
|
21
|
+
sudo microk8s.helm install cert-manager jetstack/cert-manager --namespace cert-manager --create-namespace
|
|
22
|
+
sudo microk8s.helm install prometheus prometheus-community/kube-prometheus-stack --namespace prometheus --create-namespace
|
|
23
|
+
sudo microk8s.helm install slurm-operator-crds oci://ghcr.io/slinkyproject/charts/slurm-operator-crds
|
|
24
|
+
|
|
25
|
+
curl -L https://raw.githubusercontent.com/SlinkyProject/slurm-operator/refs/tags/v0.4.0/helm/slurm-operator/values.yaml -o values-operator.yaml
|
|
26
|
+
sudo microk8s.helm install slurm-operator oci://ghcr.io/slinkyproject/charts/slurm-operator --values=values-operator.yaml --version=0.4.0 --namespace=slinky --create-namespace
|
|
27
|
+
|
|
28
|
+
microk8s.kubectl --namespace=slinky get pods
|
|
29
|
+
|
|
30
|
+
# Install SLURM Cluster
|
|
31
|
+
curl -L https://raw.githubusercontent.com/SlinkyProject/slurm-operator/refs/tags/v0.4.0/helm/slurm/values.yaml -o values-slurm.yaml
|
|
32
|
+
|
|
33
|
+
sudo microk8s.helm install slurm oci://ghcr.io/slinkyproject/charts/slurm --values=values-slurm.yaml --version=0.4.0 --namespace=slurm --create-namespace
|
|
34
|
+
|
|
35
|
+
microk8s.kubectl --namespace=slurm get pods
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
### Testing
|
|
40
|
+
```bash
|
|
41
|
+
SLURM_LOGIN_IP="$(kubectl get services -n slurm slurm-login-slinky -o jsonpath='{.status.loadBalancer.ingress[0].ip}')"
|
|
42
|
+
SLURM_LOGIN_PORT="$(kubectl get services -n slurm slurm-login-slinky -o jsonpath='{.status.loadBalancer.ingress[0].ports[0].port}')"
|
|
43
|
+
## Assuming your public SSH key was configured in `login.rootSshAuthorizedKeys`.
|
|
44
|
+
ssh -p ${SLURM_LOGIN_PORT:-22} root@${SLURM_LOGIN_IP}
|
|
45
|
+
## Assuming SSSD is configured.
|
|
46
|
+
ssh -p ${SLURM_LOGIN_PORT:-22} ${USER}@${SLURM_LOGIN_IP}
|
|
47
|
+
```
|
|
@@ -0,0 +1,301 @@
|
|
|
1
|
+
# © 2025 Vantage Compute, Inc. All rights reserved.
|
|
2
|
+
# Confidential and proprietary. Unauthorized use prohibited.
|
|
3
|
+
"""MicroK8s application support for deploying the Slurm Operator & Slurm cluster.
|
|
4
|
+
|
|
5
|
+
Implements the manual steps documented in the local README:
|
|
6
|
+
|
|
7
|
+
1. Enable required MicroK8s addons (hostpath-storage, dns, metallb)
|
|
8
|
+
2. Add Helm repositories (jetstack, prometheus-community)
|
|
9
|
+
3. Install cert-manager, Prometheus stack, Slurm Operator CRDs
|
|
10
|
+
4. Download values files & install Slurm Operator
|
|
11
|
+
5. Install a Slurm cluster release
|
|
12
|
+
|
|
13
|
+
Notes:
|
|
14
|
+
- These steps are inherently idempotent; failures on already-installed/enabled
|
|
15
|
+
components are treated as warnings (not fatal) when safe.
|
|
16
|
+
- This command invokes system binaries (sudo microk8s.* & curl). The user must
|
|
17
|
+
have the appropriate privileges. We intentionally keep the logic simple and
|
|
18
|
+
transparent; advanced lifecycle management belongs in a dedicated orchestrator.
|
|
19
|
+
"""
|
|
20
|
+
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import os
|
|
24
|
+
import shutil
|
|
25
|
+
import subprocess
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
from typing import Any, Dict, Optional
|
|
28
|
+
|
|
29
|
+
import typer
|
|
30
|
+
from loguru import logger
|
|
31
|
+
from rich.console import Console
|
|
32
|
+
from rich.panel import Panel
|
|
33
|
+
from typing_extensions import Annotated
|
|
34
|
+
|
|
35
|
+
from vantage_cli.apps.common import (
|
|
36
|
+
validate_client_credentials,
|
|
37
|
+
validate_cluster_data,
|
|
38
|
+
)
|
|
39
|
+
from vantage_cli.config import attach_settings
|
|
40
|
+
|
|
41
|
+
# Address pool passed to the MetalLB addon when the caller does not supply
# --metallb-range; it is the default of both deploy() and deploy_command().
DEFAULT_METALLB_RANGE = "10.64.140.43-10.64.140.49"
# Used twice: as the "v<version>" tag when building the values.yaml URLs and
# as the --version flag for the slurm-operator and slurm chart installs.
SLURM_OPERATOR_VERSION = "0.4.0"
# Base URL under which tagged values.yaml files are fetched; deploy() appends
# "/v<version>/helm/<chart>/values.yaml".
SLURM_OPERATOR_REPO_BASE = (
    "https://raw.githubusercontent.com/SlinkyProject/slurm-operator/refs/tags"
)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def _run(
|
|
49
|
+
cmd: list[str],
|
|
50
|
+
console: Console,
|
|
51
|
+
*,
|
|
52
|
+
check: bool = True,
|
|
53
|
+
allow_fail: bool = False,
|
|
54
|
+
env: Optional[dict[str, str]] = None,
|
|
55
|
+
) -> subprocess.CompletedProcess:
|
|
56
|
+
"""Run a shell command (no shell=True) and optionally allow failure.
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
cmd: Command & arguments
|
|
60
|
+
console: Rich console for user feedback
|
|
61
|
+
check: If True raise on non‑zero exit unless allow_fail
|
|
62
|
+
allow_fail: If True convert non‑zero exit into warning
|
|
63
|
+
env: Optional environment overrides
|
|
64
|
+
"""
|
|
65
|
+
console.print(f"[dim]$ {' '.join(cmd)}[/dim]")
|
|
66
|
+
try:
|
|
67
|
+
cp = subprocess.run(
|
|
68
|
+
cmd,
|
|
69
|
+
stdout=subprocess.PIPE,
|
|
70
|
+
stderr=subprocess.STDOUT,
|
|
71
|
+
text=True,
|
|
72
|
+
env=env or os.environ.copy(),
|
|
73
|
+
check=False,
|
|
74
|
+
)
|
|
75
|
+
except FileNotFoundError:
|
|
76
|
+
console.print(f"[red]Command not found: {cmd[0]}[/red]")
|
|
77
|
+
if check and not allow_fail:
|
|
78
|
+
raise typer.Exit(code=1)
|
|
79
|
+
return subprocess.CompletedProcess(cmd, 127, "")
|
|
80
|
+
|
|
81
|
+
if cp.returncode != 0 and check and not allow_fail:
|
|
82
|
+
console.print(f"[red]Command failed (exit {cp.returncode}): {' '.join(cmd)}[/red]")
|
|
83
|
+
console.print(cp.stdout)
|
|
84
|
+
raise typer.Exit(code=cp.returncode)
|
|
85
|
+
|
|
86
|
+
if cp.returncode != 0 and allow_fail:
|
|
87
|
+
console.print(
|
|
88
|
+
f"[yellow]Warning: command returned {cp.returncode} (continuing): {' '.join(cmd)}[/yellow]"
|
|
89
|
+
)
|
|
90
|
+
logger.warning(
|
|
91
|
+
f"Non‑fatal command failure: {' '.join(cmd)} -> {cp.returncode}\n{cp.stdout}"
|
|
92
|
+
)
|
|
93
|
+
return cp
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _helm_install(
    console: Console, helm_cmd: str, release: str, chart: str, *extra_args: str
) -> None:
    """Best-effort ``helm install`` of *chart* as *release*; failures only warn."""
    _run(["sudo", helm_cmd, "install", release, chart, *extra_args], console, allow_fail=True)


def _fetch_values_file(console: Console, url: str, dest: Path) -> None:
    """Download *url* to *dest* unless the file already exists.

    ``--fail`` makes curl exit non-zero on HTTP errors instead of saving the
    error page as if it were a values file. A partially written file is removed
    on failure so a later run does not pick it up as "existing".
    """
    if dest.exists():
        console.print(f"[green]Using existing {dest}[/green]")
        return
    try:
        _run(["curl", "--fail", "-L", url, "-o", str(dest)], console, allow_fail=False)
    except typer.Exit:
        dest.unlink(missing_ok=True)
        raise


async def deploy(
    ctx: typer.Context,
    cluster_data: Optional[Dict[str, Any]] = None,
    *,
    metallb_range: str = DEFAULT_METALLB_RANGE,
) -> None:
    """Deploy Slurm Operator & Slurm cluster onto MicroK8s.

    This performs a sequence of imperative steps. It aims for a *best effort*
    deployment suitable for local development / experimentation, not production:
    only the readiness check and the values-file downloads are fatal, every
    addon / helm step is allowed to fail with a warning.

    Args:
        ctx: Typer context (not read by this function itself).
        cluster_data: Optional cluster record; when given it is validated and
            its client credentials are checked before any command runs.
        metallb_range: Address range passed to the MetalLB addon ("start-end").

    Raises:
        typer.Exit: If microk8s is not on PATH, microk8s never becomes ready,
            or a values file cannot be downloaded.
    """
    console = Console()
    console.print(Panel("Slurm Operator on MicroK8s"))
    console.print("Deploying slurm-operator & slurm cluster on MicroK8s...")

    microk8s_bin = shutil.which("microk8s")
    if not microk8s_bin:
        console.print(
            "[red]microk8s binary not found in PATH. Please install microk8s first.[/red]"
        )
        raise typer.Exit(code=1)

    # Validate cluster data & surface credentials (even if not strictly needed yet)
    if cluster_data:
        cluster_data = validate_cluster_data(cluster_data, console)
        validate_client_credentials(cluster_data, console)

    # 1. Wait for microk8s to be ready (fatal: nothing below can work otherwise)
    _run(["sudo", microk8s_bin, "status", "--wait-ready"], console, allow_fail=False)

    # 2. Enable core addons (failures tolerated, e.g. addon already enabled)
    for addon in ("hostpath-storage", "dns", f"metallb:{metallb_range}"):
        _run(["sudo", microk8s_bin, "enable", addon], console, allow_fail=True)

    # 3. Helm repositories (microk8s embeds helm3 as 'microk8s.helm')
    helm_cmd = shutil.which("microk8s.helm") or "microk8s.helm"
    helm_repos = (
        ("jetstack", "https://charts.jetstack.io"),
        ("prometheus-community", "https://prometheus-community.github.io/helm-charts"),
    )
    for repo_name, repo_url in helm_repos:
        _run(["sudo", helm_cmd, "repo", "add", repo_name, repo_url], console, allow_fail=True)
    _run(["sudo", helm_cmd, "repo", "update"], console, allow_fail=True)

    # 4. Install cert-manager (CRDs handled by chart flags)
    _helm_install(
        console,
        helm_cmd,
        "cert-manager",
        "jetstack/cert-manager",
        "--namespace",
        "cert-manager",
        "--create-namespace",
        "--set",
        "installCRDs=true",
    )

    # 5. Install Prometheus stack for metrics
    _helm_install(
        console,
        helm_cmd,
        "prometheus",
        "prometheus-community/kube-prometheus-stack",
        "--namespace",
        "prometheus",
        "--create-namespace",
    )

    # 6. Install Slurm Operator CRDs
    _helm_install(
        console,
        helm_cmd,
        "slurm-operator-crds",
        "oci://ghcr.io/slinkyproject/charts/slurm-operator-crds",
    )

    # 7. Download operator & slurm values (so user can inspect / tweak) then install
    work_dir = Path.cwd() / "microk8s-slurm"
    work_dir.mkdir(exist_ok=True)
    operator_values = work_dir / "values-operator.yaml"
    slurm_values = work_dir / "values-slurm.yaml"

    tag_base = f"{SLURM_OPERATOR_REPO_BASE}/v{SLURM_OPERATOR_VERSION}"
    _fetch_values_file(console, f"{tag_base}/helm/slurm-operator/values.yaml", operator_values)
    _fetch_values_file(console, f"{tag_base}/helm/slurm/values.yaml", slurm_values)

    _helm_install(
        console,
        helm_cmd,
        "slurm-operator",
        "oci://ghcr.io/slinkyproject/charts/slurm-operator",
        f"--values={operator_values}",
        f"--version={SLURM_OPERATOR_VERSION}",
        "--namespace",
        "slinky",
        "--create-namespace",
    )

    # 8. Install Slurm cluster
    _helm_install(
        console,
        helm_cmd,
        "slurm",
        "oci://ghcr.io/slinkyproject/charts/slurm",
        f"--values={slurm_values}",
        f"--version={SLURM_OPERATOR_VERSION}",
        "--namespace",
        "slurm",
        "--create-namespace",
    )

    console.print(
        "[green]Deployment steps executed. Pods may take a few minutes to become ready.[/green]"
    )
    console.print("Check status with: sudo microk8s.kubectl get pods -A | grep -E 'slinky|slurm'")
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
async def _lookup_cluster_best_effort(
    ctx: typer.Context, cluster_name: str
) -> Optional[Dict[str, Any]]:
    """Return the named cluster's data, or None if it cannot be retrieved.

    Any exception raised by the import or the lookup is logged as a warning
    and swallowed, because the cluster record is optional for this deployment.
    """
    try:
        # local import to avoid cycles
        from vantage_cli.commands.clusters import utils as cluster_utils

        return await cluster_utils.get_cluster_by_name(ctx=ctx, cluster_name=cluster_name)
    except Exception as e:  # noqa: BLE001
        logger.warning(f"Could not retrieve cluster '{cluster_name}': {e}")
        return None


@attach_settings
async def deploy_command(
    ctx: typer.Context,
    cluster_name: Annotated[
        Optional[str],
        typer.Option(
            "--cluster-name",
            help="Existing cluster name to fetch credentials (optional)",
        ),
    ] = None,
    metallb_range: Annotated[
        str,
        typer.Option(
            "--metallb-range",
            help="Address range to configure for MetalLB (start-end)",
            show_default=True,
        ),
    ] = DEFAULT_METALLB_RANGE,
) -> None:
    """CLI entrypoint for deploying MicroK8s Slurm stack."""
    # Without --cluster-name (or when the lookup fails) deploy() gets None.
    resolved = await _lookup_cluster_best_effort(ctx, cluster_name) if cluster_name else None
    await deploy(ctx=ctx, cluster_data=resolved, metallb_range=metallb_range)
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
# Public names of this module; the private _run helper is intentionally left out.
__all__ = ["deploy", "deploy_command"]
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Multipass single-node deployment app package."""
|
|
2
|
+
|
|
3
|
+
from vantage_cli import AsyncTyper
|
|
4
|
+
|
|
5
|
+
from .app import deploy_command
|
|
6
|
+
|
|
7
|
+
# Typer sub-application that groups the Multipass single-node commands
# under the "multipass-singlenode" name.
multipass_singlenode_app = AsyncTyper(
    name="multipass-singlenode",
    help="Multipass single-node application commands.",
)

# Register deploy_command (imported from .app) as the "deploy" subcommand.
multipass_singlenode_app.command("deploy")(deploy_command)
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
"""Multipass singlenode application support."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import subprocess
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from shutil import which
|
|
7
|
+
from typing import Any, Dict, Optional
|
|
8
|
+
|
|
9
|
+
import typer
|
|
10
|
+
from loguru import logger
|
|
11
|
+
from rich.console import Console
|
|
12
|
+
from rich.panel import Panel
|
|
13
|
+
from typing_extensions import Annotated
|
|
14
|
+
|
|
15
|
+
from vantage_cli.apps.common import (
|
|
16
|
+
require_client_secret,
|
|
17
|
+
validate_client_credentials,
|
|
18
|
+
validate_cluster_data,
|
|
19
|
+
)
|
|
20
|
+
from vantage_cli.apps.templates import CloudInitTemplate, DeploymentContext
|
|
21
|
+
from vantage_cli.config import attach_settings
|
|
22
|
+
from vantage_cli.constants import (
|
|
23
|
+
ERROR_MULTIPASS_NOT_FOUND,
|
|
24
|
+
MULTIPASS_CLOUD_IMAGE_DEST,
|
|
25
|
+
MULTIPASS_CLOUD_IMAGE_LOCAL,
|
|
26
|
+
MULTIPASS_CLOUD_IMAGE_URL,
|
|
27
|
+
)
|
|
28
|
+
|
|
29
|
+
# Note: Cloud-init generation now handled by centralized template engine
|
|
30
|
+
# See vantage_cli/apps/templates.py for CloudInitTemplate and DeploymentContext
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
async def deploy(ctx: typer.Context, cluster_data: Optional[Dict[str, Any]] = None) -> None:
    """Deploy a single-node SLURM cluster using Multipass.

    Validates the cluster record, fetches the client secret, renders the
    cloud-init document and only then launches the Multipass instance, feeding
    cloud-init to it over stdin.

    Args:
        ctx: Typer context containing settings and configuration
        cluster_data: Dictionary containing cluster configuration including client credentials

    Raises:
        typer.Exit: If deployment fails due to missing dependencies or invalid configuration
    """
    console = Console()
    console.print(Panel("Multipass Singlenode Application"))
    console.print("Deploying multipass singlenode application...")

    multipass = which("multipass")  # Ensure multipass is installed
    if not multipass:
        console.print(ERROR_MULTIPASS_NOT_FOUND)
        console.print(f"{os.environ.get('PATH')}")  # Show the PATH that was searched
        raise typer.Exit(code=1)

    # Validate cluster data and extract credentials
    cluster_data = validate_cluster_data(cluster_data, console)
    client_id, _ = validate_client_credentials(cluster_data, console)

    # Extract cluster name from cluster data
    cluster_name = cluster_data.get("name", "unknown-cluster")

    # Get client secret (import locally to avoid circular import)
    from vantage_cli.commands.clusters import utils as cluster_utils

    client_secret = await cluster_utils.get_cluster_client_secret(ctx=ctx, client_id=client_id)
    client_secret = require_client_secret(client_secret, console)

    logger.debug("Client secret obtained (or placeholder used).")

    # Use jupyterhub_token from cluster data if available, otherwise fall back
    # to a fixed default. `or {}` tolerates a missing or null creationParameters.
    creation_params = cluster_data.get("creationParameters") or {}
    jupyterhub_token = creation_params.get("jupyterhub_token")
    if jupyterhub_token:
        logger.debug("Using JupyterHub token from cluster data.")
    else:
        # NOTE(review): hard-coded fallback token; acceptable for local testing only.
        jupyterhub_token = "default-token-for-testing"
        logger.debug("Using default JupyterHub token.")
    # The token value itself is deliberately never logged, not even a prefix.

    # Create deployment context for template engine
    deployment_context = DeploymentContext(
        cluster_name=cluster_name,
        client_id=client_id,
        client_secret=client_secret,
        oidc_domain=ctx.obj.settings.oidc_domain,
        oidc_base_url=ctx.obj.settings.oidc_base_url,
        base_api_url=ctx.obj.settings.api_base_url,
        tunnel_api_url=ctx.obj.settings.tunnel_api_url,
        jupyterhub_token=jupyterhub_token,
    )

    instance_name = f"vantage-multipass-singlenode-{client_id.split('-')[0]}"
    console.print(f"Launching: {instance_name}")

    shared_dir = Path.home() / "multipass-singlenode" / "shared"
    shared_dir.mkdir(parents=True, exist_ok=True)
    shared_dir.chmod(0o755)

    # Prefer a locally available image file; otherwise use the image URL.
    image_origin = MULTIPASS_CLOUD_IMAGE_URL
    if MULTIPASS_CLOUD_IMAGE_LOCAL.exists():
        image_origin = f"file://{MULTIPASS_CLOUD_IMAGE_LOCAL}"
    elif MULTIPASS_CLOUD_IMAGE_DEST.exists():
        image_origin = f"file://{MULTIPASS_CLOUD_IMAGE_DEST}"

    # Get the number of CPUs available
    cpu_count = str(os.cpu_count() or 2)  # Default to 2 if unable to determine

    # Render cloud-init BEFORE spawning multipass: if rendering fails, no child
    # process is left behind waiting on stdin.
    cloud_init_config = CloudInitTemplate().generate_multipass_config(deployment_context)

    launch_cmd = [
        multipass,  # resolved path from which(), i.e. the binary that was checked
        "launch",
        "--verbose",
        f"-c{cpu_count}",
        "-m4GB",
        "-d10GB",
        "--mount",
        f"{shared_dir}:/shared",
        "-n",
        instance_name,
        "--cloud-init",
        "-",  # Use stdin for cloud-init
        image_origin,
    ]

    try:
        result = subprocess.run(
            launch_cmd,
            input=cloud_init_config.encode("utf-8"),
            check=False,
        )
    except (OSError, subprocess.SubprocessError) as e:
        # OSError covers the realistic failures (binary vanished, not executable).
        console.print(f"[red]Error launching multipass instance: {e}[/red]")
        raise typer.Exit(code=1) from e

    if result.returncode != 0:
        console.print(
            f"[red]Error launching multipass instance: return code {result.returncode}[/red]"
        )
        raise typer.Exit(code=1)

    console.print(f"[green]Successfully launched instance: {instance_name}[/green]")
    console.print("Use 'multipass list' to see the instance status.")
    console.print(f"Use 'multipass shell {instance_name}' to access the instance shell.")
    console.print("Remember to set up your SSH keys for passwordless access if needed.")
    console.print("It may take a few minutes for all services to start inside the instance.")
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
# Typer CLI commands
|
|
152
|
+
@attach_settings
async def deploy_command(
    ctx: typer.Context,
    cluster_name: Annotated[
        str,
        typer.Argument(help="Name of the cluster to deploy"),
    ],
) -> None:
    """Deploy a Vantage Multipass Singlenode SLURM cluster.

    Looks the cluster up by name and hands its data to ``deploy``; exits with
    status 1 when the lookup yields nothing.
    """
    out = Console()
    banner = Panel("Multipass Singlenode SLURM Application")
    out.print(banner)
    out.print("Deploying multipass singlenode slurm application...")

    # Imported here rather than at module level to avoid a circular import.
    from vantage_cli.commands.clusters import utils as cluster_utils

    found = await cluster_utils.get_cluster_by_name(ctx=ctx, cluster_name=cluster_name)
    if found:
        await deploy(ctx=ctx, cluster_data=found)
        return

    out.print("[red]Error: No cluster data found.[/red]")
    raise typer.Exit(code=1)
|