swchmonclient 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,19 @@
1
+ from .deployer import deploy_monitoring, undeploy_monitoring
2
+ from .metrics import (
3
+ query_metric_values,
4
+ query_metric_values_raw,
5
+ subscribe_metric,
6
+ subscribe_metric_raw,
7
+ unsubscribe_metric,
8
+ )
9
+
10
# Version string of this package release.
__version__ = "0.1.0"

# Names re-exported by the package: the two deployment entry points and the
# metric query/subscription helpers imported above.
__all__ = [
    "deploy_monitoring",
    "query_metric_values",
    "query_metric_values_raw",
    "undeploy_monitoring",
    "subscribe_metric",
    "subscribe_metric_raw",
    "unsubscribe_metric",
]
@@ -0,0 +1,615 @@
1
+ import os
2
+ from collections.abc import Iterable, Sequence
3
+ from logging import Logger
4
+ from pathlib import Path
5
+ from tempfile import NamedTemporaryFile
6
+ from typing import Any
7
+ from urllib.error import URLError
8
+ from urllib.request import urlopen
9
+
10
+ import yaml
11
+ from kubernetes import config, dynamic
12
+ from kubernetes.client import ApiClient, CoreV1Api
13
+ from kubernetes.client.exceptions import ApiException
14
+
15
+ from .exceptions import DeploymentError
16
+
17
+
18
# Manifest that is rendered with caller-supplied variables before it is applied.
MONITORING_TEMPLATE_MANIFEST = "./manifests/emsconfig.yaml"

# Manifests applied on deploy, in order; the same set is removed on undeploy.
MONITORING_DEPLOY_MANIFESTS = (
    MONITORING_TEMPLATE_MANIFEST,
    "./manifests/ems+netdata-k3s_parametric.yaml",
)
MONITORING_UNDEPLOY_MANIFESTS = tuple(MONITORING_DEPLOY_MANIFESTS)

# Extra resources removed on undeploy, as (apiVersion, kind, name, log label).
MONITORING_EXTRA_RESOURCES_TO_DELETE = (
    ("apps/v1", "DaemonSet", "ems-client-daemonset", "daemonset"),
    ("v1", "ConfigMap", "ems-client-configmap", "configmap"),
    ("v1", "ConfigMap", "monitoring-configmap", "configmap"),
)

DEFAULT_MONITORING_NAMESPACE = "default"

# Timeout, in seconds, passed to urlopen() when downloading a manifest.
MANIFEST_DOWNLOAD_TIMEOUT_SECONDS = 30

# Local manifest path -> release asset URL it is downloaded from.
MONITORING_MANIFEST_RELEASE_URLS = dict(
    zip(
        MONITORING_DEPLOY_MANIFESTS,
        (
            "https://github.com/Swarmchestrate/monitoring-client/releases/download/v0.1.0/emsconfig.yaml",
            "https://github.com/Swarmchestrate/monitoring-client/releases/download/v0.1.0/ems+netdata-k3s_parametric.yaml",
        ),
    )
)
42
+
43
+
44
+ class K8sDeployer:
45
+ """Deploy and remove Kubernetes resources from manifest files."""
46
+
47
+ def __init__(
48
+ self,
49
+ kubeconfig_path: str | None = None,
50
+ context: str | None = None,
51
+ in_cluster_fallback: bool = True,
52
+ ) -> None:
53
+ self._load_config(kubeconfig_path=kubeconfig_path, context=context, in_cluster_fallback=in_cluster_fallback)
54
+ self.api_client = ApiClient()
55
+ self.core_v1_api = CoreV1Api(self.api_client)
56
+ self.dynamic_client = dynamic.DynamicClient(self.api_client)
57
+
58
+ def _load_config(
59
+ self,
60
+ kubeconfig_path: str | None,
61
+ context: str | None,
62
+ in_cluster_fallback: bool,
63
+ ) -> None:
64
+ if kubeconfig_path is not None:
65
+ try:
66
+ config.load_kube_config(config_file=kubeconfig_path, context=context)
67
+ return
68
+ except Exception as kubeconfig_error:
69
+ if not in_cluster_fallback:
70
+ raise DeploymentError(f"Unable to load kubeconfig: {kubeconfig_error}") from kubeconfig_error
71
+ try:
72
+ config.load_incluster_config()
73
+ return
74
+ except Exception as incluster_error:
75
+ raise DeploymentError(
76
+ "Unable to load Kubernetes configuration from kubeconfig or in-cluster environment"
77
+ ) from incluster_error
78
+
79
+ try:
80
+ config.load_kube_config(config_file=kubeconfig_path, context=context)
81
+ return
82
+ except Exception as kubeconfig_error:
83
+ for discovered_path in self._discover_kubeconfig_paths():
84
+ try:
85
+ config.load_kube_config(config_file=discovered_path, context=context)
86
+ return
87
+ except Exception:
88
+ continue
89
+ if not in_cluster_fallback:
90
+ raise DeploymentError(f"Unable to load kubeconfig: {kubeconfig_error}") from kubeconfig_error
91
+ try:
92
+ config.load_incluster_config()
93
+ return
94
+ except Exception as incluster_error:
95
+ raise DeploymentError(
96
+ "Unable to load Kubernetes configuration from kubeconfig or in-cluster environment"
97
+ ) from incluster_error
98
+
99
+ @staticmethod
100
+ def _discover_kubeconfig_paths() -> list[str]:
101
+ candidates: list[str] = []
102
+
103
+ env_kubeconfig = os.getenv("KUBECONFIG")
104
+ if env_kubeconfig:
105
+ candidates.extend(path for path in env_kubeconfig.split(os.pathsep) if path)
106
+
107
+ candidates.extend(
108
+ [
109
+ "./k3s.yaml",
110
+ "./kubeconfig",
111
+ str(Path.home() / "k3s.yaml"),
112
+ str(Path.home() / ".kube" / "config"),
113
+ ]
114
+ )
115
+
116
+ discovered: list[str] = []
117
+ seen_paths: set[str] = set()
118
+ for candidate in candidates:
119
+ resolved_candidate = os.path.abspath(os.path.expanduser(candidate))
120
+ if resolved_candidate in seen_paths or not os.path.isfile(resolved_candidate):
121
+ continue
122
+ seen_paths.add(resolved_candidate)
123
+ discovered.append(resolved_candidate)
124
+
125
+ return discovered
126
+
127
+ def _iter_manifest_documents(self, manifest_path: str) -> Iterable[dict[str, Any]]:
128
+ try:
129
+ with open(manifest_path, "r", encoding="utf-8") as handle:
130
+ documents = list(yaml.safe_load_all(handle))
131
+ except FileNotFoundError as error:
132
+ raise DeploymentError(f"Manifest file not found: {manifest_path}") from error
133
+ except yaml.YAMLError as error:
134
+ raise DeploymentError(f"Invalid YAML in manifest file {manifest_path}: {error}") from error
135
+
136
+ for document in documents:
137
+ if isinstance(document, dict) and document.get("kind") and document.get("apiVersion"):
138
+ yield document
139
+
140
+ def _resource_from_doc(self, document: dict[str, Any]):
141
+ return self.dynamic_client.resources.get(
142
+ api_version=document["apiVersion"],
143
+ kind=document["kind"],
144
+ )
145
+
146
+ def deploy_manifest(self, manifest_path: str, namespace: str | None = None) -> list[dict[str, str]]:
147
+ """Create or patch resources from a manifest file and return resource references."""
148
+ deployed: list[dict[str, str]] = []
149
+
150
+ for document in self._iter_manifest_documents(manifest_path):
151
+ resource = self._resource_from_doc(document)
152
+ metadata = document.setdefault("metadata", {})
153
+ name = metadata.get("name")
154
+ if not name:
155
+ raise DeploymentError("Each manifest document must include metadata.name")
156
+
157
+ resource_namespace = namespace or metadata.get("namespace") or "default"
158
+ namespaced = bool(getattr(resource, "namespaced", True))
159
+ if namespaced:
160
+ metadata["namespace"] = resource_namespace
161
+
162
+ try:
163
+ if namespaced:
164
+ resource.create(body=document, namespace=resource_namespace)
165
+ else:
166
+ resource.create(body=document)
167
+ except ApiException as error:
168
+ if error.status == 409:
169
+ if namespaced:
170
+ resource.patch(
171
+ name=name,
172
+ namespace=resource_namespace,
173
+ body=document,
174
+ content_type="application/merge-patch+json",
175
+ )
176
+ else:
177
+ resource.patch(
178
+ name=name,
179
+ body=document,
180
+ content_type="application/merge-patch+json",
181
+ )
182
+ else:
183
+ raise DeploymentError(
184
+ f"Failed to deploy {document['kind']}/{name}: {error.reason}"
185
+ ) from error
186
+ except Exception as error:
187
+ raise DeploymentError(f"Failed to deploy {document['kind']}/{name}: {error}") from error
188
+
189
+ deployed.append(
190
+ {
191
+ "apiVersion": document["apiVersion"],
192
+ "kind": document["kind"],
193
+ "name": name,
194
+ "namespace": resource_namespace if namespaced else "",
195
+ }
196
+ )
197
+
198
+ return deployed
199
+
200
+ def destroy_manifest(self, manifest_path: str, namespace: str | None = None) -> int:
201
+ """Delete resources listed in the given manifest file."""
202
+ deleted = 0
203
+
204
+ for document in self._iter_manifest_documents(manifest_path):
205
+ resource = self._resource_from_doc(document)
206
+ metadata = document.get("metadata", {})
207
+ name = metadata.get("name")
208
+ if not name:
209
+ continue
210
+
211
+ resource_namespace = namespace or metadata.get("namespace") or "default"
212
+ namespaced = bool(getattr(resource, "namespaced", True))
213
+
214
+ try:
215
+ if namespaced:
216
+ resource.delete(name=name, namespace=resource_namespace)
217
+ else:
218
+ resource.delete(name=name)
219
+ deleted += 1
220
+ except ApiException as error:
221
+ if error.status != 404:
222
+ raise DeploymentError(
223
+ f"Failed to delete {document['kind']}/{name}: {error.reason}"
224
+ ) from error
225
+ except Exception as error:
226
+ raise DeploymentError(f"Failed to delete {document['kind']}/{name}: {error}") from error
227
+
228
+ return deleted
229
+
230
+ def destroy_resource(
231
+ self,
232
+ api_version: str,
233
+ kind: str,
234
+ name: str,
235
+ namespace: str | None = None,
236
+ ) -> int:
237
+ """Delete a single named resource and return 1 if it was deleted, else 0."""
238
+ try:
239
+ resource = self.dynamic_client.resources.get(api_version=api_version, kind=kind)
240
+ namespaced = bool(getattr(resource, "namespaced", True))
241
+
242
+ if namespaced:
243
+ resource_namespace = namespace or "default"
244
+ resource.delete(name=name, namespace=resource_namespace)
245
+ else:
246
+ resource.delete(name=name)
247
+ except ApiException as error:
248
+ if error.status == 404:
249
+ return 0
250
+ raise DeploymentError(f"Failed to delete {kind}/{name}: {error.reason}") from error
251
+ except Exception as error:
252
+ raise DeploymentError(f"Failed to delete {kind}/{name}: {error}") from error
253
+
254
+ return 1
255
+
256
+ def destroy_app(
257
+ self,
258
+ label_selector: str,
259
+ namespace: str = "default",
260
+ kinds: list[tuple[str, str]] | None = None,
261
+ ) -> int:
262
+ """Delete resources matching a label selector in a namespace."""
263
+ default_kinds = [
264
+ ("apps/v1", "Deployment"),
265
+ ("apps/v1", "StatefulSet"),
266
+ ("apps/v1", "DaemonSet"),
267
+ ("batch/v1", "Job"),
268
+ ("v1", "Service"),
269
+ ("v1", "Pod"),
270
+ ("v1", "ConfigMap"),
271
+ ("v1", "Secret"),
272
+ ]
273
+ resolved_kinds = kinds or default_kinds
274
+
275
+ deleted = 0
276
+ for api_version, kind in resolved_kinds:
277
+ try:
278
+ resource = self.dynamic_client.resources.get(api_version=api_version, kind=kind)
279
+ namespaced = bool(getattr(resource, "namespaced", True))
280
+ if namespaced:
281
+ result = resource.get(namespace=namespace, label_selector=label_selector)
282
+ else:
283
+ result = resource.get(label_selector=label_selector)
284
+
285
+ for item in getattr(result, "items", []):
286
+ item_name = item.metadata.name
287
+ if namespaced:
288
+ resource.delete(name=item_name, namespace=namespace)
289
+ else:
290
+ resource.delete(name=item_name)
291
+ deleted += 1
292
+ except ApiException as error:
293
+ if error.status not in (403, 404):
294
+ raise DeploymentError(
295
+ f"Failed to destroy resources for {kind} with selector {label_selector}: {error.reason}"
296
+ ) from error
297
+ except Exception as error:
298
+ raise DeploymentError(
299
+ f"Failed to destroy resources for {kind} with selector {label_selector}: {error}"
300
+ ) from error
301
+
302
+ return deleted
303
+
304
+ def get_vm_private_ips(self) -> list[str]:
305
+ """Return Kubernetes node InternalIP addresses in API order, without duplicates."""
306
+ try:
307
+ nodes = self.core_v1_api.list_node().items
308
+ except ApiException as error:
309
+ raise DeploymentError(f"Failed to list Kubernetes nodes: {error.reason}") from error
310
+ except Exception as error:
311
+ raise DeploymentError(f"Failed to list Kubernetes nodes: {error}") from error
312
+
313
+ private_ips: list[str] = []
314
+ seen_ips: set[str] = set()
315
+ for node in nodes:
316
+ addresses = getattr(getattr(node, "status", None), "addresses", []) or []
317
+ for address in addresses:
318
+ if getattr(address, "type", None) != "InternalIP":
319
+ continue
320
+ ip = getattr(address, "address", None)
321
+ if not isinstance(ip, str) or not ip or ip in seen_ips:
322
+ continue
323
+ seen_ips.add(ip)
324
+ private_ips.append(ip)
325
+ break
326
+
327
+ return private_ips
328
+
329
+
330
def _resolve_manifests_with_optional_render(
    manifests: Sequence[str],
    template_manifest_path: str | None = None,
    template_variables: dict[str, str] | None = None,
) -> tuple[list[dict[str, Any]], str | None]:
    """Map each manifest to the file that should actually be applied.

    When ``template_manifest_path`` is given it is rendered with
    ``template_variables`` via ``render_manifest``. The entry in ``manifests``
    equal to that path then points at the rendered file and carries the
    variables; every other entry points at itself with ``variables`` of None.

    Returns:
        ``(entries, rendered_path)`` where each entry has ``display_path``,
        ``actual_path`` and ``variables`` keys, and ``rendered_path`` is the
        rendered file (or ``None`` when nothing was rendered).
    """
    from .renderer import render_manifest

    manifest_variables = template_variables or {}
    rendered_template_path: str | None = (
        render_manifest(template_manifest_path, **manifest_variables) if template_manifest_path else None
    )

    entries: list[dict[str, Any]] = []
    for manifest in manifests:
        is_template = bool(template_manifest_path) and manifest == template_manifest_path
        entries.append(
            {
                "display_path": manifest,
                "actual_path": rendered_template_path if is_template else manifest,
                "variables": manifest_variables if is_template else None,
            }
        )

    return entries, rendered_template_path
353
+
354
+
355
def _fetch_manifest_release_bytes(url: str) -> bytes:
    """Download ``url`` and return the response body.

    The request uses ``MANIFEST_DOWNLOAD_TIMEOUT_SECONDS`` as its timeout.

    Raises:
        DeploymentError: If the download fails, including failures while the
            body is being read.
    """
    from http.client import HTTPException

    try:
        with urlopen(url, timeout=MANIFEST_DOWNLOAD_TIMEOUT_SECONDS) as response:
            return response.read()
    except (OSError, HTTPException) as error:
        # URLError is an OSError subclass. Catching OSError also covers socket
        # timeouts and resets raised by read(); HTTPException covers truncated
        # responses such as IncompleteRead.
        raise DeploymentError(f"Unable to download manifest from {url}: {error}") from error
361
+
362
+
363
+ def _write_manifest_file(manifest_path: str, content: bytes) -> None:
364
+ destination = Path(manifest_path)
365
+ destination.parent.mkdir(parents=True, exist_ok=True)
366
+
367
+ with NamedTemporaryFile(dir=destination.parent, delete=False) as temp_file:
368
+ temp_file.write(content)
369
+ temp_path = temp_file.name
370
+
371
+ os.replace(temp_path, destination)
372
+
373
+
374
def _ensure_monitoring_manifest(manifest_path: str, logger: Logger) -> None:
    """Make the local manifest identical to its release asset.

    The release asset is always downloaded. A missing local file is created
    from it, and an existing file whose bytes differ is overwritten.

    Raises:
        KeyError: If ``manifest_path`` has no entry in
            ``MONITORING_MANIFEST_RELEASE_URLS``.
        DeploymentError: If the local file cannot be read or the download fails.
    """
    release_url = MONITORING_MANIFEST_RELEASE_URLS[manifest_path]

    if not os.path.exists(manifest_path):
        logger.info("Local manifest %s not found. Downloading from %s ...", manifest_path, release_url)
        downloaded = _fetch_manifest_release_bytes(release_url)
        _write_manifest_file(manifest_path, downloaded)
        logger.info("Downloaded manifest to %s.", manifest_path)
        return

    logger.info("Found local manifest %s.", manifest_path)
    try:
        with open(manifest_path, "rb") as local_manifest:
            current_bytes = local_manifest.read()
    except OSError as error:
        raise DeploymentError(f"Unable to read local manifest {manifest_path}: {error}") from error

    downloaded = _fetch_manifest_release_bytes(release_url)
    if current_bytes != downloaded:
        logger.warning("Local manifest %s differs from release asset %s.", manifest_path, release_url)
        _write_manifest_file(manifest_path, downloaded)
        logger.info("Downloaded updated manifest to %s.", manifest_path)
        return

    logger.info("Local manifest %s matches release asset.", manifest_path)
399
+
400
+
401
def _ensure_monitoring_manifests(logger: Logger) -> None:
    """Run ``_ensure_monitoring_manifest`` for every path in the release-URL table."""
    for local_path in MONITORING_MANIFEST_RELEASE_URLS:
        _ensure_monitoring_manifest(local_path, logger)
404
+
405
+
406
def _deploy_manifests_with_optional_render(
    manifests: Sequence[str],
    kubeconfig_path: str | None = None,
    context: str | None = None,
    template_manifest_path: str | None = None,
    template_variables: dict[str, str] | None = None,
    logger: Logger | None = None,
    logger_name: str = "swchmonclient.deploy",
) -> int:
    """Apply each manifest in order, rendering the template manifest first.

    When ``template_manifest_path`` is set and also listed in ``manifests``,
    the rendered copy is what gets applied, while log lines keep naming the
    original path. A manifest that fails with ``DeploymentError`` is logged
    and the remaining manifests are still processed. The rendered file is
    removed before returning.

    Returns:
        0 when every manifest was applied, 1 when at least one failed.
    """
    from .logging_utils import configure_stdout_logger
    log = logger or configure_stdout_logger(logger_name)
    deployer = K8sDeployer(kubeconfig_path=kubeconfig_path, context=context)
    had_failure = False
    rendered_template_path: str | None = None

    try:
        entries, rendered_template_path = _resolve_manifests_with_optional_render(
            manifests=manifests,
            template_manifest_path=template_manifest_path,
            template_variables=template_variables,
        )

        for entry in entries:
            shown_path = entry["display_path"]
            substitutions = entry["variables"]

            if not substitutions:
                log.info("Deploying %s ...", shown_path)
            else:
                log.info("Deploying %s with variables:", shown_path)
                for key, value in substitutions.items():
                    log.info(" • %s: %s", key, value)

            try:
                applied = deployer.deploy_manifest(entry["actual_path"])
            except DeploymentError as error:
                log.error(" ERROR: %s", error)
                had_failure = True
                continue

            if not applied:
                log.info("No valid Kubernetes resources found in the manifest.")
                continue

            log.info(" Created or patched resources:")
            for reference in applied:
                log.info(
                    " - %s/%s (apiVersion=%s, namespace=%s)",
                    reference["kind"],
                    reference["name"],
                    reference["apiVersion"],
                    reference.get("namespace") or "<cluster-scoped>",
                )
    finally:
        # The rendered template is a throwaway file; remove it on every exit path.
        if rendered_template_path and os.path.exists(rendered_template_path):
            os.unlink(rendered_template_path)

    if had_failure:
        log.info("One or more manifests failed to deploy.")
        return 1

    log.info("All manifests deployed successfully.")
    return 0
477
+
478
+
479
def _undeploy_manifests_with_optional_render(
    manifests: Sequence[str],
    kubeconfig_path: str | None = None,
    context: str | None = None,
    namespace: str | None = None,
    template_manifest_path: str | None = None,
    template_variables: dict[str, str] | None = None,
    extra_resources_to_delete: Sequence[tuple[str, str, str, str]] = (),
    logger: Logger | None = None,
    logger_name: str = "swchmonclient.undeploy",
) -> int:
    """Delete the resources of each manifest, then the listed extra resources.

    The template manifest is rendered first, as on deploy, so the manifest
    being deleted matches what was applied. Each ``DeploymentError`` is logged
    and processing continues with the next item. Extra resources are
    ``(apiVersion, kind, name, log label)`` tuples, deleted in ``namespace``
    or in ``DEFAULT_MONITORING_NAMESPACE`` when none is given. The rendered
    file is removed before returning.

    Returns:
        0 when nothing failed, 1 otherwise.
    """
    from .logging_utils import configure_stdout_logger

    log = logger or configure_stdout_logger(logger_name)
    deployer = K8sDeployer(kubeconfig_path=kubeconfig_path, context=context)
    had_failure = False
    rendered_template_path: str | None = None

    try:
        entries, rendered_template_path = _resolve_manifests_with_optional_render(
            manifests=manifests,
            template_manifest_path=template_manifest_path,
            template_variables=template_variables,
        )

        for entry in entries:
            shown_path = entry["display_path"]
            substitutions = entry["variables"]

            if not substitutions:
                log.info("Undeploying %s ...", shown_path)
            else:
                log.info("Undeploying %s with variables:", shown_path)
                for key, value in substitutions.items():
                    log.info(" • %s: %s", key, value)

            try:
                removed = deployer.destroy_manifest(entry["actual_path"], namespace=namespace)
                log.info(" Done. %s resource(s) deleted.", removed)
            except DeploymentError as error:
                log.error(" ERROR: %s", error)
                had_failure = True

        for api_version, kind, name, resource_label in extra_resources_to_delete:
            log.info("Undeploying %s/%s ...", kind, name)
            try:
                removed = deployer.destroy_resource(
                    api_version=api_version,
                    kind=kind,
                    name=name,
                    namespace=namespace or DEFAULT_MONITORING_NAMESPACE,
                )
                log.info(" Done. %s %s resource(s) deleted.", removed, resource_label)
            except DeploymentError as error:
                log.error(" ERROR: %s", error)
                had_failure = True
    finally:
        # The rendered template is a throwaway file; remove it on every exit path.
        if rendered_template_path and os.path.exists(rendered_template_path):
            os.unlink(rendered_template_path)

    if had_failure:
        log.info("One or more manifests failed to undeploy.")
        return 1

    log.info("All manifests undeployed successfully.")
    return 0
546
+
547
+
548
def deploy_monitoring(
    kubeconfig_path: str | None,
    sat_file: str,
    optimusdb_url: str,
    logger: Logger | None = None,
) -> int:
    """Deploy the standard monitoring stack manifests.

    The local manifests are first synchronised with their release assets;
    ``sat_file`` and ``optimusdb_url`` are then passed as the variables used
    to render the template manifest.

    Returns:
        0 on success, 1 when manifest synchronisation or any deployment failed.
    """
    from .logging_utils import configure_stdout_logger

    log_name = "swchmonclient.deploy_monitoring"
    active_logger = logger or configure_stdout_logger(log_name)

    try:
        _ensure_monitoring_manifests(active_logger)
    except DeploymentError as error:
        active_logger.error(" ERROR: %s", error)
        active_logger.info("One or more manifests failed to deploy.")
        return 1

    render_variables = {
        "sat_file": sat_file,
        "optimusdb_url": optimusdb_url,
    }
    return _deploy_manifests_with_optional_render(
        manifests=MONITORING_DEPLOY_MANIFESTS,
        kubeconfig_path=kubeconfig_path,
        template_manifest_path=MONITORING_TEMPLATE_MANIFEST,
        template_variables=render_variables,
        logger=active_logger,
        logger_name=log_name,
    )
575
+
576
+
577
def undeploy_monitoring(
    kubeconfig_path: str | None,
    sat_file: str,
    optimusdb_url: str,
    namespace: str | None = None,
    logger: Logger | None = None,
) -> int:
    """Undeploy the standard monitoring stack manifests and cleanup resources.

    The local manifests are first synchronised with their release assets;
    ``sat_file`` and ``optimusdb_url`` are passed as the variables used to
    render the template manifest. The resources named in
    ``MONITORING_EXTRA_RESOURCES_TO_DELETE`` are removed afterwards.

    Returns:
        0 on success, 1 when manifest synchronisation or any deletion failed.
    """
    from .logging_utils import configure_stdout_logger

    log_name = "swchmonclient.undeploy_monitoring"
    active_logger = logger or configure_stdout_logger(log_name)

    try:
        _ensure_monitoring_manifests(active_logger)
    except DeploymentError as error:
        active_logger.error(" ERROR: %s", error)
        active_logger.info("One or more manifests failed to undeploy.")
        return 1

    render_variables = {
        "sat_file": sat_file,
        "optimusdb_url": optimusdb_url,
    }
    return _undeploy_manifests_with_optional_render(
        manifests=MONITORING_UNDEPLOY_MANIFESTS,
        kubeconfig_path=kubeconfig_path,
        namespace=namespace,
        template_manifest_path=MONITORING_TEMPLATE_MANIFEST,
        template_variables=render_variables,
        extra_resources_to_delete=MONITORING_EXTRA_RESOURCES_TO_DELETE,
        logger=active_logger,
        logger_name=log_name,
    )
607
+
608
+
609
def get_vm_private_ips(
    kubeconfig_path: str | None = None,
    context: str | None = None,
) -> list[str]:
    """Build a ``K8sDeployer`` from the given config and return its nodes' private IPs."""
    return K8sDeployer(kubeconfig_path=kubeconfig_path, context=context).get_vm_private_ips()
@@ -0,0 +1,10 @@
1
class SwchMonClientError(Exception):
    """Common base class for the errors defined by this package.

    Lets callers handle any package error with a single ``except`` clause; the
    specific subclasses below remain catchable individually and as ``Exception``.
    """


class DeploymentError(SwchMonClientError):
    """Raised when deployment or deletion actions fail."""


class ThreadManagementError(SwchMonClientError):
    """Raised when thread lifecycle operations fail."""


class MetricSubscriptionError(SwchMonClientError):
    """Raised when metric subscription lifecycle operations fail."""