wnm 0.0.8__py3-none-any.whl → 0.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wnm might be problematic. Click here for more details.

@@ -0,0 +1,443 @@
1
+ """
2
+ SystemdManager: Manage nodes via systemd services.
3
+
4
+ Handles node lifecycle operations using systemd unit files and systemctl commands.
5
+ Requires sudo privileges for systemctl and firewall operations.
6
+ """
7
+
8
+ import logging
9
+ import os
10
+ import re
11
+ import subprocess
12
+ import time
13
+
14
+ from wnm.common import DEAD, RESTARTING, RUNNING, STOPPED, UPGRADING
15
+ from wnm.config import BOOTSTRAP_CACHE_DIR, LOG_DIR
16
+ from wnm.models import Node
17
+ from wnm.process_managers.base import NodeProcess, ProcessManager
18
+ from wnm.utils import (
19
+ get_antnode_version,
20
+ get_node_age,
21
+ read_node_metadata,
22
+ read_node_metrics,
23
+ )
24
+
25
+
26
class SystemdManager(ProcessManager):
    """Manage nodes as systemd services.

    Each node is backed by a unit file ``antnode<node_name>.service`` in
    ``/etc/systemd/system``. Lifecycle operations shell out to ``systemctl``
    (through ``sudo`` for anything that changes state) and open or close the
    node's port via the firewall helpers called on ``self``.
    """

    # Directory where unit files are written to and surveyed from.
    _UNIT_DIR = "/etc/systemd/system"

    # Unit files owned by this manager, e.g. "antnode0001.service". Used with
    # fullmatch() so drop-in directories ("antnode0001.service.d") and backup
    # copies ("antnode0001.service.bak") are not mistaken for nodes.
    _SERVICE_FILE_RE = re.compile(r"antnode(\d+)\.service")

    def __init__(self, session_factory=None, firewall_type: str = None):
        """
        Initialize SystemdManager.

        Args:
            session_factory: SQLAlchemy session factory (optional, for status updates)
            firewall_type: Type of firewall to use (defaults to auto-detect)
        """
        super().__init__(firewall_type)
        self.S = session_factory

    @staticmethod
    def _run_checked(command, failure_message: str, **run_kwargs) -> bool:
        """
        Run a command and report whether it exited successfully.

        Args:
            command: Argument list passed to ``subprocess.run``
            failure_message: Prefix for the error logged when the command fails
            **run_kwargs: Extra keyword arguments forwarded to ``subprocess.run``

        Returns:
            True if the command exited with status 0, False otherwise
        """
        try:
            subprocess.run(command, check=True, **run_kwargs)
        except (subprocess.CalledProcessError, OSError) as err:
            # OSError covers a missing executable (e.g. no sudo on PATH), which
            # would otherwise escape from methods documented to return a bool.
            logging.error(f"{failure_message}: {err}")
            return False
        return True

    def create_node(self, node: Node, binary_path: str) -> bool:
        """
        Create and start a new node as a systemd service.

        Creates the node's data and log directories, copies the binary into
        the data directory, hands both directories to the service user, writes
        the unit file, reloads systemd and finally starts the service. The
        first failing step aborts the whole operation.

        Args:
            node: Node database record with configuration
            binary_path: Path to the antnode binary

        Returns:
            True if node was created and started successfully
        """
        logging.info(f"Creating systemd node {node.id}")

        service_name = f"antnode{node.node_name}.service"
        log_dir = f"{LOG_DIR}/antnode{node.node_name}"
        # Records without a "user" attribute run as the default "ant" account.
        user = getattr(node, "user", "ant")

        # Filesystem preparation; order matters (mkdir -> cp -> chown).
        preparation_steps = (
            (
                ["sudo", "mkdir", "-p", node.root_dir, log_dir],
                "Failed to create directories",
            ),
            (
                ["sudo", "cp", binary_path, node.root_dir],
                "Failed to copy binary",
            ),
            (
                ["sudo", "chown", "-R", f"{user}:{user}", node.root_dir, log_dir],
                "Failed to change ownership",
            ),
        )
        for command, failure_message in preparation_steps:
            if not self._run_checked(
                command, failure_message, stdout=subprocess.PIPE
            ):
                return False

        # Build systemd service unit. The ExecStart layout is what
        # _read_service_file() parses back during a survey.
        env_string = f'Environment="{node.environment}"' if node.environment else ""
        binary_in_node_dir = f"{node.root_dir}/antnode"

        service_content = f"""[Unit]
Description=antnode{node.node_name}
[Service]
{env_string}
User={user}
ExecStart={binary_in_node_dir} --bootstrap-cache-dir {BOOTSTRAP_CACHE_DIR} --root-dir {node.root_dir} --port {node.port} --enable-metrics-server --metrics-server-port {node.metrics_port} --log-output-dest {log_dir} --max-log-files 1 --max-archived-log-files 1 --rewards-address {node.wallet} {node.network}
Restart=always
#RestartSec=300
"""

        # Write the unit through "sudo tee" because the target directory is
        # root-owned; stdout is captured so tee's echo is not printed.
        if not self._run_checked(
            ["sudo", "tee", f"{self._UNIT_DIR}/{service_name}"],
            "Failed to write service file",
            input=service_content,
            text=True,
            stdout=subprocess.PIPE,
        ):
            return False

        # Make systemd pick up the new unit file.
        if not self._run_checked(
            ["sudo", "systemctl", "daemon-reload"],
            "Failed to reload systemd",
            stdout=subprocess.PIPE,
        ):
            return False

        return self.start_node(node)

    def start_node(self, node: Node) -> bool:
        """
        Start a systemd node and open its firewall port.

        Args:
            node: Node database record

        Returns:
            True if node started successfully
        """
        logging.info(f"Starting systemd node {node.id}")

        try:
            result = subprocess.run(
                ["sudo", "systemctl", "start", node.service],
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                text=True,
            )
        except OSError as err:
            logging.error(f"Failed to start node: {err}")
            return False

        # Trust the exit status first: failures such as an unknown unit do not
        # print "Failed to start" but still exit non-zero. The text check is
        # kept for output that reports a failure despite a zero exit status.
        if result.returncode != 0 or "Failed to start" in result.stdout:
            logging.error(f"Failed to start node: {result.stdout}")
            return False

        # Only expose the port once the service is actually up.
        self.enable_firewall_port(node.port)

        return True

    def stop_node(self, node: Node) -> bool:
        """
        Stop a systemd node and close its firewall port.

        Args:
            node: Node database record

        Returns:
            True if node stopped successfully
        """
        logging.info(f"Stopping systemd node {node.id}")

        if not self._run_checked(
            ["sudo", "systemctl", "stop", node.service],
            "Failed to stop node",
            stdout=subprocess.PIPE,
        ):
            return False

        self.disable_firewall_port(node.port)

        return True

    def restart_node(self, node: Node) -> bool:
        """
        Restart a systemd node.

        Args:
            node: Node database record

        Returns:
            True if node restarted successfully
        """
        logging.info(f"Restarting systemd node {node.id}")

        return self._run_checked(
            ["sudo", "systemctl", "restart", node.service],
            "Failed to restart node",
            stdout=subprocess.PIPE,
        )

    def get_status(self, node: Node) -> NodeProcess:
        """
        Get current status of a systemd node.

        Args:
            node: Node database record

        Returns:
            NodeProcess with the main PID (None when systemd reports 0) and a
            status of RUNNING, STOPPED, DEAD (root directory missing) or
            "UNKNOWN" (any other systemd state, or the query failed)
        """
        try:
            result = subprocess.run(
                ["systemctl", "show", node.service, "--property=MainPID,ActiveState"],
                stdout=subprocess.PIPE,
                text=True,
                check=True,
            )

            # Output is one "Key=Value" pair per line.
            properties = dict(
                line.split("=", 1)
                for line in result.stdout.strip().split("\n")
                if "=" in line
            )
            pid = int(properties.get("MainPID", 0))
            active_state = properties.get("ActiveState", "unknown")

            # Map systemd state to our status
            if active_state == "active":
                status = RUNNING
            elif active_state in ("inactive", "failed"):
                status = STOPPED
            else:
                status = "UNKNOWN"

            # A node without its root directory is DEAD no matter what
            # systemd reports for the unit.
            if not os.path.isdir(node.root_dir):
                status = DEAD

            return NodeProcess(
                node_id=node.id, pid=pid if pid > 0 else None, status=status
            )

        except (subprocess.CalledProcessError, OSError, ValueError) as err:
            logging.error(f"Failed to get node status: {err}")
            return NodeProcess(node_id=node.id, pid=None, status="UNKNOWN")

    def remove_node(self, node: Node) -> bool:
        """
        Stop and remove a systemd node.

        Every cleanup step is attempted even if an earlier one fails, so a
        partially broken node is still cleaned up as far as possible.

        Args:
            node: Node database record

        Returns:
            True if the node's data, logs and unit file were all removed;
            False if any of those removals failed
        """
        logging.info(f"Removing systemd node {node.id}")

        # Best effort: the service may already be stopped or unknown to
        # systemd, which must not prevent the cleanup below.
        self.stop_node(node)

        nodename = f"antnode{node.node_name}"

        data_removed = self._run_checked(
            ["sudo", "rm", "-rf", node.root_dir, f"{LOG_DIR}/{nodename}"],
            "Failed to remove node data",
        )

        unit_removed = self._run_checked(
            ["sudo", "rm", "-f", f"{self._UNIT_DIR}/{node.service}"],
            "Failed to remove service file",
        )

        # A failed reload is logged but does not affect the result: the files
        # are already gone at this point.
        self._run_checked(
            ["sudo", "systemctl", "daemon-reload"],
            "Failed to reload systemd",
            stdout=subprocess.PIPE,
        )

        return data_removed and unit_removed

    def survey_nodes(self, machine_config) -> list:
        """
        Survey all systemd-managed antnode services.

        Scans /etc/systemd/system for antnode<digits>.service files and
        collects their configuration and current status. Unit files that
        cannot be parsed are skipped with a warning.

        Args:
            machine_config: Machine configuration object (its ``host``
                attribute is used as each node's host)

        Returns:
            List of node dictionaries ready for database insertion
        """
        # (unit file name, node name) pairs found on disk.
        services = []

        if os.path.exists(self._UNIT_DIR):
            try:
                for entry in os.listdir(self._UNIT_DIR):
                    match = self._SERVICE_FILE_RE.fullmatch(entry)
                    if match:
                        services.append((entry, match.group(1)))
            except PermissionError as e:
                logging.error(f"Permission denied reading {self._UNIT_DIR}: {e}")
                return []
            except OSError as e:
                logging.error(f"Error listing systemd services: {e}")
                return []

        if not services:
            logging.info("No systemd antnode services found")
            return []

        logging.info(f"Found {len(services)} systemd services to survey")

        details = []
        for service_name, node_name in services:
            logging.debug(f"{time.strftime('%Y-%m-%d %H:%M')} surveying {service_name}")

            # Read configuration from systemd service file
            config = self._read_service_file(service_name, machine_config)
            if not config:
                logging.warning(f"Could not read config from {service_name}")
                continue

            card = {
                "node_name": node_name,
                "service": service_name,
                "timestamp": int(time.time()),
                "host": machine_config.host or "127.0.0.1",
                "method": "systemd",
                "layout": "1",
            }
            card.update(config)

            # The parsed config may carry an empty host (machine_config.host
            # unset); fall back to loopback for the metrics queries.
            probe_host = card["host"] or "127.0.0.1"

            # Check if node is running by querying metrics port
            metadata = read_node_metadata(probe_host, card["metrics_port"])

            if isinstance(metadata, dict) and metadata.get("status") == RUNNING:
                # Node is running - collect metadata and metrics
                card.update(metadata)
                card.update(read_node_metrics(probe_host, card["metrics_port"]))
            else:
                # Node is not answering: DEAD if its data directory is gone,
                # otherwise STOPPED with the version read from the binary.
                if not os.path.isdir(card.get("root_dir", "")):
                    card["status"] = DEAD
                    card["version"] = ""
                else:
                    card["status"] = STOPPED
                    card["version"] = get_antnode_version(card.get("binary", ""))
                card["peer_id"] = ""
                card["records"] = 0
                card["uptime"] = 0
                card["shunned"] = 0

            card["age"] = get_node_age(card.get("root_dir", ""))
            card["host"] = machine_config.host  # Ensure we use machine config host

            details.append(card)

        return details

    def _read_service_file(self, service_name: str, machine_config) -> dict:
        """
        Read node configuration from a systemd service file.

        Args:
            service_name: Name of the service file (e.g., "antnode0001.service")
            machine_config: Machine configuration object

        Returns:
            Dictionary with keys id, binary, user, root_dir, port,
            metrics_port, wallet, network, host and environment, or an empty
            dict if the file cannot be read or any required field is missing
        """
        service_path = f"{self._UNIT_DIR}/{service_name}"

        try:
            with open(service_path, "r") as file:
                data = file.read()

            def first(pattern: str) -> str:
                # Raises IndexError when the unit file lacks the field.
                return re.findall(pattern, data)[0]

            details = {
                "id": int(re.findall(r"antnode(\d+)", service_name)[0]),
                "binary": first(r"ExecStart=([^ ]+)"),
                "user": first(r"User=(\w+)"),
                # \S+ rather than [\w/]+ so directories containing "-" or "."
                # are not truncated at the first such character.
                "root_dir": first(r"--root-dir (\S+)"),
                "port": int(first(r"--port (\d+)")),
                "metrics_port": int(first(r"--metrics-server-port (\d+)")),
                "wallet": first(r"--rewards-address ([^ ]+)"),
                # The network is the bare word following the wallet address.
                "network": first(r"--rewards-address [^ ]+ ([\w\-]+)"),
            }
        except (OSError, IndexError, ValueError) as e:
            # Return nothing rather than a half-filled dict: callers index
            # required keys such as "metrics_port" directly.
            logging.debug(f"Error reading service file {service_path}: {e}")
            return {}

        # Check for IP listen address
        ip_matches = re.findall(r"--ip ([^ ]+)", data)
        if ip_matches:
            ip = ip_matches[0]
            # If wildcard listen address, use default
            details["host"] = machine_config.host if ip == "0.0.0.0" else ip
        else:
            details["host"] = machine_config.host

        # Check for environment variables
        env_matches = re.findall(r'Environment="(.+)"', data)
        details["environment"] = env_matches[0] if env_matches else ""

        return details