wnm 0.0.9__py3-none-any.whl → 0.0.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wnm might be problematic. Click here for more details.

@@ -0,0 +1,529 @@
1
+ """
2
+ SystemdManager: Manage nodes via systemd services.
3
+
4
+ Handles node lifecycle operations using systemd unit files and systemctl commands.
5
+ Requires sudo privileges for systemctl and firewall operations.
6
+ """
7
+
8
+ import logging
9
+ import os
10
+ import re
11
+ import shutil
12
+ import subprocess
13
+ import time
14
+
15
+ from wnm.common import DEAD, RESTARTING, RUNNING, STOPPED, UPGRADING
16
+ from wnm.config import BOOTSTRAP_CACHE_DIR, IS_ROOT, LOG_DIR
17
+ from wnm.models import Node
18
+ from wnm.process_managers.base import NodeProcess, ProcessManager
19
+ from wnm.utils import (
20
+ get_antnode_version,
21
+ get_node_age,
22
+ read_node_metadata,
23
+ read_node_metrics,
24
+ )
25
+
26
+
27
class SystemdManager(ProcessManager):
    """Manage nodes as systemd services (system or user mode).

    When running as root (``IS_ROOT``), unit files live in
    ``/etc/systemd/system`` and commands go through ``sudo systemctl``.
    Otherwise unit files live in ``~/.config/systemd/user`` and commands go
    through ``systemctl --user``.
    """

    def __init__(self, session_factory=None, firewall_type: str = None):
        """
        Initialize SystemdManager.

        Args:
            session_factory: SQLAlchemy session factory (optional, for status updates)
            firewall_type: Type of firewall to use (defaults to auto-detect, null for non-root)
        """
        # Root users use system services in /etc/systemd/system/
        # Non-root users use user services in ~/.config/systemd/user/
        self.use_system_services = IS_ROOT

        # Non-root users should use null firewall by default (to avoid sudo)
        if not IS_ROOT and firewall_type is None:
            firewall_type = "null"

        super().__init__(firewall_type)
        self.S = session_factory

        if self.use_system_services:
            self.service_dir = "/etc/systemd/system"
            self.systemctl_cmd = ["sudo", "systemctl"]
        else:
            self.service_dir = os.path.expanduser("~/.config/systemd/user")
            self.systemctl_cmd = ["systemctl", "--user"]
            # Create user service directory if it doesn't exist
            os.makedirs(self.service_dir, exist_ok=True)

    def create_node(self, node: Node, binary_path: str) -> bool:
        """
        Create and start a new node as a systemd service.

        Creates the node's data and log directories, installs the binary as
        ``<root_dir>/antnode``, writes the unit file, reloads systemd and
        finally starts the service.

        Args:
            node: Node database record with configuration
            binary_path: Path to the antnode binary

        Returns:
            True if node was created and started successfully, False if any
            step failed (the failure is logged).
        """
        logging.info(f"Creating systemd node {node.id}")

        service_name = f"antnode{node.node_name}.service"
        log_dir = f"{LOG_DIR}/antnode{node.node_name}"
        # The unit's ExecStart points here, so the binary must be installed
        # under exactly this name regardless of what the source file is called.
        binary_in_node_dir = f"{node.root_dir}/antnode"

        # Create directories
        if self.use_system_services:
            # Root: use sudo for system paths
            try:
                subprocess.run(
                    ["sudo", "mkdir", "-p", node.root_dir, log_dir],
                    stdout=subprocess.PIPE,
                    check=True,
                )
            except subprocess.CalledProcessError as err:
                logging.error(f"Failed to create directories: {err}")
                return False
        else:
            # Non-root: create in user paths without sudo
            try:
                os.makedirs(node.root_dir, exist_ok=True)
                os.makedirs(log_dir, exist_ok=True)
            except OSError as err:
                logging.error(f"Failed to create directories: {err}")
                return False

        # Copy binary to node directory (explicit destination file name)
        if self.use_system_services:
            try:
                subprocess.run(
                    ["sudo", "cp", binary_path, binary_in_node_dir],
                    stdout=subprocess.PIPE,
                    check=True,
                )
            except subprocess.CalledProcessError as err:
                logging.error(f"Failed to copy binary: {err}")
                return False
        else:
            try:
                shutil.copy2(binary_path, binary_in_node_dir)
                os.chmod(binary_in_node_dir, 0o755)
            except (OSError, shutil.Error) as err:
                logging.error(f"Failed to copy binary: {err}")
                return False

        # System services run as a dedicated user ('ant' unless the node
        # record says otherwise) and the files must belong to that user.
        # User services run as the invoking user, so no User= line and no chown.
        if self.use_system_services:
            user = getattr(node, "user", "ant")
            try:
                subprocess.run(
                    ["sudo", "chown", "-R", f"{user}:{user}", node.root_dir, log_dir],
                    stdout=subprocess.PIPE,
                    check=True,
                )
            except subprocess.CalledProcessError as err:
                logging.error(f"Failed to change ownership: {err}")
                return False
            user_line = f"User={user}"
        else:
            user_line = ""

        # Build systemd service unit
        env_string = f'Environment="{node.environment}"' if node.environment else ""

        service_content = f"""[Unit]
Description=antnode{node.node_name}
[Service]
{env_string}
{user_line}
ExecStart={binary_in_node_dir} --bootstrap-cache-dir {BOOTSTRAP_CACHE_DIR} --root-dir {node.root_dir} --port {node.port} --enable-metrics-server --metrics-server-port {node.metrics_port} --log-output-dest {log_dir} --max-log-files 1 --max-archived-log-files 1 --rewards-address {node.wallet} {node.network}
Restart=always
#RestartSec=300
"""

        # Write service file
        service_path = f"{self.service_dir}/{service_name}"
        if self.use_system_services:
            # System services: use sudo to write to /etc/systemd/system
            try:
                subprocess.run(
                    ["sudo", "tee", service_path],
                    input=service_content,
                    text=True,
                    stdout=subprocess.PIPE,
                    check=True,
                )
            except subprocess.CalledProcessError as err:
                logging.error(f"Failed to write service file: {err}")
                return False
        else:
            # User services: write directly to ~/.config/systemd/user
            try:
                with open(service_path, "w") as f:
                    f.write(service_content)
            except OSError as err:
                logging.error(f"Failed to write service file: {err}")
                return False

        # Reload systemd so it picks up the new unit
        try:
            subprocess.run(
                self.systemctl_cmd + ["daemon-reload"],
                stdout=subprocess.PIPE,
                check=True,
            )
        except subprocess.CalledProcessError as err:
            logging.error(f"Failed to reload systemd: {err}")
            return False

        # Start the node
        return self.start_node(node)

    def start_node(self, node: Node) -> bool:
        """
        Start a systemd node.

        Args:
            node: Node database record

        Returns:
            True if node started successfully, False if systemctl exited
            non-zero or reported "Failed to start".
        """
        logging.info(f"Starting systemd node {node.id}")

        # stderr is merged into stdout so the failure text can be logged.
        result = subprocess.run(
            self.systemctl_cmd + ["start", node.service],
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        )
        # The exit status is the authoritative signal; the substring check is
        # kept for outputs that report failure textually.
        if result.returncode != 0 or "Failed to start" in result.stdout:
            logging.error(f"Failed to start node: {result.stdout}")
            return False

        # Open firewall port only once the service actually started
        self.enable_firewall_port(node.port)

        return True

    def stop_node(self, node: Node) -> bool:
        """
        Stop a systemd node.

        Args:
            node: Node database record

        Returns:
            True if node stopped successfully
        """
        logging.info(f"Stopping systemd node {node.id}")

        # Stop service
        try:
            subprocess.run(
                self.systemctl_cmd + ["stop", node.service],
                stdout=subprocess.PIPE,
                check=True,
            )
        except subprocess.CalledProcessError as err:
            logging.error(f"Failed to stop node: {err}")
            return False

        # Close firewall port
        self.disable_firewall_port(node.port)

        return True

    def restart_node(self, node: Node) -> bool:
        """
        Restart a systemd node.

        Args:
            node: Node database record

        Returns:
            True if node restarted successfully
        """
        logging.info(f"Restarting systemd node {node.id}")

        try:
            subprocess.run(
                self.systemctl_cmd + ["restart", node.service],
                stdout=subprocess.PIPE,
                check=True,
            )
        except subprocess.CalledProcessError as err:
            logging.error(f"Failed to restart node: {err}")
            return False

        return True

    def get_status(self, node: Node) -> NodeProcess:
        """
        Get current status of a systemd node.

        Queries ``systemctl show`` for ``MainPID`` and ``ActiveState`` and maps
        the state to RUNNING / STOPPED / "UNKNOWN". A missing root directory
        overrides the result with DEAD.

        Args:
            node: Node database record

        Returns:
            NodeProcess with current status; pid is None when systemd reports
            no main PID or when the query failed.
        """
        try:
            result = subprocess.run(
                self.systemctl_cmd
                + ["show", node.service, "--property=MainPID,ActiveState"],
                stdout=subprocess.PIPE,
                text=True,
                check=True,
            )

            # Output is one KEY=VALUE pair per line
            lines = result.stdout.strip().split("\n")
            state_info = dict(line.split("=", 1) for line in lines if "=" in line)

            pid = int(state_info.get("MainPID", 0))
            active_state = state_info.get("ActiveState", "unknown")

            # Map systemd state to our status
            if active_state == "active":
                status = RUNNING
            elif active_state in ("inactive", "failed"):
                status = STOPPED
            else:
                status = "UNKNOWN"

            # Without its root directory the node cannot be brought back
            if not os.path.isdir(node.root_dir):
                status = DEAD

            return NodeProcess(
                node_id=node.id, pid=pid if pid > 0 else None, status=status
            )

        except (subprocess.CalledProcessError, ValueError, KeyError) as err:
            logging.error(f"Failed to get node status: {err}")
            return NodeProcess(node_id=node.id, pid=None, status="UNKNOWN")

    def remove_node(self, node: Node) -> bool:
        """
        Stop and remove a systemd node.

        Removal is best-effort: each failing step is logged and the remaining
        steps still run.

        Args:
            node: Node database record

        Returns:
            Always True.
        """
        logging.info(f"Removing systemd node {node.id}")

        # Stop the node first (result intentionally ignored)
        self.stop_node(node)

        nodename = f"antnode{node.node_name}"
        log_path = f"{LOG_DIR}/{nodename}"

        # Remove data and logs
        if self.use_system_services:
            # System services: use sudo to remove
            try:
                subprocess.run(
                    ["sudo", "rm", "-rf", node.root_dir, log_path],
                    check=True,
                )
            except subprocess.CalledProcessError as err:
                logging.error(f"Failed to remove node data: {err}")
        else:
            # User services: remove as current user
            try:
                if os.path.exists(node.root_dir):
                    shutil.rmtree(node.root_dir)
                if os.path.exists(log_path):
                    shutil.rmtree(log_path)
            except (OSError, shutil.Error) as err:
                logging.error(f"Failed to remove node data: {err}")

        # Remove service file
        service_path = f"{self.service_dir}/{node.service}"
        if self.use_system_services:
            # System services: use sudo to remove
            try:
                subprocess.run(
                    ["sudo", "rm", "-f", service_path],
                    check=True,
                )
            except subprocess.CalledProcessError as err:
                logging.error(f"Failed to remove service file: {err}")
        else:
            # User services: remove as current user
            try:
                if os.path.exists(service_path):
                    os.remove(service_path)
            except OSError as err:
                logging.error(f"Failed to remove service file: {err}")

        # Reload systemd so it forgets the removed unit
        try:
            subprocess.run(
                self.systemctl_cmd + ["daemon-reload"],
                stdout=subprocess.PIPE,
                check=True,
            )
        except subprocess.CalledProcessError as err:
            logging.error(f"Failed to reload systemd: {err}")

        return True

    def survey_nodes(self, machine_config) -> list:
        """
        Survey all systemd-managed antnode services.

        Scans systemd service directory (system or user) for files named
        exactly ``antnode<digits>.service`` and collects their configuration
        and current status. Services whose unit file cannot be parsed are
        skipped with a warning.

        Args:
            machine_config: Machine configuration object (its ``host``
                attribute is read)

        Returns:
            List of node dictionaries ready for database insertion
        """
        # (service file name, node number string) pairs
        services = []

        if os.path.exists(self.service_dir):
            try:
                for file in os.listdir(self.service_dir):
                    # fullmatch so backups like antnode1.service.bak are ignored
                    match = re.fullmatch(r"antnode([\d]+)\.service", file)
                    if match:
                        services.append((file, match.group(1)))
            except PermissionError as e:
                logging.error(f"Permission denied reading {self.service_dir}: {e}")
                return []
            except Exception as e:
                logging.error(f"Error listing systemd services: {e}")
                return []

        if not services:
            logging.info("No systemd antnode services found")
            return []

        logging.info(f"Found {len(services)} systemd services to survey")

        details = []
        for service_name, node_name in services:
            logging.debug(f"{time.strftime('%Y-%m-%d %H:%M')} surveying {service_name}")

            # Read configuration from systemd service file
            config = self._read_service_file(service_name, machine_config)
            if not config:
                logging.warning(f"Could not read config from {service_name}")
                continue

            card = {
                "node_name": node_name,
                "service": service_name,
                "timestamp": int(time.time()),
                "host": machine_config.host or "127.0.0.1",
                "method": "systemd",
                "layout": "1",
            }
            card.update(config)

            # Check if node is running by querying metrics port
            metadata = read_node_metadata(card["host"], card["metrics_port"])

            if isinstance(metadata, dict) and metadata.get("status") == RUNNING:
                # Node is running - collect metadata and metrics
                card.update(metadata)
                card.update(read_node_metrics(card["host"], card["metrics_port"]))
            else:
                # Node is stopped
                if not os.path.isdir(card.get("root_dir", "")):
                    card["status"] = DEAD
                    card["version"] = ""
                else:
                    card["status"] = STOPPED
                    card["version"] = get_antnode_version(card.get("binary", ""))
                card["peer_id"] = ""
                card["records"] = 0
                card["uptime"] = 0
                card["shunned"] = 0

            card["age"] = get_node_age(card.get("root_dir", ""))
            card["host"] = machine_config.host  # Ensure we use machine config host

            details.append(card)

        return details

    def _read_service_file(self, service_name: str, machine_config) -> dict:
        """
        Read node configuration from a systemd service file.

        Args:
            service_name: Name of the service file (e.g., "antnode0001.service")
            machine_config: Machine configuration object (its ``host``
                attribute is used as the default host)

        Returns:
            Dictionary with keys id, binary, user, root_dir, port,
            metrics_port, wallet, network, host and environment; or an empty
            dict if the file cannot be read or any required field is missing.
        """
        details = {}
        service_path = f"{self.service_dir}/{service_name}"

        try:
            with open(service_path, "r") as file:
                data = file.read()

            details["id"] = int(re.findall(r"antnode(\d+)", service_name)[0])
            details["binary"] = re.findall(r"ExecStart=([^ ]+)", data)[0]
            # User field may be empty for user services
            user_matches = re.findall(r"User=(\w+)", data)
            details["user"] = user_matches[0] if user_matches else os.getenv("USER", "nobody")
            # \S+ rather than [\w/]+ so paths containing '.' or '-'
            # (e.g. ~/.local/share/...) are captured in full.
            details["root_dir"] = re.findall(r"--root-dir (\S+)", data)[0]
            details["port"] = int(re.findall(r"--port (\d+)", data)[0])
            details["metrics_port"] = int(
                re.findall(r"--metrics-server-port (\d+)", data)[0]
            )
            details["wallet"] = re.findall(r"--rewards-address ([^ ]+)", data)[0]
            details["network"] = re.findall(
                r"--rewards-address [^ ]+ ([\w\-]+)", data
            )[0]

            # Check for IP listen address
            ip_matches = re.findall(r"--ip ([^ ]+)", data)
            if ip_matches:
                ip = ip_matches[0]
                # If wildcard listen address, use default
                details["host"] = machine_config.host if ip == "0.0.0.0" else ip
            else:
                details["host"] = machine_config.host

            # Check for environment variables
            env_matches = re.findall(r'Environment="(.+)"', data)
            details["environment"] = env_matches[0] if env_matches else ""

        except Exception as e:
            logging.debug(f"Error reading service file {service_path}: {e}")
            # Never hand back a half-filled dict: callers treat any non-empty
            # result as complete and index required keys directly.
            return {}

        return details