wnm 0.0.8__py3-none-any.whl → 0.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of wnm might be problematic. Click here for more details.
- wnm/__init__.py +1 -1
- wnm/__main__.py +206 -953
- wnm/actions.py +45 -0
- wnm/common.py +21 -0
- wnm/config.py +653 -1
- wnm/decision_engine.py +388 -0
- wnm/executor.py +1292 -0
- wnm/firewall/__init__.py +13 -0
- wnm/firewall/base.py +71 -0
- wnm/firewall/factory.py +95 -0
- wnm/firewall/null_firewall.py +71 -0
- wnm/firewall/ufw_manager.py +118 -0
- wnm/migration.py +42 -0
- wnm/models.py +389 -122
- wnm/process_managers/__init__.py +23 -0
- wnm/process_managers/base.py +203 -0
- wnm/process_managers/docker_manager.py +371 -0
- wnm/process_managers/factory.py +83 -0
- wnm/process_managers/launchd_manager.py +592 -0
- wnm/process_managers/setsid_manager.py +340 -0
- wnm/process_managers/systemd_manager.py +443 -0
- wnm/reports.py +286 -0
- wnm/utils.py +403 -0
- wnm-0.0.10.dist-info/METADATA +316 -0
- wnm-0.0.10.dist-info/RECORD +28 -0
- {wnm-0.0.8.dist-info → wnm-0.0.10.dist-info}/WHEEL +1 -1
- wnm-0.0.8.dist-info/METADATA +0 -93
- wnm-0.0.8.dist-info/RECORD +0 -9
- {wnm-0.0.8.dist-info → wnm-0.0.10.dist-info}/entry_points.txt +0 -0
- {wnm-0.0.8.dist-info → wnm-0.0.10.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,443 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SystemdManager: Manage nodes via systemd services.
|
|
3
|
+
|
|
4
|
+
Handles node lifecycle operations using systemd unit files and systemctl commands.
|
|
5
|
+
Requires sudo privileges for systemctl and firewall operations.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
import os
|
|
10
|
+
import re
|
|
11
|
+
import subprocess
|
|
12
|
+
import time
|
|
13
|
+
|
|
14
|
+
from wnm.common import DEAD, RESTARTING, RUNNING, STOPPED, UPGRADING
|
|
15
|
+
from wnm.config import BOOTSTRAP_CACHE_DIR, LOG_DIR
|
|
16
|
+
from wnm.models import Node
|
|
17
|
+
from wnm.process_managers.base import NodeProcess, ProcessManager
|
|
18
|
+
from wnm.utils import (
|
|
19
|
+
get_antnode_version,
|
|
20
|
+
get_node_age,
|
|
21
|
+
read_node_metadata,
|
|
22
|
+
read_node_metrics,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class SystemdManager(ProcessManager):
|
|
27
|
+
"""Manage nodes as systemd services"""
|
|
28
|
+
|
|
29
|
+
def __init__(self, session_factory=None, firewall_type: str = None):
    """
    Set up a systemd-backed process manager.

    Args:
        session_factory: Optional SQLAlchemy session factory (used for
            status updates); kept on the instance as ``self.S``.
        firewall_type: Firewall backend to use, forwarded unchanged to the
            base-class initializer (``None`` means auto-detect).
    """
    # The base class owns the firewall selection; this class only keeps
    # the session factory.
    super().__init__(firewall_type)
    self.S = session_factory
|
|
39
|
+
|
|
40
|
+
def create_node(self, node: Node, binary_path: str) -> bool:
    """
    Provision a node as a systemd service and start it.

    Steps, in order: create the data and log directories, copy the antnode
    binary into the node's root directory, hand ownership to the service
    user, write the unit file under /etc/systemd/system, reload systemd and
    finally delegate to ``start_node``. The first failing step aborts the
    whole operation.

    Args:
        node: Node database record with configuration
        binary_path: Path to the antnode binary to copy into the node dir

    Returns:
        False as soon as any provisioning command exits non-zero,
        otherwise whatever ``start_node`` returns.
    """
    logging.info(f"Creating systemd node {node.id}")

    def run_step(command, failure, **extra) -> bool:
        """Run one command; log ``failure`` and return False on non-zero exit."""
        try:
            subprocess.run(command, stdout=subprocess.PIPE, check=True, **extra)
        except subprocess.CalledProcessError as err:
            logging.error(f"{failure}: {err}")
            return False
        return True

    unit_name = f"antnode{node.node_name}.service"
    log_dir = f"{LOG_DIR}/antnode{node.node_name}"

    # Data and log directories
    if not run_step(
        ["sudo", "mkdir", "-p", node.root_dir, log_dir],
        "Failed to create directories",
    ):
        return False

    # Each node runs its own copy of the binary
    if not run_step(
        ["sudo", "cp", binary_path, node.root_dir],
        "Failed to copy binary",
    ):
        return False

    # Directories were created via sudo, so give them to the service user
    user = getattr(node, "user", "ant")
    if not run_step(
        ["sudo", "chown", "-R", f"{user}:{user}", node.root_dir, log_dir],
        "Failed to change ownership",
    ):
        return False

    # Assemble the unit file line by line
    env_string = f'Environment="{node.environment}"' if node.environment else ""
    binary_in_node_dir = f"{node.root_dir}/antnode"
    unit_lines = [
        "[Unit]",
        f"Description=antnode{node.node_name}",
        "[Service]",
        env_string,
        f"User={user}",
        f"ExecStart={binary_in_node_dir} --bootstrap-cache-dir {BOOTSTRAP_CACHE_DIR} --root-dir {node.root_dir} --port {node.port} --enable-metrics-server --metrics-server-port {node.metrics_port} --log-output-dest {log_dir} --max-log-files 1 --max-archived-log-files 1 --rewards-address {node.wallet} {node.network}",
        "Restart=always",
        "#RestartSec=300",
    ]
    unit_text = "\n".join(unit_lines) + "\n"

    # `sudo tee` lets an unprivileged process write into /etc/systemd/system
    if not run_step(
        ["sudo", "tee", f"/etc/systemd/system/{unit_name}"],
        "Failed to write service file",
        input=unit_text,
        text=True,
    ):
        return False

    # systemd must re-read its unit files before the new unit can start
    if not run_step(
        ["sudo", "systemctl", "daemon-reload"],
        "Failed to reload systemd",
    ):
        return False

    return self.start_node(node)
|
|
131
|
+
|
|
132
|
+
def start_node(self, node: Node) -> bool:
    """
    Start a systemd node and open its firewall port.

    Args:
        node: Node database record; ``node.service`` names the unit to
            start and ``node.port`` is the port opened afterwards.

    Returns:
        True if ``systemctl start`` exited successfully, False otherwise.
        The firewall port is only opened after a successful start.
    """
    logging.info(f"Starting systemd node {node.id}")

    # stderr is folded into stdout so the complete systemctl message can be
    # logged in one piece. `check` is deliberately not set: the exit status
    # is inspected below so the captured output can go into the log.
    result = subprocess.run(
        ["sudo", "systemctl", "start", node.service],
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )

    # A non-zero exit status is the authoritative failure signal; the text
    # match is kept as well because not every failure message contains
    # "Failed to start" (e.g. "Job for ... failed").
    if result.returncode != 0 or "Failed to start" in result.stdout:
        logging.error(f"Failed to start node: {result.stdout}")
        return False

    # Open firewall port
    self.enable_firewall_port(node.port)

    return True
|
|
163
|
+
|
|
164
|
+
def stop_node(self, node: Node) -> bool:
    """
    Stop a systemd node and close its firewall port.

    Args:
        node: Node database record; ``node.service`` names the unit and
            ``node.port`` is the port closed after the stop.

    Returns:
        True if ``systemctl stop`` exited with status zero, False otherwise.
        The firewall port is left untouched when the stop fails.
    """
    logging.info(f"Stopping systemd node {node.id}")

    command = ["sudo", "systemctl", "stop", node.service]
    try:
        subprocess.run(command, stdout=subprocess.PIPE, check=True)
    except subprocess.CalledProcessError as err:
        logging.error(f"Failed to stop node: {err}")
        return False
    else:
        # Only close the port once the service is confirmed stopped
        self.disable_firewall_port(node.port)
        return True
|
|
191
|
+
|
|
192
|
+
def restart_node(self, node: Node) -> bool:
    """
    Restart a systemd node via ``systemctl restart``.

    Firewall rules are not touched by a restart.

    Args:
        node: Node database record; ``node.service`` names the unit.

    Returns:
        True if the restart command exited with status zero, False otherwise.
    """
    logging.info(f"Restarting systemd node {node.id}")

    command = ["sudo", "systemctl", "restart", node.service]
    try:
        subprocess.run(command, stdout=subprocess.PIPE, check=True)
    except subprocess.CalledProcessError as err:
        logging.error(f"Failed to restart node: {err}")
        return False
    else:
        return True
|
|
215
|
+
|
|
216
|
+
def get_status(self, node: Node) -> NodeProcess:
    """
    Report the current state of a systemd node.

    Queries ``systemctl show`` for the unit's ``MainPID`` and
    ``ActiveState`` and translates them into this package's status values.
    A node whose root directory is missing is reported as DEAD regardless
    of what systemd says.

    Args:
        node: Node database record

    Returns:
        NodeProcess carrying the node id, the main PID (None when systemd
        reports 0) and the mapped status. If the query or parsing fails,
        the status is "UNKNOWN" and the PID is None.
    """
    try:
        completed = subprocess.run(
            ["systemctl", "show", node.service, "--property=MainPID,ActiveState"],
            stdout=subprocess.PIPE,
            text=True,
            check=True,
        )

        # Output is one KEY=VALUE pair per line
        properties = {}
        for entry in completed.stdout.strip().split("\n"):
            if "=" in entry:
                key, value = entry.split("=", 1)
                properties[key] = value

        main_pid = int(properties.get("MainPID", 0))
        unit_state = properties.get("ActiveState", "unknown")

        if not os.path.isdir(node.root_dir):
            # A missing data directory overrides whatever systemd reports
            status = DEAD
        elif unit_state == "active":
            status = RUNNING
        elif unit_state in ("inactive", "failed"):
            status = STOPPED
        else:
            status = "UNKNOWN"

        return NodeProcess(
            node_id=node.id,
            pid=main_pid if main_pid > 0 else None,
            status=status,
        )

    except (subprocess.CalledProcessError, ValueError, KeyError) as err:
        logging.error(f"Failed to get node status: {err}")
        return NodeProcess(node_id=node.id, pid=None, status="UNKNOWN")
|
|
260
|
+
|
|
261
|
+
def remove_node(self, node: Node) -> bool:
    """
    Stop a systemd node and delete everything belonging to it.

    The node is stopped, its root and log directories are deleted, its
    unit file is removed from /etc/systemd/system and systemd is reloaded.
    Every step is best-effort: a failing step is logged and the remaining
    steps still run.

    Args:
        node: Node database record

    Returns:
        Always True; individual failures are only reported through the log.
    """
    logging.info(f"Removing systemd node {node.id}")

    def best_effort(command, failure, **options) -> None:
        """Run one cleanup command, logging (not raising) a non-zero exit."""
        try:
            subprocess.run(command, check=True, **options)
        except subprocess.CalledProcessError as err:
            logging.error(f"{failure}: {err}")

    # The result of the stop is intentionally ignored
    self.stop_node(node)

    unit_base = f"antnode{node.node_name}"

    # Data and logs
    best_effort(
        ["sudo", "rm", "-rf", node.root_dir, f"{LOG_DIR}/{unit_base}"],
        "Failed to remove node data",
    )

    # Unit file
    best_effort(
        ["sudo", "rm", "-f", f"/etc/systemd/system/{node.service}"],
        "Failed to remove service file",
    )

    # Make systemd forget the deleted unit
    best_effort(
        ["sudo", "systemctl", "daemon-reload"],
        "Failed to reload systemd",
        stdout=subprocess.PIPE,
    )

    return True
|
|
307
|
+
|
|
308
|
+
def survey_nodes(self, machine_config) -> list:
    """
    Survey all systemd-managed antnode services.

    Scans /etc/systemd/system for files named exactly
    ``antnode<digits>.service``, reads each unit's configuration and
    probes the node's metrics port to find out whether it is running.

    Args:
        machine_config: Machine configuration object; its ``host``
            attribute is used to reach the nodes and is stored on every
            returned card.

    Returns:
        List of node dictionaries ready for database insertion. Empty when
        the directory is missing or unreadable, or when no unit is found.
        Units whose service file cannot be parsed are skipped.
    """
    systemd_dir = "/etc/systemd/system"
    unit_pattern = re.compile(r"antnode(\d+)\.service")

    # EAFP: list the directory directly instead of testing for it first
    try:
        entries = os.listdir(systemd_dir)
    except FileNotFoundError:
        entries = []
    except PermissionError as e:
        logging.error(f"Permission denied reading {systemd_dir}: {e}")
        return []
    except OSError as e:
        logging.error(f"Error listing systemd services: {e}")
        return []

    # fullmatch, not match: a leftover such as "antnode1.service.bak" starts
    # with a valid unit name but is not a unit and must not be surveyed.
    units = []
    for entry in entries:
        matched = unit_pattern.fullmatch(entry)
        if matched:
            units.append((entry, matched.group(1)))

    if not units:
        logging.info("No systemd antnode services found")
        return []

    logging.info(f"Found {len(units)} systemd services to survey")

    details = []
    for service_name, node_name in units:
        logging.debug(f"{time.strftime('%Y-%m-%d %H:%M')} surveying {service_name}")

        # Read configuration from systemd service file
        config = self._read_service_file(service_name, machine_config)

        # The metrics port is indexed directly below, so a config without
        # it (empty or only partially parsed) cannot be surveyed.
        if not config or "metrics_port" not in config:
            logging.warning(f"Could not read config from {service_name}")
            continue

        card = {
            "node_name": node_name,
            "service": service_name,
            "timestamp": int(time.time()),
            "host": machine_config.host or "127.0.0.1",
            "method": "systemd",
            "layout": "1",
        }
        card.update(config)

        # Check if node is running by querying metrics port
        metadata = read_node_metadata(card["host"], card["metrics_port"])

        if isinstance(metadata, dict) and metadata.get("status") == RUNNING:
            # Node is running - collect metadata and metrics
            card.update(metadata)
            card.update(read_node_metrics(card["host"], card["metrics_port"]))
        else:
            # Node is not answering: DEAD if its data directory is gone,
            # otherwise STOPPED with the version taken from the binary.
            if not os.path.isdir(card.get("root_dir", "")):
                card["status"] = DEAD
                card["version"] = ""
            else:
                card["status"] = STOPPED
                card["version"] = get_antnode_version(card.get("binary", ""))
            card["peer_id"] = ""
            card["records"] = 0
            card["uptime"] = 0
            card["shunned"] = 0

        card["age"] = get_node_age(card.get("root_dir", ""))
        card["host"] = machine_config.host  # Ensure we use machine config host

        details.append(card)

    return details
|
|
395
|
+
|
|
396
|
+
def _read_service_file(self, service_name: str, machine_config) -> dict:
|
|
397
|
+
"""
|
|
398
|
+
Read node configuration from a systemd service file.
|
|
399
|
+
|
|
400
|
+
Args:
|
|
401
|
+
service_name: Name of the service file (e.g., "antnode0001.service")
|
|
402
|
+
machine_config: Machine configuration object
|
|
403
|
+
|
|
404
|
+
Returns:
|
|
405
|
+
Dictionary with node configuration, or empty dict on error
|
|
406
|
+
"""
|
|
407
|
+
details = {}
|
|
408
|
+
service_path = f"/etc/systemd/system/{service_name}"
|
|
409
|
+
|
|
410
|
+
try:
|
|
411
|
+
with open(service_path, "r") as file:
|
|
412
|
+
data = file.read()
|
|
413
|
+
|
|
414
|
+
details["id"] = int(re.findall(r"antnode(\d+)", service_name)[0])
|
|
415
|
+
details["binary"] = re.findall(r"ExecStart=([^ ]+)", data)[0]
|
|
416
|
+
details["user"] = re.findall(r"User=(\w+)", data)[0]
|
|
417
|
+
details["root_dir"] = re.findall(r"--root-dir ([\w\/]+)", data)[0]
|
|
418
|
+
details["port"] = int(re.findall(r"--port (\d+)", data)[0])
|
|
419
|
+
details["metrics_port"] = int(
|
|
420
|
+
re.findall(r"--metrics-server-port (\d+)", data)[0]
|
|
421
|
+
)
|
|
422
|
+
details["wallet"] = re.findall(r"--rewards-address ([^ ]+)", data)[0]
|
|
423
|
+
details["network"] = re.findall(r"--rewards-address [^ ]+ ([\w\-]+)", data)[
|
|
424
|
+
0
|
|
425
|
+
]
|
|
426
|
+
|
|
427
|
+
# Check for IP listen address
|
|
428
|
+
ip_matches = re.findall(r"--ip ([^ ]+)", data)
|
|
429
|
+
if ip_matches:
|
|
430
|
+
ip = ip_matches[0]
|
|
431
|
+
# If wildcard listen address, use default
|
|
432
|
+
details["host"] = machine_config.host if ip == "0.0.0.0" else ip
|
|
433
|
+
else:
|
|
434
|
+
details["host"] = machine_config.host
|
|
435
|
+
|
|
436
|
+
# Check for environment variables
|
|
437
|
+
env_matches = re.findall(r'Environment="(.+)"', data)
|
|
438
|
+
details["environment"] = env_matches[0] if env_matches else ""
|
|
439
|
+
|
|
440
|
+
except Exception as e:
|
|
441
|
+
logging.debug(f"Error reading service file {service_path}: {e}")
|
|
442
|
+
|
|
443
|
+
return details
|