wnm 0.0.9__py3-none-any.whl → 0.0.10__py3-none-any.whl

This diff shows the content of publicly released versions of this package as they appear in their respective public registries; it is provided for informational purposes only.

Potentially problematic release.



wnm/executor.py ADDED
@@ -0,0 +1,1292 @@
1
+ """Action executor for performing node lifecycle operations.
2
+
3
+ This module contains the ActionExecutor class which takes planned actions
4
+ from the DecisionEngine and executes them using ProcessManager abstractions.
5
+ """
6
+
7
+ import logging
8
+ import os
9
+ import shutil
10
+ import subprocess
11
+ import time
12
+ from typing import Any, Dict, List, Optional
13
+
14
+ from packaging.version import Version
15
+ from sqlalchemy import func, insert, select, text
16
+ from sqlalchemy.orm import scoped_session
17
+
18
+ from wnm.actions import Action, ActionType
19
+ from wnm.common import (
20
+ DEAD,
21
+ DISABLED,
22
+ METRICS_PORT_BASE,
23
+ PORT_MULTIPLIER,
24
+ REMOVING,
25
+ RESTARTING,
26
+ RUNNING,
27
+ STOPPED,
28
+ UPGRADING,
29
+ )
30
+ from wnm.config import LOG_DIR
31
+ from wnm.models import Machine, Node
32
+ from wnm.process_managers.factory import get_default_manager_type, get_process_manager
33
+ from wnm.utils import (
34
+ get_antnode_version,
35
+ parse_service_names,
36
+ update_nodes,
37
+ )
38
+
39
+
40
+ class ActionExecutor:
41
+ """Executes planned actions on nodes.
42
+
43
+ The ActionExecutor takes Action objects from the DecisionEngine and
44
+ performs the actual operations by calling utility functions and
45
+ managing database state.
46
+ """
47
+
48
+ def __init__(self, session_factory: scoped_session):
49
+ """Initialize the action executor.
50
+
51
+ Args:
52
+ session_factory: SQLAlchemy session factory for database operations
53
+ """
54
+ self.S = session_factory
55
+
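For orientation (not part of this release): a minimal sketch of how the executor could be wired up, assuming a SQLite URL and the standard SQLAlchemy session setup. Only scoped_session appears in this module; the engine, URL, and sessionmaker usage below are illustrative.

    # Hypothetical wiring example; the database URL is an assumption.
    from sqlalchemy import create_engine
    from sqlalchemy.orm import scoped_session, sessionmaker

    from wnm.executor import ActionExecutor

    engine = create_engine("sqlite:///wnm.db")
    Session = scoped_session(sessionmaker(bind=engine))
    executor = ActionExecutor(Session)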
56
+ def _get_process_manager(self, node: Node):
57
+ """Get the appropriate process manager for a node.
58
+
59
+ Args:
60
+ node: Node database record
61
+
62
+ Returns:
63
+ ProcessManager instance for the node's manager type
64
+ """
65
+ # Get manager type from node, or use machine config default
66
+ manager_type = getattr(node, "manager_type", None)
67
+ return get_process_manager(manager_type)
68
+
69
+ def _set_node_status(self, node_id: int, status: str) -> bool:
70
+ """Update node status in database.
71
+
72
+ Args:
73
+ node_id: ID of the node
74
+ status: New status to set
75
+
76
+ Returns:
77
+ True if status was updated successfully
78
+ """
79
+ try:
80
+ with self.S() as session:
81
+ session.query(Node).filter(Node.id == node_id).update(
82
+ {"status": status, "timestamp": int(time.time())}
83
+ )
84
+ session.commit()
85
+ return True
86
+ except Exception as e:
87
+ logging.error(f"Failed to set node status for {node_id}: {e}")
88
+ return False
89
+
90
+ def _upgrade_node_binary(self, node: Node, new_version: str) -> bool:
91
+ """Upgrade a node's binary and restart it.
92
+
93
+ Args:
94
+ node: Node to upgrade
95
+ new_version: Version string for the new binary
96
+
97
+ Returns:
98
+ True if upgrade succeeded
99
+ """
100
+ # Source binary location
101
+ source_binary = os.path.expanduser("~/.local/bin/antnode")
102
+
103
+ # Copy new binary to node directory
104
+ try:
105
+ shutil.copy2(source_binary, node.binary)
106
+ os.chmod(node.binary, 0o755)
107
+ logging.info(f"Copied new binary from {source_binary} to {node.binary}")
108
+ except (OSError, shutil.Error) as err:
109
+ logging.error(f"Failed to copy binary for upgrade: {err}")
110
+ return False
111
+
112
+ # Restart the node with new binary
113
+ manager = self._get_process_manager(node)
114
+ if not manager.restart_node(node):
115
+ logging.error(f"Failed to restart node {node.id} during upgrade")
116
+ return False
117
+
118
+ # Update status to UPGRADING
119
+ with self.S() as session:
120
+ session.query(Node).filter(Node.id == node.id).update(
121
+ {
122
+ "status": UPGRADING,
123
+ "timestamp": int(time.time()),
124
+ "version": new_version,
125
+ }
126
+ )
127
+ session.commit()
128
+
129
+ return True
130
+
131
+ def execute(
132
+ self,
133
+ actions: List[Action],
134
+ machine_config: Dict[str, Any],
135
+ metrics: Dict[str, Any],
136
+ dry_run: bool = False,
137
+ ) -> Dict[str, Any]:
138
+ """Execute a list of actions.
139
+
140
+ Args:
141
+ actions: List of Action objects to execute
142
+ machine_config: Machine configuration dictionary
143
+ metrics: Current system metrics
144
+ dry_run: If True, log actions without executing them
145
+
146
+ Returns:
147
+ Dictionary with execution status and results
148
+ """
149
+ if not actions:
150
+ return {"status": "no-actions", "results": []}
151
+
152
+ results = []
153
+
154
+ for action in actions:
155
+ logging.info(
156
+ f"Executing: {action.type.value} (priority={action.priority}, reason={action.reason})"
157
+ )
158
+
159
+ try:
160
+ result = self._execute_action(action, machine_config, metrics, dry_run)
161
+ results.append(result)
162
+ except Exception as e:
163
+ logging.error(f"Failed to execute {action.type.value}: {e}")
164
+ results.append(
165
+ {"action": action.type.value, "success": False, "error": str(e)}
166
+ )
167
+
168
+ # Return status from the first (highest priority) action
169
+ if results:
170
+ return results[0]
171
+ return {"status": "no-results"}
172
+
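A hedged usage sketch for execute(), reusing the executor from the earlier sketch. The Action constructor keywords are assumed from the attributes read above (type, priority, reason); the config and metrics dicts are trimmed placeholders.

    # Illustrative only; dry_run=True logs the plan without touching nodes.
    actions = [Action(type=ActionType.SURVEY_NODES, priority=0, reason="idle sweep")]
    machine_config = {"host": "127.0.0.1"}      # placeholder config
    metrics = {"antnode_version": "0.3.0"}      # placeholder version string
    result = executor.execute(actions, machine_config, metrics, dry_run=True)
    print(result)   # {"status": "idle"} for a survey action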
173
+ def _execute_action(
174
+ self,
175
+ action: Action,
176
+ machine_config: Dict[str, Any],
177
+ metrics: Dict[str, Any],
178
+ dry_run: bool,
179
+ ) -> Dict[str, Any]:
180
+ """Execute a single action.
181
+
182
+ Args:
183
+ action: The action to execute
184
+ machine_config: Machine configuration
185
+ metrics: Current metrics
186
+ dry_run: If True, log without executing
187
+
188
+ Returns:
189
+ Dictionary with execution result
190
+ """
191
+ if action.type == ActionType.RESURVEY_NODES:
192
+ return self._execute_resurvey(machine_config, dry_run)
193
+
194
+ elif action.type == ActionType.REMOVE_NODE:
195
+ return self._execute_remove_node(action, dry_run)
196
+
197
+ elif action.type == ActionType.STOP_NODE:
198
+ return self._execute_stop_node(machine_config, dry_run)
199
+
200
+ elif action.type == ActionType.UPGRADE_NODE:
201
+ return self._execute_upgrade_node(metrics, dry_run)
202
+
203
+ elif action.type == ActionType.START_NODE:
204
+ return self._execute_start_node(metrics, dry_run)
205
+
206
+ elif action.type == ActionType.ADD_NODE:
207
+ return self._execute_add_node(machine_config, metrics, dry_run)
208
+
209
+ elif action.type == ActionType.SURVEY_NODES:
210
+ return self._execute_survey(dry_run)
211
+
212
+ else:
213
+ logging.warning(f"Unknown action type: {action.type}")
214
+ return {"status": "unknown-action", "action": action.type.value}
215
+
216
+ def _execute_resurvey(
217
+ self, machine_config: Dict[str, Any], dry_run: bool
218
+ ) -> Dict[str, Any]:
219
+ """Execute node resurvey after system reboot."""
220
+ if dry_run:
221
+ logging.warning("DRYRUN: System rebooted, survey nodes")
222
+ else:
223
+ update_nodes(self.S)
224
+ # Update the last stopped time
225
+ with self.S() as session:
226
+ session.query(Machine).filter(Machine.id == 1).update(
227
+ {"last_stopped_at": int(time.time())}
228
+ )
229
+ session.commit()
230
+
231
+ return {"status": "system-rebooted"}
232
+
233
+ def _execute_remove_node(self, action: Action, dry_run: bool) -> Dict[str, Any]:
234
+ """Execute node removal.
235
+
236
+ If reason contains 'dead', remove all dead nodes.
237
+ Otherwise, remove youngest stopped or running node based on reason.
238
+ """
239
+ if "dead" in action.reason.lower():
240
+ # Remove all dead nodes
241
+ if dry_run:
242
+ logging.warning("DRYRUN: Remove Dead Nodes")
243
+ else:
244
+ with self.S() as session:
245
+ broken = session.execute(
246
+ select(Node)
247
+ .where(Node.status == DEAD)
248
+ .order_by(Node.timestamp.asc())
249
+ ).all()
250
+
251
+ for row in broken:
252
+ node = row[0]
253
+ logging.info(f"Removing dead node {node.id}")
254
+ manager = self._get_process_manager(node)
255
+ manager.remove_node(node)
256
+ # Delete from database immediately (no delay for dead nodes)
257
+ with self.S() as session:
258
+ session.delete(node)
259
+ session.commit()
260
+
261
+ return {"status": "removed-dead-nodes"}
262
+
263
+ elif "stopped" in action.reason.lower():
264
+ # Remove youngest stopped node
265
+ with self.S() as session:
266
+ youngest = session.execute(
267
+ select(Node).where(Node.status == STOPPED).order_by(Node.age.desc())
268
+ ).first()
269
+
270
+ if youngest:
271
+ if dry_run:
272
+ logging.warning("DRYRUN: Remove youngest stopped node")
273
+ else:
274
+ node = youngest[0]
275
+ manager = self._get_process_manager(node)
276
+ manager.remove_node(node)
277
+ # Delete from database immediately (no delay for stopped nodes)
278
+ with self.S() as session:
279
+ session.delete(node)
280
+ session.commit()
281
+ return {"status": "removed-stopped-node"}
282
+ else:
283
+ return {"status": "no-stopped-nodes-to-remove"}
284
+
285
+ else:
286
+ # Remove youngest running node (with delay)
287
+ with self.S() as session:
288
+ youngest = session.execute(
289
+ select(Node).where(Node.status == RUNNING).order_by(Node.age.desc())
290
+ ).first()
291
+
292
+ if youngest:
293
+ if dry_run:
294
+ logging.warning("DRYRUN: Remove youngest running node")
295
+ else:
296
+ node = youngest[0]
297
+ manager = self._get_process_manager(node)
298
+ manager.stop_node(node)
299
+ # Mark as REMOVING (will be deleted later after delay)
300
+ self._set_node_status(node.id, REMOVING)
301
+ return {"status": "removed-running-node"}
302
+ else:
303
+ return {"status": "no-running-nodes-to-remove"}
304
+
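Note the age convention these queries rely on: age stores a creation timestamp (set to int(time.time()) in _execute_add_node below), so the youngest node is the one with the highest age value. A toy illustration:

    # 'age' is a creation timestamp, so descending order puts the youngest first.
    nodes = [("antnode0001", 1_700_000_000), ("antnode0002", 1_700_100_000)]
    youngest = max(nodes, key=lambda n: n[1])
    print(youngest[0])   # antnode0002 -- created most recently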
305
+ def _execute_stop_node(
306
+ self, machine_config: Dict[str, Any], dry_run: bool
307
+ ) -> Dict[str, Any]:
308
+ """Execute node stop (to reduce resource usage)."""
309
+ with self.S() as session:
310
+ youngest = session.execute(
311
+ select(Node).where(Node.status == RUNNING).order_by(Node.age.desc())
312
+ ).first()
313
+
314
+ if youngest:
315
+ if dry_run:
316
+ logging.warning("DRYRUN: Stopping youngest node")
317
+ else:
318
+ node = youngest[0]
319
+ manager = self._get_process_manager(node)
320
+ manager.stop_node(node)
321
+ self._set_node_status(node.id, STOPPED)
322
+ # Update the last stopped time
323
+ with self.S() as session:
324
+ session.query(Machine).filter(Machine.id == 1).update(
325
+ {"last_stopped_at": int(time.time())}
326
+ )
327
+ session.commit()
328
+ return {"status": "stopped-node"}
329
+ else:
330
+ return {"status": "no-nodes-to-stop"}
331
+
332
+ def _execute_upgrade_node(
333
+ self, metrics: Dict[str, Any], dry_run: bool
334
+ ) -> Dict[str, Any]:
335
+ """Execute node upgrade (oldest running node with outdated version)."""
336
+ with self.S() as session:
337
+ oldest = session.execute(
338
+ select(Node)
339
+ .where(Node.status == RUNNING)
340
+ .where(Node.version != metrics["antnode_version"])
341
+ .order_by(Node.age.asc())
342
+ ).first()
343
+
344
+ if oldest:
345
+ if dry_run:
346
+ logging.warning("DRYRUN: Upgrade oldest node")
347
+ else:
348
+ node = oldest[0]
349
+ # If we don't have a version number from metadata, grab from binary
350
+ if not node.version:
351
+ node.version = get_antnode_version(node.binary)
352
+
353
+ # Perform the upgrade (copies binary, restarts, sets UPGRADING status)
354
+ if not self._upgrade_node_binary(node, metrics["antnode_version"]):
355
+ return {"status": "upgrade-failed"}
356
+
357
+ return {"status": "upgrading-node"}
358
+ else:
359
+ return {"status": "no-nodes-to-upgrade"}
360
+
361
+ def _execute_start_node(
362
+ self, metrics: Dict[str, Any], dry_run: bool
363
+ ) -> Dict[str, Any]:
364
+ """Execute starting a stopped node (may upgrade first if needed)."""
365
+ with self.S() as session:
366
+ oldest = session.execute(
367
+ select(Node).where(Node.status == STOPPED).order_by(Node.age.asc())
368
+ ).first()
369
+
370
+ if oldest:
371
+ node = oldest[0]
372
+ # If we don't have a version number from metadata, grab from binary
373
+ if not node.version:
374
+ node.version = get_antnode_version(node.binary)
375
+
376
+ # If the stopped version is old, upgrade it (which also starts it)
377
+ if Version(metrics["antnode_version"]) > Version(node.version):
378
+ if dry_run:
379
+ logging.warning("DRYRUN: Upgrade and start stopped node")
380
+ else:
381
+ # Perform the upgrade (copies binary, restarts, sets UPGRADING status)
382
+ if not self._upgrade_node_binary(node, metrics["antnode_version"]):
383
+ return {"status": "failed-upgrade"}
384
+ return {"status": "upgrading-stopped-node"}
385
+ else:
386
+ if dry_run:
387
+ logging.warning("DRYRUN: Start stopped node")
388
+ return {"status": "starting-node"}
389
+ else:
390
+ manager = self._get_process_manager(node)
391
+ if manager.start_node(node):
392
+ self._set_node_status(node.id, RESTARTING)
393
+ return {"status": "started-node"}
394
+ else:
395
+ return {"status": "failed-start-node"}
396
+ else:
397
+ return {"status": "no-stopped-nodes"}
398
+
399
+ def _execute_add_node(
400
+ self, machine_config: Dict[str, Any], metrics: Dict[str, Any], dry_run: bool
401
+ ) -> Dict[str, Any]:
402
+ """Execute adding a new node."""
403
+ if dry_run:
404
+ logging.warning("DRYRUN: Add a node")
405
+ return {"status": "add-node"}
406
+
407
+ # Find next available node ID (look for holes first)
408
+ # First check if node 1 exists
409
+ with self.S() as session:
410
+ node_1_exists = session.execute(
411
+ select(Node.id).where(Node.id == 1)
412
+ ).first()
413
+
414
+ if not node_1_exists:
415
+ # Node 1 is available, use it
416
+ node_id = 1
417
+ else:
418
+ # Look for holes in the sequence
419
+ sql = text(
420
+ "select n1.id + 1 as id from node n1 "
421
+ + "left join node n2 on n2.id = n1.id + 1 "
422
+ + "where n2.id is null "
423
+ + "and n1.id <> (select max(id) from node) "
424
+ + "order by n1.id;"
425
+ )
426
+ with self.S() as session:
427
+ result = session.execute(sql).first()
428
+
429
+ if result:
430
+ node_id = result[0]
431
+ else:
432
+ # No holes, use max + 1
433
+ with self.S() as session:
434
+ result = session.execute(
435
+ select(Node.id).order_by(Node.id.desc())
436
+ ).first()
437
+ node_id = result[0] + 1 if result else 1
438
+
439
+ # Determine the appropriate manager type for this system
440
+ manager_type = get_default_manager_type()
441
+
442
+ # Create node object
443
+ node = Node(
444
+ id=node_id,
445
+ node_name=f"{node_id:04}",
446
+ service=f"antnode{node_id:04}.service",
447
+ user=machine_config.get("user", "ant"),
448
+ version=metrics["antnode_version"],
449
+ root_dir=f"{machine_config['node_storage']}/antnode{node_id:04}",
450
+ binary=f"{machine_config['node_storage']}/antnode{node_id:04}/antnode",
451
+ port=machine_config["port_start"] * PORT_MULTIPLIER + node_id,
452
+ metrics_port=METRICS_PORT_BASE + node_id,
453
+ network="evm-arbitrum-one",
454
+ wallet=machine_config["rewards_address"],
455
+ peer_id="",
456
+ status=STOPPED,
457
+ timestamp=int(time.time()),
458
+ records=0,
459
+ uptime=0,
460
+ shunned=0,
461
+ age=int(time.time()),
462
+ host=machine_config["host"],
463
+ method=manager_type,
464
+ layout="1",
465
+ environment=machine_config.get("environment", ""),
466
+ manager_type=manager_type,
467
+ )
468
+
469
+ # Insert into database
470
+ with self.S() as session:
471
+ session.add(node)
472
+ session.commit()
473
+ session.refresh(node) # Get the persisted node
474
+
475
+ # Create the node using process manager
476
+ source_binary = os.path.expanduser("~/.local/bin/antnode")
477
+ manager = self._get_process_manager(node)
478
+
479
+ if not manager.create_node(node, source_binary):
480
+ logging.error(f"Failed to create node {node.id}")
481
+ return {"status": "failed-create-node"}
482
+
483
+ # Update status to RESTARTING (node is starting up)
484
+ self._set_node_status(node.id, RESTARTING)
485
+
486
+ return {"status": "added-node"}
487
+
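The raw SQL in _execute_add_node above picks the first gap in the id sequence, falling back to max + 1 when there is none. A pure-Python sketch of the same idea, illustrative only:

    # Equivalent of the gap-finding query: lowest missing positive id, else max + 1.
    def next_node_id(existing_ids):
        ids = sorted(existing_ids)
        if not ids or 1 not in ids:
            return 1
        for current in ids:
            if current + 1 not in ids and current != max(ids):
                return current + 1
        return max(ids) + 1

    print(next_node_id([1, 2, 4, 5]))   # 3 -- fills the hole
    print(next_node_id([1, 2, 3]))      # 4 -- no holes, append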
488
+ def _execute_survey(self, dry_run: bool) -> Dict[str, Any]:
489
+ """Execute node survey (idle monitoring)."""
490
+ if dry_run:
491
+ logging.warning("DRYRUN: Update nodes")
492
+ else:
493
+ update_nodes(self.S)
494
+ return {"status": "idle"}
495
+
496
+ def _parse_node_name(self, service_name: str) -> Optional[int]:
497
+ """Parse node ID from service name like 'antnode0001'.
498
+
499
+ Args:
500
+ service_name: Node name (e.g., 'antnode0001')
501
+
502
+ Returns:
503
+ Node ID as integer, or None if parsing fails
504
+ """
505
+ import re
506
+ match = re.match(r"antnode(\d+)", service_name)
507
+ if match:
508
+ return int(match.group(1))
509
+ return None
510
+
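A quick usage example for the parsing above:

    # Service names like 'antnode0001' carry the integer node id.
    import re
    print(int(re.match(r"antnode(\d+)", "antnode0001").group(1)))   # 1
    print(re.match(r"antnode(\d+)", "not-a-node"))                  # None -> invalid name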
511
+ def _get_node_by_name(self, service_name: str) -> Optional[Node]:
512
+ """Get node by service name.
513
+
514
+ Args:
515
+ service_name: Node name (e.g., 'antnode0001')
516
+
517
+ Returns:
518
+ Node object or None if not found
519
+ """
520
+ node_id = self._parse_node_name(service_name)
521
+ if node_id is None:
522
+ logging.error(f"Invalid node name format: {service_name}")
523
+ return None
524
+
525
+ with self.S() as session:
526
+ result = session.execute(
527
+ select(Node).where(Node.id == node_id)
528
+ ).first()
529
+
530
+ if result:
531
+ return result[0]
532
+ else:
533
+ logging.error(f"Node not found: {service_name} (id={node_id})")
534
+ return None
535
+
536
+ def execute_forced_action(
537
+ self,
538
+ action_type: str,
539
+ machine_config: Dict[str, Any],
540
+ metrics: Dict[str, Any],
541
+ service_name: Optional[str] = None,
542
+ dry_run: bool = False,
543
+ count: int = 1,
544
+ ) -> Dict[str, Any]:
545
+ """Execute a forced action bypassing the decision engine.
546
+
547
+ Args:
548
+ action_type: Type of action ('add', 'remove', 'upgrade', 'start', 'stop', 'disable', 'teardown', 'survey')
549
+ machine_config: Machine configuration
550
+ metrics: Current system metrics
551
+ service_name: Optional node name for targeted operations
552
+ dry_run: If True, log without executing
553
+ count: Number of nodes to affect (for add, remove, start, stop, upgrade actions)
554
+
555
+ Returns:
556
+ Dictionary with execution result
557
+ """
558
+ if action_type == "add":
559
+ return self._force_add_node(machine_config, metrics, dry_run, count)
560
+ elif action_type == "remove":
561
+ return self._force_remove_node(service_name, dry_run, count)
562
+ elif action_type == "upgrade":
563
+ return self._force_upgrade_node(service_name, metrics, dry_run, count)
564
+ elif action_type == "start":
565
+ return self._force_start_node(service_name, metrics, dry_run, count)
566
+ elif action_type == "stop":
567
+ return self._force_stop_node(service_name, dry_run, count)
568
+ elif action_type == "disable":
569
+ return self._force_disable_node(service_name, dry_run)
570
+ elif action_type == "teardown":
571
+ return self._force_teardown_cluster(machine_config, dry_run)
572
+ elif action_type == "survey":
573
+ return self._force_survey_nodes(service_name, dry_run)
574
+ else:
575
+ return {"status": "error", "message": f"Unknown action type: {action_type}"}
576
+
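A hedged sketch of driving the forced-action entry point directly, reusing the executor, machine_config, and metrics placeholders from the earlier sketches:

    # Illustrative only; dry_run=True keeps both calls side-effect free.
    # Stop the two youngest running nodes.
    executor.execute_forced_action("stop", machine_config, metrics, count=2, dry_run=True)
    # Remove two specific nodes by service name.
    executor.execute_forced_action(
        "remove", machine_config, metrics,
        service_name="antnode0001,antnode0002", dry_run=True,
    )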
577
+ def _force_add_node(
578
+ self, machine_config: Dict[str, Any], metrics: Dict[str, Any], dry_run: bool, count: int = 1
579
+ ) -> Dict[str, Any]:
580
+ """Force add new nodes.
581
+
582
+ Args:
583
+ machine_config: Machine configuration
584
+ metrics: Current system metrics
585
+ dry_run: If True, log without executing
586
+ count: Number of nodes to add (default: 1)
587
+
588
+ Returns:
589
+ Dictionary with execution result
590
+ """
591
+ logging.info(f"Forced action: Adding {count} node(s)")
592
+
593
+ if count < 1:
594
+ return {"status": "error", "message": "count must be at least 1"}
595
+
596
+ added_nodes = []
597
+ failed_nodes = []
598
+
599
+ # Track the start time to identify newly created nodes
600
+ start_time = int(time.time())
601
+
602
+ for i in range(count):
603
+ result = self._execute_add_node(machine_config, metrics, dry_run)
604
+ if result["status"] in ["added-node", "add-node"]:
605
+ # Get the node that was just added (youngest by age >= start_time)
606
+ if not dry_run:
607
+ with self.S() as session:
608
+ newest = session.execute(
609
+ select(Node).where(Node.age >= start_time).order_by(Node.age.desc())
610
+ ).first()
611
+ if newest:
612
+ added_nodes.append(newest[0].service.replace(".service", ""))
613
+ else:
614
+ added_nodes.append(f"node-{i+1}")
615
+ else:
616
+ failed_nodes.append({"index": i+1, "error": result.get("status", "unknown error")})
617
+
618
+ if count == 1:
619
+ # Keep backward compatibility for single node
620
+ return result
621
+
622
+ return {
623
+ "status": "added-nodes" if not dry_run else "add-nodes-dryrun",
624
+ "added_count": len(added_nodes),
625
+ "added_nodes": added_nodes if added_nodes else None,
626
+ "failed_count": len(failed_nodes),
627
+ "failed_nodes": failed_nodes if failed_nodes else None,
628
+ }
629
+
630
+ def _force_remove_node(
631
+ self, service_name: Optional[str], dry_run: bool, count: int = 1
632
+ ) -> Dict[str, Any]:
633
+ """Force remove nodes (specific or youngest by age).
634
+
635
+ Args:
636
+ service_name: Optional comma-separated list of service names
637
+ dry_run: If True, log without executing
638
+ count: Number of nodes to remove when service_name is not specified (default: 1)
639
+
640
+ Returns:
641
+ Dictionary with execution result
642
+ """
643
+ # Parse comma-separated service names
644
+ service_names = parse_service_names(service_name)
645
+
646
+ if service_names:
647
+ # Remove specific nodes
648
+ removed_nodes = []
649
+ failed_nodes = []
650
+
651
+ for name in service_names:
652
+ node = self._get_node_by_name(name)
653
+ if not node:
654
+ failed_nodes.append({"service": name, "error": "not found"})
655
+ continue
656
+
657
+ logging.info(f"Forced action: Removing node {name}")
658
+ if dry_run:
659
+ logging.warning(f"DRYRUN: Remove node {name}")
660
+ removed_nodes.append(name)
661
+ else:
662
+ try:
663
+ manager = self._get_process_manager(node)
664
+ manager.remove_node(node)
665
+ # Remove from database immediately
666
+ with self.S() as session:
667
+ session.delete(node)
668
+ session.commit()
669
+ removed_nodes.append(name)
670
+ except Exception as e:
671
+ logging.error(f"Failed to remove node {name}: {e}")
672
+ failed_nodes.append({"service": name, "error": str(e)})
673
+
674
+ return {
675
+ "status": "removed-nodes" if not dry_run else "remove-dryrun",
676
+ "removed_count": len(removed_nodes),
677
+ "removed_nodes": removed_nodes,
678
+ "failed_count": len(failed_nodes),
679
+ "failed_nodes": failed_nodes if failed_nodes else None,
680
+ }
681
+ else:
682
+ # Remove youngest nodes (default behavior - highest age value)
683
+ if count < 1:
684
+ return {"status": "error", "message": "count must be at least 1"}
685
+
686
+ logging.info(f"Forced action: Removing {count} youngest node(s)")
687
+
688
+ # Get youngest nodes (highest age values)
689
+ with self.S() as session:
690
+ youngest_nodes = session.execute(
691
+ select(Node).order_by(Node.age.desc()).limit(count)
692
+ ).all()
693
+
694
+ if not youngest_nodes:
695
+ return {"status": "error", "message": "No nodes to remove"}
696
+
697
+ if len(youngest_nodes) < count:
698
+ logging.warning(f"Only {len(youngest_nodes)} nodes available, removing all of them")
699
+
700
+ removed_nodes = []
701
+ failed_nodes = []
702
+
703
+ for row in youngest_nodes:
704
+ node = row[0]
705
+ if dry_run:
706
+ logging.warning(f"DRYRUN: Remove youngest node {node.node_name}")
707
+ removed_nodes.append(node.service.replace(".service", ""))
708
+ else:
709
+ try:
710
+ manager = self._get_process_manager(node)
711
+ manager.remove_node(node)
712
+ # Remove from database immediately
713
+ with self.S() as session:
714
+ session.delete(node)
715
+ session.commit()
716
+ removed_nodes.append(node.service.replace(".service", ""))
717
+ except Exception as e:
718
+ logging.error(f"Failed to remove node {node.node_name}: {e}")
719
+ failed_nodes.append({"service": node.service.replace(".service", ""), "error": str(e)})
720
+
721
+ if count == 1 and len(removed_nodes) == 1:
722
+ # Keep backward compatibility for single node
723
+ # Extract node name from service name (e.g., "antnode0001" -> "0001")
724
+ node_name = removed_nodes[0].replace("antnode", "")
725
+ return {"status": "removed-node", "node": node_name}
726
+
727
+ return {
728
+ "status": "removed-nodes" if not dry_run else "remove-dryrun",
729
+ "removed_count": len(removed_nodes),
730
+ "removed_nodes": removed_nodes if removed_nodes else None,
731
+ "failed_count": len(failed_nodes),
732
+ "failed_nodes": failed_nodes if failed_nodes else None,
733
+ }
734
+
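For reference, the two result shapes _force_remove_node can produce (values are illustrative): the single-node path keeps the older compact form, while the multi-node path reports counts and lists.

    # Illustrative result shapes only.
    single = {"status": "removed-node", "node": "0001"}
    multi = {
        "status": "removed-nodes",
        "removed_count": 2,
        "removed_nodes": ["antnode0002", "antnode0003"],
        "failed_count": 0,
        "failed_nodes": None,
    }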
735
+ def _force_upgrade_node(
736
+ self, service_name: Optional[str], metrics: Dict[str, Any], dry_run: bool, count: int = 1
737
+ ) -> Dict[str, Any]:
738
+ """Force upgrade nodes (specific or oldest running nodes by age).
739
+
740
+ Args:
741
+ service_name: Optional comma-separated list of service names
742
+ metrics: Current system metrics
743
+ dry_run: If True, log without executing
744
+ count: Number of nodes to upgrade when service_name is not specified (default: 1)
745
+
746
+ Returns:
747
+ Dictionary with execution result
748
+ """
749
+ # Parse comma-separated service names
750
+ service_names = parse_service_names(service_name)
751
+
752
+ if service_names:
753
+ # Upgrade specific nodes
754
+ upgraded_nodes = []
755
+ failed_nodes = []
756
+
757
+ for name in service_names:
758
+ node = self._get_node_by_name(name)
759
+ if not node:
760
+ failed_nodes.append({"service": name, "error": "not found"})
761
+ continue
762
+
763
+ logging.info(f"Forced action: Upgrading node {name}")
764
+ if dry_run:
765
+ logging.warning(f"DRYRUN: Upgrade node {name}")
766
+ upgraded_nodes.append(name)
767
+ else:
768
+ try:
769
+ if not self._upgrade_node_binary(node, metrics["antnode_version"]):
770
+ failed_nodes.append({"service": name, "error": "upgrade failed"})
771
+ else:
772
+ upgraded_nodes.append(name)
773
+ except Exception as e:
774
+ logging.error(f"Failed to upgrade node {name}: {e}")
775
+ failed_nodes.append({"service": name, "error": str(e)})
776
+
777
+ return {
778
+ "status": "upgraded-nodes" if not dry_run else "upgrade-dryrun",
779
+ "upgraded_count": len(upgraded_nodes),
780
+ "upgraded_nodes": upgraded_nodes,
781
+ "failed_count": len(failed_nodes),
782
+ "failed_nodes": failed_nodes if failed_nodes else None,
783
+ }
784
+ else:
785
+ # Upgrade oldest running nodes (default behavior - lowest age values)
786
+ if count < 1:
787
+ return {"status": "error", "message": "count must be at least 1"}
788
+
789
+ logging.info(f"Forced action: Upgrading {count} oldest running node(s)")
790
+
791
+ # Get oldest running nodes (lowest age values)
792
+ with self.S() as session:
793
+ oldest_nodes = session.execute(
794
+ select(Node)
795
+ .where(Node.status == RUNNING)
796
+ .order_by(Node.age.asc())
797
+ .limit(count)
798
+ ).all()
799
+
800
+ if not oldest_nodes:
801
+ return {"status": "error", "message": "No running nodes to upgrade"}
802
+
803
+ if len(oldest_nodes) < count:
804
+ logging.warning(f"Only {len(oldest_nodes)} running nodes available, upgrading all of them")
805
+
806
+ upgraded_nodes = []
807
+ failed_nodes = []
808
+
809
+ for row in oldest_nodes:
810
+ node = row[0]
811
+ if dry_run:
812
+ logging.warning(f"DRYRUN: Upgrade oldest node {node.node_name}")
813
+ upgraded_nodes.append(node.service.replace(".service", ""))
814
+ else:
815
+ try:
816
+ if not self._upgrade_node_binary(node, metrics["antnode_version"]):
817
+ failed_nodes.append({"service": node.service.replace(".service", ""), "error": "upgrade failed"})
818
+ else:
819
+ upgraded_nodes.append(node.service.replace(".service", ""))
820
+ except Exception as e:
821
+ logging.error(f"Failed to upgrade node {node.node_name}: {e}")
822
+ failed_nodes.append({"service": node.service.replace(".service", ""), "error": str(e)})
823
+
824
+ if count == 1 and len(upgraded_nodes) == 1:
825
+ # Keep backward compatibility for single node
826
+ # Extract node name from service name (e.g., "antnode0001" -> "0001")
827
+ node_name = upgraded_nodes[0].replace("antnode", "")
828
+ return {"status": "upgraded-node", "node": node_name}
829
+
830
+ return {
831
+ "status": "upgraded-nodes" if not dry_run else "upgrade-dryrun",
832
+ "upgraded_count": len(upgraded_nodes),
833
+ "upgraded_nodes": upgraded_nodes if upgraded_nodes else None,
834
+ "failed_count": len(failed_nodes),
835
+ "failed_nodes": failed_nodes if failed_nodes else None,
836
+ }
837
+
838
+ def _force_stop_node(
839
+ self, service_name: Optional[str], dry_run: bool, count: int = 1
840
+ ) -> Dict[str, Any]:
841
+ """Force stop nodes (specific or youngest running nodes by age).
842
+
843
+ Args:
844
+ service_name: Optional comma-separated list of service names
845
+ dry_run: If True, log without executing
846
+ count: Number of nodes to stop when service_name is not specified (default: 1)
847
+
848
+ Returns:
849
+ Dictionary with execution result
850
+ """
851
+ # Parse comma-separated service names
852
+ service_names = parse_service_names(service_name)
853
+
854
+ if service_names:
855
+ # Stop specific nodes
856
+ stopped_nodes = []
857
+ failed_nodes = []
858
+
859
+ for name in service_names:
860
+ node = self._get_node_by_name(name)
861
+ if not node:
862
+ failed_nodes.append({"service": name, "error": "not found"})
863
+ continue
864
+
865
+ logging.info(f"Forced action: Stopping node {name}")
866
+ if dry_run:
867
+ logging.warning(f"DRYRUN: Stop node {name}")
868
+ stopped_nodes.append(name)
869
+ else:
870
+ try:
871
+ manager = self._get_process_manager(node)
872
+ manager.stop_node(node)
873
+ self._set_node_status(node.id, STOPPED)
874
+ stopped_nodes.append(name)
875
+ except Exception as e:
876
+ logging.error(f"Failed to stop node {name}: {e}")
877
+ failed_nodes.append({"service": name, "error": str(e)})
878
+
879
+ return {
880
+ "status": "stopped-nodes" if not dry_run else "stop-dryrun",
881
+ "stopped_count": len(stopped_nodes),
882
+ "stopped_nodes": stopped_nodes,
883
+ "failed_count": len(failed_nodes),
884
+ "failed_nodes": failed_nodes if failed_nodes else None,
885
+ }
886
+ else:
887
+ # Stop youngest running nodes (default behavior - highest age values)
888
+ if count < 1:
889
+ return {"status": "error", "message": "count must be at least 1"}
890
+
891
+ logging.info(f"Forced action: Stopping {count} youngest running node(s)")
892
+
893
+ # Get youngest running nodes (highest age values)
894
+ with self.S() as session:
895
+ youngest_nodes = session.execute(
896
+ select(Node)
897
+ .where(Node.status == RUNNING)
898
+ .order_by(Node.age.desc())
899
+ .limit(count)
900
+ ).all()
901
+
902
+ if not youngest_nodes:
903
+ return {"status": "error", "message": "No running nodes to stop"}
904
+
905
+ if len(youngest_nodes) < count:
906
+ logging.warning(f"Only {len(youngest_nodes)} running nodes available, stopping all of them")
907
+
908
+ stopped_nodes = []
909
+ failed_nodes = []
910
+
911
+ for row in youngest_nodes:
912
+ node = row[0]
913
+ if dry_run:
914
+ logging.warning(f"DRYRUN: Stop youngest node {node.node_name}")
915
+ stopped_nodes.append(node.service.replace(".service", ""))
916
+ else:
917
+ try:
918
+ manager = self._get_process_manager(node)
919
+ manager.stop_node(node)
920
+ self._set_node_status(node.id, STOPPED)
921
+ stopped_nodes.append(node.service.replace(".service", ""))
922
+ except Exception as e:
923
+ logging.error(f"Failed to stop node {node.node_name}: {e}")
924
+ failed_nodes.append({"service": node.service.replace(".service", ""), "error": str(e)})
925
+
926
+ if count == 1 and len(stopped_nodes) == 1:
927
+ # Keep backward compatibility for single node
928
+ # Extract node name from service name (e.g., "antnode0001" -> "0001")
929
+ node_name = stopped_nodes[0].replace("antnode", "")
930
+ return {"status": "stopped-node", "node": node_name}
931
+
932
+ return {
933
+ "status": "stopped-nodes" if not dry_run else "stop-dryrun",
934
+ "stopped_count": len(stopped_nodes),
935
+ "stopped_nodes": stopped_nodes if stopped_nodes else None,
936
+ "failed_count": len(failed_nodes),
937
+ "failed_nodes": failed_nodes if failed_nodes else None,
938
+ }
939
+
940
+ def _force_start_node(
941
+ self, service_name: Optional[str], metrics: Dict[str, Any], dry_run: bool, count: int = 1
942
+ ) -> Dict[str, Any]:
943
+ """Force start nodes (specific or oldest stopped nodes by age).
944
+
945
+ Args:
946
+ service_name: Optional comma-separated list of service names
947
+ metrics: Current system metrics
948
+ dry_run: If True, log without executing
949
+ count: Number of nodes to start when service_name is not specified (default: 1)
950
+
951
+ Returns:
952
+ Dictionary with execution result
953
+ """
954
+ # Parse comma-separated service names
955
+ service_names = parse_service_names(service_name)
956
+
957
+ if service_names:
958
+ # Start specific nodes
959
+ started_nodes = []
960
+ upgraded_nodes = []
961
+ failed_nodes = []
962
+
963
+ for name in service_names:
964
+ node = self._get_node_by_name(name)
965
+ if not node:
966
+ failed_nodes.append({"service": name, "error": "not found"})
967
+ continue
968
+
969
+ if node.status == RUNNING:
970
+ failed_nodes.append({"service": name, "error": "already running"})
971
+ continue
972
+
973
+ logging.info(f"Forced action: Starting node {name}")
974
+ if dry_run:
975
+ logging.warning(f"DRYRUN: Start node {name}")
976
+ started_nodes.append(name)
977
+ else:
978
+ try:
979
+ # Check if node needs upgrade
980
+ if not node.version:
981
+ node.version = get_antnode_version(node.binary)
982
+
983
+ # If the stopped version is old, upgrade it (which also starts it)
984
+ if Version(metrics["antnode_version"]) > Version(node.version):
985
+ if not self._upgrade_node_binary(node, metrics["antnode_version"]):
986
+ failed_nodes.append({"service": name, "error": "upgrade failed"})
987
+ else:
988
+ upgraded_nodes.append(name)
989
+ else:
990
+ manager = self._get_process_manager(node)
991
+ if manager.start_node(node):
992
+ self._set_node_status(node.id, RESTARTING)
993
+ started_nodes.append(name)
994
+ else:
995
+ failed_nodes.append({"service": name, "error": "start failed"})
996
+ except Exception as e:
997
+ logging.error(f"Failed to start node {name}: {e}")
998
+ failed_nodes.append({"service": name, "error": str(e)})
999
+
1000
+ return {
1001
+ "status": "started-nodes" if not dry_run else "start-dryrun",
1002
+ "started_count": len(started_nodes),
1003
+ "started_nodes": started_nodes,
1004
+ "upgraded_count": len(upgraded_nodes),
1005
+ "upgraded_nodes": upgraded_nodes if upgraded_nodes else None,
1006
+ "failed_count": len(failed_nodes),
1007
+ "failed_nodes": failed_nodes if failed_nodes else None,
1008
+ }
1009
+ else:
1010
+ # Start oldest stopped nodes (default behavior - lowest age values)
1011
+ if count < 1:
1012
+ return {"status": "error", "message": "count must be at least 1"}
1013
+
1014
+ logging.info(f"Forced action: Starting {count} oldest stopped node(s)")
1015
+
1016
+ # Get oldest stopped nodes (lowest age values)
1017
+ with self.S() as session:
1018
+ oldest_nodes = session.execute(
1019
+ select(Node)
1020
+ .where(Node.status == STOPPED)
1021
+ .order_by(Node.age.asc())
1022
+ .limit(count)
1023
+ ).all()
1024
+
1025
+ if not oldest_nodes:
1026
+ return {"status": "error", "message": "No stopped nodes to start"}
1027
+
1028
+ if len(oldest_nodes) < count:
1029
+ logging.warning(f"Only {len(oldest_nodes)} stopped nodes available, starting all of them")
1030
+
1031
+ started_nodes = []
1032
+ upgraded_nodes = []
1033
+ failed_nodes = []
1034
+
1035
+ for row in oldest_nodes:
1036
+ node = row[0]
1037
+ if dry_run:
1038
+ logging.warning(f"DRYRUN: Start oldest stopped node {node.node_name}")
1039
+ started_nodes.append(node.service.replace(".service", ""))
1040
+ else:
1041
+ try:
1042
+ # Check if node needs upgrade
1043
+ if not node.version:
1044
+ node.version = get_antnode_version(node.binary)
1045
+
1046
+ # If the stopped version is old, upgrade it (which also starts it)
1047
+ if Version(metrics["antnode_version"]) > Version(node.version):
1048
+ if not self._upgrade_node_binary(node, metrics["antnode_version"]):
1049
+ failed_nodes.append({"service": node.service.replace(".service", ""), "error": "upgrade failed"})
1050
+ else:
1051
+ upgraded_nodes.append(node.service.replace(".service", ""))
1052
+ else:
1053
+ manager = self._get_process_manager(node)
1054
+ if manager.start_node(node):
1055
+ self._set_node_status(node.id, RESTARTING)
1056
+ started_nodes.append(node.service.replace(".service", ""))
1057
+ else:
1058
+ failed_nodes.append({"service": node.service.replace(".service", ""), "error": "start failed"})
1059
+ except Exception as e:
1060
+ logging.error(f"Failed to start node {node.node_name}: {e}")
1061
+ failed_nodes.append({"service": node.service.replace(".service", ""), "error": str(e)})
1062
+
1063
+ if count == 1 and len(started_nodes) == 1:
1064
+ # Keep backward compatibility for single node
1065
+ # Extract node name from service name (e.g., "antnode0001" -> "0001")
1066
+ node_name = started_nodes[0].replace("antnode", "")
1067
+ return {"status": "started-node", "node": node_name}
1068
+ elif count == 1 and len(upgraded_nodes) == 1:
1069
+ # Keep backward compatibility for single node upgrade
1070
+ # Extract node name from service name (e.g., "antnode0001" -> "0001")
1071
+ node_name = upgraded_nodes[0].replace("antnode", "")
1072
+ return {"status": "upgrading-node", "node": node_name}
1073
+
1074
+ return {
1075
+ "status": "started-nodes" if not dry_run else "start-dryrun",
1076
+ "started_count": len(started_nodes),
1077
+ "started_nodes": started_nodes if started_nodes else None,
1078
+ "upgraded_count": len(upgraded_nodes),
1079
+ "upgraded_nodes": upgraded_nodes if upgraded_nodes else None,
1080
+ "failed_count": len(failed_nodes),
1081
+ "failed_nodes": failed_nodes if failed_nodes else None,
1082
+ }
1083
+
1084
+ def _force_disable_node(
1085
+ self, service_name: Optional[str], dry_run: bool
1086
+ ) -> Dict[str, Any]:
1087
+ """Force disable a specific node (service_name required)."""
1088
+ if not service_name:
1089
+ return {"status": "error", "message": "service_name required for disable action"}
1090
+
1091
+ # Parse comma-separated service names
1092
+ service_names = parse_service_names(service_name)
1093
+
1094
+ disabled_nodes = []
1095
+ failed_nodes = []
1096
+
1097
+ for name in service_names:
1098
+ node = self._get_node_by_name(name)
1099
+ if not node:
1100
+ failed_nodes.append({"service": name, "error": "not found"})
1101
+ continue
1102
+
1103
+ logging.info(f"Forced action: Disabling node {name}")
1104
+ if dry_run:
1105
+ logging.warning(f"DRYRUN: Disable node {name}")
1106
+ disabled_nodes.append(name)
1107
+ else:
1108
+ try:
1109
+ # Stop the node if it's running
1110
+ if node.status == RUNNING:
1111
+ manager = self._get_process_manager(node)
1112
+ manager.stop_node(node)
1113
+ self._set_node_status(node.id, DISABLED)
1114
+ disabled_nodes.append(name)
1115
+ except Exception as e:
1116
+ logging.error(f"Failed to disable node {name}: {e}")
1117
+ failed_nodes.append({"service": name, "error": str(e)})
1118
+
1119
+ return {
1120
+ "status": "disabled-nodes" if not dry_run else "disable-dryrun",
1121
+ "disabled_count": len(disabled_nodes),
1122
+ "disabled_nodes": disabled_nodes,
1123
+ "failed_count": len(failed_nodes),
1124
+ "failed_nodes": failed_nodes if failed_nodes else None,
1125
+ }
1126
+
1127
+ def _force_teardown_cluster(
1128
+ self, machine_config: Dict[str, Any], dry_run: bool
1129
+ ) -> Dict[str, Any]:
1130
+ """Force teardown the entire cluster."""
1131
+ logging.info("Forced action: Tearing down cluster")
1132
+
1133
+ # Get all nodes
1134
+ with self.S() as session:
1135
+ all_nodes = session.execute(
1136
+ select(Node).order_by(Node.id.asc())
1137
+ ).all()
1138
+
1139
+ if not all_nodes:
1140
+ return {"status": "no-nodes", "message": "No nodes to teardown"}
1141
+
1142
+ # Get the process manager from the first node (the empty case returned above)
+ sample_node = all_nodes[0][0]
+ manager = self._get_process_manager(sample_node)
1148
+
1149
+ # Try manager-specific teardown first
1150
+ if hasattr(manager, 'teardown_cluster'):
1151
+ logging.info(f"Using {manager.__class__.__name__} teardown_cluster method")
1152
+ if dry_run:
1153
+ logging.warning("DRYRUN: Teardown cluster via manager")
1154
+ else:
1155
+ if manager.teardown_cluster():
1156
+ # Remove all nodes from database
1157
+ with self.S() as session:
1158
+ session.query(Node).delete()
1159
+ session.commit()
1160
+ return {"status": "cluster-teardown", "method": "manager-specific"}
1161
+
1162
+ # Fall back to removing each node individually (without delay)
1163
+ logging.info("Using default teardown (remove all nodes)")
1164
+ removed_count = 0
1165
+ for row in all_nodes:
1166
+ node = row[0]
1167
+ if dry_run:
1168
+ logging.warning(f"DRYRUN: Remove node {node.node_name}")
1169
+ removed_count += 1
1170
+ else:
1171
+ try:
1172
+ manager = self._get_process_manager(node)
1173
+ manager.remove_node(node)
1174
+ with self.S() as session:
1175
+ session.delete(node)
1176
+ session.commit()
1177
+ removed_count += 1
1178
+ logging.info(f"Removed node {node.node_name}")
1179
+ except Exception as e:
1180
+ logging.error(f"Failed to remove node {node.node_name}: {e}")
1181
+
1182
+ return {
1183
+ "status": "cluster-teardown",
1184
+ "method": "individual-remove",
1185
+ "removed_count": removed_count,
1186
+ }
1187
+
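A hedged teardown sketch, again reusing the earlier placeholders; with dry_run=True the plan is only logged.

    # Illustrative only: tear down every node without actually removing anything.
    result = executor.execute_forced_action("teardown", machine_config, metrics, dry_run=True)
    print(result["status"])   # "cluster-teardown", or "no-nodes" when nothing is registered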
1188
+ def _survey_specific_nodes(self, service_names: List[str], dry_run: bool) -> Dict[str, Any]:
1189
+ """Survey specific nodes by service name.
1190
+
1191
+ Args:
1192
+ service_names: List of service names to survey
1193
+ dry_run: If True, log without executing
1194
+
1195
+ Returns:
1196
+ Dictionary with survey results
1197
+ """
1198
+ from wnm.utils import read_node_metrics, read_node_metadata, update_node_from_metrics
1199
+
1200
+ surveyed_nodes = []
1201
+ failed_nodes = []
1202
+
1203
+ for service_name in service_names:
1204
+ node = self._get_node_by_name(service_name)
1205
+ if not node:
1206
+ failed_nodes.append({"service": service_name, "error": "not found"})
1207
+ continue
1208
+
1209
+ if node.status == DISABLED:
1210
+ failed_nodes.append({"service": service_name, "error": "disabled"})
1211
+ continue
1212
+
1213
+ if dry_run:
1214
+ logging.warning(f"DRYRUN: Survey node {service_name}")
1215
+ surveyed_nodes.append(service_name)
1216
+ else:
1217
+ logging.info(f"Surveying node {service_name}")
1218
+
1219
+ # Check metadata first
1220
+ node_metadata = read_node_metadata(node.host, node.metrics_port)
1221
+
1222
+ # If metadata fails, fake metrics with 0's
1223
+ if node_metadata["status"] == STOPPED:
1224
+ node_metrics = {
1225
+ "status": STOPPED,
1226
+ "uptime": 0,
1227
+ "records": 0,
1228
+ "shunned": 0,
1229
+ "connected_peers": 0
1230
+ }
1231
+ else:
1232
+ # Metadata succeeded, now get metrics
1233
+ node_metrics = read_node_metrics(node.host, node.metrics_port)
1234
+
1235
+ # Skip update if node is stopped and already marked as stopped
1236
+ if node_metadata["status"] == STOPPED and node.status == STOPPED:
1237
+ surveyed_nodes.append(service_name)
1238
+ continue
1239
+
1240
+ update_node_from_metrics(self.S, node.id, node_metrics, node_metadata)
1241
+ surveyed_nodes.append(service_name)
1242
+
1243
+ return {
1244
+ "status": "survey-complete" if not dry_run else "survey-dryrun",
1245
+ "surveyed_count": len(surveyed_nodes),
1246
+ "surveyed_nodes": surveyed_nodes,
1247
+ "failed_count": len(failed_nodes),
1248
+ "failed_nodes": failed_nodes if failed_nodes else None,
1249
+ }
1250
+
1251
+ def _force_survey_nodes(self, service_name: Optional[str] = None, dry_run: bool = False) -> Dict[str, Any]:
1252
+ """Force a survey of all nodes or specific nodes to update their status and metrics.
1253
+
1254
+ Args:
1255
+ service_name: Optional comma-separated list of service names to survey
1256
+ dry_run: If True, log without executing
1257
+
1258
+ Returns:
1259
+ Dictionary with survey results
1260
+ """
1261
+ # Parse service names if provided
1262
+ service_names = parse_service_names(service_name)
1263
+
1264
+ if service_names:
1265
+ # Survey specific nodes
1266
+ logging.info(f"Forced action: Surveying {len(service_names)} specific nodes")
1267
+ return self._survey_specific_nodes(service_names, dry_run)
1268
+ else:
1269
+ # Survey all nodes
1270
+ logging.info("Forced action: Surveying all nodes")
1271
+
1272
+ if dry_run:
1273
+ logging.warning("DRYRUN: Survey all nodes")
1274
+ # Get count of non-disabled nodes
1275
+ with self.S() as session:
1277
+ node_count = session.execute(
1278
+ select(func.count(Node.id)).where(Node.status != DISABLED)
1279
+ ).scalar()
1280
+ return {"status": "survey-dryrun", "node_count": node_count}
1281
+
1282
+ # Update all nodes
1283
+ update_nodes(self.S)
1284
+
1285
+ # Get updated count
1286
+ with self.S() as session:
1287
+ from wnm.common import DISABLED
1288
+ node_count = session.execute(
1289
+ select(func.count(Node.id)).where(Node.status != DISABLED)
1290
+ ).scalar()
1291
+
1292
+ return {"status": "survey-complete", "node_count": node_count}