wnm 0.0.9__py3-none-any.whl → 0.0.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wnm might be problematic; see the package registry advisory for more details.

wnm/reports.py ADDED
@@ -0,0 +1,286 @@
1
+ """
2
+ Reports module for weave-node-manager (wnm).
3
+
4
+ Provides formatted reporting capabilities for node status and details.
5
+ """
6
+
7
+ import json
8
+ import logging
9
+ from typing import List, Optional
10
+
11
+ from sqlalchemy import select
12
+
13
+ from wnm.models import Node
14
+ from wnm.common import RUNNING, STOPPED, UPGRADING, RESTARTING, REMOVING, DISABLED, DEAD
15
+ from wnm.utils import parse_service_names
16
+
17
+
18
class NodeReporter:
    """
    Reporter class for generating node status reports.

    Supports two report types:
    - node-status: Tabular summary of nodes
    - node-status-details: Detailed node information

    Both report types can be rendered as plain text or JSON.
    """

    def __init__(self, session_factory):
        """
        Initialize reporter with database session factory.

        Args:
            session_factory: SQLAlchemy scoped_session factory
        """
        self.S = session_factory
        self.logger = logging.getLogger(__name__)

    @staticmethod
    def _json_one_or_many(node_dicts: List[dict]) -> str:
        """Serialize one dict as a JSON object, several as a JSON array.

        Shared by both report types so a single-node query always yields
        an object rather than a one-element array.
        """
        if len(node_dicts) == 1:
            return json.dumps(node_dicts[0], indent=2)
        return json.dumps(node_dicts, indent=2)

    def _get_nodes(self, service_names: Optional[List[str]] = None) -> List[Node]:
        """
        Retrieve nodes from database.

        Args:
            service_names: Optional list of specific service names to retrieve

        Returns:
            List of Node objects. When service_names is given, nodes are
            returned in the requested order and unknown names are logged as
            warnings; otherwise all nodes are returned ordered by id.
        """
        with self.S() as session:
            if not service_names:
                # No filter: all nodes in numerical (id) order
                return list(session.scalars(select(Node).order_by(Node.id)).all())
            nodes = []
            for service_name in service_names:
                node = session.scalars(
                    select(Node).where(Node.service == service_name)
                ).first()
                if node is None:
                    self.logger.warning(f"Node {service_name} not found in database")
                else:
                    nodes.append(node)
            return nodes

    def node_status_report(
        self,
        service_name: Optional[str] = None,
        report_format: str = "text"
    ) -> str:
        """
        Generate tabular node status report.

        Format (text):
            Service Name        Peer ID     Status     Connected Peers
            antnode0001         12D3Koo...  RUNNING    4

        Format (json):
            [{"service_name": "antnode0001", "peer_id": "12D3Koo...",
              "status": "RUNNING", "connected_peers": 4}]

        Args:
            service_name: Optional comma-separated list of service names
            report_format: Output format ("text" or "json")

        Returns:
            Formatted string report
        """
        service_names = parse_service_names(service_name)
        nodes = self._get_nodes(service_names)

        if not nodes:
            if report_format == "json":
                return json.dumps({"error": "No nodes found"}, indent=2)
            return "No nodes found."

        if report_format == "json":
            # Build JSON output with only the summary fields
            node_dicts = [
                {
                    "service_name": node.service,
                    "peer_id": node.peer_id or "-",
                    "status": node.status,
                    "connected_peers": node.connected_peers if node.connected_peers is not None else 0,
                }
                for node in nodes
            ]
            return self._json_one_or_many(node_dicts)

        # Build text report: fixed-width header followed by one row per node
        lines = [
            f"{'Service Name':<20}{'Peer ID':<50}{'Status':<15}{'Connected Peers':>15}"
        ]
        for node in nodes:
            # Connected peers from the connected_peers field; None means 0
            peers = node.connected_peers if node.connected_peers is not None else 0
            lines.append(
                f"{node.service:<20}{(node.peer_id or '-'):<50}{node.status:<15}{peers:>15}"
            )
        return "\n".join(lines)

    def node_status_details_report(
        self,
        service_name: Optional[str] = None,
        report_format: str = "text"
    ) -> str:
        """
        Generate detailed node status report.

        Supports two formats:
        - text: key: value format
        - json: JSON format with snake_case keys

        Args:
            service_name: Optional comma-separated list of service names
            report_format: Output format ("text" or "json")

        Returns:
            Formatted string report
        """
        service_names = parse_service_names(service_name)
        nodes = self._get_nodes(service_names)

        if not nodes:
            if report_format == "json":
                return json.dumps({"error": "No nodes found"}, indent=2)
            return "No nodes found."

        if report_format == "json":
            return self._format_details_json(nodes)
        return self._format_details_text(nodes)

    def _format_details_text(self, nodes: List[Node]) -> str:
        """
        Format node details as text (key: value format).

        Args:
            nodes: List of Node objects

        Returns:
            Formatted text string; multiple nodes are separated by a blank line
        """
        sections = []

        for node in nodes:
            # Log and bin paths are derived from root_dir; they are not stored
            # on the model, so fall back to "unknown" when inputs are missing.
            log_path = f"{node.root_dir}/logs" if node.root_dir else "unknown"
            bin_path = f"{node.root_dir}/{node.binary}" if node.root_dir and node.binary else "unknown"
            connected_peers = node.connected_peers if node.connected_peers is not None else 0
            rewards_address = node.wallet or "unknown"
            age_seconds = node.age if node.age is not None else 0

            lines = [
                f"Service Name: {node.service}",
                f"Version: {node.version or 'unknown'}",
                f"Port: {node.port}",
                f"Metrics Port: {node.metrics_port}",
                f"Data path: {node.root_dir}",
                f"Log path: {log_path}",
                f"Bin Path: {bin_path}",
                f"Connected peers: {connected_peers}",
                f"Rewards address: {rewards_address}",
                f"Age: {age_seconds}",
                f"Peer ID: {node.peer_id or '-'}",
                f"Status: {node.status}",
            ]
            sections.append("\n".join(lines))

        # Separate multiple nodes with a blank line
        return "\n\n".join(sections)

    def _format_details_json(self, nodes: List[Node]) -> str:
        """
        Format node details as JSON using snake_case field names from model.

        Args:
            nodes: List of Node objects

        Returns:
            JSON formatted string
        """
        # Use the __json__ method from the Node model for full field coverage
        node_dicts = [node.__json__() for node in nodes]
        return self._json_one_or_many(node_dicts)
247
+
248
+
249
def generate_node_status_report(
    session_factory,
    service_name: Optional[str] = None,
    report_format: str = "text"
) -> str:
    """
    Convenience function to generate node status report.

    Args:
        session_factory: SQLAlchemy scoped_session factory
        service_name: Optional comma-separated list of service names
        report_format: Output format ("text" or "json")

    Returns:
        Formatted report string
    """
    # Thin wrapper: construct a reporter and delegate immediately.
    return NodeReporter(session_factory).node_status_report(
        service_name, report_format
    )
267
+
268
+
269
def generate_node_status_details_report(
    session_factory,
    service_name: Optional[str] = None,
    report_format: str = "text"
) -> str:
    """
    Convenience function to generate node status details report.

    Args:
        session_factory: SQLAlchemy scoped_session factory
        service_name: Optional comma-separated list of service names
        report_format: Output format ("text" or "json")

    Returns:
        Formatted report string
    """
    # Thin wrapper: construct a reporter and delegate immediately.
    return NodeReporter(session_factory).node_status_details_report(
        service_name, report_format
    )
wnm/utils.py ADDED
@@ -0,0 +1,403 @@
1
+ import logging
2
+ import os
3
+ import re
4
+ import shutil
5
+ import subprocess
6
+ import sys
7
+ import time
8
+ from collections import Counter
9
+ from typing import List, Optional
10
+
11
+ import psutil
12
+ import requests
13
+ from sqlalchemy import create_engine, delete, insert, select, text, update
14
+ from sqlalchemy.orm import scoped_session, sessionmaker
15
+
16
+ from wnm.common import (
17
+ DEAD,
18
+ DISABLED,
19
+ DONATE,
20
+ METRICS_PORT_BASE,
21
+ MIGRATING,
22
+ MIN_NODES_THRESHOLD,
23
+ PORT_MULTIPLIER,
24
+ QUEEN,
25
+ REMOVING,
26
+ RESTARTING,
27
+ RUNNING,
28
+ STOPPED,
29
+ UPGRADING,
30
+ )
31
+ from wnm.config import BOOTSTRAP_CACHE_DIR, LOG_DIR, PLATFORM
32
+ from wnm.models import Base, Machine, Node
33
+
34
+
35
def parse_service_names(service_name_str: Optional[str]) -> Optional[List[str]]:
    """Parse comma-separated service names.

    Args:
        service_name_str: Comma-separated service names
            (e.g. "antnode0001,antnode0003"); may be None or empty.

    Returns:
        List of non-empty, whitespace-trimmed names in their original order,
        or None if the input is None/empty.
    """
    if not service_name_str:
        return None

    parsed = []
    for raw_name in service_name_str.split(","):
        trimmed = raw_name.strip()
        # Skip empty entries produced by stray or trailing commas
        if trimmed:
            parsed.append(trimmed)
    return parsed
51
+
52
+
53
# Probe a node's /metadata endpoint for its version and peer id.
def read_node_metadata(host, port):
    """Return a dict with "status", "peer_id" and, when reported, "version".

    The version key is only included when one was actually found, to stop
    clobbering the binary-version check done elsewhere.
    """
    try:
        response = requests.get(
            "http://{0}:{1}/metadata".format(host, port), timeout=5
        )
        data = response.text
    except requests.exceptions.ConnectionError:
        logging.debug("Connection Refused on port: {0}:{1}".format(host, str(port)))
        return {"status": STOPPED, "peer_id": ""}
    except Exception as error:
        template = "In RNMd - An exception of type {0} occurred. Arguments:\n{1!r}"
        logging.info(template.format(type(error).__name__, error.args))
        return {"status": STOPPED, "peer_id": ""}

    # Collect a dict to return, scraping fields out of the metrics text
    card = {}
    try:
        card["version"] = re.findall(r'{antnode_version="([\d\.]+)"}', data)[0]
    except (IndexError, KeyError) as e:
        logging.info(f"No version found: {e}")
    try:
        card["peer_id"] = re.findall(r'{peer_id="([\w\d]+)"}', data)[0]
    except (IndexError, KeyError) as e:
        logging.debug(f"No peer_id found: {e}")
        card["peer_id"] = ""
    # A reported version is our signal that the node is actually up
    if "version" in card:
        card["status"] = RUNNING
    else:
        card["status"] = STOPPED
    return card
81
+
82
+
83
# Read data from metrics port
def read_node_metrics(host, port):
    """Fetch runtime metrics from a node's Prometheus-style /metrics endpoint.

    Args:
        host: Hostname or IP address of the node's metrics endpoint.
        port: Metrics port number.

    Returns:
        dict with keys "status", "uptime", "records", "shunned",
        "connected_peers". When the node is unreachable or any error occurs,
        status is STOPPED and all counters are 0.
    """

    def _scrape(pattern, body):
        # First captured integer for *pattern*, defaulting to 0 when absent.
        return int((re.findall(pattern, body) or [0])[0])

    try:
        url = "http://{0}:{1}/metrics".format(host, port)
        response = requests.get(url, timeout=5)
        return {
            "status": RUNNING,
            "uptime": _scrape(r"ant_node_uptime ([\d]+)", response.text),
            "records": _scrape(r"ant_networking_records_stored ([\d]+)", response.text),
            "shunned": _scrape(
                r"ant_networking_shunned_by_close_group ([\d]+)", response.text
            ),
            "connected_peers": _scrape(
                r"ant_networking_connected_peers ([\d]+)", response.text
            ),
        }
    except requests.exceptions.ConnectionError:
        logging.debug("Connection Refused on port: {0}:{1}".format(host, str(port)))
    except Exception as error:
        template = "in:RNM - An exception of type {0} occurred. Arguments:\n{1!r}"
        message = template.format(type(error).__name__, error.args)
        logging.info(message)
    # Unreachable or errored: report a stopped node with zeroed counters.
    return {
        "status": STOPPED,
        "uptime": 0,
        "records": 0,
        "shunned": 0,
        "connected_peers": 0,
    }
130
+
131
+
132
# Read antnode binary version by invoking it with --version.
def get_antnode_version(binary):
    """Return the version string reported by *binary*, or 0 on any failure.

    NOTE: the failure sentinel is the int 0, not a version string; callers
    compare against it accordingly.
    """
    try:
        output = subprocess.run(
            [binary, "--version"], stdout=subprocess.PIPE
        ).stdout.decode("utf-8")
        return re.findall(r"Autonomi Node v([\d\.]+)", output)[0]
    except Exception as error:
        message = "In GAV - An exception of type {0} occurred. Arguments:\n{1!r}".format(
            type(error).__name__, error.args
        )
        logging.info(message)
        return 0
144
+
145
+
146
# Determine how long this node has been around by looking at its secret-key file.
def get_node_age(root_dir):
    """Return the secret-key file's mtime (epoch seconds), or 0 if unreadable."""
    key_file = "{0}/secret-key".format(root_dir)
    try:
        return int(os.stat(key_file).st_mtime)
    except (FileNotFoundError, OSError) as e:
        logging.debug(f"Unable to get node age for {root_dir}: {e}")
        return 0
153
+
154
+
155
# Survey nodes by reading metadata from metrics ports or binary --version
def get_machine_metrics(S, node_storage, remove_limit, crisis_bytes):
    """Collect machine-wide metrics used for node scheduling decisions.

    Gathers node counts by status and version from the database, system boot
    time, load averages, CPU/memory/disk usage, and disk/network throughput
    sampled over roughly one second.

    Args:
        S: SQLAlchemy scoped_session factory.
        node_storage: Path on the drive whose usage should be measured.
        remove_limit: Disk-usage percentage that triggers node removal.
        crisis_bytes: Assumed maximum bytes consumed per node.

    Returns:
        dict mapping metric names to values.

    Note:
        Exits the process (sys.exit(1)) when no antnode binary is on PATH.
    """
    metrics = {}

    with S() as session:
        db_nodes = session.execute(select(Node.status, Node.version)).all()

    # Get system start time before we probe metrics.
    # Pre-set to 0 so the key always exists, even when parsing silently fails
    # (e.g. unexpected `uptime --since` output on Linux).
    metrics["system_start"] = 0
    try:
        if PLATFORM == "Darwin":
            # macOS: use sysctl kern.boottime
            p = subprocess.run(
                ["sysctl", "-n", "kern.boottime"],
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
                check=True,
            ).stdout.decode("utf-8")
            # Parse: { sec = 1234567890, usec = 0 }
            match = re.search(r"sec = (\d+)", p)
            if match:
                metrics["system_start"] = int(match.group(1))
            else:
                raise ValueError("Could not parse kern.boottime")
        else:
            # Linux: use uptime --since
            p = subprocess.run(
                ["uptime", "--since"],
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
            ).stdout.decode("utf-8")
            if re.match(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}", p):
                metrics["system_start"] = int(
                    time.mktime(time.strptime(p.strip(), "%Y-%m-%d %H:%M:%S"))
                )
    except (subprocess.CalledProcessError, ValueError) as err:
        # BUG FIX: previously logging.error("GMM ERROR:", err) passed err as a
        # %-format argument with no placeholder, which broke record formatting.
        logging.error("GMM ERROR: %s", err)
        metrics["system_start"] = 0

    # Get some initial stats for comparing after a few seconds
    # We start these counters AFTER reading the database
    start_time = time.time()
    start_disk_counters = psutil.disk_io_counters()
    start_net_counters = psutil.net_io_counters()

    metrics["total_nodes"] = len(db_nodes)
    data = Counter(node[0] for node in db_nodes)
    metrics["running_nodes"] = data[RUNNING]
    metrics["stopped_nodes"] = data[STOPPED]
    metrics["restarting_nodes"] = data[RESTARTING]
    metrics["upgrading_nodes"] = data[UPGRADING]
    metrics["migrating_nodes"] = data[MIGRATING]
    metrics["removing_nodes"] = data[REMOVING]
    metrics["dead_nodes"] = data[DEAD]
    metrics["antnode"] = shutil.which("antnode")
    if not metrics["antnode"]:
        logging.warning("Unable to locate current antnode binary, exiting")
        sys.exit(1)
    metrics["antnode_version"] = get_antnode_version(metrics["antnode"])
    # First node in the table is treated as the "queen" reference version
    metrics["queen_node_version"] = (
        db_nodes[0][1] if metrics["total_nodes"] > 0 else metrics["antnode_version"]
    )
    metrics["nodes_latest_v"] = (
        sum(1 for node in db_nodes if node[1] == metrics["antnode_version"]) or 0
    )
    metrics["nodes_no_version"] = sum(1 for node in db_nodes if not node[1]) or 0
    metrics["nodes_to_upgrade"] = (
        metrics["total_nodes"] - metrics["nodes_latest_v"] - metrics["nodes_no_version"]
    )
    metrics["nodes_by_version"] = Counter(ver[1] for ver in db_nodes)

    # Windows has to build load average over 5 seconds. The first 5 seconds returns 0's
    # I don't plan on supporting windows, but if this get's modular, I don't want this
    # issue to be skipped
    # if platform.system() == "Windows":
    #     discard=psutil.getloadavg()
    #     time.sleep(5)
    metrics["load_average_1"], metrics["load_average_5"], metrics["load_average_15"] = (
        psutil.getloadavg()
    )
    # Get CPU Metrics over 1 second
    cpu_times = psutil.cpu_times_percent(1)
    if PLATFORM == "Darwin":
        # macOS: cpu_times has (user, nice, system, idle) - no iowait
        metrics["idle_cpu_percent"] = cpu_times.idle
        metrics["io_wait"] = 0  # Not available on macOS
    else:
        # Linux: cpu_times has (user, nice, system, idle, iowait, ...)
        metrics["idle_cpu_percent"], metrics["io_wait"] = cpu_times[3:5]
    # Really we returned Idle percent, subtract from 100 to get used.
    metrics["used_cpu_percent"] = 100 - metrics["idle_cpu_percent"]
    data = psutil.virtual_memory()
    metrics["used_mem_percent"] = data.percent
    metrics["free_mem_percent"] = 100 - metrics["used_mem_percent"]
    # This only checks the drive mapped to the first node and will need to be updated
    # when we eventually support multiple drives
    data = psutil.disk_usage(node_storage)
    metrics["used_hd_percent"] = data.percent
    metrics["total_hd_bytes"] = data.total
    end_time = time.time()
    end_disk_counters = psutil.disk_io_counters()
    end_net_counters = psutil.net_io_counters()
    # Per-second throughput over the sampling window
    metrics["hdio_write_bytes"] = int(
        (end_disk_counters.write_bytes - start_disk_counters.write_bytes)
        / (end_time - start_time)
    )
    metrics["hdio_read_bytes"] = int(
        (end_disk_counters.read_bytes - start_disk_counters.read_bytes)
        / (end_time - start_time)
    )
    metrics["netio_write_bytes"] = int(
        (end_net_counters.bytes_sent - start_net_counters.bytes_sent)
        / (end_time - start_time)
    )
    metrics["netio_read_bytes"] = int(
        (end_net_counters.bytes_recv - start_net_counters.bytes_recv)
        / (end_time - start_time)
    )
    # How close (out of 100) to removal limit will we be with a max bytes per node (2GB default)
    # For running nodes with Porpoise(tm).
    metrics["node_hd_crisis"] = int(
        (
            ((metrics["total_nodes"]) * int(crisis_bytes))
            / (metrics["total_hd_bytes"] * (remove_limit / 100))
        )
        * 100
    )
    return metrics
285
+
286
+
287
# Update node with metrics result
def update_node_from_metrics(S, id, metrics, metadata):
    """Persist a metrics/metadata probe result onto node row *id*.

    Returns True on success, False when the update (or building the update
    payload) raised; the error is logged as a warning.
    """
    try:
        # We check the binary version in other code, so lets stop clobbering it when a node is stopped
        card = {
            "status": metrics["status"],
            "timestamp": int(time.time()),
            "uptime": metrics["uptime"],
            "records": metrics["records"],
            "shunned": metrics["shunned"],
            "connected_peers": metrics["connected_peers"],
            "peer_id": metadata["peer_id"],
        }
        # Version is only written when the probe actually reported one
        if "version" in metadata:
            card["version"] = metadata["version"]
        with S() as session:
            session.query(Node).filter(Node.id == id).update(card)
            session.commit()
        return True
    except Exception as error:
        template = "In UNFM - An exception of type {0} occurred. Arguments:\n{1!r}"
        message = template.format(type(error).__name__, error.args)
        logging.warning(message)
        return False
312
+
313
+
314
def _purge_expired_removals(S, delay_remove):
    """Delete REMOVING nodes whose removal delay has expired.

    Returns the number of nodes still pending removal.
    """
    with S() as session:
        removals = session.execute(
            select(Node.timestamp, Node.id)
            .where(Node.status == REMOVING)
            .order_by(Node.timestamp.asc())
        ).all()
    remaining = len(removals)
    for timestamp, node_id in removals:
        # If the delay_remove timer has expired, delete the entry
        if isinstance(timestamp, int) and timestamp < (
            int(time.time()) - delay_remove
        ):
            logging.info("Deleting removed node " + str(node_id))
            with S() as session:
                session.execute(delete(Node).where(Node.id == node_id))
                session.commit()
            remaining -= 1
    return remaining


def _refresh_delayed_nodes(S, status, delay, log_prefix):
    """Re-probe nodes stuck in a transitional *status* whose delay expired.

    Used for both UPGRADING and RESTARTING nodes, which share identical
    handling apart from the status value, the delay, and the log message.
    Returns the number of nodes still in that transitional status.
    """
    with S() as session:
        rows = session.execute(
            select(Node.timestamp, Node.id, Node.host, Node.metrics_port)
            .where(Node.status == status)
            .order_by(Node.timestamp.asc())
        ).all()
    remaining = len(rows)
    for timestamp, node_id, host, metrics_port in rows:
        # If the delay timer has expired, check on status
        if isinstance(timestamp, int) and timestamp < (int(time.time()) - delay):
            logging.info(log_prefix + str(node_id))
            node_metrics = read_node_metrics(host, metrics_port)
            node_metadata = read_node_metadata(host, metrics_port)
            if node_metrics and node_metadata:
                update_node_from_metrics(S, node_id, node_metrics, node_metadata)
            remaining -= 1
    return remaining


# Reconcile transitional node counters (removing/upgrading/restarting)
def update_counters(S, old, config):
    """Process nodes in transitional states and refresh their counters.

    Args:
        S: SQLAlchemy scoped_session factory.
        old: Counter dict holding "removing_nodes", "upgrading_nodes",
            "restarting_nodes" counts (as produced by get_machine_metrics).
        config: Config dict providing "delay_remove", "delay_upgrade",
            "delay_start" timeouts in seconds.

    Returns:
        The same *old* dict with the transitional counters updated in place.
    """
    # Are we already removing a node
    if old["removing_nodes"]:
        old["removing_nodes"] = _purge_expired_removals(S, config["delay_remove"])
    # Are we already upgrading a node
    if old["upgrading_nodes"]:
        old["upgrading_nodes"] = _refresh_delayed_nodes(
            S, UPGRADING, config["delay_upgrade"], "Updating upgraded node "
        )
    # Are we already restarting a node
    if old["restarting_nodes"]:
        old["restarting_nodes"] = _refresh_delayed_nodes(
            S, RESTARTING, config["delay_start"], "Updating restarted node "
        )
    return old
382
+
383
+
384
# Probe every non-disabled node and persist fresh metrics/metadata.
def update_nodes(S):
    """Refresh stored status for all nodes that are not DISABLED."""
    with S() as session:
        nodes = session.execute(
            select(Node.timestamp, Node.id, Node.host, Node.metrics_port, Node.status)
            .where(Node.status != DISABLED)
            .order_by(Node.timestamp.asc())
        ).all()
    for timestamp, node_id, host, metrics_port, status in nodes:
        # Only rows with a usable integer timestamp are probed
        if not isinstance(timestamp, int):
            continue
        logging.debug("Updating info on node " + str(node_id))
        node_metrics = read_node_metrics(host, metrics_port)
        node_metadata = read_node_metadata(host, metrics_port)
        if not (node_metrics and node_metadata):
            continue
        # Don't write updates for stopped nodes that are already marked as stopped
        if node_metadata["status"] == STOPPED and status == STOPPED:
            continue
        update_node_from_metrics(S, node_id, node_metrics, node_metadata)