wnm 0.0.8__py3-none-any.whl → 0.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wnm might be problematic. Click here for more details.

wnm/__main__.py CHANGED
@@ -1,29 +1,38 @@
1
- import os, sys
2
- import re, json, requests, time
3
- import subprocess, logging
1
+ import json
2
+ import logging
3
+ import os
4
+ import re
5
+ import shutil
6
+ import subprocess
7
+ import sys
8
+ import time
4
9
  from collections import Counter
5
- from packaging.version import Version
10
+
11
+ import psutil
12
+ import requests
6
13
  from dotenv import load_dotenv
7
- import psutil, shutil
14
+ from packaging.version import Version
15
+ from sqlalchemy import create_engine, delete, insert, select, text, update
16
+ from sqlalchemy.orm import scoped_session, sessionmaker
8
17
 
9
18
  from wnm.models import Base, Machine, Node
10
- from sqlalchemy import create_engine, select, insert, update, delete, text
11
- from sqlalchemy.orm import sessionmaker, scoped_session
12
19
 
13
20
  logging.basicConfig(level=logging.INFO)
14
- #Info level logging for sqlalchemy is too verbose, only use when needed
15
- logging.getLogger('sqlalchemy.engine.Engine').disabled = True
16
-
21
+ # Info level logging for sqlalchemy is too verbose, only use when needed
22
+ logging.getLogger("sqlalchemy.engine.Engine").disabled = True
23
+
17
24
  # import .env
18
25
  basedir = os.path.abspath(os.path.dirname(__file__))
19
- load_dotenv(os.path.join(basedir, '.env'))
26
+ load_dotenv(os.path.join(basedir, ".env"))
20
27
 
21
28
  # simulate arg/yaml configuration
22
29
  config = {}
23
- config['db']='sqlite:///colony.db'
24
- config['DonateAddress'] = os.getenv('DonateAddress') or '0x00455d78f850b0358E8cea5be24d415E01E107CF'
25
- config['ANMHost'] = os.getenv('ANMHost') or '127.0.0.1'
26
- config['CrisisBytes'] = os.getenv('CrisisBytes') or 2 * 10 ** 9 # default 2gb/node
30
+ config["db"] = "sqlite:///colony.db"
31
+ config["DonateAddress"] = (
32
+ os.getenv("DonateAddress") or "0x00455d78f850b0358E8cea5be24d415E01E107CF"
33
+ )
34
+ config["ANMHost"] = os.getenv("ANMHost") or "127.0.0.1"
35
+ config["CrisisBytes"] = os.getenv("CrisisBytes") or 2 * 10**9 # default 2gb/node
27
36
 
28
37
 
29
38
  # Setup Database engine
@@ -41,26 +50,26 @@ S = scoped_session(session_factory)
41
50
  # else:
42
51
 
43
52
  # Primary node for want of one
44
- QUEEN=1
53
+ QUEEN = 1
45
54
 
46
55
  # Donation address
47
- DONATE=config["DonateAddress"]
48
- #Keep these as strings so they can be grepped in logs
49
- STOPPED="STOPPED" #0 Node is not responding to it's metrics port
50
- RUNNING="RUNNING" #1 Node is responding to it's metrics port
51
- UPGRADING="UPGRADING" #2 Upgrade in progress
52
- DISABLED="DISABLED" #-1 Do not start
53
- RESTARTING="RESTARTING" #3 re/starting a server intionally
54
- MIGRATING="MIGRATING" #4 Moving volumes in progress
55
- REMOVING="REMOVING" #5 Removing node in progress
56
- DEAD="DEAD" #-86 Broken node to cleanup
57
-
58
- ANM_HOST=config["ANMHost"]
56
+ DONATE = config["DonateAddress"]
57
+ # Keep these as strings so they can be grepped in logs
58
+ STOPPED = "STOPPED" # 0 Node is not responding to it's metrics port
59
+ RUNNING = "RUNNING" # 1 Node is responding to it's metrics port
60
+ UPGRADING = "UPGRADING" # 2 Upgrade in progress
61
+ DISABLED = "DISABLED" # -1 Do not start
62
+ RESTARTING = "RESTARTING" # 3 re/starting a server intionally
63
+ MIGRATING = "MIGRATING" # 4 Moving volumes in progress
64
+ REMOVING = "REMOVING" # 5 Removing node in progress
65
+ DEAD = "DEAD" # -86 Broken node to cleanup
66
+
67
+ ANM_HOST = config["ANMHost"]
59
68
  # Baseline bytes per node
60
- CRISIS_BYTES=config["CrisisBytes"]
69
+ CRISIS_BYTES = config["CrisisBytes"]
61
70
 
62
71
  # A storage place for ant node data
63
- Workers=[]
72
+ Workers = []
64
73
 
65
74
  # Detect ANM (but don't upgrade)
66
75
  if os.path.exists("/var/antctl/system"):
@@ -68,7 +77,7 @@ if os.path.exists("/var/antctl/system"):
68
77
  if os.path.exists("/etc/cron.d/anm"):
69
78
  # remove cron to disable old anm
70
79
  try:
71
- subprocess.run(['sudo','rm', '/etc/cron.d/anm'])
80
+ subprocess.run(["sudo", "rm", "/etc/cron.d/anm"])
72
81
  except Exception as error:
73
82
  template = "In GAV - An exception of type {0} occurred. Arguments:\n{1!r}"
74
83
  message = template.format(type(error).__name__, error.args)
@@ -85,6 +94,7 @@ if os.path.exists("/var/antctl/wnm_active"):
85
94
  logging.info("wnm still running")
86
95
  sys.exit(1)
87
96
 
97
+
88
98
  # Get anm configuration
89
99
  def load_anm_config():
90
100
  anm_config = {}
@@ -95,111 +105,137 @@ def load_anm_config():
95
105
  # What can we save from /var/antctl/config
96
106
  if os.path.exists("/var/antctl/config"):
97
107
  load_dotenv("/var/antctl/config")
98
- anm_config["NodeCap"] = int(os.getenv('NodeCap') or 20)
99
- anm_config["CpuLessThan"] = int(os.getenv('CpuLessThan') or 50)
100
- anm_config["CpuRemove"] = int(os.getenv('CpuRemove') or 70)
101
- anm_config["MemLessThan"] = int(os.getenv('MemLessThan') or 70)
102
- anm_config["MemRemove"] = int(os.getenv('MemRemove') or 90)
103
- anm_config["HDLessThan"] = int(os.getenv('HDLessThan') or 70)
104
- anm_config["HDRemove"] = int(os.getenv('HDRemove') or 90)
105
- anm_config["DelayStart"] = int(os.getenv('DelayStart') or 5)
106
- anm_config["DelayUpgrade"] = int(os.getenv('DelayUpgrade') or 5)
107
- anm_config["DelayRestart"] = int(os.getenv('DelayRestart') or 10)
108
- anm_config["DelayRemove"] = int(os.getenv('DelayRemove') or 300)
109
- anm_config["NodeStorage"] = os.getenv('NodeStorage') or "/var/antctl/services"
108
+ anm_config["NodeCap"] = int(os.getenv("NodeCap") or 20)
109
+ anm_config["CpuLessThan"] = int(os.getenv("CpuLessThan") or 50)
110
+ anm_config["CpuRemove"] = int(os.getenv("CpuRemove") or 70)
111
+ anm_config["MemLessThan"] = int(os.getenv("MemLessThan") or 70)
112
+ anm_config["MemRemove"] = int(os.getenv("MemRemove") or 90)
113
+ anm_config["HDLessThan"] = int(os.getenv("HDLessThan") or 70)
114
+ anm_config["HDRemove"] = int(os.getenv("HDRemove") or 90)
115
+ anm_config["DelayStart"] = int(os.getenv("DelayStart") or 5)
116
+ anm_config["DelayUpgrade"] = int(os.getenv("DelayUpgrade") or 5)
117
+ anm_config["DelayRestart"] = int(os.getenv("DelayRestart") or 10)
118
+ anm_config["DelayRemove"] = int(os.getenv("DelayRemove") or 300)
119
+ anm_config["NodeStorage"] = os.getenv("NodeStorage") or "/var/antctl/services"
110
120
  # Default to the faucet donation address
111
121
  try:
112
- anm_config["RewardsAddress"] = re.findall(r"--rewards-address ([\dA-Fa-fXx]+)",os.getenv('RewardsAddress'))[0]
122
+ anm_config["RewardsAddress"] = re.findall(
123
+ r"--rewards-address ([\dA-Fa-fXx]+)", os.getenv("RewardsAddress")
124
+ )[0]
113
125
  except:
114
126
  try:
115
- anm_config["RewardsAddress"] = re.findall(r"([\dA-Fa-fXx]+)",os.getenv("RewardsAddress"))[0]
127
+ anm_config["RewardsAddress"] = re.findall(
128
+ r"([\dA-Fa-fXx]+)", os.getenv("RewardsAddress")
129
+ )[0]
116
130
  except:
117
131
  logging.warning("Unable to detect RewardsAddress")
118
132
  sys.exit(1)
119
- anm_config["DonateAddress"]=os.getenv("DonateAddress") or DONATE
120
- anm_config["MaxLoadAverageAllowed"]=float(os.getenv("MaxLoadAverageAllowed") or anm_config["CpuCount"])
121
- anm_config["DesiredLoadAverage"]=float(os.getenv("DesiredLoadAverage") or (anm_config["CpuCount"] * .6))
133
+ anm_config["DonateAddress"] = os.getenv("DonateAddress") or DONATE
134
+ anm_config["MaxLoadAverageAllowed"] = float(
135
+ os.getenv("MaxLoadAverageAllowed") or anm_config["CpuCount"]
136
+ )
137
+ anm_config["DesiredLoadAverage"] = float(
138
+ os.getenv("DesiredLoadAverage") or (anm_config["CpuCount"] * 0.6)
139
+ )
122
140
 
123
141
  try:
124
- with open('/usr/bin/anms.sh', 'r') as file:
142
+ with open("/usr/bin/anms.sh", "r") as file:
125
143
  data = file.read()
126
- anm_config["PortStart"]=int(re.findall(r"ntpr\=(\d+)",data)[0])
144
+ anm_config["PortStart"] = int(re.findall(r"ntpr\=(\d+)", data)[0])
127
145
  except:
128
- anm_config["PortStart"]=55
129
-
130
- anm_config["HDIOReadLessThan"] = float(os.getenv('HDIOReadLessThan') or 0.0)
131
- anm_config["HDIOReadRemove"] = float(os.getenv('HDIOReadRemove') or 0.0)
132
- anm_config["HDIOWriteLessThan"] = float(os.getenv('HDIOWriteLessThan') or 0.0)
133
- anm_config["HDIOWriteRemove"] = float(os.getenv('HDIOWriteRemove') or 0.0)
134
- anm_config["NetIOReadLessThan"] = float(os.getenv('NetIOReadLessThan') or 0.0)
135
- anm_config["NetIOReadRemove"] = float(os.getenv('NetIOReadRemove') or 0.0)
136
- anm_config["NetIOWriteLessThan"] = float(os.getenv('NetIOWriteLessThan') or 0.0)
137
- anm_config["NetIOWriteRemove"] = float(os.getenv('NetIOWriteRemove') or 0.0)
138
- # Timer for last stopped nodes
139
- anm_config["LastStoppedAt"]=0
146
+ anm_config["PortStart"] = 55
140
147
 
148
+ anm_config["HDIOReadLessThan"] = float(os.getenv("HDIOReadLessThan") or 0.0)
149
+ anm_config["HDIOReadRemove"] = float(os.getenv("HDIOReadRemove") or 0.0)
150
+ anm_config["HDIOWriteLessThan"] = float(os.getenv("HDIOWriteLessThan") or 0.0)
151
+ anm_config["HDIOWriteRemove"] = float(os.getenv("HDIOWriteRemove") or 0.0)
152
+ anm_config["NetIOReadLessThan"] = float(os.getenv("NetIOReadLessThan") or 0.0)
153
+ anm_config["NetIOReadRemove"] = float(os.getenv("NetIOReadRemove") or 0.0)
154
+ anm_config["NetIOWriteLessThan"] = float(os.getenv("NetIOWriteLessThan") or 0.0)
155
+ anm_config["NetIOWriteRemove"] = float(os.getenv("NetIOWriteRemove") or 0.0)
156
+ # Timer for last stopped nodes
157
+ anm_config["LastStoppedAt"] = 0
141
158
 
142
159
  return anm_config
143
160
 
161
+
144
162
  # Read confirm from systemd service file
145
163
  def read_systemd_service(antnode):
146
- details={}
164
+ details = {}
147
165
  try:
148
- with open('/etc/systemd/system/'+antnode, 'r') as file:
166
+ with open("/etc/systemd/system/" + antnode, "r") as file:
149
167
  data = file.read()
150
- details['id']=int(re.findall(r"antnode(\d+)",antnode)[0])
151
- details['binary']=re.findall(r"ExecStart=([^ ]+)",data)[0]
152
- details["user"]=re.findall(r"User=(\w+)",data)[0]
153
- details["root_dir"]=re.findall(r"--root-dir ([\w\/]+)",data)[0]
154
- details["port"]=int(re.findall(r"--port (\d+)",data)[0])
155
- details["metrics_port"]=int(re.findall(r"--metrics-server-port (\d+)",data)[0])
156
- details["wallet"]=re.findall(r"--rewards-address ([^ ]+)",data)[0]
157
- details["network"]=re.findall(r"--rewards-address [^ ]+ ([\w\-]+)",data)[0]
168
+ details["id"] = int(re.findall(r"antnode(\d+)", antnode)[0])
169
+ details["binary"] = re.findall(r"ExecStart=([^ ]+)", data)[0]
170
+ details["user"] = re.findall(r"User=(\w+)", data)[0]
171
+ details["root_dir"] = re.findall(r"--root-dir ([\w\/]+)", data)[0]
172
+ details["port"] = int(re.findall(r"--port (\d+)", data)[0])
173
+ details["metrics_port"] = int(
174
+ re.findall(r"--metrics-server-port (\d+)", data)[0]
175
+ )
176
+ details["wallet"] = re.findall(r"--rewards-address ([^ ]+)", data)[0]
177
+ details["network"] = re.findall(r"--rewards-address [^ ]+ ([\w\-]+)", data)[0]
158
178
  except:
159
179
  pass
160
-
180
+
161
181
  return details
162
182
 
183
+
163
184
  # Read data from metadata endpoint
164
- def read_node_metadata(host,port):
185
+ def read_node_metadata(host, port):
165
186
  # Only return version number when we have one, to stop clobbering the binary check
166
187
  try:
167
- url = "http://{0}:{1}/metadata".format(host,port)
188
+ url = "http://{0}:{1}/metadata".format(host, port)
168
189
  response = requests.get(url)
169
- data=response.text
190
+ data = response.text
170
191
  except requests.exceptions.ConnectionError:
171
- logging.debug("Connection Refused on port: {0}:{1}".format(host,str(port)))
172
- return {"status": STOPPED, "peer_id":""}
192
+ logging.debug("Connection Refused on port: {0}:{1}".format(host, str(port)))
193
+ return {"status": STOPPED, "peer_id": ""}
173
194
  except Exception as error:
174
195
  template = "In RNMd - An exception of type {0} occurred. Arguments:\n{1!r}"
175
196
  message = template.format(type(error).__name__, error.args)
176
197
  logging.info(message)
177
- return {"status": STOPPED, "peer_id":""}
198
+ return {"status": STOPPED, "peer_id": ""}
178
199
  # collect a dict to return
179
- card={}
200
+ card = {}
180
201
  try:
181
- card["version"] = re.findall(r'{antnode_version="([\d\.]+)"}',data)[0]
202
+ card["version"] = re.findall(r'{antnode_version="([\d\.]+)"}', data)[0]
182
203
  except:
183
- logging.info('No version found')
204
+ logging.info("No version found")
184
205
  try:
185
- card["peer_id"] = re.findall(r'{peer_id="([\w\d]+)"}',data)[0]
206
+ card["peer_id"] = re.findall(r'{peer_id="([\w\d]+)"}', data)[0]
186
207
  except:
187
208
  card["peer_id"] = ""
188
209
  card["status"] = RUNNING if "version" in card else STOPPED
189
210
  return card
190
211
 
212
+
191
213
  # Read data from metrics port
192
- def read_node_metrics(host,port):
193
- metrics={}
214
+ def read_node_metrics(host, port):
215
+ metrics = {}
194
216
  try:
195
- url = "http://{0}:{1}/metrics".format(host,port)
217
+ url = "http://{0}:{1}/metrics".format(host, port)
196
218
  response = requests.get(url)
197
219
  metrics["status"] = RUNNING
198
- metrics["uptime"] = int((re.findall(r'ant_node_uptime ([\d]+)',response.text) or [0])[0])
199
- metrics["records"] = int((re.findall(r'ant_networking_records_stored ([\d]+)',response.text) or [0])[0])
200
- metrics["shunned"] = int((re.findall(r'ant_networking_shunned_by_close_group ([\d]+)',response.text) or [0])[0])
220
+ metrics["uptime"] = int(
221
+ (re.findall(r"ant_node_uptime ([\d]+)", response.text) or [0])[0]
222
+ )
223
+ metrics["records"] = int(
224
+ (
225
+ re.findall(r"ant_networking_records_stored ([\d]+)", response.text)
226
+ or [0]
227
+ )[0]
228
+ )
229
+ metrics["shunned"] = int(
230
+ (
231
+ re.findall(
232
+ r"ant_networking_shunned_by_close_group ([\d]+)", response.text
233
+ )
234
+ or [0]
235
+ )[0]
236
+ )
201
237
  except requests.exceptions.ConnectionError:
202
- logging.debug("Connection Refused on port: {0}:{1}".format(host,str(port)))
238
+ logging.debug("Connection Refused on port: {0}:{1}".format(host, str(port)))
203
239
  metrics["status"] = STOPPED
204
240
  metrics["uptime"] = 0
205
241
  metrics["records"] = 0
@@ -213,104 +249,116 @@ def read_node_metrics(host,port):
213
249
  metrics["records"] = 0
214
250
  metrics["shunned"] = 0
215
251
  return metrics
216
-
252
+
253
+
217
254
  # Read antnode binary version
218
255
  def get_antnode_version(binary):
219
256
  try:
220
- data = subprocess.run([binary, '--version'], stdout=subprocess.PIPE).stdout.decode('utf-8')
221
- return re.findall(r'Autonomi Node v([\d\.]+)',data)[0]
257
+ data = subprocess.run(
258
+ [binary, "--version"], stdout=subprocess.PIPE
259
+ ).stdout.decode("utf-8")
260
+ return re.findall(r"Autonomi Node v([\d\.]+)", data)[0]
222
261
  except Exception as error:
223
262
  template = "In GAV - An exception of type {0} occurred. Arguments:\n{1!r}"
224
263
  message = template.format(type(error).__name__, error.args)
225
264
  logging.info(message)
226
265
  return 0
227
-
266
+
267
+
228
268
  # Determine how long this node has been around by looking at it's secret_key file
229
269
  def get_node_age(root_dir):
230
270
  try:
231
271
  return int(os.stat("{0}/secret-key".format(root_dir)).st_mtime)
232
272
  except:
233
273
  return 0
234
-
274
+
275
+
235
276
  # Survey nodes by reading metadata from metrics ports or binary --version
236
277
  def survey_anm_nodes(antnodes):
237
278
  # Build a list of node dictionaries to return
238
- details=[]
279
+ details = []
239
280
  # Iterate on nodes
240
281
  for node in antnodes:
241
282
  # Initialize a dict
242
- logging.debug("{0} surveying node {1} ".format(time.strftime("%Y-%m-%d %H:%M"),node))
243
- if not re.findall(r"antnode([\d]+).service",node):
244
- logging.info("can't decode "+str(node))
283
+ logging.debug(
284
+ "{0} surveying node {1} ".format(time.strftime("%Y-%m-%d %H:%M"), node)
285
+ )
286
+ if not re.findall(r"antnode([\d]+).service", node):
287
+ logging.info("can't decode " + str(node))
245
288
  continue
246
- card={"nodename":re.findall(r"antnode([\d]+).service",node)[0],
247
- "service": node,
248
- "timestamp": int(time.time()),
249
- "host": ANM_HOST or '127.0.0.1'
250
- }
289
+ card = {
290
+ "nodename": re.findall(r"antnode([\d]+).service", node)[0],
291
+ "service": node,
292
+ "timestamp": int(time.time()),
293
+ "host": ANM_HOST or "127.0.0.1",
294
+ }
251
295
  # Load what systemd has configured
252
296
  card.update(read_systemd_service(node))
253
- #print(json.dumps(card,indent=2))
297
+ # print(json.dumps(card,indent=2))
254
298
  # Read metadata from metrics_port
255
- metadata = read_node_metadata(card["host"],card["metrics_port"])
256
- #print(json.dumps(metadata,indent=2))
257
- if isinstance(metadata,dict) and \
258
- "status" in metadata and \
259
- metadata["status"]==RUNNING:
299
+ metadata = read_node_metadata(card["host"], card["metrics_port"])
300
+ # print(json.dumps(metadata,indent=2))
301
+ if (
302
+ isinstance(metadata, dict)
303
+ and "status" in metadata
304
+ and metadata["status"] == RUNNING
305
+ ):
260
306
  # soak up metadata
261
307
  card.update(metadata)
262
308
  # The ports up, so grab metrics too
263
- card.update(read_node_metrics(card["host"],card["metrics_port"]))
309
+ card.update(read_node_metrics(card["host"], card["metrics_port"]))
264
310
  # Else run binary to get version
265
311
  else:
266
312
  # If the root directory of the node is missing, it's a bad node
267
313
  if not os.path.isdir(card["root_dir"]):
268
- card["status"]=DEAD
269
- card["version"]=''
314
+ card["status"] = DEAD
315
+ card["version"] = ""
270
316
  else:
271
- card["status"]=STOPPED
272
- card["version"]=get_antnode_version(card["binary"])
273
- card["peer_id"]=''
274
- card["records"]=0
275
- card["uptime"]=0
276
- card["shunned"]=0
277
- card["age"]=get_node_age(card["root_dir"])
317
+ card["status"] = STOPPED
318
+ card["version"] = get_antnode_version(card["binary"])
319
+ card["peer_id"] = ""
320
+ card["records"] = 0
321
+ card["uptime"] = 0
322
+ card["shunned"] = 0
323
+ card["age"] = get_node_age(card["root_dir"])
278
324
  # harcoded for anm
279
- card["host"]=ANM_HOST
325
+ card["host"] = ANM_HOST
280
326
  # Append the node dict to the detail list
281
327
  details.append(card)
282
-
328
+
283
329
  return details
284
330
 
331
+
285
332
  # Survey server instance
286
333
  def survey_machine():
287
334
  # Make a bucket
288
- antnodes=[]
335
+ antnodes = []
289
336
  # For all service files
290
337
  for file in os.listdir("/etc/systemd/system"):
291
338
  # Find antnodes
292
- if re.match(r'antnode[\d]+\.service',file):
339
+ if re.match(r"antnode[\d]+\.service", file):
293
340
  antnodes.append(file)
294
- #if len(antnodes)>=5:
341
+ # if len(antnodes)>=5:
295
342
  # break
296
343
  # Iterate over defined nodes and get details
297
344
  # Ingests a list of service files and outputs a list of dictionaries
298
345
  return survey_anm_nodes(antnodes)
299
346
 
347
+
300
348
  # Read system status
301
- def get_machine_metrics(node_storage,remove_limit):
349
+ def get_machine_metrics(node_storage, remove_limit):
302
350
  metrics = {}
303
351
 
304
352
  with S() as session:
305
- db_nodes=session.execute(select(Node.status,Node.version)).all()
306
-
353
+ db_nodes = session.execute(select(Node.status, Node.version)).all()
354
+
307
355
  # Get some initial stats for comparing after a few seconds
308
356
  # We start these counters AFTER reading the database
309
- start_time=time.time()
310
- start_disk_counters=psutil.disk_io_counters()
311
- start_net_counters=psutil.net_io_counters()
357
+ start_time = time.time()
358
+ start_disk_counters = psutil.disk_io_counters()
359
+ start_net_counters = psutil.net_io_counters()
312
360
 
313
- metrics["TotalNodes"]=len(db_nodes)
361
+ metrics["TotalNodes"] = len(db_nodes)
314
362
  data = Counter(node[0] for node in db_nodes)
315
363
  metrics["RunningNodes"] = data[RUNNING]
316
364
  metrics["StoppedNodes"] = data[STOPPED]
@@ -319,62 +367,90 @@ def get_machine_metrics(node_storage,remove_limit):
319
367
  metrics["MigratingNodes"] = data[MIGRATING]
320
368
  metrics["RemovingNodes"] = data[REMOVING]
321
369
  metrics["DeadNodes"] = data[DEAD]
322
- metrics["antnode"]=shutil.which("antnode")
370
+ metrics["antnode"] = shutil.which("antnode")
323
371
  if not metrics["antnode"]:
324
372
  logging.warning("Unable to locate current antnode binary, exiting")
325
373
  sys.exit(1)
326
- metrics["AntNodeVersion"]=get_antnode_version(metrics["antnode"])
327
- metrics["NodesLatestV"]=sum(1 for node in db_nodes if node[1]==metrics["AntNodeVersion"]) or 0
328
- metrics["NodesNoVersion"]=sum(1 for node in db_nodes if not node[1]) or 0
329
- metrics["NodesToUpgrade"]=metrics["TotalNodes"] - metrics["NodesLatestV"] - metrics["NodesNoVersion"]
374
+ metrics["AntNodeVersion"] = get_antnode_version(metrics["antnode"])
375
+ metrics["NodesLatestV"] = (
376
+ sum(1 for node in db_nodes if node[1] == metrics["AntNodeVersion"]) or 0
377
+ )
378
+ metrics["NodesNoVersion"] = sum(1 for node in db_nodes if not node[1]) or 0
379
+ metrics["NodesToUpgrade"] = (
380
+ metrics["TotalNodes"] - metrics["NodesLatestV"] - metrics["NodesNoVersion"]
381
+ )
330
382
 
331
383
  # Windows has to build load average over 5 seconds. The first 5 seconds returns 0's
332
- # I don't plan on supporting windows, but if this get's modular, I don't want this
384
+ # I don't plan on supporting windows, but if this get's modular, I don't want this
333
385
  # issue to be skipped
334
- #if platform.system() == "Windows":
386
+ # if platform.system() == "Windows":
335
387
  # discard=psutil.getloadavg()
336
388
  # time.sleep(5)
337
- metrics["LoadAverage1"],metrics["LoadAverage5"],metrics["LoadAverage15"]=psutil.getloadavg()
389
+ metrics["LoadAverage1"], metrics["LoadAverage5"], metrics["LoadAverage15"] = (
390
+ psutil.getloadavg()
391
+ )
338
392
  # Get CPU Metrics over 1 second
339
- metrics["IdleCpuPercent"],metrics["IOWait"] = psutil.cpu_times_percent(1)[3:5]
393
+ metrics["IdleCpuPercent"], metrics["IOWait"] = psutil.cpu_times_percent(1)[3:5]
340
394
  # Really we returned Idle percent, subtract from 100 to get used.
341
395
  metrics["UsedCpuPercent"] = 100 - metrics["IdleCpuPercent"]
342
- data=psutil.virtual_memory()
343
- #print(data)
344
- metrics["UsedMemPercent"]=data.percent
345
- metrics["FreeMemPercent"]=100-metrics["UsedMemPercent"]
346
- data=psutil.disk_io_counters()
396
+ data = psutil.virtual_memory()
397
+ # print(data)
398
+ metrics["UsedMemPercent"] = data.percent
399
+ metrics["FreeMemPercent"] = 100 - metrics["UsedMemPercent"]
400
+ data = psutil.disk_io_counters()
347
401
  # This only checks the drive mapped to the first node and will need to be updated
348
402
  # when we eventually support multiple drives
349
- data=psutil.disk_usage(node_storage)
350
- metrics["UsedHDPercent"]=data.percent
351
- metrics["TotalHDBytes"]=data.total
352
- end_time=time.time()
353
- end_disk_counters=psutil.disk_io_counters()
354
- end_net_counters=psutil.net_io_counters()
355
- metrics["HDWriteBytes"]=int((end_disk_counters.write_bytes-start_disk_counters.write_bytes)/(end_time-start_time))
356
- metrics["HDReadBytes"]=int((end_disk_counters.read_bytes-start_disk_counters.read_bytes)/(end_time-start_time))
357
- metrics["NetWriteBytes"]=int((end_net_counters.bytes_sent-start_net_counters.bytes_sent)/(end_time-start_time))
358
- metrics["NetReadBytes"]=int((end_net_counters.bytes_recv-start_net_counters.bytes_recv)/(end_time-start_time))
359
- #print (json.dumps(metrics,indent=2))
403
+ data = psutil.disk_usage(node_storage)
404
+ metrics["UsedHDPercent"] = data.percent
405
+ metrics["TotalHDBytes"] = data.total
406
+ end_time = time.time()
407
+ end_disk_counters = psutil.disk_io_counters()
408
+ end_net_counters = psutil.net_io_counters()
409
+ metrics["HDWriteBytes"] = int(
410
+ (end_disk_counters.write_bytes - start_disk_counters.write_bytes)
411
+ / (end_time - start_time)
412
+ )
413
+ metrics["HDReadBytes"] = int(
414
+ (end_disk_counters.read_bytes - start_disk_counters.read_bytes)
415
+ / (end_time - start_time)
416
+ )
417
+ metrics["NetWriteBytes"] = int(
418
+ (end_net_counters.bytes_sent - start_net_counters.bytes_sent)
419
+ / (end_time - start_time)
420
+ )
421
+ metrics["NetReadBytes"] = int(
422
+ (end_net_counters.bytes_recv - start_net_counters.bytes_recv)
423
+ / (end_time - start_time)
424
+ )
425
+ # print (json.dumps(metrics,indent=2))
360
426
  # How close (out of 100) to removal limit will we be with a max bytes per node (2GB default)
361
427
  # For running nodes with Porpoise(tm).
362
- metrics["NodeHDCrisis"]=int((((metrics["TotalNodes"])*CRISIS_BYTES)/(metrics["TotalHDBytes"]*(remove_limit/100)))*100)
428
+ metrics["NodeHDCrisis"] = int(
429
+ (
430
+ ((metrics["TotalNodes"]) * CRISIS_BYTES)
431
+ / (metrics["TotalHDBytes"] * (remove_limit / 100))
432
+ )
433
+ * 100
434
+ )
363
435
  return metrics
364
436
 
437
+
365
438
  # Update node with metrics result
366
- def update_node_from_metrics(id,metrics,metadata):
439
+ def update_node_from_metrics(id, metrics, metadata):
367
440
  try:
368
441
  # We check the binary version in other code, so lets stop clobbering it when a node is stopped
369
- card={'status': metrics["status"], 'timestamp': int(time.time()),
370
- 'uptime': metrics["uptime"], 'records': metrics["records"],
371
- 'shunned': metrics["shunned"],
372
- 'peer_id': metadata["peer_id"]}
442
+ card = {
443
+ "status": metrics["status"],
444
+ "timestamp": int(time.time()),
445
+ "uptime": metrics["uptime"],
446
+ "records": metrics["records"],
447
+ "shunned": metrics["shunned"],
448
+ "peer_id": metadata["peer_id"],
449
+ }
373
450
  if "version" in metadata:
374
- card['version']=metadata["version"]
451
+ card["version"] = metadata["version"]
375
452
  with S() as session:
376
- session.query(Node).filter(Node.id == id).\
377
- update(card)
453
+ session.query(Node).filter(Node.id == id).update(card)
378
454
  session.commit()
379
455
  except Exception as error:
380
456
  template = "In UNFM - An exception of type {0} occurred. Arguments:\n{1!r}"
@@ -383,259 +459,315 @@ def update_node_from_metrics(id,metrics,metadata):
383
459
  return False
384
460
  else:
385
461
  return True
386
-
462
+
463
+
387
464
  # Set Node status
388
- def set_node_status(id,status):
389
- logging.info("Setting node status: {0} {1}".format(id,status))
465
+ def set_node_status(id, status):
466
+ logging.info("Setting node status: {0} {1}".format(id, status))
390
467
  try:
391
468
  with S() as session:
392
- session.query(Node).filter(Node.id == id).\
393
- update({'status': status, 'timestamp': int(time.time())})
469
+ session.query(Node).filter(Node.id == id).update(
470
+ {"status": status, "timestamp": int(time.time())}
471
+ )
394
472
  session.commit()
395
473
  except:
396
474
  return False
397
475
  else:
398
476
  return True
399
477
 
478
+
400
479
  # Update metrics after checking counters
401
- def update_counters(old,config):
480
+ def update_counters(old, config):
402
481
  # Are we already removing a node
403
482
  if old["RemovingNodes"]:
404
483
  with S() as session:
405
- removals=session.execute(select(Node.timestamp,Node.id)\
406
- .where(Node.status == REMOVING)\
407
- .order_by(Node.timestamp.asc())).all()
484
+ removals = session.execute(
485
+ select(Node.timestamp, Node.id)
486
+ .where(Node.status == REMOVING)
487
+ .order_by(Node.timestamp.asc())
488
+ ).all()
408
489
  # Iterate through active removals
409
490
  records_to_remove = len(removals)
410
491
  for check in removals:
411
492
  # If the DelayRemove timer has expired, delete the entry
412
- if isinstance(check[0],int) and \
413
- check[0] < (int(time.time()) - (config["DelayRemove"]*60)):
414
- logging.info("Deleting removed node "+str(check[1]))
493
+ if isinstance(check[0], int) and check[0] < (
494
+ int(time.time()) - (config["DelayRemove"] * 60)
495
+ ):
496
+ logging.info("Deleting removed node " + str(check[1]))
415
497
  with S() as session:
416
- session.execute(delete(Node).where(Node.id==check[1]))
498
+ session.execute(delete(Node).where(Node.id == check[1]))
417
499
  session.commit()
418
- records_to_remove-=1
419
- old["RemovingNodes"]=records_to_remove
500
+ records_to_remove -= 1
501
+ old["RemovingNodes"] = records_to_remove
420
502
  # Are we already upgrading a node
421
503
  if old["UpgradingNodes"]:
422
504
  with S() as session:
423
- upgrades=session.execute(select(Node.timestamp,Node.id,Node.host,Node.metrics_port)\
424
- .where(Node.status == UPGRADING)\
425
- .order_by(Node.timestamp.asc())).all()
505
+ upgrades = session.execute(
506
+ select(Node.timestamp, Node.id, Node.host, Node.metrics_port)
507
+ .where(Node.status == UPGRADING)
508
+ .order_by(Node.timestamp.asc())
509
+ ).all()
426
510
  # Iterate through active upgrades
427
511
  records_to_upgrade = len(upgrades)
428
512
  for check in upgrades:
429
513
  # If the DelayUpgrade timer has expired, check on status
430
- if isinstance(check[0],int) and \
431
- check[0] < (int(time.time()) - (config["DelayUpgrade"]*60)):
432
- logging.info("Updating upgraded node "+str(check[1]))
433
- node_metrics=read_node_metrics(check[2],check[3])
434
- node_metadata=read_node_metadata(check[2],check[3])
514
+ if isinstance(check[0], int) and check[0] < (
515
+ int(time.time()) - (config["DelayUpgrade"] * 60)
516
+ ):
517
+ logging.info("Updating upgraded node " + str(check[1]))
518
+ node_metrics = read_node_metrics(check[2], check[3])
519
+ node_metadata = read_node_metadata(check[2], check[3])
435
520
  if node_metrics and node_metadata:
436
- update_node_from_metrics(check[1],node_metrics,node_metadata)
437
- records_to_upgrade-=1
438
- old["UpgradingNodes"]=records_to_upgrade
521
+ update_node_from_metrics(check[1], node_metrics, node_metadata)
522
+ records_to_upgrade -= 1
523
+ old["UpgradingNodes"] = records_to_upgrade
439
524
  # Are we already restarting a node
440
525
  if old["RestartingNodes"]:
441
526
  with S() as session:
442
- restarts=session.execute(select(Node.timestamp,Node.id,Node.host,Node.metrics_port)\
443
- .where(Node.status == RESTARTING)\
444
- .order_by(Node.timestamp.asc())).all()
527
+ restarts = session.execute(
528
+ select(Node.timestamp, Node.id, Node.host, Node.metrics_port)
529
+ .where(Node.status == RESTARTING)
530
+ .order_by(Node.timestamp.asc())
531
+ ).all()
445
532
  # Iterate through active upgrades
446
533
  records_to_restart = len(restarts)
447
534
  for check in restarts:
448
535
  # If the DelayUpgrade timer has expired, check on status
449
- if isinstance(check[0],int) and \
450
- check[0] < (int(time.time()) - (config["DelayStart"]*60)):
451
- logging.info("Updating restarted node "+str(check[1]))
452
- node_metrics=read_node_metrics(check[2],check[3])
453
- node_metadata=read_node_metadata(check[2],check[3])
536
+ if isinstance(check[0], int) and check[0] < (
537
+ int(time.time()) - (config["DelayStart"] * 60)
538
+ ):
539
+ logging.info("Updating restarted node " + str(check[1]))
540
+ node_metrics = read_node_metrics(check[2], check[3])
541
+ node_metadata = read_node_metadata(check[2], check[3])
454
542
  if node_metrics and node_metadata:
455
- update_node_from_metrics(check[1],node_metrics,node_metadata)
456
- records_to_restart-=1
457
- old["RestartingNodes"]=records_to_restart
458
- return(old)
543
+ update_node_from_metrics(check[1], node_metrics, node_metadata)
544
+ records_to_restart -= 1
545
+ old["RestartingNodes"] = records_to_restart
546
+ return old
547
+
459
548
 
460
549
  # Enable firewall for port
461
- def enable_firewall(port,node):
550
+ def enable_firewall(port, node):
462
551
  logging.info("enable firewall port {0}/udp".format(port))
463
552
  # Close ufw firewall
464
553
  try:
465
- subprocess.run(['sudo','ufw','allow',"{0}/udp".format(port),'comment',node], stdout=subprocess.PIPE)
554
+ subprocess.run(
555
+ ["sudo", "ufw", "allow", "{0}/udp".format(port), "comment", node],
556
+ stdout=subprocess.PIPE,
557
+ )
466
558
  except subprocess.CalledProcessError as err:
467
- logging.error( 'EF Error:', err )
559
+ logging.error("EF Error:", err)
560
+
468
561
 
469
562
  # Disable firewall for port
470
563
  def disable_firewall(port):
471
564
  logging.info("disable firewall port {0}/udp".format(port))
472
565
  # Close ufw firewall
473
566
  try:
474
- subprocess.run(['sudo','ufw','delete','allow',"{0}/udp".format(port)], stdout=subprocess.PIPE)
567
+ subprocess.run(
568
+ ["sudo", "ufw", "delete", "allow", "{0}/udp".format(port)],
569
+ stdout=subprocess.PIPE,
570
+ )
475
571
  except subprocess.CalledProcessError as err:
476
- logging.error( 'DF ERROR:', err )
572
+ logging.error("DF ERROR:", err)
573
+
477
574
 
478
575
  # Start a systemd node
479
576
  def start_systemd_node(node):
480
- logging.info("Starting node "+str(node.id))
577
+ logging.info("Starting node " + str(node.id))
481
578
  # Try to start the service
482
579
  try:
483
- p = subprocess.run(['sudo', 'systemctl', 'start', node.service], stdout=subprocess.PIPE,stderr=subprocess.STDOUT).stdout.decode('utf-8')
484
- if re.match(r'Failed to start',p):
485
- logging.error( 'SSN2 ERROR:', p )
580
+ p = subprocess.run(
581
+ ["sudo", "systemctl", "start", node.service],
582
+ stdout=subprocess.PIPE,
583
+ stderr=subprocess.STDOUT,
584
+ ).stdout.decode("utf-8")
585
+ if re.match(r"Failed to start", p):
586
+ logging.error("SSN2 ERROR:", p)
486
587
  return False
487
588
  except subprocess.CalledProcessError as err:
488
- logging.error( 'SSN1 ERROR:', err )
489
- return False
589
+ logging.error("SSN1 ERROR:", err)
590
+ return False
490
591
  # Open a firewall hole for the data port
491
- enable_firewall(node.port,node.service)
592
+ enable_firewall(node.port, node.service)
492
593
  # Update node status
493
- set_node_status(node.id,RESTARTING)
594
+ set_node_status(node.id, RESTARTING)
494
595
  return True
495
596
 
597
+
496
598
  # Stop a systemd node
497
599
  def stop_systemd_node(node):
498
- logging.info("Stopping node: "+node.service)
600
+ logging.info("Stopping node: " + node.service)
499
601
  # Send a stop signal to the process
500
602
  try:
501
- subprocess.run(['sudo', 'systemctl', 'stop', node.service], stdout=subprocess.PIPE)
603
+ subprocess.run(
604
+ ["sudo", "systemctl", "stop", node.service], stdout=subprocess.PIPE
605
+ )
502
606
  except subprocess.CalledProcessError as err:
503
- logging.error( 'SSN2 ERROR:', err )
607
+ logging.error("SSN2 ERROR:", err)
504
608
  disable_firewall(node.port)
505
- set_node_status(node.id,STOPPED)
609
+ set_node_status(node.id, STOPPED)
506
610
 
507
611
  return True
508
612
 
613
+
509
614
  # Upgrade a node
510
- def upgrade_node(node,metrics):
511
- logging.info("Upgrading node "+str(node.id))
615
+ def upgrade_node(node, metrics):
616
+ logging.info("Upgrading node " + str(node.id))
512
617
  # Copy current node binary
513
618
  try:
514
- subprocess.run(['sudo', 'cp', '-f', metrics["antnode"], node.binary])
619
+ subprocess.run(["sudo", "cp", "-f", metrics["antnode"], node.binary])
515
620
  except subprocess.CalledProcessError as err:
516
- logging.error( 'UN1 ERROR:', err )
621
+ logging.error("UN1 ERROR:", err)
517
622
  try:
518
- subprocess.run(['sudo', 'systemctl', 'restart', node.service])
623
+ subprocess.run(["sudo", "systemctl", "restart", node.service])
519
624
  except subprocess.CalledProcessError as err:
520
- logging.error( 'UN2 ERROR:', err )
521
- version=get_antnode_version(node.binary)
625
+ logging.error("UN2 ERROR:", err)
626
+ version = get_antnode_version(node.binary)
522
627
  try:
523
628
  with S() as session:
524
- session.query(Node).filter(Node.id == node.id).\
525
- update({'status': UPGRADING, 'timestamp': int(time.time()),
526
- 'version': metrics["AntNodeVersion"]})
629
+ session.query(Node).filter(Node.id == node.id).update(
630
+ {
631
+ "status": UPGRADING,
632
+ "timestamp": int(time.time()),
633
+ "version": metrics["AntNodeVersion"],
634
+ }
635
+ )
527
636
  session.commit()
528
637
  except:
529
638
  return False
530
639
  else:
531
640
  return True
532
641
 
642
+
533
643
  # Remove a node
534
644
  def remove_node(id):
535
- logging.info("Removing node "+str(id))
645
+ logging.info("Removing node " + str(id))
536
646
 
537
647
  with S() as session:
538
648
  node = session.execute(select(Node).where(Node.id == id)).first()
539
649
  # Grab Node from Row
540
- node=node[0]
650
+ node = node[0]
541
651
  if stop_systemd_node(node):
542
652
  # Mark this node as REMOVING
543
- set_node_status(id,REMOVING)
653
+ set_node_status(id, REMOVING)
544
654
 
545
- nodename=f"antnode{node.nodename}"
655
+ nodename = f"antnode{node.nodename}"
546
656
  # Remove node data and log
547
657
  try:
548
- subprocess.run(['sudo', 'rm', '-rf', node.root_dir, f"/var/log/antnode/{nodename}"])
658
+ subprocess.run(
659
+ ["sudo", "rm", "-rf", node.root_dir, f"/var/log/antnode/{nodename}"]
660
+ )
549
661
  except subprocess.CalledProcessError as err:
550
- logging.error( 'RN1 ERROR:', err )
662
+ logging.error("RN1 ERROR:", err)
551
663
  # Remove systemd service file
552
664
  try:
553
- subprocess.run(['sudo', 'rm', '-f', f"/etc/systemd/system/{node.service}"])
665
+ subprocess.run(["sudo", "rm", "-f", f"/etc/systemd/system/{node.service}"])
554
666
  except subprocess.CalledProcessError as err:
555
- logging.error( 'RN2 ERROR:', err )
556
- # Tell system to reload systemd files
667
+ logging.error("RN2 ERROR:", err)
668
+ # Tell system to reload systemd files
557
669
  try:
558
- subprocess.run(['sudo', 'systemctl', 'daemon-reload'])
670
+ subprocess.run(["sudo", "systemctl", "daemon-reload"])
559
671
  except subprocess.CalledProcessError as err:
560
- logging.error( 'RN3 ERROR:', err )
561
- #print(json.dumps(node,indent=2))
672
+ logging.error("RN3 ERROR:", err)
673
+ # print(json.dumps(node,indent=2))
674
+
562
675
 
563
676
  # Rescan nodes for status
564
677
  def update_nodes():
565
678
  with S() as session:
566
- nodes=session.execute(select(Node.timestamp,Node.id,Node.host,Node.metrics_port,Node.status)\
567
- .where(Node.status != DISABLED)\
568
- .order_by(Node.timestamp.asc())).all()
679
+ nodes = session.execute(
680
+ select(Node.timestamp, Node.id, Node.host, Node.metrics_port, Node.status)
681
+ .where(Node.status != DISABLED)
682
+ .order_by(Node.timestamp.asc())
683
+ ).all()
569
684
  # Iterate through all records
570
685
  for check in nodes:
571
686
  # Check on status
572
- if isinstance(check[0],int):
573
- logging.debug("Updating info on node "+str(check[1]))
574
- node_metrics=read_node_metrics(check[2],check[3])
575
- node_metadata=read_node_metadata(check[2],check[3])
687
+ if isinstance(check[0], int):
688
+ logging.debug("Updating info on node " + str(check[1]))
689
+ node_metrics = read_node_metrics(check[2], check[3])
690
+ node_metadata = read_node_metadata(check[2], check[3])
576
691
  if node_metrics and node_metadata:
577
692
  # Don't write updates for stopped nodes that are already marked as stopped
578
- if node_metadata["status"]==STOPPED and check[4]==STOPPED:
693
+ if node_metadata["status"] == STOPPED and check[4] == STOPPED:
579
694
  continue
580
- update_node_from_metrics(check[1],node_metrics,node_metadata)
581
-
695
+ update_node_from_metrics(check[1], node_metrics, node_metadata)
696
+
697
+
582
698
  # Create a new node
583
- def create_node(config,metrics):
699
+ def create_node(config, metrics):
584
700
  logging.info("Creating new node")
585
701
  # Create a holding place for the new node
586
702
  card = {}
587
703
  # Find the next available node number by first looking for holes
588
- sql = text('select n1.id + 1 as id from node n1 ' + \
589
- 'left join node n2 on n2.id = n1.id + 1 ' + \
590
- 'where n2.id is null ' + \
591
- 'and n1.id <> (select max(id) from node) ' + \
592
- 'order by n1.id;')
704
+ sql = text(
705
+ "select n1.id + 1 as id from node n1 "
706
+ + "left join node n2 on n2.id = n1.id + 1 "
707
+ + "where n2.id is null "
708
+ + "and n1.id <> (select max(id) from node) "
709
+ + "order by n1.id;"
710
+ )
593
711
  with S() as session:
594
712
  result = session.execute(sql).first()
595
713
  if result:
596
- card['id']=result[0]
714
+ card["id"] = result[0]
597
715
  # Otherwise get the max node number and add 1
598
716
  else:
599
717
  with S() as session:
600
718
  result = session.execute(select(Node.id).order_by(Node.id.desc())).first()
601
- card['id']=result[0]+1
719
+ card["id"] = result[0] + 1
602
720
  # Set the node name
603
- card['nodename']=f'{card['id']:04}'
604
- card['service']=f'antnode{card['nodename']}.service'
605
- card['user']='ant'
606
- card['version']=metrics["AntNodeVersion"]
607
- card['root_dir']=f"{config['NodeStorage']}/antnode{card['nodename']}"
608
- card['binary']=f"{card['root_dir']}/antnode"
609
- card['port']=config["PortStart"]*1000+card['id']
610
- card['metrics_port']=13*1000+card['id']
611
- card['network']='evm-arbitrum-one'
612
- card['wallet']=config["RewardsAddress"]
613
- card['peer_id']=''
614
- card['status']=STOPPED
615
- card['timestamp']=int(time.time())
616
- card['records']=0
617
- card['uptime']=0
618
- card['shunned']=0
619
- card['age']=card['timestamp']
620
- card['host']=ANM_HOST
621
- log_dir=f"/var/log/antnode/antnode{card['nodename']}"
721
+ card["nodename"] = f"{card['id']:04}"
722
+ card["service"] = f"antnode{card['nodename']}.service"
723
+ card["user"] = "ant"
724
+ card["version"] = metrics["AntNodeVersion"]
725
+ card["root_dir"] = f"{config['NodeStorage']}/antnode{card['nodename']}"
726
+ card["binary"] = f"{card['root_dir']}/antnode"
727
+ card["port"] = config["PortStart"] * 1000 + card["id"]
728
+ card["metrics_port"] = 13 * 1000 + card["id"]
729
+ card["network"] = "evm-arbitrum-one"
730
+ card["wallet"] = config["RewardsAddress"]
731
+ card["peer_id"] = ""
732
+ card["status"] = STOPPED
733
+ card["timestamp"] = int(time.time())
734
+ card["records"] = 0
735
+ card["uptime"] = 0
736
+ card["shunned"] = 0
737
+ card["age"] = card["timestamp"]
738
+ card["host"] = ANM_HOST
739
+ log_dir = f"/var/log/antnode/antnode{card['nodename']}"
622
740
  # Create the node directory and log directory
623
741
  try:
624
- subprocess.run(['sudo','mkdir','-p',card["root_dir"],log_dir], stdout=subprocess.PIPE)
742
+ subprocess.run(
743
+ ["sudo", "mkdir", "-p", card["root_dir"], log_dir], stdout=subprocess.PIPE
744
+ )
625
745
  except subprocess.CalledProcessError as err:
626
- logging.error( 'CN1 ERROR:', err )
746
+ logging.error("CN1 ERROR:", err)
627
747
  # Copy the binary to the node directory
628
748
  try:
629
- subprocess.run(['sudo','cp',metrics["antnode"],card["root_dir"]], stdout=subprocess.PIPE)
749
+ subprocess.run(
750
+ ["sudo", "cp", metrics["antnode"], card["root_dir"]], stdout=subprocess.PIPE
751
+ )
630
752
  except subprocess.CalledProcessError as err:
631
- logging.error( 'CN2 ERROR:', err )
753
+ logging.error("CN2 ERROR:", err)
632
754
  # Change owner of the node directory and log directories
633
755
  try:
634
- subprocess.run(['sudo','chown','-R',f'{card["user"]}:{card["user"]}',card["root_dir"],log_dir], stdout=subprocess.PIPE)
756
+ subprocess.run(
757
+ [
758
+ "sudo",
759
+ "chown",
760
+ "-R",
761
+ f'{card["user"]}:{card["user"]}',
762
+ card["root_dir"],
763
+ log_dir,
764
+ ],
765
+ stdout=subprocess.PIPE,
766
+ )
635
767
  except subprocess.CalledProcessError as err:
636
- logging.error( 'CN3 ERROR:', err )
768
+ logging.error("CN3 ERROR:", err)
637
769
  # build the systemd service unit
638
- service=f"""[Unit]
770
+ service = f"""[Unit]
639
771
  Description=antnode{card['nodename']}
640
772
  [Service]
641
773
  User={card['user']}
@@ -645,130 +777,178 @@ Restart=always
645
777
  """
646
778
  # Write the systemd service unit with sudo tee since we're running as not root
647
779
  try:
648
- subprocess.run(['sudo','tee',f'/etc/systemd/system/{card["service"]}'],input=service,text=True, stdout=subprocess.PIPE)
780
+ subprocess.run(
781
+ ["sudo", "tee", f'/etc/systemd/system/{card["service"]}'],
782
+ input=service,
783
+ text=True,
784
+ stdout=subprocess.PIPE,
785
+ )
649
786
  except subprocess.CalledProcessError as err:
650
- logging.error( 'CN4 ERROR:', err )
787
+ logging.error("CN4 ERROR:", err)
651
788
  # Reload systemd service files to get our new one
652
789
  try:
653
- subprocess.run(['sudo','systemctl','daemon-reload'], stdout=subprocess.PIPE)
790
+ subprocess.run(["sudo", "systemctl", "daemon-reload"], stdout=subprocess.PIPE)
654
791
  except subprocess.CalledProcessError as err:
655
- logging.error( 'CN5 ERROR:', err )
792
+ logging.error("CN5 ERROR:", err)
656
793
  # Add the new node to the database
657
794
  with S() as session:
658
- session.execute(
659
- insert(Node),[card]
660
- )
795
+ session.execute(insert(Node), [card])
661
796
  session.commit()
662
797
  # Now we grab the node object from the database to pass to start node
663
798
  with S() as session:
664
- card=session.execute(select(Node).where(Node.id == card['id'])).first()
799
+ card = session.execute(select(Node).where(Node.id == card["id"])).first()
665
800
  # Get the Node object from the Row
666
- card=card[0]
801
+ card = card[0]
667
802
  # Start the new node
668
803
  return start_systemd_node(card)
669
- #print(json.dumps(card,indent=2))
804
+ # print(json.dumps(card,indent=2))
670
805
  return True
671
-
806
+
672
807
 
673
808
  # Make a decision about what to do
674
- def choose_action(config,metrics,db_nodes):
809
+ def choose_action(config, metrics, db_nodes):
675
810
  # Gather knowlege
676
- features={}
677
- features["AllowCpu"]=metrics["UsedCpuPercent"] < config["CpuLessThan"]
678
- features["AllowMem"]=metrics["UsedMemPercent"] < config["MemLessThan"]
679
- features["AllowHD"]=metrics["UsedHDPercent"] < config["HDLessThan"]
680
- features["RemCpu"]=metrics["UsedCpuPercent"] > config["CpuRemove"]
681
- features["RemMem"]=metrics["UsedMemPercent"] > config["MemRemove"]
682
- features["RemHD"]=metrics["UsedHDPercent"] > config["HDRemove"]
683
- features["AllowNodeCap"]=metrics["RunningNodes"] < config["NodeCap"]
811
+ features = {}
812
+ features["AllowCpu"] = metrics["UsedCpuPercent"] < config["CpuLessThan"]
813
+ features["AllowMem"] = metrics["UsedMemPercent"] < config["MemLessThan"]
814
+ features["AllowHD"] = metrics["UsedHDPercent"] < config["HDLessThan"]
815
+ features["RemCpu"] = metrics["UsedCpuPercent"] > config["CpuRemove"]
816
+ features["RemMem"] = metrics["UsedMemPercent"] > config["MemRemove"]
817
+ features["RemHD"] = metrics["UsedHDPercent"] > config["HDRemove"]
818
+ features["AllowNodeCap"] = metrics["RunningNodes"] < config["NodeCap"]
684
819
  # These are new features, so ignore them if not configured
685
- if (config["NetIOReadLessThan"]+config["NetIOReadRemove"]+
686
- config["NetIOWriteLessThan"]+config["NetIOWriteRemove"]>1):
687
- features["AllowNetIO"]=metrics["NetReadBytes"] < config["NetIOReadLessThan"] and \
688
- metrics["NetWriteBytes"] < config["NetIOWriteLessThan"]
689
- features["RemoveNetIO"]=metrics["NetReadBytes"] > config["NetIORemove"] or \
690
- metrics["NetWriteBytes"] > config["NetIORemove"]
820
+ if (
821
+ config["NetIOReadLessThan"]
822
+ + config["NetIOReadRemove"]
823
+ + config["NetIOWriteLessThan"]
824
+ + config["NetIOWriteRemove"]
825
+ > 1
826
+ ):
827
+ features["AllowNetIO"] = (
828
+ metrics["NetReadBytes"] < config["NetIOReadLessThan"]
829
+ and metrics["NetWriteBytes"] < config["NetIOWriteLessThan"]
830
+ )
831
+ features["RemoveNetIO"] = (
832
+ metrics["NetReadBytes"] > config["NetIORemove"]
833
+ or metrics["NetWriteBytes"] > config["NetIORemove"]
834
+ )
691
835
  else:
692
- features["AllowNetIO"]=True
693
- features["RemoveNetIO"]=False
694
- if (config["HDIOReadLessThan"]+config["HDIOReadRemove"]+
695
- config["HDIOWriteLessThan"]+config["HDIOWriteRemove"]>1):
696
- features["AllowHDIO"]=metrics["HDReadBytes"] < config["HDIOReadLessThan"] and \
697
- metrics["HDWriteBytes"] < config["HDIOWriteLessThan"]
698
- features["RemoveHDIO"]=metrics["HDReadBytes"] > config["HDIORemove"] or \
699
- metrics["HDWriteBytes"] > config["HDtIORemove"]
836
+ features["AllowNetIO"] = True
837
+ features["RemoveNetIO"] = False
838
+ if (
839
+ config["HDIOReadLessThan"]
840
+ + config["HDIOReadRemove"]
841
+ + config["HDIOWriteLessThan"]
842
+ + config["HDIOWriteRemove"]
843
+ > 1
844
+ ):
845
+ features["AllowHDIO"] = (
846
+ metrics["HDReadBytes"] < config["HDIOReadLessThan"]
847
+ and metrics["HDWriteBytes"] < config["HDIOWriteLessThan"]
848
+ )
849
+ features["RemoveHDIO"] = (
850
+ metrics["HDReadBytes"] > config["HDIORemove"]
851
+ or metrics["HDWriteBytes"] > config["HDtIORemove"]
852
+ )
700
853
  else:
701
- features["AllowHDIO"]=True
702
- features["RemoveHDIO"]=False
703
- features["LoadAllow"] = metrics["LoadAverage1"] < config["DesiredLoadAverage"] and \
704
- metrics["LoadAverage5"] < config["DesiredLoadAverage"] and \
705
- metrics["LoadAverage15"] < config["DesiredLoadAverage"]
706
- features["LoadNotAllow"] = metrics["LoadAverage1"] > config["MaxLoadAverageAllowed"] or \
707
- metrics["LoadAverage5"] > config["MaxLoadAverageAllowed"] or \
708
- metrics["LoadAverage15"] > config["MaxLoadAverageAllowed"]
854
+ features["AllowHDIO"] = True
855
+ features["RemoveHDIO"] = False
856
+ features["LoadAllow"] = (
857
+ metrics["LoadAverage1"] < config["DesiredLoadAverage"]
858
+ and metrics["LoadAverage5"] < config["DesiredLoadAverage"]
859
+ and metrics["LoadAverage15"] < config["DesiredLoadAverage"]
860
+ )
861
+ features["LoadNotAllow"] = (
862
+ metrics["LoadAverage1"] > config["MaxLoadAverageAllowed"]
863
+ or metrics["LoadAverage5"] > config["MaxLoadAverageAllowed"]
864
+ or metrics["LoadAverage15"] > config["MaxLoadAverageAllowed"]
865
+ )
709
866
  # Check records for expired status
710
- metrics=update_counters(metrics,config)
867
+ metrics = update_counters(metrics, config)
711
868
  # If we have other thing going on, don't add more nodes
712
- features["AddNewNode"]=sum([ metrics.get(m, 0) \
713
- for m in ['UpgradingNodes',
714
- 'RestartingNodes','MigratingNodes',
715
- 'RemovingNodes'] ]) == 0 and \
716
- features["AllowCpu"] and features["AllowHD"] and \
717
- features["AllowMem"] and features["AllowNodeCap"] and \
718
- features["AllowHDIO"] and features["AllowNetIO"] and \
719
- features["LoadAllow"]
869
+ features["AddNewNode"] = (
870
+ sum(
871
+ [
872
+ metrics.get(m, 0)
873
+ for m in [
874
+ "UpgradingNodes",
875
+ "RestartingNodes",
876
+ "MigratingNodes",
877
+ "RemovingNodes",
878
+ ]
879
+ ]
880
+ )
881
+ == 0
882
+ and features["AllowCpu"]
883
+ and features["AllowHD"]
884
+ and features["AllowMem"]
885
+ and features["AllowNodeCap"]
886
+ and features["AllowHDIO"]
887
+ and features["AllowNetIO"]
888
+ and features["LoadAllow"]
889
+ and metrics["TotalNodes"] < config["NodeCap"]
890
+ )
720
891
  # Are we overlimit on nodes
721
- features["Remove"] =features["LoadNotAllow"] or features["RemCpu"] or \
722
- features["RemHD"] or features["RemMem"] or \
723
- features["RemoveHDIO"] or features["RemoveNetIO"] or \
724
- metrics["TotalNodes"] > config["NodeCap"]
892
+ features["Remove"] = (
893
+ features["LoadNotAllow"]
894
+ or features["RemCpu"]
895
+ or features["RemHD"]
896
+ or features["RemMem"]
897
+ or features["RemoveHDIO"]
898
+ or features["RemoveNetIO"]
899
+ or metrics["TotalNodes"] > config["NodeCap"]
900
+ )
725
901
  # If we have nodes to upgrade
726
902
  if metrics["NodesToUpgrade"] >= 1:
727
903
  # Make sure current version is equal or newer than version on first node.
728
904
  if Version(metrics["AntNodeVersion"]) < Version(db_nodes[0][1]):
729
905
  logging.warning("node upgrade cancelled due to lower version")
730
- features["Upgrade"]=False
906
+ features["Upgrade"] = False
731
907
  else:
732
908
  if features["Remove"]:
733
909
  logging.info("Can't upgrade while removing is required")
734
- features["Upgrade"]=False
910
+ features["Upgrade"] = False
735
911
  else:
736
- features["Upgrade"]=True
912
+ features["Upgrade"] = True
737
913
  else:
738
- features["Upgrade"]=False
739
-
914
+ features["Upgrade"] = False
740
915
 
741
- logging.info(json.dumps(features,indent=2))
916
+ logging.info(json.dumps(features, indent=2))
742
917
  ##### Decisions
743
918
 
744
919
  # Actually, removing DEAD nodes take priority
745
920
  if metrics["DeadNodes"] > 1:
746
921
  with S() as session:
747
- broken=session.execute(select(Node.timestamp,Node.id,Node.host,Node.metrics_port)\
748
- .where(Node.status == DEAD)\
749
- .order_by(Node.timestamp.asc())).all()
922
+ broken = session.execute(
923
+ select(Node.timestamp, Node.id, Node.host, Node.metrics_port)
924
+ .where(Node.status == DEAD)
925
+ .order_by(Node.timestamp.asc())
926
+ ).all()
750
927
  # Iterate through dead nodes and remove them all
751
928
  for check in broken:
752
929
  # Remove broken nodes
753
- logging.info("Removing dead node "+str(check[1]))
930
+ logging.info("Removing dead node " + str(check[1]))
754
931
  remove_node(check[1])
755
932
  return {"status": "removed-dead-nodes"}
756
933
  # If we have nodes with no version number, update from binary
757
934
  if metrics["NodesNoVersion"] > 1:
758
935
  with S() as session:
759
- no_version=session.execute(select(Node.timestamp,Node.id,Node.binary)\
760
- .where(Node.version == '')\
761
- .order_by(Node.timestamp.asc())).all()
936
+ no_version = session.execute(
937
+ select(Node.timestamp, Node.id, Node.binary)
938
+ .where(Node.version == "")
939
+ .order_by(Node.timestamp.asc())
940
+ ).all()
762
941
  # Iterate through nodes with no version number
763
942
  for check in no_version:
764
943
  # Update version number from binary
765
- version=get_antnode_version(check[2])
944
+ version = get_antnode_version(check[2])
766
945
  logging.info(f"Updating version number for node {check[1]} to {version}")
767
946
  with S() as session:
768
- session.query(Node).filter(Node.id == check[1]).\
769
- update({'version': version})
947
+ session.query(Node).filter(Node.id == check[1]).update(
948
+ {"version": version}
949
+ )
770
950
  session.commit()
771
-
951
+
772
952
  # If we're restarting, wait patiently as metrics could be skewed
773
953
  if metrics["RestartingNodes"]:
774
954
  logging.info("Still waiting for RestartDelay")
@@ -789,65 +969,74 @@ def choose_action(config,metrics,db_nodes):
789
969
  if metrics["StoppedNodes"] > 0:
790
970
  # What is the youngest stopped node
791
971
  with S() as session:
792
- youngest=session.execute(select(Node.id)\
793
- .where(Node.status == STOPPED)\
794
- .order_by(Node.age.desc())).first()
972
+ youngest = session.execute(
973
+ select(Node.id)
974
+ .where(Node.status == STOPPED)
975
+ .order_by(Node.age.desc())
976
+ ).first()
795
977
  if youngest:
796
978
  # Remove the youngest node
797
979
  remove_node(youngest[0])
798
- return{"status": REMOVING}
980
+ return {"status": REMOVING}
799
981
  # No low hanging fruit. let's start with the youngest running node
800
982
  with S() as session:
801
- youngest=session.execute(select(Node.id)\
802
- .where(Node.status == RUNNING)\
803
- .order_by(Node.age.desc())).first()
983
+ youngest = session.execute(
984
+ select(Node.id)
985
+ .where(Node.status == RUNNING)
986
+ .order_by(Node.age.desc())
987
+ ).first()
804
988
  if youngest:
805
989
  # Remove the youngest node
806
990
  remove_node(youngest[0])
807
- return{"status": REMOVING}
808
- return{"status": "nothing-to-remove"}
991
+ return {"status": REMOVING}
992
+ return {"status": "nothing-to-remove"}
809
993
  # Otherwise, let's try just stopping a node to bring IO/Mem/Cpu down
810
994
  else:
811
995
  # If we just stopped a node, wait
812
- if int(config["LastStoppedAt"] or 0) > (int(time.time()) - (config["DelayRemove"]*60)):
996
+ if int(config["LastStoppedAt"] or 0) > (
997
+ int(time.time()) - (config["DelayRemove"] * 60)
998
+ ):
813
999
  logging.info("Still waiting for RemoveDelay")
814
- return {"status": 'waiting-to-stop'}
1000
+ return {"status": "waiting-to-stop"}
815
1001
  # Start with the youngest running node
816
1002
  with S() as session:
817
- youngest=session.execute(select(Node)\
818
- .where(Node.status == RUNNING)\
819
- .order_by(Node.age.desc())).first()
1003
+ youngest = session.execute(
1004
+ select(Node).where(Node.status == RUNNING).order_by(Node.age.desc())
1005
+ ).first()
820
1006
  if youngest:
821
1007
  # Stop the youngest node
822
1008
  stop_systemd_node(youngest[0])
823
1009
  # Update the last stopped time
824
1010
  with S() as session:
825
- session.query(Machine).filter(Machine.id == 1).\
826
- update({'LastStoppedAt': int(time.time())})
1011
+ session.query(Machine).filter(Machine.id == 1).update(
1012
+ {"LastStoppedAt": int(time.time())}
1013
+ )
827
1014
  session.commit()
828
- return{"status": STOPPED}
1015
+ return {"status": STOPPED}
829
1016
  else:
830
- return{"status": "nothing-to-stop"}
831
-
1017
+ return {"status": "nothing-to-stop"}
1018
+
832
1019
  # Do we have upgrading to do?
833
- if features["Upgrade"]:
1020
+ if features["Upgrade"]:
834
1021
  # Let's find the oldest running node not using the current version
835
1022
  with S() as session:
836
- oldest=session.execute(select(Node)\
837
- .where(Node.status == RUNNING)\
838
- .where(Node.version != metrics["AntNodeVersion"])
839
- .order_by(Node.age.asc())).first()
1023
+ oldest = session.execute(
1024
+ select(Node)
1025
+ .where(Node.status == RUNNING)
1026
+ .where(Node.version != metrics["AntNodeVersion"])
1027
+ .order_by(Node.age.asc())
1028
+ ).first()
840
1029
  if oldest:
841
1030
  # Get Node from Row
842
1031
  oldest = oldest[0]
843
1032
  # If we don't have a version number from metadata, grab from binary
844
1033
  if not oldest.version:
845
- oldest.version=get_antnode_version(oldest.binary)
846
- #print(json.dumps(oldest))
1034
+ oldest.version = get_antnode_version(oldest.binary)
1035
+ # print(json.dumps(oldest))
847
1036
  # Upgrade the oldest node
848
- upgrade_node(oldest,metrics)
849
- return{"status": UPGRADING}
850
-
1037
+ upgrade_node(oldest, metrics)
1038
+ return {"status": UPGRADING}
1039
+
851
1040
  # If AddNewNode
852
1041
  # If stopped nodes available
853
1042
  # Check oldest stopped version
@@ -862,29 +1051,29 @@ def choose_action(config,metrics,db_nodes):
862
1051
  if metrics["StoppedNodes"] > 0:
863
1052
  # What is the oldest stopped node
864
1053
  with S() as session:
865
- oldest=session.execute(select(Node)\
866
- .where(Node.status == STOPPED)\
867
- .order_by(Node.age.asc())).first()
1054
+ oldest = session.execute(
1055
+ select(Node).where(Node.status == STOPPED).order_by(Node.age.asc())
1056
+ ).first()
868
1057
  if oldest:
869
1058
  # Get Node from Row
870
- oldest=oldest[0]
1059
+ oldest = oldest[0]
871
1060
  # If we don't have a version number from metadata, grab from binary
872
1061
  if not oldest.version:
873
- oldest.version=get_antnode_version(oldest.binary)
1062
+ oldest.version = get_antnode_version(oldest.binary)
874
1063
  # If the stopped version is old, upgrade it
875
1064
  if Version(metrics["AntNodeVersion"]) > Version(oldest.version):
876
- upgrade_node(oldest,metrics)
877
- return{"status": UPGRADING}
1065
+ upgrade_node(oldest, metrics)
1066
+ return {"status": UPGRADING}
878
1067
  else:
879
1068
  if start_systemd_node(oldest):
880
- return{"status": RESTARTING}
1069
+ return {"status": RESTARTING}
881
1070
  else:
882
- return{"status": "failed-start-node"}
1071
+ return {"status": "failed-start-node"}
883
1072
  # Hmm, still in Start mode, we shouldn't get here
884
- return {"status": 'START'}
1073
+ return {"status": "START"}
885
1074
  # Still in Add mode, add a new node
886
1075
  if metrics["TotalNodes"] < config["NodeCap"]:
887
- if create_node(config,metrics):
1076
+ if create_node(config, metrics):
888
1077
  return {"status": "ADD"}
889
1078
  else:
890
1079
  return {"status": "failed-create-node"}
@@ -892,12 +1081,13 @@ def choose_action(config,metrics,db_nodes):
892
1081
  return {"status": "node-cap-reached"}
893
1082
  # If we have nothing to do, Survey the node ports
894
1083
  update_nodes()
895
- return{"status": "idle"}
1084
+ return {"status": "idle"}
1085
+
896
1086
 
897
1087
  def main():
898
1088
  # We're starting, so lets create a lock file
899
1089
  try:
900
- with open('/var/antctl/wnm_active', 'w') as file:
1090
+ with open("/var/antctl/wnm_active", "w") as file:
901
1091
  file.write(str(int(time.time())))
902
1092
  except:
903
1093
  logging.error("Unable to create lock file, exiting")
@@ -905,78 +1095,90 @@ def main():
905
1095
 
906
1096
  # See if we already have a known state in the database
907
1097
  with S() as session:
908
- db_nodes=session.execute(select(Node.status,Node.version,
909
- Node.host,Node.metrics_port,
910
- Node.port,Node.age,Node.id,
911
- Node.timestamp)).all()
912
- anm_config=session.execute(select(Machine)).all()
1098
+ db_nodes = session.execute(
1099
+ select(
1100
+ Node.status,
1101
+ Node.version,
1102
+ Node.host,
1103
+ Node.metrics_port,
1104
+ Node.port,
1105
+ Node.age,
1106
+ Node.id,
1107
+ Node.timestamp,
1108
+ )
1109
+ ).all()
1110
+ anm_config = session.execute(select(Machine)).all()
913
1111
 
914
1112
  if db_nodes:
915
- # anm_config by default loads a parameter array,
1113
+ # anm_config by default loads a parameter array,
916
1114
  # use the __json__ method to return a dict from the first node
917
1115
  anm_config = json.loads(json.dumps(anm_config[0][0])) or load_anm_config()
918
- metrics=get_machine_metrics(anm_config["NodeStorage"],anm_config["HDRemove"])
919
- #node_metrics = read_node_metrics(db_nodes[0][2],db_nodes[0][3])
920
- #print(db_nodes[0])
921
- #print(node_metrics)
922
- #print(anm_config)
923
- #print(json.dumps(anm_config,indent=4))
924
- #print("Node: ",db_nodes)
1116
+ metrics = get_machine_metrics(anm_config["NodeStorage"], anm_config["HDRemove"])
1117
+ # node_metrics = read_node_metrics(db_nodes[0][2],db_nodes[0][3])
1118
+ # print(db_nodes[0])
1119
+ # print(node_metrics)
1120
+ # print(anm_config)
1121
+ # print(json.dumps(anm_config,indent=4))
1122
+ # print("Node: ",db_nodes)
925
1123
  logging.info("Found {counter} nodes migrated".format(counter=len(db_nodes)))
926
1124
 
927
1125
  else:
928
1126
  anm_config = load_anm_config()
929
- #print(anm_config)
1127
+ # print(anm_config)
930
1128
  Workers = survey_machine() or []
931
1129
 
932
- #""""
1130
+ # """"
933
1131
  with S() as session:
934
- session.execute(
935
- insert(Node),Workers
936
- )
1132
+ session.execute(insert(Node), Workers)
937
1133
  session.commit()
938
- #"""
1134
+ # """
939
1135
 
940
1136
  with S() as session:
941
- session.execute(
942
- insert(Machine),[anm_config]
943
- )
1137
+ session.execute(insert(Machine), [anm_config])
944
1138
  session.commit()
945
1139
 
946
1140
  # Now load subset of data to work with
947
1141
  with S() as session:
948
- db_nodes=session.execute(select(Node.status,Node.version,
949
- Node.host,Node.metrics_port,
950
- Node.port,Node.age,Node.id,
951
- Node.timestamp)).all()
1142
+ db_nodes = session.execute(
1143
+ select(
1144
+ Node.status,
1145
+ Node.version,
1146
+ Node.host,
1147
+ Node.metrics_port,
1148
+ Node.port,
1149
+ Node.age,
1150
+ Node.id,
1151
+ Node.timestamp,
1152
+ )
1153
+ ).all()
952
1154
 
953
-
954
-
955
- #print(json.dumps(anm_config,indent=4))
1155
+ # print(json.dumps(anm_config,indent=4))
956
1156
  logging.info("Found {counter} nodes configured".format(counter=len(db_nodes)))
957
1157
 
958
- #versions = [v[1] for worker in Workers if (v := worker.get('version'))]
959
- #data = Counter(ver for ver in versions)
960
-
1158
+ # versions = [v[1] for worker in Workers if (v := worker.get('version'))]
1159
+ # data = Counter(ver for ver in versions)
961
1160
 
962
1161
  data = Counter(status[0] for status in db_nodes)
963
- #print(data)
964
- print("Running Nodes:",data[RUNNING])
965
- print("Restarting Nodes:",data[RESTARTING])
966
- print("Stopped Nodes:",data[STOPPED])
967
- print("Upgrading Nodes:",data[UPGRADING])
968
- print("Removing Nodes:",data[REMOVING])
1162
+ # print(data)
1163
+ print("Running Nodes:", data[RUNNING])
1164
+ print("Restarting Nodes:", data[RESTARTING])
1165
+ print("Stopped Nodes:", data[STOPPED])
1166
+ print("Upgrading Nodes:", data[UPGRADING])
1167
+ print("Removing Nodes:", data[REMOVING])
969
1168
  data = Counter(ver[1] for ver in db_nodes)
970
- print("Versions:",data)
1169
+ print("Versions:", data)
971
1170
 
972
- machine_metrics = get_machine_metrics(anm_config['NodeStorage'],anm_config["HDRemove"])
973
- print(json.dumps(anm_config,indent=2))
974
- print(json.dumps(machine_metrics,indent=2))
975
- this_action=choose_action(anm_config,machine_metrics,db_nodes)
976
- print("Action:",json.dumps(this_action,indent=2))
1171
+ machine_metrics = get_machine_metrics(
1172
+ anm_config["NodeStorage"], anm_config["HDRemove"]
1173
+ )
1174
+ print(json.dumps(anm_config, indent=2))
1175
+ print(json.dumps(machine_metrics, indent=2))
1176
+ this_action = choose_action(anm_config, machine_metrics, db_nodes)
1177
+ print("Action:", json.dumps(this_action, indent=2))
977
1178
  # Remove lock file
978
1179
  os.remove("/var/antctl/wnm_active")
979
1180
 
1181
+
980
1182
  if __name__ == "__main__":
981
1183
  main()
982
1184