wmglobalqueue 2.4.2rc7__py3-none-any.whl → 2.4.2rc8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of wmglobalqueue might be problematic. Click here for more details.
- Utils/CertTools.py +38 -0
- WMCore/Database/CMSCouch.py +83 -5
- WMCore/Database/CouchMonitoring.py +450 -0
- WMCore/Services/Rucio/Rucio.py +5 -2
- WMCore/__init__.py +1 -1
- {wmglobalqueue-2.4.2rc7.dist-info → wmglobalqueue-2.4.2rc8.dist-info}/METADATA +1 -1
- {wmglobalqueue-2.4.2rc7.dist-info → wmglobalqueue-2.4.2rc8.dist-info}/RECORD +98 -97
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/bin/wmc-dist-patch +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/bin/wmc-dist-unpatch +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/bin/wmc-httpd +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/.couchapprc +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/README.md +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/_attachments/index.html +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/_attachments/js/ElementInfoByWorkflow.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/_attachments/js/StuckElementInfo.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/_attachments/js/WorkloadInfoTable.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/_attachments/js/dataTable.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/_attachments/js/namespace.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/_attachments/style/main.css +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/couchapp.json +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/filters/childQueueFilter.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/filters/filterDeletedDocs.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/filters/queueFilter.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/language +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/lib/mustache.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/lib/validate.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/lib/workqueue_utils.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/lists/elementsDetail.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/lists/filter.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/lists/stuckElements.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/lists/workRestrictions.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/lists/workflowSummary.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/rewrites.json +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/shows/redirect.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/shows/status.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/templates/ElementSummaryByWorkflow.html +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/templates/StuckElementSummary.html +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/templates/TaskStatus.html +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/templates/WorkflowSummary.html +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/templates/partials/workqueue-common-lib.html +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/templates/partials/yui-lib-remote.html +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/templates/partials/yui-lib.html +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/updates/in-place.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/validate_doc_update.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/vendor/couchapp/_attachments/jquery.couch.app.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/vendor/couchapp/_attachments/jquery.pathbinder.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/activeData/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/activeData/reduce.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/activeParentData/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/activeParentData/reduce.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/activePileupData/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/activePileupData/reduce.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/analyticsData/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/analyticsData/reduce.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/availableByPriority/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/conflicts/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/elements/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/elementsByData/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/elementsByParent/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/elementsByParentData/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/elementsByPileupData/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/elementsByStatus/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/elementsBySubscription/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/elementsByWorkflow/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/elementsByWorkflow/reduce.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/elementsDetailByWorkflowAndStatus/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/jobInjectStatusByRequest/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/jobInjectStatusByRequest/reduce.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/jobStatusByRequest/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/jobStatusByRequest/reduce.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/jobsByChildQueueAndPriority/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/jobsByChildQueueAndPriority/reduce.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/jobsByChildQueueAndStatus/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/jobsByChildQueueAndStatus/reduce.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/jobsByRequest/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/jobsByRequest/reduce.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/jobsByStatus/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/jobsByStatus/reduce.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/jobsByStatusAndPriority/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/jobsByStatusAndPriority/reduce.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/openRequests/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/recent-items/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/siteWhitelistByRequest/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/siteWhitelistByRequest/reduce.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/specsByWorkflow/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/stuckElements/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/wmbsInjectStatusByRequest/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/wmbsInjectStatusByRequest/reduce.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/wmbsUrl/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/wmbsUrl/reduce.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/wmbsUrlByRequest/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/wmbsUrlByRequest/reduce.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/workflowSummary/map.js +0 -0
- {wmglobalqueue-2.4.2rc7.data → wmglobalqueue-2.4.2rc8.data}/data/data/couchapps/WorkQueue/views/workflowSummary/reduce.js +0 -0
- {wmglobalqueue-2.4.2rc7.dist-info → wmglobalqueue-2.4.2rc8.dist-info}/WHEEL +0 -0
- {wmglobalqueue-2.4.2rc7.dist-info → wmglobalqueue-2.4.2rc8.dist-info}/licenses/LICENSE +0 -0
- {wmglobalqueue-2.4.2rc7.dist-info → wmglobalqueue-2.4.2rc8.dist-info}/licenses/NOTICE +0 -0
- {wmglobalqueue-2.4.2rc7.dist-info → wmglobalqueue-2.4.2rc8.dist-info}/top_level.txt +0 -0
Utils/CertTools.py
CHANGED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Module to deal with user certificates and CAs
|
|
3
|
+
"""
|
|
1
4
|
from builtins import str
|
|
2
5
|
import os
|
|
3
6
|
|
|
@@ -60,3 +63,38 @@ def getCAPathFromEnv():
|
|
|
60
63
|
you need to set either the X509_CERT_DIR variable or the cacert key of the request.
|
|
61
64
|
"""
|
|
62
65
|
return os.environ.get("X509_CERT_DIR")
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def caBundle(caPath="/etc/grid-security/certificates"):
|
|
69
|
+
"""
|
|
70
|
+
Load all PEM certificates from the given caPath and write them as single CA bundle PEM.
|
|
71
|
+
|
|
72
|
+
:param caPath: Path to directory containing .pem certificate files.
|
|
73
|
+
:return: A single string containing all concatenated PEM pemCertificates which may be
|
|
74
|
+
written to a caBundleFile if necessary (used by requests library)
|
|
75
|
+
"""
|
|
76
|
+
if not os.path.isdir(caPath):
|
|
77
|
+
raise ValueError(f"Invalid caPath: {caPath} is not a directory")
|
|
78
|
+
|
|
79
|
+
pemCertificates = []
|
|
80
|
+
|
|
81
|
+
for fileName in sorted(os.listdir(caPath)):
|
|
82
|
+
filePath = os.path.join(caPath, fileName)
|
|
83
|
+
|
|
84
|
+
# Only consider readable files that look like PEM certificates
|
|
85
|
+
if not os.path.isfile(filePath):
|
|
86
|
+
continue
|
|
87
|
+
if not fileName.endswith(".pem"):
|
|
88
|
+
continue
|
|
89
|
+
|
|
90
|
+
try:
|
|
91
|
+
with open(filePath, "r", encoding="utf-8") as istream:
|
|
92
|
+
certData = istream.read()
|
|
93
|
+
if "BEGIN CERTIFICATE" in certData:
|
|
94
|
+
pemCertificates.append(certData)
|
|
95
|
+
except Exception as e:
|
|
96
|
+
print(f"Warning: Could not read {filePath}: {e}")
|
|
97
|
+
|
|
98
|
+
if len(pemCertificates) == 0:
|
|
99
|
+
raise ValueError(f"No PEM files found in {caPath}")
|
|
100
|
+
return "\n".join(pemCertificates)
|
WMCore/Database/CMSCouch.py
CHANGED
|
@@ -25,12 +25,14 @@ import logging
|
|
|
25
25
|
import re
|
|
26
26
|
import time
|
|
27
27
|
import sys
|
|
28
|
+
from pprint import pformat
|
|
28
29
|
from datetime import datetime
|
|
29
30
|
from http.client import HTTPException
|
|
30
31
|
|
|
31
32
|
from Utils.IteratorTools import grouper, nestedDictUpdate
|
|
32
33
|
from WMCore.Lexicon import sanitizeURL
|
|
33
34
|
from WMCore.Services.Requests import JSONRequests
|
|
35
|
+
from WMCore.Database.CouchMonitoring import checkStatus
|
|
34
36
|
|
|
35
37
|
|
|
36
38
|
def check_name(dbname):
|
|
@@ -1274,6 +1276,55 @@ class CouchMonitor(object):
|
|
|
1274
1276
|
return resp
|
|
1275
1277
|
return data.get("docs", resp)
|
|
1276
1278
|
|
|
1279
|
+
def couchReplicationStatus(self):
|
|
1280
|
+
"""
|
|
1281
|
+
check couchdb replication status with compatible output of checkCouchReplications
|
|
1282
|
+
|
|
1283
|
+
:return: a list of dictionaries with the status of the replications and an
|
|
1284
|
+
error message
|
|
1285
|
+
"""
|
|
1286
|
+
output = []
|
|
1287
|
+
sdict = checkStatus(kind='scheduler')
|
|
1288
|
+
rdict = checkStatus(kind='replicator')
|
|
1289
|
+
method = 'scheduler+replicator'
|
|
1290
|
+
# update sdict only with entries from replicator dict which are not present in scheduler
|
|
1291
|
+
for key, val in rdict['current_status'].items():
|
|
1292
|
+
if key not in sdict['current_status']:
|
|
1293
|
+
sdict['current_status'][key] = val
|
|
1294
|
+
stateFailures = ['error', 'failed']
|
|
1295
|
+
for rid, record in sdict['current_status'].items():
|
|
1296
|
+
if record['state'] in stateFailures:
|
|
1297
|
+
status['state'] = 'error'
|
|
1298
|
+
source = sanitizeURL(record['source'])
|
|
1299
|
+
target = sanitizeURL(record['target'])
|
|
1300
|
+
error = record['error']
|
|
1301
|
+
history = pformat(record['history'])
|
|
1302
|
+
msg = f"Replication from {source} to {target} for document {rid} is in a bad state: {error}; "
|
|
1303
|
+
msg += f"History: {history}"
|
|
1304
|
+
status = {'name': 'CouchServer', 'status': 'error', 'error_message': msg, 'method': method}
|
|
1305
|
+
output.append(status)
|
|
1306
|
+
|
|
1307
|
+
# if our replication is fine we should check that it is not in a stale phase
|
|
1308
|
+
activeTasks = self.getActiveTasks()
|
|
1309
|
+
activeTasks = [task for task in activeTasks if task["type"].lower() == "replication"]
|
|
1310
|
+
resp = self.checkReplicationState()
|
|
1311
|
+
for replTask in activeTasks:
|
|
1312
|
+
if self.isReplicationStale(replTask):
|
|
1313
|
+
source = sanitizeURL(replTask['source'])['url']
|
|
1314
|
+
target = sanitizeURL(replTask['target'])['url']
|
|
1315
|
+
msg = f"Replication from {source} to {target} is stale and it's last"
|
|
1316
|
+
msg += f"update time was at: {replTask.get('updated_on')}"
|
|
1317
|
+
resp['status'] = 'error'
|
|
1318
|
+
resp['error_message'] += msg
|
|
1319
|
+
resp['method'] = 'stale phase'
|
|
1320
|
+
resp['name'] = 'CouchServer'
|
|
1321
|
+
output.append(resp)
|
|
1322
|
+
# check if we did not record any replication status, then add the ok status
|
|
1323
|
+
if len(output) == 0:
|
|
1324
|
+
status = {'name': 'CouchServer', 'status': 'ok', 'error_message': ''}
|
|
1325
|
+
output.append(status)
|
|
1326
|
+
return output
|
|
1327
|
+
|
|
1277
1328
|
def checkCouchReplications(self, replicationsList):
|
|
1278
1329
|
"""
|
|
1279
1330
|
Check whether the list of expected replications exist in CouchDB
|
|
@@ -1281,9 +1332,11 @@ class CouchMonitor(object):
|
|
|
1281
1332
|
|
|
1282
1333
|
:param replicationsList: a list of dictionary with the replication
|
|
1283
1334
|
document setup.
|
|
1284
|
-
:return: a
|
|
1335
|
+
:return: a list of dictionaries with the status of the replications and an
|
|
1285
1336
|
error message
|
|
1286
1337
|
"""
|
|
1338
|
+
output = []
|
|
1339
|
+
method = 'comparison of replications docs vs active tasks'
|
|
1287
1340
|
activeTasks = self.getActiveTasks()
|
|
1288
1341
|
# filter out any task that is not a database replication
|
|
1289
1342
|
activeTasks = [task for task in activeTasks if task["type"].lower() == "replication"]
|
|
@@ -1292,12 +1345,12 @@ class CouchMonitor(object):
|
|
|
1292
1345
|
msg = f"Expected to have {len(replicationsList)} replication tasks, "
|
|
1293
1346
|
msg += f"but only {len(activeTasks)} in CouchDB. "
|
|
1294
1347
|
msg += f"Current replications are: {activeTasks}"
|
|
1295
|
-
|
|
1348
|
+
status = {'name': 'CouchServer', 'status': 'error', 'error_message': msg, 'method': method}
|
|
1349
|
+
output.append(status)
|
|
1296
1350
|
|
|
1297
1351
|
resp = self.checkReplicationState()
|
|
1298
1352
|
if resp['status'] != 'ok':
|
|
1299
|
-
|
|
1300
|
-
return resp
|
|
1353
|
+
output.append(resp)
|
|
1301
1354
|
|
|
1302
1355
|
# finally, check if replications are being updated in a timely fashion
|
|
1303
1356
|
for replTask in activeTasks:
|
|
@@ -1308,7 +1361,15 @@ class CouchMonitor(object):
|
|
|
1308
1361
|
msg += f"update time was at: {replTask.get('updated_on')}"
|
|
1309
1362
|
resp['status'] = 'error'
|
|
1310
1363
|
resp['error_message'] += msg
|
|
1311
|
-
|
|
1364
|
+
resp['method'] = method
|
|
1365
|
+
resp['name'] = 'CouchServer'
|
|
1366
|
+
output.append(resp)
|
|
1367
|
+
|
|
1368
|
+
# check if we did not record any replication status, then add the ok status
|
|
1369
|
+
if len(output) == 0:
|
|
1370
|
+
status = {'name': 'CouchServer', 'status': 'ok', 'error_message': ''}
|
|
1371
|
+
output.append(status)
|
|
1372
|
+
return output
|
|
1312
1373
|
|
|
1313
1374
|
def checkReplicationState(self):
|
|
1314
1375
|
"""
|
|
@@ -1347,3 +1408,20 @@ class CouchMonitor(object):
|
|
|
1347
1408
|
# then it has been recently updated
|
|
1348
1409
|
return True
|
|
1349
1410
|
return False
|
|
1411
|
+
|
|
1412
|
+
def isReplicationStale(self, replInfo, niter=10):
|
|
1413
|
+
"""
|
|
1414
|
+
Ensure that the replication document is up-to-date as a
|
|
1415
|
+
function of the checkpoint interval.
|
|
1416
|
+
|
|
1417
|
+
:param replInfo: dictionary with the replication information
|
|
1418
|
+
:param niter: number of iteration for checkpoint interval
|
|
1419
|
+
:return: True if replication is working fine, otherwise False
|
|
1420
|
+
"""
|
|
1421
|
+
maxUpdateInterval = niter * replInfo['checkpoint_interval'] / 1000
|
|
1422
|
+
lastUpdate = replInfo["updated_on"]
|
|
1423
|
+
|
|
1424
|
+
if lastUpdate + maxUpdateInterval > int(time.time()):
|
|
1425
|
+
# then it has been recently updated and it means replication is not stale
|
|
1426
|
+
return False
|
|
1427
|
+
return True
|
|
@@ -0,0 +1,450 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
"""
|
|
3
|
+
This module provides helper functions to obtain and handle CouchDB Replication data:
|
|
4
|
+
- getSchedulerJobDocs get replication status based on scheduler information
|
|
5
|
+
- getReplicatorDocs get replication status based on replicator information
|
|
6
|
+
- compareReplicationStatus compares previous and current statuses
|
|
7
|
+
- formatPrometheusMetrics format status metrics in Prometheus format
|
|
8
|
+
- createAlerts create alerts from given status dict
|
|
9
|
+
- checkStatus perform all checks for couchdb replication
|
|
10
|
+
|
|
11
|
+
Example of using Flask framework to serve prometheus metrics about CouchDB replication
|
|
12
|
+
|
|
13
|
+
import requests
|
|
14
|
+
from flask import Flask, Response
|
|
15
|
+
import threading
|
|
16
|
+
import time
|
|
17
|
+
|
|
18
|
+
app = Flask(__name__)
|
|
19
|
+
status_cache = {}
|
|
20
|
+
|
|
21
|
+
@app.route("/metrics")
|
|
22
|
+
def metrics():
|
|
23
|
+
return Response(formatPrometheusMetrics(status_cache), mimetype="text/plain")
|
|
24
|
+
|
|
25
|
+
def daemonCouchReplicationStatus(interval=30):
|
|
26
|
+
global status_cache
|
|
27
|
+
while True:
|
|
28
|
+
new_status = getSchedulerJobDocs(COUCHDB_URL, USERNAME, PASSWORD)
|
|
29
|
+
status_cache = new_status
|
|
30
|
+
time.sleep(interval)
|
|
31
|
+
|
|
32
|
+
if __name__ == "__main__":
|
|
33
|
+
# Start the background thread to update replication status periodically
|
|
34
|
+
threading.Thread(target=daemonCouchReplicationStatus, daemon=True).start()
|
|
35
|
+
# Run the Flask app
|
|
36
|
+
app.run(host="0.0.0.0", port=8000)
|
|
37
|
+
"""
|
|
38
|
+
|
|
39
|
+
import os
|
|
40
|
+
import json
|
|
41
|
+
import requests
|
|
42
|
+
import tempfile
|
|
43
|
+
|
|
44
|
+
# WMCore modules
|
|
45
|
+
from Utils.CertTools import cert, ckey, caBundle
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def getSchedulerJobDocs(couchdbUrl):
|
|
49
|
+
"""
|
|
50
|
+
Fetch CouchDB replication statuses. The logic is based on /_scheduler/jobs CouchDB end-point
|
|
51
|
+
see https://docs.couchdb.org/en/stable/api/server/common.html#api-server-scheduler-jobs
|
|
52
|
+
:param couchdbUrl: url of couch db
|
|
53
|
+
:return: dictionary of statuses for all found replication documents
|
|
54
|
+
"""
|
|
55
|
+
username, password = couchCredentials()
|
|
56
|
+
auth = (username, password) if username and password else None
|
|
57
|
+
try:
|
|
58
|
+
response = requests.get(f"{couchdbUrl}/_scheduler/jobs", auth=auth)
|
|
59
|
+
response.raise_for_status()
|
|
60
|
+
data = response.json()
|
|
61
|
+
|
|
62
|
+
statuses = {}
|
|
63
|
+
for job in data.get('jobs', []):
|
|
64
|
+
doc_id = job.get('doc_id') or job.get('id')
|
|
65
|
+
source = job.get('source')
|
|
66
|
+
target = job.get('target')
|
|
67
|
+
history = job.get('history', [])
|
|
68
|
+
info = job.get('info', {})
|
|
69
|
+
|
|
70
|
+
# Determine current state from latest history item
|
|
71
|
+
state = history[0]['type'] if history else 'unknown'
|
|
72
|
+
|
|
73
|
+
# Detect error if 'crashed' exists in any history entry
|
|
74
|
+
error = None
|
|
75
|
+
for h in history:
|
|
76
|
+
if h.get('type') == 'crashed':
|
|
77
|
+
error = f"Job previous crashed at {h.get('timestamp')} due to {h.get('reason')}"
|
|
78
|
+
break
|
|
79
|
+
|
|
80
|
+
# check info document
|
|
81
|
+
if info and info.get('doc_write_failures', 0) != 0:
|
|
82
|
+
error = f"found failure of replication jobs in {couchdbUrl}/_scheduler/jobs "
|
|
83
|
+
state = "error"
|
|
84
|
+
# try to get more info about the error
|
|
85
|
+
try:
|
|
86
|
+
response = requests.get(f"{couchdbUrl}/_scheduler/docs/_replicator/{doc_id}", auth=auth)
|
|
87
|
+
response.raise_for_status()
|
|
88
|
+
data = response.json()
|
|
89
|
+
error += f" Replicator state for {doc_id}: "
|
|
90
|
+
error += json.dumps(data)
|
|
91
|
+
except:
|
|
92
|
+
pass
|
|
93
|
+
|
|
94
|
+
statuses[doc_id] = {
|
|
95
|
+
'state': state,
|
|
96
|
+
'source': source,
|
|
97
|
+
'target': target,
|
|
98
|
+
'error': error,
|
|
99
|
+
'history': history
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
return statuses
|
|
103
|
+
except requests.RequestException as e:
|
|
104
|
+
print(f"Error fetching scheduler jobs: {e}")
|
|
105
|
+
return {}
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def getReplicatorDocs(url=None):
|
|
109
|
+
"""
|
|
110
|
+
Helper function to get all replicator docs and return summary dictionary
|
|
111
|
+
:param url: url of the couchdb
|
|
112
|
+
:return: replication summary dictionary
|
|
113
|
+
"""
|
|
114
|
+
username, password = couchCredentials()
|
|
115
|
+
auth = (username, password) if username and password else None
|
|
116
|
+
if not url:
|
|
117
|
+
url = "http://localhost:5984"
|
|
118
|
+
headers = {"Accept": "application/json"}
|
|
119
|
+
|
|
120
|
+
# Get list of all documents in _replicator
|
|
121
|
+
r = requests.get(f"{url}/_replicator/_all_docs?include_docs=true",
|
|
122
|
+
headers=headers, auth=auth)
|
|
123
|
+
|
|
124
|
+
if r.status_code != 200:
|
|
125
|
+
raise Exception(f"Failed to fetch replication docs: {r.text}")
|
|
126
|
+
|
|
127
|
+
data = r.json()
|
|
128
|
+
result = {}
|
|
129
|
+
|
|
130
|
+
for row in data.get("rows", []):
|
|
131
|
+
doc = row.get("doc", {})
|
|
132
|
+
doc_id = doc.get("_id")
|
|
133
|
+
if doc_id.startswith("_design/"):
|
|
134
|
+
continue # skip design docs
|
|
135
|
+
|
|
136
|
+
summary = {
|
|
137
|
+
"state": doc.get("_replication_state"),
|
|
138
|
+
"source": doc.get("source"),
|
|
139
|
+
"target": doc.get("target"),
|
|
140
|
+
"error": doc.get("_replication_state_reason"),
|
|
141
|
+
"history": []
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
history = doc.get("_replication_history", [])
|
|
145
|
+
for h in history:
|
|
146
|
+
entry = {
|
|
147
|
+
"timestamp": h.get("start_time") or h.get("end_time"),
|
|
148
|
+
"type": h.get("type") or "unknown"
|
|
149
|
+
}
|
|
150
|
+
summary["history"].append(entry)
|
|
151
|
+
|
|
152
|
+
result[doc_id] = summary
|
|
153
|
+
|
|
154
|
+
return result
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def compareReplicationStatus(prev, curr):
|
|
158
|
+
"""
|
|
159
|
+
Helper function to compare replication status from previous to current state
|
|
160
|
+
:param prev: previous replication status dictionary
|
|
161
|
+
:param curr: current replication status dictionary
|
|
162
|
+
:return: dictionary of changes
|
|
163
|
+
"""
|
|
164
|
+
changes = {}
|
|
165
|
+
for key in curr:
|
|
166
|
+
if key not in prev or prev[key] != curr[key]:
|
|
167
|
+
changes[key] = {
|
|
168
|
+
'old': prev.get(key),
|
|
169
|
+
'new': curr[key]
|
|
170
|
+
}
|
|
171
|
+
return changes
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def formatPrometheusMetrics(statuses):
|
|
175
|
+
"""
|
|
176
|
+
Helper function to provide Prometheus metrics from given status dictionary
|
|
177
|
+
:param statuses: replication status dictionary
|
|
178
|
+
:return: prometheus metrics
|
|
179
|
+
"""
|
|
180
|
+
states = {'error': -1, 'completed': 0, 'started': 1, 'added': 2, 'waiting': 3, 'triggered': 4, 'failed': 5}
|
|
181
|
+
lines = [
|
|
182
|
+
f'# HELP couchdb_replication_state Replication state: {states}',
|
|
183
|
+
'# TYPE couchdb_replication_state gauge'
|
|
184
|
+
]
|
|
185
|
+
for key, status in statuses.items():
|
|
186
|
+
label = f'replId="{key}",source="{status["source"]}",target="{status["target"]}"'
|
|
187
|
+
value = 0 # default error/other
|
|
188
|
+
for k, v in states.items():
|
|
189
|
+
if status['state'] == k:
|
|
190
|
+
value = v
|
|
191
|
+
break
|
|
192
|
+
lines.append(f'couchdb_replication_state{{{label}}} {value}')
|
|
193
|
+
return '\n'.join(lines)
|
|
194
|
+
|
|
195
|
+
|
|
196
|
+
def createAlerts(statuses):
|
|
197
|
+
"""
|
|
198
|
+
Helper function to check alerts of replication status dictionary
|
|
199
|
+
:param statuses: replication status dictionary
|
|
200
|
+
:return: alerts dictionary
|
|
201
|
+
"""
|
|
202
|
+
alerts = {}
|
|
203
|
+
for key, status in statuses.items():
|
|
204
|
+
if status['state'] != 'completed':
|
|
205
|
+
alerts[key] = f"Replication state for {key} is '{status['state']}', error: {status['error']}"
|
|
206
|
+
return alerts
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def couchCredentials():
|
|
210
|
+
"""
|
|
211
|
+
Select CouchDB credentials from provided secrets file
|
|
212
|
+
:return: tuple of (user, password)
|
|
213
|
+
"""
|
|
214
|
+
fname = os.getenv('WMAGENT_SECRETS_LOCATION', '')
|
|
215
|
+
if fname == "":
|
|
216
|
+
raise Exception("No WMAGENT_SECRETS_LOCATION in environment")
|
|
217
|
+
user = ''
|
|
218
|
+
password = ''
|
|
219
|
+
data = ''
|
|
220
|
+
with open(fname, 'r', encoding="utf-8") as istream:
|
|
221
|
+
data = istream.read()
|
|
222
|
+
for item in data.split('\n'):
|
|
223
|
+
if 'COUCH_USER' in item:
|
|
224
|
+
user = item.split('=')[-1]
|
|
225
|
+
if 'COUCH_PASS' in item:
|
|
226
|
+
password = item.split('=')[-1]
|
|
227
|
+
return user, password
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
def checkStatus(url=None, prevStatus=None, kind="scheduler"):
|
|
231
|
+
"""
|
|
232
|
+
Perform check of replication statuses
|
|
233
|
+
:param url: couchdb URL
|
|
234
|
+
:param prevStatus: previous status dictionary
|
|
235
|
+
:param kind: kind of data look-up, e.g. scheduler or replicator
|
|
236
|
+
:return: dictionary of current couchdb replication
|
|
237
|
+
|
|
238
|
+
Here is an example of such dictionary structure:
|
|
239
|
+
{'current_status': currStatus (dictionary),
|
|
240
|
+
'previous_status': prevStatus (dictionary),
|
|
241
|
+
'changes': changes (dictionary),
|
|
242
|
+
'metrics': metrics (string),
|
|
243
|
+
'alerts': alerts (dictionary)}
|
|
244
|
+
|
|
245
|
+
Then, current and previous status dictionaries have the following form:
|
|
246
|
+
{
|
|
247
|
+
"14843c24643f8960eb159f5912f0f938": {
|
|
248
|
+
"state": "started",
|
|
249
|
+
"source": "https://xxx.cern.ch/couchdb/workqueue/",
|
|
250
|
+
"target": "http://127.0.0.1:5984/workqueue_inbox/",
|
|
251
|
+
"error": "Job previously crashed at 2025-05-05T18:47:11Z due to {changes_reader_died,{timeout,ibrowse_stream_cleanup}}",
|
|
252
|
+
"history": [
|
|
253
|
+
{
|
|
254
|
+
"timestamp": "2025-05-05T18:47:11Z",
|
|
255
|
+
"type": "started"
|
|
256
|
+
},
|
|
257
|
+
...
|
|
258
|
+
]
|
|
259
|
+
},
|
|
260
|
+
"14843c24643f8960eb159f5912f0e51e": {
|
|
261
|
+
"state": "started",
|
|
262
|
+
"source": "http://127.0.0.1:5984/wmagent_summary/",
|
|
263
|
+
"target": "https://xxx.cern.ch/couchdb/wmstats/",
|
|
264
|
+
"error": null,
|
|
265
|
+
"history": [
|
|
266
|
+
{
|
|
267
|
+
"timestamp": "2025-04-09T11:19:36Z",
|
|
268
|
+
"type": "started"
|
|
269
|
+
},
|
|
270
|
+
{
|
|
271
|
+
"timestamp": "2025-04-09T11:19:36Z",
|
|
272
|
+
"type": "added"
|
|
273
|
+
}
|
|
274
|
+
]
|
|
275
|
+
},
|
|
276
|
+
...
|
|
277
|
+
}
|
|
278
|
+
"""
|
|
279
|
+
if not prevStatus:
|
|
280
|
+
prevStatus = {}
|
|
281
|
+
if not url:
|
|
282
|
+
url = "http://localhost:5984"
|
|
283
|
+
|
|
284
|
+
# first let's get statuses of documents
|
|
285
|
+
if kind == "scheduler":
|
|
286
|
+
currStatus = getSchedulerJobDocs(url)
|
|
287
|
+
elif kind == "replicator":
|
|
288
|
+
currStatus = getReplicatorDocs(url)
|
|
289
|
+
else:
|
|
290
|
+
raise Exception("Unsupported kind of documents '{kind}', should be either scheduler or replicator")
|
|
291
|
+
|
|
292
|
+
# now we can find out changes from previous statuses
|
|
293
|
+
changes = compareReplicationStatus(prevStatus, currStatus)
|
|
294
|
+
|
|
295
|
+
# construct prometheus metrics with current statuses
|
|
296
|
+
metrics = formatPrometheusMetrics(currStatus)
|
|
297
|
+
|
|
298
|
+
# construct alerts with current statuses
|
|
299
|
+
alerts = createAlerts(currStatus)
|
|
300
|
+
|
|
301
|
+
# build final dictionary to return upstream
|
|
302
|
+
sdict = {'current_status': currStatus,
|
|
303
|
+
'previous_status': prevStatus,
|
|
304
|
+
'changes': changes,
|
|
305
|
+
'metrics': metrics,
|
|
306
|
+
'alerts': alerts}
|
|
307
|
+
return sdict
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def getDocCount(url, auth, certTuple, caCert):
    """
    Helper function to get the document count of a CouchDB database.

    :param url: url of the couchdb database
    :param auth: couchdb authentication credentials tuple (user, password)
    :param certTuple: tuple of (certificate, key) file names for TLS client auth
    :param caCert: ca bundle file name; falsy values fall back to default CA verification
    :return: document count, or -1 if the response carries no 'doc_count' field
    :raises requests.HTTPError: when the couchdb endpoint returns an error status
    """
    resp = requests.get(url, auth=auth, cert=certTuple, verify=caCert or True)
    resp.raise_for_status()
    return resp.json().get('doc_count', -1)
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def getReplicationState(url, auth, certTuple, caCert):
    """
    Helper function to get replication state of a replication document.

    :param url: url of the couchdb replication document
    :param auth: couchdb authentication credentials tuple (user, password)
    :param certTuple: tuple of (certificate, key) file names for TLS client auth
    :param caCert: ca bundle file name; falsy values fall back to default CA verification
    :return: tuple of (_replication_state, _replication_state_time); either element
        may be None when the document lacks the corresponding field
    :raises requests.HTTPError: when the couchdb endpoint returns an error status
    """
    resp = requests.get(url, auth=auth, cert=certTuple, verify=caCert or True)
    resp.raise_for_status()
    doc = resp.json()
    return doc.get('_replication_state'), doc.get('_replication_state_time')
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def compareCouchInstances(sourceUrl, targetUrl, replUrl):
    """
    Compare the number of documents between source and destination CouchDB
    databases and fetch the replication state of the given replication document.

    :param sourceUrl: str, e.g. http://localhost:5984/source_db
    :param targetUrl: str, e.g. http://localhost:5984/dest_db
    :param replUrl: str, e.g. http://localhost:5984/_replicator/<replId>
    :return: dictionary with source/target urls, their document counts and the
        replication state/timestamp; an empty dictionary when no user
        certificate/key is available
    """
    user, password = couchCredentials()
    auth = (user, password)
    sdict = {}
    # call cert()/ckey() only once each and normalize missing values to ''
    userCert = cert() or ''
    userCkey = ckey() or ''
    if userCkey == '' or userCert == '':
        # without a client certificate/key we cannot authenticate, bail out early
        return sdict
    certTuple = (userCert, userCkey)
    # build a temporary CA bundle file out of the local certificates area;
    # the file is removed automatically when the context manager exits
    with tempfile.NamedTemporaryFile(mode='w+', suffix=".pem", delete=True) as tfile:
        capath = os.environ.get("X509_CERT_DIR", '/etc/grid-security/certificates')
        cacerts = caBundle(capath)
        tfile.write(cacerts)
        tfile.flush()

        sourceCount = getDocCount(sourceUrl, auth, certTuple, tfile.name)
        targetCount = getDocCount(targetUrl, auth, certTuple, tfile.name)
        state, stateTime = getReplicationState(replUrl, auth, certTuple, tfile.name)

        sdict = {
            "source": sourceUrl,
            "target": targetUrl,
            "source_count": sourceCount,
            "target_count": targetCount,
            "state": state,
            "state_timestamp": stateTime
        }
    return sdict
|
|
374
|
+
|
|
375
|
+
|
|
376
|
+
def exampleReplicationStatus(sourceUrl=None):
    """
    Example function to test replication status either based on scheduler or
    replicator info.

    This function should run on a node with local CouchDB access as all of its
    logic relies on using localhost:5984 URL

    :param sourceUrl: CouchDB source URL to check; None defers to checkStatus defaults
    """
    def _printStatus(kind):
        # fetch and print status, metrics and (optional) alerts for given doc kind
        sdict = checkStatus(url=sourceUrl, kind=kind)
        print(f'--- status based on {kind} info ---')
        print(sdict['current_status'])
        print('--- metrics ---')
        print(sdict['metrics'])
        if sdict.get('alerts', None):
            print('--- alerts ---')
            for k, msg in sdict['alerts'].items():
                print(f"{k}: {msg}")

    try:
        print(f"checking {sourceUrl}")
        # let's first test scheduler info
        _printStatus("scheduler")
        print()
        # now let's test replicator info
        _printStatus("replicator")
    except Exception as exp:
        print(str(exp))
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
def exampleIndividualDocument(sourceUrl, targetUrl, replUrl):
    """
    Example function how to test check status of particular replication document.

    This function should run through CMSWEB frontend URLs as we need to compare
    documents in both source and target CouchDB instances

    :param sourceUrl: source couchdb URL, e.g. https://xxx.cern.ch/couchdb/test_db
    :param targetUrl: target couchdb URL, e.g. https://xxx.cern.ch/couchdb/test_db
    :param replUrl: replication URL, e.g. https://xxx.cern.ch/couchdb/test_db/_replicator/bla
    """
    try:
        result = compareCouchInstances(sourceUrl, targetUrl, replUrl)
        print('--- compare CouchDB Instances ---')
        print('source: ', sourceUrl)
        print('target: ', targetUrl)
        print(result)
    except Exception as exp:
        # report the failure instead of silently swallowing it; the previous
        # bare 'except: pass' also suppressed KeyboardInterrupt/SystemExit
        print(str(exp))
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
def test():
    """
    Command line driver for the example functions.

    Expects either a single CouchDB source URL, or a triple of
    <srcUrl> <targetUrl> <replicationId> on the command line.
    """
    import sys
    args = sys.argv[1:]
    if args:
        # at least the source URL is given, check its replication status
        exampleReplicationStatus(args[0])
    else:
        print("Cannot run tests, please provide at least CouchDB source URL, or <srcUrl> <targetUrl> <replicationId>")
    if len(args) == 3:
        # full triple given, also compare the individual replication document
        exampleIndividualDocument(args[0], args[1], args[2])


if __name__ == '__main__':
    test()
|
WMCore/Services/Rucio/Rucio.py
CHANGED
|
@@ -706,15 +706,18 @@ class Rucio(object):
|
|
|
706
706
|
Update rule information for a given rule id
|
|
707
707
|
:param ruleId: string with the rule id
|
|
708
708
|
:param opts: dictionary, rule id options passed to Rucio
|
|
709
|
-
:return: boolean status
|
|
709
|
+
:return: boolean status to represent whether it succeeded or not.
|
|
710
|
+
ok status code and RuleNotFound exception are considered as succeeded,
|
|
711
|
+
any other Exception case is considered as failed.
|
|
710
712
|
"""
|
|
711
|
-
status =
|
|
713
|
+
status = True
|
|
712
714
|
try:
|
|
713
715
|
status = self.cli.update_replication_rule(ruleId, opts)
|
|
714
716
|
except RuleNotFound:
|
|
715
717
|
self.logger.error("Cannot find any information for rule id: %s", ruleId)
|
|
716
718
|
except Exception as ex:
|
|
717
719
|
self.logger.error("Exception updating rule id: %s. Error: %s", ruleId, str(ex))
|
|
720
|
+
status = False
|
|
718
721
|
return status
|
|
719
722
|
|
|
720
723
|
def deleteRule(self, ruleId, purgeReplicas=False):
|
WMCore/__init__.py
CHANGED