wmglobalqueue-2.4.5.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- Utils/CPMetrics.py +270 -0
- Utils/CertTools.py +100 -0
- Utils/EmailAlert.py +50 -0
- Utils/ExtendedUnitTestCase.py +62 -0
- Utils/FileTools.py +182 -0
- Utils/IteratorTools.py +80 -0
- Utils/MathUtils.py +31 -0
- Utils/MemoryCache.py +119 -0
- Utils/Patterns.py +24 -0
- Utils/Pipeline.py +137 -0
- Utils/PortForward.py +97 -0
- Utils/ProcFS.py +112 -0
- Utils/ProcessStats.py +194 -0
- Utils/PythonVersion.py +17 -0
- Utils/Signals.py +36 -0
- Utils/TemporaryEnvironment.py +27 -0
- Utils/Throttled.py +227 -0
- Utils/Timers.py +130 -0
- Utils/Timestamps.py +86 -0
- Utils/TokenManager.py +143 -0
- Utils/Tracing.py +60 -0
- Utils/TwPrint.py +98 -0
- Utils/Utilities.py +318 -0
- Utils/__init__.py +11 -0
- Utils/wmcoreDTools.py +707 -0
- WMCore/ACDC/Collection.py +57 -0
- WMCore/ACDC/CollectionTypes.py +12 -0
- WMCore/ACDC/CouchCollection.py +67 -0
- WMCore/ACDC/CouchFileset.py +238 -0
- WMCore/ACDC/CouchService.py +73 -0
- WMCore/ACDC/DataCollectionService.py +485 -0
- WMCore/ACDC/Fileset.py +94 -0
- WMCore/ACDC/__init__.py +11 -0
- WMCore/Algorithms/Alarm.py +39 -0
- WMCore/Algorithms/MathAlgos.py +274 -0
- WMCore/Algorithms/MiscAlgos.py +67 -0
- WMCore/Algorithms/ParseXMLFile.py +115 -0
- WMCore/Algorithms/Permissions.py +27 -0
- WMCore/Algorithms/Singleton.py +58 -0
- WMCore/Algorithms/SubprocessAlgos.py +129 -0
- WMCore/Algorithms/__init__.py +7 -0
- WMCore/Cache/GenericDataCache.py +98 -0
- WMCore/Cache/WMConfigCache.py +572 -0
- WMCore/Cache/__init__.py +0 -0
- WMCore/Configuration.py +659 -0
- WMCore/DAOFactory.py +47 -0
- WMCore/DataStructs/File.py +177 -0
- WMCore/DataStructs/Fileset.py +140 -0
- WMCore/DataStructs/Job.py +182 -0
- WMCore/DataStructs/JobGroup.py +142 -0
- WMCore/DataStructs/JobPackage.py +49 -0
- WMCore/DataStructs/LumiList.py +734 -0
- WMCore/DataStructs/Mask.py +219 -0
- WMCore/DataStructs/MathStructs/ContinuousSummaryHistogram.py +197 -0
- WMCore/DataStructs/MathStructs/DiscreteSummaryHistogram.py +92 -0
- WMCore/DataStructs/MathStructs/SummaryHistogram.py +117 -0
- WMCore/DataStructs/MathStructs/__init__.py +0 -0
- WMCore/DataStructs/Pickleable.py +24 -0
- WMCore/DataStructs/Run.py +256 -0
- WMCore/DataStructs/Subscription.py +175 -0
- WMCore/DataStructs/WMObject.py +47 -0
- WMCore/DataStructs/WorkUnit.py +112 -0
- WMCore/DataStructs/Workflow.py +60 -0
- WMCore/DataStructs/__init__.py +8 -0
- WMCore/Database/CMSCouch.py +1430 -0
- WMCore/Database/ConfigDBMap.py +29 -0
- WMCore/Database/CouchMonitoring.py +450 -0
- WMCore/Database/CouchUtils.py +118 -0
- WMCore/Database/DBCore.py +198 -0
- WMCore/Database/DBCreator.py +113 -0
- WMCore/Database/DBExceptionHandler.py +59 -0
- WMCore/Database/DBFactory.py +117 -0
- WMCore/Database/DBFormatter.py +177 -0
- WMCore/Database/Dialects.py +13 -0
- WMCore/Database/ExecuteDAO.py +327 -0
- WMCore/Database/MongoDB.py +241 -0
- WMCore/Database/MySQL/Destroy.py +42 -0
- WMCore/Database/MySQL/ListUserContent.py +20 -0
- WMCore/Database/MySQL/__init__.py +9 -0
- WMCore/Database/MySQLCore.py +132 -0
- WMCore/Database/Oracle/Destroy.py +56 -0
- WMCore/Database/Oracle/ListUserContent.py +19 -0
- WMCore/Database/Oracle/__init__.py +9 -0
- WMCore/Database/ResultSet.py +44 -0
- WMCore/Database/Transaction.py +91 -0
- WMCore/Database/__init__.py +9 -0
- WMCore/Database/ipy_profile_couch.py +438 -0
- WMCore/GlobalWorkQueue/CherryPyThreads/CleanUpTask.py +29 -0
- WMCore/GlobalWorkQueue/CherryPyThreads/HeartbeatMonitor.py +105 -0
- WMCore/GlobalWorkQueue/CherryPyThreads/LocationUpdateTask.py +28 -0
- WMCore/GlobalWorkQueue/CherryPyThreads/ReqMgrInteractionTask.py +35 -0
- WMCore/GlobalWorkQueue/CherryPyThreads/__init__.py +0 -0
- WMCore/GlobalWorkQueue/__init__.py +0 -0
- WMCore/GroupUser/CouchObject.py +127 -0
- WMCore/GroupUser/Decorators.py +51 -0
- WMCore/GroupUser/Group.py +33 -0
- WMCore/GroupUser/Interface.py +73 -0
- WMCore/GroupUser/User.py +96 -0
- WMCore/GroupUser/__init__.py +11 -0
- WMCore/Lexicon.py +836 -0
- WMCore/REST/Auth.py +202 -0
- WMCore/REST/CherryPyPeriodicTask.py +166 -0
- WMCore/REST/Error.py +333 -0
- WMCore/REST/Format.py +642 -0
- WMCore/REST/HeartbeatMonitorBase.py +90 -0
- WMCore/REST/Main.py +636 -0
- WMCore/REST/Server.py +2435 -0
- WMCore/REST/Services.py +24 -0
- WMCore/REST/Test.py +120 -0
- WMCore/REST/Tools.py +38 -0
- WMCore/REST/Validation.py +250 -0
- WMCore/REST/__init__.py +1 -0
- WMCore/ReqMgr/DataStructs/RequestStatus.py +209 -0
- WMCore/ReqMgr/DataStructs/RequestType.py +13 -0
- WMCore/ReqMgr/DataStructs/__init__.py +0 -0
- WMCore/ReqMgr/__init__.py +1 -0
- WMCore/Services/AlertManager/AlertManagerAPI.py +111 -0
- WMCore/Services/AlertManager/__init__.py +0 -0
- WMCore/Services/CRIC/CRIC.py +238 -0
- WMCore/Services/CRIC/__init__.py +0 -0
- WMCore/Services/DBS/DBS3Reader.py +1044 -0
- WMCore/Services/DBS/DBSConcurrency.py +44 -0
- WMCore/Services/DBS/DBSErrors.py +112 -0
- WMCore/Services/DBS/DBSReader.py +23 -0
- WMCore/Services/DBS/DBSUtils.py +166 -0
- WMCore/Services/DBS/DBSWriterObjects.py +381 -0
- WMCore/Services/DBS/ProdException.py +133 -0
- WMCore/Services/DBS/__init__.py +8 -0
- WMCore/Services/FWJRDB/FWJRDBAPI.py +118 -0
- WMCore/Services/FWJRDB/__init__.py +0 -0
- WMCore/Services/HTTPS/HTTPSAuthHandler.py +66 -0
- WMCore/Services/HTTPS/__init__.py +0 -0
- WMCore/Services/LogDB/LogDB.py +201 -0
- WMCore/Services/LogDB/LogDBBackend.py +191 -0
- WMCore/Services/LogDB/LogDBExceptions.py +11 -0
- WMCore/Services/LogDB/LogDBReport.py +85 -0
- WMCore/Services/LogDB/__init__.py +0 -0
- WMCore/Services/MSPileup/__init__.py +0 -0
- WMCore/Services/MSUtils/MSUtils.py +54 -0
- WMCore/Services/MSUtils/__init__.py +0 -0
- WMCore/Services/McM/McM.py +173 -0
- WMCore/Services/McM/__init__.py +8 -0
- WMCore/Services/MonIT/Grafana.py +133 -0
- WMCore/Services/MonIT/__init__.py +0 -0
- WMCore/Services/PyCondor/PyCondorAPI.py +154 -0
- WMCore/Services/PyCondor/__init__.py +0 -0
- WMCore/Services/ReqMgr/ReqMgr.py +261 -0
- WMCore/Services/ReqMgr/__init__.py +0 -0
- WMCore/Services/ReqMgrAux/ReqMgrAux.py +419 -0
- WMCore/Services/ReqMgrAux/__init__.py +0 -0
- WMCore/Services/RequestDB/RequestDBReader.py +267 -0
- WMCore/Services/RequestDB/RequestDBWriter.py +39 -0
- WMCore/Services/RequestDB/__init__.py +0 -0
- WMCore/Services/Requests.py +624 -0
- WMCore/Services/Rucio/Rucio.py +1290 -0
- WMCore/Services/Rucio/RucioUtils.py +74 -0
- WMCore/Services/Rucio/__init__.py +0 -0
- WMCore/Services/RucioConMon/RucioConMon.py +121 -0
- WMCore/Services/RucioConMon/__init__.py +0 -0
- WMCore/Services/Service.py +400 -0
- WMCore/Services/StompAMQ/__init__.py +0 -0
- WMCore/Services/TagCollector/TagCollector.py +155 -0
- WMCore/Services/TagCollector/XMLUtils.py +98 -0
- WMCore/Services/TagCollector/__init__.py +0 -0
- WMCore/Services/UUIDLib.py +13 -0
- WMCore/Services/UserFileCache/UserFileCache.py +160 -0
- WMCore/Services/UserFileCache/__init__.py +8 -0
- WMCore/Services/WMAgent/WMAgent.py +63 -0
- WMCore/Services/WMAgent/__init__.py +0 -0
- WMCore/Services/WMArchive/CMSSWMetrics.py +526 -0
- WMCore/Services/WMArchive/DataMap.py +463 -0
- WMCore/Services/WMArchive/WMArchive.py +33 -0
- WMCore/Services/WMArchive/__init__.py +0 -0
- WMCore/Services/WMBS/WMBS.py +97 -0
- WMCore/Services/WMBS/__init__.py +0 -0
- WMCore/Services/WMStats/DataStruct/RequestInfoCollection.py +300 -0
- WMCore/Services/WMStats/DataStruct/__init__.py +0 -0
- WMCore/Services/WMStats/WMStatsPycurl.py +145 -0
- WMCore/Services/WMStats/WMStatsReader.py +445 -0
- WMCore/Services/WMStats/WMStatsWriter.py +273 -0
- WMCore/Services/WMStats/__init__.py +0 -0
- WMCore/Services/WMStatsServer/WMStatsServer.py +134 -0
- WMCore/Services/WMStatsServer/__init__.py +0 -0
- WMCore/Services/WorkQueue/WorkQueue.py +492 -0
- WMCore/Services/WorkQueue/__init__.py +0 -0
- WMCore/Services/__init__.py +8 -0
- WMCore/Services/pycurl_manager.py +574 -0
- WMCore/WMBase.py +50 -0
- WMCore/WMConnectionBase.py +164 -0
- WMCore/WMException.py +183 -0
- WMCore/WMExceptions.py +269 -0
- WMCore/WMFactory.py +76 -0
- WMCore/WMInit.py +377 -0
- WMCore/WMLogging.py +104 -0
- WMCore/WMSpec/ConfigSectionTree.py +442 -0
- WMCore/WMSpec/Persistency.py +135 -0
- WMCore/WMSpec/Steps/BuildMaster.py +87 -0
- WMCore/WMSpec/Steps/BuildTools.py +201 -0
- WMCore/WMSpec/Steps/Builder.py +97 -0
- WMCore/WMSpec/Steps/Diagnostic.py +89 -0
- WMCore/WMSpec/Steps/Emulator.py +62 -0
- WMCore/WMSpec/Steps/ExecuteMaster.py +208 -0
- WMCore/WMSpec/Steps/Executor.py +210 -0
- WMCore/WMSpec/Steps/StepFactory.py +213 -0
- WMCore/WMSpec/Steps/TaskEmulator.py +75 -0
- WMCore/WMSpec/Steps/Template.py +204 -0
- WMCore/WMSpec/Steps/Templates/AlcaHarvest.py +76 -0
- WMCore/WMSpec/Steps/Templates/CMSSW.py +613 -0
- WMCore/WMSpec/Steps/Templates/DQMUpload.py +59 -0
- WMCore/WMSpec/Steps/Templates/DeleteFiles.py +70 -0
- WMCore/WMSpec/Steps/Templates/LogArchive.py +84 -0
- WMCore/WMSpec/Steps/Templates/LogCollect.py +105 -0
- WMCore/WMSpec/Steps/Templates/StageOut.py +105 -0
- WMCore/WMSpec/Steps/Templates/__init__.py +10 -0
- WMCore/WMSpec/Steps/WMExecutionFailure.py +21 -0
- WMCore/WMSpec/Steps/__init__.py +8 -0
- WMCore/WMSpec/Utilities.py +63 -0
- WMCore/WMSpec/WMSpecErrors.py +12 -0
- WMCore/WMSpec/WMStep.py +347 -0
- WMCore/WMSpec/WMTask.py +1997 -0
- WMCore/WMSpec/WMWorkload.py +2288 -0
- WMCore/WMSpec/WMWorkloadTools.py +382 -0
- WMCore/WMSpec/__init__.py +9 -0
- WMCore/WorkQueue/DataLocationMapper.py +273 -0
- WMCore/WorkQueue/DataStructs/ACDCBlock.py +47 -0
- WMCore/WorkQueue/DataStructs/Block.py +48 -0
- WMCore/WorkQueue/DataStructs/CouchWorkQueueElement.py +148 -0
- WMCore/WorkQueue/DataStructs/WorkQueueElement.py +274 -0
- WMCore/WorkQueue/DataStructs/WorkQueueElementResult.py +152 -0
- WMCore/WorkQueue/DataStructs/WorkQueueElementsSummary.py +185 -0
- WMCore/WorkQueue/DataStructs/__init__.py +0 -0
- WMCore/WorkQueue/Policy/End/EndPolicyInterface.py +44 -0
- WMCore/WorkQueue/Policy/End/SingleShot.py +22 -0
- WMCore/WorkQueue/Policy/End/__init__.py +32 -0
- WMCore/WorkQueue/Policy/PolicyInterface.py +17 -0
- WMCore/WorkQueue/Policy/Start/Block.py +258 -0
- WMCore/WorkQueue/Policy/Start/Dataset.py +180 -0
- WMCore/WorkQueue/Policy/Start/MonteCarlo.py +131 -0
- WMCore/WorkQueue/Policy/Start/ResubmitBlock.py +171 -0
- WMCore/WorkQueue/Policy/Start/StartPolicyInterface.py +316 -0
- WMCore/WorkQueue/Policy/Start/__init__.py +34 -0
- WMCore/WorkQueue/Policy/__init__.py +57 -0
- WMCore/WorkQueue/WMBSHelper.py +772 -0
- WMCore/WorkQueue/WorkQueue.py +1237 -0
- WMCore/WorkQueue/WorkQueueBackend.py +750 -0
- WMCore/WorkQueue/WorkQueueBase.py +39 -0
- WMCore/WorkQueue/WorkQueueExceptions.py +44 -0
- WMCore/WorkQueue/WorkQueueReqMgrInterface.py +278 -0
- WMCore/WorkQueue/WorkQueueUtils.py +130 -0
- WMCore/WorkQueue/__init__.py +13 -0
- WMCore/Wrappers/JsonWrapper/JSONThunker.py +342 -0
- WMCore/Wrappers/JsonWrapper/__init__.py +7 -0
- WMCore/Wrappers/__init__.py +6 -0
- WMCore/__init__.py +10 -0
- wmglobalqueue-2.4.5.1.data/data/bin/wmc-dist-patch +15 -0
- wmglobalqueue-2.4.5.1.data/data/bin/wmc-dist-unpatch +8 -0
- wmglobalqueue-2.4.5.1.data/data/bin/wmc-httpd +3 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/.couchapprc +1 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/README.md +40 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/_attachments/index.html +264 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/_attachments/js/ElementInfoByWorkflow.js +96 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/_attachments/js/StuckElementInfo.js +57 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/_attachments/js/WorkloadInfoTable.js +80 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/_attachments/js/dataTable.js +70 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/_attachments/js/namespace.js +23 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/_attachments/style/main.css +75 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/couchapp.json +4 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/filters/childQueueFilter.js +13 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/filters/filterDeletedDocs.js +3 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/filters/queueFilter.js +11 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/language +1 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/lib/mustache.js +333 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/lib/validate.js +27 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/lib/workqueue_utils.js +61 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/lists/elementsDetail.js +28 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/lists/filter.js +86 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/lists/stuckElements.js +38 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/lists/workRestrictions.js +153 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/lists/workflowSummary.js +28 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/rewrites.json +73 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/shows/redirect.js +23 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/shows/status.js +40 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/templates/ElementSummaryByWorkflow.html +27 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/templates/StuckElementSummary.html +26 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/templates/TaskStatus.html +23 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/templates/WorkflowSummary.html +27 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/templates/partials/workqueue-common-lib.html +2 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/templates/partials/yui-lib-remote.html +16 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/templates/partials/yui-lib.html +18 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/updates/in-place.js +50 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/validate_doc_update.js +8 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/vendor/couchapp/_attachments/jquery.couch.app.js +235 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/vendor/couchapp/_attachments/jquery.pathbinder.js +173 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/activeData/map.js +8 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/activeData/reduce.js +2 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/activeParentData/map.js +8 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/activeParentData/reduce.js +2 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/activePileupData/map.js +8 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/activePileupData/reduce.js +2 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/analyticsData/map.js +11 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/analyticsData/reduce.js +1 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/availableByPriority/map.js +6 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/conflicts/map.js +5 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/elements/map.js +5 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/elementsByData/map.js +8 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/elementsByParent/map.js +8 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/elementsByParentData/map.js +8 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/elementsByPileupData/map.js +8 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/elementsByStatus/map.js +8 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/elementsBySubscription/map.js +6 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/elementsByWorkflow/map.js +8 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/elementsByWorkflow/reduce.js +3 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/elementsDetailByWorkflowAndStatus/map.js +26 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobInjectStatusByRequest/map.js +10 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobInjectStatusByRequest/reduce.js +1 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobStatusByRequest/map.js +6 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobStatusByRequest/reduce.js +1 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobsByChildQueueAndPriority/map.js +6 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobsByChildQueueAndPriority/reduce.js +1 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobsByChildQueueAndStatus/map.js +6 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobsByChildQueueAndStatus/reduce.js +1 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobsByRequest/map.js +6 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobsByRequest/reduce.js +1 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobsByStatus/map.js +6 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobsByStatus/reduce.js +1 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobsByStatusAndPriority/map.js +6 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobsByStatusAndPriority/reduce.js +1 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/openRequests/map.js +6 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/recent-items/map.js +5 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/siteWhitelistByRequest/map.js +6 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/siteWhitelistByRequest/reduce.js +1 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/specsByWorkflow/map.js +5 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/stuckElements/map.js +38 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/wmbsInjectStatusByRequest/map.js +12 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/wmbsInjectStatusByRequest/reduce.js +3 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/wmbsUrl/map.js +6 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/wmbsUrl/reduce.js +2 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/wmbsUrlByRequest/map.js +6 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/wmbsUrlByRequest/reduce.js +2 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/workflowSummary/map.js +9 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/workflowSummary/reduce.js +10 -0
- wmglobalqueue-2.4.5.1.dist-info/METADATA +26 -0
- wmglobalqueue-2.4.5.1.dist-info/RECORD +347 -0
- wmglobalqueue-2.4.5.1.dist-info/WHEEL +5 -0
- wmglobalqueue-2.4.5.1.dist-info/licenses/LICENSE +202 -0
- wmglobalqueue-2.4.5.1.dist-info/licenses/NOTICE +16 -0
- wmglobalqueue-2.4.5.1.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,1237 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
|
|
3
|
+
"""
|
|
4
|
+
WorkQueue provides functionality to queue large chunks of work,
|
|
5
|
+
thus acting as a buffer for the next steps in job processing
|
|
6
|
+
|
|
7
|
+
WMSpec objects are fed into the queue, split into coarse grained work units
|
|
8
|
+
and released when a suitable resource is found to execute them.
|
|
9
|
+
|
|
10
|
+
https://twiki.cern.ch/twiki/bin/view/CMS/WMCoreJobPool
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import division, print_function
|
|
14
|
+
|
|
15
|
+
from builtins import str as newstr, bytes
|
|
16
|
+
from future.utils import viewitems, listvalues
|
|
17
|
+
|
|
18
|
+
import os
|
|
19
|
+
import threading
|
|
20
|
+
import time
|
|
21
|
+
from collections import defaultdict
|
|
22
|
+
|
|
23
|
+
from WMCore import Lexicon
|
|
24
|
+
from WMCore.ACDC.DataCollectionService import DataCollectionService
|
|
25
|
+
from WMCore.Database.CMSCouch import CouchInternalServerError, CouchNotFoundError
|
|
26
|
+
from WMCore.Services.CRIC.CRIC import CRIC
|
|
27
|
+
from WMCore.Services.DBS.DBSReader import DBSReader
|
|
28
|
+
from WMCore.Services.LogDB.LogDB import LogDB
|
|
29
|
+
from WMCore.Services.ReqMgr.ReqMgr import ReqMgr
|
|
30
|
+
from WMCore.Services.RequestDB.RequestDBReader import RequestDBReader
|
|
31
|
+
from WMCore.Services.Rucio.Rucio import Rucio
|
|
32
|
+
from WMCore.Services.WorkQueue.WorkQueue import WorkQueue as WorkQueueDS
|
|
33
|
+
from WMCore.WMSpec.WMWorkload import WMWorkloadHelper, getWorkloadFromTask
|
|
34
|
+
from WMCore.WorkQueue.DataLocationMapper import WorkQueueDataLocationMapper
|
|
35
|
+
from WMCore.WorkQueue.DataStructs.ACDCBlock import ACDCBlock
|
|
36
|
+
from WMCore.WorkQueue.DataStructs.WorkQueueElement import possibleSites
|
|
37
|
+
from WMCore.WorkQueue.DataStructs.WorkQueueElementsSummary import getGlobalSiteStatusSummary
|
|
38
|
+
from WMCore.WorkQueue.Policy.End import endPolicy
|
|
39
|
+
from WMCore.WorkQueue.Policy.Start import startPolicy
|
|
40
|
+
from WMCore.WorkQueue.WorkQueueBackend import WorkQueueBackend
|
|
41
|
+
from WMCore.WorkQueue.WorkQueueBase import WorkQueueBase
|
|
42
|
+
from WMCore.WorkQueue.WorkQueueExceptions import (TERMINAL_EXCEPTIONS, WorkQueueError, WorkQueueNoMatchingElements,
|
|
43
|
+
WorkQueueWMSpecError)
|
|
44
|
+
from WMCore.WorkQueue.WorkQueueUtils import cmsSiteNames
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
# Convenience constructor functions
|
|
48
|
+
|
|
49
|
+
def globalQueue(logger=None, dbi=None, **kwargs):
|
|
50
|
+
"""Convenience method to create a WorkQueue suitable for use globally
|
|
51
|
+
"""
|
|
52
|
+
defaults = {'PopulateFilesets': False,
|
|
53
|
+
'LocalQueueFlag': False,
|
|
54
|
+
'TrackLocationOrSubscription': 'location'
|
|
55
|
+
}
|
|
56
|
+
defaults.update(kwargs)
|
|
57
|
+
return WorkQueue(logger, dbi, **defaults)
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def localQueue(logger=None, dbi=None, **kwargs):
|
|
61
|
+
"""Convenience method to create a WorkQueue suitable for use locally
|
|
62
|
+
"""
|
|
63
|
+
defaults = {'TrackLocationOrSubscription': 'location'}
|
|
64
|
+
defaults.update(kwargs)
|
|
65
|
+
return WorkQueue(logger, dbi, **defaults)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class WorkQueue(WorkQueueBase):
|
|
69
|
+
"""
|
|
70
|
+
_WorkQueue_
|
|
71
|
+
|
|
72
|
+
WorkQueue object - interface to WorkQueue functionality.
|
|
73
|
+
"""
|
|
74
|
+
|
|
75
|
+
def __init__(self, logger=None, dbi=None, **params):
|
|
76
|
+
|
|
77
|
+
WorkQueueBase.__init__(self, logger, dbi)
|
|
78
|
+
self.parent_queue = None
|
|
79
|
+
self.params = params
|
|
80
|
+
|
|
81
|
+
# config argument (within params) shall be reference to
|
|
82
|
+
# Configuration instance
|
|
83
|
+
self.config = params.get("Config", None)
|
|
84
|
+
self.params.setdefault('CouchUrl', os.environ.get('COUCHURL'))
|
|
85
|
+
if not self.params.get('CouchUrl'):
|
|
86
|
+
raise RuntimeError('CouchUrl config value mandatory')
|
|
87
|
+
self.params.setdefault('DbName', 'workqueue')
|
|
88
|
+
self.params.setdefault('InboxDbName', self.params['DbName'] + '_inbox')
|
|
89
|
+
self.params.setdefault('ParentQueueCouchUrl', None) # We get work from here
|
|
90
|
+
|
|
91
|
+
self.backend = WorkQueueBackend(self.params['CouchUrl'], self.params['DbName'],
|
|
92
|
+
self.params['InboxDbName'],
|
|
93
|
+
self.params['ParentQueueCouchUrl'], self.params.get('QueueURL'),
|
|
94
|
+
logger=self.logger)
|
|
95
|
+
self.workqueueDS = WorkQueueDS(self.params['CouchUrl'], self.params['DbName'],
|
|
96
|
+
self.params['InboxDbName'])
|
|
97
|
+
if self.params.get('ParentQueueCouchUrl'):
|
|
98
|
+
try:
|
|
99
|
+
if self.params.get('ParentQueueInboxCouchDBName'):
|
|
100
|
+
self.parent_queue = WorkQueueBackend(self.params['ParentQueueCouchUrl'].rsplit('/', 1)[0],
|
|
101
|
+
self.params['ParentQueueCouchUrl'].rsplit('/', 1)[1],
|
|
102
|
+
self.params['ParentQueueInboxCouchDBName'])
|
|
103
|
+
else:
|
|
104
|
+
self.parent_queue = WorkQueueBackend(self.params['ParentQueueCouchUrl'].rsplit('/', 1)[0],
|
|
105
|
+
self.params['ParentQueueCouchUrl'].rsplit('/', 1)[1])
|
|
106
|
+
except IndexError as ex:
|
|
107
|
+
# Probable cause: Someone didn't put the global WorkQueue name in
|
|
108
|
+
# the ParentCouchUrl
|
|
109
|
+
msg = "Parsing failure for ParentQueueCouchUrl - probably missing dbname in input\n"
|
|
110
|
+
msg += "Exception: %s\n" % str(ex)
|
|
111
|
+
msg += str("ParentQueueCouchUrl: %s\n" % self.params['ParentQueueCouchUrl'])
|
|
112
|
+
self.logger.error(msg)
|
|
113
|
+
raise WorkQueueError(msg)
|
|
114
|
+
self.params['ParentQueueCouchUrl'] = self.parent_queue.queueUrl
|
|
115
|
+
|
|
116
|
+
# save each DBSReader instance in the class object, such that
|
|
117
|
+
# the same object is not shared amongst multiple threads
|
|
118
|
+
self.dbses = {}
|
|
119
|
+
|
|
120
|
+
self.params.setdefault('QueueDepth', 1) # when less than this locally
|
|
121
|
+
self.params.setdefault('WorkPerCycle', 100)
|
|
122
|
+
self.params.setdefault('RowsPerSlice', 2500)
|
|
123
|
+
self.params.setdefault('MaxRowsPerCycle', 50000)
|
|
124
|
+
self.params.setdefault('LocationRefreshInterval', 600)
|
|
125
|
+
self.params.setdefault('FullLocationRefreshInterval', 7200)
|
|
126
|
+
self.params.setdefault('TrackLocationOrSubscription', 'location')
|
|
127
|
+
self.params.setdefault('ReleaseIncompleteBlocks', False)
|
|
128
|
+
self.params.setdefault('ReleaseRequireSubscribed', True)
|
|
129
|
+
self.params.setdefault('PopulateFilesets', True)
|
|
130
|
+
self.params.setdefault('LocalQueueFlag', True)
|
|
131
|
+
self.params.setdefault('QueueRetryTime', 86400)
|
|
132
|
+
self.params.setdefault('stuckElementAlertTime', 172800)
|
|
133
|
+
self.params.setdefault('reqmgrCompleteGraceTime', 604800)
|
|
134
|
+
self.params.setdefault('cancelGraceTime', 86400)
|
|
135
|
+
|
|
136
|
+
self.params.setdefault('JobDumpConfig', None)
|
|
137
|
+
self.params.setdefault('BossAirConfig', None)
|
|
138
|
+
|
|
139
|
+
self.params['QueueURL'] = self.backend.queueUrl # url this queue is visible on
|
|
140
|
+
# backend took previous QueueURL and sanitized it
|
|
141
|
+
self.params.setdefault('WMBSUrl', None) # this will only be set on local Queue
|
|
142
|
+
if self.params.get('WMBSUrl'):
|
|
143
|
+
self.params['WMBSUrl'] = Lexicon.sanitizeURL(self.params['WMBSUrl'])['url']
|
|
144
|
+
self.params.setdefault('Team', "")
|
|
145
|
+
|
|
146
|
+
if self.params.get('CacheDir'):
|
|
147
|
+
try:
|
|
148
|
+
os.makedirs(self.params['CacheDir'])
|
|
149
|
+
except OSError:
|
|
150
|
+
pass
|
|
151
|
+
elif self.params.get('PopulateFilesets'):
|
|
152
|
+
raise RuntimeError('CacheDir mandatory for local queue')
|
|
153
|
+
|
|
154
|
+
if self.params.get('CRIC'):
|
|
155
|
+
self.cric = self.params['CRIC']
|
|
156
|
+
else:
|
|
157
|
+
self.cric = CRIC()
|
|
158
|
+
|
|
159
|
+
self.params.setdefault('SplittingMapping', {})
|
|
160
|
+
self.params['SplittingMapping'].setdefault('DatasetBlock',
|
|
161
|
+
{'name': 'Block',
|
|
162
|
+
'args': {}}
|
|
163
|
+
)
|
|
164
|
+
self.params['SplittingMapping'].setdefault('MonteCarlo',
|
|
165
|
+
{'name': 'MonteCarlo',
|
|
166
|
+
'args': {}}
|
|
167
|
+
)
|
|
168
|
+
self.params['SplittingMapping'].setdefault('Dataset',
|
|
169
|
+
{'name': 'Dataset',
|
|
170
|
+
'args': {}}
|
|
171
|
+
)
|
|
172
|
+
self.params['SplittingMapping'].setdefault('Block',
|
|
173
|
+
{'name': 'Block',
|
|
174
|
+
'args': {}}
|
|
175
|
+
)
|
|
176
|
+
self.params['SplittingMapping'].setdefault('ResubmitBlock',
|
|
177
|
+
{'name': 'ResubmitBlock',
|
|
178
|
+
'args': {}}
|
|
179
|
+
)
|
|
180
|
+
|
|
181
|
+
self.params.setdefault('EndPolicySettings', {})
|
|
182
|
+
|
|
183
|
+
assert (self.params['TrackLocationOrSubscription'] in ('subscription',
|
|
184
|
+
'location'))
|
|
185
|
+
# Can only release blocks on location
|
|
186
|
+
if self.params['TrackLocationOrSubscription'] == 'location':
|
|
187
|
+
if self.params['SplittingMapping']['DatasetBlock']['name'] != 'Block':
|
|
188
|
+
raise RuntimeError('Only blocks can be released on location')
|
|
189
|
+
|
|
190
|
+
self.params.setdefault('rucioAccount', "wmcore_transferor")
|
|
191
|
+
|
|
192
|
+
self.rucio = Rucio(self.params['rucioAccount'],
|
|
193
|
+
self.params['rucioUrl'], self.params['rucioAuthUrl'],
|
|
194
|
+
configDict=dict(logger=self.logger))
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
self.dataLocationMapper = WorkQueueDataLocationMapper(self.logger, self.backend,
|
|
198
|
+
rucio=self.rucio,
|
|
199
|
+
cric=self.cric,
|
|
200
|
+
locationFrom=self.params['TrackLocationOrSubscription'],
|
|
201
|
+
incompleteBlocks=self.params['ReleaseIncompleteBlocks'],
|
|
202
|
+
requireBlocksSubscribed=not self.params[
|
|
203
|
+
'ReleaseIncompleteBlocks'],
|
|
204
|
+
fullRefreshInterval=self.params[
|
|
205
|
+
'FullLocationRefreshInterval'],
|
|
206
|
+
updateIntervalCoarseness=self.params[
|
|
207
|
+
'LocationRefreshInterval'])
|
|
208
|
+
|
|
209
|
+
# used for only global WQ
|
|
210
|
+
if self.params.get('ReqMgrServiceURL'):
|
|
211
|
+
self.reqmgrSvc = ReqMgr(self.params['ReqMgrServiceURL'])
|
|
212
|
+
|
|
213
|
+
if self.params.get('RequestDBURL'):
|
|
214
|
+
# This is need for getting post call
|
|
215
|
+
# TODO: Change ReqMgr api to accept post for for retrieving the data and remove this
|
|
216
|
+
self.requestDB = RequestDBReader(self.params['RequestDBURL'])
|
|
217
|
+
|
|
218
|
+
# set the thread name before create the log db.
|
|
219
|
+
# only sets that when it is not set already
|
|
220
|
+
# setLogDB
|
|
221
|
+
|
|
222
|
+
myThread = threading.currentThread()
|
|
223
|
+
if myThread.getName() == "MainThread": # this should be only GQ case other cases thread name should be set
|
|
224
|
+
myThread.setName(self.__class__.__name__)
|
|
225
|
+
|
|
226
|
+
centralurl = self.params.get("central_logdb_url")
|
|
227
|
+
identifier = self.params.get("log_reporter")
|
|
228
|
+
self.logdb = LogDB(centralurl, identifier, logger=self.logger)
|
|
229
|
+
|
|
230
|
+
self.logger.debug("WorkQueue created successfully")
|
|
231
|
+
|
|
232
|
+
def __len__(self):
|
|
233
|
+
"""Returns number of Available elements in queue"""
|
|
234
|
+
return self.backend.queueLength()
|
|
235
|
+
|
|
236
|
+
def setStatus(self, status, elementIDs=None, SubscriptionId=None, WorkflowName=None):
|
|
237
|
+
"""
|
|
238
|
+
_setStatus_, throws an exception if no elements are updated
|
|
239
|
+
|
|
240
|
+
"""
|
|
241
|
+
try:
|
|
242
|
+
if not elementIDs:
|
|
243
|
+
elementIDs = []
|
|
244
|
+
iter(elementIDs)
|
|
245
|
+
if isinstance(elementIDs, (newstr, bytes)):
|
|
246
|
+
raise TypeError
|
|
247
|
+
except TypeError:
|
|
248
|
+
elementIDs = [elementIDs]
|
|
249
|
+
|
|
250
|
+
if status == 'Canceled': # Cancel needs special actions
|
|
251
|
+
return self.cancelWork(elementIDs, SubscriptionId, WorkflowName)
|
|
252
|
+
|
|
253
|
+
args = {}
|
|
254
|
+
if SubscriptionId:
|
|
255
|
+
args['SubscriptionId'] = SubscriptionId
|
|
256
|
+
if WorkflowName:
|
|
257
|
+
args['RequestName'] = WorkflowName
|
|
258
|
+
|
|
259
|
+
affected = self.backend.getElements(elementIDs=elementIDs, **args)
|
|
260
|
+
if not affected:
|
|
261
|
+
raise WorkQueueNoMatchingElements("No matching elements")
|
|
262
|
+
|
|
263
|
+
for x in affected:
|
|
264
|
+
x['Status'] = status
|
|
265
|
+
elements = self.backend.saveElements(*affected)
|
|
266
|
+
if len(affected) != len(elements):
|
|
267
|
+
raise RuntimeError("Some elements not updated, see log for details")
|
|
268
|
+
|
|
269
|
+
return elements
|
|
270
|
+
|
|
271
|
+
def setPriority(self, newpriority, *workflowNames):
|
|
272
|
+
"""
|
|
273
|
+
Update priority for a workflow, throw exception if no elements affected
|
|
274
|
+
"""
|
|
275
|
+
self.logger.info("Priority change request to %s for %s", newpriority, str(workflowNames))
|
|
276
|
+
affected = []
|
|
277
|
+
for wf in workflowNames:
|
|
278
|
+
affected.extend(self.backend.getElements(returnIdOnly=True, RequestName=wf))
|
|
279
|
+
|
|
280
|
+
self.backend.updateElements(*affected, Priority=newpriority)
|
|
281
|
+
|
|
282
|
+
if not affected:
|
|
283
|
+
raise RuntimeError("Priority not changed: No matching elements")
|
|
284
|
+
|
|
285
|
+
def resetWork(self, ids):
|
|
286
|
+
"""Put work back in Available state, from here either another queue
|
|
287
|
+
or wmbs can pick it up.
|
|
288
|
+
|
|
289
|
+
If work was Acquired by a child queue, the next status update will
|
|
290
|
+
cancel the work in the child.
|
|
291
|
+
|
|
292
|
+
Note: That the same child queue is free to pick the work up again,
|
|
293
|
+
there is no permanent blacklist of queues.
|
|
294
|
+
"""
|
|
295
|
+
self.logger.info("Resetting elements %s", str(ids))
|
|
296
|
+
try:
|
|
297
|
+
iter(ids)
|
|
298
|
+
except TypeError:
|
|
299
|
+
ids = [ids]
|
|
300
|
+
|
|
301
|
+
return self.backend.updateElements(*ids, Status='Available',
|
|
302
|
+
ChildQueueUrl=None, WMBSUrl=None)
|
|
303
|
+
|
|
304
|
+
def getWork(self, jobSlots, siteJobCounts, excludeWorkflows=None):
|
|
305
|
+
"""
|
|
306
|
+
Get available work from the queue, inject into wmbs & mark as running
|
|
307
|
+
|
|
308
|
+
jobSlots is dict format of {site: estimateJobSlot}
|
|
309
|
+
of the resources to get work for.
|
|
310
|
+
|
|
311
|
+
siteJobCounts is a dict format of {site: {prio: jobs}}
|
|
312
|
+
"""
|
|
313
|
+
excludeWorkflows = excludeWorkflows or []
|
|
314
|
+
results = []
|
|
315
|
+
if not self.backend.isAvailable():
|
|
316
|
+
self.logger.warning('Backend busy or down: skipping fetching of work')
|
|
317
|
+
return results
|
|
318
|
+
|
|
319
|
+
# TODO AMR: perhaps numElems limit should be removed for LQ -> WMBS acquisition
|
|
320
|
+
matches, _ = self.backend.availableWork(jobSlots, siteJobCounts,
|
|
321
|
+
excludeWorkflows=excludeWorkflows,
|
|
322
|
+
numElems=self.params['WorkPerCycle'],
|
|
323
|
+
rowsPerSlice=self.params['RowsPerSlice'],
|
|
324
|
+
maxRows=self.params['MaxRowsPerCycle'])
|
|
325
|
+
|
|
326
|
+
self.logger.info('Got %i elements matching the constraints', len(matches))
|
|
327
|
+
if not matches:
|
|
328
|
+
return results
|
|
329
|
+
|
|
330
|
+
myThread = threading.currentThread()
|
|
331
|
+
# cache wmspecs for lifetime of function call, likely we will have multiple elements for same spec.
|
|
332
|
+
# TODO: Check to see if we can skip spec loading - need to persist some more details to element
|
|
333
|
+
wmspecCache = {}
|
|
334
|
+
for match in matches:
|
|
335
|
+
blockName, dbsBlock = None, None
|
|
336
|
+
if self.params['PopulateFilesets']:
|
|
337
|
+
if match['RequestName'] not in wmspecCache:
|
|
338
|
+
wmspec = self.backend.getWMSpec(match['RequestName'])
|
|
339
|
+
wmspecCache[match['RequestName']] = wmspec
|
|
340
|
+
else:
|
|
341
|
+
wmspec = wmspecCache[match['RequestName']]
|
|
342
|
+
|
|
343
|
+
try:
|
|
344
|
+
if match['StartPolicy'] == 'Dataset':
|
|
345
|
+
# actually returns dataset name and dataset info
|
|
346
|
+
blockName, dbsBlock = self._getDBSDataset(match)
|
|
347
|
+
elif match['Inputs']:
|
|
348
|
+
blockName, dbsBlock = self._getDBSBlock(match, wmspec)
|
|
349
|
+
except Exception as ex:
|
|
350
|
+
msg = "%s, %s: \n" % (wmspec.name(), list(match['Inputs']))
|
|
351
|
+
msg += "failed to retrieve data from DBS/Rucio in LQ: \n%s" % str(ex)
|
|
352
|
+
self.logger.exception(msg)
|
|
353
|
+
self.logdb.post(wmspec.name(), msg, 'error')
|
|
354
|
+
continue
|
|
355
|
+
|
|
356
|
+
try:
|
|
357
|
+
match['Subscription'] = self._wmbsPreparation(match,
|
|
358
|
+
wmspec,
|
|
359
|
+
blockName,
|
|
360
|
+
dbsBlock)
|
|
361
|
+
self.logdb.delete(wmspec.name(), "error", this_thread=True)
|
|
362
|
+
except Exception as ex:
|
|
363
|
+
if getattr(myThread, 'transaction', None) is not None:
|
|
364
|
+
myThread.transaction.rollback()
|
|
365
|
+
msg = "Failed to create subscription for %s with block name %s" % (wmspec.name(), blockName)
|
|
366
|
+
msg += "\nError: %s" % str(ex)
|
|
367
|
+
self.logger.exception(msg)
|
|
368
|
+
self.logdb.post(wmspec.name(), msg, 'error')
|
|
369
|
+
continue
|
|
370
|
+
|
|
371
|
+
results.append(match)
|
|
372
|
+
|
|
373
|
+
del wmspecCache # remove cache explicitly
|
|
374
|
+
self.logger.info('Injected %s out of %s units into WMBS', len(results), len(matches))
|
|
375
|
+
return results
|
|
376
|
+
|
|
377
|
+
def _getDbs(self, dbsUrl):
|
|
378
|
+
"""
|
|
379
|
+
If we have already construct a DBSReader object pointing to
|
|
380
|
+
the DBS URL provided, return it. Otherwise, create and return
|
|
381
|
+
a new instance.
|
|
382
|
+
:param dbsUrl: string with the DBS url
|
|
383
|
+
:return: an instance of DBSReader
|
|
384
|
+
"""
|
|
385
|
+
if dbsUrl in self.dbses:
|
|
386
|
+
return self.dbses[dbsUrl]
|
|
387
|
+
return DBSReader(dbsUrl)
|
|
388
|
+
|
|
389
|
+
def _getDBSDataset(self, match):
|
|
390
|
+
"""
|
|
391
|
+
Given a workqueue element with Dataset start policy, find all blocks
|
|
392
|
+
with valid files and resolve their location in Rucio.
|
|
393
|
+
:param match: workqueue element dictionary
|
|
394
|
+
:return: a tuple of the dataset name and its files and RSEs
|
|
395
|
+
"""
|
|
396
|
+
dbsDatasetDict = {'Files': [], 'PhEDExNodeNames': []}
|
|
397
|
+
dbs = self._getDbs(match['Dbs'])
|
|
398
|
+
datasetName = list(match['Inputs'])[0]
|
|
399
|
+
|
|
400
|
+
blocks = dbs.listFileBlocks(datasetName)
|
|
401
|
+
for blockName in blocks:
|
|
402
|
+
blockSummary = dbs.getFileBlock(blockName)
|
|
403
|
+
if not blockSummary['Files']:
|
|
404
|
+
self.logger.warning("Block name %s has no valid files. Skipping it.", blockName)
|
|
405
|
+
continue
|
|
406
|
+
blockSummary['PhEDExNodeNames'] = self.rucio.getDataLockedAndAvailable(name=blockName,
|
|
407
|
+
account=self.params['rucioAccount'])
|
|
408
|
+
dbsDatasetDict['Files'].extend(blockSummary['Files'])
|
|
409
|
+
dbsDatasetDict['PhEDExNodeNames'].extend(blockSummary['PhEDExNodeNames'])
|
|
410
|
+
|
|
411
|
+
dbsDatasetDict['PhEDExNodeNames'] = list(set(dbsDatasetDict['PhEDExNodeNames']))
|
|
412
|
+
|
|
413
|
+
return datasetName, dbsDatasetDict
|
|
414
|
+
|
|
415
|
+
def _getDBSBlock(self, match, wmspec):
|
|
416
|
+
"""Get DBS info for this block"""
|
|
417
|
+
blockName = list(match['Inputs'])[0] # TODO: Allow more than one
|
|
418
|
+
|
|
419
|
+
if match['ACDC']:
|
|
420
|
+
acdcInfo = match['ACDC']
|
|
421
|
+
acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
|
|
422
|
+
splitedBlockName = ACDCBlock.splitBlockName(blockName)
|
|
423
|
+
fileLists = acdc.getChunkFiles(acdcInfo['collection'],
|
|
424
|
+
acdcInfo['fileset'],
|
|
425
|
+
splitedBlockName['Offset'],
|
|
426
|
+
splitedBlockName['NumOfFiles'])
|
|
427
|
+
|
|
428
|
+
block = {}
|
|
429
|
+
block["Files"] = fileLists
|
|
430
|
+
return blockName, block
|
|
431
|
+
else:
|
|
432
|
+
dbs = self._getDbs(match['Dbs'])
|
|
433
|
+
if wmspec.getTask(match['TaskName']).parentProcessingFlag():
|
|
434
|
+
dbsBlockDict = dbs.getFileBlockWithParents(blockName)
|
|
435
|
+
dbsBlockDict['PhEDExNodeNames'] = self.rucio.getDataLockedAndAvailable(name=blockName,
|
|
436
|
+
account=self.params['rucioAccount'])
|
|
437
|
+
elif wmspec.getRequestType() == 'StoreResults':
|
|
438
|
+
dbsBlockDict = dbs.getFileBlock(blockName)
|
|
439
|
+
dbsBlockDict['PhEDExNodeNames'] = dbs.listFileBlockLocation(blockName)
|
|
440
|
+
else:
|
|
441
|
+
dbsBlockDict = dbs.getFileBlock(blockName)
|
|
442
|
+
dbsBlockDict['PhEDExNodeNames'] = self.rucio.getDataLockedAndAvailable(name=blockName,
|
|
443
|
+
account=self.params['rucioAccount'])
|
|
444
|
+
|
|
445
|
+
return blockName, dbsBlockDict
|
|
446
|
+
|
|
447
|
+
def _wmbsPreparation(self, match, wmspec, blockName, dbsBlock):
|
|
448
|
+
"""Inject data into wmbs and create subscription. """
|
|
449
|
+
from WMCore.WorkQueue.WMBSHelper import WMBSHelper
|
|
450
|
+
# the parent element (from local couch) can be fetch via:
|
|
451
|
+
# curl -ks -X GET 'http://localhost:5984/workqueue/<ParentQueueId>'
|
|
452
|
+
|
|
453
|
+
# Keep in mind that WQE contains sites, wmbs location contains pnns
|
|
454
|
+
commonSites = possibleSites(match)
|
|
455
|
+
commonLocation = self.cric.PSNstoPNNs(commonSites, allowPNNLess=True)
|
|
456
|
+
msg = "Running WMBS preparation for %s with ParentQueueId %s,\n with common location %s"
|
|
457
|
+
self.logger.info(msg, match['RequestName'], match['ParentQueueId'], commonLocation)
|
|
458
|
+
|
|
459
|
+
mask = match['Mask']
|
|
460
|
+
wmbsHelper = WMBSHelper(wmspec, match['TaskName'], blockName, mask,
|
|
461
|
+
self.params['CacheDir'], commonLocation)
|
|
462
|
+
|
|
463
|
+
sub, match['NumOfFilesAdded'] = wmbsHelper.createSubscriptionAndAddFiles(block=dbsBlock)
|
|
464
|
+
self.logger.info("Created top level subscription %s for %s with %s files",
|
|
465
|
+
sub['id'], match['RequestName'], match['NumOfFilesAdded'])
|
|
466
|
+
|
|
467
|
+
# update couch with wmbs subscription info
|
|
468
|
+
match['SubscriptionId'] = sub['id']
|
|
469
|
+
match['Status'] = 'Running'
|
|
470
|
+
# do update rather than save to avoid conflicts from other thread writes
|
|
471
|
+
self.backend.updateElements(match.id, Status='Running', SubscriptionId=sub['id'],
|
|
472
|
+
NumOfFilesAdded=match['NumOfFilesAdded'])
|
|
473
|
+
self.logger.info("LQE %s set to 'Running' for request %s", match.id, match['RequestName'])
|
|
474
|
+
|
|
475
|
+
return sub
|
|
476
|
+
|
|
477
|
+
def _assignToChildQueue(self, queue, *elements):
|
|
478
|
+
"""Assign work from parent to queue"""
|
|
479
|
+
workByRequest = {}
|
|
480
|
+
for ele in elements:
|
|
481
|
+
ele['Status'] = 'Negotiating'
|
|
482
|
+
ele['ChildQueueUrl'] = queue
|
|
483
|
+
ele['ParentQueueUrl'] = self.params['ParentQueueCouchUrl']
|
|
484
|
+
ele['WMBSUrl'] = self.params["WMBSUrl"]
|
|
485
|
+
workByRequest.setdefault(ele['RequestName'], 0)
|
|
486
|
+
workByRequest[ele['RequestName']] += 1
|
|
487
|
+
self.logger.info("Setting GQE status to 'Negotiating' and assigning to this child queue for:")
|
|
488
|
+
for reqName, numElem in workByRequest.items():
|
|
489
|
+
self.logger.info(" %d elements for: %s", numElem, reqName)
|
|
490
|
+
|
|
491
|
+
work = self.parent_queue.saveElements(*elements)
|
|
492
|
+
self.logger.info("GQE successfully saved for:")
|
|
493
|
+
for ele in work:
|
|
494
|
+
self.logger.info(" %s under GQE id: %s", ele['RequestName'], ele.id)
|
|
495
|
+
return work
|
|
496
|
+
|
|
497
|
+
def doneWork(self, elementIDs=None, SubscriptionId=None, WorkflowName=None):
|
|
498
|
+
"""Mark work as done
|
|
499
|
+
"""
|
|
500
|
+
return self.setStatus('Done', elementIDs=elementIDs,
|
|
501
|
+
SubscriptionId=SubscriptionId,
|
|
502
|
+
WorkflowName=WorkflowName)
|
|
503
|
+
|
|
504
|
+
def killWMBSWorkflows(self, reqNames):
|
|
505
|
+
"""
|
|
506
|
+
Kill/cancel workflows in WMBS and CouchDB.
|
|
507
|
+
Also update job state transition in three data sources: local couch,
|
|
508
|
+
local WMBS and dashboard.
|
|
509
|
+
:param reqNames: list of request names
|
|
510
|
+
:return: a list of workflows that failed to be cancelled
|
|
511
|
+
"""
|
|
512
|
+
failedWfs = []
|
|
513
|
+
if not reqNames:
|
|
514
|
+
return failedWfs
|
|
515
|
+
|
|
516
|
+
# import inside function since GQ doesn't need this.
|
|
517
|
+
from WMCore.WorkQueue.WMBSHelper import killWorkflow
|
|
518
|
+
myThread = threading.currentThread()
|
|
519
|
+
myThread.dbi = self.conn.dbi
|
|
520
|
+
myThread.logger = self.logger
|
|
521
|
+
|
|
522
|
+
for workflow in reqNames:
|
|
523
|
+
try:
|
|
524
|
+
self.logger.info("Killing workflow in WMBS: %s", workflow)
|
|
525
|
+
killWorkflow(workflow, self.params["JobDumpConfig"], self.params["BossAirConfig"])
|
|
526
|
+
except Exception as ex:
|
|
527
|
+
failedWfs.append(workflow)
|
|
528
|
+
msg = "Failed to kill workflow '%s' in WMBS. Error: %s" % (workflow, str(ex))
|
|
529
|
+
msg += "\nIt will be retried in the next loop"
|
|
530
|
+
self.logger.error(msg)
|
|
531
|
+
return failedWfs
|
|
532
|
+
|
|
533
|
+
def cancelWork(self, elementIDs=None, SubscriptionId=None, WorkflowName=None, elements=None):
|
|
534
|
+
"""Cancel work - delete in wmbs, delete from workqueue db, set canceled in inbox
|
|
535
|
+
Elements may be directly provided or determined from series of filter arguments
|
|
536
|
+
"""
|
|
537
|
+
if not elements:
|
|
538
|
+
args = {}
|
|
539
|
+
if SubscriptionId:
|
|
540
|
+
args['SubscriptionId'] = SubscriptionId
|
|
541
|
+
if WorkflowName:
|
|
542
|
+
args['RequestName'] = WorkflowName
|
|
543
|
+
elements = self.backend.getElements(elementIDs=elementIDs, **args)
|
|
544
|
+
|
|
545
|
+
# take wf from args in case no elements exist for workflow (i.e. work was negotiating)
|
|
546
|
+
requestNames = set([x['RequestName'] for x in elements]) | set([wf for wf in [WorkflowName] if wf])
|
|
547
|
+
if not requestNames:
|
|
548
|
+
return []
|
|
549
|
+
inbox_elements = []
|
|
550
|
+
for wf in requestNames:
|
|
551
|
+
inbox_elements.extend(self.backend.getInboxElements(WorkflowName=wf))
|
|
552
|
+
|
|
553
|
+
# if local queue, kill jobs, update parent to Canceled and delete elements
|
|
554
|
+
if self.params['LocalQueueFlag']:
|
|
555
|
+
# if we can talk to wmbs kill the jobs
|
|
556
|
+
badWfsCancel = []
|
|
557
|
+
if self.params['PopulateFilesets']:
|
|
558
|
+
self.logger.info("Canceling work for workflow(s): %s", requestNames)
|
|
559
|
+
badWfsCancel = self.killWMBSWorkflows(requestNames)
|
|
560
|
+
# now we remove any wf that failed to be cancelled (and its inbox elements)
|
|
561
|
+
requestNames -= set(badWfsCancel)
|
|
562
|
+
for wf in badWfsCancel:
|
|
563
|
+
elementsToRemove = self.backend.getInboxElements(WorkflowName=wf)
|
|
564
|
+
inbox_elements = list(set(inbox_elements) - set(elementsToRemove))
|
|
565
|
+
self.logger.info("New list of cancelled requests: %s", requestNames)
|
|
566
|
+
|
|
567
|
+
# Don't update as fails sometimes due to conflicts (#3856)
|
|
568
|
+
for x in inbox_elements:
|
|
569
|
+
if x['Status'] != 'Canceled':
|
|
570
|
+
x.load().__setitem__('Status', 'Canceled')
|
|
571
|
+
|
|
572
|
+
self.backend.saveElements(*inbox_elements)
|
|
573
|
+
|
|
574
|
+
# if global queue, update non-acquired to Canceled, update parent to CancelRequested
|
|
575
|
+
else:
|
|
576
|
+
# Cancel in global if work has not been passed to a child queue
|
|
577
|
+
elements_to_cancel = [x for x in elements if not x['ChildQueueUrl'] and x['Status'] != 'Canceled']
|
|
578
|
+
# ensure all elements receive cancel request, covers case where initial cancel request missed some elements
|
|
579
|
+
# without this elements may avoid the cancel and not be cleared up till they finish
|
|
580
|
+
elements_not_requested = [x for x in elements if
|
|
581
|
+
x['ChildQueueUrl'] and (x['Status'] != 'CancelRequested' and not x.inEndState())]
|
|
582
|
+
|
|
583
|
+
self.logger.info("Canceling work for workflow(s): %s", requestNames)
|
|
584
|
+
if elements_to_cancel:
|
|
585
|
+
self.backend.updateElements(*[x.id for x in elements_to_cancel], Status='Canceled')
|
|
586
|
+
self.logger.info("Cancel-ed element(s) %s", str([x.id for x in elements_to_cancel]))
|
|
587
|
+
|
|
588
|
+
if elements_not_requested:
|
|
589
|
+
# Don't update as fails sometimes due to conflicts (#3856)
|
|
590
|
+
for x in elements_not_requested:
|
|
591
|
+
x.load().__setitem__('Status', 'CancelRequested')
|
|
592
|
+
self.backend.saveElements(*elements_not_requested)
|
|
593
|
+
self.logger.info("CancelRequest-ed element(s) %s", str([x.id for x in elements_not_requested]))
|
|
594
|
+
|
|
595
|
+
inboxElemIds = [x.id for x in inbox_elements if x['Status'] != 'CancelRequested' and not x.inEndState()]
|
|
596
|
+
self.backend.updateInboxElements(*inboxElemIds, Status='CancelRequested')
|
|
597
|
+
# if we haven't had any updates for a while assume agent is dead and move to canceled
|
|
598
|
+
if self.params.get('cancelGraceTime', -1) > 0 and elements:
|
|
599
|
+
last_update = max([float(x.updatetime) for x in elements])
|
|
600
|
+
if (time.time() - last_update) > self.params['cancelGraceTime']:
|
|
601
|
+
self.logger.info("%s cancellation has stalled, mark as finished", elements[0]['RequestName'])
|
|
602
|
+
# Don't update as fails sometimes due to conflicts (#3856)
|
|
603
|
+
for x in elements:
|
|
604
|
+
if not x.inEndState():
|
|
605
|
+
x.load().__setitem__('Status', 'Canceled')
|
|
606
|
+
self.backend.saveElements(*[x for x in elements if not x.inEndState()])
|
|
607
|
+
|
|
608
|
+
return [x.id for x in elements]
|
|
609
|
+
|
|
610
|
+
def deleteWorkflows(self, *requests):
|
|
611
|
+
"""Delete requests if finished"""
|
|
612
|
+
for request in requests:
|
|
613
|
+
request = self.backend.getInboxElements(elementIDs=[request])
|
|
614
|
+
if len(request) != 1:
|
|
615
|
+
raise RuntimeError('Invalid number of requests for %s' % request[0]['RequestName'])
|
|
616
|
+
request = request[0]
|
|
617
|
+
|
|
618
|
+
if request.inEndState():
|
|
619
|
+
self.logger.info('Deleting request "%s" as it is %s', request.id, request['Status'])
|
|
620
|
+
self.backend.deleteElements(request)
|
|
621
|
+
else:
|
|
622
|
+
self.logger.debug('Not deleting "%s" as it is %s', request.id, request['Status'])
|
|
623
|
+
|
|
624
|
+
# NOTE: this function is not executed by local workqueue
|
|
625
|
+
def queueWork(self, wmspecUrl, request=None, team=None):
|
|
626
|
+
"""
|
|
627
|
+
Take and queue work from a WMSpec.
|
|
628
|
+
|
|
629
|
+
If request name is provided but doesn't match WMSpec name
|
|
630
|
+
an error is raised.
|
|
631
|
+
|
|
632
|
+
If team is provided work will only be available to queue's
|
|
633
|
+
belonging to that team.
|
|
634
|
+
|
|
635
|
+
Duplicate specs will be ignored.
|
|
636
|
+
"""
|
|
637
|
+
self.logger.info('queueWork() begin queueing "%s"', wmspecUrl)
|
|
638
|
+
wmspec = WMWorkloadHelper()
|
|
639
|
+
wmspec.load(wmspecUrl)
|
|
640
|
+
|
|
641
|
+
if request: # validate request name
|
|
642
|
+
if request != wmspec.name():
|
|
643
|
+
raise WorkQueueWMSpecError(wmspec,
|
|
644
|
+
'Request & workflow name mismatch %s vs %s' % (request, wmspec.name()))
|
|
645
|
+
|
|
646
|
+
# Either pull the existing inbox element or create a new one.
|
|
647
|
+
try:
|
|
648
|
+
inbound = self.backend.getInboxElements(elementIDs=[wmspec.name()], loadSpec=True)
|
|
649
|
+
self.logger.info('Resume splitting of "%s"', wmspec.name())
|
|
650
|
+
except CouchNotFoundError:
|
|
651
|
+
inbound = [self.backend.createWork(wmspec, Status='Negotiating',
|
|
652
|
+
TeamName=team, WMBSUrl=self.params["WMBSUrl"])]
|
|
653
|
+
self.backend.insertElements(inbound)
|
|
654
|
+
|
|
655
|
+
work = self.processInboundWork(inbound, throw=True)
|
|
656
|
+
return len(work)
|
|
657
|
+
|
|
658
|
+
def addWork(self, inboundElem, rucioObj=None):
|
|
659
|
+
"""
|
|
660
|
+
Check and add new elements to an existing running request,
|
|
661
|
+
if supported by the start policy.
|
|
662
|
+
|
|
663
|
+
:param inboundElem: dict representation for a WorkQueueElement object,
|
|
664
|
+
including the WMSpec file.
|
|
665
|
+
:param rucioObj: object to the Rucio class
|
|
666
|
+
:return: amount of new work units added to the request
|
|
667
|
+
"""
|
|
668
|
+
result = []
|
|
669
|
+
self.logger.info('Trying to add more work for: %s', inboundElem['RequestName'])
|
|
670
|
+
|
|
671
|
+
try:
|
|
672
|
+
# Check we haven't already split the work, unless it's continuous processing
|
|
673
|
+
work, rejectedWork, badWork = self._splitWork(inboundElem['WMSpec'], data=inboundElem['Inputs'],
|
|
674
|
+
mask=inboundElem['Mask'], inbound=inboundElem,
|
|
675
|
+
continuous=True, rucioObj=rucioObj)
|
|
676
|
+
|
|
677
|
+
# if there is new work, then insert it into the database
|
|
678
|
+
newWork = self.backend.insertElements(work, parent=inboundElem)
|
|
679
|
+
|
|
680
|
+
# store the inputs in the global queue inbox workflow element
|
|
681
|
+
processedInputs = []
|
|
682
|
+
for unit in newWork:
|
|
683
|
+
processedInputs.extend(list(unit['Inputs']))
|
|
684
|
+
|
|
685
|
+
# update the list of processed and rejected inputs with what is already
|
|
686
|
+
# defined in the workqueue inbox
|
|
687
|
+
processedInputs.extend(inboundElem['ProcessedInputs'])
|
|
688
|
+
rejectedWork.extend(inboundElem['RejectedInputs'])
|
|
689
|
+
if newWork:
|
|
690
|
+
# then also update the timestamp for when new data was found
|
|
691
|
+
self.backend.updateInboxElements(inboundElem.id,
|
|
692
|
+
ProcessedInputs=processedInputs,
|
|
693
|
+
RejectedInputs=rejectedWork,
|
|
694
|
+
TimestampFoundNewData=int(time.time()))
|
|
695
|
+
# if global queue, then update workflow stats to request mgr couch doc
|
|
696
|
+
# remove the "UnittestFlag" - need to create the reqmgrSvc emulator
|
|
697
|
+
if not self.params.get("UnittestFlag", False):
|
|
698
|
+
# get statistics for the new work. It's already validated on the server side
|
|
699
|
+
totalStats = self._getTotalStats(newWork)
|
|
700
|
+
self.reqmgrSvc.updateRequestStats(inboundElem['WMSpec'].name(), totalStats)
|
|
701
|
+
|
|
702
|
+
if badWork:
|
|
703
|
+
msg = "Request with the following unprocessable input data: %s" % badWork
|
|
704
|
+
self.logdb.post(inboundElem['RequestName'], msg, 'warning')
|
|
705
|
+
except Exception as exc:
|
|
706
|
+
self.logger.error('Generic exception adding work to WQE inbox: %s. Error: %s',
|
|
707
|
+
inboundElem, str(exc))
|
|
708
|
+
else:
|
|
709
|
+
result.extend(newWork)
|
|
710
|
+
|
|
711
|
+
self.logger.info('Added %d new elements for request: %s', len(result), inboundElem['RequestName'])
|
|
712
|
+
return len(result)
|
|
713
|
+
|
|
714
|
+
def status(self, status=None, elementIDs=None,
|
|
715
|
+
dictKey=None, wmbsInfo=None, loadSpec=False,
|
|
716
|
+
**filters):
|
|
717
|
+
"""
|
|
718
|
+
Return elements in the queue.
|
|
719
|
+
|
|
720
|
+
status, elementIDs & filters are 'AND'ed together to filter elements.
|
|
721
|
+
dictKey returns the output as a dict with the dictKey as the key.
|
|
722
|
+
wmbsInfo causes elements to be synced with their status in WMBS.
|
|
723
|
+
loadSpec causes the workflow for each spec to be loaded.
|
|
724
|
+
"""
|
|
725
|
+
items = self.backend.getElements(status=status,
|
|
726
|
+
elementIDs=elementIDs,
|
|
727
|
+
loadSpec=loadSpec,
|
|
728
|
+
**filters)
|
|
729
|
+
|
|
730
|
+
if wmbsInfo:
|
|
731
|
+
self.logger.debug("Syncing element statuses with WMBS for workflow: %s", filters.get("RequestName"))
|
|
732
|
+
for item in items:
|
|
733
|
+
for wmbs in wmbsInfo:
|
|
734
|
+
if item['SubscriptionId'] == wmbs['subscription_id']:
|
|
735
|
+
item.updateFromSubscription(wmbs)
|
|
736
|
+
break
|
|
737
|
+
|
|
738
|
+
# if dictKey, format as a dict with the appropriate key
|
|
739
|
+
if dictKey:
|
|
740
|
+
tmp = defaultdict(list)
|
|
741
|
+
for item in items:
|
|
742
|
+
tmp[item[dictKey]].append(item)
|
|
743
|
+
items = dict(tmp)
|
|
744
|
+
return items
|
|
745
|
+
|
|
746
|
+
def getWMBSSubscriptionStatus(self):
|
|
747
|
+
"""
|
|
748
|
+
Fetches all the subscriptions in this agent and make a summary of
|
|
749
|
+
every single one of them, to be used to update WQEs
|
|
750
|
+
:return: a list of dictionaries
|
|
751
|
+
"""
|
|
752
|
+
from WMCore.WorkQueue.WMBSHelper import wmbsSubscriptionStatus
|
|
753
|
+
self.logger.info("Fetching WMBS subscription status information")
|
|
754
|
+
wmbsStatus = wmbsSubscriptionStatus(logger=self.logger,
|
|
755
|
+
dbi=self.conn.dbi,
|
|
756
|
+
conn=self.conn.getDBConn(),
|
|
757
|
+
transaction=self.conn.existingTransaction())
|
|
758
|
+
return wmbsStatus
|
|
759
|
+
|
|
760
|
+
    def statusInbox(self, status=None, elementIDs=None, dictKey=None, **filters):
        """
        Return elements in the inbox.

        status, elementIDs & filters are 'AND'ed together to filter elements.
        dictKey returns the output as a dict with the dictKey as the key.
        """
        items = self.backend.getInboxElements(status, elementIDs, **filters)

        # if dictKey given, format as a dict with the appropriate key
        if dictKey:
            tmp = defaultdict(list)
            for item in items:
                tmp[item[dictKey]].append(item)
            items = dict(tmp)

        return items

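    # Usage sketch for the query methods above (status and statusInbox); an
    # illustration only, not part of this module. It assumes an already
    # configured WorkQueue instance named `queue`:
    #
    #     # group Available elements by request name
    #     available = queue.status(status='Available', dictKey='RequestName')
    #     for reqName, elems in available.items():
    #         print(reqName, len(elems), sum(e['Jobs'] for e in elems))
    #
    #     # inbox elements still waiting to be split
    #     inbox = queue.statusInbox(status='Negotiating')
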
    def updateLocationInfo(self):
        """
        Update locations info for elements.
        """
        self.logger.info('Executing data location update...')
        if not self.backend.isAvailable():
            self.logger.warning('Backend busy or down: skipping location update')
            return 0
        result = self.dataLocationMapper()
        self.backend.recordTaskActivity('location_refresh')
        return result

    def _printLog(self, msg, printFlag, logLevel):
        if printFlag:
            print(msg)
        else:
            getattr(self.logger, logLevel)(msg)

    def pullWorkConditionCheck(self, printFlag=False):

        if not self.params['ParentQueueCouchUrl']:
            msg = 'Unable to pull work from parent, ParentQueueCouchUrl not provided'
            self._printLog(msg, printFlag, "warning")
            return False
        if not self.backend.isAvailable() or not self.parent_queue.isAvailable():
            msg = 'Backend busy or down: skipping work pull'
            self._printLog(msg, printFlag, "warning")
            return False

        left_over = self.parent_queue.getElements('Negotiating', returnIdOnly=True,
                                                  ChildQueueUrl=self.params['QueueURL'])
        if left_over:
            msg = 'Not pulling more work. Still replicating %d previous units, ids:\n%s' % (len(left_over), left_over)
            self._printLog(msg, printFlag, "warning")
            return False

        still_processing = self.backend.getInboxElements('Negotiating', returnIdOnly=True)
        if still_processing:
            msg = 'Not pulling more work. Still processing %d previous units' % len(still_processing)
            self._printLog(msg, printFlag, "warning")
            return False

        return True

    def freeResouceCheck(self):
        """
        This method looks into the WMBS and BossAir tables and collects
        two types of information:
        1) sites and the total slots available for job creation
        2) sites and the number of pending jobs grouped by priority
        With that information in hand, it looks at the local workqueue elements
        sitting in Available status and updates the 2nd data structure (thus it
        updates the number of jobs pending by priority according to the LQEs), which
        is then used to decide which work can be acquired from the parent queue.
        :return: a tuple of dictionaries (or empty lists)
        """
        from WMCore.WorkQueue.WMBSHelper import freeSlots
        resources, jobCounts = freeSlots(self.params['QueueDepth'], knownCmsSites=cmsSiteNames())
        # now update jobCounts with work that is already available in the local queue
        _, jobCounts = self.backend.calculateAvailableWork(resources, jobCounts)

        return (resources, jobCounts)

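    # Illustration of the data shapes returned above; not part of this module.
    # The exact structure comes from WMBSHelper.freeSlots and may differ, so
    # treat this as a sketch with made-up numbers:
    #
    #     resources, jobCounts = queue.freeResouceCheck()
    #     # resources: {'T1_US_FNAL': 500, 'T2_CH_CERN': 1200}   free slots per site
    #     # jobCounts: {'T1_US_FNAL': {90000: 250, 80000: 10}}   pending jobs per site, keyed by priority
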
    def getAvailableWorkfromParent(self, resources, jobCounts, printFlag=False):
        self.logger.info("Going to fetch work from the parent queue: %s", self.parent_queue.queueUrl)
        work, _ = self.parent_queue.availableWork(resources, jobCounts, self.params['Team'],
                                                  numElems=self.params['WorkPerCycle'],
                                                  rowsPerSlice=self.params['RowsPerSlice'],
                                                  maxRows=self.params['MaxRowsPerCycle'])
        if not work:
            self._printLog('No available work in parent queue.', printFlag, "warning")
        return work

    def pullWork(self, resources=None):
        """
        Pull work from another WorkQueue to be processed:
        :param resources: optional dictionary with sites and the amount
            of slots free
        """
        jobCounts = {}
        if self.pullWorkConditionCheck() is False:
            return 0

        # NOTE: resources parameter is only used by unit tests, which do
        # not use WMBS and BossAir tables
        if not resources:
            (resources, jobCounts) = self.freeResouceCheck()
            if not resources and not jobCounts:
                return 0

        work = self.getAvailableWorkfromParent(resources, jobCounts)
        if not work:
            return 0

        work = self._assignToChildQueue(self.params['QueueURL'], *work)

        return len(work)

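    # Sketch of the work-pull cycle wired together by the methods above; an
    # illustration only, not part of this module. `localQueue` is assumed to be
    # a local WorkQueue configured with ParentQueueCouchUrl and QueueURL:
    #
    #     if localQueue.pullWorkConditionCheck(printFlag=True):
    #         numUnits = localQueue.pullWork()
    #         print("Acquired %d work units from the parent queue" % numUnits)
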
    def closeWork(self):
        """
        Global queue service that looks for the inbox elements that are still active
        and checks whether they should be closed for new data or not.
        An element is closed automatically when one of the following conditions holds true:
        - The StartPolicy doesn't define an OpenRunningTimeout or this delay is set to 0
        - A period longer than OpenRunningTimeout has passed since the last child element
          was created or an open block was found and the StartPolicy newDataAvailable
          function returns False.

        :return: list of workqueue_inbox elements that have been closed
        """
        workflowsToClose = []
        if self.params['LocalQueueFlag']:
            # this is a Global WorkQueue only functionality
            return workflowsToClose
        if not self.backend.isAvailable():
            self.logger.warning('Backend busy or down: Can not close work at this time')
            return workflowsToClose

        workflowsToCheck = self.backend.getInboxElements(OpenForNewData=True)
        self.logger.info("Retrieved a list of %d open workflows", len(workflowsToCheck))
        currentTime = time.time()
        for element in workflowsToCheck:
            # fetch attributes from the inbox workqueue element
            startPol = element.get('StartPolicy', {})
            openRunningTimeout = startPol.get('OpenRunningTimeout', 0)
            foundNewDataTime = element.get('TimestampFoundNewData', 0)
            if not openRunningTimeout:
                self.logger.info("Workflow %s has no OpenRunningTimeout. Queuing to be closed.",
                                 element['RequestName'])
                workflowsToClose.append(element.id)
            elif (currentTime - foundNewDataTime) > openRunningTimeout:
                # then it's been too long since the last element has been found
                self.logger.info("Workflow %s has expired OpenRunningTimeout. Queuing to be closed.",
                                 element['RequestName'])
                workflowsToClose.append(element.id)

        if workflowsToClose:
            try:
                self.logger.info('Closing workflows in workqueue_inbox for: %s', workflowsToClose)
                self.backend.updateInboxElements(*workflowsToClose, OpenForNewData=False)
                msg = 'Closed inbox elements for: %s.\n' % ', '.join(workflowsToClose)
            except CouchInternalServerError as ex:
                msg = 'Failed to close workflows with a CouchInternalServerError exception. '
                msg += 'Details: {}'.format(str(ex))
                self.logger.error(msg)
            except Exception as ex:
                msg = 'Failed to close workflows with a generic exception. '
                msg += 'Details: {}'.format(str(ex))
                self.logger.exception(msg)
        else:
            msg = 'No workflows to close.\n'

        self.backend.recordTaskActivity('workclosing', msg)

        return workflowsToClose

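    # The closing decision above boils down to a timeout check; a standalone
    # sketch with made-up values (illustration only, not part of this module):
    #
    #     import time
    #     openRunningTimeout = 7200       # seconds, taken from the StartPolicy
    #     foundNewDataTime = 1700000000   # TimestampFoundNewData of the element
    #     shouldClose = (not openRunningTimeout) or \
    #                   (time.time() - foundNewDataTime) > openRunningTimeout
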
    def deleteCompletedWFElements(self):
        """
        Deletes workqueue elements for workflows that are in a finished status.
        """
        deletableStates = ["completed", "closed-out", "failed",
                           "announced", "aborted-completed", "rejected",
                           "normal-archived", "aborted-archived", "rejected-archived"]

        # fetch workflows known to workqueue + workqueue_inbox and with spec attachments
        reqNames = self.backend.getWorkflows(includeInbox=True, includeSpecs=True)
        self.logger.info("Retrieved %d workflows known by WorkQueue", len(reqNames))
        requestsInfo = self.requestDB.getRequestByNames(reqNames)
        deleteRequests = []
        for key, value in viewitems(requestsInfo):
            if (value["RequestStatus"] is None) or (value["RequestStatus"] in deletableStates):
                deleteRequests.append(key)
        self.logger.info("Found %d out of %d workflows in a deletable state",
                         len(deleteRequests), len(reqNames))
        return self.backend.deleteWQElementsByWorkflow(deleteRequests)

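    # Sketch of the filtering performed above (illustration only, not part of
    # this module), assuming `requestsInfo` maps request names to ReqMgr2 docs:
    #
    #     deleteRequests = [name for name, info in requestsInfo.items()
    #                       if info["RequestStatus"] is None
    #                       or info["RequestStatus"] in deletableStates]
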
    def performSyncAndCancelAction(self, skipWMBS):
        """
        Apply end policies to determine work status & cleanup finished work
        """
        if not self.backend.isAvailable():
            self.logger.warning('Backend busy or down: skipping cleanup tasks')
            return

        if self.params['LocalQueueFlag']:
            self.backend.fixConflicts()  # before doing anything fix any conflicts

        wf_to_cancel = []  # record what we did for task_activity
        finished_elements = []

        useWMBS = not skipWMBS and self.params['LocalQueueFlag']
        if useWMBS:
            wmbsWflowSummary = self.getWMBSSubscriptionStatus()
        else:
            wmbsWflowSummary = []
        # Get queue elements grouped by their workflow with updated wmbs progress
        # Cancel if requested, update locally and remove obsolete elements
        self.logger.info('Fetching workflow information (including inbox and specs)')
        workflowsList = self.backend.getWorkflows(includeInbox=True, includeSpecs=True)
        for wf in workflowsList:
            parentQueueDeleted = True
            try:
                elements = self.status(RequestName=wf, wmbsInfo=wmbsWflowSummary)
                parents = self.backend.getInboxElements(RequestName=wf)

                self.logger.debug("Queue %s status follows:", self.backend.queueUrl)
                results = endPolicy(elements, parents, self.params['EndPolicySettings'])
                for result in results:
                    self.logger.debug("Request %s, Status %s, Full info: %s",
                                      result['RequestName'], result['Status'], result)

                    # check for cancellation requests (affects entire workflow)
                    if result['Status'] == 'CancelRequested':
                        self.logger.info('Canceling work for workflow: %s', wf)
                        canceled = self.cancelWork(WorkflowName=wf)
                        if canceled:  # global wont cancel if work in child queue
                            wf_to_cancel.append(wf)
                        break
                    elif result['Status'] == 'Negotiating':
                        self.logger.debug("Waiting for %s to finish splitting", wf)
                        continue

                    parent = result['ParentQueueElement']
                    if parent.modified:
                        self.backend.saveElements(parent)

                    if result.inEndState():
                        if elements:
                            self.logger.debug("Request %s finished (%s)",
                                              result['RequestName'], parent.statusMetrics())
                            finished_elements.extend(result['Elements'])
                        else:
                            parentQueueDeleted = False
                        continue

                    updated_elements = [x for x in result['Elements'] if x.modified]
                    for x in updated_elements:
                        self.logger.debug("Updating progress %s (%s): %s", x['RequestName'], x.id, x.statusMetrics())
                        self.backend.updateElements(x.id, **x.statusMetrics())

                if not parentQueueDeleted:
                    self.logger.info('Waiting for parent queue to delete "%s"', wf)

            except Exception as ex:
                self.logger.error('Error processing workflow "%s": %s', wf, str(ex))

        msg = 'Finished elements: %s\nCanceled workflows: %s' % (', '.join(["%s (%s)" % (x.id, x['RequestName'])
                                                                            for x in finished_elements]),
                                                                 ', '.join(wf_to_cancel))

        self.logger.debug(msg)
        self.backend.recordTaskActivity('housekeeping', msg)

    def performQueueCleanupActions(self, skipWMBS=False):

        try:
            self.logger.info("Deleting completed workflow WQ elements ...")
            res = self.deleteCompletedWFElements()
            self.logger.info("Deleted %d elements from workqueue/inbox database", res)
        except Exception as ex:
            self.logger.exception('Error deleting WQ elements. Details: %s', str(ex))

        try:
            self.logger.info("Syncing and cancelling work ...")
            self.performSyncAndCancelAction(skipWMBS)
        except Exception as ex:
            self.logger.error('Error syncing and canceling WQ elements. Details: %s', str(ex))

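    # Sketch of how the cleanup entry point above would be driven from a
    # periodic task (illustration only, not part of this module; the actual
    # scheduling lives in the agent components):
    #
    #     queue.performQueueCleanupActions(skipWMBS=False)
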
    def _splitWork(self, wmspec, data=None, mask=None, inbound=None, continuous=False, rucioObj=None):
        """
        Split work from a parent into WorkQueueElements.

        If the data param is supplied, use that rather than getting input data from
        the wmspec. Used for instance when global splits by Block (avoids having to
        modify the wmspec block whitelist - thus all appear as the same wf in wmbs).

        mask can be used to specify e.g. an event range.

        The inbound and continuous parameters are used to split
        an already split inbox element.
        """
        # give preference to rucio object created by the CherryPy threads
        if not rucioObj:
            rucioObj = self.rucio

        totalUnits, rejectedWork, badWork = [], [], []
        # split each top level task into constituent work elements
        # get the acdc server and db name
        for topLevelTask in wmspec.taskIterator():
            spec = getWorkloadFromTask(topLevelTask)
            policyName = spec.startPolicy()
            if not policyName:
                raise RuntimeError("WMSpec doesn't define policyName, current value: '%s'" % policyName)

            policy = startPolicy(policyName, self.params['SplittingMapping'],
                                 rucioObj=rucioObj, logger=self.logger)
            if not policy.supportsWorkAddition() and continuous:
                # Can't split further with a policy that doesn't allow it
                continue
            if continuous:
                policy.modifyPolicyForWorkAddition(inbound)
            self.logger.info('Splitting %s with policy name %s and policy params %s',
                             topLevelTask.getPathName(), policyName,
                             self.params['SplittingMapping'].get(policyName))
            units, rejectedWork, badWork = policy(spec, topLevelTask, data, mask, continuous=continuous)
            self.logger.info('Work splitting completed with %d units, %d rejectedWork and %d badWork',
                             len(units), len(rejectedWork), len(badWork))
            for unit in units:
                msg = 'Queuing element {} for {} with policy {}, '.format(unit.id, unit['Task'].getPathName(),
                                                                          unit['StartPolicy'])
                msg += 'with {} job(s) and {} lumis'.format(unit['Jobs'], unit['NumberOfLumis'])
                if unit['Inputs']:
                    msg += ' on %s' % list(unit['Inputs'])[0]
                if unit['Mask']:
                    msg += ' on events %d-%d' % (unit['Mask']['FirstEvent'], unit['Mask']['LastEvent'])
                self.logger.info(msg)
            totalUnits.extend(units)

        return (totalUnits, rejectedWork, badWork)

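    # Sketch of the tuple returned above (illustration only, not part of this
    # module); the exact element types come from the start policies:
    #
    #     units, rejectedWork, badWork = queue._splitWork(wmspec)
    #     # units:        WorkQueueElement-like objects ready to be inserted
    #     # rejectedWork: inputs rejected by the start policy
    #     # badWork:      inputs that could not be processed at all
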
    def _getTotalStats(self, units):
        totalToplevelJobs = 0
        totalEvents = 0
        totalLumis = 0
        totalFiles = 0

        for unit in units:
            totalToplevelJobs += unit['Jobs']
            totalEvents += unit['NumberOfEvents']
            totalLumis += unit['NumberOfLumis']
            totalFiles += unit['NumberOfFiles']

        return {'total_jobs': totalToplevelJobs,
                'input_events': totalEvents,
                'input_lumis': totalLumis,
                'input_num_files': totalFiles}

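    # Worked example of the aggregation above (illustration only, not part of
    # this module):
    #
    #     units = [{'Jobs': 10, 'NumberOfEvents': 1000, 'NumberOfLumis': 5, 'NumberOfFiles': 2},
    #              {'Jobs': 4, 'NumberOfEvents': 400, 'NumberOfLumis': 2, 'NumberOfFiles': 1}]
    #     queue._getTotalStats(units)
    #     # -> {'total_jobs': 14, 'input_events': 1400, 'input_lumis': 7, 'input_num_files': 3}
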
    def processInboundWork(self, inbound_work=None, throw=False, continuous=False, rucioObj=None):
        """Retrieve work from inbox, split and store.
        If a request is passed, then only process that request.
        """
        inbound_work = inbound_work or []
        msg = "Executing processInboundWork with {} inbound_work, ".format(len(inbound_work))
        msg += "throw: {} and continuous: {}".format(throw, continuous)
        self.logger.info(msg)
        if self.params['LocalQueueFlag']:
            self.logger.info("fixing conflict...")
            self.backend.fixConflicts()  # db should be consistent

        result = []
        if not inbound_work and continuous:
            # This is not supported
            return result
        if not inbound_work:
            inbound_work = self.backend.getElementsForSplitting()
        self.logger.info('Retrieved %d elements for splitting with continuous flag: %s',
                         len(inbound_work), continuous)
        for inbound in inbound_work:
            try:
                # Check we haven't already split the work, unless it's continuous processing
                work = not continuous and self.backend.getElementsForParent(inbound)
                if work:
                    self.logger.info('Request "%s" already split - Resuming', inbound['RequestName'])
                else:
                    work, rejectedWork, badWork = self._splitWork(inbound['WMSpec'], data=inbound['Inputs'],
                                                                  mask=inbound['Mask'], inbound=inbound,
                                                                  continuous=continuous, rucioObj=rucioObj)

                    # save inbound work to signal we have completed queueing
                    # if this fails, rerunning will pick up here
                    newWork = self.backend.insertElements(work, parent=inbound)
                    # get statistics for the new work
                    totalStats = self._getTotalStats(newWork)

                    if not continuous:
                        # Update to Acquired when it's the first processing of inbound work
                        self.backend.updateInboxElements(inbound.id, Status='Acquired')

                    # store the inputs in the global queue inbox workflow element
                    if not self.params.get('LocalQueueFlag'):
                        processedInputs = []
                        for unit in work:
                            processedInputs.extend(list(unit['Inputs']))
                        self.backend.updateInboxElements(inbound.id, ProcessedInputs=processedInputs,
                                                         RejectedInputs=rejectedWork)
                        # if global queue, then update workflow stats to request mgr couch doc
                        # remove the "UnittestFlag" - need to create the reqmgrSvc emulator
                        if not self.params.get("UnittestFlag", False):
                            self.reqmgrSvc.updateRequestStats(inbound['WMSpec'].name(), totalStats)

                    if badWork:
                        msg = "Request with the following unprocessable input data: %s" % badWork
                        self.logdb.post(inbound['RequestName'], msg, 'warning')
            except TERMINAL_EXCEPTIONS as ex:
                msg = 'Terminal exception splitting WQE: %s' % inbound
                self.logger.error(msg)
                self.logdb.post(inbound['RequestName'], msg, 'error')
                if not continuous:
                    # Only fail on first splitting
                    self.logger.error('Failing workflow "%s": %s', inbound['RequestName'], str(ex))
                    self.backend.updateInboxElements(inbound.id, Status='Failed')
                    if throw:
                        raise
            except Exception as ex:
                if continuous:
                    continue
                msg = 'Exception splitting wqe %s for %s: %s' % (inbound.id, inbound['RequestName'], str(ex))
                self.logger.exception(msg)
                self.logdb.post(inbound['RequestName'], msg, 'error')

                if throw:
                    raise
                continue
            else:
                result.extend(work)

        requests = ', '.join(list(set(['"%s"' % x['RequestName'] for x in result])))
        if requests:
            self.logger.info('Split work for request(s): %s', requests)

        return result

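    # Usage sketch for the splitting entry point above (illustration only, not
    # part of this module); `globalQueue` is an assumed, configured instance:
    #
    #     newElements = globalQueue.processInboundWork()  # split whatever is pending in the inbox
    #     print("Created %d workqueue elements" % len(newElements))
    #     # note: continuous=True is only honoured when explicit inbox elements are passed in
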
    def getWMBSInjectionStatus(self, workflowName=None, drainMode=False):
        """
        If the parent queue exists, return the result from the parent queue;
        otherwise return the result from the current queue.
        (In general the parent queue always exists when this is called from the
        local queue, except in the T1 skim case.)
        Returns a list of [{workflowName: injection status (True or False)}].
        If the workflow does not exist, returns [].
        """
        if self.parent_queue and not drainMode:
            return self.parent_queue.getWMBSInjectStatus(workflowName)
        return self.backend.getWMBSInjectStatus(workflowName)

    def monitorWorkQueue(self, status=None):
        """
        Uses the workqueue data-service to retrieve a few basic pieces of
        information regarding all the elements in the queue.
        """
        status = status or []
        results = {}
        start = int(time.time())
        results['workByStatus'] = self.workqueueDS.getJobsByStatus()
        results['workByStatusAndPriority'] = self.workqueueDS.getJobsByStatusAndPriority()
        results['workByAgentAndStatus'] = self.workqueueDS.getChildQueuesAndStatus()
        results['workByAgentAndPriority'] = self.workqueueDS.getChildQueuesAndPriority()

        # now the heavy processing for the site information
        elements = self.workqueueDS.getElementsByStatus(status)
        uniSites, posSites = getGlobalSiteStatusSummary(elements, status=status)
        results['uniqueJobsPerSiteAAA'] = uniSites
        results['possibleJobsPerSiteAAA'] = posSites
        uniSites, posSites = getGlobalSiteStatusSummary(elements, status=status, dataLocality=True)
        results['uniqueJobsPerSite'] = uniSites
        results['possibleJobsPerSite'] = posSites

        end = int(time.time())
        results["total_query_time"] = end - start
        return results