wmglobalqueue 2.4.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- Utils/CPMetrics.py +270 -0
- Utils/CertTools.py +100 -0
- Utils/EmailAlert.py +50 -0
- Utils/ExtendedUnitTestCase.py +62 -0
- Utils/FileTools.py +182 -0
- Utils/IteratorTools.py +80 -0
- Utils/MathUtils.py +31 -0
- Utils/MemoryCache.py +119 -0
- Utils/Patterns.py +24 -0
- Utils/Pipeline.py +137 -0
- Utils/PortForward.py +97 -0
- Utils/ProcFS.py +112 -0
- Utils/ProcessStats.py +194 -0
- Utils/PythonVersion.py +17 -0
- Utils/Signals.py +36 -0
- Utils/TemporaryEnvironment.py +27 -0
- Utils/Throttled.py +227 -0
- Utils/Timers.py +130 -0
- Utils/Timestamps.py +86 -0
- Utils/TokenManager.py +143 -0
- Utils/Tracing.py +60 -0
- Utils/TwPrint.py +98 -0
- Utils/Utilities.py +318 -0
- Utils/__init__.py +11 -0
- Utils/wmcoreDTools.py +707 -0
- WMCore/ACDC/Collection.py +57 -0
- WMCore/ACDC/CollectionTypes.py +12 -0
- WMCore/ACDC/CouchCollection.py +67 -0
- WMCore/ACDC/CouchFileset.py +238 -0
- WMCore/ACDC/CouchService.py +73 -0
- WMCore/ACDC/DataCollectionService.py +485 -0
- WMCore/ACDC/Fileset.py +94 -0
- WMCore/ACDC/__init__.py +11 -0
- WMCore/Algorithms/Alarm.py +39 -0
- WMCore/Algorithms/MathAlgos.py +274 -0
- WMCore/Algorithms/MiscAlgos.py +67 -0
- WMCore/Algorithms/ParseXMLFile.py +115 -0
- WMCore/Algorithms/Permissions.py +27 -0
- WMCore/Algorithms/Singleton.py +58 -0
- WMCore/Algorithms/SubprocessAlgos.py +129 -0
- WMCore/Algorithms/__init__.py +7 -0
- WMCore/Cache/GenericDataCache.py +98 -0
- WMCore/Cache/WMConfigCache.py +572 -0
- WMCore/Cache/__init__.py +0 -0
- WMCore/Configuration.py +659 -0
- WMCore/DAOFactory.py +47 -0
- WMCore/DataStructs/File.py +177 -0
- WMCore/DataStructs/Fileset.py +140 -0
- WMCore/DataStructs/Job.py +182 -0
- WMCore/DataStructs/JobGroup.py +142 -0
- WMCore/DataStructs/JobPackage.py +49 -0
- WMCore/DataStructs/LumiList.py +734 -0
- WMCore/DataStructs/Mask.py +219 -0
- WMCore/DataStructs/MathStructs/ContinuousSummaryHistogram.py +197 -0
- WMCore/DataStructs/MathStructs/DiscreteSummaryHistogram.py +92 -0
- WMCore/DataStructs/MathStructs/SummaryHistogram.py +117 -0
- WMCore/DataStructs/MathStructs/__init__.py +0 -0
- WMCore/DataStructs/Pickleable.py +24 -0
- WMCore/DataStructs/Run.py +256 -0
- WMCore/DataStructs/Subscription.py +175 -0
- WMCore/DataStructs/WMObject.py +47 -0
- WMCore/DataStructs/WorkUnit.py +112 -0
- WMCore/DataStructs/Workflow.py +60 -0
- WMCore/DataStructs/__init__.py +8 -0
- WMCore/Database/CMSCouch.py +1430 -0
- WMCore/Database/ConfigDBMap.py +29 -0
- WMCore/Database/CouchMonitoring.py +450 -0
- WMCore/Database/CouchUtils.py +118 -0
- WMCore/Database/DBCore.py +198 -0
- WMCore/Database/DBCreator.py +113 -0
- WMCore/Database/DBExceptionHandler.py +59 -0
- WMCore/Database/DBFactory.py +117 -0
- WMCore/Database/DBFormatter.py +177 -0
- WMCore/Database/Dialects.py +13 -0
- WMCore/Database/ExecuteDAO.py +327 -0
- WMCore/Database/MongoDB.py +241 -0
- WMCore/Database/MySQL/Destroy.py +42 -0
- WMCore/Database/MySQL/ListUserContent.py +20 -0
- WMCore/Database/MySQL/__init__.py +9 -0
- WMCore/Database/MySQLCore.py +132 -0
- WMCore/Database/Oracle/Destroy.py +56 -0
- WMCore/Database/Oracle/ListUserContent.py +19 -0
- WMCore/Database/Oracle/__init__.py +9 -0
- WMCore/Database/ResultSet.py +44 -0
- WMCore/Database/Transaction.py +91 -0
- WMCore/Database/__init__.py +9 -0
- WMCore/Database/ipy_profile_couch.py +438 -0
- WMCore/GlobalWorkQueue/CherryPyThreads/CleanUpTask.py +29 -0
- WMCore/GlobalWorkQueue/CherryPyThreads/HeartbeatMonitor.py +105 -0
- WMCore/GlobalWorkQueue/CherryPyThreads/LocationUpdateTask.py +28 -0
- WMCore/GlobalWorkQueue/CherryPyThreads/ReqMgrInteractionTask.py +35 -0
- WMCore/GlobalWorkQueue/CherryPyThreads/__init__.py +0 -0
- WMCore/GlobalWorkQueue/__init__.py +0 -0
- WMCore/GroupUser/CouchObject.py +127 -0
- WMCore/GroupUser/Decorators.py +51 -0
- WMCore/GroupUser/Group.py +33 -0
- WMCore/GroupUser/Interface.py +73 -0
- WMCore/GroupUser/User.py +96 -0
- WMCore/GroupUser/__init__.py +11 -0
- WMCore/Lexicon.py +836 -0
- WMCore/REST/Auth.py +202 -0
- WMCore/REST/CherryPyPeriodicTask.py +166 -0
- WMCore/REST/Error.py +333 -0
- WMCore/REST/Format.py +642 -0
- WMCore/REST/HeartbeatMonitorBase.py +90 -0
- WMCore/REST/Main.py +636 -0
- WMCore/REST/Server.py +2435 -0
- WMCore/REST/Services.py +24 -0
- WMCore/REST/Test.py +120 -0
- WMCore/REST/Tools.py +38 -0
- WMCore/REST/Validation.py +250 -0
- WMCore/REST/__init__.py +1 -0
- WMCore/ReqMgr/DataStructs/RequestStatus.py +209 -0
- WMCore/ReqMgr/DataStructs/RequestType.py +13 -0
- WMCore/ReqMgr/DataStructs/__init__.py +0 -0
- WMCore/ReqMgr/__init__.py +1 -0
- WMCore/Services/AlertManager/AlertManagerAPI.py +111 -0
- WMCore/Services/AlertManager/__init__.py +0 -0
- WMCore/Services/CRIC/CRIC.py +238 -0
- WMCore/Services/CRIC/__init__.py +0 -0
- WMCore/Services/DBS/DBS3Reader.py +1044 -0
- WMCore/Services/DBS/DBSConcurrency.py +44 -0
- WMCore/Services/DBS/DBSErrors.py +112 -0
- WMCore/Services/DBS/DBSReader.py +23 -0
- WMCore/Services/DBS/DBSUtils.py +166 -0
- WMCore/Services/DBS/DBSWriterObjects.py +381 -0
- WMCore/Services/DBS/ProdException.py +133 -0
- WMCore/Services/DBS/__init__.py +8 -0
- WMCore/Services/FWJRDB/FWJRDBAPI.py +118 -0
- WMCore/Services/FWJRDB/__init__.py +0 -0
- WMCore/Services/HTTPS/HTTPSAuthHandler.py +66 -0
- WMCore/Services/HTTPS/__init__.py +0 -0
- WMCore/Services/LogDB/LogDB.py +201 -0
- WMCore/Services/LogDB/LogDBBackend.py +191 -0
- WMCore/Services/LogDB/LogDBExceptions.py +11 -0
- WMCore/Services/LogDB/LogDBReport.py +85 -0
- WMCore/Services/LogDB/__init__.py +0 -0
- WMCore/Services/MSPileup/__init__.py +0 -0
- WMCore/Services/MSUtils/MSUtils.py +54 -0
- WMCore/Services/MSUtils/__init__.py +0 -0
- WMCore/Services/McM/McM.py +173 -0
- WMCore/Services/McM/__init__.py +8 -0
- WMCore/Services/MonIT/Grafana.py +133 -0
- WMCore/Services/MonIT/__init__.py +0 -0
- WMCore/Services/PyCondor/PyCondorAPI.py +154 -0
- WMCore/Services/PyCondor/__init__.py +0 -0
- WMCore/Services/ReqMgr/ReqMgr.py +261 -0
- WMCore/Services/ReqMgr/__init__.py +0 -0
- WMCore/Services/ReqMgrAux/ReqMgrAux.py +419 -0
- WMCore/Services/ReqMgrAux/__init__.py +0 -0
- WMCore/Services/RequestDB/RequestDBReader.py +267 -0
- WMCore/Services/RequestDB/RequestDBWriter.py +39 -0
- WMCore/Services/RequestDB/__init__.py +0 -0
- WMCore/Services/Requests.py +624 -0
- WMCore/Services/Rucio/Rucio.py +1290 -0
- WMCore/Services/Rucio/RucioUtils.py +74 -0
- WMCore/Services/Rucio/__init__.py +0 -0
- WMCore/Services/RucioConMon/RucioConMon.py +121 -0
- WMCore/Services/RucioConMon/__init__.py +0 -0
- WMCore/Services/Service.py +400 -0
- WMCore/Services/StompAMQ/__init__.py +0 -0
- WMCore/Services/TagCollector/TagCollector.py +155 -0
- WMCore/Services/TagCollector/XMLUtils.py +98 -0
- WMCore/Services/TagCollector/__init__.py +0 -0
- WMCore/Services/UUIDLib.py +13 -0
- WMCore/Services/UserFileCache/UserFileCache.py +160 -0
- WMCore/Services/UserFileCache/__init__.py +8 -0
- WMCore/Services/WMAgent/WMAgent.py +63 -0
- WMCore/Services/WMAgent/__init__.py +0 -0
- WMCore/Services/WMArchive/CMSSWMetrics.py +526 -0
- WMCore/Services/WMArchive/DataMap.py +463 -0
- WMCore/Services/WMArchive/WMArchive.py +33 -0
- WMCore/Services/WMArchive/__init__.py +0 -0
- WMCore/Services/WMBS/WMBS.py +97 -0
- WMCore/Services/WMBS/__init__.py +0 -0
- WMCore/Services/WMStats/DataStruct/RequestInfoCollection.py +300 -0
- WMCore/Services/WMStats/DataStruct/__init__.py +0 -0
- WMCore/Services/WMStats/WMStatsPycurl.py +145 -0
- WMCore/Services/WMStats/WMStatsReader.py +445 -0
- WMCore/Services/WMStats/WMStatsWriter.py +273 -0
- WMCore/Services/WMStats/__init__.py +0 -0
- WMCore/Services/WMStatsServer/WMStatsServer.py +134 -0
- WMCore/Services/WMStatsServer/__init__.py +0 -0
- WMCore/Services/WorkQueue/WorkQueue.py +492 -0
- WMCore/Services/WorkQueue/__init__.py +0 -0
- WMCore/Services/__init__.py +8 -0
- WMCore/Services/pycurl_manager.py +574 -0
- WMCore/WMBase.py +50 -0
- WMCore/WMConnectionBase.py +164 -0
- WMCore/WMException.py +183 -0
- WMCore/WMExceptions.py +269 -0
- WMCore/WMFactory.py +76 -0
- WMCore/WMInit.py +377 -0
- WMCore/WMLogging.py +104 -0
- WMCore/WMSpec/ConfigSectionTree.py +442 -0
- WMCore/WMSpec/Persistency.py +135 -0
- WMCore/WMSpec/Steps/BuildMaster.py +87 -0
- WMCore/WMSpec/Steps/BuildTools.py +201 -0
- WMCore/WMSpec/Steps/Builder.py +97 -0
- WMCore/WMSpec/Steps/Diagnostic.py +89 -0
- WMCore/WMSpec/Steps/Emulator.py +62 -0
- WMCore/WMSpec/Steps/ExecuteMaster.py +208 -0
- WMCore/WMSpec/Steps/Executor.py +210 -0
- WMCore/WMSpec/Steps/StepFactory.py +213 -0
- WMCore/WMSpec/Steps/TaskEmulator.py +75 -0
- WMCore/WMSpec/Steps/Template.py +204 -0
- WMCore/WMSpec/Steps/Templates/AlcaHarvest.py +76 -0
- WMCore/WMSpec/Steps/Templates/CMSSW.py +613 -0
- WMCore/WMSpec/Steps/Templates/DQMUpload.py +59 -0
- WMCore/WMSpec/Steps/Templates/DeleteFiles.py +70 -0
- WMCore/WMSpec/Steps/Templates/LogArchive.py +84 -0
- WMCore/WMSpec/Steps/Templates/LogCollect.py +105 -0
- WMCore/WMSpec/Steps/Templates/StageOut.py +105 -0
- WMCore/WMSpec/Steps/Templates/__init__.py +10 -0
- WMCore/WMSpec/Steps/WMExecutionFailure.py +21 -0
- WMCore/WMSpec/Steps/__init__.py +8 -0
- WMCore/WMSpec/Utilities.py +63 -0
- WMCore/WMSpec/WMSpecErrors.py +12 -0
- WMCore/WMSpec/WMStep.py +347 -0
- WMCore/WMSpec/WMTask.py +1997 -0
- WMCore/WMSpec/WMWorkload.py +2288 -0
- WMCore/WMSpec/WMWorkloadTools.py +382 -0
- WMCore/WMSpec/__init__.py +9 -0
- WMCore/WorkQueue/DataLocationMapper.py +273 -0
- WMCore/WorkQueue/DataStructs/ACDCBlock.py +47 -0
- WMCore/WorkQueue/DataStructs/Block.py +48 -0
- WMCore/WorkQueue/DataStructs/CouchWorkQueueElement.py +148 -0
- WMCore/WorkQueue/DataStructs/WorkQueueElement.py +274 -0
- WMCore/WorkQueue/DataStructs/WorkQueueElementResult.py +152 -0
- WMCore/WorkQueue/DataStructs/WorkQueueElementsSummary.py +185 -0
- WMCore/WorkQueue/DataStructs/__init__.py +0 -0
- WMCore/WorkQueue/Policy/End/EndPolicyInterface.py +44 -0
- WMCore/WorkQueue/Policy/End/SingleShot.py +22 -0
- WMCore/WorkQueue/Policy/End/__init__.py +32 -0
- WMCore/WorkQueue/Policy/PolicyInterface.py +17 -0
- WMCore/WorkQueue/Policy/Start/Block.py +258 -0
- WMCore/WorkQueue/Policy/Start/Dataset.py +180 -0
- WMCore/WorkQueue/Policy/Start/MonteCarlo.py +131 -0
- WMCore/WorkQueue/Policy/Start/ResubmitBlock.py +171 -0
- WMCore/WorkQueue/Policy/Start/StartPolicyInterface.py +316 -0
- WMCore/WorkQueue/Policy/Start/__init__.py +34 -0
- WMCore/WorkQueue/Policy/__init__.py +57 -0
- WMCore/WorkQueue/WMBSHelper.py +772 -0
- WMCore/WorkQueue/WorkQueue.py +1237 -0
- WMCore/WorkQueue/WorkQueueBackend.py +750 -0
- WMCore/WorkQueue/WorkQueueBase.py +39 -0
- WMCore/WorkQueue/WorkQueueExceptions.py +44 -0
- WMCore/WorkQueue/WorkQueueReqMgrInterface.py +278 -0
- WMCore/WorkQueue/WorkQueueUtils.py +130 -0
- WMCore/WorkQueue/__init__.py +13 -0
- WMCore/Wrappers/JsonWrapper/JSONThunker.py +342 -0
- WMCore/Wrappers/JsonWrapper/__init__.py +7 -0
- WMCore/Wrappers/__init__.py +6 -0
- WMCore/__init__.py +10 -0
- wmglobalqueue-2.4.5.1.data/data/bin/wmc-dist-patch +15 -0
- wmglobalqueue-2.4.5.1.data/data/bin/wmc-dist-unpatch +8 -0
- wmglobalqueue-2.4.5.1.data/data/bin/wmc-httpd +3 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/.couchapprc +1 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/README.md +40 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/_attachments/index.html +264 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/_attachments/js/ElementInfoByWorkflow.js +96 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/_attachments/js/StuckElementInfo.js +57 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/_attachments/js/WorkloadInfoTable.js +80 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/_attachments/js/dataTable.js +70 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/_attachments/js/namespace.js +23 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/_attachments/style/main.css +75 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/couchapp.json +4 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/filters/childQueueFilter.js +13 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/filters/filterDeletedDocs.js +3 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/filters/queueFilter.js +11 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/language +1 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/lib/mustache.js +333 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/lib/validate.js +27 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/lib/workqueue_utils.js +61 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/lists/elementsDetail.js +28 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/lists/filter.js +86 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/lists/stuckElements.js +38 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/lists/workRestrictions.js +153 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/lists/workflowSummary.js +28 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/rewrites.json +73 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/shows/redirect.js +23 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/shows/status.js +40 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/templates/ElementSummaryByWorkflow.html +27 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/templates/StuckElementSummary.html +26 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/templates/TaskStatus.html +23 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/templates/WorkflowSummary.html +27 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/templates/partials/workqueue-common-lib.html +2 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/templates/partials/yui-lib-remote.html +16 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/templates/partials/yui-lib.html +18 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/updates/in-place.js +50 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/validate_doc_update.js +8 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/vendor/couchapp/_attachments/jquery.couch.app.js +235 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/vendor/couchapp/_attachments/jquery.pathbinder.js +173 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/activeData/map.js +8 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/activeData/reduce.js +2 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/activeParentData/map.js +8 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/activeParentData/reduce.js +2 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/activePileupData/map.js +8 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/activePileupData/reduce.js +2 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/analyticsData/map.js +11 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/analyticsData/reduce.js +1 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/availableByPriority/map.js +6 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/conflicts/map.js +5 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/elements/map.js +5 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/elementsByData/map.js +8 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/elementsByParent/map.js +8 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/elementsByParentData/map.js +8 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/elementsByPileupData/map.js +8 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/elementsByStatus/map.js +8 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/elementsBySubscription/map.js +6 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/elementsByWorkflow/map.js +8 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/elementsByWorkflow/reduce.js +3 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/elementsDetailByWorkflowAndStatus/map.js +26 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobInjectStatusByRequest/map.js +10 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobInjectStatusByRequest/reduce.js +1 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobStatusByRequest/map.js +6 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobStatusByRequest/reduce.js +1 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobsByChildQueueAndPriority/map.js +6 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobsByChildQueueAndPriority/reduce.js +1 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobsByChildQueueAndStatus/map.js +6 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobsByChildQueueAndStatus/reduce.js +1 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobsByRequest/map.js +6 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobsByRequest/reduce.js +1 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobsByStatus/map.js +6 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobsByStatus/reduce.js +1 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobsByStatusAndPriority/map.js +6 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/jobsByStatusAndPriority/reduce.js +1 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/openRequests/map.js +6 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/recent-items/map.js +5 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/siteWhitelistByRequest/map.js +6 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/siteWhitelistByRequest/reduce.js +1 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/specsByWorkflow/map.js +5 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/stuckElements/map.js +38 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/wmbsInjectStatusByRequest/map.js +12 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/wmbsInjectStatusByRequest/reduce.js +3 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/wmbsUrl/map.js +6 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/wmbsUrl/reduce.js +2 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/wmbsUrlByRequest/map.js +6 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/wmbsUrlByRequest/reduce.js +2 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/workflowSummary/map.js +9 -0
- wmglobalqueue-2.4.5.1.data/data/data/couchapps/WorkQueue/views/workflowSummary/reduce.js +10 -0
- wmglobalqueue-2.4.5.1.dist-info/METADATA +26 -0
- wmglobalqueue-2.4.5.1.dist-info/RECORD +347 -0
- wmglobalqueue-2.4.5.1.dist-info/WHEEL +5 -0
- wmglobalqueue-2.4.5.1.dist-info/licenses/LICENSE +202 -0
- wmglobalqueue-2.4.5.1.dist-info/licenses/NOTICE +16 -0
- wmglobalqueue-2.4.5.1.dist-info/top_level.txt +2 -0
|
@@ -0,0 +1,1044 @@
|
|
|
1
|
+
#!/usr/bin/env python
|
|
2
|
+
"""
|
|
3
|
+
_DBSReader_
|
|
4
|
+
|
|
5
|
+
Readonly DBS Interface
|
|
6
|
+
|
|
7
|
+
"""
|
|
8
|
+
from __future__ import print_function, division
|
|
9
|
+
|
|
10
|
+
from builtins import object, str, bytes
|
|
11
|
+
from future.utils import viewitems
|
|
12
|
+
|
|
13
|
+
from Utils.Utilities import decodeBytesToUnicode, encodeUnicodeToBytesConditional
|
|
14
|
+
|
|
15
|
+
import logging
|
|
16
|
+
from collections import defaultdict
|
|
17
|
+
|
|
18
|
+
from RestClient.ErrorHandling.RestClientExceptions import HTTPError
|
|
19
|
+
from dbs.apis.dbsClient import DbsApi
|
|
20
|
+
from dbs.exceptions.dbsClientException import dbsClientException
|
|
21
|
+
from retry import retry
|
|
22
|
+
|
|
23
|
+
from Utils.IteratorTools import grouper, makeListElementsUnique
|
|
24
|
+
from Utils.PythonVersion import PY2
|
|
25
|
+
from WMCore.Services.DBS.DBSErrors import DBSReaderError, formatEx3
|
|
26
|
+
from WMCore.Services.DBS.DBSUtils import dbsListFileParents, dbsListFileLumis, \
|
|
27
|
+
dbsBlockOrigin, dbsParentFilesGivenParentDataset
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
### Needed for the pycurl comment, leave it out for now
|
|
31
|
+
# from WMCore.Services.pycurl_manager import getdata as multi_getdata
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def remapDBS3Keys(data, stringify=False, **others):
    """
    Map DBS3 field names onto their legacy DBS2-style names.

    Fields were renamed between DBS2 and DBS3; for every known DBS3 key
    present in *data*, the value is copied under the corresponding DBS2
    key name (the original key is kept as well).

    :param data: dict with DBS3-style keys; mutated in place and returned
    :param stringify: on Python 2 only, re-encode copied values to bytes
    :param others: extra {dbs3_key: dbs2_key} pairs to add to the mapping
    :return: the same dict instance, with the remapped keys added
    """
    mapping = {'num_file': 'NumberOfFiles', 'num_files': 'NumberOfFiles', 'num_event': 'NumberOfEvents',
               'num_block': 'NumberOfBlocks', 'num_lumi': 'NumberOfLumis',
               'event_count': 'NumberOfEvents', 'run_num': 'RunNumber',
               'file_size': 'FileSize', 'block_size': 'BlockSize',
               'file_count': 'NumberOfFiles', 'logical_file_name': 'LogicalFileName',
               'adler32': 'Adler32', 'check_sum': 'Checksum', 'md5': 'Md5',
               'block_name': 'BlockName', 'lumi_section_num': 'LumiSectionNumber'}

    mapping.update(others)

    def formatFunc(value):
        # Only re-encodes to bytes on Python 2 when stringify is requested;
        # on Python 3 this is a pass-through.
        return encodeUnicodeToBytesConditional(value, condition=PY2 and stringify)

    for name, newname in viewitems(mapping):
        if name in data:
            data[newname] = formatFunc(data[name])
    return data
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@retry(tries=3, delay=1)
def getDataTiers(dbsUrl):
    """
    Retrieve every data tier name registered in DBS.

    Retried up to 3 times with a 1 second delay on failure.
    NOTE: meant to be used behind some caching layer (MemoryCacheStruct),
    since the tier list changes rarely.

    :param dbsUrl: the DBS URL string
    :return: list of data tier name strings
    """
    dbsApi = DbsApi(dbsUrl)
    tiers = [row['data_tier_name'] for row in dbsApi.listDataTiers()]
    return tiers
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
# emulator hook is used to swap the class instance
|
|
67
|
+
# when emulator values are set.
|
|
68
|
+
# Look WMQuality.Emulators.EmulatorSetup module for the values
|
|
69
|
+
# @emulatorHook
|
|
70
|
+
class DBS3Reader(object):
|
|
71
|
+
"""
|
|
72
|
+
_DBSReader_
|
|
73
|
+
|
|
74
|
+
General API for reading data from DBS
|
|
75
|
+
"""
|
|
76
|
+
|
|
77
|
+
def __init__(self, url, logger=None, parallel=None, **contact):
|
|
78
|
+
"""
|
|
79
|
+
DBS3Reader constructor
|
|
80
|
+
|
|
81
|
+
:param url: url of DBS server
|
|
82
|
+
:param logger: logger to be used by this class
|
|
83
|
+
:param parallel: optional parameter to specify parallel execution of some APIs
|
|
84
|
+
You may pass any true value, e.g. True or 1. The parallel APIs are:
|
|
85
|
+
listDatasetFileDetails, listFileBlockLocation, getParentFilesGivenParentDataset
|
|
86
|
+
:param contact: optional parameters to pass to DbsApi class
|
|
87
|
+
"""
|
|
88
|
+
|
|
89
|
+
# instantiate dbs api object
|
|
90
|
+
try:
|
|
91
|
+
self.dbsURL = url.replace("cmsweb.cern.ch", "cmsweb-prod.cern.ch")
|
|
92
|
+
self.dbs = DbsApi(self.dbsURL, **contact)
|
|
93
|
+
self.logger = logger or logging.getLogger(self.__class__.__name__)
|
|
94
|
+
self.parallel = parallel
|
|
95
|
+
except Exception as ex:
|
|
96
|
+
msg = "Error in DBSReader with DbsApi\n"
|
|
97
|
+
msg += "%s\n" % formatEx3(ex)
|
|
98
|
+
raise DBSReaderError(msg) from None
|
|
99
|
+
|
|
100
|
+
def _getLumiList(self, blockName=None, lfns=None, validFileOnly=1):
|
|
101
|
+
"""
|
|
102
|
+
currently only take one lfn but dbs api need be updated
|
|
103
|
+
"""
|
|
104
|
+
try:
|
|
105
|
+
if blockName:
|
|
106
|
+
lumiLists = self.dbs.listFileLumis(block_name=blockName, validFileOnly=validFileOnly)
|
|
107
|
+
elif lfns:
|
|
108
|
+
lumiLists = []
|
|
109
|
+
for slfn in grouper(lfns, 50):
|
|
110
|
+
lumiLists.extend(self.dbs.listFileLumiArray(logical_file_name=slfn))
|
|
111
|
+
else:
|
|
112
|
+
# shouldn't call this with both blockName and lfns empty
|
|
113
|
+
# but still returns empty dict for that case
|
|
114
|
+
return {}
|
|
115
|
+
except Exception as ex:
|
|
116
|
+
msg = "Error in "
|
|
117
|
+
msg += "DBSReader.listFileLumiArray(%s)\n" % lfns
|
|
118
|
+
msg += "%s\n" % formatEx3(ex)
|
|
119
|
+
raise DBSReaderError(msg) from None
|
|
120
|
+
|
|
121
|
+
lumiDict = {}
|
|
122
|
+
for lumisItem in lumiLists:
|
|
123
|
+
lumiDict.setdefault(lumisItem['logical_file_name'], [])
|
|
124
|
+
item = {}
|
|
125
|
+
item["RunNumber"] = lumisItem['run_num']
|
|
126
|
+
item['LumiSectionNumber'] = lumisItem['lumi_section_num']
|
|
127
|
+
if lumisItem.get('event_count', None) is not None:
|
|
128
|
+
item['EventCount'] = lumisItem['event_count']
|
|
129
|
+
lumiDict[lumisItem['logical_file_name']].append(item)
|
|
130
|
+
# TODO: add key for lumi and event pair.
|
|
131
|
+
return lumiDict
|
|
132
|
+
|
|
133
|
+
def checkDBSServer(self):
|
|
134
|
+
"""
|
|
135
|
+
check whether dbs server is up and running
|
|
136
|
+
returns {"dbs_instance": "prod/global", "dbs_version": "3.3.144"}
|
|
137
|
+
"""
|
|
138
|
+
try:
|
|
139
|
+
return self.dbs.serverinfo()
|
|
140
|
+
except Exception as ex:
|
|
141
|
+
msg = "Error in "
|
|
142
|
+
msg += "DBS server is not up: %s" % self.dbsURL
|
|
143
|
+
msg += "%s\n" % formatEx3(ex)
|
|
144
|
+
raise DBSReaderError(msg) from None
|
|
145
|
+
|
|
146
|
+
def listPrimaryDatasets(self, match='*'):
|
|
147
|
+
"""
|
|
148
|
+
_listPrimaryDatasets_
|
|
149
|
+
|
|
150
|
+
return a list of primary datasets, The full dataset name must be provided
|
|
151
|
+
pattern based mathcing is no longer supported.
|
|
152
|
+
If no expression is provided, all datasets are returned
|
|
153
|
+
"""
|
|
154
|
+
try:
|
|
155
|
+
result = self.dbs.listPrimaryDatasets(primary_ds_name=match)
|
|
156
|
+
except Exception as ex:
|
|
157
|
+
msg = "Error in DBSReader.listPrimaryDataset(%s)\n" % match
|
|
158
|
+
msg += "%s\n" % formatEx3(ex)
|
|
159
|
+
raise DBSReaderError(msg) from None
|
|
160
|
+
|
|
161
|
+
result = [x['primary_ds_name'] for x in result]
|
|
162
|
+
return result
|
|
163
|
+
|
|
164
|
+
def matchProcessedDatasets(self, primary, tier, process):
|
|
165
|
+
"""
|
|
166
|
+
_matchProcessedDatasets_
|
|
167
|
+
|
|
168
|
+
return a list of Processed datasets
|
|
169
|
+
"""
|
|
170
|
+
result = []
|
|
171
|
+
try:
|
|
172
|
+
datasets = self.dbs.listDatasets(primary_ds_name=primary, data_tier_name=tier, detail=True)
|
|
173
|
+
except dbsClientException as ex:
|
|
174
|
+
msg = "Error in DBSReader.listProcessedDatasets(%s)\n" % primary
|
|
175
|
+
msg += "%s\n" % formatEx3(ex)
|
|
176
|
+
raise DBSReaderError(msg) from None
|
|
177
|
+
|
|
178
|
+
for dataset in datasets:
|
|
179
|
+
dataset = remapDBS3Keys(dataset, processed_ds_name='Name')
|
|
180
|
+
dataset['PathList'] = [dataset['dataset']]
|
|
181
|
+
if dataset['Name'] == process:
|
|
182
|
+
result.append(dataset)
|
|
183
|
+
return result
|
|
184
|
+
|
|
185
|
+
def listRuns(self, dataset=None, block=None):
|
|
186
|
+
"""
|
|
187
|
+
it gets list of DbsRun object but for our purpose
|
|
188
|
+
only list of number is collected.
|
|
189
|
+
DbsRun (RunNumber,
|
|
190
|
+
NumberOfEvents,
|
|
191
|
+
NumberOfLumiSections,
|
|
192
|
+
TotalLuminosity,
|
|
193
|
+
StoreNumber,
|
|
194
|
+
StartOfRungetLong,
|
|
195
|
+
EndOfRun,
|
|
196
|
+
CreationDate,
|
|
197
|
+
CreatedBy,
|
|
198
|
+
LastModificationDate,
|
|
199
|
+
LastModifiedBy
|
|
200
|
+
)
|
|
201
|
+
"""
|
|
202
|
+
runs = []
|
|
203
|
+
try:
|
|
204
|
+
if block:
|
|
205
|
+
results = self.dbs.listRuns(block_name=block)
|
|
206
|
+
else:
|
|
207
|
+
results = self.dbs.listRuns(dataset=dataset)
|
|
208
|
+
except dbsClientException as ex:
|
|
209
|
+
msg = "Error in DBSReader.listRuns(%s, %s)\n" % (dataset, block)
|
|
210
|
+
msg += "%s\n" % formatEx3(ex)
|
|
211
|
+
raise DBSReaderError(msg) from None
|
|
212
|
+
for x in results:
|
|
213
|
+
runs.extend(x['run_num'])
|
|
214
|
+
return runs
|
|
215
|
+
|
|
216
|
+
def listRunLumis(self, dataset=None, block=None):
|
|
217
|
+
"""
|
|
218
|
+
It gets a list of DBSRun objects and returns the number of lumisections per run
|
|
219
|
+
DbsRun (RunNumber,
|
|
220
|
+
NumberOfEvents,
|
|
221
|
+
NumberOfLumiSections,
|
|
222
|
+
TotalLuminosity,
|
|
223
|
+
StoreNumber,
|
|
224
|
+
StartOfRungetLong,
|
|
225
|
+
EndOfRun,
|
|
226
|
+
CreationDate,
|
|
227
|
+
CreatedBy,
|
|
228
|
+
LastModificationDate,
|
|
229
|
+
LastModifiedBy
|
|
230
|
+
)
|
|
231
|
+
"""
|
|
232
|
+
# Pointless code in python3
|
|
233
|
+
block = decodeBytesToUnicode(block)
|
|
234
|
+
dataset = decodeBytesToUnicode(dataset)
|
|
235
|
+
|
|
236
|
+
try:
|
|
237
|
+
if block:
|
|
238
|
+
results = self.dbs.listRuns(block_name=block)
|
|
239
|
+
else:
|
|
240
|
+
results = self.dbs.listRuns(dataset=dataset)
|
|
241
|
+
except dbsClientException as ex:
|
|
242
|
+
msg = "Error in DBSReader.listRuns(%s, %s)\n" % (dataset, block)
|
|
243
|
+
msg += "%s\n" % formatEx3(ex)
|
|
244
|
+
raise DBSReaderError(msg) from None
|
|
245
|
+
|
|
246
|
+
# send runDict format as result, this format is for sync with dbs2 call
|
|
247
|
+
# which has {run_number: num_lumis} but dbs3 call doesn't return num Lumis
|
|
248
|
+
# So it returns {run_number: None}
|
|
249
|
+
# TODO: After DBS2 is completely removed change the return format more sensible one
|
|
250
|
+
|
|
251
|
+
runDict = {}
|
|
252
|
+
for x in results:
|
|
253
|
+
for runNumber in x["run_num"]:
|
|
254
|
+
runDict[runNumber] = None
|
|
255
|
+
return runDict
|
|
256
|
+
|
|
257
|
+
def listProcessedDatasets(self, primary, dataTier='*'):
|
|
258
|
+
"""
|
|
259
|
+
_listProcessedDatasets_
|
|
260
|
+
|
|
261
|
+
return a list of Processed datasets for the primary and optional
|
|
262
|
+
data tier value
|
|
263
|
+
|
|
264
|
+
"""
|
|
265
|
+
try:
|
|
266
|
+
result = self.dbs.listDatasets(primary_ds_name=primary, data_tier_name=dataTier)
|
|
267
|
+
except dbsClientException as ex:
|
|
268
|
+
msg = "Error in DBSReader.listProcessedDatasets(%s)\n" % primary
|
|
269
|
+
msg += "%s\n" % formatEx3(ex)
|
|
270
|
+
raise DBSReaderError(msg) from None
|
|
271
|
+
|
|
272
|
+
result = [x['dataset'].split('/')[2] for x in result]
|
|
273
|
+
return result
|
|
274
|
+
|
|
275
|
+
def listDatasetFiles(self, datasetPath):
|
|
276
|
+
"""
|
|
277
|
+
_listDatasetFiles_
|
|
278
|
+
|
|
279
|
+
Get list of files for dataset
|
|
280
|
+
|
|
281
|
+
"""
|
|
282
|
+
return [x['logical_file_name'] for x in self.dbs.listFileArray(dataset=datasetPath)]
|
|
283
|
+
|
|
284
|
+
def listDatatiers(self):
|
|
285
|
+
"""
|
|
286
|
+
_listDatatiers_
|
|
287
|
+
|
|
288
|
+
Get a list of datatiers known by DBS.
|
|
289
|
+
"""
|
|
290
|
+
return [tier['data_tier_name'] for tier in self.dbs.listDataTiers()]
|
|
291
|
+
|
|
292
|
+
def listDatasetFileDetails(self, datasetPath, getParents=False, getLumis=True, validFileOnly=1):
    """
    TODO: This is completely wrong need to be redone. or be removed - getting dataset altogether
    might be to costly

    _listDatasetFileDetails_

    Get list of lumis, events, and parents for each file in a dataset
    Return a dict where the keys are the files, and for each file we have something like:
        { 'NumberOfEvents': 545,
          'BlockName': '/HighPileUp/Run2011A-v1/RAW#dd6e0796-cbcc-11e0-80a9-003048caaace',
          'Lumis': {173658: [8, 12, 9, 14, 19, 109, 105]},
          'Parents': [],
          'Checksum': '22218315',
          'Adler32': 'a41a1446',
          'FileSize': 286021145,
          'ValidFile': 1
        }

    :param datasetPath: dataset path string
    :param getParents: if True, also resolve parent LFNs per file
    :param getLumis: if True, also resolve run/lumi information per file
    :param validFileOnly: passed through to DBS; 1 restricts to valid files
    :return: dict keyed by LFN as described above
    """
    # full file listing for the dataset (detail=True gives per-file metadata)
    fileDetails = self.getFileListByDataset(dataset=datasetPath, validFileOnly=validFileOnly, detail=True)
    blocks = set()  # the set of blocks of the dataset
    # Iterate over the files and prepare the set of blocks and a dict where the keys are the files
    files = {}
    for f in fileDetails:
        blocks.add(f['block_name'])
        files[f['logical_file_name']] = remapDBS3Keys(f, stringify=True)
        files[f['logical_file_name']]['ValidFile'] = f['is_file_valid']
        files[f['logical_file_name']]['Lumis'] = {}
        files[f['logical_file_name']]['Parents'] = []

    # parallel execution for listFileParents and listFileLumis APIs
    if self.parallel:
        if getParents:
            # dbsListFileParents fans out one call per block concurrently
            block_parents = dbsListFileParents(self.dbsURL, blocks)
            for blockName, parents in block_parents.items():
                for p in parents:
                    if p['logical_file_name'] in files:  # invalid files are not there if validFileOnly=1
                        files[p['logical_file_name']]['Parents'].extend(p['parent_logical_file_name'])
        if getLumis:
            block_file_lumis = dbsListFileLumis(self.dbsURL, blocks)
            for blockName, file_lumis in block_file_lumis.items():
                for f in file_lumis:
                    if f['logical_file_name'] in files:  # invalid files are not there if validFileOnly=1
                        # a file can appear once per run; accumulate lumis per run number
                        if f['run_num'] in files[f['logical_file_name']]['Lumis']:
                            files[f['logical_file_name']]['Lumis'][f['run_num']].extend(f['lumi_section_num'])
                        else:
                            files[f['logical_file_name']]['Lumis'][f['run_num']] = f['lumi_section_num']
        # parallel path returns here; the serial loop below is never reached
        return files

    # Iterate over the blocks and get parents and lumis
    for blockName in blocks:
        # get the parents
        if getParents:
            parents = self.dbs.listFileParents(block_name=blockName)
            for p in parents:
                if p['logical_file_name'] in files:  # invalid files are not there if validFileOnly=1
                    files[p['logical_file_name']]['Parents'].extend(p['parent_logical_file_name'])

        if getLumis:
            # get the lumis
            file_lumis = self.dbs.listFileLumis(block_name=blockName)
            for f in file_lumis:
                if f['logical_file_name'] in files:  # invalid files are not there if validFileOnly=1
                    if f['run_num'] in files[f['logical_file_name']]['Lumis']:
                        files[f['logical_file_name']]['Lumis'][f['run_num']].extend(f['lumi_section_num'])
                    else:
                        files[f['logical_file_name']]['Lumis'][f['run_num']] = f['lumi_section_num']

    return files
|
|
362
|
+
|
|
363
|
+
def crossCheck(self, datasetPath, *lfns):
    """
    _crossCheck_

    For the dataset provided, check that the lfns listed all exist
    in the dataset.

    :param datasetPath: dataset path string
    :param lfns: LFN strings to check against the dataset content
    :return: list of the given LFNs that are present in the dataset
    :raises DBSReaderError: if the DBS lookup fails
    """
    try:
        knownInDBS = {entry['logical_file_name']
                      for entry in self.dbs.listFileArray(dataset=datasetPath,
                                                          validFileOnly=1, detail=False)}
    except Exception as exc:
        msg = "Error in DBSReader.crossCheck({}) with {} lfns.".format(datasetPath, len(lfns))
        msg += "\nDetails: {}\n".format(formatEx3(exc))
        raise DBSReaderError(msg) from None
    return list(knownInDBS.intersection(set(lfns)))
|
|
383
|
+
|
|
384
|
+
def crossCheckMissing(self, datasetPath, *lfns):
    """
    _crossCheckMissing_

    As cross check, but return value is a list of files that
    are *not* known by DBS

    :param datasetPath: dataset path string
    :param lfns: LFN strings to check against the dataset content
    :return: list of the given LFNs that DBS does not know about
    :raises DBSReaderError: if the DBS lookup fails
    """
    try:
        knownInDBS = {entry['logical_file_name']
                      for entry in self.dbs.listFileArray(dataset=datasetPath,
                                                          validFileOnly=1, detail=False)}
    except Exception as exc:
        msg = "Error in DBSReader.crossCheckMissing({}) with {} lfns.".format(datasetPath, len(lfns))
        msg += "\nDetails: {}\n".format(formatEx3(exc))
        raise DBSReaderError(msg) from None
    requested = set(lfns)
    matched = knownInDBS.intersection(requested)
    return list(requested.difference(matched))
|
|
404
|
+
|
|
405
|
+
def getDBSSummaryInfo(self, dataset=None, block=None):
    """
    Get dataset summary includes # of files, events, blocks and total size

    :param dataset: optional dataset path to summarise
    :param block: optional block name; takes precedence over dataset
    :return: dict with remapped summary keys plus 'path' and 'block',
             or an empty dict when nothing is found
    :raises DBSReaderError: if the DBS call fails
    """
    if dataset:
        self.checkDatasetPath(dataset)
    # block takes precedence over the dataset-wide summary
    queryArgs = {'validFileOnly': 1}
    if block:
        queryArgs['block_name'] = block
    else:
        queryArgs['dataset'] = dataset
    try:
        summary = self.dbs.listFileSummaries(**queryArgs)
    except Exception as ex:
        msg = "Error in DBSReader.getDBSSummaryInfo(%s, %s)\n" % (dataset, block)
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg) from None

    if not summary:  # missing data or all files invalid
        return {}

    result = remapDBS3Keys(summary[0], stringify=True)
    result['path'] = dataset or ''
    result['block'] = block or ''
    return result
|
|
428
|
+
|
|
429
|
+
def listFileBlocks(self, dataset, blockName=None):
    """
    _listFileBlocks_

    Retrieve a list of fileblock names for a dataset

    :param dataset: dataset path string
    :param blockName: optional block name to restrict the query
    :return: list of block name strings
    :raises DBSReaderError: if the dataset is invalid or the DBS call fails
    """
    self.checkDatasetPath(dataset)
    queryArgs = {'dataset': dataset, 'detail': False}
    if blockName:
        # a specific block query needs the detailed listing
        queryArgs['block_name'] = blockName
        queryArgs['detail'] = True
    try:
        blocks = self.dbs.listBlocks(**queryArgs)
    except dbsClientException as ex:
        msg = "Error in DBSReader.listFileBlocks(%s)\n" % dataset
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg) from None

    return [blk['block_name'] for blk in blocks]
|
|
451
|
+
|
|
452
|
+
def blockExists(self, fileBlockName):
    """
    _blockExists_

    Check to see if block with name provided exists in the DBS
    Instance.

    :param fileBlockName: block name string
    :return: True if the block exists, False otherwise
    :raises DBSReaderError: if the name is invalid or the DBS call fails
    """
    self.checkBlockName(fileBlockName)
    try:
        matched = self.dbs.listBlocks(block_name=fileBlockName)
    except Exception as ex:
        msg = "Error in "
        msg += "DBSReader.blockExists(%s)\n" % fileBlockName
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg) from None

    return len(matched) > 0
|
|
475
|
+
|
|
476
|
+
def listFilesInBlock(self, fileBlockName, lumis=True, validFileOnly=1):
    """
    _listFilesInBlock_

    Get a list of files in the named fileblock
    TODO: lumis can be false when lumi splitting is not required
    However WMBSHelper expect file['LumiList'] to get the run number
    so for now it will be always true.
    We need to clean code up when dbs2 is completely deprecated.
    calling lumis for run number is expensive.

    :param fileBlockName: block name string
    :param lumis: if True, attach a 'LumiList' entry to every file dict
    :param validFileOnly: passed through to DBS; 1 restricts to valid files
    :return: list of per-file dicts with keys remapped by remapDBS3Keys
    :raises DBSReaderError: if the block does not exist or the DBS call fails
    """
    result = []
    if not self.blockExists(fileBlockName):
        msg = "DBSReader.listFilesInBlock(%s): No matching data"
        raise DBSReaderError(msg % fileBlockName) from None

    try:
        files = self.dbs.listFileArray(block_name=fileBlockName, validFileOnly=validFileOnly, detail=True)
    except dbsClientException as ex:
        msg = "Error in "
        msg += "DBSReader.listFilesInBlock(%s)\n" % fileBlockName
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg) from None

    if not files:
        # there are no valid files in this block, stop here!
        return result

    # one extra DBS round-trip resolves all lumis for the block at once
    if lumis:
        lumiDict = self._getLumiList(blockName=fileBlockName, validFileOnly=validFileOnly)

    for fileInfo in files:
        if lumis:
            fileInfo["LumiList"] = lumiDict[fileInfo['logical_file_name']]
        result.append(remapDBS3Keys(fileInfo, stringify=True))
    return result
|
|
512
|
+
|
|
513
|
+
def listFilesInBlockWithParents(self, fileBlockName, lumis=True, validFileOnly=1):
    """
    _listFilesInBlockWithParents_

    Get a list of files in the named fileblock including
    the parents of that file.
    TODO: lumis can be false when lumi splitting is not required
    However WMBSHelper expect file['LumiList'] to get the run number
    so for now it will be always true.

    :param fileBlockName: block name string
    :param lumis: if True, also attach lumi information to each parent file
    :param validFileOnly: passed through to DBS; 1 restricts to valid files
    :return: the listFilesInBlock() result with a 'ParentList' entry added
             to every file dict
    :raises DBSReaderError: if the block does not exist, the DBS call fails,
             or no parent files exist for this block
    """
    if not self.blockExists(fileBlockName):
        msg = "DBSReader.listFilesInBlockWithParents(%s): No matching data"
        raise DBSReaderError(msg % fileBlockName) from None

    try:
        # TODO: shoud we get only valid block for this?
        files = self.dbs.listFileParents(block_name=fileBlockName)
        fileDetails = self.listFilesInBlock(fileBlockName, lumis, validFileOnly)

    except dbsClientException as ex:
        msg = "Error in "
        msg += "DBSReader.listFilesInBlockWithParents(%s)\n" % (
            fileBlockName,)
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg) from None

    # invert the child->parents relation so we can look children up by parent LFN
    childByParents = defaultdict(list)
    for f in files:
        # Probably a child can have more than 1 parent file
        for fp in f['parent_logical_file_name']:
            childByParents[fp].append(f['logical_file_name'])

    parentsLFNs = list(childByParents)

    if len(parentsLFNs) == 0:
        msg = "Error in "
        msg += "DBSReader.listFilesInBlockWithParents(%s)\n There is no parents files" % (
            fileBlockName)
        raise DBSReaderError(msg) from None

    parentFilesDetail = []
    # TODO: slicing parentLFNs util DBS api is handling that.
    # Remove slicing if DBS api handles
    for pLFNs in grouper(parentsLFNs, 50):
        parentFilesDetail.extend(self.dbs.listFileArray(logical_file_name=pLFNs, detail=True))

    if lumis:
        parentLumis = self._getLumiList(lfns=parentsLFNs)

    # build the per-child list of parent file dicts
    parentsByLFN = defaultdict(list)

    for pf in parentFilesDetail:
        parentLFN = pf['logical_file_name']
        dbsFile = remapDBS3Keys(pf, stringify=True)
        if lumis:
            dbsFile["LumiList"] = parentLumis[parentLFN]

        for childLFN in childByParents[parentLFN]:
            parentsByLFN[childLFN].append(dbsFile)

    for fileInfo in fileDetails:
        fileInfo["ParentList"] = parentsByLFN[fileInfo['logical_file_name']]

    return fileDetails
|
|
578
|
+
|
|
579
|
+
def lfnsInBlock(self, fileBlockName):
    """
    _lfnsInBlock_

    LFN list only for block, details = False => faster query

    :param fileBlockName: block name string
    :return: list of LFNs (detail=False listing) for the block
    :raises DBSReaderError: if the block does not exist or the DBS call fails
    """
    if not self.blockExists(fileBlockName):
        msg = "DBSReader.lfnsInBlock(%s): No matching data"
        raise DBSReaderError(msg % fileBlockName) from None

    try:
        lfns = self.dbs.listFileArray(block_name=fileBlockName, validFileOnly=1, detail=False)
        return lfns
    except dbsClientException as ex:
        msg = "Error in "
        # fixed: error message previously named listFilesInBlock (copy/paste)
        msg += "DBSReader.lfnsInBlock(%s)\n" % fileBlockName
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg) from None
|
|
598
|
+
|
|
599
|
+
def listFileBlockLocation(self, fileBlockNames):
    """
    _listFileBlockLocation_

    Get origin_site_name of a block

    :param fileBlockNames: a single block name (str/bytes) or a list of them
    :return: for a single name, a list of origin site names; for a list,
             a dict mapping each block name to its list of origin sites.
             Sites named 'UNKNOWN' (and None) are filtered out.
    :raises DBSReaderError: if any name is invalid or the DBS call fails
    """

    # remember whether the caller passed one name, to mirror the input shape
    singleBlockName = None
    if isinstance(fileBlockNames, (str, bytes)):
        singleBlockName = fileBlockNames
        fileBlockNames = [fileBlockNames]

    for block in fileBlockNames:
        self.checkBlockName(block)

    locations = {}
    node_filter = set(['UNKNOWN', None])

    blocksInfo = {}
    try:
        if self.parallel:
            # one concurrent fan-out call instead of a per-block loop
            data = dbsBlockOrigin(self.dbsURL, fileBlockNames)
            for block, items in data.items():
                blocksInfo.setdefault(block, [])
                for blockInfo in items:
                    blocksInfo[block].append(blockInfo['origin_site_name'])
        else:
            for block in fileBlockNames:
                blocksInfo.setdefault(block, [])
                # there should be only one element with a single origin site string ...
                for blockInfo in self.dbs.listBlockOrigin(block_name=block):
                    blocksInfo[block].append(blockInfo['origin_site_name'])
    except dbsClientException as ex:
        msg = "Error in DBS3Reader: self.dbs.listBlockOrigin(block_name=%s)\n" % fileBlockNames
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg) from None

    for block in fileBlockNames:
        # drop placeholder/unknown sites before reporting
        valid_nodes = set(blocksInfo.get(block, [])) - node_filter
        locations[block] = list(valid_nodes)

    # returning single list if a single block is passed
    if singleBlockName:
        return locations[singleBlockName]

    return locations
|
|
646
|
+
|
|
647
|
+
def getFileBlock(self, fileBlockName):
    """
    Retrieve a list of files in the block; a flag whether the
    block is still open or not; and it used to resolve the block
    location via PhEDEx.

    :param fileBlockName: block name string
    :return: a dictionary in the format of:
        {"PhEDExNodeNames" : [],
         "Files" : { LFN : Events }}
    """
    blockFiles = self.listFilesInBlock(fileBlockName)
    # FIXME: we better get rid of this line!
    return {"PhEDExNodeNames": [], "Files": blockFiles}
|
|
660
|
+
|
|
661
|
+
def getFileBlockWithParents(self, fileBlockName):
    """
    Retrieve a list of parent files in the block; a flag whether the
    block is still open or not; and it used to resolve the block
    location via PhEDEx.

    :param fileBlockName: block name (str or bytes; bytes are decoded)
    :return: a dictionary in the format of:
        {"PhEDExNodeNames" : [],
         "Files" : { LFN : Events }}
    :raises DBSReaderError: if the block does not exist
    """
    fileBlockName = decodeBytesToUnicode(fileBlockName)

    if not self.blockExists(fileBlockName):
        msg = "DBSReader.getFileBlockWithParents(%s): No matching data"
        raise DBSReaderError(msg % fileBlockName) from None

    # FIXME: we better get rid of this line!
    return {"PhEDExNodeNames": [],
            "Files": self.listFilesInBlockWithParents(fileBlockName)}
|
|
680
|
+
|
|
681
|
+
def listBlockParents(self, blockName):
    """
    Return a list of parent blocks for a given child block name

    :param blockName: child block name string
    :return: list of parent block name strings
    :raises DBSReaderError: if the block name is invalid
    """
    # FIXME: note the different returned data structure
    self.checkBlockName(blockName)
    # dead-store `result = []` removed; the comprehension builds the result
    blocks = self.dbs.listBlockParents(block_name=blockName)
    return [block['parent_block_name'] for block in blocks]
|
|
691
|
+
|
|
692
|
+
def blockToDatasetPath(self, blockName):
    """
    _blockToDatasetPath_

    Given a block name, get the dataset Path associated with that
    Block.

    :param blockName: block name string
    :return: the dataset path string, or None if the block is not found
    :raises DBSReaderError: if the name is invalid or the DBS call fails
    """
    self.checkBlockName(blockName)
    try:
        matchedBlocks = self.dbs.listBlocks(block_name=blockName, detail=True)
    except Exception as ex:
        msg = "Error in "
        msg += "DBSReader.blockToDatasetPath(%s)\n" % blockName
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg) from None

    if matchedBlocks == []:
        return None

    # take the dataset of the last matching block entry
    return matchedBlocks[-1].get('dataset', None)
|
|
716
|
+
|
|
717
|
+
def listDatasetLocation(self, datasetName):
    """
    _listDatasetLocation_

    List the origin SEs where there is at least a block of the given
    dataset.

    :param datasetName: dataset path string
    :return: list of origin site names ('UNKNOWN'/None are filtered out)
    :raises DBSReaderError: if the dataset is invalid or the DBS call fails
    """
    self.checkDatasetPath(datasetName)

    sites = set()
    try:
        blocksInfo = self.dbs.listBlockOrigin(dataset=datasetName)
    except dbsClientException as ex:
        msg = "Error in DBSReader: dbsApi.listBlocks(dataset=%s)\n" % datasetName
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg) from None

    if not blocksInfo:  # no data location from dbs
        return list(sites)

    for info in blocksInfo:
        origin = info.get("origin_site_name", None)
        # skip placeholder/unknown site names
        if origin not in ['UNKNOWN', None]:
            sites.add(origin)

    return list(sites)
|
|
742
|
+
|
|
743
|
+
def checkDatasetPath(self, pathName):
    """
    This method raises an exception for any invalid dataset name
    and datasets unknown to DBS. Otherwise None is returned.

    :param pathName: dataset path string to validate
    :return: None
    :raises DBSReaderError: for empty/None names, unknown datasets,
             or DBS client/HTTP errors
    """
    # guard clause: reject obviously invalid names before touching DBS
    if pathName in ("", None):
        raise DBSReaderError("Invalid Dataset Path name: => %s <=" % pathName) from None

    try:
        result = self.dbs.listDatasets(dataset=pathName, dataset_access_type='*')
        if len(result) == 0:
            raise DBSReaderError("Dataset %s doesn't exist in DBS %s" % (pathName, self.dbsURL)) from None
    except (dbsClientException, HTTPError) as ex:
        msg = "Error in "
        msg += "DBSReader.checkDatasetPath(%s)\n" % pathName
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg) from None
    return
|
|
761
|
+
|
|
762
|
+
def checkBlockName(self, blockName):
    """
    _checkBlockName_

    Validate a block name, raising for empty, wildcard or None values.

    :param blockName: block name string
    :return: None
    :raises DBSReaderError: for "", "*" or None block names
    """
    invalidNames = ("", "*", None)
    if blockName in invalidNames:
        raise DBSReaderError("Invalid Block name: => %s <=" % blockName) from None
|
|
768
|
+
|
|
769
|
+
def getFileListByDataset(self, dataset, validFileOnly=1, detail=True):
    """
    _getFileListByDataset_

    Given a dataset, retrieves all blocks, lfns and number of events (among other
    not really important info).

    :param dataset: dataset path string
    :param validFileOnly: passed through to DBS; 1 restricts to valid files
    :param detail: if True, include per-file metadata in each dict
    :return: list of dicts as returned by the DBS listFileArray API
    :raises DBSReaderError: if the DBS call fails
    """
    try:
        return self.dbs.listFileArray(dataset=dataset, validFileOnly=validFileOnly, detail=detail)
    except dbsClientException as ex:
        msg = "Error in "
        msg += "DBSReader.getFileListByDataset(%s)\n" % dataset
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg) from None
|
|
787
|
+
|
|
788
|
+
def listDatasetParents(self, childDataset):
    """
    list the the parents dataset path given childDataset

    :param childDataset: child dataset path string
    :return: list of parent-dataset dicts from DBS
    :raises DBSReaderError: if the DBS call fails
    """
    try:
        return self.dbs.listDatasetParents(dataset=childDataset)
    except Exception as ex:
        msg = "Error in "
        msg += "DBSReader.listDatasetParents(%s)\n" % childDataset
        msg += "%s\n" % formatEx3(ex)
        raise DBSReaderError(msg) from None
|
|
800
|
+
|
|
801
|
+
# def getListFilesByLumiAndDataset(self, dataset, files):
|
|
802
|
+
# "Unsing pycurl to get all the child parents pair for given dataset"
|
|
803
|
+
#
|
|
804
|
+
# urls = ['%s/data/dbs/fileparentbylumis?block_name=%s' % (
|
|
805
|
+
# self.dbsURL, b["block_name"]) for b in self.dbs.listBlocks(dataset=dataset)]
|
|
806
|
+
#
|
|
807
|
+
# data = multi_getdata(urls, ckey(), cert())
|
|
808
|
+
# rdict = {}
|
|
809
|
+
# for row in data:
|
|
810
|
+
# try:
|
|
811
|
+
# data = json.loads(row['data'])
|
|
812
|
+
# rdict[req] = data['result'][0] # we get back {'result': [workflow]} dict
|
|
813
|
+
# except Exception as exp:
|
|
814
|
+
# print("ERROR: fail to load data as json record, error=%s" % str(exp))
|
|
815
|
+
# print(row)
|
|
816
|
+
# return rdict
|
|
817
|
+
|
|
818
|
+
def getParentFilesGivenParentDataset(self, parentDataset, childLFNs):
    """
    returns parent files for given childLFN when DBS doesn't have direct parent child relationship in DB
    Only use this for finding missing parents

    :param parentDataset: parent dataset for childLFN
    :param childLFNs: one LFN or a list of LFNs in the child dataset
    :return: dict-like (defaultdict of sets) mapping each child LFN to the
             set of parent LFNs that share its run/lumi sections
    """
    # run/lumi info for each child file drives the parent lookup
    fInfo = self.dbs.listFileLumiArray(logical_file_name=childLFNs)
    if self.parallel:
        # concurrent fan-out variant of the loop below
        return dbsParentFilesGivenParentDataset(self.dbsURL, parentDataset, fInfo)

    parentFiles = defaultdict(set)
    for f in fInfo:
        # parents are the files in parentDataset covering the same run/lumis
        pFileList = self.dbs.listFiles(dataset=parentDataset, run_num=f['run_num'], lumi_list=f['lumi_section_num'])
        pFiles = set([x['logical_file_name'] for x in pFileList])
        # union, because the same child LFN can appear once per run
        parentFiles[f['logical_file_name']] = parentFiles[f['logical_file_name']].union(pFiles)
    return parentFiles
|
|
837
|
+
|
|
838
|
+
def getParentFilesByLumi(self, childLFN):
    """
    get the parent file's lfns by lumi (This might not be the actual parentage relations in DBS just parentage by Lumis).
    use for only specific lfn for validating purpose, for the parentage fix use findAndInsertMissingParentage

    :param childLFN: child file LFN string
    :return: list of dictionary with parent files for given child LFN and parent dataset
        [{"ParentDataset": /abc/bad/ddd, "ParentFiles": [alf, baf, ...]]
    """
    result = []
    for childDs in self.dbs.listDatasets(logical_file_name=childLFN):
        # each child dataset may have several parent datasets
        for parent in self.dbs.listDatasetParents(dataset=childDs["dataset"]):
            parentFiles = self.getParentFilesGivenParentDataset(parent['parent_dataset'], childLFN)
            result.append({"ParentDataset": parent['parent_dataset'],
                           "ParentFiles": list(parentFiles[childLFN])})
    return result
|
|
854
|
+
|
|
855
|
+
def insertFileParents(self, childBlockName, childParentsIDPairs, missingFiles=0):
    """
    For a given block name, inject its child/parent file id relationship

    :param childBlockName: child block name
    :param childParentsIDPairs: list of list child and parent file ids, i.e. [[1,2], [3,4]...]
        dbs validate child ids from the childBlockName
    :param missingFiles: an integer with the number of children files missing parents
    :return: None
    """
    self.logger.debug("Going to insert parentage for child_parent_id_list: %s",
                      childParentsIDPairs)
    payload = {"block_name": childBlockName,
               "child_parent_id_list": childParentsIDPairs,
               "missing_files": missingFiles}
    return self.dbs.insertFileParents(payload)
|
|
869
|
+
|
|
870
|
+
def listBlocksWithNoParents(self, childDataset):
    """
    Given a dataset name, list all its blocks, fetch their parentage
    blocks and return a set of blocks without any parentage information.

    :param childDataset: string with a dataset name
    :return: set of child blocks with no parentBlock
    """
    blockNames = [blk['block_name'] for blk in self.dbs.listBlocks(dataset=childDataset)]
    parentBlocks = self.dbs.listBlockParents(block_name=blockNames)

    # blocks that show up in the parentage listing DO have parents
    withParents = {pblk['this_block_name'] for pblk in parentBlocks}

    return set(blockNames) - withParents
|
|
889
|
+
|
|
890
|
+
def listFilesWithNoParents(self, childBlockName):
    """
    Return the LFNs in a block that have no parentage record in DBS.

    :param childBlockName: block name string
    :return: list of LFNs without any parent file
    """
    allFiles = self.dbs.listFiles(block_name=childBlockName)
    parentFiles = self.dbs.listFileParents(block_name=childBlockName)

    everyLfn = {info['logical_file_name'] for info in allFiles}
    # files present in the parentage listing DO have parents
    withParent = {info['logical_file_name'] for info in parentFiles}

    return list(everyLfn - withParent)
|
|
908
|
+
|
|
909
|
+
def fixMissingParentageDatasets(self, childDataset, insertFlag=True):
    """
    Resolve and (optionally) insert missing file parentage for every block
    of the given child dataset.

    :param childDataset: child dataset need to set the parentage correctly.
    :param insertFlag: if True, write the resolved parentage back to DBS
    :return: list of blocks which failed to insert parentage, for retry
             (empty list when there is nothing to do or everything succeeded)
    """
    parentDatasets = self.listDatasetParents(childDataset)
    self.logger.info("Parent datasets for %s are: %s", childDataset, parentDatasets)
    # parentDatasets format is
    # [{'this_dataset': '/SingleMuon/Run2016D-03Feb2017-v1/MINIAOD',
    #   'parent_dataset_id': 13265209,
    #   'parent_dataset': '/SingleMuon/Run2016D-23Sep2016-v1/AOD'}]
    if not parentDatasets:
        self.logger.warning("No parent dataset found for child dataset %s", childDataset)
        # fixed: used to return {} here, while the normal path returns a list
        return []

    parentFlatData = self.getParentDatasetTrio(childDataset)

    blocks = self.listBlocksWithNoParents(childDataset)
    failedBlocks = []
    self.logger.info("Found %d blocks without parentage information", len(blocks))
    for blockName in blocks:
        try:
            listChildParent, countMissingFiles = self._compileParentageList(blockName, parentFlatData)
            # insert block parentage information to DBS
            if insertFlag and any(listChildParent):
                self.insertFileParents(blockName, listChildParent, countMissingFiles)
                self.logger.info("Parentage information successfully added to DBS for block %s", blockName)
            else:
                self.logger.warning("No parentage information added to DBS for block %s", blockName)
        except Exception as ex:
            # keep going; the failed block is reported back for a later retry
            self.logger.exception(
                "Parentage update failed for block %s with error %s", blockName, str(ex))
            failedBlocks.append(blockName)

    return failedBlocks
|
|
944
|
+
|
|
945
|
+
def _compileParentageList(self, blockName, parentRunLumi):
    """
    Method to find out child and parent file relationship based
    on their run/lumi tuples.

    :param blockName: string with the child block name
    :param parentRunLumi: dict mapping (run, lumi) tuples to parent file ids,
        e.g. {(1, 53): 3077147397, (1, 54): 3077147397, (1, 27): 3077147397}
    :return: a 2-tuple: a unique list of [childFileId, parentFileId] pairs
        (parent id -1 marks a child with no parent at all), and the count of
        children files that are missing parent files, e.g.
        ([[3077147917, 3077148037], ...], 123)
    """
    self.logger.info("Compiling parentage list for block: %s", blockName)
    # fetch run/lumi and file ids for the child block name
    childFlatData = self.getChildBlockTrio(blockName)

    self.logger.debug("Block name: %s has this run/lumi/file id information: %s",
                      blockName, childFlatData)
    listChildParent = []

    # first resolve parentage for all common runLumi pairs between childBlock and parentDataset
    withParents = set()
    for runLumi in childFlatData.keys() & parentRunLumi.keys():
        childFileId = childFlatData[runLumi]
        withParents.add(childFileId)
        parentFileId = parentRunLumi[runLumi]
        listChildParent.append([childFileId, parentFileId])

    # the next for loop will find all the children files with missing parents
    # and set their parent file id to -1 instead, unless that child id already has
    # a valid parent file for other lumi(s)
    missingParents = set()
    for runLumi in childFlatData.keys() - parentRunLumi.keys():
        childFileId = childFlatData[runLumi]
        msg = "Child file id: %s, with run/lumi: %s, has no match in the parent dataset. "
        if childFileId in withParents:
            msg += "It does have parent files for other run/lumis though."
            self.logger.warning(msg, childFileId, runLumi)
            continue
        missingParents.add(childFileId)
        listChildParent.append([childFileId, -1])
        msg += "Adding it with -1 parentage information to DBS."
        self.logger.warning(msg, childFileId, runLumi)
    self.logger.debug("Files with parent: %s, without: %s, non-unique tuples: %d",
                      withParents, missingParents, len(listChildParent))

    # now find out files missing parent that do not have any other parent
    missingParents = missingParents - withParents
    # and make it a unique list of child/parent file ids
    listChildParent = makeListElementsUnique(listChildParent)

    self.logger.info("Block: %s has %d child/parent tuples and it is missing %d files",
                     blockName, len(listChildParent), len(missingParents))
    return listChildParent, len(missingParents)
|
|
997
|
+
|
|
998
|
+
def getParentDatasetTrio(self, childDataset):
    """
    Provided a dataset name, return all the parent dataset information, such as:
    - file ids, run number and lumi section
    NOTE: This API is meant to be used by the StepChainParentage thread only!!!
    :param childDataset: name of the child dataset
    :return: A dictionary using (run, lumi) tuples as keys and fileIds as values
    {(1, 5110): 2746490237,
     (1, 5959): 2746487877,
     (1, 5990): 2746487877,
     ...}
    """
    # DBS returns a list of mappings shaped like:
    #   {554307997: [[1, 557179], [1, 557178], ...], ...}
    # i.e. file id -> list of [run_number, lumi_section_number] pairs.
    parentFullInfo = self.dbs.listParentDSTrio(dataset=childDataset)

    # Flatten everything into a single {(run, lumi): fileId} mapping.
    # If the same run/lumi pair appears under multiple file ids, the
    # last one seen wins (same as the original nested-loop version).
    return {tuple(runLumi): fileId
            for datasetInfo in parentFullInfo
            for fileId, runLumiList in datasetInfo.items()
            for runLumi in runLumiList}
|
|
1021
|
+
|
|
1022
|
+
def getChildBlockTrio(self, childBlock):
    """
    Provided a block name, return all block contents information, such as:
    - file ids, run number and lumi section
    NOTE: This API is meant to be used by the StepChainParentage thread only!!!
    :param childBlock: name of the child block
    :return: A dictionary using (run, lumi) tuples as keys and fileIds as values
    {(1, 5110): 2746490237,
     (1, 5959): 2746487877,
     (1, 5990): 2746487877,
     ...}
    """
    # the call to DBS below will return data in the following format:
    # {554307997: [[1, 557179], [1, 557178], ...}
    # such that: key is a file id and each inner list is [run_number, lumi_section_number]
    childBlockInfo = self.dbs.listBlockTrio(block_name=childBlock)

    # flatten it into a single {(run, lumi): fileId} mapping
    # NOTE: the loop variable must not be named `childBlock`, as the original
    # code did - that shadowed the method parameter inside the loop
    childFlatData = {}
    for blockInfo in childBlockInfo:
        for fileId in blockInfo:
            for runLumiPair in blockInfo[fileId]:
                childFlatData[tuple(runLumiPair)] = fileId
    return childFlatData
|