wmglobalqueue 2.3.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wmglobalqueue might be problematic. Click here for more details.

Files changed (345)
  1. Utils/CPMetrics.py +270 -0
  2. Utils/CertTools.py +62 -0
  3. Utils/EmailAlert.py +50 -0
  4. Utils/ExtendedUnitTestCase.py +62 -0
  5. Utils/FileTools.py +182 -0
  6. Utils/IteratorTools.py +80 -0
  7. Utils/MathUtils.py +31 -0
  8. Utils/MemoryCache.py +119 -0
  9. Utils/Patterns.py +24 -0
  10. Utils/Pipeline.py +137 -0
  11. Utils/PortForward.py +97 -0
  12. Utils/ProcessStats.py +103 -0
  13. Utils/PythonVersion.py +17 -0
  14. Utils/Signals.py +36 -0
  15. Utils/TemporaryEnvironment.py +27 -0
  16. Utils/Throttled.py +227 -0
  17. Utils/Timers.py +130 -0
  18. Utils/Timestamps.py +86 -0
  19. Utils/TokenManager.py +143 -0
  20. Utils/Tracing.py +60 -0
  21. Utils/TwPrint.py +98 -0
  22. Utils/Utilities.py +308 -0
  23. Utils/__init__.py +11 -0
  24. WMCore/ACDC/Collection.py +57 -0
  25. WMCore/ACDC/CollectionTypes.py +12 -0
  26. WMCore/ACDC/CouchCollection.py +67 -0
  27. WMCore/ACDC/CouchFileset.py +238 -0
  28. WMCore/ACDC/CouchService.py +73 -0
  29. WMCore/ACDC/DataCollectionService.py +485 -0
  30. WMCore/ACDC/Fileset.py +94 -0
  31. WMCore/ACDC/__init__.py +11 -0
  32. WMCore/Algorithms/Alarm.py +39 -0
  33. WMCore/Algorithms/MathAlgos.py +274 -0
  34. WMCore/Algorithms/MiscAlgos.py +67 -0
  35. WMCore/Algorithms/ParseXMLFile.py +115 -0
  36. WMCore/Algorithms/Permissions.py +27 -0
  37. WMCore/Algorithms/Singleton.py +58 -0
  38. WMCore/Algorithms/SubprocessAlgos.py +129 -0
  39. WMCore/Algorithms/__init__.py +7 -0
  40. WMCore/Cache/GenericDataCache.py +98 -0
  41. WMCore/Cache/WMConfigCache.py +572 -0
  42. WMCore/Cache/__init__.py +0 -0
  43. WMCore/Configuration.py +651 -0
  44. WMCore/DAOFactory.py +47 -0
  45. WMCore/DataStructs/File.py +177 -0
  46. WMCore/DataStructs/Fileset.py +140 -0
  47. WMCore/DataStructs/Job.py +182 -0
  48. WMCore/DataStructs/JobGroup.py +142 -0
  49. WMCore/DataStructs/JobPackage.py +49 -0
  50. WMCore/DataStructs/LumiList.py +734 -0
  51. WMCore/DataStructs/Mask.py +219 -0
  52. WMCore/DataStructs/MathStructs/ContinuousSummaryHistogram.py +197 -0
  53. WMCore/DataStructs/MathStructs/DiscreteSummaryHistogram.py +92 -0
  54. WMCore/DataStructs/MathStructs/SummaryHistogram.py +117 -0
  55. WMCore/DataStructs/MathStructs/__init__.py +0 -0
  56. WMCore/DataStructs/Pickleable.py +24 -0
  57. WMCore/DataStructs/Run.py +256 -0
  58. WMCore/DataStructs/Subscription.py +175 -0
  59. WMCore/DataStructs/WMObject.py +47 -0
  60. WMCore/DataStructs/WorkUnit.py +112 -0
  61. WMCore/DataStructs/Workflow.py +60 -0
  62. WMCore/DataStructs/__init__.py +8 -0
  63. WMCore/Database/CMSCouch.py +1349 -0
  64. WMCore/Database/ConfigDBMap.py +29 -0
  65. WMCore/Database/CouchUtils.py +118 -0
  66. WMCore/Database/DBCore.py +198 -0
  67. WMCore/Database/DBCreator.py +113 -0
  68. WMCore/Database/DBExceptionHandler.py +57 -0
  69. WMCore/Database/DBFactory.py +110 -0
  70. WMCore/Database/DBFormatter.py +177 -0
  71. WMCore/Database/Dialects.py +13 -0
  72. WMCore/Database/ExecuteDAO.py +327 -0
  73. WMCore/Database/MongoDB.py +241 -0
  74. WMCore/Database/MySQL/Destroy.py +42 -0
  75. WMCore/Database/MySQL/ListUserContent.py +20 -0
  76. WMCore/Database/MySQL/__init__.py +9 -0
  77. WMCore/Database/MySQLCore.py +132 -0
  78. WMCore/Database/Oracle/Destroy.py +56 -0
  79. WMCore/Database/Oracle/ListUserContent.py +19 -0
  80. WMCore/Database/Oracle/__init__.py +9 -0
  81. WMCore/Database/ResultSet.py +44 -0
  82. WMCore/Database/Transaction.py +91 -0
  83. WMCore/Database/__init__.py +9 -0
  84. WMCore/Database/ipy_profile_couch.py +438 -0
  85. WMCore/GlobalWorkQueue/CherryPyThreads/CleanUpTask.py +29 -0
  86. WMCore/GlobalWorkQueue/CherryPyThreads/HeartbeatMonitor.py +105 -0
  87. WMCore/GlobalWorkQueue/CherryPyThreads/LocationUpdateTask.py +28 -0
  88. WMCore/GlobalWorkQueue/CherryPyThreads/ReqMgrInteractionTask.py +35 -0
  89. WMCore/GlobalWorkQueue/CherryPyThreads/__init__.py +0 -0
  90. WMCore/GlobalWorkQueue/__init__.py +0 -0
  91. WMCore/GroupUser/CouchObject.py +127 -0
  92. WMCore/GroupUser/Decorators.py +51 -0
  93. WMCore/GroupUser/Group.py +33 -0
  94. WMCore/GroupUser/Interface.py +73 -0
  95. WMCore/GroupUser/User.py +96 -0
  96. WMCore/GroupUser/__init__.py +11 -0
  97. WMCore/Lexicon.py +836 -0
  98. WMCore/REST/Auth.py +202 -0
  99. WMCore/REST/CherryPyPeriodicTask.py +166 -0
  100. WMCore/REST/Error.py +333 -0
  101. WMCore/REST/Format.py +642 -0
  102. WMCore/REST/HeartbeatMonitorBase.py +90 -0
  103. WMCore/REST/Main.py +623 -0
  104. WMCore/REST/Server.py +2435 -0
  105. WMCore/REST/Services.py +24 -0
  106. WMCore/REST/Test.py +120 -0
  107. WMCore/REST/Tools.py +38 -0
  108. WMCore/REST/Validation.py +250 -0
  109. WMCore/REST/__init__.py +1 -0
  110. WMCore/ReqMgr/DataStructs/RequestStatus.py +209 -0
  111. WMCore/ReqMgr/DataStructs/RequestType.py +13 -0
  112. WMCore/ReqMgr/DataStructs/__init__.py +0 -0
  113. WMCore/ReqMgr/__init__.py +1 -0
  114. WMCore/Services/AlertManager/AlertManagerAPI.py +111 -0
  115. WMCore/Services/AlertManager/__init__.py +0 -0
  116. WMCore/Services/CRIC/CRIC.py +238 -0
  117. WMCore/Services/CRIC/__init__.py +0 -0
  118. WMCore/Services/DBS/DBS3Reader.py +1044 -0
  119. WMCore/Services/DBS/DBSConcurrency.py +44 -0
  120. WMCore/Services/DBS/DBSErrors.py +113 -0
  121. WMCore/Services/DBS/DBSReader.py +23 -0
  122. WMCore/Services/DBS/DBSUtils.py +139 -0
  123. WMCore/Services/DBS/DBSWriterObjects.py +381 -0
  124. WMCore/Services/DBS/ProdException.py +133 -0
  125. WMCore/Services/DBS/__init__.py +8 -0
  126. WMCore/Services/FWJRDB/FWJRDBAPI.py +118 -0
  127. WMCore/Services/FWJRDB/__init__.py +0 -0
  128. WMCore/Services/HTTPS/HTTPSAuthHandler.py +66 -0
  129. WMCore/Services/HTTPS/__init__.py +0 -0
  130. WMCore/Services/LogDB/LogDB.py +201 -0
  131. WMCore/Services/LogDB/LogDBBackend.py +191 -0
  132. WMCore/Services/LogDB/LogDBExceptions.py +11 -0
  133. WMCore/Services/LogDB/LogDBReport.py +85 -0
  134. WMCore/Services/LogDB/__init__.py +0 -0
  135. WMCore/Services/MSPileup/__init__.py +0 -0
  136. WMCore/Services/MSUtils/MSUtils.py +54 -0
  137. WMCore/Services/MSUtils/__init__.py +0 -0
  138. WMCore/Services/McM/McM.py +173 -0
  139. WMCore/Services/McM/__init__.py +8 -0
  140. WMCore/Services/MonIT/Grafana.py +133 -0
  141. WMCore/Services/MonIT/__init__.py +0 -0
  142. WMCore/Services/PyCondor/PyCondorAPI.py +154 -0
  143. WMCore/Services/PyCondor/PyCondorUtils.py +105 -0
  144. WMCore/Services/PyCondor/__init__.py +0 -0
  145. WMCore/Services/ReqMgr/ReqMgr.py +261 -0
  146. WMCore/Services/ReqMgr/__init__.py +0 -0
  147. WMCore/Services/ReqMgrAux/ReqMgrAux.py +419 -0
  148. WMCore/Services/ReqMgrAux/__init__.py +0 -0
  149. WMCore/Services/RequestDB/RequestDBReader.py +267 -0
  150. WMCore/Services/RequestDB/RequestDBWriter.py +39 -0
  151. WMCore/Services/RequestDB/__init__.py +0 -0
  152. WMCore/Services/Requests.py +624 -0
  153. WMCore/Services/Rucio/Rucio.py +1287 -0
  154. WMCore/Services/Rucio/RucioUtils.py +74 -0
  155. WMCore/Services/Rucio/__init__.py +0 -0
  156. WMCore/Services/RucioConMon/RucioConMon.py +128 -0
  157. WMCore/Services/RucioConMon/__init__.py +0 -0
  158. WMCore/Services/Service.py +400 -0
  159. WMCore/Services/StompAMQ/__init__.py +0 -0
  160. WMCore/Services/TagCollector/TagCollector.py +155 -0
  161. WMCore/Services/TagCollector/XMLUtils.py +98 -0
  162. WMCore/Services/TagCollector/__init__.py +0 -0
  163. WMCore/Services/UUIDLib.py +13 -0
  164. WMCore/Services/UserFileCache/UserFileCache.py +160 -0
  165. WMCore/Services/UserFileCache/__init__.py +8 -0
  166. WMCore/Services/WMAgent/WMAgent.py +63 -0
  167. WMCore/Services/WMAgent/__init__.py +0 -0
  168. WMCore/Services/WMArchive/CMSSWMetrics.py +526 -0
  169. WMCore/Services/WMArchive/DataMap.py +463 -0
  170. WMCore/Services/WMArchive/WMArchive.py +33 -0
  171. WMCore/Services/WMArchive/__init__.py +0 -0
  172. WMCore/Services/WMBS/WMBS.py +97 -0
  173. WMCore/Services/WMBS/__init__.py +0 -0
  174. WMCore/Services/WMStats/DataStruct/RequestInfoCollection.py +300 -0
  175. WMCore/Services/WMStats/DataStruct/__init__.py +0 -0
  176. WMCore/Services/WMStats/WMStatsPycurl.py +145 -0
  177. WMCore/Services/WMStats/WMStatsReader.py +445 -0
  178. WMCore/Services/WMStats/WMStatsWriter.py +273 -0
  179. WMCore/Services/WMStats/__init__.py +0 -0
  180. WMCore/Services/WMStatsServer/WMStatsServer.py +134 -0
  181. WMCore/Services/WMStatsServer/__init__.py +0 -0
  182. WMCore/Services/WorkQueue/WorkQueue.py +492 -0
  183. WMCore/Services/WorkQueue/__init__.py +0 -0
  184. WMCore/Services/__init__.py +8 -0
  185. WMCore/Services/pycurl_manager.py +574 -0
  186. WMCore/WMBase.py +50 -0
  187. WMCore/WMConnectionBase.py +164 -0
  188. WMCore/WMException.py +183 -0
  189. WMCore/WMExceptions.py +269 -0
  190. WMCore/WMFactory.py +76 -0
  191. WMCore/WMInit.py +228 -0
  192. WMCore/WMLogging.py +108 -0
  193. WMCore/WMSpec/ConfigSectionTree.py +442 -0
  194. WMCore/WMSpec/Persistency.py +135 -0
  195. WMCore/WMSpec/Steps/BuildMaster.py +87 -0
  196. WMCore/WMSpec/Steps/BuildTools.py +201 -0
  197. WMCore/WMSpec/Steps/Builder.py +97 -0
  198. WMCore/WMSpec/Steps/Diagnostic.py +89 -0
  199. WMCore/WMSpec/Steps/Emulator.py +62 -0
  200. WMCore/WMSpec/Steps/ExecuteMaster.py +208 -0
  201. WMCore/WMSpec/Steps/Executor.py +210 -0
  202. WMCore/WMSpec/Steps/StepFactory.py +213 -0
  203. WMCore/WMSpec/Steps/TaskEmulator.py +75 -0
  204. WMCore/WMSpec/Steps/Template.py +204 -0
  205. WMCore/WMSpec/Steps/Templates/AlcaHarvest.py +76 -0
  206. WMCore/WMSpec/Steps/Templates/CMSSW.py +613 -0
  207. WMCore/WMSpec/Steps/Templates/DQMUpload.py +59 -0
  208. WMCore/WMSpec/Steps/Templates/DeleteFiles.py +70 -0
  209. WMCore/WMSpec/Steps/Templates/LogArchive.py +84 -0
  210. WMCore/WMSpec/Steps/Templates/LogCollect.py +105 -0
  211. WMCore/WMSpec/Steps/Templates/StageOut.py +105 -0
  212. WMCore/WMSpec/Steps/Templates/__init__.py +10 -0
  213. WMCore/WMSpec/Steps/WMExecutionFailure.py +21 -0
  214. WMCore/WMSpec/Steps/__init__.py +8 -0
  215. WMCore/WMSpec/Utilities.py +63 -0
  216. WMCore/WMSpec/WMSpecErrors.py +12 -0
  217. WMCore/WMSpec/WMStep.py +347 -0
  218. WMCore/WMSpec/WMTask.py +1980 -0
  219. WMCore/WMSpec/WMWorkload.py +2288 -0
  220. WMCore/WMSpec/WMWorkloadTools.py +370 -0
  221. WMCore/WMSpec/__init__.py +9 -0
  222. WMCore/WorkQueue/DataLocationMapper.py +273 -0
  223. WMCore/WorkQueue/DataStructs/ACDCBlock.py +47 -0
  224. WMCore/WorkQueue/DataStructs/Block.py +48 -0
  225. WMCore/WorkQueue/DataStructs/CouchWorkQueueElement.py +148 -0
  226. WMCore/WorkQueue/DataStructs/WorkQueueElement.py +274 -0
  227. WMCore/WorkQueue/DataStructs/WorkQueueElementResult.py +152 -0
  228. WMCore/WorkQueue/DataStructs/WorkQueueElementsSummary.py +185 -0
  229. WMCore/WorkQueue/DataStructs/__init__.py +0 -0
  230. WMCore/WorkQueue/Policy/End/EndPolicyInterface.py +44 -0
  231. WMCore/WorkQueue/Policy/End/SingleShot.py +22 -0
  232. WMCore/WorkQueue/Policy/End/__init__.py +32 -0
  233. WMCore/WorkQueue/Policy/PolicyInterface.py +17 -0
  234. WMCore/WorkQueue/Policy/Start/Block.py +258 -0
  235. WMCore/WorkQueue/Policy/Start/Dataset.py +180 -0
  236. WMCore/WorkQueue/Policy/Start/MonteCarlo.py +131 -0
  237. WMCore/WorkQueue/Policy/Start/ResubmitBlock.py +171 -0
  238. WMCore/WorkQueue/Policy/Start/StartPolicyInterface.py +316 -0
  239. WMCore/WorkQueue/Policy/Start/__init__.py +34 -0
  240. WMCore/WorkQueue/Policy/__init__.py +57 -0
  241. WMCore/WorkQueue/WMBSHelper.py +772 -0
  242. WMCore/WorkQueue/WorkQueue.py +1237 -0
  243. WMCore/WorkQueue/WorkQueueBackend.py +750 -0
  244. WMCore/WorkQueue/WorkQueueBase.py +39 -0
  245. WMCore/WorkQueue/WorkQueueExceptions.py +44 -0
  246. WMCore/WorkQueue/WorkQueueReqMgrInterface.py +278 -0
  247. WMCore/WorkQueue/WorkQueueUtils.py +130 -0
  248. WMCore/WorkQueue/__init__.py +13 -0
  249. WMCore/Wrappers/JsonWrapper/JSONThunker.py +342 -0
  250. WMCore/Wrappers/JsonWrapper/__init__.py +7 -0
  251. WMCore/Wrappers/__init__.py +6 -0
  252. WMCore/__init__.py +10 -0
  253. wmglobalqueue-2.3.10.data/data/bin/wmc-dist-patch +15 -0
  254. wmglobalqueue-2.3.10.data/data/bin/wmc-dist-unpatch +8 -0
  255. wmglobalqueue-2.3.10.data/data/bin/wmc-httpd +3 -0
  256. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/.couchapprc +1 -0
  257. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/README.md +40 -0
  258. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/_attachments/index.html +264 -0
  259. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/_attachments/js/ElementInfoByWorkflow.js +96 -0
  260. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/_attachments/js/StuckElementInfo.js +57 -0
  261. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/_attachments/js/WorkloadInfoTable.js +80 -0
  262. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/_attachments/js/dataTable.js +70 -0
  263. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/_attachments/js/namespace.js +23 -0
  264. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/_attachments/style/main.css +75 -0
  265. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/couchapp.json +4 -0
  266. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/filters/childQueueFilter.js +13 -0
  267. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/filters/filterDeletedDocs.js +3 -0
  268. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/filters/queueFilter.js +11 -0
  269. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/language +1 -0
  270. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/lib/mustache.js +333 -0
  271. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/lib/validate.js +27 -0
  272. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/lib/workqueue_utils.js +61 -0
  273. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/lists/elementsDetail.js +28 -0
  274. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/lists/filter.js +86 -0
  275. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/lists/stuckElements.js +38 -0
  276. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/lists/workRestrictions.js +153 -0
  277. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/lists/workflowSummary.js +28 -0
  278. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/rewrites.json +73 -0
  279. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/shows/redirect.js +23 -0
  280. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/shows/status.js +40 -0
  281. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/templates/ElementSummaryByWorkflow.html +27 -0
  282. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/templates/StuckElementSummary.html +26 -0
  283. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/templates/TaskStatus.html +23 -0
  284. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/templates/WorkflowSummary.html +27 -0
  285. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/templates/partials/workqueue-common-lib.html +2 -0
  286. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/templates/partials/yui-lib-remote.html +16 -0
  287. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/templates/partials/yui-lib.html +18 -0
  288. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/updates/in-place.js +50 -0
  289. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/validate_doc_update.js +8 -0
  290. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/vendor/couchapp/_attachments/jquery.couch.app.js +235 -0
  291. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/vendor/couchapp/_attachments/jquery.pathbinder.js +173 -0
  292. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/activeData/map.js +8 -0
  293. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/activeData/reduce.js +2 -0
  294. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/activeParentData/map.js +8 -0
  295. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/activeParentData/reduce.js +2 -0
  296. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/activePileupData/map.js +8 -0
  297. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/activePileupData/reduce.js +2 -0
  298. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/analyticsData/map.js +11 -0
  299. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/analyticsData/reduce.js +1 -0
  300. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/availableByPriority/map.js +6 -0
  301. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/conflicts/map.js +5 -0
  302. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/elements/map.js +5 -0
  303. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/elementsByData/map.js +8 -0
  304. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/elementsByParent/map.js +8 -0
  305. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/elementsByParentData/map.js +8 -0
  306. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/elementsByPileupData/map.js +8 -0
  307. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/elementsByStatus/map.js +8 -0
  308. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/elementsBySubscription/map.js +6 -0
  309. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/elementsByWorkflow/map.js +8 -0
  310. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/elementsByWorkflow/reduce.js +3 -0
  311. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/elementsDetailByWorkflowAndStatus/map.js +26 -0
  312. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/jobInjectStatusByRequest/map.js +10 -0
  313. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/jobInjectStatusByRequest/reduce.js +1 -0
  314. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/jobStatusByRequest/map.js +6 -0
  315. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/jobStatusByRequest/reduce.js +1 -0
  316. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/jobsByChildQueueAndPriority/map.js +6 -0
  317. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/jobsByChildQueueAndPriority/reduce.js +1 -0
  318. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/jobsByChildQueueAndStatus/map.js +6 -0
  319. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/jobsByChildQueueAndStatus/reduce.js +1 -0
  320. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/jobsByRequest/map.js +6 -0
  321. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/jobsByRequest/reduce.js +1 -0
  322. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/jobsByStatus/map.js +6 -0
  323. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/jobsByStatus/reduce.js +1 -0
  324. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/jobsByStatusAndPriority/map.js +6 -0
  325. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/jobsByStatusAndPriority/reduce.js +1 -0
  326. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/openRequests/map.js +6 -0
  327. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/recent-items/map.js +5 -0
  328. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/siteWhitelistByRequest/map.js +6 -0
  329. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/siteWhitelistByRequest/reduce.js +1 -0
  330. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/specsByWorkflow/map.js +5 -0
  331. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/stuckElements/map.js +38 -0
  332. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/wmbsInjectStatusByRequest/map.js +12 -0
  333. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/wmbsInjectStatusByRequest/reduce.js +3 -0
  334. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/wmbsUrl/map.js +6 -0
  335. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/wmbsUrl/reduce.js +2 -0
  336. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/wmbsUrlByRequest/map.js +6 -0
  337. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/wmbsUrlByRequest/reduce.js +2 -0
  338. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/workflowSummary/map.js +9 -0
  339. wmglobalqueue-2.3.10.data/data/data/couchapps/WorkQueue/views/workflowSummary/reduce.js +10 -0
  340. wmglobalqueue-2.3.10.dist-info/LICENSE +202 -0
  341. wmglobalqueue-2.3.10.dist-info/METADATA +24 -0
  342. wmglobalqueue-2.3.10.dist-info/NOTICE +16 -0
  343. wmglobalqueue-2.3.10.dist-info/RECORD +345 -0
  344. wmglobalqueue-2.3.10.dist-info/WHEEL +5 -0
  345. wmglobalqueue-2.3.10.dist-info/top_level.txt +2 -0
@@ -0,0 +1,1237 @@
1
+ #!/usr/bin/env python
2
+
3
+ """
4
+ WorkQueue provides functionality to queue large chunks of work,
5
+ thus acting as a buffer for the next steps in job processing
6
+
7
+ WMSpec objects are fed into the queue, split into coarse grained work units
8
+ and released when a suitable resource is found to execute them.
9
+
10
+ https://twiki.cern.ch/twiki/bin/view/CMS/WMCoreJobPool
11
+ """
12
+
13
+ from __future__ import division, print_function
14
+
15
+ from builtins import str as newstr, bytes
16
+ from future.utils import viewitems, listvalues
17
+
18
+ import os
19
+ import threading
20
+ import time
21
+ from collections import defaultdict
22
+
23
+ from WMCore import Lexicon
24
+ from WMCore.ACDC.DataCollectionService import DataCollectionService
25
+ from WMCore.Database.CMSCouch import CouchInternalServerError, CouchNotFoundError
26
+ from WMCore.Services.CRIC.CRIC import CRIC
27
+ from WMCore.Services.DBS.DBSReader import DBSReader
28
+ from WMCore.Services.LogDB.LogDB import LogDB
29
+ from WMCore.Services.ReqMgr.ReqMgr import ReqMgr
30
+ from WMCore.Services.RequestDB.RequestDBReader import RequestDBReader
31
+ from WMCore.Services.Rucio.Rucio import Rucio
32
+ from WMCore.Services.WorkQueue.WorkQueue import WorkQueue as WorkQueueDS
33
+ from WMCore.WMSpec.WMWorkload import WMWorkloadHelper, getWorkloadFromTask
34
+ from WMCore.WorkQueue.DataLocationMapper import WorkQueueDataLocationMapper
35
+ from WMCore.WorkQueue.DataStructs.ACDCBlock import ACDCBlock
36
+ from WMCore.WorkQueue.DataStructs.WorkQueueElement import possibleSites
37
+ from WMCore.WorkQueue.DataStructs.WorkQueueElementsSummary import getGlobalSiteStatusSummary
38
+ from WMCore.WorkQueue.Policy.End import endPolicy
39
+ from WMCore.WorkQueue.Policy.Start import startPolicy
40
+ from WMCore.WorkQueue.WorkQueueBackend import WorkQueueBackend
41
+ from WMCore.WorkQueue.WorkQueueBase import WorkQueueBase
42
+ from WMCore.WorkQueue.WorkQueueExceptions import (TERMINAL_EXCEPTIONS, WorkQueueError, WorkQueueNoMatchingElements,
43
+ WorkQueueWMSpecError)
44
+ from WMCore.WorkQueue.WorkQueueUtils import cmsSiteNames
45
+
46
+
47
+ # Convenience constructor functions
48
+
49
def globalQueue(logger=None, dbi=None, **kwargs):
    """Convenience method to create a WorkQueue suitable for use globally
    """
    # Global-queue defaults: no WMBS fileset population and not a local
    # queue; callers may override any of these through kwargs.
    settings = {'PopulateFilesets': False,
                'LocalQueueFlag': False,
                'TrackLocationOrSubscription': 'location'}
    settings.update(kwargs)
    return WorkQueue(logger, dbi, **settings)
58
+
59
+
60
def localQueue(logger=None, dbi=None, **kwargs):
    """Convenience method to create a WorkQueue suitable for use locally
    """
    # Only the location-tracking mode is pre-set for a local queue;
    # everything else relies on WorkQueue.__init__ defaults.
    settings = {'TrackLocationOrSubscription': 'location'}
    settings.update(kwargs)
    return WorkQueue(logger, dbi, **settings)
66
+
67
+
68
+ class WorkQueue(WorkQueueBase):
69
+ """
70
+ _WorkQueue_
71
+
72
+ WorkQueue object - interface to WorkQueue functionality.
73
+ """
74
+
75
    def __init__(self, logger=None, dbi=None, **params):
        """
        Build a WorkQueue around its CouchDB backend and helper services.

        :param logger: logger handed to WorkQueueBase
        :param dbi: database interface handed to WorkQueueBase
        :param params: configuration dictionary. Notable keys:
            - CouchUrl: mandatory; falls back to the COUCHURL env var
            - DbName / InboxDbName: couch database names
            - ParentQueueCouchUrl: full URL (including db name) of the
              parent/global queue, when this is a local queue
            - rucioUrl / rucioAuthUrl: accessed directly below, so they
              are effectively mandatory (KeyError if absent)
            - CacheDir: mandatory when PopulateFilesets is enabled
            Many other tunables are defaulted in the body.
        :raises RuntimeError: missing CouchUrl, an invalid splitting
            policy combination, or missing CacheDir on a local queue
        :raises WorkQueueError: unparsable ParentQueueCouchUrl
        """
        WorkQueueBase.__init__(self, logger, dbi)
        self.parent_queue = None
        self.params = params

        # config argument (within params) shall be reference to
        # Configuration instance
        self.config = params.get("Config", None)
        self.params.setdefault('CouchUrl', os.environ.get('COUCHURL'))
        if not self.params.get('CouchUrl'):
            raise RuntimeError('CouchUrl config value mandatory')
        self.params.setdefault('DbName', 'workqueue')
        self.params.setdefault('InboxDbName', self.params['DbName'] + '_inbox')
        self.params.setdefault('ParentQueueCouchUrl', None)  # We get work from here

        self.backend = WorkQueueBackend(self.params['CouchUrl'], self.params['DbName'],
                                        self.params['InboxDbName'],
                                        self.params['ParentQueueCouchUrl'], self.params.get('QueueURL'),
                                        logger=self.logger)
        self.workqueueDS = WorkQueueDS(self.params['CouchUrl'], self.params['DbName'],
                                       self.params['InboxDbName'])
        if self.params.get('ParentQueueCouchUrl'):
            # ParentQueueCouchUrl is expected to be "<server>/<dbname>";
            # rsplit('/', 1) separates server from database name.
            try:
                if self.params.get('ParentQueueInboxCouchDBName'):
                    self.parent_queue = WorkQueueBackend(self.params['ParentQueueCouchUrl'].rsplit('/', 1)[0],
                                                         self.params['ParentQueueCouchUrl'].rsplit('/', 1)[1],
                                                         self.params['ParentQueueInboxCouchDBName'])
                else:
                    self.parent_queue = WorkQueueBackend(self.params['ParentQueueCouchUrl'].rsplit('/', 1)[0],
                                                         self.params['ParentQueueCouchUrl'].rsplit('/', 1)[1])
            except IndexError as ex:
                # Probable cause: Someone didn't put the global WorkQueue name in
                # the ParentCouchUrl
                msg = "Parsing failure for ParentQueueCouchUrl - probably missing dbname in input\n"
                msg += "Exception: %s\n" % str(ex)
                msg += str("ParentQueueCouchUrl: %s\n" % self.params['ParentQueueCouchUrl'])
                self.logger.error(msg)
                raise WorkQueueError(msg)
            # Use the backend's sanitized form of the parent URL from here on.
            self.params['ParentQueueCouchUrl'] = self.parent_queue.queueUrl

        # save each DBSReader instance in the class object, such that
        # the same object is not shared amongst multiple threads
        self.dbses = {}

        # Tunables: work-acquisition thresholds, refresh intervals (seconds)
        # and retry/grace times (seconds).
        self.params.setdefault('QueueDepth', 1)  # when less than this locally
        self.params.setdefault('WorkPerCycle', 100)
        self.params.setdefault('RowsPerSlice', 2500)
        self.params.setdefault('MaxRowsPerCycle', 50000)
        self.params.setdefault('LocationRefreshInterval', 600)
        self.params.setdefault('FullLocationRefreshInterval', 7200)
        self.params.setdefault('TrackLocationOrSubscription', 'location')
        self.params.setdefault('ReleaseIncompleteBlocks', False)
        self.params.setdefault('ReleaseRequireSubscribed', True)
        self.params.setdefault('PopulateFilesets', True)
        self.params.setdefault('LocalQueueFlag', True)
        self.params.setdefault('QueueRetryTime', 86400)
        self.params.setdefault('stuckElementAlertTime', 172800)
        self.params.setdefault('reqmgrCompleteGraceTime', 604800)
        self.params.setdefault('cancelGraceTime', 86400)

        self.params.setdefault('JobDumpConfig', None)
        self.params.setdefault('BossAirConfig', None)

        self.params['QueueURL'] = self.backend.queueUrl  # url this queue is visible on
        # backend took previous QueueURL and sanitized it
        self.params.setdefault('WMBSUrl', None)  # this will only be set on local Queue
        if self.params.get('WMBSUrl'):
            self.params['WMBSUrl'] = Lexicon.sanitizeURL(self.params['WMBSUrl'])['url']
        self.params.setdefault('Team', "")

        if self.params.get('CacheDir'):
            try:
                os.makedirs(self.params['CacheDir'])
            except OSError:
                # Most likely the directory already exists; ignore.
                pass
        elif self.params.get('PopulateFilesets'):
            # Local queues (which populate WMBS filesets) need a cache dir.
            raise RuntimeError('CacheDir mandatory for local queue')

        # Allow a pre-built CRIC client to be injected (e.g. for tests).
        if self.params.get('CRIC'):
            self.cric = self.params['CRIC']
        else:
            self.cric = CRIC()

        # Map each top-level work type to its start-policy splitting
        # algorithm; callers may pre-populate entries to override these.
        self.params.setdefault('SplittingMapping', {})
        self.params['SplittingMapping'].setdefault('DatasetBlock',
                                                   {'name': 'Block',
                                                    'args': {}}
                                                   )
        self.params['SplittingMapping'].setdefault('MonteCarlo',
                                                   {'name': 'MonteCarlo',
                                                    'args': {}}
                                                   )
        self.params['SplittingMapping'].setdefault('Dataset',
                                                   {'name': 'Dataset',
                                                    'args': {}}
                                                   )
        self.params['SplittingMapping'].setdefault('Block',
                                                   {'name': 'Block',
                                                    'args': {}}
                                                   )
        self.params['SplittingMapping'].setdefault('ResubmitBlock',
                                                   {'name': 'ResubmitBlock',
                                                    'args': {}}
                                                   )

        self.params.setdefault('EndPolicySettings', {})

        # NOTE(review): assert is stripped under `python -O`; this is a
        # config sanity check rather than input validation.
        assert (self.params['TrackLocationOrSubscription'] in ('subscription',
                                                               'location'))
        # Can only release blocks on location
        if self.params['TrackLocationOrSubscription'] == 'location':
            if self.params['SplittingMapping']['DatasetBlock']['name'] != 'Block':
                raise RuntimeError('Only blocks can be released on location')

        self.params.setdefault('rucioAccount', "wmcore_transferor")

        # rucioUrl/rucioAuthUrl are read with direct indexing: missing
        # keys raise KeyError here by design.
        self.rucio = Rucio(self.params['rucioAccount'],
                           self.params['rucioUrl'], self.params['rucioAuthUrl'],
                           configDict=dict(logger=self.logger))

        # NOTE(review): requireBlocksSubscribed negates ReleaseIncompleteBlocks
        # rather than using ReleaseRequireSubscribed — confirm this is intended.
        self.dataLocationMapper = WorkQueueDataLocationMapper(self.logger, self.backend,
                                                              rucio=self.rucio,
                                                              cric=self.cric,
                                                              locationFrom=self.params['TrackLocationOrSubscription'],
                                                              incompleteBlocks=self.params['ReleaseIncompleteBlocks'],
                                                              requireBlocksSubscribed=not self.params[
                                                                  'ReleaseIncompleteBlocks'],
                                                              fullRefreshInterval=self.params[
                                                                  'FullLocationRefreshInterval'],
                                                              updateIntervalCoarseness=self.params[
                                                                  'LocationRefreshInterval'])

        # used for only global WQ
        if self.params.get('ReqMgrServiceURL'):
            self.reqmgrSvc = ReqMgr(self.params['ReqMgrServiceURL'])

        if self.params.get('RequestDBURL'):
            # This is need for getting post call
            # TODO: Change ReqMgr api to accept post for for retrieving the data and remove this
            self.requestDB = RequestDBReader(self.params['RequestDBURL'])

        # set the thread name before create the log db.
        # only sets that when it is not set already
        # NOTE(review): currentThread/getName/setName are the pre-3.10
        # spellings; still functional but deprecated aliases.
        myThread = threading.currentThread()
        if myThread.getName() == "MainThread":  # this should be only GQ case other cases thread name should be set
            myThread.setName(self.__class__.__name__)

        centralurl = self.params.get("central_logdb_url")
        identifier = self.params.get("log_reporter")
        self.logdb = LogDB(centralurl, identifier, logger=self.logger)

        self.logger.debug("WorkQueue created successfully")
231
+
232
+ def __len__(self):
233
+ """Returns number of Available elements in queue"""
234
+ return self.backend.queueLength()
235
+
236
def setStatus(self, status, elementIDs=None, SubscriptionId=None, WorkflowName=None):
    """
    _setStatus_, throws an exception if no elements are updated

    """
    # Normalise elementIDs to a list. Plain strings/bytes are iterable
    # but must be treated as a single id, and a bare scalar id is wrapped.
    if not elementIDs:
        elementIDs = []
    if isinstance(elementIDs, (newstr, bytes)):
        elementIDs = [elementIDs]
    else:
        try:
            iter(elementIDs)
        except TypeError:
            elementIDs = [elementIDs]

    # Cancellation is not a plain status flip; it needs special actions.
    if status == 'Canceled':
        return self.cancelWork(elementIDs, SubscriptionId, WorkflowName)

    filters = {}
    if SubscriptionId:
        filters['SubscriptionId'] = SubscriptionId
    if WorkflowName:
        filters['RequestName'] = WorkflowName

    matched = self.backend.getElements(elementIDs=elementIDs, **filters)
    if not matched:
        raise WorkQueueNoMatchingElements("No matching elements")

    for element in matched:
        element['Status'] = status
    saved = self.backend.saveElements(*matched)
    if len(matched) != len(saved):
        raise RuntimeError("Some elements not updated, see log for details")

    return saved
270
+
271
def setPriority(self, newpriority, *workflowNames):
    """
    Update priority for a workflow, throw exception if no elements affected.

    :param newpriority: new priority value to apply to matching elements
    :param workflowNames: one or more workflow (request) names
    :raises RuntimeError: if no elements match any of the given workflows
    """
    self.logger.info("Priority change request to %s for %s", newpriority, str(workflowNames))
    affected = []
    for wf in workflowNames:
        affected.extend(self.backend.getElements(returnIdOnly=True, RequestName=wf))

    # Guard BEFORE touching the backend: the original issued an empty
    # updateElements() call first and only raised afterwards, wasting a
    # backend round-trip when nothing matched.
    if not affected:
        raise RuntimeError("Priority not changed: No matching elements")

    self.backend.updateElements(*affected, Priority=newpriority)
284
+
285
def resetWork(self, ids):
    """Put work back in Available state, from here either another queue
    or wmbs can pick it up.

    If work was Acquired by a child queue, the next status update will
    cancel the work in the child.

    Note: That the same child queue is free to pick the work up again,
    there is no permanent blacklist of queues.
    """
    self.logger.info("Resetting elements %s", str(ids))
    # Accept either a single element id or an iterable of ids.
    idList = ids
    try:
        iter(idList)
    except TypeError:
        idList = [ids]

    return self.backend.updateElements(*idList, Status='Available',
                                       ChildQueueUrl=None, WMBSUrl=None)
303
+
304
def getWork(self, jobSlots, siteJobCounts, excludeWorkflows=None):
    """
    Get available work from the queue, inject into wmbs & mark as running

    jobSlots is dict format of {site: estimateJobSlot}
    of the resources to get work for.

    siteJobCounts is a dict format of {site: {prio: jobs}}

    :param excludeWorkflows: optional list of workflow names to skip
    :return: list of matched workqueue elements successfully injected
    """
    excludeWorkflows = excludeWorkflows or []
    results = []
    # Bail out early if the backend (CouchDB) cannot be reached right now.
    if not self.backend.isAvailable():
        self.logger.warning('Backend busy or down: skipping fetching of work')
        return results

    # TODO AMR: perhaps numElems limit should be removed for LQ -> WMBS acquisition
    matches, _ = self.backend.availableWork(jobSlots, siteJobCounts,
                                            excludeWorkflows=excludeWorkflows,
                                            numElems=self.params['WorkPerCycle'],
                                            rowsPerSlice=self.params['RowsPerSlice'],
                                            maxRows=self.params['MaxRowsPerCycle'])

    self.logger.info('Got %i elements matching the constraints', len(matches))
    if not matches:
        return results

    myThread = threading.currentThread()
    # cache wmspecs for lifetime of function call, likely we will have multiple elements for same spec.
    # TODO: Check to see if we can skip spec loading - need to persist some more details to element
    wmspecCache = {}
    for match in matches:
        blockName, dbsBlock = None, None
        # PopulateFilesets is only set for local queues feeding WMBS —
        # TODO confirm; global queue path falls straight to results.append.
        if self.params['PopulateFilesets']:
            if match['RequestName'] not in wmspecCache:
                wmspec = self.backend.getWMSpec(match['RequestName'])
                wmspecCache[match['RequestName']] = wmspec
            else:
                wmspec = wmspecCache[match['RequestName']]

            # Resolve the input data (files + locations) for this element.
            # Failures are logged to logdb and the element is skipped.
            try:
                if match['StartPolicy'] == 'Dataset':
                    # actually returns dataset name and dataset info
                    blockName, dbsBlock = self._getDBSDataset(match)
                elif match['Inputs']:
                    blockName, dbsBlock = self._getDBSBlock(match, wmspec)
            except Exception as ex:
                msg = "%s, %s: \n" % (wmspec.name(), list(match['Inputs']))
                msg += "failed to retrieve data from DBS/Rucio in LQ: \n%s" % str(ex)
                self.logger.exception(msg)
                self.logdb.post(wmspec.name(), msg, 'error')
                continue

            # Create the WMBS subscription; roll back any open transaction
            # on failure so a bad element cannot poison the next one.
            try:
                match['Subscription'] = self._wmbsPreparation(match,
                                                              wmspec,
                                                              blockName,
                                                              dbsBlock)
                self.logdb.delete(wmspec.name(), "error", this_thread=True)
            except Exception as ex:
                if getattr(myThread, 'transaction', None) is not None:
                    myThread.transaction.rollback()
                msg = "Failed to create subscription for %s with block name %s" % (wmspec.name(), blockName)
                msg += "\nError: %s" % str(ex)
                self.logger.exception(msg)
                self.logdb.post(wmspec.name(), msg, 'error')
                continue

        results.append(match)

    del wmspecCache  # remove cache explicitly
    self.logger.info('Injected %s out of %s units into WMBS', len(results), len(matches))
    return results
376
+
377
def _getDbs(self, dbsUrl):
    """
    If we have already constructed a DBSReader object pointing to
    the DBS URL provided, return it. Otherwise, create, cache and
    return a new instance.

    :param dbsUrl: string with the DBS url
    :return: an instance of DBSReader
    """
    # Store the new reader in the cache: the previous implementation
    # created a fresh DBSReader on every miss without ever populating
    # self.dbses, so the memoisation dict stayed empty and each call
    # paid the full construction cost again.
    if dbsUrl not in self.dbses:
        self.dbses[dbsUrl] = DBSReader(dbsUrl)
    return self.dbses[dbsUrl]
388
+
389
def _getDBSDataset(self, match):
    """
    Given a workqueue element with Dataset start policy, find all blocks
    with valid files and resolve their location in Rucio.

    :param match: workqueue element dictionary
    :return: a tuple of the dataset name and its files and RSEs
    """
    # Accumulator mirrors the shape returned for a single block:
    # a flat list of files plus the union of their locations.
    dbsDatasetDict = {'Files': [], 'PhEDExNodeNames': []}
    dbs = self._getDbs(match['Dbs'])
    # Dataset policy carries exactly one input — the dataset name itself.
    datasetName = list(match['Inputs'])[0]

    blocks = dbs.listFileBlocks(datasetName)
    for blockName in blocks:
        blockSummary = dbs.getFileBlock(blockName)
        # Blocks with no valid files contribute nothing; skip them.
        if not blockSummary['Files']:
            self.logger.warning("Block name %s has no valid files. Skipping it.", blockName)
            continue
        # Locations come from Rucio locks/availability under our account.
        blockSummary['PhEDExNodeNames'] = self.rucio.getDataLockedAndAvailable(name=blockName,
                                                                               account=self.params['rucioAccount'])
        dbsDatasetDict['Files'].extend(blockSummary['Files'])
        dbsDatasetDict['PhEDExNodeNames'].extend(blockSummary['PhEDExNodeNames'])

    # De-duplicate the accumulated RSE names across all blocks.
    dbsDatasetDict['PhEDExNodeNames'] = list(set(dbsDatasetDict['PhEDExNodeNames']))

    return datasetName, dbsDatasetDict
414
+
415
def _getDBSBlock(self, match, wmspec):
    """Get DBS info for this block.

    :param match: workqueue element dictionary (uses 'Inputs', 'ACDC',
        'Dbs' and 'TaskName' keys)
    :param wmspec: workload helper used to check task/request flags
    :return: tuple of (block name, dict with 'Files' and, for non-ACDC
        blocks, 'PhEDExNodeNames')
    """
    blockName = list(match['Inputs'])[0]  # TODO: Allow more than one

    if match['ACDC']:
        # ACDC (resubmission) input: files come from the ACDC couch
        # collection chunk encoded in the synthetic block name.
        acdcInfo = match['ACDC']
        acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
        splitedBlockName = ACDCBlock.splitBlockName(blockName)
        fileLists = acdc.getChunkFiles(acdcInfo['collection'],
                                       acdcInfo['fileset'],
                                       splitedBlockName['Offset'],
                                       splitedBlockName['NumOfFiles'])

        # NOTE: ACDC blocks carry no location info here, only the files.
        block = {}
        block["Files"] = fileLists
        return blockName, block
    else:
        dbs = self._getDbs(match['Dbs'])
        if wmspec.getTask(match['TaskName']).parentProcessingFlag():
            # Parent processing: fetch the block together with parent files.
            dbsBlockDict = dbs.getFileBlockWithParents(blockName)
            dbsBlockDict['PhEDExNodeNames'] = self.rucio.getDataLockedAndAvailable(name=blockName,
                                                                                   account=self.params['rucioAccount'])
        elif wmspec.getRequestType() == 'StoreResults':
            # StoreResults resolves locations via DBS, not Rucio —
            # presumably because the data may not be Rucio-managed yet; TODO confirm.
            dbsBlockDict = dbs.getFileBlock(blockName)
            dbsBlockDict['PhEDExNodeNames'] = dbs.listFileBlockLocation(blockName)
        else:
            dbsBlockDict = dbs.getFileBlock(blockName)
            dbsBlockDict['PhEDExNodeNames'] = self.rucio.getDataLockedAndAvailable(name=blockName,
                                                                                   account=self.params['rucioAccount'])

    return blockName, dbsBlockDict
446
+
447
def _wmbsPreparation(self, match, wmspec, blockName, dbsBlock):
    """Inject data into wmbs and create subscription.

    :param match: workqueue element being injected (mutated in place:
        'Subscription' id, 'Status' and 'NumOfFilesAdded' are set)
    :param wmspec: workload helper for this request
    :param blockName: input block (or dataset) name, may be None
    :param dbsBlock: dict with the block's files/locations, may be None
    :return: the created WMBS subscription
    """
    from WMCore.WorkQueue.WMBSHelper import WMBSHelper
    # the parent element (from local couch) can be fetch via:
    # curl -ks -X GET 'http://localhost:5984/workqueue/<ParentQueueId>'

    # Keep in mind that WQE contains sites, wmbs location contains pnns
    commonSites = possibleSites(match)
    commonLocation = self.cric.PSNstoPNNs(commonSites, allowPNNLess=True)
    msg = "Running WMBS preparation for %s with ParentQueueId %s,\n with common location %s"
    self.logger.info(msg, match['RequestName'], match['ParentQueueId'], commonLocation)

    mask = match['Mask']
    wmbsHelper = WMBSHelper(wmspec, match['TaskName'], blockName, mask,
                            self.params['CacheDir'], commonLocation)

    sub, match['NumOfFilesAdded'] = wmbsHelper.createSubscriptionAndAddFiles(block=dbsBlock)
    self.logger.info("Created top level subscription %s for %s with %s files",
                     sub['id'], match['RequestName'], match['NumOfFilesAdded'])

    # update couch with wmbs subscription info
    match['SubscriptionId'] = sub['id']
    match['Status'] = 'Running'
    # do update rather than save to avoid conflicts from other thread writes
    self.backend.updateElements(match.id, Status='Running', SubscriptionId=sub['id'],
                                NumOfFilesAdded=match['NumOfFilesAdded'])
    self.logger.info("LQE %s set to 'Running' for request %s", match.id, match['RequestName'])

    return sub
476
+
477
def _assignToChildQueue(self, queue, *elements):
    """Assign work from parent to queue"""
    # Tag every element as claimed by this child queue, counting how
    # many elements each request contributes for the log summary.
    perRequest = {}
    parentUrl = self.params['ParentQueueCouchUrl']
    wmbsUrl = self.params["WMBSUrl"]
    for item in elements:
        item['Status'] = 'Negotiating'
        item['ChildQueueUrl'] = queue
        item['ParentQueueUrl'] = parentUrl
        item['WMBSUrl'] = wmbsUrl
        perRequest[item['RequestName']] = perRequest.get(item['RequestName'], 0) + 1
    self.logger.info("Setting GQE status to 'Negotiating' and assigning to this child queue for:")
    for requestName, count in perRequest.items():
        self.logger.info(" %d elements for: %s", count, requestName)

    saved = self.parent_queue.saveElements(*elements)
    self.logger.info("GQE successfully saved for:")
    for item in saved:
        self.logger.info(" %s under GQE id: %s", item['RequestName'], item.id)
    return saved
496
+
497
def doneWork(self, elementIDs=None, SubscriptionId=None, WorkflowName=None):
    """Mark work as done
    """
    # A 'Done' transition is an ordinary status update with the same filters.
    return self.setStatus('Done',
                          elementIDs=elementIDs,
                          SubscriptionId=SubscriptionId,
                          WorkflowName=WorkflowName)
503
+
504
def killWMBSWorkflows(self, reqNames):
    """
    Kill/cancel workflows in WMBS and CouchDB.
    Also update job state transition in three data sources: local couch,
    local WMBS and dashboard.

    :param reqNames: list of request names
    :return: a list of workflows that failed to be cancelled
    """
    failedWfs = []
    if not reqNames:
        return failedWfs

    # import inside function since GQ doesn't need this.
    from WMCore.WorkQueue.WMBSHelper import killWorkflow
    # killWorkflow reads dbi/logger off the current thread —
    # presumably a WMCore threading convention; TODO confirm.
    myThread = threading.currentThread()
    myThread.dbi = self.conn.dbi
    myThread.logger = self.logger

    for workflow in reqNames:
        try:
            self.logger.info("Killing workflow in WMBS: %s", workflow)
            killWorkflow(workflow, self.params["JobDumpConfig"], self.params["BossAirConfig"])
        except Exception as ex:
            # Best-effort: a failed kill is reported and retried on the
            # next cycle rather than aborting the whole batch.
            failedWfs.append(workflow)
            msg = "Failed to kill workflow '%s' in WMBS. Error: %s" % (workflow, str(ex))
            msg += "\nIt will be retried in the next loop"
            self.logger.error(msg)
    return failedWfs
532
+
533
def cancelWork(self, elementIDs=None, SubscriptionId=None, WorkflowName=None, elements=None):
    """Cancel work - delete in wmbs, delete from workqueue db, set canceled in inbox
    Elements may be directly provided or determined from series of filter arguments

    :param elementIDs: optional list of element ids to cancel
    :param SubscriptionId: optional WMBS subscription id filter
    :param WorkflowName: optional request name filter
    :param elements: optional pre-fetched elements (skips the backend query)
    :return: list of ids of the elements that were considered for cancellation
    """
    if not elements:
        args = {}
        if SubscriptionId:
            args['SubscriptionId'] = SubscriptionId
        if WorkflowName:
            args['RequestName'] = WorkflowName
        elements = self.backend.getElements(elementIDs=elementIDs, **args)

    # take wf from args in case no elements exist for workflow (i.e. work was negotiating)
    requestNames = set([x['RequestName'] for x in elements]) | set([wf for wf in [WorkflowName] if wf])
    if not requestNames:
        return []
    inbox_elements = []
    for wf in requestNames:
        inbox_elements.extend(self.backend.getInboxElements(WorkflowName=wf))

    # if local queue, kill jobs, update parent to Canceled and delete elements
    if self.params['LocalQueueFlag']:
        # if we can talk to wmbs kill the jobs
        badWfsCancel = []
        if self.params['PopulateFilesets']:
            self.logger.info("Canceling work for workflow(s): %s", requestNames)
            badWfsCancel = self.killWMBSWorkflows(requestNames)
        # now we remove any wf that failed to be cancelled (and its inbox elements)
        requestNames -= set(badWfsCancel)
        for wf in badWfsCancel:
            elementsToRemove = self.backend.getInboxElements(WorkflowName=wf)
            inbox_elements = list(set(inbox_elements) - set(elementsToRemove))
        self.logger.info("New list of cancelled requests: %s", requestNames)

        # Don't update as fails sometimes due to conflicts (#3856)
        for x in inbox_elements:
            if x['Status'] != 'Canceled':
                x.load().__setitem__('Status', 'Canceled')

        self.backend.saveElements(*inbox_elements)

    # if global queue, update non-acquired to Canceled, update parent to CancelRequested
    else:
        # Cancel in global if work has not been passed to a child queue
        elements_to_cancel = [x for x in elements if not x['ChildQueueUrl'] and x['Status'] != 'Canceled']
        # ensure all elements receive cancel request, covers case where initial cancel request missed some elements
        # without this elements may avoid the cancel and not be cleared up till they finish
        elements_not_requested = [x for x in elements if
                                  x['ChildQueueUrl'] and (x['Status'] != 'CancelRequested' and not x.inEndState())]

        self.logger.info("Canceling work for workflow(s): %s", requestNames)
        if elements_to_cancel:
            self.backend.updateElements(*[x.id for x in elements_to_cancel], Status='Canceled')
            self.logger.info("Cancel-ed element(s) %s", str([x.id for x in elements_to_cancel]))

        if elements_not_requested:
            # Don't update as fails sometimes due to conflicts (#3856)
            for x in elements_not_requested:
                x.load().__setitem__('Status', 'CancelRequested')
            self.backend.saveElements(*elements_not_requested)
            self.logger.info("CancelRequest-ed element(s) %s", str([x.id for x in elements_not_requested]))

        inboxElemIds = [x.id for x in inbox_elements if x['Status'] != 'CancelRequested' and not x.inEndState()]
        self.backend.updateInboxElements(*inboxElemIds, Status='CancelRequested')
        # if we haven't had any updates for a while assume agent is dead and move to canceled
        if self.params.get('cancelGraceTime', -1) > 0 and elements:
            last_update = max([float(x.updatetime) for x in elements])
            if (time.time() - last_update) > self.params['cancelGraceTime']:
                self.logger.info("%s cancellation has stalled, mark as finished", elements[0]['RequestName'])
                # Don't update as fails sometimes due to conflicts (#3856)
                for x in elements:
                    if not x.inEndState():
                        x.load().__setitem__('Status', 'Canceled')
                self.backend.saveElements(*[x for x in elements if not x.inEndState()])

    return [x.id for x in elements]
609
+
610
def deleteWorkflows(self, *requests):
    """Delete requests if finished.

    :param requests: one or more inbox element ids (request names)
    :raises RuntimeError: if an id does not resolve to exactly one
        inbox element
    """
    for reqId in requests:
        found = self.backend.getInboxElements(elementIDs=[reqId])
        if len(found) != 1:
            # Use the id we queried with: the original formatted the
            # message from found[0], which raises IndexError (masking
            # the real problem) whenever the backend returned nothing.
            raise RuntimeError('Invalid number of requests (%d) for %s' % (len(found), reqId))
        request = found[0]

        if request.inEndState():
            self.logger.info('Deleting request "%s" as it is %s', request.id, request['Status'])
            self.backend.deleteElements(request)
        else:
            self.logger.debug('Not deleting "%s" as it is %s', request.id, request['Status'])
623
+
624
# NOTE: this function is not executed by local workqueue
def queueWork(self, wmspecUrl, request=None, team=None):
    """
    Take and queue work from a WMSpec.

    If request name is provided but doesn't match WMSpec name
    an error is raised.

    If team is provided work will only be available to queue's
    belonging to that team.

    Duplicate specs will be ignored.

    :param wmspecUrl: location of the workload spec to load
    :param request: optional request name used to validate the spec
    :param team: optional team name restricting who can acquire the work
    :return: number of work units created by the split
    :raises WorkQueueWMSpecError: on request/spec name mismatch
    """
    self.logger.info('queueWork() begin queueing "%s"', wmspecUrl)
    wmspec = WMWorkloadHelper()
    wmspec.load(wmspecUrl)

    if request:  # validate request name
        if request != wmspec.name():
            raise WorkQueueWMSpecError(wmspec,
                                       'Request & workflow name mismatch %s vs %s' % (request, wmspec.name()))

    # Either pull the existing inbox element or create a new one.
    try:
        inbound = self.backend.getInboxElements(elementIDs=[wmspec.name()], loadSpec=True)
        self.logger.info('Resume splitting of "%s"', wmspec.name())
    except CouchNotFoundError:
        # First time we see this spec: create the inbox element in
        # 'Negotiating' and persist it before splitting.
        inbound = [self.backend.createWork(wmspec, Status='Negotiating',
                                           TeamName=team, WMBSUrl=self.params["WMBSUrl"])]
        self.backend.insertElements(inbound)

    work = self.processInboundWork(inbound, throw=True)
    return len(work)
657
+
658
def addWork(self, inboundElem, rucioObj=None):
    """
    Check and add new elements to an existing running request,
    if supported by the start policy.

    :param inboundElem: dict representation for a WorkQueueElement object,
        including the WMSpec file.
    :param rucioObj: object to the Rucio class
    :return: amount of new work units added to the request
    """
    result = []
    self.logger.info('Trying to add more work for: %s', inboundElem['RequestName'])

    try:
        # Check we haven't already split the work, unless it's continuous processing
        work, rejectedWork, badWork = self._splitWork(inboundElem['WMSpec'], data=inboundElem['Inputs'],
                                                      mask=inboundElem['Mask'], inbound=inboundElem,
                                                      continuous=True, rucioObj=rucioObj)

        # if there is new work, then insert it into the database
        newWork = self.backend.insertElements(work, parent=inboundElem)

        # store the inputs in the global queue inbox workflow element
        processedInputs = []
        for unit in newWork:
            processedInputs.extend(list(unit['Inputs']))

        # update the list of processed and rejected inputs with what is already
        # defined in the workqueue inbox
        processedInputs.extend(inboundElem['ProcessedInputs'])
        rejectedWork.extend(inboundElem['RejectedInputs'])
        if newWork:
            # then also update the timestamp for when new data was found
            self.backend.updateInboxElements(inboundElem.id,
                                             ProcessedInputs=processedInputs,
                                             RejectedInputs=rejectedWork,
                                             TimestampFoundNewData=int(time.time()))
        # if global queue, then update workflow stats to request mgr couch doc
        # remove the "UnittestFlag" - need to create the reqmgrSvc emulator
        if not self.params.get("UnittestFlag", False):
            # get statistics for the new work. It's already validated on the server side
            totalStats = self._getTotalStats(newWork)
            self.reqmgrSvc.updateRequestStats(inboundElem['WMSpec'].name(), totalStats)

        if badWork:
            # Data that could not be processed is only reported (logdb
            # warning); it does not fail the whole addWork call.
            msg = "Request with the following unprocessable input data: %s" % badWork
            self.logdb.post(inboundElem['RequestName'], msg, 'warning')
    except Exception as exc:
        # Any failure above means no new work is counted for this cycle.
        self.logger.error('Generic exception adding work to WQE inbox: %s. Error: %s',
                          inboundElem, str(exc))
    else:
        result.extend(newWork)

    self.logger.info('Added %d new elements for request: %s', len(result), inboundElem['RequestName'])
    return len(result)
713
+
714
def status(self, status=None, elementIDs=None,
           dictKey=None, wmbsInfo=None, loadSpec=False,
           **filters):
    """
    Return elements in the queue.

    status, elementIDs & filters are 'AND'ed together to filter elements.
    dictKey returns the output as a dict with the dictKey as the key.
    wmbsInfo causes elements to be synced with their status in WMBS.
    loadSpec causes the workflow for each spec to be loaded.
    """
    elements = self.backend.getElements(status=status,
                                        elementIDs=elementIDs,
                                        loadSpec=loadSpec,
                                        **filters)

    if wmbsInfo:
        self.logger.debug("Syncing element statuses with WMBS for workflow: %s", filters.get("RequestName"))
        # Sync each element against the first WMBS subscription summary
        # with a matching subscription id.
        for element in elements:
            for subSummary in wmbsInfo:
                if element['SubscriptionId'] == subSummary['subscription_id']:
                    element.updateFromSubscription(subSummary)
                    break

    # if dictKey, format as a dict with the appropriate key
    if dictKey:
        grouped = {}
        for element in elements:
            grouped.setdefault(element[dictKey], []).append(element)
        elements = grouped
    return elements
745
+
746
def getWMBSSubscriptionStatus(self):
    """
    Fetches all the subscriptions in this agent and make a summary of
    every single one of them, to be used to update WQEs

    :return: a list of dictionaries
    """
    # Local import: only local queues talk to WMBS, the global queue
    # never calls this helper.
    from WMCore.WorkQueue.WMBSHelper import wmbsSubscriptionStatus
    self.logger.info("Fetching WMBS subscription status information")
    # Reuse the existing DB connection/transaction owned by self.conn.
    wmbsStatus = wmbsSubscriptionStatus(logger=self.logger,
                                        dbi=self.conn.dbi,
                                        conn=self.conn.getDBConn(),
                                        transaction=self.conn.existingTransaction())
    return wmbsStatus
759
+
760
def statusInbox(self, status=None, elementIDs=None, dictKey=None, **filters):
    """
    Return elements in the inbox.

    status, elementIDs & filters are 'AND'ed together to filter elements.
    dictKey returns the output as a dict with the dictKey as the key.
    """
    elements = self.backend.getInboxElements(status, elementIDs, **filters)

    # Optionally group the flat list into {key value: [elements]}.
    if dictKey:
        grouped = {}
        for element in elements:
            grouped.setdefault(element[dictKey], []).append(element)
        elements = grouped

    return elements
777
+
778
def updateLocationInfo(self):
    """
    Update locations info for elements.
    """
    self.logger.info('Executing data location update...')
    # Guard clause: a busy/unreachable backend means we skip this cycle.
    if not self.backend.isAvailable():
        self.logger.warning('Backend busy or down: skipping location update')
        return 0
    refreshed = self.dataLocationMapper()
    self.backend.recordTaskActivity('location_refresh')
    return refreshed
789
+
790
def _printLog(self, msg, printFlag, logLevel):
    """Echo msg to stdout (interactive use) or route it through the logger."""
    if printFlag:
        print(msg)
        return
    logMethod = getattr(self.logger, logLevel)
    logMethod(msg)
795
+
796
def pullWorkConditionCheck(self, printFlag=False):
    """
    Check whether this queue is currently allowed to pull work from its
    parent queue; report the first blocking condition found.

    :param printFlag: when True report via print() instead of the logger
    :return: True if work can be pulled, False otherwise
    """
    # No parent configured -> nothing to pull from.
    if not self.params['ParentQueueCouchUrl']:
        self._printLog('Unable to pull work from parent, ParentQueueCouchUrl not provided',
                       printFlag, "warning")
        return False
    # Both ends must be reachable.
    if not self.backend.isAvailable() or not self.parent_queue.isAvailable():
        self._printLog('Backend busy or down: skipping work pull', printFlag, "warning")
        return False

    # Elements we claimed earlier but that are still replicating down.
    left_over = self.parent_queue.getElements('Negotiating', returnIdOnly=True,
                                              ChildQueueUrl=self.params['QueueURL'])
    if left_over:
        msg = 'Not pulling more work. Still replicating %d previous units, ids:\n%s' % (len(left_over), left_over)
        self._printLog(msg, printFlag, "warning")
        return False

    # Inbox elements still being split locally.
    still_processing = self.backend.getInboxElements('Negotiating', returnIdOnly=True)
    if still_processing:
        self._printLog('Not pulling more work. Still processing %d previous units' % len(still_processing),
                       printFlag, "warning")
        return False

    return True
821
+
822
def freeResouceCheck(self):
    """
    This method looks into the WMBS and BossAir tables and collect
    two types of information:
      1) sites and the total slots available for job creation
      2) sites and the number of pending jobs grouped by priority
    With that information in hands, it looks at the local workqueue elements
    sitting in Available status and update the 2nd data structure (thus it
    updates number of jobs pending by priority according to the LQEs), which
    is then used to know which work can be acquired from the parent queue or not.

    NOTE: method name keeps the historical typo ('Resouce') because it is
    part of the public API and callers depend on it.

    :return: a tuple of dictionaries (or empty lists)
    """
    # Local import: only local queues have WMBS tables available.
    from WMCore.WorkQueue.WMBSHelper import freeSlots
    resources, jobCounts = freeSlots(self.params['QueueDepth'], knownCmsSites=cmsSiteNames())
    # now update jobCounts with work that is already available in the local queue
    _, jobCounts = self.backend.calculateAvailableWork(resources, jobCounts)

    return (resources, jobCounts)
840
+
841
def getAvailableWorkfromParent(self, resources, jobCounts, printFlag=False):
    """Fetch the work elements that the parent queue can hand to us for
    the given free resources and pending-job counts."""
    self.logger.info("Going to fetch work from the parent queue: %s", self.parent_queue.queueUrl)
    work, _ = self.parent_queue.availableWork(resources, jobCounts, self.params['Team'],
                                              numElems=self.params['WorkPerCycle'],
                                              rowsPerSlice=self.params['RowsPerSlice'],
                                              maxRows=self.params['MaxRowsPerCycle'])
    # Single exit point; the original had two identical returns.
    if not work:
        self._printLog('No available work in parent queue.', printFlag, "warning")
    return work
850
+
851
def pullWork(self, resources=None):
    """
    Pull work from another WorkQueue to be processed:
    :param resources: optional dictionary with sites and the amount
      of slots free
    """
    jobCounts = {}
    if self.pullWorkConditionCheck() is False:
        return 0

    # NOTE: resources parameter is only used by unit tests, which do
    # not use WMBS and BossAir tables
    if not resources:
        resources, jobCounts = self.freeResouceCheck()
        if not resources and not jobCounts:
            return 0

    available = self.getAvailableWorkfromParent(resources, jobCounts)
    if not available:
        return 0

    assigned = self._assignToChildQueue(self.params['QueueURL'], *available)

    return len(assigned)
875
+
876
def closeWork(self):
    """
    Global queue service that looks for the inbox elements that are still active
    and checks whether they should be closed for new data or not.
    An element is closed automatically when one of the following conditions holds true:
      - The StartPolicy doesn't define a OpenRunningTimeout or this delay is set to 0
      - A period longer than OpenRunningTimeout has passed since the last child element
        was created or an open block was found and the StartPolicy newDataAvailable
        function returns False.

    :return: list of workqueue_inbox elements that have been closed
    """
    workflowsToClose = []
    if self.params['LocalQueueFlag']:
        # this is a Global WorkQueue only functionality
        return workflowsToClose
    if not self.backend.isAvailable():
        self.logger.warning('Backend busy or down: Can not close work at this time')
        return workflowsToClose

    workflowsToCheck = self.backend.getInboxElements(OpenForNewData=True)
    self.logger.info("Retrieved a list of %d open workflows", len(workflowsToCheck))
    currentTime = time.time()
    for element in workflowsToCheck:
        # fetch attributes from the inbox workqueue element
        startPol = element.get('StartPolicy', {})
        openRunningTimeout = startPol.get('OpenRunningTimeout', 0)
        foundNewDataTime = element.get('TimestampFoundNewData', 0)
        if not openRunningTimeout:
            # no grace period configured -> close immediately
            self.logger.info("Workflow %s has no OpenRunningTimeout. Queuing to be closed.",
                             element['RequestName'])
            workflowsToClose.append(element.id)
        elif (currentTime - foundNewDataTime) > openRunningTimeout:
            # then it's been too long since the last element has been found
            self.logger.info("Workflow %s has expired OpenRunningTimeout. Queuing to be closed.",
                             element['RequestName'])
            workflowsToClose.append(element.id)

    if workflowsToClose:
        try:
            self.logger.info('Closing workflows in workqueue_inbox for: %s', workflowsToClose)
            self.backend.updateInboxElements(*workflowsToClose, OpenForNewData=False)
            msg = 'Closed inbox elements for: %s.\n' % ', '.join(workflowsToClose)
        except CouchInternalServerError as ex:
            msg = 'Failed to close workflows with a CouchInternalServerError exception. '
            msg += 'Details: {}'.format(str(ex))
            self.logger.error(msg)
        except Exception as ex:
            msg = 'Failed to close workflows with a generic exception. '
            msg += 'Details: {}'.format(str(ex))
            self.logger.exception(msg)
    else:
        msg = 'No workflows to close.\n'

    # msg also feeds the task-activity audit record, success or failure.
    self.backend.recordTaskActivity('workclosing', msg)

    return workflowsToClose
933
+
934
def deleteCompletedWFElements(self):
    """
    deletes Workflow when workflow is in finished status
    """
    # Request states (per central couch) for which WQ elements may go.
    deletableStates = ["completed", "closed-out", "failed",
                       "announced", "aborted-completed", "rejected",
                       "normal-archived", "aborted-archived", "rejected-archived"]

    # fetch workflows known to workqueue + workqueue_inbox and with spec attachments
    reqNames = self.backend.getWorkflows(includeInbox=True, includeSpecs=True)
    self.logger.info("Retrieved %d workflows known by WorkQueue", len(reqNames))
    requestsInfo = self.requestDB.getRequestByNames(reqNames)
    deleteRequests = [name for name, info in requestsInfo.items()
                      if info["RequestStatus"] is None or info["RequestStatus"] in deletableStates]
    self.logger.info("Found %d out of %d workflows in a deletable state",
                     len(deleteRequests), len(reqNames))
    return self.backend.deleteWQElementsByWorkflow(deleteRequests)
953
+
954
def performSyncAndCancelAction(self, skipWMBS):
    """
    Apply end policies to determine work status & cleanup finished work

    :param skipWMBS: when True, do not query WMBS for subscription status
        (element progress is then evaluated without WMBS sync)
    """
    if not self.backend.isAvailable():
        self.logger.warning('Backend busy or down: skipping cleanup tasks')
        return

    if self.params['LocalQueueFlag']:
        self.backend.fixConflicts()  # before doing anything fix any conflicts

    wf_to_cancel = []  # record what we did for task_activity
    finished_elements = []

    # WMBS sync only makes sense for a local queue and when not skipped.
    useWMBS = not skipWMBS and self.params['LocalQueueFlag']
    if useWMBS:
        wmbsWflowSummary = self.getWMBSSubscriptionStatus()
    else:
        wmbsWflowSummary = []
    # Get queue elements grouped by their workflow with updated wmbs progress
    # Cancel if requested, update locally and remove obsolete elements
    self.logger.info('Fetching workflow information (including inbox and specs)')
    workflowsList = self.backend.getWorkflows(includeInbox=True, includeSpecs=True)
    for wf in workflowsList:
        parentQueueDeleted = True
        try:
            elements = self.status(RequestName=wf, wmbsInfo=wmbsWflowSummary)
            parents = self.backend.getInboxElements(RequestName=wf)

            self.logger.debug("Queue %s status follows:", self.backend.queueUrl)
            results = endPolicy(elements, parents, self.params['EndPolicySettings'])
            for result in results:
                self.logger.debug("Request %s, Status %s, Full info: %s",
                                  result['RequestName'], result['Status'], result)

                # check for cancellation requests (affects entire workflow)
                if result['Status'] == 'CancelRequested':
                    self.logger.info('Canceling work for workflow: %s', wf)
                    canceled = self.cancelWork(WorkflowName=wf)
                    if canceled:  # global wont cancel if work in child queue
                        wf_to_cancel.append(wf)
                    # stop processing further results for this workflow
                    break
                elif result['Status'] == 'Negotiating':
                    self.logger.debug("Waiting for %s to finish splitting", wf)
                    continue

                parent = result['ParentQueueElement']
                if parent.modified:
                    self.backend.saveElements(parent)

                if result.inEndState():
                    if elements:
                        self.logger.debug("Request %s finished (%s)",
                                          result['RequestName'], parent.statusMetrics())
                        finished_elements.extend(result['Elements'])
                    else:
                        # no local elements left: parent queue still has to
                        # delete its copy before we consider this done
                        parentQueueDeleted = False
                    continue

                # push progress updates for elements the end policy touched
                updated_elements = [x for x in result['Elements'] if x.modified]
                for x in updated_elements:
                    self.logger.debug("Updating progress %s (%s): %s", x['RequestName'], x.id, x.statusMetrics())
                    self.backend.updateElements(x.id, **x.statusMetrics())

            if not parentQueueDeleted:
                self.logger.info('Waiting for parent queue to delete "%s"', wf)

        except Exception as ex:
            # one bad workflow must not abort the whole cleanup cycle
            self.logger.error('Error processing workflow "%s": %s', wf, str(ex))

    msg = 'Finished elements: %s\nCanceled workflows: %s' % (', '.join(["%s (%s)" % (x.id, x['RequestName']) \
                                                                        for x in finished_elements]),
                                                             ', '.join(wf_to_cancel))

    self.logger.debug(msg)
    self.backend.recordTaskActivity('housekeeping', msg)
1030
+
1031
def performQueueCleanupActions(self, skipWMBS=False):
    """
    Run the periodic queue housekeeping: first delete workqueue/inbox
    elements that belong to completed workflows, then synchronize element
    state and process cancellation requests.

    Each step is best-effort: a failure is logged and does not prevent the
    other step from running.

    :param skipWMBS: passed through to the sync/cancel step to skip the
        WMBS subscription status lookup
    :return: None
    """
    try:
        self.logger.info("Deleting completed workflow WQ elements ...")
        numDeleted = self.deleteCompletedWFElements()
        self.logger.info("Deleted %d elements from workqueue/inbox database", numDeleted)
    except Exception as exc:
        self.logger.exception('Error deleting WQ elements. Details: %s', str(exc))

    try:
        self.logger.info("Syncing and cancelling work ...")
        self.performSyncAndCancelAction(skipWMBS)
    except Exception as exc:
        self.logger.error('Error syncing and canceling WQ elements. Details: %s', str(exc))
1045
+
1046
def _splitWork(self, wmspec, data=None, mask=None, inbound=None, continuous=False, rucioObj=None):
    """
    Split work from a parent into WorkQueueElements.

    If the data param is supplied, use that rather than getting input data
    from wmspec. Used for instance when global queue splits by Block (avoids
    having to modify the wmspec block whitelist - thus all elements appear
    as the same workflow in WMBS).

    mask can be used to specify e.g. an event range.

    The inbound and continuous parameters are used to further split an
    already split inbox element.

    :param wmspec: workload spec whose top level tasks are iterated and split
    :param data: optional input data override (see above)
    :param mask: optional mask, e.g. an event range
    :param inbound: already-split inbox element, used together with continuous
    :param continuous: when True, add work to an existing element; only
        supported by start policies that allow work addition
    :param rucioObj: optional Rucio client; defaults to self.rucio
    :return: tuple (totalUnits, rejectedWork, badWork)

    NOTE(review): rejectedWork and badWork are re-assigned on every top level
    task iteration, so for specs with multiple top level tasks only the last
    task's values are returned, while totalUnits accumulates across all
    tasks -- confirm this is intended.
    """
    # give preference to rucio object created by the CherryPy threads
    if not rucioObj:
        rucioObj = self.rucio

    totalUnits, rejectedWork, badWork = [], [], []
    # split each top level task into constituent work elements
    for topLevelTask in wmspec.taskIterator():
        spec = getWorkloadFromTask(topLevelTask)
        policyName = spec.startPolicy()
        if not policyName:
            raise RuntimeError("WMSpec doesn't define policyName, current value: '%s'" % policyName)

        # build the start policy object configured for this queue
        policy = startPolicy(policyName, self.params['SplittingMapping'],
                             rucioObj=rucioObj, logger=self.logger)
        if not policy.supportsWorkAddition() and continuous:
            # Can't split further with a policy that doesn't allow it
            continue
        if continuous:
            policy.modifyPolicyForWorkAddition(inbound)
        self.logger.info('Splitting %s with policy name %s and policy params %s',
                         topLevelTask.getPathName(), policyName,
                         self.params['SplittingMapping'].get(policyName))
        units, rejectedWork, badWork = policy(spec, topLevelTask, data, mask, continuous=continuous)
        self.logger.info('Work splitting completed with %d units, %d rejectedWork and %d badWork',
                         len(units), len(rejectedWork), len(badWork))
        # log a one-line summary for every element about to be queued
        for unit in units:
            msg = 'Queuing element {} for {} with policy {}, '.format(unit.id, unit['Task'].getPathName(),
                                                                      unit['StartPolicy'])
            msg += 'with {} job(s) and {} lumis'.format(unit['Jobs'], unit['NumberOfLumis'])
            if unit['Inputs']:
                msg += ' on %s' % list(unit['Inputs'])[0]
            if unit['Mask']:
                msg += ' on events %d-%d' % (unit['Mask']['FirstEvent'], unit['Mask']['LastEvent'])
            self.logger.info(msg)
        totalUnits.extend(units)

    return (totalUnits, rejectedWork, badWork)
1097
+
1098
+ def _getTotalStats(self, units):
1099
+ totalToplevelJobs = 0
1100
+ totalEvents = 0
1101
+ totalLumis = 0
1102
+ totalFiles = 0
1103
+
1104
+ for unit in units:
1105
+ totalToplevelJobs += unit['Jobs']
1106
+ totalEvents += unit['NumberOfEvents']
1107
+ totalLumis += unit['NumberOfLumis']
1108
+ totalFiles += unit['NumberOfFiles']
1109
+
1110
+ return {'total_jobs': totalToplevelJobs,
1111
+ 'input_events': totalEvents,
1112
+ 'input_lumis': totalLumis,
1113
+ 'input_num_files': totalFiles}
1114
+
1115
def processInboundWork(self, inbound_work=None, throw=False, continuous=False, rucioObj=None):
    """Retrieve work from inbox, split and store.

    If inbound_work is passed, only those inbox elements are processed;
    otherwise every element eligible for splitting is fetched from the
    backend.

    :param inbound_work: optional list of inbox elements to process
    :param throw: when True, re-raise splitting exceptions instead of only
        logging them
    :param continuous: when True, add work to already-split elements
        (requires an explicit inbound_work list; unsupported otherwise)
    :param rucioObj: optional Rucio client handed down to _splitWork
    :return: list of successfully queued work elements
    """
    inbound_work = inbound_work or []
    msg = "Executing processInboundWork with {} inbound_work, ".format(len(inbound_work))
    msg += "throw: {} and continuous: {}".format(throw, continuous)
    self.logger.info(msg)
    if self.params['LocalQueueFlag']:
        self.logger.info("fixing conflict...")
        self.backend.fixConflicts()  # db should be consistent

    result = []
    if not inbound_work and continuous:
        # This is not supported
        return result
    if not inbound_work:
        inbound_work = self.backend.getElementsForSplitting()
    self.logger.info('Retrieved %d elements for splitting with continuous flag: %s',
                     len(inbound_work), continuous)
    for inbound in inbound_work:
        try:
            # Check we haven't already split the work, unless it's continuous processing
            work = not continuous and self.backend.getElementsForParent(inbound)
            if work:
                self.logger.info('Request "%s" already split - Resuming', inbound['RequestName'])
            else:
                work, rejectedWork, badWork = self._splitWork(inbound['WMSpec'], data=inbound['Inputs'],
                                                              mask=inbound['Mask'], inbound=inbound,
                                                              continuous=continuous, rucioObj=rucioObj)

                # save inbound work to signal we have completed queueing
                # if this fails, rerunning will pick up here
                newWork = self.backend.insertElements(work, parent=inbound)
                # get statistics for the new work
                totalStats = self._getTotalStats(newWork)

                if not continuous:
                    # Update to Acquired when it's the first processing of inbound work
                    self.backend.updateInboxElements(inbound.id, Status='Acquired')

                # store the inputs in the global queue inbox workflow element
                if not self.params.get('LocalQueueFlag'):
                    processedInputs = []
                    for unit in work:
                        processedInputs.extend(list(unit['Inputs']))
                    self.backend.updateInboxElements(inbound.id, ProcessedInputs=processedInputs,
                                                     RejectedInputs=rejectedWork)
                    # if global queue, then update workflow stats to request mgr couch doc
                    # remove the "UnittestFlag" - need to create the reqmgrSvc emulator
                    if not self.params.get("UnittestFlag", False):
                        self.reqmgrSvc.updateRequestStats(inbound['WMSpec'].name(), totalStats)

                if badWork:
                    # surface unprocessable input data in the request log
                    msg = "Request with the following unprocessable input data: %s" % badWork
                    self.logdb.post(inbound['RequestName'], msg, 'warning')
        except TERMINAL_EXCEPTIONS as ex:
            # unrecoverable splitting error: mark the workflow Failed (first pass only)
            msg = 'Terminal exception splitting WQE: %s' % inbound
            self.logger.error(msg)
            self.logdb.post(inbound['RequestName'], msg, 'error')
            if not continuous:
                # Only fail on first splitting
                self.logger.error('Failing workflow "%s": %s', inbound['RequestName'], str(ex))
                self.backend.updateInboxElements(inbound.id, Status='Failed')
            if throw:
                raise
        except Exception as ex:
            # transient/unknown error: in continuous mode just retry next cycle
            if continuous:
                continue
            msg = 'Exception splitting wqe %s for %s: %s' % (inbound.id, inbound['RequestName'], str(ex))
            self.logger.exception(msg)
            self.logdb.post(inbound['RequestName'], msg, 'error')

            if throw:
                raise
            continue
        else:
            # no exception for this element: record the queued work
            result.extend(work)

    requests = ', '.join(list(set(['"%s"' % x['RequestName'] for x in result])))
    if requests:
        self.logger.info('Split work for request(s): %s', requests)

    return result
1199
+
1200
def getWMBSInjectionStatus(self, workflowName=None, drainMode=False):
    """
    Return the WMBS injection status, preferring the parent (global) queue
    when one is configured.

    In general a parent queue exists when this is called from a local queue
    (except for the T1 skim case). In drain mode the local backend is always
    queried, regardless of any parent queue.

    :param workflowName: optional workflow name to restrict the query
    :param drainMode: when True, bypass the parent queue
    :return: list of {workflowName: injection status (True or False)} dicts;
        empty list when the workflow does not exist
    """
    askParent = bool(self.parent_queue) and not drainMode
    target = self.parent_queue if askParent else self.backend
    return target.getWMBSInjectStatus(workflowName)
1212
+
1213
def monitorWorkQueue(self, status=None):
    """
    Collect a summary of the queue content via the workqueue data-service:
    job counts grouped by status, by status and priority, and by child
    queue, plus per-site job estimates with and without data locality.

    :param status: optional list of element statuses used to restrict the
        site-level summaries
    :return: dict of summaries, including 'total_query_time' in seconds
    """
    status = status or []
    startTime = int(time.time())

    summary = {'workByStatus': self.workqueueDS.getJobsByStatus(),
               'workByStatusAndPriority': self.workqueueDS.getJobsByStatusAndPriority(),
               'workByAgentAndStatus': self.workqueueDS.getChildQueuesAndStatus(),
               'workByAgentAndPriority': self.workqueueDS.getChildQueuesAndPriority()}

    # the expensive part: fetch the elements once and build both per-site views
    elements = self.workqueueDS.getElementsByStatus(status)
    uniqueAAA, possibleAAA = getGlobalSiteStatusSummary(elements, status=status)
    summary['uniqueJobsPerSiteAAA'] = uniqueAAA
    summary['possibleJobsPerSiteAAA'] = possibleAAA
    uniqueSites, possibleSites = getGlobalSiteStatusSummary(elements, status=status, dataLocality=True)
    summary['uniqueJobsPerSite'] = uniqueSites
    summary['possibleJobsPerSite'] = possibleSites

    summary['total_query_time'] = int(time.time()) - startTime
    return summary