toil-6.1.0a1-py3-none-any.whl → toil-8.0.0-py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (193)
  1. toil/__init__.py +122 -315
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +173 -89
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
  5. toil/batchSystems/awsBatch.py +244 -135
  6. toil/batchSystems/cleanup_support.py +26 -16
  7. toil/batchSystems/contained_executor.py +31 -28
  8. toil/batchSystems/gridengine.py +86 -50
  9. toil/batchSystems/htcondor.py +166 -89
  10. toil/batchSystems/kubernetes.py +632 -382
  11. toil/batchSystems/local_support.py +20 -15
  12. toil/batchSystems/lsf.py +134 -81
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +290 -151
  16. toil/batchSystems/mesos/executor.py +79 -50
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +46 -28
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +296 -125
  21. toil/batchSystems/slurm.py +603 -138
  22. toil/batchSystems/torque.py +47 -33
  23. toil/bus.py +186 -76
  24. toil/common.py +664 -368
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1136 -483
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +63 -42
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +140 -60
  32. toil/fileStores/cachingFileStore.py +717 -269
  33. toil/fileStores/nonCachingFileStore.py +116 -87
  34. toil/job.py +1225 -368
  35. toil/jobStores/abstractJobStore.py +416 -266
  36. toil/jobStores/aws/jobStore.py +863 -477
  37. toil/jobStores/aws/utils.py +201 -120
  38. toil/jobStores/conftest.py +3 -2
  39. toil/jobStores/fileJobStore.py +292 -154
  40. toil/jobStores/googleJobStore.py +140 -74
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +668 -272
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +74 -31
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +214 -39
  49. toil/lib/aws/utils.py +287 -231
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +104 -47
  53. toil/lib/docker.py +131 -103
  54. toil/lib/ec2.py +361 -199
  55. toil/lib/ec2nodes.py +174 -106
  56. toil/lib/encryption/_dummy.py +5 -3
  57. toil/lib/encryption/_nacl.py +10 -6
  58. toil/lib/encryption/conftest.py +1 -0
  59. toil/lib/exceptions.py +26 -7
  60. toil/lib/expando.py +5 -3
  61. toil/lib/ftp_utils.py +217 -0
  62. toil/lib/generatedEC2Lists.py +127 -19
  63. toil/lib/humanize.py +6 -2
  64. toil/lib/integration.py +341 -0
  65. toil/lib/io.py +141 -15
  66. toil/lib/iterables.py +4 -2
  67. toil/lib/memoize.py +12 -8
  68. toil/lib/misc.py +66 -21
  69. toil/lib/objects.py +2 -2
  70. toil/lib/resources.py +68 -15
  71. toil/lib/retry.py +126 -81
  72. toil/lib/threading.py +299 -82
  73. toil/lib/throttle.py +16 -15
  74. toil/options/common.py +843 -409
  75. toil/options/cwl.py +175 -90
  76. toil/options/runner.py +50 -0
  77. toil/options/wdl.py +73 -17
  78. toil/provisioners/__init__.py +117 -46
  79. toil/provisioners/abstractProvisioner.py +332 -157
  80. toil/provisioners/aws/__init__.py +70 -33
  81. toil/provisioners/aws/awsProvisioner.py +1145 -715
  82. toil/provisioners/clusterScaler.py +541 -279
  83. toil/provisioners/gceProvisioner.py +282 -179
  84. toil/provisioners/node.py +155 -79
  85. toil/realtimeLogger.py +34 -22
  86. toil/resource.py +137 -75
  87. toil/server/app.py +128 -62
  88. toil/server/celery_app.py +3 -1
  89. toil/server/cli/wes_cwl_runner.py +82 -53
  90. toil/server/utils.py +54 -28
  91. toil/server/wes/abstract_backend.py +64 -26
  92. toil/server/wes/amazon_wes_utils.py +21 -15
  93. toil/server/wes/tasks.py +121 -63
  94. toil/server/wes/toil_backend.py +142 -107
  95. toil/server/wsgi_app.py +4 -3
  96. toil/serviceManager.py +58 -22
  97. toil/statsAndLogging.py +224 -70
  98. toil/test/__init__.py +282 -183
  99. toil/test/batchSystems/batchSystemTest.py +460 -210
  100. toil/test/batchSystems/batch_system_plugin_test.py +90 -0
  101. toil/test/batchSystems/test_gridengine.py +173 -0
  102. toil/test/batchSystems/test_lsf_helper.py +67 -58
  103. toil/test/batchSystems/test_slurm.py +110 -49
  104. toil/test/cactus/__init__.py +0 -0
  105. toil/test/cactus/test_cactus_integration.py +56 -0
  106. toil/test/cwl/cwlTest.py +496 -287
  107. toil/test/cwl/measure_default_memory.cwl +12 -0
  108. toil/test/cwl/not_run_required_input.cwl +29 -0
  109. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  110. toil/test/cwl/seqtk_seq.cwl +1 -1
  111. toil/test/docs/scriptsTest.py +69 -46
  112. toil/test/jobStores/jobStoreTest.py +427 -264
  113. toil/test/lib/aws/test_iam.py +118 -50
  114. toil/test/lib/aws/test_s3.py +16 -9
  115. toil/test/lib/aws/test_utils.py +5 -6
  116. toil/test/lib/dockerTest.py +118 -141
  117. toil/test/lib/test_conversions.py +113 -115
  118. toil/test/lib/test_ec2.py +58 -50
  119. toil/test/lib/test_integration.py +104 -0
  120. toil/test/lib/test_misc.py +12 -5
  121. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  122. toil/test/mesos/helloWorld.py +7 -6
  123. toil/test/mesos/stress.py +25 -20
  124. toil/test/options/__init__.py +13 -0
  125. toil/test/options/options.py +42 -0
  126. toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
  127. toil/test/provisioners/clusterScalerTest.py +440 -250
  128. toil/test/provisioners/clusterTest.py +166 -44
  129. toil/test/provisioners/gceProvisionerTest.py +174 -100
  130. toil/test/provisioners/provisionerTest.py +25 -13
  131. toil/test/provisioners/restartScript.py +5 -4
  132. toil/test/server/serverTest.py +188 -141
  133. toil/test/sort/restart_sort.py +137 -68
  134. toil/test/sort/sort.py +134 -66
  135. toil/test/sort/sortTest.py +91 -49
  136. toil/test/src/autoDeploymentTest.py +141 -101
  137. toil/test/src/busTest.py +20 -18
  138. toil/test/src/checkpointTest.py +8 -2
  139. toil/test/src/deferredFunctionTest.py +49 -35
  140. toil/test/src/dockerCheckTest.py +32 -24
  141. toil/test/src/environmentTest.py +135 -0
  142. toil/test/src/fileStoreTest.py +539 -272
  143. toil/test/src/helloWorldTest.py +7 -4
  144. toil/test/src/importExportFileTest.py +61 -31
  145. toil/test/src/jobDescriptionTest.py +46 -21
  146. toil/test/src/jobEncapsulationTest.py +2 -0
  147. toil/test/src/jobFileStoreTest.py +74 -50
  148. toil/test/src/jobServiceTest.py +187 -73
  149. toil/test/src/jobTest.py +121 -71
  150. toil/test/src/miscTests.py +19 -18
  151. toil/test/src/promisedRequirementTest.py +82 -36
  152. toil/test/src/promisesTest.py +7 -6
  153. toil/test/src/realtimeLoggerTest.py +10 -6
  154. toil/test/src/regularLogTest.py +71 -37
  155. toil/test/src/resourceTest.py +80 -49
  156. toil/test/src/restartDAGTest.py +36 -22
  157. toil/test/src/resumabilityTest.py +9 -2
  158. toil/test/src/retainTempDirTest.py +45 -14
  159. toil/test/src/systemTest.py +12 -8
  160. toil/test/src/threadingTest.py +44 -25
  161. toil/test/src/toilContextManagerTest.py +10 -7
  162. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  163. toil/test/src/workerTest.py +73 -23
  164. toil/test/utils/toilDebugTest.py +103 -33
  165. toil/test/utils/toilKillTest.py +4 -5
  166. toil/test/utils/utilsTest.py +245 -106
  167. toil/test/wdl/wdltoil_test.py +818 -149
  168. toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
  169. toil/toilState.py +120 -35
  170. toil/utils/toilConfig.py +13 -4
  171. toil/utils/toilDebugFile.py +44 -27
  172. toil/utils/toilDebugJob.py +214 -27
  173. toil/utils/toilDestroyCluster.py +11 -6
  174. toil/utils/toilKill.py +8 -3
  175. toil/utils/toilLaunchCluster.py +256 -140
  176. toil/utils/toilMain.py +37 -16
  177. toil/utils/toilRsyncCluster.py +32 -14
  178. toil/utils/toilSshCluster.py +49 -22
  179. toil/utils/toilStats.py +356 -273
  180. toil/utils/toilStatus.py +292 -139
  181. toil/utils/toilUpdateEC2Instances.py +3 -1
  182. toil/version.py +12 -12
  183. toil/wdl/utils.py +5 -5
  184. toil/wdl/wdltoil.py +3913 -1033
  185. toil/worker.py +367 -184
  186. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
  187. toil-8.0.0.dist-info/METADATA +173 -0
  188. toil-8.0.0.dist-info/RECORD +253 -0
  189. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
  190. toil-6.1.0a1.dist-info/METADATA +0 -125
  191. toil-6.1.0a1.dist-info/RECORD +0 -237
  192. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
  193. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
toil/fileStores/cachingFileStore.py

@@ -22,28 +22,24 @@ import sqlite3
 import stat
 import threading
 import time
+from collections.abc import Generator, Iterator, Sequence
 from contextlib import contextmanager
 from tempfile import mkstemp
-from typing import (Any,
-                    Callable,
-                    Generator,
-                    Iterator,
-                    Optional,
-                    Sequence,
-                    Tuple)
-
-from toil.common import cacheDirName, getDirSizeRecursively, getFileSystemSize
+from typing import Any, Callable, Optional
+
+from toil.common import cacheDirName, getFileSystemSize
 from toil.fileStores import FileID
 from toil.fileStores.abstractFileStore import AbstractFileStore
 from toil.job import Job, JobDescription
 from toil.jobStores.abstractJobStore import AbstractJobStore
 from toil.lib.compatibility import deprecated
-from toil.lib.conversions import bytes2human
-from toil.lib.io import (atomic_copy,
-                         atomic_copyobj,
-                         make_public_dir,
-                         mkdtemp,
-                         robust_rmtree)
+from toil.lib.io import (
+    atomic_copy,
+    atomic_copyobj,
+    make_public_dir,
+    mkdtemp,
+    robust_rmtree,
+)
 from toil.lib.retry import ErrorCondition, retry
 from toil.lib.threading import get_process_name, process_name_exists
 
@@ -67,9 +63,12 @@ class CacheUnbalancedError(CacheError):
     """
     Raised if file store can't free enough space for caching
     """
-    message = 'Unable unable to free enough space for caching. This error frequently arises due ' \
-              'to jobs using more disk than they have requested. Turn on debug logging to see ' \
-              'more information leading up to this error through cache usage logs.'
+
+    message = (
+        "Unable unable to free enough space for caching. This error frequently arises due "
+        "to jobs using more disk than they have requested. Turn on debug logging to see "
+        "more information leading up to this error through cache usage logs."
+    )
 
     def __init__(self):
         super().__init__(self.message)
@@ -88,9 +87,11 @@ class IllegalDeletionCacheError(CacheError):
     """
 
     def __init__(self, deletedFile):
-        message = 'Cache tracked file (%s) has been deleted or moved by user ' \
-                  ' without updating cache database. Use deleteLocalFile to ' \
-                  'delete such files.' % deletedFile
+        message = (
+            "Cache tracked file (%s) has been deleted or moved by user "
+            " without updating cache database. Use deleteLocalFile to "
+            "delete such files." % deletedFile
+        )
         super().__init__(message)
 
 
@@ -209,13 +210,15 @@ class CachingFileStore(AbstractFileStore):
         # Variables related to caching
         # Decide where the cache directory will be. We put it in the local
         # workflow directory.
-        self.localCacheDir = os.path.join(self.workflow_dir, cacheDirName(self.jobStore.config.workflowID))
+        self.localCacheDir = os.path.join(
+            self.workflow_dir, cacheDirName(self.jobStore.config.workflowID)
+        )
 
         # Since each worker has it's own unique CachingFileStore instance, and only one Job can run
         # at a time on a worker, we can track some stuff about the running job in ourselves.
         self.jobName: str = str(self.jobDesc)
         self.jobID = self.jobDesc.jobStoreID
-        logger.debug('Starting job (%s) with ID (%s).', self.jobName, self.jobID)
+        logger.debug("Starting job (%s) with ID (%s).", self.jobName, self.jobID)
 
         # When the job actually starts, we will fill this in with the job's disk requirement.
         self.jobDiskBytes: Optional[float] = None
@@ -231,7 +234,9 @@ class CachingFileStore(AbstractFileStore):
         # the workflow left one behind without cleaning up properly; we need to
         # be able to tell that from showing up on a machine where a cache has
         # already been created.
-        self.dbPath = os.path.join(self.coordination_dir, f'cache-{self.workflowAttemptNumber}.db')
+        self.dbPath = os.path.join(
+            self.coordination_dir, f"cache-{self.workflowAttemptNumber}.db"
+        )
 
         # Database connections are provided by magic properties self.con and
         # self.cur that always have the right object for the current thread to
@@ -255,7 +260,14 @@ class CachingFileStore(AbstractFileStore):
 
         # Initialize the space accounting properties
         freeSpace, _ = getFileSystemSize(self.localCacheDir)
-        self._write([('INSERT OR IGNORE INTO properties VALUES (?, ?)', ('maxSpace', freeSpace))])
+        self._write(
+            [
+                (
+                    "INSERT OR IGNORE INTO properties VALUES (?, ?)",
+                    ("maxSpace", freeSpace),
+                )
+            ]
+        )
 
         # Space used by caching and by jobs is accounted with queries
 
@@ -285,10 +297,12 @@ class CachingFileStore(AbstractFileStore):
         """
         Get the database connection to be used for the current thread.
         """
-        if not hasattr(self._thread_local, 'con'):
+        if not hasattr(self._thread_local, "con"):
             # Connect to the database for this thread.
             # TODO: We assume the connection closes when the thread goes away and can no longer use it.
-            self._thread_local.con = sqlite3.connect(self.dbPath, timeout=SQLITE_TIMEOUT_SECS)
+            self._thread_local.con = sqlite3.connect(
+                self.dbPath, timeout=SQLITE_TIMEOUT_SECS
+            )
         return self._thread_local.con
 
     @property
@@ -296,18 +310,20 @@ class CachingFileStore(AbstractFileStore):
         """
        Get the main cursor to be used for the current thread.
         """
-        if not hasattr(self._thread_local, 'cur'):
+        if not hasattr(self._thread_local, "cur"):
             # If we don't already have a main cursor for the thread, make one.
             self._thread_local.cur = self.con.cursor()
         return self._thread_local.cur
 
     @staticmethod
-    @retry(infinite_retries=True,
-           errors=[
-               ErrorCondition(
-                   error=sqlite3.OperationalError,
-                   error_message_must_include='is locked')
-           ])
+    @retry(
+        infinite_retries=True,
+        errors=[
+            ErrorCondition(
+                error=sqlite3.OperationalError, error_message_must_include="is locked"
+            )
+        ],
+    )
     def _static_write(con, cur, operations):
         """
         Write to the caching database, using the given connection.
@@ -341,7 +357,7 @@ class CachingFileStore(AbstractFileStore):
                 # Do it
                 cur.execute(command, args)
         except Exception as e:
-            logging.error('Error talking to caching database: %s', str(e))
+            logging.error("Error talking to caching database: %s", str(e))
 
             # Try to make sure we don't somehow leave anything part-done if a
             # middle operation somehow fails.
@@ -361,13 +377,17 @@ class CachingFileStore(AbstractFileStore):
         return cur.rowcount
 
     @staticmethod
-    @retry(infinite_retries=True,
-           errors=[
-               ErrorCondition(
-                   error=sqlite3.OperationalError,
-                   error_message_must_include='is locked')
-           ])
-    def _static_read(cur: sqlite3.Cursor, query: str, args: Optional[Sequence[Any]] = ()) -> Iterator[Any]:
+    @retry(
+        infinite_retries=True,
+        errors=[
+            ErrorCondition(
+                error=sqlite3.OperationalError, error_message_must_include="is locked"
+            )
+        ],
+    )
+    def _static_read(
+        cur: sqlite3.Cursor, query: str, args: Optional[Sequence[Any]] = ()
+    ) -> Iterator[Any]:
         """
         Read from the database.
 
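Note: both `_static_write` and `_static_read` above guard every SQLite call with toil's `retry` decorator, so a concurrent holder of the database lock only delays the operation instead of failing it. A minimal sketch of the same idiom, assuming nothing beyond the `retry` and `ErrorCondition` helpers this diff itself imports (the table and function name are illustrative):

    import sqlite3

    from toil.lib.retry import ErrorCondition, retry

    @retry(
        infinite_retries=True,
        errors=[
            ErrorCondition(
                error=sqlite3.OperationalError, error_message_must_include="is locked"
            )
        ],
    )
    def bump_counter(con: sqlite3.Connection) -> None:
        # Re-run from the top whenever another process holds the lock.
        with con:  # commits on success, rolls back on error
            con.execute("UPDATE properties SET value = value + 1 WHERE name = 'counter'")

Because the whole decorated function re-runs on each retry, the bodies above are written to be idempotent or transactional.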
@@ -420,7 +440,11 @@ class CachingFileStore(AbstractFileStore):
         # Get a cursor
         cur = con.cursor()
 
-        cls._static_write(con, cur, ["""
+        cls._static_write(
+            con,
+            cur,
+            [
+                """
             CREATE TABLE IF NOT EXISTS files (
                 id TEXT NOT NULL PRIMARY KEY,
                 path TEXT UNIQUE NOT NULL,
@@ -428,7 +452,8 @@ class CachingFileStore(AbstractFileStore):
                 state TEXT NOT NULL,
                 owner TEXT
             )
-        """, """
+        """,
+                """
             CREATE TABLE IF NOT EXISTS refs (
                 path TEXT NOT NULL,
                 file_id TEXT NOT NULL,
@@ -436,19 +461,23 @@ class CachingFileStore(AbstractFileStore):
                 state TEXT NOT NULL,
                 PRIMARY KEY (path, file_id)
             )
-        """, """
+        """,
+                """
             CREATE TABLE IF NOT EXISTS jobs (
                 id TEXT NOT NULL PRIMARY KEY,
                 tempdir TEXT NOT NULL,
                 disk INT NOT NULL,
                 worker TEXT
             )
-        """, """
+        """,
+                """
             CREATE TABLE IF NOT EXISTS properties (
                 name TEXT NOT NULL PRIMARY KEY,
                 value INT NOT NULL
             )
-        """])
+        """,
+            ],
+        )
 
     # Caching-specific API
 
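Note: the four CREATE TABLE statements above make up the entire per-node cache database: files tracks cached copies, refs tracks per-job references to them, jobs tracks disk reservations, and properties holds scalars such as maxSpace. A hypothetical read-only inspection of such a database (the path is illustrative; the real file is the cache-<attempt>.db created in __init__ above):

    import sqlite3

    con = sqlite3.connect("file:cache-1.db?mode=ro", uri=True)

    # Per-file cache state, straight from the schema above.
    for file_id, path, size, state, owner in con.execute(
        "SELECT id, path, size, state, owner FROM files"
    ):
        print(f"{file_id}: {size} bytes, state={state}, owner={owner}, at {path}")

    # TOTAL() rather than SUM() is what the accounting queries below use,
    # because it returns 0.0 instead of NULL for an empty table.
    (used,) = con.execute("SELECT TOTAL(size) FROM files").fetchone()
    print("cache bytes in use:", used)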
@@ -459,10 +488,12 @@ class CachingFileStore(AbstractFileStore):
         If no limit is available, raises an error.
         """
 
-        for row in self.cur.execute('SELECT value FROM properties WHERE name = ?', ('maxSpace',)):
+        for row in self.cur.execute(
+            "SELECT value FROM properties WHERE name = ?", ("maxSpace",)
+        ):
             return row[0]
 
-        raise RuntimeError('Unable to retrieve cache limit')
+        raise RuntimeError("Unable to retrieve cache limit")
 
     def getCacheUsed(self):
         """
@@ -475,10 +506,10 @@ class CachingFileStore(AbstractFileStore):
         if self.cachingIsFree():
             return 0
 
-        for row in self._read('SELECT TOTAL(size) FROM files'):
+        for row in self._read("SELECT TOTAL(size) FROM files"):
             return row[0]
 
-        raise RuntimeError('Unable to retrieve cache usage')
+        raise RuntimeError("Unable to retrieve cache usage")
 
     def getCacheExtraJobSpace(self):
         """
@@ -493,15 +524,17 @@ class CachingFileStore(AbstractFileStore):
         """
 
         # Total up the sizes of all the reads of files and subtract it from the total disk reservation of all jobs
-        for row in self._read("""
+        for row in self._read(
+            """
             SELECT (
                 (SELECT TOTAL(disk) FROM jobs) -
                 (SELECT TOTAL(files.size) FROM refs INNER JOIN files ON refs.file_id = files.id WHERE refs.state == 'immutable')
             ) as result
-            """):
+            """
+        ):
             return row[0]
 
-        raise RuntimeError('Unable to retrieve extra job space')
+        raise RuntimeError("Unable to retrieve extra job space")
 
     def getCacheAvailable(self):
         """
@@ -520,33 +553,38 @@ class CachingFileStore(AbstractFileStore):
 
         # Do a little report first
         for row in self._read("SELECT value FROM properties WHERE name = 'maxSpace'"):
-            logger.debug('Max space: %d', row[0])
+            logger.debug("Max space: %d", row[0])
         for row in self._read("SELECT TOTAL(size) FROM files"):
-            logger.debug('Total file size: %d', row[0])
+            logger.debug("Total file size: %d", row[0])
         for row in self._read("SELECT TOTAL(disk) FROM jobs"):
-            logger.debug('Total job disk requirement size: %d', row[0])
-        for row in self._read("SELECT TOTAL(files.size) FROM refs INNER JOIN files ON refs.file_id = files.id WHERE refs.state = 'immutable'"):
-            logger.debug('Total immutable reference size: %d', row[0])
+            logger.debug("Total job disk requirement size: %d", row[0])
+        for row in self._read(
+            "SELECT TOTAL(files.size) FROM refs INNER JOIN files ON refs.file_id = files.id WHERE refs.state = 'immutable'"
+        ):
+            logger.debug("Total immutable reference size: %d", row[0])
 
         if self.cachingIsFree():
             # If caching is free, we just say that all the space is always available.
-            for row in self._read("SELECT value FROM properties WHERE name = 'maxSpace'"):
+            for row in self._read(
+                "SELECT value FROM properties WHERE name = 'maxSpace'"
+            ):
                 return row[0]
 
-            raise RuntimeError('Unable to retrieve available cache space')
-
+            raise RuntimeError("Unable to retrieve available cache space")
 
-        for row in self._read("""
+        for row in self._read(
+            """
             SELECT (
                 (SELECT value FROM properties WHERE name = 'maxSpace') -
                 (SELECT TOTAL(size) FROM files) -
                 ((SELECT TOTAL(disk) FROM jobs) -
                 (SELECT TOTAL(files.size) FROM refs INNER JOIN files ON refs.file_id = files.id WHERE refs.state = 'immutable'))
             ) as result
-            """):
+            """
+        ):
             return row[0]
 
-        raise RuntimeError('Unable to retrieve available cache space')
+        raise RuntimeError("Unable to retrieve available cache space")
 
     def getSpaceUsableForJobs(self):
         """
@@ -556,15 +594,17 @@ class CachingFileStore(AbstractFileStore):
         If not retrievable, raises an error.
         """
 
-        for row in self._read("""
+        for row in self._read(
+            """
             SELECT (
                 (SELECT value FROM properties WHERE name = 'maxSpace') -
                 (SELECT TOTAL(disk) FROM jobs)
             ) as result
-            """):
+            """
+        ):
             return row[0]
 
-        raise RuntimeError('Unable to retrieve usabel space for jobs')
+        raise RuntimeError("Unable to retrieve usabel space for jobs")
 
     def getCacheUnusedJobRequirement(self):
         """
@@ -576,28 +616,36 @@ class CachingFileStore(AbstractFileStore):
         If no value is available, raises an error.
         """
 
-        logger.debug('Get unused space for job %s', self.jobID)
-
-        for row in self._read('SELECT * FROM files'):
-            logger.debug('File record: %s', str(row))
+        logger.debug("Get unused space for job %s", self.jobID)
 
-        for row in self._read('SELECT * FROM refs'):
-            logger.debug('Ref record: %s', str(row))
+        for row in self._read("SELECT * FROM files"):
+            logger.debug("File record: %s", str(row))
 
+        for row in self._read("SELECT * FROM refs"):
+            logger.debug("Ref record: %s", str(row))
 
-        for row in self._read('SELECT TOTAL(files.size) FROM refs INNER JOIN files ON refs.file_id = files.id WHERE refs.job_id = ? AND refs.state != ?',
-                              (self.jobID, 'mutable')):
+        for row in self._read(
+            "SELECT TOTAL(files.size) FROM refs INNER JOIN files ON refs.file_id = files.id WHERE refs.job_id = ? AND refs.state != ?",
+            (self.jobID, "mutable"),
+        ):
             # Sum up all the sizes of our referenced files, then subtract that from how much we came in with
             return self.jobDiskBytes - row[0]
 
-        raise RuntimeError('Unable to retrieve unused job requirement space')
+        raise RuntimeError("Unable to retrieve unused job requirement space")
 
     def adjustCacheLimit(self, newTotalBytes):
         """
         Adjust the total cache size limit to the given number of bytes.
         """
 
-        self._write([('UPDATE properties SET value = ? WHERE name = ?', (newTotalBytes, 'maxSpace'))])
+        self._write(
+            [
+                (
+                    "UPDATE properties SET value = ? WHERE name = ?",
+                    (newTotalBytes, "maxSpace"),
+                )
+            ]
+        )
 
     def fileIsCached(self, fileID):
         """
@@ -608,8 +656,10 @@ class CachingFileStore(AbstractFileStore):
         file you need to do it in a transaction.
         """
 
-        for row in self._read('SELECT COUNT(*) FROM files WHERE id = ? AND (state = ? OR state = ? OR state = ?)',
-                              (fileID, 'cached', 'uploadable', 'uploading')):
+        for row in self._read(
+            "SELECT COUNT(*) FROM files WHERE id = ? AND (state = ? OR state = ? OR state = ?)",
+            (fileID, "cached", "uploadable", "uploading"),
+        ):
 
             return row[0] > 0
         return False
@@ -621,7 +671,7 @@ class CachingFileStore(AbstractFileStore):
         Counts mutable references too.
         """
 
-        for row in self._read('SELECT COUNT(*) FROM refs WHERE file_id = ?', (fileID,)):
+        for row in self._read("SELECT COUNT(*) FROM refs WHERE file_id = ?", (fileID,)):
             return row[0]
         return 0
 
@@ -634,11 +684,14 @@ class CachingFileStore(AbstractFileStore):
         configurations, most notably the FileJobStore.
         """
 
-        for row in self._read('SELECT value FROM properties WHERE name = ?', ('freeCaching',)):
+        for row in self._read(
+            "SELECT value FROM properties WHERE name = ?", ("freeCaching",)
+        ):
             return row[0] == 1
 
         # Otherwise we need to set it
         from toil.jobStores.fileJobStore import FileJobStore
+
         if isinstance(self.jobStore, FileJobStore) and not self.forceNonFreeCaching:
             # Caching may be free since we are using a file job store.
 
@@ -647,7 +700,7 @@ class CachingFileStore(AbstractFileStore):
 
             # Read it out to a generated name.
             destDir = mkdtemp(dir=self.localCacheDir)
-            cachedFile = os.path.join(destDir, 'sniffLinkCount')
+            cachedFile = os.path.join(destDir, "sniffLinkCount")
             self.jobStore.read_file(emptyID, cachedFile, symlink=False)
 
             # Check the link count
@@ -667,7 +720,9 @@ class CachingFileStore(AbstractFileStore):
             free = 0
 
         # Save to the database if we're the first to work this out
-        self._write([('INSERT OR IGNORE INTO properties VALUES (?, ?)', ('freeCaching', free))])
+        self._write(
+            [("INSERT OR IGNORE INTO properties VALUES (?, ?)", ("freeCaching", free))]
+        )
 
         # Return true if we said caching was free
         return free == 1
@@ -684,7 +739,7 @@ class CachingFileStore(AbstractFileStore):
 
         # Hash the file ID
         hasher = hashlib.sha1()
-        hasher.update(fileStoreID.encode('utf-8'))
+        hasher.update(fileStoreID.encode("utf-8"))
 
         # Get a unique temp file name, including the file ID's hash to make
         # sure we can never collide even though we are going to remove the
@@ -708,17 +763,19 @@ class CachingFileStore(AbstractFileStore):
         # Get a list of all file owner processes on this node.
         # Exclude NULL because it comes out as 0 and we can't look for PID 0.
         owners = []
-        for row in self._read('SELECT DISTINCT owner FROM files WHERE owner IS NOT NULL'):
+        for row in self._read(
+            "SELECT DISTINCT owner FROM files WHERE owner IS NOT NULL"
+        ):
             owners.append(row[0])
 
         # Work out which of them have died.
         deadOwners = []
         for owner in owners:
             if not process_name_exists(self.coordination_dir, owner):
-                logger.debug('Owner %s is dead', owner)
+                logger.debug("Owner %s is dead", owner)
                 deadOwners.append(owner)
             else:
-                logger.debug('Owner %s is alive', owner)
+                logger.debug("Owner %s is alive", owner)
 
         for owner in deadOwners:
             # Try and adopt all the files that any dead owner had
@@ -737,14 +794,28 @@ class CachingFileStore(AbstractFileStore):
             #
             # TODO: if we ever let other PIDs be responsible for writing our
             # files asynchronously, this will need to change.
-            self._write([('UPDATE files SET owner = ?, state = ? WHERE owner = ? AND state = ?',
-                          (me, 'deleting', owner, 'deleting')),
-                         ('UPDATE files SET owner = ?, state = ? WHERE owner = ? AND state = ?',
-                          (me, 'deleting', owner, 'downloading')),
-                         ('UPDATE files SET owner = NULL, state = ? WHERE owner = ? AND (state = ? OR state = ?)',
-                          ('cached', owner, 'uploadable', 'uploading'))])
+            self._write(
+                [
+                    (
+                        "UPDATE files SET owner = ?, state = ? WHERE owner = ? AND state = ?",
+                        (me, "deleting", owner, "deleting"),
+                    ),
+                    (
+                        "UPDATE files SET owner = ?, state = ? WHERE owner = ? AND state = ?",
+                        (me, "deleting", owner, "downloading"),
+                    ),
+                    (
+                        "UPDATE files SET owner = NULL, state = ? WHERE owner = ? AND (state = ? OR state = ?)",
+                        ("cached", owner, "uploadable", "uploading"),
+                    ),
+                ]
+            )
 
-            logger.debug('Tried to adopt file operations from dead worker %s to ourselves as %s', owner, me)
+            logger.debug(
+                "Tried to adopt file operations from dead worker %s to ourselves as %s",
+                owner,
+                me,
+            )
 
     def _executePendingDeletions(self):
         """
@@ -758,16 +829,19 @@ class CachingFileStore(AbstractFileStore):
 
         # Remember the file IDs we are deleting
         deletedFiles = []
-        for row in self._read('SELECT id, path FROM files WHERE owner = ? AND state = ?', (me, 'deleting')):
+        for row in self._read(
+            "SELECT id, path FROM files WHERE owner = ? AND state = ?",
+            (me, "deleting"),
+        ):
             # Grab everything we are supposed to delete and delete it
             fileID = row[0]
             filePath = row[1]
             try:
                 os.unlink(filePath)
-                logger.debug('Successfully deleted: %s', filePath)
+                logger.debug("Successfully deleted: %s", filePath)
             except OSError:
                 # Probably already deleted
-                logger.debug('File already gone: %s', filePath)
+                logger.debug("File already gone: %s", filePath)
                 # Still need to mark it as deleted
 
@@ -778,8 +852,15 @@ class CachingFileStore(AbstractFileStore):
         for fileID in deletedFiles:
             # Drop all the files. They should have stayed in deleting state. We move them from there to not present at all.
             # Also drop their references, if they had any from dead downloaders.
-            self._write([('DELETE FROM files WHERE id = ? AND state = ?', (fileID, 'deleting')),
-                         ('DELETE FROM refs WHERE file_id = ?', (fileID,))])
+            self._write(
+                [
+                    (
+                        "DELETE FROM files WHERE id = ? AND state = ?",
+                        (fileID, "deleting"),
+                    ),
+                    ("DELETE FROM refs WHERE file_id = ?", (fileID,)),
+                ]
+            )
 
         return len(deletedFiles)
 
@@ -799,7 +880,11 @@ class CachingFileStore(AbstractFileStore):
             # Try and find a file we might want to upload
             fileID = None
             filePath = None
-            for row in self._static_read(self.cur, 'SELECT id, path FROM files WHERE state = ? AND owner = ? LIMIT 1', ('uploadable', me)):
+            for row in self._static_read(
+                self.cur,
+                "SELECT id, path FROM files WHERE state = ? AND owner = ? LIMIT 1",
+                ("uploadable", me),
+            ):
                 fileID = row[0]
                 filePath = row[1]
 
@@ -808,30 +893,57 @@ class CachingFileStore(AbstractFileStore):
                 break
 
             # We need to set it to uploading in a way that we can detect that *we* won the update race instead of anyone else.
-            rowCount = self._static_write(self.con, self.cur, [('UPDATE files SET state = ? WHERE id = ? AND state = ?', ('uploading', fileID, 'uploadable'))])
+            rowCount = self._static_write(
+                self.con,
+                self.cur,
+                [
+                    (
+                        "UPDATE files SET state = ? WHERE id = ? AND state = ?",
+                        ("uploading", fileID, "uploadable"),
+                    )
+                ],
+            )
             if rowCount != 1:
                 # We didn't manage to update it. Someone else (a running job if
                 # we are a committing thread, or visa versa) must have grabbed
                 # it.
-                logger.debug('Lost race to upload %s', fileID)
+                logger.debug("Lost race to upload %s", fileID)
                 # Try again to see if there is something else to grab.
                 continue
 
             # Upload the file
-            logger.debug('Actually executing upload for file %s', fileID)
+            logger.debug("Actually executing upload for file %s", fileID)
             try:
                 self.jobStore.update_file(fileID, filePath)
             except:
                 # We need to set the state back to 'uploadable' in case of any failures to ensure
                 # we can retry properly.
-                self._static_write(self.con, self.cur, [('UPDATE files SET state = ? WHERE id = ? AND state = ?', ('uploadable', fileID, 'uploading'))])
+                self._static_write(
+                    self.con,
+                    self.cur,
+                    [
+                        (
+                            "UPDATE files SET state = ? WHERE id = ? AND state = ?",
+                            ("uploadable", fileID, "uploading"),
+                        )
+                    ],
+                )
                 raise
 
             # Count it for the total uploaded files value we need to return
             uploadedCount += 1
 
             # Remember that we uploaded it in the database
-            self._static_write(self.con, self.cur, [('UPDATE files SET state = ?, owner = NULL WHERE id = ?', ('cached', fileID))])
+            self._static_write(
+                self.con,
+                self.cur,
+                [
+                    (
+                        "UPDATE files SET state = ?, owner = NULL WHERE id = ?",
+                        ("cached", fileID),
+                    )
+                ],
+            )
 
         return uploadedCount
 
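Note: _executePendingUploads() above claims each file with a conditional UPDATE from 'uploadable' to 'uploading' and treats a rowcount of 1 as winning the race; losing is detected after the fact rather than prevented with a lock. A standalone sketch of that compare-and-swap idiom (the table and states mirror this diff; everything else is illustrative):

    import sqlite3

    def try_claim(con: sqlite3.Connection, file_id: str) -> bool:
        # Only one concurrent caller can flip the state, so only one
        # sees rowcount == 1 and proceeds to do the upload.
        cur = con.execute(
            "UPDATE files SET state = ? WHERE id = ? AND state = ?",
            ("uploading", file_id, "uploadable"),
        )
        con.commit()
        return cur.rowcount == 1

On failure, the code above flips the state back to 'uploadable' so another worker (or a later retry) can claim the upload again.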
@@ -855,7 +967,14 @@ class CachingFileStore(AbstractFileStore):
855
967
  # But we won't actually let the job run and use any of this space until
856
968
  # the cache has been successfully cleared out.
857
969
  with self.as_process() as me:
858
- self._write([('INSERT INTO jobs VALUES (?, ?, ?, ?)', (self.jobID, self.localTempDir, newJobReqs, me))])
970
+ self._write(
971
+ [
972
+ (
973
+ "INSERT INTO jobs VALUES (?, ?, ?, ?)",
974
+ (self.jobID, self.localTempDir, newJobReqs, me),
975
+ )
976
+ ]
977
+ )
859
978
 
860
979
  # Now we need to make sure that we can fit all currently cached files,
861
980
  # and the parts of the total job requirements not currently spent on
@@ -863,7 +982,7 @@ class CachingFileStore(AbstractFileStore):
863
982
 
864
983
  available = self.getCacheAvailable()
865
984
 
866
- logger.debug('Available space with job: %d bytes', available)
985
+ logger.debug("Available space with job: %d bytes", available)
867
986
 
868
987
  if available >= 0:
869
988
  # We're fine on disk space
@@ -887,10 +1006,14 @@ class CachingFileStore(AbstractFileStore):
887
1006
  """
888
1007
 
889
1008
  # Get the job's temp dir
890
- for row in cls._static_read(cur, 'SELECT tempdir FROM jobs WHERE id = ?', (jobID,)):
1009
+ for row in cls._static_read(
1010
+ cur, "SELECT tempdir FROM jobs WHERE id = ?", (jobID,)
1011
+ ):
891
1012
  jobTemp = row[0]
892
1013
 
893
- for row in cls._static_read(cur, 'SELECT path FROM refs WHERE job_id = ?', (jobID,)):
1014
+ for row in cls._static_read(
1015
+ cur, "SELECT path FROM refs WHERE job_id = ?", (jobID,)
1016
+ ):
894
1017
  try:
895
1018
  # Delete all the reference files.
896
1019
  os.unlink(row[0])
@@ -898,7 +1021,7 @@ class CachingFileStore(AbstractFileStore):
898
1021
  # May not exist
899
1022
  pass
900
1023
  # And their database entries
901
- cls._static_write(con, cur, [('DELETE FROM refs WHERE job_id = ?', (jobID,))])
1024
+ cls._static_write(con, cur, [("DELETE FROM refs WHERE job_id = ?", (jobID,))])
902
1025
 
903
1026
  try:
904
1027
  # Delete the job's temp directory to the extent that we can.
@@ -907,7 +1030,7 @@ class CachingFileStore(AbstractFileStore):
907
1030
  pass
908
1031
 
909
1032
  # Strike the job from the database
910
- cls._static_write(con, cur, [('DELETE FROM jobs WHERE id = ?', (jobID,))])
1033
+ cls._static_write(con, cur, [("DELETE FROM jobs WHERE id = ?", (jobID,))])
911
1034
 
912
1035
  def _deallocateSpaceForJob(self):
913
1036
  """
@@ -938,12 +1061,12 @@ class CachingFileStore(AbstractFileStore):
938
1061
  if self._executePendingDeletions() > 0:
939
1062
  # We actually had something to delete, which we deleted.
940
1063
  # Maybe there is space now
941
- logger.debug('Successfully executed pending deletions to free space')
1064
+ logger.debug("Successfully executed pending deletions to free space")
942
1065
  return True
943
1066
 
944
1067
  if self._executePendingUploads() > 0:
945
1068
  # We had something to upload. Maybe it can be evicted now.
946
- logger.debug('Successfully executed pending uploads to free space')
1069
+ logger.debug("Successfully executed pending uploads to free space")
947
1070
  return True
948
1071
 
949
1072
  # Otherwise, not enough files could be found in deleting state to solve our problem.
@@ -953,37 +1076,45 @@ class CachingFileStore(AbstractFileStore):
953
1076
  # soon as we hit the cache limit.
954
1077
 
955
1078
  # Find something that has no non-mutable references and is not already being deleted.
956
- self._read("""
1079
+ self._read(
1080
+ """
957
1081
  SELECT files.id FROM files WHERE files.state = 'cached' AND NOT EXISTS (
958
1082
  SELECT NULL FROM refs WHERE refs.file_id = files.id AND refs.state != 'mutable'
959
1083
  ) LIMIT 1
960
- """)
1084
+ """
1085
+ )
961
1086
  row = self.cur.fetchone()
962
1087
  if row is None:
963
1088
  # Nothing can be evicted by us.
964
1089
  # Someone else might be in the process of evicting something that will free up space for us too.
965
1090
  # Or someone mught be uploading something and we have to wait for them to finish before it can be deleted.
966
- logger.debug('Could not find anything to evict! Cannot free up space!')
1091
+ logger.debug("Could not find anything to evict! Cannot free up space!")
967
1092
  return False
968
1093
 
969
1094
  # Otherwise we found an eviction candidate.
970
1095
  fileID = row[0]
971
1096
 
972
1097
  # Try and grab it for deletion, subject to the condition that nothing has started reading it
973
- self._write([("""
1098
+ self._write(
1099
+ [
1100
+ (
1101
+ """
974
1102
  UPDATE files SET owner = ?, state = ? WHERE id = ? AND state = ?
975
1103
  AND owner IS NULL AND NOT EXISTS (
976
1104
  SELECT NULL FROM refs WHERE refs.file_id = files.id AND refs.state != 'mutable'
977
1105
  )
978
1106
  """,
979
- (me, 'deleting', fileID, 'cached'))])
1107
+ (me, "deleting", fileID, "cached"),
1108
+ )
1109
+ ]
1110
+ )
980
1111
 
981
- logger.debug('Evicting file %s', fileID)
1112
+ logger.debug("Evicting file %s", fileID)
982
1113
 
983
1114
  # Whether we actually got it or not, try deleting everything we have to delete
984
1115
  if self._executePendingDeletions() > 0:
985
1116
  # We deleted something
986
- logger.debug('Successfully executed pending deletions to free space')
1117
+ logger.debug("Successfully executed pending deletions to free space")
987
1118
  return True
988
1119
 
989
1120
  def _freeUpSpace(self):
@@ -1000,7 +1131,10 @@ class CachingFileStore(AbstractFileStore):
1000
1131
 
1001
1132
  while availableSpace < 0:
1002
1133
  # While there isn't enough space for the thing we want
1003
- logger.debug('Cache is full (%d bytes free). Trying to free up space!', availableSpace)
1134
+ logger.debug(
1135
+ "Cache is full (%d bytes free). Trying to free up space!",
1136
+ availableSpace,
1137
+ )
1004
1138
  # Free up space. See if we made any progress
1005
1139
  progress = self._tryToFreeUpSpace()
1006
1140
  availableSpace = self.getCacheAvailable()
@@ -1012,19 +1146,23 @@ class CachingFileStore(AbstractFileStore):
1012
1146
  # See if we've been oversubscribed.
1013
1147
  jobSpace = self.getSpaceUsableForJobs()
1014
1148
  if jobSpace < 0:
1015
- logger.critical('Jobs on this machine have oversubscribed our total available space (%d bytes)!', jobSpace)
1149
+ logger.critical(
1150
+ "Jobs on this machine have oversubscribed our total available space (%d bytes)!",
1151
+ jobSpace,
1152
+ )
1016
1153
  raise CacheUnbalancedError
1017
1154
  else:
1018
1155
  patience -= 1
1019
1156
  if patience <= 0:
1020
- logger.critical('Waited implausibly long for active uploads and deletes.')
1157
+ logger.critical(
1158
+ "Waited implausibly long for active uploads and deletes."
1159
+ )
1021
1160
  raise CacheUnbalancedError
1022
1161
  else:
1023
1162
  # Wait a bit and come back
1024
1163
  time.sleep(2)
1025
1164
 
1026
- logger.debug('Cache has %d bytes free.', availableSpace)
1027
-
1165
+ logger.debug("Cache has %d bytes free.", availableSpace)
1028
1166
 
1029
1167
  # Normal AbstractFileStore API
1030
1168
 
@@ -1037,15 +1175,21 @@ class CachingFileStore(AbstractFileStore):
1037
1175
  # Create a working directory for the job
1038
1176
  startingDir = os.getcwd()
1039
1177
  # Move self.localTempDir from the worker directory set up in __init__ to a per-job directory.
1040
- self.localTempDir = make_public_dir(in_directory=self.localTempDir)
1178
+ self.localTempDir = make_public_dir(self.localTempDir, suggested_name="job")
1041
1179
  # Check the status of all jobs on this node. If there are jobs that started and died before
1042
1180
  # cleaning up their presence from the database, clean them up ourselves.
1043
1181
  self._removeDeadJobs(self.coordination_dir, self.con)
1044
- # Get the requirements for the job.
1182
+ # Get the disk requirement for the job, which we will use to know if we
1183
+ # have filled the cache or not.
1045
1184
  self.jobDiskBytes = job.disk
1046
1185
 
1047
- logger.debug('Actually running job (%s) with ID (%s) which wants %d of our %d bytes.',
1048
- self.jobName, self.jobID, self.jobDiskBytes, self.getCacheLimit())
1186
+ logger.debug(
1187
+ "Actually running job (%s) with ID (%s) which wants %d of our %d bytes.",
1188
+ self.jobName,
1189
+ self.jobID,
1190
+ self.jobDiskBytes,
1191
+ self.getCacheLimit(),
1192
+ )
1049
1193
 
1050
1194
  # Register the current job as taking this much space, and evict files
1051
1195
  # from the cache to make room before letting the job run.
@@ -1055,22 +1199,6 @@ class CachingFileStore(AbstractFileStore):
1055
1199
  with super().open(job):
1056
1200
  yield
1057
1201
  finally:
1058
- # See how much disk space is used at the end of the job.
1059
- # Not a real peak disk usage, but close enough to be useful for warning the user.
1060
- # TODO: Push this logic into the abstract file store
1061
- disk: int = getDirSizeRecursively(self.localTempDir)
1062
- percent: float = 0.0
1063
- if self.jobDiskBytes and self.jobDiskBytes > 0:
1064
- percent = float(disk) / self.jobDiskBytes * 100
1065
- disk_usage: str = (f"Job {self.jobName} used {percent:.2f}% disk ({bytes2human(disk)}B [{disk}B] used, "
1066
- f"{bytes2human(self.jobDiskBytes)}B [{self.jobDiskBytes}B] requested).")
1067
- if disk > self.jobDiskBytes:
1068
- self.log_to_leader("Job used more disk than requested. For CWL, consider increasing the outdirMin "
1069
- f"requirement, otherwise, consider increasing the disk requirement. {disk_usage}",
1070
- level=logging.WARNING)
1071
- else:
1072
- self.log_to_leader(disk_usage, level=logging.DEBUG)
1073
-
1074
1202
  # Go back up to the per-worker local temp directory.
1075
1203
  os.chdir(startingDir)
1076
1204
  self.cleanupInProgress = True
@@ -1095,7 +1223,9 @@ class CachingFileStore(AbstractFileStore):
1095
1223
  # Create an empty file to get an ID.
1096
1224
  # Make sure to pass along the file basename.
1097
1225
  # TODO: this empty file could leak if we die now...
1098
- fileID = self.jobStore.getEmptyFileStoreID(creatorID, cleanup, os.path.basename(localFileName))
1226
+ fileID = self.jobStore.get_empty_file_store_id(
1227
+ creatorID, cleanup, os.path.basename(localFileName)
1228
+ )
1099
1229
  # Work out who we are
1100
1230
  with self.as_process() as me:
1101
1231
 
@@ -1104,10 +1234,22 @@ class CachingFileStore(AbstractFileStore):
1104
1234
 
1105
1235
  # Create a file in uploadable state and a reference, in the same transaction.
1106
1236
  # Say the reference is an immutable reference
1107
- self._write([('INSERT INTO files VALUES (?, ?, ?, ?, ?)', (fileID, cachePath, fileSize, 'uploadable', me)),
1108
- ('INSERT INTO refs VALUES (?, ?, ?, ?)', (absLocalFileName, fileID, creatorID, 'immutable'))])
1237
+ self._write(
1238
+ [
1239
+ (
1240
+ "INSERT INTO files VALUES (?, ?, ?, ?, ?)",
1241
+ (fileID, cachePath, fileSize, "uploadable", me),
1242
+ ),
1243
+ (
1244
+ "INSERT INTO refs VALUES (?, ?, ?, ?)",
1245
+ (absLocalFileName, fileID, creatorID, "immutable"),
1246
+ ),
1247
+ ]
1248
+ )
1109
1249
 
1110
- if absLocalFileName.startswith(self.localTempDir) and not os.path.islink(absLocalFileName):
1250
+ if absLocalFileName.startswith(self.localTempDir) and not os.path.islink(
1251
+ absLocalFileName
1252
+ ):
1111
1253
  # We should link into the cache, because the upload is coming from our local temp dir (and not via a symlink in there)
1112
1254
  try:
1113
1255
  # Try and hardlink the file into the cache.
@@ -1118,8 +1260,14 @@ class CachingFileStore(AbstractFileStore):
1118
1260
 
1119
1261
  linkedToCache = True
1120
1262
 
1121
- logger.debug('Hardlinked file %s into cache at %s; deferring write to job store', localFileName, cachePath)
1122
- assert not os.path.islink(cachePath), "Symlink %s has invaded cache!" % cachePath
1263
+ logger.debug(
1264
+ "Hardlinked file %s into cache at %s; deferring write to job store",
1265
+ localFileName,
1266
+ cachePath,
1267
+ )
1268
+ assert not os.path.islink(cachePath), (
1269
+ "Symlink %s has invaded cache!" % cachePath
1270
+ )
1123
1271
 
1124
1272
  # Don't do the upload now. Let it be deferred until later (when the job is committing).
1125
1273
  except OSError:
@@ -1133,7 +1281,6 @@ class CachingFileStore(AbstractFileStore):
1133
1281
  # files to vanish from our cache.
1134
1282
  linkedToCache = False
1135
1283
 
1136
-
1137
1284
  if not linkedToCache:
1138
1285
  # If we can't do the link into the cache and upload from there, we
1139
1286
  # have to just upload right away. We can't guarantee sufficient
@@ -1142,27 +1289,40 @@ class CachingFileStore(AbstractFileStore):
1142
1289
 
1143
1290
  # Change the reference to 'mutable', which it will be.
1144
1291
  # And drop the file altogether.
1145
- self._write([('UPDATE refs SET state = ? WHERE path = ? AND file_id = ?', ('mutable', absLocalFileName, fileID)),
1146
- ('DELETE FROM files WHERE id = ?', (fileID,))])
1292
+ self._write(
1293
+ [
1294
+ (
1295
+ "UPDATE refs SET state = ? WHERE path = ? AND file_id = ?",
1296
+ ("mutable", absLocalFileName, fileID),
1297
+ ),
1298
+ ("DELETE FROM files WHERE id = ?", (fileID,)),
1299
+ ]
1300
+ )
1147
1301
 
1148
1302
  # Save the file to the job store right now
1149
- logger.debug('Actually executing upload immediately for file %s', fileID)
1303
+ logger.debug(
1304
+ "Actually executing upload immediately for file %s", fileID
1305
+ )
1150
1306
  self.jobStore.update_file(fileID, absLocalFileName)
1151
1307
 
1152
1308
  # Ship out the completed FileID object with its real size.
1153
1309
  return FileID.forPath(fileID, absLocalFileName)
1154
1310
 
1155
- def readGlobalFile(self, fileStoreID, userPath=None, cache=True, mutable=False, symlink=False):
1311
+ def readGlobalFile(
1312
+ self, fileStoreID, userPath=None, cache=True, mutable=False, symlink=False
1313
+ ):
1156
1314
 
1157
1315
  if str(fileStoreID) in self.filesToDelete:
1158
1316
  # File has already been deleted
1159
- raise FileNotFoundError(f'Attempted to read deleted file: {fileStoreID}')
1317
+ raise FileNotFoundError(f"Attempted to read deleted file: {fileStoreID}")
1160
1318
 
1161
1319
  if userPath is not None:
1162
1320
  # Validate the destination we got
1163
1321
  localFilePath = self._resolveAbsoluteLocalPath(userPath)
1164
1322
  if os.path.exists(localFilePath):
1165
- raise RuntimeError(' File %s ' % localFilePath + ' exists. Cannot Overwrite.')
1323
+ raise RuntimeError(
1324
+ " File %s " % localFilePath + " exists. Cannot Overwrite."
1325
+ )
1166
1326
  else:
1167
1327
  # Make our own destination
1168
1328
  localFilePath = self.getLocalTempFileName()
@@ -1174,22 +1334,29 @@ class CachingFileStore(AbstractFileStore):
1174
1334
  # We want to use the cache
1175
1335
 
1176
1336
  if mutable:
1177
- finalPath = self._readGlobalFileMutablyWithCache(fileStoreID, localFilePath, readerID)
1337
+ finalPath = self._readGlobalFileMutablyWithCache(
1338
+ fileStoreID, localFilePath, readerID
1339
+ )
1178
1340
  else:
1179
- finalPath = self._readGlobalFileWithCache(fileStoreID, localFilePath, symlink, readerID)
1341
+ finalPath = self._readGlobalFileWithCache(
1342
+ fileStoreID, localFilePath, symlink, readerID
1343
+ )
1180
1344
  else:
1181
1345
  # We do not want to use the cache
1182
- finalPath = self._readGlobalFileWithoutCache(fileStoreID, localFilePath, mutable, symlink, readerID)
1346
+ finalPath = self._readGlobalFileWithoutCache(
1347
+ fileStoreID, localFilePath, mutable, symlink, readerID
1348
+ )
1183
1349
 
1184
- if getattr(fileStoreID, 'executable', False):
1350
+ if getattr(fileStoreID, "executable", False):
1185
1351
  os.chmod(finalPath, os.stat(finalPath).st_mode | stat.S_IXUSR)
1186
1352
 
1187
1353
  # Record access in case the job crashes and we have to log it
1188
1354
  self.logAccess(fileStoreID, finalPath)
1189
1355
  return finalPath
1190
1356
 
1191
-
1192
- def _readGlobalFileWithoutCache(self, fileStoreID, localFilePath, mutable, symlink, readerID):
1357
+ def _readGlobalFileWithoutCache(
1358
+ self, fileStoreID, localFilePath, mutable, symlink, readerID
1359
+ ):
1193
1360
  """
1194
1361
  Read a file without putting it into the cache.
1195
1362
 
@@ -1207,7 +1374,9 @@ class CachingFileStore(AbstractFileStore):
1207
1374
  # read a file that is 'uploadable' or 'uploading' and hasn't hit
1208
1375
  # the backing job store yet.
1209
1376
 
1210
- with self._with_copying_reference_to_upload(fileStoreID, readerID, localFilePath) as ref_path:
1377
+ with self._with_copying_reference_to_upload(
1378
+ fileStoreID, readerID, localFilePath
1379
+ ) as ref_path:
1211
1380
  if ref_path is not None:
1212
1381
  # We got a copying reference, so the file is being uploaded and
1213
1382
  # must be read from the cache for consistency. And it will
@@ -1221,11 +1390,16 @@ class CachingFileStore(AbstractFileStore):
1221
1390
 
1222
1391
  # Find where the file is cached
1223
1392
  cachedPath = None
1224
- for row in self._read('SELECT path FROM files WHERE id = ?', (fileStoreID,)):
1393
+ for row in self._read(
1394
+ "SELECT path FROM files WHERE id = ?", (fileStoreID,)
1395
+ ):
1225
1396
  cachedPath = row[0]
1226
1397
 
1227
1398
  if cachedPath is None:
1228
- raise RuntimeError('File %s went away while we had a reference to it!' % fileStoreID)
1399
+ raise RuntimeError(
1400
+ "File %s went away while we had a reference to it!"
1401
+ % fileStoreID
1402
+ )
1229
1403
 
1230
1404
  if self.forceDownloadDelay is not None:
1231
1405
  # Wait around to simulate a big file for testing
@@ -1234,8 +1408,14 @@ class CachingFileStore(AbstractFileStore):
1234
1408
  atomic_copy(cachedPath, ref_path)
1235
1409
 
1236
1410
  # Change the reference to mutable so it sticks around
1237
- self._write([('UPDATE refs SET state = ? WHERE path = ? and file_id = ?',
1238
- ('mutable', ref_path, fileStoreID))])
1411
+ self._write(
1412
+ [
1413
+ (
1414
+ "UPDATE refs SET state = ? WHERE path = ? and file_id = ?",
1415
+ ("mutable", ref_path, fileStoreID),
1416
+ )
1417
+ ]
1418
+ )
1239
1419
  else:
1240
1420
  # File is not being uploaded currently.
1241
1421
 
@@ -1245,8 +1425,14 @@ class CachingFileStore(AbstractFileStore):
1245
1425
 
1246
1426
  # Create a 'mutable' reference (even if we end up with a link)
1247
1427
  # so we can see this file in deleteLocalFile.
1248
- self._write([('INSERT INTO refs VALUES (?, ?, ?, ?)',
1249
- (localFilePath, fileStoreID, readerID, 'mutable'))])
1428
+ self._write(
1429
+ [
1430
+ (
1431
+ "INSERT INTO refs VALUES (?, ?, ?, ?)",
1432
+ (localFilePath, fileStoreID, readerID, "mutable"),
1433
+ )
1434
+ ]
1435
+ )
1250
1436
 
1251
1437
  if self.forceDownloadDelay is not None:
1252
1438
  # Wait around to simulate a big file for testing
@@ -1306,15 +1492,32 @@ class CachingFileStore(AbstractFileStore):
1306
1492
  # Start a loop until we can do one of these
1307
1493
  while True:
1308
1494
  # Try and create a downloading entry if no entry exists
1309
- logger.debug('Trying to make file record for id %s', fileStoreID)
1310
- self._write([('INSERT OR IGNORE INTO files VALUES (?, ?, ?, ?, ?)',
1311
- (fileStoreID, cachedPath, self.getGlobalFileSize(fileStoreID), 'downloading', me))])
1495
+ logger.debug("Trying to make file record for id %s", fileStoreID)
1496
+ self._write(
1497
+ [
1498
+ (
1499
+ "INSERT OR IGNORE INTO files VALUES (?, ?, ?, ?, ?)",
1500
+ (
1501
+ fileStoreID,
1502
+ cachedPath,
1503
+ self.getGlobalFileSize(fileStoreID),
1504
+ "downloading",
1505
+ me,
1506
+ ),
1507
+ )
1508
+ ]
1509
+ )
1312
1510
 
1313
1511
  # See if we won the race
1314
- self._read('SELECT COUNT(*) FROM files WHERE id = ? AND state = ? AND owner = ?', (fileStoreID, 'downloading', me))
1512
+ self._read(
1513
+ "SELECT COUNT(*) FROM files WHERE id = ? AND state = ? AND owner = ?",
1514
+ (fileStoreID, "downloading", me),
1515
+ )
1315
1516
  if self.cur.fetchone()[0] > 0:
1316
1517
  # We are responsible for downloading the file
1317
- logger.debug('We are now responsible for downloading file %s', fileStoreID)
1518
+ logger.debug(
1519
+ "We are now responsible for downloading file %s", fileStoreID
1520
+ )
1318
1521
 
1319
1522
  # Make sure we have space for this download.
1320
1523
  self._freeUpSpace()
@@ -1329,37 +1532,65 @@ class CachingFileStore(AbstractFileStore):
  # two readers, one cached copy, and space for two copies total.

  # Make the copying reference
- self._write([('INSERT INTO refs VALUES (?, ?, ?, ?)',
- (localFilePath, fileStoreID, readerID, 'copying'))])
+ self._write(
+ [
+ (
+ "INSERT INTO refs VALUES (?, ?, ?, ?)",
+ (localFilePath, fileStoreID, readerID, "copying"),
+ )
+ ]
+ )

  # Fulfill it with a full copy or by giving away the cached copy
- self._fulfillCopyingReference(fileStoreID, cachedPath, localFilePath)
+ self._fulfillCopyingReference(
+ fileStoreID, cachedPath, localFilePath
+ )

  # Now we're done
  return localFilePath

  else:
- logger.debug('Someone else is already responsible for file %s', fileStoreID)
+ logger.debug(
+ "Someone else is already responsible for file %s", fileStoreID
+ )

  # A record already existed for this file.
  # Try and create an immutable or copying reference to an entry that
  # is in 'cached' or 'uploadable' or 'uploading' state.
  # It might be uploading because *we* are supposed to be uploading it.
- logger.debug('Trying to make reference to file %s', fileStoreID)
- self._write([('INSERT INTO refs SELECT ?, id, ?, ? FROM files WHERE id = ? AND (state = ? OR state = ? OR state = ?)',
- (localFilePath, readerID, 'copying', fileStoreID, 'cached', 'uploadable', 'uploading'))])
+ logger.debug("Trying to make reference to file %s", fileStoreID)
+ self._write(
+ [
+ (
+ "INSERT INTO refs SELECT ?, id, ?, ? FROM files WHERE id = ? AND (state = ? OR state = ? OR state = ?)",
+ (
+ localFilePath,
+ readerID,
+ "copying",
+ fileStoreID,
+ "cached",
+ "uploadable",
+ "uploading",
+ ),
+ )
+ ]
+ )

  # See if we got it
- self._read('SELECT COUNT(*) FROM refs WHERE path = ? and file_id = ?', (localFilePath, fileStoreID))
+ self._read(
+ "SELECT COUNT(*) FROM refs WHERE path = ? and file_id = ?",
+ (localFilePath, fileStoreID),
+ )
  if self.cur.fetchone()[0] > 0:
  # The file is cached and we can copy or link it
- logger.debug('Obtained reference to file %s', fileStoreID)
+ logger.debug("Obtained reference to file %s", fileStoreID)

  # Get the path it is actually at in the cache, instead of where we wanted to put it
- for row in self._read('SELECT path FROM files WHERE id = ?', (fileStoreID,)):
+ for row in self._read(
+ "SELECT path FROM files WHERE id = ?", (fileStoreID,)
+ ):
  cachedPath = row[0]

-
  while self.getCacheAvailable() < 0:
  # Since we now have a copying reference, see if we have used too much space.
  # If so, try to free up some space by deleting or uploading, but
@@ -1372,15 +1603,23 @@ class CachingFileStore(AbstractFileStore):

  # See if we have no other references and we can give away the file.
  # Change it to downloading owned by us if we can grab it.
- self._write([("""
+ self._write(
+ [
+ (
+ """
  UPDATE files SET files.owner = ?, files.state = ? WHERE files.id = ? AND files.state = ?
  AND files.owner IS NULL AND NOT EXISTS (
  SELECT NULL FROM refs WHERE refs.file_id = files.id AND refs.state != 'mutable'
  )
  """,
- (me, 'downloading', fileStoreID, 'cached'))])
-
- if self._giveAwayDownloadingFile(fileStoreID, cachedPath, localFilePath):
+ (me, "downloading", fileStoreID, "cached"),
+ )
+ ]
+ )
+
+ if self._giveAwayDownloadingFile(
+ fileStoreID, cachedPath, localFilePath
+ ):
  # We got ownership of the file and managed to give it away.
  return localFilePath
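The giveaway path above uses another flavor of the same race: a single guarded `UPDATE` that only fires while the file is still `cached`, unowned, and free of non-mutable references, so at most one process can pull it back into `downloading`. A hedged sketch with an illustrative schema (the real statement also qualifies the updated columns with the table name):

```python
import sqlite3

con = sqlite3.connect(":memory:")
cur = con.cursor()
cur.execute("CREATE TABLE files (id TEXT PRIMARY KEY, state TEXT, owner TEXT)")
cur.execute("CREATE TABLE refs (path TEXT, file_id TEXT, state TEXT)")
cur.execute("INSERT INTO files VALUES ('file-1', 'cached', NULL)")

cur.execute(
    """
    UPDATE files SET owner = ?, state = ? WHERE id = ? AND state = ?
    AND owner IS NULL AND NOT EXISTS (
        SELECT NULL FROM refs WHERE refs.file_id = files.id AND refs.state != 'mutable'
    )
    """,
    ("worker-a", "downloading", "file-1", "cached"),
)
con.commit()
print(cur.rowcount)  # 1 if we grabbed the file, 0 if somebody beat us to it
```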
 
@@ -1401,14 +1640,23 @@ class CachingFileStore(AbstractFileStore):
  atomic_copy(cachedPath, localFilePath)

  # Change the reference to mutable
- self._write([('UPDATE refs SET state = ? WHERE path = ? AND file_id = ?', ('mutable', localFilePath, fileStoreID))])
+ self._write(
+ [
+ (
+ "UPDATE refs SET state = ? WHERE path = ? AND file_id = ?",
+ ("mutable", localFilePath, fileStoreID),
+ )
+ ]
+ )

  # Now we're done
  return localFilePath

  else:
  # We didn't get a reference. Maybe it is still downloading.
- logger.debug('Could not obtain reference to file %s', fileStoreID)
+ logger.debug(
+ "Could not obtain reference to file %s", fileStoreID
+ )

  # Loop around again and see if either we can download it or we can get a reference to it.

@@ -1448,8 +1696,14 @@ class CachingFileStore(AbstractFileStore):
  # Expose this file as cached so other people can copy off of it too.

  # Change state from downloading to cached
- self._write([('UPDATE files SET state = ?, owner = NULL WHERE id = ?',
- ('cached', fileStoreID))])
+ self._write(
+ [
+ (
+ "UPDATE files SET state = ?, owner = NULL WHERE id = ?",
+ ("cached", fileStoreID),
+ )
+ ]
+ )

  if self.forceDownloadDelay is not None:
  # Wait around to simulate a big file for testing
@@ -1459,12 +1713,18 @@ class CachingFileStore(AbstractFileStore):
  atomic_copy(cachedPath, localFilePath)

  # Change our reference to mutable
- self._write([('UPDATE refs SET state = ? WHERE path = ? AND file_id = ?', ('mutable', localFilePath, fileStoreID))])
+ self._write(
+ [
+ (
+ "UPDATE refs SET state = ? WHERE path = ? AND file_id = ?",
+ ("mutable", localFilePath, fileStoreID),
+ )
+ ]
+ )

  # Now we're done
  return

-
  def _giveAwayDownloadingFile(self, fileStoreID, cachedPath, localFilePath):
  """
  Move a downloaded file in 'downloading' state, owned by us, from the cache to a user-specified destination path.
@@ -1484,8 +1744,10 @@ class CachingFileStore(AbstractFileStore):
  with self.as_process() as me:

  # See if we actually own this file and can give it away
- self._read('SELECT COUNT(*) FROM files WHERE id = ? AND state = ? AND owner = ?',
- (fileStoreID, 'downloading', me))
+ self._read(
+ "SELECT COUNT(*) FROM files WHERE id = ? AND state = ? AND owner = ?",
+ (fileStoreID, "downloading", me),
+ )
  if self.cur.fetchone()[0] > 0:
  # Now we have exclusive control of the cached copy of the file, so we can give it away.

@@ -1494,8 +1756,15 @@ class CachingFileStore(AbstractFileStore):
  # We are giving it away
  shutil.move(cachedPath, localFilePath)
  # Record that.
- self._write([('UPDATE refs SET state = ? WHERE path = ? AND file_id = ?', ('mutable', localFilePath, fileStoreID)),
- ('DELETE FROM files WHERE id = ?', (fileStoreID,))])
+ self._write(
+ [
+ (
+ "UPDATE refs SET state = ? WHERE path = ? AND file_id = ?",
+ ("mutable", localFilePath, fileStoreID),
+ ),
+ ("DELETE FROM files WHERE id = ?", (fileStoreID,)),
+ ]
+ )

  # Now we're done
  return True
@@ -1520,7 +1789,9 @@ class CachingFileStore(AbstractFileStore):
  :rtype: bool
  """

- assert os.path.exists(cachedPath), "Cannot create link to missing cache file %s" % cachedPath
+ assert os.path.exists(cachedPath), (
+ "Cannot create link to missing cache file %s" % cachedPath
+ )

  try:
  # Try and make the hard link.
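`_createLinkFromCache` prefers a zero-copy hard link out of the cache and only falls back to a real copy when linking fails. A minimal sketch of that link-or-report-failure shape; the function name is made up, and catching every `OSError` is a simplification of whatever the real method distinguishes:

```python
import os

def link_or_fail(cached_path: str, local_path: str, symlink: bool = False) -> bool:
    """Try to link cached_path to local_path; return False so the caller
    can fall back to making a real copy instead."""
    try:
        if symlink:
            os.symlink(cached_path, local_path)
        else:
            os.link(cached_path, local_path)
        return True
    except OSError:
        # Cross-device links (EXDEV), permission problems, or an existing
        # destination all mean we cannot link and must copy.
        return False
```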
@@ -1562,17 +1833,46 @@ class CachingFileStore(AbstractFileStore):
  # Try and create a downloading entry if no entry exists.
  # Make sure to create a reference at the same time if it succeeds, to bill it against our job's space.
  # Don't create the mutable reference yet because we might not necessarily be able to clear that space.
- logger.debug('Trying to make file downloading file record and reference for id %s', fileStoreID)
- self._write([('INSERT OR IGNORE INTO files VALUES (?, ?, ?, ?, ?)',
- (fileStoreID, cachedPath, self.getGlobalFileSize(fileStoreID), 'downloading', me)),
- ('INSERT INTO refs SELECT ?, id, ?, ? FROM files WHERE id = ? AND state = ? AND owner = ?',
- (localFilePath, readerID, 'immutable', fileStoreID, 'downloading', me))])
+ logger.debug(
+ "Trying to make file downloading file record and reference for id %s",
+ fileStoreID,
+ )
+ self._write(
+ [
+ (
+ "INSERT OR IGNORE INTO files VALUES (?, ?, ?, ?, ?)",
+ (
+ fileStoreID,
+ cachedPath,
+ self.getGlobalFileSize(fileStoreID),
+ "downloading",
+ me,
+ ),
+ ),
+ (
+ "INSERT INTO refs SELECT ?, id, ?, ? FROM files WHERE id = ? AND state = ? AND owner = ?",
+ (
+ localFilePath,
+ readerID,
+ "immutable",
+ fileStoreID,
+ "downloading",
+ me,
+ ),
+ ),
+ ]
+ )

  # See if we won the race
- self._read('SELECT COUNT(*) FROM files WHERE id = ? AND state = ? AND owner = ?', (fileStoreID, 'downloading', me))
+ self._read(
+ "SELECT COUNT(*) FROM files WHERE id = ? AND state = ? AND owner = ?",
+ (fileStoreID, "downloading", me),
+ )
  if self.cur.fetchone()[0] > 0:
  # We are responsible for downloading the file (and we have the reference)
- logger.debug('We are now responsible for downloading file %s', fileStoreID)
+ logger.debug(
+ "We are now responsible for downloading file %s", fileStoreID
+ )

  # Make sure we have space for this download.
  self._freeUpSpace()
@@ -1586,8 +1886,14 @@ class CachingFileStore(AbstractFileStore):
  # We made the link!

  # Change file state from downloading to cached so other people can use it
- self._write([('UPDATE files SET state = ?, owner = NULL WHERE id = ?',
- ('cached', fileStoreID))])
+ self._write(
+ [
+ (
+ "UPDATE files SET state = ?, owner = NULL WHERE id = ?",
+ ("cached", fileStoreID),
+ )
+ ]
+ )

  # Now we're done!
  return localFilePath
@@ -1595,36 +1901,69 @@ class CachingFileStore(AbstractFileStore):
  # We could not make a link. We need to make a copy.

  # Change the reference to copying.
- self._write([('UPDATE refs SET state = ? WHERE path = ? AND file_id = ?', ('copying', localFilePath, fileStoreID))])
+ self._write(
+ [
+ (
+ "UPDATE refs SET state = ? WHERE path = ? AND file_id = ?",
+ ("copying", localFilePath, fileStoreID),
+ )
+ ]
+ )

  # Fulfill it with a full copy or by giving away the cached copy
- self._fulfillCopyingReference(fileStoreID, cachedPath, localFilePath)
+ self._fulfillCopyingReference(
+ fileStoreID, cachedPath, localFilePath
+ )

  # Now we're done
  return localFilePath

  else:
- logger.debug('We already have an entry in the cache database for file %s', fileStoreID)
+ logger.debug(
+ "We already have an entry in the cache database for file %s",
+ fileStoreID,
+ )

  # A record already existed for this file.
  # Try and create an immutable reference to an entry that
  # is in 'cached' or 'uploadable' or 'uploading' state.
  # It might be uploading because *we* are supposed to be uploading it.
- logger.debug('Trying to make reference to file %s', fileStoreID)
- self._write([('INSERT INTO refs SELECT ?, id, ?, ? FROM files WHERE id = ? AND (state = ? OR state = ? OR state = ?)',
- (localFilePath, readerID, 'immutable', fileStoreID, 'cached', 'uploadable', 'uploading'))])
+ logger.debug("Trying to make reference to file %s", fileStoreID)
+ self._write(
+ [
+ (
+ "INSERT INTO refs SELECT ?, id, ?, ? FROM files WHERE id = ? AND (state = ? OR state = ? OR state = ?)",
+ (
+ localFilePath,
+ readerID,
+ "immutable",
+ fileStoreID,
+ "cached",
+ "uploadable",
+ "uploading",
+ ),
+ )
+ ]
+ )

  # See if we got it
- self._read('SELECT COUNT(*) FROM refs WHERE path = ? and file_id = ?', (localFilePath, fileStoreID))
+ self._read(
+ "SELECT COUNT(*) FROM refs WHERE path = ? and file_id = ?",
+ (localFilePath, fileStoreID),
+ )
  if self.cur.fetchone()[0] > 0:
  # The file is cached and we can copy or link it
- logger.debug('Obtained reference to file %s', fileStoreID)
+ logger.debug("Obtained reference to file %s", fileStoreID)

  # Get the path it is actually at in the cache, instead of where we wanted to put it
- for row in self._read('SELECT path FROM files WHERE id = ?', (fileStoreID,)):
+ for row in self._read(
+ "SELECT path FROM files WHERE id = ?", (fileStoreID,)
+ ):
  cachedPath = row[0]

- if self._createLinkFromCache(cachedPath, localFilePath, symlink):
+ if self._createLinkFromCache(
+ cachedPath, localFilePath, symlink
+ ):
  # We managed to make the link
  return localFilePath
  else:
@@ -1636,11 +1975,22 @@ class CachingFileStore(AbstractFileStore):
  # we already have code for that for mutable downloads,
  # so just clear the reference and download mutably.

- self._write([('DELETE FROM refs WHERE path = ? AND file_id = ?', (localFilePath, fileStoreID))])
-
- return self._readGlobalFileMutablyWithCache(fileStoreID, localFilePath, readerID)
+ self._write(
+ [
+ (
+ "DELETE FROM refs WHERE path = ? AND file_id = ?",
+ (localFilePath, fileStoreID),
+ )
+ ]
+ )
+
+ return self._readGlobalFileMutablyWithCache(
+ fileStoreID, localFilePath, readerID
+ )
  else:
- logger.debug('Could not obtain reference to file %s', fileStoreID)
+ logger.debug(
+ "Could not obtain reference to file %s", fileStoreID
+ )

  # If we didn't get a download or a reference, adopt and do work from dead workers and loop again.
  # We may have to wait for someone else's download or delete to
@@ -1656,7 +2006,12 @@ class CachingFileStore(AbstractFileStore):
  time.sleep(self.contentionBackoff)

  @contextmanager
- def _with_copying_reference_to_upload(self, file_store_id: FileID, reader_id: str, local_file_path: Optional[str] = None) -> Generator:
+ def _with_copying_reference_to_upload(
+ self,
+ file_store_id: FileID,
+ reader_id: str,
+ local_file_path: Optional[str] = None,
+ ) -> Generator:
  """
  Get a context manager that gives you either the local file path for a
  copying reference to the given file, or None if that file is not in an
@@ -1678,12 +2033,28 @@ class CachingFileStore(AbstractFileStore):
  local_file_path = self.getLocalTempFileName()

  # Try and make a 'copying' reference to such a file
- self._write([('INSERT INTO refs SELECT ?, id, ?, ? FROM files WHERE id = ? AND (state = ? OR state = ?)',
- (local_file_path, reader_id, 'copying', file_store_id, 'uploadable', 'uploading'))])
+ self._write(
+ [
+ (
+ "INSERT INTO refs SELECT ?, id, ?, ? FROM files WHERE id = ? AND (state = ? OR state = ?)",
+ (
+ local_file_path,
+ reader_id,
+ "copying",
+ file_store_id,
+ "uploadable",
+ "uploading",
+ ),
+ )
+ ]
+ )

  # See if we got it
  have_reference = False
- for row in self._read('SELECT COUNT(*) FROM refs WHERE path = ? and file_id = ?', (local_file_path, file_store_id)):
+ for row in self._read(
+ "SELECT COUNT(*) FROM refs WHERE path = ? and file_id = ?",
+ (local_file_path, file_store_id),
+ ):
  have_reference = row[0] > 0

  if have_reference:
@@ -1692,8 +2063,14 @@ class CachingFileStore(AbstractFileStore):
  yield local_file_path
  finally:
  # Clean up the reference if it is unmodified
- self._write([('DELETE FROM refs WHERE path = ? AND file_id = ? AND state = ?',
- (local_file_path, file_store_id, 'copying'))])
+ self._write(
+ [
+ (
+ "DELETE FROM refs WHERE path = ? AND file_id = ? AND state = ?",
+ (local_file_path, file_store_id, "copying"),
+ )
+ ]
+ )
  else:
  # No reference was obtained.
  yield None
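`_with_copying_reference_to_upload` wraps the reference bookkeeping in a context manager so the `copying` row is removed on exit unless something promoted it past that state. A simplified standalone sketch of that acquire-yield-clean-up shape (illustrative schema; the real method only inserts the row when the file is in an upload state, and yields `None` when it is not):

```python
import sqlite3
from contextlib import contextmanager

con = sqlite3.connect(":memory:")
cur = con.cursor()
cur.execute("CREATE TABLE refs (path TEXT, file_id TEXT, state TEXT)")

@contextmanager
def with_copying_reference(file_id: str, path: str):
    cur.execute("INSERT INTO refs VALUES (?, ?, 'copying')", (path, file_id))
    con.commit()
    try:
        yield path
    finally:
        # Only drop the reference if nobody promoted it past 'copying'.
        cur.execute(
            "DELETE FROM refs WHERE path = ? AND file_id = ? AND state = 'copying'",
            (path, file_id),
        )
        con.commit()

with with_copying_reference("file-1", "/tmp/file-1.partial") as ref_path:
    print("holding reference at", ref_path)
```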
@@ -1702,11 +2079,13 @@ class CachingFileStore(AbstractFileStore):
  def readGlobalFileStream(self, fileStoreID, encoding=None, errors=None):
  if str(fileStoreID) in self.filesToDelete:
  # File has already been deleted
- raise FileNotFoundError(f'Attempted to read deleted file: {fileStoreID}')
+ raise FileNotFoundError(f"Attempted to read deleted file: {fileStoreID}")

  self.logAccess(fileStoreID)

- with self._with_copying_reference_to_upload(fileStoreID, self.jobDesc.jobStoreID) as ref_path:
+ with self._with_copying_reference_to_upload(
+ fileStoreID, self.jobDesc.jobStoreID
+ ) as ref_path:
  # Try and grab a reference to the file if it is being uploaded.
  if ref_path is not None:
  # We have an update in the cache that isn't written back yet.
@@ -1715,11 +2094,16 @@ class CachingFileStore(AbstractFileStore):
  # The ref file is not actually copied to; find the actual file
  # in the cache
  cached_path = None
- for row in self._read('SELECT path FROM files WHERE id = ?', (fileStoreID,)):
+ for row in self._read(
+ "SELECT path FROM files WHERE id = ?", (fileStoreID,)
+ ):
  cached_path = row[0]

  if cached_path is None:
- raise RuntimeError('File %s went away while we had a reference to it!' % fileStoreID)
+ raise RuntimeError(
+ "File %s went away while we had a reference to it!"
+ % fileStoreID
+ )

  with open(cached_path, encoding=encoding, errors=errors) as result:
  # Pass along the results of the open context manager on the
@@ -1730,7 +2114,9 @@ class CachingFileStore(AbstractFileStore):
  else:
  # No local update, so we can stream from the job store
  # TODO: Maybe stream from cache even when not required for consistency?
- with self.jobStore.read_file_stream(fileStoreID, encoding=encoding, errors=errors) as result:
+ with self.jobStore.read_file_stream(
+ fileStoreID, encoding=encoding, errors=errors
+ ) as result:
  yield result

  def deleteLocalFile(self, fileStoreID):
@@ -1743,7 +2129,10 @@ class CachingFileStore(AbstractFileStore):
  # missing ref file, we will raise an error about it and stop deleting
  # things.
  missingFile = None
- for row in self._read('SELECT path FROM refs WHERE file_id = ? AND job_id = ?', (fileStoreID, jobID)):
+ for row in self._read(
+ "SELECT path FROM refs WHERE file_id = ? AND job_id = ?",
+ (fileStoreID, jobID),
+ ):
  # Delete all the files that are references to this cached file (even mutable copies)
  path = row[0]

@@ -1764,12 +2153,22 @@ class CachingFileStore(AbstractFileStore):
  if len(deleted) == 0 and not missingFile:
  # We have to tell the user if they tried to delete 0 local copies.
  # But if we found a missing local copy, go on to report that instead.
- raise OSError(errno.ENOENT, f"Attempting to delete local copies of a file with none: {fileStoreID}")
+ raise OSError(
+ errno.ENOENT,
+ f"Attempting to delete local copies of a file with none: {fileStoreID}",
+ )

  for path in deleted:
  # Drop the references
- self._write([('DELETE FROM refs WHERE file_id = ? AND job_id = ? AND path = ?', (fileStoreID, jobID, path))])
- logger.debug('Deleted local file %s for global file %s', path, fileStoreID)
+ self._write(
+ [
+ (
+ "DELETE FROM refs WHERE file_id = ? AND job_id = ? AND path = ?",
+ (fileStoreID, jobID, path),
+ )
+ ]
+ )
+ logger.debug("Deleted local file %s for global file %s", path, fileStoreID)

  # Now space has been revoked from the cache because that job needs its space back.
  # That might result in stuff having to be evicted.
@@ -1797,13 +2196,25 @@ class CachingFileStore(AbstractFileStore):
  with self.as_process() as me:

  # Make sure nobody else has references to it
- for row in self._read('SELECT job_id FROM refs WHERE file_id = ? AND state != ?', (fileStoreID, 'mutable')):
- raise RuntimeError(f'Deleted file ID {fileStoreID} which is still in use by job {row[0]}')
+ for row in self._read(
+ "SELECT job_id FROM refs WHERE file_id = ? AND state != ?",
+ (fileStoreID, "mutable"),
+ ):
+ raise RuntimeError(
+ f"Deleted file ID {fileStoreID} which is still in use by job {row[0]}"
+ )
  # TODO: should we just let other jobs and the cache keep the file until
  # it gets evicted, and only delete at the back end?

  # Pop the file into deleting state owned by us if it exists
- self._write([('UPDATE files SET state = ?, owner = ? WHERE id = ?', ('deleting', me, fileStoreID))])
+ self._write(
+ [
+ (
+ "UPDATE files SET state = ?, owner = ? WHERE id = ?",
+ ("deleting", me, fileStoreID),
+ )
+ ]
+ )

  # Finish the delete if the file is present
  self._executePendingDeletions()
@@ -1811,10 +2222,13 @@ class CachingFileStore(AbstractFileStore):
  # Add the file to the list of files to be deleted from the job store
  # once the run method completes.
  self.filesToDelete.add(str(fileStoreID))
- self.log_to_leader('Added file with ID \'%s\' to the list of files to be' % fileStoreID +
- ' globally deleted.', level=logging.DEBUG)
+ self.log_to_leader(
+ "Added file with ID '%s' to the list of files to be" % fileStoreID
+ + " globally deleted.",
+ level=logging.DEBUG,
+ )

- @deprecated(new_function_name='export_file')
+ @deprecated(new_function_name="export_file")
  def exportFile(self, jobStoreFileID: FileID, dstUrl: str) -> None:
  return self.export_file(jobStoreFileID, dstUrl)
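The `@deprecated` decorator keeps the old camel-case entry point alive while pointing callers at the snake-case replacement. A generic sketch of the alias-with-warning idea; this is not Toil's actual `deprecated` implementation, just the usual shape of one:

```python
import functools
import warnings

def deprecated(new_function_name: str):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            # Warn at the call site, pointing at the replacement name.
            warnings.warn(
                f"{func.__name__} is deprecated; use {new_function_name}",
                DeprecationWarning,
                stacklevel=2,
            )
            return func(*args, **kwargs)
        return wrapper
    return decorator
```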
 
@@ -1845,7 +2259,10 @@ class CachingFileStore(AbstractFileStore):
  # thread. It can do some destructor work after it finishes its real
  # work.

- if self.commitThread is not None and self.commitThread is not threading.current_thread():
+ if (
+ self.commitThread is not None
+ and self.commitThread is not threading.current_thread()
+ ):
  self.commitThread.join()

  return True
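`waitForPreviousCommit` must block on the asynchronous commit thread, but the commit thread itself can reach this code during its own cleanup, and a thread cannot `join()` itself (Python raises `RuntimeError`). Hence the guard above; a minimal sketch of the same check, with module-level state standing in for the instance attribute:

```python
import threading

commit_thread = None  # set when an async commit is started

def wait_for_previous_commit() -> None:
    # Join only if a commit is in flight and we are not that thread.
    if commit_thread is not None and commit_thread is not threading.current_thread():
        commit_thread.join()
```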
@@ -1872,17 +2289,23 @@ class CachingFileStore(AbstractFileStore):
  # might be necessary for later jobs to see earlier jobs' deleted
  # before they are committed?

- logger.debug('Starting commit of %s forked from %s', state_to_commit, self.jobDesc)
+ logger.debug(
+ "Starting commit of %s forked from %s", state_to_commit, self.jobDesc
+ )
  # Make sure the deep copy isn't summoning ghosts of old job
  # versions. It must be as new or newer at this point.
- self.jobDesc.check_new_version(state_to_commit)
+ self.jobDesc.assert_is_not_newer_than(state_to_commit)

  # Bump the original's version since saving will do that too and we
  # don't want duplicate versions.
- self.jobDesc.reserve_versions(1 if len(state_to_commit.filesToDelete) == 0 else 2)
+ self.jobDesc.reserve_versions(
+ 1 if len(state_to_commit.filesToDelete) == 0 else 2
+ )

  # Start the commit thread
- self.commitThread = threading.Thread(target=self.startCommitThread, args=(state_to_commit,))
+ self.commitThread = threading.Thread(
+ target=self.startCommitThread, args=(state_to_commit,)
+ )
  self.commitThread.start()

  def startCommitThread(self, state_to_commit: Optional[JobDescription]):
@@ -1895,7 +2318,7 @@ class CachingFileStore(AbstractFileStore):
  self.waitForPreviousCommit()

  try:
- logger.debug('Committing file uploads asynchronously')
+ logger.debug("Committing file uploads asynchronously")

  # Finish all uploads
  self._executePendingUploads()
@@ -1905,7 +2328,10 @@ class CachingFileStore(AbstractFileStore):
  if state_to_commit is not None:
  # Do all the things that make this job not redoable

- logger.debug('Committing file deletes and job state changes asynchronously from %s', state_to_commit)
+ logger.debug(
+ "Committing file deletes and job state changes asynchronously from %s",
+ state_to_commit,
+ )

  # Complete the job
  self.jobStore.update_job(state_to_commit)
@@ -1921,10 +2347,8 @@ class CachingFileStore(AbstractFileStore):
  self._terminateEvent.set()
  raise

-
-
  @classmethod
- def shutdown(cls, shutdown_info: Tuple[str, str]) -> None:
+ def shutdown(cls, shutdown_info: tuple[str, str]) -> None:
  """
  :param shutdown_info: Tuple of the coordination directory (where the
  cache database is) and the cache directory (where the cached data is).
@@ -1951,7 +2375,7 @@ class CachingFileStore(AbstractFileStore):
  # So we just go and find the cache-n.db with the largest n value,
  # and use that.
  dbFilename = None
- dbAttempt = float('-inf')
+ dbAttempt = float("-inf")

  # We also need to remember all the plausible database files and
  # journals
@@ -1959,12 +2383,15 @@ class CachingFileStore(AbstractFileStore):

  for dbCandidate in os.listdir(coordination_dir):
  # For each thing in the coordination directory, see if it starts like a database file.
- match = re.match('^cache-([0-9]+).db.*', dbCandidate)
+ match = re.match("^cache-([0-9]+).db.*", dbCandidate)
  if match:
  # This is caching-related.
  all_db_files.append(dbCandidate)
  attempt_number = int(match.group(1))
- if attempt_number > dbAttempt and dbCandidate == f"cache-{attempt_number}.db":
+ if (
+ attempt_number > dbAttempt
+ and dbCandidate == f"cache-{attempt_number}.db"
+ ):
  # This is a main database, and the newest we have seen.
  dbFilename = dbCandidate
  dbAttempt = attempt_number
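Shutdown cannot know which attempt's database is current, so it scans for `cache-<n>.db` entries and keeps the main database (not a journal file like `cache-2.db-wal`) with the largest `n`. The same scan, runnable over a made-up candidate list:

```python
import re

candidates = ["cache-1.db", "cache-2.db", "cache-2.db-wal", "notes.txt"]

db_filename = None
db_attempt = float("-inf")
for candidate in candidates:
    match = re.match(r"^cache-([0-9]+).db.*", candidate)
    if match:
        attempt_number = int(match.group(1))
        if attempt_number > db_attempt and candidate == f"cache-{attempt_number}.db":
            # A main database file, and the newest seen so far.
            db_filename = candidate
            db_attempt = attempt_number

print(db_filename)  # cache-2.db
```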
@@ -1972,7 +2399,9 @@ class CachingFileStore(AbstractFileStore):
  if dbFilename is not None:
  # We found a caching database

- logger.debug('Connecting to latest caching database %s for cleanup', dbFilename)
+ logger.debug(
+ "Connecting to latest caching database %s for cleanup", dbFilename
+ )

  dbPath = os.path.join(coordination_dir, dbFilename)
@@ -1996,7 +2425,7 @@ class CachingFileStore(AbstractFileStore):

  con.close()
  else:
- logger.debug('No caching database found in %s', dir_)
+ logger.debug("No caching database found in %s", dir_)

  # Whether or not we found a database, we need to clean up the cache
  # directory. Delete everything cached.
@@ -2033,7 +2462,9 @@ class CachingFileStore(AbstractFileStore):

  # Get all the dead worker PIDs
  workers = []
- for row in cls._static_read(cur, 'SELECT DISTINCT worker FROM jobs WHERE worker IS NOT NULL'):
+ for row in cls._static_read(
+ cur, "SELECT DISTINCT worker FROM jobs WHERE worker IS NOT NULL"
+ ):
  workers.append(row[0])

  # Work out which of them are not currently running.
@@ -2046,14 +2477,18 @@ class CachingFileStore(AbstractFileStore):
  # Now we know which workers are dead.
  # Clear them off of the jobs they had.
  for deadWorker in deadWorkers:
- cls._static_write(con, cur, [('UPDATE jobs SET worker = NULL WHERE worker = ?', (deadWorker,))])
+ cls._static_write(
+ con,
+ cur,
+ [("UPDATE jobs SET worker = NULL WHERE worker = ?", (deadWorker,))],
+ )
  if len(deadWorkers) > 0:
- logger.debug('Reaped %d dead workers', len(deadWorkers))
+ logger.debug("Reaped %d dead workers", len(deadWorkers))

  while True:
  # Find an unowned job.
  # Don't take all of them; other people could come along and want to help us with the other jobs.
- cls._static_read(cur, 'SELECT id FROM jobs WHERE worker IS NULL LIMIT 1')
+ cls._static_read(cur, "SELECT id FROM jobs WHERE worker IS NULL LIMIT 1")
  row = cur.fetchone()
  if row is None:
  # We cleaned up all the jobs
@@ -2062,10 +2497,23 @@ class CachingFileStore(AbstractFileStore):
  jobID = row[0]

  # Try to own this job
- cls._static_write(con, cur, [('UPDATE jobs SET worker = ? WHERE id = ? AND worker IS NULL', (me, jobID))])
+ cls._static_write(
+ con,
+ cur,
+ [
+ (
+ "UPDATE jobs SET worker = ? WHERE id = ? AND worker IS NULL",
+ (me, jobID),
+ )
+ ],
+ )

  # See if we won the race
- cls._static_read(cur, 'SELECT id, tempdir FROM jobs WHERE id = ? AND worker = ?', (jobID, me))
+ cls._static_read(
+ cur,
+ "SELECT id, tempdir FROM jobs WHERE id = ? AND worker = ?",
+ (jobID, me),
+ )
  row = cur.fetchone()
  if row is None:
  # We didn't win the race. Try another one.
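Orphaned-job adoption runs the claim-then-verify race once more, this time between cleanup processes: set `worker` only where it is still `NULL`, then re-read to see whether the claim stuck. A sketch against an illustrative schema:

```python
import sqlite3

con = sqlite3.connect(":memory:")
cur = con.cursor()
cur.execute("CREATE TABLE jobs (id TEXT PRIMARY KEY, tempdir TEXT, worker TEXT)")
cur.execute("INSERT INTO jobs VALUES ('job-1', '/tmp/job-1', NULL)")

me = "cleanup-proc-a"
cur.execute(
    "UPDATE jobs SET worker = ? WHERE id = ? AND worker IS NULL", (me, "job-1")
)
con.commit()
cur.execute(
    "SELECT id, tempdir FROM jobs WHERE id = ? AND worker = ?", ("job-1", me)
)
print(cur.fetchone())  # ('job-1', '/tmp/job-1') if we won, None otherwise
```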
@@ -2074,6 +2522,6 @@ class CachingFileStore(AbstractFileStore):
  # If we did win, delete the job and its files and temp dir
  cls._removeJob(con, cur, jobID)

- logger.debug('Cleaned up orphaned job %s', jobID)
+ logger.debug("Cleaned up orphaned job %s", jobID)

  # Now we have cleaned up all the jobs that belonged to dead workers that were dead when we entered this function.