toil 7.0.0__py3-none-any.whl → 8.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (190)
  1. toil/__init__.py +121 -83
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +137 -77
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +211 -101
  5. toil/batchSystems/awsBatch.py +237 -128
  6. toil/batchSystems/cleanup_support.py +22 -16
  7. toil/batchSystems/contained_executor.py +30 -26
  8. toil/batchSystems/gridengine.py +85 -49
  9. toil/batchSystems/htcondor.py +164 -87
  10. toil/batchSystems/kubernetes.py +622 -386
  11. toil/batchSystems/local_support.py +17 -12
  12. toil/batchSystems/lsf.py +132 -79
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +288 -149
  16. toil/batchSystems/mesos/executor.py +77 -49
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +38 -29
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +293 -123
  21. toil/batchSystems/slurm.py +489 -137
  22. toil/batchSystems/torque.py +46 -32
  23. toil/bus.py +141 -73
  24. toil/common.py +630 -359
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1114 -532
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +62 -41
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +88 -57
  32. toil/fileStores/cachingFileStore.py +711 -247
  33. toil/fileStores/nonCachingFileStore.py +113 -75
  34. toil/job.py +988 -315
  35. toil/jobStores/abstractJobStore.py +387 -243
  36. toil/jobStores/aws/jobStore.py +727 -403
  37. toil/jobStores/aws/utils.py +161 -109
  38. toil/jobStores/conftest.py +1 -0
  39. toil/jobStores/fileJobStore.py +289 -151
  40. toil/jobStores/googleJobStore.py +137 -70
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +614 -269
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +55 -28
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +193 -58
  49. toil/lib/aws/utils.py +238 -218
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +83 -49
  53. toil/lib/docker.py +131 -103
  54. toil/lib/ec2.py +322 -209
  55. toil/lib/ec2nodes.py +174 -106
  56. toil/lib/encryption/_dummy.py +5 -3
  57. toil/lib/encryption/_nacl.py +10 -6
  58. toil/lib/encryption/conftest.py +1 -0
  59. toil/lib/exceptions.py +26 -7
  60. toil/lib/expando.py +4 -2
  61. toil/lib/ftp_utils.py +217 -0
  62. toil/lib/generatedEC2Lists.py +127 -19
  63. toil/lib/humanize.py +6 -2
  64. toil/lib/integration.py +341 -0
  65. toil/lib/io.py +99 -11
  66. toil/lib/iterables.py +4 -2
  67. toil/lib/memoize.py +12 -8
  68. toil/lib/misc.py +65 -18
  69. toil/lib/objects.py +2 -2
  70. toil/lib/resources.py +19 -7
  71. toil/lib/retry.py +115 -77
  72. toil/lib/threading.py +282 -80
  73. toil/lib/throttle.py +15 -14
  74. toil/options/common.py +834 -401
  75. toil/options/cwl.py +175 -90
  76. toil/options/runner.py +50 -0
  77. toil/options/wdl.py +70 -19
  78. toil/provisioners/__init__.py +111 -46
  79. toil/provisioners/abstractProvisioner.py +322 -157
  80. toil/provisioners/aws/__init__.py +62 -30
  81. toil/provisioners/aws/awsProvisioner.py +980 -627
  82. toil/provisioners/clusterScaler.py +541 -279
  83. toil/provisioners/gceProvisioner.py +282 -179
  84. toil/provisioners/node.py +147 -79
  85. toil/realtimeLogger.py +34 -22
  86. toil/resource.py +137 -75
  87. toil/server/app.py +127 -61
  88. toil/server/celery_app.py +3 -1
  89. toil/server/cli/wes_cwl_runner.py +82 -53
  90. toil/server/utils.py +54 -28
  91. toil/server/wes/abstract_backend.py +64 -26
  92. toil/server/wes/amazon_wes_utils.py +21 -15
  93. toil/server/wes/tasks.py +121 -63
  94. toil/server/wes/toil_backend.py +142 -107
  95. toil/server/wsgi_app.py +4 -3
  96. toil/serviceManager.py +58 -22
  97. toil/statsAndLogging.py +148 -64
  98. toil/test/__init__.py +263 -179
  99. toil/test/batchSystems/batchSystemTest.py +438 -195
  100. toil/test/batchSystems/batch_system_plugin_test.py +18 -7
  101. toil/test/batchSystems/test_gridengine.py +173 -0
  102. toil/test/batchSystems/test_lsf_helper.py +67 -58
  103. toil/test/batchSystems/test_slurm.py +93 -47
  104. toil/test/cactus/test_cactus_integration.py +20 -22
  105. toil/test/cwl/cwlTest.py +271 -71
  106. toil/test/cwl/measure_default_memory.cwl +12 -0
  107. toil/test/cwl/not_run_required_input.cwl +29 -0
  108. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  109. toil/test/docs/scriptsTest.py +60 -34
  110. toil/test/jobStores/jobStoreTest.py +412 -235
  111. toil/test/lib/aws/test_iam.py +116 -48
  112. toil/test/lib/aws/test_s3.py +16 -9
  113. toil/test/lib/aws/test_utils.py +5 -6
  114. toil/test/lib/dockerTest.py +118 -141
  115. toil/test/lib/test_conversions.py +113 -115
  116. toil/test/lib/test_ec2.py +57 -49
  117. toil/test/lib/test_integration.py +104 -0
  118. toil/test/lib/test_misc.py +12 -5
  119. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  120. toil/test/mesos/helloWorld.py +7 -6
  121. toil/test/mesos/stress.py +25 -20
  122. toil/test/options/options.py +7 -2
  123. toil/test/provisioners/aws/awsProvisionerTest.py +293 -140
  124. toil/test/provisioners/clusterScalerTest.py +440 -250
  125. toil/test/provisioners/clusterTest.py +81 -42
  126. toil/test/provisioners/gceProvisionerTest.py +174 -100
  127. toil/test/provisioners/provisionerTest.py +25 -13
  128. toil/test/provisioners/restartScript.py +5 -4
  129. toil/test/server/serverTest.py +188 -141
  130. toil/test/sort/restart_sort.py +137 -68
  131. toil/test/sort/sort.py +134 -66
  132. toil/test/sort/sortTest.py +91 -49
  133. toil/test/src/autoDeploymentTest.py +140 -100
  134. toil/test/src/busTest.py +20 -18
  135. toil/test/src/checkpointTest.py +8 -2
  136. toil/test/src/deferredFunctionTest.py +49 -35
  137. toil/test/src/dockerCheckTest.py +33 -26
  138. toil/test/src/environmentTest.py +20 -10
  139. toil/test/src/fileStoreTest.py +538 -271
  140. toil/test/src/helloWorldTest.py +7 -4
  141. toil/test/src/importExportFileTest.py +61 -31
  142. toil/test/src/jobDescriptionTest.py +32 -17
  143. toil/test/src/jobEncapsulationTest.py +2 -0
  144. toil/test/src/jobFileStoreTest.py +74 -50
  145. toil/test/src/jobServiceTest.py +187 -73
  146. toil/test/src/jobTest.py +120 -70
  147. toil/test/src/miscTests.py +19 -18
  148. toil/test/src/promisedRequirementTest.py +82 -36
  149. toil/test/src/promisesTest.py +7 -6
  150. toil/test/src/realtimeLoggerTest.py +6 -6
  151. toil/test/src/regularLogTest.py +71 -37
  152. toil/test/src/resourceTest.py +80 -49
  153. toil/test/src/restartDAGTest.py +36 -22
  154. toil/test/src/resumabilityTest.py +9 -2
  155. toil/test/src/retainTempDirTest.py +45 -14
  156. toil/test/src/systemTest.py +12 -8
  157. toil/test/src/threadingTest.py +44 -25
  158. toil/test/src/toilContextManagerTest.py +10 -7
  159. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  160. toil/test/src/workerTest.py +33 -16
  161. toil/test/utils/toilDebugTest.py +70 -58
  162. toil/test/utils/toilKillTest.py +4 -5
  163. toil/test/utils/utilsTest.py +239 -102
  164. toil/test/wdl/wdltoil_test.py +789 -148
  165. toil/test/wdl/wdltoil_test_kubernetes.py +37 -23
  166. toil/toilState.py +52 -26
  167. toil/utils/toilConfig.py +13 -4
  168. toil/utils/toilDebugFile.py +44 -27
  169. toil/utils/toilDebugJob.py +85 -25
  170. toil/utils/toilDestroyCluster.py +11 -6
  171. toil/utils/toilKill.py +8 -3
  172. toil/utils/toilLaunchCluster.py +251 -145
  173. toil/utils/toilMain.py +37 -16
  174. toil/utils/toilRsyncCluster.py +27 -14
  175. toil/utils/toilSshCluster.py +45 -22
  176. toil/utils/toilStats.py +75 -36
  177. toil/utils/toilStatus.py +226 -119
  178. toil/utils/toilUpdateEC2Instances.py +3 -1
  179. toil/version.py +11 -11
  180. toil/wdl/utils.py +5 -5
  181. toil/wdl/wdltoil.py +3513 -1052
  182. toil/worker.py +269 -128
  183. toil-8.0.0.dist-info/METADATA +173 -0
  184. toil-8.0.0.dist-info/RECORD +253 -0
  185. {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
  186. toil-7.0.0.dist-info/METADATA +0 -158
  187. toil-7.0.0.dist-info/RECORD +0 -244
  188. {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/LICENSE +0 -0
  189. {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
  190. {toil-7.0.0.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
@@ -22,15 +22,10 @@ import sqlite3
22
22
  import stat
23
23
  import threading
24
24
  import time
25
+ from collections.abc import Generator, Iterator, Sequence
25
26
  from contextlib import contextmanager
26
27
  from tempfile import mkstemp
27
- from typing import (Any,
28
- Callable,
29
- Generator,
30
- Iterator,
31
- Optional,
32
- Sequence,
33
- Tuple)
28
+ from typing import Any, Callable, Optional
34
29
 
35
30
  from toil.common import cacheDirName, getFileSystemSize
36
31
  from toil.fileStores import FileID
@@ -38,11 +33,13 @@ from toil.fileStores.abstractFileStore import AbstractFileStore
38
33
  from toil.job import Job, JobDescription
39
34
  from toil.jobStores.abstractJobStore import AbstractJobStore
40
35
  from toil.lib.compatibility import deprecated
41
- from toil.lib.io import (atomic_copy,
42
- atomic_copyobj,
43
- make_public_dir,
44
- mkdtemp,
45
- robust_rmtree)
36
+ from toil.lib.io import (
37
+ atomic_copy,
38
+ atomic_copyobj,
39
+ make_public_dir,
40
+ mkdtemp,
41
+ robust_rmtree,
42
+ )
46
43
  from toil.lib.retry import ErrorCondition, retry
47
44
  from toil.lib.threading import get_process_name, process_name_exists
48
45
 
@@ -66,9 +63,12 @@ class CacheUnbalancedError(CacheError):
66
63
  """
67
64
  Raised if file store can't free enough space for caching
68
65
  """
69
- message = 'Unable unable to free enough space for caching. This error frequently arises due ' \
70
- 'to jobs using more disk than they have requested. Turn on debug logging to see ' \
71
- 'more information leading up to this error through cache usage logs.'
66
+
67
+ message = (
68
+ "Unable unable to free enough space for caching. This error frequently arises due "
69
+ "to jobs using more disk than they have requested. Turn on debug logging to see "
70
+ "more information leading up to this error through cache usage logs."
71
+ )
72
72
 
73
73
  def __init__(self):
74
74
  super().__init__(self.message)
@@ -87,9 +87,11 @@ class IllegalDeletionCacheError(CacheError):
87
87
  """
88
88
 
89
89
  def __init__(self, deletedFile):
90
- message = 'Cache tracked file (%s) has been deleted or moved by user ' \
91
- ' without updating cache database. Use deleteLocalFile to ' \
92
- 'delete such files.' % deletedFile
90
+ message = (
91
+ "Cache tracked file (%s) has been deleted or moved by user "
92
+ " without updating cache database. Use deleteLocalFile to "
93
+ "delete such files." % deletedFile
94
+ )
93
95
  super().__init__(message)
94
96
 
95
97
 
@@ -208,13 +210,15 @@ class CachingFileStore(AbstractFileStore):
208
210
  # Variables related to caching
209
211
  # Decide where the cache directory will be. We put it in the local
210
212
  # workflow directory.
211
- self.localCacheDir = os.path.join(self.workflow_dir, cacheDirName(self.jobStore.config.workflowID))
213
+ self.localCacheDir = os.path.join(
214
+ self.workflow_dir, cacheDirName(self.jobStore.config.workflowID)
215
+ )
212
216
 
213
217
  # Since each worker has it's own unique CachingFileStore instance, and only one Job can run
214
218
  # at a time on a worker, we can track some stuff about the running job in ourselves.
215
219
  self.jobName: str = str(self.jobDesc)
216
220
  self.jobID = self.jobDesc.jobStoreID
217
- logger.debug('Starting job (%s) with ID (%s).', self.jobName, self.jobID)
221
+ logger.debug("Starting job (%s) with ID (%s).", self.jobName, self.jobID)
218
222
 
219
223
  # When the job actually starts, we will fill this in with the job's disk requirement.
220
224
  self.jobDiskBytes: Optional[float] = None
@@ -230,7 +234,9 @@ class CachingFileStore(AbstractFileStore):
230
234
  # the workflow left one behind without cleaning up properly; we need to
231
235
  # be able to tell that from showing up on a machine where a cache has
232
236
  # already been created.
233
- self.dbPath = os.path.join(self.coordination_dir, f'cache-{self.workflowAttemptNumber}.db')
237
+ self.dbPath = os.path.join(
238
+ self.coordination_dir, f"cache-{self.workflowAttemptNumber}.db"
239
+ )
234
240
 
235
241
  # Database connections are provided by magic properties self.con and
236
242
  # self.cur that always have the right object for the current thread to
@@ -254,7 +260,14 @@ class CachingFileStore(AbstractFileStore):
254
260
 
255
261
  # Initialize the space accounting properties
256
262
  freeSpace, _ = getFileSystemSize(self.localCacheDir)
257
- self._write([('INSERT OR IGNORE INTO properties VALUES (?, ?)', ('maxSpace', freeSpace))])
263
+ self._write(
264
+ [
265
+ (
266
+ "INSERT OR IGNORE INTO properties VALUES (?, ?)",
267
+ ("maxSpace", freeSpace),
268
+ )
269
+ ]
270
+ )
258
271
 
259
272
  # Space used by caching and by jobs is accounted with queries
260
273
 
@@ -284,10 +297,12 @@ class CachingFileStore(AbstractFileStore):
284
297
  """
285
298
  Get the database connection to be used for the current thread.
286
299
  """
287
- if not hasattr(self._thread_local, 'con'):
300
+ if not hasattr(self._thread_local, "con"):
288
301
  # Connect to the database for this thread.
289
302
  # TODO: We assume the connection closes when the thread goes away and can no longer use it.
290
- self._thread_local.con = sqlite3.connect(self.dbPath, timeout=SQLITE_TIMEOUT_SECS)
303
+ self._thread_local.con = sqlite3.connect(
304
+ self.dbPath, timeout=SQLITE_TIMEOUT_SECS
305
+ )
291
306
  return self._thread_local.con
292
307
 
293
308
  @property
@@ -295,18 +310,20 @@ class CachingFileStore(AbstractFileStore):
295
310
  """
296
311
  Get the main cursor to be used for the current thread.
297
312
  """
298
- if not hasattr(self._thread_local, 'cur'):
313
+ if not hasattr(self._thread_local, "cur"):
299
314
  # If we don't already have a main cursor for the thread, make one.
300
315
  self._thread_local.cur = self.con.cursor()
301
316
  return self._thread_local.cur
302
317
 
303
318
  @staticmethod
304
- @retry(infinite_retries=True,
305
- errors=[
306
- ErrorCondition(
307
- error=sqlite3.OperationalError,
308
- error_message_must_include='is locked')
309
- ])
319
+ @retry(
320
+ infinite_retries=True,
321
+ errors=[
322
+ ErrorCondition(
323
+ error=sqlite3.OperationalError, error_message_must_include="is locked"
324
+ )
325
+ ],
326
+ )
310
327
  def _static_write(con, cur, operations):
311
328
  """
312
329
  Write to the caching database, using the given connection.
@@ -340,7 +357,7 @@ class CachingFileStore(AbstractFileStore):
340
357
  # Do it
341
358
  cur.execute(command, args)
342
359
  except Exception as e:
343
- logging.error('Error talking to caching database: %s', str(e))
360
+ logging.error("Error talking to caching database: %s", str(e))
344
361
 
345
362
  # Try to make sure we don't somehow leave anything part-done if a
346
363
  # middle operation somehow fails.
@@ -360,13 +377,17 @@ class CachingFileStore(AbstractFileStore):
360
377
  return cur.rowcount
361
378
 
362
379
  @staticmethod
363
- @retry(infinite_retries=True,
364
- errors=[
365
- ErrorCondition(
366
- error=sqlite3.OperationalError,
367
- error_message_must_include='is locked')
368
- ])
369
- def _static_read(cur: sqlite3.Cursor, query: str, args: Optional[Sequence[Any]] = ()) -> Iterator[Any]:
380
+ @retry(
381
+ infinite_retries=True,
382
+ errors=[
383
+ ErrorCondition(
384
+ error=sqlite3.OperationalError, error_message_must_include="is locked"
385
+ )
386
+ ],
387
+ )
388
+ def _static_read(
389
+ cur: sqlite3.Cursor, query: str, args: Optional[Sequence[Any]] = ()
390
+ ) -> Iterator[Any]:
370
391
  """
371
392
  Read from the database.
372
393
 
@@ -419,7 +440,11 @@ class CachingFileStore(AbstractFileStore):
419
440
  # Get a cursor
420
441
  cur = con.cursor()
421
442
 
422
- cls._static_write(con, cur, ["""
443
+ cls._static_write(
444
+ con,
445
+ cur,
446
+ [
447
+ """
423
448
  CREATE TABLE IF NOT EXISTS files (
424
449
  id TEXT NOT NULL PRIMARY KEY,
425
450
  path TEXT UNIQUE NOT NULL,
@@ -427,7 +452,8 @@ class CachingFileStore(AbstractFileStore):
427
452
  state TEXT NOT NULL,
428
453
  owner TEXT
429
454
  )
430
- """, """
455
+ """,
456
+ """
431
457
  CREATE TABLE IF NOT EXISTS refs (
432
458
  path TEXT NOT NULL,
433
459
  file_id TEXT NOT NULL,
@@ -435,19 +461,23 @@ class CachingFileStore(AbstractFileStore):
435
461
  state TEXT NOT NULL,
436
462
  PRIMARY KEY (path, file_id)
437
463
  )
438
- """, """
464
+ """,
465
+ """
439
466
  CREATE TABLE IF NOT EXISTS jobs (
440
467
  id TEXT NOT NULL PRIMARY KEY,
441
468
  tempdir TEXT NOT NULL,
442
469
  disk INT NOT NULL,
443
470
  worker TEXT
444
471
  )
445
- """, """
472
+ """,
473
+ """
446
474
  CREATE TABLE IF NOT EXISTS properties (
447
475
  name TEXT NOT NULL PRIMARY KEY,
448
476
  value INT NOT NULL
449
477
  )
450
- """])
478
+ """,
479
+ ],
480
+ )
451
481
 
452
482
  # Caching-specific API
453
483
 
@@ -458,10 +488,12 @@ class CachingFileStore(AbstractFileStore):
458
488
  If no limit is available, raises an error.
459
489
  """
460
490
 
461
- for row in self.cur.execute('SELECT value FROM properties WHERE name = ?', ('maxSpace',)):
491
+ for row in self.cur.execute(
492
+ "SELECT value FROM properties WHERE name = ?", ("maxSpace",)
493
+ ):
462
494
  return row[0]
463
495
 
464
- raise RuntimeError('Unable to retrieve cache limit')
496
+ raise RuntimeError("Unable to retrieve cache limit")
465
497
 
466
498
  def getCacheUsed(self):
467
499
  """
@@ -474,10 +506,10 @@ class CachingFileStore(AbstractFileStore):
474
506
  if self.cachingIsFree():
475
507
  return 0
476
508
 
477
- for row in self._read('SELECT TOTAL(size) FROM files'):
509
+ for row in self._read("SELECT TOTAL(size) FROM files"):
478
510
  return row[0]
479
511
 
480
- raise RuntimeError('Unable to retrieve cache usage')
512
+ raise RuntimeError("Unable to retrieve cache usage")
481
513
 
482
514
  def getCacheExtraJobSpace(self):
483
515
  """
@@ -492,15 +524,17 @@ class CachingFileStore(AbstractFileStore):
492
524
  """
493
525
 
494
526
  # Total up the sizes of all the reads of files and subtract it from the total disk reservation of all jobs
495
- for row in self._read("""
527
+ for row in self._read(
528
+ """
496
529
  SELECT (
497
530
  (SELECT TOTAL(disk) FROM jobs) -
498
531
  (SELECT TOTAL(files.size) FROM refs INNER JOIN files ON refs.file_id = files.id WHERE refs.state == 'immutable')
499
532
  ) as result
500
- """):
533
+ """
534
+ ):
501
535
  return row[0]
502
536
 
503
- raise RuntimeError('Unable to retrieve extra job space')
537
+ raise RuntimeError("Unable to retrieve extra job space")
504
538
 
505
539
  def getCacheAvailable(self):
506
540
  """
@@ -519,33 +553,38 @@ class CachingFileStore(AbstractFileStore):
519
553
 
520
554
  # Do a little report first
521
555
  for row in self._read("SELECT value FROM properties WHERE name = 'maxSpace'"):
522
- logger.debug('Max space: %d', row[0])
556
+ logger.debug("Max space: %d", row[0])
523
557
  for row in self._read("SELECT TOTAL(size) FROM files"):
524
- logger.debug('Total file size: %d', row[0])
558
+ logger.debug("Total file size: %d", row[0])
525
559
  for row in self._read("SELECT TOTAL(disk) FROM jobs"):
526
- logger.debug('Total job disk requirement size: %d', row[0])
527
- for row in self._read("SELECT TOTAL(files.size) FROM refs INNER JOIN files ON refs.file_id = files.id WHERE refs.state = 'immutable'"):
528
- logger.debug('Total immutable reference size: %d', row[0])
560
+ logger.debug("Total job disk requirement size: %d", row[0])
561
+ for row in self._read(
562
+ "SELECT TOTAL(files.size) FROM refs INNER JOIN files ON refs.file_id = files.id WHERE refs.state = 'immutable'"
563
+ ):
564
+ logger.debug("Total immutable reference size: %d", row[0])
529
565
 
530
566
  if self.cachingIsFree():
531
567
  # If caching is free, we just say that all the space is always available.
532
- for row in self._read("SELECT value FROM properties WHERE name = 'maxSpace'"):
568
+ for row in self._read(
569
+ "SELECT value FROM properties WHERE name = 'maxSpace'"
570
+ ):
533
571
  return row[0]
534
572
 
535
- raise RuntimeError('Unable to retrieve available cache space')
573
+ raise RuntimeError("Unable to retrieve available cache space")
536
574
 
537
-
538
- for row in self._read("""
575
+ for row in self._read(
576
+ """
539
577
  SELECT (
540
578
  (SELECT value FROM properties WHERE name = 'maxSpace') -
541
579
  (SELECT TOTAL(size) FROM files) -
542
580
  ((SELECT TOTAL(disk) FROM jobs) -
543
581
  (SELECT TOTAL(files.size) FROM refs INNER JOIN files ON refs.file_id = files.id WHERE refs.state = 'immutable'))
544
582
  ) as result
545
- """):
583
+ """
584
+ ):
546
585
  return row[0]
547
586
 
548
- raise RuntimeError('Unable to retrieve available cache space')
587
+ raise RuntimeError("Unable to retrieve available cache space")
549
588
 
550
589
  def getSpaceUsableForJobs(self):
551
590
  """
@@ -555,15 +594,17 @@ class CachingFileStore(AbstractFileStore):
555
594
  If not retrievable, raises an error.
556
595
  """
557
596
 
558
- for row in self._read("""
597
+ for row in self._read(
598
+ """
559
599
  SELECT (
560
600
  (SELECT value FROM properties WHERE name = 'maxSpace') -
561
601
  (SELECT TOTAL(disk) FROM jobs)
562
602
  ) as result
563
- """):
603
+ """
604
+ ):
564
605
  return row[0]
565
606
 
566
- raise RuntimeError('Unable to retrieve usabel space for jobs')
607
+ raise RuntimeError("Unable to retrieve usabel space for jobs")
567
608
 
568
609
  def getCacheUnusedJobRequirement(self):
569
610
  """
@@ -575,28 +616,36 @@ class CachingFileStore(AbstractFileStore):
575
616
  If no value is available, raises an error.
576
617
  """
577
618
 
578
- logger.debug('Get unused space for job %s', self.jobID)
579
-
580
- for row in self._read('SELECT * FROM files'):
581
- logger.debug('File record: %s', str(row))
619
+ logger.debug("Get unused space for job %s", self.jobID)
582
620
 
583
- for row in self._read('SELECT * FROM refs'):
584
- logger.debug('Ref record: %s', str(row))
621
+ for row in self._read("SELECT * FROM files"):
622
+ logger.debug("File record: %s", str(row))
585
623
 
624
+ for row in self._read("SELECT * FROM refs"):
625
+ logger.debug("Ref record: %s", str(row))
586
626
 
587
- for row in self._read('SELECT TOTAL(files.size) FROM refs INNER JOIN files ON refs.file_id = files.id WHERE refs.job_id = ? AND refs.state != ?',
588
- (self.jobID, 'mutable')):
627
+ for row in self._read(
628
+ "SELECT TOTAL(files.size) FROM refs INNER JOIN files ON refs.file_id = files.id WHERE refs.job_id = ? AND refs.state != ?",
629
+ (self.jobID, "mutable"),
630
+ ):
589
631
  # Sum up all the sizes of our referenced files, then subtract that from how much we came in with
590
632
  return self.jobDiskBytes - row[0]
591
633
 
592
- raise RuntimeError('Unable to retrieve unused job requirement space')
634
+ raise RuntimeError("Unable to retrieve unused job requirement space")
593
635
 
594
636
  def adjustCacheLimit(self, newTotalBytes):
595
637
  """
596
638
  Adjust the total cache size limit to the given number of bytes.
597
639
  """
598
640
 
599
- self._write([('UPDATE properties SET value = ? WHERE name = ?', (newTotalBytes, 'maxSpace'))])
641
+ self._write(
642
+ [
643
+ (
644
+ "UPDATE properties SET value = ? WHERE name = ?",
645
+ (newTotalBytes, "maxSpace"),
646
+ )
647
+ ]
648
+ )
600
649
 
601
650
  def fileIsCached(self, fileID):
602
651
  """
@@ -607,8 +656,10 @@ class CachingFileStore(AbstractFileStore):
607
656
  file you need to do it in a transaction.
608
657
  """
609
658
 
610
- for row in self._read('SELECT COUNT(*) FROM files WHERE id = ? AND (state = ? OR state = ? OR state = ?)',
611
- (fileID, 'cached', 'uploadable', 'uploading')):
659
+ for row in self._read(
660
+ "SELECT COUNT(*) FROM files WHERE id = ? AND (state = ? OR state = ? OR state = ?)",
661
+ (fileID, "cached", "uploadable", "uploading"),
662
+ ):
612
663
 
613
664
  return row[0] > 0
614
665
  return False
@@ -620,7 +671,7 @@ class CachingFileStore(AbstractFileStore):
620
671
  Counts mutable references too.
621
672
  """
622
673
 
623
- for row in self._read('SELECT COUNT(*) FROM refs WHERE file_id = ?', (fileID,)):
674
+ for row in self._read("SELECT COUNT(*) FROM refs WHERE file_id = ?", (fileID,)):
624
675
  return row[0]
625
676
  return 0
626
677
 
@@ -633,11 +684,14 @@ class CachingFileStore(AbstractFileStore):
633
684
  configurations, most notably the FileJobStore.
634
685
  """
635
686
 
636
- for row in self._read('SELECT value FROM properties WHERE name = ?', ('freeCaching',)):
687
+ for row in self._read(
688
+ "SELECT value FROM properties WHERE name = ?", ("freeCaching",)
689
+ ):
637
690
  return row[0] == 1
638
691
 
639
692
  # Otherwise we need to set it
640
693
  from toil.jobStores.fileJobStore import FileJobStore
694
+
641
695
  if isinstance(self.jobStore, FileJobStore) and not self.forceNonFreeCaching:
642
696
  # Caching may be free since we are using a file job store.
643
697
 
@@ -646,7 +700,7 @@ class CachingFileStore(AbstractFileStore):
646
700
 
647
701
  # Read it out to a generated name.
648
702
  destDir = mkdtemp(dir=self.localCacheDir)
649
- cachedFile = os.path.join(destDir, 'sniffLinkCount')
703
+ cachedFile = os.path.join(destDir, "sniffLinkCount")
650
704
  self.jobStore.read_file(emptyID, cachedFile, symlink=False)
651
705
 
652
706
  # Check the link count
@@ -666,7 +720,9 @@ class CachingFileStore(AbstractFileStore):
666
720
  free = 0
667
721
 
668
722
  # Save to the database if we're the first to work this out
669
- self._write([('INSERT OR IGNORE INTO properties VALUES (?, ?)', ('freeCaching', free))])
723
+ self._write(
724
+ [("INSERT OR IGNORE INTO properties VALUES (?, ?)", ("freeCaching", free))]
725
+ )
670
726
 
671
727
  # Return true if we said caching was free
672
728
  return free == 1
@@ -683,7 +739,7 @@ class CachingFileStore(AbstractFileStore):
683
739
 
684
740
  # Hash the file ID
685
741
  hasher = hashlib.sha1()
686
- hasher.update(fileStoreID.encode('utf-8'))
742
+ hasher.update(fileStoreID.encode("utf-8"))
687
743
 
688
744
  # Get a unique temp file name, including the file ID's hash to make
689
745
  # sure we can never collide even though we are going to remove the
@@ -707,17 +763,19 @@ class CachingFileStore(AbstractFileStore):
707
763
  # Get a list of all file owner processes on this node.
708
764
  # Exclude NULL because it comes out as 0 and we can't look for PID 0.
709
765
  owners = []
710
- for row in self._read('SELECT DISTINCT owner FROM files WHERE owner IS NOT NULL'):
766
+ for row in self._read(
767
+ "SELECT DISTINCT owner FROM files WHERE owner IS NOT NULL"
768
+ ):
711
769
  owners.append(row[0])
712
770
 
713
771
  # Work out which of them have died.
714
772
  deadOwners = []
715
773
  for owner in owners:
716
774
  if not process_name_exists(self.coordination_dir, owner):
717
- logger.debug('Owner %s is dead', owner)
775
+ logger.debug("Owner %s is dead", owner)
718
776
  deadOwners.append(owner)
719
777
  else:
720
- logger.debug('Owner %s is alive', owner)
778
+ logger.debug("Owner %s is alive", owner)
721
779
 
722
780
  for owner in deadOwners:
723
781
  # Try and adopt all the files that any dead owner had
@@ -736,14 +794,28 @@ class CachingFileStore(AbstractFileStore):
736
794
  #
737
795
  # TODO: if we ever let other PIDs be responsible for writing our
738
796
  # files asynchronously, this will need to change.
739
- self._write([('UPDATE files SET owner = ?, state = ? WHERE owner = ? AND state = ?',
740
- (me, 'deleting', owner, 'deleting')),
741
- ('UPDATE files SET owner = ?, state = ? WHERE owner = ? AND state = ?',
742
- (me, 'deleting', owner, 'downloading')),
743
- ('UPDATE files SET owner = NULL, state = ? WHERE owner = ? AND (state = ? OR state = ?)',
744
- ('cached', owner, 'uploadable', 'uploading'))])
797
+ self._write(
798
+ [
799
+ (
800
+ "UPDATE files SET owner = ?, state = ? WHERE owner = ? AND state = ?",
801
+ (me, "deleting", owner, "deleting"),
802
+ ),
803
+ (
804
+ "UPDATE files SET owner = ?, state = ? WHERE owner = ? AND state = ?",
805
+ (me, "deleting", owner, "downloading"),
806
+ ),
807
+ (
808
+ "UPDATE files SET owner = NULL, state = ? WHERE owner = ? AND (state = ? OR state = ?)",
809
+ ("cached", owner, "uploadable", "uploading"),
810
+ ),
811
+ ]
812
+ )
745
813
 
746
- logger.debug('Tried to adopt file operations from dead worker %s to ourselves as %s', owner, me)
814
+ logger.debug(
815
+ "Tried to adopt file operations from dead worker %s to ourselves as %s",
816
+ owner,
817
+ me,
818
+ )
747
819
 
748
820
  def _executePendingDeletions(self):
749
821
  """
@@ -757,16 +829,19 @@ class CachingFileStore(AbstractFileStore):
757
829
 
758
830
  # Remember the file IDs we are deleting
759
831
  deletedFiles = []
760
- for row in self._read('SELECT id, path FROM files WHERE owner = ? AND state = ?', (me, 'deleting')):
832
+ for row in self._read(
833
+ "SELECT id, path FROM files WHERE owner = ? AND state = ?",
834
+ (me, "deleting"),
835
+ ):
761
836
  # Grab everything we are supposed to delete and delete it
762
837
  fileID = row[0]
763
838
  filePath = row[1]
764
839
  try:
765
840
  os.unlink(filePath)
766
- logger.debug('Successfully deleted: %s', filePath)
841
+ logger.debug("Successfully deleted: %s", filePath)
767
842
  except OSError:
768
843
  # Probably already deleted
769
- logger.debug('File already gone: %s', filePath)
844
+ logger.debug("File already gone: %s", filePath)
770
845
  # Still need to mark it as deleted
771
846
 
772
847
  # Whether we deleted the file or just found out that it is gone, we
@@ -777,8 +852,15 @@ class CachingFileStore(AbstractFileStore):
777
852
  for fileID in deletedFiles:
778
853
  # Drop all the files. They should have stayed in deleting state. We move them from there to not present at all.
779
854
  # Also drop their references, if they had any from dead downloaders.
780
- self._write([('DELETE FROM files WHERE id = ? AND state = ?', (fileID, 'deleting')),
781
- ('DELETE FROM refs WHERE file_id = ?', (fileID,))])
855
+ self._write(
856
+ [
857
+ (
858
+ "DELETE FROM files WHERE id = ? AND state = ?",
859
+ (fileID, "deleting"),
860
+ ),
861
+ ("DELETE FROM refs WHERE file_id = ?", (fileID,)),
862
+ ]
863
+ )
782
864
 
783
865
  return len(deletedFiles)
784
866
 
@@ -798,7 +880,11 @@ class CachingFileStore(AbstractFileStore):
798
880
  # Try and find a file we might want to upload
799
881
  fileID = None
800
882
  filePath = None
801
- for row in self._static_read(self.cur, 'SELECT id, path FROM files WHERE state = ? AND owner = ? LIMIT 1', ('uploadable', me)):
883
+ for row in self._static_read(
884
+ self.cur,
885
+ "SELECT id, path FROM files WHERE state = ? AND owner = ? LIMIT 1",
886
+ ("uploadable", me),
887
+ ):
802
888
  fileID = row[0]
803
889
  filePath = row[1]
804
890
 
@@ -807,30 +893,57 @@ class CachingFileStore(AbstractFileStore):
807
893
  break
808
894
 
809
895
  # We need to set it to uploading in a way that we can detect that *we* won the update race instead of anyone else.
810
- rowCount = self._static_write(self.con, self.cur, [('UPDATE files SET state = ? WHERE id = ? AND state = ?', ('uploading', fileID, 'uploadable'))])
896
+ rowCount = self._static_write(
897
+ self.con,
898
+ self.cur,
899
+ [
900
+ (
901
+ "UPDATE files SET state = ? WHERE id = ? AND state = ?",
902
+ ("uploading", fileID, "uploadable"),
903
+ )
904
+ ],
905
+ )
811
906
  if rowCount != 1:
812
907
  # We didn't manage to update it. Someone else (a running job if
813
908
  # we are a committing thread, or visa versa) must have grabbed
814
909
  # it.
815
- logger.debug('Lost race to upload %s', fileID)
910
+ logger.debug("Lost race to upload %s", fileID)
816
911
  # Try again to see if there is something else to grab.
817
912
  continue
818
913
 
819
914
  # Upload the file
820
- logger.debug('Actually executing upload for file %s', fileID)
915
+ logger.debug("Actually executing upload for file %s", fileID)
821
916
  try:
822
917
  self.jobStore.update_file(fileID, filePath)
823
918
  except:
824
919
  # We need to set the state back to 'uploadable' in case of any failures to ensure
825
920
  # we can retry properly.
826
- self._static_write(self.con, self.cur, [('UPDATE files SET state = ? WHERE id = ? AND state = ?', ('uploadable', fileID, 'uploading'))])
921
+ self._static_write(
922
+ self.con,
923
+ self.cur,
924
+ [
925
+ (
926
+ "UPDATE files SET state = ? WHERE id = ? AND state = ?",
927
+ ("uploadable", fileID, "uploading"),
928
+ )
929
+ ],
930
+ )
827
931
  raise
828
932
 
829
933
  # Count it for the total uploaded files value we need to return
830
934
  uploadedCount += 1
831
935
 
832
936
  # Remember that we uploaded it in the database
833
- self._static_write(self.con, self.cur, [('UPDATE files SET state = ?, owner = NULL WHERE id = ?', ('cached', fileID))])
937
+ self._static_write(
938
+ self.con,
939
+ self.cur,
940
+ [
941
+ (
942
+ "UPDATE files SET state = ?, owner = NULL WHERE id = ?",
943
+ ("cached", fileID),
944
+ )
945
+ ],
946
+ )
834
947
 
835
948
  return uploadedCount
836
949
 
@@ -854,7 +967,14 @@ class CachingFileStore(AbstractFileStore):
854
967
  # But we won't actually let the job run and use any of this space until
855
968
  # the cache has been successfully cleared out.
856
969
  with self.as_process() as me:
857
- self._write([('INSERT INTO jobs VALUES (?, ?, ?, ?)', (self.jobID, self.localTempDir, newJobReqs, me))])
970
+ self._write(
971
+ [
972
+ (
973
+ "INSERT INTO jobs VALUES (?, ?, ?, ?)",
974
+ (self.jobID, self.localTempDir, newJobReqs, me),
975
+ )
976
+ ]
977
+ )
858
978
 
859
979
  # Now we need to make sure that we can fit all currently cached files,
860
980
  # and the parts of the total job requirements not currently spent on
@@ -862,7 +982,7 @@ class CachingFileStore(AbstractFileStore):
862
982
 
863
983
  available = self.getCacheAvailable()
864
984
 
865
- logger.debug('Available space with job: %d bytes', available)
985
+ logger.debug("Available space with job: %d bytes", available)
866
986
 
867
987
  if available >= 0:
868
988
  # We're fine on disk space
@@ -886,10 +1006,14 @@ class CachingFileStore(AbstractFileStore):
886
1006
  """
887
1007
 
888
1008
  # Get the job's temp dir
889
- for row in cls._static_read(cur, 'SELECT tempdir FROM jobs WHERE id = ?', (jobID,)):
1009
+ for row in cls._static_read(
1010
+ cur, "SELECT tempdir FROM jobs WHERE id = ?", (jobID,)
1011
+ ):
890
1012
  jobTemp = row[0]
891
1013
 
892
- for row in cls._static_read(cur, 'SELECT path FROM refs WHERE job_id = ?', (jobID,)):
1014
+ for row in cls._static_read(
1015
+ cur, "SELECT path FROM refs WHERE job_id = ?", (jobID,)
1016
+ ):
893
1017
  try:
894
1018
  # Delete all the reference files.
895
1019
  os.unlink(row[0])
@@ -897,7 +1021,7 @@ class CachingFileStore(AbstractFileStore):
897
1021
  # May not exist
898
1022
  pass
899
1023
  # And their database entries
900
- cls._static_write(con, cur, [('DELETE FROM refs WHERE job_id = ?', (jobID,))])
1024
+ cls._static_write(con, cur, [("DELETE FROM refs WHERE job_id = ?", (jobID,))])
901
1025
 
902
1026
  try:
903
1027
  # Delete the job's temp directory to the extent that we can.
@@ -906,7 +1030,7 @@ class CachingFileStore(AbstractFileStore):
906
1030
  pass
907
1031
 
908
1032
  # Strike the job from the database
909
- cls._static_write(con, cur, [('DELETE FROM jobs WHERE id = ?', (jobID,))])
1033
+ cls._static_write(con, cur, [("DELETE FROM jobs WHERE id = ?", (jobID,))])
910
1034
 
911
1035
  def _deallocateSpaceForJob(self):
912
1036
  """
@@ -937,12 +1061,12 @@ class CachingFileStore(AbstractFileStore):
937
1061
  if self._executePendingDeletions() > 0:
938
1062
  # We actually had something to delete, which we deleted.
939
1063
  # Maybe there is space now
940
- logger.debug('Successfully executed pending deletions to free space')
1064
+ logger.debug("Successfully executed pending deletions to free space")
941
1065
  return True
942
1066
 
943
1067
  if self._executePendingUploads() > 0:
944
1068
  # We had something to upload. Maybe it can be evicted now.
945
- logger.debug('Successfully executed pending uploads to free space')
1069
+ logger.debug("Successfully executed pending uploads to free space")
946
1070
  return True
947
1071
 
948
1072
  # Otherwise, not enough files could be found in deleting state to solve our problem.
@@ -952,37 +1076,45 @@ class CachingFileStore(AbstractFileStore):
952
1076
  # soon as we hit the cache limit.
953
1077
 
954
1078
  # Find something that has no non-mutable references and is not already being deleted.
955
- self._read("""
1079
+ self._read(
1080
+ """
956
1081
  SELECT files.id FROM files WHERE files.state = 'cached' AND NOT EXISTS (
957
1082
  SELECT NULL FROM refs WHERE refs.file_id = files.id AND refs.state != 'mutable'
958
1083
  ) LIMIT 1
959
- """)
1084
+ """
1085
+ )
960
1086
  row = self.cur.fetchone()
961
1087
  if row is None:
962
1088
  # Nothing can be evicted by us.
963
1089
  # Someone else might be in the process of evicting something that will free up space for us too.
964
1090
  # Or someone mught be uploading something and we have to wait for them to finish before it can be deleted.
965
- logger.debug('Could not find anything to evict! Cannot free up space!')
1091
+ logger.debug("Could not find anything to evict! Cannot free up space!")
966
1092
  return False
967
1093
 
968
1094
  # Otherwise we found an eviction candidate.
969
1095
  fileID = row[0]
970
1096
 
971
1097
  # Try and grab it for deletion, subject to the condition that nothing has started reading it
972
- self._write([("""
1098
+ self._write(
1099
+ [
1100
+ (
1101
+ """
973
1102
  UPDATE files SET owner = ?, state = ? WHERE id = ? AND state = ?
974
1103
  AND owner IS NULL AND NOT EXISTS (
975
1104
  SELECT NULL FROM refs WHERE refs.file_id = files.id AND refs.state != 'mutable'
976
1105
  )
977
1106
  """,
978
- (me, 'deleting', fileID, 'cached'))])
1107
+ (me, "deleting", fileID, "cached"),
1108
+ )
1109
+ ]
1110
+ )
979
1111
 
980
- logger.debug('Evicting file %s', fileID)
1112
+ logger.debug("Evicting file %s", fileID)
981
1113
 
982
1114
  # Whether we actually got it or not, try deleting everything we have to delete
983
1115
  if self._executePendingDeletions() > 0:
984
1116
  # We deleted something
985
- logger.debug('Successfully executed pending deletions to free space')
1117
+ logger.debug("Successfully executed pending deletions to free space")
986
1118
  return True
987
1119
 
988
1120
  def _freeUpSpace(self):
@@ -999,7 +1131,10 @@ class CachingFileStore(AbstractFileStore):
999
1131
 
1000
1132
  while availableSpace < 0:
1001
1133
  # While there isn't enough space for the thing we want
1002
- logger.debug('Cache is full (%d bytes free). Trying to free up space!', availableSpace)
1134
+ logger.debug(
1135
+ "Cache is full (%d bytes free). Trying to free up space!",
1136
+ availableSpace,
1137
+ )
1003
1138
  # Free up space. See if we made any progress
1004
1139
  progress = self._tryToFreeUpSpace()
1005
1140
  availableSpace = self.getCacheAvailable()
@@ -1011,19 +1146,23 @@ class CachingFileStore(AbstractFileStore):
1011
1146
  # See if we've been oversubscribed.
1012
1147
  jobSpace = self.getSpaceUsableForJobs()
1013
1148
  if jobSpace < 0:
1014
- logger.critical('Jobs on this machine have oversubscribed our total available space (%d bytes)!', jobSpace)
1149
+ logger.critical(
1150
+ "Jobs on this machine have oversubscribed our total available space (%d bytes)!",
1151
+ jobSpace,
1152
+ )
1015
1153
  raise CacheUnbalancedError
1016
1154
  else:
1017
1155
  patience -= 1
1018
1156
  if patience <= 0:
1019
- logger.critical('Waited implausibly long for active uploads and deletes.')
1157
+ logger.critical(
1158
+ "Waited implausibly long for active uploads and deletes."
1159
+ )
1020
1160
  raise CacheUnbalancedError
1021
1161
  else:
1022
1162
  # Wait a bit and come back
1023
1163
  time.sleep(2)
1024
1164
 
1025
- logger.debug('Cache has %d bytes free.', availableSpace)
1026
-
1165
+ logger.debug("Cache has %d bytes free.", availableSpace)
1027
1166
 
1028
1167
  # Normal AbstractFileStore API
1029
1168
 
@@ -1044,8 +1183,13 @@ class CachingFileStore(AbstractFileStore):
1044
1183
  # have filled the cache or not.
1045
1184
  self.jobDiskBytes = job.disk
1046
1185
 
1047
- logger.debug('Actually running job (%s) with ID (%s) which wants %d of our %d bytes.',
1048
- self.jobName, self.jobID, self.jobDiskBytes, self.getCacheLimit())
1186
+ logger.debug(
1187
+ "Actually running job (%s) with ID (%s) which wants %d of our %d bytes.",
1188
+ self.jobName,
1189
+ self.jobID,
1190
+ self.jobDiskBytes,
1191
+ self.getCacheLimit(),
1192
+ )
1049
1193
 
1050
1194
  # Register the current job as taking this much space, and evict files
1051
1195
  # from the cache to make room before letting the job run.
@@ -1079,7 +1223,9 @@ class CachingFileStore(AbstractFileStore):
1079
1223
  # Create an empty file to get an ID.
1080
1224
  # Make sure to pass along the file basename.
1081
1225
  # TODO: this empty file could leak if we die now...
1082
- fileID = self.jobStore.get_empty_file_store_id(creatorID, cleanup, os.path.basename(localFileName))
1226
+ fileID = self.jobStore.get_empty_file_store_id(
1227
+ creatorID, cleanup, os.path.basename(localFileName)
1228
+ )
1083
1229
  # Work out who we are
1084
1230
  with self.as_process() as me:
1085
1231
 
@@ -1088,10 +1234,22 @@ class CachingFileStore(AbstractFileStore):
1088
1234
 
1089
1235
  # Create a file in uploadable state and a reference, in the same transaction.
1090
1236
  # Say the reference is an immutable reference
1091
- self._write([('INSERT INTO files VALUES (?, ?, ?, ?, ?)', (fileID, cachePath, fileSize, 'uploadable', me)),
1092
- ('INSERT INTO refs VALUES (?, ?, ?, ?)', (absLocalFileName, fileID, creatorID, 'immutable'))])
1237
+ self._write(
1238
+ [
1239
+ (
1240
+ "INSERT INTO files VALUES (?, ?, ?, ?, ?)",
1241
+ (fileID, cachePath, fileSize, "uploadable", me),
1242
+ ),
1243
+ (
1244
+ "INSERT INTO refs VALUES (?, ?, ?, ?)",
1245
+ (absLocalFileName, fileID, creatorID, "immutable"),
1246
+ ),
1247
+ ]
1248
+ )
1093
1249
 
1094
- if absLocalFileName.startswith(self.localTempDir) and not os.path.islink(absLocalFileName):
1250
+ if absLocalFileName.startswith(self.localTempDir) and not os.path.islink(
1251
+ absLocalFileName
1252
+ ):
1095
1253
  # We should link into the cache, because the upload is coming from our local temp dir (and not via a symlink in there)
1096
1254
  try:
1097
1255
  # Try and hardlink the file into the cache.
@@ -1102,8 +1260,14 @@ class CachingFileStore(AbstractFileStore):
1102
1260
 
1103
1261
  linkedToCache = True
1104
1262
 
1105
- logger.debug('Hardlinked file %s into cache at %s; deferring write to job store', localFileName, cachePath)
1106
- assert not os.path.islink(cachePath), "Symlink %s has invaded cache!" % cachePath
1263
+ logger.debug(
1264
+ "Hardlinked file %s into cache at %s; deferring write to job store",
1265
+ localFileName,
1266
+ cachePath,
1267
+ )
1268
+ assert not os.path.islink(cachePath), (
1269
+ "Symlink %s has invaded cache!" % cachePath
1270
+ )
1107
1271
 
1108
1272
  # Don't do the upload now. Let it be deferred until later (when the job is committing).
1109
1273
  except OSError:
@@ -1117,7 +1281,6 @@ class CachingFileStore(AbstractFileStore):
1117
1281
  # files to vanish from our cache.
1118
1282
  linkedToCache = False
1119
1283
 
1120
-
1121
1284
  if not linkedToCache:
1122
1285
  # If we can't do the link into the cache and upload from there, we
1123
1286
  # have to just upload right away. We can't guarantee sufficient
@@ -1126,27 +1289,40 @@ class CachingFileStore(AbstractFileStore):
1126
1289
 
1127
1290
  # Change the reference to 'mutable', which it will be.
1128
1291
  # And drop the file altogether.
1129
- self._write([('UPDATE refs SET state = ? WHERE path = ? AND file_id = ?', ('mutable', absLocalFileName, fileID)),
1130
- ('DELETE FROM files WHERE id = ?', (fileID,))])
1292
+ self._write(
1293
+ [
1294
+ (
1295
+ "UPDATE refs SET state = ? WHERE path = ? AND file_id = ?",
1296
+ ("mutable", absLocalFileName, fileID),
1297
+ ),
1298
+ ("DELETE FROM files WHERE id = ?", (fileID,)),
1299
+ ]
1300
+ )
1131
1301
 
1132
1302
  # Save the file to the job store right now
1133
- logger.debug('Actually executing upload immediately for file %s', fileID)
1303
+ logger.debug(
1304
+ "Actually executing upload immediately for file %s", fileID
1305
+ )
1134
1306
  self.jobStore.update_file(fileID, absLocalFileName)
1135
1307
 
1136
1308
  # Ship out the completed FileID object with its real size.
1137
1309
  return FileID.forPath(fileID, absLocalFileName)
1138
1310
 
1139
- def readGlobalFile(self, fileStoreID, userPath=None, cache=True, mutable=False, symlink=False):
1311
+ def readGlobalFile(
1312
+ self, fileStoreID, userPath=None, cache=True, mutable=False, symlink=False
1313
+ ):
1140
1314
 
1141
1315
  if str(fileStoreID) in self.filesToDelete:
1142
1316
  # File has already been deleted
1143
- raise FileNotFoundError(f'Attempted to read deleted file: {fileStoreID}')
1317
+ raise FileNotFoundError(f"Attempted to read deleted file: {fileStoreID}")
1144
1318
 
1145
1319
  if userPath is not None:
1146
1320
  # Validate the destination we got
1147
1321
  localFilePath = self._resolveAbsoluteLocalPath(userPath)
1148
1322
  if os.path.exists(localFilePath):
1149
- raise RuntimeError(' File %s ' % localFilePath + ' exists. Cannot Overwrite.')
1323
+ raise RuntimeError(
1324
+ " File %s " % localFilePath + " exists. Cannot Overwrite."
1325
+ )
1150
1326
  else:
1151
1327
  # Make our own destination
1152
1328
  localFilePath = self.getLocalTempFileName()
@@ -1158,22 +1334,29 @@ class CachingFileStore(AbstractFileStore):
1158
1334
  # We want to use the cache
1159
1335
 
1160
1336
  if mutable:
1161
- finalPath = self._readGlobalFileMutablyWithCache(fileStoreID, localFilePath, readerID)
1337
+ finalPath = self._readGlobalFileMutablyWithCache(
1338
+ fileStoreID, localFilePath, readerID
1339
+ )
1162
1340
  else:
1163
- finalPath = self._readGlobalFileWithCache(fileStoreID, localFilePath, symlink, readerID)
1341
+ finalPath = self._readGlobalFileWithCache(
1342
+ fileStoreID, localFilePath, symlink, readerID
1343
+ )
1164
1344
  else:
1165
1345
  # We do not want to use the cache
1166
- finalPath = self._readGlobalFileWithoutCache(fileStoreID, localFilePath, mutable, symlink, readerID)
1346
+ finalPath = self._readGlobalFileWithoutCache(
1347
+ fileStoreID, localFilePath, mutable, symlink, readerID
1348
+ )
1167
1349
 
1168
- if getattr(fileStoreID, 'executable', False):
1350
+ if getattr(fileStoreID, "executable", False):
1169
1351
  os.chmod(finalPath, os.stat(finalPath).st_mode | stat.S_IXUSR)
1170
1352
 
1171
1353
  # Record access in case the job crashes and we have to log it
1172
1354
  self.logAccess(fileStoreID, finalPath)
1173
1355
  return finalPath
1174
1356
 
1175
-
1176
- def _readGlobalFileWithoutCache(self, fileStoreID, localFilePath, mutable, symlink, readerID):
1357
+ def _readGlobalFileWithoutCache(
1358
+ self, fileStoreID, localFilePath, mutable, symlink, readerID
1359
+ ):
1177
1360
  """
1178
1361
  Read a file without putting it into the cache.
1179
1362
 
@@ -1191,7 +1374,9 @@ class CachingFileStore(AbstractFileStore):
1191
1374
  # read a file that is 'uploadable' or 'uploading' and hasn't hit
1192
1375
  # the backing job store yet.
1193
1376
 
1194
- with self._with_copying_reference_to_upload(fileStoreID, readerID, localFilePath) as ref_path:
1377
+ with self._with_copying_reference_to_upload(
1378
+ fileStoreID, readerID, localFilePath
1379
+ ) as ref_path:
1195
1380
  if ref_path is not None:
1196
1381
  # We got a copying reference, so the file is being uploaded and
1197
1382
  # must be read from the cache for consistency. And it will
@@ -1205,11 +1390,16 @@ class CachingFileStore(AbstractFileStore):
1205
1390
 
1206
1391
  # Find where the file is cached
1207
1392
  cachedPath = None
1208
- for row in self._read('SELECT path FROM files WHERE id = ?', (fileStoreID,)):
1393
+ for row in self._read(
1394
+ "SELECT path FROM files WHERE id = ?", (fileStoreID,)
1395
+ ):
1209
1396
  cachedPath = row[0]
1210
1397
 
1211
1398
  if cachedPath is None:
1212
- raise RuntimeError('File %s went away while we had a reference to it!' % fileStoreID)
1399
+ raise RuntimeError(
1400
+ "File %s went away while we had a reference to it!"
1401
+ % fileStoreID
1402
+ )
1213
1403
 
1214
1404
  if self.forceDownloadDelay is not None:
1215
1405
  # Wait around to simulate a big file for testing
@@ -1218,8 +1408,14 @@ class CachingFileStore(AbstractFileStore):
1218
1408
  atomic_copy(cachedPath, ref_path)
1219
1409
 
1220
1410
  # Change the reference to mutable so it sticks around
1221
- self._write([('UPDATE refs SET state = ? WHERE path = ? and file_id = ?',
1222
- ('mutable', ref_path, fileStoreID))])
1411
+ self._write(
1412
+ [
1413
+ (
1414
+ "UPDATE refs SET state = ? WHERE path = ? and file_id = ?",
1415
+ ("mutable", ref_path, fileStoreID),
1416
+ )
1417
+ ]
1418
+ )
1223
1419
  else:
1224
1420
  # File is not being uploaded currently.
1225
1421
 
@@ -1229,8 +1425,14 @@ class CachingFileStore(AbstractFileStore):
1229
1425
 
1230
1426
  # Create a 'mutable' reference (even if we end up with a link)
1231
1427
  # so we can see this file in deleteLocalFile.
1232
- self._write([('INSERT INTO refs VALUES (?, ?, ?, ?)',
1233
- (localFilePath, fileStoreID, readerID, 'mutable'))])
1428
+ self._write(
1429
+ [
1430
+ (
1431
+ "INSERT INTO refs VALUES (?, ?, ?, ?)",
1432
+ (localFilePath, fileStoreID, readerID, "mutable"),
1433
+ )
1434
+ ]
1435
+ )
1234
1436
 
1235
1437
  if self.forceDownloadDelay is not None:
1236
1438
  # Wait around to simulate a big file for testing
@@ -1290,15 +1492,32 @@ class CachingFileStore(AbstractFileStore):
1290
1492
  # Start a loop until we can do one of these
1291
1493
  while True:
1292
1494
  # Try and create a downloading entry if no entry exists
1293
- logger.debug('Trying to make file record for id %s', fileStoreID)
1294
- self._write([('INSERT OR IGNORE INTO files VALUES (?, ?, ?, ?, ?)',
1295
- (fileStoreID, cachedPath, self.getGlobalFileSize(fileStoreID), 'downloading', me))])
1495
+ logger.debug("Trying to make file record for id %s", fileStoreID)
1496
+ self._write(
1497
+ [
1498
+ (
1499
+ "INSERT OR IGNORE INTO files VALUES (?, ?, ?, ?, ?)",
1500
+ (
1501
+ fileStoreID,
1502
+ cachedPath,
1503
+ self.getGlobalFileSize(fileStoreID),
1504
+ "downloading",
1505
+ me,
1506
+ ),
1507
+ )
1508
+ ]
1509
+ )
1296
1510
 
1297
1511
  # See if we won the race
1298
- self._read('SELECT COUNT(*) FROM files WHERE id = ? AND state = ? AND owner = ?', (fileStoreID, 'downloading', me))
1512
+ self._read(
1513
+ "SELECT COUNT(*) FROM files WHERE id = ? AND state = ? AND owner = ?",
1514
+ (fileStoreID, "downloading", me),
1515
+ )
1299
1516
  if self.cur.fetchone()[0] > 0:
1300
1517
  # We are responsible for downloading the file
1301
- logger.debug('We are now responsible for downloading file %s', fileStoreID)
1518
+ logger.debug(
1519
+ "We are now responsible for downloading file %s", fileStoreID
1520
+ )
1302
1521
 
1303
1522
  # Make sure we have space for this download.
1304
1523
  self._freeUpSpace()
@@ -1313,37 +1532,65 @@ class CachingFileStore(AbstractFileStore):
1313
1532
  # two readers, one cached copy, and space for two copies total.
1314
1533
 
1315
1534
  # Make the copying reference
1316
- self._write([('INSERT INTO refs VALUES (?, ?, ?, ?)',
1317
- (localFilePath, fileStoreID, readerID, 'copying'))])
1535
+ self._write(
1536
+ [
1537
+ (
1538
+ "INSERT INTO refs VALUES (?, ?, ?, ?)",
1539
+ (localFilePath, fileStoreID, readerID, "copying"),
1540
+ )
1541
+ ]
1542
+ )
1318
1543
 
1319
1544
  # Fulfill it with a full copy or by giving away the cached copy
1320
- self._fulfillCopyingReference(fileStoreID, cachedPath, localFilePath)
1545
+ self._fulfillCopyingReference(
1546
+ fileStoreID, cachedPath, localFilePath
1547
+ )
1321
1548
 
1322
1549
  # Now we're done
1323
1550
  return localFilePath
1324
1551
 
1325
1552
  else:
1326
- logger.debug('Someone else is already responsible for file %s', fileStoreID)
1553
+ logger.debug(
1554
+ "Someone else is already responsible for file %s", fileStoreID
1555
+ )
1327
1556
 
1328
1557
  # A record already existed for this file.
1329
1558
  # Try and create an immutable or copying reference to an entry that
1330
1559
  # is in 'cached' or 'uploadable' or 'uploading' state.
1331
1560
  # It might be uploading because *we* are supposed to be uploading it.
1332
- logger.debug('Trying to make reference to file %s', fileStoreID)
1333
- self._write([('INSERT INTO refs SELECT ?, id, ?, ? FROM files WHERE id = ? AND (state = ? OR state = ? OR state = ?)',
1334
- (localFilePath, readerID, 'copying', fileStoreID, 'cached', 'uploadable', 'uploading'))])
1561
+ logger.debug("Trying to make reference to file %s", fileStoreID)
1562
+ self._write(
1563
+ [
1564
+ (
1565
+ "INSERT INTO refs SELECT ?, id, ?, ? FROM files WHERE id = ? AND (state = ? OR state = ? OR state = ?)",
1566
+ (
1567
+ localFilePath,
1568
+ readerID,
1569
+ "copying",
1570
+ fileStoreID,
1571
+ "cached",
1572
+ "uploadable",
1573
+ "uploading",
1574
+ ),
1575
+ )
1576
+ ]
1577
+ )
1335
1578
 
1336
1579
  # See if we got it
1337
- self._read('SELECT COUNT(*) FROM refs WHERE path = ? and file_id = ?', (localFilePath, fileStoreID))
1580
+ self._read(
1581
+ "SELECT COUNT(*) FROM refs WHERE path = ? and file_id = ?",
1582
+ (localFilePath, fileStoreID),
1583
+ )
1338
1584
  if self.cur.fetchone()[0] > 0:
1339
1585
  # The file is cached and we can copy or link it
1340
- logger.debug('Obtained reference to file %s', fileStoreID)
1586
+ logger.debug("Obtained reference to file %s", fileStoreID)
1341
1587
 
1342
1588
  # Get the path it is actually at in the cache, instead of where we wanted to put it
1343
- for row in self._read('SELECT path FROM files WHERE id = ?', (fileStoreID,)):
1589
+ for row in self._read(
1590
+ "SELECT path FROM files WHERE id = ?", (fileStoreID,)
1591
+ ):
1344
1592
  cachedPath = row[0]
1345
1593
 
1346
-
1347
1594
  while self.getCacheAvailable() < 0:
1348
1595
  # Since we now have a copying reference, see if we have used too much space.
1349
1596
  # If so, try to free up some space by deleting or uploading, but
@@ -1356,15 +1603,23 @@ class CachingFileStore(AbstractFileStore):
1356
1603
 
1357
1604
  # See if we have no other references and we can give away the file.
1358
1605
  # Change it to downloading owned by us if we can grab it.
1359
- self._write([("""
1606
+ self._write(
1607
+ [
1608
+ (
1609
+ """
1360
1610
  UPDATE files SET files.owner = ?, files.state = ? WHERE files.id = ? AND files.state = ?
1361
1611
  AND files.owner IS NULL AND NOT EXISTS (
1362
1612
  SELECT NULL FROM refs WHERE refs.file_id = files.id AND refs.state != 'mutable'
1363
1613
  )
1364
1614
  """,
1365
- (me, 'downloading', fileStoreID, 'cached'))])
1366
-
1367
- if self._giveAwayDownloadingFile(fileStoreID, cachedPath, localFilePath):
1615
+ (me, "downloading", fileStoreID, "cached"),
1616
+ )
1617
+ ]
1618
+ )
1619
+
1620
+ if self._giveAwayDownloadingFile(
1621
+ fileStoreID, cachedPath, localFilePath
1622
+ ):
1368
1623
  # We got ownership of the file and managed to give it away.
1369
1624
  return localFilePath
1370
1625
 
@@ -1385,14 +1640,23 @@ class CachingFileStore(AbstractFileStore):
  atomic_copy(cachedPath, localFilePath)

  # Change the reference to mutable
- self._write([('UPDATE refs SET state = ? WHERE path = ? AND file_id = ?', ('mutable', localFilePath, fileStoreID))])
+ self._write(
+ [
+ (
+ "UPDATE refs SET state = ? WHERE path = ? AND file_id = ?",
+ ("mutable", localFilePath, fileStoreID),
+ )
+ ]
+ )

  # Now we're done
  return localFilePath

  else:
  # We didn't get a reference. Maybe it is still downloading.
- logger.debug('Could not obtain reference to file %s', fileStoreID)
+ logger.debug(
+ "Could not obtain reference to file %s", fileStoreID
+ )

  # Loop around again and see if either we can download it or we can get a reference to it.

@@ -1432,8 +1696,14 @@ class CachingFileStore(AbstractFileStore):
  # Expose this file as cached so other people can copy off of it too.

  # Change state from downloading to cached
- self._write([('UPDATE files SET state = ?, owner = NULL WHERE id = ?',
- ('cached', fileStoreID))])
+ self._write(
+ [
+ (
+ "UPDATE files SET state = ?, owner = NULL WHERE id = ?",
+ ("cached", fileStoreID),
+ )
+ ]
+ )

  if self.forceDownloadDelay is not None:
  # Wait around to simulate a big file for testing
@@ -1443,12 +1713,18 @@ class CachingFileStore(AbstractFileStore):
  atomic_copy(cachedPath, localFilePath)

  # Change our reference to mutable
- self._write([('UPDATE refs SET state = ? WHERE path = ? AND file_id = ?', ('mutable', localFilePath, fileStoreID))])
+ self._write(
+ [
+ (
+ "UPDATE refs SET state = ? WHERE path = ? AND file_id = ?",
+ ("mutable", localFilePath, fileStoreID),
+ )
+ ]
+ )

  # Now we're done
  return

-
  def _giveAwayDownloadingFile(self, fileStoreID, cachedPath, localFilePath):
  """
  Move a downloaded file in 'downloading' state, owned by us, from the cache to a user-specified destination path.
@@ -1468,8 +1744,10 @@ class CachingFileStore(AbstractFileStore):
  with self.as_process() as me:

  # See if we actually own this file and can give it away
- self._read('SELECT COUNT(*) FROM files WHERE id = ? AND state = ? AND owner = ?',
- (fileStoreID, 'downloading', me))
+ self._read(
+ "SELECT COUNT(*) FROM files WHERE id = ? AND state = ? AND owner = ?",
+ (fileStoreID, "downloading", me),
+ )
  if self.cur.fetchone()[0] > 0:
  # Now we have exclusive control of the cached copy of the file, so we can give it away.

@@ -1478,8 +1756,15 @@ class CachingFileStore(AbstractFileStore):
  # We are giving it away
  shutil.move(cachedPath, localFilePath)
  # Record that.
- self._write([('UPDATE refs SET state = ? WHERE path = ? AND file_id = ?', ('mutable', localFilePath, fileStoreID)),
- ('DELETE FROM files WHERE id = ?', (fileStoreID,))])
+ self._write(
+ [
+ (
+ "UPDATE refs SET state = ? WHERE path = ? AND file_id = ?",
+ ("mutable", localFilePath, fileStoreID),
+ ),
+ ("DELETE FROM files WHERE id = ?", (fileStoreID,)),
+ ]
+ )

  # Now we're done
  return True
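_giveAwayDownloadingFile batches its two statements (flip the reference to 'mutable', drop the file row) into one `_write()` call so they commit together. A hedged sketch of that all-or-nothing grouping, assuming `_write()` behaves like executing a list of statements inside one transaction:

    import sqlite3

    def write_batch(con, statements):
        # Run every statement, then commit once; on any error, roll back so
        # either all of the changes land or none of them do.
        cur = con.cursor()
        try:
            for sql, args in statements:
                cur.execute(sql, args)
            con.commit()
        except sqlite3.Error:
            con.rollback()
            raise

    con = sqlite3.connect(":memory:")
    con.execute("CREATE TABLE files (id TEXT PRIMARY KEY)")
    con.execute("CREATE TABLE refs (path TEXT, file_id TEXT, state TEXT)")
    con.execute("INSERT INTO files VALUES ('f1')")
    con.execute("INSERT INTO refs VALUES ('/tmp/local', 'f1', 'immutable')")

    write_batch(con, [
        ("UPDATE refs SET state = ? WHERE path = ? AND file_id = ?",
         ("mutable", "/tmp/local", "f1")),
        ("DELETE FROM files WHERE id = ?", ("f1",)),
    ])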
@@ -1504,7 +1789,9 @@ class CachingFileStore(AbstractFileStore):
  :rtype: bool
  """

- assert os.path.exists(cachedPath), "Cannot create link to missing cache file %s" % cachedPath
+ assert os.path.exists(cachedPath), (
+ "Cannot create link to missing cache file %s" % cachedPath
+ )

  try:
  # Try and make the hard link.
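The linking path this hunk touches reduces to "try a hard link, report failure so the caller can copy instead". A minimal sketch of that fallback, assuming nothing beyond the os module (the real method also handles symlinks and more cases):

    import os

    def try_hard_link(src: str, dst: str) -> bool:
        """Return True if dst now hard-links to src, False if the caller must copy."""
        try:
            os.link(src, dst)
            return True
        except OSError:
            # Typical reasons: dst already exists (EEXIST) or src and dst
            # live on different filesystems (EXDEV).
            return False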
@@ -1546,17 +1833,46 @@ class CachingFileStore(AbstractFileStore):
  # Try and create a downloading entry if no entry exists.
  # Make sure to create a reference at the same time if it succeeds, to bill it against our job's space.
  # Don't create the mutable reference yet because we might not necessarily be able to clear that space.
- logger.debug('Trying to make file downloading file record and reference for id %s', fileStoreID)
- self._write([('INSERT OR IGNORE INTO files VALUES (?, ?, ?, ?, ?)',
- (fileStoreID, cachedPath, self.getGlobalFileSize(fileStoreID), 'downloading', me)),
- ('INSERT INTO refs SELECT ?, id, ?, ? FROM files WHERE id = ? AND state = ? AND owner = ?',
- (localFilePath, readerID, 'immutable', fileStoreID, 'downloading', me))])
+ logger.debug(
+ "Trying to make file downloading file record and reference for id %s",
+ fileStoreID,
+ )
+ self._write(
+ [
+ (
+ "INSERT OR IGNORE INTO files VALUES (?, ?, ?, ?, ?)",
+ (
+ fileStoreID,
+ cachedPath,
+ self.getGlobalFileSize(fileStoreID),
+ "downloading",
+ me,
+ ),
+ ),
+ (
+ "INSERT INTO refs SELECT ?, id, ?, ? FROM files WHERE id = ? AND state = ? AND owner = ?",
+ (
+ localFilePath,
+ readerID,
+ "immutable",
+ fileStoreID,
+ "downloading",
+ me,
+ ),
+ ),
+ ]
+ )

  # See if we won the race
- self._read('SELECT COUNT(*) FROM files WHERE id = ? AND state = ? AND owner = ?', (fileStoreID, 'downloading', me))
+ self._read(
+ "SELECT COUNT(*) FROM files WHERE id = ? AND state = ? AND owner = ?",
+ (fileStoreID, "downloading", me),
+ )
  if self.cur.fetchone()[0] > 0:
  # We are responsible for downloading the file (and we have the reference)
- logger.debug('We are now responsible for downloading file %s', fileStoreID)
+ logger.debug(
+ "We are now responsible for downloading file %s", fileStoreID
+ )

  # Make sure we have space for this download.
  self._freeUpSpace()
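The race above is settled by `INSERT OR IGNORE`: whichever process inserts the file row first becomes the downloader, everyone else's insert is silently dropped, and each process learns its role by re-reading the row. A sketch under the same assumptions (sqlite3, simplified columns):

    import sqlite3

    con = sqlite3.connect(":memory:")
    cur = con.cursor()
    cur.execute("CREATE TABLE files (id TEXT PRIMARY KEY, state TEXT, owner TEXT)")

    def try_to_become_downloader(me: str) -> bool:
        # Only the first insert for a given primary key takes effect.
        cur.execute(
            "INSERT OR IGNORE INTO files VALUES (?, ?, ?)", ("f1", "downloading", me)
        )
        con.commit()
        # Re-read to see whose insert actually landed.
        cur.execute(
            "SELECT COUNT(*) FROM files WHERE id = ? AND state = ? AND owner = ?",
            ("f1", "downloading", me),
        )
        return cur.fetchone()[0] > 0

    print(try_to_become_downloader("worker-1"))  # True: must download the file
    print(try_to_become_downloader("worker-2"))  # False: worker-1 already owns it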
@@ -1570,8 +1886,14 @@ class CachingFileStore(AbstractFileStore):
  # We made the link!

  # Change file state from downloading to cached so other people can use it
- self._write([('UPDATE files SET state = ?, owner = NULL WHERE id = ?',
- ('cached', fileStoreID))])
+ self._write(
+ [
+ (
+ "UPDATE files SET state = ?, owner = NULL WHERE id = ?",
+ ("cached", fileStoreID),
+ )
+ ]
+ )

  # Now we're done!
  return localFilePath
@@ -1579,36 +1901,69 @@ class CachingFileStore(AbstractFileStore):
  # We could not make a link. We need to make a copy.

  # Change the reference to copying.
- self._write([('UPDATE refs SET state = ? WHERE path = ? AND file_id = ?', ('copying', localFilePath, fileStoreID))])
+ self._write(
+ [
+ (
+ "UPDATE refs SET state = ? WHERE path = ? AND file_id = ?",
+ ("copying", localFilePath, fileStoreID),
+ )
+ ]
+ )

  # Fulfill it with a full copy or by giving away the cached copy
- self._fulfillCopyingReference(fileStoreID, cachedPath, localFilePath)
+ self._fulfillCopyingReference(
+ fileStoreID, cachedPath, localFilePath
+ )

  # Now we're done
  return localFilePath

  else:
- logger.debug('We already have an entry in the cache database for file %s', fileStoreID)
+ logger.debug(
+ "We already have an entry in the cache database for file %s",
+ fileStoreID,
+ )

  # A record already existed for this file.
  # Try and create an immutable reference to an entry that
  # is in 'cached' or 'uploadable' or 'uploading' state.
  # It might be uploading because *we* are supposed to be uploading it.
- logger.debug('Trying to make reference to file %s', fileStoreID)
- self._write([('INSERT INTO refs SELECT ?, id, ?, ? FROM files WHERE id = ? AND (state = ? OR state = ? OR state = ?)',
- (localFilePath, readerID, 'immutable', fileStoreID, 'cached', 'uploadable', 'uploading'))])
+ logger.debug("Trying to make reference to file %s", fileStoreID)
+ self._write(
+ [
+ (
+ "INSERT INTO refs SELECT ?, id, ?, ? FROM files WHERE id = ? AND (state = ? OR state = ? OR state = ?)",
+ (
+ localFilePath,
+ readerID,
+ "immutable",
+ fileStoreID,
+ "cached",
+ "uploadable",
+ "uploading",
+ ),
+ )
+ ]
+ )

  # See if we got it
- self._read('SELECT COUNT(*) FROM refs WHERE path = ? and file_id = ?', (localFilePath, fileStoreID))
+ self._read(
+ "SELECT COUNT(*) FROM refs WHERE path = ? and file_id = ?",
+ (localFilePath, fileStoreID),
+ )
  if self.cur.fetchone()[0] > 0:
  # The file is cached and we can copy or link it
- logger.debug('Obtained reference to file %s', fileStoreID)
+ logger.debug("Obtained reference to file %s", fileStoreID)

  # Get the path it is actually at in the cache, instead of where we wanted to put it
- for row in self._read('SELECT path FROM files WHERE id = ?', (fileStoreID,)):
+ for row in self._read(
+ "SELECT path FROM files WHERE id = ?", (fileStoreID,)
+ ):
  cachedPath = row[0]

- if self._createLinkFromCache(cachedPath, localFilePath, symlink):
+ if self._createLinkFromCache(
+ cachedPath, localFilePath, symlink
+ ):
  # We managed to make the link
  return localFilePath
  else:
@@ -1620,11 +1975,22 @@ class CachingFileStore(AbstractFileStore):
  # we already have code for that for mutable downloads,
  # so just clear the reference and download mutably.

- self._write([('DELETE FROM refs WHERE path = ? AND file_id = ?', (localFilePath, fileStoreID))])
-
- return self._readGlobalFileMutablyWithCache(fileStoreID, localFilePath, readerID)
+ self._write(
+ [
+ (
+ "DELETE FROM refs WHERE path = ? AND file_id = ?",
+ (localFilePath, fileStoreID),
+ )
+ ]
+ )
+
+ return self._readGlobalFileMutablyWithCache(
+ fileStoreID, localFilePath, readerID
+ )
  else:
- logger.debug('Could not obtain reference to file %s', fileStoreID)
+ logger.debug(
+ "Could not obtain reference to file %s", fileStoreID
+ )

  # If we didn't get a download or a reference, adopt and do work from dead workers and loop again.
  # We may have to wait for someone else's download or delete to
@@ -1640,7 +2006,12 @@ class CachingFileStore(AbstractFileStore):
  time.sleep(self.contentionBackoff)

  @contextmanager
- def _with_copying_reference_to_upload(self, file_store_id: FileID, reader_id: str, local_file_path: Optional[str] = None) -> Generator:
+ def _with_copying_reference_to_upload(
+ self,
+ file_store_id: FileID,
+ reader_id: str,
+ local_file_path: Optional[str] = None,
+ ) -> Generator:
  """
  Get a context manager that gives you either the local file path for a
  copying reference to the given file, or None if that file is not in an
@@ -1662,12 +2033,28 @@ class CachingFileStore(AbstractFileStore):
  local_file_path = self.getLocalTempFileName()

  # Try and make a 'copying' reference to such a file
- self._write([('INSERT INTO refs SELECT ?, id, ?, ? FROM files WHERE id = ? AND (state = ? OR state = ?)',
- (local_file_path, reader_id, 'copying', file_store_id, 'uploadable', 'uploading'))])
+ self._write(
+ [
+ (
+ "INSERT INTO refs SELECT ?, id, ?, ? FROM files WHERE id = ? AND (state = ? OR state = ?)",
+ (
+ local_file_path,
+ reader_id,
+ "copying",
+ file_store_id,
+ "uploadable",
+ "uploading",
+ ),
+ )
+ ]
+ )

  # See if we got it
  have_reference = False
- for row in self._read('SELECT COUNT(*) FROM refs WHERE path = ? and file_id = ?', (local_file_path, file_store_id)):
+ for row in self._read(
+ "SELECT COUNT(*) FROM refs WHERE path = ? and file_id = ?",
+ (local_file_path, file_store_id),
+ ):
  have_reference = row[0] > 0

  if have_reference:
@@ -1676,8 +2063,14 @@ class CachingFileStore(AbstractFileStore):
  yield local_file_path
  finally:
  # Clean up the reference if it is unmodified
- self._write([('DELETE FROM refs WHERE path = ? AND file_id = ? AND state = ?',
- (local_file_path, file_store_id, 'copying'))])
+ self._write(
+ [
+ (
+ "DELETE FROM refs WHERE path = ? AND file_id = ? AND state = ?",
+ (local_file_path, file_store_id, "copying"),
+ )
+ ]
+ )
  else:
  # No reference was obtained.
  yield None
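The context manager above has a deliberately forgiving shape: it yields a usable path when the 'copying' reference was created, yields None when it was not, and removes the still-unmodified reference in a finally block however the body exits. A stripped-down sketch of that shape (the set stands in for the refs table; names are illustrative, not the package API):

    from contextlib import contextmanager
    from typing import Iterator, Optional

    refs = set()  # stand-in for the refs table

    @contextmanager
    def maybe_reference(acquired: bool, path: str) -> Iterator[Optional[str]]:
        if acquired:
            refs.add(path)
            try:
                yield path  # caller reads through the reference
            finally:
                refs.discard(path)  # clean up if the reference is still ours
        else:
            yield None  # caller must fall back to another source

    with maybe_reference(True, "/tmp/f1") as p:
        print("reading via reference" if p is not None else "falling back")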
@@ -1686,11 +2079,13 @@ class CachingFileStore(AbstractFileStore):
  def readGlobalFileStream(self, fileStoreID, encoding=None, errors=None):
  if str(fileStoreID) in self.filesToDelete:
  # File has already been deleted
- raise FileNotFoundError(f'Attempted to read deleted file: {fileStoreID}')
+ raise FileNotFoundError(f"Attempted to read deleted file: {fileStoreID}")

  self.logAccess(fileStoreID)

- with self._with_copying_reference_to_upload(fileStoreID, self.jobDesc.jobStoreID) as ref_path:
+ with self._with_copying_reference_to_upload(
+ fileStoreID, self.jobDesc.jobStoreID
+ ) as ref_path:
  # Try and grab a reference to the file if it is being uploaded.
  if ref_path is not None:
  # We have an update in the cache that isn't written back yet.
@@ -1699,11 +2094,16 @@ class CachingFileStore(AbstractFileStore):
  # The ref file is not actually copied to; find the actual file
  # in the cache
  cached_path = None
- for row in self._read('SELECT path FROM files WHERE id = ?', (fileStoreID,)):
+ for row in self._read(
+ "SELECT path FROM files WHERE id = ?", (fileStoreID,)
+ ):
  cached_path = row[0]

  if cached_path is None:
- raise RuntimeError('File %s went away while we had a reference to it!' % fileStoreID)
+ raise RuntimeError(
+ "File %s went away while we had a reference to it!"
+ % fileStoreID
+ )

  with open(cached_path, encoding=encoding, errors=errors) as result:
  # Pass along the results of the open context manager on the
@@ -1714,7 +2114,9 @@ class CachingFileStore(AbstractFileStore):
  else:
  # No local update, so we can stream from the job store
  # TODO: Maybe stream from cache even when not required for consistency?
- with self.jobStore.read_file_stream(fileStoreID, encoding=encoding, errors=errors) as result:
+ with self.jobStore.read_file_stream(
+ fileStoreID, encoding=encoding, errors=errors
+ ) as result:
  yield result

  def deleteLocalFile(self, fileStoreID):
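readGlobalFileStream, shown ending above, is a generator used as a context manager: depending on whether an unflushed cached copy exists, it opens either the cached path or a job store stream, and in both branches simply yields the open handle through. A hedged sketch of that pass-through pattern, with a second local file standing in for the job store stream:

    from contextlib import contextmanager

    @contextmanager
    def read_stream(cached_path, fallback_path, encoding=None, errors=None):
        # Prefer the unflushed cached copy when one exists.
        source = cached_path if cached_path is not None else fallback_path
        with open(source, encoding=encoding, errors=errors) as handle:
            # Yielding inside the open() block hands the live handle to the
            # caller and still guarantees it is closed afterwards.
            yield handle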
@@ -1727,7 +2129,10 @@ class CachingFileStore(AbstractFileStore):
  # missing ref file, we will raise an error about it and stop deleting
  # things.
  missingFile = None
- for row in self._read('SELECT path FROM refs WHERE file_id = ? AND job_id = ?', (fileStoreID, jobID)):
+ for row in self._read(
+ "SELECT path FROM refs WHERE file_id = ? AND job_id = ?",
+ (fileStoreID, jobID),
+ ):
  # Delete all the files that are references to this cached file (even mutable copies)
  path = row[0]

@@ -1748,12 +2153,22 @@ class CachingFileStore(AbstractFileStore):
  if len(deleted) == 0 and not missingFile:
  # We have to tell the user if they tried to delete 0 local copies.
  # But if we found a missing local copy, go on to report that instead.
- raise OSError(errno.ENOENT, f"Attempting to delete local copies of a file with none: {fileStoreID}")
+ raise OSError(
+ errno.ENOENT,
+ f"Attempting to delete local copies of a file with none: {fileStoreID}",
+ )

  for path in deleted:
  # Drop the references
- self._write([('DELETE FROM refs WHERE file_id = ? AND job_id = ? AND path = ?', (fileStoreID, jobID, path))])
- logger.debug('Deleted local file %s for global file %s', path, fileStoreID)
+ self._write(
+ [
+ (
+ "DELETE FROM refs WHERE file_id = ? AND job_id = ? AND path = ?",
+ (fileStoreID, jobID, path),
+ )
+ ]
+ )
+ logger.debug("Deleted local file %s for global file %s", path, fileStoreID)

  # Now space has been revoked from the cache because that job needs its space back.
  # That might result in stuff having to be evicted.
@@ -1781,13 +2196,25 @@ class CachingFileStore(AbstractFileStore):
  with self.as_process() as me:

  # Make sure nobody else has references to it
- for row in self._read('SELECT job_id FROM refs WHERE file_id = ? AND state != ?', (fileStoreID, 'mutable')):
- raise RuntimeError(f'Deleted file ID {fileStoreID} which is still in use by job {row[0]}')
+ for row in self._read(
+ "SELECT job_id FROM refs WHERE file_id = ? AND state != ?",
+ (fileStoreID, "mutable"),
+ ):
+ raise RuntimeError(
+ f"Deleted file ID {fileStoreID} which is still in use by job {row[0]}"
+ )
  # TODO: should we just let other jobs and the cache keep the file until
  # it gets evicted, and only delete at the back end?

  # Pop the file into deleting state owned by us if it exists
- self._write([('UPDATE files SET state = ?, owner = ? WHERE id = ?', ('deleting', me, fileStoreID))])
+ self._write(
+ [
+ (
+ "UPDATE files SET state = ?, owner = ? WHERE id = ?",
+ ("deleting", me, fileStoreID),
+ )
+ ]
+ )

  # Finish the delete if the file is present
  self._executePendingDeletions()
@@ -1795,10 +2222,13 @@ class CachingFileStore(AbstractFileStore):
  # Add the file to the list of files to be deleted from the job store
  # once the run method completes.
  self.filesToDelete.add(str(fileStoreID))
- self.log_to_leader('Added file with ID \'%s\' to the list of files to be' % fileStoreID +
- ' globally deleted.', level=logging.DEBUG)
+ self.log_to_leader(
+ "Added file with ID '%s' to the list of files to be" % fileStoreID
+ + " globally deleted.",
+ level=logging.DEBUG,
+ )

- @deprecated(new_function_name='export_file')
+ @deprecated(new_function_name="export_file")
  def exportFile(self, jobStoreFileID: FileID, dstUrl: str) -> None:
  return self.export_file(jobStoreFileID, dstUrl)

@@ -1829,7 +2259,10 @@ class CachingFileStore(AbstractFileStore):
  # thread. It can do some destructor work after it finishes its real
  # work.

- if self.commitThread is not None and self.commitThread is not threading.current_thread():
+ if (
+ self.commitThread is not None
+ and self.commitThread is not threading.current_thread()
+ ):
  self.commitThread.join()

  return True
@@ -1856,17 +2289,23 @@ class CachingFileStore(AbstractFileStore):
  # might be necessary for later jobs to see earlier jobs' deleted
  # before they are committed?

- logger.debug('Starting commit of %s forked from %s', state_to_commit, self.jobDesc)
+ logger.debug(
+ "Starting commit of %s forked from %s", state_to_commit, self.jobDesc
+ )
  # Make sure the deep copy isn't summoning ghosts of old job
  # versions. It must be as new or newer at this point.
  self.jobDesc.assert_is_not_newer_than(state_to_commit)

  # Bump the original's version since saving will do that too and we
  # don't want duplicate versions.
- self.jobDesc.reserve_versions(1 if len(state_to_commit.filesToDelete) == 0 else 2)
+ self.jobDesc.reserve_versions(
+ 1 if len(state_to_commit.filesToDelete) == 0 else 2
+ )

  # Start the commit thread
- self.commitThread = threading.Thread(target=self.startCommitThread, args=(state_to_commit,))
+ self.commitThread = threading.Thread(
+ target=self.startCommitThread, args=(state_to_commit,)
+ )
  self.commitThread.start()

  def startCommitThread(self, state_to_commit: Optional[JobDescription]):
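The commit path above does three things: wait out any still-running previous commit (in the real code the new background thread itself performs that wait), work from a snapshot of job state so later mutations cannot leak into it, and hand the snapshot to a thread. A minimal sketch of that handoff, with the job store replaced by a print and the wait simplified to happen before spawning:

    import copy
    import threading

    class Committer:
        def __init__(self):
            self.commit_thread = None

        def wait_for_previous_commit(self):
            if self.commit_thread is not None:
                self.commit_thread.join()

        def _commit(self, snapshot):
            print("committing", snapshot)  # real code talks to the job store

        def start_commit(self, state):
            self.wait_for_previous_commit()
            snapshot = copy.deepcopy(state)  # freeze the state to commit
            self.commit_thread = threading.Thread(target=self._commit, args=(snapshot,))
            self.commit_thread.start()

    c = Committer()
    c.start_commit({"files_to_delete": []})
    c.wait_for_previous_commit()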
@@ -1879,7 +2318,7 @@ class CachingFileStore(AbstractFileStore):
  self.waitForPreviousCommit()

  try:
- logger.debug('Committing file uploads asynchronously')
+ logger.debug("Committing file uploads asynchronously")

  # Finish all uploads
  self._executePendingUploads()
@@ -1889,7 +2328,10 @@ class CachingFileStore(AbstractFileStore):
  if state_to_commit is not None:
  # Do all the things that make this job not redoable

- logger.debug('Committing file deletes and job state changes asynchronously from %s', state_to_commit)
+ logger.debug(
+ "Committing file deletes and job state changes asynchronously from %s",
+ state_to_commit,
+ )

  # Complete the job
  self.jobStore.update_job(state_to_commit)
@@ -1905,10 +2347,8 @@ class CachingFileStore(AbstractFileStore):
  self._terminateEvent.set()
  raise

-
-
  @classmethod
- def shutdown(cls, shutdown_info: Tuple[str, str]) -> None:
+ def shutdown(cls, shutdown_info: tuple[str, str]) -> None:
  """
  :param shutdown_info: Tuple of the coordination directory (where the
  cache database is) and the cache directory (where the cached data is).
@@ -1935,7 +2375,7 @@ class CachingFileStore(AbstractFileStore):
  # So we just go and find the cache-n.db with the largest n value,
  # and use that.
  dbFilename = None
- dbAttempt = float('-inf')
+ dbAttempt = float("-inf")

  # We also need to remember all the plausible database files and
  # journals
@@ -1943,12 +2383,15 @@ class CachingFileStore(AbstractFileStore):

  for dbCandidate in os.listdir(coordination_dir):
  # For each thing in the coordination directory, see if it starts like a database file.
- match = re.match('^cache-([0-9]+).db.*', dbCandidate)
+ match = re.match("^cache-([0-9]+).db.*", dbCandidate)
  if match:
  # This is caching-related.
  all_db_files.append(dbCandidate)
  attempt_number = int(match.group(1))
- if attempt_number > dbAttempt and dbCandidate == f"cache-{attempt_number}.db":
+ if (
+ attempt_number > dbAttempt
+ and dbCandidate == f"cache-{attempt_number}.db"
+ ):
  # This is a main database, and the newest we have seen.
  dbFilename = dbCandidate
  dbAttempt = attempt_number
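The scan above has to cope with SQLite sidecar files (write-ahead logs and journals) whose names also begin with the database name: the regex collects everything caching-related for cleanup, but only an exact `cache-N.db` name can compete to be the newest main database. A sketch of just that selection, assuming the same naming scheme:

    import os
    import re

    def newest_cache_db(coordination_dir: str):
        db_filename = None
        db_attempt = float("-inf")
        for candidate in os.listdir(coordination_dir):
            match = re.match(r"^cache-([0-9]+).db.*", candidate)
            if match:
                attempt = int(match.group(1))
                # cache-3.db-wal also matches the regex, but only the exact
                # main-database name can win.
                if attempt > db_attempt and candidate == f"cache-{attempt}.db":
                    db_filename = candidate
                    db_attempt = attempt
        return db_filename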
@@ -1956,7 +2399,9 @@ class CachingFileStore(AbstractFileStore):
  if dbFilename is not None:
  # We found a caching database

- logger.debug('Connecting to latest caching database %s for cleanup', dbFilename)
+ logger.debug(
+ "Connecting to latest caching database %s for cleanup", dbFilename
+ )

  dbPath = os.path.join(coordination_dir, dbFilename)

@@ -1980,7 +2425,7 @@ class CachingFileStore(AbstractFileStore):

  con.close()
  else:
- logger.debug('No caching database found in %s', dir_)
+ logger.debug("No caching database found in %s", dir_)

  # Whether or not we found a database, we need to clean up the cache
  # directory. Delete everything cached.
@@ -2017,7 +2462,9 @@ class CachingFileStore(AbstractFileStore):

  # Get all the dead worker PIDs
  workers = []
- for row in cls._static_read(cur, 'SELECT DISTINCT worker FROM jobs WHERE worker IS NOT NULL'):
+ for row in cls._static_read(
+ cur, "SELECT DISTINCT worker FROM jobs WHERE worker IS NOT NULL"
+ ):
  workers.append(row[0])

  # Work out which of them are not currently running.
@@ -2030,14 +2477,18 @@ class CachingFileStore(AbstractFileStore):
  # Now we know which workers are dead.
  # Clear them off of the jobs they had.
  for deadWorker in deadWorkers:
- cls._static_write(con, cur, [('UPDATE jobs SET worker = NULL WHERE worker = ?', (deadWorker,))])
+ cls._static_write(
+ con,
+ cur,
+ [("UPDATE jobs SET worker = NULL WHERE worker = ?", (deadWorker,))],
+ )
  if len(deadWorkers) > 0:
- logger.debug('Reaped %d dead workers', len(deadWorkers))
+ logger.debug("Reaped %d dead workers", len(deadWorkers))

  while True:
  # Find an unowned job.
  # Don't take all of them; other people could come along and want to help us with the other jobs.
- cls._static_read(cur, 'SELECT id FROM jobs WHERE worker IS NULL LIMIT 1')
+ cls._static_read(cur, "SELECT id FROM jobs WHERE worker IS NULL LIMIT 1")
  row = cur.fetchone()
  if row is None:
  # We cleaned up all the jobs
@@ -2046,10 +2497,23 @@ class CachingFileStore(AbstractFileStore):
  jobID = row[0]

  # Try to own this job
- cls._static_write(con, cur, [('UPDATE jobs SET worker = ? WHERE id = ? AND worker IS NULL', (me, jobID))])
+ cls._static_write(
+ con,
+ cur,
+ [
+ (
+ "UPDATE jobs SET worker = ? WHERE id = ? AND worker IS NULL",
+ (me, jobID),
+ )
+ ],
+ )

  # See if we won the race
- cls._static_read(cur, 'SELECT id, tempdir FROM jobs WHERE id = ? AND worker = ?', (jobID, me))
+ cls._static_read(
+ cur,
+ "SELECT id, tempdir FROM jobs WHERE id = ? AND worker = ?",
+ (jobID, me),
+ )
  row = cur.fetchone()
  if row is None:
  # We didn't win the race. Try another one.
@@ -2058,6 +2522,6 @@ class CachingFileStore(AbstractFileStore):
  # If we did win, delete the job and its files and temp dir
  cls._removeJob(con, cur, jobID)

- logger.debug('Cleaned up orphaned job %s', jobID)
+ logger.debug("Cleaned up orphaned job %s", jobID)

  # Now we have cleaned up all the jobs that belonged to dead workers that were dead when we entered this function.
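The cleanup loop above adopts orphaned jobs one at a time with the same claim-then-verify discipline used elsewhere in this file: select any unowned job, try to take it, confirm the claim, and only then do destructive work. A sketch against an in-memory jobs table (the DELETE stands in for _removeJob, which also cleans files and temp dirs):

    import sqlite3

    con = sqlite3.connect(":memory:")
    cur = con.cursor()
    cur.execute("CREATE TABLE jobs (id TEXT PRIMARY KEY, worker TEXT)")
    cur.executemany("INSERT INTO jobs VALUES (?, NULL)", [("j1",), ("j2",)])
    me = "cleaner-1"

    while True:
        cur.execute("SELECT id FROM jobs WHERE worker IS NULL LIMIT 1")
        row = cur.fetchone()
        if row is None:
            break  # nothing left to adopt
        job_id = row[0]
        # Claim the job only if it is still unowned...
        cur.execute(
            "UPDATE jobs SET worker = ? WHERE id = ? AND worker IS NULL", (me, job_id)
        )
        con.commit()
        # ...and confirm the claim before removing anything.
        cur.execute("SELECT id FROM jobs WHERE id = ? AND worker = ?", (job_id, me))
        if cur.fetchone() is None:
            continue  # lost the race; try another job
        cur.execute("DELETE FROM jobs WHERE id = ?", (job_id,))
        con.commit()
        print("cleaned up orphaned job", job_id)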