toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193) hide show
  1. toil/__init__.py +122 -315
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +173 -89
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
  5. toil/batchSystems/awsBatch.py +244 -135
  6. toil/batchSystems/cleanup_support.py +26 -16
  7. toil/batchSystems/contained_executor.py +31 -28
  8. toil/batchSystems/gridengine.py +86 -50
  9. toil/batchSystems/htcondor.py +166 -89
  10. toil/batchSystems/kubernetes.py +632 -382
  11. toil/batchSystems/local_support.py +20 -15
  12. toil/batchSystems/lsf.py +134 -81
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +290 -151
  16. toil/batchSystems/mesos/executor.py +79 -50
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +46 -28
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +296 -125
  21. toil/batchSystems/slurm.py +603 -138
  22. toil/batchSystems/torque.py +47 -33
  23. toil/bus.py +186 -76
  24. toil/common.py +664 -368
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1136 -483
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +63 -42
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +140 -60
  32. toil/fileStores/cachingFileStore.py +717 -269
  33. toil/fileStores/nonCachingFileStore.py +116 -87
  34. toil/job.py +1225 -368
  35. toil/jobStores/abstractJobStore.py +416 -266
  36. toil/jobStores/aws/jobStore.py +863 -477
  37. toil/jobStores/aws/utils.py +201 -120
  38. toil/jobStores/conftest.py +3 -2
  39. toil/jobStores/fileJobStore.py +292 -154
  40. toil/jobStores/googleJobStore.py +140 -74
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +668 -272
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +74 -31
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +214 -39
  49. toil/lib/aws/utils.py +287 -231
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +104 -47
  53. toil/lib/docker.py +131 -103
  54. toil/lib/ec2.py +361 -199
  55. toil/lib/ec2nodes.py +174 -106
  56. toil/lib/encryption/_dummy.py +5 -3
  57. toil/lib/encryption/_nacl.py +10 -6
  58. toil/lib/encryption/conftest.py +1 -0
  59. toil/lib/exceptions.py +26 -7
  60. toil/lib/expando.py +5 -3
  61. toil/lib/ftp_utils.py +217 -0
  62. toil/lib/generatedEC2Lists.py +127 -19
  63. toil/lib/humanize.py +6 -2
  64. toil/lib/integration.py +341 -0
  65. toil/lib/io.py +141 -15
  66. toil/lib/iterables.py +4 -2
  67. toil/lib/memoize.py +12 -8
  68. toil/lib/misc.py +66 -21
  69. toil/lib/objects.py +2 -2
  70. toil/lib/resources.py +68 -15
  71. toil/lib/retry.py +126 -81
  72. toil/lib/threading.py +299 -82
  73. toil/lib/throttle.py +16 -15
  74. toil/options/common.py +843 -409
  75. toil/options/cwl.py +175 -90
  76. toil/options/runner.py +50 -0
  77. toil/options/wdl.py +73 -17
  78. toil/provisioners/__init__.py +117 -46
  79. toil/provisioners/abstractProvisioner.py +332 -157
  80. toil/provisioners/aws/__init__.py +70 -33
  81. toil/provisioners/aws/awsProvisioner.py +1145 -715
  82. toil/provisioners/clusterScaler.py +541 -279
  83. toil/provisioners/gceProvisioner.py +282 -179
  84. toil/provisioners/node.py +155 -79
  85. toil/realtimeLogger.py +34 -22
  86. toil/resource.py +137 -75
  87. toil/server/app.py +128 -62
  88. toil/server/celery_app.py +3 -1
  89. toil/server/cli/wes_cwl_runner.py +82 -53
  90. toil/server/utils.py +54 -28
  91. toil/server/wes/abstract_backend.py +64 -26
  92. toil/server/wes/amazon_wes_utils.py +21 -15
  93. toil/server/wes/tasks.py +121 -63
  94. toil/server/wes/toil_backend.py +142 -107
  95. toil/server/wsgi_app.py +4 -3
  96. toil/serviceManager.py +58 -22
  97. toil/statsAndLogging.py +224 -70
  98. toil/test/__init__.py +282 -183
  99. toil/test/batchSystems/batchSystemTest.py +460 -210
  100. toil/test/batchSystems/batch_system_plugin_test.py +90 -0
  101. toil/test/batchSystems/test_gridengine.py +173 -0
  102. toil/test/batchSystems/test_lsf_helper.py +67 -58
  103. toil/test/batchSystems/test_slurm.py +110 -49
  104. toil/test/cactus/__init__.py +0 -0
  105. toil/test/cactus/test_cactus_integration.py +56 -0
  106. toil/test/cwl/cwlTest.py +496 -287
  107. toil/test/cwl/measure_default_memory.cwl +12 -0
  108. toil/test/cwl/not_run_required_input.cwl +29 -0
  109. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  110. toil/test/cwl/seqtk_seq.cwl +1 -1
  111. toil/test/docs/scriptsTest.py +69 -46
  112. toil/test/jobStores/jobStoreTest.py +427 -264
  113. toil/test/lib/aws/test_iam.py +118 -50
  114. toil/test/lib/aws/test_s3.py +16 -9
  115. toil/test/lib/aws/test_utils.py +5 -6
  116. toil/test/lib/dockerTest.py +118 -141
  117. toil/test/lib/test_conversions.py +113 -115
  118. toil/test/lib/test_ec2.py +58 -50
  119. toil/test/lib/test_integration.py +104 -0
  120. toil/test/lib/test_misc.py +12 -5
  121. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  122. toil/test/mesos/helloWorld.py +7 -6
  123. toil/test/mesos/stress.py +25 -20
  124. toil/test/options/__init__.py +13 -0
  125. toil/test/options/options.py +42 -0
  126. toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
  127. toil/test/provisioners/clusterScalerTest.py +440 -250
  128. toil/test/provisioners/clusterTest.py +166 -44
  129. toil/test/provisioners/gceProvisionerTest.py +174 -100
  130. toil/test/provisioners/provisionerTest.py +25 -13
  131. toil/test/provisioners/restartScript.py +5 -4
  132. toil/test/server/serverTest.py +188 -141
  133. toil/test/sort/restart_sort.py +137 -68
  134. toil/test/sort/sort.py +134 -66
  135. toil/test/sort/sortTest.py +91 -49
  136. toil/test/src/autoDeploymentTest.py +141 -101
  137. toil/test/src/busTest.py +20 -18
  138. toil/test/src/checkpointTest.py +8 -2
  139. toil/test/src/deferredFunctionTest.py +49 -35
  140. toil/test/src/dockerCheckTest.py +32 -24
  141. toil/test/src/environmentTest.py +135 -0
  142. toil/test/src/fileStoreTest.py +539 -272
  143. toil/test/src/helloWorldTest.py +7 -4
  144. toil/test/src/importExportFileTest.py +61 -31
  145. toil/test/src/jobDescriptionTest.py +46 -21
  146. toil/test/src/jobEncapsulationTest.py +2 -0
  147. toil/test/src/jobFileStoreTest.py +74 -50
  148. toil/test/src/jobServiceTest.py +187 -73
  149. toil/test/src/jobTest.py +121 -71
  150. toil/test/src/miscTests.py +19 -18
  151. toil/test/src/promisedRequirementTest.py +82 -36
  152. toil/test/src/promisesTest.py +7 -6
  153. toil/test/src/realtimeLoggerTest.py +10 -6
  154. toil/test/src/regularLogTest.py +71 -37
  155. toil/test/src/resourceTest.py +80 -49
  156. toil/test/src/restartDAGTest.py +36 -22
  157. toil/test/src/resumabilityTest.py +9 -2
  158. toil/test/src/retainTempDirTest.py +45 -14
  159. toil/test/src/systemTest.py +12 -8
  160. toil/test/src/threadingTest.py +44 -25
  161. toil/test/src/toilContextManagerTest.py +10 -7
  162. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  163. toil/test/src/workerTest.py +73 -23
  164. toil/test/utils/toilDebugTest.py +103 -33
  165. toil/test/utils/toilKillTest.py +4 -5
  166. toil/test/utils/utilsTest.py +245 -106
  167. toil/test/wdl/wdltoil_test.py +818 -149
  168. toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
  169. toil/toilState.py +120 -35
  170. toil/utils/toilConfig.py +13 -4
  171. toil/utils/toilDebugFile.py +44 -27
  172. toil/utils/toilDebugJob.py +214 -27
  173. toil/utils/toilDestroyCluster.py +11 -6
  174. toil/utils/toilKill.py +8 -3
  175. toil/utils/toilLaunchCluster.py +256 -140
  176. toil/utils/toilMain.py +37 -16
  177. toil/utils/toilRsyncCluster.py +32 -14
  178. toil/utils/toilSshCluster.py +49 -22
  179. toil/utils/toilStats.py +356 -273
  180. toil/utils/toilStatus.py +292 -139
  181. toil/utils/toilUpdateEC2Instances.py +3 -1
  182. toil/version.py +12 -12
  183. toil/wdl/utils.py +5 -5
  184. toil/wdl/wdltoil.py +3913 -1033
  185. toil/worker.py +367 -184
  186. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
  187. toil-8.0.0.dist-info/METADATA +173 -0
  188. toil-8.0.0.dist-info/RECORD +253 -0
  189. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
  190. toil-6.1.0a1.dist-info/METADATA +0 -125
  191. toil-6.1.0a1.dist-info/RECORD +0 -237
  192. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
  193. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
@@ -19,30 +19,29 @@ import random
19
19
  import re
20
20
  import shutil
21
21
  import stat
22
- import sys
23
22
  import time
24
23
  import uuid
24
+ from collections.abc import Iterable, Iterator
25
25
  from contextlib import contextmanager
26
- from typing import IO, Iterable, Iterator, List, Optional, Union, overload
26
+ from typing import IO, Literal, Optional, Union, overload
27
27
  from urllib.parse import ParseResult, quote, unquote
28
28
 
29
- if sys.version_info >= (3, 8):
30
- from typing import Literal
31
- else:
32
- from typing_extensions import Literal
33
-
34
29
  from toil.fileStores import FileID
35
30
  from toil.job import TemporaryID
36
- from toil.jobStores.abstractJobStore import (AbstractJobStore,
37
- JobStoreExistsException,
38
- NoSuchFileException,
39
- NoSuchJobException,
40
- NoSuchJobStoreException)
41
- from toil.lib.io import (AtomicFileCreate,
42
- atomic_copy,
43
- atomic_copyobj,
44
- mkdtemp,
45
- robust_rmtree)
31
+ from toil.jobStores.abstractJobStore import (
32
+ AbstractJobStore,
33
+ JobStoreExistsException,
34
+ NoSuchFileException,
35
+ NoSuchJobException,
36
+ NoSuchJobStoreException,
37
+ )
38
+ from toil.lib.io import (
39
+ AtomicFileCreate,
40
+ atomic_copy,
41
+ atomic_copyobj,
42
+ mkdtemp,
43
+ robust_rmtree,
44
+ )
46
45
 
47
46
  logger = logging.getLogger(__name__)
48
47
 
@@ -59,14 +58,19 @@ class FileJobStore(AbstractJobStore):
59
58
 
60
59
  # What prefix should be on the per-job job directories, to distinguish them
61
60
  # from the spray directories?
62
- JOB_DIR_PREFIX = 'instance-'
61
+ JOB_DIR_PREFIX = "instance-"
63
62
 
64
63
  # What prefix do we put on the per-job-name directories we sort jobs into?
65
- JOB_NAME_DIR_PREFIX = 'kind-'
64
+ JOB_NAME_DIR_PREFIX = "kind-"
66
65
 
67
66
  # 10Mb RAM chunks when reading/writing files
68
67
  BUFFER_SIZE = 10485760 # 10Mb
69
68
 
69
+ # When a log file is still being written, what will its name end with?
70
+ LOG_TEMP_SUFFIX = ".new"
71
+ # All log files start with this prefix
72
+ LOG_PREFIX = "stats"
73
+
70
74
  def default_caching(self) -> bool:
71
75
  """
72
76
  Jobstore's preference as to whether it likes caching or doesn't care about it.
@@ -88,47 +92,58 @@ class FileJobStore(AbstractJobStore):
88
92
  logger.debug("Path to job store directory is '%s'.", self.jobStoreDir)
89
93
 
90
94
  # Directory where actual job files go, and their job-associated temp files
91
- self.jobsDir = os.path.join(self.jobStoreDir, 'jobs')
95
+ self.jobsDir = os.path.join(self.jobStoreDir, "jobs")
92
96
  # Directory where stats files go
93
- self.statsDir = os.path.join(self.jobStoreDir, 'stats')
97
+ self.statsDir = os.path.join(self.jobStoreDir, "stats")
98
+ # Which has subdirectories for new and seen stats files
99
+ self.stats_inbox = os.path.join(self.statsDir, "inbox")
100
+ self.stats_archive = os.path.join(self.statsDir, "archive")
94
101
  # Directory where non-job-associated files for the file store go
95
- self.filesDir = os.path.join(self.jobStoreDir, 'files/no-job')
102
+ self.filesDir = os.path.join(self.jobStoreDir, "files/no-job")
96
103
  # Directory where job-associated files for the file store go.
97
104
  # Each per-job directory in here will have separate directories for
98
105
  # files to clean up and files to not clean up when the job is deleted.
99
- self.jobFilesDir = os.path.join(self.jobStoreDir, 'files/for-job')
106
+ self.jobFilesDir = os.path.join(self.jobStoreDir, "files/for-job")
100
107
  # Directory where shared files go
101
- self.sharedFilesDir = os.path.join(self.jobStoreDir, 'files/shared')
108
+ self.sharedFilesDir = os.path.join(self.jobStoreDir, "files/shared")
102
109
 
103
110
  self.fanOut = fanOut
104
111
 
105
112
  self.linkImports = None
106
113
  self.moveExports = None
114
+ self.symlink_job_store_reads = None
107
115
 
108
116
  def __repr__(self):
109
- return f'FileJobStore({self.jobStoreDir})'
117
+ return f"FileJobStore({self.jobStoreDir})"
110
118
 
111
119
  def initialize(self, config):
112
120
  try:
113
121
  os.mkdir(self.jobStoreDir)
114
122
  except OSError as e:
115
123
  if e.errno == errno.EEXIST:
116
- raise JobStoreExistsException(self.jobStoreDir)
124
+ raise JobStoreExistsException(self.jobStoreDir, "file")
117
125
  else:
118
126
  raise
119
127
  os.makedirs(self.jobsDir, exist_ok=True)
120
128
  os.makedirs(self.statsDir, exist_ok=True)
129
+ os.makedirs(self.stats_inbox, exist_ok=True)
130
+ os.makedirs(self.stats_archive, exist_ok=True)
121
131
  os.makedirs(self.filesDir, exist_ok=True)
122
132
  os.makedirs(self.jobFilesDir, exist_ok=True)
123
133
  os.makedirs(self.sharedFilesDir, exist_ok=True)
124
134
  self.linkImports = config.symlinkImports
125
135
  self.moveExports = config.moveOutputs
136
+ self.symlink_job_store_reads = config.symlink_job_store_reads
126
137
  super().initialize(config)
127
138
 
128
139
  def resume(self):
129
140
  if not os.path.isdir(self.jobStoreDir):
130
- raise NoSuchJobStoreException(self.jobStoreDir)
141
+ raise NoSuchJobStoreException(self.jobStoreDir, "file")
131
142
  super().resume()
143
+ # TODO: Unify with initialize() configuration
144
+ self.linkImports = self.config.symlinkImports
145
+ self.moveExports = self.config.moveOutputs
146
+ self.symlink_job_store_reads = self.config.symlink_job_store_reads
132
147
 
133
148
  def destroy(self):
134
149
  if os.path.exists(self.jobStoreDir):
@@ -147,8 +162,10 @@ class FileJobStore(AbstractJobStore):
147
162
 
148
163
  # Make a unique temp directory under a directory for this job name,
149
164
  # possibly sprayed across multiple levels of subdirectories.
150
- absJobDir = mkdtemp(prefix=self.JOB_DIR_PREFIX,
151
- dir=self._get_arbitrary_jobs_dir_for_name(usefulFilename))
165
+ absJobDir = mkdtemp(
166
+ prefix=self.JOB_DIR_PREFIX,
167
+ dir=self._get_arbitrary_jobs_dir_for_name(usefulFilename),
168
+ )
152
169
 
153
170
  job_description.jobStoreID = self._get_job_id_from_dir(absJobDir)
154
171
 
@@ -174,7 +191,9 @@ class FileJobStore(AbstractJobStore):
174
191
  Spin-wait and block for a job to appear before returning
175
192
  False if it does not.
176
193
  """
177
- return self._wait_for_file(self._get_job_file_name(jobStoreID), maxTries=maxTries, sleepTime=sleepTime)
194
+ return self._wait_for_file(
195
+ self._get_job_file_name(jobStoreID), maxTries=maxTries, sleepTime=sleepTime
196
+ )
178
197
 
179
198
  def _wait_for_file(self, fileName, maxTries=35, sleepTime=1):
180
199
  """
@@ -192,14 +211,18 @@ class FileJobStore(AbstractJobStore):
192
211
  In practice, the need for retries happens rarely, but it does happen
193
212
  over the course of large workflows with a jobStore on a busy NFS.
194
213
  """
195
- for iTry in range(1,maxTries+1):
214
+ for iTry in range(1, maxTries + 1):
196
215
  if os.path.exists(fileName):
197
216
  return True
198
217
  if iTry >= maxTries:
199
218
  return False
200
219
  elif iTry == 1:
201
- logger.warning(("Path `{}` does not exist (yet). We will try #{} more times with {}s "
202
- "intervals.").format(fileName, maxTries - iTry, sleepTime))
220
+ logger.warning(
221
+ (
222
+ "Path `{}` does not exist (yet). We will try #{} more times with {}s "
223
+ "intervals."
224
+ ).format(fileName, maxTries - iTry, sleepTime)
225
+ )
203
226
  time.sleep(sleepTime)
204
227
  return False
205
228
 
@@ -210,7 +233,7 @@ class FileJobStore(AbstractJobStore):
210
233
  self._check_job_store_file_id(jobStoreFileID)
211
234
  jobStorePath = self._get_file_path_from_id(jobStoreFileID)
212
235
  if os.path.exists(jobStorePath):
213
- return 'file:' + jobStorePath
236
+ return "file:" + jobStorePath
214
237
  else:
215
238
  raise NoSuchFileException(jobStoreFileID)
216
239
 
@@ -218,7 +241,7 @@ class FileJobStore(AbstractJobStore):
218
241
  jobStorePath = os.path.join(self.sharedFilesDir, sharedFileName)
219
242
  if not os.path.exists(jobStorePath):
220
243
  raise NoSuchFileException(sharedFileName)
221
- return 'file:' + jobStorePath
244
+ return "file:" + jobStorePath
222
245
 
223
246
  def load_job(self, job_id):
224
247
  # If the job obviously doesn't exist, note that.
@@ -226,7 +249,7 @@ class FileJobStore(AbstractJobStore):
226
249
  # Try to load a valid version of the job.
227
250
  jobFile = self._get_job_file_name(job_id)
228
251
  try:
229
- with open(jobFile, 'rb') as fileHandle:
252
+ with open(jobFile, "rb") as fileHandle:
230
253
  job = pickle.load(fileHandle)
231
254
  except FileNotFoundError:
232
255
  # We were racing a delete on a non-POSIX-compliant filesystem.
@@ -248,7 +271,9 @@ class FileJobStore(AbstractJobStore):
248
271
 
249
272
  def update_job(self, job):
250
273
  assert job.jobStoreID is not None, f"Tried to update job {job} without an ID"
251
- assert not isinstance(job.jobStoreID, TemporaryID), f"Tried to update job {job} without an assigned ID"
274
+ assert not isinstance(
275
+ job.jobStoreID, TemporaryID
276
+ ), f"Tried to update job {job} without an assigned ID"
252
277
 
253
278
  job.pre_update_hook()
254
279
 
@@ -261,10 +286,11 @@ class FileJobStore(AbstractJobStore):
261
286
  # The file is then moved to its correct path.
262
287
  # Atomicity guarantees use the fact the underlying file system's "move"
263
288
  # function is atomic.
264
- with open(dest_filename + ".new", 'xb') as f:
289
+ with open(dest_filename + ".new", "xb") as f:
265
290
  pickle.dump(job, f)
266
291
  # This should be atomic for the file system
267
292
  os.rename(dest_filename + ".new", dest_filename)
293
+
268
294
  def delete_job(self, job_id):
269
295
  # The jobStoreID is the relative path to the directory containing the job,
270
296
  # removing this directory deletes the job.
@@ -296,48 +322,50 @@ class FileJobStore(AbstractJobStore):
296
322
  # Functions that deal with temporary files associated with jobs
297
323
  ##########################################
298
324
 
299
- @contextmanager
300
- def optional_hard_copy(self, hardlink):
301
- if hardlink:
302
- saved = self.linkImports
303
- self.linkImports = False
304
- yield
305
- if hardlink:
306
- self.linkImports = saved
307
-
308
- def _copy_or_link(self, src_path, dst_path, symlink=False):
325
+ def _copy_or_link(self, src_path, dst_path, hardlink=False, symlink=False):
309
326
  # linking is not done be default because of issue #1755
310
- srcPath = self._extract_path_from_url(src_path)
311
- if self.linkImports and symlink:
312
- os.symlink(os.path.realpath(srcPath), dst_path)
327
+ # TODO: is hardlinking ever actually done?
328
+ src_path = self._extract_path_from_url(src_path)
329
+ if self.linkImports and not hardlink and symlink:
330
+ os.symlink(os.path.realpath(src_path), dst_path)
313
331
  else:
314
- atomic_copy(srcPath, dst_path)
315
-
316
- def _import_file(self, otherCls, uri, shared_file_name=None, hardlink=False, symlink=True):
317
- # symlink argument says whether the caller can take symlinks or not
318
- # ex: if false, it implies the workflow cannot work with symlinks and thus will hardlink imports
332
+ atomic_copy(src_path, dst_path)
333
+
334
+ def _import_file(
335
+ self, otherCls, uri, shared_file_name=None, hardlink=False, symlink=True
336
+ ):
337
+ # symlink argument says whether the caller can take symlinks or not.
338
+ # ex: if false, it means the workflow cannot work with symlinks and we need to hardlink or copy.
339
+ # TODO: Do we ever actually hardlink?
319
340
  # default is true since symlinking everything is ideal
320
341
  uri_path = unquote(uri.path)
321
342
  if issubclass(otherCls, FileJobStore):
322
343
  if os.path.isdir(uri_path):
323
344
  # Don't allow directories (unless someone is racing us)
324
- raise IsADirectoryError(f"URI {uri} points to a directory but a file was expected")
345
+ raise IsADirectoryError(
346
+ f"URI {uri} points to a directory but a file was expected"
347
+ )
325
348
  if shared_file_name is None:
326
349
  executable = os.stat(uri_path).st_mode & stat.S_IXUSR != 0
327
- absPath = self._get_unique_file_path(uri_path) # use this to get a valid path to write to in job store
328
- with self.optional_hard_copy(hardlink):
329
- self._copy_or_link(uri, absPath, symlink=symlink)
350
+ # use this to get a valid path to write to in job store
351
+ absPath = self._get_unique_file_path(uri_path)
352
+ self._copy_or_link(uri, absPath, hardlink=hardlink, symlink=symlink)
330
353
  # TODO: os.stat(absPath).st_size consistently gives values lower than
331
354
  # getDirSizeRecursively()
332
- return FileID(self._get_file_id_from_path(absPath), os.stat(absPath).st_size, executable)
355
+ return FileID(
356
+ self._get_file_id_from_path(absPath),
357
+ os.stat(absPath).st_size,
358
+ executable,
359
+ )
333
360
  else:
334
361
  self._requireValidSharedFileName(shared_file_name)
335
362
  path = self._get_shared_file_path(shared_file_name)
336
- with self.optional_hard_copy(hardlink):
337
- self._copy_or_link(uri, path, symlink=symlink)
363
+ self._copy_or_link(uri, path, hardlink=hardlink, symlink=symlink)
338
364
  return None
339
365
  else:
340
- return super()._import_file(otherCls, uri, shared_file_name=shared_file_name)
366
+ return super()._import_file(
367
+ otherCls, uri, shared_file_name=shared_file_name
368
+ )
341
369
 
342
370
  def _export_file(self, otherCls, file_id, uri):
343
371
  if issubclass(otherCls, FileJobStore):
@@ -346,7 +374,7 @@ class FileJobStore(AbstractJobStore):
346
374
  # Make sure we don't need to worry about directories when exporting
347
375
  # to local files, just like for cloud storage.
348
376
  os.makedirs(os.path.dirname(destPath), exist_ok=True)
349
- executable = getattr(file_id, 'executable', False)
377
+ executable = getattr(file_id, "executable", False)
350
378
  if self.moveExports:
351
379
  self._move_and_linkback(srcPath, destPath, executable=executable)
352
380
  else:
@@ -355,7 +383,11 @@ class FileJobStore(AbstractJobStore):
355
383
  super()._default_export_file(otherCls, file_id, uri)
356
384
 
357
385
  def _move_and_linkback(self, srcPath, destPath, executable):
358
- logger.debug("moveExports option, Moving src=%s to dest=%s ; then symlinking dest to src", srcPath, destPath)
386
+ logger.debug(
387
+ "moveExports option, Moving src=%s to dest=%s ; then symlinking dest to src",
388
+ srcPath,
389
+ destPath,
390
+ )
359
391
  shutil.move(srcPath, destPath)
360
392
  os.symlink(destPath, srcPath)
361
393
  if executable:
@@ -391,7 +423,7 @@ class FileJobStore(AbstractJobStore):
391
423
  """
392
424
  Open a file URL as a binary stream.
393
425
  """
394
- return open(cls._extract_path_from_url(url), 'rb')
426
+ return open(cls._extract_path_from_url(url), "rb")
395
427
 
396
428
  @classmethod
397
429
  def _write_to_url(cls, readable, url, executable=False):
@@ -403,20 +435,24 @@ class FileJobStore(AbstractJobStore):
403
435
  :param object readable: An open file object to read from.
404
436
  """
405
437
  # we use a ~10Mb buffer to improve speed
406
- atomic_copyobj(readable,
407
- cls._extract_path_from_url(url),
408
- length=cls.BUFFER_SIZE,
409
- executable=executable)
438
+ atomic_copyobj(
439
+ readable,
440
+ cls._extract_path_from_url(url),
441
+ length=cls.BUFFER_SIZE,
442
+ executable=executable,
443
+ )
410
444
 
411
445
  @classmethod
412
- def _list_url(cls, url: ParseResult) -> List[str]:
446
+ def _list_url(cls, url: ParseResult) -> list[str]:
413
447
  path = cls._extract_path_from_url(url)
414
448
  listing = []
415
449
  for p in os.listdir(path):
416
450
  # We know there are no slashes in these
417
451
  component = quote(p)
418
452
  # Return directories with trailing slashes and files without
419
- listing.append((component + '/') if os.path.isdir(os.path.join(path, p)) else component)
453
+ listing.append(
454
+ (component + "/") if os.path.isdir(os.path.join(path, p)) else component
455
+ )
420
456
  return listing
421
457
 
422
458
  @classmethod
@@ -429,13 +465,13 @@ class FileJobStore(AbstractJobStore):
429
465
  """
430
466
  :return: local file path of file pointed at by the given URL
431
467
  """
432
- if url.netloc != '' and url.netloc != 'localhost':
468
+ if url.netloc != "" and url.netloc != "localhost":
433
469
  raise RuntimeError("The URL '%s' is invalid" % url.geturl())
434
470
  return unquote(url.path)
435
471
 
436
472
  @classmethod
437
473
  def _supports_url(cls, url, export=False):
438
- return url.scheme.lower() == 'file'
474
+ return url.scheme.lower() == "file"
439
475
 
440
476
  def _make_string_filename_safe(self, arbitraryString, maxLength=240):
441
477
  """
@@ -464,7 +500,7 @@ class FileJobStore(AbstractJobStore):
464
500
  parts.append("UNPRINTABLE")
465
501
 
466
502
  # Glue it all together, and truncate to length
467
- return '_'.join(parts)[:maxLength]
503
+ return "_".join(parts)[:maxLength]
468
504
 
469
505
  def write_file(self, local_path, job_id=None, cleanup=False):
470
506
  absPath = self._get_unique_file_path(local_path, job_id, cleanup)
@@ -473,20 +509,30 @@ class FileJobStore(AbstractJobStore):
473
509
  return relPath
474
510
 
475
511
  @contextmanager
476
- def write_file_stream(self, job_id=None, cleanup=False, basename=None, encoding=None, errors=None):
512
+ def write_file_stream(
513
+ self, job_id=None, cleanup=False, basename=None, encoding=None, errors=None
514
+ ):
477
515
  if not basename:
478
- basename = 'stream'
516
+ basename = "stream"
479
517
  absPath = self._get_unique_file_path(basename, job_id, cleanup)
480
518
  relPath = self._get_file_id_from_path(absPath)
481
519
 
482
- with open(absPath, 'wb' if encoding == None else 'wt', encoding=encoding, errors=errors) as f:
520
+ with open(
521
+ absPath,
522
+ "wb" if encoding == None else "wt",
523
+ encoding=encoding,
524
+ errors=errors,
525
+ ) as f:
483
526
  # Don't yield while holding an open file descriptor to the temp
484
527
  # file. That can result in temp files still being open when we try
485
528
  # to clean ourselves up, somehow, for certain workloads.
486
529
  yield f, relPath
487
530
 
488
531
  def get_empty_file_store_id(self, jobStoreID=None, cleanup=False, basename=None):
489
- with self.write_file_stream(jobStoreID, cleanup, basename) as (fileHandle, jobStoreFileID):
532
+ with self.write_file_stream(jobStoreID, cleanup, basename) as (
533
+ fileHandle,
534
+ jobStoreFileID,
535
+ ):
490
536
  return jobStoreFileID
491
537
 
492
538
  def update_file(self, file_id, local_path):
@@ -503,20 +549,23 @@ class FileJobStore(AbstractJobStore):
503
549
  self._check_job_store_file_id(file_id)
504
550
  jobStoreFilePath = self._get_file_path_from_id(file_id)
505
551
  localDirPath = os.path.dirname(local_path)
506
- executable = getattr(file_id, 'executable', False)
552
+ executable = getattr(file_id, "executable", False)
507
553
 
508
554
  if not symlink and os.path.islink(local_path):
509
555
  # We had a symlink and want to clobber it with a hardlink or copy.
510
556
  os.unlink(local_path)
511
557
 
512
- if os.path.exists(local_path) and os.path.samefile(jobStoreFilePath, local_path):
558
+ if os.path.exists(local_path) and os.path.samefile(
559
+ jobStoreFilePath, local_path
560
+ ):
513
561
  # The files are already the same: same name, hardlinked, or
514
562
  # symlinked. There is nothing to do, and trying to shutil.copyfile
515
563
  # one over the other will fail.
516
564
  return
517
565
 
518
- if symlink:
519
- # If the reader will accept a symlink, so always give them one.
566
+ if symlink and self.symlink_job_store_reads:
567
+ # If the reader will accept a symlink, and we are willing to
568
+ # symlink into the jobstore, always give them one.
520
569
  # There's less that can go wrong.
521
570
  try:
522
571
  os.symlink(jobStoreFilePath, local_path)
@@ -537,7 +586,9 @@ class FileJobStore(AbstractJobStore):
537
586
  # In this case, we try to make a hard link.
538
587
  pass
539
588
  else:
540
- logger.error(f"Unexpected OSError when reading file '{jobStoreFilePath}' from job store")
589
+ logger.error(
590
+ f"Unexpected OSError when reading file '{jobStoreFilePath}' from job store"
591
+ )
541
592
  raise
542
593
 
543
594
  # If we get here, symlinking isn't an option.
@@ -581,7 +632,9 @@ class FileJobStore(AbstractJobStore):
581
632
  # hit the file copy case.
582
633
  pass
583
634
  else:
584
- logger.error(f"Unexpected OSError when reading file '{jobStoreFilePath}' from job store")
635
+ logger.error(
636
+ f"Unexpected OSError when reading file '{jobStoreFilePath}' from job store"
637
+ )
585
638
  raise
586
639
 
587
640
  # If we get here, neither a symlink nor a hardlink will work.
@@ -596,15 +649,17 @@ class FileJobStore(AbstractJobStore):
596
649
  def file_exists(self, file_id):
597
650
  absPath = self._get_file_path_from_id(file_id)
598
651
 
599
- if (not absPath.startswith(self.jobsDir) and
600
- not absPath.startswith(self.filesDir) and
601
- not absPath.startswith(self.jobFilesDir)):
652
+ if (
653
+ not absPath.startswith(self.jobsDir)
654
+ and not absPath.startswith(self.filesDir)
655
+ and not absPath.startswith(self.jobFilesDir)
656
+ ):
602
657
  # Don't even look for it, it is out of bounds.
603
658
  raise NoSuchFileException(file_id)
604
659
 
605
660
  try:
606
661
  st = os.stat(absPath)
607
- except os.error:
662
+ except OSError:
608
663
  return False
609
664
  if not stat.S_ISREG(st.st_mode):
610
665
  raise NoSuchFileException(file_id)
@@ -614,15 +669,17 @@ class FileJobStore(AbstractJobStore):
614
669
  # Duplicate a bunch of fileExists to save on stat calls
615
670
  absPath = self._get_file_path_from_id(file_id)
616
671
 
617
- if (not absPath.startswith(self.jobsDir) and
618
- not absPath.startswith(self.filesDir) and
619
- not absPath.startswith(self.jobFilesDir)):
672
+ if (
673
+ not absPath.startswith(self.jobsDir)
674
+ and not absPath.startswith(self.filesDir)
675
+ and not absPath.startswith(self.jobFilesDir)
676
+ ):
620
677
  # Don't even look for it, it is out of bounds.
621
678
  raise NoSuchFileException(file_id)
622
679
 
623
680
  try:
624
681
  st = os.stat(absPath)
625
- except os.error:
682
+ except OSError:
626
683
  return 0
627
684
  return st.st_size
628
685
 
@@ -632,7 +689,12 @@ class FileJobStore(AbstractJobStore):
632
689
  # File objects are context managers (CM) so we could simply return what open returns.
633
690
  # However, it is better to wrap it in another CM so as to prevent users from accessing
634
691
  # the file object directly, without a with statement.
635
- with open(self._get_file_path_from_id(file_id), 'wb' if encoding == None else 'wt', encoding=encoding, errors=errors) as f:
692
+ with open(
693
+ self._get_file_path_from_id(file_id),
694
+ "wb" if encoding == None else "wt",
695
+ encoding=encoding,
696
+ errors=errors,
697
+ ) as f:
636
698
  yield f
637
699
 
638
700
  @contextmanager
@@ -642,15 +704,13 @@ class FileJobStore(AbstractJobStore):
642
704
  file_id: Union[str, FileID],
643
705
  encoding: Literal[None] = None,
644
706
  errors: Optional[str] = None,
645
- ) -> Iterator[IO[bytes]]:
646
- ...
707
+ ) -> Iterator[IO[bytes]]: ...
647
708
 
648
709
  @contextmanager
649
710
  @overload
650
711
  def read_file_stream(
651
712
  self, file_id: Union[str, FileID], encoding: str, errors: Optional[str] = None
652
- ) -> Iterator[IO[str]]:
653
- ...
713
+ ) -> Iterator[IO[str]]: ...
654
714
 
655
715
  @contextmanager
656
716
  @overload
@@ -659,8 +719,7 @@ class FileJobStore(AbstractJobStore):
659
719
  file_id: Union[str, FileID],
660
720
  encoding: Optional[str] = None,
661
721
  errors: Optional[str] = None,
662
- ) -> Union[Iterator[IO[bytes]], Iterator[IO[str]]]:
663
- ...
722
+ ) -> Union[Iterator[IO[bytes]], Iterator[IO[str]]]: ...
664
723
 
665
724
  @contextmanager
666
725
  def read_file_stream(
@@ -696,18 +755,32 @@ class FileJobStore(AbstractJobStore):
696
755
  return os.path.join(self.sharedFilesDir, sharedFileName)
697
756
 
698
757
  @contextmanager
699
- def write_shared_file_stream(self, shared_file_name, encrypted=None, encoding=None, errors=None):
758
+ def write_shared_file_stream(
759
+ self, shared_file_name, encrypted=None, encoding=None, errors=None
760
+ ):
700
761
  # the isProtected parameter has no effect on the fileStore
701
762
  self._requireValidSharedFileName(shared_file_name)
702
- with AtomicFileCreate(self._get_shared_file_path(shared_file_name)) as tmpSharedFilePath:
703
- with open(tmpSharedFilePath, 'wb' if encoding == None else 'wt', encoding=encoding, errors=None) as f:
763
+ with AtomicFileCreate(
764
+ self._get_shared_file_path(shared_file_name)
765
+ ) as tmpSharedFilePath:
766
+ with open(
767
+ tmpSharedFilePath,
768
+ "wb" if encoding == None else "wt",
769
+ encoding=encoding,
770
+ errors=None,
771
+ ) as f:
704
772
  yield f
705
773
 
706
774
  @contextmanager
707
775
  def read_shared_file_stream(self, shared_file_name, encoding=None, errors=None):
708
776
  self._requireValidSharedFileName(shared_file_name)
709
777
  try:
710
- with open(self._get_shared_file_path(shared_file_name), 'rb' if encoding == None else 'rt', encoding=encoding, errors=errors) as f:
778
+ with open(
779
+ self._get_shared_file_path(shared_file_name),
780
+ "rb" if encoding == None else "rt",
781
+ encoding=encoding,
782
+ errors=errors,
783
+ ) as f:
711
784
  yield f
712
785
 
713
786
  except OSError as e:
@@ -745,15 +818,11 @@ class FileJobStore(AbstractJobStore):
745
818
  job_id = self._get_job_id_from_files_dir(job_instance_dir)
746
819
  jobs.append(job_id)
747
820
 
748
- for name in os.listdir(self.sharedFilesDir):
749
- # Announce all the shared files
750
- yield name
821
+ yield from os.listdir(self.sharedFilesDir)
751
822
 
752
823
  for file_dir_path in self._list_dynamic_spray_dir(self.filesDir):
753
824
  # Run on all the no-job files
754
- for dir_file in os.listdir(file_dir_path):
755
- # There ought to be just one file in here.
756
- yield dir_file
825
+ yield from os.listdir(file_dir_path)
757
826
 
758
827
  for job_store_id in jobs:
759
828
  # Files from _get_job_files_dir
@@ -765,9 +834,7 @@ class FileJobStore(AbstractJobStore):
765
834
  # Except the cleanup directory which we do later.
766
835
  continue
767
836
  file_dir_path = os.path.join(job_files_dir, file_dir)
768
- for dir_file in os.listdir(file_dir_path):
769
- # There ought to be just one file in here.
770
- yield dir_file
837
+ yield from os.listdir(file_dir_path)
771
838
 
772
839
  # Files from _get_job_files_cleanup_dir
773
840
  job_cleanup_files_dir = os.path.join(job_files_dir, "cleanup")
@@ -775,35 +842,70 @@ class FileJobStore(AbstractJobStore):
775
842
  for file_dir in os.listdir(job_cleanup_files_dir):
776
843
  # Each file is in its own directory
777
844
  file_dir_path = os.path.join(job_cleanup_files_dir, file_dir)
778
- for dir_file in os.listdir(file_dir_path):
779
- # There ought to be just one file in here.
780
- yield dir_file
845
+ yield from os.listdir(file_dir_path)
781
846
 
782
847
  def write_logs(self, msg):
783
848
  # Temporary files are placed in the stats directory tree
784
- tempStatsFileName = "stats" + str(uuid.uuid4().hex) + ".new"
785
- tempStatsFile = os.path.join(self._get_arbitrary_stats_dir(), tempStatsFileName)
786
- writeFormat = 'w' if isinstance(msg, str) else 'wb'
849
+ tempStatsFileName = self.LOG_PREFIX + str(uuid.uuid4().hex) + self.LOG_TEMP_SUFFIX
850
+ tempStatsFile = os.path.join(self._get_arbitrary_stats_inbox_dir(), tempStatsFileName)
851
+ writeFormat = "w" if isinstance(msg, str) else "wb"
787
852
  with open(tempStatsFile, writeFormat) as f:
788
853
  f.write(msg)
789
- os.rename(tempStatsFile, tempStatsFile[:-4]) # This operation is atomic
854
+ os.rename(tempStatsFile, tempStatsFile[:-len(self.LOG_TEMP_SUFFIX)]) # This operation is atomic
790
855
 
791
856
  def read_logs(self, callback, read_all=False):
792
- numberOfFilesProcessed = 0
793
- for tempDir in self._stats_directories():
794
- for tempFile in os.listdir(tempDir):
795
- if tempFile.startswith('stats'):
796
- absTempFile = os.path.join(tempDir, tempFile)
797
- if os.path.isfile(absTempFile):
798
- if read_all or not tempFile.endswith('.new'):
799
- with open(absTempFile, 'rb') as fH:
800
- callback(fH)
801
- numberOfFilesProcessed += 1
802
- newName = tempFile.rsplit('.', 1)[0] + '.new'
803
- newAbsTempFile = os.path.join(tempDir, newName)
857
+ files_processed = 0
858
+
859
+ # Holds pairs of a function to call to get directories to look at, and
860
+ # a flag for whether to archive the files found.
861
+ queries = []
862
+ if read_all:
863
+ # If looking at all logs, check the archive
864
+ queries.append((self._stats_archive_directories, False))
865
+ # Always check the inbox and archive from it. But do it after checking
866
+ # the archive to avoid duplicates in the same pass.
867
+ queries.append((self._stats_inbox_directories, True))
868
+
869
+ for to_call, should_archive in queries:
870
+ for log_dir in to_call():
871
+ for log_file in os.listdir(log_dir):
872
+ if not log_file.startswith(self.LOG_PREFIX):
873
+ # Skip anything not a log file (like the other spray
874
+ # directories)
875
+ continue
876
+ if log_file.endswith(self.LOG_TEMP_SUFFIX):
877
+ # Skip partially-written files, always.
878
+ continue
879
+
880
+ abs_log_file = os.path.join(log_dir, log_file)
881
+ if not os.path.isfile(abs_log_file):
882
+ # This can't be a log file.
883
+ continue
884
+ try:
885
+ opened_file = open(abs_log_file, "rb")
886
+ except FileNotFoundError:
887
+ # File disappeared before we could open it.
888
+ # Maybe someone else is reading logs?
889
+ continue
890
+ with opened_file as f:
891
+ callback(f)
892
+ files_processed += 1
893
+
894
+ if should_archive:
895
+ # We need to move the stats file to the archive.
896
+ # Since we have UUID stats file names we don't need
897
+ # to worry about collisions when it gets there.
898
+ new_dir = self._get_arbitrary_stats_archive_dir()
899
+ new_abs_log_file = os.path.join(new_dir, log_file)
900
+ try:
804
901
  # Mark this item as read
805
- os.rename(absTempFile, newAbsTempFile)
806
- return numberOfFilesProcessed
902
+ os.rename(abs_log_file, new_abs_log_file)
903
+ except FileNotFoundError:
904
+ # File we wanted to archive disappeared.
905
+ # Maybe someone else is reading logs?
906
+ # TODO: Raise ConcurrentFileModificationException?
907
+ continue
908
+ return files_processed
807
909
 
808
910
  ##########################################
809
911
  # Private methods
@@ -824,14 +926,14 @@ class FileJobStore(AbstractJobStore):
824
926
  :param str absPath: The absolute path to a job directory under self.jobsDir which represents a job.
825
927
  :rtype : string, string is the job ID, which is a path relative to self.jobsDir
826
928
  """
827
- return absPath[len(self.jobsDir)+1:]
929
+ return absPath[len(self.jobsDir) + 1 :]
828
930
 
829
931
  def _get_job_id_from_files_dir(self, absPath: str) -> str:
830
932
  """
831
933
  :param str absPath: The absolute path to a job directory under self.jobFilesDir which holds a job's files.
832
934
  :rtype : string, string is the job ID
833
935
  """
834
- return absPath[len(self.jobFilesDir)+1:]
936
+ return absPath[len(self.jobFilesDir) + 1 :]
835
937
 
836
938
  def _get_job_file_name(self, jobStoreID):
837
939
  """
@@ -913,14 +1015,14 @@ class FileJobStore(AbstractJobStore):
913
1015
  :rtype : string, string is the file ID.
914
1016
  """
915
1017
 
916
- return quote(absPath[len(self.jobStoreDir)+1:])
1018
+ return quote(absPath[len(self.jobStoreDir) + 1 :])
917
1019
 
918
1020
  def _check_job_store_file_id(self, jobStoreFileID):
919
1021
  """
920
1022
  :raise NoSuchFileException: if the file with ID jobStoreFileID does
921
1023
  not exist or is not a file
922
1024
  """
923
- if not self.file_exists(unquote(jobStoreFileID)):
1025
+ if not self.file_exists(jobStoreFileID):
924
1026
  raise NoSuchFileException(jobStoreFileID)
925
1027
 
926
1028
  def _get_arbitrary_jobs_dir_for_name(self, jobNameSlug):
@@ -943,15 +1045,35 @@ class FileJobStore(AbstractJobStore):
943
1045
  if len(os.listdir(self.jobsDir)) > self.fanOut:
944
1046
  # Make sure that we don't over-fill the root with too many unique job names.
945
1047
  # Go in a subdirectory tree, and then go by job name and make another tree.
946
- return self._get_dynamic_spray_dir(os.path.join(self._get_dynamic_spray_dir(self.jobsDir),
947
- self.JOB_NAME_DIR_PREFIX + jobNameSlug))
1048
+ return self._get_dynamic_spray_dir(
1049
+ os.path.join(
1050
+ self._get_dynamic_spray_dir(self.jobsDir),
1051
+ self.JOB_NAME_DIR_PREFIX + jobNameSlug,
1052
+ )
1053
+ )
948
1054
  else:
949
1055
  # Just go in the root
950
- return self._get_dynamic_spray_dir(os.path.join(self.jobsDir, self.JOB_NAME_DIR_PREFIX + jobNameSlug))
1056
+ return self._get_dynamic_spray_dir(
1057
+ os.path.join(self.jobsDir, self.JOB_NAME_DIR_PREFIX + jobNameSlug)
1058
+ )
1059
+
1060
+ def _get_arbitrary_stats_inbox_dir(self):
1061
+ """
1062
+ Gets a temporary directory in a multi-level hierarchy in
1063
+ self.stats_inbox, where stats files not yet seen by the leader live.
1064
+ The directory is not unique and may already have other stats files in it.
1065
+
1066
+ :rtype : string, path to temporary directory in which to place files/directories.
951
1067
 
952
- def _get_arbitrary_stats_dir(self):
1068
+
1069
+ """
1070
+
1071
+ return self._get_dynamic_spray_dir(self.stats_inbox)
1072
+
1073
+ def _get_arbitrary_stats_archive_dir(self):
953
1074
  """
954
- Gets a temporary directory in a multi-level hierarchy in self.statsDir.
1075
+ Gets a temporary directory in a multi-level hierarchy in
1076
+ self.stats_archive, where stats files already seen by the leader live.
955
1077
  The directory is not unique and may already have other stats files in it.
956
1078
 
957
1079
  :rtype : string, path to temporary directory in which to place files/directories.
@@ -959,7 +1081,7 @@ class FileJobStore(AbstractJobStore):
959
1081
 
960
1082
  """
961
1083
 
962
- return self._get_dynamic_spray_dir(self.statsDir)
1084
+ return self._get_dynamic_spray_dir(self.stats_archive)
963
1085
 
964
1086
  def _get_arbitrary_files_dir(self):
965
1087
  """
@@ -1091,17 +1213,27 @@ class FileJobStore(AbstractJobStore):
1091
1213
  continue
1092
1214
 
1093
1215
  # Now we have only the directories that are named after jobs. Look inside them.
1094
- yield from self._walk_dynamic_spray_dir(os.path.join(jobHoldingDir, jobNameDir))
1216
+ yield from self._walk_dynamic_spray_dir(
1217
+ os.path.join(jobHoldingDir, jobNameDir)
1218
+ )
1095
1219
 
1220
+ def _stats_inbox_directories(self):
1221
+ """
1222
+ :returns: an iterator to the temporary directories containing new stats
1223
+ files. They may also contain directories containing more stats
1224
+ files.
1225
+ """
1096
1226
 
1097
- def _stats_directories(self):
1227
+ return self._walk_dynamic_spray_dir(self.stats_inbox)
1228
+
1229
+ def _stats_archive_directories(self):
1098
1230
  """
1099
- :rtype : an iterator to the temporary directories containing stats
1100
- files. They may also contain directories containing more
1101
- stats files.
1231
+ :returns: an iterator to the temporary directories containing
1232
+ previously observed stats files. They may also contain directories
1233
+ containing more stats files.
1102
1234
  """
1103
1235
 
1104
- return self._walk_dynamic_spray_dir(self.statsDir)
1236
+ return self._walk_dynamic_spray_dir(self.stats_archive)
1105
1237
 
1106
1238
  def _get_unique_file_path(self, fileName, jobStoreID=None, cleanup=False):
1107
1239
  """
@@ -1142,18 +1274,24 @@ class FileJobStore(AbstractJobStore):
1142
1274
  self._check_job_store_id_assigned(jobStoreID)
1143
1275
  # Find where all its created files should live, depending on if
1144
1276
  # they need to go away when the job is deleted or not.
1145
- jobFilesDir = self._get_job_files_dir(jobStoreID) if not cleanup else self._get_job_files_cleanup_dir(jobStoreID)
1277
+ jobFilesDir = (
1278
+ self._get_job_files_dir(jobStoreID)
1279
+ if not cleanup
1280
+ else self._get_job_files_cleanup_dir(jobStoreID)
1281
+ )
1146
1282
 
1147
1283
  # Lazily create the parent directory.
1148
1284
  # We don't want our tree filled with confusingly empty directories.
1149
1285
  os.makedirs(jobFilesDir, exist_ok=True)
1150
1286
 
1151
1287
  # Then make a temp directory inside it
1152
- filesDir = os.path.join(jobFilesDir, 'file-' + uuid.uuid4().hex)
1288
+ filesDir = os.path.join(jobFilesDir, "file-" + uuid.uuid4().hex)
1153
1289
  os.mkdir(filesDir)
1154
1290
  return filesDir
1155
1291
  else:
1156
1292
  # Make a temporary file within the non-job-associated files hierarchy
1157
- filesDir = os.path.join(self._get_arbitrary_files_dir(), 'file-' + uuid.uuid4().hex)
1293
+ filesDir = os.path.join(
1294
+ self._get_arbitrary_files_dir(), "file-" + uuid.uuid4().hex
1295
+ )
1158
1296
  os.mkdir(filesDir)
1159
1297
  return filesDir