toil 6.1.0a1__py3-none-any.whl → 8.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (193)
  1. toil/__init__.py +122 -315
  2. toil/batchSystems/__init__.py +1 -0
  3. toil/batchSystems/abstractBatchSystem.py +173 -89
  4. toil/batchSystems/abstractGridEngineBatchSystem.py +272 -148
  5. toil/batchSystems/awsBatch.py +244 -135
  6. toil/batchSystems/cleanup_support.py +26 -16
  7. toil/batchSystems/contained_executor.py +31 -28
  8. toil/batchSystems/gridengine.py +86 -50
  9. toil/batchSystems/htcondor.py +166 -89
  10. toil/batchSystems/kubernetes.py +632 -382
  11. toil/batchSystems/local_support.py +20 -15
  12. toil/batchSystems/lsf.py +134 -81
  13. toil/batchSystems/lsfHelper.py +13 -11
  14. toil/batchSystems/mesos/__init__.py +41 -29
  15. toil/batchSystems/mesos/batchSystem.py +290 -151
  16. toil/batchSystems/mesos/executor.py +79 -50
  17. toil/batchSystems/mesos/test/__init__.py +31 -23
  18. toil/batchSystems/options.py +46 -28
  19. toil/batchSystems/registry.py +53 -19
  20. toil/batchSystems/singleMachine.py +296 -125
  21. toil/batchSystems/slurm.py +603 -138
  22. toil/batchSystems/torque.py +47 -33
  23. toil/bus.py +186 -76
  24. toil/common.py +664 -368
  25. toil/cwl/__init__.py +1 -1
  26. toil/cwl/cwltoil.py +1136 -483
  27. toil/cwl/utils.py +17 -22
  28. toil/deferred.py +63 -42
  29. toil/exceptions.py +5 -3
  30. toil/fileStores/__init__.py +5 -5
  31. toil/fileStores/abstractFileStore.py +140 -60
  32. toil/fileStores/cachingFileStore.py +717 -269
  33. toil/fileStores/nonCachingFileStore.py +116 -87
  34. toil/job.py +1225 -368
  35. toil/jobStores/abstractJobStore.py +416 -266
  36. toil/jobStores/aws/jobStore.py +863 -477
  37. toil/jobStores/aws/utils.py +201 -120
  38. toil/jobStores/conftest.py +3 -2
  39. toil/jobStores/fileJobStore.py +292 -154
  40. toil/jobStores/googleJobStore.py +140 -74
  41. toil/jobStores/utils.py +36 -15
  42. toil/leader.py +668 -272
  43. toil/lib/accelerators.py +115 -18
  44. toil/lib/aws/__init__.py +74 -31
  45. toil/lib/aws/ami.py +122 -87
  46. toil/lib/aws/iam.py +284 -108
  47. toil/lib/aws/s3.py +31 -0
  48. toil/lib/aws/session.py +214 -39
  49. toil/lib/aws/utils.py +287 -231
  50. toil/lib/bioio.py +13 -5
  51. toil/lib/compatibility.py +11 -6
  52. toil/lib/conversions.py +104 -47
  53. toil/lib/docker.py +131 -103
  54. toil/lib/ec2.py +361 -199
  55. toil/lib/ec2nodes.py +174 -106
  56. toil/lib/encryption/_dummy.py +5 -3
  57. toil/lib/encryption/_nacl.py +10 -6
  58. toil/lib/encryption/conftest.py +1 -0
  59. toil/lib/exceptions.py +26 -7
  60. toil/lib/expando.py +5 -3
  61. toil/lib/ftp_utils.py +217 -0
  62. toil/lib/generatedEC2Lists.py +127 -19
  63. toil/lib/humanize.py +6 -2
  64. toil/lib/integration.py +341 -0
  65. toil/lib/io.py +141 -15
  66. toil/lib/iterables.py +4 -2
  67. toil/lib/memoize.py +12 -8
  68. toil/lib/misc.py +66 -21
  69. toil/lib/objects.py +2 -2
  70. toil/lib/resources.py +68 -15
  71. toil/lib/retry.py +126 -81
  72. toil/lib/threading.py +299 -82
  73. toil/lib/throttle.py +16 -15
  74. toil/options/common.py +843 -409
  75. toil/options/cwl.py +175 -90
  76. toil/options/runner.py +50 -0
  77. toil/options/wdl.py +73 -17
  78. toil/provisioners/__init__.py +117 -46
  79. toil/provisioners/abstractProvisioner.py +332 -157
  80. toil/provisioners/aws/__init__.py +70 -33
  81. toil/provisioners/aws/awsProvisioner.py +1145 -715
  82. toil/provisioners/clusterScaler.py +541 -279
  83. toil/provisioners/gceProvisioner.py +282 -179
  84. toil/provisioners/node.py +155 -79
  85. toil/realtimeLogger.py +34 -22
  86. toil/resource.py +137 -75
  87. toil/server/app.py +128 -62
  88. toil/server/celery_app.py +3 -1
  89. toil/server/cli/wes_cwl_runner.py +82 -53
  90. toil/server/utils.py +54 -28
  91. toil/server/wes/abstract_backend.py +64 -26
  92. toil/server/wes/amazon_wes_utils.py +21 -15
  93. toil/server/wes/tasks.py +121 -63
  94. toil/server/wes/toil_backend.py +142 -107
  95. toil/server/wsgi_app.py +4 -3
  96. toil/serviceManager.py +58 -22
  97. toil/statsAndLogging.py +224 -70
  98. toil/test/__init__.py +282 -183
  99. toil/test/batchSystems/batchSystemTest.py +460 -210
  100. toil/test/batchSystems/batch_system_plugin_test.py +90 -0
  101. toil/test/batchSystems/test_gridengine.py +173 -0
  102. toil/test/batchSystems/test_lsf_helper.py +67 -58
  103. toil/test/batchSystems/test_slurm.py +110 -49
  104. toil/test/cactus/__init__.py +0 -0
  105. toil/test/cactus/test_cactus_integration.py +56 -0
  106. toil/test/cwl/cwlTest.py +496 -287
  107. toil/test/cwl/measure_default_memory.cwl +12 -0
  108. toil/test/cwl/not_run_required_input.cwl +29 -0
  109. toil/test/cwl/scatter_duplicate_outputs.cwl +40 -0
  110. toil/test/cwl/seqtk_seq.cwl +1 -1
  111. toil/test/docs/scriptsTest.py +69 -46
  112. toil/test/jobStores/jobStoreTest.py +427 -264
  113. toil/test/lib/aws/test_iam.py +118 -50
  114. toil/test/lib/aws/test_s3.py +16 -9
  115. toil/test/lib/aws/test_utils.py +5 -6
  116. toil/test/lib/dockerTest.py +118 -141
  117. toil/test/lib/test_conversions.py +113 -115
  118. toil/test/lib/test_ec2.py +58 -50
  119. toil/test/lib/test_integration.py +104 -0
  120. toil/test/lib/test_misc.py +12 -5
  121. toil/test/mesos/MesosDataStructuresTest.py +23 -10
  122. toil/test/mesos/helloWorld.py +7 -6
  123. toil/test/mesos/stress.py +25 -20
  124. toil/test/options/__init__.py +13 -0
  125. toil/test/options/options.py +42 -0
  126. toil/test/provisioners/aws/awsProvisionerTest.py +320 -150
  127. toil/test/provisioners/clusterScalerTest.py +440 -250
  128. toil/test/provisioners/clusterTest.py +166 -44
  129. toil/test/provisioners/gceProvisionerTest.py +174 -100
  130. toil/test/provisioners/provisionerTest.py +25 -13
  131. toil/test/provisioners/restartScript.py +5 -4
  132. toil/test/server/serverTest.py +188 -141
  133. toil/test/sort/restart_sort.py +137 -68
  134. toil/test/sort/sort.py +134 -66
  135. toil/test/sort/sortTest.py +91 -49
  136. toil/test/src/autoDeploymentTest.py +141 -101
  137. toil/test/src/busTest.py +20 -18
  138. toil/test/src/checkpointTest.py +8 -2
  139. toil/test/src/deferredFunctionTest.py +49 -35
  140. toil/test/src/dockerCheckTest.py +32 -24
  141. toil/test/src/environmentTest.py +135 -0
  142. toil/test/src/fileStoreTest.py +539 -272
  143. toil/test/src/helloWorldTest.py +7 -4
  144. toil/test/src/importExportFileTest.py +61 -31
  145. toil/test/src/jobDescriptionTest.py +46 -21
  146. toil/test/src/jobEncapsulationTest.py +2 -0
  147. toil/test/src/jobFileStoreTest.py +74 -50
  148. toil/test/src/jobServiceTest.py +187 -73
  149. toil/test/src/jobTest.py +121 -71
  150. toil/test/src/miscTests.py +19 -18
  151. toil/test/src/promisedRequirementTest.py +82 -36
  152. toil/test/src/promisesTest.py +7 -6
  153. toil/test/src/realtimeLoggerTest.py +10 -6
  154. toil/test/src/regularLogTest.py +71 -37
  155. toil/test/src/resourceTest.py +80 -49
  156. toil/test/src/restartDAGTest.py +36 -22
  157. toil/test/src/resumabilityTest.py +9 -2
  158. toil/test/src/retainTempDirTest.py +45 -14
  159. toil/test/src/systemTest.py +12 -8
  160. toil/test/src/threadingTest.py +44 -25
  161. toil/test/src/toilContextManagerTest.py +10 -7
  162. toil/test/src/userDefinedJobArgTypeTest.py +8 -5
  163. toil/test/src/workerTest.py +73 -23
  164. toil/test/utils/toilDebugTest.py +103 -33
  165. toil/test/utils/toilKillTest.py +4 -5
  166. toil/test/utils/utilsTest.py +245 -106
  167. toil/test/wdl/wdltoil_test.py +818 -149
  168. toil/test/wdl/wdltoil_test_kubernetes.py +91 -0
  169. toil/toilState.py +120 -35
  170. toil/utils/toilConfig.py +13 -4
  171. toil/utils/toilDebugFile.py +44 -27
  172. toil/utils/toilDebugJob.py +214 -27
  173. toil/utils/toilDestroyCluster.py +11 -6
  174. toil/utils/toilKill.py +8 -3
  175. toil/utils/toilLaunchCluster.py +256 -140
  176. toil/utils/toilMain.py +37 -16
  177. toil/utils/toilRsyncCluster.py +32 -14
  178. toil/utils/toilSshCluster.py +49 -22
  179. toil/utils/toilStats.py +356 -273
  180. toil/utils/toilStatus.py +292 -139
  181. toil/utils/toilUpdateEC2Instances.py +3 -1
  182. toil/version.py +12 -12
  183. toil/wdl/utils.py +5 -5
  184. toil/wdl/wdltoil.py +3913 -1033
  185. toil/worker.py +367 -184
  186. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/LICENSE +25 -0
  187. toil-8.0.0.dist-info/METADATA +173 -0
  188. toil-8.0.0.dist-info/RECORD +253 -0
  189. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/WHEEL +1 -1
  190. toil-6.1.0a1.dist-info/METADATA +0 -125
  191. toil-6.1.0a1.dist-info/RECORD +0 -237
  192. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/entry_points.txt +0 -0
  193. {toil-6.1.0a1.dist-info → toil-8.0.0.dist-info}/top_level.txt +0 -0
@@ -12,39 +12,41 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
  import errno
15
- import fcntl
16
15
  import logging
17
16
  import os
18
17
  import tempfile
19
18
  from collections import defaultdict
19
+ from collections.abc import Generator, Iterator
20
20
  from contextlib import contextmanager
21
- from typing import (IO,
22
- Any,
23
- Callable,
24
- ContextManager,
25
- DefaultDict,
26
- Dict,
27
- Generator,
28
- Iterator,
29
- List,
30
- Literal,
31
- Optional,
32
- Union,
33
- cast,
34
- overload)
21
+ from typing import (
22
+ IO,
23
+ Any,
24
+ Callable,
25
+ ContextManager,
26
+ DefaultDict,
27
+ Literal,
28
+ Optional,
29
+ Union,
30
+ cast,
31
+ overload,
32
+ )
35
33
 
36
34
  import dill
37
35
 
38
- from toil.common import getDirSizeRecursively, getFileSystemSize
36
+ from toil.common import getFileSystemSize
39
37
  from toil.fileStores import FileID
40
38
  from toil.fileStores.abstractFileStore import AbstractFileStore
41
39
  from toil.job import Job, JobDescription
42
40
  from toil.jobStores.abstractJobStore import AbstractJobStore
43
41
  from toil.lib.compatibility import deprecated
44
- from toil.lib.conversions import bytes2human
45
42
  from toil.lib.io import make_public_dir, robust_rmtree
46
43
  from toil.lib.retry import ErrorCondition, retry
47
- from toil.lib.threading import get_process_name, process_name_exists
44
+ from toil.lib.threading import (
45
+ get_process_name,
46
+ process_name_exists,
47
+ safe_lock,
48
+ safe_unlock_and_close,
49
+ )
48
50
 
49
51
  logger: logging.Logger = logging.getLogger(__name__)
50
52
 
@@ -60,7 +62,7 @@ class NonCachingFileStore(AbstractFileStore):
60
62
  super().__init__(jobStore, jobDesc, file_store_dir, waitForPreviousCommit)
61
63
  # This will be defined in the `open` method.
62
64
  self.jobStateFile: Optional[str] = None
63
- self.localFileMap: DefaultDict[str, List[str]] = defaultdict(list)
65
+ self.localFileMap: DefaultDict[str, list[str]] = defaultdict(list)
64
66
 
65
67
  self.check_for_state_corruption()
66
68
 
@@ -79,10 +81,10 @@ class NonCachingFileStore(AbstractFileStore):
79
81
 
80
82
  if coordination_dir and not os.path.exists(coordination_dir):
81
83
  raise RuntimeError(
82
- f'The Toil coordination directory at {coordination_dir} '
83
- f'was removed while the workflow was running! Please provide a '
84
- f'TOIL_COORDINATION_DIR or --coordinationDir at a location that '
85
- f'is safe from automated cleanup during the workflow run.'
84
+ f"The Toil coordination directory at {coordination_dir} "
85
+ f"was removed while the workflow was running! Please provide a "
86
+ f"TOIL_COORDINATION_DIR or --coordinationDir at a location that "
87
+ f"is safe from automated cleanup during the workflow run."
86
88
  )
87
89
 
88
90
  def check_for_state_corruption(self) -> None:
@@ -94,48 +96,43 @@ class NonCachingFileStore(AbstractFileStore):
94
96
 
95
97
  if self.jobStateFile and not os.path.exists(self.jobStateFile):
96
98
  raise RuntimeError(
97
- f'The job state file {self.jobStateFile} '
98
- f'was removed while the workflow was running! Please provide a '
99
- f'TOIL_COORDINATION_DIR or --coordinationDir at a location that '
100
- f'is safe from automated cleanup during the workflow run.'
99
+ f"The job state file {self.jobStateFile} "
100
+ f"was removed while the workflow was running! Please provide a "
101
+ f"TOIL_COORDINATION_DIR or --coordinationDir at a location that "
102
+ f"is safe from automated cleanup during the workflow run."
101
103
  )
102
104
 
103
105
  @contextmanager
104
106
  def open(self, job: Job) -> Generator[None, None, None]:
105
- jobReqs = job.disk
106
107
  startingDir = os.getcwd()
107
- self.localTempDir: str = make_public_dir(in_directory=self.localTempDir)
108
+ self.localTempDir: str = make_public_dir(
109
+ self.localTempDir, suggested_name="job"
110
+ )
108
111
  self._removeDeadJobs(self.coordination_dir)
109
112
  self.jobStateFile = self._createJobStateFile()
110
113
  self.check_for_state_corruption()
111
114
  freeSpace, diskSize = getFileSystemSize(self.localTempDir)
112
115
  if freeSpace <= 0.1 * diskSize:
113
- logger.warning(f'Starting job {self.jobName} with less than 10%% of disk space remaining.')
116
+ logger.warning(
117
+ f"Starting job {self.jobName} with less than 10%% of disk space remaining."
118
+ )
114
119
  try:
115
120
  os.chdir(self.localTempDir)
116
121
  with super().open(job):
117
122
  yield
118
123
  finally:
119
- disk = getDirSizeRecursively(self.localTempDir)
120
- percent = float(disk) / jobReqs * 100 if jobReqs > 0 else 0.0
121
- disk_usage = (f"Job {self.jobName} used {percent:.2f}% disk ({bytes2human(disk)}B [{disk}B] used, "
122
- f"{bytes2human(jobReqs)}B [{jobReqs}B] requested).")
123
- if disk > jobReqs:
124
- self.log_to_leader("Job used more disk than requested. For CWL, consider increasing the outdirMin "
125
- f"requirement, otherwise, consider increasing the disk requirement. {disk_usage}",
126
- level=logging.WARNING)
127
- else:
128
- self.log_to_leader(disk_usage, level=logging.DEBUG)
129
124
  os.chdir(startingDir)
130
125
  # Finally delete the job from the worker
131
126
  self.check_for_state_corruption()
132
127
  try:
133
128
  os.remove(self.jobStateFile)
134
129
  except FileNotFoundError:
135
- logger.exception('Job state file %s has gone missing unexpectedly; some cleanup for failed jobs may be getting skipped!', self.jobStateFile)
136
- pass
130
+ logger.exception(
131
+ "Job state file %s has gone missing unexpectedly; some cleanup for failed jobs may be getting skipped!",
132
+ self.jobStateFile,
133
+ )
137
134
 
138
- def writeGlobalFile(self, localFileName: str, cleanup: bool=False) -> FileID:
135
+ def writeGlobalFile(self, localFileName: str, cleanup: bool = False) -> FileID:
139
136
  absLocalFileName = self._resolveAbsoluteLocalPath(localFileName)
140
137
  creatorID = str(self.jobDesc.jobStoreID)
141
138
  fileStoreID = self.jobStore.write_file(absLocalFileName, creatorID, cleanup)
@@ -145,12 +142,20 @@ class NonCachingFileStore(AbstractFileStore):
145
142
  self.localFileMap[fileStoreID].append(absLocalFileName)
146
143
  return FileID.forPath(fileStoreID, absLocalFileName)
147
144
 
148
- def readGlobalFile(self, fileStoreID: str, userPath: Optional[str] = None, cache: bool=True, mutable: bool=False,
149
- symlink: bool=False) -> str:
145
+ def readGlobalFile(
146
+ self,
147
+ fileStoreID: str,
148
+ userPath: Optional[str] = None,
149
+ cache: bool = True,
150
+ mutable: bool = False,
151
+ symlink: bool = False,
152
+ ) -> str:
150
153
  if userPath is not None:
151
154
  localFilePath = self._resolveAbsoluteLocalPath(userPath)
152
155
  if os.path.exists(localFilePath):
153
- raise RuntimeError(' File %s ' % localFilePath + ' exists. Cannot Overwrite.')
156
+ raise RuntimeError(
157
+ " File %s " % localFilePath + " exists. Cannot Overwrite."
158
+ )
154
159
  else:
155
160
  localFilePath = self.getLocalTempFileName()
156
161
 
@@ -165,25 +170,30 @@ class NonCachingFileStore(AbstractFileStore):
165
170
  fileStoreID: str,
166
171
  encoding: Literal[None] = None,
167
172
  errors: Optional[str] = None,
168
- ) -> ContextManager[IO[bytes]]:
169
- ...
173
+ ) -> ContextManager[IO[bytes]]: ...
170
174
 
171
175
  @overload
172
176
  def readGlobalFileStream(
173
177
  self, fileStoreID: str, encoding: str, errors: Optional[str] = None
174
- ) -> ContextManager[IO[str]]:
175
- ...
178
+ ) -> ContextManager[IO[str]]: ...
176
179
 
177
180
  # TODO: This seems to hit https://github.com/python/mypy/issues/11373
178
181
  # But that is supposedly fixed.
179
182
 
180
- @contextmanager # type: ignore
181
- def readGlobalFileStream(self, fileStoreID: str, encoding: Optional[str] = None, errors: Optional[str] = None) -> Iterator[Union[IO[bytes], IO[str]]]:
182
- with self.jobStore.read_file_stream(fileStoreID, encoding=encoding, errors=errors) as f:
183
+ @contextmanager # type: ignore
184
+ def readGlobalFileStream(
185
+ self,
186
+ fileStoreID: str,
187
+ encoding: Optional[str] = None,
188
+ errors: Optional[str] = None,
189
+ ) -> Iterator[Union[IO[bytes], IO[str]]]:
190
+ with self.jobStore.read_file_stream(
191
+ fileStoreID, encoding=encoding, errors=errors
192
+ ) as f:
183
193
  self.logAccess(fileStoreID)
184
194
  yield f
185
195
 
186
- @deprecated(new_function_name='export_file')
196
+ @deprecated(new_function_name="export_file")
187
197
  def exportFile(self, jobStoreFileID: FileID, dstUrl: str) -> None:
188
198
  return self.export_file(jobStoreFileID, dstUrl)
189
199
 
@@ -194,7 +204,9 @@ class NonCachingFileStore(AbstractFileStore):
194
204
  try:
195
205
  localFilePaths = self.localFileMap.pop(fileStoreID)
196
206
  except KeyError:
197
- raise OSError(errno.ENOENT, "Attempting to delete local copies of a file with none")
207
+ raise OSError(
208
+ errno.ENOENT, "Attempting to delete local copies of a file with none"
209
+ )
198
210
  else:
199
211
  for localFilePath in localFilePaths:
200
212
  os.remove(localFilePath)
@@ -245,7 +257,6 @@ class NonCachingFileStore(AbstractFileStore):
245
257
  self._terminateEvent.set()
246
258
  raise
247
259
 
248
-
249
260
  def __del__(self) -> None:
250
261
  """
251
262
  Cleanup function that is run when destroying the class instance. Nothing to do since there
@@ -253,7 +264,9 @@ class NonCachingFileStore(AbstractFileStore):
253
264
  """
254
265
 
255
266
  @classmethod
256
- def _removeDeadJobs(cls, coordination_dir: str, batchSystemShutdown: bool=False) -> None:
267
+ def _removeDeadJobs(
268
+ cls, coordination_dir: str, batchSystemShutdown: bool = False
269
+ ) -> None:
257
270
  """
258
271
  Look at the state of all jobs registered in the individual job state files, and handle them
259
272
  (clean up the disk)
@@ -266,26 +279,34 @@ class NonCachingFileStore(AbstractFileStore):
266
279
  cls.check_for_coordination_corruption(coordination_dir)
267
280
 
268
281
  for jobState in cls._getAllJobStates(coordination_dir):
269
- if not process_name_exists(coordination_dir, jobState['jobProcessName']):
282
+ if not process_name_exists(coordination_dir, jobState["jobProcessName"]):
270
283
  # We need to have a race to pick someone to clean up.
271
284
 
272
285
  try:
273
- # Open the directory
274
- dirFD = os.open(jobState['jobDir'], os.O_RDONLY)
286
+ # Open the directory.
287
+ # We can't open a directory for write, only for read.
288
+ dirFD = os.open(jobState["jobDir"], os.O_RDONLY)
275
289
  except FileNotFoundError:
276
290
  # The cleanup has happened and we can't contest for it
277
291
  continue
278
292
 
279
293
  try:
280
- # Try and lock it
281
- fcntl.lockf(dirFD, fcntl.LOCK_EX | fcntl.LOCK_NB)
294
+ # Try and lock it non-blocking
295
+ safe_lock(dirFD, block=False)
282
296
  except OSError as e:
283
- # We lost the race. Someone else is alive and has it locked.
284
297
  os.close(dirFD)
298
+ if e.errno not in (errno.EACCES, errno.EAGAIN):
299
+ # Something went wrong
300
+ raise
301
+ # Otherwise, we lost the race. Someone else is alive and
302
+ # has it locked. So loop around again.
285
303
  else:
286
304
  # We got it
287
- logger.warning('Detected that job (%s) prematurely terminated. Fixing the '
288
- 'state of the job on disk.', jobState['jobName'])
305
+ logger.warning(
306
+ "Detected that job (%s) prematurely terminated. Fixing the "
307
+ "state of the job on disk.",
308
+ jobState["jobName"],
309
+ )
289
310
 
290
311
  try:
291
312
  if not batchSystemShutdown:
@@ -293,13 +314,12 @@ class NonCachingFileStore(AbstractFileStore):
293
314
  # Delete the old work directory if it still exists. Do this only during
294
315
  # the life of the program and dont' do it during the batch system
295
316
  # cleanup. Leave that to the batch system cleanup code.
296
- robust_rmtree(jobState['jobDir'])
317
+ robust_rmtree(jobState["jobDir"])
297
318
  finally:
298
- fcntl.lockf(dirFD, fcntl.LOCK_UN)
299
- os.close(dirFD)
319
+ safe_unlock_and_close(dirFD)
300
320
 
301
321
  @classmethod
302
- def _getAllJobStates(cls, coordination_dir: str) -> Iterator[Dict[str, str]]:
322
+ def _getAllJobStates(cls, coordination_dir: str) -> Iterator[dict[str, str]]:
303
323
  """
304
324
  Generator function that deserializes and yields the job state for every job on the node,
305
325
  one at a time.
@@ -316,7 +336,7 @@ class NonCachingFileStore(AbstractFileStore):
316
336
  # So we need to work in bytes.
317
337
  for entry in os.scandir(os.fsencode(coordination_dir)):
318
338
  # For each job state file in the coordination directory
319
- if entry.name.endswith(b'.jobState'):
339
+ if entry.name.endswith(b".jobState"):
320
340
  # This is the state of a job
321
341
  jobStateFiles.append(os.fsdecode(entry.path))
322
342
 
@@ -329,7 +349,7 @@ class NonCachingFileStore(AbstractFileStore):
329
349
  # job finished & deleted its jobState file since the jobState files were discovered
330
350
  continue
331
351
  elif e.errno == 5:
332
- # This is a OSError: [Errno 5] Input/output error (jobStatefile seems to disappear
352
+ # This is a OSError: [Errno 5] Input/output error (jobStatefile seems to disappear
333
353
  # on network file system sometimes)
334
354
  continue
335
355
  else:
@@ -337,16 +357,16 @@ class NonCachingFileStore(AbstractFileStore):
337
357
 
338
358
  @staticmethod
339
359
  # Retry on any OSError except FileNotFoundError, which we throw immediately
340
- @retry(errors=[
341
- OSError,
342
- ErrorCondition(
343
- error=FileNotFoundError,
344
- retry_on_this_condition=False
345
- )])
346
- def _readJobState(jobStateFileName: str) -> Dict[str, str]:
347
- with open(jobStateFileName, 'rb') as fH:
360
+ @retry(
361
+ errors=[
362
+ OSError,
363
+ ErrorCondition(error=FileNotFoundError, retry_on_this_condition=False),
364
+ ]
365
+ )
366
+ def _readJobState(jobStateFileName: str) -> dict[str, str]:
367
+ with open(jobStateFileName, "rb") as fH:
348
368
  state = dill.load(fH)
349
- return cast(Dict[str, str], state)
369
+ return cast(dict[str, str], state)
350
370
 
351
371
  def _createJobStateFile(self) -> str:
352
372
  """
@@ -359,17 +379,26 @@ class NonCachingFileStore(AbstractFileStore):
359
379
  :rtype: str
360
380
  """
361
381
  self.check_for_state_corruption()
362
- jobState = {'jobProcessName': get_process_name(self.coordination_dir),
363
- 'jobName': self.jobName,
364
- 'jobDir': self.localTempDir}
365
- (fd, jobStateFile) = tempfile.mkstemp(suffix='.jobState.tmp', dir=self.coordination_dir)
366
- with open(fd, 'wb') as fH:
382
+ jobState = {
383
+ "jobProcessName": get_process_name(self.coordination_dir),
384
+ "jobName": self.jobName,
385
+ "jobDir": self.localTempDir,
386
+ }
387
+ try:
388
+ (fd, jobStateFile) = tempfile.mkstemp(
389
+ suffix=".jobState.tmp", dir=self.coordination_dir
390
+ )
391
+ except Exception as e:
392
+ raise RuntimeError(
393
+ "Could not make state file in " + self.coordination_dir
394
+ ) from e
395
+ with open(fd, "wb") as fH:
367
396
  # Write data
368
397
  dill.dump(jobState, fH)
369
398
  # Drop suffix
370
- jobStateFile = jobStateFile[:-len('.tmp')]
399
+ jobStateFile = jobStateFile[: -len(".tmp")]
371
400
  # Put in place
372
- os.rename(jobStateFile + '.tmp', jobStateFile)
401
+ os.rename(jobStateFile + ".tmp", jobStateFile)
373
402
  return jobStateFile
374
403
 
375
404
  @classmethod