toil 5.12.0__py3-none-any.whl → 6.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +18 -13
- toil/batchSystems/abstractBatchSystem.py +39 -13
- toil/batchSystems/abstractGridEngineBatchSystem.py +24 -24
- toil/batchSystems/awsBatch.py +14 -14
- toil/batchSystems/cleanup_support.py +7 -3
- toil/batchSystems/contained_executor.py +3 -3
- toil/batchSystems/htcondor.py +0 -1
- toil/batchSystems/kubernetes.py +34 -31
- toil/batchSystems/local_support.py +3 -1
- toil/batchSystems/lsf.py +7 -7
- toil/batchSystems/mesos/batchSystem.py +7 -7
- toil/batchSystems/options.py +32 -83
- toil/batchSystems/registry.py +104 -23
- toil/batchSystems/singleMachine.py +16 -13
- toil/batchSystems/slurm.py +87 -16
- toil/batchSystems/torque.py +0 -1
- toil/bus.py +44 -8
- toil/common.py +544 -753
- toil/cwl/__init__.py +28 -32
- toil/cwl/cwltoil.py +595 -574
- toil/cwl/utils.py +55 -10
- toil/exceptions.py +1 -1
- toil/fileStores/__init__.py +2 -2
- toil/fileStores/abstractFileStore.py +88 -14
- toil/fileStores/cachingFileStore.py +610 -549
- toil/fileStores/nonCachingFileStore.py +46 -22
- toil/job.py +182 -101
- toil/jobStores/abstractJobStore.py +161 -95
- toil/jobStores/aws/jobStore.py +23 -9
- toil/jobStores/aws/utils.py +6 -6
- toil/jobStores/fileJobStore.py +116 -18
- toil/jobStores/googleJobStore.py +16 -7
- toil/jobStores/utils.py +5 -6
- toil/leader.py +87 -56
- toil/lib/accelerators.py +10 -5
- toil/lib/aws/__init__.py +3 -14
- toil/lib/aws/ami.py +22 -9
- toil/lib/aws/iam.py +21 -13
- toil/lib/aws/session.py +2 -16
- toil/lib/aws/utils.py +4 -5
- toil/lib/compatibility.py +1 -1
- toil/lib/conversions.py +26 -3
- toil/lib/docker.py +22 -23
- toil/lib/ec2.py +10 -6
- toil/lib/ec2nodes.py +106 -100
- toil/lib/encryption/_nacl.py +2 -1
- toil/lib/generatedEC2Lists.py +325 -18
- toil/lib/io.py +49 -2
- toil/lib/misc.py +1 -1
- toil/lib/resources.py +9 -2
- toil/lib/threading.py +101 -38
- toil/options/common.py +736 -0
- toil/options/cwl.py +336 -0
- toil/options/wdl.py +37 -0
- toil/provisioners/abstractProvisioner.py +9 -4
- toil/provisioners/aws/__init__.py +3 -6
- toil/provisioners/aws/awsProvisioner.py +6 -0
- toil/provisioners/clusterScaler.py +3 -2
- toil/provisioners/gceProvisioner.py +2 -2
- toil/realtimeLogger.py +2 -1
- toil/resource.py +24 -18
- toil/server/app.py +2 -3
- toil/server/cli/wes_cwl_runner.py +4 -4
- toil/server/utils.py +1 -1
- toil/server/wes/abstract_backend.py +3 -2
- toil/server/wes/amazon_wes_utils.py +5 -4
- toil/server/wes/tasks.py +2 -3
- toil/server/wes/toil_backend.py +2 -10
- toil/server/wsgi_app.py +2 -0
- toil/serviceManager.py +12 -10
- toil/statsAndLogging.py +41 -9
- toil/test/__init__.py +29 -54
- toil/test/batchSystems/batchSystemTest.py +11 -111
- toil/test/batchSystems/test_slurm.py +24 -8
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +58 -0
- toil/test/cwl/cwlTest.py +438 -223
- toil/test/cwl/glob_dir.cwl +15 -0
- toil/test/cwl/preemptible.cwl +21 -0
- toil/test/cwl/preemptible_expression.cwl +28 -0
- toil/test/cwl/revsort.cwl +1 -1
- toil/test/cwl/revsort2.cwl +1 -1
- toil/test/docs/scriptsTest.py +2 -3
- toil/test/jobStores/jobStoreTest.py +34 -21
- toil/test/lib/aws/test_iam.py +4 -14
- toil/test/lib/aws/test_utils.py +0 -3
- toil/test/lib/dockerTest.py +4 -4
- toil/test/lib/test_ec2.py +12 -17
- toil/test/mesos/helloWorld.py +4 -5
- toil/test/mesos/stress.py +1 -1
- toil/test/{wdl/conftest.py → options/__init__.py} +0 -10
- toil/test/options/options.py +37 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
- toil/test/provisioners/clusterScalerTest.py +6 -4
- toil/test/provisioners/clusterTest.py +23 -11
- toil/test/provisioners/gceProvisionerTest.py +0 -6
- toil/test/provisioners/restartScript.py +3 -2
- toil/test/server/serverTest.py +1 -1
- toil/test/sort/restart_sort.py +2 -1
- toil/test/sort/sort.py +2 -1
- toil/test/sort/sortTest.py +2 -13
- toil/test/src/autoDeploymentTest.py +45 -45
- toil/test/src/busTest.py +5 -5
- toil/test/src/checkpointTest.py +2 -2
- toil/test/src/deferredFunctionTest.py +1 -1
- toil/test/src/fileStoreTest.py +32 -16
- toil/test/src/helloWorldTest.py +1 -1
- toil/test/src/importExportFileTest.py +1 -1
- toil/test/src/jobDescriptionTest.py +2 -1
- toil/test/src/jobServiceTest.py +1 -1
- toil/test/src/jobTest.py +18 -18
- toil/test/src/miscTests.py +5 -3
- toil/test/src/promisedRequirementTest.py +3 -3
- toil/test/src/realtimeLoggerTest.py +1 -1
- toil/test/src/resourceTest.py +2 -2
- toil/test/src/restartDAGTest.py +1 -1
- toil/test/src/resumabilityTest.py +36 -2
- toil/test/src/retainTempDirTest.py +1 -1
- toil/test/src/systemTest.py +2 -2
- toil/test/src/toilContextManagerTest.py +2 -2
- toil/test/src/userDefinedJobArgTypeTest.py +1 -1
- toil/test/utils/toilDebugTest.py +98 -32
- toil/test/utils/toilKillTest.py +2 -2
- toil/test/utils/utilsTest.py +23 -3
- toil/test/wdl/wdltoil_test.py +223 -45
- toil/toilState.py +7 -6
- toil/utils/toilClean.py +1 -1
- toil/utils/toilConfig.py +36 -0
- toil/utils/toilDebugFile.py +60 -33
- toil/utils/toilDebugJob.py +39 -12
- toil/utils/toilDestroyCluster.py +1 -1
- toil/utils/toilKill.py +1 -1
- toil/utils/toilLaunchCluster.py +13 -2
- toil/utils/toilMain.py +3 -2
- toil/utils/toilRsyncCluster.py +1 -1
- toil/utils/toilSshCluster.py +1 -1
- toil/utils/toilStats.py +445 -305
- toil/utils/toilStatus.py +2 -5
- toil/version.py +10 -10
- toil/wdl/utils.py +2 -122
- toil/wdl/wdltoil.py +1257 -492
- toil/worker.py +55 -46
- toil-6.1.0.dist-info/METADATA +124 -0
- toil-6.1.0.dist-info/RECORD +241 -0
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/WHEEL +1 -1
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/entry_points.txt +0 -1
- toil/batchSystems/parasol.py +0 -379
- toil/batchSystems/tes.py +0 -459
- toil/test/batchSystems/parasolTestSupport.py +0 -117
- toil/test/wdl/builtinTest.py +0 -506
- toil/test/wdl/toilwdlTest.py +0 -522
- toil/wdl/toilwdl.py +0 -141
- toil/wdl/versions/dev.py +0 -107
- toil/wdl/versions/draft2.py +0 -980
- toil/wdl/versions/v1.py +0 -794
- toil/wdl/wdl_analysis.py +0 -116
- toil/wdl/wdl_functions.py +0 -997
- toil/wdl/wdl_synthesis.py +0 -1011
- toil/wdl/wdl_types.py +0 -243
- toil-5.12.0.dist-info/METADATA +0 -118
- toil-5.12.0.dist-info/RECORD +0 -244
- /toil/{wdl/versions → options}/__init__.py +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/LICENSE +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/top_level.txt +0 -0
toil/lib/io.py
CHANGED
|
@@ -2,6 +2,7 @@ import logging
|
|
|
2
2
|
import os
|
|
3
3
|
import shutil
|
|
4
4
|
import stat
|
|
5
|
+
import tempfile
|
|
5
6
|
import uuid
|
|
6
7
|
from contextlib import contextmanager
|
|
7
8
|
from io import BytesIO
|
|
@@ -9,6 +10,26 @@ from typing import IO, Any, Callable, Iterator, Optional, Union
|
|
|
9
10
|
|
|
10
11
|
logger = logging.getLogger(__name__)
|
|
11
12
|
|
|
13
|
+
def mkdtemp(suffix: Optional[str] = None, prefix: Optional[str] = None, dir: Optional[str] = None) -> str:
|
|
14
|
+
"""
|
|
15
|
+
Make a temporary directory like tempfile.mkdtemp, but with relaxed permissions.
|
|
16
|
+
|
|
17
|
+
The permissions on the directory will be 711 instead of 700, allowing the
|
|
18
|
+
group and all other users to traverse the directory. This is necessary if
|
|
19
|
+
the directory is on NFS and the Docker daemon would like to mount it or a
|
|
20
|
+
file inside it into a container, because on NFS even the Docker daemon
|
|
21
|
+
appears bound by the file permissions.
|
|
22
|
+
|
|
23
|
+
See <https://github.com/DataBiosphere/toil/issues/4644>, and
|
|
24
|
+
<https://stackoverflow.com/a/67928880> which talks about a similar problem
|
|
25
|
+
but in the context of user namespaces.
|
|
26
|
+
"""
|
|
27
|
+
# Make the directory
|
|
28
|
+
result = tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=dir)
|
|
29
|
+
# Grant all the permissions: full control for user, and execute for group and other
|
|
30
|
+
os.chmod(result, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
|
|
31
|
+
# Return the path created
|
|
32
|
+
return result
|
|
12
33
|
|
|
13
34
|
def robust_rmtree(path: Union[str, bytes]) -> None:
|
|
14
35
|
"""
|
|
@@ -161,17 +182,43 @@ def make_public_dir(in_directory: Optional[str] = None) -> str:
|
|
|
161
182
|
os.chmod(this_should_never_happen, 0o777)
|
|
162
183
|
return this_should_never_happen
|
|
163
184
|
|
|
164
|
-
def try_path(path: str) -> Optional[str]:
|
|
185
|
+
def try_path(path: str, min_size: int = 100 * 1024 * 1024) -> Optional[str]:
|
|
165
186
|
"""
|
|
166
187
|
Try to use the given path. Return it if it exists or can be made,
|
|
167
188
|
and we can make things within it, or None otherwise.
|
|
189
|
+
|
|
190
|
+
:param min_size: Reject paths on filesystems smaller than this many bytes.
|
|
168
191
|
"""
|
|
192
|
+
|
|
169
193
|
try:
|
|
170
194
|
os.makedirs(path, exist_ok=True)
|
|
171
195
|
except OSError:
|
|
172
196
|
# Maybe we lack permissions
|
|
173
197
|
return None
|
|
174
|
-
|
|
198
|
+
|
|
199
|
+
if not os.path.exists(path):
|
|
200
|
+
# We didn't manage to make it
|
|
201
|
+
return None
|
|
202
|
+
|
|
203
|
+
if not os.access(path, os.W_OK):
|
|
204
|
+
# It doesn't look writable
|
|
205
|
+
return None
|
|
206
|
+
|
|
207
|
+
try:
|
|
208
|
+
stats = os.statvfs(path)
|
|
209
|
+
except OSError:
|
|
210
|
+
# Maybe we lack permissions
|
|
211
|
+
return None
|
|
212
|
+
|
|
213
|
+
# Is the filesystem big enough?
|
|
214
|
+
# We need to look at the FS size and not the free space so we don't change
|
|
215
|
+
# over to a different filesystem when this one fills up.
|
|
216
|
+
fs_size = stats.f_frsize * stats.f_blocks
|
|
217
|
+
if fs_size < min_size:
|
|
218
|
+
# Too small
|
|
219
|
+
return None
|
|
220
|
+
|
|
221
|
+
return path
|
|
175
222
|
|
|
176
223
|
|
|
177
224
|
class WriteWatchingStream:
|
toil/lib/misc.py
CHANGED
toil/lib/resources.py
CHANGED
|
@@ -13,6 +13,8 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
import fnmatch
|
|
15
15
|
import os
|
|
16
|
+
import math
|
|
17
|
+
import sys
|
|
16
18
|
import resource
|
|
17
19
|
from typing import List, Tuple
|
|
18
20
|
|
|
@@ -20,12 +22,17 @@ from typing import List, Tuple
|
|
|
20
22
|
def get_total_cpu_time_and_memory_usage() -> Tuple[float, int]:
|
|
21
23
|
"""
|
|
22
24
|
Gives the total cpu time of itself and all its children, and the maximum RSS memory usage of
|
|
23
|
-
itself and its single largest child.
|
|
25
|
+
itself and its single largest child (in kibibytes).
|
|
24
26
|
"""
|
|
25
27
|
me = resource.getrusage(resource.RUSAGE_SELF)
|
|
26
28
|
children = resource.getrusage(resource.RUSAGE_CHILDREN)
|
|
27
29
|
total_cpu_time = me.ru_utime + me.ru_stime + children.ru_utime + children.ru_stime
|
|
28
30
|
total_memory_usage = me.ru_maxrss + children.ru_maxrss
|
|
31
|
+
if sys.platform == "darwin":
|
|
32
|
+
# On Linux, getrusage works in "kilobytes" (really kibibytes), but on
|
|
33
|
+
# Mac it works in bytes. See
|
|
34
|
+
# <https://github.com/python/cpython/issues/74698>
|
|
35
|
+
total_memory_usage = int(math.ceil(total_memory_usage / 1024))
|
|
29
36
|
return total_cpu_time, total_memory_usage
|
|
30
37
|
|
|
31
38
|
|
|
@@ -42,7 +49,7 @@ def glob(glob_pattern: str, directoryname: str) -> List[str]:
|
|
|
42
49
|
the glob_pattern and returns a list=[].
|
|
43
50
|
|
|
44
51
|
:param directoryname: Any accessible folder name on the filesystem.
|
|
45
|
-
:param glob_pattern: A string like
|
|
52
|
+
:param glob_pattern: A string like ``*.txt``, which would find all text files.
|
|
46
53
|
:return: A list=[] of absolute filepaths matching the glob pattern.
|
|
47
54
|
"""
|
|
48
55
|
matches = []
|
toil/lib/threading.py
CHANGED
|
@@ -16,6 +16,7 @@
|
|
|
16
16
|
# Note: renamed from "threading.py" to "threading.py" to avoid conflicting imports
|
|
17
17
|
# from the built-in "threading" from psutil in python3.9
|
|
18
18
|
import atexit
|
|
19
|
+
import errno
|
|
19
20
|
import fcntl
|
|
20
21
|
import logging
|
|
21
22
|
import math
|
|
@@ -25,7 +26,7 @@ import tempfile
|
|
|
25
26
|
import threading
|
|
26
27
|
import traceback
|
|
27
28
|
from contextlib import contextmanager
|
|
28
|
-
from typing import
|
|
29
|
+
from typing import Dict, Iterator, Optional, Union, cast
|
|
29
30
|
|
|
30
31
|
import psutil # type: ignore
|
|
31
32
|
|
|
@@ -108,9 +109,12 @@ def cpu_count() -> int:
|
|
|
108
109
|
return cast(int, cached)
|
|
109
110
|
|
|
110
111
|
# Get the fallback answer of all the CPUs on the machine
|
|
111
|
-
|
|
112
|
+
psutil_cpu_count = cast(Optional[int], psutil.cpu_count(logical=True))
|
|
113
|
+
if psutil_cpu_count is None:
|
|
114
|
+
logger.debug('Could not retrieve the logical CPU count.')
|
|
112
115
|
|
|
113
|
-
|
|
116
|
+
total_machine_size: Union[float, int] = psutil_cpu_count if psutil_cpu_count is not None else float('inf')
|
|
117
|
+
logger.debug('Total machine size: %s core(s)', total_machine_size)
|
|
114
118
|
|
|
115
119
|
# cgroups may limit the size
|
|
116
120
|
cgroup_size: Union[float, int] = float('inf')
|
|
@@ -150,13 +154,13 @@ def cpu_count() -> int:
|
|
|
150
154
|
if quota == -1:
|
|
151
155
|
# But the quota can be -1 for unset.
|
|
152
156
|
# Assume we can use the whole machine.
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
157
|
+
cgroup_size = float('inf')
|
|
158
|
+
else:
|
|
159
|
+
# The thread count is how many multiples of a wall clock period we
|
|
160
|
+
# can burn in that period.
|
|
161
|
+
cgroup_size = int(math.ceil(float(quota)/float(period)))
|
|
158
162
|
|
|
159
|
-
logger.debug('Control group size in cores: %
|
|
163
|
+
logger.debug('Control group size in cores: %s', cgroup_size)
|
|
160
164
|
except:
|
|
161
165
|
# We can't actually read these cgroup fields. Maybe we are a mac or something.
|
|
162
166
|
logger.debug('Could not inspect cgroup: %s', traceback.format_exc())
|
|
@@ -174,9 +178,16 @@ def cpu_count() -> int:
|
|
|
174
178
|
else:
|
|
175
179
|
logger.debug('CPU affinity not available')
|
|
176
180
|
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
181
|
+
limit: Union[float, int] = float('inf')
|
|
182
|
+
# Apply all the limits to take the smallest
|
|
183
|
+
limit = min(limit, total_machine_size)
|
|
184
|
+
limit = min(limit, cgroup_size)
|
|
185
|
+
limit = min(limit, affinity_size)
|
|
186
|
+
if limit < 1 or limit == float('inf'):
|
|
187
|
+
# Fall back to 1 if we can't get a size
|
|
188
|
+
limit = 1
|
|
189
|
+
result = int(limit)
|
|
190
|
+
logger.debug('cpu_count: %s', result)
|
|
180
191
|
# Make sure to remember it for the next call
|
|
181
192
|
setattr(cpu_count, 'result', result)
|
|
182
193
|
return result
|
|
@@ -358,6 +369,9 @@ def global_mutex(base_dir: str, mutex: str) -> Iterator[None]:
|
|
|
358
369
|
:param str mutex: Mutex to lock. Must be a permissible path component.
|
|
359
370
|
"""
|
|
360
371
|
|
|
372
|
+
if not os.path.isdir(base_dir):
|
|
373
|
+
raise RuntimeError(f"Directory {base_dir} for mutex does not exist")
|
|
374
|
+
|
|
361
375
|
# Define a filename
|
|
362
376
|
lock_filename = os.path.join(base_dir, 'toil-mutex-' + mutex)
|
|
363
377
|
|
|
@@ -368,18 +382,32 @@ def global_mutex(base_dir: str, mutex: str) -> Iterator[None]:
|
|
|
368
382
|
# get a lock on the deleted file.
|
|
369
383
|
|
|
370
384
|
while True:
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
try:
|
|
374
|
-
# Try to create the file, ignoring if it exists or not.
|
|
375
|
-
fd = os.open(lock_filename, os.O_CREAT | os.O_WRONLY)
|
|
385
|
+
# Try to create the file, ignoring if it exists or not.
|
|
386
|
+
fd = os.open(lock_filename, os.O_CREAT | os.O_WRONLY)
|
|
376
387
|
|
|
377
|
-
|
|
378
|
-
|
|
388
|
+
# Wait until we can exclusively lock it.
|
|
389
|
+
fcntl.lockf(fd, fcntl.LOCK_EX)
|
|
379
390
|
|
|
380
|
-
|
|
391
|
+
# Holding the lock, make sure we are looking at the same file on disk still.
|
|
392
|
+
try:
|
|
393
|
+
# So get the stats from the open file
|
|
381
394
|
fd_stats = os.fstat(fd)
|
|
395
|
+
except OSError as e:
|
|
396
|
+
if e.errno == errno.ESTALE:
|
|
397
|
+
# The file handle has gone stale, because somebody removed the file.
|
|
398
|
+
# Try again.
|
|
399
|
+
try:
|
|
400
|
+
fcntl.lockf(fd, fcntl.LOCK_UN)
|
|
401
|
+
except OSError:
|
|
402
|
+
pass
|
|
403
|
+
os.close(fd)
|
|
404
|
+
continue
|
|
405
|
+
else:
|
|
406
|
+
# Something else broke
|
|
407
|
+
raise
|
|
382
408
|
|
|
409
|
+
try:
|
|
410
|
+
# And get the stats for the name in the directory
|
|
383
411
|
path_stats: Optional[os.stat_result] = os.stat(lock_filename)
|
|
384
412
|
except FileNotFoundError:
|
|
385
413
|
path_stats = None
|
|
@@ -389,10 +417,9 @@ def global_mutex(base_dir: str, mutex: str) -> Iterator[None]:
|
|
|
389
417
|
# any). This usually happens, because before someone releases a
|
|
390
418
|
# lock, they delete the file. Go back and contend again. TODO: This
|
|
391
419
|
# allows a lot of queue jumping on our mutex.
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
continue
|
|
420
|
+
fcntl.lockf(fd, fcntl.LOCK_UN)
|
|
421
|
+
os.close(fd)
|
|
422
|
+
continue
|
|
396
423
|
else:
|
|
397
424
|
# We have a lock on the file that the name points to. Since we
|
|
398
425
|
# hold the lock, nobody will be deleting it or can be in the
|
|
@@ -407,14 +434,40 @@ def global_mutex(base_dir: str, mutex: str) -> Iterator[None]:
|
|
|
407
434
|
# Delete it while we still own it, so we can't delete it from out from
|
|
408
435
|
# under someone else who thinks they are holding it.
|
|
409
436
|
logger.debug('PID %d releasing mutex %s', os.getpid(), lock_filename)
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
|
|
437
|
+
|
|
438
|
+
# We have had observations in the wild of the lock file not existing
|
|
439
|
+
# when we go to unlink it, causing a crash on mutex release. See
|
|
440
|
+
# <https://github.com/DataBiosphere/toil/issues/4654>.
|
|
441
|
+
#
|
|
442
|
+
# We want to tolerate this; maybe unlink() interacts with fcntl() locks
|
|
443
|
+
# on NFS in a way that is actually fine, somehow? But we also want to
|
|
444
|
+
# complain loudly if something is tampering with our locks or not
|
|
445
|
+
# really enforcing locks on the filesystem, so we will notice if it is
|
|
446
|
+
# the cause of further problems.
|
|
447
|
+
try:
|
|
448
|
+
path_stats = os.stat(lock_filename)
|
|
449
|
+
except FileNotFoundError:
|
|
450
|
+
path_stats = None
|
|
451
|
+
|
|
452
|
+
# Check to make sure it still looks locked before we unlink.
|
|
453
|
+
if path_stats is None:
|
|
454
|
+
logger.error('PID %d had mutex %s disappear while locked! Mutex system is not working!', os.getpid(), lock_filename)
|
|
455
|
+
elif fd_stats.st_dev != path_stats.st_dev or fd_stats.st_ino != path_stats.st_ino:
|
|
456
|
+
logger.error('PID %d had mutex %s get replaced while locked! Mutex system is not working!', os.getpid(), lock_filename)
|
|
457
|
+
|
|
458
|
+
if path_stats is not None:
|
|
459
|
+
try:
|
|
460
|
+
# Unlink the file
|
|
461
|
+
os.unlink(lock_filename)
|
|
462
|
+
except FileNotFoundError:
|
|
463
|
+
logger.error('PID %d had mutex %s disappear between stat and unlink while unlocking! Mutex system is not working!', os.getpid(), lock_filename)
|
|
464
|
+
|
|
465
|
+
# Note that we are unlinking it and then unlocking it; a lot of people
|
|
466
|
+
# might have opened it before we unlinked it and will wake up when they
|
|
467
|
+
# get the worthless lock on the now-unlinked file. We have to do some
|
|
468
|
+
# stat gymnastics above to work around this.
|
|
469
|
+
fcntl.lockf(fd, fcntl.LOCK_UN)
|
|
470
|
+
os.close(fd)
|
|
418
471
|
|
|
419
472
|
|
|
420
473
|
class LastProcessStandingArena:
|
|
@@ -475,8 +528,8 @@ class LastProcessStandingArena:
|
|
|
475
528
|
logger.debug('Joining arena %s', self.lockfileDir)
|
|
476
529
|
|
|
477
530
|
# Make sure we're not in it already.
|
|
478
|
-
|
|
479
|
-
|
|
531
|
+
if self.lockfileName is not None or self.lockfileFD is not None:
|
|
532
|
+
raise RuntimeError("A process is already in the arena")
|
|
480
533
|
|
|
481
534
|
with global_mutex(self.base_dir, self.mutex):
|
|
482
535
|
# Now nobody else should also be trying to join or leave.
|
|
@@ -486,9 +539,14 @@ class LastProcessStandingArena:
|
|
|
486
539
|
os.mkdir(self.lockfileDir)
|
|
487
540
|
except FileExistsError:
|
|
488
541
|
pass
|
|
542
|
+
except Exception as e:
|
|
543
|
+
raise RuntimeError("Could not make lock file directory " + self.lockfileDir) from e
|
|
489
544
|
|
|
490
545
|
# Make ourselves a file in it and lock it to prove we are alive.
|
|
491
|
-
|
|
546
|
+
try:
|
|
547
|
+
self.lockfileFD, self.lockfileName = tempfile.mkstemp(dir=self.lockfileDir) # type: ignore
|
|
548
|
+
except Exception as e:
|
|
549
|
+
raise RuntimeError("Could not make lock file in " + self.lockfileDir) from e
|
|
492
550
|
# Nobody can see it yet, so lock it right away
|
|
493
551
|
fcntl.lockf(self.lockfileFD, fcntl.LOCK_EX) # type: ignore
|
|
494
552
|
|
|
@@ -511,8 +569,8 @@ class LastProcessStandingArena:
|
|
|
511
569
|
"""
|
|
512
570
|
|
|
513
571
|
# Make sure we're in it to start.
|
|
514
|
-
|
|
515
|
-
|
|
572
|
+
if self.lockfileName is None or self.lockfileFD is None:
|
|
573
|
+
raise RuntimeError("This process is not in the arena.")
|
|
516
574
|
|
|
517
575
|
logger.debug('Leaving arena %s', self.lockfileDir)
|
|
518
576
|
|
|
@@ -533,7 +591,12 @@ class LastProcessStandingArena:
|
|
|
533
591
|
# There is someone claiming to be here. Are they alive?
|
|
534
592
|
full_path = os.path.join(self.lockfileDir, item)
|
|
535
593
|
|
|
536
|
-
|
|
594
|
+
try:
|
|
595
|
+
fd = os.open(full_path, os.O_RDONLY)
|
|
596
|
+
except OSError as e:
|
|
597
|
+
# Suddenly the file doesn't exist on the network file system?
|
|
598
|
+
continue
|
|
599
|
+
|
|
537
600
|
try:
|
|
538
601
|
fcntl.lockf(fd, fcntl.LOCK_SH | fcntl.LOCK_NB)
|
|
539
602
|
except OSError as e:
|