toil 5.12.0__py3-none-any.whl → 6.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. toil/__init__.py +18 -13
  2. toil/batchSystems/abstractBatchSystem.py +21 -10
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +2 -2
  4. toil/batchSystems/awsBatch.py +14 -14
  5. toil/batchSystems/contained_executor.py +3 -3
  6. toil/batchSystems/htcondor.py +0 -1
  7. toil/batchSystems/kubernetes.py +34 -31
  8. toil/batchSystems/local_support.py +3 -1
  9. toil/batchSystems/mesos/batchSystem.py +7 -7
  10. toil/batchSystems/options.py +32 -83
  11. toil/batchSystems/registry.py +104 -23
  12. toil/batchSystems/singleMachine.py +16 -13
  13. toil/batchSystems/slurm.py +3 -3
  14. toil/batchSystems/torque.py +0 -1
  15. toil/bus.py +6 -8
  16. toil/common.py +532 -743
  17. toil/cwl/__init__.py +28 -32
  18. toil/cwl/cwltoil.py +523 -520
  19. toil/cwl/utils.py +55 -10
  20. toil/fileStores/__init__.py +2 -2
  21. toil/fileStores/abstractFileStore.py +36 -11
  22. toil/fileStores/cachingFileStore.py +607 -530
  23. toil/fileStores/nonCachingFileStore.py +43 -10
  24. toil/job.py +140 -75
  25. toil/jobStores/abstractJobStore.py +147 -79
  26. toil/jobStores/aws/jobStore.py +23 -9
  27. toil/jobStores/aws/utils.py +1 -2
  28. toil/jobStores/fileJobStore.py +117 -19
  29. toil/jobStores/googleJobStore.py +16 -7
  30. toil/jobStores/utils.py +5 -6
  31. toil/leader.py +71 -43
  32. toil/lib/accelerators.py +10 -5
  33. toil/lib/aws/__init__.py +3 -14
  34. toil/lib/aws/ami.py +22 -9
  35. toil/lib/aws/iam.py +21 -13
  36. toil/lib/aws/session.py +2 -16
  37. toil/lib/aws/utils.py +4 -5
  38. toil/lib/compatibility.py +1 -1
  39. toil/lib/conversions.py +7 -3
  40. toil/lib/docker.py +22 -23
  41. toil/lib/ec2.py +10 -6
  42. toil/lib/ec2nodes.py +106 -100
  43. toil/lib/encryption/_nacl.py +2 -1
  44. toil/lib/generatedEC2Lists.py +325 -18
  45. toil/lib/io.py +21 -0
  46. toil/lib/misc.py +1 -1
  47. toil/lib/resources.py +1 -1
  48. toil/lib/threading.py +74 -26
  49. toil/options/common.py +738 -0
  50. toil/options/cwl.py +336 -0
  51. toil/options/wdl.py +32 -0
  52. toil/provisioners/abstractProvisioner.py +1 -4
  53. toil/provisioners/aws/__init__.py +3 -6
  54. toil/provisioners/aws/awsProvisioner.py +6 -0
  55. toil/provisioners/clusterScaler.py +3 -2
  56. toil/provisioners/gceProvisioner.py +2 -2
  57. toil/realtimeLogger.py +2 -1
  58. toil/resource.py +24 -18
  59. toil/server/app.py +2 -3
  60. toil/server/cli/wes_cwl_runner.py +4 -4
  61. toil/server/utils.py +1 -1
  62. toil/server/wes/abstract_backend.py +3 -2
  63. toil/server/wes/amazon_wes_utils.py +5 -4
  64. toil/server/wes/tasks.py +2 -3
  65. toil/server/wes/toil_backend.py +2 -10
  66. toil/server/wsgi_app.py +2 -0
  67. toil/serviceManager.py +12 -10
  68. toil/statsAndLogging.py +5 -1
  69. toil/test/__init__.py +29 -54
  70. toil/test/batchSystems/batchSystemTest.py +11 -111
  71. toil/test/batchSystems/test_slurm.py +3 -2
  72. toil/test/cwl/cwlTest.py +213 -90
  73. toil/test/cwl/glob_dir.cwl +15 -0
  74. toil/test/cwl/preemptible.cwl +21 -0
  75. toil/test/cwl/preemptible_expression.cwl +28 -0
  76. toil/test/cwl/revsort.cwl +1 -1
  77. toil/test/cwl/revsort2.cwl +1 -1
  78. toil/test/docs/scriptsTest.py +0 -1
  79. toil/test/jobStores/jobStoreTest.py +27 -16
  80. toil/test/lib/aws/test_iam.py +4 -14
  81. toil/test/lib/aws/test_utils.py +0 -3
  82. toil/test/lib/dockerTest.py +4 -4
  83. toil/test/lib/test_ec2.py +11 -16
  84. toil/test/mesos/helloWorld.py +4 -5
  85. toil/test/mesos/stress.py +1 -1
  86. toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
  87. toil/test/provisioners/clusterScalerTest.py +6 -4
  88. toil/test/provisioners/clusterTest.py +14 -3
  89. toil/test/provisioners/gceProvisionerTest.py +0 -6
  90. toil/test/provisioners/restartScript.py +3 -2
  91. toil/test/server/serverTest.py +1 -1
  92. toil/test/sort/restart_sort.py +2 -1
  93. toil/test/sort/sort.py +2 -1
  94. toil/test/sort/sortTest.py +2 -13
  95. toil/test/src/autoDeploymentTest.py +45 -45
  96. toil/test/src/busTest.py +5 -5
  97. toil/test/src/checkpointTest.py +2 -2
  98. toil/test/src/deferredFunctionTest.py +1 -1
  99. toil/test/src/fileStoreTest.py +32 -16
  100. toil/test/src/helloWorldTest.py +1 -1
  101. toil/test/src/importExportFileTest.py +1 -1
  102. toil/test/src/jobDescriptionTest.py +2 -1
  103. toil/test/src/jobServiceTest.py +1 -1
  104. toil/test/src/jobTest.py +18 -18
  105. toil/test/src/miscTests.py +5 -3
  106. toil/test/src/promisedRequirementTest.py +3 -3
  107. toil/test/src/realtimeLoggerTest.py +1 -1
  108. toil/test/src/resourceTest.py +2 -2
  109. toil/test/src/restartDAGTest.py +1 -1
  110. toil/test/src/resumabilityTest.py +36 -2
  111. toil/test/src/retainTempDirTest.py +1 -1
  112. toil/test/src/systemTest.py +2 -2
  113. toil/test/src/toilContextManagerTest.py +2 -2
  114. toil/test/src/userDefinedJobArgTypeTest.py +1 -1
  115. toil/test/utils/toilDebugTest.py +98 -32
  116. toil/test/utils/toilKillTest.py +2 -2
  117. toil/test/utils/utilsTest.py +20 -0
  118. toil/test/wdl/wdltoil_test.py +148 -45
  119. toil/toilState.py +7 -6
  120. toil/utils/toilClean.py +1 -1
  121. toil/utils/toilConfig.py +36 -0
  122. toil/utils/toilDebugFile.py +60 -33
  123. toil/utils/toilDebugJob.py +39 -12
  124. toil/utils/toilDestroyCluster.py +1 -1
  125. toil/utils/toilKill.py +1 -1
  126. toil/utils/toilLaunchCluster.py +13 -2
  127. toil/utils/toilMain.py +3 -2
  128. toil/utils/toilRsyncCluster.py +1 -1
  129. toil/utils/toilSshCluster.py +1 -1
  130. toil/utils/toilStats.py +240 -143
  131. toil/utils/toilStatus.py +1 -4
  132. toil/version.py +11 -11
  133. toil/wdl/utils.py +2 -122
  134. toil/wdl/wdltoil.py +999 -386
  135. toil/worker.py +25 -31
  136. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/METADATA +60 -53
  137. toil-6.1.0a1.dist-info/RECORD +237 -0
  138. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/WHEEL +1 -1
  139. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/entry_points.txt +0 -1
  140. toil/batchSystems/parasol.py +0 -379
  141. toil/batchSystems/tes.py +0 -459
  142. toil/test/batchSystems/parasolTestSupport.py +0 -117
  143. toil/test/wdl/builtinTest.py +0 -506
  144. toil/test/wdl/conftest.py +0 -23
  145. toil/test/wdl/toilwdlTest.py +0 -522
  146. toil/wdl/toilwdl.py +0 -141
  147. toil/wdl/versions/dev.py +0 -107
  148. toil/wdl/versions/draft2.py +0 -980
  149. toil/wdl/versions/v1.py +0 -794
  150. toil/wdl/wdl_analysis.py +0 -116
  151. toil/wdl/wdl_functions.py +0 -997
  152. toil/wdl/wdl_synthesis.py +0 -1011
  153. toil/wdl/wdl_types.py +0 -243
  154. toil-5.12.0.dist-info/RECORD +0 -244
  155. /toil/{wdl/versions → options}/__init__.py +0 -0
  156. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/LICENSE +0 -0
  157. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/top_level.txt +0 -0
toil/lib/io.py CHANGED
@@ -2,6 +2,7 @@ import logging
2
2
  import os
3
3
  import shutil
4
4
  import stat
5
+ import tempfile
5
6
  import uuid
6
7
  from contextlib import contextmanager
7
8
  from io import BytesIO
@@ -9,6 +10,26 @@ from typing import IO, Any, Callable, Iterator, Optional, Union
9
10
 
10
11
  logger = logging.getLogger(__name__)
11
12
 
13
+ def mkdtemp(suffix: Optional[str] = None, prefix: Optional[str] = None, dir: Optional[str] = None) -> str:
14
+ """
15
+ Make a temporary directory like tempfile.mkdtemp, but with relaxed permissions.
16
+
17
+ The permissions on the directory will be 711 instead of 700, allowing the
18
+ group and all other users to traverse the directory. This is necessary if
19
+ the directory is on NFS and the Docker daemon would like to mount it or a
20
+ file inside it into a container, because on NFS even the Docker daemon
21
+ appears bound by the file permissions.
22
+
23
+ See <https://github.com/DataBiosphere/toil/issues/4644>, and
24
+ <https://stackoverflow.com/a/67928880> which talks about a similar problem
25
+ but in the context of user namespaces.
26
+ """
27
+ # Make the directory
28
+ result = tempfile.mkdtemp(suffix=suffix, prefix=prefix, dir=dir)
29
+ # Grant all the permissions: full control for user, and execute for group and other
30
+ os.chmod(result, stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
31
+ # Return the path created
32
+ return result
12
33
 
13
34
  def robust_rmtree(path: Union[str, bytes]) -> None:
14
35
  """
toil/lib/misc.py CHANGED
@@ -9,7 +9,7 @@ import sys
9
9
  import time
10
10
  import typing
11
11
  from contextlib import closing
12
- from typing import Iterator, List, Optional, Union
12
+ from typing import Iterator, List, Optional
13
13
 
14
14
  import pytz
15
15
 
toil/lib/resources.py CHANGED
@@ -42,7 +42,7 @@ def glob(glob_pattern: str, directoryname: str) -> List[str]:
42
42
  the glob_pattern and returns a list=[].
43
43
 
44
44
  :param directoryname: Any accessible folder name on the filesystem.
45
- :param glob_pattern: A string like "*.txt", which would find all text files.
45
+ :param glob_pattern: A string like ``*.txt``, which would find all text files.
46
46
  :return: A list=[] of absolute filepaths matching the glob pattern.
47
47
  """
48
48
  matches = []
toil/lib/threading.py CHANGED
@@ -16,6 +16,7 @@
16
16
  # Note: renamed from "threading.py" to "threading.py" to avoid conflicting imports
17
17
  # from the built-in "threading" from psutil in python3.9
18
18
  import atexit
19
+ import errno
19
20
  import fcntl
20
21
  import logging
21
22
  import math
@@ -25,7 +26,7 @@ import tempfile
25
26
  import threading
26
27
  import traceback
27
28
  from contextlib import contextmanager
28
- from typing import Any, Dict, Iterator, Optional, Union, cast
29
+ from typing import Dict, Iterator, Optional, Union, cast
29
30
 
30
31
  import psutil # type: ignore
31
32
 
@@ -358,6 +359,9 @@ def global_mutex(base_dir: str, mutex: str) -> Iterator[None]:
358
359
  :param str mutex: Mutex to lock. Must be a permissible path component.
359
360
  """
360
361
 
362
+ if not os.path.isdir(base_dir):
363
+ raise RuntimeError(f"Directory {base_dir} for mutex does not exist")
364
+
361
365
  # Define a filename
362
366
  lock_filename = os.path.join(base_dir, 'toil-mutex-' + mutex)
363
367
 
@@ -368,18 +372,32 @@ def global_mutex(base_dir: str, mutex: str) -> Iterator[None]:
368
372
  # get a lock on the deleted file.
369
373
 
370
374
  while True:
371
- fd = -1
372
-
373
- try:
374
- # Try to create the file, ignoring if it exists or not.
375
- fd = os.open(lock_filename, os.O_CREAT | os.O_WRONLY)
375
+ # Try to create the file, ignoring if it exists or not.
376
+ fd = os.open(lock_filename, os.O_CREAT | os.O_WRONLY)
376
377
 
377
- # Wait until we can exclusively lock it.
378
- fcntl.lockf(fd, fcntl.LOCK_EX)
378
+ # Wait until we can exclusively lock it.
379
+ fcntl.lockf(fd, fcntl.LOCK_EX)
379
380
 
380
- # Holding the lock, make sure we are looking at the same file on disk still.
381
+ # Holding the lock, make sure we are looking at the same file on disk still.
382
+ try:
383
+ # So get the stats from the open file
381
384
  fd_stats = os.fstat(fd)
385
+ except OSError as e:
386
+ if e.errno == errno.ESTALE:
387
+ # The file handle has gone stale, because somebody removed the file.
388
+ # Try again.
389
+ try:
390
+ fcntl.lockf(fd, fcntl.LOCK_UN)
391
+ except OSError:
392
+ pass
393
+ os.close(fd)
394
+ continue
395
+ else:
396
+ # Something else broke
397
+ raise
382
398
 
399
+ try:
400
+ # And get the stats for the name in the directory
383
401
  path_stats: Optional[os.stat_result] = os.stat(lock_filename)
384
402
  except FileNotFoundError:
385
403
  path_stats = None
@@ -389,10 +407,9 @@ def global_mutex(base_dir: str, mutex: str) -> Iterator[None]:
389
407
  # any). This usually happens, because before someone releases a
390
408
  # lock, they delete the file. Go back and contend again. TODO: This
391
409
  # allows a lot of queue jumping on our mutex.
392
- if fd != -1:
393
- fcntl.lockf(fd, fcntl.LOCK_UN)
394
- os.close(fd)
395
- continue
410
+ fcntl.lockf(fd, fcntl.LOCK_UN)
411
+ os.close(fd)
412
+ continue
396
413
  else:
397
414
  # We have a lock on the file that the name points to. Since we
398
415
  # hold the lock, nobody will be deleting it or can be in the
@@ -407,14 +424,40 @@ def global_mutex(base_dir: str, mutex: str) -> Iterator[None]:
407
424
  # Delete it while we still own it, so we can't delete it from out from
408
425
  # under someone else who thinks they are holding it.
409
426
  logger.debug('PID %d releasing mutex %s', os.getpid(), lock_filename)
410
- os.unlink(lock_filename)
411
- if fd != -1:
412
- fcntl.lockf(fd, fcntl.LOCK_UN)
413
- # Note that we are unlinking it and then unlocking it; a lot of people
414
- # might have opened it before we unlinked it and will wake up when they
415
- # get the worthless lock on the now-unlinked file. We have to do some
416
- # stat gymnastics above to work around this.
417
- os.close(fd)
427
+
428
+ # We have had observations in the wild of the lock file not existing
429
+ # when we go to unlink it, causing a crash on mutex release. See
430
+ # <https://github.com/DataBiosphere/toil/issues/4654>.
431
+ #
432
+ # We want to tolerate this; maybe unlink() interacts with fcntl() locks
433
+ # on NFS in a way that is actually fine, somehow? But we also want to
434
+ # complain loudly if something is tampering with our locks or not
435
+ # really enforcing locks on the filesystem, so we will notice if it is
436
+ # the cause of further problems.
437
+ try:
438
+ path_stats = os.stat(lock_filename)
439
+ except FileNotFoundError:
440
+ path_stats = None
441
+
442
+ # Check to make sure it still looks locked before we unlink.
443
+ if path_stats is None:
444
+ logger.error('PID %d had mutex %s disappear while locked! Mutex system is not working!', os.getpid(), lock_filename)
445
+ elif fd_stats.st_dev != path_stats.st_dev or fd_stats.st_ino != path_stats.st_ino:
446
+ logger.error('PID %d had mutex %s get replaced while locked! Mutex system is not working!', os.getpid(), lock_filename)
447
+
448
+ if path_stats is not None:
449
+ try:
450
+ # Unlink the file
451
+ os.unlink(lock_filename)
452
+ except FileNotFoundError:
453
+ logger.error('PID %d had mutex %s disappear between stat and unlink while unlocking! Mutex system is not working!', os.getpid(), lock_filename)
454
+
455
+ # Note that we are unlinking it and then unlocking it; a lot of people
456
+ # might have opened it before we unlinked it and will wake up when they
457
+ # get the worthless lock on the now-unlinked file. We have to do some
458
+ # stat gymnastics above to work around this.
459
+ fcntl.lockf(fd, fcntl.LOCK_UN)
460
+ os.close(fd)
418
461
 
419
462
 
420
463
  class LastProcessStandingArena:
@@ -475,8 +518,8 @@ class LastProcessStandingArena:
475
518
  logger.debug('Joining arena %s', self.lockfileDir)
476
519
 
477
520
  # Make sure we're not in it already.
478
- assert self.lockfileName is None
479
- assert self.lockfileFD is None
521
+ if self.lockfileName is not None or self.lockfileFD is not None:
522
+ raise RuntimeError("A process is already in the arena")
480
523
 
481
524
  with global_mutex(self.base_dir, self.mutex):
482
525
  # Now nobody else should also be trying to join or leave.
@@ -511,8 +554,8 @@ class LastProcessStandingArena:
511
554
  """
512
555
 
513
556
  # Make sure we're in it to start.
514
- assert self.lockfileName is not None
515
- assert self.lockfileFD is not None
557
+ if self.lockfileName is None or self.lockfileFD is None:
558
+ raise RuntimeError("This process is not in the arena.")
516
559
 
517
560
  logger.debug('Leaving arena %s', self.lockfileDir)
518
561
 
@@ -533,7 +576,12 @@ class LastProcessStandingArena:
533
576
  # There is someone claiming to be here. Are they alive?
534
577
  full_path = os.path.join(self.lockfileDir, item)
535
578
 
536
- fd = os.open(full_path, os.O_RDONLY)
579
+ try:
580
+ fd = os.open(full_path, os.O_RDONLY)
581
+ except OSError as e:
582
+ # suddenly file doesn't exist on network file system?
583
+ continue
584
+
537
585
  try:
538
586
  fcntl.lockf(fd, fcntl.LOCK_SH | fcntl.LOCK_NB)
539
587
  except OSError as e: