toil 6.1.0a1__py3-none-any.whl → 7.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. toil/__init__.py +1 -232
  2. toil/batchSystems/abstractBatchSystem.py +41 -17
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +79 -65
  4. toil/batchSystems/awsBatch.py +8 -8
  5. toil/batchSystems/cleanup_support.py +7 -3
  6. toil/batchSystems/contained_executor.py +4 -5
  7. toil/batchSystems/gridengine.py +1 -1
  8. toil/batchSystems/htcondor.py +5 -5
  9. toil/batchSystems/kubernetes.py +25 -11
  10. toil/batchSystems/local_support.py +3 -3
  11. toil/batchSystems/lsf.py +9 -9
  12. toil/batchSystems/mesos/batchSystem.py +4 -4
  13. toil/batchSystems/mesos/executor.py +3 -2
  14. toil/batchSystems/options.py +9 -0
  15. toil/batchSystems/singleMachine.py +11 -10
  16. toil/batchSystems/slurm.py +129 -16
  17. toil/batchSystems/torque.py +1 -1
  18. toil/bus.py +45 -3
  19. toil/common.py +56 -31
  20. toil/cwl/cwltoil.py +442 -371
  21. toil/deferred.py +1 -1
  22. toil/exceptions.py +1 -1
  23. toil/fileStores/abstractFileStore.py +69 -20
  24. toil/fileStores/cachingFileStore.py +6 -22
  25. toil/fileStores/nonCachingFileStore.py +6 -15
  26. toil/job.py +270 -86
  27. toil/jobStores/abstractJobStore.py +37 -31
  28. toil/jobStores/aws/jobStore.py +280 -218
  29. toil/jobStores/aws/utils.py +60 -31
  30. toil/jobStores/conftest.py +2 -2
  31. toil/jobStores/fileJobStore.py +3 -3
  32. toil/jobStores/googleJobStore.py +3 -4
  33. toil/leader.py +89 -38
  34. toil/lib/aws/__init__.py +26 -10
  35. toil/lib/aws/iam.py +2 -2
  36. toil/lib/aws/session.py +62 -22
  37. toil/lib/aws/utils.py +73 -37
  38. toil/lib/conversions.py +24 -1
  39. toil/lib/ec2.py +118 -69
  40. toil/lib/expando.py +1 -1
  41. toil/lib/generatedEC2Lists.py +8 -8
  42. toil/lib/io.py +42 -4
  43. toil/lib/misc.py +1 -3
  44. toil/lib/resources.py +57 -16
  45. toil/lib/retry.py +12 -5
  46. toil/lib/threading.py +29 -14
  47. toil/lib/throttle.py +1 -1
  48. toil/options/common.py +31 -30
  49. toil/options/wdl.py +5 -0
  50. toil/provisioners/__init__.py +9 -3
  51. toil/provisioners/abstractProvisioner.py +12 -2
  52. toil/provisioners/aws/__init__.py +20 -15
  53. toil/provisioners/aws/awsProvisioner.py +406 -329
  54. toil/provisioners/gceProvisioner.py +2 -2
  55. toil/provisioners/node.py +13 -5
  56. toil/server/app.py +1 -1
  57. toil/statsAndLogging.py +93 -23
  58. toil/test/__init__.py +27 -12
  59. toil/test/batchSystems/batchSystemTest.py +40 -33
  60. toil/test/batchSystems/batch_system_plugin_test.py +79 -0
  61. toil/test/batchSystems/test_slurm.py +22 -7
  62. toil/test/cactus/__init__.py +0 -0
  63. toil/test/cactus/test_cactus_integration.py +58 -0
  64. toil/test/cwl/cwlTest.py +245 -236
  65. toil/test/cwl/seqtk_seq.cwl +1 -1
  66. toil/test/docs/scriptsTest.py +11 -14
  67. toil/test/jobStores/jobStoreTest.py +40 -54
  68. toil/test/lib/aws/test_iam.py +2 -2
  69. toil/test/lib/test_ec2.py +1 -1
  70. toil/test/options/__init__.py +13 -0
  71. toil/test/options/options.py +37 -0
  72. toil/test/provisioners/aws/awsProvisionerTest.py +51 -34
  73. toil/test/provisioners/clusterTest.py +99 -16
  74. toil/test/server/serverTest.py +2 -2
  75. toil/test/src/autoDeploymentTest.py +1 -1
  76. toil/test/src/dockerCheckTest.py +2 -1
  77. toil/test/src/environmentTest.py +125 -0
  78. toil/test/src/fileStoreTest.py +1 -1
  79. toil/test/src/jobDescriptionTest.py +18 -8
  80. toil/test/src/jobTest.py +1 -1
  81. toil/test/src/realtimeLoggerTest.py +4 -0
  82. toil/test/src/workerTest.py +52 -19
  83. toil/test/utils/toilDebugTest.py +62 -4
  84. toil/test/utils/utilsTest.py +23 -21
  85. toil/test/wdl/wdltoil_test.py +49 -21
  86. toil/test/wdl/wdltoil_test_kubernetes.py +77 -0
  87. toil/toilState.py +68 -9
  88. toil/utils/toilDebugFile.py +1 -1
  89. toil/utils/toilDebugJob.py +153 -26
  90. toil/utils/toilLaunchCluster.py +12 -2
  91. toil/utils/toilRsyncCluster.py +7 -2
  92. toil/utils/toilSshCluster.py +7 -3
  93. toil/utils/toilStats.py +310 -266
  94. toil/utils/toilStatus.py +98 -52
  95. toil/version.py +11 -11
  96. toil/wdl/wdltoil.py +644 -225
  97. toil/worker.py +125 -83
  98. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/LICENSE +25 -0
  99. toil-7.0.0.dist-info/METADATA +158 -0
  100. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/RECORD +103 -96
  101. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/WHEEL +1 -1
  102. toil-6.1.0a1.dist-info/METADATA +0 -125
  103. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/entry_points.txt +0 -0
  104. {toil-6.1.0a1.dist-info → toil-7.0.0.dist-info}/top_level.txt +0 -0
toil/__init__.py CHANGED
@@ -22,7 +22,6 @@ from datetime import datetime
22
22
  from typing import TYPE_CHECKING, Optional, Tuple
23
23
 
24
24
  import requests
25
- from pytz import timezone
26
25
 
27
26
  from docker.errors import ImageNotFound
28
27
  from toil.lib.memoize import memoize
@@ -210,7 +209,7 @@ def customDockerInitCmd() -> str:
210
209
  private docker registries). Any single quotes are escaped and the command cannot contain a
211
210
  set of blacklisted chars (newline or tab).
212
211
 
213
- :returns: The custom commmand, or an empty string is returned if the environment variable is not set.
212
+ :returns: The custom command, or an empty string is returned if the environment variable is not set.
214
213
  """
215
214
  command = lookupEnvVar(name='user-defined custom docker init command',
216
215
  envName='TOIL_CUSTOM_DOCKER_INIT_COMMAND',
@@ -440,7 +439,6 @@ def logProcessContext(config: "Config") -> None:
440
439
 
441
440
 
442
441
  try:
443
- from boto import provider
444
442
  from botocore.credentials import (JSONFileCache,
445
443
  RefreshableCredentials,
446
444
  create_credential_resolver)
@@ -477,234 +475,5 @@ try:
477
475
  """
478
476
  return datetime.strptime(s, datetime_format)
479
477
 
480
-
481
- class BotoCredentialAdapter(provider.Provider):
482
- """
483
- Boto 2 Adapter to use AWS credentials obtained via Boto 3's credential finding logic.
484
-
485
- This allows for automatic role assumption
486
- respecting the Boto 3 config files, even when parts of the app still use
487
- Boto 2.
488
-
489
- This class also handles caching credentials in multi-process environments
490
- to avoid loads of processes swamping the EC2 metadata service.
491
- """
492
-
493
- # TODO: We take kwargs because new boto2 versions have an 'anon'
494
- # argument and we want to be future proof
495
-
496
- def __init__(self, name, access_key=None, secret_key=None,
497
- security_token=None, profile_name=None, **kwargs):
498
- """Create a new BotoCredentialAdapter."""
499
- # TODO: We take kwargs because new boto2 versions have an 'anon'
500
- # argument and we want to be future proof
501
-
502
- if (name == 'aws' or name is None) and access_key is None and not kwargs.get('anon', False):
503
- # We are on AWS and we don't have credentials passed along and we aren't anonymous.
504
- # We will backend into a boto3 resolver for getting credentials.
505
- # Make sure to enable boto3's own caching, so we can share that
506
- # cache with pure boto3 code elsewhere in Toil.
507
- # Keep synced with toil.lib.aws.session.establish_boto3_session
508
- self._boto3_resolver = create_credential_resolver(Session(profile=profile_name), cache=JSONFileCache())
509
- else:
510
- # We will use the normal flow
511
- self._boto3_resolver = None
512
-
513
- # Pass along all the arguments
514
- super().__init__(name, access_key=access_key,
515
- secret_key=secret_key, security_token=security_token,
516
- profile_name=profile_name, **kwargs)
517
-
518
- def get_credentials(self, access_key=None, secret_key=None, security_token=None, profile_name=None):
519
- """
520
- Make sure our credential fields are populated.
521
-
522
- Called by the base class constructor.
523
- """
524
- if self._boto3_resolver is not None:
525
- # Go get the credentials from the cache, or from boto3 if not cached.
526
- # We need to be eager here; having the default None
527
- # _credential_expiry_time makes the accessors never try to refresh.
528
- self._obtain_credentials_from_cache_or_boto3()
529
- else:
530
- # We're not on AWS, or they passed a key, or we're anonymous.
531
- # Use the normal route; our credentials shouldn't expire.
532
- super().get_credentials(access_key=access_key,
533
- secret_key=secret_key, security_token=security_token,
534
- profile_name=profile_name)
535
-
536
- def _populate_keys_from_metadata_server(self):
537
- """
538
- Hack to catch _credential_expiry_time being too soon and refresh the credentials.
539
-
540
- This override is misnamed; it's actually the only hook we have to catch
541
- _credential_expiry_time being too soon and refresh the credentials. We
542
- actually just go back and poke the cache to see if it feels like
543
- getting us new credentials.
544
-
545
- Boto 2 hardcodes a refresh within 5 minutes of expiry:
546
- https://github.com/boto/boto/blob/591911db1029f2fbb8ba1842bfcc514159b37b32/boto/provider.py#L247
547
-
548
- Boto 3 wants to refresh 15 or 10 minutes before expiry:
549
- https://github.com/boto/botocore/blob/8d3ea0e61473fba43774eb3c74e1b22995ee7370/botocore/credentials.py#L279
550
-
551
- So if we ever want to refresh, Boto 3 wants to refresh too.
552
- """
553
- # This should only happen if we have expiring credentials, which we should only get from boto3
554
- if self._boto3_resolver is None:
555
- raise RuntimeError("The Boto3 resolver should not be None.")
556
-
557
- self._obtain_credentials_from_cache_or_boto3()
558
-
559
- @retry()
560
- def _obtain_credentials_from_boto3(self):
561
- """
562
- Fill our credential fields from Boto 3.
563
-
564
- We know the current cached credentials are not good, and that we
565
- need to get them from Boto 3. Fill in our credential fields
566
- (_access_key, _secret_key, _security_token,
567
- _credential_expiry_time) from Boto 3.
568
- """
569
- # We get a Credentials object
570
- # <https://github.com/boto/botocore/blob/8d3ea0e61473fba43774eb3c74e1b22995ee7370/botocore/credentials.py#L227>
571
- # or a RefreshableCredentials, or None on failure.
572
- creds = self._boto3_resolver.load_credentials()
573
-
574
- if creds is None:
575
- try:
576
- resolvers = str(self._boto3_resolver.providers)
577
- except:
578
- resolvers = "(Resolvers unavailable)"
579
- raise RuntimeError("Could not obtain AWS credentials from Boto3. Resolvers tried: " + resolvers)
580
-
581
- # Make sure the credentials actually has some credentials if it is lazy
582
- creds.get_frozen_credentials()
583
-
584
- # Get when the credentials will expire, if ever
585
- if isinstance(creds, RefreshableCredentials):
586
- # Credentials may expire.
587
- # Get a naive UTC datetime like boto 2 uses from the boto 3 time.
588
- self._credential_expiry_time = creds._expiry_time.astimezone(timezone('UTC')).replace(tzinfo=None)
589
- else:
590
- # Credentials never expire
591
- self._credential_expiry_time = None
592
-
593
- # Then, atomically get all the credentials bits. They may be newer than we think they are, but never older.
594
- frozen = creds.get_frozen_credentials()
595
-
596
- # Copy them into us
597
- self._access_key = frozen.access_key
598
- self._secret_key = frozen.secret_key
599
- self._security_token = frozen.token
600
-
601
- def _obtain_credentials_from_cache_or_boto3(self):
602
- """
603
- Get the cached credentials.
604
-
605
- Or retrieve them from Boto 3 and cache them
606
- (or wait for another cooperating process to do so) if they are missing
607
- or not fresh enough.
608
- """
609
- cache_path = '~/.cache/aws/cached_temporary_credentials'
610
- path = os.path.expanduser(cache_path)
611
- tmp_path = path + '.tmp'
612
- while True:
613
- log.debug('Attempting to read cached credentials from %s.', path)
614
- try:
615
- with open(path) as f:
616
- content = f.read()
617
- if content:
618
- record = content.split('\n')
619
- if len(record) != 4:
620
- raise RuntimeError("Number of cached credentials is not 4.")
621
- self._access_key = record[0]
622
- self._secret_key = record[1]
623
- self._security_token = record[2]
624
- self._credential_expiry_time = str_to_datetime(record[3])
625
- else:
626
- log.debug('%s is empty. Credentials are not temporary.', path)
627
- self._obtain_credentials_from_boto3()
628
- return
629
- except OSError as e:
630
- if e.errno == errno.ENOENT:
631
- log.debug('Cached credentials are missing.')
632
- dir_path = os.path.dirname(path)
633
- if not os.path.exists(dir_path):
634
- log.debug('Creating parent directory %s', dir_path)
635
- try:
636
- # A race would be ok at this point
637
- os.makedirs(dir_path, exist_ok=True)
638
- except OSError as e2:
639
- if e2.errno == errno.EROFS:
640
- # Sometimes we don't actually have write access to ~.
641
- # We may be running in a non-writable Toil container.
642
- # We should just go get our own credentials
643
- log.debug('Cannot use the credentials cache because we are working on a read-only filesystem.')
644
- self._obtain_credentials_from_boto3()
645
- else:
646
- raise
647
- else:
648
- raise
649
- else:
650
- if self._credentials_need_refresh():
651
- log.debug('Cached credentials are expired.')
652
- else:
653
- log.debug('Cached credentials exist and are still fresh.')
654
- return
655
- # We get here if credentials are missing or expired
656
- log.debug('Racing to create %s.', tmp_path)
657
- # Only one process, the winner, will succeed
658
- try:
659
- fd = os.open(tmp_path, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o600)
660
- except OSError as e:
661
- if e.errno == errno.EEXIST:
662
- log.debug('Lost the race to create %s. Waiting on winner to remove it.', tmp_path)
663
- while os.path.exists(tmp_path):
664
- time.sleep(0.1)
665
- log.debug('Winner removed %s. Trying from the top.', tmp_path)
666
- else:
667
- raise
668
- else:
669
- try:
670
- log.debug('Won the race to create %s. Requesting credentials from backend.', tmp_path)
671
- self._obtain_credentials_from_boto3()
672
- except:
673
- os.close(fd)
674
- fd = None
675
- log.debug('Failed to obtain credentials, removing %s.', tmp_path)
676
- # This unblocks the losers.
677
- os.unlink(tmp_path)
678
- # Bail out. It's too likely to happen repeatedly
679
- raise
680
- else:
681
- if self._credential_expiry_time is None:
682
- os.close(fd)
683
- fd = None
684
- log.debug('Credentials are not temporary. Leaving %s empty and renaming it to %s.',
685
- tmp_path, path)
686
- # No need to actually cache permanent credentials,
687
- # because we know we aren't getting them from the
688
- # metadata server or by assuming a role. Those both
689
- # give temporary credentials.
690
- else:
691
- log.debug('Writing credentials to %s.', tmp_path)
692
- with os.fdopen(fd, 'w') as fh:
693
- fd = None
694
- fh.write('\n'.join([
695
- self._access_key,
696
- self._secret_key,
697
- self._security_token,
698
- datetime_to_str(self._credential_expiry_time)]))
699
- log.debug('Wrote credentials to %s. Renaming to %s.', tmp_path, path)
700
- os.rename(tmp_path, path)
701
- return
702
- finally:
703
- if fd is not None:
704
- os.close(fd)
705
-
706
-
707
- provider.Provider = BotoCredentialAdapter
708
-
709
478
  except ImportError:
710
479
  pass
toil/batchSystems/abstractBatchSystem.py CHANGED
@@ -58,6 +58,27 @@ class BatchJobExitReason(enum.IntEnum):
58
58
  """Internal error."""
59
59
  MEMLIMIT: int = 6
60
60
  """Job hit batch system imposed memory limit."""
61
+ MISSING: int = 7
62
+ """Job disappeared from the scheduler without actually stopping, so Toil killed it."""
63
+ MAXJOBDURATION: int = 8
64
+ """Job ran longer than --maxJobDuration, so Toil killed it."""
65
+ PARTITION: int = 9
66
+ """Job was not able to talk to the leader via the job store, so Toil declared it failed."""
67
+
68
+
69
+ @classmethod
70
+ def to_string(cls, value: int) -> str:
71
+ """
72
+ Convert to human-readable string.
73
+
74
+ Given an int that may be or may be equal to a value from the enum,
75
+ produce the string value of its matching enum entry, or a stringified
76
+ int.
77
+ """
78
+ try:
79
+ return cls(value).name
80
+ except ValueError:
81
+ return str(value)
61
82
 
62
83
  class UpdatedBatchJobInfo(NamedTuple):
63
84
  jobID: int
@@ -65,7 +86,8 @@ class UpdatedBatchJobInfo(NamedTuple):
65
86
  """
66
87
  The exit status (integer value) of the job. 0 implies successful.
67
88
 
68
- EXIT_STATUS_UNAVAILABLE_VALUE is used when the exit status is not available (e.g. job is lost).
89
+ EXIT_STATUS_UNAVAILABLE_VALUE is used when the exit status is not available
90
+ (e.g. job is lost, or otherwise died but actual exit code was not reported).
69
91
  """
70
92
 
71
93
  exitReason: Optional[BatchJobExitReason]
@@ -141,17 +163,19 @@ class AbstractBatchSystem(ABC):
141
163
  """
142
164
 
143
165
  @abstractmethod
144
- def issueBatchJob(self, jobDesc: JobDescription, job_environment: Optional[Dict[str, str]] = None) -> int:
166
+ def issueBatchJob(self, command: str, job_desc: JobDescription, job_environment: Optional[Dict[str, str]] = None) -> int:
145
167
  """
146
168
  Issues a job with the specified command to the batch system and returns
147
- a unique jobID.
169
+ a unique job ID number.
148
170
 
149
- :param jobDesc: a toil.job.JobDescription
171
+ :param command: the command to execute somewhere to run the Toil
172
+ worker process
173
+ :param job_desc: the JobDescription for the job being run
150
174
  :param job_environment: a collection of job-specific environment
151
- variables to be set on the worker.
175
+ variables to be set on the worker.
152
176
 
153
- :return: a unique jobID that can be used to reference the newly issued
154
- job
177
+ :return: a unique job ID number that can be used to reference the newly
178
+ issued job
155
179
  """
156
180
  raise NotImplementedError()
157
181
 
@@ -173,20 +197,20 @@ class AbstractBatchSystem(ABC):
173
197
  """
174
198
  Gets all currently issued jobs
175
199
 
176
- :return: A list of jobs (as jobIDs) currently issued (may be running, or may be
177
- waiting to be run). Despite the result being a list, the ordering should not
178
- be depended upon.
200
+ :return: A list of jobs (as job ID numbers) currently issued (may be
201
+ running, or may be waiting to be run). Despite the result being a
202
+ list, the ordering should not be depended upon.
179
203
  """
180
204
  raise NotImplementedError()
181
205
 
182
206
  @abstractmethod
183
207
  def getRunningBatchJobIDs(self) -> Dict[int, float]:
184
208
  """
185
- Gets a map of jobs as jobIDs that are currently running (not just waiting)
186
- and how long they have been running, in seconds.
209
+ Gets a map of jobs as job ID numbers that are currently running (not
210
+ just waiting) and how long they have been running, in seconds.
187
211
 
188
- :return: dictionary with currently running jobID keys and how many seconds they have
189
- been running as the value
212
+ :return: dictionary with currently running job ID number keys and how
213
+ many seconds they have been running as the value
190
214
  """
191
215
  raise NotImplementedError()
192
216
 
@@ -437,7 +461,7 @@ class BatchSystemSupport(AbstractBatchSystem):
437
461
  file_name: str = f'toil_{self.config.workflowID}.{toil_job_id}.{cluster_job_id}.{std}.log'
438
462
  logs_dir: str = self.get_batch_logs_dir()
439
463
  return os.path.join(logs_dir, file_name)
440
-
464
+
441
465
  def format_std_out_err_glob(self, toil_job_id: int) -> str:
442
466
  """
443
467
  Get a glob string that will match all file paths generated by format_std_out_err_path for a job.
@@ -445,7 +469,7 @@ class BatchSystemSupport(AbstractBatchSystem):
445
469
  file_glob: str = f'toil_{self.config.workflowID}.{toil_job_id}.*.log'
446
470
  logs_dir: str = self.get_batch_logs_dir()
447
471
  return os.path.join(logs_dir, file_glob)
448
-
472
+
449
473
  @staticmethod
450
474
  def workerCleanup(info: WorkerCleanupInfo) -> None:
451
475
  """
@@ -509,7 +533,7 @@ class AbstractScalableBatchSystem(AbstractBatchSystem):
509
533
  """
510
534
  A batch system that supports a variable number of worker nodes.
511
535
 
512
- Used by :class:`toil.provisioners.clusterScaler.ClusterScaler`
536
+ Used by :class:`toil.provisioners.clusterScaler.ClusterScaler`
513
537
  to scale the number of worker nodes in the cluster
514
538
  up or down depending on overall load.
515
539
  """