toil-8.2.0-py3-none-any.whl → toil-9.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. toil/batchSystems/registry.py +15 -118
  2. toil/common.py +20 -1
  3. toil/cwl/cwltoil.py +80 -37
  4. toil/cwl/utils.py +103 -3
  5. toil/jobStores/abstractJobStore.py +11 -236
  6. toil/jobStores/aws/jobStore.py +2 -1
  7. toil/jobStores/fileJobStore.py +2 -1
  8. toil/jobStores/googleJobStore.py +7 -4
  9. toil/lib/accelerators.py +1 -1
  10. toil/lib/generatedEC2Lists.py +81 -19
  11. toil/lib/misc.py +1 -1
  12. toil/lib/plugins.py +106 -0
  13. toil/lib/url.py +320 -0
  14. toil/options/cwl.py +13 -1
  15. toil/options/runner.py +17 -10
  16. toil/options/wdl.py +12 -1
  17. toil/provisioners/aws/awsProvisioner.py +25 -2
  18. toil/server/app.py +12 -6
  19. toil/server/cli/wes_cwl_runner.py +2 -2
  20. toil/server/wes/abstract_backend.py +21 -43
  21. toil/server/wes/toil_backend.py +2 -2
  22. toil/test/__init__.py +2 -2
  23. toil/test/batchSystems/batchSystemTest.py +2 -9
  24. toil/test/batchSystems/batch_system_plugin_test.py +7 -0
  25. toil/test/cwl/cwlTest.py +181 -8
  26. toil/test/docs/scriptsTest.py +2 -1
  27. toil/test/lib/test_url.py +69 -0
  28. toil/test/lib/url_plugin_test.py +105 -0
  29. toil/test/provisioners/aws/awsProvisionerTest.py +1 -1
  30. toil/test/provisioners/clusterTest.py +15 -2
  31. toil/test/provisioners/gceProvisionerTest.py +1 -1
  32. toil/test/server/serverTest.py +78 -36
  33. toil/test/wdl/md5sum/md5sum-gs.json +1 -1
  34. toil/test/wdl/testfiles/read_file.wdl +18 -0
  35. toil/test/wdl/testfiles/url_to_optional_file.wdl +2 -1
  36. toil/test/wdl/wdltoil_test.py +74 -125
  37. toil/utils/toilSshCluster.py +23 -0
  38. toil/utils/toilUpdateEC2Instances.py +1 -0
  39. toil/version.py +9 -9
  40. toil/wdl/wdltoil.py +182 -314
  41. toil/worker.py +11 -6
  42. {toil-8.2.0.dist-info → toil-9.0.0.dist-info}/METADATA +23 -23
  43. {toil-8.2.0.dist-info → toil-9.0.0.dist-info}/RECORD +47 -42
  44. {toil-8.2.0.dist-info → toil-9.0.0.dist-info}/WHEEL +1 -1
  45. {toil-8.2.0.dist-info → toil-9.0.0.dist-info}/entry_points.txt +0 -0
  46. {toil-8.2.0.dist-info → toil-9.0.0.dist-info}/licenses/LICENSE +0 -0
  47. {toil-8.2.0.dist-info → toil-9.0.0.dist-info}/top_level.txt +0 -0
@@ -32,6 +32,7 @@ from typing import (
32
32
  Union,
33
33
  cast,
34
34
  overload,
35
+ Type,
35
36
  )
36
37
  from urllib.error import HTTPError
37
38
  from urllib.parse import ParseResult, urlparse
@@ -52,6 +53,7 @@ from toil.lib.exceptions import UnimplementedURLException
52
53
  from toil.lib.io import WriteWatchingStream
53
54
  from toil.lib.memoize import memoize
54
55
  from toil.lib.retry import ErrorCondition, retry
56
+ from toil.lib.url import URLAccess
55
57
 
56
58
  if TYPE_CHECKING:
57
59
  from toil.job import TemporaryID
@@ -354,23 +356,6 @@ class AbstractJobStore(ABC):
354
356
  jobStoreClasses.append(jobStoreClass)
355
357
  return jobStoreClasses
356
358
 
357
- @classmethod
358
- def _findJobStoreForUrl(
359
- cls, url: ParseResult, export: bool = False
360
- ) -> "AbstractJobStore":
361
- """
362
- Returns the AbstractJobStore subclass that supports the given URL.
363
-
364
- :param ParseResult url: The given URL
365
-
366
- :param bool export: Determines if the url is supported for exporting
367
-
368
- :rtype: toil.jobStore.AbstractJobStore
369
- """
370
- for implementation in cls._get_job_store_classes():
371
- if implementation._supports_url(url, export):
372
- return implementation
373
- raise UnimplementedURLException(url, "export" if export else "import")
374
359
 
375
360
  # Importing a file with a shared file name returns None, but without one it
376
361
  # returns a file ID. Explain this to MyPy.
@@ -464,7 +449,7 @@ class AbstractJobStore(ABC):
464
449
  # optimizations that circumvent this, the _import_file method should be overridden by
465
450
  # subclasses of AbstractJobStore.
466
451
  parseResult = urlparse(src_uri)
467
- otherCls = self._findJobStoreForUrl(parseResult)
452
+ otherCls = URLAccess._find_url_implementation(parseResult)
468
453
  logger.info("Importing input %s...", src_uri)
469
454
  return self._import_file(
470
455
  otherCls,
@@ -476,7 +461,7 @@ class AbstractJobStore(ABC):
476
461
 
477
462
  def _import_file(
478
463
  self,
479
- otherCls: "AbstractJobStore",
464
+ otherCls: Type["URLAccess"],
480
465
  uri: ParseResult,
481
466
  shared_file_name: Optional[str] = None,
482
467
  hardlink: bool = False,
@@ -490,7 +475,7 @@ class AbstractJobStore(ABC):
490
475
 
491
476
  Raises FileNotFoundError if the file does not exist.
492
477
 
493
- :param AbstractJobStore otherCls: The concrete subclass of AbstractJobStore that supports
478
+ :param URLAccess otherCls: The class of URLAccess that supports
494
479
  reading from the given URL and getting the file size from the URL.
495
480
 
496
481
  :param ParseResult uri: The location of the file to import.
@@ -535,16 +520,16 @@ class AbstractJobStore(ABC):
535
520
  from toil.common import Toil
536
521
  dst_uri = Toil.normalize_uri(dst_uri)
537
522
  parseResult = urlparse(dst_uri)
538
- otherCls = self._findJobStoreForUrl(parseResult, export=True)
523
+ otherCls = URLAccess._find_url_implementation(parseResult, export=True)
539
524
  self._export_file(otherCls, file_id, parseResult)
540
525
 
541
526
  def _export_file(
542
- self, otherCls: "AbstractJobStore", jobStoreFileID: FileID, url: ParseResult
527
+ self, otherCls: Type["URLAccess"], jobStoreFileID: FileID, url: ParseResult
543
528
  ) -> None:
544
529
  """
545
530
  Refer to exportFile docstring for information about this method.
546
531
 
547
- :param AbstractJobStore otherCls: The concrete subclass of AbstractJobStore that supports
532
+ :param URLAccess otherCls: The class of URLAccess that supports
548
533
  exporting to the given URL. Note that the type annotation here is not completely
549
534
  accurate. This is not an instance, it's a class, but there is no way to reflect
550
535
  that in :pep:`484` type hints.
@@ -556,12 +541,12 @@ class AbstractJobStore(ABC):
556
541
  self._default_export_file(otherCls, jobStoreFileID, url)
557
542
 
558
543
  def _default_export_file(
559
- self, otherCls: "AbstractJobStore", jobStoreFileID: FileID, url: ParseResult
544
+ self, otherCls: Type["URLAccess"], jobStoreFileID: FileID, url: ParseResult
560
545
  ) -> None:
561
546
  """
562
547
  Refer to exportFile docstring for information about this method.
563
548
 
564
- :param AbstractJobStore otherCls: The concrete subclass of AbstractJobStore that supports
549
+ :param URLAccess otherCls: The class of URLAccess that supports
565
550
  exporting to the given URL. Note that the type annotation here is not completely
566
551
  accurate. This is not an instance, it's a class, but there is no way to reflect
567
552
  that in :pep:`484` type hints.
@@ -576,216 +561,6 @@ class AbstractJobStore(ABC):
576
561
  executable = jobStoreFileID.executable
577
562
  otherCls._write_to_url(readable, url, executable)
578
563
 
579
- @classmethod
580
- def url_exists(cls, src_uri: str) -> bool:
581
- """
582
- Return True if the file at the given URI exists, and False otherwise.
583
-
584
- May raise an error if file existence cannot be determined.
585
-
586
- :param src_uri: URL that points to a file or object in the storage
587
- mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
588
- """
589
- parseResult = urlparse(src_uri)
590
- otherCls = cls._findJobStoreForUrl(parseResult)
591
- return otherCls._url_exists(parseResult)
592
-
593
- @classmethod
594
- def get_size(cls, src_uri: str) -> Optional[int]:
595
- """
596
- Get the size in bytes of the file at the given URL, or None if it cannot be obtained.
597
-
598
- :param src_uri: URL that points to a file or object in the storage
599
- mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
600
- """
601
- parseResult = urlparse(src_uri)
602
- otherCls = cls._findJobStoreForUrl(parseResult)
603
- return otherCls._get_size(parseResult)
604
-
605
- @classmethod
606
- def get_is_directory(cls, src_uri: str) -> bool:
607
- """
608
- Return True if the thing at the given URL is a directory, and False if
609
- it is a file. The URL may or may not end in '/'.
610
- """
611
- parseResult = urlparse(src_uri)
612
- otherCls = cls._findJobStoreForUrl(parseResult)
613
- return otherCls._get_is_directory(parseResult)
614
-
615
- @classmethod
616
- def list_url(cls, src_uri: str) -> list[str]:
617
- """
618
- List the directory at the given URL. Returned path components can be
619
- joined with '/' onto the passed URL to form new URLs. Those that end in
620
- '/' correspond to directories. The provided URL may or may not end with
621
- '/'.
622
-
623
- Currently supported schemes are:
624
-
625
- - 's3' for objects in Amazon S3
626
- e.g. s3://bucket/prefix/
627
-
628
- - 'file' for local files
629
- e.g. file:///local/dir/path/
630
-
631
- :param str src_uri: URL that points to a directory or prefix in the storage mechanism of a
632
- supported URL scheme e.g. a prefix in an AWS s3 bucket.
633
-
634
- :return: A list of URL components in the given directory, already URL-encoded.
635
- """
636
- parseResult = urlparse(src_uri)
637
- otherCls = cls._findJobStoreForUrl(parseResult)
638
- return otherCls._list_url(parseResult)
639
-
640
- @classmethod
641
- def read_from_url(cls, src_uri: str, writable: IO[bytes]) -> tuple[int, bool]:
642
- """
643
- Read the given URL and write its content into the given writable stream.
644
-
645
- Raises FileNotFoundError if the URL doesn't exist.
646
-
647
- :return: The size of the file in bytes and whether the executable permission bit is set
648
- """
649
- parseResult = urlparse(src_uri)
650
- otherCls = cls._findJobStoreForUrl(parseResult)
651
- return otherCls._read_from_url(parseResult, writable)
652
-
653
- @classmethod
654
- def open_url(cls, src_uri: str) -> IO[bytes]:
655
- """
656
- Read from the given URI.
657
-
658
- Raises FileNotFoundError if the URL doesn't exist.
659
-
660
- Has a readable stream interface, unlike :meth:`read_from_url` which
661
- takes a writable stream.
662
- """
663
- parseResult = urlparse(src_uri)
664
- otherCls = cls._findJobStoreForUrl(parseResult)
665
- return otherCls._open_url(parseResult)
666
-
667
- @classmethod
668
- @abstractmethod
669
- def _url_exists(cls, url: ParseResult) -> bool:
670
- """
671
- Return True if the item at the given URL exists, and Flase otherwise.
672
-
673
- May raise an error if file existence cannot be determined.
674
- """
675
- raise NotImplementedError(f"No implementation for {url}")
676
-
677
- @classmethod
678
- @abstractmethod
679
- def _get_size(cls, url: ParseResult) -> Optional[int]:
680
- """
681
- Get the size of the object at the given URL, or None if it cannot be obtained.
682
- """
683
- raise NotImplementedError(f"No implementation for {url}")
684
-
685
- @classmethod
686
- @abstractmethod
687
- def _get_is_directory(cls, url: ParseResult) -> bool:
688
- """
689
- Return True if the thing at the given URL is a directory, and False if
690
- it is a file or it is known not to exist. The URL may or may not end in
691
- '/'.
692
-
693
- :param url: URL that points to a file or object, or directory or prefix,
694
- in the storage mechanism of a supported URL scheme e.g. a blob
695
- in an AWS s3 bucket.
696
- """
697
- raise NotImplementedError(f"No implementation for {url}")
698
-
699
- @classmethod
700
- @abstractmethod
701
- def _read_from_url(cls, url: ParseResult, writable: IO[bytes]) -> tuple[int, bool]:
702
- """
703
- Reads the contents of the object at the specified location and writes it to the given
704
- writable stream.
705
-
706
- Refer to :func:`~AbstractJobStore.importFile` documentation for currently supported URL schemes.
707
-
708
- Raises FileNotFoundError if the thing at the URL is not found.
709
-
710
- :param ParseResult url: URL that points to a file or object in the storage
711
- mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
712
-
713
- :param IO[bytes] writable: a writable stream
714
-
715
- :return: The size of the file in bytes and whether the executable permission bit is set
716
- """
717
- raise NotImplementedError(f"No implementation for {url}")
718
-
719
- @classmethod
720
- @abstractmethod
721
- def _list_url(cls, url: ParseResult) -> list[str]:
722
- """
723
- List the contents of the given URL, which may or may not end in '/'
724
-
725
- Returns a list of URL components. Those that end in '/' are meant to be
726
- directories, while those that do not are meant to be files.
727
-
728
- Refer to :func:`~AbstractJobStore.importFile` documentation for currently supported URL schemes.
729
-
730
- :param ParseResult url: URL that points to a directory or prefix in the
731
- storage mechanism of a supported URL scheme e.g. a prefix in an AWS s3
732
- bucket.
733
-
734
- :return: The children of the given URL, already URL-encoded if
735
- appropriate. (If the URL is a bare path, no encoding is done.)
736
- """
737
- raise NotImplementedError(f"No implementation for {url}")
738
-
739
- @classmethod
740
- @abstractmethod
741
- def _open_url(cls, url: ParseResult) -> IO[bytes]:
742
- """
743
- Get a stream of the object at the specified location.
744
-
745
- Refer to :func:`~AbstractJobStore.importFile` documentation for currently supported URL schemes.
746
-
747
- Raises FileNotFoundError if the thing at the URL is not found.
748
- """
749
- raise NotImplementedError(f"No implementation for {url}")
750
-
751
- @classmethod
752
- @abstractmethod
753
- def _write_to_url(
754
- cls,
755
- readable: Union[IO[bytes], IO[str]],
756
- url: ParseResult,
757
- executable: bool = False,
758
- ) -> None:
759
- """
760
- Reads the contents of the given readable stream and writes it to the object at the
761
- specified location. Raises FileNotFoundError if the URL doesn't exist..
762
-
763
- Refer to AbstractJobStore.importFile documentation for currently supported URL schemes.
764
-
765
- :param Union[IO[bytes], IO[str]] readable: a readable stream
766
-
767
- :param ParseResult url: URL that points to a file or object in the storage
768
- mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
769
-
770
- :param bool executable: determines if the file has executable permissions
771
- """
772
- raise NotImplementedError(f"No implementation for {url}")
773
-
774
- @classmethod
775
- @abstractmethod
776
- def _supports_url(cls, url: ParseResult, export: bool = False) -> bool:
777
- """
778
- Returns True if the job store supports the URL's scheme.
779
-
780
- Refer to AbstractJobStore.importFile documentation for currently supported URL schemes.
781
-
782
- :param ParseResult url: a parsed URL that may be supported
783
-
784
- :param bool export: Determines if the url is supported for exported
785
-
786
- :return bool: returns true if the cls supports the URL
787
- """
788
- raise NotImplementedError(f"No implementation for {url}")
789
564
 
790
565
  @abstractmethod
791
566
  def destroy(self) -> None:
@@ -1872,7 +1647,7 @@ class AbstractJobStore(ABC):
1872
1647
  raise ValueError("Not a valid shared file name: '%s'." % sharedFileName)
1873
1648
 
1874
1649
 
1875
- class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
1650
+ class JobStoreSupport(AbstractJobStore, URLAccess, metaclass=ABCMeta):
1876
1651
  """
1877
1652
  A mostly fake JobStore to access URLs not really associated with real job
1878
1653
  stores.
@@ -75,6 +75,7 @@ from toil.lib.io import AtomicFileCreate
75
75
  from toil.lib.memoize import strict_bool
76
76
  from toil.lib.objects import InnerClass
77
77
  from toil.lib.retry import get_error_code, get_error_status, retry
78
+ from toil.lib.url import URLAccess
78
79
 
79
80
  if TYPE_CHECKING:
80
81
  from mypy_boto3_sdb.type_defs import (
@@ -110,7 +111,7 @@ class DomainDoesNotExist(Exception):
110
111
  super().__init__(f"Expected domain {domain_name} to exist!")
111
112
 
112
113
 
113
- class AWSJobStore(AbstractJobStore):
114
+ class AWSJobStore(AbstractJobStore, URLAccess):
114
115
  """
115
116
  A job store that uses Amazon's S3 for file storage and SimpleDB for storing job info and
116
117
  enforcing strong consistency on the S3 file storage. There will be SDB domains for jobs and
@@ -42,11 +42,12 @@ from toil.lib.io import (
42
42
  mkdtemp,
43
43
  robust_rmtree,
44
44
  )
45
+ from toil.lib.url import URLAccess
45
46
 
46
47
  logger = logging.getLogger(__name__)
47
48
 
48
49
 
49
- class FileJobStore(AbstractJobStore):
50
+ class FileJobStore(AbstractJobStore, URLAccess):
50
51
  """
51
52
  A job store that uses a directory on a locally attached file system. To be compatible with
52
53
  distributed batch systems, that file system must be shared by all worker nodes.
@@ -28,9 +28,10 @@ from google.api_core.exceptions import (
28
28
  InternalServerError,
29
29
  ServiceUnavailable,
30
30
  )
31
- from google.auth.exceptions import DefaultCredentialsError
31
+ from google.auth.exceptions import DefaultCredentialsError, InvalidOperation
32
32
  from google.cloud import exceptions, storage
33
33
 
34
+ from toil import memoize
34
35
  from toil.jobStores.abstractJobStore import (
35
36
  AbstractJobStore,
36
37
  JobStoreExistsException,
@@ -43,6 +44,7 @@ from toil.lib.compatibility import compat_bytes
43
44
  from toil.lib.io import AtomicFileCreate
44
45
  from toil.lib.misc import truncExpBackoff
45
46
  from toil.lib.retry import old_retry
47
+ from toil.lib.url import URLAccess
46
48
 
47
49
  log = logging.getLogger(__name__)
48
50
 
@@ -116,7 +118,7 @@ def permission_error_reporter(url: ParseResult, notes: str) -> Iterator[None]:
116
118
  """
117
119
  try:
118
120
  yield
119
- except exceptions.InvalidOperation as e:
121
+ except InvalidOperation as e:
120
122
  if "Anonymous credentials cannot be refreshed" in str(e):
121
123
  raise RuntimeError(
122
124
  "Google Storage tried to refresh anonymous credentials. "
@@ -131,7 +133,7 @@ def permission_error_reporter(url: ParseResult, notes: str) -> Iterator[None]:
131
133
 
132
134
 
133
135
 
134
- class GoogleJobStore(AbstractJobStore):
136
+ class GoogleJobStore(AbstractJobStore, URLAccess):
135
137
 
136
138
  nodeServiceAccountJson = "/root/service_account.json"
137
139
 
@@ -160,9 +162,10 @@ class GoogleJobStore(AbstractJobStore):
160
162
  self.storageClient, self.auth_notes = self.create_client()
161
163
 
162
164
  @classmethod
165
+ @memoize
163
166
  def create_client(cls) -> tuple[storage.Client, str]:
164
167
  """
165
- Produce a client for Google Sotrage with the highest level of access we can get.
168
+ Produce a client for Google Storage with the highest level of access we can get.
166
169
 
167
170
  Fall back to anonymous access if no project is available, unlike the
168
171
  Google Storage module's behavior.
toil/lib/accelerators.py CHANGED
@@ -34,7 +34,7 @@ def have_working_nvidia_smi() -> bool:
34
34
  it can fulfill a CUDARequirement.
35
35
  """
36
36
  try:
37
- subprocess.check_call(["nvidia-smi"])
37
+ subprocess.check_call(["nvidia-smi"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
38
38
  except (
39
39
  FileNotFoundError,
40
40
  PermissionError,