toil 5.12.0__py3-none-any.whl → 6.1.0a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (157) hide show
  1. toil/__init__.py +18 -13
  2. toil/batchSystems/abstractBatchSystem.py +21 -10
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +2 -2
  4. toil/batchSystems/awsBatch.py +14 -14
  5. toil/batchSystems/contained_executor.py +3 -3
  6. toil/batchSystems/htcondor.py +0 -1
  7. toil/batchSystems/kubernetes.py +34 -31
  8. toil/batchSystems/local_support.py +3 -1
  9. toil/batchSystems/mesos/batchSystem.py +7 -7
  10. toil/batchSystems/options.py +32 -83
  11. toil/batchSystems/registry.py +104 -23
  12. toil/batchSystems/singleMachine.py +16 -13
  13. toil/batchSystems/slurm.py +3 -3
  14. toil/batchSystems/torque.py +0 -1
  15. toil/bus.py +6 -8
  16. toil/common.py +532 -743
  17. toil/cwl/__init__.py +28 -32
  18. toil/cwl/cwltoil.py +523 -520
  19. toil/cwl/utils.py +55 -10
  20. toil/fileStores/__init__.py +2 -2
  21. toil/fileStores/abstractFileStore.py +36 -11
  22. toil/fileStores/cachingFileStore.py +607 -530
  23. toil/fileStores/nonCachingFileStore.py +43 -10
  24. toil/job.py +140 -75
  25. toil/jobStores/abstractJobStore.py +147 -79
  26. toil/jobStores/aws/jobStore.py +23 -9
  27. toil/jobStores/aws/utils.py +1 -2
  28. toil/jobStores/fileJobStore.py +117 -19
  29. toil/jobStores/googleJobStore.py +16 -7
  30. toil/jobStores/utils.py +5 -6
  31. toil/leader.py +71 -43
  32. toil/lib/accelerators.py +10 -5
  33. toil/lib/aws/__init__.py +3 -14
  34. toil/lib/aws/ami.py +22 -9
  35. toil/lib/aws/iam.py +21 -13
  36. toil/lib/aws/session.py +2 -16
  37. toil/lib/aws/utils.py +4 -5
  38. toil/lib/compatibility.py +1 -1
  39. toil/lib/conversions.py +7 -3
  40. toil/lib/docker.py +22 -23
  41. toil/lib/ec2.py +10 -6
  42. toil/lib/ec2nodes.py +106 -100
  43. toil/lib/encryption/_nacl.py +2 -1
  44. toil/lib/generatedEC2Lists.py +325 -18
  45. toil/lib/io.py +21 -0
  46. toil/lib/misc.py +1 -1
  47. toil/lib/resources.py +1 -1
  48. toil/lib/threading.py +74 -26
  49. toil/options/common.py +738 -0
  50. toil/options/cwl.py +336 -0
  51. toil/options/wdl.py +32 -0
  52. toil/provisioners/abstractProvisioner.py +1 -4
  53. toil/provisioners/aws/__init__.py +3 -6
  54. toil/provisioners/aws/awsProvisioner.py +6 -0
  55. toil/provisioners/clusterScaler.py +3 -2
  56. toil/provisioners/gceProvisioner.py +2 -2
  57. toil/realtimeLogger.py +2 -1
  58. toil/resource.py +24 -18
  59. toil/server/app.py +2 -3
  60. toil/server/cli/wes_cwl_runner.py +4 -4
  61. toil/server/utils.py +1 -1
  62. toil/server/wes/abstract_backend.py +3 -2
  63. toil/server/wes/amazon_wes_utils.py +5 -4
  64. toil/server/wes/tasks.py +2 -3
  65. toil/server/wes/toil_backend.py +2 -10
  66. toil/server/wsgi_app.py +2 -0
  67. toil/serviceManager.py +12 -10
  68. toil/statsAndLogging.py +5 -1
  69. toil/test/__init__.py +29 -54
  70. toil/test/batchSystems/batchSystemTest.py +11 -111
  71. toil/test/batchSystems/test_slurm.py +3 -2
  72. toil/test/cwl/cwlTest.py +213 -90
  73. toil/test/cwl/glob_dir.cwl +15 -0
  74. toil/test/cwl/preemptible.cwl +21 -0
  75. toil/test/cwl/preemptible_expression.cwl +28 -0
  76. toil/test/cwl/revsort.cwl +1 -1
  77. toil/test/cwl/revsort2.cwl +1 -1
  78. toil/test/docs/scriptsTest.py +0 -1
  79. toil/test/jobStores/jobStoreTest.py +27 -16
  80. toil/test/lib/aws/test_iam.py +4 -14
  81. toil/test/lib/aws/test_utils.py +0 -3
  82. toil/test/lib/dockerTest.py +4 -4
  83. toil/test/lib/test_ec2.py +11 -16
  84. toil/test/mesos/helloWorld.py +4 -5
  85. toil/test/mesos/stress.py +1 -1
  86. toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
  87. toil/test/provisioners/clusterScalerTest.py +6 -4
  88. toil/test/provisioners/clusterTest.py +14 -3
  89. toil/test/provisioners/gceProvisionerTest.py +0 -6
  90. toil/test/provisioners/restartScript.py +3 -2
  91. toil/test/server/serverTest.py +1 -1
  92. toil/test/sort/restart_sort.py +2 -1
  93. toil/test/sort/sort.py +2 -1
  94. toil/test/sort/sortTest.py +2 -13
  95. toil/test/src/autoDeploymentTest.py +45 -45
  96. toil/test/src/busTest.py +5 -5
  97. toil/test/src/checkpointTest.py +2 -2
  98. toil/test/src/deferredFunctionTest.py +1 -1
  99. toil/test/src/fileStoreTest.py +32 -16
  100. toil/test/src/helloWorldTest.py +1 -1
  101. toil/test/src/importExportFileTest.py +1 -1
  102. toil/test/src/jobDescriptionTest.py +2 -1
  103. toil/test/src/jobServiceTest.py +1 -1
  104. toil/test/src/jobTest.py +18 -18
  105. toil/test/src/miscTests.py +5 -3
  106. toil/test/src/promisedRequirementTest.py +3 -3
  107. toil/test/src/realtimeLoggerTest.py +1 -1
  108. toil/test/src/resourceTest.py +2 -2
  109. toil/test/src/restartDAGTest.py +1 -1
  110. toil/test/src/resumabilityTest.py +36 -2
  111. toil/test/src/retainTempDirTest.py +1 -1
  112. toil/test/src/systemTest.py +2 -2
  113. toil/test/src/toilContextManagerTest.py +2 -2
  114. toil/test/src/userDefinedJobArgTypeTest.py +1 -1
  115. toil/test/utils/toilDebugTest.py +98 -32
  116. toil/test/utils/toilKillTest.py +2 -2
  117. toil/test/utils/utilsTest.py +20 -0
  118. toil/test/wdl/wdltoil_test.py +148 -45
  119. toil/toilState.py +7 -6
  120. toil/utils/toilClean.py +1 -1
  121. toil/utils/toilConfig.py +36 -0
  122. toil/utils/toilDebugFile.py +60 -33
  123. toil/utils/toilDebugJob.py +39 -12
  124. toil/utils/toilDestroyCluster.py +1 -1
  125. toil/utils/toilKill.py +1 -1
  126. toil/utils/toilLaunchCluster.py +13 -2
  127. toil/utils/toilMain.py +3 -2
  128. toil/utils/toilRsyncCluster.py +1 -1
  129. toil/utils/toilSshCluster.py +1 -1
  130. toil/utils/toilStats.py +240 -143
  131. toil/utils/toilStatus.py +1 -4
  132. toil/version.py +11 -11
  133. toil/wdl/utils.py +2 -122
  134. toil/wdl/wdltoil.py +999 -386
  135. toil/worker.py +25 -31
  136. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/METADATA +60 -53
  137. toil-6.1.0a1.dist-info/RECORD +237 -0
  138. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/WHEEL +1 -1
  139. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/entry_points.txt +0 -1
  140. toil/batchSystems/parasol.py +0 -379
  141. toil/batchSystems/tes.py +0 -459
  142. toil/test/batchSystems/parasolTestSupport.py +0 -117
  143. toil/test/wdl/builtinTest.py +0 -506
  144. toil/test/wdl/conftest.py +0 -23
  145. toil/test/wdl/toilwdlTest.py +0 -522
  146. toil/wdl/toilwdl.py +0 -141
  147. toil/wdl/versions/dev.py +0 -107
  148. toil/wdl/versions/draft2.py +0 -980
  149. toil/wdl/versions/v1.py +0 -794
  150. toil/wdl/wdl_analysis.py +0 -116
  151. toil/wdl/wdl_functions.py +0 -997
  152. toil/wdl/wdl_synthesis.py +0 -1011
  153. toil/wdl/wdl_types.py +0 -243
  154. toil-5.12.0.dist-info/RECORD +0 -244
  155. /toil/{wdl/versions → options}/__init__.py +0 -0
  156. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/LICENSE +0 -0
  157. {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/top_level.txt +0 -0
@@ -27,6 +27,7 @@ from typing import (IO,
27
27
  Callable,
28
28
  ContextManager,
29
29
  Dict,
30
+ Iterable,
30
31
  Iterator,
31
32
  List,
32
33
  Optional,
@@ -42,8 +43,8 @@ if sys.version_info >= (3, 8):
42
43
  else:
43
44
  from typing_extensions import Literal
44
45
 
45
- from urllib.parse import ParseResult, urlparse
46
46
  from urllib.error import HTTPError
47
+ from urllib.parse import ParseResult, urlparse
47
48
  from urllib.request import urlopen
48
49
  from uuid import uuid4
49
50
 
@@ -536,6 +537,40 @@ class AbstractJobStore(ABC):
536
537
  executable = jobStoreFileID.executable
537
538
  otherCls._write_to_url(readable, url, executable)
538
539
 
540
+ @classmethod
541
+ def url_exists(cls, src_uri: str) -> bool:
542
+ """
543
+ Return True if the file at the given URI exists, and False otherwise.
544
+
545
+ :param src_uri: URL that points to a file or object in the storage
546
+ mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
547
+ """
548
+ parseResult = urlparse(src_uri)
549
+ otherCls = cls._findJobStoreForUrl(parseResult)
550
+ return otherCls._url_exists(parseResult)
551
+
552
+ @classmethod
553
+ def get_size(cls, src_uri: str) -> Optional[int]:
554
+ """
555
+ Get the size in bytes of the file at the given URL, or None if it cannot be obtained.
556
+
557
+ :param src_uri: URL that points to a file or object in the storage
558
+ mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
559
+ """
560
+ parseResult = urlparse(src_uri)
561
+ otherCls = cls._findJobStoreForUrl(parseResult)
562
+ return otherCls._get_size(parseResult)
563
+
564
+ @classmethod
565
+ def get_is_directory(cls, src_uri: str) -> bool:
566
+ """
567
+ Return True if the thing at the given URL is a directory, and False if
568
+ it is a file. The URL may or may not end in '/'.
569
+ """
570
+ parseResult = urlparse(src_uri)
571
+ otherCls = cls._findJobStoreForUrl(parseResult)
572
+ return otherCls._get_is_directory(parseResult)
573
+
539
574
  @classmethod
540
575
  def list_url(cls, src_uri: str) -> List[str]:
541
576
  """
@@ -562,59 +597,61 @@ class AbstractJobStore(ABC):
562
597
  return otherCls._list_url(parseResult)
563
598
 
564
599
  @classmethod
565
- def get_is_directory(cls, src_uri: str) -> bool:
566
- """
567
- Return True if the thing at the given URL is a directory, and False if
568
- it is a file. The URL may or may not end in '/'.
600
+ def read_from_url(cls, src_uri: str, writable: IO[bytes]) -> Tuple[int, bool]:
569
601
  """
570
- parseResult = urlparse(src_uri)
571
- otherCls = cls._findJobStoreForUrl(parseResult)
572
- return otherCls._get_is_directory(parseResult)
602
+ Read the given URL and write its content into the given writable stream.
573
603
 
574
- @classmethod
575
- @abstractmethod
576
- def _get_is_directory(cls, url: ParseResult) -> bool:
577
- """
578
- Return True if the thing at the given URL is a directory, and False if
579
- it is a file or it is known not to exist. The URL may or may not end in
580
- '/'.
604
+ Raises FileNotFoundError if the URL doesn't exist.
581
605
 
582
- :param url: URL that points to a file or object, or directory or prefix,
583
- in the storage mechanism of a supported URL scheme e.g. a blob
584
- in an AWS s3 bucket.
606
+ :return: The size of the file in bytes and whether the executable permission bit is set
585
607
  """
586
- raise NotImplementedError
608
+ parseResult = urlparse(src_uri)
609
+ otherCls = cls._findJobStoreForUrl(parseResult)
610
+ return otherCls._read_from_url(parseResult, writable)
587
611
 
588
612
  @classmethod
589
- def read_from_url(cls, src_uri: str, writable: IO[bytes]) -> Tuple[int, bool]:
613
+ def open_url(cls, src_uri: str) -> IO[bytes]:
590
614
  """
591
- Read the given URL and write its content into the given writable stream.
615
+ Read from the given URI.
592
616
 
593
617
  Raises FileNotFoundError if the URL doesn't exist.
594
618
 
595
- :return: The size of the file in bytes and whether the executable permission bit is set
596
- :rtype: Tuple[int, bool]
619
+ Has a readable stream interface, unlike :meth:`read_from_url` which
620
+ takes a writable stream.
597
621
  """
598
622
  parseResult = urlparse(src_uri)
599
623
  otherCls = cls._findJobStoreForUrl(parseResult)
600
- return otherCls._read_from_url(parseResult, writable)
624
+ return otherCls._open_url(parseResult)
601
625
 
602
626
  @classmethod
603
- @deprecated(new_function_name='get_size')
604
- def getSize(cls, url: ParseResult) -> None:
605
- return cls.get_size(url)
627
+ @abstractmethod
628
+ def _url_exists(cls, url: ParseResult) -> bool:
629
+ """
630
+ Return True if the item at the given URL exists, and False otherwise.
631
+ """
632
+ raise NotImplementedError(f"No implementation for {url}")
606
633
 
607
634
  @classmethod
608
635
  @abstractmethod
609
- def get_size(cls, src_uri: ParseResult) -> None:
636
+ def _get_size(cls, url: ParseResult) -> Optional[int]:
610
637
  """
611
- Get the size in bytes of the file at the given URL, or None if it cannot be obtained.
638
+ Get the size of the object at the given URL, or None if it cannot be obtained.
639
+ """
640
+ raise NotImplementedError(f"No implementation for {url}")
612
641
 
613
- :param src_uri: URL that points to a file or object in the storage
614
- mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
642
+ @classmethod
643
+ @abstractmethod
644
+ def _get_is_directory(cls, url: ParseResult) -> bool:
615
645
  """
616
- raise NotImplementedError
646
+ Return True if the thing at the given URL is a directory, and False if
647
+ it is a file or it is known not to exist. The URL may or may not end in
648
+ '/'.
617
649
 
650
+ :param url: URL that points to a file or object, or directory or prefix,
651
+ in the storage mechanism of a supported URL scheme e.g. a blob
652
+ in an AWS s3 bucket.
653
+ """
654
+ raise NotImplementedError(f"No implementation for {url}")
618
655
 
619
656
  @classmethod
620
657
  @abstractmethod
@@ -623,8 +660,6 @@ class AbstractJobStore(ABC):
623
660
  Reads the contents of the object at the specified location and writes it to the given
624
661
  writable stream.
625
662
 
626
- Raises FileNotFoundError if the URL doesn't exist.
627
-
628
663
  Refer to :func:`~AbstractJobStore.importFile` documentation for currently supported URL schemes.
629
664
 
630
665
  Raises FileNotFoundError if the thing at the URL is not found.
@@ -635,46 +670,58 @@ class AbstractJobStore(ABC):
635
670
  :param IO[bytes] writable: a writable stream
636
671
 
637
672
  :return: The size of the file in bytes and whether the executable permission bit is set
638
- :rtype: Tuple[int, bool]
639
673
  """
640
- raise NotImplementedError()
674
+ raise NotImplementedError(f"No implementation for {url}")
641
675
 
642
676
  @classmethod
643
677
  @abstractmethod
644
- def _write_to_url(cls, readable: Union[IO[bytes], IO[str]], url: ParseResult, executable: bool = False) -> None:
678
+ def _list_url(cls, url: ParseResult) -> List[str]:
645
679
  """
646
- Reads the contents of the given readable stream and writes it to the object at the
647
- specified location. Raises FileNotFoundError if the URL doesn't exist..
680
+ List the contents of the given URL, which may or may not end in '/'
648
681
 
649
- Refer to AbstractJobStore.importFile documentation for currently supported URL schemes.
682
+ Returns a list of URL components. Those that end in '/' are meant to be
683
+ directories, while those that do not are meant to be files.
650
684
 
651
- :param Union[IO[bytes], IO[str]] readable: a readable stream
685
+ Refer to :func:`~AbstractJobStore.importFile` documentation for currently supported URL schemes.
652
686
 
653
- :param ParseResult url: URL that points to a file or object in the storage
654
- mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
687
+ :param ParseResult url: URL that points to a directory or prefix in the
688
+ storage mechanism of a supported URL scheme e.g. a prefix in an AWS s3
689
+ bucket.
655
690
 
656
- :param bool executable: determines if the file has executable permissions
691
+ :return: The children of the given URL, already URL-encoded if
692
+ appropriate. (If the URL is a bare path, no encoding is done.)
657
693
  """
658
- raise NotImplementedError()
694
+ raise NotImplementedError(f"No implementation for {url}")
659
695
 
660
696
  @classmethod
661
697
  @abstractmethod
662
- def _list_url(cls, url: ParseResult) -> List[str]:
698
+ def _open_url(cls, url: ParseResult) -> IO[bytes]:
663
699
  """
664
- List the contents of the given URL, which may or may not end in '/'
665
-
666
- Returns a list of URL components. Those that end in '/' are meant to be
667
- directories, while those that do not are meant to be files.
700
+ Get a stream of the object at the specified location.
668
701
 
669
702
  Refer to :func:`~AbstractJobStore.importFile` documentation for currently supported URL schemes.
670
703
 
671
- :param ParseResult url: URL that points to a directory or prefix in the
672
- storage mechanism of a supported URL scheme e.g. a prefix in an AWS s3
673
- bucket.
704
+ Raises FileNotFoundError if the thing at the URL is not found.
705
+ """
706
+ raise NotImplementedError(f"No implementation for {url}")
674
707
 
675
- :return: The children of the given URL, already URL-encoded.
708
+ @classmethod
709
+ @abstractmethod
710
+ def _write_to_url(cls, readable: Union[IO[bytes], IO[str]], url: ParseResult, executable: bool = False) -> None:
676
711
  """
677
- raise NotImplementedError()
712
+ Reads the contents of the given readable stream and writes it to the object at the
713
+ specified location. Raises FileNotFoundError if the URL doesn't exist.
714
+
715
+ Refer to AbstractJobStore.importFile documentation for currently supported URL schemes.
716
+
717
+ :param Union[IO[bytes], IO[str]] readable: a readable stream
718
+
719
+ :param ParseResult url: URL that points to a file or object in the storage
720
+ mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
721
+
722
+ :param bool executable: determines if the file has executable permissions
723
+ """
724
+ raise NotImplementedError(f"No implementation for {url}")
678
725
 
679
726
  @classmethod
680
727
  @abstractmethod
@@ -690,7 +737,7 @@ class AbstractJobStore(ABC):
690
737
 
691
738
  :return bool: returns true if the cls supports the URL
692
739
  """
693
- raise NotImplementedError()
740
+ raise NotImplementedError(f"No implementation for {url}")
694
741
 
695
742
  @abstractmethod
696
743
  def destroy(self) -> None:
@@ -794,6 +841,10 @@ class AbstractJobStore(ABC):
794
841
  for service_jobstore_id in root_job_description.services:
795
842
  if haveJob(service_jobstore_id):
796
843
  reachable_from_root.add(service_jobstore_id)
844
+ for merged_jobstore_id in root_job_description.merged_jobs:
845
+ # Keep merged-in jobs around themselves, but don't bother
846
+ # exploring them, since we took their successors.
847
+ reachable_from_root.add(merged_jobstore_id)
797
848
 
798
849
  # Unprocessed means it might have successor jobs we need to add.
799
850
  unprocessed_job_descriptions = [root_job_description]
@@ -815,6 +866,10 @@ class AbstractJobStore(ABC):
815
866
  reachable_from_root.add(service_jobstore_id)
816
867
 
817
868
  new_job_descriptions_to_process.append(successor_job_description)
869
+ for merged_jobstore_id in job_description.merged_jobs:
870
+ # Keep merged-in jobs around themselves, but don't bother
871
+ # exploring them, since we took their successors.
872
+ reachable_from_root.add(merged_jobstore_id)
818
873
  unprocessed_job_descriptions = new_job_descriptions_to_process
819
874
 
820
875
  logger.debug(f"{len(reachable_from_root)} jobs reachable from root.")
@@ -824,8 +879,8 @@ class AbstractJobStore(ABC):
824
879
 
825
880
  # Cleanup jobs that are not reachable from the root, and therefore orphaned
826
881
  # TODO: Avoid reiterating reachable_from_root (which may be very large)
827
- jobsToDelete = [x for x in getJobDescriptions() if x.jobStoreID not in reachable_from_root]
828
- for jobDescription in jobsToDelete:
882
+ unreachable = [x for x in getJobDescriptions() if x.jobStoreID not in reachable_from_root]
883
+ for jobDescription in unreachable:
829
884
  # clean up any associated files before deletion
830
885
  for fileID in jobDescription.filesToDelete:
831
886
  # Delete any files that should already be deleted
@@ -1688,6 +1743,16 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
1688
1743
  def _supports_url(cls, url: ParseResult, export: bool = False) -> bool:
1689
1744
  return url.scheme.lower() in ('http', 'https', 'ftp') and not export
1690
1745
 
1746
+ @classmethod
1747
+ def _url_exists(cls, url: ParseResult) -> bool:
1748
+ try:
1749
+ # TODO: Figure out how to HEAD instead of this.
1750
+ with cls._open_url(url):
1751
+ return True
1752
+ except:
1753
+ pass
1754
+ return False
1755
+
1691
1756
  @classmethod
1692
1757
  @retry(
1693
1758
  errors=[
@@ -1695,7 +1760,7 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
1695
1760
  ErrorCondition(error=HTTPError, error_codes=[408, 500, 503]),
1696
1761
  ]
1697
1762
  )
1698
- def get_size(cls, url: ParseResult) -> Optional[int]:
1763
+ def _get_size(cls, url: ParseResult) -> Optional[int]:
1699
1764
  if url.scheme.lower() == 'ftp':
1700
1765
  return None
1701
1766
  with closing(urlopen(url.geturl())) as readable:
@@ -1703,6 +1768,27 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
1703
1768
  size = readable.info().get('content-length')
1704
1769
  return int(size) if size is not None else None
1705
1770
 
1771
+ @classmethod
1772
+ def _read_from_url(
1773
+ cls, url: ParseResult, writable: Union[IO[bytes], IO[str]]
1774
+ ) -> Tuple[int, bool]:
1775
+ # We can't actually retry after we start writing.
1776
+ # TODO: Implement retry with byte range requests
1777
+ with cls._open_url(url) as readable:
1778
+ # Make something to count the bytes we get
1779
+ # We need to put the actual count in a container so our
1780
+ # nested function can modify it without creating its own
1781
+ # local with the same name.
1782
+ size = [0]
1783
+ def count(l: int) -> None:
1784
+ size[0] += l
1785
+ counter = WriteWatchingStream(writable)
1786
+ counter.onWrite(count)
1787
+
1788
+ # Do the download
1789
+ shutil.copyfileobj(readable, counter)
1790
+ return size[0], False
1791
+
1706
1792
  @classmethod
1707
1793
  @retry(
1708
1794
  errors=[
@@ -1710,27 +1796,9 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
1710
1796
  ErrorCondition(error=HTTPError, error_codes=[408, 500, 503]),
1711
1797
  ]
1712
1798
  )
1713
- def _read_from_url(
1714
- cls, url: ParseResult, writable: Union[IO[bytes], IO[str]]
1715
- ) -> Tuple[int, bool]:
1716
- # We can only retry on errors that happen as responses to the request.
1717
- # If we start getting file data, and the connection drops, we fail.
1718
- # So we don't have to worry about writing the start of the file twice.
1799
+ def _open_url(cls, url: ParseResult) -> IO[bytes]:
1719
1800
  try:
1720
- with closing(urlopen(url.geturl())) as readable:
1721
- # Make something to count the bytes we get
1722
- # We need to put the actual count in a container so our
1723
- # nested function can modify it without creating its own
1724
- # local with the same name.
1725
- size = [0]
1726
- def count(l: int) -> None:
1727
- size[0] += l
1728
- counter = WriteWatchingStream(writable)
1729
- counter.onWrite(count)
1730
-
1731
- # Do the download
1732
- shutil.copyfileobj(readable, counter)
1733
- return size[0], False
1801
+ return cast(IO[bytes], closing(urlopen(url.geturl())))
1734
1802
  except HTTPError as e:
1735
1803
  if e.code == 404:
1736
1804
  # Translate into a FileNotFoundError for detecting
@@ -13,7 +13,6 @@
13
13
  # limitations under the License.
14
14
  import hashlib
15
15
  import itertools
16
- import json
17
16
  import logging
18
17
  import os
19
18
  import pickle
@@ -21,12 +20,10 @@ import re
21
20
  import reprlib
22
21
  import stat
23
22
  import time
24
- import urllib.error
25
- import urllib.request
26
23
  import uuid
27
24
  from contextlib import contextmanager
28
25
  from io import BytesIO
29
- from typing import List, Optional
26
+ from typing import List, Optional, IO
30
27
  from urllib.parse import ParseResult, parse_qs, urlencode, urlsplit, urlunsplit
31
28
 
32
29
  import boto.s3.connection
@@ -35,7 +32,6 @@ from boto.exception import SDBResponseError
35
32
  from botocore.exceptions import ClientError
36
33
 
37
34
  import toil.lib.encryption as encryption
38
- from toil.lib.aws import build_tag_dict_from_env
39
35
  from toil.fileStores import FileID
40
36
  from toil.jobStores.abstractJobStore import (AbstractJobStore,
41
37
  ConcurrentFileModificationException,
@@ -56,6 +52,7 @@ from toil.jobStores.aws.utils import (SDBHelper,
56
52
  from toil.jobStores.utils import (ReadablePipe,
57
53
  ReadableTransformingPipe,
58
54
  WritablePipe)
55
+ from toil.lib.aws import build_tag_dict_from_env
59
56
  from toil.lib.aws.session import establish_boto3_session
60
57
  from toil.lib.aws.utils import (create_s3_bucket,
61
58
  enable_public_objects,
@@ -450,7 +447,6 @@ class AWSJobStore(AbstractJobStore):
450
447
  except ServerSideCopyProhibitedError:
451
448
  # AWS refuses to do this copy for us
452
449
  logger.warning("Falling back to copying via the local machine. This could get expensive!")
453
- pass
454
450
 
455
451
  # copy if exception
456
452
  return super()._import_file(otherCls, uri, shared_file_name=shared_file_name)
@@ -465,12 +461,21 @@ class AWSJobStore(AbstractJobStore):
465
461
  except ServerSideCopyProhibitedError:
466
462
  # AWS refuses to do this copy for us
467
463
  logger.warning("Falling back to copying via the local machine. This could get expensive!")
468
- pass
469
464
  else:
470
465
  super()._default_export_file(otherCls, file_id, uri)
471
466
 
472
467
  @classmethod
473
- def get_size(cls, url):
468
+ def _url_exists(cls, url: ParseResult) -> bool:
469
+ try:
470
+ get_object_for_url(url, existing=True)
471
+ return True
472
+ except FileNotFoundError:
473
+ # Not a file
474
+ # Might be a directory.
475
+ return cls._get_is_directory(url)
476
+
477
+ @classmethod
478
+ def _get_size(cls, url):
474
479
  return get_object_for_url(url, existing=True).content_length
475
480
 
476
481
  @classmethod
@@ -482,6 +487,15 @@ class AWSJobStore(AbstractJobStore):
482
487
  False # executable bit is always False
483
488
  )
484
489
 
490
+ @classmethod
491
+ def _open_url(cls, url: ParseResult) -> IO[bytes]:
492
+ src_obj = get_object_for_url(url, existing=True)
493
+ response = src_obj.get()
494
+ # We should get back a response with a stream in 'Body'
495
+ if 'Body' not in response:
496
+ raise RuntimeError(f"Could not fetch body stream for {url}")
497
+ return response['Body']
498
+
485
499
  @classmethod
486
500
  def _write_to_url(cls, readable, url, executable=False):
487
501
  dstObj = get_object_for_url(url)
@@ -757,7 +771,7 @@ class AWSJobStore(AbstractJobStore):
757
771
  bucket_tagging.put(Tagging={'TagSet': flat_tags})
758
772
 
759
773
  # Configure bucket so that we can make objects in
760
- # it public, which was the historical default.
774
+ # it public, which was the historical default.
761
775
  enable_public_objects(bucket_name)
762
776
  elif block:
763
777
  raise
@@ -20,7 +20,7 @@ from ssl import SSLError
20
20
  from typing import Optional, cast
21
21
 
22
22
  from boto3.s3.transfer import TransferConfig
23
- from boto.exception import BotoServerError, S3ResponseError, SDBResponseError
23
+ from boto.exception import SDBResponseError
24
24
  from botocore.client import Config
25
25
  from botocore.exceptions import ClientError
26
26
  from mypy_boto3_s3 import S3Client, S3ServiceResource
@@ -285,7 +285,6 @@ class ServerSideCopyProhibitedError(RuntimeError):
285
285
  Raised when AWS refuses to perform a server-side copy between S3 keys, and
286
286
  insists that you pay to download and upload the data yourself instead.
287
287
  """
288
- pass
289
288
 
290
289
  @retry(errors=[ErrorCondition(
291
290
  error=ClientError,