toil-5.12.0-py3-none-any.whl → toil-6.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164)
  1. toil/__init__.py +18 -13
  2. toil/batchSystems/abstractBatchSystem.py +39 -13
  3. toil/batchSystems/abstractGridEngineBatchSystem.py +24 -24
  4. toil/batchSystems/awsBatch.py +14 -14
  5. toil/batchSystems/cleanup_support.py +7 -3
  6. toil/batchSystems/contained_executor.py +3 -3
  7. toil/batchSystems/htcondor.py +0 -1
  8. toil/batchSystems/kubernetes.py +34 -31
  9. toil/batchSystems/local_support.py +3 -1
  10. toil/batchSystems/lsf.py +7 -7
  11. toil/batchSystems/mesos/batchSystem.py +7 -7
  12. toil/batchSystems/options.py +32 -83
  13. toil/batchSystems/registry.py +104 -23
  14. toil/batchSystems/singleMachine.py +16 -13
  15. toil/batchSystems/slurm.py +87 -16
  16. toil/batchSystems/torque.py +0 -1
  17. toil/bus.py +44 -8
  18. toil/common.py +544 -753
  19. toil/cwl/__init__.py +28 -32
  20. toil/cwl/cwltoil.py +595 -574
  21. toil/cwl/utils.py +55 -10
  22. toil/exceptions.py +1 -1
  23. toil/fileStores/__init__.py +2 -2
  24. toil/fileStores/abstractFileStore.py +88 -14
  25. toil/fileStores/cachingFileStore.py +610 -549
  26. toil/fileStores/nonCachingFileStore.py +46 -22
  27. toil/job.py +182 -101
  28. toil/jobStores/abstractJobStore.py +161 -95
  29. toil/jobStores/aws/jobStore.py +23 -9
  30. toil/jobStores/aws/utils.py +6 -6
  31. toil/jobStores/fileJobStore.py +116 -18
  32. toil/jobStores/googleJobStore.py +16 -7
  33. toil/jobStores/utils.py +5 -6
  34. toil/leader.py +87 -56
  35. toil/lib/accelerators.py +10 -5
  36. toil/lib/aws/__init__.py +3 -14
  37. toil/lib/aws/ami.py +22 -9
  38. toil/lib/aws/iam.py +21 -13
  39. toil/lib/aws/session.py +2 -16
  40. toil/lib/aws/utils.py +4 -5
  41. toil/lib/compatibility.py +1 -1
  42. toil/lib/conversions.py +26 -3
  43. toil/lib/docker.py +22 -23
  44. toil/lib/ec2.py +10 -6
  45. toil/lib/ec2nodes.py +106 -100
  46. toil/lib/encryption/_nacl.py +2 -1
  47. toil/lib/generatedEC2Lists.py +325 -18
  48. toil/lib/io.py +49 -2
  49. toil/lib/misc.py +1 -1
  50. toil/lib/resources.py +9 -2
  51. toil/lib/threading.py +101 -38
  52. toil/options/common.py +736 -0
  53. toil/options/cwl.py +336 -0
  54. toil/options/wdl.py +37 -0
  55. toil/provisioners/abstractProvisioner.py +9 -4
  56. toil/provisioners/aws/__init__.py +3 -6
  57. toil/provisioners/aws/awsProvisioner.py +6 -0
  58. toil/provisioners/clusterScaler.py +3 -2
  59. toil/provisioners/gceProvisioner.py +2 -2
  60. toil/realtimeLogger.py +2 -1
  61. toil/resource.py +24 -18
  62. toil/server/app.py +2 -3
  63. toil/server/cli/wes_cwl_runner.py +4 -4
  64. toil/server/utils.py +1 -1
  65. toil/server/wes/abstract_backend.py +3 -2
  66. toil/server/wes/amazon_wes_utils.py +5 -4
  67. toil/server/wes/tasks.py +2 -3
  68. toil/server/wes/toil_backend.py +2 -10
  69. toil/server/wsgi_app.py +2 -0
  70. toil/serviceManager.py +12 -10
  71. toil/statsAndLogging.py +41 -9
  72. toil/test/__init__.py +29 -54
  73. toil/test/batchSystems/batchSystemTest.py +11 -111
  74. toil/test/batchSystems/test_slurm.py +24 -8
  75. toil/test/cactus/__init__.py +0 -0
  76. toil/test/cactus/test_cactus_integration.py +58 -0
  77. toil/test/cwl/cwlTest.py +438 -223
  78. toil/test/cwl/glob_dir.cwl +15 -0
  79. toil/test/cwl/preemptible.cwl +21 -0
  80. toil/test/cwl/preemptible_expression.cwl +28 -0
  81. toil/test/cwl/revsort.cwl +1 -1
  82. toil/test/cwl/revsort2.cwl +1 -1
  83. toil/test/docs/scriptsTest.py +2 -3
  84. toil/test/jobStores/jobStoreTest.py +34 -21
  85. toil/test/lib/aws/test_iam.py +4 -14
  86. toil/test/lib/aws/test_utils.py +0 -3
  87. toil/test/lib/dockerTest.py +4 -4
  88. toil/test/lib/test_ec2.py +12 -17
  89. toil/test/mesos/helloWorld.py +4 -5
  90. toil/test/mesos/stress.py +1 -1
  91. toil/test/{wdl/conftest.py → options/__init__.py} +0 -10
  92. toil/test/options/options.py +37 -0
  93. toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
  94. toil/test/provisioners/clusterScalerTest.py +6 -4
  95. toil/test/provisioners/clusterTest.py +23 -11
  96. toil/test/provisioners/gceProvisionerTest.py +0 -6
  97. toil/test/provisioners/restartScript.py +3 -2
  98. toil/test/server/serverTest.py +1 -1
  99. toil/test/sort/restart_sort.py +2 -1
  100. toil/test/sort/sort.py +2 -1
  101. toil/test/sort/sortTest.py +2 -13
  102. toil/test/src/autoDeploymentTest.py +45 -45
  103. toil/test/src/busTest.py +5 -5
  104. toil/test/src/checkpointTest.py +2 -2
  105. toil/test/src/deferredFunctionTest.py +1 -1
  106. toil/test/src/fileStoreTest.py +32 -16
  107. toil/test/src/helloWorldTest.py +1 -1
  108. toil/test/src/importExportFileTest.py +1 -1
  109. toil/test/src/jobDescriptionTest.py +2 -1
  110. toil/test/src/jobServiceTest.py +1 -1
  111. toil/test/src/jobTest.py +18 -18
  112. toil/test/src/miscTests.py +5 -3
  113. toil/test/src/promisedRequirementTest.py +3 -3
  114. toil/test/src/realtimeLoggerTest.py +1 -1
  115. toil/test/src/resourceTest.py +2 -2
  116. toil/test/src/restartDAGTest.py +1 -1
  117. toil/test/src/resumabilityTest.py +36 -2
  118. toil/test/src/retainTempDirTest.py +1 -1
  119. toil/test/src/systemTest.py +2 -2
  120. toil/test/src/toilContextManagerTest.py +2 -2
  121. toil/test/src/userDefinedJobArgTypeTest.py +1 -1
  122. toil/test/utils/toilDebugTest.py +98 -32
  123. toil/test/utils/toilKillTest.py +2 -2
  124. toil/test/utils/utilsTest.py +23 -3
  125. toil/test/wdl/wdltoil_test.py +223 -45
  126. toil/toilState.py +7 -6
  127. toil/utils/toilClean.py +1 -1
  128. toil/utils/toilConfig.py +36 -0
  129. toil/utils/toilDebugFile.py +60 -33
  130. toil/utils/toilDebugJob.py +39 -12
  131. toil/utils/toilDestroyCluster.py +1 -1
  132. toil/utils/toilKill.py +1 -1
  133. toil/utils/toilLaunchCluster.py +13 -2
  134. toil/utils/toilMain.py +3 -2
  135. toil/utils/toilRsyncCluster.py +1 -1
  136. toil/utils/toilSshCluster.py +1 -1
  137. toil/utils/toilStats.py +445 -305
  138. toil/utils/toilStatus.py +2 -5
  139. toil/version.py +10 -10
  140. toil/wdl/utils.py +2 -122
  141. toil/wdl/wdltoil.py +1257 -492
  142. toil/worker.py +55 -46
  143. toil-6.1.0.dist-info/METADATA +124 -0
  144. toil-6.1.0.dist-info/RECORD +241 -0
  145. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/WHEEL +1 -1
  146. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/entry_points.txt +0 -1
  147. toil/batchSystems/parasol.py +0 -379
  148. toil/batchSystems/tes.py +0 -459
  149. toil/test/batchSystems/parasolTestSupport.py +0 -117
  150. toil/test/wdl/builtinTest.py +0 -506
  151. toil/test/wdl/toilwdlTest.py +0 -522
  152. toil/wdl/toilwdl.py +0 -141
  153. toil/wdl/versions/dev.py +0 -107
  154. toil/wdl/versions/draft2.py +0 -980
  155. toil/wdl/versions/v1.py +0 -794
  156. toil/wdl/wdl_analysis.py +0 -116
  157. toil/wdl/wdl_functions.py +0 -997
  158. toil/wdl/wdl_synthesis.py +0 -1011
  159. toil/wdl/wdl_types.py +0 -243
  160. toil-5.12.0.dist-info/METADATA +0 -118
  161. toil-5.12.0.dist-info/RECORD +0 -244
  162. /toil/{wdl/versions → options}/__init__.py +0 -0
  163. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/LICENSE +0 -0
  164. {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/top_level.txt +0 -0
--- a/toil/jobStores/abstractJobStore.py
+++ b/toil/jobStores/abstractJobStore.py
@@ -27,6 +27,7 @@ from typing import (IO,
                     Callable,
                     ContextManager,
                     Dict,
+                    Iterable,
                     Iterator,
                     List,
                     Optional,
@@ -42,8 +43,8 @@ if sys.version_info >= (3, 8):
 else:
     from typing_extensions import Literal
 
-from urllib.parse import ParseResult, urlparse
 from urllib.error import HTTPError
+from urllib.parse import ParseResult, urlparse
 from urllib.request import urlopen
 from uuid import uuid4
 
@@ -536,6 +537,40 @@ class AbstractJobStore(ABC):
         executable = jobStoreFileID.executable
         otherCls._write_to_url(readable, url, executable)
 
+    @classmethod
+    def url_exists(cls, src_uri: str) -> bool:
+        """
+        Return True if the file at the given URI exists, and False otherwise.
+
+        :param src_uri: URL that points to a file or object in the storage
+            mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
+        """
+        parseResult = urlparse(src_uri)
+        otherCls = cls._findJobStoreForUrl(parseResult)
+        return otherCls._url_exists(parseResult)
+
+    @classmethod
+    def get_size(cls, src_uri: str) -> Optional[int]:
+        """
+        Get the size in bytes of the file at the given URL, or None if it cannot be obtained.
+
+        :param src_uri: URL that points to a file or object in the storage
+            mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
+        """
+        parseResult = urlparse(src_uri)
+        otherCls = cls._findJobStoreForUrl(parseResult)
+        return otherCls._get_size(parseResult)
+
+    @classmethod
+    def get_is_directory(cls, src_uri: str) -> bool:
+        """
+        Return True if the thing at the given URL is a directory, and False if
+        it is a file. The URL may or may not end in '/'.
+        """
+        parseResult = urlparse(src_uri)
+        otherCls = cls._findJobStoreForUrl(parseResult)
+        return otherCls._get_is_directory(parseResult)
+
     @classmethod
     def list_url(cls, src_uri: str) -> List[str]:
         """
@@ -562,59 +597,61 @@
         return otherCls._list_url(parseResult)
 
     @classmethod
-    def get_is_directory(cls, src_uri: str) -> bool:
-        """
-        Return True if the thing at the given URL is a directory, and False if
-        it is a file. The URL may or may not end in '/'.
+    def read_from_url(cls, src_uri: str, writable: IO[bytes]) -> Tuple[int, bool]:
         """
-        parseResult = urlparse(src_uri)
-        otherCls = cls._findJobStoreForUrl(parseResult)
-        return otherCls._get_is_directory(parseResult)
+        Read the given URL and write its content into the given writable stream.
 
-    @classmethod
-    @abstractmethod
-    def _get_is_directory(cls, url: ParseResult) -> bool:
-        """
-        Return True if the thing at the given URL is a directory, and False if
-        it is a file or it is known not to exist. The URL may or may not end in
-        '/'.
+        Raises FileNotFoundError if the URL doesn't exist.
 
-        :param url: URL that points to a file or object, or directory or prefix,
-            in the storage mechanism of a supported URL scheme e.g. a blob
-            in an AWS s3 bucket.
+        :return: The size of the file in bytes and whether the executable permission bit is set
         """
-        raise NotImplementedError
+        parseResult = urlparse(src_uri)
+        otherCls = cls._findJobStoreForUrl(parseResult)
+        return otherCls._read_from_url(parseResult, writable)
 
     @classmethod
-    def read_from_url(cls, src_uri: str, writable: IO[bytes]) -> Tuple[int, bool]:
+    def open_url(cls, src_uri: str) -> IO[bytes]:
         """
-        Read the given URL and write its content into the given writable stream.
+        Read from the given URI.
 
         Raises FileNotFoundError if the URL doesn't exist.
 
-        :return: The size of the file in bytes and whether the executable permission bit is set
-        :rtype: Tuple[int, bool]
+        Has a readable stream interface, unlike :meth:`read_from_url` which
+        takes a writable stream.
         """
         parseResult = urlparse(src_uri)
         otherCls = cls._findJobStoreForUrl(parseResult)
-        return otherCls._read_from_url(parseResult, writable)
+        return otherCls._open_url(parseResult)
 
     @classmethod
-    @deprecated(new_function_name='get_size')
-    def getSize(cls, url: ParseResult) -> None:
-        return cls.get_size(url)
+    @abstractmethod
+    def _url_exists(cls, url: ParseResult) -> bool:
+        """
+        Return True if the item at the given URL exists, and Flase otherwise.
+        """
+        raise NotImplementedError(f"No implementation for {url}")
 
     @classmethod
     @abstractmethod
-    def get_size(cls, src_uri: ParseResult) -> None:
+    def _get_size(cls, url: ParseResult) -> Optional[int]:
         """
-        Get the size in bytes of the file at the given URL, or None if it cannot be obtained.
+        Get the size of the object at the given URL, or None if it cannot be obtained.
+        """
+        raise NotImplementedError(f"No implementation for {url}")
 
-        :param src_uri: URL that points to a file or object in the storage
-            mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
+    @classmethod
+    @abstractmethod
+    def _get_is_directory(cls, url: ParseResult) -> bool:
         """
-        raise NotImplementedError
+        Return True if the thing at the given URL is a directory, and False if
+        it is a file or it is known not to exist. The URL may or may not end in
+        '/'.
 
+        :param url: URL that points to a file or object, or directory or prefix,
+            in the storage mechanism of a supported URL scheme e.g. a blob
+            in an AWS s3 bucket.
+        """
+        raise NotImplementedError(f"No implementation for {url}")
 
     @classmethod
     @abstractmethod
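
The refactor above swaps the single copy-everything entry point for a pair: read_from_url() still pumps a whole object into a writable, while the new open_url() hands back a readable stream so callers can consume bytes lazily. A sketch of both, assuming a placeholder https URL and that the returned stream works as a context manager (the http implementation wraps urlopen in closing()):

    import shutil
    from toil.jobStores.abstractJobStore import AbstractJobStore

    uri = "https://example.com/reference.fa"  # placeholder URL

    # Whole-object copy: returns (size, executable_bit).
    with open("reference.fa", "wb") as out:
        size, executable = AbstractJobStore.read_from_url(uri, out)

    # Lazy streaming: read only what is needed.
    with AbstractJobStore.open_url(uri) as stream:
        header = stream.read(1024)
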
@@ -623,8 +660,6 @@ class AbstractJobStore(ABC):
         Reads the contents of the object at the specified location and writes it to the given
         writable stream.
 
-        Raises FileNotFoundError if the URL doesn't exist.
-
         Refer to :func:`~AbstractJobStore.importFile` documentation for currently supported URL schemes.
 
         Raises FileNotFoundError if the thing at the URL is not found.
@@ -635,46 +670,58 @@
         :param IO[bytes] writable: a writable stream
 
         :return: The size of the file in bytes and whether the executable permission bit is set
-        :rtype: Tuple[int, bool]
         """
-        raise NotImplementedError()
+        raise NotImplementedError(f"No implementation for {url}")
 
     @classmethod
     @abstractmethod
-    def _write_to_url(cls, readable: Union[IO[bytes], IO[str]], url: ParseResult, executable: bool = False) -> None:
+    def _list_url(cls, url: ParseResult) -> List[str]:
         """
-        Reads the contents of the given readable stream and writes it to the object at the
-        specified location. Raises FileNotFoundError if the URL doesn't exist..
+        List the contents of the given URL, which may or may not end in '/'
 
-        Refer to AbstractJobStore.importFile documentation for currently supported URL schemes.
+        Returns a list of URL components. Those that end in '/' are meant to be
+        directories, while those that do not are meant to be files.
 
-        :param Union[IO[bytes], IO[str]] readable: a readable stream
+        Refer to :func:`~AbstractJobStore.importFile` documentation for currently supported URL schemes.
 
-        :param ParseResult url: URL that points to a file or object in the storage
-            mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
+        :param ParseResult url: URL that points to a directory or prefix in the
+            storage mechanism of a supported URL scheme e.g. a prefix in an AWS s3
+            bucket.
 
-        :param bool executable: determines if the file has executable permissions
+        :return: The children of the given URL, already URL-encoded if
+            appropriate. (If the URL is a bare path, no encoding is done.)
         """
-        raise NotImplementedError()
+        raise NotImplementedError(f"No implementation for {url}")
 
     @classmethod
     @abstractmethod
-    def _list_url(cls, url: ParseResult) -> List[str]:
+    def _open_url(cls, url: ParseResult) -> IO[bytes]:
         """
-        List the contents of the given URL, which may or may not end in '/'
-
-        Returns a list of URL components. Those that end in '/' are meant to be
-        directories, while those that do not are meant to be files.
+        Get a stream of the object at the specified location.
 
         Refer to :func:`~AbstractJobStore.importFile` documentation for currently supported URL schemes.
 
-        :param ParseResult url: URL that points to a directory or prefix in the
-            storage mechanism of a supported URL scheme e.g. a prefix in an AWS s3
-            bucket.
+        Raises FileNotFoundError if the thing at the URL is not found.
+        """
+        raise NotImplementedError(f"No implementation for {url}")
 
-        :return: The children of the given URL, already URL-encoded.
+    @classmethod
+    @abstractmethod
+    def _write_to_url(cls, readable: Union[IO[bytes], IO[str]], url: ParseResult, executable: bool = False) -> None:
         """
-        raise NotImplementedError()
+        Reads the contents of the given readable stream and writes it to the object at the
+        specified location. Raises FileNotFoundError if the URL doesn't exist..
+
+        Refer to AbstractJobStore.importFile documentation for currently supported URL schemes.
+
+        :param Union[IO[bytes], IO[str]] readable: a readable stream
+
+        :param ParseResult url: URL that points to a file or object in the storage
+            mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
+
+        :param bool executable: determines if the file has executable permissions
+        """
+        raise NotImplementedError(f"No implementation for {url}")
 
     @classmethod
     @abstractmethod
@@ -690,7 +737,7 @@
 
         :return bool: returns true if the cls supports the URL
         """
-        raise NotImplementedError()
+        raise NotImplementedError(f"No implementation for {url}")
 
     @abstractmethod
     def destroy(self) -> None:
@@ -788,12 +835,17 @@
         root_job_description = self.load_root_job()
         reachable_from_root: Set[str] = set()
 
-        # Add first root job outside of the loop below.
-        reachable_from_root.add(str(root_job_description.jobStoreID))
+
+        for merged_in in root_job_description.get_chain():
+            # Add the job itself and any other jobs that chained with it.
+            # Keep merged-in jobs around themselves, but don't bother
+            # exploring them, since we took their successors.
+            reachable_from_root.add(merged_in.job_store_id)
         # add all of root's linked service jobs as well
-        for service_jobstore_id in root_job_description.services:
-            if haveJob(service_jobstore_id):
-                reachable_from_root.add(service_jobstore_id)
+        for service_job_store_id in root_job_description.services:
+            if haveJob(service_job_store_id):
+                reachable_from_root.add(service_job_store_id)
+
 
         # Unprocessed means it might have successor jobs we need to add.
         unprocessed_job_descriptions = [root_job_description]
@@ -801,18 +853,19 @@
         while unprocessed_job_descriptions:
             new_job_descriptions_to_process = []  # Reset.
             for job_description in unprocessed_job_descriptions:
-                for successor_jobstore_id in job_description.allSuccessors():
-                    if successor_jobstore_id not in reachable_from_root and haveJob(successor_jobstore_id):
-                        successor_job_description = getJobDescription(successor_jobstore_id)
-
-                        # Add each successor job.
-                        reachable_from_root.add(
-                            str(successor_job_description.jobStoreID)
-                        )
+                for merged_in in job_description.get_chain():
+                    # Add the job and anything chained with it.
+                    # Keep merged-in jobs around themselves, but don't bother
+                    # exploring them, since we took their successors.
+                    reachable_from_root.add(merged_in.job_store_id)
+                for successor_job_store_id in job_description.allSuccessors():
+                    if successor_job_store_id not in reachable_from_root and haveJob(successor_job_store_id):
+                        successor_job_description = getJobDescription(successor_job_store_id)
+
                         # Add all of the successor's linked service jobs as well.
-                        for service_jobstore_id in successor_job_description.services:
-                            if haveJob(service_jobstore_id):
-                                reachable_from_root.add(service_jobstore_id)
+                        for service_job_store_id in successor_job_description.services:
+                            if haveJob(service_job_store_id):
+                                reachable_from_root.add(service_job_store_id)
 
                         new_job_descriptions_to_process.append(successor_job_description)
             unprocessed_job_descriptions = new_job_descriptions_to_process
@@ -824,8 +877,8 @@
 
         # Cleanup jobs that are not reachable from the root, and therefore orphaned
         # TODO: Avoid reiterating reachable_from_root (which may be very large)
-        jobsToDelete = [x for x in getJobDescriptions() if x.jobStoreID not in reachable_from_root]
-        for jobDescription in jobsToDelete:
+        unreachable = [x for x in getJobDescriptions() if x.jobStoreID not in reachable_from_root]
+        for jobDescription in unreachable:
             # clean up any associated files before deletion
             for fileID in jobDescription.filesToDelete:
                 # Delete any files that should already be deleted
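
The orphan cleanup above is a mark-and-sweep over the job DAG, updated for 6.x job chaining: each visited JobDescription now contributes its whole get_chain() (the job plus any jobs merged into it) to the reachable set, and chained-in jobs are marked but not explored. Stripped of the service-job handling, the shape of the algorithm is roughly this sketch (helper names as in the surrounding method):

    # Mark phase: breadth-first walk from the root (sketch; services omitted).
    reachable = set()
    frontier = [root_job_description]
    while frontier:
        next_frontier = []
        for desc in frontier:
            for merged_in in desc.get_chain():
                reachable.add(merged_in.job_store_id)  # mark, but do not explore
            for succ_id in desc.allSuccessors():
                if succ_id not in reachable and haveJob(succ_id):
                    next_frontier.append(getJobDescription(succ_id))
        frontier = next_frontier

    # Sweep phase: everything unmarked is orphaned and can be deleted.
    orphans = [d for d in getJobDescriptions() if d.jobStoreID not in reachable]
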
@@ -1688,6 +1741,16 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
     def _supports_url(cls, url: ParseResult, export: bool = False) -> bool:
         return url.scheme.lower() in ('http', 'https', 'ftp') and not export
 
+    @classmethod
+    def _url_exists(cls, url: ParseResult) -> bool:
+        try:
+            # TODO: Figure out how to HEAD instead of this.
+            with cls._open_url(url):
+                return True
+        except:
+            pass
+        return False
+
     @classmethod
     @retry(
         errors=[
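
The TODO in _url_exists notes that opening the full response just to test existence is wasteful; an HTTP HEAD request would avoid transferring the body. A standard-library sketch of that alternative (head_exists is a hypothetical helper, not part of toil):

    from urllib.error import HTTPError, URLError
    from urllib.request import Request, urlopen

    def head_exists(url: str) -> bool:
        # Probe with HTTP HEAD so no body is transferred.
        try:
            with urlopen(Request(url, method="HEAD")):
                return True
        except (HTTPError, URLError):
            return False
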
@@ -1695,7 +1758,7 @@
             ErrorCondition(error=HTTPError, error_codes=[408, 500, 503]),
         ]
     )
-    def get_size(cls, url: ParseResult) -> Optional[int]:
+    def _get_size(cls, url: ParseResult) -> Optional[int]:
         if url.scheme.lower() == 'ftp':
             return None
         with closing(urlopen(url.geturl())) as readable:
@@ -1703,6 +1766,27 @@
             size = readable.info().get('content-length')
             return int(size) if size is not None else None
 
+    @classmethod
+    def _read_from_url(
+        cls, url: ParseResult, writable: Union[IO[bytes], IO[str]]
+    ) -> Tuple[int, bool]:
+        # We can't actually retry after we start writing.
+        # TODO: Implement retry with byte range requests
+        with cls._open_url(url) as readable:
+            # Make something to count the bytes we get
+            # We need to put the actual count in a container so our
+            # nested function can modify it without creating its own
+            # local with the same name.
+            size = [0]
+            def count(l: int) -> None:
+                size[0] += l
+            counter = WriteWatchingStream(writable)
+            counter.onWrite(count)
+
+            # Do the download
+            shutil.copyfileobj(readable, counter)
+            return size[0], False
+
     @classmethod
     @retry(
         errors=[
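
In the new _read_from_url, the byte count lives in a one-element list because the nested count() callback cannot rebind an enclosing local (nonlocal would be the modern alternative). The underlying idea, a stream wrapper that tallies bytes as they pass through, can be sketched independently of toil's WriteWatchingStream (CountingWriter below is a hypothetical stand-in):

    import shutil
    from io import BytesIO

    class CountingWriter:
        """Minimal stand-in for WriteWatchingStream: tallies bytes written."""
        def __init__(self, inner):
            self.inner = inner
            self.count = 0
        def write(self, data):
            self.count += len(data)
            return self.inner.write(data)

    source, sink = BytesIO(b"hello world"), BytesIO()
    counter = CountingWriter(sink)
    shutil.copyfileobj(source, counter)
    assert counter.count == 11  # size measured without a second pass over the data
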
@@ -1710,27 +1794,9 @@
             ErrorCondition(error=HTTPError, error_codes=[408, 500, 503]),
         ]
     )
-    def _read_from_url(
-        cls, url: ParseResult, writable: Union[IO[bytes], IO[str]]
-    ) -> Tuple[int, bool]:
-        # We can only retry on errors that happen as responses to the request.
-        # If we start getting file data, and the connection drops, we fail.
-        # So we don't have to worry about writing the start of the file twice.
+    def _open_url(cls, url: ParseResult) -> IO[bytes]:
         try:
-            with closing(urlopen(url.geturl())) as readable:
-                # Make something to count the bytes we get
-                # We need to put the actual count in a container so our
-                # nested function can modify it without creating its own
-                # local with the same name.
-                size = [0]
-                def count(l: int) -> None:
-                    size[0] += l
-                counter = WriteWatchingStream(writable)
-                counter.onWrite(count)
-
-                # Do the download
-                shutil.copyfileobj(readable, counter)
-                return size[0], False
+            return cast(IO[bytes], closing(urlopen(url.geturl())))
         except HTTPError as e:
             if e.code == 404:
                 # Translate into a FileNotFoundError for detecting
--- a/toil/jobStores/aws/jobStore.py
+++ b/toil/jobStores/aws/jobStore.py
@@ -13,7 +13,6 @@
 # limitations under the License.
 import hashlib
 import itertools
-import json
 import logging
 import os
 import pickle
@@ -21,12 +20,10 @@ import re
 import reprlib
 import stat
 import time
-import urllib.error
-import urllib.request
 import uuid
 from contextlib import contextmanager
 from io import BytesIO
-from typing import List, Optional
+from typing import List, Optional, IO
 from urllib.parse import ParseResult, parse_qs, urlencode, urlsplit, urlunsplit
 
 import boto.s3.connection
@@ -35,7 +32,6 @@ from boto.exception import SDBResponseError
 from botocore.exceptions import ClientError
 
 import toil.lib.encryption as encryption
-from toil.lib.aws import build_tag_dict_from_env
 from toil.fileStores import FileID
 from toil.jobStores.abstractJobStore import (AbstractJobStore,
                                              ConcurrentFileModificationException,
@@ -56,6 +52,7 @@ from toil.jobStores.aws.utils import (SDBHelper,
 from toil.jobStores.utils import (ReadablePipe,
                                   ReadableTransformingPipe,
                                   WritablePipe)
+from toil.lib.aws import build_tag_dict_from_env
 from toil.lib.aws.session import establish_boto3_session
 from toil.lib.aws.utils import (create_s3_bucket,
                                 enable_public_objects,
@@ -450,7 +447,6 @@ class AWSJobStore(AbstractJobStore):
         except ServerSideCopyProhibitedError:
             # AWS refuses to do this copy for us
             logger.warning("Falling back to copying via the local machine. This could get expensive!")
-            pass
 
         # copy if exception
         return super()._import_file(otherCls, uri, shared_file_name=shared_file_name)
@@ -465,12 +461,21 @@
         except ServerSideCopyProhibitedError:
             # AWS refuses to do this copy for us
             logger.warning("Falling back to copying via the local machine. This could get expensive!")
-            pass
         else:
             super()._default_export_file(otherCls, file_id, uri)
 
     @classmethod
-    def get_size(cls, url):
+    def _url_exists(cls, url: ParseResult) -> bool:
+        try:
+            get_object_for_url(url, existing=True)
+            return True
+        except FileNotFoundError:
+            # Not a file
+            # Might be a directory.
+            return cls._get_is_directory(url)
+
+    @classmethod
+    def _get_size(cls, url):
         return get_object_for_url(url, existing=True).content_length
 
     @classmethod
@@ -482,6 +487,15 @@
             False  # executable bit is always False
         )
 
+    @classmethod
+    def _open_url(cls, url: ParseResult) -> IO[bytes]:
+        src_obj = get_object_for_url(url, existing=True)
+        response = src_obj.get()
+        # We should get back a response with a stream in 'Body'
+        if 'Body' not in response:
+            raise RuntimeError(f"Could not fetch body stream for {url}")
+        return response['Body']
+
     @classmethod
     def _write_to_url(cls, readable, url, executable=False):
         dstObj = get_object_for_url(url)
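
AWSJobStore._open_url above hands back the botocore StreamingBody from Object.get() directly, so S3 downloads share the same lazy streaming path as the http job store. For reference, the equivalent direct boto3 interaction looks roughly like this sketch (the bucket and key are hypothetical):

    import boto3

    s3 = boto3.resource("s3")
    obj = s3.Object("example-bucket", "path/to/key")
    response = obj.get()          # raises ClientError if the object is missing
    body = response["Body"]       # botocore StreamingBody
    try:
        chunk = body.read(8192)   # stream incrementally instead of buffering it all
    finally:
        body.close()
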
@@ -757,7 +771,7 @@
                 bucket_tagging.put(Tagging={'TagSet': flat_tags})
 
             # Configure bucket so that we can make objects in
-            # it public, which was the historical default. 
+            # it public, which was the historical default.
             enable_public_objects(bucket_name)
         elif block:
             raise
--- a/toil/jobStores/aws/utils.py
+++ b/toil/jobStores/aws/utils.py
@@ -17,13 +17,12 @@ import logging
 import os
 import types
 from ssl import SSLError
-from typing import Optional, cast
+from typing import Optional, cast, TYPE_CHECKING
 
 from boto3.s3.transfer import TransferConfig
-from boto.exception import BotoServerError, S3ResponseError, SDBResponseError
+from boto.exception import SDBResponseError
 from botocore.client import Config
 from botocore.exceptions import ClientError
-from mypy_boto3_s3 import S3Client, S3ServiceResource
 
 from toil.lib.aws import session
 from toil.lib.aws.utils import connection_reset, get_bucket_region
@@ -36,6 +35,8 @@ from toil.lib.retry import (DEFAULT_DELAYS,
                             get_error_status,
                             old_retry,
                             retry)
+if TYPE_CHECKING:
+    from mypy_boto3_s3 import S3Client, S3ServiceResource
 
 logger = logging.getLogger(__name__)
 
@@ -285,13 +286,12 @@ class ServerSideCopyProhibitedError(RuntimeError):
     Raised when AWS refuses to perform a server-side copy between S3 keys, and
     insists that you pay to download and upload the data yourself instead.
     """
-    pass
 
 @retry(errors=[ErrorCondition(
     error=ClientError,
     error_codes=[404, 500, 502, 503, 504]
 )])
-def copyKeyMultipart(resource: S3ServiceResource,
+def copyKeyMultipart(resource: "S3ServiceResource",
                      srcBucketName: str,
                      srcKeyName: str,
                      srcKeyVersion: str,
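
Moving the mypy_boto3_s3 import under TYPE_CHECKING (in the earlier hunk) makes the stub package a type-check-only dependency: it is never imported at runtime, so annotations that mention S3Client or S3ServiceResource must become string literals, as in the quoted parameter annotation above and the cast() in the next hunk. The idiom in isolation, as a sketch (delete_bucket is a hypothetical function):

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Seen only by type checkers; absent at runtime.
        from mypy_boto3_s3 import S3ServiceResource

    def delete_bucket(resource: "S3ServiceResource", name: str) -> None:
        # Quoted annotation: evaluated lazily, so the stub-only import is not needed.
        resource.Bucket(name).delete()
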
@@ -347,7 +347,7 @@ def copyKeyMultipart(resource: S3ServiceResource,
     # not wherever the bucket virtual hostnames go.
     source_region = get_bucket_region(srcBucketName)
     source_client = cast(
-        S3Client,
+        "S3Client",
         session.client(
             's3',
             region_name=source_region,