toil 5.12.0__py3-none-any.whl → 6.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +18 -13
- toil/batchSystems/abstractBatchSystem.py +39 -13
- toil/batchSystems/abstractGridEngineBatchSystem.py +24 -24
- toil/batchSystems/awsBatch.py +14 -14
- toil/batchSystems/cleanup_support.py +7 -3
- toil/batchSystems/contained_executor.py +3 -3
- toil/batchSystems/htcondor.py +0 -1
- toil/batchSystems/kubernetes.py +34 -31
- toil/batchSystems/local_support.py +3 -1
- toil/batchSystems/lsf.py +7 -7
- toil/batchSystems/mesos/batchSystem.py +7 -7
- toil/batchSystems/options.py +32 -83
- toil/batchSystems/registry.py +104 -23
- toil/batchSystems/singleMachine.py +16 -13
- toil/batchSystems/slurm.py +87 -16
- toil/batchSystems/torque.py +0 -1
- toil/bus.py +44 -8
- toil/common.py +544 -753
- toil/cwl/__init__.py +28 -32
- toil/cwl/cwltoil.py +595 -574
- toil/cwl/utils.py +55 -10
- toil/exceptions.py +1 -1
- toil/fileStores/__init__.py +2 -2
- toil/fileStores/abstractFileStore.py +88 -14
- toil/fileStores/cachingFileStore.py +610 -549
- toil/fileStores/nonCachingFileStore.py +46 -22
- toil/job.py +182 -101
- toil/jobStores/abstractJobStore.py +161 -95
- toil/jobStores/aws/jobStore.py +23 -9
- toil/jobStores/aws/utils.py +6 -6
- toil/jobStores/fileJobStore.py +116 -18
- toil/jobStores/googleJobStore.py +16 -7
- toil/jobStores/utils.py +5 -6
- toil/leader.py +87 -56
- toil/lib/accelerators.py +10 -5
- toil/lib/aws/__init__.py +3 -14
- toil/lib/aws/ami.py +22 -9
- toil/lib/aws/iam.py +21 -13
- toil/lib/aws/session.py +2 -16
- toil/lib/aws/utils.py +4 -5
- toil/lib/compatibility.py +1 -1
- toil/lib/conversions.py +26 -3
- toil/lib/docker.py +22 -23
- toil/lib/ec2.py +10 -6
- toil/lib/ec2nodes.py +106 -100
- toil/lib/encryption/_nacl.py +2 -1
- toil/lib/generatedEC2Lists.py +325 -18
- toil/lib/io.py +49 -2
- toil/lib/misc.py +1 -1
- toil/lib/resources.py +9 -2
- toil/lib/threading.py +101 -38
- toil/options/common.py +736 -0
- toil/options/cwl.py +336 -0
- toil/options/wdl.py +37 -0
- toil/provisioners/abstractProvisioner.py +9 -4
- toil/provisioners/aws/__init__.py +3 -6
- toil/provisioners/aws/awsProvisioner.py +6 -0
- toil/provisioners/clusterScaler.py +3 -2
- toil/provisioners/gceProvisioner.py +2 -2
- toil/realtimeLogger.py +2 -1
- toil/resource.py +24 -18
- toil/server/app.py +2 -3
- toil/server/cli/wes_cwl_runner.py +4 -4
- toil/server/utils.py +1 -1
- toil/server/wes/abstract_backend.py +3 -2
- toil/server/wes/amazon_wes_utils.py +5 -4
- toil/server/wes/tasks.py +2 -3
- toil/server/wes/toil_backend.py +2 -10
- toil/server/wsgi_app.py +2 -0
- toil/serviceManager.py +12 -10
- toil/statsAndLogging.py +41 -9
- toil/test/__init__.py +29 -54
- toil/test/batchSystems/batchSystemTest.py +11 -111
- toil/test/batchSystems/test_slurm.py +24 -8
- toil/test/cactus/__init__.py +0 -0
- toil/test/cactus/test_cactus_integration.py +58 -0
- toil/test/cwl/cwlTest.py +438 -223
- toil/test/cwl/glob_dir.cwl +15 -0
- toil/test/cwl/preemptible.cwl +21 -0
- toil/test/cwl/preemptible_expression.cwl +28 -0
- toil/test/cwl/revsort.cwl +1 -1
- toil/test/cwl/revsort2.cwl +1 -1
- toil/test/docs/scriptsTest.py +2 -3
- toil/test/jobStores/jobStoreTest.py +34 -21
- toil/test/lib/aws/test_iam.py +4 -14
- toil/test/lib/aws/test_utils.py +0 -3
- toil/test/lib/dockerTest.py +4 -4
- toil/test/lib/test_ec2.py +12 -17
- toil/test/mesos/helloWorld.py +4 -5
- toil/test/mesos/stress.py +1 -1
- toil/test/{wdl/conftest.py → options/__init__.py} +0 -10
- toil/test/options/options.py +37 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
- toil/test/provisioners/clusterScalerTest.py +6 -4
- toil/test/provisioners/clusterTest.py +23 -11
- toil/test/provisioners/gceProvisionerTest.py +0 -6
- toil/test/provisioners/restartScript.py +3 -2
- toil/test/server/serverTest.py +1 -1
- toil/test/sort/restart_sort.py +2 -1
- toil/test/sort/sort.py +2 -1
- toil/test/sort/sortTest.py +2 -13
- toil/test/src/autoDeploymentTest.py +45 -45
- toil/test/src/busTest.py +5 -5
- toil/test/src/checkpointTest.py +2 -2
- toil/test/src/deferredFunctionTest.py +1 -1
- toil/test/src/fileStoreTest.py +32 -16
- toil/test/src/helloWorldTest.py +1 -1
- toil/test/src/importExportFileTest.py +1 -1
- toil/test/src/jobDescriptionTest.py +2 -1
- toil/test/src/jobServiceTest.py +1 -1
- toil/test/src/jobTest.py +18 -18
- toil/test/src/miscTests.py +5 -3
- toil/test/src/promisedRequirementTest.py +3 -3
- toil/test/src/realtimeLoggerTest.py +1 -1
- toil/test/src/resourceTest.py +2 -2
- toil/test/src/restartDAGTest.py +1 -1
- toil/test/src/resumabilityTest.py +36 -2
- toil/test/src/retainTempDirTest.py +1 -1
- toil/test/src/systemTest.py +2 -2
- toil/test/src/toilContextManagerTest.py +2 -2
- toil/test/src/userDefinedJobArgTypeTest.py +1 -1
- toil/test/utils/toilDebugTest.py +98 -32
- toil/test/utils/toilKillTest.py +2 -2
- toil/test/utils/utilsTest.py +23 -3
- toil/test/wdl/wdltoil_test.py +223 -45
- toil/toilState.py +7 -6
- toil/utils/toilClean.py +1 -1
- toil/utils/toilConfig.py +36 -0
- toil/utils/toilDebugFile.py +60 -33
- toil/utils/toilDebugJob.py +39 -12
- toil/utils/toilDestroyCluster.py +1 -1
- toil/utils/toilKill.py +1 -1
- toil/utils/toilLaunchCluster.py +13 -2
- toil/utils/toilMain.py +3 -2
- toil/utils/toilRsyncCluster.py +1 -1
- toil/utils/toilSshCluster.py +1 -1
- toil/utils/toilStats.py +445 -305
- toil/utils/toilStatus.py +2 -5
- toil/version.py +10 -10
- toil/wdl/utils.py +2 -122
- toil/wdl/wdltoil.py +1257 -492
- toil/worker.py +55 -46
- toil-6.1.0.dist-info/METADATA +124 -0
- toil-6.1.0.dist-info/RECORD +241 -0
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/WHEEL +1 -1
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/entry_points.txt +0 -1
- toil/batchSystems/parasol.py +0 -379
- toil/batchSystems/tes.py +0 -459
- toil/test/batchSystems/parasolTestSupport.py +0 -117
- toil/test/wdl/builtinTest.py +0 -506
- toil/test/wdl/toilwdlTest.py +0 -522
- toil/wdl/toilwdl.py +0 -141
- toil/wdl/versions/dev.py +0 -107
- toil/wdl/versions/draft2.py +0 -980
- toil/wdl/versions/v1.py +0 -794
- toil/wdl/wdl_analysis.py +0 -116
- toil/wdl/wdl_functions.py +0 -997
- toil/wdl/wdl_synthesis.py +0 -1011
- toil/wdl/wdl_types.py +0 -243
- toil-5.12.0.dist-info/METADATA +0 -118
- toil-5.12.0.dist-info/RECORD +0 -244
- /toil/{wdl/versions → options}/__init__.py +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/LICENSE +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0.dist-info}/top_level.txt +0 -0
|
@@ -27,6 +27,7 @@ from typing import (IO,
|
|
|
27
27
|
Callable,
|
|
28
28
|
ContextManager,
|
|
29
29
|
Dict,
|
|
30
|
+
Iterable,
|
|
30
31
|
Iterator,
|
|
31
32
|
List,
|
|
32
33
|
Optional,
|
|
@@ -42,8 +43,8 @@ if sys.version_info >= (3, 8):
|
|
|
42
43
|
else:
|
|
43
44
|
from typing_extensions import Literal
|
|
44
45
|
|
|
45
|
-
from urllib.parse import ParseResult, urlparse
|
|
46
46
|
from urllib.error import HTTPError
|
|
47
|
+
from urllib.parse import ParseResult, urlparse
|
|
47
48
|
from urllib.request import urlopen
|
|
48
49
|
from uuid import uuid4
|
|
49
50
|
|
|
@@ -536,6 +537,40 @@ class AbstractJobStore(ABC):
|
|
|
536
537
|
executable = jobStoreFileID.executable
|
|
537
538
|
otherCls._write_to_url(readable, url, executable)
|
|
538
539
|
|
|
540
|
+
@classmethod
|
|
541
|
+
def url_exists(cls, src_uri: str) -> bool:
|
|
542
|
+
"""
|
|
543
|
+
Return True if the file at the given URI exists, and False otherwise.
|
|
544
|
+
|
|
545
|
+
:param src_uri: URL that points to a file or object in the storage
|
|
546
|
+
mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
|
|
547
|
+
"""
|
|
548
|
+
parseResult = urlparse(src_uri)
|
|
549
|
+
otherCls = cls._findJobStoreForUrl(parseResult)
|
|
550
|
+
return otherCls._url_exists(parseResult)
|
|
551
|
+
|
|
552
|
+
@classmethod
|
|
553
|
+
def get_size(cls, src_uri: str) -> Optional[int]:
|
|
554
|
+
"""
|
|
555
|
+
Get the size in bytes of the file at the given URL, or None if it cannot be obtained.
|
|
556
|
+
|
|
557
|
+
:param src_uri: URL that points to a file or object in the storage
|
|
558
|
+
mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
|
|
559
|
+
"""
|
|
560
|
+
parseResult = urlparse(src_uri)
|
|
561
|
+
otherCls = cls._findJobStoreForUrl(parseResult)
|
|
562
|
+
return otherCls._get_size(parseResult)
|
|
563
|
+
|
|
564
|
+
@classmethod
|
|
565
|
+
def get_is_directory(cls, src_uri: str) -> bool:
|
|
566
|
+
"""
|
|
567
|
+
Return True if the thing at the given URL is a directory, and False if
|
|
568
|
+
it is a file. The URL may or may not end in '/'.
|
|
569
|
+
"""
|
|
570
|
+
parseResult = urlparse(src_uri)
|
|
571
|
+
otherCls = cls._findJobStoreForUrl(parseResult)
|
|
572
|
+
return otherCls._get_is_directory(parseResult)
|
|
573
|
+
|
|
539
574
|
@classmethod
|
|
540
575
|
def list_url(cls, src_uri: str) -> List[str]:
|
|
541
576
|
"""
|
|
@@ -562,59 +597,61 @@ class AbstractJobStore(ABC):
|
|
|
562
597
|
return otherCls._list_url(parseResult)
|
|
563
598
|
|
|
564
599
|
@classmethod
|
|
565
|
-
def
|
|
566
|
-
"""
|
|
567
|
-
Return True if the thing at the given URL is a directory, and False if
|
|
568
|
-
it is a file. The URL may or may not end in '/'.
|
|
600
|
+
def read_from_url(cls, src_uri: str, writable: IO[bytes]) -> Tuple[int, bool]:
|
|
569
601
|
"""
|
|
570
|
-
|
|
571
|
-
otherCls = cls._findJobStoreForUrl(parseResult)
|
|
572
|
-
return otherCls._get_is_directory(parseResult)
|
|
602
|
+
Read the given URL and write its content into the given writable stream.
|
|
573
603
|
|
|
574
|
-
|
|
575
|
-
@abstractmethod
|
|
576
|
-
def _get_is_directory(cls, url: ParseResult) -> bool:
|
|
577
|
-
"""
|
|
578
|
-
Return True if the thing at the given URL is a directory, and False if
|
|
579
|
-
it is a file or it is known not to exist. The URL may or may not end in
|
|
580
|
-
'/'.
|
|
604
|
+
Raises FileNotFoundError if the URL doesn't exist.
|
|
581
605
|
|
|
582
|
-
:
|
|
583
|
-
in the storage mechanism of a supported URL scheme e.g. a blob
|
|
584
|
-
in an AWS s3 bucket.
|
|
606
|
+
:return: The size of the file in bytes and whether the executable permission bit is set
|
|
585
607
|
"""
|
|
586
|
-
|
|
608
|
+
parseResult = urlparse(src_uri)
|
|
609
|
+
otherCls = cls._findJobStoreForUrl(parseResult)
|
|
610
|
+
return otherCls._read_from_url(parseResult, writable)
|
|
587
611
|
|
|
588
612
|
@classmethod
|
|
589
|
-
def
|
|
613
|
+
def open_url(cls, src_uri: str) -> IO[bytes]:
|
|
590
614
|
"""
|
|
591
|
-
Read
|
|
615
|
+
Read from the given URI.
|
|
592
616
|
|
|
593
617
|
Raises FileNotFoundError if the URL doesn't exist.
|
|
594
618
|
|
|
595
|
-
|
|
596
|
-
|
|
619
|
+
Has a readable stream interface, unlike :meth:`read_from_url` which
|
|
620
|
+
takes a writable stream.
|
|
597
621
|
"""
|
|
598
622
|
parseResult = urlparse(src_uri)
|
|
599
623
|
otherCls = cls._findJobStoreForUrl(parseResult)
|
|
600
|
-
return otherCls.
|
|
624
|
+
return otherCls._open_url(parseResult)
|
|
601
625
|
|
|
602
626
|
@classmethod
|
|
603
|
-
@
|
|
604
|
-
def
|
|
605
|
-
|
|
627
|
+
@abstractmethod
|
|
628
|
+
def _url_exists(cls, url: ParseResult) -> bool:
|
|
629
|
+
"""
|
|
630
|
+
Return True if the item at the given URL exists, and False otherwise.
|
|
631
|
+
"""
|
|
632
|
+
raise NotImplementedError(f"No implementation for {url}")
|
|
606
633
|
|
|
607
634
|
@classmethod
|
|
608
635
|
@abstractmethod
|
|
609
|
-
def
|
|
636
|
+
def _get_size(cls, url: ParseResult) -> Optional[int]:
|
|
610
637
|
"""
|
|
611
|
-
Get the size
|
|
638
|
+
Get the size of the object at the given URL, or None if it cannot be obtained.
|
|
639
|
+
"""
|
|
640
|
+
raise NotImplementedError(f"No implementation for {url}")
|
|
612
641
|
|
|
613
|
-
|
|
614
|
-
|
|
642
|
+
@classmethod
|
|
643
|
+
@abstractmethod
|
|
644
|
+
def _get_is_directory(cls, url: ParseResult) -> bool:
|
|
615
645
|
"""
|
|
616
|
-
|
|
646
|
+
Return True if the thing at the given URL is a directory, and False if
|
|
647
|
+
it is a file or it is known not to exist. The URL may or may not end in
|
|
648
|
+
'/'.
|
|
617
649
|
|
|
650
|
+
:param url: URL that points to a file or object, or directory or prefix,
|
|
651
|
+
in the storage mechanism of a supported URL scheme e.g. a blob
|
|
652
|
+
in an AWS s3 bucket.
|
|
653
|
+
"""
|
|
654
|
+
raise NotImplementedError(f"No implementation for {url}")
|
|
618
655
|
|
|
619
656
|
@classmethod
|
|
620
657
|
@abstractmethod
|
|
@@ -623,8 +660,6 @@ class AbstractJobStore(ABC):
|
|
|
623
660
|
Reads the contents of the object at the specified location and writes it to the given
|
|
624
661
|
writable stream.
|
|
625
662
|
|
|
626
|
-
Raises FileNotFoundError if the URL doesn't exist.
|
|
627
|
-
|
|
628
663
|
Refer to :func:`~AbstractJobStore.importFile` documentation for currently supported URL schemes.
|
|
629
664
|
|
|
630
665
|
Raises FileNotFoundError if the thing at the URL is not found.
|
|
@@ -635,46 +670,58 @@ class AbstractJobStore(ABC):
|
|
|
635
670
|
:param IO[bytes] writable: a writable stream
|
|
636
671
|
|
|
637
672
|
:return: The size of the file in bytes and whether the executable permission bit is set
|
|
638
|
-
:rtype: Tuple[int, bool]
|
|
639
673
|
"""
|
|
640
|
-
raise NotImplementedError()
|
|
674
|
+
raise NotImplementedError(f"No implementation for {url}")
|
|
641
675
|
|
|
642
676
|
@classmethod
|
|
643
677
|
@abstractmethod
|
|
644
|
-
def
|
|
678
|
+
def _list_url(cls, url: ParseResult) -> List[str]:
|
|
645
679
|
"""
|
|
646
|
-
|
|
647
|
-
specified location. Raises FileNotFoundError if the URL doesn't exist..
|
|
680
|
+
List the contents of the given URL, which may or may not end in '/'
|
|
648
681
|
|
|
649
|
-
|
|
682
|
+
Returns a list of URL components. Those that end in '/' are meant to be
|
|
683
|
+
directories, while those that do not are meant to be files.
|
|
650
684
|
|
|
651
|
-
:
|
|
685
|
+
Refer to :func:`~AbstractJobStore.importFile` documentation for currently supported URL schemes.
|
|
652
686
|
|
|
653
|
-
:param ParseResult url: URL that points to a
|
|
654
|
-
|
|
687
|
+
:param ParseResult url: URL that points to a directory or prefix in the
|
|
688
|
+
storage mechanism of a supported URL scheme e.g. a prefix in an AWS s3
|
|
689
|
+
bucket.
|
|
655
690
|
|
|
656
|
-
:
|
|
691
|
+
:return: The children of the given URL, already URL-encoded if
|
|
692
|
+
appropriate. (If the URL is a bare path, no encoding is done.)
|
|
657
693
|
"""
|
|
658
|
-
raise NotImplementedError()
|
|
694
|
+
raise NotImplementedError(f"No implementation for {url}")
|
|
659
695
|
|
|
660
696
|
@classmethod
|
|
661
697
|
@abstractmethod
|
|
662
|
-
def
|
|
698
|
+
def _open_url(cls, url: ParseResult) -> IO[bytes]:
|
|
663
699
|
"""
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
Returns a list of URL components. Those that end in '/' are meant to be
|
|
667
|
-
directories, while those that do not are meant to be files.
|
|
700
|
+
Get a stream of the object at the specified location.
|
|
668
701
|
|
|
669
702
|
Refer to :func:`~AbstractJobStore.importFile` documentation for currently supported URL schemes.
|
|
670
703
|
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
704
|
+
Raises FileNotFoundError if the thing at the URL is not found.
|
|
705
|
+
"""
|
|
706
|
+
raise NotImplementedError(f"No implementation for {url}")
|
|
674
707
|
|
|
675
|
-
|
|
708
|
+
@classmethod
|
|
709
|
+
@abstractmethod
|
|
710
|
+
def _write_to_url(cls, readable: Union[IO[bytes], IO[str]], url: ParseResult, executable: bool = False) -> None:
|
|
676
711
|
"""
|
|
677
|
-
|
|
712
|
+
Reads the contents of the given readable stream and writes it to the object at the
|
|
713
|
+
specified location. Raises FileNotFoundError if the URL doesn't exist.
|
|
714
|
+
|
|
715
|
+
Refer to AbstractJobStore.importFile documentation for currently supported URL schemes.
|
|
716
|
+
|
|
717
|
+
:param Union[IO[bytes], IO[str]] readable: a readable stream
|
|
718
|
+
|
|
719
|
+
:param ParseResult url: URL that points to a file or object in the storage
|
|
720
|
+
mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
|
|
721
|
+
|
|
722
|
+
:param bool executable: determines if the file has executable permissions
|
|
723
|
+
"""
|
|
724
|
+
raise NotImplementedError(f"No implementation for {url}")
|
|
678
725
|
|
|
679
726
|
@classmethod
|
|
680
727
|
@abstractmethod
|
|
@@ -690,7 +737,7 @@ class AbstractJobStore(ABC):
|
|
|
690
737
|
|
|
691
738
|
:return bool: returns true if the cls supports the URL
|
|
692
739
|
"""
|
|
693
|
-
raise NotImplementedError()
|
|
740
|
+
raise NotImplementedError(f"No implementation for {url}")
|
|
694
741
|
|
|
695
742
|
@abstractmethod
|
|
696
743
|
def destroy(self) -> None:
|
|
@@ -788,12 +835,17 @@ class AbstractJobStore(ABC):
|
|
|
788
835
|
root_job_description = self.load_root_job()
|
|
789
836
|
reachable_from_root: Set[str] = set()
|
|
790
837
|
|
|
791
|
-
|
|
792
|
-
|
|
838
|
+
|
|
839
|
+
for merged_in in root_job_description.get_chain():
|
|
840
|
+
# Add the job itself and any other jobs that chained with it.
|
|
841
|
+
# Keep merged-in jobs around themselves, but don't bother
|
|
842
|
+
# exploring them, since we took their successors.
|
|
843
|
+
reachable_from_root.add(merged_in.job_store_id)
|
|
793
844
|
# add all of root's linked service jobs as well
|
|
794
|
-
for
|
|
795
|
-
if haveJob(
|
|
796
|
-
reachable_from_root.add(
|
|
845
|
+
for service_job_store_id in root_job_description.services:
|
|
846
|
+
if haveJob(service_job_store_id):
|
|
847
|
+
reachable_from_root.add(service_job_store_id)
|
|
848
|
+
|
|
797
849
|
|
|
798
850
|
# Unprocessed means it might have successor jobs we need to add.
|
|
799
851
|
unprocessed_job_descriptions = [root_job_description]
|
|
@@ -801,18 +853,19 @@ class AbstractJobStore(ABC):
|
|
|
801
853
|
while unprocessed_job_descriptions:
|
|
802
854
|
new_job_descriptions_to_process = [] # Reset.
|
|
803
855
|
for job_description in unprocessed_job_descriptions:
|
|
804
|
-
for
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
|
|
808
|
-
|
|
809
|
-
|
|
810
|
-
|
|
811
|
-
)
|
|
856
|
+
for merged_in in job_description.get_chain():
|
|
857
|
+
# Add the job and anything chained with it.
|
|
858
|
+
# Keep merged-in jobs around themselves, but don't bother
|
|
859
|
+
# exploring them, since we took their successors.
|
|
860
|
+
reachable_from_root.add(merged_in.job_store_id)
|
|
861
|
+
for successor_job_store_id in job_description.allSuccessors():
|
|
862
|
+
if successor_job_store_id not in reachable_from_root and haveJob(successor_job_store_id):
|
|
863
|
+
successor_job_description = getJobDescription(successor_job_store_id)
|
|
864
|
+
|
|
812
865
|
# Add all of the successor's linked service jobs as well.
|
|
813
|
-
for
|
|
814
|
-
if haveJob(
|
|
815
|
-
reachable_from_root.add(
|
|
866
|
+
for service_job_store_id in successor_job_description.services:
|
|
867
|
+
if haveJob(service_job_store_id):
|
|
868
|
+
reachable_from_root.add(service_job_store_id)
|
|
816
869
|
|
|
817
870
|
new_job_descriptions_to_process.append(successor_job_description)
|
|
818
871
|
unprocessed_job_descriptions = new_job_descriptions_to_process
|
|
@@ -824,8 +877,8 @@ class AbstractJobStore(ABC):
|
|
|
824
877
|
|
|
825
878
|
# Cleanup jobs that are not reachable from the root, and therefore orphaned
|
|
826
879
|
# TODO: Avoid reiterating reachable_from_root (which may be very large)
|
|
827
|
-
|
|
828
|
-
for jobDescription in
|
|
880
|
+
unreachable = [x for x in getJobDescriptions() if x.jobStoreID not in reachable_from_root]
|
|
881
|
+
for jobDescription in unreachable:
|
|
829
882
|
# clean up any associated files before deletion
|
|
830
883
|
for fileID in jobDescription.filesToDelete:
|
|
831
884
|
# Delete any files that should already be deleted
|
|
@@ -1688,6 +1741,16 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
|
|
|
1688
1741
|
def _supports_url(cls, url: ParseResult, export: bool = False) -> bool:
|
|
1689
1742
|
return url.scheme.lower() in ('http', 'https', 'ftp') and not export
|
|
1690
1743
|
|
|
1744
|
+
@classmethod
|
|
1745
|
+
def _url_exists(cls, url: ParseResult) -> bool:
|
|
1746
|
+
try:
|
|
1747
|
+
# TODO: Figure out how to HEAD instead of this.
|
|
1748
|
+
with cls._open_url(url):
|
|
1749
|
+
return True
|
|
1750
|
+
except:
|
|
1751
|
+
pass
|
|
1752
|
+
return False
|
|
1753
|
+
|
|
1691
1754
|
@classmethod
|
|
1692
1755
|
@retry(
|
|
1693
1756
|
errors=[
|
|
@@ -1695,7 +1758,7 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
|
|
|
1695
1758
|
ErrorCondition(error=HTTPError, error_codes=[408, 500, 503]),
|
|
1696
1759
|
]
|
|
1697
1760
|
)
|
|
1698
|
-
def
|
|
1761
|
+
def _get_size(cls, url: ParseResult) -> Optional[int]:
|
|
1699
1762
|
if url.scheme.lower() == 'ftp':
|
|
1700
1763
|
return None
|
|
1701
1764
|
with closing(urlopen(url.geturl())) as readable:
|
|
@@ -1703,6 +1766,27 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
|
|
|
1703
1766
|
size = readable.info().get('content-length')
|
|
1704
1767
|
return int(size) if size is not None else None
|
|
1705
1768
|
|
|
1769
|
+
@classmethod
|
|
1770
|
+
def _read_from_url(
|
|
1771
|
+
cls, url: ParseResult, writable: Union[IO[bytes], IO[str]]
|
|
1772
|
+
) -> Tuple[int, bool]:
|
|
1773
|
+
# We can't actually retry after we start writing.
|
|
1774
|
+
# TODO: Implement retry with byte range requests
|
|
1775
|
+
with cls._open_url(url) as readable:
|
|
1776
|
+
# Make something to count the bytes we get
|
|
1777
|
+
# We need to put the actual count in a container so our
|
|
1778
|
+
# nested function can modify it without creating its own
|
|
1779
|
+
# local with the same name.
|
|
1780
|
+
size = [0]
|
|
1781
|
+
def count(l: int) -> None:
|
|
1782
|
+
size[0] += l
|
|
1783
|
+
counter = WriteWatchingStream(writable)
|
|
1784
|
+
counter.onWrite(count)
|
|
1785
|
+
|
|
1786
|
+
# Do the download
|
|
1787
|
+
shutil.copyfileobj(readable, counter)
|
|
1788
|
+
return size[0], False
|
|
1789
|
+
|
|
1706
1790
|
@classmethod
|
|
1707
1791
|
@retry(
|
|
1708
1792
|
errors=[
|
|
@@ -1710,27 +1794,9 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
|
|
|
1710
1794
|
ErrorCondition(error=HTTPError, error_codes=[408, 500, 503]),
|
|
1711
1795
|
]
|
|
1712
1796
|
)
|
|
1713
|
-
def
|
|
1714
|
-
cls, url: ParseResult, writable: Union[IO[bytes], IO[str]]
|
|
1715
|
-
) -> Tuple[int, bool]:
|
|
1716
|
-
# We can only retry on errors that happen as responses to the request.
|
|
1717
|
-
# If we start getting file data, and the connection drops, we fail.
|
|
1718
|
-
# So we don't have to worry about writing the start of the file twice.
|
|
1797
|
+
def _open_url(cls, url: ParseResult) -> IO[bytes]:
|
|
1719
1798
|
try:
|
|
1720
|
-
|
|
1721
|
-
# Make something to count the bytes we get
|
|
1722
|
-
# We need to put the actual count in a container so our
|
|
1723
|
-
# nested function can modify it without creating its own
|
|
1724
|
-
# local with the same name.
|
|
1725
|
-
size = [0]
|
|
1726
|
-
def count(l: int) -> None:
|
|
1727
|
-
size[0] += l
|
|
1728
|
-
counter = WriteWatchingStream(writable)
|
|
1729
|
-
counter.onWrite(count)
|
|
1730
|
-
|
|
1731
|
-
# Do the download
|
|
1732
|
-
shutil.copyfileobj(readable, counter)
|
|
1733
|
-
return size[0], False
|
|
1799
|
+
return cast(IO[bytes], closing(urlopen(url.geturl())))
|
|
1734
1800
|
except HTTPError as e:
|
|
1735
1801
|
if e.code == 404:
|
|
1736
1802
|
# Translate into a FileNotFoundError for detecting
|
toil/jobStores/aws/jobStore.py
CHANGED
|
@@ -13,7 +13,6 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
import hashlib
|
|
15
15
|
import itertools
|
|
16
|
-
import json
|
|
17
16
|
import logging
|
|
18
17
|
import os
|
|
19
18
|
import pickle
|
|
@@ -21,12 +20,10 @@ import re
|
|
|
21
20
|
import reprlib
|
|
22
21
|
import stat
|
|
23
22
|
import time
|
|
24
|
-
import urllib.error
|
|
25
|
-
import urllib.request
|
|
26
23
|
import uuid
|
|
27
24
|
from contextlib import contextmanager
|
|
28
25
|
from io import BytesIO
|
|
29
|
-
from typing import List, Optional
|
|
26
|
+
from typing import List, Optional, IO
|
|
30
27
|
from urllib.parse import ParseResult, parse_qs, urlencode, urlsplit, urlunsplit
|
|
31
28
|
|
|
32
29
|
import boto.s3.connection
|
|
@@ -35,7 +32,6 @@ from boto.exception import SDBResponseError
|
|
|
35
32
|
from botocore.exceptions import ClientError
|
|
36
33
|
|
|
37
34
|
import toil.lib.encryption as encryption
|
|
38
|
-
from toil.lib.aws import build_tag_dict_from_env
|
|
39
35
|
from toil.fileStores import FileID
|
|
40
36
|
from toil.jobStores.abstractJobStore import (AbstractJobStore,
|
|
41
37
|
ConcurrentFileModificationException,
|
|
@@ -56,6 +52,7 @@ from toil.jobStores.aws.utils import (SDBHelper,
|
|
|
56
52
|
from toil.jobStores.utils import (ReadablePipe,
|
|
57
53
|
ReadableTransformingPipe,
|
|
58
54
|
WritablePipe)
|
|
55
|
+
from toil.lib.aws import build_tag_dict_from_env
|
|
59
56
|
from toil.lib.aws.session import establish_boto3_session
|
|
60
57
|
from toil.lib.aws.utils import (create_s3_bucket,
|
|
61
58
|
enable_public_objects,
|
|
@@ -450,7 +447,6 @@ class AWSJobStore(AbstractJobStore):
|
|
|
450
447
|
except ServerSideCopyProhibitedError:
|
|
451
448
|
# AWS refuses to do this copy for us
|
|
452
449
|
logger.warning("Falling back to copying via the local machine. This could get expensive!")
|
|
453
|
-
pass
|
|
454
450
|
|
|
455
451
|
# copy if exception
|
|
456
452
|
return super()._import_file(otherCls, uri, shared_file_name=shared_file_name)
|
|
@@ -465,12 +461,21 @@ class AWSJobStore(AbstractJobStore):
|
|
|
465
461
|
except ServerSideCopyProhibitedError:
|
|
466
462
|
# AWS refuses to do this copy for us
|
|
467
463
|
logger.warning("Falling back to copying via the local machine. This could get expensive!")
|
|
468
|
-
pass
|
|
469
464
|
else:
|
|
470
465
|
super()._default_export_file(otherCls, file_id, uri)
|
|
471
466
|
|
|
472
467
|
@classmethod
|
|
473
|
-
def
|
|
468
|
+
def _url_exists(cls, url: ParseResult) -> bool:
|
|
469
|
+
try:
|
|
470
|
+
get_object_for_url(url, existing=True)
|
|
471
|
+
return True
|
|
472
|
+
except FileNotFoundError:
|
|
473
|
+
# Not a file
|
|
474
|
+
# Might be a directory.
|
|
475
|
+
return cls._get_is_directory(url)
|
|
476
|
+
|
|
477
|
+
@classmethod
|
|
478
|
+
def _get_size(cls, url):
|
|
474
479
|
return get_object_for_url(url, existing=True).content_length
|
|
475
480
|
|
|
476
481
|
@classmethod
|
|
@@ -482,6 +487,15 @@ class AWSJobStore(AbstractJobStore):
|
|
|
482
487
|
False # executable bit is always False
|
|
483
488
|
)
|
|
484
489
|
|
|
490
|
+
@classmethod
|
|
491
|
+
def _open_url(cls, url: ParseResult) -> IO[bytes]:
|
|
492
|
+
src_obj = get_object_for_url(url, existing=True)
|
|
493
|
+
response = src_obj.get()
|
|
494
|
+
# We should get back a response with a stream in 'Body'
|
|
495
|
+
if 'Body' not in response:
|
|
496
|
+
raise RuntimeError(f"Could not fetch body stream for {url}")
|
|
497
|
+
return response['Body']
|
|
498
|
+
|
|
485
499
|
@classmethod
|
|
486
500
|
def _write_to_url(cls, readable, url, executable=False):
|
|
487
501
|
dstObj = get_object_for_url(url)
|
|
@@ -757,7 +771,7 @@ class AWSJobStore(AbstractJobStore):
|
|
|
757
771
|
bucket_tagging.put(Tagging={'TagSet': flat_tags})
|
|
758
772
|
|
|
759
773
|
# Configure bucket so that we can make objects in
|
|
760
|
-
# it public, which was the historical default.
|
|
774
|
+
# it public, which was the historical default.
|
|
761
775
|
enable_public_objects(bucket_name)
|
|
762
776
|
elif block:
|
|
763
777
|
raise
|
toil/jobStores/aws/utils.py
CHANGED
|
@@ -17,13 +17,12 @@ import logging
|
|
|
17
17
|
import os
|
|
18
18
|
import types
|
|
19
19
|
from ssl import SSLError
|
|
20
|
-
from typing import Optional, cast
|
|
20
|
+
from typing import Optional, cast, TYPE_CHECKING
|
|
21
21
|
|
|
22
22
|
from boto3.s3.transfer import TransferConfig
|
|
23
|
-
from boto.exception import
|
|
23
|
+
from boto.exception import SDBResponseError
|
|
24
24
|
from botocore.client import Config
|
|
25
25
|
from botocore.exceptions import ClientError
|
|
26
|
-
from mypy_boto3_s3 import S3Client, S3ServiceResource
|
|
27
26
|
|
|
28
27
|
from toil.lib.aws import session
|
|
29
28
|
from toil.lib.aws.utils import connection_reset, get_bucket_region
|
|
@@ -36,6 +35,8 @@ from toil.lib.retry import (DEFAULT_DELAYS,
|
|
|
36
35
|
get_error_status,
|
|
37
36
|
old_retry,
|
|
38
37
|
retry)
|
|
38
|
+
if TYPE_CHECKING:
|
|
39
|
+
from mypy_boto3_s3 import S3Client, S3ServiceResource
|
|
39
40
|
|
|
40
41
|
logger = logging.getLogger(__name__)
|
|
41
42
|
|
|
@@ -285,13 +286,12 @@ class ServerSideCopyProhibitedError(RuntimeError):
|
|
|
285
286
|
Raised when AWS refuses to perform a server-side copy between S3 keys, and
|
|
286
287
|
insists that you pay to download and upload the data yourself instead.
|
|
287
288
|
"""
|
|
288
|
-
pass
|
|
289
289
|
|
|
290
290
|
@retry(errors=[ErrorCondition(
|
|
291
291
|
error=ClientError,
|
|
292
292
|
error_codes=[404, 500, 502, 503, 504]
|
|
293
293
|
)])
|
|
294
|
-
def copyKeyMultipart(resource: S3ServiceResource,
|
|
294
|
+
def copyKeyMultipart(resource: "S3ServiceResource",
|
|
295
295
|
srcBucketName: str,
|
|
296
296
|
srcKeyName: str,
|
|
297
297
|
srcKeyVersion: str,
|
|
@@ -347,7 +347,7 @@ def copyKeyMultipart(resource: S3ServiceResource,
|
|
|
347
347
|
# not wherever the bucket virtual hostnames go.
|
|
348
348
|
source_region = get_bucket_region(srcBucketName)
|
|
349
349
|
source_client = cast(
|
|
350
|
-
S3Client,
|
|
350
|
+
"S3Client",
|
|
351
351
|
session.client(
|
|
352
352
|
's3',
|
|
353
353
|
region_name=source_region,
|