toil 5.12.0__py3-none-any.whl → 6.1.0a1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +18 -13
- toil/batchSystems/abstractBatchSystem.py +21 -10
- toil/batchSystems/abstractGridEngineBatchSystem.py +2 -2
- toil/batchSystems/awsBatch.py +14 -14
- toil/batchSystems/contained_executor.py +3 -3
- toil/batchSystems/htcondor.py +0 -1
- toil/batchSystems/kubernetes.py +34 -31
- toil/batchSystems/local_support.py +3 -1
- toil/batchSystems/mesos/batchSystem.py +7 -7
- toil/batchSystems/options.py +32 -83
- toil/batchSystems/registry.py +104 -23
- toil/batchSystems/singleMachine.py +16 -13
- toil/batchSystems/slurm.py +3 -3
- toil/batchSystems/torque.py +0 -1
- toil/bus.py +6 -8
- toil/common.py +532 -743
- toil/cwl/__init__.py +28 -32
- toil/cwl/cwltoil.py +523 -520
- toil/cwl/utils.py +55 -10
- toil/fileStores/__init__.py +2 -2
- toil/fileStores/abstractFileStore.py +36 -11
- toil/fileStores/cachingFileStore.py +607 -530
- toil/fileStores/nonCachingFileStore.py +43 -10
- toil/job.py +140 -75
- toil/jobStores/abstractJobStore.py +147 -79
- toil/jobStores/aws/jobStore.py +23 -9
- toil/jobStores/aws/utils.py +1 -2
- toil/jobStores/fileJobStore.py +117 -19
- toil/jobStores/googleJobStore.py +16 -7
- toil/jobStores/utils.py +5 -6
- toil/leader.py +71 -43
- toil/lib/accelerators.py +10 -5
- toil/lib/aws/__init__.py +3 -14
- toil/lib/aws/ami.py +22 -9
- toil/lib/aws/iam.py +21 -13
- toil/lib/aws/session.py +2 -16
- toil/lib/aws/utils.py +4 -5
- toil/lib/compatibility.py +1 -1
- toil/lib/conversions.py +7 -3
- toil/lib/docker.py +22 -23
- toil/lib/ec2.py +10 -6
- toil/lib/ec2nodes.py +106 -100
- toil/lib/encryption/_nacl.py +2 -1
- toil/lib/generatedEC2Lists.py +325 -18
- toil/lib/io.py +21 -0
- toil/lib/misc.py +1 -1
- toil/lib/resources.py +1 -1
- toil/lib/threading.py +74 -26
- toil/options/common.py +738 -0
- toil/options/cwl.py +336 -0
- toil/options/wdl.py +32 -0
- toil/provisioners/abstractProvisioner.py +1 -4
- toil/provisioners/aws/__init__.py +3 -6
- toil/provisioners/aws/awsProvisioner.py +6 -0
- toil/provisioners/clusterScaler.py +3 -2
- toil/provisioners/gceProvisioner.py +2 -2
- toil/realtimeLogger.py +2 -1
- toil/resource.py +24 -18
- toil/server/app.py +2 -3
- toil/server/cli/wes_cwl_runner.py +4 -4
- toil/server/utils.py +1 -1
- toil/server/wes/abstract_backend.py +3 -2
- toil/server/wes/amazon_wes_utils.py +5 -4
- toil/server/wes/tasks.py +2 -3
- toil/server/wes/toil_backend.py +2 -10
- toil/server/wsgi_app.py +2 -0
- toil/serviceManager.py +12 -10
- toil/statsAndLogging.py +5 -1
- toil/test/__init__.py +29 -54
- toil/test/batchSystems/batchSystemTest.py +11 -111
- toil/test/batchSystems/test_slurm.py +3 -2
- toil/test/cwl/cwlTest.py +213 -90
- toil/test/cwl/glob_dir.cwl +15 -0
- toil/test/cwl/preemptible.cwl +21 -0
- toil/test/cwl/preemptible_expression.cwl +28 -0
- toil/test/cwl/revsort.cwl +1 -1
- toil/test/cwl/revsort2.cwl +1 -1
- toil/test/docs/scriptsTest.py +0 -1
- toil/test/jobStores/jobStoreTest.py +27 -16
- toil/test/lib/aws/test_iam.py +4 -14
- toil/test/lib/aws/test_utils.py +0 -3
- toil/test/lib/dockerTest.py +4 -4
- toil/test/lib/test_ec2.py +11 -16
- toil/test/mesos/helloWorld.py +4 -5
- toil/test/mesos/stress.py +1 -1
- toil/test/provisioners/aws/awsProvisionerTest.py +9 -5
- toil/test/provisioners/clusterScalerTest.py +6 -4
- toil/test/provisioners/clusterTest.py +14 -3
- toil/test/provisioners/gceProvisionerTest.py +0 -6
- toil/test/provisioners/restartScript.py +3 -2
- toil/test/server/serverTest.py +1 -1
- toil/test/sort/restart_sort.py +2 -1
- toil/test/sort/sort.py +2 -1
- toil/test/sort/sortTest.py +2 -13
- toil/test/src/autoDeploymentTest.py +45 -45
- toil/test/src/busTest.py +5 -5
- toil/test/src/checkpointTest.py +2 -2
- toil/test/src/deferredFunctionTest.py +1 -1
- toil/test/src/fileStoreTest.py +32 -16
- toil/test/src/helloWorldTest.py +1 -1
- toil/test/src/importExportFileTest.py +1 -1
- toil/test/src/jobDescriptionTest.py +2 -1
- toil/test/src/jobServiceTest.py +1 -1
- toil/test/src/jobTest.py +18 -18
- toil/test/src/miscTests.py +5 -3
- toil/test/src/promisedRequirementTest.py +3 -3
- toil/test/src/realtimeLoggerTest.py +1 -1
- toil/test/src/resourceTest.py +2 -2
- toil/test/src/restartDAGTest.py +1 -1
- toil/test/src/resumabilityTest.py +36 -2
- toil/test/src/retainTempDirTest.py +1 -1
- toil/test/src/systemTest.py +2 -2
- toil/test/src/toilContextManagerTest.py +2 -2
- toil/test/src/userDefinedJobArgTypeTest.py +1 -1
- toil/test/utils/toilDebugTest.py +98 -32
- toil/test/utils/toilKillTest.py +2 -2
- toil/test/utils/utilsTest.py +20 -0
- toil/test/wdl/wdltoil_test.py +148 -45
- toil/toilState.py +7 -6
- toil/utils/toilClean.py +1 -1
- toil/utils/toilConfig.py +36 -0
- toil/utils/toilDebugFile.py +60 -33
- toil/utils/toilDebugJob.py +39 -12
- toil/utils/toilDestroyCluster.py +1 -1
- toil/utils/toilKill.py +1 -1
- toil/utils/toilLaunchCluster.py +13 -2
- toil/utils/toilMain.py +3 -2
- toil/utils/toilRsyncCluster.py +1 -1
- toil/utils/toilSshCluster.py +1 -1
- toil/utils/toilStats.py +240 -143
- toil/utils/toilStatus.py +1 -4
- toil/version.py +11 -11
- toil/wdl/utils.py +2 -122
- toil/wdl/wdltoil.py +999 -386
- toil/worker.py +25 -31
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/METADATA +60 -53
- toil-6.1.0a1.dist-info/RECORD +237 -0
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/WHEEL +1 -1
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/entry_points.txt +0 -1
- toil/batchSystems/parasol.py +0 -379
- toil/batchSystems/tes.py +0 -459
- toil/test/batchSystems/parasolTestSupport.py +0 -117
- toil/test/wdl/builtinTest.py +0 -506
- toil/test/wdl/conftest.py +0 -23
- toil/test/wdl/toilwdlTest.py +0 -522
- toil/wdl/toilwdl.py +0 -141
- toil/wdl/versions/dev.py +0 -107
- toil/wdl/versions/draft2.py +0 -980
- toil/wdl/versions/v1.py +0 -794
- toil/wdl/wdl_analysis.py +0 -116
- toil/wdl/wdl_functions.py +0 -997
- toil/wdl/wdl_synthesis.py +0 -1011
- toil/wdl/wdl_types.py +0 -243
- toil-5.12.0.dist-info/RECORD +0 -244
- /toil/{wdl/versions → options}/__init__.py +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/LICENSE +0 -0
- {toil-5.12.0.dist-info → toil-6.1.0a1.dist-info}/top_level.txt +0 -0
|
@@ -27,6 +27,7 @@ from typing import (IO,
|
|
|
27
27
|
Callable,
|
|
28
28
|
ContextManager,
|
|
29
29
|
Dict,
|
|
30
|
+
Iterable,
|
|
30
31
|
Iterator,
|
|
31
32
|
List,
|
|
32
33
|
Optional,
|
|
@@ -42,8 +43,8 @@ if sys.version_info >= (3, 8):
|
|
|
42
43
|
else:
|
|
43
44
|
from typing_extensions import Literal
|
|
44
45
|
|
|
45
|
-
from urllib.parse import ParseResult, urlparse
|
|
46
46
|
from urllib.error import HTTPError
|
|
47
|
+
from urllib.parse import ParseResult, urlparse
|
|
47
48
|
from urllib.request import urlopen
|
|
48
49
|
from uuid import uuid4
|
|
49
50
|
|
|
@@ -536,6 +537,40 @@ class AbstractJobStore(ABC):
|
|
|
536
537
|
executable = jobStoreFileID.executable
|
|
537
538
|
otherCls._write_to_url(readable, url, executable)
|
|
538
539
|
|
|
540
|
+
@classmethod
|
|
541
|
+
def url_exists(cls, src_uri: str) -> bool:
|
|
542
|
+
"""
|
|
543
|
+
Return True if the file at the given URI exists, and False otherwise.
|
|
544
|
+
|
|
545
|
+
:param src_uri: URL that points to a file or object in the storage
|
|
546
|
+
mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
|
|
547
|
+
"""
|
|
548
|
+
parseResult = urlparse(src_uri)
|
|
549
|
+
otherCls = cls._findJobStoreForUrl(parseResult)
|
|
550
|
+
return otherCls._url_exists(parseResult)
|
|
551
|
+
|
|
552
|
+
@classmethod
|
|
553
|
+
def get_size(cls, src_uri: str) -> Optional[int]:
|
|
554
|
+
"""
|
|
555
|
+
Get the size in bytes of the file at the given URL, or None if it cannot be obtained.
|
|
556
|
+
|
|
557
|
+
:param src_uri: URL that points to a file or object in the storage
|
|
558
|
+
mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
|
|
559
|
+
"""
|
|
560
|
+
parseResult = urlparse(src_uri)
|
|
561
|
+
otherCls = cls._findJobStoreForUrl(parseResult)
|
|
562
|
+
return otherCls._get_size(parseResult)
|
|
563
|
+
|
|
564
|
+
@classmethod
|
|
565
|
+
def get_is_directory(cls, src_uri: str) -> bool:
|
|
566
|
+
"""
|
|
567
|
+
Return True if the thing at the given URL is a directory, and False if
|
|
568
|
+
it is a file. The URL may or may not end in '/'.
|
|
569
|
+
"""
|
|
570
|
+
parseResult = urlparse(src_uri)
|
|
571
|
+
otherCls = cls._findJobStoreForUrl(parseResult)
|
|
572
|
+
return otherCls._get_is_directory(parseResult)
|
|
573
|
+
|
|
539
574
|
@classmethod
|
|
540
575
|
def list_url(cls, src_uri: str) -> List[str]:
|
|
541
576
|
"""
|
|
@@ -562,59 +597,61 @@ class AbstractJobStore(ABC):
|
|
|
562
597
|
return otherCls._list_url(parseResult)
|
|
563
598
|
|
|
564
599
|
@classmethod
|
|
565
|
-
def
|
|
566
|
-
"""
|
|
567
|
-
Return True if the thing at the given URL is a directory, and False if
|
|
568
|
-
it is a file. The URL may or may not end in '/'.
|
|
600
|
+
def read_from_url(cls, src_uri: str, writable: IO[bytes]) -> Tuple[int, bool]:
|
|
569
601
|
"""
|
|
570
|
-
|
|
571
|
-
otherCls = cls._findJobStoreForUrl(parseResult)
|
|
572
|
-
return otherCls._get_is_directory(parseResult)
|
|
602
|
+
Read the given URL and write its content into the given writable stream.
|
|
573
603
|
|
|
574
|
-
|
|
575
|
-
@abstractmethod
|
|
576
|
-
def _get_is_directory(cls, url: ParseResult) -> bool:
|
|
577
|
-
"""
|
|
578
|
-
Return True if the thing at the given URL is a directory, and False if
|
|
579
|
-
it is a file or it is known not to exist. The URL may or may not end in
|
|
580
|
-
'/'.
|
|
604
|
+
Raises FileNotFoundError if the URL doesn't exist.
|
|
581
605
|
|
|
582
|
-
:
|
|
583
|
-
in the storage mechanism of a supported URL scheme e.g. a blob
|
|
584
|
-
in an AWS s3 bucket.
|
|
606
|
+
:return: The size of the file in bytes and whether the executable permission bit is set
|
|
585
607
|
"""
|
|
586
|
-
|
|
608
|
+
parseResult = urlparse(src_uri)
|
|
609
|
+
otherCls = cls._findJobStoreForUrl(parseResult)
|
|
610
|
+
return otherCls._read_from_url(parseResult, writable)
|
|
587
611
|
|
|
588
612
|
@classmethod
|
|
589
|
-
def
|
|
613
|
+
def open_url(cls, src_uri: str) -> IO[bytes]:
|
|
590
614
|
"""
|
|
591
|
-
Read
|
|
615
|
+
Read from the given URI.
|
|
592
616
|
|
|
593
617
|
Raises FileNotFoundError if the URL doesn't exist.
|
|
594
618
|
|
|
595
|
-
|
|
596
|
-
|
|
619
|
+
Has a readable stream interface, unlike :meth:`read_from_url` which
|
|
620
|
+
takes a writable stream.
|
|
597
621
|
"""
|
|
598
622
|
parseResult = urlparse(src_uri)
|
|
599
623
|
otherCls = cls._findJobStoreForUrl(parseResult)
|
|
600
|
-
return otherCls.
|
|
624
|
+
return otherCls._open_url(parseResult)
|
|
601
625
|
|
|
602
626
|
@classmethod
|
|
603
|
-
@
|
|
604
|
-
def
|
|
605
|
-
|
|
627
|
+
@abstractmethod
|
|
628
|
+
def _url_exists(cls, url: ParseResult) -> bool:
|
|
629
|
+
"""
|
|
630
|
+
Return True if the item at the given URL exists, and False otherwise.
|
|
631
|
+
"""
|
|
632
|
+
raise NotImplementedError(f"No implementation for {url}")
|
|
606
633
|
|
|
607
634
|
@classmethod
|
|
608
635
|
@abstractmethod
|
|
609
|
-
def
|
|
636
|
+
def _get_size(cls, url: ParseResult) -> Optional[int]:
|
|
610
637
|
"""
|
|
611
|
-
Get the size
|
|
638
|
+
Get the size of the object at the given URL, or None if it cannot be obtained.
|
|
639
|
+
"""
|
|
640
|
+
raise NotImplementedError(f"No implementation for {url}")
|
|
612
641
|
|
|
613
|
-
|
|
614
|
-
|
|
642
|
+
@classmethod
|
|
643
|
+
@abstractmethod
|
|
644
|
+
def _get_is_directory(cls, url: ParseResult) -> bool:
|
|
615
645
|
"""
|
|
616
|
-
|
|
646
|
+
Return True if the thing at the given URL is a directory, and False if
|
|
647
|
+
it is a file or it is known not to exist. The URL may or may not end in
|
|
648
|
+
'/'.
|
|
617
649
|
|
|
650
|
+
:param url: URL that points to a file or object, or directory or prefix,
|
|
651
|
+
in the storage mechanism of a supported URL scheme e.g. a blob
|
|
652
|
+
in an AWS s3 bucket.
|
|
653
|
+
"""
|
|
654
|
+
raise NotImplementedError(f"No implementation for {url}")
|
|
618
655
|
|
|
619
656
|
@classmethod
|
|
620
657
|
@abstractmethod
|
|
@@ -623,8 +660,6 @@ class AbstractJobStore(ABC):
|
|
|
623
660
|
Reads the contents of the object at the specified location and writes it to the given
|
|
624
661
|
writable stream.
|
|
625
662
|
|
|
626
|
-
Raises FileNotFoundError if the URL doesn't exist.
|
|
627
|
-
|
|
628
663
|
Refer to :func:`~AbstractJobStore.importFile` documentation for currently supported URL schemes.
|
|
629
664
|
|
|
630
665
|
Raises FileNotFoundError if the thing at the URL is not found.
|
|
@@ -635,46 +670,58 @@ class AbstractJobStore(ABC):
|
|
|
635
670
|
:param IO[bytes] writable: a writable stream
|
|
636
671
|
|
|
637
672
|
:return: The size of the file in bytes and whether the executable permission bit is set
|
|
638
|
-
:rtype: Tuple[int, bool]
|
|
639
673
|
"""
|
|
640
|
-
raise NotImplementedError()
|
|
674
|
+
raise NotImplementedError(f"No implementation for {url}")
|
|
641
675
|
|
|
642
676
|
@classmethod
|
|
643
677
|
@abstractmethod
|
|
644
|
-
def
|
|
678
|
+
def _list_url(cls, url: ParseResult) -> List[str]:
|
|
645
679
|
"""
|
|
646
|
-
|
|
647
|
-
specified location. Raises FileNotFoundError if the URL doesn't exist..
|
|
680
|
+
List the contents of the given URL, which may or may not end in '/'
|
|
648
681
|
|
|
649
|
-
|
|
682
|
+
Returns a list of URL components. Those that end in '/' are meant to be
|
|
683
|
+
directories, while those that do not are meant to be files.
|
|
650
684
|
|
|
651
|
-
:
|
|
685
|
+
Refer to :func:`~AbstractJobStore.importFile` documentation for currently supported URL schemes.
|
|
652
686
|
|
|
653
|
-
:param ParseResult url: URL that points to a
|
|
654
|
-
|
|
687
|
+
:param ParseResult url: URL that points to a directory or prefix in the
|
|
688
|
+
storage mechanism of a supported URL scheme e.g. a prefix in an AWS s3
|
|
689
|
+
bucket.
|
|
655
690
|
|
|
656
|
-
:
|
|
691
|
+
:return: The children of the given URL, already URL-encoded if
|
|
692
|
+
appropriate. (If the URL is a bare path, no encoding is done.)
|
|
657
693
|
"""
|
|
658
|
-
raise NotImplementedError()
|
|
694
|
+
raise NotImplementedError(f"No implementation for {url}")
|
|
659
695
|
|
|
660
696
|
@classmethod
|
|
661
697
|
@abstractmethod
|
|
662
|
-
def
|
|
698
|
+
def _open_url(cls, url: ParseResult) -> IO[bytes]:
|
|
663
699
|
"""
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
Returns a list of URL components. Those that end in '/' are meant to be
|
|
667
|
-
directories, while those that do not are meant to be files.
|
|
700
|
+
Get a stream of the object at the specified location.
|
|
668
701
|
|
|
669
702
|
Refer to :func:`~AbstractJobStore.importFile` documentation for currently supported URL schemes.
|
|
670
703
|
|
|
671
|
-
|
|
672
|
-
|
|
673
|
-
|
|
704
|
+
Raises FileNotFoundError if the thing at the URL is not found.
|
|
705
|
+
"""
|
|
706
|
+
raise NotImplementedError(f"No implementation for {url}")
|
|
674
707
|
|
|
675
|
-
|
|
708
|
+
@classmethod
|
|
709
|
+
@abstractmethod
|
|
710
|
+
def _write_to_url(cls, readable: Union[IO[bytes], IO[str]], url: ParseResult, executable: bool = False) -> None:
|
|
676
711
|
"""
|
|
677
|
-
|
|
712
|
+
Reads the contents of the given readable stream and writes it to the object at the
|
|
713
|
+
specified location. Raises FileNotFoundError if the URL doesn't exist.
|
|
714
|
+
|
|
715
|
+
Refer to AbstractJobStore.importFile documentation for currently supported URL schemes.
|
|
716
|
+
|
|
717
|
+
:param Union[IO[bytes], IO[str]] readable: a readable stream
|
|
718
|
+
|
|
719
|
+
:param ParseResult url: URL that points to a file or object in the storage
|
|
720
|
+
mechanism of a supported URL scheme e.g. a blob in an AWS s3 bucket.
|
|
721
|
+
|
|
722
|
+
:param bool executable: determines if the file has executable permissions
|
|
723
|
+
"""
|
|
724
|
+
raise NotImplementedError(f"No implementation for {url}")
|
|
678
725
|
|
|
679
726
|
@classmethod
|
|
680
727
|
@abstractmethod
|
|
@@ -690,7 +737,7 @@ class AbstractJobStore(ABC):
|
|
|
690
737
|
|
|
691
738
|
:return bool: returns true if the cls supports the URL
|
|
692
739
|
"""
|
|
693
|
-
raise NotImplementedError()
|
|
740
|
+
raise NotImplementedError(f"No implementation for {url}")
|
|
694
741
|
|
|
695
742
|
@abstractmethod
|
|
696
743
|
def destroy(self) -> None:
|
|
@@ -794,6 +841,10 @@ class AbstractJobStore(ABC):
|
|
|
794
841
|
for service_jobstore_id in root_job_description.services:
|
|
795
842
|
if haveJob(service_jobstore_id):
|
|
796
843
|
reachable_from_root.add(service_jobstore_id)
|
|
844
|
+
for merged_jobstore_id in root_job_description.merged_jobs:
|
|
845
|
+
# Keep merged-in jobs around themselves, but don't bother
|
|
846
|
+
# exploring them, since we took their successors.
|
|
847
|
+
reachable_from_root.add(merged_jobstore_id)
|
|
797
848
|
|
|
798
849
|
# Unprocessed means it might have successor jobs we need to add.
|
|
799
850
|
unprocessed_job_descriptions = [root_job_description]
|
|
@@ -815,6 +866,10 @@ class AbstractJobStore(ABC):
|
|
|
815
866
|
reachable_from_root.add(service_jobstore_id)
|
|
816
867
|
|
|
817
868
|
new_job_descriptions_to_process.append(successor_job_description)
|
|
869
|
+
for merged_jobstore_id in job_description.merged_jobs:
|
|
870
|
+
# Keep merged-in jobs around themselves, but don't bother
|
|
871
|
+
# exploring them, since we took their successors.
|
|
872
|
+
reachable_from_root.add(merged_jobstore_id)
|
|
818
873
|
unprocessed_job_descriptions = new_job_descriptions_to_process
|
|
819
874
|
|
|
820
875
|
logger.debug(f"{len(reachable_from_root)} jobs reachable from root.")
|
|
@@ -824,8 +879,8 @@ class AbstractJobStore(ABC):
|
|
|
824
879
|
|
|
825
880
|
# Cleanup jobs that are not reachable from the root, and therefore orphaned
|
|
826
881
|
# TODO: Avoid reiterating reachable_from_root (which may be very large)
|
|
827
|
-
|
|
828
|
-
for jobDescription in
|
|
882
|
+
unreachable = [x for x in getJobDescriptions() if x.jobStoreID not in reachable_from_root]
|
|
883
|
+
for jobDescription in unreachable:
|
|
829
884
|
# clean up any associated files before deletion
|
|
830
885
|
for fileID in jobDescription.filesToDelete:
|
|
831
886
|
# Delete any files that should already be deleted
|
|
@@ -1688,6 +1743,16 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
|
|
|
1688
1743
|
def _supports_url(cls, url: ParseResult, export: bool = False) -> bool:
|
|
1689
1744
|
return url.scheme.lower() in ('http', 'https', 'ftp') and not export
|
|
1690
1745
|
|
|
1746
|
+
@classmethod
|
|
1747
|
+
def _url_exists(cls, url: ParseResult) -> bool:
|
|
1748
|
+
try:
|
|
1749
|
+
# TODO: Figure out how to HEAD instead of this.
|
|
1750
|
+
with cls._open_url(url):
|
|
1751
|
+
return True
|
|
1752
|
+
except:
|
|
1753
|
+
pass
|
|
1754
|
+
return False
|
|
1755
|
+
|
|
1691
1756
|
@classmethod
|
|
1692
1757
|
@retry(
|
|
1693
1758
|
errors=[
|
|
@@ -1695,7 +1760,7 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
|
|
|
1695
1760
|
ErrorCondition(error=HTTPError, error_codes=[408, 500, 503]),
|
|
1696
1761
|
]
|
|
1697
1762
|
)
|
|
1698
|
-
def
|
|
1763
|
+
def _get_size(cls, url: ParseResult) -> Optional[int]:
|
|
1699
1764
|
if url.scheme.lower() == 'ftp':
|
|
1700
1765
|
return None
|
|
1701
1766
|
with closing(urlopen(url.geturl())) as readable:
|
|
@@ -1703,6 +1768,27 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
|
|
|
1703
1768
|
size = readable.info().get('content-length')
|
|
1704
1769
|
return int(size) if size is not None else None
|
|
1705
1770
|
|
|
1771
|
+
@classmethod
|
|
1772
|
+
def _read_from_url(
|
|
1773
|
+
cls, url: ParseResult, writable: Union[IO[bytes], IO[str]]
|
|
1774
|
+
) -> Tuple[int, bool]:
|
|
1775
|
+
# We can't actually retry after we start writing.
|
|
1776
|
+
# TODO: Implement retry with byte range requests
|
|
1777
|
+
with cls._open_url(url) as readable:
|
|
1778
|
+
# Make something to count the bytes we get
|
|
1779
|
+
# We need to put the actual count in a container so our
|
|
1780
|
+
# nested function can modify it without creating its own
|
|
1781
|
+
# local with the same name.
|
|
1782
|
+
size = [0]
|
|
1783
|
+
def count(l: int) -> None:
|
|
1784
|
+
size[0] += l
|
|
1785
|
+
counter = WriteWatchingStream(writable)
|
|
1786
|
+
counter.onWrite(count)
|
|
1787
|
+
|
|
1788
|
+
# Do the download
|
|
1789
|
+
shutil.copyfileobj(readable, counter)
|
|
1790
|
+
return size[0], False
|
|
1791
|
+
|
|
1706
1792
|
@classmethod
|
|
1707
1793
|
@retry(
|
|
1708
1794
|
errors=[
|
|
@@ -1710,27 +1796,9 @@ class JobStoreSupport(AbstractJobStore, metaclass=ABCMeta):
|
|
|
1710
1796
|
ErrorCondition(error=HTTPError, error_codes=[408, 500, 503]),
|
|
1711
1797
|
]
|
|
1712
1798
|
)
|
|
1713
|
-
def
|
|
1714
|
-
cls, url: ParseResult, writable: Union[IO[bytes], IO[str]]
|
|
1715
|
-
) -> Tuple[int, bool]:
|
|
1716
|
-
# We can only retry on errors that happen as responses to the request.
|
|
1717
|
-
# If we start getting file data, and the connection drops, we fail.
|
|
1718
|
-
# So we don't have to worry about writing the start of the file twice.
|
|
1799
|
+
def _open_url(cls, url: ParseResult) -> IO[bytes]:
|
|
1719
1800
|
try:
|
|
1720
|
-
|
|
1721
|
-
# Make something to count the bytes we get
|
|
1722
|
-
# We need to put the actual count in a container so our
|
|
1723
|
-
# nested function can modify it without creating its own
|
|
1724
|
-
# local with the same name.
|
|
1725
|
-
size = [0]
|
|
1726
|
-
def count(l: int) -> None:
|
|
1727
|
-
size[0] += l
|
|
1728
|
-
counter = WriteWatchingStream(writable)
|
|
1729
|
-
counter.onWrite(count)
|
|
1730
|
-
|
|
1731
|
-
# Do the download
|
|
1732
|
-
shutil.copyfileobj(readable, counter)
|
|
1733
|
-
return size[0], False
|
|
1801
|
+
return cast(IO[bytes], closing(urlopen(url.geturl())))
|
|
1734
1802
|
except HTTPError as e:
|
|
1735
1803
|
if e.code == 404:
|
|
1736
1804
|
# Translate into a FileNotFoundError for detecting
|
toil/jobStores/aws/jobStore.py
CHANGED
|
@@ -13,7 +13,6 @@
|
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
import hashlib
|
|
15
15
|
import itertools
|
|
16
|
-
import json
|
|
17
16
|
import logging
|
|
18
17
|
import os
|
|
19
18
|
import pickle
|
|
@@ -21,12 +20,10 @@ import re
|
|
|
21
20
|
import reprlib
|
|
22
21
|
import stat
|
|
23
22
|
import time
|
|
24
|
-
import urllib.error
|
|
25
|
-
import urllib.request
|
|
26
23
|
import uuid
|
|
27
24
|
from contextlib import contextmanager
|
|
28
25
|
from io import BytesIO
|
|
29
|
-
from typing import List, Optional
|
|
26
|
+
from typing import List, Optional, IO
|
|
30
27
|
from urllib.parse import ParseResult, parse_qs, urlencode, urlsplit, urlunsplit
|
|
31
28
|
|
|
32
29
|
import boto.s3.connection
|
|
@@ -35,7 +32,6 @@ from boto.exception import SDBResponseError
|
|
|
35
32
|
from botocore.exceptions import ClientError
|
|
36
33
|
|
|
37
34
|
import toil.lib.encryption as encryption
|
|
38
|
-
from toil.lib.aws import build_tag_dict_from_env
|
|
39
35
|
from toil.fileStores import FileID
|
|
40
36
|
from toil.jobStores.abstractJobStore import (AbstractJobStore,
|
|
41
37
|
ConcurrentFileModificationException,
|
|
@@ -56,6 +52,7 @@ from toil.jobStores.aws.utils import (SDBHelper,
|
|
|
56
52
|
from toil.jobStores.utils import (ReadablePipe,
|
|
57
53
|
ReadableTransformingPipe,
|
|
58
54
|
WritablePipe)
|
|
55
|
+
from toil.lib.aws import build_tag_dict_from_env
|
|
59
56
|
from toil.lib.aws.session import establish_boto3_session
|
|
60
57
|
from toil.lib.aws.utils import (create_s3_bucket,
|
|
61
58
|
enable_public_objects,
|
|
@@ -450,7 +447,6 @@ class AWSJobStore(AbstractJobStore):
|
|
|
450
447
|
except ServerSideCopyProhibitedError:
|
|
451
448
|
# AWS refuses to do this copy for us
|
|
452
449
|
logger.warning("Falling back to copying via the local machine. This could get expensive!")
|
|
453
|
-
pass
|
|
454
450
|
|
|
455
451
|
# copy if exception
|
|
456
452
|
return super()._import_file(otherCls, uri, shared_file_name=shared_file_name)
|
|
@@ -465,12 +461,21 @@ class AWSJobStore(AbstractJobStore):
|
|
|
465
461
|
except ServerSideCopyProhibitedError:
|
|
466
462
|
# AWS refuses to do this copy for us
|
|
467
463
|
logger.warning("Falling back to copying via the local machine. This could get expensive!")
|
|
468
|
-
pass
|
|
469
464
|
else:
|
|
470
465
|
super()._default_export_file(otherCls, file_id, uri)
|
|
471
466
|
|
|
472
467
|
@classmethod
|
|
473
|
-
def
|
|
468
|
+
def _url_exists(cls, url: ParseResult) -> bool:
|
|
469
|
+
try:
|
|
470
|
+
get_object_for_url(url, existing=True)
|
|
471
|
+
return True
|
|
472
|
+
except FileNotFoundError:
|
|
473
|
+
# Not a file
|
|
474
|
+
# Might be a directory.
|
|
475
|
+
return cls._get_is_directory(url)
|
|
476
|
+
|
|
477
|
+
@classmethod
|
|
478
|
+
def _get_size(cls, url):
|
|
474
479
|
return get_object_for_url(url, existing=True).content_length
|
|
475
480
|
|
|
476
481
|
@classmethod
|
|
@@ -482,6 +487,15 @@ class AWSJobStore(AbstractJobStore):
|
|
|
482
487
|
False # executable bit is always False
|
|
483
488
|
)
|
|
484
489
|
|
|
490
|
+
@classmethod
|
|
491
|
+
def _open_url(cls, url: ParseResult) -> IO[bytes]:
|
|
492
|
+
src_obj = get_object_for_url(url, existing=True)
|
|
493
|
+
response = src_obj.get()
|
|
494
|
+
# We should get back a response with a stream in 'Body'
|
|
495
|
+
if 'Body' not in response:
|
|
496
|
+
raise RuntimeError(f"Could not fetch body stream for {url}")
|
|
497
|
+
return response['Body']
|
|
498
|
+
|
|
485
499
|
@classmethod
|
|
486
500
|
def _write_to_url(cls, readable, url, executable=False):
|
|
487
501
|
dstObj = get_object_for_url(url)
|
|
@@ -757,7 +771,7 @@ class AWSJobStore(AbstractJobStore):
|
|
|
757
771
|
bucket_tagging.put(Tagging={'TagSet': flat_tags})
|
|
758
772
|
|
|
759
773
|
# Configure bucket so that we can make objects in
|
|
760
|
-
# it public, which was the historical default.
|
|
774
|
+
# it public, which was the historical default.
|
|
761
775
|
enable_public_objects(bucket_name)
|
|
762
776
|
elif block:
|
|
763
777
|
raise
|
toil/jobStores/aws/utils.py
CHANGED
|
@@ -20,7 +20,7 @@ from ssl import SSLError
|
|
|
20
20
|
from typing import Optional, cast
|
|
21
21
|
|
|
22
22
|
from boto3.s3.transfer import TransferConfig
|
|
23
|
-
from boto.exception import
|
|
23
|
+
from boto.exception import SDBResponseError
|
|
24
24
|
from botocore.client import Config
|
|
25
25
|
from botocore.exceptions import ClientError
|
|
26
26
|
from mypy_boto3_s3 import S3Client, S3ServiceResource
|
|
@@ -285,7 +285,6 @@ class ServerSideCopyProhibitedError(RuntimeError):
|
|
|
285
285
|
Raised when AWS refuses to perform a server-side copy between S3 keys, and
|
|
286
286
|
insists that you pay to download and upload the data yourself instead.
|
|
287
287
|
"""
|
|
288
|
-
pass
|
|
289
288
|
|
|
290
289
|
@retry(errors=[ErrorCondition(
|
|
291
290
|
error=ClientError,
|