PyPI - toil - Versions diffs - 8.0.0__py3-none-any.whl → 8.2.0__py3-none-any.whl - Mend

toil-8.0.0-py3-none-any.whl → toil-8.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (270)

toil/__init__.py +4 -39
toil/batchSystems/abstractBatchSystem.py +1 -1
toil/batchSystems/abstractGridEngineBatchSystem.py +1 -1
toil/batchSystems/awsBatch.py +1 -1
toil/batchSystems/cleanup_support.py +1 -1
toil/batchSystems/kubernetes.py +53 -7
toil/batchSystems/local_support.py +1 -1
toil/batchSystems/mesos/batchSystem.py +13 -8
toil/batchSystems/mesos/test/__init__.py +3 -2
toil/batchSystems/options.py +1 -0
toil/batchSystems/singleMachine.py +1 -1
toil/batchSystems/slurm.py +229 -84
toil/bus.py +5 -3
toil/common.py +198 -54
toil/cwl/cwltoil.py +32 -11
toil/job.py +110 -86
toil/jobStores/abstractJobStore.py +24 -3
toil/jobStores/aws/jobStore.py +46 -10
toil/jobStores/fileJobStore.py +25 -1
toil/jobStores/googleJobStore.py +104 -30
toil/leader.py +9 -0
toil/lib/accelerators.py +3 -1
toil/lib/aws/session.py +14 -3
toil/lib/aws/utils.py +92 -35
toil/lib/aws/utils.py.orig +504 -0
toil/lib/bioio.py +1 -1
toil/lib/docker.py +252 -91
toil/lib/dockstore.py +387 -0
toil/lib/ec2nodes.py +3 -2
toil/lib/exceptions.py +5 -3
toil/lib/history.py +1345 -0
toil/lib/history_submission.py +695 -0
toil/lib/io.py +56 -23
toil/lib/misc.py +25 -1
toil/lib/resources.py +2 -1
toil/lib/retry.py +10 -10
toil/lib/threading.py +11 -10
toil/lib/{integration.py → trs.py} +95 -46
toil/lib/web.py +38 -0
toil/options/common.py +25 -2
toil/options/cwl.py +10 -0
toil/options/wdl.py +11 -0
toil/provisioners/gceProvisioner.py +4 -4
toil/server/api_spec/LICENSE +201 -0
toil/server/api_spec/README.rst +5 -0
toil/server/cli/wes_cwl_runner.py +5 -4
toil/server/utils.py +2 -3
toil/statsAndLogging.py +35 -1
toil/test/__init__.py +275 -115
toil/test/batchSystems/batchSystemTest.py +227 -205
toil/test/batchSystems/test_slurm.py +199 -2
toil/test/cactus/pestis.tar.gz +0 -0
toil/test/conftest.py +7 -0
toil/test/cwl/2.fasta +11 -0
toil/test/cwl/2.fastq +12 -0
toil/test/cwl/conftest.py +39 -0
toil/test/cwl/cwlTest.py +1015 -780
toil/test/cwl/directory/directory/file.txt +15 -0
toil/test/cwl/download_directory_file.json +4 -0
toil/test/cwl/download_directory_s3.json +4 -0
toil/test/cwl/download_file.json +6 -0
toil/test/cwl/download_http.json +6 -0
toil/test/cwl/download_https.json +6 -0
toil/test/cwl/download_s3.json +6 -0
toil/test/cwl/download_subdirectory_file.json +5 -0
toil/test/cwl/download_subdirectory_s3.json +5 -0
toil/test/cwl/empty.json +1 -0
toil/test/cwl/mock_mpi/fake_mpi.yml +8 -0
toil/test/cwl/mock_mpi/fake_mpi_run.py +42 -0
toil/test/cwl/optional-file-exists.json +6 -0
toil/test/cwl/optional-file-missing.json +6 -0
toil/test/cwl/optional-file.cwl +18 -0
toil/test/cwl/preemptible_expression.json +1 -0
toil/test/cwl/revsort-job-missing.json +6 -0
toil/test/cwl/revsort-job.json +6 -0
toil/test/cwl/s3_secondary_file.json +16 -0
toil/test/cwl/seqtk_seq_job.json +6 -0
toil/test/cwl/stream.json +6 -0
toil/test/cwl/test_filename_conflict_resolution.ms/table.dat +0 -0
toil/test/cwl/test_filename_conflict_resolution.ms/table.f0 +0 -0
toil/test/cwl/test_filename_conflict_resolution.ms/table.f1 +0 -0
toil/test/cwl/test_filename_conflict_resolution.ms/table.f1i +0 -0
toil/test/cwl/test_filename_conflict_resolution.ms/table.f2 +0 -0
toil/test/cwl/test_filename_conflict_resolution.ms/table.f2_TSM0 +0 -0
toil/test/cwl/test_filename_conflict_resolution.ms/table.f3 +0 -0
toil/test/cwl/test_filename_conflict_resolution.ms/table.f3_TSM0 +0 -0
toil/test/cwl/test_filename_conflict_resolution.ms/table.f4 +0 -0
toil/test/cwl/test_filename_conflict_resolution.ms/table.f4_TSM0 +0 -0
toil/test/cwl/test_filename_conflict_resolution.ms/table.f5 +0 -0
toil/test/cwl/test_filename_conflict_resolution.ms/table.info +0 -0
toil/test/cwl/test_filename_conflict_resolution.ms/table.lock +0 -0
toil/test/cwl/whale.txt +16 -0
toil/test/docs/scripts/example_alwaysfail.py +38 -0
toil/test/docs/scripts/example_alwaysfail_with_files.wdl +33 -0
toil/test/docs/scripts/example_cachingbenchmark.py +117 -0
toil/test/docs/scripts/stagingExampleFiles/in.txt +1 -0
toil/test/docs/scripts/stagingExampleFiles/out.txt +2 -0
toil/test/docs/scripts/tutorial_arguments.py +23 -0
toil/test/docs/scripts/tutorial_debugging.patch +12 -0
toil/test/docs/scripts/tutorial_debugging_hangs.wdl +126 -0
toil/test/docs/scripts/tutorial_debugging_works.wdl +129 -0
toil/test/docs/scripts/tutorial_docker.py +20 -0
toil/test/docs/scripts/tutorial_dynamic.py +24 -0
toil/test/docs/scripts/tutorial_encapsulation.py +28 -0
toil/test/docs/scripts/tutorial_encapsulation2.py +29 -0
toil/test/docs/scripts/tutorial_helloworld.py +15 -0
toil/test/docs/scripts/tutorial_invokeworkflow.py +27 -0
toil/test/docs/scripts/tutorial_invokeworkflow2.py +30 -0
toil/test/docs/scripts/tutorial_jobfunctions.py +22 -0
toil/test/docs/scripts/tutorial_managing.py +29 -0
toil/test/docs/scripts/tutorial_managing2.py +56 -0
toil/test/docs/scripts/tutorial_multiplejobs.py +25 -0
toil/test/docs/scripts/tutorial_multiplejobs2.py +21 -0
toil/test/docs/scripts/tutorial_multiplejobs3.py +22 -0
toil/test/docs/scripts/tutorial_promises.py +25 -0
toil/test/docs/scripts/tutorial_promises2.py +30 -0
toil/test/docs/scripts/tutorial_quickstart.py +22 -0
toil/test/docs/scripts/tutorial_requirements.py +44 -0
toil/test/docs/scripts/tutorial_services.py +45 -0
toil/test/docs/scripts/tutorial_staging.py +45 -0
toil/test/docs/scripts/tutorial_stats.py +64 -0
toil/test/lib/aws/test_iam.py +3 -1
toil/test/lib/dockerTest.py +205 -122
toil/test/lib/test_history.py +236 -0
toil/test/lib/test_trs.py +161 -0
toil/test/provisioners/aws/awsProvisionerTest.py +12 -9
toil/test/provisioners/clusterTest.py +4 -4
toil/test/provisioners/gceProvisionerTest.py +16 -14
toil/test/sort/sort.py +4 -1
toil/test/src/busTest.py +17 -17
toil/test/src/deferredFunctionTest.py +145 -132
toil/test/src/importExportFileTest.py +71 -63
toil/test/src/jobEncapsulationTest.py +27 -28
toil/test/src/jobServiceTest.py +149 -133
toil/test/src/jobTest.py +219 -211
toil/test/src/miscTests.py +66 -60
toil/test/src/promisedRequirementTest.py +163 -169
toil/test/src/regularLogTest.py +24 -24
toil/test/src/resourceTest.py +82 -76
toil/test/src/restartDAGTest.py +51 -47
toil/test/src/resumabilityTest.py +24 -19
toil/test/src/retainTempDirTest.py +60 -57
toil/test/src/systemTest.py +17 -13
toil/test/src/threadingTest.py +29 -32
toil/test/utils/ABCWorkflowDebug/B_file.txt +1 -0
toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +204 -0
toil/test/utils/ABCWorkflowDebug/mkFile.py +16 -0
toil/test/utils/ABCWorkflowDebug/sleep.cwl +12 -0
toil/test/utils/ABCWorkflowDebug/sleep.yaml +1 -0
toil/test/utils/toilDebugTest.py +117 -102
toil/test/utils/toilKillTest.py +54 -53
toil/test/utils/utilsTest.py +303 -229
toil/test/wdl/lint_error.wdl +9 -0
toil/test/wdl/md5sum/empty_file.json +1 -0
toil/test/wdl/md5sum/md5sum-gs.json +1 -0
toil/test/wdl/md5sum/md5sum.1.0.wdl +32 -0
toil/test/wdl/md5sum/md5sum.input +1 -0
toil/test/wdl/md5sum/md5sum.json +1 -0
toil/test/wdl/md5sum/md5sum.wdl +25 -0
toil/test/wdl/miniwdl_self_test/inputs-namespaced.json +1 -0
toil/test/wdl/miniwdl_self_test/inputs.json +1 -0
toil/test/wdl/miniwdl_self_test/self_test.wdl +40 -0
toil/test/wdl/standard_library/as_map.json +16 -0
toil/test/wdl/standard_library/as_map_as_input.wdl +23 -0
toil/test/wdl/standard_library/as_pairs.json +7 -0
toil/test/wdl/standard_library/as_pairs_as_input.wdl +23 -0
toil/test/wdl/standard_library/ceil.json +3 -0
toil/test/wdl/standard_library/ceil_as_command.wdl +16 -0
toil/test/wdl/standard_library/ceil_as_input.wdl +16 -0
toil/test/wdl/standard_library/collect_by_key.json +1 -0
toil/test/wdl/standard_library/collect_by_key_as_input.wdl +23 -0
toil/test/wdl/standard_library/cross.json +11 -0
toil/test/wdl/standard_library/cross_as_input.wdl +19 -0
toil/test/wdl/standard_library/flatten.json +7 -0
toil/test/wdl/standard_library/flatten_as_input.wdl +18 -0
toil/test/wdl/standard_library/floor.json +3 -0
toil/test/wdl/standard_library/floor_as_command.wdl +16 -0
toil/test/wdl/standard_library/floor_as_input.wdl +16 -0
toil/test/wdl/standard_library/keys.json +8 -0
toil/test/wdl/standard_library/keys_as_input.wdl +24 -0
toil/test/wdl/standard_library/length.json +7 -0
toil/test/wdl/standard_library/length_as_input.wdl +16 -0
toil/test/wdl/standard_library/length_as_input_with_map.json +7 -0
toil/test/wdl/standard_library/length_as_input_with_map.wdl +17 -0
toil/test/wdl/standard_library/length_invalid.json +3 -0
toil/test/wdl/standard_library/range.json +3 -0
toil/test/wdl/standard_library/range_0.json +3 -0
toil/test/wdl/standard_library/range_as_input.wdl +17 -0
toil/test/wdl/standard_library/range_invalid.json +3 -0
toil/test/wdl/standard_library/read_boolean.json +3 -0
toil/test/wdl/standard_library/read_boolean_as_command.wdl +17 -0
toil/test/wdl/standard_library/read_float.json +3 -0
toil/test/wdl/standard_library/read_float_as_command.wdl +17 -0
toil/test/wdl/standard_library/read_int.json +3 -0
toil/test/wdl/standard_library/read_int_as_command.wdl +17 -0
toil/test/wdl/standard_library/read_json.json +3 -0
toil/test/wdl/standard_library/read_json_as_output.wdl +31 -0
toil/test/wdl/standard_library/read_lines.json +3 -0
toil/test/wdl/standard_library/read_lines_as_output.wdl +31 -0
toil/test/wdl/standard_library/read_map.json +3 -0
toil/test/wdl/standard_library/read_map_as_output.wdl +31 -0
toil/test/wdl/standard_library/read_string.json +3 -0
toil/test/wdl/standard_library/read_string_as_command.wdl +17 -0
toil/test/wdl/standard_library/read_tsv.json +3 -0
toil/test/wdl/standard_library/read_tsv_as_output.wdl +31 -0
toil/test/wdl/standard_library/round.json +3 -0
toil/test/wdl/standard_library/round_as_command.wdl +16 -0
toil/test/wdl/standard_library/round_as_input.wdl +16 -0
toil/test/wdl/standard_library/size.json +3 -0
toil/test/wdl/standard_library/size_as_command.wdl +17 -0
toil/test/wdl/standard_library/size_as_output.wdl +36 -0
toil/test/wdl/standard_library/stderr.json +3 -0
toil/test/wdl/standard_library/stderr_as_output.wdl +30 -0
toil/test/wdl/standard_library/stdout.json +3 -0
toil/test/wdl/standard_library/stdout_as_output.wdl +30 -0
toil/test/wdl/standard_library/sub.json +3 -0
toil/test/wdl/standard_library/sub_as_input.wdl +17 -0
toil/test/wdl/standard_library/sub_as_input_with_file.wdl +17 -0
toil/test/wdl/standard_library/transpose.json +6 -0
toil/test/wdl/standard_library/transpose_as_input.wdl +18 -0
toil/test/wdl/standard_library/write_json.json +6 -0
toil/test/wdl/standard_library/write_json_as_command.wdl +17 -0
toil/test/wdl/standard_library/write_lines.json +7 -0
toil/test/wdl/standard_library/write_lines_as_command.wdl +17 -0
toil/test/wdl/standard_library/write_map.json +6 -0
toil/test/wdl/standard_library/write_map_as_command.wdl +17 -0
toil/test/wdl/standard_library/write_tsv.json +6 -0
toil/test/wdl/standard_library/write_tsv_as_command.wdl +17 -0
toil/test/wdl/standard_library/zip.json +12 -0
toil/test/wdl/standard_library/zip_as_input.wdl +19 -0
toil/test/wdl/test.csv +3 -0
toil/test/wdl/test.tsv +3 -0
toil/test/wdl/testfiles/croo.wdl +38 -0
toil/test/wdl/testfiles/drop_files.wdl +62 -0
toil/test/wdl/testfiles/drop_files_subworkflow.wdl +13 -0
toil/test/wdl/testfiles/empty.txt +0 -0
toil/test/wdl/testfiles/not_enough_outputs.wdl +33 -0
toil/test/wdl/testfiles/random.wdl +66 -0
toil/test/wdl/testfiles/string_file_coercion.json +1 -0
toil/test/wdl/testfiles/string_file_coercion.wdl +35 -0
toil/test/wdl/testfiles/test.json +4 -0
toil/test/wdl/testfiles/test_boolean.txt +1 -0
toil/test/wdl/testfiles/test_float.txt +1 -0
toil/test/wdl/testfiles/test_int.txt +1 -0
toil/test/wdl/testfiles/test_lines.txt +5 -0
toil/test/wdl/testfiles/test_map.txt +2 -0
toil/test/wdl/testfiles/test_string.txt +1 -0
toil/test/wdl/testfiles/url_to_file.wdl +13 -0
toil/test/wdl/testfiles/url_to_optional_file.wdl +13 -0
toil/test/wdl/testfiles/vocab.json +1 -0
toil/test/wdl/testfiles/vocab.wdl +66 -0
toil/test/wdl/testfiles/wait.wdl +34 -0
toil/test/wdl/wdl_specification/type_pair.json +23 -0
toil/test/wdl/wdl_specification/type_pair_basic.wdl +36 -0
toil/test/wdl/wdl_specification/type_pair_with_files.wdl +36 -0
toil/test/wdl/wdl_specification/v1_spec.json +1 -0
toil/test/wdl/wdl_specification/v1_spec_declaration.wdl +39 -0
toil/test/wdl/wdltoil_test.py +681 -408
toil/test/wdl/wdltoil_test_kubernetes.py +2 -2
toil/version.py +10 -10
toil/wdl/wdltoil.py +350 -123
toil/worker.py +113 -33
{toil-8.0.0.dist-info → toil-8.2.0.dist-info}/METADATA +13 -7
toil-8.2.0.dist-info/RECORD +439 -0
{toil-8.0.0.dist-info → toil-8.2.0.dist-info}/WHEEL +1 -1
toil/test/lib/test_integration.py +0 -104
toil-8.0.0.dist-info/RECORD +0 -253
{toil-8.0.0.dist-info → toil-8.2.0.dist-info}/entry_points.txt +0 -0
{toil-8.0.0.dist-info → toil-8.2.0.dist-info/licenses}/LICENSE +0 -0
{toil-8.0.0.dist-info → toil-8.2.0.dist-info}/top_level.txt +0 -0

toil/jobStores/googleJobStore.py CHANGED Viewed

@@ -20,8 +20,8 @@ import uuid
 from contextlib import contextmanager
 from functools import wraps
 from io import BytesIO
-from typing import IO, Optional
-from urllib.parse import ParseResult
+from typing import Any, IO, Iterator, Optional
+from urllib.parse import ParseResult, urlunparse
 from google.api_core.exceptions import (
     GoogleAPICallError,
@@ -90,6 +90,46 @@ def google_retry(f):
     return wrapper
+@contextmanager
+def permission_error_reporter(url: ParseResult, notes: str) -> Iterator[None]:
+    """
+    Detect and usefully report permission errors.
+    If we fall back to anonymous credentials, but they don't have permission
+    for something, the Google Cloud Storage module will try to refresh them
+    behind the scenes. Then it will complain::
+        <class 'google.auth.exceptions.InvalidOperation'>: Anonymous credentials cannot be refreshed.
+    We need to detect this and report that the real problem is that the user
+    has not set up any credentials. When you try to make the client
+    non-anonymously and don't have credentials set up, you get a nice error
+    from Google::
+        google.auth.exceptions.DefaultCredentialsError: Your default credentials were not found. To set up Application Default Credentials, see https://cloud.google.com/docs/authentication/external/set-up-adc for more information.
+    But we swallow that when we fall back to anonymous access.
+    So we take the URL and any notes from client setup here, and if something
+    goes wrong that looks like a permission problem we complain with the notes
+    attached.
+    """
+    try:
+        yield
+    except exceptions.InvalidOperation as e:
+        if "Anonymous credentials cannot be refreshed" in str(e):
+            raise RuntimeError(
+                "Google Storage tried to refresh anonymous credentials. "
+                "Are you sure you have set up your Google Account login "
+                "for applications with permission to access "
+                f"{urlunparse(url)}? "
+                "Maybe try `gcloud auth application-default login`? "
+                f"Client setup said: {notes}"
+            ) from e
+        else:
+            raise
 class GoogleJobStore(AbstractJobStore):
@@ -117,10 +157,10 @@ class GoogleJobStore(AbstractJobStore):
         self.readStatsBaseID = self.statsReadPrefix + self.statsBaseID
         self.sseKey = None
-        self.storageClient = self.create_client()
+        self.storageClient, self.auth_notes = self.create_client()
     @classmethod
-    def create_client(cls) -> storage.Client:
+    def create_client(cls) -> tuple[storage.Client, str]:
         """
         Produce a client for Google Sotrage with the highest level of access we can get.
@@ -128,8 +168,28 @@ class GoogleJobStore(AbstractJobStore):
         Google Storage module's behavior.
         Warn if GOOGLE_APPLICATION_CREDENTIALS is set but not actually present.
+        :returns: the client, and any notes about why it might not have permissions.
         """
+        notes: list[str] = []
+        def add_note(message: str, *args: Any, warn: bool = False) -> None:
+            """
+            Add and possibly warn with a note about the client permissions.
+            """
+            note = message % args
+            if warn:
+                log.warning(note)
+            notes.append(note)
+        def compile_notes() -> str:
+            """
+            Make one string explainign why we might not have expected permissions.
+            """
+            if notes:
+                return f"Google authentication had {len(notes)} potential issues: {'; '.join(notes)}"
+            else:
+                return "Google authentication appeared successful."
         # Determine if we have an override environment variable for our credentials.
         # We get the path to check existence, but Google Storage works out what
         # to use later by looking at the environment again.
@@ -139,38 +199,42 @@ class GoogleJobStore(AbstractJobStore):
         if credentials_path is not None and not os.path.exists(credentials_path):
             # If the file is missing, complain.
             # This variable holds a file name and not any sensitive data itself.
-            log.warning(
+            add_note(
                 "File '%s' from GOOGLE_APPLICATION_CREDENTIALS is unavailable! "
                 "We may not be able to authenticate!",
                 credentials_path,
+                warn=True
             )
         if credentials_path is None and os.path.exists(cls.nodeServiceAccountJson):
             try:
-                # load credentials from a particular file on GCE nodes if an override path is not set
+                # load credentials from a particular file on GCE nodes if an
+                # override path is not set
                 return storage.Client.from_service_account_json(
                     cls.nodeServiceAccountJson
-                )
+                ), compile_notes()
             except OSError:
                 # Probably we don't have permission to use the file.
-                log.warning(
+                add_note(
                     "File '%s' exists but didn't work to authenticate!",
                     cls.nodeServiceAccountJson,
+                    warn=True
                 )
         # Either a filename is specified, or our fallback file isn't there.
         try:
             # See if Google can work out how to authenticate.
-            return storage.Client()
-        except (DefaultCredentialsError, OSError):
+            return storage.Client(), compile_notes()
+        except (DefaultCredentialsError, OSError) as e:
             # Depending on which Google codepath or module version (???)
             # realizes we have no credentials, we can get an EnvironemntError,
             # or the new DefaultCredentialsError we are supposedly specced to
             # get.
+            add_note("Could not make authenticated client: %s", e)
             # Google can't find credentials, fall back to being anonymous.
             # This is likely to happen all the time so don't warn.
-            return storage.Client.create_anonymous_client()
+            return storage.Client.create_anonymous_client(), compile_notes()
     @google_retry
     def initialize(self, config=None):
@@ -406,19 +470,20 @@ class GoogleJobStore(AbstractJobStore):
     @classmethod
     @google_retry
-    def _get_blob_from_url(cls, url, exists=False):
+    def _get_blob_from_url(cls, client: storage.Client, url: ParseResult, exists: bool = False) -> storage.blob.Blob:
         """
         Gets the blob specified by the url.
         caution: makes no api request. blob may not ACTUALLY exist
-        :param urlparse.ParseResult url: the URL
+        :param client: The Google Sotrage client to use to connect with.
-        :param bool exists: if True, then syncs local blob object with cloud
+        :param url: the URL
+        :param exists: if True, then syncs local blob object with cloud
         and raises exceptions if it doesn't exist remotely
         :return: the blob requested
-        :rtype: :class:`~google.cloud.storage.blob.Blob`
         """
         bucketName = url.netloc
         fileName = url.path
@@ -427,8 +492,7 @@ class GoogleJobStore(AbstractJobStore):
         if fileName.startswith("/"):
             fileName = fileName[1:]
-        storageClient = cls.create_client()
-        bucket = storageClient.bucket(bucket_name=bucketName)
+        bucket = client.bucket(bucket_name=bucketName)
         blob = bucket.blob(compat_bytes(fileName))
         if exists:
@@ -440,26 +504,34 @@ class GoogleJobStore(AbstractJobStore):
     @classmethod
     def _url_exists(cls, url: ParseResult) -> bool:
-        try:
-            cls._get_blob_from_url(url, exists=True)
-            return True
-        except NoSuchFileException:
-            return False
+        client, auth_notes = cls.create_client()
+        with permission_error_reporter(url, auth_notes):
+            try:
+                cls._get_blob_from_url(client, url, exists=True)
+                return True
+            except NoSuchFileException:
+                return False
     @classmethod
     def _get_size(cls, url):
-        return cls._get_blob_from_url(url, exists=True).size
+        client, auth_notes = cls.create_client()
+        with permission_error_reporter(url, auth_notes):
+            return cls._get_blob_from_url(client, url, exists=True).size
     @classmethod
     def _read_from_url(cls, url, writable):
-        blob = cls._get_blob_from_url(url, exists=True)
-        blob.download_to_file(writable)
-        return blob.size, False
+        client, auth_notes = cls.create_client()
+        with permission_error_reporter(url, auth_notes):
+            blob = cls._get_blob_from_url(client, url, exists=True)
+            blob.download_to_file(writable)
+            return blob.size, False
     @classmethod
     def _open_url(cls, url: ParseResult) -> IO[bytes]:
-        blob = cls._get_blob_from_url(url, exists=True)
-        return blob.open("rb")
+        client, auth_notes = cls.create_client()
+        with permission_error_reporter(url, auth_notes):
+            blob = cls._get_blob_from_url(client, url, exists=True)
+            return blob.open("rb")
     @classmethod
     def _supports_url(cls, url, export=False):
@@ -467,8 +539,10 @@ class GoogleJobStore(AbstractJobStore):
     @classmethod
     def _write_to_url(cls, readable: bytes, url: str, executable: bool = False) -> None:
-        blob = cls._get_blob_from_url(url)
-        blob.upload_from_file(readable)
+        client, auth_notes = cls.create_client()
+        with permission_error_reporter(url, auth_notes):
+            blob = cls._get_blob_from_url(client, url)
+            blob.upload_from_file(readable)
     @classmethod
     def _list_url(cls, url: ParseResult) -> list[str]:

toil/leader.py CHANGED Viewed

@@ -1780,6 +1780,15 @@ class Leader:
                 self._updatePredecessorStatus(job_id)
+        if self.config.stop_on_first_failure:
+            # We want to stop the workflow on the first complete failure of a job.
+            logger.error("Stopping workflow on first failure, which was: %s", job_desc)
+            raise FailedJobsException(
+                self.jobStore,
+                [self.toilState.get_job(job_id)],
+                exit_code=self.recommended_fail_exit_code,
+            )
     def _updatePredecessorStatus(self, jobStoreID: str) -> None:
         """Update status of predecessors for finished (possibly failed) successor job."""
         if jobStoreID in self.toilState.service_to_client:

toil/lib/accelerators.py CHANGED Viewed

@@ -103,7 +103,9 @@ def have_working_nvidia_docker_runtime() -> bool:
                 "all",
                 "ubuntu:20.04",
                 "nvidia-smi",
-            ]
+            ],
+            stdout=subprocess.DEVNULL,
+            stderr=subprocess.DEVNULL
         )
     except (
         FileNotFoundError,

toil/lib/aws/session.py CHANGED Viewed

@@ -35,6 +35,9 @@ if TYPE_CHECKING:
 logger = logging.getLogger(__name__)
+# You can pass config=ANONYMOUS_CONFIG to make anonymous S3 accesses
+ANONYMOUS_CONFIG = Config(signature_version=botocore.UNSIGNED)
 # A note on thread safety:
 #
 # Boto3 Session: Not thread safe, 1 per thread is required.
@@ -148,6 +151,7 @@ class AWSConnectionManager:
         region: Optional[str],
         service_name: Literal["s3"],
         endpoint_url: Optional[str] = None,
+        config: Optional[Config] = None,
     ) -> "S3ServiceResource": ...
     @overload
     def resource(
@@ -155,6 +159,7 @@ class AWSConnectionManager:
         region: Optional[str],
         service_name: Literal["iam"],
         endpoint_url: Optional[str] = None,
+        config: Optional[Config] = None,
     ) -> "IAMServiceResource": ...
     @overload
     def resource(
@@ -162,6 +167,7 @@ class AWSConnectionManager:
         region: Optional[str],
         service_name: Literal["ec2"],
         endpoint_url: Optional[str] = None,
+        config: Optional[Config] = None,
     ) -> "EC2ServiceResource": ...
     def resource(
@@ -169,6 +175,7 @@ class AWSConnectionManager:
         region: Optional[str],
         service_name: str,
         endpoint_url: Optional[str] = None,
+        config: Optional[Config] = None,
     ) -> boto3.resources.base.ServiceResource:
         """
         Get the Boto3 Resource to use with the given service (like 'ec2') in the given region.
@@ -188,10 +195,10 @@ class AWSConnectionManager:
                     # The Boto3 stubs are missing an overload for `resource` that takes
                     # a non-literal string. See
                     # <https://github.com/vemel/mypy_boto3_builder/issues/121#issuecomment-1011322636>
-                    storage.item = self.session(region).resource(service_name, endpoint_url=endpoint_url)  # type: ignore
+                    storage.item = self.session(region).resource(service_name, endpoint_url=endpoint_url, config=config)  # type: ignore
                 else:
                     # We might not be able to pass None to Boto3 and have it be the same as no argument.
-                    storage.item = self.session(region).resource(service_name)  # type: ignore
+                    storage.item = self.session(region).resource(service_name, config=config)  # type: ignore
         return cast(boto3.resources.base.ServiceResource, storage.item)
@@ -369,18 +376,21 @@ def resource(
     service_name: Literal["s3"],
     region_name: Optional[str] = None,
     endpoint_url: Optional[str] = None,
+    config: Optional[Config] = None,
 ) -> "S3ServiceResource": ...
 @overload
 def resource(
     service_name: Literal["iam"],
     region_name: Optional[str] = None,
     endpoint_url: Optional[str] = None,
+    config: Optional[Config] = None,
 ) -> "IAMServiceResource": ...
 @overload
 def resource(
     service_name: Literal["ec2"],
     region_name: Optional[str] = None,
     endpoint_url: Optional[str] = None,
+    config: Optional[Config] = None,
 ) -> "EC2ServiceResource": ...
@@ -388,6 +398,7 @@ def resource(
     service_name: Literal["s3", "iam", "ec2"],
     region_name: Optional[str] = None,
     endpoint_url: Optional[str] = None,
+    config: Optional[Config] = None,
 ) -> boto3.resources.base.ServiceResource:
     """
     Get a Boto 3 resource for a particular AWS service, usable by the current thread.
@@ -397,5 +408,5 @@ def resource(
     # Just use a global version of the manager. Note that we change the argument order!
     return _global_manager.resource(
-        region_name, service_name, endpoint_url=endpoint_url
+        region_name, service_name, endpoint_url=endpoint_url, config=config
     )

toil/lib/aws/utils.py CHANGED Viewed

@@ -19,8 +19,10 @@ from collections.abc import Iterable, Iterator
 from typing import TYPE_CHECKING, Any, Callable, ContextManager, Optional, cast
 from urllib.parse import ParseResult
+# To import toil.lib.aws.session, the AWS libraries must be installed
 from toil.lib.aws import AWSRegionName, AWSServerErrors, session
 from toil.lib.conversions import strtobool
+from toil.lib.memoize import memoize
 from toil.lib.misc import printq
 from toil.lib.retry import (
     DEFAULT_DELAYS,
@@ -37,12 +39,7 @@ if TYPE_CHECKING:
     from mypy_boto3_s3.service_resource import Object as S3Object
     from mypy_boto3_sdb.type_defs import AttributeTypeDef
-try:
-    from botocore.exceptions import ClientError, EndpointConnectionError
-except ImportError:
-    ClientError = None  # type: ignore
-    EndpointConnectionError = None  # type: ignore
-    # AWS/boto extra is not installed
+from botocore.exceptions import ClientError, EndpointConnectionError
 logger = logging.getLogger(__name__)
@@ -232,6 +229,7 @@ def get_bucket_region(
     bucket_name: str,
     endpoint_url: Optional[str] = None,
     only_strategies: Optional[set[int]] = None,
+    anonymous: Optional[bool] = None
 ) -> str:
     """
     Get the AWS region name associated with the given S3 bucket, or raise NoBucketLocationError.
@@ -241,9 +239,13 @@ def get_bucket_region(
     Takes an optional S3 API URL override.
     :param only_strategies: For testing, use only strategies with 1-based numbers in this set.
+    :raises NoBucketLocationError: if the bucket's region cannot be determined
+        (possibly due to lack of permissions).
     """
-    s3_client = session.client("s3", endpoint_url=endpoint_url)
+    config = session.ANONYMOUS_CONFIG if anonymous else None
+    s3_client = session.client("s3", endpoint_url=endpoint_url, config=config)
     def attempt_get_bucket_location() -> Optional[str]:
         """
@@ -267,7 +269,7 @@ def get_bucket_region(
         # It could also be because AWS open data buckets (which we tend to
         # encounter this problem for) tend to actually themselves be in
         # us-east-1.
-        backup_s3_client = session.client("s3", region_name="us-east-1")
+        backup_s3_client = session.client("s3", region_name="us-east-1", config=config)
         return backup_s3_client.get_bucket_location(Bucket=bucket_name).get(
             "LocationConstraint", None
         )
@@ -337,6 +339,30 @@ def get_bucket_region(
         "Could not get bucket location: " + "\n".join(error_messages)
     ) from last_error
+@memoize
+def get_bucket_region_if_available(
+    bucket_name: str,
+    endpoint_url: Optional[str] = None,
+    only_strategies: Optional[set[int]] = None,
+    anonymous: Optional[bool] = None
+) -> Optional[str]:
+    """
+    Get the AWS region name associated with the given S3 bucket, or return None.
+    Caches results, so may not return the location for a bucket that has been
+    created but was previously observed to be nonexistent.
+    :param only_strategies: For testing, use only strategies with 1-based numbers in this set.
+    """
+    try:
+        return get_bucket_region(bucket_name, endpoint_url, only_strategies, anonymous)
+    except Exception as e:
+        if isinstance(e, NoBucketLocationError) or (isinstance(e, ClientError) and get_error_status(e) == 403):
+            # We can't know
+            return None
+        else:
+            raise
 def region_to_bucket_location(region: str) -> str:
     return "" if region == "us-east-1" else region
@@ -346,7 +372,7 @@ def bucket_location_to_region(location: Optional[str]) -> str:
     return "us-east-1" if location == "" or location is None else location
-def get_object_for_url(url: ParseResult, existing: Optional[bool] = None) -> "S3Object":
+def get_object_for_url(url: ParseResult, existing: Optional[bool] = None, anonymous: Optional[bool] = None) -> "S3Object":
     """
     Extracts a key (object) from a given parsed s3:// URL.
@@ -354,6 +380,10 @@ def get_object_for_url(url: ParseResult, existing: Optional[bool] = None) -> "S3
     :param bool existing: If True, key is expected to exist. If False, key is expected not to
             exist and it will be created. If None, the key will be created if it doesn't exist.
+    :raises FileNotFoundError: when existing is True and the object does not exist.
+    :raises RuntimeError: when existing is False but the object exists.
+    :raises PermissionError: when we are not authorized to look at the object.
     """
     key_name = url.path[1:]
@@ -372,17 +402,19 @@ def get_object_for_url(url: ParseResult, existing: Optional[bool] = None) -> "S3
     # TODO: OrdinaryCallingFormat equivalent in boto3?
     # if botoargs:
     #     botoargs['calling_format'] = boto.s3.connection.OrdinaryCallingFormat()
-    try:
-        # Get the bucket's region to avoid a redirect per request
-        region = get_bucket_region(bucket_name, endpoint_url=endpoint_url)
-        s3 = session.resource("s3", region_name=region, endpoint_url=endpoint_url)
-    except NoBucketLocationError as e:
-        # Probably don't have permission.
-        # TODO: check if it is that
-        logger.debug("Couldn't get bucket location: %s", e)
+    config = session.ANONYMOUS_CONFIG if anonymous else None
+    # Get the bucket's region to avoid a redirect per request.
+    # Cache the result
+    region = get_bucket_region_if_available(bucket_name, endpoint_url=endpoint_url, anonymous=anonymous)
+    if region is not None:
+        s3 = session.resource("s3", region_name=region, endpoint_url=endpoint_url, config=config)
+    else:
+        # We can't get the bucket location, perhaps because we don't have
+        # permission to do that.
+        logger.debug("Couldn't get bucket location")
         logger.debug("Fall back to not specifying location")
-        s3 = session.resource("s3", endpoint_url=endpoint_url)
+        s3 = session.resource("s3", endpoint_url=endpoint_url, config=config)
     obj = s3.Object(bucket_name, key_name)
     objExists = True
@@ -392,6 +424,10 @@ def get_object_for_url(url: ParseResult, existing: Optional[bool] = None) -> "S3
     except ClientError as e:
         if get_error_status(e) == 404:
             objExists = False
+        elif get_error_status(e) == 403:
+            raise PermissionError(
+                f"Key '{key_name}' is not accessible in bucket '{bucket_name}'."
+            ) from e
         else:
             raise
     if existing is True and not objExists:
@@ -402,16 +438,27 @@ def get_object_for_url(url: ParseResult, existing: Optional[bool] = None) -> "S3
         raise RuntimeError(f"Key '{key_name}' exists in bucket '{bucket_name}'.")
     if not objExists:
-        obj.put()  # write an empty file
+        try:
+            obj.put()  # write an empty file
+        except ClientError as e:
+            if get_error_status(e) == 403:
+                raise PermissionError(
+                    f"Key '{key_name}' is not writable in bucket '{bucket_name}'."
+                ) from e
+            else:
+                raise
     return obj
 @retry(errors=[AWSServerErrors])
-def list_objects_for_url(url: ParseResult) -> list[str]:
+def list_objects_for_url(url: ParseResult, anonymous: Optional[bool] = None) -> list[str]:
     """
     Extracts a key (object) from a given parsed s3:// URL. The URL will be
     supplemented with a trailing slash if it is missing.
+    :raises PermissionError: when we are not authorized to do the list operation.
     """
     key_name = url.path[1:]
     bucket_name = url.netloc
@@ -430,23 +477,33 @@ def list_objects_for_url(url: ParseResult) -> list[str]:
         protocol = "http"
     if host:
         endpoint_url = f"{protocol}://{host}" + f":{port}" if port else ""
-    client = session.client("s3", endpoint_url=endpoint_url)
+    config = session.ANONYMOUS_CONFIG if anonymous else None
+    client = session.client("s3", endpoint_url=endpoint_url, config=config)
     listing = []
+    try:
+        paginator = client.get_paginator("list_objects_v2")
+        result = paginator.paginate(Bucket=bucket_name, Prefix=key_name, Delimiter="/")
+        for page in result:
+            if "CommonPrefixes" in page:
+                for prefix_item in page["CommonPrefixes"]:
+                    listing.append(prefix_item["Prefix"][len(key_name) :])
+            if "Contents" in page:
+                for content_item in page["Contents"]:
+                    if content_item["Key"] == key_name:
+                        # Ignore folder name itself
+                        continue
+                    listing.append(content_item["Key"][len(key_name) :])
+    except ClientError as e:
+        if get_error_status(e) == 403:
+            raise PermissionError(
+                f"Prefix '{key_name}' is not authorized to be listed in bucket '{bucket_name}'."
+            ) from e
+        else:
+            raise
-    paginator = client.get_paginator("list_objects_v2")
-    result = paginator.paginate(Bucket=bucket_name, Prefix=key_name, Delimiter="/")
-    for page in result:
-        if "CommonPrefixes" in page:
-            for prefix_item in page["CommonPrefixes"]:
-                listing.append(prefix_item["Prefix"][len(key_name) :])
-        if "Contents" in page:
-            for content_item in page["Contents"]:
-                if content_item["Key"] == key_name:
-                    # Ignore folder name itself
-                    continue
-                listing.append(content_item["Key"][len(key_name) :])
     logger.debug("Found in %s items: %s", url, listing)
     return listing

toil 8.0.0__py3-none-any.whl → 8.2.0__py3-none-any.whl

toil 8.0.0__py3-none-any.whl → 8.2.0__py3-none-any.whl