PyPI - toil - Versions diffs - 8.0.0__py3-none-any.whl → 8.2.0__py3-none-any.whl - Mend

toil 8.0.0-py3-none-any.whl → 8.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (270)

toil/__init__.py +4 -39
toil/batchSystems/abstractBatchSystem.py +1 -1
toil/batchSystems/abstractGridEngineBatchSystem.py +1 -1
toil/batchSystems/awsBatch.py +1 -1
toil/batchSystems/cleanup_support.py +1 -1
toil/batchSystems/kubernetes.py +53 -7
toil/batchSystems/local_support.py +1 -1
toil/batchSystems/mesos/batchSystem.py +13 -8
toil/batchSystems/mesos/test/__init__.py +3 -2
toil/batchSystems/options.py +1 -0
toil/batchSystems/singleMachine.py +1 -1
toil/batchSystems/slurm.py +229 -84
toil/bus.py +5 -3
toil/common.py +198 -54
toil/cwl/cwltoil.py +32 -11
toil/job.py +110 -86
toil/jobStores/abstractJobStore.py +24 -3
toil/jobStores/aws/jobStore.py +46 -10
toil/jobStores/fileJobStore.py +25 -1
toil/jobStores/googleJobStore.py +104 -30
toil/leader.py +9 -0
toil/lib/accelerators.py +3 -1
toil/lib/aws/session.py +14 -3
toil/lib/aws/utils.py +92 -35
toil/lib/aws/utils.py.orig +504 -0
toil/lib/bioio.py +1 -1
toil/lib/docker.py +252 -91
toil/lib/dockstore.py +387 -0
toil/lib/ec2nodes.py +3 -2
toil/lib/exceptions.py +5 -3
toil/lib/history.py +1345 -0
toil/lib/history_submission.py +695 -0
toil/lib/io.py +56 -23
toil/lib/misc.py +25 -1
toil/lib/resources.py +2 -1
toil/lib/retry.py +10 -10
toil/lib/threading.py +11 -10
toil/lib/{integration.py → trs.py} +95 -46
toil/lib/web.py +38 -0
toil/options/common.py +25 -2
toil/options/cwl.py +10 -0
toil/options/wdl.py +11 -0
toil/provisioners/gceProvisioner.py +4 -4
toil/server/api_spec/LICENSE +201 -0
toil/server/api_spec/README.rst +5 -0
toil/server/cli/wes_cwl_runner.py +5 -4
toil/server/utils.py +2 -3
toil/statsAndLogging.py +35 -1
toil/test/__init__.py +275 -115
toil/test/batchSystems/batchSystemTest.py +227 -205
toil/test/batchSystems/test_slurm.py +199 -2
toil/test/cactus/pestis.tar.gz +0 -0
toil/test/conftest.py +7 -0
toil/test/cwl/2.fasta +11 -0
toil/test/cwl/2.fastq +12 -0
toil/test/cwl/conftest.py +39 -0
toil/test/cwl/cwlTest.py +1015 -780
toil/test/cwl/directory/directory/file.txt +15 -0
toil/test/cwl/download_directory_file.json +4 -0
toil/test/cwl/download_directory_s3.json +4 -0
toil/test/cwl/download_file.json +6 -0
toil/test/cwl/download_http.json +6 -0
toil/test/cwl/download_https.json +6 -0
toil/test/cwl/download_s3.json +6 -0
toil/test/cwl/download_subdirectory_file.json +5 -0
toil/test/cwl/download_subdirectory_s3.json +5 -0
toil/test/cwl/empty.json +1 -0
toil/test/cwl/mock_mpi/fake_mpi.yml +8 -0
toil/test/cwl/mock_mpi/fake_mpi_run.py +42 -0
toil/test/cwl/optional-file-exists.json +6 -0
toil/test/cwl/optional-file-missing.json +6 -0
toil/test/cwl/optional-file.cwl +18 -0
toil/test/cwl/preemptible_expression.json +1 -0
toil/test/cwl/revsort-job-missing.json +6 -0
toil/test/cwl/revsort-job.json +6 -0
toil/test/cwl/s3_secondary_file.json +16 -0
toil/test/cwl/seqtk_seq_job.json +6 -0
toil/test/cwl/stream.json +6 -0
toil/test/cwl/test_filename_conflict_resolution.ms/table.dat +0 -0
toil/test/cwl/test_filename_conflict_resolution.ms/table.f0 +0 -0
toil/test/cwl/test_filename_conflict_resolution.ms/table.f1 +0 -0
toil/test/cwl/test_filename_conflict_resolution.ms/table.f1i +0 -0
toil/test/cwl/test_filename_conflict_resolution.ms/table.f2 +0 -0
toil/test/cwl/test_filename_conflict_resolution.ms/table.f2_TSM0 +0 -0
toil/test/cwl/test_filename_conflict_resolution.ms/table.f3 +0 -0
toil/test/cwl/test_filename_conflict_resolution.ms/table.f3_TSM0 +0 -0
toil/test/cwl/test_filename_conflict_resolution.ms/table.f4 +0 -0
toil/test/cwl/test_filename_conflict_resolution.ms/table.f4_TSM0 +0 -0
toil/test/cwl/test_filename_conflict_resolution.ms/table.f5 +0 -0
toil/test/cwl/test_filename_conflict_resolution.ms/table.info +0 -0
toil/test/cwl/test_filename_conflict_resolution.ms/table.lock +0 -0
toil/test/cwl/whale.txt +16 -0
toil/test/docs/scripts/example_alwaysfail.py +38 -0
toil/test/docs/scripts/example_alwaysfail_with_files.wdl +33 -0
toil/test/docs/scripts/example_cachingbenchmark.py +117 -0
toil/test/docs/scripts/stagingExampleFiles/in.txt +1 -0
toil/test/docs/scripts/stagingExampleFiles/out.txt +2 -0
toil/test/docs/scripts/tutorial_arguments.py +23 -0
toil/test/docs/scripts/tutorial_debugging.patch +12 -0
toil/test/docs/scripts/tutorial_debugging_hangs.wdl +126 -0
toil/test/docs/scripts/tutorial_debugging_works.wdl +129 -0
toil/test/docs/scripts/tutorial_docker.py +20 -0
toil/test/docs/scripts/tutorial_dynamic.py +24 -0
toil/test/docs/scripts/tutorial_encapsulation.py +28 -0
toil/test/docs/scripts/tutorial_encapsulation2.py +29 -0
toil/test/docs/scripts/tutorial_helloworld.py +15 -0
toil/test/docs/scripts/tutorial_invokeworkflow.py +27 -0
toil/test/docs/scripts/tutorial_invokeworkflow2.py +30 -0
toil/test/docs/scripts/tutorial_jobfunctions.py +22 -0
toil/test/docs/scripts/tutorial_managing.py +29 -0
toil/test/docs/scripts/tutorial_managing2.py +56 -0
toil/test/docs/scripts/tutorial_multiplejobs.py +25 -0
toil/test/docs/scripts/tutorial_multiplejobs2.py +21 -0
toil/test/docs/scripts/tutorial_multiplejobs3.py +22 -0
toil/test/docs/scripts/tutorial_promises.py +25 -0
toil/test/docs/scripts/tutorial_promises2.py +30 -0
toil/test/docs/scripts/tutorial_quickstart.py +22 -0
toil/test/docs/scripts/tutorial_requirements.py +44 -0
toil/test/docs/scripts/tutorial_services.py +45 -0
toil/test/docs/scripts/tutorial_staging.py +45 -0
toil/test/docs/scripts/tutorial_stats.py +64 -0
toil/test/lib/aws/test_iam.py +3 -1
toil/test/lib/dockerTest.py +205 -122
toil/test/lib/test_history.py +236 -0
toil/test/lib/test_trs.py +161 -0
toil/test/provisioners/aws/awsProvisionerTest.py +12 -9
toil/test/provisioners/clusterTest.py +4 -4
toil/test/provisioners/gceProvisionerTest.py +16 -14
toil/test/sort/sort.py +4 -1
toil/test/src/busTest.py +17 -17
toil/test/src/deferredFunctionTest.py +145 -132
toil/test/src/importExportFileTest.py +71 -63
toil/test/src/jobEncapsulationTest.py +27 -28
toil/test/src/jobServiceTest.py +149 -133
toil/test/src/jobTest.py +219 -211
toil/test/src/miscTests.py +66 -60
toil/test/src/promisedRequirementTest.py +163 -169
toil/test/src/regularLogTest.py +24 -24
toil/test/src/resourceTest.py +82 -76
toil/test/src/restartDAGTest.py +51 -47
toil/test/src/resumabilityTest.py +24 -19
toil/test/src/retainTempDirTest.py +60 -57
toil/test/src/systemTest.py +17 -13
toil/test/src/threadingTest.py +29 -32
toil/test/utils/ABCWorkflowDebug/B_file.txt +1 -0
toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +204 -0
toil/test/utils/ABCWorkflowDebug/mkFile.py +16 -0
toil/test/utils/ABCWorkflowDebug/sleep.cwl +12 -0
toil/test/utils/ABCWorkflowDebug/sleep.yaml +1 -0
toil/test/utils/toilDebugTest.py +117 -102
toil/test/utils/toilKillTest.py +54 -53
toil/test/utils/utilsTest.py +303 -229
toil/test/wdl/lint_error.wdl +9 -0
toil/test/wdl/md5sum/empty_file.json +1 -0
toil/test/wdl/md5sum/md5sum-gs.json +1 -0
toil/test/wdl/md5sum/md5sum.1.0.wdl +32 -0
toil/test/wdl/md5sum/md5sum.input +1 -0
toil/test/wdl/md5sum/md5sum.json +1 -0
toil/test/wdl/md5sum/md5sum.wdl +25 -0
toil/test/wdl/miniwdl_self_test/inputs-namespaced.json +1 -0
toil/test/wdl/miniwdl_self_test/inputs.json +1 -0
toil/test/wdl/miniwdl_self_test/self_test.wdl +40 -0
toil/test/wdl/standard_library/as_map.json +16 -0
toil/test/wdl/standard_library/as_map_as_input.wdl +23 -0
toil/test/wdl/standard_library/as_pairs.json +7 -0
toil/test/wdl/standard_library/as_pairs_as_input.wdl +23 -0
toil/test/wdl/standard_library/ceil.json +3 -0
toil/test/wdl/standard_library/ceil_as_command.wdl +16 -0
toil/test/wdl/standard_library/ceil_as_input.wdl +16 -0
toil/test/wdl/standard_library/collect_by_key.json +1 -0
toil/test/wdl/standard_library/collect_by_key_as_input.wdl +23 -0
toil/test/wdl/standard_library/cross.json +11 -0
toil/test/wdl/standard_library/cross_as_input.wdl +19 -0
toil/test/wdl/standard_library/flatten.json +7 -0
toil/test/wdl/standard_library/flatten_as_input.wdl +18 -0
toil/test/wdl/standard_library/floor.json +3 -0
toil/test/wdl/standard_library/floor_as_command.wdl +16 -0
toil/test/wdl/standard_library/floor_as_input.wdl +16 -0
toil/test/wdl/standard_library/keys.json +8 -0
toil/test/wdl/standard_library/keys_as_input.wdl +24 -0
toil/test/wdl/standard_library/length.json +7 -0
toil/test/wdl/standard_library/length_as_input.wdl +16 -0
toil/test/wdl/standard_library/length_as_input_with_map.json +7 -0
toil/test/wdl/standard_library/length_as_input_with_map.wdl +17 -0
toil/test/wdl/standard_library/length_invalid.json +3 -0
toil/test/wdl/standard_library/range.json +3 -0
toil/test/wdl/standard_library/range_0.json +3 -0
toil/test/wdl/standard_library/range_as_input.wdl +17 -0
toil/test/wdl/standard_library/range_invalid.json +3 -0
toil/test/wdl/standard_library/read_boolean.json +3 -0
toil/test/wdl/standard_library/read_boolean_as_command.wdl +17 -0
toil/test/wdl/standard_library/read_float.json +3 -0
toil/test/wdl/standard_library/read_float_as_command.wdl +17 -0
toil/test/wdl/standard_library/read_int.json +3 -0
toil/test/wdl/standard_library/read_int_as_command.wdl +17 -0
toil/test/wdl/standard_library/read_json.json +3 -0
toil/test/wdl/standard_library/read_json_as_output.wdl +31 -0
toil/test/wdl/standard_library/read_lines.json +3 -0
toil/test/wdl/standard_library/read_lines_as_output.wdl +31 -0
toil/test/wdl/standard_library/read_map.json +3 -0
toil/test/wdl/standard_library/read_map_as_output.wdl +31 -0
toil/test/wdl/standard_library/read_string.json +3 -0
toil/test/wdl/standard_library/read_string_as_command.wdl +17 -0
toil/test/wdl/standard_library/read_tsv.json +3 -0
toil/test/wdl/standard_library/read_tsv_as_output.wdl +31 -0
toil/test/wdl/standard_library/round.json +3 -0
toil/test/wdl/standard_library/round_as_command.wdl +16 -0
toil/test/wdl/standard_library/round_as_input.wdl +16 -0
toil/test/wdl/standard_library/size.json +3 -0
toil/test/wdl/standard_library/size_as_command.wdl +17 -0
toil/test/wdl/standard_library/size_as_output.wdl +36 -0
toil/test/wdl/standard_library/stderr.json +3 -0
toil/test/wdl/standard_library/stderr_as_output.wdl +30 -0
toil/test/wdl/standard_library/stdout.json +3 -0
toil/test/wdl/standard_library/stdout_as_output.wdl +30 -0
toil/test/wdl/standard_library/sub.json +3 -0
toil/test/wdl/standard_library/sub_as_input.wdl +17 -0
toil/test/wdl/standard_library/sub_as_input_with_file.wdl +17 -0
toil/test/wdl/standard_library/transpose.json +6 -0
toil/test/wdl/standard_library/transpose_as_input.wdl +18 -0
toil/test/wdl/standard_library/write_json.json +6 -0
toil/test/wdl/standard_library/write_json_as_command.wdl +17 -0
toil/test/wdl/standard_library/write_lines.json +7 -0
toil/test/wdl/standard_library/write_lines_as_command.wdl +17 -0
toil/test/wdl/standard_library/write_map.json +6 -0
toil/test/wdl/standard_library/write_map_as_command.wdl +17 -0
toil/test/wdl/standard_library/write_tsv.json +6 -0
toil/test/wdl/standard_library/write_tsv_as_command.wdl +17 -0
toil/test/wdl/standard_library/zip.json +12 -0
toil/test/wdl/standard_library/zip_as_input.wdl +19 -0
toil/test/wdl/test.csv +3 -0
toil/test/wdl/test.tsv +3 -0
toil/test/wdl/testfiles/croo.wdl +38 -0
toil/test/wdl/testfiles/drop_files.wdl +62 -0
toil/test/wdl/testfiles/drop_files_subworkflow.wdl +13 -0
toil/test/wdl/testfiles/empty.txt +0 -0
toil/test/wdl/testfiles/not_enough_outputs.wdl +33 -0
toil/test/wdl/testfiles/random.wdl +66 -0
toil/test/wdl/testfiles/string_file_coercion.json +1 -0
toil/test/wdl/testfiles/string_file_coercion.wdl +35 -0
toil/test/wdl/testfiles/test.json +4 -0
toil/test/wdl/testfiles/test_boolean.txt +1 -0
toil/test/wdl/testfiles/test_float.txt +1 -0
toil/test/wdl/testfiles/test_int.txt +1 -0
toil/test/wdl/testfiles/test_lines.txt +5 -0
toil/test/wdl/testfiles/test_map.txt +2 -0
toil/test/wdl/testfiles/test_string.txt +1 -0
toil/test/wdl/testfiles/url_to_file.wdl +13 -0
toil/test/wdl/testfiles/url_to_optional_file.wdl +13 -0
toil/test/wdl/testfiles/vocab.json +1 -0
toil/test/wdl/testfiles/vocab.wdl +66 -0
toil/test/wdl/testfiles/wait.wdl +34 -0
toil/test/wdl/wdl_specification/type_pair.json +23 -0
toil/test/wdl/wdl_specification/type_pair_basic.wdl +36 -0
toil/test/wdl/wdl_specification/type_pair_with_files.wdl +36 -0
toil/test/wdl/wdl_specification/v1_spec.json +1 -0
toil/test/wdl/wdl_specification/v1_spec_declaration.wdl +39 -0
toil/test/wdl/wdltoil_test.py +681 -408
toil/test/wdl/wdltoil_test_kubernetes.py +2 -2
toil/version.py +10 -10
toil/wdl/wdltoil.py +350 -123
toil/worker.py +113 -33
{toil-8.0.0.dist-info → toil-8.2.0.dist-info}/METADATA +13 -7
toil-8.2.0.dist-info/RECORD +439 -0
{toil-8.0.0.dist-info → toil-8.2.0.dist-info}/WHEEL +1 -1
toil/test/lib/test_integration.py +0 -104
toil-8.0.0.dist-info/RECORD +0 -253
{toil-8.0.0.dist-info → toil-8.2.0.dist-info}/entry_points.txt +0 -0
{toil-8.0.0.dist-info → toil-8.2.0.dist-info/licenses}/LICENSE +0 -0
{toil-8.0.0.dist-info → toil-8.2.0.dist-info}/top_level.txt +0 -0

toil/common.py CHANGED Viewed

@@ -14,6 +14,7 @@
 import json
 import logging
 import os
+import platform
 import pickle
 import re
 import signal
@@ -53,6 +54,7 @@ import requests
 from configargparse import ArgParser, YAMLConfigFileParser
 from ruamel.yaml import YAML
 from ruamel.yaml.comments import CommentedMap
+from ruamel.yaml.scalarstring import DoubleQuotedScalarString
 from toil import logProcessContext, lookupEnvVar
 from toil.batchSystems.options import set_batchsystem_options
@@ -69,7 +71,11 @@ from toil.bus import (
 )
 from toil.fileStores import FileID
 from toil.lib.compatibility import deprecated
-from toil.lib.io import AtomicFileCreate, try_path
+from toil.lib.history import HistoryManager
+from toil.lib.history_submission import ask_user_about_publishing_metrics, create_history_submission, create_current_submission
+from toil.lib.io import AtomicFileCreate, try_path, get_toil_home
+from toil.lib.misc import StrPath
+from toil.lib.memoize import memoize
 from toil.lib.retry import retry
 from toil.lib.threading import ensure_filesystem_lockable
 from toil.options.common import JOBSTORE_HELP, add_base_toil_options
@@ -79,7 +85,7 @@ from toil.options.wdl import add_wdl_options
 from toil.provisioners import add_provisioner_options, cluster_factory
 from toil.realtimeLogger import RealtimeLogger
 from toil.statsAndLogging import add_logging_options, set_logging_from_options
-from toil.version import dockerRegistry, dockerTag, version
+from toil.version import dockerRegistry, dockerTag, version, baseVersion
 if TYPE_CHECKING:
     from toil.batchSystems.abstractBatchSystem import AbstractBatchSystem
@@ -92,11 +98,14 @@ if TYPE_CHECKING:
 UUID_LENGTH = 32
 logger = logging.getLogger(__name__)
-# TODO: should this use an XDG config directory or ~/.config to not clutter the
-# base home directory?
-TOIL_HOME_DIR: str = os.path.join(os.path.expanduser("~"), ".toil")
-DEFAULT_CONFIG_FILE: str = os.path.join(TOIL_HOME_DIR, "default.yaml")
+@memoize
+def get_default_config_path() -> str:
+    """
+    Get the default path where the Toil configuration file lives.
+    The file at the path will not necessarily exist.
+    """
+    return os.path.join(get_toil_home(), "default.yaml")
 class Config:
     """Class to represent configuration operations for a toil workflow run."""
@@ -118,6 +127,8 @@ class Config:
     kubernetes_service_account: Optional[str]
     kubernetes_pod_timeout: float
     kubernetes_privileged: bool
+    kubernetes_pod_security_context: Optional[str]
+    kubernetes_security_context: Optional[str]
     tes_endpoint: str
     tes_user: str
     tes_password: str
@@ -130,7 +141,7 @@ class Config:
     batch_logs_dir: Optional[str]
     """The backing scheduler will be instructed, if possible, to save logs
     to this directory, where the leader can read them."""
-    statePollingWait: int
+    statePollingWait: float
     state_polling_timeout: int
     disableAutoDeployment: bool
@@ -200,6 +211,7 @@ class Config:
     # Retrying/rescuing jobs
     retryCount: int
+    stop_on_first_failure: bool
     enableUnlimitedPreemptibleRetries: bool
     doubleMem: bool
     maxJobDuration: int
@@ -214,6 +226,9 @@ class Config:
     write_messages: Optional[str]
     realTimeLogging: bool
+    # Data publishing
+    publish_workflow_metrics: Union[Literal["all"], Literal["current"], Literal["no"], None]
     # Misc
     environment: dict[str, str]
     disableChaining: bool
@@ -375,6 +390,7 @@ class Config:
         # Retrying/rescuing jobs
         set_option("retryCount")
+        set_option("stop_on_first_failure")
         set_option("enableUnlimitedPreemptibleRetries")
         set_option("doubleMem")
         set_option("maxJobDuration")
@@ -388,6 +404,9 @@ class Config:
         set_option("writeLogsFromAllJobs")
         set_option("write_messages")
+        # Data Publishing Options
+        set_option("publish_workflow_metrics")
         if self.write_messages is None:
             # The user hasn't specified a place for the message bus so we
             # should make one.
@@ -463,44 +482,20 @@ class Config:
     def __hash__(self) -> int:
         return self.__dict__.__hash__()  # type: ignore
-def check_and_create_toil_home_dir() -> None:
-    """
-    Ensure that TOIL_HOME_DIR exists.
-    Raises an error if it does not exist and cannot be created. Safe to run
-    simultaneously in multiple processes.
+def ensure_config(filepath: str) -> None:
     """
+    If the config file at the filepath does not exist, create it.
+    The parent directory should be created prior to calling this.
-    dir_path = try_path(TOIL_HOME_DIR)
-    if dir_path is None:
-        raise RuntimeError(
-            f"Cannot create or access Toil configuration directory {TOIL_HOME_DIR}"
-        )
-def check_and_create_default_config_file() -> None:
-    """
-    If the default config file does not exist, create it in the Toil home directory. Create the Toil home directory
-    if needed
-    Raises an error if the default config file cannot be created.
+    Raises an error if the config file cannot be created.
     Safe to run simultaneously in multiple processes. If this process runs
-    this function, it will always see the default config file existing with
+    this function, it will always see the config file existing with
     parseable contents, even if other processes are racing to create it.
-    No process will see an empty or partially-written default config file.
-    """
-    check_and_create_toil_home_dir()
-    # The default config file did not appear to exist when we checked.
-    # It might exist now, though. Try creating it.
-    check_and_create_config_file(DEFAULT_CONFIG_FILE)
+    No process will see a new empty or partially-written config file. The
+    caller should still check to make sure there isn't a preexisting empty file
+    here.
-def check_and_create_config_file(filepath: str) -> None:
-    """
-    If the config file at the filepath does not exist, try creating it.
-    The parent directory should be created prior to calling this
     :param filepath: path to config file
     :return: None
     """
@@ -648,9 +643,39 @@ def generate_config(filepath: str) -> None:
                 yaml.dump(
                     data,
                     f,
+                    # Comment everything out, Unix config file style, to show defaults
                     transform=lambda s: re.sub(r"^(.)", r"#\1", s, flags=re.MULTILINE),
                 )
+def update_config(filepath: str, key: str, new_value: Union[str, bool, int, float]) -> None:
+    """
+    Set the given top-level key to the given value in the given YAML config
+    file.
+    Does not dramatically alter comments or formatting, and does not make a
+    partially-written file visible.
+    :param key: Setting to set. Must be the command-line option name, not the
+        destination variable name.
+    """
+    yaml = YAML(typ="rt")
+    data = yaml.load(open(filepath))
+    logger.info("Change config field %s from %s to %s", key, repr(data.get(key, None)), repr(new_value))
+    if isinstance(new_value, str):
+        # Strings with some values (no, yes) will be interpreted as booleans on
+        # load if not quoted. But ruamel is not determining that this is needed
+        # on serialization for newly-added values. So if we set something to a
+        # string we always quote it.
+        data[key] = DoubleQuotedScalarString(new_value)
+    else:
+        data[key] = new_value
+    with AtomicFileCreate(filepath) as temp_path:
+        with open(temp_path, "w") as f:
+            yaml.dump(data, f)
 def parser_with_common_options(
     provisioner_options: bool = False,
@@ -658,6 +683,17 @@ def parser_with_common_options(
     prog: Optional[str] = None,
     default_log_level: Optional[int] = None,
 ) -> ArgParser:
+    """
+    Get a command-line option parser for a Toil subcommand.
+    The returned parser just has basic options (like version reporting and
+    logging) used by all Toil subcommands.
+    Toil Python workflows should use
+    :meth:`toil.job.Job.Runner.getDefaultArgumentParser` instead, which makes
+    sure to add all the important options for actually running a workflow.
+    """
     parser = ArgParser(
         prog=prog or "Toil", formatter_class=ArgumentDefaultsHelpFormatter
     )
@@ -708,11 +744,13 @@ def addOptions(
             f"Unanticipated class: {parser.__class__}.  Must be: argparse.ArgumentParser or ArgumentGroup."
         )
+    config_path = get_default_config_path()
     if isinstance(parser, ArgParser):
         # in case the user passes in their own configargparse instance instead of calling getDefaultArgumentParser()
         # this forces configargparser to process the config file in YAML rather than in its own format
         parser._config_file_parser = YAMLConfigFileParser()  # type: ignore[misc]
-        parser._default_config_files = [DEFAULT_CONFIG_FILE]  # type: ignore[misc]
+        parser._default_config_files = [config_path]  # type: ignore[misc]
     else:
         # configargparse advertises itself as a drag and drop replacement, and running the normal argparse ArgumentParser
         # through this code still seems to work (with the exception of --config and environmental variables)
@@ -723,24 +761,24 @@ def addOptions(
             DeprecationWarning,
         )
-    check_and_create_default_config_file()
+    ensure_config(config_path)
     # Check on the config file to make sure it is sensible
-    config_status = os.stat(DEFAULT_CONFIG_FILE)
+    config_status = os.stat(config_path)
     if config_status.st_size == 0:
         # If we have an empty config file, someone has to manually delete
         # it before we will work again.
         raise RuntimeError(
-            f"Config file {DEFAULT_CONFIG_FILE} exists but is empty. Delete it! Stat says: {config_status}"
+            f"Config file {config_path} exists but is empty. Delete it! Stat says: {config_status}"
         )
     try:
-        with open(DEFAULT_CONFIG_FILE) as f:
+        with open(config_path) as f:
             yaml = YAML(typ="safe")
             s = yaml.load(f)
             logger.debug("Initialized default configuration: %s", json.dumps(s))
     except:
         # Something went wrong reading the default config, so dump its
         # contents to the log.
-        logger.info("Configuration file contents: %s", open(DEFAULT_CONFIG_FILE).read())
+        logger.info("Configuration file contents: %s", open(config_path).read())
         raise
     # Add base toil options
@@ -759,7 +797,7 @@ def addOptions(
         :param typ: string of either "cwl" or "wdl" to specify which runner to check against
         :return: None, raise parser error if option is found
         """
-        check_parser = ArgParser()
+        check_parser = ArgParser(allow_abbrev=False)
         if typ == "wdl":
             add_cwl_options(check_parser)
         if typ == "cwl":
@@ -902,8 +940,9 @@ class Toil(ContextManager["Toil"]):
     _jobStore: "AbstractJobStore"
     _batchSystem: "AbstractBatchSystem"
     _provisioner: Optional["AbstractProvisioner"]
+    _start_time: float
-    def __init__(self, options: Namespace) -> None:
+    def __init__(self, options: Namespace, workflow_name: Optional[str] = None, trs_spec: Optional[str] = None) -> None:
         """
         Initialize a Toil object from the given options.
@@ -911,6 +950,12 @@ class Toil(ContextManager["Toil"]):
         done when the context is entered.
         :param options: command line options specified by the user
+        :param workflow_name: A human-readable name (probably a filename, URL,
+            or TRS specifier) for the workflow being run. Used for Toil history
+            storage.
+        :param trs_spec: A TRS id:version string for the workflow being run, if
+            any. Used for Toil history storage and publishing workflow
+            execution metrics to Dockstore.
         """
         super().__init__()
         self.options = options
@@ -918,6 +963,17 @@ class Toil(ContextManager["Toil"]):
         self._inContextManager: bool = False
         self._inRestart: bool = False
+        if workflow_name is None:
+            # Try to use the entrypoint file.
+            import __main__
+            if hasattr(__main__, '__file__'):
+                workflow_name = __main__.__file__
+        if workflow_name is None:
+            # If there's no file, say this is an interactive usage of Toil.
+            workflow_name = "<interactive>"
+        self._workflow_name: str = workflow_name
+        self._trs_spec = trs_spec
     def __enter__(self) -> "Toil":
         """
         Derive configuration from the command line options.
@@ -937,9 +993,16 @@ class Toil(ContextManager["Toil"]):
             # Set the caching option because it wasn't set originally, resuming jobstore rebuilds config from CLI options
             self.options.caching = config.caching
+        if self._trs_spec and config.publish_workflow_metrics is None:
+            # We could potentially publish this workflow run. Get a call from the user.
+            config.publish_workflow_metrics = ask_user_about_publishing_metrics()
         if not config.restart:
             config.prepare_start()
             jobStore.initialize(config)
+            assert config.workflowID is not None
+            # Record that there is a workflow being run
+            HistoryManager.record_workflow_creation(config.workflowID, self.canonical_locator(config.jobStore))
         else:
             jobStore.resume()
             # Merge configuration from job store with command line options
@@ -949,6 +1012,7 @@ class Toil(ContextManager["Toil"]):
             jobStore.write_config()
         self.config = config
         self._jobStore = jobStore
+        self._start_time = time.time()
         self._inContextManager = True
         # This will make sure `self.__exit__()` is called when we get a SIGTERM signal.
@@ -968,6 +1032,50 @@ class Toil(ContextManager["Toil"]):
         Depending on the configuration, delete the job store.
         """
         try:
+            if self.config.workflowID is not None:
+                # Record that this attempt to run the workflow succeeded or failed.
+                # TODO: Get ahold of the timing from statsAndLogging instead of redoing it here!
+                # To record the batch system, we need to avoid capturing typos/random text the user types instead of a real batch system.
+                batch_system_type="<Not Initialized>"
+                if hasattr(self, "_batchSystem"):
+                    batch_system_type = type(self._batchSystem).__module__ + "." + type(self._batchSystem).__qualname__
+                HistoryManager.record_workflow_attempt(
+                    self.config.workflowID,
+                    self.config.workflowAttemptNumber,
+                    exc_type is None,
+                    self._start_time,
+                    time.time() - self._start_time,
+                    batch_system=batch_system_type,
+                    caching=self.config.caching,
+                    # Use the git-hash-free Toil version which should not be unique
+                    toil_version=baseVersion,
+                    # This should always be major.minor.patch.
+                    python_version=platform.python_version(),
+                    platform_system=platform.system(),
+                    platform_machine=platform.machine()
+                )
+            if self.config.publish_workflow_metrics == "all":
+                # Publish metrics for all workflows, including previous ones.
+                submission = create_history_submission()
+                while not submission.empty():
+                    if not submission.submit():
+                        # Submitting this batch failed. An item might be broken
+                        # and we don't want to get stuck making no progress on
+                        # a batch of stuff that can't really be submitted.
+                        break
+                    # Keep making submissions until we've uploaded the whole
+                    # history or something goes wrong.
+                    submission = create_history_submission()
+            elif self.config.publish_workflow_metrics == "current" and self.config.workflowID is not None:
+                # Publish metrics for this run only. Might be empty if we had no TRS ID.
+                create_current_submission(self.config.workflowID, self.config.workflowAttemptNumber).submit()
+            # Make sure the history doesn't stay too big
+            HistoryManager.enforce_byte_size_limit()
             if (
                 exc_type is not None
                 and self.config.clean == "onError"
@@ -1012,6 +1120,9 @@ class Toil(ContextManager["Toil"]):
         """
         self._assertContextManagerUsed()
+        assert self.config.workflowID is not None
+        HistoryManager.record_workflow_metadata(self.config.workflowID, self._workflow_name, self._trs_spec)
         from toil.job import Job
         # Check that the rootJob is an instance of the Job class
@@ -1110,6 +1221,8 @@ class Toil(ContextManager["Toil"]):
             )
             self._provisioner.setAutoscaledNodeTypes(self.config.nodeTypes)
+    JOB_STORE_TYPES = ["file", "aws", "google"]
     @classmethod
     def getJobStore(cls, locator: str) -> "AbstractJobStore":
         """
@@ -1137,6 +1250,14 @@ class Toil(ContextManager["Toil"]):
     @staticmethod
     def parseLocator(locator: str) -> tuple[str, str]:
+        """
+        Parse a job store locator to a type string and the data needed for that
+        implementation to connect to it.
+        Does not validate the set of possible job store types.
+        :raises RuntimeError: if the locator is not in the appropriate syntax.
+        """
         if locator[0] in "/." or ":" not in locator:
             return "file", locator
         else:
@@ -1153,6 +1274,17 @@ class Toil(ContextManager["Toil"]):
             raise ValueError(f"Can't have a ':' in the name: '{name}'.")
         return f"{name}:{rest}"
+    @classmethod
+    def canonical_locator(cls, locator: str) -> str:
+        """
+        Turn a job store locator into one that will work from any directory and
+        always includes the explicit type of job store.
+        """
+        job_store_type, rest = cls.parseLocator(locator)
+        if job_store_type == "file":
+            rest = os.path.abspath(rest)
+        return cls.buildLocator(job_store_type, rest)
     @classmethod
     def resumeJobStore(cls, locator: str) -> "AbstractJobStore":
         jobStore = cls.getJobStore(locator)
@@ -1373,21 +1505,33 @@ class Toil(ContextManager["Toil"]):
         self._jobStore.export_file(file_id, dst_uri)
     @staticmethod
-    def normalize_uri(uri: str, check_existence: bool = False) -> str:
+    def normalize_uri(uri: str, check_existence: bool = False, dir_path: Optional[str] = None) -> str:
         """
-        Given a URI, if it has no scheme, prepend "file:".
+        Given a URI, if it has no scheme, make it a properly quoted file: URI.
         :param check_existence: If set, raise FileNotFoundError if a URI points to
                a local file that does not exist.
+        :param dir_path: If specified, interpret relative paths relative to the
+            given directory path instead of the current one.
         """
-        if urlparse(uri).scheme == "file":
+        parsed = urlparse(uri)
+        if parsed.scheme == "file":
             uri = unquote(
-                urlparse(uri).path
+                parsed.path
             )  # this should strip off the local file scheme; it will be added back
+            parsed = urlparse(uri)
         # account for the scheme-less case, which should be coerced to a local absolute path
-        if urlparse(uri).scheme == "":
-            abs_path = os.path.abspath(uri)
+        if parsed.scheme == "":
+            if dir_path is not None:
+                # To support relative paths from a particular directory, join
+                # the directory on. If uri is already an absolute path, join()
+                # discards dir_path and returns uri unchanged.
+                abs_path = os.path.join(dir_path, uri)
+            else:
+                abs_path = os.path.abspath(uri)
             if not os.path.exists(abs_path) and check_existence:
                 raise FileNotFoundError(
                     f'Could not find local file "{abs_path}" when importing "{uri}".\n'
@@ -1903,7 +2047,7 @@ def cacheDirName(workflowID: str) -> str:
     return f"cache-{workflowID}"
-def getDirSizeRecursively(dirPath: str) -> int:
+def getDirSizeRecursively(dirPath: StrPath) -> int:
     """
     This method will return the cumulative number of bytes occupied by the files
     on disk in the directory and its subdirectories.

toil/cwl/cwltoil.py CHANGED Viewed

@@ -111,7 +111,7 @@ from toil.batchSystems.abstractBatchSystem import InsufficientSystemResources
 from toil.batchSystems.registry import DEFAULT_BATCH_SYSTEM
 from toil.common import Config, Toil, addOptions
 from toil.cwl import check_cwltool_version
-from toil.lib.integration import resolve_workflow
+from toil.lib.trs import resolve_workflow
 from toil.lib.misc import call_command
 from toil.provisioners.clusterScaler import JobTooBigError
@@ -1214,7 +1214,7 @@ def toil_make_tool(
     return cwltool.workflow.default_make_tool(toolpath_object, loadingContext)
-# When a file we want to have is missing, we can give it this sentinal location
+# When a file we want to have is missing, we can give it this sentinel location
 # URI instead of raising an error right away, in case it is optional.
 MISSING_FILE = "missing://"
@@ -1812,6 +1812,9 @@ def convert_file_uri_to_toil_uri(
     # with unsupportedRequirement when retrieving later with getFile
     elif file_uri.startswith("_:"):
         return file_uri
+    elif file_uri.startswith(MISSING_FILE):
+        # We cannot import a missing file
+        raise FileNotFoundError(f"Could not find {file_uri[len(MISSING_FILE):]}")
     else:
         file_uri = existing.get(file_uri, file_uri)
         if file_uri not in index:
@@ -1876,7 +1879,7 @@ def extract_file_uri_once(
     ):
         if mark_broken:
             logger.debug("File %s is missing", file_metadata)
-            file_metadata["location"] = location = MISSING_FILE
+            file_metadata["location"] = location = MISSING_FILE + location
         else:
             raise cwl_utils.errors.WorkflowException(
                 "File is missing: %s" % file_metadata
@@ -2976,7 +2979,7 @@ def makeRootJob(
         # This will consist of files that we were not able to get a file size for
         leader_metadata = dict()
         for filename, file_data in metadata.items():
-            if file_data.size is None:
+            if file_data[2] is None:  # size
                 leader_metadata[filename] = file_data
             else:
                 worker_metadata[filename] = file_data
@@ -3599,6 +3602,7 @@ class CWLInstallImportsJob(Job):
         """
         Given a mapping of filenames to Toil file IDs, replace the filename with the file IDs throughout the CWL object.
         """
         def fill_in_file(filename: str) -> FileID:
             """
             Return the file name's associated Toil file ID
@@ -3954,10 +3958,10 @@ def filtered_secondary_files(
                 sf,
             )
     # remove secondary files that are not present in the filestore or pointing
-    # to existant things on disk
+    # to existent things on disk
     for sf in intermediate_secondary_files:
         sf_loc = cast(str, sf.get("location", ""))
-        if sf_loc != MISSING_FILE or sf.get("class", "") == "Directory":
+        if not sf_loc.startswith(MISSING_FILE) or sf.get("class", "") == "Directory":
             # Pass imported files, and all Directories
             final_secondary_files.append(sf)
         else:
@@ -4166,15 +4170,15 @@ def get_options(args: list[str]) -> Namespace:
         description=textwrap.dedent(
             """
             positional arguments:
               WORKFLOW              CWL file to run.
               INFILE                YAML or JSON file of workflow inputs.
               WF_OPTIONS            Additional inputs to the workflow as command-line
                                     flags. If CWL workflow takes an input, the name of the
                                     input can be used as an option. For example:
                                       %(prog)s workflow.cwl --file1 file
                                     If an input has the same name as a Toil option, pass
@@ -4261,8 +4265,18 @@ def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
     runtime_context.move_outputs = "leave"
     runtime_context.rm_tmpdir = False
     runtime_context.streaming_allowed = not options.disable_streaming
+    if options.cachedir is not None:
+        runtime_context.cachedir = os.path.abspath(options.cachedir)
+        # Automatically bypass the file store to be compatible with cwltool caching
+        # Otherwise, the CWL caching code makes links to temporary local copies
+        # of filestore files and caches those.
+        logger.debug("CWL task caching is turned on. Bypassing file store.")
+        options.bypass_file_store = True
     if options.mpi_config_file is not None:
         runtime_context.mpi_config = MpiConfig.load(options.mpi_config_file)
+    if cwltool.main.check_working_directories(runtime_context) is not None:
+        logger.error("Failed to create directory. If using tmpdir_prefix, tmpdir_outdir_prefix, or cachedir, consider changing directory locations.")
+        return 1
     setattr(runtime_context, "bypass_file_store", options.bypass_file_store)
     if options.bypass_file_store and options.destBucket:
         # We use the file store to write to buckets, so we can't do this (yet?)
@@ -4293,6 +4307,10 @@ def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
     try:
+        # We might have workflow metadata to pass to Toil
+        workflow_name=None
+        trs_spec = None
         if not options.restart:
             # Make a version of the config based on the initial options, for
             # setting up CWL option stuff
@@ -4302,7 +4320,9 @@ def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
             # Before showing the options to any cwltool stuff that wants to
             # load the workflow, transform options.cwltool, where our
             # argument for what to run is, to handle Dockstore workflows.
-            options.cwltool = resolve_workflow(options.cwltool)
+            options.cwltool, trs_spec = resolve_workflow(options.cwltool)
+            # Figure out what to call the workflow
+            workflow_name = trs_spec or options.cwltool
             # TODO: why are we doing this? Does this get applied to all
             # tools as a default or something?
@@ -4474,7 +4494,7 @@ def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
             logger.debug("Root tool: %s", tool)
             tool = remove_pickle_problems(tool)
-        with Toil(options) as toil:
+        with Toil(options, workflow_name=workflow_name, trs_spec=trs_spec) as toil:
             if options.restart:
                 outobj = toil.restart()
             else:
@@ -4575,6 +4595,7 @@ def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
         InvalidImportExportUrlException,
         UnimplementedURLException,
         JobTooBigError,
+        FileNotFoundError
     ) as err:
         logging.error(err)
         return 1

toil 8.0.0__py3-none-any.whl → 8.2.0__py3-none-any.whl

toil 8.0.0py3-none-any.whl → 8.2.0py3-none-any.whl