toil 8.1.0b1__py3-none-any.whl → 9.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +0 -35
- toil/batchSystems/abstractBatchSystem.py +1 -1
- toil/batchSystems/abstractGridEngineBatchSystem.py +1 -1
- toil/batchSystems/awsBatch.py +1 -1
- toil/batchSystems/cleanup_support.py +1 -1
- toil/batchSystems/kubernetes.py +53 -7
- toil/batchSystems/local_support.py +1 -1
- toil/batchSystems/mesos/batchSystem.py +13 -8
- toil/batchSystems/mesos/test/__init__.py +3 -2
- toil/batchSystems/registry.py +15 -118
- toil/batchSystems/singleMachine.py +1 -1
- toil/batchSystems/slurm.py +27 -26
- toil/bus.py +5 -3
- toil/common.py +59 -12
- toil/cwl/cwltoil.py +81 -38
- toil/cwl/utils.py +103 -3
- toil/job.py +64 -49
- toil/jobStores/abstractJobStore.py +35 -239
- toil/jobStores/aws/jobStore.py +2 -1
- toil/jobStores/fileJobStore.py +27 -2
- toil/jobStores/googleJobStore.py +110 -33
- toil/leader.py +9 -0
- toil/lib/accelerators.py +4 -2
- toil/lib/aws/utils.py.orig +504 -0
- toil/lib/bioio.py +1 -1
- toil/lib/docker.py +252 -91
- toil/lib/dockstore.py +11 -3
- toil/lib/exceptions.py +5 -3
- toil/lib/generatedEC2Lists.py +81 -19
- toil/lib/history.py +87 -13
- toil/lib/history_submission.py +23 -9
- toil/lib/io.py +34 -22
- toil/lib/misc.py +8 -2
- toil/lib/plugins.py +106 -0
- toil/lib/resources.py +2 -1
- toil/lib/threading.py +11 -10
- toil/lib/url.py +320 -0
- toil/options/common.py +8 -0
- toil/options/cwl.py +13 -1
- toil/options/runner.py +17 -10
- toil/options/wdl.py +22 -0
- toil/provisioners/aws/awsProvisioner.py +25 -2
- toil/server/api_spec/LICENSE +201 -0
- toil/server/api_spec/README.rst +5 -0
- toil/server/app.py +12 -6
- toil/server/cli/wes_cwl_runner.py +3 -2
- toil/server/wes/abstract_backend.py +21 -43
- toil/server/wes/toil_backend.py +2 -2
- toil/test/__init__.py +275 -115
- toil/test/batchSystems/batchSystemTest.py +228 -213
- toil/test/batchSystems/batch_system_plugin_test.py +7 -0
- toil/test/batchSystems/test_slurm.py +27 -0
- toil/test/cactus/pestis.tar.gz +0 -0
- toil/test/conftest.py +7 -0
- toil/test/cwl/2.fasta +11 -0
- toil/test/cwl/2.fastq +12 -0
- toil/test/cwl/conftest.py +1 -1
- toil/test/cwl/cwlTest.py +1175 -870
- toil/test/cwl/directory/directory/file.txt +15 -0
- toil/test/cwl/download_directory_file.json +4 -0
- toil/test/cwl/download_directory_s3.json +4 -0
- toil/test/cwl/download_file.json +6 -0
- toil/test/cwl/download_http.json +6 -0
- toil/test/cwl/download_https.json +6 -0
- toil/test/cwl/download_s3.json +6 -0
- toil/test/cwl/download_subdirectory_file.json +5 -0
- toil/test/cwl/download_subdirectory_s3.json +5 -0
- toil/test/cwl/empty.json +1 -0
- toil/test/cwl/mock_mpi/fake_mpi.yml +8 -0
- toil/test/cwl/mock_mpi/fake_mpi_run.py +42 -0
- toil/test/cwl/optional-file-exists.json +6 -0
- toil/test/cwl/optional-file-missing.json +6 -0
- toil/test/cwl/preemptible_expression.json +1 -0
- toil/test/cwl/revsort-job-missing.json +6 -0
- toil/test/cwl/revsort-job.json +6 -0
- toil/test/cwl/s3_secondary_file.json +16 -0
- toil/test/cwl/seqtk_seq_job.json +6 -0
- toil/test/cwl/stream.json +6 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.dat +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f0 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f1 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f1i +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f2 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f2_TSM0 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f3 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f3_TSM0 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f4 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f4_TSM0 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.f5 +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.info +0 -0
- toil/test/cwl/test_filename_conflict_resolution.ms/table.lock +0 -0
- toil/test/cwl/whale.txt +16 -0
- toil/test/docs/scripts/example_alwaysfail.py +38 -0
- toil/test/docs/scripts/example_alwaysfail_with_files.wdl +33 -0
- toil/test/docs/scripts/example_cachingbenchmark.py +117 -0
- toil/test/docs/scripts/stagingExampleFiles/in.txt +1 -0
- toil/test/docs/scripts/stagingExampleFiles/out.txt +2 -0
- toil/test/docs/scripts/tutorial_arguments.py +23 -0
- toil/test/docs/scripts/tutorial_debugging.patch +12 -0
- toil/test/docs/scripts/tutorial_debugging_hangs.wdl +126 -0
- toil/test/docs/scripts/tutorial_debugging_works.wdl +129 -0
- toil/test/docs/scripts/tutorial_docker.py +20 -0
- toil/test/docs/scripts/tutorial_dynamic.py +24 -0
- toil/test/docs/scripts/tutorial_encapsulation.py +28 -0
- toil/test/docs/scripts/tutorial_encapsulation2.py +29 -0
- toil/test/docs/scripts/tutorial_helloworld.py +15 -0
- toil/test/docs/scripts/tutorial_invokeworkflow.py +27 -0
- toil/test/docs/scripts/tutorial_invokeworkflow2.py +30 -0
- toil/test/docs/scripts/tutorial_jobfunctions.py +22 -0
- toil/test/docs/scripts/tutorial_managing.py +29 -0
- toil/test/docs/scripts/tutorial_managing2.py +56 -0
- toil/test/docs/scripts/tutorial_multiplejobs.py +25 -0
- toil/test/docs/scripts/tutorial_multiplejobs2.py +21 -0
- toil/test/docs/scripts/tutorial_multiplejobs3.py +22 -0
- toil/test/docs/scripts/tutorial_promises.py +25 -0
- toil/test/docs/scripts/tutorial_promises2.py +30 -0
- toil/test/docs/scripts/tutorial_quickstart.py +22 -0
- toil/test/docs/scripts/tutorial_requirements.py +44 -0
- toil/test/docs/scripts/tutorial_services.py +45 -0
- toil/test/docs/scripts/tutorial_staging.py +45 -0
- toil/test/docs/scripts/tutorial_stats.py +64 -0
- toil/test/docs/scriptsTest.py +2 -1
- toil/test/lib/aws/test_iam.py +3 -1
- toil/test/lib/dockerTest.py +205 -122
- toil/test/lib/test_history.py +101 -77
- toil/test/lib/test_url.py +69 -0
- toil/test/lib/url_plugin_test.py +105 -0
- toil/test/provisioners/aws/awsProvisionerTest.py +13 -10
- toil/test/provisioners/clusterTest.py +17 -4
- toil/test/provisioners/gceProvisionerTest.py +17 -15
- toil/test/server/serverTest.py +78 -36
- toil/test/sort/sort.py +4 -1
- toil/test/src/busTest.py +17 -17
- toil/test/src/deferredFunctionTest.py +145 -132
- toil/test/src/importExportFileTest.py +71 -63
- toil/test/src/jobEncapsulationTest.py +27 -28
- toil/test/src/jobServiceTest.py +149 -133
- toil/test/src/jobTest.py +219 -211
- toil/test/src/miscTests.py +66 -60
- toil/test/src/promisedRequirementTest.py +163 -169
- toil/test/src/regularLogTest.py +24 -24
- toil/test/src/resourceTest.py +82 -76
- toil/test/src/restartDAGTest.py +51 -47
- toil/test/src/resumabilityTest.py +24 -19
- toil/test/src/retainTempDirTest.py +60 -57
- toil/test/src/systemTest.py +17 -13
- toil/test/src/threadingTest.py +29 -32
- toil/test/utils/ABCWorkflowDebug/B_file.txt +1 -0
- toil/test/utils/ABCWorkflowDebug/debugWorkflow.py +204 -0
- toil/test/utils/ABCWorkflowDebug/mkFile.py +16 -0
- toil/test/utils/ABCWorkflowDebug/sleep.cwl +12 -0
- toil/test/utils/ABCWorkflowDebug/sleep.yaml +1 -0
- toil/test/utils/toilDebugTest.py +117 -102
- toil/test/utils/toilKillTest.py +54 -53
- toil/test/utils/utilsTest.py +303 -229
- toil/test/wdl/lint_error.wdl +9 -0
- toil/test/wdl/md5sum/empty_file.json +1 -0
- toil/test/wdl/md5sum/md5sum-gs.json +1 -0
- toil/test/wdl/md5sum/md5sum.1.0.wdl +32 -0
- toil/test/wdl/md5sum/md5sum.input +1 -0
- toil/test/wdl/md5sum/md5sum.json +1 -0
- toil/test/wdl/md5sum/md5sum.wdl +25 -0
- toil/test/wdl/miniwdl_self_test/inputs-namespaced.json +1 -0
- toil/test/wdl/miniwdl_self_test/inputs.json +1 -0
- toil/test/wdl/miniwdl_self_test/self_test.wdl +40 -0
- toil/test/wdl/standard_library/as_map.json +16 -0
- toil/test/wdl/standard_library/as_map_as_input.wdl +23 -0
- toil/test/wdl/standard_library/as_pairs.json +7 -0
- toil/test/wdl/standard_library/as_pairs_as_input.wdl +23 -0
- toil/test/wdl/standard_library/ceil.json +3 -0
- toil/test/wdl/standard_library/ceil_as_command.wdl +16 -0
- toil/test/wdl/standard_library/ceil_as_input.wdl +16 -0
- toil/test/wdl/standard_library/collect_by_key.json +1 -0
- toil/test/wdl/standard_library/collect_by_key_as_input.wdl +23 -0
- toil/test/wdl/standard_library/cross.json +11 -0
- toil/test/wdl/standard_library/cross_as_input.wdl +19 -0
- toil/test/wdl/standard_library/flatten.json +7 -0
- toil/test/wdl/standard_library/flatten_as_input.wdl +18 -0
- toil/test/wdl/standard_library/floor.json +3 -0
- toil/test/wdl/standard_library/floor_as_command.wdl +16 -0
- toil/test/wdl/standard_library/floor_as_input.wdl +16 -0
- toil/test/wdl/standard_library/keys.json +8 -0
- toil/test/wdl/standard_library/keys_as_input.wdl +24 -0
- toil/test/wdl/standard_library/length.json +7 -0
- toil/test/wdl/standard_library/length_as_input.wdl +16 -0
- toil/test/wdl/standard_library/length_as_input_with_map.json +7 -0
- toil/test/wdl/standard_library/length_as_input_with_map.wdl +17 -0
- toil/test/wdl/standard_library/length_invalid.json +3 -0
- toil/test/wdl/standard_library/range.json +3 -0
- toil/test/wdl/standard_library/range_0.json +3 -0
- toil/test/wdl/standard_library/range_as_input.wdl +17 -0
- toil/test/wdl/standard_library/range_invalid.json +3 -0
- toil/test/wdl/standard_library/read_boolean.json +3 -0
- toil/test/wdl/standard_library/read_boolean_as_command.wdl +17 -0
- toil/test/wdl/standard_library/read_float.json +3 -0
- toil/test/wdl/standard_library/read_float_as_command.wdl +17 -0
- toil/test/wdl/standard_library/read_int.json +3 -0
- toil/test/wdl/standard_library/read_int_as_command.wdl +17 -0
- toil/test/wdl/standard_library/read_json.json +3 -0
- toil/test/wdl/standard_library/read_json_as_output.wdl +31 -0
- toil/test/wdl/standard_library/read_lines.json +3 -0
- toil/test/wdl/standard_library/read_lines_as_output.wdl +31 -0
- toil/test/wdl/standard_library/read_map.json +3 -0
- toil/test/wdl/standard_library/read_map_as_output.wdl +31 -0
- toil/test/wdl/standard_library/read_string.json +3 -0
- toil/test/wdl/standard_library/read_string_as_command.wdl +17 -0
- toil/test/wdl/standard_library/read_tsv.json +3 -0
- toil/test/wdl/standard_library/read_tsv_as_output.wdl +31 -0
- toil/test/wdl/standard_library/round.json +3 -0
- toil/test/wdl/standard_library/round_as_command.wdl +16 -0
- toil/test/wdl/standard_library/round_as_input.wdl +16 -0
- toil/test/wdl/standard_library/size.json +3 -0
- toil/test/wdl/standard_library/size_as_command.wdl +17 -0
- toil/test/wdl/standard_library/size_as_output.wdl +36 -0
- toil/test/wdl/standard_library/stderr.json +3 -0
- toil/test/wdl/standard_library/stderr_as_output.wdl +30 -0
- toil/test/wdl/standard_library/stdout.json +3 -0
- toil/test/wdl/standard_library/stdout_as_output.wdl +30 -0
- toil/test/wdl/standard_library/sub.json +3 -0
- toil/test/wdl/standard_library/sub_as_input.wdl +17 -0
- toil/test/wdl/standard_library/sub_as_input_with_file.wdl +17 -0
- toil/test/wdl/standard_library/transpose.json +6 -0
- toil/test/wdl/standard_library/transpose_as_input.wdl +18 -0
- toil/test/wdl/standard_library/write_json.json +6 -0
- toil/test/wdl/standard_library/write_json_as_command.wdl +17 -0
- toil/test/wdl/standard_library/write_lines.json +7 -0
- toil/test/wdl/standard_library/write_lines_as_command.wdl +17 -0
- toil/test/wdl/standard_library/write_map.json +6 -0
- toil/test/wdl/standard_library/write_map_as_command.wdl +17 -0
- toil/test/wdl/standard_library/write_tsv.json +6 -0
- toil/test/wdl/standard_library/write_tsv_as_command.wdl +17 -0
- toil/test/wdl/standard_library/zip.json +12 -0
- toil/test/wdl/standard_library/zip_as_input.wdl +19 -0
- toil/test/wdl/test.csv +3 -0
- toil/test/wdl/test.tsv +3 -0
- toil/test/wdl/testfiles/croo.wdl +38 -0
- toil/test/wdl/testfiles/drop_files.wdl +62 -0
- toil/test/wdl/testfiles/drop_files_subworkflow.wdl +13 -0
- toil/test/wdl/testfiles/empty.txt +0 -0
- toil/test/wdl/testfiles/not_enough_outputs.wdl +33 -0
- toil/test/wdl/testfiles/random.wdl +66 -0
- toil/test/wdl/testfiles/read_file.wdl +18 -0
- toil/test/wdl/testfiles/string_file_coercion.json +1 -0
- toil/test/wdl/testfiles/string_file_coercion.wdl +35 -0
- toil/test/wdl/testfiles/test.json +4 -0
- toil/test/wdl/testfiles/test_boolean.txt +1 -0
- toil/test/wdl/testfiles/test_float.txt +1 -0
- toil/test/wdl/testfiles/test_int.txt +1 -0
- toil/test/wdl/testfiles/test_lines.txt +5 -0
- toil/test/wdl/testfiles/test_map.txt +2 -0
- toil/test/wdl/testfiles/test_string.txt +1 -0
- toil/test/wdl/testfiles/url_to_file.wdl +13 -0
- toil/test/wdl/testfiles/url_to_optional_file.wdl +14 -0
- toil/test/wdl/testfiles/vocab.json +1 -0
- toil/test/wdl/testfiles/vocab.wdl +66 -0
- toil/test/wdl/testfiles/wait.wdl +34 -0
- toil/test/wdl/wdl_specification/type_pair.json +23 -0
- toil/test/wdl/wdl_specification/type_pair_basic.wdl +36 -0
- toil/test/wdl/wdl_specification/type_pair_with_files.wdl +36 -0
- toil/test/wdl/wdl_specification/v1_spec.json +1 -0
- toil/test/wdl/wdl_specification/v1_spec_declaration.wdl +39 -0
- toil/test/wdl/wdltoil_test.py +751 -529
- toil/test/wdl/wdltoil_test_kubernetes.py +2 -2
- toil/utils/toilSshCluster.py +23 -0
- toil/utils/toilUpdateEC2Instances.py +1 -0
- toil/version.py +5 -5
- toil/wdl/wdltoil.py +518 -437
- toil/worker.py +11 -6
- {toil-8.1.0b1.dist-info → toil-9.0.0.dist-info}/METADATA +25 -24
- toil-9.0.0.dist-info/RECORD +444 -0
- {toil-8.1.0b1.dist-info → toil-9.0.0.dist-info}/WHEEL +1 -1
- toil-8.1.0b1.dist-info/RECORD +0 -259
- {toil-8.1.0b1.dist-info → toil-9.0.0.dist-info}/entry_points.txt +0 -0
- {toil-8.1.0b1.dist-info → toil-9.0.0.dist-info/licenses}/LICENSE +0 -0
- {toil-8.1.0b1.dist-info → toil-9.0.0.dist-info}/top_level.txt +0 -0
toil/common.py
CHANGED
|
@@ -74,6 +74,7 @@ from toil.lib.compatibility import deprecated
|
|
|
74
74
|
from toil.lib.history import HistoryManager
|
|
75
75
|
from toil.lib.history_submission import ask_user_about_publishing_metrics, create_history_submission, create_current_submission
|
|
76
76
|
from toil.lib.io import AtomicFileCreate, try_path, get_toil_home
|
|
77
|
+
from toil.lib.misc import StrPath
|
|
77
78
|
from toil.lib.memoize import memoize
|
|
78
79
|
from toil.lib.retry import retry
|
|
79
80
|
from toil.lib.threading import ensure_filesystem_lockable
|
|
@@ -85,6 +86,7 @@ from toil.provisioners import add_provisioner_options, cluster_factory
|
|
|
85
86
|
from toil.realtimeLogger import RealtimeLogger
|
|
86
87
|
from toil.statsAndLogging import add_logging_options, set_logging_from_options
|
|
87
88
|
from toil.version import dockerRegistry, dockerTag, version, baseVersion
|
|
89
|
+
from toil.lib.url import URLAccess
|
|
88
90
|
|
|
89
91
|
if TYPE_CHECKING:
|
|
90
92
|
from toil.batchSystems.abstractBatchSystem import AbstractBatchSystem
|
|
@@ -126,6 +128,8 @@ class Config:
|
|
|
126
128
|
kubernetes_service_account: Optional[str]
|
|
127
129
|
kubernetes_pod_timeout: float
|
|
128
130
|
kubernetes_privileged: bool
|
|
131
|
+
kubernetes_pod_security_context: Optional[str]
|
|
132
|
+
kubernetes_security_context: Optional[str]
|
|
129
133
|
tes_endpoint: str
|
|
130
134
|
tes_user: str
|
|
131
135
|
tes_password: str
|
|
@@ -138,7 +142,7 @@ class Config:
|
|
|
138
142
|
batch_logs_dir: Optional[str]
|
|
139
143
|
"""The backing scheduler will be instructed, if possible, to save logs
|
|
140
144
|
to this directory, where the leader can read them."""
|
|
141
|
-
statePollingWait:
|
|
145
|
+
statePollingWait: float
|
|
142
146
|
state_polling_timeout: int
|
|
143
147
|
disableAutoDeployment: bool
|
|
144
148
|
|
|
@@ -208,6 +212,7 @@ class Config:
|
|
|
208
212
|
|
|
209
213
|
# Retrying/rescuing jobs
|
|
210
214
|
retryCount: int
|
|
215
|
+
stop_on_first_failure: bool
|
|
211
216
|
enableUnlimitedPreemptibleRetries: bool
|
|
212
217
|
doubleMem: bool
|
|
213
218
|
maxJobDuration: int
|
|
@@ -386,6 +391,7 @@ class Config:
|
|
|
386
391
|
|
|
387
392
|
# Retrying/rescuing jobs
|
|
388
393
|
set_option("retryCount")
|
|
394
|
+
set_option("stop_on_first_failure")
|
|
389
395
|
set_option("enableUnlimitedPreemptibleRetries")
|
|
390
396
|
set_option("doubleMem")
|
|
391
397
|
set_option("maxJobDuration")
|
|
@@ -398,7 +404,7 @@ class Config:
|
|
|
398
404
|
set_option("writeLogsGzip")
|
|
399
405
|
set_option("writeLogsFromAllJobs")
|
|
400
406
|
set_option("write_messages")
|
|
401
|
-
|
|
407
|
+
|
|
402
408
|
# Data Publishing Options
|
|
403
409
|
set_option("publish_workflow_metrics")
|
|
404
410
|
|
|
@@ -444,6 +450,11 @@ class Config:
|
|
|
444
450
|
|
|
445
451
|
self.check_configuration_consistency()
|
|
446
452
|
|
|
453
|
+
# Check for deprecated Toil built-in autoscaling
|
|
454
|
+
# --provisioner is guaranteed to be set
|
|
455
|
+
if self.provisioner is not None and self.batchSystem == "mesos":
|
|
456
|
+
logger.warning("Toil built-in autoscaling with Mesos is deprecated as Mesos is no longer active. Please use Kubernetes-based autoscaling instead.")
|
|
457
|
+
|
|
447
458
|
def check_configuration_consistency(self) -> None:
|
|
448
459
|
"""Old checks that cannot be fit into an action class for argparse"""
|
|
449
460
|
if self.writeLogs and self.writeLogsGzip:
|
|
@@ -540,6 +551,19 @@ def generate_config(filepath: str) -> None:
|
|
|
540
551
|
"enableCaching",
|
|
541
552
|
"disableCaching",
|
|
542
553
|
"version",
|
|
554
|
+
# Toil built-in autoscaling with mesos is deprecated as mesos has not been updated since Python 3.10
|
|
555
|
+
"provisioner",
|
|
556
|
+
"nodeTypes"
|
|
557
|
+
"minNodes",
|
|
558
|
+
"maxNodes",
|
|
559
|
+
"targetTime",
|
|
560
|
+
"betaInertia",
|
|
561
|
+
"scaleInterval",
|
|
562
|
+
"preemtibleCompensation",
|
|
563
|
+
"nodeStorage",
|
|
564
|
+
"nodeStorageOverrides",
|
|
565
|
+
"metrics",
|
|
566
|
+
"assumeZeroOverhead"
|
|
543
567
|
)
|
|
544
568
|
|
|
545
569
|
def create_config_dict_from_parser(parser: ArgumentParser) -> CommentedMap:
|
|
@@ -653,7 +677,7 @@ def update_config(filepath: str, key: str, new_value: Union[str, bool, int, floa
|
|
|
653
677
|
:param key: Setting to set. Must be the command-line option name, not the
|
|
654
678
|
destination variable name.
|
|
655
679
|
"""
|
|
656
|
-
|
|
680
|
+
|
|
657
681
|
yaml = YAML(typ="rt")
|
|
658
682
|
data = yaml.load(open(filepath))
|
|
659
683
|
|
|
@@ -678,6 +702,17 @@ def parser_with_common_options(
|
|
|
678
702
|
prog: Optional[str] = None,
|
|
679
703
|
default_log_level: Optional[int] = None,
|
|
680
704
|
) -> ArgParser:
|
|
705
|
+
"""
|
|
706
|
+
Get a command-line option parser for a Toil subcommand.
|
|
707
|
+
|
|
708
|
+
The returned parser just has basic options (like version reporting and
|
|
709
|
+
logging) used by all Toil subcommands.
|
|
710
|
+
|
|
711
|
+
Toil Python workflows should use
|
|
712
|
+
:meth:`toil.job.Job.Runner.getDefaultArgumentParser` instead, which makes
|
|
713
|
+
sure to add all the important options for actually running a workflow.
|
|
714
|
+
"""
|
|
715
|
+
|
|
681
716
|
parser = ArgParser(
|
|
682
717
|
prog=prog or "Toil", formatter_class=ArgumentDefaultsHelpFormatter
|
|
683
718
|
)
|
|
@@ -781,7 +816,7 @@ def addOptions(
|
|
|
781
816
|
:param typ: string of either "cwl" or "wdl" to specify which runner to check against
|
|
782
817
|
:return: None, raise parser error if option is found
|
|
783
818
|
"""
|
|
784
|
-
check_parser = ArgParser()
|
|
819
|
+
check_parser = ArgParser(allow_abbrev=False)
|
|
785
820
|
if typ == "wdl":
|
|
786
821
|
add_cwl_options(check_parser)
|
|
787
822
|
if typ == "cwl":
|
|
@@ -1381,7 +1416,7 @@ class Toil(ContextManager["Toil"]):
|
|
|
1381
1416
|
self._batchSystem.setUserScript(userScriptResource)
|
|
1382
1417
|
|
|
1383
1418
|
def url_exists(self, src_uri: str) -> bool:
|
|
1384
|
-
return
|
|
1419
|
+
return URLAccess.url_exists(self.normalize_uri(src_uri))
|
|
1385
1420
|
|
|
1386
1421
|
# Importing a file with a shared file name returns None, but without one it
|
|
1387
1422
|
# returns a file ID. Explain this to MyPy.
|
|
@@ -1489,21 +1524,33 @@ class Toil(ContextManager["Toil"]):
|
|
|
1489
1524
|
self._jobStore.export_file(file_id, dst_uri)
|
|
1490
1525
|
|
|
1491
1526
|
@staticmethod
|
|
1492
|
-
def normalize_uri(uri: str, check_existence: bool = False) -> str:
|
|
1527
|
+
def normalize_uri(uri: str, check_existence: bool = False, dir_path: Optional[str] = None) -> str:
|
|
1493
1528
|
"""
|
|
1494
|
-
Given a URI, if it has no scheme,
|
|
1529
|
+
Given a URI, if it has no scheme, make it a properly quoted file: URI.
|
|
1495
1530
|
|
|
1496
1531
|
:param check_existence: If set, raise FileNotFoundError if a URI points to
|
|
1497
1532
|
a local file that does not exist.
|
|
1533
|
+
|
|
1534
|
+
:param dir_path: If specified, interpret relative paths relative to the
|
|
1535
|
+
given directory path instead of the current one.
|
|
1498
1536
|
"""
|
|
1499
|
-
|
|
1537
|
+
|
|
1538
|
+
parsed = urlparse(uri)
|
|
1539
|
+
if parsed.scheme == "file":
|
|
1500
1540
|
uri = unquote(
|
|
1501
|
-
|
|
1541
|
+
parsed.path
|
|
1502
1542
|
) # this should strip off the local file scheme; it will be added back
|
|
1543
|
+
parsed = urlparse(uri)
|
|
1503
1544
|
|
|
1504
1545
|
# account for the scheme-less case, which should be coerced to a local absolute path
|
|
1505
|
-
if
|
|
1506
|
-
|
|
1546
|
+
if parsed.scheme == "":
|
|
1547
|
+
if dir_path is not None:
|
|
1548
|
+
# To support relative paths from a particular directory, join
|
|
1549
|
+
# the directory on. If uri is already an abs path, join() will
|
|
1550
|
+
# not do anything
|
|
1551
|
+
abs_path = os.path.join(dir_path, uri)
|
|
1552
|
+
else:
|
|
1553
|
+
abs_path = os.path.abspath(uri)
|
|
1507
1554
|
if not os.path.exists(abs_path) and check_existence:
|
|
1508
1555
|
raise FileNotFoundError(
|
|
1509
1556
|
f'Could not find local file "{abs_path}" when importing "{uri}".\n'
|
|
@@ -2019,7 +2066,7 @@ def cacheDirName(workflowID: str) -> str:
|
|
|
2019
2066
|
return f"cache-{workflowID}"
|
|
2020
2067
|
|
|
2021
2068
|
|
|
2022
|
-
def getDirSizeRecursively(dirPath:
|
|
2069
|
+
def getDirSizeRecursively(dirPath: StrPath) -> int:
|
|
2023
2070
|
"""
|
|
2024
2071
|
This method will return the cumulative number of bytes occupied by the files
|
|
2025
2072
|
on disk in the directory and its subdirectories.
|
toil/cwl/cwltoil.py
CHANGED
|
@@ -34,7 +34,6 @@ import stat
|
|
|
34
34
|
import sys
|
|
35
35
|
import textwrap
|
|
36
36
|
import uuid
|
|
37
|
-
from collections.abc import Iterator, Mapping, MutableMapping, MutableSequence
|
|
38
37
|
from tempfile import NamedTemporaryFile, TemporaryFile, gettempdir
|
|
39
38
|
from threading import Thread
|
|
40
39
|
from typing import (
|
|
@@ -122,6 +121,7 @@ from toil.cwl.utils import (
|
|
|
122
121
|
download_structure,
|
|
123
122
|
get_from_structure,
|
|
124
123
|
visit_cwl_class_and_reduce,
|
|
124
|
+
remove_redundant_mounts
|
|
125
125
|
)
|
|
126
126
|
from toil.exceptions import FailedJobsException
|
|
127
127
|
from toil.fileStores import FileID
|
|
@@ -149,6 +149,7 @@ from toil.jobStores.utils import JobStoreUnavailableException, generate_locator
|
|
|
149
149
|
from toil.lib.io import mkdtemp
|
|
150
150
|
from toil.lib.threading import ExceptionalThread, global_mutex
|
|
151
151
|
from toil.statsAndLogging import DEFAULT_LOGLEVEL
|
|
152
|
+
from toil.lib.url import URLAccess
|
|
152
153
|
|
|
153
154
|
logger = logging.getLogger(__name__)
|
|
154
155
|
|
|
@@ -1395,7 +1396,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1395
1396
|
destination = path
|
|
1396
1397
|
else:
|
|
1397
1398
|
# The destination is something else.
|
|
1398
|
-
if
|
|
1399
|
+
if URLAccess.get_is_directory(path):
|
|
1399
1400
|
# Treat this as a directory
|
|
1400
1401
|
if path not in self.dir_to_download:
|
|
1401
1402
|
logger.debug(
|
|
@@ -1405,14 +1406,14 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1405
1406
|
|
|
1406
1407
|
# Recursively fetch all the files in the directory.
|
|
1407
1408
|
def download_to(url: str, dest: str) -> None:
|
|
1408
|
-
if
|
|
1409
|
+
if URLAccess.get_is_directory(url):
|
|
1409
1410
|
os.mkdir(dest)
|
|
1410
|
-
for part in
|
|
1411
|
+
for part in URLAccess.list_url(url):
|
|
1411
1412
|
download_to(
|
|
1412
1413
|
os.path.join(url, part), os.path.join(dest, part)
|
|
1413
1414
|
)
|
|
1414
1415
|
else:
|
|
1415
|
-
|
|
1416
|
+
URLAccess.read_from_url(url, open(dest, "wb"))
|
|
1416
1417
|
|
|
1417
1418
|
download_to(path, dest_dir)
|
|
1418
1419
|
self.dir_to_download[path] = dest_dir
|
|
@@ -1425,7 +1426,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1425
1426
|
# Try to grab it with a jobstore implementation, and save it
|
|
1426
1427
|
# somewhere arbitrary.
|
|
1427
1428
|
dest_file = NamedTemporaryFile(delete=False)
|
|
1428
|
-
|
|
1429
|
+
URLAccess.read_from_url(path, dest_file)
|
|
1429
1430
|
dest_file.close()
|
|
1430
1431
|
self.dir_to_download[path] = dest_file.name
|
|
1431
1432
|
destination = self.dir_to_download[path]
|
|
@@ -1483,7 +1484,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1483
1484
|
return open(self._abs(fn), mode)
|
|
1484
1485
|
else:
|
|
1485
1486
|
# This should be supported by a job store.
|
|
1486
|
-
byte_stream =
|
|
1487
|
+
byte_stream = URLAccess.open_url(fn)
|
|
1487
1488
|
if "b" in mode:
|
|
1488
1489
|
# Pass stream along in binary
|
|
1489
1490
|
return byte_stream
|
|
@@ -1520,7 +1521,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1520
1521
|
return True
|
|
1521
1522
|
else:
|
|
1522
1523
|
# This should be supported by a job store.
|
|
1523
|
-
return
|
|
1524
|
+
return URLAccess.url_exists(path)
|
|
1524
1525
|
|
|
1525
1526
|
def size(self, path: str) -> int:
|
|
1526
1527
|
parse = urlparse(path)
|
|
@@ -1549,7 +1550,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1549
1550
|
)
|
|
1550
1551
|
else:
|
|
1551
1552
|
# This should be supported by a job store.
|
|
1552
|
-
size =
|
|
1553
|
+
size = URLAccess.get_size(path)
|
|
1553
1554
|
if size is None:
|
|
1554
1555
|
# get_size can be unimplemented or unavailable
|
|
1555
1556
|
raise RuntimeError(f"Could not get size of {path}")
|
|
@@ -1572,7 +1573,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1572
1573
|
# TODO: we assume CWL can't call deleteGlobalFile and so the file always exists
|
|
1573
1574
|
return isinstance(found, str)
|
|
1574
1575
|
else:
|
|
1575
|
-
return self.exists(fn) and not
|
|
1576
|
+
return self.exists(fn) and not URLAccess.get_is_directory(fn)
|
|
1576
1577
|
|
|
1577
1578
|
def isdir(self, fn: str) -> bool:
|
|
1578
1579
|
logger.debug("ToilFsAccess checking type of %s", fn)
|
|
@@ -1592,7 +1593,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1592
1593
|
# TODO: We assume directories can't be deleted.
|
|
1593
1594
|
return isinstance(found, dict)
|
|
1594
1595
|
else:
|
|
1595
|
-
status =
|
|
1596
|
+
status = URLAccess.get_is_directory(fn)
|
|
1596
1597
|
logger.debug("AbstractJobStore said: %s", status)
|
|
1597
1598
|
return status
|
|
1598
1599
|
|
|
@@ -1626,7 +1627,7 @@ class ToilFsAccess(StdFsAccess):
|
|
|
1626
1627
|
else:
|
|
1627
1628
|
return [
|
|
1628
1629
|
os.path.join(fn, entry.rstrip("/"))
|
|
1629
|
-
for entry in
|
|
1630
|
+
for entry in URLAccess.list_url(fn)
|
|
1630
1631
|
]
|
|
1631
1632
|
|
|
1632
1633
|
def join(self, path: str, *paths: str) -> str:
|
|
@@ -1736,7 +1737,7 @@ def toil_get_file(
|
|
|
1736
1737
|
pipe.write(data)
|
|
1737
1738
|
else:
|
|
1738
1739
|
# Stream from some other URI
|
|
1739
|
-
|
|
1740
|
+
URLAccess.read_from_url(uri, pipe)
|
|
1740
1741
|
except OSError as e:
|
|
1741
1742
|
# The other side of the pipe may have been closed by the
|
|
1742
1743
|
# reading thread, which is OK.
|
|
@@ -1779,7 +1780,7 @@ def toil_get_file(
|
|
|
1779
1780
|
# Open that path exclusively to make sure we created it
|
|
1780
1781
|
with open(src_path, "xb") as fh:
|
|
1781
1782
|
# Download into the file
|
|
1782
|
-
size, executable =
|
|
1783
|
+
size, executable = URLAccess.read_from_url(uri, fh)
|
|
1783
1784
|
if executable:
|
|
1784
1785
|
# Set the execute bit in the file's permissions
|
|
1785
1786
|
os.chmod(src_path, os.stat(src_path).st_mode | stat.S_IXUSR)
|
|
@@ -2583,7 +2584,7 @@ class CWLJob(CWLNamedJob):
|
|
|
2583
2584
|
resources={},
|
|
2584
2585
|
mutation_manager=runtime_context.mutation_manager,
|
|
2585
2586
|
formatgraph=tool.formatgraph,
|
|
2586
|
-
make_fs_access=
|
|
2587
|
+
make_fs_access=runtime_context.make_fs_access,
|
|
2587
2588
|
fs_access=runtime_context.make_fs_access(""),
|
|
2588
2589
|
job_script_provider=runtime_context.job_script_provider,
|
|
2589
2590
|
timeout=runtime_context.eval_timeout,
|
|
@@ -2613,6 +2614,12 @@ class CWLJob(CWLNamedJob):
|
|
|
2613
2614
|
else:
|
|
2614
2615
|
# We use a None requirement and the Toil default applies.
|
|
2615
2616
|
memory = None
|
|
2617
|
+
|
|
2618
|
+
# Imposing a minimum memory limit
|
|
2619
|
+
min_ram = getattr(runtime_context, "cwl_min_ram")
|
|
2620
|
+
if min_ram is not None and memory is not None:
|
|
2621
|
+
# Note: if the job is using the toil default memory, it won't be increased
|
|
2622
|
+
memory = max(memory, min_ram)
|
|
2616
2623
|
|
|
2617
2624
|
accelerators: Optional[list[AcceleratorRequirement]] = None
|
|
2618
2625
|
if req.get("cudaDeviceCount", 0) > 0:
|
|
@@ -2751,6 +2758,9 @@ class CWLJob(CWLNamedJob):
|
|
|
2751
2758
|
|
|
2752
2759
|
cwljob = resolve_dict_w_promises(self.cwljob, file_store)
|
|
2753
2760
|
|
|
2761
|
+
# Deletes duplicate listings
|
|
2762
|
+
remove_redundant_mounts(cwljob)
|
|
2763
|
+
|
|
2754
2764
|
if self.conditional.is_false(cwljob):
|
|
2755
2765
|
return self.conditional.skipped_outputs()
|
|
2756
2766
|
|
|
@@ -2979,29 +2989,28 @@ def makeRootJob(
|
|
|
2979
2989
|
# This will consist of files that we were not able to get a file size for
|
|
2980
2990
|
leader_metadata = dict()
|
|
2981
2991
|
for filename, file_data in metadata.items():
|
|
2982
|
-
if file_data
|
|
2992
|
+
if file_data[2] is None: # size
|
|
2983
2993
|
leader_metadata[filename] = file_data
|
|
2984
2994
|
else:
|
|
2985
2995
|
worker_metadata[filename] = file_data
|
|
2986
2996
|
|
|
2997
|
+
if worker_metadata:
|
|
2998
|
+
logger.info(
|
|
2999
|
+
"Planning to import %s files on workers",
|
|
3000
|
+
len(worker_metadata),
|
|
3001
|
+
)
|
|
3002
|
+
|
|
2987
3003
|
# import the files for the leader first
|
|
2988
3004
|
path_to_fileid = WorkerImportJob.import_files(
|
|
2989
3005
|
list(leader_metadata.keys()), toil._jobStore
|
|
2990
3006
|
)
|
|
2991
3007
|
|
|
2992
|
-
#
|
|
2993
|
-
#
|
|
2994
|
-
|
|
2995
|
-
initialized_job_order,
|
|
2996
|
-
tool,
|
|
2997
|
-
path_to_fileid,
|
|
2998
|
-
options.basedir,
|
|
2999
|
-
options.reference_inputs,
|
|
3000
|
-
options.bypass_file_store,
|
|
3001
|
-
)
|
|
3008
|
+
# Because installing the imported files expects all files to have been
|
|
3009
|
+
# imported, we don't do that here; we combine the leader imports and
|
|
3010
|
+
# the worker imports and install them all at once.
|
|
3002
3011
|
|
|
3003
3012
|
import_job = CWLImportWrapper(
|
|
3004
|
-
initialized_job_order, tool, runtime_context, worker_metadata, options
|
|
3013
|
+
initialized_job_order, tool, runtime_context, worker_metadata, path_to_fileid, options
|
|
3005
3014
|
)
|
|
3006
3015
|
return import_job
|
|
3007
3016
|
else:
|
|
@@ -3573,7 +3582,7 @@ class CWLInstallImportsJob(Job):
|
|
|
3573
3582
|
basedir: str,
|
|
3574
3583
|
skip_remote: bool,
|
|
3575
3584
|
bypass_file_store: bool,
|
|
3576
|
-
import_data: Promised[dict[str, FileID]],
|
|
3585
|
+
import_data: list[Promised[dict[str, FileID]]],
|
|
3577
3586
|
**kwargs: Any,
|
|
3578
3587
|
) -> None:
|
|
3579
3588
|
"""
|
|
@@ -3581,6 +3590,8 @@ class CWLInstallImportsJob(Job):
|
|
|
3581
3590
|
to convert all file locations to URIs.
|
|
3582
3591
|
|
|
3583
3592
|
This class is only used when runImportsOnWorkers is enabled.
|
|
3593
|
+
|
|
3594
|
+
:param import_data: List of mappings from file URI to imported file ID.
|
|
3584
3595
|
"""
|
|
3585
3596
|
super().__init__(local=True, **kwargs)
|
|
3586
3597
|
self.initialized_job_order = initialized_job_order
|
|
@@ -3590,6 +3601,8 @@ class CWLInstallImportsJob(Job):
|
|
|
3590
3601
|
self.bypass_file_store = bypass_file_store
|
|
3591
3602
|
self.import_data = import_data
|
|
3592
3603
|
|
|
3604
|
+
# TODO: Since we only call this from the class itself now it doesn't really
|
|
3605
|
+
# need to be static anymore.
|
|
3593
3606
|
@staticmethod
|
|
3594
3607
|
def fill_in_files(
|
|
3595
3608
|
initialized_job_order: CWLObjectType,
|
|
@@ -3607,7 +3620,12 @@ class CWLInstallImportsJob(Job):
|
|
|
3607
3620
|
"""
|
|
3608
3621
|
Return the file name's associated Toil file ID
|
|
3609
3622
|
"""
|
|
3610
|
-
|
|
3623
|
+
try:
|
|
3624
|
+
return candidate_to_fileid[filename]
|
|
3625
|
+
except KeyError:
|
|
3626
|
+
# Give something more useful than a KeyError if something went
|
|
3627
|
+
# wrong with the importing.
|
|
3628
|
+
raise RuntimeError(f"File at \"{filename}\" was never imported.")
|
|
3611
3629
|
|
|
3612
3630
|
file_convert_function = functools.partial(
|
|
3613
3631
|
extract_and_convert_file_to_toil_uri, fill_in_file
|
|
@@ -3654,11 +3672,19 @@ class CWLInstallImportsJob(Job):
|
|
|
3654
3672
|
Convert the filenames in the workflow inputs into the URIs
|
|
3655
3673
|
:return: Promise of transformed workflow inputs. A tuple of the job order and process
|
|
3656
3674
|
"""
|
|
3657
|
-
|
|
3675
|
+
|
|
3676
|
+
# Merge all the input dicts down to one to check.
|
|
3677
|
+
candidate_to_fileid: dict[str, FileID] = {
|
|
3678
|
+
k: v for mapping in unwrap(
|
|
3679
|
+
self.import_data
|
|
3680
|
+
) for k, v in unwrap(mapping).items()
|
|
3681
|
+
}
|
|
3658
3682
|
|
|
3659
3683
|
initialized_job_order = unwrap(self.initialized_job_order)
|
|
3660
3684
|
tool = unwrap(self.tool)
|
|
3661
|
-
|
|
3685
|
+
|
|
3686
|
+
# Install the imported files in the tool and job order
|
|
3687
|
+
return self.fill_in_files(
|
|
3662
3688
|
initialized_job_order,
|
|
3663
3689
|
tool,
|
|
3664
3690
|
candidate_to_fileid,
|
|
@@ -3682,33 +3708,46 @@ class CWLImportWrapper(CWLNamedJob):
|
|
|
3682
3708
|
tool: Process,
|
|
3683
3709
|
runtime_context: cwltool.context.RuntimeContext,
|
|
3684
3710
|
file_to_data: dict[str, FileMetadata],
|
|
3711
|
+
imported_files: dict[str, FileID],
|
|
3685
3712
|
options: Namespace,
|
|
3686
3713
|
):
|
|
3687
|
-
|
|
3714
|
+
"""
|
|
3715
|
+
Make a job to do file imports on workers and then run the workflow.
|
|
3716
|
+
|
|
3717
|
+
:param file_to_data: Metadata for files that need to be imported on the
|
|
3718
|
+
worker.
|
|
3719
|
+
:param imported_files: Files already imported on the leader.
|
|
3720
|
+
"""
|
|
3721
|
+
super().__init__(local=False, disk=options.import_workers_batchsize)
|
|
3688
3722
|
self.initialized_job_order = initialized_job_order
|
|
3689
3723
|
self.tool = tool
|
|
3690
|
-
self.options = options
|
|
3691
3724
|
self.runtime_context = runtime_context
|
|
3692
3725
|
self.file_to_data = file_to_data
|
|
3726
|
+
self.imported_files = imported_files
|
|
3727
|
+
self.options = options
|
|
3693
3728
|
|
|
3694
3729
|
def run(self, file_store: AbstractFileStore) -> Any:
|
|
3730
|
+
# Do the worker-based imports
|
|
3695
3731
|
imports_job = ImportsJob(
|
|
3696
3732
|
self.file_to_data,
|
|
3697
|
-
self.options.
|
|
3733
|
+
self.options.import_workers_batchsize,
|
|
3698
3734
|
self.options.import_workers_disk,
|
|
3699
3735
|
)
|
|
3700
3736
|
self.addChild(imports_job)
|
|
3737
|
+
|
|
3738
|
+
# Install the worker imports and any leader imports
|
|
3701
3739
|
install_imports_job = CWLInstallImportsJob(
|
|
3702
3740
|
initialized_job_order=self.initialized_job_order,
|
|
3703
3741
|
tool=self.tool,
|
|
3704
3742
|
basedir=self.options.basedir,
|
|
3705
3743
|
skip_remote=self.options.reference_inputs,
|
|
3706
3744
|
bypass_file_store=self.options.bypass_file_store,
|
|
3707
|
-
import_data=imports_job.rv(0),
|
|
3745
|
+
import_data=[self.imported_files, imports_job.rv(0)],
|
|
3708
3746
|
)
|
|
3709
3747
|
self.addChild(install_imports_job)
|
|
3710
3748
|
imports_job.addFollowOn(install_imports_job)
|
|
3711
3749
|
|
|
3750
|
+
# Run the workflow
|
|
3712
3751
|
start_job = CWLStartJob(
|
|
3713
3752
|
install_imports_job.rv(0),
|
|
3714
3753
|
install_imports_job.rv(1),
|
|
@@ -4212,6 +4251,8 @@ def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
4212
4251
|
options.tmpdir_prefix or DEFAULT_TMPDIR_PREFIX
|
|
4213
4252
|
)
|
|
4214
4253
|
tmp_outdir_prefix = options.tmp_outdir_prefix or tmpdir_prefix
|
|
4254
|
+
# tmpdir_prefix and tmp_outdir_prefix must not be checked for existence as they may exist on a worker only path
|
|
4255
|
+
# See https://github.com/DataBiosphere/toil/issues/5310
|
|
4215
4256
|
workdir = options.workDir or tmp_outdir_prefix
|
|
4216
4257
|
|
|
4217
4258
|
if options.jobStore is None:
|
|
@@ -4262,6 +4303,7 @@ def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
4262
4303
|
runtime_context.workdir = workdir # type: ignore[attr-defined]
|
|
4263
4304
|
runtime_context.outdir = outdir
|
|
4264
4305
|
setattr(runtime_context, "cwl_default_ram", options.cwl_default_ram)
|
|
4306
|
+
setattr(runtime_context, "cwl_min_ram", options.cwl_min_ram)
|
|
4265
4307
|
runtime_context.move_outputs = "leave"
|
|
4266
4308
|
runtime_context.rm_tmpdir = False
|
|
4267
4309
|
runtime_context.streaming_allowed = not options.disable_streaming
|
|
@@ -4272,11 +4314,12 @@ def main(args: Optional[list[str]] = None, stdout: TextIO = sys.stdout) -> int:
|
|
|
4272
4314
|
# of filestore files and caches those.
|
|
4273
4315
|
logger.debug("CWL task caching is turned on. Bypassing file store.")
|
|
4274
4316
|
options.bypass_file_store = True
|
|
4317
|
+
|
|
4318
|
+
# Ensure the cache directory exists
|
|
4319
|
+
# Only ensure the caching directory exists as that must be local.
|
|
4320
|
+
os.makedirs(os.path.abspath(options.cachedir), exist_ok=True)
|
|
4275
4321
|
if options.mpi_config_file is not None:
|
|
4276
4322
|
runtime_context.mpi_config = MpiConfig.load(options.mpi_config_file)
|
|
4277
|
-
if cwltool.main.check_working_directories(runtime_context) is not None:
|
|
4278
|
-
logger.error("Failed to create directory. If using tmpdir_prefix, tmpdir_outdir_prefix, or cachedir, consider changing directory locations.")
|
|
4279
|
-
return 1
|
|
4280
4323
|
setattr(runtime_context, "bypass_file_store", options.bypass_file_store)
|
|
4281
4324
|
if options.bypass_file_store and options.destBucket:
|
|
4282
4325
|
# We use the file store to write to buckets, so we can't do this (yet?)
|
toil/cwl/utils.py
CHANGED
|
@@ -20,11 +20,26 @@ import posixpath
|
|
|
20
20
|
import stat
|
|
21
21
|
from collections.abc import Iterable, MutableMapping, MutableSequence
|
|
22
22
|
from pathlib import PurePosixPath
|
|
23
|
-
from typing import
|
|
24
|
-
|
|
23
|
+
from typing import (
|
|
24
|
+
Any,
|
|
25
|
+
Callable,
|
|
26
|
+
TypeVar,
|
|
27
|
+
Union,
|
|
28
|
+
Optional,
|
|
29
|
+
cast,
|
|
30
|
+
MutableSequence,
|
|
31
|
+
MutableMapping,
|
|
32
|
+
TYPE_CHECKING,
|
|
33
|
+
)
|
|
34
|
+
from urllib.parse import unquote, urlparse
|
|
35
|
+
|
|
36
|
+
if TYPE_CHECKING:
|
|
37
|
+
# This module needs to be importable even if cwltool is not installed.
|
|
38
|
+
from cwltool.utils import CWLObjectType, CWLOutputType
|
|
25
39
|
from toil.fileStores import FileID
|
|
26
40
|
from toil.fileStores.abstractFileStore import AbstractFileStore
|
|
27
41
|
from toil.jobStores.abstractJobStore import AbstractJobStore
|
|
42
|
+
from toil.lib.url import URLAccess
|
|
28
43
|
|
|
29
44
|
logger = logging.getLogger(__name__)
|
|
30
45
|
|
|
@@ -208,7 +223,7 @@ def download_structure(
|
|
|
208
223
|
)
|
|
209
224
|
else:
|
|
210
225
|
# We need to download from some other kind of URL.
|
|
211
|
-
size, executable =
|
|
226
|
+
size, executable = URLAccess.read_from_url(
|
|
212
227
|
value, open(dest_path, "wb")
|
|
213
228
|
)
|
|
214
229
|
if executable:
|
|
@@ -219,3 +234,88 @@ def download_structure(
|
|
|
219
234
|
# TODO: why?
|
|
220
235
|
index[dest_path] = value
|
|
221
236
|
existing[value] = dest_path
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def trim_mounts_op_down(file_or_directory: "CWLObjectType") -> None:
|
|
240
|
+
"""
|
|
241
|
+
No-op function for mount-point trimming.
|
|
242
|
+
"""
|
|
243
|
+
return
|
|
244
|
+
|
|
245
|
+
|
|
246
|
+
def sniff_location(file_or_directory: "CWLObjectType") -> Optional[str]:
|
|
247
|
+
"""
|
|
248
|
+
Get the local bare path for a CWL file or directory, or None.
|
|
249
|
+
|
|
250
|
+
:return: None if we don't have a local path or file URI
|
|
251
|
+
"""
|
|
252
|
+
if file_or_directory.get('location') is None and file_or_directory.get('path') is None:
|
|
253
|
+
# file or directory is defined by contents or listing respectively, this is not redundant
|
|
254
|
+
return None
|
|
255
|
+
# Since we only consider mountable paths, if path is not file URI or bare path, don't consider it
|
|
256
|
+
path_or_url = cast(str, file_or_directory.get('location') or file_or_directory.get('path'))
|
|
257
|
+
parsed = urlparse(path_or_url)
|
|
258
|
+
if parsed.scheme == 'file':
|
|
259
|
+
return unquote(parsed.path)
|
|
260
|
+
elif parsed.scheme == '':
|
|
261
|
+
return path_or_url
|
|
262
|
+
else:
|
|
263
|
+
return None
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def trim_mounts_op_up(file_or_directory: "CWLObjectType", op_down_ret: None, child_results: list[bool]) -> bool:
|
|
267
|
+
"""
|
|
268
|
+
Remove subtrees of the CWL file or directory object tree that only have redundant stuff in them.
|
|
269
|
+
|
|
270
|
+
Nonredundant for something in a directory means its path or location is not within the parent directory or doesn't match its basename
|
|
271
|
+
Nonredundant for something in a secondary file means its path or location is not adjacent to the primary file or doesn't match its basename
|
|
272
|
+
|
|
273
|
+
If on a File:
|
|
274
|
+
Returns True if anything in secondary files is nonredundant or has nonredundant children to this file, false otherwise
|
|
275
|
+
If on a Directory:
|
|
276
|
+
Returns True if anything in top level listing is nonredundant or has nonredundant children, otherwise false.
|
|
277
|
+
If something in the listing is redundant and all children are redundant, then delete it
|
|
278
|
+
:param file_or_directory: CWL file or CWL directory type
|
|
279
|
+
:return: boolean
|
|
280
|
+
"""
|
|
281
|
+
own_path = sniff_location(file_or_directory)
|
|
282
|
+
if own_path is None:
|
|
283
|
+
return True
|
|
284
|
+
# basename should be set as we are the implementation
|
|
285
|
+
own_basename = cast(str, file_or_directory['basename'])
|
|
286
|
+
|
|
287
|
+
# If the basename does not match the path, then this is nonredundant
|
|
288
|
+
if not own_path.endswith("/" + own_basename):
|
|
289
|
+
return True
|
|
290
|
+
|
|
291
|
+
if file_or_directory['class'] == 'File':
|
|
292
|
+
if any(child_results):
|
|
293
|
+
# one of the children was detected as not redundant
|
|
294
|
+
return True
|
|
295
|
+
for secondary in cast(MutableSequence[MutableMapping[str, "CWLOutputType"]], file_or_directory.get('secondaryFiles', [])):
|
|
296
|
+
# secondary files should already be flagged nonredundant if they don't have either a path or location
|
|
297
|
+
secondary_path = sniff_location(secondary)
|
|
298
|
+
secondary_basename = cast(str, secondary['basename'])
|
|
299
|
+
# If we swap the secondary basename for the primary basename in the primary path, and they don't match, then they are nonredundant
|
|
300
|
+
if os.path.join(own_path[:-len(own_basename)], secondary_basename) != secondary_path:
|
|
301
|
+
return True
|
|
302
|
+
else:
|
|
303
|
+
listings = cast(MutableSequence[MutableMapping[str, "CWLOutputType"]], file_or_directory.get('listing', []))
|
|
304
|
+
if len(listings) == 0:
|
|
305
|
+
return False
|
|
306
|
+
# We assume child_results is in the same order as the directory listing
|
|
307
|
+
# iterate backwards to avoid iteration issues
|
|
308
|
+
for i in range(len(listings) - 1, -1, -1):
|
|
309
|
+
if child_results[i] is False:
|
|
310
|
+
if os.path.join(own_path, cast(str, listings[i]['basename'])) == sniff_location(listings[i]):
|
|
311
|
+
del listings[i]
|
|
312
|
+
# If one of the listings was nonredundant, then this directory is also nonredundant
|
|
313
|
+
if any(child_results):
|
|
314
|
+
return True
|
|
315
|
+
return False
|
|
316
|
+
|
|
317
|
+
def remove_redundant_mounts(cwljob: "CWLObjectType") -> None:
|
|
318
|
+
"""
|
|
319
|
+
Remove any redundant mount points from the listing. Modifies the CWL object in place.
|
|
320
|
+
"""
|
|
321
|
+
visit_cwl_class_and_reduce(cwljob, ["Directory", "File"], trim_mounts_op_down, trim_mounts_op_up)
|