toil 9.1.2__py3-none-any.whl → 9.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- toil/__init__.py +5 -9
- toil/batchSystems/abstractBatchSystem.py +23 -22
- toil/batchSystems/abstractGridEngineBatchSystem.py +17 -12
- toil/batchSystems/awsBatch.py +8 -8
- toil/batchSystems/cleanup_support.py +4 -4
- toil/batchSystems/contained_executor.py +3 -3
- toil/batchSystems/gridengine.py +3 -4
- toil/batchSystems/htcondor.py +5 -5
- toil/batchSystems/kubernetes.py +65 -63
- toil/batchSystems/local_support.py +2 -3
- toil/batchSystems/lsf.py +6 -7
- toil/batchSystems/mesos/batchSystem.py +11 -7
- toil/batchSystems/mesos/test/__init__.py +1 -2
- toil/batchSystems/options.py +9 -10
- toil/batchSystems/registry.py +3 -7
- toil/batchSystems/singleMachine.py +8 -11
- toil/batchSystems/slurm.py +49 -38
- toil/batchSystems/torque.py +3 -4
- toil/bus.py +36 -34
- toil/common.py +129 -89
- toil/cwl/cwltoil.py +857 -729
- toil/cwl/utils.py +44 -35
- toil/fileStores/__init__.py +3 -1
- toil/fileStores/abstractFileStore.py +28 -30
- toil/fileStores/cachingFileStore.py +8 -8
- toil/fileStores/nonCachingFileStore.py +10 -21
- toil/job.py +159 -158
- toil/jobStores/abstractJobStore.py +68 -69
- toil/jobStores/aws/jobStore.py +249 -213
- toil/jobStores/aws/utils.py +13 -24
- toil/jobStores/fileJobStore.py +28 -22
- toil/jobStores/googleJobStore.py +21 -17
- toil/jobStores/utils.py +3 -7
- toil/leader.py +14 -14
- toil/lib/accelerators.py +6 -4
- toil/lib/aws/__init__.py +9 -10
- toil/lib/aws/ami.py +33 -19
- toil/lib/aws/iam.py +6 -6
- toil/lib/aws/s3.py +259 -157
- toil/lib/aws/session.py +76 -76
- toil/lib/aws/utils.py +51 -43
- toil/lib/checksum.py +19 -15
- toil/lib/compatibility.py +3 -2
- toil/lib/conversions.py +45 -18
- toil/lib/directory.py +29 -26
- toil/lib/docker.py +93 -99
- toil/lib/dockstore.py +77 -50
- toil/lib/ec2.py +39 -38
- toil/lib/ec2nodes.py +11 -4
- toil/lib/exceptions.py +8 -5
- toil/lib/ftp_utils.py +9 -14
- toil/lib/generatedEC2Lists.py +161 -20
- toil/lib/history.py +141 -97
- toil/lib/history_submission.py +163 -72
- toil/lib/io.py +27 -17
- toil/lib/memoize.py +2 -1
- toil/lib/misc.py +15 -11
- toil/lib/pipes.py +40 -25
- toil/lib/plugins.py +12 -8
- toil/lib/resources.py +1 -0
- toil/lib/retry.py +32 -38
- toil/lib/threading.py +12 -12
- toil/lib/throttle.py +1 -2
- toil/lib/trs.py +113 -51
- toil/lib/url.py +14 -23
- toil/lib/web.py +7 -2
- toil/options/common.py +18 -15
- toil/options/cwl.py +2 -2
- toil/options/runner.py +9 -5
- toil/options/wdl.py +1 -3
- toil/provisioners/__init__.py +9 -9
- toil/provisioners/abstractProvisioner.py +22 -20
- toil/provisioners/aws/__init__.py +20 -14
- toil/provisioners/aws/awsProvisioner.py +10 -8
- toil/provisioners/clusterScaler.py +19 -18
- toil/provisioners/gceProvisioner.py +2 -3
- toil/provisioners/node.py +11 -13
- toil/realtimeLogger.py +4 -4
- toil/resource.py +5 -5
- toil/server/app.py +2 -2
- toil/server/cli/wes_cwl_runner.py +11 -11
- toil/server/utils.py +18 -21
- toil/server/wes/abstract_backend.py +9 -8
- toil/server/wes/amazon_wes_utils.py +3 -3
- toil/server/wes/tasks.py +3 -5
- toil/server/wes/toil_backend.py +17 -21
- toil/server/wsgi_app.py +3 -3
- toil/serviceManager.py +3 -4
- toil/statsAndLogging.py +12 -13
- toil/test/__init__.py +33 -24
- toil/test/batchSystems/batchSystemTest.py +12 -11
- toil/test/batchSystems/batch_system_plugin_test.py +3 -5
- toil/test/batchSystems/test_slurm.py +38 -24
- toil/test/cwl/conftest.py +5 -6
- toil/test/cwl/cwlTest.py +194 -78
- toil/test/cwl/download_file_uri.json +6 -0
- toil/test/cwl/download_file_uri_no_hostname.json +6 -0
- toil/test/docs/scripts/tutorial_staging.py +1 -0
- toil/test/jobStores/jobStoreTest.py +9 -7
- toil/test/lib/aws/test_iam.py +1 -3
- toil/test/lib/aws/test_s3.py +1 -1
- toil/test/lib/dockerTest.py +9 -9
- toil/test/lib/test_ec2.py +12 -11
- toil/test/lib/test_history.py +4 -4
- toil/test/lib/test_trs.py +16 -14
- toil/test/lib/test_url.py +7 -6
- toil/test/lib/url_plugin_test.py +12 -18
- toil/test/provisioners/aws/awsProvisionerTest.py +10 -8
- toil/test/provisioners/clusterScalerTest.py +2 -5
- toil/test/provisioners/clusterTest.py +1 -3
- toil/test/server/serverTest.py +13 -4
- toil/test/sort/restart_sort.py +2 -6
- toil/test/sort/sort.py +3 -8
- toil/test/src/deferredFunctionTest.py +7 -7
- toil/test/src/environmentTest.py +1 -2
- toil/test/src/fileStoreTest.py +5 -5
- toil/test/src/importExportFileTest.py +5 -6
- toil/test/src/jobServiceTest.py +22 -14
- toil/test/src/jobTest.py +121 -25
- toil/test/src/miscTests.py +5 -7
- toil/test/src/promisedRequirementTest.py +8 -7
- toil/test/src/regularLogTest.py +2 -3
- toil/test/src/resourceTest.py +5 -8
- toil/test/src/restartDAGTest.py +5 -6
- toil/test/src/resumabilityTest.py +2 -2
- toil/test/src/retainTempDirTest.py +3 -3
- toil/test/src/systemTest.py +3 -3
- toil/test/src/threadingTest.py +1 -1
- toil/test/src/workerTest.py +1 -2
- toil/test/utils/toilDebugTest.py +6 -4
- toil/test/utils/toilKillTest.py +1 -1
- toil/test/utils/utilsTest.py +15 -14
- toil/test/wdl/wdltoil_test.py +247 -124
- toil/test/wdl/wdltoil_test_kubernetes.py +2 -2
- toil/toilState.py +2 -3
- toil/utils/toilDebugFile.py +3 -8
- toil/utils/toilDebugJob.py +1 -2
- toil/utils/toilLaunchCluster.py +1 -2
- toil/utils/toilSshCluster.py +2 -0
- toil/utils/toilStats.py +19 -24
- toil/utils/toilStatus.py +11 -14
- toil/version.py +10 -10
- toil/wdl/wdltoil.py +313 -209
- toil/worker.py +18 -12
- {toil-9.1.2.dist-info → toil-9.2.0.dist-info}/METADATA +11 -14
- {toil-9.1.2.dist-info → toil-9.2.0.dist-info}/RECORD +150 -153
- {toil-9.1.2.dist-info → toil-9.2.0.dist-info}/WHEEL +1 -1
- toil/test/cwl/staging_cat.cwl +0 -27
- toil/test/cwl/staging_make_file.cwl +0 -25
- toil/test/cwl/staging_workflow.cwl +0 -43
- toil/test/cwl/zero_default.cwl +0 -61
- toil/test/utils/ABCWorkflowDebug/ABC.txt +0 -1
- {toil-9.1.2.dist-info → toil-9.2.0.dist-info}/entry_points.txt +0 -0
- {toil-9.1.2.dist-info → toil-9.2.0.dist-info}/licenses/LICENSE +0 -0
- {toil-9.1.2.dist-info → toil-9.2.0.dist-info}/top_level.txt +0 -0
toil/lib/threading.py
CHANGED
|
@@ -30,7 +30,7 @@ import time
|
|
|
30
30
|
import traceback
|
|
31
31
|
from collections.abc import Iterator
|
|
32
32
|
from contextlib import contextmanager
|
|
33
|
-
from typing import
|
|
33
|
+
from typing import cast
|
|
34
34
|
|
|
35
35
|
import psutil
|
|
36
36
|
|
|
@@ -42,7 +42,7 @@ logger = logging.getLogger(__name__)
|
|
|
42
42
|
|
|
43
43
|
|
|
44
44
|
def ensure_filesystem_lockable(
|
|
45
|
-
path: StrPath, timeout: float = 30, hint:
|
|
45
|
+
path: StrPath, timeout: float = 30, hint: str | None = None
|
|
46
46
|
) -> None:
|
|
47
47
|
"""
|
|
48
48
|
Make sure that the filesystem used at the given path is one where locks are safe to use.
|
|
@@ -221,12 +221,12 @@ class ExceptionalThread(threading.Thread):
|
|
|
221
221
|
def tryRun(self) -> None:
|
|
222
222
|
super().run()
|
|
223
223
|
|
|
224
|
-
def join(self, *args:
|
|
224
|
+
def join(self, *args: float | None, **kwargs: float | None) -> None:
|
|
225
225
|
super().join(*args, **kwargs)
|
|
226
226
|
if not self.is_alive() and self.exc_info is not None:
|
|
227
227
|
exc_type, exc_value, traceback = self.exc_info
|
|
228
228
|
self.exc_info = None
|
|
229
|
-
raise_(exc_type, exc_value, traceback)
|
|
229
|
+
raise_(exc_type, exc_value, traceback)
|
|
230
230
|
|
|
231
231
|
|
|
232
232
|
def cpu_count() -> int:
|
|
@@ -257,18 +257,18 @@ def cpu_count() -> int:
|
|
|
257
257
|
if psutil_cpu_count is None:
|
|
258
258
|
logger.debug("Could not retrieve the logical CPU count.")
|
|
259
259
|
|
|
260
|
-
total_machine_size:
|
|
260
|
+
total_machine_size: float | int = (
|
|
261
261
|
psutil_cpu_count if psutil_cpu_count is not None else float("inf")
|
|
262
262
|
)
|
|
263
263
|
logger.debug("Total machine size: %s core(s)", total_machine_size)
|
|
264
264
|
|
|
265
265
|
# cgroups may limit the size
|
|
266
|
-
cgroup_size:
|
|
266
|
+
cgroup_size: float | int = float("inf")
|
|
267
267
|
|
|
268
268
|
try:
|
|
269
269
|
# See if we can fetch these and use them
|
|
270
|
-
quota:
|
|
271
|
-
period:
|
|
270
|
+
quota: int | None = None
|
|
271
|
+
period: int | None = None
|
|
272
272
|
|
|
273
273
|
# CGroups v1 keeps quota and period separate
|
|
274
274
|
CGROUP1_QUOTA_FILE = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us"
|
|
@@ -312,7 +312,7 @@ def cpu_count() -> int:
|
|
|
312
312
|
logger.debug("Could not inspect cgroup: %s", traceback.format_exc())
|
|
313
313
|
|
|
314
314
|
# CPU affinity may limit the size
|
|
315
|
-
affinity_size:
|
|
315
|
+
affinity_size: float | int = float("inf")
|
|
316
316
|
if hasattr(os, "sched_getaffinity"):
|
|
317
317
|
try:
|
|
318
318
|
logger.debug("CPU affinity available")
|
|
@@ -326,7 +326,7 @@ def cpu_count() -> int:
|
|
|
326
326
|
else:
|
|
327
327
|
logger.debug("CPU affinity not available")
|
|
328
328
|
|
|
329
|
-
limit:
|
|
329
|
+
limit: float | int = float("inf")
|
|
330
330
|
# Apply all the limits to take the smallest
|
|
331
331
|
limit = min(limit, total_machine_size)
|
|
332
332
|
limit = min(limit, cgroup_size)
|
|
@@ -582,7 +582,7 @@ def global_mutex(base_dir: StrPath, mutex: str) -> Iterator[None]:
|
|
|
582
582
|
|
|
583
583
|
try:
|
|
584
584
|
# And get the stats for the name in the directory
|
|
585
|
-
path_stats:
|
|
585
|
+
path_stats: os.stat_result | None = os.stat(lock_filename)
|
|
586
586
|
except FileNotFoundError:
|
|
587
587
|
path_stats = None
|
|
588
588
|
|
|
@@ -786,7 +786,7 @@ class LastProcessStandingArena:
|
|
|
786
786
|
|
|
787
787
|
try:
|
|
788
788
|
fd = os.open(full_path, os.O_RDONLY)
|
|
789
|
-
except OSError
|
|
789
|
+
except OSError:
|
|
790
790
|
# suddenly file doesnt exist on network file system?
|
|
791
791
|
continue
|
|
792
792
|
|
toil/lib/throttle.py
CHANGED
|
@@ -15,7 +15,6 @@
|
|
|
15
15
|
# 5.14.2018: copied into Toil from https://github.com/BD2KGenomics/bd2k-python-lib
|
|
16
16
|
import threading
|
|
17
17
|
import time
|
|
18
|
-
from typing import Union
|
|
19
18
|
|
|
20
19
|
|
|
21
20
|
class LocalThrottle:
|
|
@@ -143,7 +142,7 @@ class throttle:
|
|
|
143
142
|
True
|
|
144
143
|
"""
|
|
145
144
|
|
|
146
|
-
def __init__(self, min_interval:
|
|
145
|
+
def __init__(self, min_interval: int | float) -> None:
|
|
147
146
|
self.min_interval = min_interval
|
|
148
147
|
|
|
149
148
|
def __enter__(self):
|
toil/lib/trs.py
CHANGED
|
@@ -21,21 +21,25 @@ import hashlib
|
|
|
21
21
|
import logging
|
|
22
22
|
import os
|
|
23
23
|
import shutil
|
|
24
|
-
import sys
|
|
25
24
|
import tempfile
|
|
26
25
|
import zipfile
|
|
27
|
-
from typing import Any
|
|
26
|
+
from typing import Any
|
|
27
|
+
from urllib.parse import quote, unquote, urlparse
|
|
28
28
|
|
|
29
|
-
from urllib.parse import urlparse, unquote, quote
|
|
30
29
|
import requests
|
|
31
30
|
|
|
32
|
-
from toil.lib.retry import retry
|
|
33
31
|
from toil.lib.io import file_digest, robust_rmtree
|
|
32
|
+
from toil.lib.retry import retry
|
|
34
33
|
from toil.lib.web import web_session
|
|
35
34
|
|
|
36
35
|
logger = logging.getLogger(__name__)
|
|
37
36
|
|
|
38
|
-
TRS_ROOT =
|
|
37
|
+
TRS_ROOT = (
|
|
38
|
+
"https://dockstore.org"
|
|
39
|
+
if "TOIL_TRS_ROOT" not in os.environ
|
|
40
|
+
else os.environ["TOIL_TRS_ROOT"]
|
|
41
|
+
)
|
|
42
|
+
|
|
39
43
|
|
|
40
44
|
def is_trs_workflow(workflow: str) -> bool:
|
|
41
45
|
"""
|
|
@@ -44,7 +48,12 @@ def is_trs_workflow(workflow: str) -> bool:
|
|
|
44
48
|
Detects Dockstore page URLs and strings that could be Dockstore TRS IDs.
|
|
45
49
|
"""
|
|
46
50
|
|
|
47
|
-
return
|
|
51
|
+
return (
|
|
52
|
+
workflow.startswith(f"{TRS_ROOT}/workflows/")
|
|
53
|
+
or workflow.startswith(f"{TRS_ROOT}/my-workflows/")
|
|
54
|
+
or workflow.startswith("#workflow/")
|
|
55
|
+
)
|
|
56
|
+
|
|
48
57
|
|
|
49
58
|
def extract_trs_spec(workflow: str) -> str:
|
|
50
59
|
"""
|
|
@@ -62,20 +71,21 @@ def extract_trs_spec(workflow: str) -> str:
|
|
|
62
71
|
# TODO: We assume the Dockstore page URL structure and the TRS IDs are basically the same.
|
|
63
72
|
page_path = unquote(parsed.path)
|
|
64
73
|
if page_path.startswith("/workflows/"):
|
|
65
|
-
trs_spec = "#workflow/" + page_path[len("/workflows/"):]
|
|
74
|
+
trs_spec = "#workflow/" + page_path[len("/workflows/") :]
|
|
66
75
|
elif page_path.startswith("/my-workflows/"):
|
|
67
|
-
trs_spec = "#workflow/" + page_path[len("/my-workflows/"):]
|
|
76
|
+
trs_spec = "#workflow/" + page_path[len("/my-workflows/") :]
|
|
68
77
|
else:
|
|
69
78
|
raise RuntimeError("Cannot parse Dockstore URL " + workflow)
|
|
70
79
|
logger.debug("Translated %s to TRS: %s", workflow, trs_spec)
|
|
71
80
|
|
|
72
81
|
return trs_spec
|
|
73
82
|
|
|
74
|
-
|
|
83
|
+
|
|
84
|
+
def parse_trs_spec(trs_spec: str) -> tuple[str, str | None]:
|
|
75
85
|
"""
|
|
76
86
|
Parse a TRS ID to workflow and optional version.
|
|
77
87
|
"""
|
|
78
|
-
parts = trs_spec.split(
|
|
88
|
+
parts = trs_spec.split(":", 1)
|
|
79
89
|
trs_workflow_id = parts[0]
|
|
80
90
|
if len(parts) > 1:
|
|
81
91
|
# The ID has the version we want after a colon
|
|
@@ -85,14 +95,18 @@ def parse_trs_spec(trs_spec: str) -> tuple[str, Optional[str]]:
|
|
|
85
95
|
trs_version = None
|
|
86
96
|
return trs_workflow_id, trs_version
|
|
87
97
|
|
|
98
|
+
|
|
88
99
|
def compose_trs_spec(trs_workflow_id: str, trs_version: str) -> str:
|
|
89
100
|
"""
|
|
90
101
|
Compose a TRS ID from a workflow ID and version ID.
|
|
91
102
|
"""
|
|
92
103
|
return f"{trs_workflow_id}:{trs_version}"
|
|
93
104
|
|
|
105
|
+
|
|
94
106
|
@retry(errors=[requests.exceptions.ConnectionError])
|
|
95
|
-
def find_workflow(
|
|
107
|
+
def find_workflow(
|
|
108
|
+
workflow: str, supported_languages: set[str] | None = None
|
|
109
|
+
) -> tuple[str, str, str]:
|
|
96
110
|
"""
|
|
97
111
|
Given a Dockstore URL or TRS identifier, get the root WDL or CWL URL for the workflow, along with the TRS workflow ID and version.
|
|
98
112
|
|
|
@@ -107,7 +121,7 @@ def find_workflow(workflow: str, supported_languages: Optional[set[str]] = None)
|
|
|
107
121
|
|
|
108
122
|
:raises FileNotFoundError: if the workflow or version doesn't exist.
|
|
109
123
|
:raises ValueError: if the version is not specified but cannot be
|
|
110
|
-
automatically determined.
|
|
124
|
+
automatically determined.
|
|
111
125
|
"""
|
|
112
126
|
|
|
113
127
|
if supported_languages is not None and len(supported_languages) == 0:
|
|
@@ -118,11 +132,18 @@ def find_workflow(workflow: str, supported_languages: Optional[set[str]] = None)
|
|
|
118
132
|
# Parse out workflow and possible version
|
|
119
133
|
trs_workflow_id, trs_version = parse_trs_spec(trs_spec)
|
|
120
134
|
|
|
121
|
-
logger.debug(
|
|
135
|
+
logger.debug(
|
|
136
|
+
"TRS %s parses to workflow %s and version %s",
|
|
137
|
+
trs_spec,
|
|
138
|
+
trs_workflow_id,
|
|
139
|
+
trs_version,
|
|
140
|
+
)
|
|
122
141
|
|
|
123
142
|
# Fetch the main TRS document.
|
|
124
143
|
# See e.g. https://dockstore.org/api/ga4gh/trs/v2/tools/%23workflow%2Fgithub.com%2Fdockstore-testing%2Fmd5sum-checker
|
|
125
|
-
trs_workflow_url =
|
|
144
|
+
trs_workflow_url = (
|
|
145
|
+
f"{TRS_ROOT}/api/ga4gh/trs/v2/tools/{quote(trs_workflow_id, safe='')}"
|
|
146
|
+
)
|
|
126
147
|
logger.debug("Get versions: %s", trs_workflow_url)
|
|
127
148
|
trs_workflow_response = web_session.get(trs_workflow_url)
|
|
128
149
|
if trs_workflow_response.status_code in (400, 404):
|
|
@@ -168,7 +189,6 @@ def find_workflow(workflow: str, supported_languages: Optional[set[str]] = None)
|
|
|
168
189
|
trs_version = next(iter(eligible_workflow_versions))
|
|
169
190
|
logger.debug("Defaulting to only eligible workflow version %s", trs_version)
|
|
170
191
|
|
|
171
|
-
|
|
172
192
|
# If we don't like what we found we compose a useful error message.
|
|
173
193
|
problems: list[str] = []
|
|
174
194
|
problem_type: type[Exception] = RuntimeError
|
|
@@ -176,7 +196,9 @@ def find_workflow(workflow: str, supported_languages: Optional[set[str]] = None)
|
|
|
176
196
|
problems.append(f"Workflow {workflow} does not specify a version")
|
|
177
197
|
problem_type = ValueError
|
|
178
198
|
elif trs_version not in workflow_versions:
|
|
179
|
-
problems.append(
|
|
199
|
+
problems.append(
|
|
200
|
+
f"Workflow version {trs_version} from {workflow} does not exist"
|
|
201
|
+
)
|
|
180
202
|
problem_type = FileNotFoundError
|
|
181
203
|
elif trs_version not in eligible_workflow_versions:
|
|
182
204
|
message = f"Workflow version {trs_version} from {workflow} is not available"
|
|
@@ -192,24 +214,33 @@ def find_workflow(workflow: str, supported_languages: Optional[set[str]] = None)
|
|
|
192
214
|
problems.append(message)
|
|
193
215
|
problem_type = FileNotFoundError
|
|
194
216
|
elif trs_version is None:
|
|
195
|
-
problems.append(
|
|
217
|
+
problems.append(
|
|
218
|
+
f"Add ':' and the name of a workflow version ({', '.join(eligible_workflow_versions)}) after '{trs_workflow_id}'"
|
|
219
|
+
)
|
|
196
220
|
else:
|
|
197
|
-
problems.append(
|
|
221
|
+
problems.append(
|
|
222
|
+
f"Replace '{trs_version}' with one of ({', '.join(eligible_workflow_versions)})"
|
|
223
|
+
)
|
|
198
224
|
raise problem_type("; ".join(problems))
|
|
199
225
|
|
|
200
226
|
# Tell MyPy we now have a version, or we would have raised
|
|
201
227
|
assert trs_version is not None
|
|
202
228
|
|
|
203
229
|
# Select the language we will actually run
|
|
204
|
-
chosen_version_languages: list[str] = workflow_versions[trs_version][
|
|
230
|
+
chosen_version_languages: list[str] = workflow_versions[trs_version][
|
|
231
|
+
"descriptor_type"
|
|
232
|
+
]
|
|
205
233
|
for candidate_language in chosen_version_languages:
|
|
206
234
|
if supported_languages is None or candidate_language in supported_languages:
|
|
207
235
|
language = candidate_language
|
|
208
236
|
|
|
209
|
-
logger.debug(
|
|
237
|
+
logger.debug(
|
|
238
|
+
"Going to use %s version %s in %s", trs_workflow_id, trs_version, language
|
|
239
|
+
)
|
|
210
240
|
|
|
211
241
|
return trs_workflow_id, trs_version, language
|
|
212
|
-
|
|
242
|
+
|
|
243
|
+
|
|
213
244
|
@retry(errors=[requests.exceptions.ConnectionError])
|
|
214
245
|
def fetch_workflow(trs_workflow_id: str, trs_version: str, language: str) -> str:
|
|
215
246
|
"""
|
|
@@ -233,22 +264,30 @@ def fetch_workflow(trs_workflow_id: str, trs_version: str, language: str) -> str
|
|
|
233
264
|
# That's successful, so we need to handle it specifically. See
|
|
234
265
|
# <https://github.com/dockstore/dockstore/issues/6048>
|
|
235
266
|
# We can also get a 400 if the workflow ID is not in Dockstore's expected format (3 slash-separated segments).
|
|
236
|
-
raise FileNotFoundError(
|
|
267
|
+
raise FileNotFoundError(
|
|
268
|
+
f"Workflow {trs_workflow_id} version {trs_version} in language {language} does not exist."
|
|
269
|
+
)
|
|
237
270
|
trs_files_response.raise_for_status()
|
|
238
271
|
trs_files_document = trs_files_response.json()
|
|
239
272
|
|
|
240
273
|
# Find the information we need to ID the primary descriptor file
|
|
241
|
-
primary_descriptor_path:
|
|
242
|
-
primary_descriptor_hash_algorithm:
|
|
243
|
-
primary_descriptor_hash:
|
|
274
|
+
primary_descriptor_path: str | None = None
|
|
275
|
+
primary_descriptor_hash_algorithm: str | None = None
|
|
276
|
+
primary_descriptor_hash: str | None = None
|
|
244
277
|
for file_info in trs_files_document:
|
|
245
278
|
if file_info["file_type"] == "PRIMARY_DESCRIPTOR":
|
|
246
279
|
primary_descriptor_path = file_info["path"]
|
|
247
280
|
primary_descriptor_hash_algorithm = file_info["checksum"]["type"]
|
|
248
281
|
primary_descriptor_hash = file_info["checksum"]["checksum"]
|
|
249
282
|
break
|
|
250
|
-
if
|
|
251
|
-
|
|
283
|
+
if (
|
|
284
|
+
primary_descriptor_path is None
|
|
285
|
+
or primary_descriptor_hash is None
|
|
286
|
+
or primary_descriptor_hash_algorithm is None
|
|
287
|
+
):
|
|
288
|
+
raise RuntimeError(
|
|
289
|
+
f"Could not find a primary descriptor file for workflow {trs_workflow_id} version {trs_version} in language {language}"
|
|
290
|
+
)
|
|
252
291
|
primary_descriptor_basename = os.path.basename(primary_descriptor_path)
|
|
253
292
|
|
|
254
293
|
# Work out how to compute the hash we are looking for. See
|
|
@@ -257,16 +296,25 @@ def fetch_workflow(trs_workflow_id: str, trs_version: str, language: str) -> str
|
|
|
257
296
|
# for the Python names.
|
|
258
297
|
#
|
|
259
298
|
# TODO: We don't support the various truncated hash flavors or the other checksums not in hashlib.
|
|
260
|
-
python_hash_name =
|
|
299
|
+
python_hash_name = (
|
|
300
|
+
primary_descriptor_hash_algorithm.replace("sha-", "sha")
|
|
301
|
+
.replace("blake2b-512", "blake2b")
|
|
302
|
+
.replace("-", "_")
|
|
303
|
+
)
|
|
261
304
|
if python_hash_name not in hashlib.algorithms_available:
|
|
262
|
-
raise RuntimeError(
|
|
305
|
+
raise RuntimeError(
|
|
306
|
+
f"Primary descriptor is identified by a {primary_descriptor_hash_algorithm} hash but {python_hash_name} is not available in hashlib"
|
|
307
|
+
)
|
|
263
308
|
|
|
264
309
|
# Figure out where to store the workflow. We don't want to deal with temp
|
|
265
310
|
# dir cleanup since we don't want to run the whole workflow setup and
|
|
266
311
|
# execution in a context manager. So we declare a cache.
|
|
267
312
|
# Note that it's still not safe to symlink out of this cache since XDG
|
|
268
313
|
# cache directories aren't guaranteed to be on shared storage.
|
|
269
|
-
cache_base_dir = os.path.join(
|
|
314
|
+
cache_base_dir = os.path.join(
|
|
315
|
+
os.environ.get("XDG_CACHE_HOME", os.path.expanduser("~/.cache")),
|
|
316
|
+
"toil/workflows",
|
|
317
|
+
)
|
|
270
318
|
|
|
271
319
|
# Hash the workflow file list.
|
|
272
320
|
hasher = hashlib.sha256()
|
|
@@ -297,13 +345,17 @@ def fetch_workflow(trs_workflow_id: str, trs_version: str, language: str) -> str
|
|
|
297
345
|
"Accept-Encoding": "identity",
|
|
298
346
|
# Help Dockstore avoid serving ZIP with a JSON content type. See
|
|
299
347
|
# <https://github.com/dockstore/dockstore/issues/6010>.
|
|
300
|
-
"Accept": "application/zip"
|
|
348
|
+
"Accept": "application/zip",
|
|
301
349
|
}
|
|
302
350
|
# If we don't set stream=True, we can't actually read anything from the
|
|
303
351
|
# raw stream, since Requests will have done it already.
|
|
304
|
-
with web_session.get(
|
|
352
|
+
with web_session.get(
|
|
353
|
+
trs_zip_file_url, headers=headers, stream=True
|
|
354
|
+
) as response:
|
|
305
355
|
response_content_length = response.headers.get("Content-Length")
|
|
306
|
-
logger.debug(
|
|
356
|
+
logger.debug(
|
|
357
|
+
"Server reports content length: %s", response_content_length
|
|
358
|
+
)
|
|
307
359
|
shutil.copyfileobj(response.raw, zip_file)
|
|
308
360
|
zip_file.flush()
|
|
309
361
|
|
|
@@ -323,34 +375,52 @@ def fetch_workflow(trs_workflow_id: str, trs_version: str, language: str) -> str
|
|
|
323
375
|
except OSError:
|
|
324
376
|
# Collision. Someone else installed the workflow before we could.
|
|
325
377
|
robust_rmtree(workflow_temp_dir)
|
|
326
|
-
logger.debug(
|
|
378
|
+
logger.debug(
|
|
379
|
+
"Workflow cached at %s by someone else while we were donwloading it",
|
|
380
|
+
cache_workflow_dir,
|
|
381
|
+
)
|
|
327
382
|
|
|
328
383
|
# Hunt throught he directory for a file with the right basename and hash
|
|
329
|
-
found_path:
|
|
384
|
+
found_path: str | None = None
|
|
330
385
|
for containing_dir, subdirectories, files in os.walk(cache_workflow_dir):
|
|
331
386
|
for filename in files:
|
|
332
387
|
if filename == primary_descriptor_basename:
|
|
333
388
|
# This could be it. Open the file off disk and hash it with the right algorithm.
|
|
334
389
|
file_path = os.path.join(containing_dir, filename)
|
|
335
|
-
file_hash = file_digest(
|
|
390
|
+
file_hash = file_digest(
|
|
391
|
+
open(file_path, "rb"), python_hash_name
|
|
392
|
+
).hexdigest()
|
|
336
393
|
if file_hash == primary_descriptor_hash:
|
|
337
394
|
# This looks like the right file
|
|
338
395
|
logger.debug("Found candidate primary descriptor %s", file_path)
|
|
339
396
|
if found_path is not None:
|
|
340
397
|
# But there are multiple instances of it so we can't know which to run.
|
|
341
398
|
# TODO: Find out the right path from Dockstore somehow!
|
|
342
|
-
raise RuntimeError(
|
|
399
|
+
raise RuntimeError(
|
|
400
|
+
f"Workflow contains multiple files named {primary_descriptor_basename} with {python_hash_name} hash {file_hash}: {found_path} and {file_path}"
|
|
401
|
+
)
|
|
343
402
|
# This is the first file with the right name and hash
|
|
344
403
|
found_path = file_path
|
|
345
404
|
else:
|
|
346
|
-
logger.debug(
|
|
405
|
+
logger.debug(
|
|
406
|
+
"Rejected %s because its %s hash %s is not %s",
|
|
407
|
+
file_path,
|
|
408
|
+
python_hash_name,
|
|
409
|
+
file_hash,
|
|
410
|
+
primary_descriptor_hash,
|
|
411
|
+
)
|
|
347
412
|
if found_path is None:
|
|
348
413
|
# We couldn't find the promised primary descriptor
|
|
349
|
-
raise RuntimeError(
|
|
414
|
+
raise RuntimeError(
|
|
415
|
+
f"Could not find a {primary_descriptor_basename} with {primary_descriptor_hash_algorithm} hash {primary_descriptor_hash} for workflow {trs_workflow_id} version {trs_version} in language {language}"
|
|
416
|
+
)
|
|
350
417
|
|
|
351
418
|
return found_path
|
|
352
419
|
|
|
353
|
-
|
|
420
|
+
|
|
421
|
+
def resolve_workflow(
|
|
422
|
+
workflow: str, supported_languages: set[str] | None = None
|
|
423
|
+
) -> tuple[str, str | None]:
|
|
354
424
|
"""
|
|
355
425
|
Find the real workflow URL or filename from a command line argument.
|
|
356
426
|
|
|
@@ -370,7 +440,9 @@ def resolve_workflow(workflow: str, supported_languages: Optional[set[str]] = No
|
|
|
370
440
|
|
|
371
441
|
if is_trs_workflow(workflow):
|
|
372
442
|
# Ask TRS host where to find TRS-looking things
|
|
373
|
-
trs_workflow_id, trs_version, language = find_workflow(
|
|
443
|
+
trs_workflow_id, trs_version, language = find_workflow(
|
|
444
|
+
workflow, supported_languages
|
|
445
|
+
)
|
|
374
446
|
resolved = fetch_workflow(trs_workflow_id, trs_version, language)
|
|
375
447
|
logger.info("Resolved TRS workflow %s to %s", workflow, resolved)
|
|
376
448
|
return resolved, compose_trs_spec(trs_workflow_id, trs_version)
|
|
@@ -378,13 +450,3 @@ def resolve_workflow(workflow: str, supported_languages: Optional[set[str]] = No
|
|
|
378
450
|
# Pass other things through.
|
|
379
451
|
# TODO: Find out if they have TRS names.
|
|
380
452
|
return workflow, None
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
toil/lib/url.py
CHANGED
|
@@ -12,26 +12,12 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
import logging
|
|
15
|
-
import
|
|
16
|
-
from
|
|
17
|
-
from typing import (
|
|
18
|
-
IO,
|
|
19
|
-
TYPE_CHECKING,
|
|
20
|
-
Any,
|
|
21
|
-
Callable,
|
|
22
|
-
ContextManager,
|
|
23
|
-
Literal,
|
|
24
|
-
Optional,
|
|
25
|
-
Union,
|
|
26
|
-
cast,
|
|
27
|
-
overload,
|
|
28
|
-
Type,
|
|
29
|
-
)
|
|
15
|
+
from abc import abstractmethod
|
|
16
|
+
from typing import IO, cast
|
|
30
17
|
from urllib.parse import ParseResult, urlparse
|
|
31
18
|
|
|
32
19
|
from toil.lib.exceptions import UnimplementedURLException
|
|
33
|
-
from toil.lib.
|
|
34
|
-
from toil.lib.plugins import register_plugin, get_plugin
|
|
20
|
+
from toil.lib.plugins import get_plugin, register_plugin
|
|
35
21
|
|
|
36
22
|
try:
|
|
37
23
|
from botocore.exceptions import ProxyConnectionError
|
|
@@ -40,8 +26,10 @@ except ImportError:
|
|
|
40
26
|
class ProxyConnectionError(BaseException): # type: ignore
|
|
41
27
|
"""Dummy class."""
|
|
42
28
|
|
|
29
|
+
|
|
43
30
|
logger = logging.getLogger(__name__)
|
|
44
31
|
|
|
32
|
+
|
|
45
33
|
class URLAccess:
|
|
46
34
|
"""
|
|
47
35
|
Widget for accessing external storage (URLs).
|
|
@@ -62,7 +50,7 @@ class URLAccess:
|
|
|
62
50
|
return otherCls._url_exists(parseResult)
|
|
63
51
|
|
|
64
52
|
@classmethod
|
|
65
|
-
def get_size(cls, src_uri: str) ->
|
|
53
|
+
def get_size(cls, src_uri: str) -> int | None:
|
|
66
54
|
"""
|
|
67
55
|
Get the size in bytes of the file at the given URL, or None if it cannot be obtained.
|
|
68
56
|
|
|
@@ -147,7 +135,7 @@ class URLAccess:
|
|
|
147
135
|
|
|
148
136
|
@classmethod
|
|
149
137
|
@abstractmethod
|
|
150
|
-
def _get_size(cls, url: ParseResult) ->
|
|
138
|
+
def _get_size(cls, url: ParseResult) -> int | None:
|
|
151
139
|
"""
|
|
152
140
|
Get the size of the object at the given URL, or None if it cannot be obtained.
|
|
153
141
|
"""
|
|
@@ -217,7 +205,7 @@ class URLAccess:
|
|
|
217
205
|
@abstractmethod
|
|
218
206
|
def _write_to_url(
|
|
219
207
|
cls,
|
|
220
|
-
readable:
|
|
208
|
+
readable: IO[bytes] | IO[str],
|
|
221
209
|
url: ParseResult,
|
|
222
210
|
executable: bool = False,
|
|
223
211
|
) -> None:
|
|
@@ -264,9 +252,9 @@ class URLAccess:
|
|
|
264
252
|
implementation_factory = get_plugin("url_access", url.scheme.lower())
|
|
265
253
|
except KeyError:
|
|
266
254
|
raise UnimplementedURLException(url, "export" if export else "import")
|
|
267
|
-
|
|
255
|
+
|
|
268
256
|
try:
|
|
269
|
-
implementation = cast(
|
|
257
|
+
implementation = cast(type[URLAccess], implementation_factory())
|
|
270
258
|
except (ImportError, ProxyConnectionError):
|
|
271
259
|
logger.debug(
|
|
272
260
|
"Unable to import implementation for scheme '%s', as is expected if the corresponding extra was "
|
|
@@ -279,10 +267,12 @@ class URLAccess:
|
|
|
279
267
|
return implementation
|
|
280
268
|
raise UnimplementedURLException(url, "export" if export else "import")
|
|
281
269
|
|
|
270
|
+
|
|
282
271
|
#####
|
|
283
272
|
# Built-in url access
|
|
284
273
|
#####
|
|
285
274
|
|
|
275
|
+
|
|
286
276
|
def file_job_store_factory() -> type[URLAccess]:
|
|
287
277
|
from toil.jobStores.fileJobStore import FileJobStore
|
|
288
278
|
|
|
@@ -306,7 +296,8 @@ def job_store_support_factory() -> type[URLAccess]:
|
|
|
306
296
|
|
|
307
297
|
return JobStoreSupport
|
|
308
298
|
|
|
309
|
-
|
|
299
|
+
|
|
300
|
+
# make sure my py still works and the tests work
|
|
310
301
|
# can then get rid of _url_access_classes method
|
|
311
302
|
|
|
312
303
|
#####
|
toil/lib/web.py
CHANGED
|
@@ -23,9 +23,10 @@ user agent.
|
|
|
23
23
|
>>> web_session.get(httpserver.url_for("/path"))
|
|
24
24
|
<Response [200]>
|
|
25
25
|
"""
|
|
26
|
-
import requests
|
|
27
26
|
import sys
|
|
28
27
|
|
|
28
|
+
import requests
|
|
29
|
+
|
|
29
30
|
from toil.version import baseVersion
|
|
30
31
|
|
|
31
32
|
# We manage a Requests session at the module level in case we're supposed to be
|
|
@@ -34,4 +35,8 @@ from toil.version import baseVersion
|
|
|
34
35
|
# in theory (someone might make a new Toil version first, but there's no way
|
|
35
36
|
# to know for sure that nobody else did the same thing).
|
|
36
37
|
web_session = requests.Session()
|
|
37
|
-
web_session.headers.update(
|
|
38
|
+
web_session.headers.update(
|
|
39
|
+
{
|
|
40
|
+
"User-Agent": f"Toil {baseVersion} on Python {'.'.join([str(v) for v in sys.version_info])}"
|
|
41
|
+
}
|
|
42
|
+
)
|